8 files changed, 261 insertions, 38 deletions
diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt
new file mode 100644
index 00000000000..239f542d48b
--- /dev/null
+++ b/Documentation/RCU/rculist_nulls.txt
@@ -0,0 +1,167 @@
+Using hlist_nulls to protect read-mostly linked lists and
+objects using SLAB_DESTROY_BY_RCU allocations.
+
+Please read the basics in Documentation/RCU/listRCU.txt
+
+Using special makers (called 'nulls') is a convenient way
+to solve following problem :
+
+A typical RCU linked list managing objects which are
+allocated with SLAB_DESTROY_BY_RCU kmem_cache can
+use following algos :
+
+1) Lookup algo
+--------------
+rcu_read_lock()
+begin:
+obj = lockless_lookup(key);
+if (obj) {
+  if (!try_get_ref(obj)) // might fail for free objects
+    goto begin;
+  /*
+   * Because a writer could delete object, and a writer could
+   * reuse these object before the RCU grace period, we
+   * must check key after geting the reference on object
+   */
+  if (obj->key != key) { // not the object we expected
+     put_ref(obj);
+     goto begin;
+   }
+}
+rcu_read_unlock();
+
+Beware that lockless_lookup(key) cannot use traditional hlist_for_each_entry_rcu()
+but a version with an additional memory barrier (smp_rmb())
+
+lockless_lookup(key)
+{
+   struct hlist_node *node, *next;
+   for (pos = rcu_dereference((head)->first);
+          pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
+          ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
+          pos = rcu_dereference(next))
+      if (obj->key == key)
+         return obj;
+   return NULL;
+
+And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb() :
+
+   struct hlist_node *node;
+   for (pos = rcu_dereference((head)->first);
+		pos && ({ prefetch(pos->next); 1; }) &&
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
+		pos = rcu_dereference(pos->next))
+      if (obj->key == key)
+         return obj;
+   return NULL;
+}
+
+Quoting Corey Minyard :
+
+"If the object is moved from one list to another list in-between the
+ time the hash is calculated and the next field is accessed, and the
+ object has moved to the end of a new list, the traversal will not
+ complete properly on the list it should have, since the object will
+ be on the end of the new list and there's not a way to tell it's on a
+ new list and restart the list traversal.  I think that this can be
+ solved by pre-fetching the "next" field (with proper barriers) before
+ checking the key."
+
+2) Insert algo :
+----------------
+
+We need to make sure a reader cannot read the new 'obj->obj_next' value
+and previous value of 'obj->key'. Or else, an item could be deleted
+from a chain, and inserted into another chain. If new chain was empty
+before the move, 'next' pointer is NULL, and lockless reader can
+not detect it missed following items in original chain.
+
+/*
+ * Please note that new inserts are done at the head of list,
+ * not in the middle or end.
+ */
+obj = kmem_cache_alloc(...);
+lock_chain(); // typically a spin_lock()
+obj->key = key;
+atomic_inc(&obj->refcnt);
+/*
+ * we need to make sure obj->key is updated before obj->next
+ */
+smp_wmb();
+hlist_add_head_rcu(&obj->obj_node, list);
+unlock_chain(); // typically a spin_unlock()
+
+
+3) Remove algo
+--------------
+Nothing special here, we can use a standard RCU hlist deletion.
+But thanks to SLAB_DESTROY_BY_RCU, beware a deleted object can be reused
+very very fast (before the end of RCU grace period)
+
+if (put_last_reference_on(obj) {
+   lock_chain(); // typically a spin_lock()
+   hlist_del_init_rcu(&obj->obj_node);
+   unlock_chain(); // typically a spin_unlock()
+   kmem_cache_free(cachep, obj);
+}
+
+
+
+--------------------------------------------------------------------------
+With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup()
+and extra smp_wmb() in insert function.
+
+For example, if we choose to store the slot number as the 'nulls'
+end-of-list marker for each slot of the hash table, we can detect
+a race (some writer did a delete and/or a move of an object
+to another chain) checking the final 'nulls' value if
+the lookup met the end of chain. If final 'nulls' value
+is not the slot number, then we must restart the lookup at
+the begining. If the object was moved to same chain,
+then the reader doesnt care : It might eventually
+scan the list again without harm.
+
+
+1) lookup algo
+
+ head = &table[slot];
+ rcu_read_lock();
+begin:
+ hlist_nulls_for_each_entry_rcu(obj, node, head, member) {
+   if (obj->key == key) {
+      if (!try_get_ref(obj)) // might fail for free objects
+         goto begin;
+      if (obj->key != key) { // not the object we expected
+         put_ref(obj);
+         goto begin;
+      }
+  goto out;
+ }
+/*
+ * if the nulls value we got at the end of this lookup is
+ * not the expected one, we must restart lookup.
+ * We probably met an item that was moved to another chain.
+ */
+ if (get_nulls_value(node) != slot)
+   goto begin;
+ obj = NULL;
+
+out:
+ rcu_read_unlock();
+
+2) Insert function :
+--------------------
+
+/*
+ * Please note that new inserts are done at the head of list,
+ * not in the middle or end.
+ */
+obj = kmem_cache_alloc(cachep);
+lock_chain(); // typically a spin_lock()
+obj->key = key;
+atomic_set(&obj->refcnt, 1);
+/*
+ * insert obj in RCU way (readers might be traversing chain)
+ */
+hlist_nulls_add_head_rcu(&obj->obj_node, list);
+unlock_chain(); // typically a spin_unlock()
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index c28a2ac88f9..77eb6b129dd 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -120,13 +120,6 @@ Who:	Christoph Hellwig <hch@lst.de>
 
 ---------------------------
 
-What:   eepro100 network driver
-When:   January 2007
-Why:    replaced by the e100 driver
-Who:    Adrian Bunk <bunk@stusta.de>
-
----------------------------
-
 What:	Unused EXPORT_SYMBOL/EXPORT_SYMBOL_GPL exports
 	(temporary transition config option provided until then)
 	The transition config option will also be removed at the same time.
diff --git a/Documentation/networking/README.ipw2200 b/Documentation/networking/README.ipw2200
index 4f2a40f1dbc..80c728522c4 100644
--- a/Documentation/networking/README.ipw2200
+++ b/Documentation/networking/README.ipw2200
@@ -147,7 +147,7 @@ Where the supported parameter are:
 	driver.  If disabled, the driver will not attempt to scan 
 	for and associate to a network until it has been configured with 
 	one or more properties for the target network, for example configuring 
-	the network SSID.  Default is 1 (auto-associate)
+	the network SSID.  Default is 0 (do not auto-associate)
 	
 	Example: % modprobe ipw2200 associate=0
 
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 688dfe1e6b7..5ede7473b42 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -194,6 +194,48 @@ or, for backwards compatibility, the option value.  E.g.,
 
 	The parameters are as follows:
 
+ad_select
+
+	Specifies the 802.3ad aggregation selection logic to use.  The
+	possible values and their effects are:
+
+	stable or 0
+
+		The active aggregator is chosen by largest aggregate
+		bandwidth.
+
+		Reselection of the active aggregator occurs only when all
+		slaves of the active aggregator are down or the active
+		aggregator has no slaves.
+
+		This is the default value.
+
+	bandwidth or 1
+
+		The active aggregator is chosen by largest aggregate
+		bandwidth.  Reselection occurs if:
+
+		- A slave is added to or removed from the bond
+
+		- Any slave's link state changes
+
+		- Any slave's 802.3ad association state changes
+
+		- The bond's adminstrative state changes to up
+
+	count or 2
+
+		The active aggregator is chosen by the largest number of
+		ports (slaves).  Reselection occurs as described under the
+		"bandwidth" setting, above.
+
+	The bandwidth and count selection policies permit failover of
+	802.3ad aggregations when partial failure of the active aggregator
+	occurs.  This keeps the aggregator with the highest availability
+	(either in bandwidth or in number of ports) active at all times.
+
+	This option was added in bonding version 3.4.0.
+
 arp_interval
 
 	Specifies the ARP link monitoring frequency in milliseconds.
@@ -551,6 +593,16 @@ num_grat_arp
 	affects only the active-backup mode.  This option was added for
 	bonding version 3.3.0.
 
+num_unsol_na
+
+	Specifies the number of unsolicited IPv6 Neighbor Advertisements
+	to be issued after a failover event.  One unsolicited NA is issued
+	immediately after the failover.
+
+	The valid range is 0 - 255; the default value is 1.  This option
+	affects only the active-backup mode.  This option was added for
+	bonding version 3.4.0.
+
 primary
 
 	A string (eth0, eth2, etc) specifying which slave is the
@@ -922,17 +974,19 @@ USERCTL=no
 NETMASK, NETWORK and BROADCAST) to match your network configuration.
 
 	For later versions of initscripts, such as that found with Fedora
-7 and Red Hat Enterprise Linux version 5 (or later), it is possible, and,
-indeed, preferable, to specify the bonding options in the ifcfg-bond0
+7 (or later) and Red Hat Enterprise Linux version 5 (or later), it is possible,
+and, indeed, preferable, to specify the bonding options in the ifcfg-bond0
 file, e.g. a line of the format:
 
-BONDING_OPTS="mode=active-backup arp_interval=60 arp_ip_target=+192.168.1.254"
+BONDING_OPTS="mode=active-backup arp_interval=60 arp_ip_target=192.168.1.254"
 
 	will configure the bond with the specified options.  The options
 specified in BONDING_OPTS are identical to the bonding module parameters
-except for the arp_ip_target field.  Each target should be included as a
-separate option and should be preceded by a '+' to indicate it should be
-added to the list of queried targets, e.g.,
+except for the arp_ip_target field when using versions of initscripts older
+than and 8.57 (Fedora 8) and 8.45.19 (Red Hat Enterprise Linux 5.2).  When
+using older versions each target should be included as a separate option and
+should be preceded by a '+' to indicate it should be added to the list of
+queried targets, e.g.,
 
 	arp_ip_target=+192.168.1.1 arp_ip_target=+192.168.1.2
 
@@ -940,7 +994,7 @@ added to the list of queried targets, e.g.,
 options via BONDING_OPTS, it is not necessary to edit /etc/modules.conf or
 /etc/modprobe.conf.
 
-	For older versions of initscripts that do not support
+	For even older versions of initscripts that do not support
 BONDING_OPTS, it is necessary to edit /etc/modules.conf (or
 /etc/modprobe.conf, depending upon your distro) to load the bonding module
 with your desired options when the bond0 interface is brought up.  The
diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 39131a3c78f..43df4487379 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -57,6 +57,10 @@ can be set before calling bind().
 DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet
 size (application payload size) in bytes, see RFC 4340, section 14.
 
+DCCP_SOCKOPT_AVAILABLE_CCIDS is also read-only and returns the list of CCIDs
+supported by the endpoint (see include/linux/dccp.h for symbolic constants).
+The caller needs to provide a sufficiently large (> 2) array of type uint8_t.
+
 DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold
 timewait state when closing the connection (RFC 4340, 8.3). The usual case is
 that the closing server sends a CloseReq, whereupon the client holds timewait
@@ -121,9 +125,6 @@ send_ndp = 1
 send_ackvec = 1
 	Whether or not to send Ack Vector options (sec. 11.5).
 
-ack_ratio = 2
-	The default Ack Ratio (sec. 11.3) to use.
-
 tx_ccid = 2
 	Default CCID for the sender-receiver half-connection.
 
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index d84932650fd..c7712787933 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -27,6 +27,12 @@ min_adv_mss - INTEGER
 	The advertised MSS depends on the first hop route MTU, but will
 	never be lower than this setting.
 
+rt_cache_rebuild_count - INTEGER
+	The per net-namespace route cache emergency rebuild threshold.
+	Any net-namespace having its route cache rebuilt due to
+	a hash bucket chain being too long more than this many times
+	will have its route caching disabled
+
 IP Fragmentation:
 
 ipfrag_high_thresh - INTEGER
diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt
index a96989a8ff3..dcf31648414 100644
--- a/Documentation/networking/regulatory.txt
+++ b/Documentation/networking/regulatory.txt
@@ -131,11 +131,13 @@ are expected to do this during initialization.
 
 	r = zd_reg2alpha2(mac->regdomain, alpha2);
 	if (!r)
-		regulatory_hint(hw->wiphy, alpha2, NULL);
+		regulatory_hint(hw->wiphy, alpha2);
 
 Example code - drivers providing a built in regulatory domain:
 --------------------------------------------------------------
 
+[NOTE: This API is not currently available, it can be added when required]
+
 If you have regulatory information you can obtain from your
 driver and you *need* to use this we let you build a regulatory domain
 structure and pass it to the wireless core. To do this you should
@@ -167,7 +169,6 @@ struct ieee80211_regdomain mydriver_jp_regdom = {
 
 Then in some part of your code after your wiphy has been registered:
 
-	int r;
 	struct ieee80211_regdomain *rd;
 	int size_of_regd;
 	int num_rules = mydriver_jp_regdom.n_reg_rules;
@@ -178,17 +179,12 @@ Then in some part of your code after your wiphy has been registered:
 
 	rd = kzalloc(size_of_regd, GFP_KERNEL);
 	if (!rd)
-	return -ENOMEM;
+		return -ENOMEM;
 
 	memcpy(rd, &mydriver_jp_regdom, sizeof(struct ieee80211_regdomain));
 
-	for (i=0; i < num_rules; i++) {
-		memcpy(&rd->reg_rules[i], &mydriver_jp_regdom.reg_rules[i],
-			sizeof(struct ieee80211_reg_rule));
-	}
-	r = regulatory_hint(hw->wiphy, NULL, rd);
-	if (r) {
-		kfree(rd);
-		return r;
-	}
-
+	for (i=0; i < num_rules; i++)
+		memcpy(&rd->reg_rules[i],
+		       &mydriver_jp_regdom.reg_rules[i],
+		       sizeof(struct ieee80211_reg_rule));
+	regulatory_struct_hint(rd);
diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt
index b65f0799df4..4d3ee317a4a 100644
--- a/Documentation/rfkill.txt
+++ b/Documentation/rfkill.txt
@@ -191,12 +191,20 @@ Userspace input handlers (uevents) or kernel input handlers (rfkill-input):
 	  to tell the devices registered with the rfkill class to change
 	  their state (i.e. translates the input layer event into real
 	  action).
+
 	* rfkill-input implements EPO by handling EV_SW SW_RFKILL_ALL 0
 	  (power off all transmitters) in a special way: it ignores any
 	  overrides and local state cache and forces all transmitters to the
 	  RFKILL_STATE_SOFT_BLOCKED state (including those which are already
-	  supposed to be BLOCKED).  Note that the opposite event (power on all
-	  transmitters) is handled normally.
+	  supposed to be BLOCKED).
+	* rfkill EPO will remain active until rfkill-input receives an
+	  EV_SW SW_RFKILL_ALL 1 event.  While the EPO is active, transmitters
+	  are locked in the blocked state (rfkill will refuse to unblock them).
+	* rfkill-input implements different policies that the user can
+	  select for handling EV_SW SW_RFKILL_ALL 1.  It will unlock rfkill,
+	  and either do nothing (leave transmitters blocked, but now unlocked),
+	  restore the transmitters to their state before the EPO, or unblock
+	  them all.
 
 Userspace uevent handler or kernel platform-specific drivers hooked to the
 rfkill notifier chain:
@@ -331,11 +339,9 @@ class to get a sysfs interface :-)
 correct event for your switch/button.  These events are emergency power-off
 events when they are trying to turn the transmitters off.  An example of an
 input device which SHOULD generate *_RFKILL_ALL events is the wireless-kill
-switch in a laptop which is NOT a hotkey, but a real switch that kills radios
-in hardware, even if the O.S. has gone to lunch.  An example of an input device
-which SHOULD NOT generate *_RFKILL_ALL events by default, is any sort of hot
-key that does nothing by itself, as well as any hot key that is type-specific
-(e.g. the one for WLAN).
+switch in a laptop which is NOT a hotkey, but a real sliding/rocker switch.
+An example of an input device which SHOULD NOT generate *_RFKILL_ALL events by
+default, is any sort of hot key that is type-specific (e.g. the one for WLAN).
 
 
 3.1 Guidelines for wireless device drivers