From f5eb917b861828da18dc28854308068c66d1449a Mon Sep 17 00:00:00 2001
From: John Hughes <john@calva.com>
Date: Wed, 7 Apr 2010 21:29:25 -0700
Subject: x25: Patch to fix bug 15678 - x25 accesses fields beyond end of
 packet.

Here is a patch to stop X.25 examining fields beyond the end of the packet.

For example, when a simple CALL ACCEPTED was received:

	10 10 0f

x25_parse_facilities was attempting to decode the FACILITIES field, but this
packet contains no facilities field.

Signed-off-by: John Hughes <john@calva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/x25.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/x25.h b/include/net/x25.h
index 9baa07dc7d1..33f67fb7858 100644
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -182,6 +182,10 @@ extern int  sysctl_x25_clear_request_timeout;
 extern int  sysctl_x25_ack_holdback_timeout;
 extern int  sysctl_x25_forward;
 
+extern int x25_parse_address_block(struct sk_buff *skb,
+		struct x25_address *called_addr,
+		struct x25_address *calling_addr);
+
 extern int  x25_addr_ntoa(unsigned char *, struct x25_address *,
 			  struct x25_address *);
 extern int  x25_addr_aton(unsigned char *, struct x25_address *,
-- 
cgit v1.2.3-70-g09d2


From aa6fec3cdeb14ecc916eb78c4cd9ed79e4f7fe8d Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Wed, 31 Mar 2010 16:26:52 +0200
Subject: firewire: cdev: iso packet documentation

Add the missing documentation for iso packets.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 include/linux/firewire-cdev.h | 39 +++++++++++++++++++++++++++++++++------
 1 file changed, 33 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 40b11013408..011fdf1eaec 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -438,7 +438,7 @@ struct fw_cdev_remove_descriptor {
  * @type:	%FW_CDEV_ISO_CONTEXT_TRANSMIT or %FW_CDEV_ISO_CONTEXT_RECEIVE
  * @header_size: Header size to strip for receive contexts
  * @channel:	Channel to bind to
- * @speed:	Speed to transmit at
+ * @speed:	Speed for transmit contexts
  * @closure:	To be returned in &fw_cdev_event_iso_interrupt
  * @handle:	Handle to context, written back by kernel
  *
@@ -451,6 +451,9 @@ struct fw_cdev_remove_descriptor {
  * If a context was successfully created, the kernel writes back a handle to the
  * context, which must be passed in for subsequent operations on that context.
  *
+ * For receive contexts, @header_size must be at least 4 and must be a multiple
+ * of 4.
+ *
  * Note that the effect of a @header_size > 4 depends on
  * &fw_cdev_get_info.version, as documented at &fw_cdev_event_iso_interrupt.
  */
@@ -481,10 +484,34 @@ struct fw_cdev_create_iso_context {
  *
  * &struct fw_cdev_iso_packet is used to describe isochronous packet queues.
  *
- * Use the FW_CDEV_ISO_ macros to fill in @control.  The sy and tag fields are
- * specified by IEEE 1394a and IEC 61883.
- *
- * FIXME - finish this documentation
+ * Use the FW_CDEV_ISO_ macros to fill in @control.
+ *
+ * For transmit packets, the header length must be a multiple of 4 and specifies
+ * the numbers of bytes in @header that will be prepended to the packet's
+ * payload; these bytes are copied into the kernel and will not be accessed
+ * after the ioctl has returned.  The sy and tag fields are copied to the iso
+ * packet header (these fields are specified by IEEE 1394a and IEC 61883-1).
+ * The skip flag specifies that no packet is to be sent in a frame; when using
+ * this, all other fields except the interrupt flag must be zero.
+ *
+ * For receive packets, the header length must be a multiple of the context's
+ * header size; if the header length is larger than the context's header size,
+ * multiple packets are queued for this entry.  The sy and tag fields are
+ * ignored.  If the sync flag is set, the context drops all packets until
+ * a packet with a matching sy field is received (the sync value to wait for is
+ * specified in the &fw_cdev_start_iso structure).  The payload length defines
+ * how many payload bytes can be received for one packet (in addition to payload
+ * quadlets that have been defined as headers and are stripped and returned in
+ * the &fw_cdev_event_iso_interrupt structure).  If more bytes are received, the
+ * additional bytes are dropped.  If less bytes are received, the remaining
+ * bytes in this part of the payload buffer will not be written to, not even by
+ * the next packet, i.e., packets received in consecutive frames will not
+ * necessarily be consecutive in memory.  If an entry has queued multiple
+ * packets, the payload length is divided equally among them.
+ *
+ * When a packet with the interrupt flag set has been completed, the
+ * &fw_cdev_event_iso_interrupt event will be sent.  An entry that has queued
+ * multiple receive packets is completed when its last packet is completed.
  */
 struct fw_cdev_iso_packet {
 	__u32 control;
@@ -501,7 +528,7 @@ struct fw_cdev_iso_packet {
  * Queue a number of isochronous packets for reception or transmission.
  * This ioctl takes a pointer to an array of &fw_cdev_iso_packet structs,
  * which describe how to transmit from or receive into a contiguous region
- * of a mmap()'ed payload buffer.  As part of the packet descriptors,
+ * of a mmap()'ed payload buffer.  As part of transmit packet descriptors,
  * a series of headers can be supplied, which will be prepended to the
  * payload during DMA.
  *
-- 
cgit v1.2.3-70-g09d2


From ca658b1e29d6be939207532e337fb640eb697f71 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sat, 10 Apr 2010 12:23:09 +0200
Subject: firewire: cdev: comment fixlet

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 include/linux/firewire-cdev.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 011fdf1eaec..6ffb24a1f2f 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -647,8 +647,8 @@ struct fw_cdev_get_cycle_timer2 {
  * instead of allocated.
  * An %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED event concludes this operation.
  *
- * To summarize, %FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE allocates iso resources
- * for the lifetime of the fd or handle.
+ * To summarize, %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE allocates iso resources
+ * for the lifetime of the fd or @handle.
  * In contrast, %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE allocates iso resources
  * for the duration of a bus generation.
  *
-- 
cgit v1.2.3-70-g09d2


From 0df5dd4aae211edeeeb84f7f84f6d093406d7c22 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 11 Apr 2010 16:48:44 -0400
Subject: NFSv4: fix delegated locking

Arnaud Giersch reports that NFSv4 locking is broken when we hold a
delegation since commit 8e469ebd6dc32cbaf620e134d79f740bf0ebab79 (NFSv4:
Don't allow posix locking against servers that don't support it).

According to Arnaud, the lock succeeds the first time he opens the file
(since we cannot do a delegated open) but then fails after we start using
delegated opens.

The following patch fixes it by ensuring that locking behaviour is
governed by a per-filesystem capability flag that is initially set, but
gets cleared if the server ever returns an OPEN without the
NFS4_OPEN_RESULT_LOCKTYPE_POSIX flag being set.

Reported-by: Arnaud Giersch <arnaud.giersch@iut-bm.univ-fcomte.fr>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---
 fs/nfs/client.c           | 3 ++-
 fs/nfs/nfs4proc.c         | 4 +++-
 include/linux/nfs_fs_sb.h | 1 +
 3 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 2a3d352c0bf..a8766c4ef2e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1294,7 +1294,8 @@ static int nfs4_init_server(struct nfs_server *server,
 
 	/* Initialise the client representation from the mount data */
 	server->flags = data->flags;
-	server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR;
+	server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|
+		NFS_CAP_POSIX_LOCK;
 	server->options = data->options;
 
 	/* Get a client record */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index fe0cd9eb1d4..638067007c6 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1523,6 +1523,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 		nfs_post_op_update_inode(dir, o_res->dir_attr);
 	} else
 		nfs_refresh_inode(dir, o_res->dir_attr);
+	if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
+		server->caps &= ~NFS_CAP_POSIX_LOCK;
 	if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
 		status = _nfs4_proc_open_confirm(data);
 		if (status != 0)
@@ -1664,7 +1666,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
 	status = PTR_ERR(state);
 	if (IS_ERR(state))
 		goto err_opendata_put;
-	if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0)
+	if (server->caps & NFS_CAP_POSIX_LOCK)
 		set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
 	nfs4_opendata_put(opendata);
 	nfs4_put_state_owner(sp);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 717a5e54eb1..e82957acea5 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -176,6 +176,7 @@ struct nfs_server {
 #define NFS_CAP_ATIME		(1U << 11)
 #define NFS_CAP_CTIME		(1U << 12)
 #define NFS_CAP_MTIME		(1U << 13)
+#define NFS_CAP_POSIX_LOCK	(1U << 14)
 
 
 /* maximum number of slots to use */
-- 
cgit v1.2.3-70-g09d2


From b62730baea32f86fe91a7930e4b7ee8d82778b79 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 9 Apr 2010 15:39:10 -0700
Subject: rcu: Add rcu_access_pointer and rcu_dereference_protected

This patch adds variants of rcu_dereference() that handle
situations where the RCU-protected data structure cannot change,
perhaps due to our holding the update-side lock, or where the
RCU-protected pointer is only to be fetched, not dereferenced.
These are needed due to some performance concerns with using
rcu_dereference() where it is not required, aside from the need
for lockdep/sparse checking.

The new rcu_access_pointer() primitive is for the case where the
pointer is be fetch and not dereferenced.  This primitive may be
used without protection, RCU or otherwise, due to the fact that
it uses ACCESS_ONCE().

The new rcu_dereference_protected() primitive is for the case
where updates are prevented, for example, due to holding the
update-side lock.  This primitive does neither ACCESS_ONCE() nor
smp_read_barrier_depends(), so can only be used when updates are
somehow prevented.

Suggested-by: David Howells <dhowells@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
Cc: eric.dumazet@gmail.com
LKML-Reference: <1270852752-25278-1-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 872a98e13d6..8fe86609441 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -209,12 +209,44 @@ static inline int rcu_read_lock_sched_held(void)
 		rcu_dereference_raw(p); \
 	})
 
+/**
+ * rcu_dereference_protected - fetch RCU pointer when updates prevented
+ *
+ * Return the value of the specified RCU-protected pointer, but omit
+ * both the smp_read_barrier_depends() and the ACCESS_ONCE().  This
+ * is useful in cases where update-side locks prevent the value of the
+ * pointer from changing.  Please note that this primitive does -not-
+ * prevent the compiler from repeating this reference or combining it
+ * with other references, so it should not be used without protection
+ * of appropriate locks.
+ */
+#define rcu_dereference_protected(p, c) \
+	({ \
+		if (debug_lockdep_rcu_enabled() && !(c)) \
+			lockdep_rcu_dereference(__FILE__, __LINE__); \
+		(p); \
+	})
+
 #else /* #ifdef CONFIG_PROVE_RCU */
 
 #define rcu_dereference_check(p, c)	rcu_dereference_raw(p)
+#define rcu_dereference_protected(p, c) (p)
 
 #endif /* #else #ifdef CONFIG_PROVE_RCU */
 
+/**
+ * rcu_access_pointer - fetch RCU pointer with no dereferencing
+ *
+ * Return the value of the specified RCU-protected pointer, but omit the
+ * smp_read_barrier_depends() and keep the ACCESS_ONCE().  This is useful
+ * when the value of this pointer is accessed, but the pointer is not
+ * dereferenced, for example, when testing an RCU-protected pointer against
+ * NULL.  This may also be used in cases where update-side locks prevent
+ * the value of the pointer from changing, but rcu_dereference_protected()
+ * is a lighter-weight primitive for this use case.
+ */
+#define rcu_access_pointer(p)	ACCESS_ONCE(p)
+
 /**
  * rcu_read_lock - mark the beginning of an RCU read-side critical section.
  *
-- 
cgit v1.2.3-70-g09d2


From c08c68dd76bd6b776bc0eb45a5e8f354ed772cdf Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 9 Apr 2010 15:39:11 -0700
Subject: rcu: Better explain the condition parameter of
 rcu_dereference_check()

Better explain the condition parameter of
rcu_dereference_check() that describes the conditions under
which the dereference is permitted to take place (and
incorporate Yong Zhang's suggestion).  This condition is only
checked under lockdep proving.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: eric.dumazet@gmail.com
LKML-Reference: <1270852752-25278-2-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 8fe86609441..9f1ddfef84b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -195,12 +195,30 @@ static inline int rcu_read_lock_sched_held(void)
 
 /**
  * rcu_dereference_check - rcu_dereference with debug checking
+ * @p: The pointer to read, prior to dereferencing
+ * @c: The conditions under which the dereference will take place
  *
- * Do an rcu_dereference(), but check that the context is correct.
- * For example, rcu_dereference_check(gp, rcu_read_lock_held()) to
- * ensure that the rcu_dereference_check() executes within an RCU
- * read-side critical section.  It is also possible to check for
- * locks being held, for example, by using lockdep_is_held().
+ * Do an rcu_dereference(), but check that the conditions under which the
+ * dereference will take place are correct.  Typically the conditions indicate
+ * the various locking conditions that should be held at that point.  The check
+ * should return true if the conditions are satisfied.
+ *
+ * For example:
+ *
+ *	bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() ||
+ *					      lockdep_is_held(&foo->lock));
+ *
+ * could be used to indicate to lockdep that foo->bar may only be dereferenced
+ * if either the RCU read lock is held, or that the lock required to replace
+ * the bar struct at foo->bar is held.
+ *
+ * Note that the list of conditions may also include indications of when a lock
+ * need not be held, for example during initialisation or destruction of the
+ * target struct:
+ *
+ *	bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() ||
+ *					      lockdep_is_held(&foo->lock) ||
+ *					      atomic_read(&foo->usage) == 0);
  */
 #define rcu_dereference_check(p, c) \
 	({ \
-- 
cgit v1.2.3-70-g09d2


From 19b3eecc21b65a24b0aae2684ca0c8e1b99ef802 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 11 Apr 2010 11:52:12 +0200
Subject: firewire: cdev: change license of exported header files to MIT
 license
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Among else, this allows projects like libdc1394 to carry copies of the
ABI related header files without them or distributors having to worry
about effects on the project's overall license terms.  Switch to MIT
license as suggested by Kristian.  Also update the year in the
copyright statement according to source history.

Cc: Jay Fenlason <fenlason@redhat.com>
Acked-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Signed-off-by: Kristian Høgsberg <krh@bitplanet.net>
---
 include/linux/firewire-cdev.h      | 35 ++++++++++++++++++++---------------
 include/linux/firewire-constants.h | 29 +++++++++++++++++++++++++++--
 2 files changed, 47 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 6ffb24a1f2f..81f3b14d5d7 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -1,21 +1,26 @@
 /*
  * Char device interface.
  *
- * Copyright (C) 2005-2006  Kristian Hoegsberg <krh@bitplanet.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ * Copyright (C) 2005-2007  Kristian Hoegsberg <krh@bitplanet.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
  */
 
 #ifndef _LINUX_FIREWIRE_CDEV_H
diff --git a/include/linux/firewire-constants.h b/include/linux/firewire-constants.h
index b316770a43f..9c63f06e67f 100644
--- a/include/linux/firewire-constants.h
+++ b/include/linux/firewire-constants.h
@@ -1,3 +1,28 @@
+/*
+ * IEEE 1394 constants.
+ *
+ * Copyright (C) 2005-2007  Kristian Hoegsberg <krh@bitplanet.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
 #ifndef _LINUX_FIREWIRE_CONSTANTS_H
 #define _LINUX_FIREWIRE_CONSTANTS_H
 
@@ -21,7 +46,7 @@
 #define EXTCODE_WRAP_ADD		0x6
 #define EXTCODE_VENDOR_DEPENDENT	0x7
 
-/* Juju specific tcodes */
+/* Linux firewire-core (Juju) specific tcodes */
 #define TCODE_LOCK_MASK_SWAP		(0x10 | EXTCODE_MASK_SWAP)
 #define TCODE_LOCK_COMPARE_SWAP		(0x10 | EXTCODE_COMPARE_SWAP)
 #define TCODE_LOCK_FETCH_ADD		(0x10 | EXTCODE_FETCH_ADD)
@@ -36,7 +61,7 @@
 #define RCODE_TYPE_ERROR		0x6
 #define RCODE_ADDRESS_ERROR		0x7
 
-/* Juju specific rcodes */
+/* Linux firewire-core (Juju) specific rcodes */
 #define RCODE_SEND_ERROR		0x10
 #define RCODE_CANCELLED			0x11
 #define RCODE_BUSY			0x12
-- 
cgit v1.2.3-70-g09d2


From a2612cb16d4d8447793609cbdd2a2f4f156c0020 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Thu, 15 Apr 2010 22:16:04 +0200
Subject: firewire: cdev: fix cut+paste mistake in disclaimer

This was supposed to be generic "authors or copyright holders";
I mistakenly picked up text from a wrong file.

Reported-by: Daniel K. <dk@uw.no>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 include/linux/firewire-cdev.h      | 2 +-
 include/linux/firewire-constants.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 81f3b14d5d7..68f883b30a5 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -17,7 +17,7 @@
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
diff --git a/include/linux/firewire-constants.h b/include/linux/firewire-constants.h
index 9c63f06e67f..9b4bb5fbba4 100644
--- a/include/linux/firewire-constants.h
+++ b/include/linux/firewire-constants.h
@@ -17,7 +17,7 @@
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
-- 
cgit v1.2.3-70-g09d2


From bc293d62b26ec590afc90a9e0a31c45d355b7bd8 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 15 Apr 2010 12:50:39 -0700
Subject: rcu: Make RCU lockdep check the lockdep_recursion variable

The lockdep facility temporarily disables lockdep checking by
incrementing the current->lockdep_recursion variable.  Such
disabling happens in NMIs and in other situations where lockdep
might expect to recurse on itself.

This patch therefore checks current->lockdep_recursion, disabling RCU
lockdep splats when this variable is non-zero.  In addition, this patch
removes the "likely()", as suggested by Lai Jiangshan.

Reported-by: Frederic Weisbecker <fweisbec@gmail.com>
Reported-by: David Miller <davem@davemloft.net>
Tested-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
Cc: eric.dumazet@gmail.com
LKML-Reference: <20100415195039.GA22623@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h | 5 +----
 kernel/rcupdate.c        | 7 +++++++
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9f1ddfef84b..07db2feb857 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -101,10 +101,7 @@ extern struct lockdep_map rcu_sched_lock_map;
 # define rcu_read_release_sched() \
 		lock_release(&rcu_sched_lock_map, 1, _THIS_IP_)
 
-static inline int debug_lockdep_rcu_enabled(void)
-{
-	return likely(rcu_scheduler_active && debug_locks);
-}
+extern int debug_lockdep_rcu_enabled(void);
 
 /**
  * rcu_read_lock_held - might we be in RCU read-side critical section?
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 63fe2543398..03a7ea1579f 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -69,6 +69,13 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
+int debug_lockdep_rcu_enabled(void)
+{
+	return rcu_scheduler_active && debug_locks &&
+	       current->lockdep_recursion == 0;
+}
+EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
+
 /**
  * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section?
  *
-- 
cgit v1.2.3-70-g09d2


From 79b9517a33a283c5d9db875c263670ed1e055f7e Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Mon, 19 Apr 2010 17:54:31 +1000
Subject: drm/radeon/kms: add FireMV 2400 PCI ID.

This is an M24/X600 chip.

From RH# 581927

cc: stable@kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_pciids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 04a6ebc27b9..2d428b088cc 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -6,6 +6,7 @@
 	{0x1002, 0x3150, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \
 	{0x1002, 0x3152, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x3154, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x3155, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x3E50, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x3E54, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x4136, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS100|RADEON_IS_IGP}, \
-- 
cgit v1.2.3-70-g09d2


From be1a50d4eba4cdb3ebf9d97a0a8693c153436775 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sat, 3 Apr 2010 17:37:45 +0200
Subject: regulator: Let drivers know when they use the stub API

Have the stub variant of regulator_get() return NULL, so that drivers
can (but still don't have to) handle this case specifically.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: Jerome Oufella <jerome.oufella@savoirfairelinux.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 include/linux/regulator/consumer.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 28c9fd020d3..ebd74726529 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -183,9 +183,13 @@ static inline struct regulator *__must_check regulator_get(struct device *dev,
 {
 	/* Nothing except the stubbed out regulator API should be
 	 * looking at the value except to check if it is an error
-	 * value so the actual return value doesn't matter.
+	 * value. Drivers are free to handle NULL specifically by
+	 * skipping all regulator API calls, but they don't have to.
+	 * Drivers which don't, should make sure they properly handle
+	 * corner cases of the API, such as regulator_get_voltage()
+	 * returning 0.
 	 */
-	return (struct regulator *)id;
+	return NULL;
 }
 static inline void regulator_put(struct regulator *regulator)
 {
-- 
cgit v1.2.3-70-g09d2


From 87bf6e7de1134f48681fd2ce4b7c1ec45458cb6d Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 12 Apr 2010 19:35:35 +0900
Subject: KVM: fix the handling of dirty bitmaps to avoid overflows

Int is not long enough to store the size of a dirty bitmap.

This patch fixes this problem with the introduction of a wrapper
function to calculate the sizes of dirty bitmaps.

Note: in mark_page_dirty(), we have to consider the fact that
  __set_bit() takes the offset as int, not long.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c  |  9 +++++----
 arch/powerpc/kvm/book3s.c |  5 +++--
 arch/x86/kvm/x86.c        |  5 +++--
 include/linux/kvm_host.h  |  5 +++++
 virt/kvm/kvm_main.c       | 13 ++++++++-----
 5 files changed, 24 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 73c5c2b05f6..7f3c0a2e60c 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1802,7 +1802,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
 {
 	struct kvm_memory_slot *memslot;
 	int r, i;
-	long n, base;
+	long base;
+	unsigned long n;
 	unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
 			offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
 
@@ -1815,7 +1816,7 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
 	if (!memslot->dirty_bitmap)
 		goto out;
 
-	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+	n = kvm_dirty_bitmap_bytes(memslot);
 	base = memslot->base_gfn / BITS_PER_LONG;
 
 	for (i = 0; i < n/sizeof(long); ++i) {
@@ -1831,7 +1832,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 		struct kvm_dirty_log *log)
 {
 	int r;
-	int n;
+	unsigned long n;
 	struct kvm_memory_slot *memslot;
 	int is_dirty = 0;
 
@@ -1850,7 +1851,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	if (is_dirty) {
 		kvm_flush_remote_tlbs(kvm);
 		memslot = &kvm->memslots->memslots[log->slot];
-		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+		n = kvm_dirty_bitmap_bytes(memslot);
 		memset(memslot->dirty_bitmap, 0, n);
 	}
 	r = 0;
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 25da07fd9f7..604af29b71e 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -1004,7 +1004,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	struct kvm_vcpu *vcpu;
 	ulong ga, ga_end;
 	int is_dirty = 0;
-	int r, n;
+	int r;
+	unsigned long n;
 
 	mutex_lock(&kvm->slots_lock);
 
@@ -1022,7 +1023,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 		kvm_for_each_vcpu(n, vcpu, kvm)
 			kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
 
-		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+		n = kvm_dirty_bitmap_bytes(memslot);
 		memset(memslot->dirty_bitmap, 0, n);
 	}
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9ad3d064c78..45aa90f8cc5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2612,8 +2612,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 				      struct kvm_dirty_log *log)
 {
-	int r, n, i;
+	int r, i;
 	struct kvm_memory_slot *memslot;
+	unsigned long n;
 	unsigned long is_dirty = 0;
 	unsigned long *dirty_bitmap = NULL;
 
@@ -2628,7 +2629,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	if (!memslot->dirty_bitmap)
 		goto out;
 
-	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+	n = kvm_dirty_bitmap_bytes(memslot);
 
 	r = -ENOMEM;
 	dirty_bitmap = vmalloc(n);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a3fd0f91d94..9ad825e1c79 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -119,6 +119,11 @@ struct kvm_memory_slot {
 	int user_alloc;
 };
 
+static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
+{
+	return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+}
+
 struct kvm_kernel_irq_routing_entry {
 	u32 gsi;
 	u32 type;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5a0cd194dce..364daacafb5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -648,7 +648,7 @@ skip_lpage:
 
 	/* Allocate page dirty bitmap if needed */
 	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
-		unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
+		unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new);
 
 		new.dirty_bitmap = vmalloc(dirty_bytes);
 		if (!new.dirty_bitmap)
@@ -768,7 +768,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
 {
 	struct kvm_memory_slot *memslot;
 	int r, i;
-	int n;
+	unsigned long n;
 	unsigned long any = 0;
 
 	r = -EINVAL;
@@ -780,7 +780,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
 	if (!memslot->dirty_bitmap)
 		goto out;
 
-	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+	n = kvm_dirty_bitmap_bytes(memslot);
 
 	for (i = 0; !any && i < n/sizeof(long); ++i)
 		any = memslot->dirty_bitmap[i];
@@ -1186,10 +1186,13 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 	memslot = gfn_to_memslot_unaliased(kvm, gfn);
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
+		unsigned long *p = memslot->dirty_bitmap +
+					rel_gfn / BITS_PER_LONG;
+		int offset = rel_gfn % BITS_PER_LONG;
 
 		/* avoid RMW */
-		if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap))
-			generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
+		if (!generic_test_le_bit(offset, p))
+			generic___set_le_bit(offset, p);
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From e80e2a60ff7914dae691345a976c80bbbff3ec74 Mon Sep 17 00:00:00 2001
From: Sridhar Samudrala <sri@us.ibm.com>
Date: Tue, 30 Mar 2010 16:48:25 -0700
Subject: KVM: Increase NR_IOBUS_DEVS limit to 200

This patch increases the current hardcoded limit of NR_IOBUS_DEVS
from 6 to 200. We are hitting this limit when creating a guest with more
than 1 virtio-net device using vhost-net backend. Each virtio-net
device requires 2 such devices to service notifications from rx/tx queues.

Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9ad825e1c79..169d07758ee 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -54,7 +54,7 @@ extern struct kmem_cache *kvm_vcpu_cache;
  */
 struct kvm_io_bus {
 	int                   dev_count;
-#define NR_IOBUS_DEVS 6
+#define NR_IOBUS_DEVS 200
 	struct kvm_io_device *devs[NR_IOBUS_DEVS];
 };
 
-- 
cgit v1.2.3-70-g09d2


From 04de0816173c86948b75da93a6344a0a02bbec4d Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Tue, 20 Apr 2010 14:49:01 +0200
Subject: pcmcia: pcmcia_dev_present bugfix

pcmcia_dev_present is in and by itself buggy. Add a note specifying
why it is broken, and replace the broken locking -- taking a mutex
is a bad idea in IRQ context, from which this function is rarely
called -- by an atomic_t.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/pcmcia/ds.c | 47 ++++++++++++++---------------------------------
 include/pcmcia/ds.h |  7 +++----
 include/pcmcia/ss.h |  8 +++-----
 3 files changed, 20 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index 4014cf8e4a2..92a5af8aa0b 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -335,7 +335,6 @@ static void pcmcia_card_remove(struct pcmcia_socket *s, struct pcmcia_device *le
 
 		mutex_lock(&s->ops_mutex);
 		list_del(&p_dev->socket_device_list);
-		p_dev->_removed = 1;
 		mutex_unlock(&s->ops_mutex);
 
 		dev_dbg(&p_dev->dev, "unregistering device\n");
@@ -654,14 +653,7 @@ static int pcmcia_requery_callback(struct device *dev, void * _data)
 
 static void pcmcia_requery(struct pcmcia_socket *s)
 {
-	int present, has_pfc;
-
-	mutex_lock(&s->ops_mutex);
-	present = s->pcmcia_state.present;
-	mutex_unlock(&s->ops_mutex);
-
-	if (!present)
-		return;
+	int has_pfc;
 
 	if (s->functions == 0) {
 		pcmcia_card_add(s);
@@ -1260,9 +1252,7 @@ static int ds_event(struct pcmcia_socket *skt, event_t event, int priority)
 
 	switch (event) {
 	case CS_EVENT_CARD_REMOVAL:
-		mutex_lock(&s->ops_mutex);
-		s->pcmcia_state.present = 0;
-		mutex_unlock(&s->ops_mutex);
+		atomic_set(&skt->present, 0);
 		pcmcia_card_remove(skt, NULL);
 		handle_event(skt, event);
 		mutex_lock(&s->ops_mutex);
@@ -1271,9 +1261,9 @@ static int ds_event(struct pcmcia_socket *skt, event_t event, int priority)
 		break;
 
 	case CS_EVENT_CARD_INSERTION:
+		atomic_set(&skt->present, 1);
 		mutex_lock(&s->ops_mutex);
 		s->pcmcia_state.has_pfc = 0;
-		s->pcmcia_state.present = 1;
 		destroy_cis_cache(s); /* to be on the safe side... */
 		mutex_unlock(&s->ops_mutex);
 		pcmcia_card_add(skt);
@@ -1313,7 +1303,13 @@ static int ds_event(struct pcmcia_socket *skt, event_t event, int priority)
     return 0;
 } /* ds_event */
 
-
+/*
+ * NOTE: This is racy. There's no guarantee the card will still be
+ * physically present, even if the call to this function returns
+ * non-NULL. Furthermore, the device driver most likely is unbound
+ * almost immediately, so the timeframe where pcmcia_dev_present
+ * returns NULL is probably really really small.
+ */
 struct pcmcia_device *pcmcia_dev_present(struct pcmcia_device *_p_dev)
 {
 	struct pcmcia_device *p_dev;
@@ -1323,22 +1319,9 @@ struct pcmcia_device *pcmcia_dev_present(struct pcmcia_device *_p_dev)
 	if (!p_dev)
 		return NULL;
 
-	mutex_lock(&p_dev->socket->ops_mutex);
-	if (!p_dev->socket->pcmcia_state.present)
-		goto out;
+	if (atomic_read(&p_dev->socket->present) != 0)
+		ret = p_dev;
 
-	if (p_dev->socket->pcmcia_state.dead)
-		goto out;
-
-	if (p_dev->_removed)
-		goto out;
-
-	if (p_dev->suspended)
-		goto out;
-
-	ret = p_dev;
- out:
-	mutex_unlock(&p_dev->socket->ops_mutex);
 	pcmcia_put_dev(p_dev);
 	return ret;
 }
@@ -1388,6 +1371,8 @@ static int __devinit pcmcia_bus_add_socket(struct device *dev,
 		return ret;
 	}
 
+	atomic_set(&socket->present, 0);
+
 	return 0;
 }
 
@@ -1399,10 +1384,6 @@ static void pcmcia_bus_remove_socket(struct device *dev,
 	if (!socket)
 		return;
 
-	mutex_lock(&socket->ops_mutex);
-	socket->pcmcia_state.dead = 1;
-	mutex_unlock(&socket->ops_mutex);
-
 	pccard_register_pcmcia(socket, NULL);
 
 	/* unregister any unbound devices */
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index d57847f2f6c..aab3c13dc31 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -26,6 +26,7 @@
 #ifdef __KERNEL__
 #include <linux/device.h>
 #include <pcmcia/ss.h>
+#include <asm/atomic.h>
 
 /*
  * PCMCIA device drivers (16-bit cards only; 32-bit cards require CardBus
@@ -94,10 +95,8 @@ struct pcmcia_device {
 	config_req_t		conf;
 	window_handle_t		win;
 
-	/* Is the device suspended, or in the process of
-	 * being removed? */
+	/* Is the device suspended? */
 	u16			suspended:1;
-	u16			_removed:1;
 
 	/* Flags whether io, irq, win configurations were
 	 * requested, and whether the configuration is "locked" */
@@ -115,7 +114,7 @@ struct pcmcia_device {
 	u16			has_card_id:1;
 	u16			has_func_id:1;
 
-	u16			reserved:3;
+	u16			reserved:4;
 
 	u8			func_id;
 	u16			manf_id;
diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h
index 2e488b60bc7..344705cb42f 100644
--- a/include/pcmcia/ss.h
+++ b/include/pcmcia/ss.h
@@ -224,18 +224,16 @@ struct pcmcia_socket {
 
 	/* 16-bit state: */
 	struct {
-		/* PCMCIA card is present in socket */
-		u8			present:1;
 		/* "master" ioctl is used */
 		u8			busy:1;
-		/* pcmcia module is being unloaded */
-		u8			dead:1;
 		/* the PCMCIA card consists of two pseudo devices */
 		u8			has_pfc:1;
 
-		u8			reserved:4;
+		u8			reserved:6;
 	} pcmcia_state;
 
+	/* non-zero if PCMCIA card is present */
+	atomic_t			present;
 
 #ifdef CONFIG_PCMCIA_IOCTL
 	struct user_info_t		*user;
-- 
cgit v1.2.3-70-g09d2


From c3c532061e46156e8aab1268f38d66cfb63aeb2d Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 22 Apr 2010 11:37:01 +0200
Subject: bdi: add helper function for doing init and register of a bdi for a
 file system

Pretty trivial helper, just sets up the bdi and registers it. An atomic
sequence count is used to ensure that the registered sysfs names are
unique.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/backing-dev.h |  1 +
 mm/backing-dev.c            | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index fcbc26af00e..e19c677f219 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -101,6 +101,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		const char *fmt, ...);
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
+int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
 void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
 				long nr_pages);
 int bdi_writeback_task(struct bdi_writeback *wb);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f13e067e146..dbda4707f59 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -11,6 +11,8 @@
 #include <linux/writeback.h>
 #include <linux/device.h>
 
+static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
+
 void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 {
 }
@@ -715,6 +717,33 @@ void bdi_destroy(struct backing_dev_info *bdi)
 }
 EXPORT_SYMBOL(bdi_destroy);
 
+/*
+ * For use from filesystems to quickly init and register a bdi associated
+ * with dirty writeback
+ */
+int bdi_setup_and_register(struct backing_dev_info *bdi, char *name,
+			   unsigned int cap)
+{
+	char tmp[32];
+	int err;
+
+	bdi->name = name;
+	bdi->capabilities = cap;
+	err = bdi_init(bdi);
+	if (err)
+		return err;
+
+	sprintf(tmp, "%.28s%s", name, "-%d");
+	err = bdi_register(bdi, NULL, tmp, atomic_long_inc_return(&bdi_seq));
+	if (err) {
+		bdi_destroy(bdi);
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(bdi_setup_and_register);
+
 static wait_queue_head_t congestion_wqh[2] = {
 		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
 		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
-- 
cgit v1.2.3-70-g09d2


From 5163d90076729413cb882d3dd5c3d3cfb5b9f035 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 22 Apr 2010 12:12:40 +0200
Subject: coda: add bdi backing to mount session

This ensures that dirty data gets flushed properly.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/coda/inode.c            | 8 ++++++++
 include/linux/coda_psdev.h | 2 ++
 2 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index a1695dcadd9..d97f9935a02 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -167,6 +167,10 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
 		return -EBUSY;
 	}
 
+	error = bdi_setup_and_register(&vc->bdi, "coda", BDI_CAP_MAP_COPY);
+	if (error)
+		goto bdi_err;
+
 	vc->vc_sb = sb;
 
 	sb->s_fs_info = vc;
@@ -175,6 +179,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_blocksize_bits = 12;
 	sb->s_magic = CODA_SUPER_MAGIC;
 	sb->s_op = &coda_super_operations;
+	sb->s_bdi = &vc->bdi;
 
 	/* get root fid from Venus: this needs the root inode */
 	error = venus_rootfid(sb, &fid);
@@ -200,6 +205,8 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
         return 0;
 
  error:
+	bdi_destroy(&vc->bdi);
+ bdi_err:
 	if (root)
 		iput(root);
 	if (vc)
@@ -210,6 +217,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
 
 static void coda_put_super(struct super_block *sb)
 {
+	bdi_destroy(&coda_vcp(sb)->bdi);
 	coda_vcp(sb)->vc_sb = NULL;
 	sb->s_fs_info = NULL;
 
diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h
index 5b5d4731f95..644062e8d85 100644
--- a/include/linux/coda_psdev.h
+++ b/include/linux/coda_psdev.h
@@ -1,6 +1,7 @@
 #ifndef __CODA_PSDEV_H
 #define __CODA_PSDEV_H
 
+#include <linux/backing-dev.h>
 #include <linux/magic.h>
 
 #define CODA_PSDEV_MAJOR 67
@@ -17,6 +18,7 @@ struct venus_comm {
 	struct list_head    vc_processing;
 	int                 vc_inuse;
 	struct super_block *vc_sb;
+	struct backing_dev_info bdi;
 };
 
 
-- 
cgit v1.2.3-70-g09d2


From f1970c73cbb6b884152207e4dfe90639f5029905 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 22 Apr 2010 12:31:11 +0200
Subject: ncpfs: add bdi backing to mount session

This ensures that dirty data gets flushed properly.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/ncpfs/inode.c          | 8 ++++++++
 include/linux/ncp_fs_sb.h | 2 ++
 2 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index cf98da1be23..fa338515402 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -526,10 +526,15 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 	sb->s_blocksize_bits = 10;
 	sb->s_magic = NCP_SUPER_MAGIC;
 	sb->s_op = &ncp_sops;
+	sb->s_bdi = &server->bdi;
 
 	server = NCP_SBP(sb);
 	memset(server, 0, sizeof(*server));
 
+	error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY);
+	if (error)
+		goto out_bdi;
+
 	server->ncp_filp = ncp_filp;
 	server->ncp_sock = sock;
 	
@@ -719,6 +724,8 @@ out_fput2:
 	if (server->info_filp)
 		fput(server->info_filp);
 out_fput:
+	bdi_destroy(&server->bdi);
+out_bdi:
 	/* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
 	 * 
 	 * The previously used put_filp(ncp_filp); was bogous, since
@@ -756,6 +763,7 @@ static void ncp_put_super(struct super_block *sb)
 	kill_pid(server->m.wdog_pid, SIGTERM, 1);
 	put_pid(server->m.wdog_pid);
 
+	bdi_destroy(&server->bdi);
 	kfree(server->priv.data);
 	kfree(server->auth.object_name);
 	vfree(server->rxbuf);
diff --git a/include/linux/ncp_fs_sb.h b/include/linux/ncp_fs_sb.h
index 6330fc76b00..5ec9ca67168 100644
--- a/include/linux/ncp_fs_sb.h
+++ b/include/linux/ncp_fs_sb.h
@@ -12,6 +12,7 @@
 #include <linux/ncp_mount.h>
 #include <linux/net.h>
 #include <linux/mutex.h>
+#include <linux/backing-dev.h>
 
 #ifdef __KERNEL__
 
@@ -127,6 +128,7 @@ struct ncp_server {
 		size_t len;
 		__u8 data[128];
 	} unexpected_packet;
+	struct backing_dev_info bdi;
 };
 
 extern void ncp_tcp_rcv_proc(struct work_struct *work);
-- 
cgit v1.2.3-70-g09d2


From 424264b7b220e8eee165dc3080ae48692af73dec Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 22 Apr 2010 12:37:07 +0200
Subject: smbfs: add bdi backing to mount session

This ensures that dirty data gets flushed properly.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/smbfs/inode.c          | 8 ++++++++
 include/linux/smb_fs_sb.h | 3 +++
 2 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 1c4c8f08997..dfa1d67f8fc 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -479,6 +479,7 @@ smb_put_super(struct super_block *sb)
 	if (server->conn_pid)
 		kill_pid(server->conn_pid, SIGTERM, 1);
 
+	bdi_destroy(&server->bdi);
 	kfree(server->ops);
 	smb_unload_nls(server);
 	sb->s_fs_info = NULL;
@@ -525,6 +526,11 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
 	if (!server)
 		goto out_no_server;
 	sb->s_fs_info = server;
+	
+	if (bdi_setup_and_register(&server->bdi, "smbfs", BDI_CAP_MAP_COPY))
+		goto out_bdi;
+
+	sb->s_bdi = &server->bdi;
 
 	server->super_block = sb;
 	server->mnt = NULL;
@@ -624,6 +630,8 @@ out_no_smbiod:
 out_bad_option:
 	kfree(mem);
 out_no_mem:
+	bdi_destroy(&server->bdi);
+out_bdi:
 	if (!server->mnt)
 		printk(KERN_ERR "smb_fill_super: allocation failure\n");
 	sb->s_fs_info = NULL;
diff --git a/include/linux/smb_fs_sb.h b/include/linux/smb_fs_sb.h
index 8a060a7040d..bb947dd1fba 100644
--- a/include/linux/smb_fs_sb.h
+++ b/include/linux/smb_fs_sb.h
@@ -10,6 +10,7 @@
 #define _SMB_FS_SB
 
 #include <linux/types.h>
+#include <linux/backing-dev.h>
 #include <linux/smb.h>
 
 /*
@@ -74,6 +75,8 @@ struct smb_sb_info {
 	struct smb_ops *ops;
 
 	struct super_block *super_block;
+
+	struct backing_dev_info bdi;
 };
 
 static inline int
-- 
cgit v1.2.3-70-g09d2


From 71d0a6112a363e703e383ae5b12c492485c39701 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 22 Apr 2010 15:35:57 -0400
Subject: NFS: Fix an unstable write data integrity race

Commit 2c61be0a9478258f77b66208a0c4b1f5f8161c3c (NFS: Ensure that the WRITE
and COMMIT RPC calls are always uninterruptible) exposed a race on file
close. In order to ensure correct close-to-open behaviour, we want to wait
for all outstanding background commit operations to complete.

This patch adds an inode flag that indicates if a commit operation is under
way, and provides a mechanism to allow ->write_inode() to wait for its
completion if this is a data integrity flush.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c         | 36 ++++++++++++++++++++++++++++++++----
 include/linux/nfs_fs.h |  1 +
 2 files changed, 33 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index de38d63aa92..ccde2aeb3fe 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1201,6 +1201,25 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
+{
+	if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
+		return 1;
+	if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags,
+				NFS_INO_COMMIT, nfs_wait_bit_killable,
+				TASK_KILLABLE))
+		return 1;
+	return 0;
+}
+
+static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
+{
+	clear_bit(NFS_INO_COMMIT, &nfsi->flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
+}
+
+
 static void nfs_commitdata_release(void *data)
 {
 	struct nfs_write_data *wdata = data;
@@ -1262,8 +1281,6 @@ static int nfs_commit_rpcsetup(struct list_head *head,
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
-	if (how & FLUSH_SYNC)
-		rpc_wait_for_completion_task(task);
 	rpc_put_task(task);
 	return 0;
 }
@@ -1294,6 +1311,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 				BDI_RECLAIMABLE);
 		nfs_clear_page_tag_locked(req);
 	}
+	nfs_commit_clear_lock(NFS_I(inode));
 	return -ENOMEM;
 }
 
@@ -1349,6 +1367,7 @@ static void nfs_commit_release(void *calldata)
 	next:
 		nfs_clear_page_tag_locked(req);
 	}
+	nfs_commit_clear_lock(NFS_I(data->inode));
 	nfs_commitdata_release(calldata);
 }
 
@@ -1363,8 +1382,11 @@ static const struct rpc_call_ops nfs_commit_ops = {
 static int nfs_commit_inode(struct inode *inode, int how)
 {
 	LIST_HEAD(head);
-	int res;
+	int may_wait = how & FLUSH_SYNC;
+	int res = 0;
 
+	if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
+		goto out;
 	spin_lock(&inode->i_lock);
 	res = nfs_scan_commit(inode, &head, 0, 0);
 	spin_unlock(&inode->i_lock);
@@ -1372,7 +1394,13 @@ static int nfs_commit_inode(struct inode *inode, int how)
 		int error = nfs_commit_list(inode, &head, how);
 		if (error < 0)
 			return error;
-	}
+		if (may_wait)
+			wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
+					nfs_wait_bit_killable,
+					TASK_KILLABLE);
+	} else
+		nfs_commit_clear_lock(NFS_I(inode));
+out:
 	return res;
 }
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1a0b85aa151..07ce4609fe5 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -209,6 +209,7 @@ struct nfs_inode {
 #define NFS_INO_FLUSHING	(4)		/* inode is flushing out data */
 #define NFS_INO_FSCACHE		(5)		/* inode can be cached by FS-Cache */
 #define NFS_INO_FSCACHE_LOCK	(6)		/* FS-Cache cookie management lock */
+#define NFS_INO_COMMIT		(7)		/* inode is committing unstable writes */
 
 static inline struct nfs_inode *NFS_I(const struct inode *inode)
 {
-- 
cgit v1.2.3-70-g09d2


From 3a3076f4d6e2fa31338a0b007df42a3b32f079e0 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Fri, 23 Apr 2010 12:17:17 -0400
Subject: Cleanup generic block based fiemap

This cleans up a few of the complaints of __generic_block_fiemap.  I've
fixed all the typing stuff, used inline functions instead of macros,
gotten rid of a couple of variables, and made sure the size and block
requests are all block aligned.  It also fixes a problem where sometimes
FIEMAP_EXTENT_LAST wasn't being set properly.

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ioctl.c         | 92 +++++++++++++++++++++++++++++++-----------------------
 include/linux/fs.h |  5 +--
 2 files changed, 56 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/fs/ioctl.c b/fs/ioctl.c
index 6c751106c2e..7faefb4da93 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -228,14 +228,23 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
 
 #ifdef CONFIG_BLOCK
 
-#define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits)
-#define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits);
+static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
+{
+	return (offset >> inode->i_blkbits);
+}
+
+static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
+{
+	return (blk << inode->i_blkbits);
+}
 
 /**
  * __generic_block_fiemap - FIEMAP for block based inodes (no locking)
- * @inode - the inode to map
- * @arg - the pointer to userspace where we copy everything to
- * @get_block - the fs's get_block function
+ * @inode: the inode to map
+ * @fieinfo: the fiemap info struct that will be passed back to userspace
+ * @start: where to start mapping in the inode
+ * @len: how much space to map
+ * @get_block: the fs's get_block function
  *
  * This does FIEMAP for block based inodes.  Basically it will just loop
  * through get_block until we hit the number of extents we want to map, or we
@@ -250,58 +259,63 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
  */
 
 int __generic_block_fiemap(struct inode *inode,
-			   struct fiemap_extent_info *fieinfo, u64 start,
-			   u64 len, get_block_t *get_block)
+			   struct fiemap_extent_info *fieinfo, loff_t start,
+			   loff_t len, get_block_t *get_block)
 {
-	struct buffer_head tmp;
-	unsigned long long start_blk;
-	long long length = 0, map_len = 0;
+	struct buffer_head map_bh;
+	sector_t start_blk, last_blk;
+	loff_t isize = i_size_read(inode);
 	u64 logical = 0, phys = 0, size = 0;
 	u32 flags = FIEMAP_EXTENT_MERGED;
-	int ret = 0, past_eof = 0, whole_file = 0;
+	bool past_eof = false, whole_file = false;
+	int ret = 0;
 
-	if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC)))
+	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+	if (ret)
 		return ret;
 
-	start_blk = logical_to_blk(inode, start);
-
-	length = (long long)min_t(u64, len, i_size_read(inode));
-	if (length < len)
-		whole_file = 1;
+	/*
+	 * Either the i_mutex or other appropriate locking needs to be held
+	 * since we expect isize to not change at all through the duration of
+	 * this call.
+	 */
+	if (len >= isize) {
+		whole_file = true;
+		len = isize;
+	}
 
-	map_len = length;
+	start_blk = logical_to_blk(inode, start);
+	last_blk = logical_to_blk(inode, start + len - 1);
 
 	do {
 		/*
 		 * we set b_size to the total size we want so it will map as
 		 * many contiguous blocks as possible at once
 		 */
-		memset(&tmp, 0, sizeof(struct buffer_head));
-		tmp.b_size = map_len;
+		memset(&map_bh, 0, sizeof(struct buffer_head));
+		map_bh.b_size = len;
 
-		ret = get_block(inode, start_blk, &tmp, 0);
+		ret = get_block(inode, start_blk, &map_bh, 0);
 		if (ret)
 			break;
 
 		/* HOLE */
-		if (!buffer_mapped(&tmp)) {
-			length -= blk_to_logical(inode, 1);
+		if (!buffer_mapped(&map_bh)) {
 			start_blk++;
 
 			/*
-			 * we want to handle the case where there is an
+			 * We want to handle the case where there is an
 			 * allocated block at the front of the file, and then
 			 * nothing but holes up to the end of the file properly,
 			 * to make sure that extent at the front gets properly
 			 * marked with FIEMAP_EXTENT_LAST
 			 */
 			if (!past_eof &&
-			    blk_to_logical(inode, start_blk) >=
-			    blk_to_logical(inode, 0)+i_size_read(inode))
+			    blk_to_logical(inode, start_blk) >= isize)
 				past_eof = 1;
 
 			/*
-			 * first hole after going past the EOF, this is our
+			 * First hole after going past the EOF, this is our
 			 * last extent
 			 */
 			if (past_eof && size) {
@@ -309,15 +323,18 @@ int __generic_block_fiemap(struct inode *inode,
 				ret = fiemap_fill_next_extent(fieinfo, logical,
 							      phys, size,
 							      flags);
-				break;
+			} else if (size) {
+				ret = fiemap_fill_next_extent(fieinfo, logical,
+							      phys, size, flags);
+				size = 0;
 			}
 
 			/* if we have holes up to/past EOF then we're done */
-			if (length <= 0 || past_eof)
+			if (start_blk > last_blk || past_eof || ret)
 				break;
 		} else {
 			/*
-			 * we have gone over the length of what we wanted to
+			 * We have gone over the length of what we wanted to
 			 * map, and it wasn't the entire file, so add the extent
 			 * we got last time and exit.
 			 *
@@ -331,7 +348,7 @@ int __generic_block_fiemap(struct inode *inode,
 			 * are good to go, just add the extent to the fieinfo
 			 * and break
 			 */
-			if (length <= 0 && !whole_file) {
+			if (start_blk > last_blk && !whole_file) {
 				ret = fiemap_fill_next_extent(fieinfo, logical,
 							      phys, size,
 							      flags);
@@ -351,11 +368,10 @@ int __generic_block_fiemap(struct inode *inode,
 			}
 
 			logical = blk_to_logical(inode, start_blk);
-			phys = blk_to_logical(inode, tmp.b_blocknr);
-			size = tmp.b_size;
+			phys = blk_to_logical(inode, map_bh.b_blocknr);
+			size = map_bh.b_size;
 			flags = FIEMAP_EXTENT_MERGED;
 
-			length -= tmp.b_size;
 			start_blk += logical_to_blk(inode, size);
 
 			/*
@@ -363,15 +379,13 @@ int __generic_block_fiemap(struct inode *inode,
 			 * soon as we find a hole that the last extent we found
 			 * is marked with FIEMAP_EXTENT_LAST
 			 */
-			if (!past_eof &&
-			    logical+size >=
-			    blk_to_logical(inode, 0)+i_size_read(inode))
-				past_eof = 1;
+			if (!past_eof && logical + size >= isize)
+				past_eof = true;
 		}
 		cond_resched();
 	} while (1);
 
-	/* if ret is 1 then we just hit the end of the extent array */
+	/* If ret is 1 then we just hit the end of the extent array */
 	if (ret == 1)
 		ret = 0;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 39d57bc6cc7..44f35aea2f1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2315,8 +2315,9 @@ extern int vfs_fstatat(int , char __user *, struct kstat *, int);
 extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
 		    unsigned long arg);
 extern int __generic_block_fiemap(struct inode *inode,
-				  struct fiemap_extent_info *fieinfo, u64 start,
-				  u64 len, get_block_t *get_block);
+				  struct fiemap_extent_info *fieinfo,
+				  loff_t start, loff_t len,
+				  get_block_t *get_block);
 extern int generic_block_fiemap(struct inode *inode,
 				struct fiemap_extent_info *fieinfo, u64 start,
 				u64 len, get_block_t *get_block);
-- 
cgit v1.2.3-70-g09d2


From 23be7468e8802a2ac1de6ee3eecb3ec7f14dc703 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Fri, 23 Apr 2010 13:17:56 -0400
Subject: hugetlb: fix infinite loop in get_futex_key() when backed by huge
 pages

If a futex key happens to be located within a huge page mapped
MAP_PRIVATE, get_futex_key() can go into an infinite loop waiting for a
page->mapping that will never exist.

See https://bugzilla.redhat.com/show_bug.cgi?id=552257 for more details
about the problem.

This patch makes page->mapping a poisoned value that includes
PAGE_MAPPING_ANON mapped MAP_PRIVATE.  This is enough for futex to
continue but because of PAGE_MAPPING_ANON, the poisoned value is not
dereferenced or used by futex.  No other part of the VM should be
dereferencing the page->mapping of a hugetlbfs page as its page cache is
not on the LRU.

This patch fixes the problem with the test case described in the bugzilla.

[akpm@linux-foundation.org: mel cant spel]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Darren Hart <darren@dvhart.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/poison.h | 9 +++++++++
 mm/hugetlb.c           | 5 ++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/poison.h b/include/linux/poison.h
index 2110a81c5e2..34066ffd893 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -48,6 +48,15 @@
 #define POISON_FREE	0x6b	/* for use-after-free poisoning */
 #define	POISON_END	0xa5	/* end-byte of poisoning */
 
+/********** mm/hugetlb.c **********/
+/*
+ * Private mappings of hugetlb pages use this poisoned value for
+ * page->mapping. The core VM should not be doing anything with this mapping
+ * but futex requires the existence of some page->mapping value even though it
+ * is unused if PAGE_MAPPING_ANON is set.
+ */
+#define HUGETLB_POISON	((void *)(0x00300300 + POISON_POINTER_DELTA + PAGE_MAPPING_ANON))
+
 /********** arch/$ARCH/mm/init.c **********/
 #define POISON_FREE_INITMEM	0xcc
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6034dc9e979..ffbdfc86aed 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -546,6 +546,7 @@ static void free_huge_page(struct page *page)
 
 	mapping = (struct address_space *) page_private(page);
 	set_page_private(page, 0);
+	page->mapping = NULL;
 	BUG_ON(page_count(page));
 	INIT_LIST_HEAD(&page->lru);
 
@@ -2447,8 +2448,10 @@ retry:
 			spin_lock(&inode->i_lock);
 			inode->i_blocks += blocks_per_huge_page(h);
 			spin_unlock(&inode->i_lock);
-		} else
+		} else {
 			lock_page(page);
+			page->mapping = HUGETLB_POISON;
+		}
 	}
 
 	/*
-- 
cgit v1.2.3-70-g09d2


From 5129a469a91a91427334c40e29e64c6d0ab68caf Mon Sep 17 00:00:00 2001
From: Jörn Engel <joern@logfs.org>
Date: Sun, 25 Apr 2010 08:54:42 +0200
Subject: Catch filesystems lacking s_bdi

noop_backing_dev_info is used only as a flag to mark filesystems that
don't have any backing store, like tmpfs, procfs, spufs, etc.

Signed-off-by: Joern Engel <joern@logfs.org>

Changed the BUG_ON() to a WARN_ON(). Note that adding dirty inodes
to the noop_backing_dev_info is not legal and will not result in
them being flushed, but we already catch this condition in
__mark_inode_dirty() when checking for a registered bdi.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/super.c                  | 8 +++++---
 fs/sync.c                   | 3 ++-
 include/linux/backing-dev.h | 1 +
 mm/backing-dev.c            | 5 +++++
 4 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/super.c b/fs/super.c
index f35ac602210..dc72491a19f 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -693,6 +693,7 @@ int set_anon_super(struct super_block *s, void *data)
 		return -EMFILE;
 	}
 	s->s_dev = MKDEV(0, dev & MINORMASK);
+	s->s_bdi = &noop_backing_dev_info;
 	return 0;
 }
 
@@ -954,10 +955,11 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 	if (error < 0)
 		goto out_free_secdata;
 	BUG_ON(!mnt->mnt_sb);
+	WARN_ON(!mnt->mnt_sb->s_bdi);
 
- 	error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
- 	if (error)
- 		goto out_sb;
+	error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
+	if (error)
+		goto out_sb;
 
 	/*
 	 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
diff --git a/fs/sync.c b/fs/sync.c
index fc5c3d75cf3..92b228176f7 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -14,6 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
+#include <linux/backing-dev.h>
 #include "internal.h"
 
 #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
@@ -32,7 +33,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 	 * This should be safe, as we require bdi backing to actually
 	 * write out data in the first place
 	 */
-	if (!sb->s_bdi)
+	if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info)
 		return 0;
 
 	if (sb->s_qcop && sb->s_qcop->quota_sync)
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index e19c677f219..bd0e3c6f323 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -247,6 +247,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
 #endif
 
 extern struct backing_dev_info default_backing_dev_info;
+extern struct backing_dev_info noop_backing_dev_info;
 void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page);
 
 int writeback_in_progress(struct backing_dev_info *bdi);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index dbda4707f59..707d0dc6da0 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -27,6 +27,11 @@ struct backing_dev_info default_backing_dev_info = {
 };
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
+struct backing_dev_info noop_backing_dev_info = {
+	.name		= "noop",
+};
+EXPORT_SYMBOL_GPL(noop_backing_dev_info);
+
 static struct class *bdi_class;
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 33f60e9640b2f60dde6735293d4aa5ecc5b1d5d5 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Wed, 28 Apr 2010 09:20:33 +0200
Subject: coda: move backing-dev.h kernel include inside __KERNEL__

Otherwise we must export backing-dev.h as well, which doesn't make
any sense.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/coda_psdev.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h
index 644062e8d85..8859e2ede9f 100644
--- a/include/linux/coda_psdev.h
+++ b/include/linux/coda_psdev.h
@@ -1,13 +1,14 @@
 #ifndef __CODA_PSDEV_H
 #define __CODA_PSDEV_H
 
-#include <linux/backing-dev.h>
 #include <linux/magic.h>
 
 #define CODA_PSDEV_MAJOR 67
 #define MAX_CODADEVS  5	   /* how many do we allow */
 
 #ifdef __KERNEL__
+#include <linux/backing-dev.h>
+
 struct kstatfs;
 
 /* communication pending/processing queues */
-- 
cgit v1.2.3-70-g09d2


From 561b1733a465cf9677356b40c27653dd45f1ac56 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Wed, 28 Apr 2010 08:47:18 +0000
Subject: sctp: avoid irq lock inversion while call sk->sk_data_ready()

sk->sk_data_ready() of sctp socket can be called from both BH and non-BH
contexts, but the default sk->sk_data_ready(), sock_def_readable(), can
not be used in this case. Therefore, we have to make a new function
sctp_data_ready() to grab sk->sk_data_ready() with BH disabling.

=========================================================
[ INFO: possible irq lock inversion dependency detected ]
2.6.33-rc6 #129
---------------------------------------------------------
sctp_darn/1517 just changed the state of lock:
 (clock-AF_INET){++.?..}, at: [<c06aab60>] sock_def_readable+0x20/0x80
but this lock took another, SOFTIRQ-unsafe lock in the past:
 (slock-AF_INET){+.-...}

and interrupts could create inverse lock ordering between them.

other info that might help us debug this:
1 lock held by sctp_darn/1517:
 #0:  (sk_lock-AF_INET){+.+.+.}, at: [<cdfe363d>] sctp_sendmsg+0x23d/0xc00 [sctp]

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h |  1 +
 net/sctp/endpointola.c  |  1 +
 net/sctp/socket.c       | 10 ++++++++++
 3 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 78740ec57d5..fa6cde578a1 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -128,6 +128,7 @@ extern int sctp_register_pf(struct sctp_pf *, sa_family_t);
 int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb);
 int sctp_inet_listen(struct socket *sock, int backlog);
 void sctp_write_space(struct sock *sk);
+void sctp_data_ready(struct sock *sk, int len);
 unsigned int sctp_poll(struct file *file, struct socket *sock,
 		poll_table *wait);
 void sctp_sock_rfree(struct sk_buff *skb);
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 905fda582b9..7ec09ba03a1 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -144,6 +144,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 	/* Use SCTP specific send buffer space queues.  */
 	ep->sndbuf_policy = sctp_sndbuf_policy;
 
+	sk->sk_data_ready = sctp_data_ready;
 	sk->sk_write_space = sctp_write_space;
 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 007e8baba08..efa2bc3f002 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6189,6 +6189,16 @@ do_nonblock:
 	goto out;
 }
 
+void sctp_data_ready(struct sock *sk, int len)
+{
+	read_lock_bh(&sk->sk_callback_lock);
+	if (sk_has_sleeper(sk))
+		wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
+						POLLRDNORM | POLLRDBAND);
+	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+	read_unlock_bh(&sk->sk_callback_lock);
+}
+
 /* If socket sndbuf has changed, wake up all per association waiters.  */
 void sctp_write_space(struct sock *sk)
 {
-- 
cgit v1.2.3-70-g09d2


From c0786693404cffd80ca3cb6e75ee7b35186b2825 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 28 Apr 2010 08:47:22 +0000
Subject: sctp: Fix oops when sending queued ASCONF chunks

When we finish processing ASCONF_ACK chunk, we try to send
the next queued ASCONF.  This action runs the sctp state
machine recursively and it's not prepared to do so.

kernel BUG at kernel/timer.c:790!
invalid opcode: 0000 [#1] SMP
last sysfs file: /sys/module/ipv6/initstate
Modules linked in: sha256_generic sctp libcrc32c ipv6 dm_multipath
uinput 8139too i2c_piix4 8139cp mii i2c_core pcspkr virtio_net joydev
floppy virtio_blk virtio_pci [last unloaded: scsi_wait_scan]

Pid: 0, comm: swapper Not tainted 2.6.34-rc4 #15 /Bochs
EIP: 0060:[<c044a2ef>] EFLAGS: 00010286 CPU: 0
EIP is at add_timer+0xd/0x1b
EAX: cecbab14 EBX: 000000f0 ECX: c0957b1c EDX: 03595cf4
ESI: cecba800 EDI: cf276f00 EBP: c0957aa0 ESP: c0957aa0
 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
Process swapper (pid: 0, ti=c0956000 task=c0988ba0 task.ti=c0956000)
Stack:
 c0957ae0 d1851214 c0ab62e4 c0ab5f26 0500ffff 00000004 00000005 00000004
<0> 00000000 d18694fd 00000004 1666b892 cecba800 cecba800 c0957b14
00000004
<0> c0957b94 d1851b11 ceda8b00 cecba800 cf276f00 00000001 c0957b14
000000d0
Call Trace:
 [<d1851214>] ? sctp_side_effects+0x607/0xdfc [sctp]
 [<d1851b11>] ? sctp_do_sm+0x108/0x159 [sctp]
 [<d1863386>] ? sctp_pname+0x0/0x1d [sctp]
 [<d1861a56>] ? sctp_primitive_ASCONF+0x36/0x3b [sctp]
 [<d185657c>] ? sctp_process_asconf_ack+0x2a4/0x2d3 [sctp]
 [<d184e35c>] ? sctp_sf_do_asconf_ack+0x1dd/0x2b4 [sctp]
 [<d1851ac1>] ? sctp_do_sm+0xb8/0x159 [sctp]
 [<d1863334>] ? sctp_cname+0x0/0x52 [sctp]
 [<d1854377>] ? sctp_assoc_bh_rcv+0xac/0xe1 [sctp]
 [<d1858f0f>] ? sctp_inq_push+0x2d/0x30 [sctp]
 [<d186329d>] ? sctp_rcv+0x797/0x82e [sctp]

Tested-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Yuansong Qiao <ysqiao@research.ait.ie>
Signed-off-by: Shuaijun Zhang <szhang@research.ait.ie>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/command.h |  1 +
 net/sctp/sm_make_chunk.c   | 15 ---------------
 net/sctp/sm_sideeffect.c   | 26 ++++++++++++++++++++++++++
 net/sctp/sm_statefuns.c    |  8 +++++++-
 4 files changed, 34 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index 8be5135ff7a..2c55a7ea20a 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -107,6 +107,7 @@ typedef enum {
 	SCTP_CMD_T1_RETRAN,	 /* Mark for retransmission after T1 timeout  */
 	SCTP_CMD_UPDATE_INITTAG, /* Update peer inittag */
 	SCTP_CMD_SEND_MSG,	 /* Send the whole use message */
+	SCTP_CMD_SEND_NEXT_ASCONF, /* Send the next ASCONF after ACK */
 	SCTP_CMD_LAST
 } sctp_verb_t;
 
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index f6fc5c1a407..0fd5b4c8835 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3318,21 +3318,6 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
 	sctp_chunk_free(asconf);
 	asoc->addip_last_asconf = NULL;
 
-	/* Send the next asconf chunk from the addip chunk queue. */
-	if (!list_empty(&asoc->addip_chunk_list)) {
-		struct list_head *entry = asoc->addip_chunk_list.next;
-		asconf = list_entry(entry, struct sctp_chunk, list);
-
-		list_del_init(entry);
-
-		/* Hold the chunk until an ASCONF_ACK is received. */
-		sctp_chunk_hold(asconf);
-		if (sctp_primitive_ASCONF(asoc, asconf))
-			sctp_chunk_free(asconf);
-		else
-			asoc->addip_last_asconf = asconf;
-	}
-
 	return retval;
 }
 
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 4c5bed9af4e..d5ae450b6f0 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -962,6 +962,29 @@ static int sctp_cmd_send_msg(struct sctp_association *asoc,
 }
 
 
+/* Sent the next ASCONF packet currently stored in the association.
+ * This happens after the ASCONF_ACK was succeffully processed.
+ */
+static void sctp_cmd_send_asconf(struct sctp_association *asoc)
+{
+	/* Send the next asconf chunk from the addip chunk
+	 * queue.
+	 */
+	if (!list_empty(&asoc->addip_chunk_list)) {
+		struct list_head *entry = asoc->addip_chunk_list.next;
+		struct sctp_chunk *asconf = list_entry(entry,
+						struct sctp_chunk, list);
+		list_del_init(entry);
+
+		/* Hold the chunk until an ASCONF_ACK is received. */
+		sctp_chunk_hold(asconf);
+		if (sctp_primitive_ASCONF(asoc, asconf))
+			sctp_chunk_free(asconf);
+		else
+			asoc->addip_last_asconf = asconf;
+	}
+}
+
 
 /* These three macros allow us to pull the debugging code out of the
  * main flow of sctp_do_sm() to keep attention focused on the real
@@ -1617,6 +1640,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 			}
 			error = sctp_cmd_send_msg(asoc, cmd->obj.msg);
 			break;
+		case SCTP_CMD_SEND_NEXT_ASCONF:
+			sctp_cmd_send_asconf(asoc);
+			break;
 		default:
 			printk(KERN_WARNING "Impossible command: %u, %p\n",
 			       cmd->verb, cmd->obj.ptr);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index abf601a1b84..24b2cd55563 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3676,8 +3676,14 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
 
 		if (!sctp_process_asconf_ack((struct sctp_association *)asoc,
-					     asconf_ack))
+					     asconf_ack)) {
+			/* Successfully processed ASCONF_ACK.  We can
+			 * release the next asconf if we have one.
+			 */
+			sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF,
+					SCTP_NULL());
 			return SCTP_DISPOSITION_CONSUME;
+		}
 
 		abort = sctp_make_abort(asoc, asconf_ack,
 					sizeof(sctp_errhdr_t));
-- 
cgit v1.2.3-70-g09d2


From 5fa782c2f5ef6c2e4f04d3e228412c9b4a4c8809 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Wed, 28 Apr 2010 10:30:59 +0000
Subject: sctp: Fix skb_over_panic resulting from multiple invalid parameter
 errors (CVE-2010-1173) (v4)

Ok, version 4

Change Notes:
1) Minor cleanups, from Vlads notes

Summary:

Hey-
	Recently, it was reported to me that the kernel could oops in the
following way:

<5> kernel BUG at net/core/skbuff.c:91!
<5> invalid operand: 0000 [#1]
<5> Modules linked in: sctp netconsole nls_utf8 autofs4 sunrpc iptable_filter
ip_tables cpufreq_powersave parport_pc lp parport vmblock(U) vsock(U) vmci(U)
vmxnet(U) vmmemctl(U) vmhgfs(U) acpiphp dm_mirror dm_mod button battery ac md5
ipv6 uhci_hcd ehci_hcd snd_ens1371 snd_rawmidi snd_seq_device snd_pcm_oss
snd_mixer_oss snd_pcm snd_timer snd_page_alloc snd_ac97_codec snd soundcore
pcnet32 mii floppy ext3 jbd ata_piix libata mptscsih mptsas mptspi mptscsi
mptbase sd_mod scsi_mod
<5> CPU:    0
<5> EIP:    0060:[<c02bff27>]    Not tainted VLI
<5> EFLAGS: 00010216   (2.6.9-89.0.25.EL)
<5> EIP is at skb_over_panic+0x1f/0x2d
<5> eax: 0000002c   ebx: c033f461   ecx: c0357d96   edx: c040fd44
<5> esi: c033f461   edi: df653280   ebp: 00000000   esp: c040fd40
<5> ds: 007b   es: 007b   ss: 0068
<5> Process swapper (pid: 0, threadinfo=c040f000 task=c0370be0)
<5> Stack: c0357d96 e0c29478 00000084 00000004 c033f461 df653280 d7883180
e0c2947d
<5>        00000000 00000080 df653490 00000004 de4f1ac0 de4f1ac0 00000004
df653490
<5>        00000001 e0c2877a 08000800 de4f1ac0 df653490 00000000 e0c29d2e
00000004
<5> Call Trace:
<5>  [<e0c29478>] sctp_addto_chunk+0xb0/0x128 [sctp]
<5>  [<e0c2947d>] sctp_addto_chunk+0xb5/0x128 [sctp]
<5>  [<e0c2877a>] sctp_init_cause+0x3f/0x47 [sctp]
<5>  [<e0c29d2e>] sctp_process_unk_param+0xac/0xb8 [sctp]
<5>  [<e0c29e90>] sctp_verify_init+0xcc/0x134 [sctp]
<5>  [<e0c20322>] sctp_sf_do_5_1B_init+0x83/0x28e [sctp]
<5>  [<e0c25333>] sctp_do_sm+0x41/0x77 [sctp]
<5>  [<c01555a4>] cache_grow+0x140/0x233
<5>  [<e0c26ba1>] sctp_endpoint_bh_rcv+0xc5/0x108 [sctp]
<5>  [<e0c2b863>] sctp_inq_push+0xe/0x10 [sctp]
<5>  [<e0c34600>] sctp_rcv+0x454/0x509 [sctp]
<5>  [<e084e017>] ipt_hook+0x17/0x1c [iptable_filter]
<5>  [<c02d005e>] nf_iterate+0x40/0x81
<5>  [<c02e0bb9>] ip_local_deliver_finish+0x0/0x151
<5>  [<c02e0c7f>] ip_local_deliver_finish+0xc6/0x151
<5>  [<c02d0362>] nf_hook_slow+0x83/0xb5
<5>  [<c02e0bb2>] ip_local_deliver+0x1a2/0x1a9
<5>  [<c02e0bb9>] ip_local_deliver_finish+0x0/0x151
<5>  [<c02e103e>] ip_rcv+0x334/0x3b4
<5>  [<c02c66fd>] netif_receive_skb+0x320/0x35b
<5>  [<e0a0928b>] init_stall_timer+0x67/0x6a [uhci_hcd]
<5>  [<c02c67a4>] process_backlog+0x6c/0xd9
<5>  [<c02c690f>] net_rx_action+0xfe/0x1f8
<5>  [<c012a7b1>] __do_softirq+0x35/0x79
<5>  [<c0107efb>] handle_IRQ_event+0x0/0x4f
<5>  [<c01094de>] do_softirq+0x46/0x4d

Its an skb_over_panic BUG halt that results from processing an init chunk in
which too many of its variable length parameters are in some way malformed.

The problem is in sctp_process_unk_param:
if (NULL == *errp)
	*errp = sctp_make_op_error_space(asoc, chunk,
					 ntohs(chunk->chunk_hdr->length));

	if (*errp) {
		sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM,
				 WORD_ROUND(ntohs(param.p->length)));
		sctp_addto_chunk(*errp,
			WORD_ROUND(ntohs(param.p->length)),
				  param.v);

When we allocate an error chunk, we assume that the worst case scenario requires
that we have chunk_hdr->length data allocated, which would be correct nominally,
given that we call sctp_addto_chunk for the violating parameter.  Unfortunately,
we also, in sctp_init_cause insert a sctp_errhdr_t structure into the error
chunk, so the worst case situation in which all parameters are in violation
requires chunk_hdr->length+(sizeof(sctp_errhdr_t)*param_count) bytes of data.

The result of this error is that a deliberately malformed packet sent to a
listening host can cause a remote DOS, described in CVE-2010-1173:
http://cve.mitre.org/cgi-bin/cvename.cgi?name=2010-1173

I've tested the below fix and confirmed that it fixes the issue.  We move to a
strategy whereby we allocate a fixed size error chunk and ignore errors we don't
have space to report.  Tested by me successfully

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  1 +
 net/sctp/sm_make_chunk.c   | 62 ++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 58 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index ff301774471..597f8e27aaf 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -778,6 +778,7 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int off, int len,
 			  struct iovec *data);
 void sctp_chunk_free(struct sctp_chunk *);
 void  *sctp_addto_chunk(struct sctp_chunk *, int len, const void *data);
+void  *sctp_addto_chunk_fixed(struct sctp_chunk *, int len, const void *data);
 struct sctp_chunk *sctp_chunkify(struct sk_buff *,
 				 const struct sctp_association *,
 				 struct sock *);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 0fd5b4c8835..30c1767186b 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -108,7 +108,7 @@ static const struct sctp_paramhdr prsctp_param = {
 	cpu_to_be16(sizeof(struct sctp_paramhdr)),
 };
 
-/* A helper to initialize to initialize an op error inside a
+/* A helper to initialize an op error inside a
  * provided chunk, as most cause codes will be embedded inside an
  * abort chunk.
  */
@@ -125,6 +125,29 @@ void  sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
 	chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err);
 }
 
+/* A helper to initialize an op error inside a
+ * provided chunk, as most cause codes will be embedded inside an
+ * abort chunk.  Differs from sctp_init_cause in that it won't oops
+ * if there isn't enough space in the op error chunk
+ */
+int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
+		      size_t paylen)
+{
+	sctp_errhdr_t err;
+	__u16 len;
+
+	/* Cause code constants are now defined in network order.  */
+	err.cause = cause_code;
+	len = sizeof(sctp_errhdr_t) + paylen;
+	err.length  = htons(len);
+
+	if (skb_tailroom(chunk->skb) >  len)
+		return -ENOSPC;
+	chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk,
+						     sizeof(sctp_errhdr_t),
+						     &err);
+	return 0;
+}
 /* 3.3.2 Initiation (INIT) (1)
  *
  * This chunk is used to initiate a SCTP association between two
@@ -1132,6 +1155,24 @@ nodata:
 	return retval;
 }
 
+/* Create an Operation Error chunk of a fixed size,
+ * specifically, max(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT)
+ * This is a helper function to allocate an error chunk for
+ * for those invalid parameter codes in which we may not want
+ * to report all the errors, if the incomming chunk is large
+ */
+static inline struct sctp_chunk *sctp_make_op_error_fixed(
+	const struct sctp_association *asoc,
+	const struct sctp_chunk *chunk)
+{
+	size_t size = asoc ? asoc->pathmtu : 0;
+
+	if (!size)
+		size = SCTP_DEFAULT_MAXSEGMENT;
+
+	return sctp_make_op_error_space(asoc, chunk, size);
+}
+
 /* Create an Operation Error chunk.  */
 struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
 				 const struct sctp_chunk *chunk,
@@ -1374,6 +1415,18 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
 	return target;
 }
 
+/* Append bytes to the end of a chunk. Returns NULL if there isn't sufficient
+ * space in the chunk
+ */
+void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk,
+			     int len, const void *data)
+{
+	if (skb_tailroom(chunk->skb) > len)
+		return sctp_addto_chunk(chunk, len, data);
+	else
+		return NULL;
+}
+
 /* Append bytes from user space to the end of a chunk.  Will panic if
  * chunk is not big enough.
  * Returns a kernel err value.
@@ -1977,13 +2030,12 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
 		 * returning multiple unknown parameters.
 		 */
 		if (NULL == *errp)
-			*errp = sctp_make_op_error_space(asoc, chunk,
-					ntohs(chunk->chunk_hdr->length));
+			*errp = sctp_make_op_error_fixed(asoc, chunk);
 
 		if (*errp) {
-			sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM,
+			sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
 					WORD_ROUND(ntohs(param.p->length)));
-			sctp_addto_chunk(*errp,
+			sctp_addto_chunk_fixed(*errp,
 					WORD_ROUND(ntohs(param.p->length)),
 					param.v);
 		} else {
-- 
cgit v1.2.3-70-g09d2


From 073900a28d95c75a706bf40ebf092ea048c7b236 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Mon, 12 Apr 2010 13:17:25 +0200
Subject: USB: rename usb_buffer_alloc() and usb_buffer_free()

For more clearance what the functions actually do,

  usb_buffer_alloc() is renamed to usb_alloc_coherent()
  usb_buffer_free()  is renamed to usb_free_coherent()

They should only be used in code which really needs DMA coherency.

[added compatibility macros so we can convert things easier - gregkh]

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Pedro Ribeiro <pedrib@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/usb.c | 20 ++++++++++----------
 include/linux/usb.h    | 18 +++++++++++++++---
 2 files changed, 25 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 1297e9b16a5..0561430f2ed 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -718,7 +718,7 @@ int __usb_get_extra_descriptor(char *buffer, unsigned size,
 EXPORT_SYMBOL_GPL(__usb_get_extra_descriptor);
 
 /**
- * usb_buffer_alloc - allocate dma-consistent buffer for URB_NO_xxx_DMA_MAP
+ * usb_alloc_coherent - allocate dma-consistent buffer for URB_NO_xxx_DMA_MAP
  * @dev: device the buffer will be used with
  * @size: requested buffer size
  * @mem_flags: affect whether allocation may block
@@ -737,30 +737,30 @@ EXPORT_SYMBOL_GPL(__usb_get_extra_descriptor);
  * architectures where CPU caches are not DMA-coherent.  On systems without
  * bus-snooping caches, these buffers are uncached.
  *
- * When the buffer is no longer used, free it with usb_buffer_free().
+ * When the buffer is no longer used, free it with usb_free_coherent().
  */
-void *usb_buffer_alloc(struct usb_device *dev, size_t size, gfp_t mem_flags,
-		       dma_addr_t *dma)
+void *usb_alloc_coherent(struct usb_device *dev, size_t size, gfp_t mem_flags,
+			 dma_addr_t *dma)
 {
 	if (!dev || !dev->bus)
 		return NULL;
 	return hcd_buffer_alloc(dev->bus, size, mem_flags, dma);
 }
-EXPORT_SYMBOL_GPL(usb_buffer_alloc);
+EXPORT_SYMBOL_GPL(usb_alloc_coherent);
 
 /**
- * usb_buffer_free - free memory allocated with usb_buffer_alloc()
+ * usb_free_coherent - free memory allocated with usb_alloc_coherent()
  * @dev: device the buffer was used with
  * @size: requested buffer size
  * @addr: CPU address of buffer
  * @dma: DMA address of buffer
  *
  * This reclaims an I/O buffer, letting it be reused.  The memory must have
- * been allocated using usb_buffer_alloc(), and the parameters must match
+ * been allocated using usb_alloc_coherent(), and the parameters must match
  * those provided in that allocation request.
  */
-void usb_buffer_free(struct usb_device *dev, size_t size, void *addr,
-		     dma_addr_t dma)
+void usb_free_coherent(struct usb_device *dev, size_t size, void *addr,
+		       dma_addr_t dma)
 {
 	if (!dev || !dev->bus)
 		return;
@@ -768,7 +768,7 @@ void usb_buffer_free(struct usb_device *dev, size_t size, void *addr,
 		return;
 	hcd_buffer_free(dev->bus, size, addr, dma);
 }
-EXPORT_SYMBOL_GPL(usb_buffer_free);
+EXPORT_SYMBOL_GPL(usb_free_coherent);
 
 /**
  * usb_buffer_map - create DMA mapping(s) for an urb
diff --git a/include/linux/usb.h b/include/linux/usb.h
index ce1323c4e47..739f1fd1cc1 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1085,7 +1085,7 @@ typedef void (*usb_complete_t)(struct urb *);
  * Alternatively, drivers may pass the URB_NO_xxx_DMA_MAP transfer flags,
  * which tell the host controller driver that no such mapping is needed since
  * the device driver is DMA-aware.  For example, a device driver might
- * allocate a DMA buffer with usb_buffer_alloc() or call usb_buffer_map().
+ * allocate a DMA buffer with usb_alloc_coherent() or call usb_buffer_map().
  * When these transfer flags are provided, host controller drivers will
  * attempt to use the dma addresses found in the transfer_dma and/or
  * setup_dma fields rather than determining a dma address themselves.
@@ -1366,11 +1366,23 @@ static inline int usb_urb_dir_out(struct urb *urb)
 	return (urb->transfer_flags & URB_DIR_MASK) == URB_DIR_OUT;
 }
 
-void *usb_buffer_alloc(struct usb_device *dev, size_t size,
+void *usb_alloc_coherent(struct usb_device *dev, size_t size,
 	gfp_t mem_flags, dma_addr_t *dma);
-void usb_buffer_free(struct usb_device *dev, size_t size,
+void usb_free_coherent(struct usb_device *dev, size_t size,
 	void *addr, dma_addr_t dma);
 
+/* Compatible macros while we switch over */
+static inline void *usb_buffer_alloc(struct usb_device *dev, size_t size,
+				     gfp_t mem_flags, dma_addr_t *dma)
+{
+	return usb_alloc_coherent(dev, size, mem_flags, dma);
+}
+static inline void usb_buffer_free(struct usb_device *dev, size_t size,
+				   void *addr, dma_addr_t dma)
+{
+	return usb_free_coherent(dev, size, addr, dma);
+}
+
 #if 0
 struct urb *usb_buffer_map(struct urb *urb);
 void usb_buffer_dmasync(struct urb *urb);
-- 
cgit v1.2.3-70-g09d2


From 1183f3838c588545592c042c0ce15015661ce7f2 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sun, 2 May 2010 13:42:39 -0700
Subject: net: fix compile error due to double return type in SOCK_DEBUG

Fix this one:
include/net/sock.h: error: two or more data types in declaration specifiers

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index b4603cd54fc..1ad6435f252 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -74,7 +74,7 @@
 					printk(KERN_DEBUG msg); } while (0)
 #else
 /* Validate arguments and do nothing */
-static void inline int __attribute__ ((format (printf, 2, 3)))
+static inline void __attribute__ ((format (printf, 2, 3)))
 SOCK_DEBUG(struct sock *sk, const char *msg, ...)
 {
 }
-- 
cgit v1.2.3-70-g09d2


From 6629dcff19470a894ce294d0adb9cbab94ee1fb9 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 4 May 2010 11:09:28 +0200
Subject: i2c-core: Use per-adapter userspace device lists

Using a single list for all userspace devices leads to a dead lock
on multiplexed buses in some circumstances (mux chip instantiated
from userspace). This is solved by using a separate list for each
bus segment.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Michael Lawnick <ml.lawnick@gmx.de>
---
 drivers/i2c/i2c-core.c | 34 ++++++++++++++++++----------------
 include/linux/i2c.h    |  2 ++
 2 files changed, 20 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 5105126225c..c2258a51fe0 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -40,12 +40,11 @@
 #include "i2c-core.h"
 
 
-/* core_lock protects i2c_adapter_idr, userspace_devices, and guarantees
+/* core_lock protects i2c_adapter_idr, and guarantees
    that device detection, deletion of detected devices, and attach_adapter
    and detach_adapter calls are serialized */
 static DEFINE_MUTEX(core_lock);
 static DEFINE_IDR(i2c_adapter_idr);
-static LIST_HEAD(userspace_devices);
 
 static struct device_type i2c_client_type;
 static int i2c_check_addr(struct i2c_adapter *adapter, int addr);
@@ -542,9 +541,9 @@ i2c_sysfs_new_device(struct device *dev, struct device_attribute *attr,
 		return -EEXIST;
 
 	/* Keep track of the added device */
-	mutex_lock(&core_lock);
-	list_add_tail(&client->detected, &userspace_devices);
-	mutex_unlock(&core_lock);
+	i2c_lock_adapter(adap);
+	list_add_tail(&client->detected, &adap->userspace_clients);
+	i2c_unlock_adapter(adap);
 	dev_info(dev, "%s: Instantiated device %s at 0x%02hx\n", "new_device",
 		 info.type, info.addr);
 
@@ -583,9 +582,10 @@ i2c_sysfs_delete_device(struct device *dev, struct device_attribute *attr,
 
 	/* Make sure the device was added through sysfs */
 	res = -ENOENT;
-	mutex_lock(&core_lock);
-	list_for_each_entry_safe(client, next, &userspace_devices, detected) {
-		if (client->addr == addr && client->adapter == adap) {
+	i2c_lock_adapter(adap);
+	list_for_each_entry_safe(client, next, &adap->userspace_clients,
+				 detected) {
+		if (client->addr == addr) {
 			dev_info(dev, "%s: Deleting device %s at 0x%02hx\n",
 				 "delete_device", client->name, client->addr);
 
@@ -595,7 +595,7 @@ i2c_sysfs_delete_device(struct device *dev, struct device_attribute *attr,
 			break;
 		}
 	}
-	mutex_unlock(&core_lock);
+	i2c_unlock_adapter(adap);
 
 	if (res < 0)
 		dev_err(dev, "%s: Can't find device in list\n",
@@ -677,6 +677,7 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
 	}
 
 	rt_mutex_init(&adap->bus_lock);
+	INIT_LIST_HEAD(&adap->userspace_clients);
 
 	/* Set default timeout to 1 second if not already set */
 	if (adap->timeout == 0)
@@ -879,14 +880,15 @@ int i2c_del_adapter(struct i2c_adapter *adap)
 		return res;
 
 	/* Remove devices instantiated from sysfs */
-	list_for_each_entry_safe(client, next, &userspace_devices, detected) {
-		if (client->adapter == adap) {
-			dev_dbg(&adap->dev, "Removing %s at 0x%x\n",
-				client->name, client->addr);
-			list_del(&client->detected);
-			i2c_unregister_device(client);
-		}
+	i2c_lock_adapter(adap);
+	list_for_each_entry_safe(client, next, &adap->userspace_clients,
+				 detected) {
+		dev_dbg(&adap->dev, "Removing %s at 0x%x\n", client->name,
+			client->addr);
+		list_del(&client->detected);
+		i2c_unregister_device(client);
 	}
+	i2c_unlock_adapter(adap);
 
 	/* Detach any active clients. This can't fail, thus we do not
 	   checking the returned value. */
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 0a5da639b32..6ed1d59bfb1 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -355,6 +355,8 @@ struct i2c_adapter {
 	int nr;
 	char name[48];
 	struct completion dev_released;
+
+	struct list_head userspace_clients;
 };
 #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev)
 
-- 
cgit v1.2.3-70-g09d2


From 1ce7e4ff24fe338438bc7837e02780f202bf202b Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 23 Apr 2010 10:35:52 +0800
Subject: cgroup: Check task_lock in task_subsys_state()

Expand task_subsys_state()'s rcu_dereference_check() to include the full
locking rule as documented in Documentation/cgroups/cgroups.txt by adding
a check for task->alloc_lock being held.

This fixes an RCU false positive when resuming from suspend. The warning
comes from freezer cgroup in cgroup_freezing_or_frozen().

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/cgroup.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b8ad1ea9958..8f78073d7ca 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -530,6 +530,7 @@ static inline struct cgroup_subsys_state *task_subsys_state(
 {
 	return rcu_dereference_check(task->cgroups->subsys[subsys_id],
 				     rcu_read_lock_held() ||
+				     lockdep_is_held(&task->alloc_lock) ||
 				     cgroup_lock_is_held());
 }
 
-- 
cgit v1.2.3-70-g09d2


From 50b5d6ad63821cea324a5a7a19854d4de1a0a819 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 6 May 2010 00:56:07 -0700
Subject: sctp: Fix a race between ICMP protocol unreachable and connect()

ICMP protocol unreachable handling completely disregarded
the fact that the user may have locked the socket.  It proceeded
to destroy the association, even though the user may have
held the lock and had a ref on the association.  This resulted
in the following:

Attempt to release alive inet socket f6afcc00

=========================
[ BUG: held lock freed! ]
-------------------------
somenu/2672 is freeing memory f6afcc00-f6afcfff, with a lock still held
there!
 (sk_lock-AF_INET){+.+.+.}, at: [<c122098a>] sctp_connect+0x13/0x4c
1 lock held by somenu/2672:
 #0:  (sk_lock-AF_INET){+.+.+.}, at: [<c122098a>] sctp_connect+0x13/0x4c

stack backtrace:
Pid: 2672, comm: somenu Not tainted 2.6.32-telco #55
Call Trace:
 [<c1232266>] ? printk+0xf/0x11
 [<c1038553>] debug_check_no_locks_freed+0xce/0xff
 [<c10620b4>] kmem_cache_free+0x21/0x66
 [<c1185f25>] __sk_free+0x9d/0xab
 [<c1185f9c>] sk_free+0x1c/0x1e
 [<c1216e38>] sctp_association_put+0x32/0x89
 [<c1220865>] __sctp_connect+0x36d/0x3f4
 [<c122098a>] ? sctp_connect+0x13/0x4c
 [<c102d073>] ? autoremove_wake_function+0x0/0x33
 [<c12209a8>] sctp_connect+0x31/0x4c
 [<c11d1e80>] inet_dgram_connect+0x4b/0x55
 [<c11834fa>] sys_connect+0x54/0x71
 [<c103a3a2>] ? lock_release_non_nested+0x88/0x239
 [<c1054026>] ? might_fault+0x42/0x7c
 [<c1054026>] ? might_fault+0x42/0x7c
 [<c11847ab>] sys_socketcall+0x6d/0x178
 [<c10da994>] ? trace_hardirqs_on_thunk+0xc/0x10
 [<c1002959>] syscall_call+0x7/0xb

This was because the sctp_wait_for_connect() would aqcure the socket
lock and then proceed to release the last reference count on the
association, thus cause the fully destruction path to finish freeing
the socket.

The simplest solution is to start a very short timer in case the socket
is owned by user.  When the timer expires, we can do some verification
and be able to do the release properly.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sm.h      |  1 +
 include/net/sctp/structs.h |  3 +++
 net/sctp/input.c           | 22 ++++++++++++++++++----
 net/sctp/sm_sideeffect.c   | 35 +++++++++++++++++++++++++++++++++++
 net/sctp/transport.c       |  2 ++
 5 files changed, 59 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 851c813adb3..61d73e37d54 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -279,6 +279,7 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype,
 /* 2nd level prototypes */
 void sctp_generate_t3_rtx_event(unsigned long peer);
 void sctp_generate_heartbeat_event(unsigned long peer);
+void sctp_generate_proto_unreach_event(unsigned long peer);
 
 void sctp_ootb_pkt_free(struct sctp_packet *);
 
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 597f8e27aaf..219043a67bf 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1010,6 +1010,9 @@ struct sctp_transport {
 	/* Heartbeat timer is per destination. */
 	struct timer_list hb_timer;
 
+	/* Timer to handle ICMP proto unreachable envets */
+	struct timer_list proto_unreach_timer;
+
 	/* Since we're using per-destination retransmission timers
 	 * (see above), we're also using per-destination "transmitted"
 	 * queues.  This probably ought to be a private struct
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 2a570184e5a..ea2192444ce 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -440,11 +440,25 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
 {
 	SCTP_DEBUG_PRINTK("%s\n",  __func__);
 
-	sctp_do_sm(SCTP_EVENT_T_OTHER,
-		   SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
-		   asoc->state, asoc->ep, asoc, t,
-		   GFP_ATOMIC);
+	if (sock_owned_by_user(sk)) {
+		if (timer_pending(&t->proto_unreach_timer))
+			return;
+		else {
+			if (!mod_timer(&t->proto_unreach_timer,
+						jiffies + (HZ/20)))
+				sctp_association_hold(asoc);
+		}
+			
+	} else {
+		if (timer_pending(&t->proto_unreach_timer) &&
+		    del_timer(&t->proto_unreach_timer))
+			sctp_association_put(asoc);
 
+		sctp_do_sm(SCTP_EVENT_T_OTHER,
+			   SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
+			   asoc->state, asoc->ep, asoc, t,
+			   GFP_ATOMIC);
+	}
 }
 
 /* Common lookup code for icmp/icmpv6 error handler. */
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index d5ae450b6f0..eb1f42f45fd 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -397,6 +397,41 @@ out_unlock:
 	sctp_transport_put(transport);
 }
 
+/* Handle the timeout of the ICMP protocol unreachable timer.  Trigger
+ * the correct state machine transition that will close the association.
+ */
+void sctp_generate_proto_unreach_event(unsigned long data)
+{
+	struct sctp_transport *transport = (struct sctp_transport *) data;
+	struct sctp_association *asoc = transport->asoc;
+	
+	sctp_bh_lock_sock(asoc->base.sk);
+	if (sock_owned_by_user(asoc->base.sk)) {
+		SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__);
+
+		/* Try again later.  */
+		if (!mod_timer(&transport->proto_unreach_timer,
+				jiffies + (HZ/20)))
+			sctp_association_hold(asoc);
+		goto out_unlock;
+	}
+
+	/* Is this structure just waiting around for us to actually
+	 * get destroyed?
+	 */
+	if (asoc->base.dead)
+		goto out_unlock;
+
+	sctp_do_sm(SCTP_EVENT_T_OTHER,
+		   SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
+		   asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC);
+
+out_unlock:
+	sctp_bh_unlock_sock(asoc->base.sk);
+	sctp_association_put(asoc);
+}
+
+
 /* Inject a SACK Timeout event into the state machine.  */
 static void sctp_generate_sack_event(unsigned long data)
 {
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index be4d63d5a5c..4a368038d46 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -108,6 +108,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
 			(unsigned long)peer);
 	setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event,
 			(unsigned long)peer);
+	setup_timer(&peer->proto_unreach_timer,
+		    sctp_generate_proto_unreach_event, (unsigned long)peer);
 
 	/* Initialize the 64-bit random nonce sent with heartbeat. */
 	get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
-- 
cgit v1.2.3-70-g09d2


From ee84b8243b07c33a5c8aed42b4b2da60cb16d1d2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 6 May 2010 09:28:41 -0700
Subject: rcu: create rcu_my_thread_group_empty() wrapper

Some RCU-lockdep splat repairs need to know whether they are running
in a single-threaded process.  Unfortunately, the thread_group_empty()
primitive is defined in sched.h, and can induce #include hell.  This
commit therefore introduces a rcu_my_thread_group_empty() wrapper that
is defined in rcupdate.c, thus avoiding the need to include sched.h
everywhere.

Signed-off-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h |  2 ++
 kernel/rcupdate.c        | 11 +++++++++++
 2 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 07db2feb857..db266bbed23 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -190,6 +190,8 @@ static inline int rcu_read_lock_sched_held(void)
 
 #ifdef CONFIG_PROVE_RCU
 
+extern int rcu_my_thread_group_empty(void);
+
 /**
  * rcu_dereference_check - rcu_dereference with debug checking
  * @p: The pointer to read, prior to dereferencing
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 03a7ea1579f..49d808e833b 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -122,3 +122,14 @@ void wakeme_after_rcu(struct rcu_head  *head)
 	rcu = container_of(head, struct rcu_synchronize, head);
 	complete(&rcu->completion);
 }
+
+#ifdef CONFIG_PROVE_RCU
+/*
+ * wrapper function to avoid #include problems.
+ */
+int rcu_my_thread_group_empty(void)
+{
+	return thread_group_empty(current);
+}
+EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty);
+#endif /* #ifdef CONFIG_PROVE_RCU */
-- 
cgit v1.2.3-70-g09d2


From 03b1930efd3c2320b1dcba76c8af15f7e454919d Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Wed, 24 Mar 2010 19:09:55 -0300
Subject: V4L/DVB: saa7146: fix regression of the av7110/budget-av driver

An earlier regression fix for the mxb driver (V4L/DVB: saa7146_vv: fix
regression where v4l2_device was registered too late) caused a new
regression in the av7110 driver.

Reverted the old fix and fixed the problem in the mxb driver instead.
Tested on mxb and budget-av cards.

The real problem is that the saa7146 framework has separate probe()
and attach() driver callbacks which should be rolled into one. This
is now done for the mxb driver, but others should do the same. Lack
of hardware makes this hard to do, though. I hope to get hold of some
hexium cards and then I can try to improve the framework to prevent
this from happening again.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/common/saa7146_fops.c | 11 +++++------
 drivers/media/video/hexium_gemini.c |  3 ---
 drivers/media/video/hexium_orion.c  |  4 ----
 drivers/media/video/mxb.c           | 17 ++++++++---------
 include/media/saa7146_vv.h          |  1 -
 5 files changed, 13 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/drivers/media/common/saa7146_fops.c b/drivers/media/common/saa7146_fops.c
index fd8e1f45be3..7364b9642d0 100644
--- a/drivers/media/common/saa7146_fops.c
+++ b/drivers/media/common/saa7146_fops.c
@@ -423,15 +423,14 @@ static void vv_callback(struct saa7146_dev *dev, unsigned long status)
 	}
 }
 
-int saa7146_vv_devinit(struct saa7146_dev *dev)
-{
-	return v4l2_device_register(&dev->pci->dev, &dev->v4l2_dev);
-}
-EXPORT_SYMBOL_GPL(saa7146_vv_devinit);
-
 int saa7146_vv_init(struct saa7146_dev* dev, struct saa7146_ext_vv *ext_vv)
 {
 	struct saa7146_vv *vv;
+	int err;
+
+	err = v4l2_device_register(&dev->pci->dev, &dev->v4l2_dev);
+	if (err)
+		return err;
 
 	vv = kzalloc(sizeof(struct saa7146_vv), GFP_KERNEL);
 	if (vv == NULL) {
diff --git a/drivers/media/video/hexium_gemini.c b/drivers/media/video/hexium_gemini.c
index e620a3a92f2..ad2c232baa6 100644
--- a/drivers/media/video/hexium_gemini.c
+++ b/drivers/media/video/hexium_gemini.c
@@ -356,9 +356,6 @@ static int hexium_attach(struct saa7146_dev *dev, struct saa7146_pci_extension_d
 
 	DEB_EE((".\n"));
 
-	ret = saa7146_vv_devinit(dev);
-	if (ret)
-		return ret;
 	hexium = kzalloc(sizeof(struct hexium), GFP_KERNEL);
 	if (NULL == hexium) {
 		printk("hexium_gemini: not enough kernel memory in hexium_attach().\n");
diff --git a/drivers/media/video/hexium_orion.c b/drivers/media/video/hexium_orion.c
index fe596a1c12a..938a1f8f880 100644
--- a/drivers/media/video/hexium_orion.c
+++ b/drivers/media/video/hexium_orion.c
@@ -216,10 +216,6 @@ static int hexium_probe(struct saa7146_dev *dev)
 		return -EFAULT;
 	}
 
-	err = saa7146_vv_devinit(dev);
-	if (err)
-		return err;
-
 	hexium = kzalloc(sizeof(struct hexium), GFP_KERNEL);
 	if (NULL == hexium) {
 		printk("hexium_orion: hexium_probe: not enough kernel memory.\n");
diff --git a/drivers/media/video/mxb.c b/drivers/media/video/mxb.c
index 9f01f14e4aa..ef0c8178f25 100644
--- a/drivers/media/video/mxb.c
+++ b/drivers/media/video/mxb.c
@@ -169,11 +169,7 @@ static struct saa7146_extension extension;
 static int mxb_probe(struct saa7146_dev *dev)
 {
 	struct mxb *mxb = NULL;
-	int err;
 
-	err = saa7146_vv_devinit(dev);
-	if (err)
-		return err;
 	mxb = kzalloc(sizeof(struct mxb), GFP_KERNEL);
 	if (mxb == NULL) {
 		DEB_D(("not enough kernel memory.\n"));
@@ -699,14 +695,17 @@ static struct saa7146_ext_vv vv_data;
 /* this function only gets called when the probing was successful */
 static int mxb_attach(struct saa7146_dev *dev, struct saa7146_pci_extension_data *info)
 {
-	struct mxb *mxb = (struct mxb *)dev->ext_priv;
+	struct mxb *mxb;
 
 	DEB_EE(("dev:%p\n", dev));
 
-	/* checking for i2c-devices can be omitted here, because we
-	   already did this in "mxb_vl42_probe" */
-
 	saa7146_vv_init(dev, &vv_data);
+	if (mxb_probe(dev)) {
+		saa7146_vv_release(dev);
+		return -1;
+	}
+	mxb = (struct mxb *)dev->ext_priv;
+
 	vv_data.ops.vidioc_queryctrl = vidioc_queryctrl;
 	vv_data.ops.vidioc_g_ctrl = vidioc_g_ctrl;
 	vv_data.ops.vidioc_s_ctrl = vidioc_s_ctrl;
@@ -726,6 +725,7 @@ static int mxb_attach(struct saa7146_dev *dev, struct saa7146_pci_extension_data
 	vv_data.ops.vidioc_default = vidioc_default;
 	if (saa7146_register_device(&mxb->video_dev, dev, "mxb", VFL_TYPE_GRABBER)) {
 		ERR(("cannot register capture v4l2 device. skipping.\n"));
+		saa7146_vv_release(dev);
 		return -1;
 	}
 
@@ -846,7 +846,6 @@ static struct saa7146_extension extension = {
 	.pci_tbl	= &pci_tbl[0],
 	.module		= THIS_MODULE,
 
-	.probe		= mxb_probe,
 	.attach		= mxb_attach,
 	.detach		= mxb_detach,
 
diff --git a/include/media/saa7146_vv.h b/include/media/saa7146_vv.h
index b9da1f5591e..4aeff96ff7d 100644
--- a/include/media/saa7146_vv.h
+++ b/include/media/saa7146_vv.h
@@ -188,7 +188,6 @@ void saa7146_buffer_timeout(unsigned long data);
 void saa7146_dma_free(struct saa7146_dev* dev,struct videobuf_queue *q,
 						struct saa7146_buf *buf);
 
-int saa7146_vv_devinit(struct saa7146_dev *dev);
 int saa7146_vv_init(struct saa7146_dev* dev, struct saa7146_ext_vv *ext_vv);
 int saa7146_vv_release(struct saa7146_dev* dev);
 
-- 
cgit v1.2.3-70-g09d2


From 8cfe92d683a0041ac8e016a0b0a487c99a78f6c1 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Wed, 28 Apr 2010 11:33:25 +0200
Subject: drm/ttm: Remove the ttm_bo_block_reservation() function.

It's unused and buggy in its current form, since it can place a bo
in the reserved state without removing it from lru lists.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c    | 30 +-----------------------------
 include/drm/ttm/ttm_bo_driver.h | 28 ----------------------------
 2 files changed, 1 insertion(+), 57 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index dd47b2a9a79..0e3754a3a30 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1716,40 +1716,12 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_wait);
 
-void ttm_bo_unblock_reservation(struct ttm_buffer_object *bo)
-{
-	atomic_set(&bo->reserved, 0);
-	wake_up_all(&bo->event_queue);
-}
-
-int ttm_bo_block_reservation(struct ttm_buffer_object *bo, bool interruptible,
-			     bool no_wait)
-{
-	int ret;
-
-	while (unlikely(atomic_cmpxchg(&bo->reserved, 0, 1) != 0)) {
-		if (no_wait)
-			return -EBUSY;
-		else if (interruptible) {
-			ret = wait_event_interruptible
-			    (bo->event_queue, atomic_read(&bo->reserved) == 0);
-			if (unlikely(ret != 0))
-				return ret;
-		} else {
-			wait_event(bo->event_queue,
-				   atomic_read(&bo->reserved) == 0);
-		}
-	}
-	return 0;
-}
-
 int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
 {
 	int ret = 0;
 
 	/*
-	 * Using ttm_bo_reserve instead of ttm_bo_block_reservation
-	 * makes sure the lru lists are updated.
+	 * Using ttm_bo_reserve makes sure the lru lists are updated.
 	 */
 
 	ret = ttm_bo_reserve(bo, true, no_wait, false, 0);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index e929c27ede2..6b9db917e71 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -789,34 +789,6 @@ extern void ttm_bo_unreserve(struct ttm_buffer_object *bo);
 extern int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo,
 				  bool interruptible);
 
-/**
- * ttm_bo_block_reservation
- *
- * @bo: A pointer to a struct ttm_buffer_object.
- * @interruptible: Use interruptible sleep when waiting.
- * @no_wait: Don't sleep, but rather return -EBUSY.
- *
- * Block reservation for validation by simply reserving the buffer.
- * This is intended for single buffer use only without eviction,
- * and thus needs no deadlock protection.
- *
- * Returns:
- * -EBUSY: If no_wait == 1 and the buffer is already reserved.
- * -ERESTARTSYS: If interruptible == 1 and the process received a signal
- * while sleeping.
- */
-extern int ttm_bo_block_reservation(struct ttm_buffer_object *bo,
-				    bool interruptible, bool no_wait);
-
-/**
- * ttm_bo_unblock_reservation
- *
- * @bo: A pointer to a struct ttm_buffer_object.
- *
- * Unblocks reservation leaving lru lists untouched.
- */
-extern void ttm_bo_unblock_reservation(struct ttm_buffer_object *bo);
-
 /*
  * ttm_bo_util.c
  */
-- 
cgit v1.2.3-70-g09d2


From f33d7e2d2d113a63772bbc993cdec3b5327f0ef1 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 11 May 2010 14:06:43 -0700
Subject: dma-mapping: fix dma_sync_single_range_*

dma_sync_single_range_for_cpu() and dma_sync_single_range_for_device() use
a wrong address with a partial synchronization.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/dma-mapping-common.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h
index e694263445f..69206957b72 100644
--- a/include/asm-generic/dma-mapping-common.h
+++ b/include/asm-generic/dma-mapping-common.h
@@ -131,7 +131,7 @@ static inline void dma_sync_single_range_for_cpu(struct device *dev,
 		debug_dma_sync_single_range_for_cpu(dev, addr, offset, size, dir);
 
 	} else
-		dma_sync_single_for_cpu(dev, addr, size, dir);
+		dma_sync_single_for_cpu(dev, addr + offset, size, dir);
 }
 
 static inline void dma_sync_single_range_for_device(struct device *dev,
@@ -148,7 +148,7 @@ static inline void dma_sync_single_range_for_device(struct device *dev,
 		debug_dma_sync_single_range_for_device(dev, addr, offset, size, dir);
 
 	} else
-		dma_sync_single_for_device(dev, addr, size, dir);
+		dma_sync_single_for_device(dev, addr + offset, size, dir);
 }
 
 static inline void
-- 
cgit v1.2.3-70-g09d2


From 34441427aab4bdb3069a4ffcda69a99357abcb2e Mon Sep 17 00:00:00 2001
From: Robin Holt <holt@sgi.com>
Date: Tue, 11 May 2010 14:06:46 -0700
Subject: revert "procfs: provide stack information for threads" and its fixup
 commits

Originally, commit d899bf7b ("procfs: provide stack information for
threads") attempted to introduce a new feature for showing where the
threadstack was located and how many pages are being utilized by the
stack.

Commit c44972f1 ("procfs: disable per-task stack usage on NOMMU") was
applied to fix the NO_MMU case.

Commit 89240ba0 ("x86, fs: Fix x86 procfs stack information for threads on
64-bit") was applied to fix a bug in ia32 executables being loaded.

Commit 9ebd4eba7 ("procfs: fix /proc/<pid>/stat stack pointer for kernel
threads") was applied to fix a bug which had kernel threads printing a
userland stack address.

Commit 1306d603f ('proc: partially revert "procfs: provide stack
information for threads"') was then applied to revert the stack pages
being used to solve a significant performance regression.

This patch nearly undoes the effect of all these patches.

The reason for reverting these is it provides an unusable value in
field 28.  For x86_64, a fork will result in the task->stack_start
value being updated to the current user top of stack and not the stack
start address.  This unpredictability of the stack_start value makes
it worthless.  That includes the intended use of showing how much stack
space a thread has.

Other architectures will get different values.  As an example, ia64
gets 0.  The do_fork() and copy_process() functions appear to treat the
stack_start and stack_size parameters as architecture specific.

I only partially reverted c44972f1 ("procfs: disable per-task stack usage
on NOMMU") .  If I had completely reverted it, I would have had to change
mm/Makefile only build pagewalk.o when CONFIG_PROC_PAGE_MONITOR is
configured.  Since I could not test the builds without significant effort,
I decided to not change mm/Makefile.

I only partially reverted 89240ba0 ("x86, fs: Fix x86 procfs stack
information for threads on 64-bit") .  I left the KSTK_ESP() change in
place as that seemed worthwhile.

Signed-off-by: Robin Holt <holt@sgi.com>
Cc: Stefani Seibold <stefani@seibold.net>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt |  3 +--
 fs/compat.c                        |  2 --
 fs/exec.c                          |  2 --
 fs/proc/array.c                    |  3 +--
 fs/proc/task_mmu.c                 | 19 -------------------
 include/linux/sched.h              |  1 -
 kernel/fork.c                      |  2 --
 7 files changed, 2 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index a4f30faa4f1..1e359b62c40 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -316,7 +316,7 @@ address           perms offset  dev   inode      pathname
 08049000-0804a000 rw-p 00001000 03:00 8312       /opt/test
 0804a000-0806b000 rw-p 00000000 00:00 0          [heap]
 a7cb1000-a7cb2000 ---p 00000000 00:00 0
-a7cb2000-a7eb2000 rw-p 00000000 00:00 0          [threadstack:001ff4b4]
+a7cb2000-a7eb2000 rw-p 00000000 00:00 0
 a7eb2000-a7eb3000 ---p 00000000 00:00 0
 a7eb3000-a7ed5000 rw-p 00000000 00:00 0
 a7ed5000-a8008000 r-xp 00000000 03:00 4222       /lib/libc.so.6
@@ -352,7 +352,6 @@ is not associated with a file:
  [stack]                  = the stack of the main process
  [vdso]                   = the "virtual dynamic shared object",
                             the kernel system call handler
- [threadstack:xxxxxxxx]   = the stack of the thread, xxxxxxxx is the stack size
 
  or if empty, the mapping is anonymous.
 
diff --git a/fs/compat.c b/fs/compat.c
index 4b6ed03cc47..05448730f84 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1531,8 +1531,6 @@ int compat_do_execve(char * filename,
 	if (retval < 0)
 		goto out;
 
-	current->stack_start = current->mm->start_stack;
-
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
diff --git a/fs/exec.c b/fs/exec.c
index 49cdaa19e5b..e6e94c626c2 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1387,8 +1387,6 @@ int do_execve(char * filename,
 	if (retval < 0)
 		goto out;
 
-	current->stack_start = current->mm->start_stack;
-
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index e51f2ec2c5e..885ab5513ac 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -81,7 +81,6 @@
 #include <linux/pid_namespace.h>
 #include <linux/ptrace.h>
 #include <linux/tracehook.h>
-#include <linux/swapops.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -495,7 +494,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		rsslim,
 		mm ? mm->start_code : 0,
 		mm ? mm->end_code : 0,
-		(permitted && mm) ? task->stack_start : 0,
+		(permitted && mm) ? mm->start_stack : 0,
 		esp,
 		eip,
 		/* The signal information here is obsolete.
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 070553427dd..47f5b145f56 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -247,25 +247,6 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 				} else if (vma->vm_start <= mm->start_stack &&
 					   vma->vm_end >= mm->start_stack) {
 					name = "[stack]";
-				} else {
-					unsigned long stack_start;
-					struct proc_maps_private *pmp;
-
-					pmp = m->private;
-					stack_start = pmp->task->stack_start;
-
-					if (vma->vm_start <= stack_start &&
-					    vma->vm_end >= stack_start) {
-						pad_len_spaces(m, len);
-						seq_printf(m,
-						 "[threadstack:%08lx]",
-#ifdef CONFIG_STACK_GROWSUP
-						 vma->vm_end - stack_start
-#else
-						 stack_start - vma->vm_start
-#endif
-						);
-					}
 				}
 			} else {
 				name = "[vdso]";
diff --git a/include/linux/sched.h b/include/linux/sched.h
index dad7f668ebf..2b7b81df78b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1497,7 +1497,6 @@ struct task_struct {
 	/* bitmask of trace recursion */
 	unsigned long trace_recursion;
 #endif /* CONFIG_TRACING */
-	unsigned long stack_start;
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */
 	struct memcg_batch_info {
 		int do_batch;	/* incremented when batch uncharge started */
diff --git a/kernel/fork.c b/kernel/fork.c
index 44b0791b0a2..4c14942a0ee 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1114,8 +1114,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	p->bts = NULL;
 
-	p->stack_start = stack_start;
-
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
 
-- 
cgit v1.2.3-70-g09d2


From d83c49f3e36cecd2e8823b6c48ffba083b8a5704 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 30 Apr 2010 17:17:09 -0400
Subject: Fix the regression created by "set S_DEAD on unlink()..." commit

1) i_flags simply doesn't work for mount/unlink race prevention;
we may have many links to file and rm on one of those obviously
shouldn't prevent bind on top of another later on.  To fix it
right way we need to mark _dentry_ as unsuitable for mounting
upon; new flag (DCACHE_CANT_MOUNT) is protected by d_flags and
i_mutex on the inode in question.  Set it (with dont_mount(dentry))
in unlink/rmdir/etc., check (with cant_mount(dentry)) in places
in namespace.c that used to check for S_DEAD.  Setting S_DEAD
is still needed in places where we used to set it (for directories
getting killed), since we rely on it for readdir/rmdir race
prevention.

2) rename()/mount() protection has another bogosity - we unhash
the target before we'd checked that it's not a mountpoint.  Fixed.

3) ancient bogosity in pivot_root() - we locked i_mutex on the
right directory, but checked S_DEAD on the different (and wrong)
one.  Noticed and fixed.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/usb/core/inode.c |  1 +
 fs/configfs/dir.c        |  4 ++++
 fs/namei.c               | 21 +++++++++++++--------
 fs/namespace.c           |  6 +++---
 include/linux/dcache.h   | 14 ++++++++++++++
 5 files changed, 35 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index 4a6366a4212..111a01a747f 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -380,6 +380,7 @@ static int usbfs_rmdir(struct inode *dir, struct dentry *dentry)
 	mutex_lock(&inode->i_mutex);
 	dentry_unhash(dentry);
 	if (usbfs_empty(dentry)) {
+		dont_mount(dentry);
 		drop_nlink(dentry->d_inode);
 		drop_nlink(dentry->d_inode);
 		dput(dentry);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8e48b52205a..0b502f80c69 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -645,6 +645,7 @@ static void detach_groups(struct config_group *group)
 
 		configfs_detach_group(sd->s_element);
 		child->d_inode->i_flags |= S_DEAD;
+		dont_mount(child);
 
 		mutex_unlock(&child->d_inode->i_mutex);
 
@@ -840,6 +841,7 @@ static int configfs_attach_item(struct config_item *parent_item,
 			mutex_lock(&dentry->d_inode->i_mutex);
 			configfs_remove_dir(item);
 			dentry->d_inode->i_flags |= S_DEAD;
+			dont_mount(dentry);
 			mutex_unlock(&dentry->d_inode->i_mutex);
 			d_delete(dentry);
 		}
@@ -882,6 +884,7 @@ static int configfs_attach_group(struct config_item *parent_item,
 		if (ret) {
 			configfs_detach_item(item);
 			dentry->d_inode->i_flags |= S_DEAD;
+			dont_mount(dentry);
 		}
 		configfs_adjust_dir_dirent_depth_after_populate(sd);
 		mutex_unlock(&dentry->d_inode->i_mutex);
@@ -1725,6 +1728,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
 	mutex_unlock(&configfs_symlink_mutex);
 	configfs_detach_group(&group->cg_item);
 	dentry->d_inode->i_flags |= S_DEAD;
+	dont_mount(dentry);
 	mutex_unlock(&dentry->d_inode->i_mutex);
 
 	d_delete(dentry);
diff --git a/fs/namei.c b/fs/namei.c
index 16df7277a92..b86b96fe1dc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2176,8 +2176,10 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 		error = security_inode_rmdir(dir, dentry);
 		if (!error) {
 			error = dir->i_op->rmdir(dir, dentry);
-			if (!error)
+			if (!error) {
 				dentry->d_inode->i_flags |= S_DEAD;
+				dont_mount(dentry);
+			}
 		}
 	}
 	mutex_unlock(&dentry->d_inode->i_mutex);
@@ -2261,7 +2263,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 		if (!error) {
 			error = dir->i_op->unlink(dir, dentry);
 			if (!error)
-				dentry->d_inode->i_flags |= S_DEAD;
+				dont_mount(dentry);
 		}
 	}
 	mutex_unlock(&dentry->d_inode->i_mutex);
@@ -2572,17 +2574,20 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 		return error;
 
 	target = new_dentry->d_inode;
-	if (target) {
+	if (target)
 		mutex_lock(&target->i_mutex);
-		dentry_unhash(new_dentry);
-	}
 	if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
 		error = -EBUSY;
-	else 
+	else {
+		if (target)
+			dentry_unhash(new_dentry);
 		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
+	}
 	if (target) {
-		if (!error)
+		if (!error) {
 			target->i_flags |= S_DEAD;
+			dont_mount(new_dentry);
+		}
 		mutex_unlock(&target->i_mutex);
 		if (d_unhashed(new_dentry))
 			d_rehash(new_dentry);
@@ -2614,7 +2619,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
 	if (!error) {
 		if (target)
-			target->i_flags |= S_DEAD;
+			dont_mount(new_dentry);
 		if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
 			d_move(old_dentry, new_dentry);
 	}
diff --git a/fs/namespace.c b/fs/namespace.c
index 8174c8ab5c7..f20cb57d106 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1432,7 +1432,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
 
 	err = -ENOENT;
 	mutex_lock(&path->dentry->d_inode->i_mutex);
-	if (IS_DEADDIR(path->dentry->d_inode))
+	if (cant_mount(path->dentry))
 		goto out_unlock;
 
 	err = security_sb_check_sb(mnt, path);
@@ -1623,7 +1623,7 @@ static int do_move_mount(struct path *path, char *old_name)
 
 	err = -ENOENT;
 	mutex_lock(&path->dentry->d_inode->i_mutex);
-	if (IS_DEADDIR(path->dentry->d_inode))
+	if (cant_mount(path->dentry))
 		goto out1;
 
 	if (d_unlinked(path->dentry))
@@ -2234,7 +2234,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	if (!check_mnt(root.mnt))
 		goto out2;
 	error = -ENOENT;
-	if (IS_DEADDIR(new.dentry->d_inode))
+	if (cant_mount(old.dentry))
 		goto out2;
 	if (d_unlinked(new.dentry))
 		goto out2;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 30b93b2a01a..eebb617c17d 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -186,6 +186,8 @@ d_iput:		no		no		no       yes
 
 #define DCACHE_FSNOTIFY_PARENT_WATCHED	0x0080 /* Parent inode is watched by some fsnotify listener */
 
+#define DCACHE_CANT_MOUNT	0x0100
+
 extern spinlock_t dcache_lock;
 extern seqlock_t rename_lock;
 
@@ -358,6 +360,18 @@ static inline int d_unlinked(struct dentry *dentry)
 	return d_unhashed(dentry) && !IS_ROOT(dentry);
 }
 
+static inline int cant_mount(struct dentry *dentry)
+{
+	return (dentry->d_flags & DCACHE_CANT_MOUNT);
+}
+
+static inline void dont_mount(struct dentry *dentry)
+{
+	spin_lock(&dentry->d_lock);
+	dentry->d_flags |= DCACHE_CANT_MOUNT;
+	spin_unlock(&dentry->d_lock);
+}
+
 static inline struct dentry *dget_parent(struct dentry *dentry)
 {
 	struct dentry *ret;
-- 
cgit v1.2.3-70-g09d2


From 35790c0421121364883a167bab8a2e37e1f67f78 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 16 May 2010 00:34:04 -0700
Subject: tcp: fix MD5 (RFC2385) support

TCP MD5 support uses percpu data for temporary storage. It currently
disables preemption so that same storage cannot be reclaimed by another
thread on same cpu.

We also have to make sure a softirq handler wont try to use also same
context. Various bug reports demonstrated corruptions.

Fix is to disable preemption and BH.

Reported-by: Bhaskar Dutta <bhaskie@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 21 +++------------------
 net/ipv4/tcp.c    | 34 ++++++++++++++++++++++++----------
 2 files changed, 27 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 75be5a28815..aa04b9a5093 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1197,30 +1197,15 @@ extern int			tcp_v4_md5_do_del(struct sock *sk,
 extern struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *);
 extern void			tcp_free_md5sig_pool(void);
 
-extern struct tcp_md5sig_pool	*__tcp_get_md5sig_pool(int cpu);
-extern void			__tcp_put_md5sig_pool(void);
+extern struct tcp_md5sig_pool	*tcp_get_md5sig_pool(void);
+extern void			tcp_put_md5sig_pool(void);
+
 extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *);
 extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *,
 				 unsigned header_len);
 extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
 			    struct tcp_md5sig_key *key);
 
-static inline
-struct tcp_md5sig_pool		*tcp_get_md5sig_pool(void)
-{
-	int cpu = get_cpu();
-	struct tcp_md5sig_pool *ret = __tcp_get_md5sig_pool(cpu);
-	if (!ret)
-		put_cpu();
-	return ret;
-}
-
-static inline void		tcp_put_md5sig_pool(void)
-{
-	__tcp_put_md5sig_pool();
-	put_cpu();
-}
-
 /* write queue abstraction */
 static inline void tcp_write_queue_purge(struct sock *sk)
 {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0f8caf64caa..296150b2a62 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2839,7 +2839,6 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool)
 			if (p->md5_desc.tfm)
 				crypto_free_hash(p->md5_desc.tfm);
 			kfree(p);
-			p = NULL;
 		}
 	}
 	free_percpu(pool);
@@ -2937,25 +2936,40 @@ retry:
 
 EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
 
-struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu)
+
+/**
+ *	tcp_get_md5sig_pool - get md5sig_pool for this user
+ *
+ *	We use percpu structure, so if we succeed, we exit with preemption
+ *	and BH disabled, to make sure another thread or softirq handling
+ *	wont try to get same context.
+ */
+struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
 {
 	struct tcp_md5sig_pool * __percpu *p;
-	spin_lock_bh(&tcp_md5sig_pool_lock);
+
+	local_bh_disable();
+
+	spin_lock(&tcp_md5sig_pool_lock);
 	p = tcp_md5sig_pool;
 	if (p)
 		tcp_md5sig_users++;
-	spin_unlock_bh(&tcp_md5sig_pool_lock);
-	return (p ? *per_cpu_ptr(p, cpu) : NULL);
-}
+	spin_unlock(&tcp_md5sig_pool_lock);
+
+	if (p)
+		return *per_cpu_ptr(p, smp_processor_id());
 
-EXPORT_SYMBOL(__tcp_get_md5sig_pool);
+	local_bh_enable();
+	return NULL;
+}
+EXPORT_SYMBOL(tcp_get_md5sig_pool);
 
-void __tcp_put_md5sig_pool(void)
+void tcp_put_md5sig_pool(void)
 {
+	local_bh_enable();
 	tcp_free_md5sig_pool();
 }
-
-EXPORT_SYMBOL(__tcp_put_md5sig_pool);
+EXPORT_SYMBOL(tcp_put_md5sig_pool);
 
 int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
 			struct tcphdr *th)
-- 
cgit v1.2.3-70-g09d2


From c02db8c6290bb992442fec1407643c94cc414375 Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@sous-sol.org>
Date: Sun, 16 May 2010 01:05:45 -0700
Subject: rtnetlink: make SR-IOV VF interface symmetric

Now we have a set of nested attributes:

  IFLA_VFINFO_LIST (NESTED)
    IFLA_VF_INFO (NESTED)
      IFLA_VF_MAC
      IFLA_VF_VLAN
      IFLA_VF_TX_RATE

This allows a single set to operate on multiple attributes if desired.
Among other things, it means a dump can be replayed to set state.

The current interface has yet to be released, so this seems like
something to consider for 2.6.34.

Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h |  23 +++++--
 net/core/rtnetlink.c    | 159 +++++++++++++++++++++++++++++++++---------------
 2 files changed, 129 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index c9bf92cd765..d94963b379d 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -79,10 +79,7 @@ enum {
 	IFLA_NET_NS_PID,
 	IFLA_IFALIAS,
 	IFLA_NUM_VF,		/* Number of VFs if device is SR-IOV PF */
-	IFLA_VF_MAC,		/* Hardware queue specific attributes */
-	IFLA_VF_VLAN,
-	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
-	IFLA_VFINFO,
+	IFLA_VFINFO_LIST,
 	__IFLA_MAX
 };
 
@@ -203,6 +200,24 @@ enum macvlan_mode {
 
 /* SR-IOV virtual function managment section */
 
+enum {
+	IFLA_VF_INFO_UNSPEC,
+	IFLA_VF_INFO,
+	__IFLA_VF_INFO_MAX,
+};
+
+#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1)
+
+enum {
+	IFLA_VF_UNSPEC,
+	IFLA_VF_MAC,		/* Hardware queue specific attributes */
+	IFLA_VF_VLAN,
+	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
+	__IFLA_VF_MAX,
+};
+
+#define IFLA_VF_MAX (__IFLA_VF_MAX - 1)
+
 struct ifla_vf_mac {
 	__u32 vf;
 	__u8 mac[32]; /* MAX_ADDR_LEN */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fe776c9ddec..31e85d327aa 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -602,12 +602,19 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
 	a->tx_compressed = b->tx_compressed;
 };
 
+/* All VF info */
 static inline int rtnl_vfinfo_size(const struct net_device *dev)
 {
-	if (dev->dev.parent && dev_is_pci(dev->dev.parent))
-		return dev_num_vf(dev->dev.parent) *
-			sizeof(struct ifla_vf_info);
-	else
+	if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
+
+		int num_vfs = dev_num_vf(dev->dev.parent);
+		size_t size = nlmsg_total_size(sizeof(struct nlattr));
+		size += nlmsg_total_size(num_vfs * sizeof(struct nlattr));
+		size += num_vfs * (sizeof(struct ifla_vf_mac) +
+				  sizeof(struct ifla_vf_vlan) +
+				  sizeof(struct ifla_vf_tx_rate));
+		return size;
+	} else
 		return 0;
 }
 
@@ -629,7 +636,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
 	       + nla_total_size(1) /* IFLA_LINKMODE */
 	       + nla_total_size(4) /* IFLA_NUM_VF */
-	       + nla_total_size(rtnl_vfinfo_size(dev)) /* IFLA_VFINFO */
+	       + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
 	       + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
 }
 
@@ -700,14 +707,37 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 
 	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
 		int i;
-		struct ifla_vf_info ivi;
 
-		NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent));
-		for (i = 0; i < dev_num_vf(dev->dev.parent); i++) {
+		struct nlattr *vfinfo, *vf;
+		int num_vfs = dev_num_vf(dev->dev.parent);
+
+		NLA_PUT_U32(skb, IFLA_NUM_VF, num_vfs);
+		vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
+		if (!vfinfo)
+			goto nla_put_failure;
+		for (i = 0; i < num_vfs; i++) {
+			struct ifla_vf_info ivi;
+			struct ifla_vf_mac vf_mac;
+			struct ifla_vf_vlan vf_vlan;
+			struct ifla_vf_tx_rate vf_tx_rate;
 			if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
 				break;
-			NLA_PUT(skb, IFLA_VFINFO, sizeof(ivi), &ivi);
+			vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf;
+			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
+			vf_vlan.vlan = ivi.vlan;
+			vf_vlan.qos = ivi.qos;
+			vf_tx_rate.rate = ivi.tx_rate;
+			vf = nla_nest_start(skb, IFLA_VF_INFO);
+			if (!vf) {
+				nla_nest_cancel(skb, vfinfo);
+				goto nla_put_failure;
+			}
+			NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac);
+			NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan);
+			NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate);
+			nla_nest_end(skb, vf);
 		}
+		nla_nest_end(skb, vfinfo);
 	}
 	if (dev->rtnl_link_ops) {
 		if (rtnl_link_fill(skb, dev) < 0)
@@ -769,12 +799,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINKINFO]		= { .type = NLA_NESTED },
 	[IFLA_NET_NS_PID]	= { .type = NLA_U32 },
 	[IFLA_IFALIAS]	        = { .type = NLA_STRING, .len = IFALIASZ-1 },
-	[IFLA_VF_MAC]		= { .type = NLA_BINARY,
-				    .len = sizeof(struct ifla_vf_mac) },
-	[IFLA_VF_VLAN]		= { .type = NLA_BINARY,
-				    .len = sizeof(struct ifla_vf_vlan) },
-	[IFLA_VF_TX_RATE]	= { .type = NLA_BINARY,
-				    .len = sizeof(struct ifla_vf_tx_rate) },
+	[IFLA_VFINFO_LIST]	= {. type = NLA_NESTED },
 };
 EXPORT_SYMBOL(ifla_policy);
 
@@ -783,6 +808,19 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
 	[IFLA_INFO_DATA]	= { .type = NLA_NESTED },
 };
 
+static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
+	[IFLA_VF_INFO]		= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
+	[IFLA_VF_MAC]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_mac) },
+	[IFLA_VF_VLAN]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_vlan) },
+	[IFLA_VF_TX_RATE]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_tx_rate) },
+};
+
 struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
 {
 	struct net *net;
@@ -812,6 +850,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 	return 0;
 }
 
+static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
+{
+	int rem, err = -EINVAL;
+	struct nlattr *vf;
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	nla_for_each_nested(vf, attr, rem) {
+		switch (nla_type(vf)) {
+		case IFLA_VF_MAC: {
+			struct ifla_vf_mac *ivm;
+			ivm = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_mac)
+				err = ops->ndo_set_vf_mac(dev, ivm->vf,
+							  ivm->mac);
+			break;
+		}
+		case IFLA_VF_VLAN: {
+			struct ifla_vf_vlan *ivv;
+			ivv = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_vlan)
+				err = ops->ndo_set_vf_vlan(dev, ivv->vf,
+							   ivv->vlan,
+							   ivv->qos);
+			break;
+		}
+		case IFLA_VF_TX_RATE: {
+			struct ifla_vf_tx_rate *ivt;
+			ivt = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_tx_rate)
+				err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
+							      ivt->rate);
+			break;
+		}
+		default:
+			err = -EINVAL;
+			break;
+		}
+		if (err)
+			break;
+	}
+	return err;
+}
+
 static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		      struct nlattr **tb, char *ifname, int modified)
 {
@@ -942,40 +1026,17 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		write_unlock_bh(&dev_base_lock);
 	}
 
-	if (tb[IFLA_VF_MAC]) {
-		struct ifla_vf_mac *ivm;
-		ivm = nla_data(tb[IFLA_VF_MAC]);
-		err = -EOPNOTSUPP;
-		if (ops->ndo_set_vf_mac)
-			err = ops->ndo_set_vf_mac(dev, ivm->vf, ivm->mac);
-		if (err < 0)
-			goto errout;
-		modified = 1;
-	}
-
-	if (tb[IFLA_VF_VLAN]) {
-		struct ifla_vf_vlan *ivv;
-		ivv = nla_data(tb[IFLA_VF_VLAN]);
-		err = -EOPNOTSUPP;
-		if (ops->ndo_set_vf_vlan)
-			err = ops->ndo_set_vf_vlan(dev, ivv->vf,
-						   ivv->vlan,
-						   ivv->qos);
-		if (err < 0)
-			goto errout;
-		modified = 1;
-	}
-	err = 0;
-
-	if (tb[IFLA_VF_TX_RATE]) {
-		struct ifla_vf_tx_rate *ivt;
-		ivt = nla_data(tb[IFLA_VF_TX_RATE]);
-		err = -EOPNOTSUPP;
-		if (ops->ndo_set_vf_tx_rate)
-			err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, ivt->rate);
-		if (err < 0)
-			goto errout;
-		modified = 1;
+	if (tb[IFLA_VFINFO_LIST]) {
+		struct nlattr *attr;
+		int rem;
+		nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
+			if (nla_type(attr) != IFLA_VF_INFO)
+				goto errout;
+			err = do_setvfinfo(dev, attr);
+			if (err < 0)
+				goto errout;
+			modified = 1;
+		}
 	}
 	err = 0;
 
-- 
cgit v1.2.3-70-g09d2