From ecf61c78bd787b9dfb4a6b1dfb7561c2a95c5c17 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Mon, 11 Nov 2013 14:40:35 +0100
Subject: PCI: Omit PCI ID macro strings to shorten quirk names

Pasting the verbatim PCI_(VENDOR|DEVICE)_* macros in the __pci_fixup_*
symbol names results in insanely long names such as

__pci_fixup_resumePCI_VENDOR_ID_SERVERWORKSPCI_DEVICE_ID_SERVERWORKS_HT1000SBquirk_disable_broadcom_boot_interrupt

When Link-Time Optimization adds its numeric suffix to such symbol, it
overflows the namebuf[KSYM_NAME_LEN] array in kernel/kallsyms.c.  Use the
line number instead to create (nearly) unique symbol names.

Reported-by: Joe Mario <jmario@redhat.com>
Signed-off-by: Michal Marek <mmarek@suse.cz>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
---
 include/linux/pci.h | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1084a15175e..eb8078aeadc 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1567,65 +1567,65 @@ enum pci_fixup_pass {
 /* Anonymous variables would be nice... */
 #define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class,	\
 				  class_shift, hook)			\
-	static const struct pci_fixup __pci_fixup_##name __used		\
+	static const struct pci_fixup __PASTE(__pci_fixup_##name,__LINE__) __used	\
 	__attribute__((__section__(#section), aligned((sizeof(void *)))))    \
 		= { vendor, device, class, class_shift, hook };
 
 #define DECLARE_PCI_FIXUP_CLASS_EARLY(vendor, device, class,		\
 					 class_shift, hook)		\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_early,			\
-		vendor##device##hook, vendor, device, class, class_shift, hook)
+		hook, vendor, device, class, class_shift, hook)
 #define DECLARE_PCI_FIXUP_CLASS_HEADER(vendor, device, class,		\
 					 class_shift, hook)		\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_header,			\
-		vendor##device##hook, vendor, device, class, class_shift, hook)
+		hook, vendor, device, class, class_shift, hook)
 #define DECLARE_PCI_FIXUP_CLASS_FINAL(vendor, device, class,		\
 					 class_shift, hook)		\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_final,			\
-		vendor##device##hook, vendor, device, class, class_shift, hook)
+		hook, vendor, device, class, class_shift, hook)
 #define DECLARE_PCI_FIXUP_CLASS_ENABLE(vendor, device, class,		\
 					 class_shift, hook)		\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_enable,			\
-		vendor##device##hook, vendor, device, class, class_shift, hook)
+		hook, vendor, device, class, class_shift, hook)
 #define DECLARE_PCI_FIXUP_CLASS_RESUME(vendor, device, class,		\
 					 class_shift, hook)		\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume,			\
-		resume##vendor##device##hook, vendor, device, class,	\
+		resume##hook, vendor, device, class,	\
 		class_shift, hook)
 #define DECLARE_PCI_FIXUP_CLASS_RESUME_EARLY(vendor, device, class,	\
 					 class_shift, hook)		\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early,		\
-		resume_early##vendor##device##hook, vendor, device,	\
+		resume_early##hook, vendor, device,	\
 		class, class_shift, hook)
 #define DECLARE_PCI_FIXUP_CLASS_SUSPEND(vendor, device, class,		\
 					 class_shift, hook)		\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend,			\
-		suspend##vendor##device##hook, vendor, device, class,	\
+		suspend##hook, vendor, device, class,	\
 		class_shift, hook)
 
 #define DECLARE_PCI_FIXUP_EARLY(vendor, device, hook)			\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_early,			\
-		vendor##device##hook, vendor, device, PCI_ANY_ID, 0, hook)
+		hook, vendor, device, PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_HEADER(vendor, device, hook)			\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_header,			\
-		vendor##device##hook, vendor, device, PCI_ANY_ID, 0, hook)
+		hook, vendor, device, PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_FINAL(vendor, device, hook)			\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_final,			\
-		vendor##device##hook, vendor, device, PCI_ANY_ID, 0, hook)
+		hook, vendor, device, PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_ENABLE(vendor, device, hook)			\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_enable,			\
-		vendor##device##hook, vendor, device, PCI_ANY_ID, 0, hook)
+		hook, vendor, device, PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_RESUME(vendor, device, hook)			\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume,			\
-		resume##vendor##device##hook, vendor, device,		\
+		resume##hook, vendor, device,		\
 		PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_RESUME_EARLY(vendor, device, hook)		\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early,		\
-		resume_early##vendor##device##hook, vendor, device,	\
+		resume_early##hook, vendor, device,	\
 		PCI_ANY_ID, 0, hook)
 #define DECLARE_PCI_FIXUP_SUSPEND(vendor, device, hook)			\
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend,			\
-		suspend##vendor##device##hook, vendor, device,		\
+		suspend##hook, vendor, device,		\
 		PCI_ANY_ID, 0, hook)
 
 #ifdef CONFIG_PCI_QUIRKS
-- 
cgit v1.2.3-70-g09d2


From 23fd78d76415729b338ff1802a0066b4a62f7fb8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 2 Dec 2013 11:24:18 +0000
Subject: KEYS: Fix multiple key add into associative array

If sufficient keys (or keyrings) are added into a keyring such that a node in
the associative array's tree overflows (each node has a capacity N, currently
16) and such that all N+1 keys have the same index key segment for that level
of the tree (the level'th nibble of the index key), then assoc_array_insert()
calls ops->diff_objects() to indicate at which bit position the two index keys
vary.

However, __key_link_begin() passes a NULL object to assoc_array_insert() with
the intention of supplying the correct pointer later before we commit the
change.  This means that keyring_diff_objects() is given a NULL pointer as one
of its arguments which it does not expect.  This results in an oops like the
attached.

With the previous patch to fix the keyring hash function, this can be forced
much more easily by creating a keyring and only adding keyrings to it.  Add any
other sort of key and a different insertion path is taken - all 16+1 objects
must want to cluster in the same node slot.

This can be tested by:

	r=`keyctl newring sandbox @s`
	for ((i=0; i<=16; i++)); do keyctl newring ring$i $r; done

This should work fine, but oopses when the 17th keyring is added.

Since ops->diff_objects() is always called with the first pointer pointing to
the object to be inserted (ie. the NULL pointer), we can fix the problem by
changing the to-be-inserted object pointer to point to the index key passed
into assoc_array_insert() instead.

Whilst we're at it, we also switch the arguments so that they are the same as
for ->compare_object().

BUG: unable to handle kernel NULL pointer dereference at 0000000000000088
IP: [<ffffffff81191ee4>] hash_key_type_and_desc+0x18/0xb0
...
RIP: 0010:[<ffffffff81191ee4>] hash_key_type_and_desc+0x18/0xb0
...
Call Trace:
 [<ffffffff81191f9d>] keyring_diff_objects+0x21/0xd2
 [<ffffffff811f09ef>] assoc_array_insert+0x3b6/0x908
 [<ffffffff811929a7>] __key_link_begin+0x78/0xe5
 [<ffffffff81191a2e>] key_create_or_update+0x17d/0x36a
 [<ffffffff81192e0a>] SyS_add_key+0x123/0x183
 [<ffffffff81400ddb>] tracesys+0xdd/0xe2

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Stephen Gallagher <sgallagh@redhat.com>
---
 Documentation/assoc_array.txt | 6 +++---
 include/linux/assoc_array.h   | 6 +++---
 lib/assoc_array.c             | 4 ++--
 security/keys/keyring.c       | 7 +++----
 4 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt
index f4faec0f66e..2f2c6cdd73c 100644
--- a/Documentation/assoc_array.txt
+++ b/Documentation/assoc_array.txt
@@ -164,10 +164,10 @@ This points to a number of methods, all of which need to be provided:
 
  (4) Diff the index keys of two objects.
 
-	int (*diff_objects)(const void *a, const void *b);
+	int (*diff_objects)(const void *object, const void *index_key);
 
-     Return the bit position at which the index keys of two objects differ or
-     -1 if they are the same.
+     Return the bit position at which the index key of the specified object
+     differs from the given index key or -1 if they are the same.
 
 
  (5) Free an object.
diff --git a/include/linux/assoc_array.h b/include/linux/assoc_array.h
index 9a193b84238..a89df3be168 100644
--- a/include/linux/assoc_array.h
+++ b/include/linux/assoc_array.h
@@ -41,10 +41,10 @@ struct assoc_array_ops {
 	/* Is this the object we're looking for? */
 	bool (*compare_object)(const void *object, const void *index_key);
 
-	/* How different are two objects, to a bit position in their keys? (or
-	 * -1 if they're the same)
+	/* How different is an object from an index key, to a bit position in
+	 * their keys? (or -1 if they're the same)
 	 */
-	int (*diff_objects)(const void *a, const void *b);
+	int (*diff_objects)(const void *object, const void *index_key);
 
 	/* Method to free an object. */
 	void (*free_object)(void *object);
diff --git a/lib/assoc_array.c b/lib/assoc_array.c
index 17edeaf1918..1b6a44f1ec3 100644
--- a/lib/assoc_array.c
+++ b/lib/assoc_array.c
@@ -759,8 +759,8 @@ all_leaves_cluster_together:
 	pr_devel("all leaves cluster together\n");
 	diff = INT_MAX;
 	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
-		int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf),
-					  assoc_array_ptr_to_leaf(node->slots[i]));
+		int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]),
+					  index_key);
 		if (x < diff) {
 			BUG_ON(x < 0);
 			diff = x;
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 0adbc77a59b..3dd8445cd48 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -279,12 +279,11 @@ static bool keyring_compare_object(const void *object, const void *data)
  * Compare the index keys of a pair of objects and determine the bit position
  * at which they differ - if they differ.
  */
-static int keyring_diff_objects(const void *_a, const void *_b)
+static int keyring_diff_objects(const void *object, const void *data)
 {
-	const struct key *key_a = keyring_ptr_to_key(_a);
-	const struct key *key_b = keyring_ptr_to_key(_b);
+	const struct key *key_a = keyring_ptr_to_key(object);
 	const struct keyring_index_key *a = &key_a->index_key;
-	const struct keyring_index_key *b = &key_b->index_key;
+	const struct keyring_index_key *b = data;
 	unsigned long seg_a, seg_b;
 	int level, i;
 
-- 
cgit v1.2.3-70-g09d2


From c7277090927a5e71871e799a355ed2940f6c8fc6 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 2 Dec 2013 11:24:19 +0000
Subject: security: shmem: implement kernel private shmem inodes

We have a problem where the big_key key storage implementation uses a
shmem backed inode to hold the key contents.  Because of this detail of
implementation LSM checks are being done between processes trying to
read the keys and the tmpfs backed inode.  The LSM checks are already
being handled on the key interface level and should not be enforced at
the inode level (since the inode is an implementation detail, not a
part of the security model)

This patch implements a new function shmem_kernel_file_setup() which
returns the equivalent to shmem_file_setup() only the underlying inode
has S_PRIVATE set.  This means that all LSM checks for the inode in
question are skipped.  It should only be used for kernel internal
operations where the inode is not exposed to userspace without proper
LSM checking.  It is possible that some other users of
shmem_file_setup() should use the new interface, but this has not been
explored.

Reproducing this bug is a little bit difficult.  The steps I used on
Fedora are:

 (1) Turn off selinux enforcing:

	setenforce 0

 (2) Create a huge key

	k=`dd if=/dev/zero bs=8192 count=1 | keyctl padd big_key test-key @s`

 (3) Access the key in another context:

	runcon system_u:system_r:httpd_t:s0-s0:c0.c1023 keyctl print $k >/dev/null

 (4) Examine the audit logs:

	ausearch -m AVC -i --subject httpd_t | audit2allow

If the last command's output includes a line that looks like:

	allow httpd_t user_tmpfs_t:file { open read };

There was an inode check between httpd and the tmpfs filesystem.  With
this patch no such denial will be seen.  (NOTE! you should clear your
audit log if you have tested for this previously)

(Please return you box to enforcing)

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Hugh Dickins <hughd@google.com>
cc: linux-mm@kvack.org
---
 include/linux/shmem_fs.h |  2 ++
 mm/shmem.c               | 36 +++++++++++++++++++++++++++++-------
 security/keys/big_key.c  |  2 +-
 3 files changed, 32 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 30aa0dc60d7..9d55438bc4a 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -47,6 +47,8 @@ extern int shmem_init(void);
 extern int shmem_fill_super(struct super_block *sb, void *data, int silent);
 extern struct file *shmem_file_setup(const char *name,
 					loff_t size, unsigned long flags);
+extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
+					    unsigned long flags);
 extern int shmem_zero_setup(struct vm_area_struct *);
 extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
 extern void shmem_unlock_mapping(struct address_space *mapping);
diff --git a/mm/shmem.c b/mm/shmem.c
index 8297623fcae..902a14842b7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2918,13 +2918,8 @@ static struct dentry_operations anon_ops = {
 	.d_dname = simple_dname
 };
 
-/**
- * shmem_file_setup - get an unlinked file living in tmpfs
- * @name: name for dentry (to be seen in /proc/<pid>/maps
- * @size: size to be set for the file
- * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
- */
-struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
+static struct file *__shmem_file_setup(const char *name, loff_t size,
+				       unsigned long flags, unsigned int i_flags)
 {
 	struct file *res;
 	struct inode *inode;
@@ -2957,6 +2952,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
 	if (!inode)
 		goto put_dentry;
 
+	inode->i_flags |= i_flags;
 	d_instantiate(path.dentry, inode);
 	inode->i_size = size;
 	clear_nlink(inode);	/* It is unlinked */
@@ -2977,6 +2973,32 @@ put_memory:
 	shmem_unacct_size(flags, size);
 	return res;
 }
+
+/**
+ * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be
+ * 	kernel internal.  There will be NO LSM permission checks against the
+ * 	underlying inode.  So users of this interface must do LSM checks at a
+ * 	higher layer.  The one user is the big_key implementation.  LSM checks
+ * 	are provided at the key level rather than the inode level.
+ * @name: name for dentry (to be seen in /proc/<pid>/maps
+ * @size: size to be set for the file
+ * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
+ */
+struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags)
+{
+	return __shmem_file_setup(name, size, flags, S_PRIVATE);
+}
+
+/**
+ * shmem_file_setup - get an unlinked file living in tmpfs
+ * @name: name for dentry (to be seen in /proc/<pid>/maps
+ * @size: size to be set for the file
+ * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
+ */
+struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
+{
+	return __shmem_file_setup(name, size, flags, 0);
+}
 EXPORT_SYMBOL_GPL(shmem_file_setup);
 
 /**
diff --git a/security/keys/big_key.c b/security/keys/big_key.c
index 7f44c3207a9..8137b27d641 100644
--- a/security/keys/big_key.c
+++ b/security/keys/big_key.c
@@ -70,7 +70,7 @@ int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
 		 *
 		 * TODO: Encrypt the stored data with a temporary key.
 		 */
-		file = shmem_file_setup("", datalen, 0);
+		file = shmem_kernel_file_setup("", datalen, 0);
 		if (IS_ERR(file)) {
 			ret = PTR_ERR(file);
 			goto err_quota;
-- 
cgit v1.2.3-70-g09d2


From 8496e85c20e7836b3dec97780e40f420a3ae2801 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 1 Dec 2013 02:34:37 +0100
Subject: PCI / tg3: Give up chip reset and carrier loss handling if PCI device
 is not present

Modify tg3_chip_reset() and tg3_close() to check if the PCI network
adapter device is accessible at all in order to skip poking it or
trying to handle a carrier loss in vain when that's not the case.
Introduce a special PCI helper function pci_device_is_present()
for this purpose.

Of course, this uncovers the lack of the appropriate RTNL locking
in tg3_suspend() and tg3_resume(), so add that locking in there
too.

These changes prevent tg3 from burning a CPU at 100% load level for
solid several seconds after the Thunderbolt link is disconnected from
a Matrox DS1 docking station.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/tg3.c | 26 +++++++++++++++++++-------
 drivers/pci/pci.c                   |  8 ++++++++
 include/linux/pci.h                 |  1 +
 3 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 369b736dde0..472305cdff4 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -8932,6 +8932,9 @@ static int tg3_chip_reset(struct tg3 *tp)
 	void (*write_op)(struct tg3 *, u32, u32);
 	int i, err;
 
+	if (!pci_device_is_present(tp->pdev))
+		return -ENODEV;
+
 	tg3_nvram_lock(tp);
 
 	tg3_ape_lock(tp, TG3_APE_LOCK_GRC);
@@ -11581,10 +11584,11 @@ static int tg3_close(struct net_device *dev)
 	memset(&tp->net_stats_prev, 0, sizeof(tp->net_stats_prev));
 	memset(&tp->estats_prev, 0, sizeof(tp->estats_prev));
 
-	tg3_power_down_prepare(tp);
-
-	tg3_carrier_off(tp);
+	if (pci_device_is_present(tp->pdev)) {
+		tg3_power_down_prepare(tp);
 
+		tg3_carrier_off(tp);
+	}
 	return 0;
 }
 
@@ -17726,10 +17730,12 @@ static int tg3_suspend(struct device *device)
 	struct pci_dev *pdev = to_pci_dev(device);
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct tg3 *tp = netdev_priv(dev);
-	int err;
+	int err = 0;
+
+	rtnl_lock();
 
 	if (!netif_running(dev))
-		return 0;
+		goto unlock;
 
 	tg3_reset_task_cancel(tp);
 	tg3_phy_stop(tp);
@@ -17771,6 +17777,8 @@ out:
 			tg3_phy_start(tp);
 	}
 
+unlock:
+	rtnl_unlock();
 	return err;
 }
 
@@ -17779,10 +17787,12 @@ static int tg3_resume(struct device *device)
 	struct pci_dev *pdev = to_pci_dev(device);
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct tg3 *tp = netdev_priv(dev);
-	int err;
+	int err = 0;
+
+	rtnl_lock();
 
 	if (!netif_running(dev))
-		return 0;
+		goto unlock;
 
 	netif_device_attach(dev);
 
@@ -17806,6 +17816,8 @@ out:
 	if (!err)
 		tg3_phy_start(tp);
 
+unlock:
+	rtnl_unlock();
 	return err;
 }
 #endif /* CONFIG_PM_SLEEP */
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 33120d15666..07369f32e8b 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4165,6 +4165,14 @@ int pci_set_vga_state(struct pci_dev *dev, bool decode,
 	return 0;
 }
 
+bool pci_device_is_present(struct pci_dev *pdev)
+{
+	u32 v;
+
+	return pci_bus_read_dev_vendor_id(pdev->bus, pdev->devfn, &v, 0);
+}
+EXPORT_SYMBOL_GPL(pci_device_is_present);
+
 #define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE
 static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
 static DEFINE_SPINLOCK(resource_alignment_lock);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1084a15175e..25a389566e4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -960,6 +960,7 @@ void pci_update_resource(struct pci_dev *dev, int resno);
 int __must_check pci_assign_resource(struct pci_dev *dev, int i);
 int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align);
 int pci_select_bars(struct pci_dev *dev, unsigned long flags);
+bool pci_device_is_present(struct pci_dev *pdev);
 
 /* ROM control related routines */
 int pci_enable_rom(struct pci_dev *pdev);
-- 
cgit v1.2.3-70-g09d2


From 7ce5a27f2ef8ed4f8f892f46391f933c0e2ac0f1 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 2 Dec 2013 17:26:05 -0500
Subject: Revert "net: Handle CHECKSUM_COMPLETE more adequately in
 pskb_trim_rcsum()."

This reverts commit 018c5bba052b3a383d83cf0c756da0e7bc748397.

It causes regressions for people using chips driven by the sungem
driver.  Suspicion is that the skb->csum value isn't being adjusted
properly.

The change also has a bug in that if __pskb_trim() fails, we'll leave
a corruped skb->csum value in there.  We would really need to revert
it to it's original value in that case.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 39 ++++++++++++++++++---------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bec1cc7d5e3..215b5ea1cb3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2263,6 +2263,24 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb,
 
 unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
 
+/**
+ *	pskb_trim_rcsum - trim received skb and update checksum
+ *	@skb: buffer to trim
+ *	@len: new length
+ *
+ *	This is exactly the same as pskb_trim except that it ensures the
+ *	checksum of received packets are still valid after the operation.
+ */
+
+static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
+{
+	if (likely(len >= skb->len))
+		return 0;
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->ip_summed = CHECKSUM_NONE;
+	return __pskb_trim(skb, len);
+}
+
 #define skb_queue_walk(queue, skb) \
 		for (skb = (queue)->next;					\
 		     skb != (struct sk_buff *)(queue);				\
@@ -2360,27 +2378,6 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
 		    __wsum csum);
 
-/**
- *	pskb_trim_rcsum - trim received skb and update checksum
- *	@skb: buffer to trim
- *	@len: new length
- *
- *	This is exactly the same as pskb_trim except that it ensures the
- *	checksum of received packets are still valid after the operation.
- */
-
-static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
-{
-	if (likely(len >= skb->len))
-		return 0;
-	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		__wsum adj = skb_checksum(skb, len, skb->len - len, 0);
-
-		skb->csum = csum_sub(skb->csum, adj);
-	}
-	return __pskb_trim(skb, len);
-}
-
 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 				       int len, void *buffer)
 {
-- 
cgit v1.2.3-70-g09d2


From b1a0fbfdde65dffd83c84c006f84fa12041907c5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 4 Dec 2013 10:12:40 -0500
Subject: percpu: fix spurious sparse warnings from DEFINE_PER_CPU()

When CONFIG_DEBUG_FORCE_WEAK_PER_CPU or CONFIG_ARCH_NEEDS_WEAK_PER_CPU
is set, DEFINE_PER_CPU() explodes into cryptic series of definitions
to still allow using "static" for percpu variables while keeping all
per-cpu symbols unique in the kernel image which is required for weak
symbols.  This ultimately converts the actual symbol to global whether
DEFINE_PER_CPU() is prefixed with static or not.

Unfortunately, the macro forgot to add explicit extern declartion of
the actual symbol ending up defining global symbol without preceding
declaration for static definitions which naturally don't have matching
DECLARE_PER_CPU().  The only ill effect is triggering of the following
warnings.

 fs/inode.c:74:8: warning: symbol 'nr_inodes' was not declared. Should it be static?
 fs/inode.c:75:8: warning: symbol 'nr_unused' was not declared. Should it be static?

Fix it by adding extern declaration in the DEFINE_PER_CPU() macro.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Tested-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
---
 include/linux/percpu-defs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 57e890abe1f..a5fc7d01aad 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -69,6 +69,7 @@
 	__PCPU_DUMMY_ATTRS char __pcpu_scope_##name;			\
 	extern __PCPU_DUMMY_ATTRS char __pcpu_unique_##name;		\
 	__PCPU_DUMMY_ATTRS char __pcpu_unique_##name;			\
+	extern __PCPU_ATTRS(sec) __typeof__(type) name;			\
 	__PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak			\
 	__typeof__(type) name
 #else
-- 
cgit v1.2.3-70-g09d2


From 1431fb31ecbaba1b5718006128f0f2ed0b94e1c3 Mon Sep 17 00:00:00 2001
From: Paul Durrant <Paul.Durrant@citrix.com>
Date: Tue, 3 Dec 2013 17:39:29 +0000
Subject: xen-netback: fix fragment detection in checksum setup

The code to detect fragments in checksum_setup() was missing for IPv4 and
too eager for IPv6. (It transpires that Windows seems to send IPv6 packets
with a fragment header even if they are not a fragment - i.e. offset is zero,
and M bit is not set).

This patch also incorporates a fix to callers of maybe_pull_tail() where
skb->network_header was being erroneously added to the length argument.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
cc: David Miller <davem@davemloft.net>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/netback.c | 236 ++++++++++++++++++++++----------------
 include/linux/ipv6.h              |   1 +
 include/net/ipv6.h                |   3 +-
 3 files changed, 140 insertions(+), 100 deletions(-)

(limited to 'include')

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 64f0e0d18b8..acf13920e6d 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1149,49 +1149,72 @@ static int xenvif_set_skb_gso(struct xenvif *vif,
 	return 0;
 }
 
-static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
+static inline int maybe_pull_tail(struct sk_buff *skb, unsigned int len,
+				  unsigned int max)
 {
-	if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
-		/* If we need to pullup then pullup to the max, so we
-		 * won't need to do it again.
-		 */
-		int target = min_t(int, skb->len, MAX_TCP_HEADER);
-		__pskb_pull_tail(skb, target - skb_headlen(skb));
-	}
+	if (skb_headlen(skb) >= len)
+		return 0;
+
+	/* If we need to pullup then pullup to the max, so we
+	 * won't need to do it again.
+	 */
+	if (max > skb->len)
+		max = skb->len;
+
+	if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
+		return -ENOMEM;
+
+	if (skb_headlen(skb) < len)
+		return -EPROTO;
+
+	return 0;
 }
 
+/* This value should be large enough to cover a tagged ethernet header plus
+ * maximally sized IP and TCP or UDP headers.
+ */
+#define MAX_IP_HDR_LEN 128
+
 static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
 			     int recalculate_partial_csum)
 {
-	struct iphdr *iph = (void *)skb->data;
-	unsigned int header_size;
 	unsigned int off;
-	int err = -EPROTO;
+	bool fragment;
+	int err;
+
+	fragment = false;
+
+	err = maybe_pull_tail(skb,
+			      sizeof(struct iphdr),
+			      MAX_IP_HDR_LEN);
+	if (err < 0)
+		goto out;
 
-	off = sizeof(struct iphdr);
+	if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
+		fragment = true;
 
-	header_size = skb->network_header + off + MAX_IPOPTLEN;
-	maybe_pull_tail(skb, header_size);
+	off = ip_hdrlen(skb);
 
-	off = iph->ihl * 4;
+	err = -EPROTO;
 
-	switch (iph->protocol) {
+	switch (ip_hdr(skb)->protocol) {
 	case IPPROTO_TCP:
 		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct tcphdr, check)))
 			goto out;
 
 		if (recalculate_partial_csum) {
-			struct tcphdr *tcph = tcp_hdr(skb);
-
-			header_size = skb->network_header +
-				off +
-				sizeof(struct tcphdr);
-			maybe_pull_tail(skb, header_size);
-
-			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - off,
-							 IPPROTO_TCP, 0);
+			err = maybe_pull_tail(skb,
+					      off + sizeof(struct tcphdr),
+					      MAX_IP_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			tcp_hdr(skb)->check =
+				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+						   ip_hdr(skb)->daddr,
+						   skb->len - off,
+						   IPPROTO_TCP, 0);
 		}
 		break;
 	case IPPROTO_UDP:
@@ -1200,24 +1223,20 @@ static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
 			goto out;
 
 		if (recalculate_partial_csum) {
-			struct udphdr *udph = udp_hdr(skb);
-
-			header_size = skb->network_header +
-				off +
-				sizeof(struct udphdr);
-			maybe_pull_tail(skb, header_size);
-
-			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - off,
-							 IPPROTO_UDP, 0);
+			err = maybe_pull_tail(skb,
+					      off + sizeof(struct udphdr),
+					      MAX_IP_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			udp_hdr(skb)->check =
+				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+						   ip_hdr(skb)->daddr,
+						   skb->len - off,
+						   IPPROTO_UDP, 0);
 		}
 		break;
 	default:
-		if (net_ratelimit())
-			netdev_err(vif->dev,
-				   "Attempting to checksum a non-TCP/UDP packet, "
-				   "dropping a protocol %d packet\n",
-				   iph->protocol);
 		goto out;
 	}
 
@@ -1227,75 +1246,99 @@ out:
 	return err;
 }
 
+/* This value should be large enough to cover a tagged ethernet header plus
+ * an IPv6 header, all options, and a maximal TCP or UDP header.
+ */
+#define MAX_IPV6_HDR_LEN 256
+
+#define OPT_HDR(type, skb, off) \
+	(type *)(skb_network_header(skb) + (off))
+
 static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
 			       int recalculate_partial_csum)
 {
-	int err = -EPROTO;
-	struct ipv6hdr *ipv6h = (void *)skb->data;
+	int err;
 	u8 nexthdr;
-	unsigned int header_size;
 	unsigned int off;
+	unsigned int len;
 	bool fragment;
 	bool done;
 
+	fragment = false;
 	done = false;
 
 	off = sizeof(struct ipv6hdr);
 
-	header_size = skb->network_header + off;
-	maybe_pull_tail(skb, header_size);
+	err = maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
+	if (err < 0)
+		goto out;
 
-	nexthdr = ipv6h->nexthdr;
+	nexthdr = ipv6_hdr(skb)->nexthdr;
 
-	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) &&
-	       !done) {
+	len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
+	while (off <= len && !done) {
 		switch (nexthdr) {
 		case IPPROTO_DSTOPTS:
 		case IPPROTO_HOPOPTS:
 		case IPPROTO_ROUTING: {
-			struct ipv6_opt_hdr *hp = (void *)(skb->data + off);
+			struct ipv6_opt_hdr *hp;
 
-			header_size = skb->network_header +
-				off +
-				sizeof(struct ipv6_opt_hdr);
-			maybe_pull_tail(skb, header_size);
+			err = maybe_pull_tail(skb,
+					      off +
+					      sizeof(struct ipv6_opt_hdr),
+					      MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
 
+			hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
 			nexthdr = hp->nexthdr;
 			off += ipv6_optlen(hp);
 			break;
 		}
 		case IPPROTO_AH: {
-			struct ip_auth_hdr *hp = (void *)(skb->data + off);
+			struct ip_auth_hdr *hp;
+
+			err = maybe_pull_tail(skb,
+					      off +
+					      sizeof(struct ip_auth_hdr),
+					      MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			hp = OPT_HDR(struct ip_auth_hdr, skb, off);
+			nexthdr = hp->nexthdr;
+			off += ipv6_authlen(hp);
+			break;
+		}
+		case IPPROTO_FRAGMENT: {
+			struct frag_hdr *hp;
 
-			header_size = skb->network_header +
-				off +
-				sizeof(struct ip_auth_hdr);
-			maybe_pull_tail(skb, header_size);
+			err = maybe_pull_tail(skb,
+					      off +
+					      sizeof(struct frag_hdr),
+					      MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			hp = OPT_HDR(struct frag_hdr, skb, off);
+
+			if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
+				fragment = true;
 
 			nexthdr = hp->nexthdr;
-			off += (hp->hdrlen+2)<<2;
+			off += sizeof(struct frag_hdr);
 			break;
 		}
-		case IPPROTO_FRAGMENT:
-			fragment = true;
-			/* fall through */
 		default:
 			done = true;
 			break;
 		}
 	}
 
-	if (!done) {
-		if (net_ratelimit())
-			netdev_err(vif->dev, "Failed to parse packet header\n");
-		goto out;
-	}
+	err = -EPROTO;
 
-	if (fragment) {
-		if (net_ratelimit())
-			netdev_err(vif->dev, "Packet is a fragment!\n");
+	if (!done || fragment)
 		goto out;
-	}
 
 	switch (nexthdr) {
 	case IPPROTO_TCP:
@@ -1304,17 +1347,17 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
 			goto out;
 
 		if (recalculate_partial_csum) {
-			struct tcphdr *tcph = tcp_hdr(skb);
-
-			header_size = skb->network_header +
-				off +
-				sizeof(struct tcphdr);
-			maybe_pull_tail(skb, header_size);
-
-			tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
-						       &ipv6h->daddr,
-						       skb->len - off,
-						       IPPROTO_TCP, 0);
+			err = maybe_pull_tail(skb,
+					      off + sizeof(struct tcphdr),
+					      MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			tcp_hdr(skb)->check =
+				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+						 &ipv6_hdr(skb)->daddr,
+						 skb->len - off,
+						 IPPROTO_TCP, 0);
 		}
 		break;
 	case IPPROTO_UDP:
@@ -1323,25 +1366,20 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
 			goto out;
 
 		if (recalculate_partial_csum) {
-			struct udphdr *udph = udp_hdr(skb);
-
-			header_size = skb->network_header +
-				off +
-				sizeof(struct udphdr);
-			maybe_pull_tail(skb, header_size);
-
-			udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
-						       &ipv6h->daddr,
-						       skb->len - off,
-						       IPPROTO_UDP, 0);
+			err = maybe_pull_tail(skb,
+					      off + sizeof(struct udphdr),
+					      MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			udp_hdr(skb)->check =
+				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+						 &ipv6_hdr(skb)->daddr,
+						 skb->len - off,
+						 IPPROTO_UDP, 0);
 		}
 		break;
 	default:
-		if (net_ratelimit())
-			netdev_err(vif->dev,
-				   "Attempting to checksum a non-TCP/UDP packet, "
-				   "dropping a protocol %d packet\n",
-				   nexthdr);
 		goto out;
 	}
 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 5d89d1b808a..c56c350324e 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -4,6 +4,7 @@
 #include <uapi/linux/ipv6.h>
 
 #define ipv6_optlen(p)  (((p)->hdrlen+1) << 3)
+#define ipv6_authlen(p) (((p)->hdrlen+2) << 2)
 /*
  * This structure contains configuration options per IPv6 link.
  */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index eb198acaac1..488316e339a 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -110,7 +110,8 @@ struct frag_hdr {
 	__be32	identification;
 };
 
-#define	IP6_MF	0x0001
+#define	IP6_MF		0x0001
+#define	IP6_OFFSET	0xFFF8
 
 #include <net/sock.h>
 
-- 
cgit v1.2.3-70-g09d2


From 7f2cbdc28c034ef2c3be729681f631d5744e3cd5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 4 Dec 2013 20:12:04 -0800
Subject: tcp_memcontrol: Cleanup/fix cg_proto->memory_pressure handling.

kill memcg_tcp_enter_memory_pressure.  The only function of
memcg_tcp_enter_memory_pressure was to reduce deal with the
unnecessary abstraction that was tcp_memcontrol.  Now that struct
tcp_memcontrol is gone remove this unnecessary function, the
unnecessary function pointer, and modify sk_enter_memory_pressure to
set this field directly, just as sk_leave_memory_pressure cleas this
field directly.

This fixes a small bug I intruduced when killing struct tcp_memcontrol
that caused memcg_tcp_enter_memory_pressure to never be called and
thus failed to ever set cg_proto->memory_pressure.

Remove the cg_proto enter_memory_pressure function as it now serves
no useful purpose.

Don't test cg_proto->memory_presser in sk_leave_memory_pressure before
clearing it.  The test was originally there to ensure that the pointer
was non-NULL.  Now that cg_proto is not a pointer the pointer does not
matter.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h        | 6 ++----
 net/ipv4/tcp_memcontrol.c | 7 -------
 2 files changed, 2 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index e3a18ff0c38..2ef3c3eca47 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1035,7 +1035,6 @@ enum cg_proto_flags {
 };
 
 struct cg_proto {
-	void			(*enter_memory_pressure)(struct sock *sk);
 	struct res_counter	memory_allocated;	/* Current allocated memory. */
 	struct percpu_counter	sockets_allocated;	/* Current number of sockets. */
 	int			memory_pressure;
@@ -1155,8 +1154,7 @@ static inline void sk_leave_memory_pressure(struct sock *sk)
 		struct proto *prot = sk->sk_prot;
 
 		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
-			if (cg_proto->memory_pressure)
-				cg_proto->memory_pressure = 0;
+			cg_proto->memory_pressure = 0;
 	}
 
 }
@@ -1171,7 +1169,7 @@ static inline void sk_enter_memory_pressure(struct sock *sk)
 		struct proto *prot = sk->sk_prot;
 
 		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
-			cg_proto->enter_memory_pressure(sk);
+			cg_proto->memory_pressure = 1;
 	}
 
 	sk->sk_prot->enter_memory_pressure(sk);
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 269a89ecd2f..f7e522c558b 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -6,13 +6,6 @@
 #include <linux/memcontrol.h>
 #include <linux/module.h>
 
-static void memcg_tcp_enter_memory_pressure(struct sock *sk)
-{
-	if (sk->sk_cgrp->memory_pressure)
-		sk->sk_cgrp->memory_pressure = 1;
-}
-EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);
-
 int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
 	/*
-- 
cgit v1.2.3-70-g09d2


From 4fc9bbf98fd66f879e628d8537ba7c240be2b58e Mon Sep 17 00:00:00 2001
From: Khalid Aziz <khalid.aziz@oracle.com>
Date: Wed, 27 Nov 2013 15:19:25 -0700
Subject: PCI: Disable Bus Master only on kexec reboot

Add a flag to tell the PCI subsystem that kernel is shutting down in
preparation to kexec a kernel.  Add code in PCI subsystem to use this flag
to clear Bus Master bit on PCI devices only in case of kexec reboot.

This fixes a power-off problem on Acer Aspire V5-573G and likely other
machines and avoids any other issues caused by clearing Bus Master bit on
PCI devices in normal shutdown path.  The problem was introduced by
b566a22c2332 ("PCI: disable Bus Master on PCI device shutdown").

This patch is based on discussion at
http://marc.info/?l=linux-pci&m=138425645204355&w=2

Link: https://bugzilla.kernel.org/show_bug.cgi?id=63861
Reported-by: Chang Liu <cl91tp@gmail.com>
Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: stable@vger.kernel.org	# v3.5+
---
 drivers/pci/pci-driver.c | 12 +++++++++---
 include/linux/kexec.h    |  3 +++
 kernel/kexec.c           |  4 ++++
 3 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 7edd5c30744..25f0bc65916 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -19,6 +19,7 @@
 #include <linux/cpu.h>
 #include <linux/pm_runtime.h>
 #include <linux/suspend.h>
+#include <linux/kexec.h>
 #include "pci.h"
 
 struct pci_dynid {
@@ -415,12 +416,17 @@ static void pci_device_shutdown(struct device *dev)
 	pci_msi_shutdown(pci_dev);
 	pci_msix_shutdown(pci_dev);
 
+#ifdef CONFIG_KEXEC
 	/*
-	 * Turn off Bus Master bit on the device to tell it to not
-	 * continue to do DMA. Don't touch devices in D3cold or unknown states.
+	 * If this is a kexec reboot, turn off Bus Master bit on the
+	 * device to tell it to not continue to do DMA. Don't touch
+	 * devices in D3cold or unknown states.
+	 * If it is not a kexec reboot, firmware will hit the PCI
+	 * devices with big hammer and stop their DMA any way.
 	 */
-	if (pci_dev->current_state <= PCI_D3hot)
+	if (kexec_in_progress && (pci_dev->current_state <= PCI_D3hot))
 		pci_clear_master(pci_dev);
+#endif
 }
 
 #ifdef CONFIG_PM
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index d78d28a733b..5fd33dc1fe3 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -198,6 +198,9 @@ extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
 extern size_t vmcoreinfo_size;
 extern size_t vmcoreinfo_max_size;
 
+/* flag to track if kexec reboot is in progress */
+extern bool kexec_in_progress;
+
 int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
 		unsigned long long *crash_size, unsigned long long *crash_base);
 int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 490afc03627..d0d8fca5406 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -47,6 +47,9 @@ u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
 size_t vmcoreinfo_size;
 size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
 
+/* Flag to indicate we are going to kexec a new kernel */
+bool kexec_in_progress = false;
+
 /* Location of the reserved area for the crash kernel */
 struct resource crashk_res = {
 	.name  = "Crash kernel",
@@ -1675,6 +1678,7 @@ int kernel_kexec(void)
 	} else
 #endif
 	{
+		kexec_in_progress = true;
 		kernel_restart_prepare(NULL);
 		printk(KERN_EMERG "Starting new kernel\n");
 		machine_shutdown();
-- 
cgit v1.2.3-70-g09d2


From 11e6a09fba446ce799a9de51d2d99586024bbdaa Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Sun, 8 Dec 2013 23:39:04 -0800
Subject: drm/vmwgfx: Add max surface memory param

Userspace uses this to workaround overcommit issues
by flushing the command stream early.

Signed-off-by: Jakob Bornecrantz <jakob@vmware.com>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 3 +++
 include/uapi/drm/vmwgfx_drm.h         | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index a51f48e3e91..45d5b5ab6ca 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -68,6 +68,9 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 				  SVGA_FIFO_3D_HWVERSION));
 		break;
 	}
+	case DRM_VMW_PARAM_MAX_SURF_MEMORY:
+		param->value = dev_priv->memory_size;
+		break;
 	default:
 		DRM_ERROR("Illegal vmwgfx get param request: %d\n",
 			  param->param);
diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h
index bcb0912afe7..f854ca4a137 100644
--- a/include/uapi/drm/vmwgfx_drm.h
+++ b/include/uapi/drm/vmwgfx_drm.h
@@ -75,6 +75,7 @@
 #define DRM_VMW_PARAM_FIFO_CAPS        4
 #define DRM_VMW_PARAM_MAX_FB_SIZE      5
 #define DRM_VMW_PARAM_FIFO_HW_VERSION  6
+#define DRM_VMW_PARAM_MAX_SURF_MEMORY  7
 
 /**
  * struct drm_vmw_getparam_arg
-- 
cgit v1.2.3-70-g09d2


From c1b96a236e94d49d9396d0bbceb5524519c5c66e Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Tue, 21 May 2013 05:11:35 -0300
Subject: [media] videobuf2: Add support for file access mode flags for DMABUF
 exporting

Currently it is not possible for userspace to map a DMABUF exported buffer
with write permissions. This patch allows to also pass O_RDONLY/O_RDWR when
exporting the buffer, so that userspace may map it with write permissions.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 Documentation/DocBook/media/v4l/vidioc-expbuf.xml | 8 +++++---
 drivers/media/v4l2-core/videobuf2-core.c          | 8 ++++----
 drivers/media/v4l2-core/videobuf2-dma-contig.c    | 4 ++--
 include/media/videobuf2-core.h                    | 2 +-
 4 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/Documentation/DocBook/media/v4l/vidioc-expbuf.xml b/Documentation/DocBook/media/v4l/vidioc-expbuf.xml
index e287c8fc803..4165e7bfa4f 100644
--- a/Documentation/DocBook/media/v4l/vidioc-expbuf.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-expbuf.xml
@@ -73,7 +73,8 @@ range from zero to the maximal number of valid planes for the currently active
 format. For the single-planar API, applications must set <structfield> plane
 </structfield> to zero.  Additional flags may be posted in the <structfield>
 flags </structfield> field.  Refer to a manual for open() for details.
-Currently only O_CLOEXEC is supported.  All other fields must be set to zero.
+Currently only O_CLOEXEC, O_RDONLY, O_WRONLY, and O_RDWR are supported.  All
+other fields must be set to zero.
 In the case of multi-planar API, every plane is exported separately using
 multiple <constant> VIDIOC_EXPBUF </constant> calls. </para>
 
@@ -170,8 +171,9 @@ multi-planar API. Otherwise this value must be set to zero. </entry>
 	    <entry>__u32</entry>
 	    <entry><structfield>flags</structfield></entry>
 	    <entry>Flags for the newly created file, currently only <constant>
-O_CLOEXEC </constant> is supported, refer to the manual of open() for more
-details.</entry>
+O_CLOEXEC </constant>, <constant>O_RDONLY</constant>, <constant>O_WRONLY
+</constant>, and <constant>O_RDWR</constant> are supported, refer to the manual
+of open() for more details.</entry>
 	  </row>
 	  <row>
 	    <entry>__s32</entry>
diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index b19b306c8f7..57ba131f08a 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -1824,8 +1824,8 @@ int vb2_expbuf(struct vb2_queue *q, struct v4l2_exportbuffer *eb)
 		return -EINVAL;
 	}
 
-	if (eb->flags & ~O_CLOEXEC) {
-		dprintk(1, "Queue does support only O_CLOEXEC flag\n");
+	if (eb->flags & ~(O_CLOEXEC | O_ACCMODE)) {
+		dprintk(1, "Queue does support only O_CLOEXEC and access mode flags\n");
 		return -EINVAL;
 	}
 
@@ -1848,14 +1848,14 @@ int vb2_expbuf(struct vb2_queue *q, struct v4l2_exportbuffer *eb)
 
 	vb_plane = &vb->planes[eb->plane];
 
-	dbuf = call_memop(q, get_dmabuf, vb_plane->mem_priv);
+	dbuf = call_memop(q, get_dmabuf, vb_plane->mem_priv, eb->flags & O_ACCMODE);
 	if (IS_ERR_OR_NULL(dbuf)) {
 		dprintk(1, "Failed to export buffer %d, plane %d\n",
 			eb->index, eb->plane);
 		return -EINVAL;
 	}
 
-	ret = dma_buf_fd(dbuf, eb->flags);
+	ret = dma_buf_fd(dbuf, eb->flags & ~O_ACCMODE);
 	if (ret < 0) {
 		dprintk(3, "buffer %d, plane %d failed to export (%d)\n",
 			eb->index, eb->plane, ret);
diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c
index 646f08f4f50..33d3871d1e1 100644
--- a/drivers/media/v4l2-core/videobuf2-dma-contig.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c
@@ -393,7 +393,7 @@ static struct sg_table *vb2_dc_get_base_sgt(struct vb2_dc_buf *buf)
 	return sgt;
 }
 
-static struct dma_buf *vb2_dc_get_dmabuf(void *buf_priv)
+static struct dma_buf *vb2_dc_get_dmabuf(void *buf_priv, unsigned long flags)
 {
 	struct vb2_dc_buf *buf = buf_priv;
 	struct dma_buf *dbuf;
@@ -404,7 +404,7 @@ static struct dma_buf *vb2_dc_get_dmabuf(void *buf_priv)
 	if (WARN_ON(!buf->sgt_base))
 		return NULL;
 
-	dbuf = dma_buf_export(buf, &vb2_dc_dmabuf_ops, buf->size, 0);
+	dbuf = dma_buf_export(buf, &vb2_dc_dmabuf_ops, buf->size, flags);
 	if (IS_ERR(dbuf))
 		return NULL;
 
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index bd8218b1500..941055e9d12 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -83,7 +83,7 @@ struct vb2_fileio_data;
 struct vb2_mem_ops {
 	void		*(*alloc)(void *alloc_ctx, unsigned long size, gfp_t gfp_flags);
 	void		(*put)(void *buf_priv);
-	struct dma_buf *(*get_dmabuf)(void *buf_priv);
+	struct dma_buf *(*get_dmabuf)(void *buf_priv, unsigned long flags);
 
 	void		*(*get_userptr)(void *alloc_ctx, unsigned long vaddr,
 					unsigned long size, int write);
-- 
cgit v1.2.3-70-g09d2


From 932e9dec380c67ec15ac3eb073bb55797d8b4801 Mon Sep 17 00:00:00 2001
From: Stefano Panella <stefano.panella@citrix.com>
Date: Tue, 10 Dec 2013 14:20:28 +0000
Subject: ALSA: memalloc.h - fix wrong truncation of dma_addr_t

When running a 32bit kernel the hda_intel driver is still reporting
a 64bit dma_mask if the HW supports it.

From sound/pci/hda/hda_intel.c:

        /* allow 64bit DMA address if supported by H/W */
        if ((gcap & ICH6_GCAP_64OK) && !pci_set_dma_mask(pci, DMA_BIT_MASK(64)))
                pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(64));
        else {
                pci_set_dma_mask(pci, DMA_BIT_MASK(32));
                pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(32));
        }

which means when there is a call to dma_alloc_coherent from
snd_malloc_dev_pages a machine address bigger than 32bit can be returned.
This can be true in particular if running  the 32bit kernel as a pv dom0
under the Xen Hypervisor or PAE on bare metal.

The problem is that when calling setup_bdle to program the BLE the
dma_addr_t returned from the dma_alloc_coherent is wrongly truncated
from snd_sgbuf_get_addr if running a 32bit kernel:

static inline dma_addr_t snd_sgbuf_get_addr(struct snd_dma_buffer *dmab,
                                           size_t offset)
{
        struct snd_sg_buf *sgbuf = dmab->private_data;
        dma_addr_t addr = sgbuf->table[offset >> PAGE_SHIFT].addr;
        addr &= PAGE_MASK;
        return addr + offset % PAGE_SIZE;
}

where PAGE_MASK in a 32bit kernel is zeroing the upper 32bit af addr.

Without this patch the HW will fetch the 32bit truncated address,
which is not the one obtained from dma_alloc_coherent and will result
to a non working audio but can corrupt host memory at a random location.

The current patch apply to v3.13-rc3-74-g6c843f5

Signed-off-by: Stefano Panella <stefano.panella@citrix.com>
Reviewed-by: Frediano Ziglio <frediano.ziglio@citrix.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/memalloc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/memalloc.h b/include/sound/memalloc.h
index af998397041..5f73785f597 100644
--- a/include/sound/memalloc.h
+++ b/include/sound/memalloc.h
@@ -108,7 +108,7 @@ static inline dma_addr_t snd_sgbuf_get_addr(struct snd_dma_buffer *dmab,
 {
 	struct snd_sg_buf *sgbuf = dmab->private_data;
 	dma_addr_t addr = sgbuf->table[offset >> PAGE_SHIFT].addr;
-	addr &= PAGE_MASK;
+	addr &= ~((dma_addr_t)PAGE_SIZE - 1);
 	return addr + offset % PAGE_SIZE;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 6733cf572a9e20db2b7580a5dd39d5782d571eec Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 10 Dec 2013 12:15:52 +0100
Subject: ALSA: compress: Fix 64bit ABI incompatibility

snd_pcm_uframes_t is defined as unsigned long so it would take
different sizes depending on 32 or 64bit architectures.  As we don't
want this ABI incompatibility, and there is no real 64bit user yet,
let's make it the fixed size with __u32.

Also bump the protocol version number to 0.1.2.

Acked-by: Vinod Koul <vinod.koul@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/uapi/sound/compress_offload.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h
index d630163b9a2..5759810e1c1 100644
--- a/include/uapi/sound/compress_offload.h
+++ b/include/uapi/sound/compress_offload.h
@@ -30,7 +30,7 @@
 #include <sound/compress_params.h>
 
 
-#define SNDRV_COMPRESS_VERSION SNDRV_PROTOCOL_VERSION(0, 1, 1)
+#define SNDRV_COMPRESS_VERSION SNDRV_PROTOCOL_VERSION(0, 1, 2)
 /**
  * struct snd_compressed_buffer: compressed buffer
  * @fragment_size: size of buffer fragment in bytes
@@ -67,8 +67,8 @@ struct snd_compr_params {
 struct snd_compr_tstamp {
 	__u32 byte_offset;
 	__u32 copied_total;
-	snd_pcm_uframes_t pcm_frames;
-	snd_pcm_uframes_t pcm_io_frames;
+	__u32 pcm_frames;
+	__u32 pcm_io_frames;
 	__u32 sampling_rate;
 };
 
-- 
cgit v1.2.3-70-g09d2


From 503cf95c061a0551eb684da364509297efbe55d9 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Tue, 10 Dec 2013 14:56:06 -0800
Subject: x86, build, icc: Remove uninitialized_var() from compiler-intel.h

When compiling with icc, <linux/compiler-gcc.h> ends up included
because the icc environment defines __GNUC__.  Thus, we neither need
nor want to have this macro defined in both compiler-gcc.h and
compiler-intel.h, and the fact that they are inconsistent just makes
the compiler spew warnings.

Reported-by: Sunil K. Pandey <sunil.k.pandey@intel.com>
Cc: Kevin B. Smith <kevin.b.smith@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/n/tip-0mbwou1zt7pafij09b897lg3@git.kernel.org
Cc: <stable@vger.kernel.org>
---
 include/linux/compiler-intel.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index 973ce10c40b..dc1bd3dcf11 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -28,8 +28,6 @@
 
 #endif
 
-#define uninitialized_var(x) x
-
 #ifndef __HAVE_BUILTIN_BSWAP16__
 /* icc has this, but it's called _bswap16 */
 #define __HAVE_BUILTIN_BSWAP16__
-- 
cgit v1.2.3-70-g09d2


From 12663bfc97c8b3fdb292428105dd92d563164050 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Sat, 7 Dec 2013 17:26:27 -0500
Subject: net: unix: allow set_peek_off to fail

unix_dgram_recvmsg() will hold the readlock of the socket until recv
is complete.

In the same time, we may try to setsockopt(SO_PEEK_OFF) which will hang until
unix_dgram_recvmsg() will complete (which can take a while) without allowing
us to break out of it, triggering a hung task spew.

Instead, allow set_peek_off to fail, this way userspace will not hang.

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 2 +-
 net/core/sock.c     | 2 +-
 net/unix/af_unix.c  | 8 ++++++--
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/net.h b/include/linux/net.h
index 4bcee94cef9..69be3e6079c 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -181,7 +181,7 @@ struct proto_ops {
 				      int offset, size_t size, int flags);
 	ssize_t 	(*splice_read)(struct socket *sock,  loff_t *ppos,
 				       struct pipe_inode_info *pipe, size_t len, unsigned int flags);
-	void		(*set_peek_off)(struct sock *sk, int val);
+	int		(*set_peek_off)(struct sock *sk, int val);
 };
 
 #define DECLARE_SOCKADDR(type, dst, src)	\
diff --git a/net/core/sock.c b/net/core/sock.c
index ab20ed9b0f3..5393b4b719d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -882,7 +882,7 @@ set_rcvbuf:
 
 	case SO_PEEK_OFF:
 		if (sock->ops->set_peek_off)
-			sock->ops->set_peek_off(sk, val);
+			ret = sock->ops->set_peek_off(sk, val);
 		else
 			ret = -EOPNOTSUPP;
 		break;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 01625ccc3ae..a0ca162e5bd 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -530,13 +530,17 @@ static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
 static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
 				  struct msghdr *, size_t, int);
 
-static void unix_set_peek_off(struct sock *sk, int val)
+static int unix_set_peek_off(struct sock *sk, int val)
 {
 	struct unix_sock *u = unix_sk(sk);
 
-	mutex_lock(&u->readlock);
+	if (mutex_lock_interruptible(&u->readlock))
+		return -EINTR;
+
 	sk->sk_peek_off = val;
 	mutex_unlock(&u->readlock);
+
+	return 0;
 }
 
 
-- 
cgit v1.2.3-70-g09d2


From 9f70f46bd4c7267d48ef461a1d613ec9ec0d520c Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 10 Dec 2013 06:48:15 -0500
Subject: sctp: properly latch and use autoclose value from sock to association

Currently, sctp associations latch a sockets autoclose value to an association
at association init time, subject to capping constraints from the max_autoclose
sysctl value.  This leads to an odd situation where an application may set a
socket level autoclose timeout, but sliently sctp will limit the autoclose
timeout to something less than that.

Fix this by modifying the autoclose setsockopt function to check the limit, cap
it and warn the user via syslog that the timeout is capped.  This will allow
getsockopt to return valid autoclose timeout values that reflect what subsequent
associations actually use.

While were at it, also elimintate the assoc->autoclose variable, it duplicates
whats in the timeout array, which leads to multiple sources for the same
information, that may differ (as the former isn't subject to any capping).  This
gives us the timeout information in a canonical place and saves some space in
the association structure as well.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
CC: Wang Weidong <wangweidong1@huawei.com>
CC: David Miller <davem@davemloft.net>
CC: Vlad Yasevich <vyasevich@gmail.com>
CC: netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  6 ------
 net/sctp/associola.c       |  5 +----
 net/sctp/output.c          |  3 ++-
 net/sctp/sm_statefuns.c    | 12 ++++++------
 net/sctp/socket.c          |  4 ++++
 5 files changed, 13 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index ea0ca5f6e62..67b5d006827 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1726,12 +1726,6 @@ struct sctp_association {
 	/* How many duplicated TSNs have we seen?  */
 	int numduptsns;
 
-	/* Number of seconds of idle time before an association is closed.
-	 * In the association context, this is really used as a boolean
-	 * since the real timeout is stored in the timeouts array
-	 */
-	__u32 autoclose;
-
 	/* These are to support
 	 * "SCTP Extensions for Dynamic Reconfiguration of IP Addresses
 	 *  and Enforcement of Flow and Message Limits"
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 68a27f9796d..31ed008c8e1 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -154,8 +154,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0;
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay;
-	asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] =
-		min_t(unsigned long, sp->autoclose, net->sctp.max_autoclose) * HZ;
+	asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sp->autoclose * HZ;
 
 	/* Initializes the timers */
 	for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i)
@@ -291,8 +290,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 		asoc->peer.ipv6_address = 1;
 	INIT_LIST_HEAD(&asoc->asocs);
 
-	asoc->autoclose = sp->autoclose;
-
 	asoc->default_stream = sp->default_stream;
 	asoc->default_ppid = sp->default_ppid;
 	asoc->default_flags = sp->default_flags;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 0e2644d0a77..0fb140f8f08 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -581,7 +581,8 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 		unsigned long timeout;
 
 		/* Restart the AUTOCLOSE timer when sending data. */
-		if (sctp_state(asoc, ESTABLISHED) && asoc->autoclose) {
+		if (sctp_state(asoc, ESTABLISHED) &&
+		    asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) {
 			timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
 			timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index dfe3f36ff2a..a26065be728 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -820,7 +820,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
 	SCTP_INC_STATS(net, SCTP_MIB_PASSIVEESTABS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
 
-	if (new_asoc->autoclose)
+	if (new_asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE])
 		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
 
@@ -908,7 +908,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net,
 	SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
 	SCTP_INC_STATS(net, SCTP_MIB_ACTIVEESTABS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
-	if (asoc->autoclose)
+	if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE])
 		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
 
@@ -2970,7 +2970,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
 	if (chunk->chunk_hdr->flags & SCTP_DATA_SACK_IMM)
 		force = SCTP_FORCE();
 
-	if (asoc->autoclose) {
+	if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
 	}
@@ -3878,7 +3878,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
 				SCTP_CHUNK(chunk));
 
 	/* Count this as receiving DATA. */
-	if (asoc->autoclose) {
+	if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
 	}
@@ -5267,7 +5267,7 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown(
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
 
-	if (asoc->autoclose)
+	if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE])
 		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
 
@@ -5346,7 +5346,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
 
-	if (asoc->autoclose)
+	if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE])
 		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 72046b9729a..5455043f449 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2196,6 +2196,7 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
 				     unsigned int optlen)
 {
 	struct sctp_sock *sp = sctp_sk(sk);
+	struct net *net = sock_net(sk);
 
 	/* Applicable to UDP-style socket only */
 	if (sctp_style(sk, TCP))
@@ -2205,6 +2206,9 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
 	if (copy_from_user(&sp->autoclose, optval, optlen))
 		return -EFAULT;
 
+	if (sp->autoclose > net->sctp.max_autoclose)
+		sp->autoclose = net->sctp.max_autoclose;
+
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 4bd7b5127bd02c12c1cc837a7a0b6ce295eb2505 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Tue, 10 Dec 2013 02:20:41 +0300
Subject: micrel: add support for KSZ8041RNLI

Renesas R-Car development boards use KSZ8041RNLI PHY which for some reason has
ID of 0x00221537 that is not documented for KSZ8041-family PHYs and does not
match the  documented ID of  0x0022151x (where 'x' is the revision).  We have
to add the new #define PHY_ID_* and new ksphy_driver[] entry, almost the same
as KSZ8041 one, differing only in the 'phy_id' and 'name' fields.

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Tested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c   | 15 +++++++++++++++
 include/linux/micrel_phy.h |  2 ++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 3ae28f42086..26fa05a472b 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -335,6 +335,21 @@ static struct phy_driver ksphy_driver[] = {
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 	.driver		= { .owner = THIS_MODULE,},
+}, {
+	.phy_id		= PHY_ID_KSZ8041RNLI,
+	.phy_id_mask	= 0x00fffff0,
+	.name		= "Micrel KSZ8041RNLI",
+	.features	= PHY_BASIC_FEATURES |
+			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
+	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.config_init	= kszphy_config_init,
+	.config_aneg	= genphy_config_aneg,
+	.read_status	= genphy_read_status,
+	.ack_interrupt	= kszphy_ack_interrupt,
+	.config_intr	= kszphy_config_intr,
+	.suspend	= genphy_suspend,
+	.resume		= genphy_resume,
+	.driver		= { .owner = THIS_MODULE,},
 }, {
 	.phy_id		= PHY_ID_KSZ8051,
 	.phy_id_mask	= 0x00fffff0,
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index ad05ce60c1c..2e5b194b9b1 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -22,6 +22,8 @@
 #define PHY_ID_KSZ8021		0x00221555
 #define PHY_ID_KSZ8031		0x00221556
 #define PHY_ID_KSZ8041		0x00221510
+/* undocumented */
+#define PHY_ID_KSZ8041RNLI	0x00221537
 #define PHY_ID_KSZ8051		0x00221550
 /* same id: ks8001 Rev. A/B, and ks8721 Rev 3. */
 #define PHY_ID_KSZ8001		0x0022161A
-- 
cgit v1.2.3-70-g09d2


From a51d5229d10dd3a337b674ce8603437d2996c5c3 Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Sat, 23 Nov 2013 10:35:58 -0800
Subject: target: Remove write-only stats fields and lock from struct
 se_node_acl

Commit 04f3b31bff72 ("iscsi-target: Convert iscsi_session statistics to
atomic_long_t") removed the updating of these fields in iscsi (the only
fabric driver that ever touched these counters), and the core has no way
to report or otherwise use the values.  Remove the last remnants of
these counters.

Signed-off-by: Roland Dreier <roland@purestorage.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_tpg.c  | 2 --
 include/target/target_core_base.h | 4 ----
 2 files changed, 6 deletions(-)

(limited to 'include')

diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index f697f8baec5..f755712a9a0 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -278,7 +278,6 @@ struct se_node_acl *core_tpg_check_initiator_node_acl(
 	snprintf(acl->initiatorname, TRANSPORT_IQN_LEN, "%s", initiatorname);
 	acl->se_tpg = tpg;
 	acl->acl_index = scsi_get_new_index(SCSI_AUTH_INTR_INDEX);
-	spin_lock_init(&acl->stats_lock);
 	acl->dynamic_node_acl = 1;
 
 	tpg->se_tpg_tfo->set_default_node_attributes(acl);
@@ -406,7 +405,6 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
 	snprintf(acl->initiatorname, TRANSPORT_IQN_LEN, "%s", initiatorname);
 	acl->se_tpg = tpg;
 	acl->acl_index = scsi_get_new_index(SCSI_AUTH_INTR_INDEX);
-	spin_lock_init(&acl->stats_lock);
 
 	tpg->se_tpg_tfo->set_default_node_attributes(acl);
 
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 45412a6afa6..9f1dda659c5 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -517,10 +517,6 @@ struct se_node_acl {
 	u32			acl_index;
 #define MAX_ACL_TAG_SIZE 64
 	char			acl_tag[MAX_ACL_TAG_SIZE];
-	u64			num_cmds;
-	u64			read_bytes;
-	u64			write_bytes;
-	spinlock_t		stats_lock;
 	/* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */
 	atomic_t		acl_pr_ref_count;
 	struct se_dev_entry	**device_list;
-- 
cgit v1.2.3-70-g09d2


From a5c21dcefa1c3d759457a604b3cfc4af29c8713f Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 12 Dec 2013 17:40:21 +0000
Subject: dcache: allow word-at-a-time name hashing with big-endian CPUs

When explicitly hashing the end of a string with the word-at-a-time
interface, we have to be careful which end of the word we pick up.

On big-endian CPUs, the upper-bits will contain the data we're after, so
ensure we generate our masks accordingly (and avoid hashing whatever
random junk may have been sitting after the string).

This patch adds a new dcache helper, bytemask_from_count, which creates
a mask appropriate for the CPU endianness.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dcache.c            | 2 +-
 fs/namei.c             | 7 +------
 include/linux/dcache.h | 2 ++
 3 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/fs/dcache.c b/fs/dcache.c
index 4bdb300b16e..6055d61811d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -192,7 +192,7 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char
 		if (!tcount)
 			return 0;
 	}
-	mask = ~(~0ul << tcount*8);
+	mask = bytemask_from_count(tcount);
 	return unlikely(!!((a ^ b) & mask));
 }
 
diff --git a/fs/namei.c b/fs/namei.c
index c53d3a9547f..3531deebad3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1598,11 +1598,6 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
  *   do a "get_unaligned()" if this helps and is sufficiently
  *   fast.
  *
- * - Little-endian machines (so that we can generate the mask
- *   of low bytes efficiently). Again, we *could* do a byte
- *   swapping load on big-endian architectures if that is not
- *   expensive enough to make the optimization worthless.
- *
  * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
  *   do not trap on the (extremely unlikely) case of a page
  *   crossing operation.
@@ -1646,7 +1641,7 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len)
 		if (!len)
 			goto done;
 	}
-	mask = ~(~0ul << len*8);
+	mask = bytemask_from_count(len);
 	hash += mask & a;
 done:
 	return fold_hash(hash);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 57e87e749a4..bf72e9ac6de 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -29,8 +29,10 @@ struct vfsmount;
 /* The hash is always the low bits of hash_len */
 #ifdef __LITTLE_ENDIAN
  #define HASH_LEN_DECLARE u32 hash; u32 len;
+ #define bytemask_from_count(cnt)	(~(~0ul << (cnt)*8))
 #else
  #define HASH_LEN_DECLARE u32 len; u32 hash;
+ #define bytemask_from_count(cnt)	(~(~0ul >> (cnt)*8))
 #endif
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 11ec50caedb56e3a87715edeff6a1852e6ae5416 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 12 Dec 2013 17:40:22 +0000
Subject: word-at-a-time: provide generic big-endian zero_bytemask
 implementation

Whilst architectures may be able to do better than this (which they can,
by simply defining their own macro), this is a generic stab at a
zero_bytemask implementation for the asm-generic, big-endian
word-at-a-time implementation.

On arm64, a clz instruction is used to implement the fls efficiently.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/word-at-a-time.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h
index 3f21f1b72e4..d3909effd72 100644
--- a/include/asm-generic/word-at-a-time.h
+++ b/include/asm-generic/word-at-a-time.h
@@ -49,4 +49,12 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct
 	return (val + c->high_bits) & ~rhs;
 }
 
+#ifndef zero_bytemask
+#ifdef CONFIG_64BIT
+#define zero_bytemask(mask)	(~0ul << fls64(mask))
+#else
+#define zero_bytemask(mask)	(~0ul << fls(mask))
+#endif /* CONFIG_64BIT */
+#endif /* zero_bytemask */
+
 #endif /* _ASM_WORD_AT_A_TIME_H */
-- 
cgit v1.2.3-70-g09d2


From a0a9663dd2146d54339237764e1bfc60e8a39328 Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Thu, 12 Dec 2013 10:15:59 +0100
Subject: net: make neigh_priv_len in struct net_device 16bit instead of 8bit

neigh_priv_len is defined as u8. With all debug enabled struct
ipoib_neigh has 200 bytes. The largest part is sk_buff_head with 96
bytes and here the spinlock with 72 bytes.
The size value still fits in this u8 leaving some room for more.

On -RT struct ipoib_neigh put on weight and has 392 bytes. The main
reason is sk_buff_head with 288 and the fatty here is spinlock with 192
bytes. This does no longer fit into into neigh_priv_len and gcc
complains.

This patch changes neigh_priv_len from being 8bit to 16bit. Since the
following element (dev_id) is 16bit followed by a spinlock which is
aligned, the struct remains with a total size of 3200 (allmodconfig) /
2048 (with as much debug off as possible) bytes on x86-64.
On x86-32 the struct is 1856 (allmodconfig) / 1216 (with as much debug
off as possible) bytes long. The numbers were gained with and without
the patch to prove that this change does not increase the size of the
struct.

Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7f0ed423a36..d9a550bf3e8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1255,7 +1255,7 @@ struct net_device {
 	unsigned char		perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
 	unsigned char		addr_assign_type; /* hw address assignment type */
 	unsigned char		addr_len;	/* hardware address length	*/
-	unsigned char		neigh_priv_len;
+	unsigned short		neigh_priv_len;
 	unsigned short          dev_id;		/* Used to differentiate devices
 						 * that share the same link
 						 * layer address
-- 
cgit v1.2.3-70-g09d2


From f40386a4e976acb2bd3e0f9ead11e8e769acbe98 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Thu, 12 Dec 2013 17:12:19 -0800
Subject: include/linux/hugetlb.h: make isolate_huge_page() an inline

With CONFIG_HUGETLBFS=n:

  mm/migrate.c: In function `do_move_page_to_node_array':
  include/linux/hugetlb.h:140:33: warning: statement with no effect [-Wunused-value]
   #define isolate_huge_page(p, l) false
                                   ^
  mm/migrate.c:1170:4: note: in expansion of macro `isolate_huge_page'
      isolate_huge_page(page, &pagelist);

Reported-by: Borislav Petkov <bp@alien8.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 9649ff0c63f..bd7e9875222 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -142,7 +142,10 @@ static inline int dequeue_hwpoisoned_huge_page(struct page *page)
 	return 0;
 }
 
-#define isolate_huge_page(p, l) false
+static inline bool isolate_huge_page(struct page *page, struct list_head *list)
+{
+	return false;
+}
 #define putback_active_hugepage(p)	do {} while (0)
 #define is_hugepage_active(x)	false
 
-- 
cgit v1.2.3-70-g09d2


From 386e79066f04850352f144e67edfd5b636c0f95c Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Thu, 12 Dec 2013 17:12:24 -0800
Subject: include/linux/kernel.h: make might_fault() a nop for !MMU

The machine cannot fault if !MUU, so make might_fault() a nop for !MMU.

This fixes below build error if
!CONFIG_MMU && (CONFIG_PROVE_LOCKING=y || CONFIG_DEBUG_ATOMIC_SLEEP=y):

  arch/arm/kernel/built-in.o: In function `arch_ptrace':
  arch/arm/kernel/ptrace.c:852: undefined reference to `might_fault'
  arch/arm/kernel/built-in.o: In function `restore_sigframe':
  arch/arm/kernel/signal.c:173: undefined reference to `might_fault'
  ...
  arch/arm/kernel/built-in.o:arch/arm/kernel/signal.c:177: more undefined references to `might_fault' follow
  make: *** [vmlinux] Error 1

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d4e98d13eff..ecb87544cc5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -193,7 +193,8 @@ extern int _cond_resched(void);
 		(__x < 0) ? -__x : __x;		\
 	})
 
-#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)
+#if defined(CONFIG_MMU) && \
+	(defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP))
 void might_fault(void);
 #else
 static inline void might_fault(void) { }
-- 
cgit v1.2.3-70-g09d2


From 3e1e4a5f3a324502c27c4e8808e06ac2ea842360 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Thu, 12 Dec 2013 17:12:31 -0800
Subject: mfd/rtc: s5m: fix register updating by adding regmap for RTC

Rename old regmap field of "struct sec_pmic_dev" to "regmap_pmic" and
add new regmap for RTC.

On S5M8767A registers were not properly updated and read due to usage of
the same regmap as the PMIC.  This could be observed in various hangs,
e.g.  in infinite loop during waiting for UDR field change.

On this chip family the RTC has different I2C address than PMIC so
additional regmap is needed.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Reviewed-by: Mark Brown <broonie@linaro.org>
Acked-by: Sangbeom Kim <sbkim73@samsung.com>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Lee Jones <lee.jones@linaro.org>
Cc: Liam Girdwood <lgirdwood@gmail.com>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/sec-core.c           | 30 ++++++++++++++++++++++--------
 drivers/mfd/sec-irq.c            |  6 +++---
 drivers/regulator/s5m8767.c      |  2 +-
 drivers/rtc/rtc-s5m.c            |  2 +-
 include/linux/mfd/samsung/core.h |  3 ++-
 5 files changed, 29 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/sec-core.c b/drivers/mfd/sec-core.c
index 34c18fb8c08..54cc2554659 100644
--- a/drivers/mfd/sec-core.c
+++ b/drivers/mfd/sec-core.c
@@ -81,31 +81,31 @@ static struct of_device_id sec_dt_match[] = {
 
 int sec_reg_read(struct sec_pmic_dev *sec_pmic, u8 reg, void *dest)
 {
-	return regmap_read(sec_pmic->regmap, reg, dest);
+	return regmap_read(sec_pmic->regmap_pmic, reg, dest);
 }
 EXPORT_SYMBOL_GPL(sec_reg_read);
 
 int sec_bulk_read(struct sec_pmic_dev *sec_pmic, u8 reg, int count, u8 *buf)
 {
-	return regmap_bulk_read(sec_pmic->regmap, reg, buf, count);
+	return regmap_bulk_read(sec_pmic->regmap_pmic, reg, buf, count);
 }
 EXPORT_SYMBOL_GPL(sec_bulk_read);
 
 int sec_reg_write(struct sec_pmic_dev *sec_pmic, u8 reg, u8 value)
 {
-	return regmap_write(sec_pmic->regmap, reg, value);
+	return regmap_write(sec_pmic->regmap_pmic, reg, value);
 }
 EXPORT_SYMBOL_GPL(sec_reg_write);
 
 int sec_bulk_write(struct sec_pmic_dev *sec_pmic, u8 reg, int count, u8 *buf)
 {
-	return regmap_raw_write(sec_pmic->regmap, reg, buf, count);
+	return regmap_raw_write(sec_pmic->regmap_pmic, reg, buf, count);
 }
 EXPORT_SYMBOL_GPL(sec_bulk_write);
 
 int sec_reg_update(struct sec_pmic_dev *sec_pmic, u8 reg, u8 val, u8 mask)
 {
-	return regmap_update_bits(sec_pmic->regmap, reg, mask, val);
+	return regmap_update_bits(sec_pmic->regmap_pmic, reg, mask, val);
 }
 EXPORT_SYMBOL_GPL(sec_reg_update);
 
@@ -166,6 +166,11 @@ static struct regmap_config s5m8767_regmap_config = {
 	.cache_type = REGCACHE_FLAT,
 };
 
+static const struct regmap_config sec_rtc_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+};
+
 #ifdef CONFIG_OF
 /*
  * Only the common platform data elements for s5m8767 are parsed here from the
@@ -266,9 +271,9 @@ static int sec_pmic_probe(struct i2c_client *i2c,
 		break;
 	}
 
-	sec_pmic->regmap = devm_regmap_init_i2c(i2c, regmap);
-	if (IS_ERR(sec_pmic->regmap)) {
-		ret = PTR_ERR(sec_pmic->regmap);
+	sec_pmic->regmap_pmic = devm_regmap_init_i2c(i2c, regmap);
+	if (IS_ERR(sec_pmic->regmap_pmic)) {
+		ret = PTR_ERR(sec_pmic->regmap_pmic);
 		dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
 			ret);
 		return ret;
@@ -277,6 +282,15 @@ static int sec_pmic_probe(struct i2c_client *i2c,
 	sec_pmic->rtc = i2c_new_dummy(i2c->adapter, RTC_I2C_ADDR);
 	i2c_set_clientdata(sec_pmic->rtc, sec_pmic);
 
+	sec_pmic->regmap_rtc = devm_regmap_init_i2c(sec_pmic->rtc,
+			&sec_rtc_regmap_config);
+	if (IS_ERR(sec_pmic->regmap_rtc)) {
+		ret = PTR_ERR(sec_pmic->regmap_rtc);
+		dev_err(&i2c->dev, "Failed to allocate RTC register map: %d\n",
+			ret);
+		return ret;
+	}
+
 	if (pdata && pdata->cfg_pmic_irq)
 		pdata->cfg_pmic_irq();
 
diff --git a/drivers/mfd/sec-irq.c b/drivers/mfd/sec-irq.c
index 0dd84e99081..b441b1be27c 100644
--- a/drivers/mfd/sec-irq.c
+++ b/drivers/mfd/sec-irq.c
@@ -280,19 +280,19 @@ int sec_irq_init(struct sec_pmic_dev *sec_pmic)
 
 	switch (type) {
 	case S5M8763X:
-		ret = regmap_add_irq_chip(sec_pmic->regmap, sec_pmic->irq,
+		ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq,
 				  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 				  sec_pmic->irq_base, &s5m8763_irq_chip,
 				  &sec_pmic->irq_data);
 		break;
 	case S5M8767X:
-		ret = regmap_add_irq_chip(sec_pmic->regmap, sec_pmic->irq,
+		ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq,
 				  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 				  sec_pmic->irq_base, &s5m8767_irq_chip,
 				  &sec_pmic->irq_data);
 		break;
 	case S2MPS11X:
-		ret = regmap_add_irq_chip(sec_pmic->regmap, sec_pmic->irq,
+		ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq,
 				  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 				  sec_pmic->irq_base, &s2mps11_irq_chip,
 				  &sec_pmic->irq_data);
diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c
index cbf91e25cf7..aeb40aad0ae 100644
--- a/drivers/regulator/s5m8767.c
+++ b/drivers/regulator/s5m8767.c
@@ -925,7 +925,7 @@ static int s5m8767_pmic_probe(struct platform_device *pdev)
 		config.dev = s5m8767->dev;
 		config.init_data = pdata->regulators[i].initdata;
 		config.driver_data = s5m8767;
-		config.regmap = iodev->regmap;
+		config.regmap = iodev->regmap_pmic;
 		config.of_node = pdata->regulators[i].reg_node;
 
 		rdev[i] = devm_regulator_register(&pdev->dev, &regulators[id],
diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
index 0ba56b7cb38..ae8119dc284 100644
--- a/drivers/rtc/rtc-s5m.c
+++ b/drivers/rtc/rtc-s5m.c
@@ -567,7 +567,7 @@ static int s5m_rtc_probe(struct platform_device *pdev)
 
 	info->dev = &pdev->dev;
 	info->s5m87xx = s5m87xx;
-	info->regmap = s5m87xx->regmap;
+	info->regmap = s5m87xx->regmap_rtc;
 	info->device_type = s5m87xx->device_type;
 	info->wtsr_smpl = s5m87xx->wtsr_smpl;
 
diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index 2d0c9071bcf..cab2dd27907 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -39,7 +39,8 @@ enum sec_device_type {
 struct sec_pmic_dev {
 	struct device *dev;
 	struct sec_platform_data *pdata;
-	struct regmap *regmap;
+	struct regmap *regmap_pmic;
+	struct regmap *regmap_rtc;
 	struct i2c_client *i2c;
 	struct i2c_client *rtc;
 
-- 
cgit v1.2.3-70-g09d2


From 380108d891acf8db5cf0d477176c7ed2b62b7928 Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@linaro.org>
Date: Tue, 3 Dec 2013 15:40:37 +0000
Subject: xen/block: Correctly define structures in public headers on ARM32 and
 ARM64

On ARM (32 bits and 64 bits), the double-word is 8-bytes aligned. This will
result on different structure from Xen and Linux repositories.

As Linux is using __packed__ attribute, it must have a 4-bytes padding before
each "id" field.

This change breaks guest block support with older kernel. IMHO, it's acceptable
because Xen on ARM is still on Tech Preview and the hypercall ABI is not yet
freezed.

Only one architecture (x86_32) doesn't have 64-bit ABI for the block interface.
Don't add padding if Linux is compiled for this architecture.

Signed-off-by: Julien Grall <julien.grall@linaro.org>
Acked-by: Roger Pau Monne <roger.pau@citrix.com>
Acked-by: David Vrabel <david.vrabel@citrix.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
[I had asked for confirmation that it did not break x86 and Ian went
beyound the call of duty to confirm it. Also a internal regression
bucket with 32/64 dom0 with 32/64 domU (PV and HVM) confirmed no
regressions. ABI changes are a drag..]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/xen/interface/io/blkif.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index 65e12099ef8..ae665ac59c3 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -146,7 +146,7 @@ struct blkif_request_segment_aligned {
 struct blkif_request_rw {
 	uint8_t        nr_segments;  /* number of segments                   */
 	blkif_vdev_t   handle;       /* only for read/write requests         */
-#ifdef CONFIG_X86_64
+#ifndef CONFIG_X86_32
 	uint32_t       _pad1;	     /* offsetof(blkif_request,u.rw.id) == 8 */
 #endif
 	uint64_t       id;           /* private guest value, echoed in resp  */
@@ -163,7 +163,7 @@ struct blkif_request_discard {
 	uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero.        */
 #define BLKIF_DISCARD_SECURE (1<<0)  /* ignored if discard-secure=0          */
 	blkif_vdev_t   _pad1;        /* only for read/write requests         */
-#ifdef CONFIG_X86_64
+#ifndef CONFIG_X86_32
 	uint32_t       _pad2;        /* offsetof(blkif_req..,u.discard.id)==8*/
 #endif
 	uint64_t       id;           /* private guest value, echoed in resp  */
@@ -175,7 +175,7 @@ struct blkif_request_discard {
 struct blkif_request_other {
 	uint8_t      _pad1;
 	blkif_vdev_t _pad2;        /* only for read/write requests         */
-#ifdef CONFIG_X86_64
+#ifndef CONFIG_X86_32
 	uint32_t     _pad3;        /* offsetof(blkif_req..,u.other.id)==8*/
 #endif
 	uint64_t     id;           /* private guest value, echoed in resp  */
@@ -184,7 +184,7 @@ struct blkif_request_other {
 struct blkif_request_indirect {
 	uint8_t        indirect_op;
 	uint16_t       nr_segments;
-#ifdef CONFIG_X86_64
+#ifndef CONFIG_X86_32
 	uint32_t       _pad1;        /* offsetof(blkif_...,u.indirect.id) == 8 */
 #endif
 	uint64_t       id;
@@ -192,7 +192,7 @@ struct blkif_request_indirect {
 	blkif_vdev_t   handle;
 	uint16_t       _pad2;
 	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
-#ifdef CONFIG_X86_64
+#ifndef CONFIG_X86_32
 	uint32_t      _pad3;         /* make it 64 byte aligned */
 #else
 	uint64_t      _pad3;         /* make it 64 byte aligned */
-- 
cgit v1.2.3-70-g09d2


From 2a4d81547b88b4ff566d4b4f9c0e36285ed07634 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Sun, 15 Dec 2013 21:00:48 -0800
Subject: Input: define KEY_WWAN for Wireless WAN
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some devices with support for mobile networks may have buttons for
enabling/disabling such connection. An example can be Linksys router 54G3G.
We already have KEY_BLUETOOTH, KEY_WLAN and KEY_UWB so it makes sense to
add KEY_WWAN as well.  As we already have KEY_WIMAX, use it's value for
KEY_WWAN and make it an alias.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/uapi/linux/input.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h
index ecc88592ecb..bd24470d24a 100644
--- a/include/uapi/linux/input.h
+++ b/include/uapi/linux/input.h
@@ -464,7 +464,8 @@ struct input_keymap_entry {
 #define KEY_BRIGHTNESS_ZERO	244	/* brightness off, use ambient */
 #define KEY_DISPLAY_OFF		245	/* display device to off state */
 
-#define KEY_WIMAX		246
+#define KEY_WWAN		246	/* Wireless WAN (LTE, UMTS, GSM, etc.) */
+#define KEY_WIMAX		KEY_WWAN
 #define KEY_RFKILL		247	/* Key that controls all radios */
 
 #define KEY_MICMUTE		248	/* Mute / unmute the microphone */
-- 
cgit v1.2.3-70-g09d2


From 309243ec14fde1149e1c66f19746e239e86caf39 Mon Sep 17 00:00:00 2001
From: Yann Droneaud <ydroneaud@opteya.com>
Date: Wed, 11 Dec 2013 23:01:44 +0100
Subject: IB/core: const'ify inbuf in struct ib_udata

Userspace input buffer is not modified by kernel, so it can be 'const'.

This is also a prerequisite to remove the implicit cast
from INIT_UDATA().

Link: http://marc.info/?i=cover.1386798254.git.ydroneaud@opteya.com>
Signed-off-by: Yann Droneaud <ydroneaud@opteya.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/core/uverbs.h | 2 +-
 include/rdma/ib_verbs.h          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index bdc842e9fae..9879568aed8 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -49,7 +49,7 @@
 
 #define INIT_UDATA(udata, ibuf, obuf, ilen, olen)			\
 	do {								\
-		(udata)->inbuf  = (void __user *) (ibuf);		\
+		(udata)->inbuf  = (const void __user *) (ibuf);		\
 		(udata)->outbuf = (void __user *) (obuf);		\
 		(udata)->inlen  = (ilen);				\
 		(udata)->outlen = (olen);				\
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 979874c627e..61e1935c91b 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -978,7 +978,7 @@ struct ib_uobject {
 };
 
 struct ib_udata {
-	void __user *inbuf;
+	const void __user *inbuf;
 	void __user *outbuf;
 	size_t       inlen;
 	size_t       outlen;
-- 
cgit v1.2.3-70-g09d2


From f78dea064c5f7de07de4912a6e5136dbc443d614 Mon Sep 17 00:00:00 2001
From: Marc Carino <marc.ceeeee@gmail.com>
Date: Mon, 16 Dec 2013 18:15:53 -0800
Subject: libata: implement ATA_HORKAGE_NO_NCQ_TRIM and apply it to Micro M500
 SSDs

Certain drives cannot handle queued TRIM commands properly, even
though support is indicated in the IDENTIFY DEVICE buffer.  This patch
allows for disabling the commands for the affected drives and apply it
to the Micron/Crucial M500 SSDs which exhibit incorrect protocol
behavior when issued queued TRIM commands, which could lead to silent
data corruption.

tj: Merged two unnecessarily split patches and made minor edits
    including shortening horkage name.

Signed-off-by: Marc Carino <marc.ceeeee@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/g/1387246554-7311-1-git-send-email-marc.ceeeee@gmail.com
Cc: stable@vger.kernel.org # 3.12+
---
 drivers/ata/libata-core.c | 15 +++++++++++++--
 include/linux/libata.h    |  1 +
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index ff0158481d5..1393a5890ed 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2149,9 +2149,16 @@ static int ata_dev_config_ncq(struct ata_device *dev,
 				    "failed to get NCQ Send/Recv Log Emask 0x%x\n",
 				    err_mask);
 		} else {
+			u8 *cmds = dev->ncq_send_recv_cmds;
+
 			dev->flags |= ATA_DFLAG_NCQ_SEND_RECV;
-			memcpy(dev->ncq_send_recv_cmds, ap->sector_buf,
-				ATA_LOG_NCQ_SEND_RECV_SIZE);
+			memcpy(cmds, ap->sector_buf, ATA_LOG_NCQ_SEND_RECV_SIZE);
+
+			if (dev->horkage & ATA_HORKAGE_NO_NCQ_TRIM) {
+				ata_dev_dbg(dev, "disabling queued TRIM support\n");
+				cmds[ATA_LOG_NCQ_SEND_RECV_DSM_OFFSET] &=
+					~ATA_LOG_NCQ_SEND_RECV_DSM_TRIM;
+			}
 		}
 	}
 
@@ -4205,6 +4212,10 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	{ "PIONEER DVD-RW  DVR-212D",	NULL,	ATA_HORKAGE_NOSETXFER },
 	{ "PIONEER DVD-RW  DVR-216D",	NULL,	ATA_HORKAGE_NOSETXFER },
 
+	/* devices that don't properly handle queued TRIM commands */
+	{ "Micron_M500*",		NULL,	ATA_HORKAGE_NO_NCQ_TRIM, },
+	{ "Crucial_CT???M500SSD1",	NULL,	ATA_HORKAGE_NO_NCQ_TRIM, },
+
 	/* End Marker */
 	{ }
 };
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 0e23c26485f..9b503376738 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -418,6 +418,7 @@ enum {
 	ATA_HORKAGE_DUMP_ID	= (1 << 16),	/* dump IDENTIFY data */
 	ATA_HORKAGE_MAX_SEC_LBA48 = (1 << 17),	/* Set max sects to 65535 */
 	ATA_HORKAGE_ATAPI_DMADIR = (1 << 18),	/* device requires dmadir */
+	ATA_HORKAGE_NO_NCQ_TRIM	= (1 << 19),	/* don't use queued TRIM */
 
 	 /* DMA mask for user DMA control: User visible values; DO NOT
 	    renumber */
-- 
cgit v1.2.3-70-g09d2


From 189b84fb54490ae24111124346a8e63f8e019385 Mon Sep 17 00:00:00 2001
From: Vince Weaver <vince@deater.net>
Date: Fri, 13 Dec 2013 15:52:25 -0500
Subject: perf: Document the new transaction sample type

Commit fdfbbd07e91f8fe3871 ("perf: Add generic transaction flags")
added support for PERF_SAMPLE_TRANSACTION but forgot to add documentation
for the sample type to include/uapi/linux/perf_event.h

Signed-off-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1312131548450.10372@pianoman.cluster.toy
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/perf_event.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index e1802d6153a..959d454f76a 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -679,6 +679,7 @@ enum perf_event_type {
 	 *
 	 *	{ u64			weight;   } && PERF_SAMPLE_WEIGHT
 	 *	{ u64			data_src; } && PERF_SAMPLE_DATA_SRC
+	 *	{ u64			transaction; } && PERF_SAMPLE_TRANSACTION
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
-- 
cgit v1.2.3-70-g09d2


From 85328240c625f322af9f69c7b60e619717101d77 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Tue, 26 Nov 2013 06:33:52 +0000
Subject: net: allow netdev_all_upper_get_next_dev_rcu with rtnl lock held

It is useful to be able to walk all upper devices when bringing
a device online where the RTNL lock is held. In this case it
is safe to walk the all_adj_list because the RTNL lock is used
to protect the write side as well.

This patch adds a check to see if the rtnl lock is held before
throwing a warning in netdev_all_upper_get_next_dev_rcu().

Also because we now have a call site for lockdep_rtnl_is_held()
outside COFIG_LOCK_PROVING an inline definition returning 1 is
needed. Similar to the rcu_read_lock_is_held().

Fixes: 2a47fa45d4df ("ixgbe: enable l2 forwarding acceleration for macvlans")
CC: Veaceslav Falico <vfalico@redhat.com>
Reported-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/rtnetlink.h | 5 +++++
 net/core/dev.c            | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 939428ad25a..8e3e66ac0a5 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -24,6 +24,11 @@ extern int rtnl_trylock(void);
 extern int rtnl_is_locked(void);
 #ifdef CONFIG_PROVE_LOCKING
 extern int lockdep_rtnl_is_held(void);
+#else
+static inline int lockdep_rtnl_is_held(void)
+{
+	return 1;
+}
 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 
 /**
diff --git a/net/core/dev.c b/net/core/dev.c
index ba3b7ea5ebb..4fc17221545 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4500,7 +4500,7 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
 {
 	struct netdev_adjacent *upper;
 
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
 
 	upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
 
-- 
cgit v1.2.3-70-g09d2


From 0e3da5bb8da45890b1dc413404e0f978ab71173e Mon Sep 17 00:00:00 2001
From: Timo Teräs <timo.teras@iki.fi>
Date: Mon, 16 Dec 2013 11:02:09 +0200
Subject: ip_gre: fix msg_name parsing for recvfrom/recvmsg
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ipgre_header_parse() needs to parse the tunnel's ip header and it
uses mac_header to locate the iphdr. This got broken when gre tunneling
was refactored as mac_header is no longer updated to point to iphdr.
Introduce skb_pop_mac_header() helper to do the mac_header assignment
and use it in ipgre_rcv() to fix msg_name parsing.

Bug introduced in commit c54419321455 (GRE: Refactor GRE tunneling code.)

Cc: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Timo Teräs <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 +++++
 net/ipv4/ip_gre.c      | 1 +
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 215b5ea1cb3..6aae8389052 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1638,6 +1638,11 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
 	skb->mac_header += offset;
 }
 
+static inline void skb_pop_mac_header(struct sk_buff *skb)
+{
+	skb->mac_header = skb->network_header;
+}
+
 static inline void skb_probe_transport_header(struct sk_buff *skb,
 					      const int offset_hint)
 {
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d7aea4c5b94..e560ef34cf4 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -217,6 +217,7 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
 				  iph->saddr, iph->daddr, tpi->key);
 
 	if (tunnel) {
+		skb_pop_mac_header(skb);
 		ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
 		return PACKET_RCVD;
 	}
-- 
cgit v1.2.3-70-g09d2


From c97102ba96324da330078ad8619ba4dfe840dbe3 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Wed, 18 Dec 2013 17:08:31 -0800
Subject: kexec: migrate to reboot cpu

Commit 1b3a5d02ee07 ("reboot: move arch/x86 reboot= handling to generic
kernel") moved reboot= handling to generic code.  In the process it also
removed the code in native_machine_shutdown() which are moving reboot
process to reboot_cpu/cpu0.

I guess that thought must have been that all reboot paths are calling
migrate_to_reboot_cpu(), so we don't need this special handling.  But
kexec reboot path (kernel_kexec()) is not calling
migrate_to_reboot_cpu() so above change broke kexec.  Now reboot can
happen on non-boot cpu and when INIT is sent in second kerneo to bring
up BP, it brings down the machine.

So start calling migrate_to_reboot_cpu() in kexec reboot path to avoid
this problem.

Bisected by WANG Chao.

Reported-by: Matthew Whitehead <mwhitehe@redhat.com>
Reported-by: Dave Young <dyoung@redhat.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Tested-by: Baoquan He <bhe@redhat.com>
Tested-by: WANG Chao <chaowang@redhat.com>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/reboot.h | 1 +
 kernel/kexec.c         | 1 +
 kernel/reboot.c        | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index 8e00f9f6f96..9e7db9e73cc 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -43,6 +43,7 @@ extern int unregister_reboot_notifier(struct notifier_block *);
  * Architecture-specific implementations of sys_reboot commands.
  */
 
+extern void migrate_to_reboot_cpu(void);
 extern void machine_restart(char *cmd);
 extern void machine_halt(void);
 extern void machine_power_off(void);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index d0d8fca5406..9c970167e40 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1680,6 +1680,7 @@ int kernel_kexec(void)
 	{
 		kexec_in_progress = true;
 		kernel_restart_prepare(NULL);
+		migrate_to_reboot_cpu();
 		printk(KERN_EMERG "Starting new kernel\n");
 		machine_shutdown();
 	}
diff --git a/kernel/reboot.c b/kernel/reboot.c
index f813b347464..662c83fc16b 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -104,7 +104,7 @@ int unregister_reboot_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_reboot_notifier);
 
-static void migrate_to_reboot_cpu(void)
+void migrate_to_reboot_cpu(void)
 {
 	/* The boot cpu is always logical cpu 0 */
 	int cpu = reboot_cpu;
-- 
cgit v1.2.3-70-g09d2


From de466bd628e8d663fdf3f791bc8db318ee85c714 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 18 Dec 2013 17:08:42 -0800
Subject: mm: numa: avoid unnecessary disruption of NUMA hinting during
 migration

do_huge_pmd_numa_page() handles the case where there is parallel THP
migration.  However, by the time it is checked the NUMA hinting
information has already been disrupted.  This patch adds an earlier
check with some helpers.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Alex Thorlton <athorlton@sgi.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h |  9 +++++++++
 mm/huge_memory.c        | 22 ++++++++++++++++------
 mm/migrate.c            | 12 ++++++++++++
 3 files changed, 37 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index f5096b58b20..b7717d74da7 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -90,10 +90,19 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 #endif /* CONFIG_MIGRATION */
 
 #ifdef CONFIG_NUMA_BALANCING
+extern bool pmd_trans_migrating(pmd_t pmd);
+extern void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd);
 extern int migrate_misplaced_page(struct page *page,
 				  struct vm_area_struct *vma, int node);
 extern bool migrate_ratelimited(int node);
 #else
+static inline bool pmd_trans_migrating(pmd_t pmd)
+{
+	return false;
+}
+static inline void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd)
+{
+}
 static inline int migrate_misplaced_page(struct page *page,
 					 struct vm_area_struct *vma, int node)
 {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 70e7429fd8e..7de1bf85f68 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -882,6 +882,10 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		ret = 0;
 		goto out_unlock;
 	}
+
+	/* mmap_sem prevents this happening but warn if that changes */
+	WARN_ON(pmd_trans_migrating(pmd));
+
 	if (unlikely(pmd_trans_splitting(pmd))) {
 		/* split huge page running from under us */
 		spin_unlock(src_ptl);
@@ -1299,6 +1303,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (unlikely(!pmd_same(pmd, *pmdp)))
 		goto out_unlock;
 
+	/*
+	 * If there are potential migrations, wait for completion and retry
+	 * without disrupting NUMA hinting information. Do not relock and
+	 * check_same as the page may no longer be mapped.
+	 */
+	if (unlikely(pmd_trans_migrating(*pmdp))) {
+		spin_unlock(ptl);
+		wait_migrate_huge_page(vma->anon_vma, pmdp);
+		goto out;
+	}
+
 	page = pmd_page(pmd);
 	BUG_ON(is_huge_zero_page(page));
 	page_nid = page_to_nid(page);
@@ -1329,12 +1344,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			goto clear_pmdnuma;
 	}
 
-	/*
-	 * If there are potential migrations, wait for completion and retry. We
-	 * do not relock and check_same as the page may no longer be mapped.
-	 * Furtermore, even if the page is currently misplaced, there is no
-	 * guarantee it is still misplaced after the migration completes.
-	 */
+	/* Migration could have started since the pmd_trans_migrating check */
 	if (!page_locked) {
 		spin_unlock(ptl);
 		wait_on_page_locked(page);
diff --git a/mm/migrate.c b/mm/migrate.c
index a987525810a..cfb41908526 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1655,6 +1655,18 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 	return 1;
 }
 
+bool pmd_trans_migrating(pmd_t pmd)
+{
+	struct page *page = pmd_page(pmd);
+	return PageLocked(page);
+}
+
+void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd)
+{
+	struct page *page = pmd_page(*pmd);
+	wait_on_page_locked(page);
+}
+
 /*
  * Attempt to migrate a misplaced page to the specified destination
  * node. Caller is expected to have an elevated reference count on
-- 
cgit v1.2.3-70-g09d2


From 20841405940e7be0617612d521e206e4b6b325db Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Wed, 18 Dec 2013 17:08:44 -0800
Subject: mm: fix TLB flush race between migration, and change_protection_range

There are a few subtle races, between change_protection_range (used by
mprotect and change_prot_numa) on one side, and NUMA page migration and
compaction on the other side.

The basic race is that there is a time window between when the PTE gets
made non-present (PROT_NONE or NUMA), and the TLB is flushed.

During that time, a CPU may continue writing to the page.

This is fine most of the time, however compaction or the NUMA migration
code may come in, and migrate the page away.

When that happens, the CPU may continue writing, through the cached
translation, to what is no longer the current memory location of the
process.

This only affects x86, which has a somewhat optimistic pte_accessible.
All other architectures appear to be safe, and will either always flush,
or flush whenever there is a valid mapping, even with no permissions
(SPARC).

The basic race looks like this:

CPU A			CPU B			CPU C

						load TLB entry
make entry PTE/PMD_NUMA
			fault on entry
						read/write old page
			start migrating page
			change PTE/PMD to new page
						read/write old page [*]
flush TLB
						reload TLB from new entry
						read/write new page
						lose data

[*] the old page may belong to a new user at this point!

The obvious fix is to flush remote TLB entries, by making sure that
pte_accessible aware of the fact that PROT_NONE and PROT_NUMA memory may
still be accessible if there is a TLB flush pending for the mm.

This should fix both NUMA migration and compaction.

[mgorman@suse.de: fix build]
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Alex Thorlton <athorlton@sgi.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sparc/include/asm/pgtable_64.h |  4 ++--
 arch/x86/include/asm/pgtable.h      | 11 ++++++++--
 include/asm-generic/pgtable.h       |  2 +-
 include/linux/mm_types.h            | 44 +++++++++++++++++++++++++++++++++++++
 kernel/fork.c                       |  1 +
 mm/huge_memory.c                    |  7 ++++++
 mm/mprotect.c                       |  2 ++
 mm/pgtable-generic.c                |  5 +++--
 8 files changed, 69 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 8358dc14495..0f9e94537ee 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -619,7 +619,7 @@ static inline unsigned long pte_present(pte_t pte)
 }
 
 #define pte_accessible pte_accessible
-static inline unsigned long pte_accessible(pte_t a)
+static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a)
 {
 	return pte_val(a) & _PAGE_VALID;
 }
@@ -847,7 +847,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
 	 * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
 	 *             and SUN4V pte layout, so this inline test is fine.
 	 */
-	if (likely(mm != &init_mm) && pte_accessible(orig))
+	if (likely(mm != &init_mm) && pte_accessible(mm, orig))
 		tlb_batch_add(mm, addr, ptep, orig, fullmm);
 }
 
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 3d199945870..bbc8b12fa44 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -452,9 +452,16 @@ static inline int pte_present(pte_t a)
 }
 
 #define pte_accessible pte_accessible
-static inline int pte_accessible(pte_t a)
+static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
 {
-	return pte_flags(a) & _PAGE_PRESENT;
+	if (pte_flags(a) & _PAGE_PRESENT)
+		return true;
+
+	if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) &&
+			mm_tlb_flush_pending(mm))
+		return true;
+
+	return false;
 }
 
 static inline int pte_hidden(pte_t pte)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index f330d28e4d0..b12079afbd5 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -217,7 +217,7 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 #endif
 
 #ifndef pte_accessible
-# define pte_accessible(pte)		((void)(pte),1)
+# define pte_accessible(mm, pte)	((void)(pte), 1)
 #endif
 
 #ifndef flush_tlb_fix_spurious_fault
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index bd299418a93..e5c49c30460 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -442,6 +442,14 @@ struct mm_struct {
 
 	/* numa_scan_seq prevents two threads setting pte_numa */
 	int numa_scan_seq;
+#endif
+#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
+	/*
+	 * An operation with batched TLB flushing is going on. Anything that
+	 * can move process memory needs to flush the TLB when moving a
+	 * PROT_NONE or PROT_NUMA mapped page.
+	 */
+	bool tlb_flush_pending;
 #endif
 	struct uprobes_state uprobes_state;
 };
@@ -459,4 +467,40 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 	return mm->cpu_vm_mask_var;
 }
 
+#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
+/*
+ * Memory barriers to keep this state in sync are graciously provided by
+ * the page table locks, outside of which no page table modifications happen.
+ * The barriers below prevent the compiler from re-ordering the instructions
+ * around the memory barriers that are already present in the code.
+ */
+static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
+{
+	barrier();
+	return mm->tlb_flush_pending;
+}
+static inline void set_tlb_flush_pending(struct mm_struct *mm)
+{
+	mm->tlb_flush_pending = true;
+	barrier();
+}
+/* Clearing is done after a TLB flush, which also provides a barrier. */
+static inline void clear_tlb_flush_pending(struct mm_struct *mm)
+{
+	barrier();
+	mm->tlb_flush_pending = false;
+}
+#else
+static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
+{
+	return false;
+}
+static inline void set_tlb_flush_pending(struct mm_struct *mm)
+{
+}
+static inline void clear_tlb_flush_pending(struct mm_struct *mm)
+{
+}
+#endif
+
 #endif /* _LINUX_MM_TYPES_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 728d5be9548..5721f0e3f2d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -537,6 +537,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 	spin_lock_init(&mm->page_table_lock);
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
+	clear_tlb_flush_pending(mm);
 
 	if (likely(!mm_alloc_pgd(mm))) {
 		mm->def_flags = 0;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 7de1bf85f68..3d2783e1059 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1376,6 +1376,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto clear_pmdnuma;
 	}
 
+	/*
+	 * The page_table_lock above provides a memory barrier
+	 * with change_protection_range.
+	 */
+	if (mm_tlb_flush_pending(mm))
+		flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+
 	/*
 	 * Migrate the THP to the requested node, returns with page unlocked
 	 * and pmd_numa cleared.
diff --git a/mm/mprotect.c b/mm/mprotect.c
index f8421722acb..bb53a6591ae 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -188,6 +188,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 	BUG_ON(addr >= end);
 	pgd = pgd_offset(mm, addr);
 	flush_cache_range(vma, addr, end);
+	set_tlb_flush_pending(mm);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
@@ -199,6 +200,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 	/* Only flush the TLB if we actually modified any entries: */
 	if (pages)
 		flush_tlb_range(vma, start, end);
+	clear_tlb_flush_pending(mm);
 
 	return pages;
 }
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index e84cad27a80..a8b91992593 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -110,9 +110,10 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma,
 pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address,
 		       pte_t *ptep)
 {
+	struct mm_struct *mm = (vma)->vm_mm;
 	pte_t pte;
-	pte = ptep_get_and_clear((vma)->vm_mm, address, ptep);
-	if (pte_accessible(pte))
+	pte = ptep_get_and_clear(mm, address, ptep);
+	if (pte_accessible(mm, pte))
 		flush_tlb_page(vma, address);
 	return pte;
 }
-- 
cgit v1.2.3-70-g09d2


From af2c1401e6f9177483be4fad876d0073669df9df Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 18 Dec 2013 17:08:45 -0800
Subject: mm: numa: guarantee that tlb_flush_pending updates are visible before
 page table updates

According to documentation on barriers, stores issued before a LOCK can
complete after the lock implying that it's possible tlb_flush_pending
can be visible after a page table update.  As per revised documentation,
this patch adds a smp_mb__before_spinlock to guarantee the correct
ordering.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e5c49c30460..ad0616f2fe2 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -482,7 +482,12 @@ static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
 static inline void set_tlb_flush_pending(struct mm_struct *mm)
 {
 	mm->tlb_flush_pending = true;
-	barrier();
+
+	/*
+	 * Guarantee that the tlb_flush_pending store does not leak into the
+	 * critical section updating the page tables
+	 */
+	smp_mb__before_spinlock();
 }
 /* Clearing is done after a TLB flush, which also provides a barrier. */
 static inline void clear_tlb_flush_pending(struct mm_struct *mm)
-- 
cgit v1.2.3-70-g09d2


From 95cadace8f3959282e76ebf8b382bd0930807d2c Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Thu, 12 Dec 2013 12:24:11 -0800
Subject: target/file: Update hw_max_sectors based on current block_size

This patch allows FILEIO to update hw_max_sectors based on the current
max_bytes_per_io.  This is required because vfs_[writev,readv]() can accept
a maximum of 2048 iovecs per call, so the enforced hw_max_sectors really
needs to be calculated based on block_size.

This addresses a >= v3.5 bug where block_size=512 was rejecting > 1M
sized I/O requests, because FD_MAX_SECTORS was hardcoded to 2048 for
the block_size=4096 case.

(v2: Use max_bytes_per_io instead of ->update_hw_max_sectors)

Reported-by: Henrik Goldman <hg@x-formation.com>
Cc: <stable@vger.kernel.org> #3.5+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c | 5 +++++
 drivers/target/target_core_file.c   | 8 ++++----
 drivers/target/target_core_file.h   | 5 ++++-
 include/target/target_core_base.h   | 1 +
 4 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 207b340498a..d06de84b069 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -1106,6 +1106,11 @@ int se_dev_set_block_size(struct se_device *dev, u32 block_size)
 	dev->dev_attrib.block_size = block_size;
 	pr_debug("dev[%p]: SE Device block_size changed to %u\n",
 			dev, block_size);
+
+	if (dev->dev_attrib.max_bytes_per_io)
+		dev->dev_attrib.hw_max_sectors =
+			dev->dev_attrib.max_bytes_per_io / block_size;
+
 	return 0;
 }
 
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 0e34cda3271..78241a53b55 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -66,9 +66,8 @@ static int fd_attach_hba(struct se_hba *hba, u32 host_id)
 	pr_debug("CORE_HBA[%d] - TCM FILEIO HBA Driver %s on Generic"
 		" Target Core Stack %s\n", hba->hba_id, FD_VERSION,
 		TARGET_CORE_MOD_VERSION);
-	pr_debug("CORE_HBA[%d] - Attached FILEIO HBA: %u to Generic"
-		" MaxSectors: %u\n",
-		hba->hba_id, fd_host->fd_host_id, FD_MAX_SECTORS);
+	pr_debug("CORE_HBA[%d] - Attached FILEIO HBA: %u to Generic\n",
+		hba->hba_id, fd_host->fd_host_id);
 
 	return 0;
 }
@@ -220,7 +219,8 @@ static int fd_configure_device(struct se_device *dev)
 	}
 
 	dev->dev_attrib.hw_block_size = fd_dev->fd_block_size;
-	dev->dev_attrib.hw_max_sectors = FD_MAX_SECTORS;
+	dev->dev_attrib.max_bytes_per_io = FD_MAX_BYTES;
+	dev->dev_attrib.hw_max_sectors = FD_MAX_BYTES / fd_dev->fd_block_size;
 	dev->dev_attrib.hw_queue_depth = FD_MAX_DEVICE_QUEUE_DEPTH;
 
 	if (fd_dev->fbd_flags & FDBD_HAS_BUFFERED_IO_WCE) {
diff --git a/drivers/target/target_core_file.h b/drivers/target/target_core_file.h
index 37ffc5bd239..d7772c16768 100644
--- a/drivers/target/target_core_file.h
+++ b/drivers/target/target_core_file.h
@@ -7,7 +7,10 @@
 #define FD_DEVICE_QUEUE_DEPTH	32
 #define FD_MAX_DEVICE_QUEUE_DEPTH 128
 #define FD_BLOCKSIZE		512
-#define FD_MAX_SECTORS		2048
+/*
+ * Limited by the number of iovecs (2048) per vfs_[writev,readv] call
+ */
+#define FD_MAX_BYTES		8388608
 
 #define RRF_EMULATE_CDB		0x01
 #define RRF_GOT_LBA		0x02
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 9f1dda659c5..321301c0a64 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -620,6 +620,7 @@ struct se_dev_attrib {
 	u32		unmap_granularity;
 	u32		unmap_granularity_alignment;
 	u32		max_write_same_len;
+	u32		max_bytes_per_io;
 	struct se_device *da_dev;
 	struct config_group da_group;
 };
-- 
cgit v1.2.3-70-g09d2


From ee53664bda169f519ce3c6a22d378f0b946c8178 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 20 Dec 2013 15:10:03 +0200
Subject: mm: Fix NULL pointer dereference in madvise(MADV_WILLNEED) support

Sasha Levin found a NULL pointer dereference that is due to a missing
page table lock, which in turn is due to the pmd entry in question being
a transparent huge-table entry.

The code - introduced in commit 1998cc048901 ("mm: make
madvise(MADV_WILLNEED) support swap file prefetch") - correctly checks
for this situation using pmd_none_or_trans_huge_or_clear_bad(), but it
turns out that that function doesn't work correctly.

pmd_none_or_trans_huge_or_clear_bad() expected that pmd_bad() would
trigger if the transparent hugepage bit was set, but it doesn't do that
if pmd_numa() is also set. Note that the NUMA bit only gets set on real
NUMA machines, so people trying to reproduce this on most normal
development systems would never actually trigger this.

Fix it by removing the very subtle (and subtly incorrect) expectation,
and instead just checking pmd_trans_huge() explicitly.

Reported-by: Sasha Levin <sasha.levin@oracle.com>
Acked-by: Andrea Arcangeli <aarcange@redhat.com>
[ Additionally remove the now stale test for pmd_trans_huge() inside the
  pmd_bad() case - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/pgtable.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index b12079afbd5..db092345894 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -599,11 +599,10 @@ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	barrier();
 #endif
-	if (pmd_none(pmdval))
+	if (pmd_none(pmdval) || pmd_trans_huge(pmdval))
 		return 1;
 	if (unlikely(pmd_bad(pmdval))) {
-		if (!pmd_trans_huge(pmdval))
-			pmd_clear_bad(pmd);
+		pmd_clear_bad(pmd);
 		return 1;
 	}
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 597d795a2a786d22dd872332428e2b9439ede639 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 20 Dec 2013 13:35:58 +0200
Subject: mm: do not allocate page->ptl dynamically, if spinlock_t fits to long

In struct page we have enough space to fit long-size page->ptl there,
but we use dynamically-allocated page->ptl if size(spinlock_t) is larger
than sizeof(int).

It hurts 64-bit architectures with CONFIG_GENERIC_LOCKBREAK, where
sizeof(spinlock_t) == 8, but it easily fits into struct page.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lockref.h  | 2 +-
 include/linux/mm.h       | 6 +++---
 include/linux/mm_types.h | 3 ++-
 kernel/bounds.c          | 2 +-
 mm/memory.c              | 2 +-
 5 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index c8929c3832d..4bfde0e99ed 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -19,7 +19,7 @@
 
 #define USE_CMPXCHG_LOCKREF \
 	(IS_ENABLED(CONFIG_ARCH_USE_CMPXCHG_LOCKREF) && \
-	 IS_ENABLED(CONFIG_SMP) && !BLOATED_SPINLOCKS)
+	 IS_ENABLED(CONFIG_SMP) && SPINLOCK_SIZE <= 4)
 
 struct lockref {
 	union {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1cedd000cf2..35527173cf5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1317,7 +1317,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
 
 #if USE_SPLIT_PTE_PTLOCKS
-#if BLOATED_SPINLOCKS
+#if ALLOC_SPLIT_PTLOCKS
 extern bool ptlock_alloc(struct page *page);
 extern void ptlock_free(struct page *page);
 
@@ -1325,7 +1325,7 @@ static inline spinlock_t *ptlock_ptr(struct page *page)
 {
 	return page->ptl;
 }
-#else /* BLOATED_SPINLOCKS */
+#else /* ALLOC_SPLIT_PTLOCKS */
 static inline bool ptlock_alloc(struct page *page)
 {
 	return true;
@@ -1339,7 +1339,7 @@ static inline spinlock_t *ptlock_ptr(struct page *page)
 {
 	return &page->ptl;
 }
-#endif /* BLOATED_SPINLOCKS */
+#endif /* ALLOC_SPLIT_PTLOCKS */
 
 static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ad0616f2fe2..290901a8c1d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -26,6 +26,7 @@ struct address_space;
 #define USE_SPLIT_PTE_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
 #define USE_SPLIT_PMD_PTLOCKS	(USE_SPLIT_PTE_PTLOCKS && \
 		IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK))
+#define ALLOC_SPLIT_PTLOCKS	(SPINLOCK_SIZE > BITS_PER_LONG/8)
 
 /*
  * Each physical page in the system has a struct page associated with
@@ -155,7 +156,7 @@ struct page {
 						 * system if PG_buddy is set.
 						 */
 #if USE_SPLIT_PTE_PTLOCKS
-#if BLOATED_SPINLOCKS
+#if ALLOC_SPLIT_PTLOCKS
 		spinlock_t *ptl;
 #else
 		spinlock_t ptl;
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 5253204afdc..9fd4246b04b 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -22,6 +22,6 @@ void foo(void)
 #ifdef CONFIG_SMP
 	DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
 #endif
-	DEFINE(BLOATED_SPINLOCKS, sizeof(spinlock_t) > sizeof(int));
+	DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
 	/* End of constants */
 }
diff --git a/mm/memory.c b/mm/memory.c
index 5d9025f3b3e..b6e211b779d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4271,7 +4271,7 @@ void copy_user_huge_page(struct page *dst, struct page *src,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
-#if USE_SPLIT_PTE_PTLOCKS && BLOATED_SPINLOCKS
+#if ALLOC_SPLIT_PTLOCKS
 bool ptlock_alloc(struct page *page)
 {
 	spinlock_t *ptl;
-- 
cgit v1.2.3-70-g09d2


From df36ac1bc2a166eef90785d584e4cfed6f52bd32 Mon Sep 17 00:00:00 2001
From: "Luck, Tony" <tony.luck@intel.com>
Date: Wed, 18 Dec 2013 15:17:10 -0800
Subject: pstore: Don't allow high traffic options on fragile devices

Some pstore backing devices use on board flash as persistent
storage. These have limited numbers of write cycles so it
is a poor idea to use them from high frequency operations.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/acpi/apei/erst.c          | 1 +
 drivers/firmware/efi/efi-pstore.c | 1 +
 fs/pstore/platform.c              | 7 +++++--
 include/linux/pstore.h            | 3 +++
 4 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 26311f23c82..cb1d557fc22 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -942,6 +942,7 @@ static int erst_clearer(enum pstore_type_id type, u64 id, int count,
 static struct pstore_info erst_info = {
 	.owner		= THIS_MODULE,
 	.name		= "erst",
+	.flags		= PSTORE_FLAGS_FRAGILE,
 	.open		= erst_open_pstore,
 	.close		= erst_close_pstore,
 	.read		= erst_reader,
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 743fd426f21..4b9dc836dcf 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -356,6 +356,7 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count,
 static struct pstore_info efi_pstore_info = {
 	.owner		= THIS_MODULE,
 	.name		= "efi",
+	.flags		= PSTORE_FLAGS_FRAGILE,
 	.open		= efi_pstore_open,
 	.close		= efi_pstore_close,
 	.read		= efi_pstore_read,
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index b8e93a40a5d..78c3c209778 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -443,8 +443,11 @@ int pstore_register(struct pstore_info *psi)
 		pstore_get_records(0);
 
 	kmsg_dump_register(&pstore_dumper);
-	pstore_register_console();
-	pstore_register_ftrace();
+
+	if ((psi->flags & PSTORE_FLAGS_FRAGILE) == 0) {
+		pstore_register_console();
+		pstore_register_ftrace();
+	}
 
 	if (pstore_update_ms >= 0) {
 		pstore_timer.expires = jiffies +
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index abd437d0a8a..ece0c6bbfcc 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -51,6 +51,7 @@ struct pstore_info {
 	char		*buf;
 	size_t		bufsize;
 	struct mutex	read_mutex;	/* serialize open/read/close */
+	int		flags;
 	int		(*open)(struct pstore_info *psi);
 	int		(*close)(struct pstore_info *psi);
 	ssize_t		(*read)(u64 *id, enum pstore_type_id *type,
@@ -70,6 +71,8 @@ struct pstore_info {
 	void		*data;
 };
 
+#define	PSTORE_FLAGS_FRAGILE	1
+
 #ifdef CONFIG_PSTORE
 extern int pstore_register(struct pstore_info *);
 extern bool pstore_cannot_block_path(enum kmsg_dump_reason reason);
-- 
cgit v1.2.3-70-g09d2


From 8e321fefb0e60bae4e2a28d20fc4fa30758d27c6 Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@kvack.org>
Date: Sat, 21 Dec 2013 17:56:08 -0500
Subject: aio/migratepages: make aio migrate pages sane

The arbitrary restriction on page counts offered by the core
migrate_page_move_mapping() code results in rather suspicious looking
fiddling with page reference counts in the aio_migratepage() operation.
To fix this, make migrate_page_move_mapping() take an extra_count parameter
that allows aio to tell the code about its own reference count on the page
being migrated.

While cleaning up aio_migratepage(), make it validate that the old page
being passed in is actually what aio_migratepage() expects to prevent
misbehaviour in the case of races.

Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
---
 fs/aio.c                | 52 +++++++++++++++++++++++++++++++++++++++++--------
 include/linux/migrate.h |  3 ++-
 mm/migrate.c            | 13 +++++++------
 3 files changed, 53 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/fs/aio.c b/fs/aio.c
index fd1c0baf15b..efa708b2905 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -244,9 +244,14 @@ static void aio_free_ring(struct kioctx *ctx)
 	int i;
 
 	for (i = 0; i < ctx->nr_pages; i++) {
+		struct page *page;
 		pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
 				page_count(ctx->ring_pages[i]));
-		put_page(ctx->ring_pages[i]);
+		page = ctx->ring_pages[i];
+		if (!page)
+			continue;
+		ctx->ring_pages[i] = NULL;
+		put_page(page);
 	}
 
 	put_aio_ring_file(ctx);
@@ -280,18 +285,38 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 	unsigned long flags;
 	int rc;
 
+	rc = 0;
+
+	/* Make sure the old page hasn't already been changed */
+	spin_lock(&mapping->private_lock);
+	ctx = mapping->private_data;
+	if (ctx) {
+		pgoff_t idx;
+		spin_lock_irqsave(&ctx->completion_lock, flags);
+		idx = old->index;
+		if (idx < (pgoff_t)ctx->nr_pages) {
+			if (ctx->ring_pages[idx] != old)
+				rc = -EAGAIN;
+		} else
+			rc = -EINVAL;
+		spin_unlock_irqrestore(&ctx->completion_lock, flags);
+	} else
+		rc = -EINVAL;
+	spin_unlock(&mapping->private_lock);
+
+	if (rc != 0)
+		return rc;
+
 	/* Writeback must be complete */
 	BUG_ON(PageWriteback(old));
-	put_page(old);
+	get_page(new);
 
-	rc = migrate_page_move_mapping(mapping, new, old, NULL, mode);
+	rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
 	if (rc != MIGRATEPAGE_SUCCESS) {
-		get_page(old);
+		put_page(new);
 		return rc;
 	}
 
-	get_page(new);
-
 	/* We can potentially race against kioctx teardown here.  Use the
 	 * address_space's private data lock to protect the mapping's
 	 * private_data.
@@ -303,13 +328,24 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 		spin_lock_irqsave(&ctx->completion_lock, flags);
 		migrate_page_copy(new, old);
 		idx = old->index;
-		if (idx < (pgoff_t)ctx->nr_pages)
-			ctx->ring_pages[idx] = new;
+		if (idx < (pgoff_t)ctx->nr_pages) {
+			/* And only do the move if things haven't changed */
+			if (ctx->ring_pages[idx] == old)
+				ctx->ring_pages[idx] = new;
+			else
+				rc = -EAGAIN;
+		} else
+			rc = -EINVAL;
 		spin_unlock_irqrestore(&ctx->completion_lock, flags);
 	} else
 		rc = -EBUSY;
 	spin_unlock(&mapping->private_lock);
 
+	if (rc == MIGRATEPAGE_SUCCESS)
+		put_page(old);
+	else
+		put_page(new);
+
 	return rc;
 }
 #endif
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index b7717d74da7..f015c059e15 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -55,7 +55,8 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 				  struct page *newpage, struct page *page);
 extern int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page,
-		struct buffer_head *head, enum migrate_mode mode);
+		struct buffer_head *head, enum migrate_mode mode,
+		int extra_count);
 #else
 
 static inline void putback_lru_pages(struct list_head *l) {}
diff --git a/mm/migrate.c b/mm/migrate.c
index e9b71020133..9194375b230 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -317,14 +317,15 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
  */
 int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page,
-		struct buffer_head *head, enum migrate_mode mode)
+		struct buffer_head *head, enum migrate_mode mode,
+		int extra_count)
 {
-	int expected_count = 0;
+	int expected_count = 1 + extra_count;
 	void **pslot;
 
 	if (!mapping) {
 		/* Anonymous page without mapping */
-		if (page_count(page) != 1)
+		if (page_count(page) != expected_count)
 			return -EAGAIN;
 		return MIGRATEPAGE_SUCCESS;
 	}
@@ -334,7 +335,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	pslot = radix_tree_lookup_slot(&mapping->page_tree,
  					page_index(page));
 
-	expected_count = 2 + page_has_private(page);
+	expected_count += 1 + page_has_private(page);
 	if (page_count(page) != expected_count ||
 		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
 		spin_unlock_irq(&mapping->tree_lock);
@@ -584,7 +585,7 @@ int migrate_page(struct address_space *mapping,
 
 	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
 
-	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode);
+	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
 
 	if (rc != MIGRATEPAGE_SUCCESS)
 		return rc;
@@ -611,7 +612,7 @@ int buffer_migrate_page(struct address_space *mapping,
 
 	head = page_buffers(page);
 
-	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);
+	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0);
 
 	if (rc != MIGRATEPAGE_SUCCESS)
 		return rc;
-- 
cgit v1.2.3-70-g09d2


From d00adcc8ae9e22eca9d8af5f66c59ad9a74c90ec Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 23 Dec 2013 09:31:58 -0500
Subject: drm/radeon: 0x9649 is SUMO2 not SUMO

Fixes rendering corruption due to incorrect
gfx configuration.

bug:
https://bugs.freedesktop.org/show_bug.cgi?id=63599

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 include/drm/drm_pciids.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 87578c109e4..49376aec2fb 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -600,7 +600,7 @@
 	{0x1002, 0x9645, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0x1002, 0x9647, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
 	{0x1002, 0x9648, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
-	{0x1002, 0x9649, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
+	{0x1002, 0x9649, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\
 	{0x1002, 0x964a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0x1002, 0x964b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0x1002, 0x964c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
-- 
cgit v1.2.3-70-g09d2


From 439a1cfffe2c1a06e5a6394ccd5d18a8e89b15d3 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Sun, 22 Dec 2013 02:18:01 +0100
Subject: drm/radeon: expose render backend mask to the userspace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This will allow userspace to correctly program the PA_SC_RASTER_CONFIG
register, so it can be considered a fix.

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/cik.c        | 2 ++
 drivers/gpu/drm/radeon/radeon.h     | 4 ++--
 drivers/gpu/drm/radeon/radeon_kms.c | 9 +++++++++
 drivers/gpu/drm/radeon/si.c         | 2 ++
 include/uapi/drm/radeon_drm.h       | 2 ++
 5 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 138a7764694..e950fabd7f5 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -3114,6 +3114,8 @@ static void cik_setup_rb(struct radeon_device *rdev,
 		mask <<= 1;
 	}
 
+	rdev->config.cik.backend_enable_mask = enabled_rbs;
+
 	for (i = 0; i < se_num; i++) {
 		cik_select_se_sh(rdev, i, 0xffffffff);
 		data = 0;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index b1f990d0eaa..45e1f447bc7 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1940,7 +1940,7 @@ struct si_asic {
 	unsigned sc_earlyz_tile_fifo_size;
 
 	unsigned num_tile_pipes;
-	unsigned num_backends_per_se;
+	unsigned backend_enable_mask;
 	unsigned backend_disable_mask_per_asic;
 	unsigned backend_map;
 	unsigned num_texture_channel_caches;
@@ -1970,7 +1970,7 @@ struct cik_asic {
 	unsigned sc_earlyz_tile_fifo_size;
 
 	unsigned num_tile_pipes;
-	unsigned num_backends_per_se;
+	unsigned backend_enable_mask;
 	unsigned backend_disable_mask_per_asic;
 	unsigned backend_map;
 	unsigned num_texture_channel_caches;
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 55d0b474bd3..21d593c0eca 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -461,6 +461,15 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	case RADEON_INFO_SI_CP_DMA_COMPUTE:
 		*value = 1;
 		break;
+	case RADEON_INFO_SI_BACKEND_ENABLED_MASK:
+		if (rdev->family >= CHIP_BONAIRE) {
+			*value = rdev->config.cik.backend_enable_mask;
+		} else if (rdev->family >= CHIP_TAHITI) {
+			*value = rdev->config.si.backend_enable_mask;
+		} else {
+			DRM_DEBUG_KMS("BACKEND_ENABLED_MASK is si+ only!\n");
+		}
+		break;
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->request);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 3eed9a1e44e..85e1edfaa3b 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -2855,6 +2855,8 @@ static void si_setup_rb(struct radeon_device *rdev,
 		mask <<= 1;
 	}
 
+	rdev->config.si.backend_enable_mask = enabled_rbs;
+
 	for (i = 0; i < se_num; i++) {
 		si_select_se_sh(rdev, i, 0xffffffff);
 		data = 0;
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 2f3f7ea8c77..fe421e8a431 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -983,6 +983,8 @@ struct drm_radeon_cs {
 #define RADEON_INFO_SI_CP_DMA_COMPUTE	0x17
 /* CIK macrotile mode array */
 #define RADEON_INFO_CIK_MACROTILE_MODE_ARRAY	0x18
+/* query the number of render backends */
+#define RADEON_INFO_SI_BACKEND_ENABLED_MASK	0x19
 
 
 struct drm_radeon_info {
-- 
cgit v1.2.3-70-g09d2


From f60900f2609e893c7f8d0bccc7ada4947dac4cd5 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 23 Dec 2013 18:49:30 +0100
Subject: auxvec.h: account for AT_HWCAP2 in AT_VECTOR_SIZE_BASE

Commit 2171364d1a92 ("powerpc: Add HWCAP2 aux entry") introduced a new
AT_ auxv entry type AT_HWCAP2 but failed to update AT_VECTOR_SIZE_BASE
accordingly.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Fixes: 2171364d1a92 (powerpc: Add HWCAP2 aux entry)
Cc: stable@vger.kernel.org
Acked-by: Michael Neuling <michael@neuling.org>
Cc: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/auxvec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h
index 669fef5c745..3e0fbe44176 100644
--- a/include/linux/auxvec.h
+++ b/include/linux/auxvec.h
@@ -3,6 +3,6 @@
 
 #include <uapi/linux/auxvec.h>
 
-#define AT_VECTOR_SIZE_BASE 19 /* NEW_AUX_ENT entries in auxiliary table */
+#define AT_VECTOR_SIZE_BASE 20 /* NEW_AUX_ENT entries in auxiliary table */
   /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */
 #endif /* _LINUX_AUXVEC_H */
-- 
cgit v1.2.3-70-g09d2


From 73409f3b0ff0ae7bc1f647936b23e6d5d5dcbe28 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 27 Dec 2013 13:04:33 -0500
Subject: net: Add some clarification to skb_tx_timestamp() comment.

We've seen so many instances of people invoking skb_tx_timestamp()
after the device already has been given the packet, that it's worth
being a little bit more verbose and explicit in this comment.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6aae8389052..6f69b3f914f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2531,6 +2531,10 @@ static inline void sw_tx_timestamp(struct sk_buff *skb)
  * Ethernet MAC Drivers should call this function in their hard_xmit()
  * function immediately before giving the sk_buff to the MAC hardware.
  *
+ * Specifically, one should make absolutely sure that this function is
+ * called before TX completion of this packet can trigger.  Otherwise
+ * the packet could potentially already be freed.
+ *
  * @skb: A socket buffer.
  */
 static inline void skb_tx_timestamp(struct sk_buff *skb)
-- 
cgit v1.2.3-70-g09d2


From f244d8b623dae7a7bc695b0336f67729b95a9736 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 31 Dec 2013 13:39:42 +0100
Subject: ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to
 hotplug
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The changes in the ACPI-based PCI hotplug (ACPIPHP) subsystem made
during the 3.12 development cycle uncovered a problem with VGA
switcheroo that on some systems, when the device-specific method
(ATPX in the radeon case, _DSM in the nouveau case) is used to turn
off the discrete graphics, the BIOS generates ACPI hotplug events for
that device and those events cause ACPIPHP to attempt to remove the
device from the system (they are events for a device that was present
previously and is not present any more, so that's what should be done
according to the spec).  Then, the system stops functioning correctly.

Since the hotplug events in question were simply silently ignored
previously, the least intrusive way to address that problem is to
make ACPIPHP ignore them again.  For this purpose, introduce a new
ACPI device flag, no_hotplug, and modify ACPIPHP to ignore hotplug
events for PCI devices whose ACPI companions have that flag set.
Next, make the radeon and nouveau switcheroo detection code set the
no_hotplug flag for the discrete graphics' ACPI companion.

Fixes: bbd34fcdd1b2 (ACPI / hotplug / PCI: Register all devices under the given bridge)
References: https://bugzilla.kernel.org/show_bug.cgi?id=61891
References: https://bugzilla.kernel.org/show_bug.cgi?id=64891
Reported-and-tested-by: Mike Lothian <mike@fireburn.co.uk>
Reported-and-tested-by: <madcatx@atlas.cz>
Reported-and-tested-by: Joaquín Aramendía <samsagax@gmail.com>
Cc: Alex Deucher <alexdeucher@gmail.com>
Cc: Dave Airlie <airlied@linux.ie>
Cc: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: 3.12+ <stable@vger.kernel.org> # 3.12+
---
 drivers/acpi/bus.c                           | 10 ++++++++++
 drivers/gpu/drm/nouveau/nouveau_acpi.c       | 16 ++++++++++++++--
 drivers/gpu/drm/radeon/radeon_atpx_handler.c | 16 ++++++++++++++--
 drivers/pci/hotplug/acpiphp_glue.c           | 26 +++++++++++++++++++++++---
 include/acpi/acpi_bus.h                      |  4 +++-
 5 files changed, 64 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index bba9b72e25f..0710004055c 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -156,6 +156,16 @@ int acpi_bus_get_private_data(acpi_handle handle, void **data)
 }
 EXPORT_SYMBOL(acpi_bus_get_private_data);
 
+void acpi_bus_no_hotplug(acpi_handle handle)
+{
+	struct acpi_device *adev = NULL;
+
+	acpi_bus_get_device(handle, &adev);
+	if (adev)
+		adev->flags.no_hotplug = true;
+}
+EXPORT_SYMBOL_GPL(acpi_bus_no_hotplug);
+
 static void acpi_print_osc_error(acpi_handle handle,
 	struct acpi_osc_context *context, char *error)
 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 95c74045404..ba0183fb84f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -51,6 +51,7 @@ static struct nouveau_dsm_priv {
 	bool dsm_detected;
 	bool optimus_detected;
 	acpi_handle dhandle;
+	acpi_handle other_handle;
 	acpi_handle rom_handle;
 } nouveau_dsm_priv;
 
@@ -260,9 +261,10 @@ static int nouveau_dsm_pci_probe(struct pci_dev *pdev)
 	if (!dhandle)
 		return false;
 
-	if (!acpi_has_method(dhandle, "_DSM"))
+	if (!acpi_has_method(dhandle, "_DSM")) {
+		nouveau_dsm_priv.other_handle = dhandle;
 		return false;
-
+	}
 	if (nouveau_test_dsm(dhandle, nouveau_dsm, NOUVEAU_DSM_POWER))
 		retval |= NOUVEAU_DSM_HAS_MUX;
 
@@ -338,6 +340,16 @@ static bool nouveau_dsm_detect(void)
 		printk(KERN_INFO "VGA switcheroo: detected DSM switching method %s handle\n",
 			acpi_method_name);
 		nouveau_dsm_priv.dsm_detected = true;
+		/*
+		 * On some systems hotplug events are generated for the device
+		 * being switched off when _DSM is executed.  They cause ACPI
+		 * hotplug to trigger and attempt to remove the device from
+		 * the system, which causes it to break down.  Prevent that from
+		 * happening by setting the no_hotplug flag for the involved
+		 * ACPI device objects.
+		 */
+		acpi_bus_no_hotplug(nouveau_dsm_priv.dhandle);
+		acpi_bus_no_hotplug(nouveau_dsm_priv.other_handle);
 		ret = true;
 	}
 
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index 9d302eaeea1..485848f889f 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -33,6 +33,7 @@ static struct radeon_atpx_priv {
 	bool atpx_detected;
 	/* handle for device - and atpx */
 	acpi_handle dhandle;
+	acpi_handle other_handle;
 	struct radeon_atpx atpx;
 } radeon_atpx_priv;
 
@@ -451,9 +452,10 @@ static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev)
 		return false;
 
 	status = acpi_get_handle(dhandle, "ATPX", &atpx_handle);
-	if (ACPI_FAILURE(status))
+	if (ACPI_FAILURE(status)) {
+		radeon_atpx_priv.other_handle = dhandle;
 		return false;
-
+	}
 	radeon_atpx_priv.dhandle = dhandle;
 	radeon_atpx_priv.atpx.handle = atpx_handle;
 	return true;
@@ -530,6 +532,16 @@ static bool radeon_atpx_detect(void)
 		printk(KERN_INFO "VGA switcheroo: detected switching method %s handle\n",
 		       acpi_method_name);
 		radeon_atpx_priv.atpx_detected = true;
+		/*
+		 * On some systems hotplug events are generated for the device
+		 * being switched off when ATPX is executed.  They cause ACPI
+		 * hotplug to trigger and attempt to remove the device from
+		 * the system, which causes it to break down.  Prevent that from
+		 * happening by setting the no_hotplug flag for the involved
+		 * ACPI device objects.
+		 */
+		acpi_bus_no_hotplug(radeon_atpx_priv.dhandle);
+		acpi_bus_no_hotplug(radeon_atpx_priv.other_handle);
 		return true;
 	}
 	return false;
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 438a4d09958..e86439283a5 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -645,6 +645,24 @@ static void disable_slot(struct acpiphp_slot *slot)
 	slot->flags &= (~SLOT_ENABLED);
 }
 
+static bool acpiphp_no_hotplug(acpi_handle handle)
+{
+	struct acpi_device *adev = NULL;
+
+	acpi_bus_get_device(handle, &adev);
+	return adev && adev->flags.no_hotplug;
+}
+
+static bool slot_no_hotplug(struct acpiphp_slot *slot)
+{
+	struct acpiphp_func *func;
+
+	list_for_each_entry(func, &slot->funcs, sibling)
+		if (acpiphp_no_hotplug(func_to_handle(func)))
+			return true;
+
+	return false;
+}
 
 /**
  * get_slot_status - get ACPI slot status
@@ -703,7 +721,8 @@ static void trim_stale_devices(struct pci_dev *dev)
 		unsigned long long sta;
 
 		status = acpi_evaluate_integer(handle, "_STA", NULL, &sta);
-		alive = ACPI_SUCCESS(status) && sta == ACPI_STA_ALL;
+		alive = (ACPI_SUCCESS(status) && sta == ACPI_STA_ALL)
+			|| acpiphp_no_hotplug(handle);
 	}
 	if (!alive) {
 		u32 v;
@@ -743,8 +762,9 @@ static void acpiphp_check_bridge(struct acpiphp_bridge *bridge)
 		struct pci_dev *dev, *tmp;
 
 		mutex_lock(&slot->crit_sect);
-		/* wake up all functions */
-		if (get_slot_status(slot) == ACPI_STA_ALL) {
+		if (slot_no_hotplug(slot)) {
+			; /* do nothing */
+		} else if (get_slot_status(slot) == ACPI_STA_ALL) {
 			/* remove stale devices if any */
 			list_for_each_entry_safe(dev, tmp, &bus->devices,
 						 bus_list)
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 7b2de026a4f..ca49ebd231c 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -168,7 +168,8 @@ struct acpi_device_flags {
 	u32 ejectable:1;
 	u32 power_manageable:1;
 	u32 match_driver:1;
-	u32 reserved:27;
+	u32 no_hotplug:1;
+	u32 reserved:26;
 };
 
 /* File System */
@@ -343,6 +344,7 @@ extern struct kobject *acpi_kobj;
 extern int acpi_bus_generate_netlink_event(const char*, const char*, u8, int);
 void acpi_bus_private_data_handler(acpi_handle, void *);
 int acpi_bus_get_private_data(acpi_handle, void **);
+void acpi_bus_no_hotplug(acpi_handle handle);
 extern int acpi_notifier_call_chain(struct acpi_device *, u32, u32);
 extern int register_acpi_notifier(struct notifier_block *);
 extern int unregister_acpi_notifier(struct notifier_block *);
-- 
cgit v1.2.3-70-g09d2


From 2205369a314e12fcec4781cc73ac9c08fc2b47de Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 31 Dec 2013 16:23:35 -0500
Subject: vlan: Fix header ops passthru when doing TX VLAN offload.

When the vlan code detects that the real device can do TX VLAN offloads
in hardware, it tries to arrange for the real device's header_ops to
be invoked directly.

But it does so illegally, by simply hooking the real device's
header_ops up to the VLAN device.

This doesn't work because we will end up invoking a set of header_ops
routines which expect a device type which matches the real device, but
will see a VLAN device instead.

Fix this by providing a pass-thru set of header_ops which will arrange
to pass the proper real device instead.

To facilitate this add a dev_rebuild_header().  There are
implementations which provide a ->cache and ->create but not a
->rebuild (f.e. PLIP).  So we need a helper function just like
dev_hard_header() to avoid crashes.

Use this helper in the one existing place where the
header_ops->rebuild was being invoked, the neighbour code.

With lots of help from Florian Westphal.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  9 +++++++++
 net/8021q/vlan_dev.c      | 19 ++++++++++++++++++-
 net/core/neighbour.c      |  2 +-
 3 files changed, 28 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d9a550bf3e8..7514b9c37a3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1912,6 +1912,15 @@ static inline int dev_parse_header(const struct sk_buff *skb,
 	return dev->header_ops->parse(skb, haddr);
 }
 
+static inline int dev_rebuild_header(struct sk_buff *skb)
+{
+	const struct net_device *dev = skb->dev;
+
+	if (!dev->header_ops || !dev->header_ops->rebuild)
+		return 0;
+	return dev->header_ops->rebuild(skb);
+}
+
 typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len);
 int register_gifconf(unsigned int family, gifconf_func_t *gifconf);
 static inline int unregister_gifconf(unsigned int family)
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 762896ebfcf..47c908f1f62 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -530,6 +530,23 @@ static const struct header_ops vlan_header_ops = {
 	.parse	 = eth_header_parse,
 };
 
+static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev,
+				     unsigned short type,
+				     const void *daddr, const void *saddr,
+				     unsigned int len)
+{
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+	struct net_device *real_dev = vlan->real_dev;
+
+	return dev_hard_header(skb, real_dev, type, daddr, saddr, len);
+}
+
+static const struct header_ops vlan_passthru_header_ops = {
+	.create	 = vlan_passthru_hard_header,
+	.rebuild = dev_rebuild_header,
+	.parse	 = eth_header_parse,
+};
+
 static struct device_type vlan_type = {
 	.name	= "vlan",
 };
@@ -573,7 +590,7 @@ static int vlan_dev_init(struct net_device *dev)
 
 	dev->needed_headroom = real_dev->needed_headroom;
 	if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) {
-		dev->header_ops      = real_dev->header_ops;
+		dev->header_ops      = &vlan_passthru_header_ops;
 		dev->hard_header_len = real_dev->hard_header_len;
 	} else {
 		dev->header_ops      = &vlan_header_ops;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 36b1443f9ae..932c6d7cf66 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1275,7 +1275,7 @@ int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
 
 	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
 			    skb->len) < 0 &&
-	    dev->header_ops->rebuild(skb))
+	    dev_rebuild_header(skb))
 		return 0;
 
 	return dev_queue_xmit(skb);
-- 
cgit v1.2.3-70-g09d2


From 7e0309631ecf0cd16edba72ff74747fa1b96ead3 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Wed, 1 Jan 2014 23:04:25 +0100
Subject: net: llc: fix order of evaluation in llc_conn_ac_inc_vr_by_1

Function llc_conn_ac_inc_vr_by_1() evaluates via macro
PDU_GET_NEXT_Vr() into ...

  llc_sk(sk)->vR = ++llc_sk(sk)->vR & 0xffffffffffffff7f

... but the order in which the side effects take place is
undefined because there is no intervening sequence point.

As llc_sk(sk)->vR is written in llc_sk(sk)->vR (assignment
left-hand side) and written in ++llc_sk(sk)->vR & 0xffffffffffffff7f
this might possibly yield undefined behavior.

The final value of llc_sk(sk)->vR is ambiguous, because,
depending on the order of expression evaluation, the
increment may occur before, after, or interleaved with
the assignment. In C, evaluating such an expression yields
undefined behavior.

Since we're doing the increment via PDU_GET_NEXT_Vr() macro
and the only place it is being used is from
llc_conn_ac_inc_vr_by_1(), in order to increment vR by 1
with a follow-up optimized modulo, rewrite the expression
into ((vR + 1) & CONST) in order to fix this.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/llc_pdu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h
index 31e2de7d57c..c0f0a13ed81 100644
--- a/include/net/llc_pdu.h
+++ b/include/net/llc_pdu.h
@@ -142,7 +142,7 @@
 #define LLC_S_PF_IS_1(pdu)     ((pdu->ctrl_2 & LLC_S_PF_BIT_MASK) ? 1 : 0)
 
 #define PDU_SUPV_GET_Nr(pdu)   ((pdu->ctrl_2 & 0xFE) >> 1)
-#define PDU_GET_NEXT_Vr(sn)    (++sn & ~LLC_2_SEQ_NBR_MODULO)
+#define PDU_GET_NEXT_Vr(sn)    (((sn) + 1) & ~LLC_2_SEQ_NBR_MODULO)
 
 /* FRMR information field macros */
 
-- 
cgit v1.2.3-70-g09d2


From 619a60ee04be33238721a15c1f9704a2a515a33e Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Thu, 2 Jan 2014 14:39:44 -0500
Subject: sctp: Remove outqueue empty state

The SCTP outqueue structure maintains a data chunks
that are pending transmission, the list of chunks that
are pending a retransmission and a length of data in
flight.  It also tries to keep the emtpy state so that
it can performe shutdown sequence or notify user.

The problem is that the empy state is inconsistently
tracked.  It is possible to completely drain the queue
without sending anything when using PR-SCTP.  In this
case, the empty state will not be correctly state as
report by Jamal Hadi Salim <jhs@mojatatu.com>.  This
can cause an association to be perminantly stuck in the
SHUTDOWN_PENDING state.

Additionally, SCTP is incredibly inefficient when setting
the empty state.  Even though all the data is availaible
in the outqueue structure, we ignore it and walk a list
of trasnports.

In the end, we can completely remove the extra empty
state and figure out if the queue is empty by looking
at 3 things:  length of pending data, length of in-flight
data, and exisiting of retransmit data.  All of these
are already in the strucutre.

Reported-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Vlad Yasevich <vyasevich@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  3 ---
 net/sctp/outqueue.c        | 32 +++++++-------------------------
 2 files changed, 7 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 67b5d006827..0a248b323d8 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1046,9 +1046,6 @@ struct sctp_outq {
 
 	/* Corked? */
 	char cork;
-
-	/* Is this structure empty?  */
-	char empty;
 };
 
 void sctp_outq_init(struct sctp_association *, struct sctp_outq *);
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index f51ba985a36..59268f6e2c3 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -208,8 +208,6 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
 	INIT_LIST_HEAD(&q->retransmit);
 	INIT_LIST_HEAD(&q->sacked);
 	INIT_LIST_HEAD(&q->abandoned);
-
-	q->empty = 1;
 }
 
 /* Free the outqueue structure and any related pending chunks.
@@ -332,7 +330,6 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
 				SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
 			else
 				SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS);
-			q->empty = 0;
 			break;
 		}
 	} else {
@@ -654,7 +651,6 @@ redo:
 			if (chunk->fast_retransmit == SCTP_NEED_FRTX)
 				chunk->fast_retransmit = SCTP_DONT_FRTX;
 
-			q->empty = 0;
 			q->asoc->stats.rtxchunks++;
 			break;
 		}
@@ -1065,8 +1061,6 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 
 			sctp_transport_reset_timers(transport);
 
-			q->empty = 0;
-
 			/* Only let one DATA chunk get bundled with a
 			 * COOKIE-ECHO chunk.
 			 */
@@ -1275,29 +1269,17 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
 		 "advertised peer ack point:0x%x\n", __func__, asoc, ctsn,
 		 asoc->adv_peer_ack_point);
 
-	/* See if all chunks are acked.
-	 * Make sure the empty queue handler will get run later.
-	 */
-	q->empty = (list_empty(&q->out_chunk_list) &&
-		    list_empty(&q->retransmit));
-	if (!q->empty)
-		goto finish;
-
-	list_for_each_entry(transport, transport_list, transports) {
-		q->empty = q->empty && list_empty(&transport->transmitted);
-		if (!q->empty)
-			goto finish;
-	}
-
-	pr_debug("%s: sack queue is empty\n", __func__);
-finish:
-	return q->empty;
+	return sctp_outq_is_empty(q);
 }
 
-/* Is the outqueue empty?  */
+/* Is the outqueue empty?
+ * The queue is empty when we have not pending data, no in-flight data
+ * and nothing pending retransmissions.
+ */
 int sctp_outq_is_empty(const struct sctp_outq *q)
 {
-	return q->empty;
+	return q->out_qlen == 0 && q->outstanding_bytes == 0 &&
+	       list_empty(&q->retransmit);
 }
 
 /********************************************************************
-- 
cgit v1.2.3-70-g09d2


From 7a7ffbabf99445704be01bff5d7e360da908cf8e Mon Sep 17 00:00:00 2001
From: Wei-Chun Chao <weichunc@plumgrid.com>
Date: Thu, 26 Dec 2013 13:10:22 -0800
Subject: ipv4: fix tunneled VM traffic over hw VXLAN/GRE GSO NIC

VM to VM GSO traffic is broken if it goes through VXLAN or GRE
tunnel and the physical NIC on the host supports hardware VXLAN/GRE
GSO offload (e.g. bnx2x and next-gen mlx4).

Two issues -
(VXLAN) VM traffic has SKB_GSO_DODGY and SKB_GSO_UDP_TUNNEL with
SKB_GSO_TCP/UDP set depending on the inner protocol. GSO header
integrity check fails in udp4_ufo_fragment if inner protocol is
TCP. Also gso_segs is calculated incorrectly using skb->len that
includes tunnel header. Fix: robust check should only be applied
to the inner packet.

(VXLAN & GRE) Once GSO header integrity check passes, NULL segs
is returned and the original skb is sent to hardware. However the
tunnel header is already pulled. Fix: tunnel header needs to be
restored so that hardware can perform GSO properly on the original
packet.

Signed-off-by: Wei-Chun Chao <weichunc@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 13 +++++++++++++
 net/ipv4/gre_offload.c    | 11 +++++++----
 net/ipv4/udp.c            |  6 +++++-
 net/ipv4/udp_offload.c    | 37 +++++++++++++++++++------------------
 4 files changed, 44 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7514b9c37a3..5faaadb0c74 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3017,6 +3017,19 @@ static inline void netif_set_gso_max_size(struct net_device *dev,
 	dev->gso_max_size = size;
 }
 
+static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol,
+					int pulled_hlen, u16 mac_offset,
+					int mac_len)
+{
+	skb->protocol = protocol;
+	skb->encapsulation = 1;
+	skb_push(skb, pulled_hlen);
+	skb_reset_transport_header(skb);
+	skb->mac_header = mac_offset;
+	skb->network_header = skb->mac_header + mac_len;
+	skb->mac_len = mac_len;
+}
+
 static inline bool netif_is_macvlan(struct net_device *dev)
 {
 	return dev->priv_flags & IFF_MACVLAN;
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index e5d43618846..2cd02f32f99 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -28,6 +28,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 	netdev_features_t enc_features;
 	int ghl = GRE_HEADER_SECTION;
 	struct gre_base_hdr *greh;
+	u16 mac_offset = skb->mac_header;
 	int mac_len = skb->mac_len;
 	__be16 protocol = skb->protocol;
 	int tnl_hlen;
@@ -58,13 +59,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 	} else
 		csum = false;
 
+	if (unlikely(!pskb_may_pull(skb, ghl)))
+		goto out;
+
 	/* setup inner skb. */
 	skb->protocol = greh->protocol;
 	skb->encapsulation = 0;
 
-	if (unlikely(!pskb_may_pull(skb, ghl)))
-		goto out;
-
 	__skb_pull(skb, ghl);
 	skb_reset_mac_header(skb);
 	skb_set_network_header(skb, skb_inner_network_offset(skb));
@@ -73,8 +74,10 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 	/* segment inner packet. */
 	enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
 	segs = skb_mac_gso_segment(skb, enc_features);
-	if (!segs || IS_ERR(segs))
+	if (!segs || IS_ERR(segs)) {
+		skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len);
 		goto out;
+	}
 
 	skb = segs;
 	tnl_hlen = skb_tnl_header_len(skb);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f140048334c..a7e4729e974 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2478,6 +2478,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 				       netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	u16 mac_offset = skb->mac_header;
 	int mac_len = skb->mac_len;
 	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
 	__be16 protocol = skb->protocol;
@@ -2497,8 +2498,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 	/* segment inner packet. */
 	enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
 	segs = skb_mac_gso_segment(skb, enc_features);
-	if (!segs || IS_ERR(segs))
+	if (!segs || IS_ERR(segs)) {
+		skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
+				     mac_len);
 		goto out;
+	}
 
 	outer_hlen = skb_tnl_header_len(skb);
 	skb = segs;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 83206de2bc7..79c62bdcd3c 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -41,6 +41,14 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	unsigned int mss;
+	int offset;
+	__wsum csum;
+
+	if (skb->encapsulation &&
+	    skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) {
+		segs = skb_udp_tunnel_segment(skb, features);
+		goto out;
+	}
 
 	mss = skb_shinfo(skb)->gso_size;
 	if (unlikely(skb->len <= mss))
@@ -63,27 +71,20 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 		goto out;
 	}
 
+	/* Do software UFO. Complete and fill in the UDP checksum as
+	 * HW cannot do checksum of UDP packets sent as multiple
+	 * IP fragments.
+	 */
+	offset = skb_checksum_start_offset(skb);
+	csum = skb_checksum(skb, offset, skb->len - offset, 0);
+	offset += skb->csum_offset;
+	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
+	skb->ip_summed = CHECKSUM_NONE;
+
 	/* Fragment the skb. IP headers of the fragments are updated in
 	 * inet_gso_segment()
 	 */
-	if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
-		segs = skb_udp_tunnel_segment(skb, features);
-	else {
-		int offset;
-		__wsum csum;
-
-		/* Do software UFO. Complete and fill in the UDP checksum as
-		 * HW cannot do checksum of UDP packets sent as multiple
-		 * IP fragments.
-		 */
-		offset = skb_checksum_start_offset(skb);
-		csum = skb_checksum(skb, offset, skb->len - offset, 0);
-		offset += skb->csum_offset;
-		*(__sum16 *)(skb->data + offset) = csum_fold(csum);
-		skb->ip_summed = CHECKSUM_NONE;
-
-		segs = skb_segment(skb, features);
-	}
+	segs = skb_segment(skb, features);
 out:
 	return segs;
 }
-- 
cgit v1.2.3-70-g09d2


From f663dd9aaf9ed124f25f0f8452edf238f087ad50 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 10 Jan 2014 16:18:26 +0800
Subject: net: core: explicitly select a txq before doing l2 forwarding

Currently, the tx queue were selected implicitly in ndo_dfwd_start_xmit(). The
will cause several issues:

- NETIF_F_LLTX were removed for macvlan, so txq lock were done for macvlan
  instead of lower device which misses the necessary txq synchronization for
  lower device such as txq stopping or frozen required by dev watchdog or
  control path.
- dev_hard_start_xmit() was called with NULL txq which bypasses the net device
  watchdog.
- dev_hard_start_xmit() does not check txq everywhere which will lead a crash
  when tso is disabled for lower device.

Fix this by explicitly introducing a new param for .ndo_select_queue() for just
selecting queues in the case of l2 forwarding offload. netdev_pick_tx() was also
extended to accept this parameter and dev_queue_xmit_accel() was used to do l2
forwarding transmission.

With this fixes, NETIF_F_LLTX could be preserved for macvlan and there's no need
to check txq against NULL in dev_hard_start_xmit(). Also there's no need to keep
a dedicated ndo_dfwd_start_xmit() and we can just reuse the code of
dev_queue_xmit() to do the transmission.

In the future, it was also required for macvtap l2 forwarding support since it
provides a necessary synchronization method.

Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: e1000-devel@lists.sourceforge.net
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c                 |  3 ++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |  3 ++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h |  3 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c   | 33 ++++++++++---------------
 drivers/net/ethernet/lantiq_etop.c              |  3 ++-
 drivers/net/ethernet/mellanox/mlx4/en_tx.c      |  3 ++-
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h    |  3 ++-
 drivers/net/ethernet/tile/tilegx.c              |  3 ++-
 drivers/net/macvlan.c                           |  9 +++----
 drivers/net/team/team.c                         |  3 ++-
 drivers/net/tun.c                               |  3 ++-
 drivers/net/wireless/mwifiex/main.c             |  3 ++-
 drivers/staging/bcm/Bcmnet.c                    |  3 ++-
 drivers/staging/netlogic/xlr_net.c              |  3 ++-
 drivers/staging/rtl8188eu/os_dep/os_intfs.c     |  3 ++-
 include/linux/netdevice.h                       | 12 ++++++---
 net/core/dev.c                                  | 29 +++++++++++++---------
 net/core/flow_dissector.c                       | 10 +++++---
 net/core/netpoll.c                              |  2 +-
 net/mac80211/iface.c                            |  6 +++--
 net/sched/sch_generic.c                         |  2 +-
 21 files changed, 80 insertions(+), 62 deletions(-)

(limited to 'include')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 398e299ee1b..4b8c58b0ec2 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3732,7 +3732,8 @@ static inline int bond_slave_override(struct bonding *bond,
 }
 
 
-static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb)
+static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb,
+			     void *accel_priv)
 {
 	/*
 	 * This helper function exists to help dev_pick_tx get the correct
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 550412088dd..bf811565ee2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1833,7 +1833,8 @@ void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw)
 		bnx2x_napi_disable_cnic(bp);
 }
 
-u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb)
+u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
+		       void *accel_priv)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index da8fcaa7449..41f3ca5ad97 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -524,7 +524,8 @@ int bnx2x_set_vf_mac(struct net_device *dev, int queue, u8 *mac);
 int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos);
 
 /* select_queue callback */
-u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb);
+u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
+		       void *accel_priv);
 
 static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
 					struct bnx2x_fastpath *fp,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index cc06854296a..5bcc870f836 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6827,12 +6827,20 @@ static inline int ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size)
 	return __ixgbe_maybe_stop_tx(tx_ring, size);
 }
 
-#ifdef IXGBE_FCOE
-static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb)
+static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
+			      void *accel_priv)
 {
+	struct ixgbe_fwd_adapter *fwd_adapter = accel_priv;
+#ifdef IXGBE_FCOE
 	struct ixgbe_adapter *adapter;
 	struct ixgbe_ring_feature *f;
 	int txq;
+#endif
+
+	if (fwd_adapter)
+		return skb->queue_mapping + fwd_adapter->tx_base_queue;
+
+#ifdef IXGBE_FCOE
 
 	/*
 	 * only execute the code below if protocol is FCoE
@@ -6858,9 +6866,11 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb)
 		txq -= f->indices;
 
 	return txq + f->offset;
+#else
+	return __netdev_pick_tx(dev, skb);
+#endif
 }
 
-#endif
 netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
 			  struct ixgbe_adapter *adapter,
 			  struct ixgbe_ring *tx_ring)
@@ -7629,27 +7639,11 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
 	kfree(fwd_adapter);
 }
 
-static netdev_tx_t ixgbe_fwd_xmit(struct sk_buff *skb,
-				  struct net_device *dev,
-				  void *priv)
-{
-	struct ixgbe_fwd_adapter *fwd_adapter = priv;
-	unsigned int queue;
-	struct ixgbe_ring *tx_ring;
-
-	queue = skb->queue_mapping + fwd_adapter->tx_base_queue;
-	tx_ring = fwd_adapter->real_adapter->tx_ring[queue];
-
-	return __ixgbe_xmit_frame(skb, dev, tx_ring);
-}
-
 static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_open		= ixgbe_open,
 	.ndo_stop		= ixgbe_close,
 	.ndo_start_xmit		= ixgbe_xmit_frame,
-#ifdef IXGBE_FCOE
 	.ndo_select_queue	= ixgbe_select_queue,
-#endif
 	.ndo_set_rx_mode	= ixgbe_set_rx_mode,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= ixgbe_set_mac,
@@ -7689,7 +7683,6 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_bridge_getlink	= ixgbe_ndo_bridge_getlink,
 	.ndo_dfwd_add_station	= ixgbe_fwd_add,
 	.ndo_dfwd_del_station	= ixgbe_fwd_del,
-	.ndo_dfwd_start_xmit	= ixgbe_fwd_xmit,
 };
 
 /**
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index 6a6c1f76d8e..ec94a20d709 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -619,7 +619,8 @@ ltq_etop_set_multicast_list(struct net_device *dev)
 }
 
 static u16
-ltq_etop_select_queue(struct net_device *dev, struct sk_buff *skb)
+ltq_etop_select_queue(struct net_device *dev, struct sk_buff *skb,
+		      void *accel_priv)
 {
 	/* we are currently only using the first queue */
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index f54ebd5a170..a7fcd593b2d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -592,7 +592,8 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *sk
 	}
 }
 
-u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb)
+u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
+			 void *accel_priv)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	u16 rings_p_up = priv->num_tx_rings_p_up;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index f3758de59c0..d5758adceaa 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -714,7 +714,8 @@ int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
 int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
 
 void mlx4_en_tx_irq(struct mlx4_cq *mcq);
-u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb);
+u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
+			 void *accel_priv);
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
 
 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c
index 628b736e5ae..0e9fb3301b1 100644
--- a/drivers/net/ethernet/tile/tilegx.c
+++ b/drivers/net/ethernet/tile/tilegx.c
@@ -2080,7 +2080,8 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
 }
 
 /* Return subqueue id on this core (one per core). */
-static u16 tile_net_select_queue(struct net_device *dev, struct sk_buff *skb)
+static u16 tile_net_select_queue(struct net_device *dev, struct sk_buff *skb,
+				 void *accel_priv)
 {
 	return smp_processor_id();
 }
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 5360f73c981..bc8faaec33f 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -299,7 +299,7 @@ netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 
 	if (vlan->fwd_priv) {
 		skb->dev = vlan->lowerdev;
-		ret = dev_hard_start_xmit(skb, skb->dev, NULL, vlan->fwd_priv);
+		ret = dev_queue_xmit_accel(skb, vlan->fwd_priv);
 	} else {
 		ret = macvlan_queue_xmit(skb, dev);
 	}
@@ -365,10 +365,8 @@ static int macvlan_open(struct net_device *dev)
 		 */
 		if (IS_ERR_OR_NULL(vlan->fwd_priv)) {
 			vlan->fwd_priv = NULL;
-		} else {
-			dev->features &= ~NETIF_F_LLTX;
+		} else
 			return 0;
-		}
 	}
 
 	err = -EBUSY;
@@ -702,8 +700,7 @@ static netdev_features_t macvlan_fix_features(struct net_device *dev,
 	features = netdev_increment_features(vlan->lowerdev->features,
 					     features,
 					     mask);
-	if (!vlan->fwd_priv)
-		features |= NETIF_F_LLTX;
+	features |= NETIF_F_LLTX;
 
 	return features;
 }
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 736050d6b45..b75ae5bde67 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1647,7 +1647,8 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-static u16 team_select_queue(struct net_device *dev, struct sk_buff *skb)
+static u16 team_select_queue(struct net_device *dev, struct sk_buff *skb,
+			     void *accel_priv)
 {
 	/*
 	 * This helper function exists to help dev_pick_tx get the correct
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 7c8343a4f91..ecec8029c5e 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -348,7 +348,8 @@ unlock:
  * different rxq no. here. If we could not get rxhash, then we would
  * hope the rxq no. may help here.
  */
-static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
+static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
+			    void *accel_priv)
 {
 	struct tun_struct *tun = netdev_priv(dev);
 	struct tun_flow_entry *e;
diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c
index 78e8a6666cc..8bb8988c435 100644
--- a/drivers/net/wireless/mwifiex/main.c
+++ b/drivers/net/wireless/mwifiex/main.c
@@ -746,7 +746,8 @@ static struct net_device_stats *mwifiex_get_stats(struct net_device *dev)
 }
 
 static u16
-mwifiex_netdev_select_wmm_queue(struct net_device *dev, struct sk_buff *skb)
+mwifiex_netdev_select_wmm_queue(struct net_device *dev, struct sk_buff *skb,
+				void *accel_priv)
 {
 	skb->priority = cfg80211_classify8021d(skb);
 	return mwifiex_1d_to_wmm_queue[skb->priority];
diff --git a/drivers/staging/bcm/Bcmnet.c b/drivers/staging/bcm/Bcmnet.c
index 53fee2f9a49..8dfdd2732bd 100644
--- a/drivers/staging/bcm/Bcmnet.c
+++ b/drivers/staging/bcm/Bcmnet.c
@@ -39,7 +39,8 @@ static INT bcm_close(struct net_device *dev)
 	return 0;
 }
 
-static u16 bcm_select_queue(struct net_device *dev, struct sk_buff *skb)
+static u16 bcm_select_queue(struct net_device *dev, struct sk_buff *skb,
+			    void *accel_priv)
 {
 	return ClassifyPacket(netdev_priv(dev), skb);
 }
diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c
index 235d2b1ec59..eedffed17e3 100644
--- a/drivers/staging/netlogic/xlr_net.c
+++ b/drivers/staging/netlogic/xlr_net.c
@@ -306,7 +306,8 @@ static netdev_tx_t xlr_net_start_xmit(struct sk_buff *skb,
 	return NETDEV_TX_OK;
 }
 
-static u16 xlr_net_select_queue(struct net_device *ndev, struct sk_buff *skb)
+static u16 xlr_net_select_queue(struct net_device *ndev, struct sk_buff *skb,
+				void *accel_priv)
 {
 	return (u16)smp_processor_id();
 }
diff --git a/drivers/staging/rtl8188eu/os_dep/os_intfs.c b/drivers/staging/rtl8188eu/os_dep/os_intfs.c
index 17659bb04be..dd69e344e40 100644
--- a/drivers/staging/rtl8188eu/os_dep/os_intfs.c
+++ b/drivers/staging/rtl8188eu/os_dep/os_intfs.c
@@ -652,7 +652,8 @@ static unsigned int rtw_classify8021d(struct sk_buff *skb)
 	return dscp >> 5;
 }
 
-static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb)
+static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb,
+			    void *accel_priv)
 {
 	struct adapter	*padapter = rtw_netdev_priv(dev);
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5faaadb0c74..ce2a1f5f9a1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -769,7 +769,8 @@ struct netdev_phys_port_id {
  *        (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX)
  *	Required can not be NULL.
  *
- * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb);
+ * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
+ *                         void *accel_priv);
  *	Called to decide which queue to when device supports multiple
  *	transmit queues.
  *
@@ -990,7 +991,8 @@ struct net_device_ops {
 	netdev_tx_t		(*ndo_start_xmit) (struct sk_buff *skb,
 						   struct net_device *dev);
 	u16			(*ndo_select_queue)(struct net_device *dev,
-						    struct sk_buff *skb);
+						    struct sk_buff *skb,
+						    void *accel_priv);
 	void			(*ndo_change_rx_flags)(struct net_device *dev,
 						       int flags);
 	void			(*ndo_set_rx_mode)(struct net_device *dev);
@@ -1529,7 +1531,8 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 }
 
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-				    struct sk_buff *skb);
+				    struct sk_buff *skb,
+				    void *accel_priv);
 u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb);
 
 /*
@@ -1819,6 +1822,7 @@ int dev_close(struct net_device *dev);
 void dev_disable_lro(struct net_device *dev);
 int dev_loopback_xmit(struct sk_buff *newskb);
 int dev_queue_xmit(struct sk_buff *skb);
+int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv);
 int register_netdevice(struct net_device *dev);
 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
 void unregister_netdevice_many(struct list_head *head);
@@ -2426,7 +2430,7 @@ int dev_change_carrier(struct net_device *, bool new_carrier);
 int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_port_id *ppid);
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-			struct netdev_queue *txq, void *accel_priv);
+			struct netdev_queue *txq);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 
 extern int		netdev_budget;
diff --git a/net/core/dev.c b/net/core/dev.c
index 4fc17221545..0ce469e5ec8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2539,7 +2539,7 @@ static inline int skb_needs_linearize(struct sk_buff *skb,
 }
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-			struct netdev_queue *txq, void *accel_priv)
+			struct netdev_queue *txq)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 	int rc = NETDEV_TX_OK;
@@ -2605,13 +2605,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			dev_queue_xmit_nit(skb, dev);
 
 		skb_len = skb->len;
-		if (accel_priv)
-			rc = ops->ndo_dfwd_start_xmit(skb, dev, accel_priv);
-		else
 			rc = ops->ndo_start_xmit(skb, dev);
 
 		trace_net_dev_xmit(skb, rc, dev, skb_len);
-		if (rc == NETDEV_TX_OK && txq)
+		if (rc == NETDEV_TX_OK)
 			txq_trans_update(txq);
 		return rc;
 	}
@@ -2627,10 +2624,7 @@ gso:
 			dev_queue_xmit_nit(nskb, dev);
 
 		skb_len = nskb->len;
-		if (accel_priv)
-			rc = ops->ndo_dfwd_start_xmit(nskb, dev, accel_priv);
-		else
-			rc = ops->ndo_start_xmit(nskb, dev);
+		rc = ops->ndo_start_xmit(nskb, dev);
 		trace_net_dev_xmit(nskb, rc, dev, skb_len);
 		if (unlikely(rc != NETDEV_TX_OK)) {
 			if (rc & ~NETDEV_TX_MASK)
@@ -2811,7 +2805,7 @@ EXPORT_SYMBOL(dev_loopback_xmit);
  *      the BH enable code must have IRQs enabled so that it will not deadlock.
  *          --BLG
  */
-int dev_queue_xmit(struct sk_buff *skb)
+int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 {
 	struct net_device *dev = skb->dev;
 	struct netdev_queue *txq;
@@ -2827,7 +2821,7 @@ int dev_queue_xmit(struct sk_buff *skb)
 
 	skb_update_prio(skb);
 
-	txq = netdev_pick_tx(dev, skb);
+	txq = netdev_pick_tx(dev, skb, accel_priv);
 	q = rcu_dereference_bh(txq->qdisc);
 
 #ifdef CONFIG_NET_CLS_ACT
@@ -2863,7 +2857,7 @@ int dev_queue_xmit(struct sk_buff *skb)
 
 			if (!netif_xmit_stopped(txq)) {
 				__this_cpu_inc(xmit_recursion);
-				rc = dev_hard_start_xmit(skb, dev, txq, NULL);
+				rc = dev_hard_start_xmit(skb, dev, txq);
 				__this_cpu_dec(xmit_recursion);
 				if (dev_xmit_complete(rc)) {
 					HARD_TX_UNLOCK(dev, txq);
@@ -2892,8 +2886,19 @@ out:
 	rcu_read_unlock_bh();
 	return rc;
 }
+
+int dev_queue_xmit(struct sk_buff *skb)
+{
+	return __dev_queue_xmit(skb, NULL);
+}
 EXPORT_SYMBOL(dev_queue_xmit);
 
+int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
+{
+	return __dev_queue_xmit(skb, accel_priv);
+}
+EXPORT_SYMBOL(dev_queue_xmit_accel);
+
 
 /*=======================================================================
 			Receiver routines
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d6ef1732250..2fc5beaf578 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -395,17 +395,21 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
 EXPORT_SYMBOL(__netdev_pick_tx);
 
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-				    struct sk_buff *skb)
+				    struct sk_buff *skb,
+				    void *accel_priv)
 {
 	int queue_index = 0;
 
 	if (dev->real_num_tx_queues != 1) {
 		const struct net_device_ops *ops = dev->netdev_ops;
 		if (ops->ndo_select_queue)
-			queue_index = ops->ndo_select_queue(dev, skb);
+			queue_index = ops->ndo_select_queue(dev, skb,
+							    accel_priv);
 		else
 			queue_index = __netdev_pick_tx(dev, skb);
-		queue_index = dev_cap_txqueue(dev, queue_index);
+
+		if (!accel_priv)
+			queue_index = dev_cap_txqueue(dev, queue_index);
 	}
 
 	skb_set_queue_mapping(skb, queue_index);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 30309787463..19fe9c717ce 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -375,7 +375,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
 		struct netdev_queue *txq;
 
-		txq = netdev_pick_tx(dev, skb);
+		txq = netdev_pick_tx(dev, skb, NULL);
 
 		/* try until next clock tick */
 		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 36c3a4cbcab..a0757913046 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1061,7 +1061,8 @@ static void ieee80211_uninit(struct net_device *dev)
 }
 
 static u16 ieee80211_netdev_select_queue(struct net_device *dev,
-					 struct sk_buff *skb)
+					 struct sk_buff *skb,
+					 void *accel_priv)
 {
 	return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb);
 }
@@ -1078,7 +1079,8 @@ static const struct net_device_ops ieee80211_dataif_ops = {
 };
 
 static u16 ieee80211_monitor_select_queue(struct net_device *dev,
-					  struct sk_buff *skb)
+					  struct sk_buff *skb,
+					  void *accel_priv)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 922a09406ba..7fc899a943a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -126,7 +126,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_xmit_frozen_or_stopped(txq))
-		ret = dev_hard_start_xmit(skb, dev, txq, NULL);
+		ret = dev_hard_start_xmit(skb, dev, txq);
 
 	HARD_TX_UNLOCK(dev, txq);
 
-- 
cgit v1.2.3-70-g09d2