14 files changed, 466 insertions, 158 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 12327b2bb78..fbb87cf138c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -653,6 +653,21 @@ config DEBUG_NOTIFIERS
 	  This is a relatively cheap check but if you care about maximum
 	  performance, say N.
 
+config DEBUG_CREDENTIALS
+	bool "Debug credential management"
+	depends on DEBUG_KERNEL
+	help
+	  Enable this to turn on some debug checking for credential
+	  management.  The additional code keeps track of the number of
+	  pointers from task_structs to any given cred struct, and checks to
+	  see that this number never exceeds the usage count of the cred
+	  struct.
+
+	  Furthermore, if SELinux is enabled, this also checks that the
+	  security pointer in the cred struct is never seen to be invalid.
+
+	  If unsure, say N.
+
 #
 # Select this config option from the architecture Kconfig, if it
 # it is preferred to always offer frame pointers as a config
diff --git a/lib/Makefile b/lib/Makefile
index b6d1857bbf0..2e78277eff9 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o prio_heap.o ratelimit.o show_mem.o \
-	 is_single_threaded.o plist.o decompress.o
+	 is_single_threaded.o plist.o decompress.o flex_array.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/atomic64.c b/lib/atomic64.c
index c5e72556241..8bee16ec752 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -13,6 +13,7 @@
 #include <linux/cache.h>
 #include <linux/spinlock.h>
 #include <linux/init.h>
+#include <linux/module.h>
 #include <asm/atomic.h>
 
 /*
@@ -52,6 +53,7 @@ long long atomic64_read(const atomic64_t *v)
 	spin_unlock_irqrestore(lock, flags);
 	return val;
 }
+EXPORT_SYMBOL(atomic64_read);
 
 void atomic64_set(atomic64_t *v, long long i)
 {
@@ -62,6 +64,7 @@ void atomic64_set(atomic64_t *v, long long i)
 	v->counter = i;
 	spin_unlock_irqrestore(lock, flags);
 }
+EXPORT_SYMBOL(atomic64_set);
 
 void atomic64_add(long long a, atomic64_t *v)
 {
@@ -72,6 +75,7 @@ void atomic64_add(long long a, atomic64_t *v)
 	v->counter += a;
 	spin_unlock_irqrestore(lock, flags);
 }
+EXPORT_SYMBOL(atomic64_add);
 
 long long atomic64_add_return(long long a, atomic64_t *v)
 {
@@ -84,6 +88,7 @@ long long atomic64_add_return(long long a, atomic64_t *v)
 	spin_unlock_irqrestore(lock, flags);
 	return val;
 }
+EXPORT_SYMBOL(atomic64_add_return);
 
 void atomic64_sub(long long a, atomic64_t *v)
 {
@@ -94,6 +99,7 @@ void atomic64_sub(long long a, atomic64_t *v)
 	v->counter -= a;
 	spin_unlock_irqrestore(lock, flags);
 }
+EXPORT_SYMBOL(atomic64_sub);
 
 long long atomic64_sub_return(long long a, atomic64_t *v)
 {
@@ -106,6 +112,7 @@ long long atomic64_sub_return(long long a, atomic64_t *v)
 	spin_unlock_irqrestore(lock, flags);
 	return val;
 }
+EXPORT_SYMBOL(atomic64_sub_return);
 
 long long atomic64_dec_if_positive(atomic64_t *v)
 {
@@ -120,6 +127,7 @@ long long atomic64_dec_if_positive(atomic64_t *v)
 	spin_unlock_irqrestore(lock, flags);
 	return val;
 }
+EXPORT_SYMBOL(atomic64_dec_if_positive);
 
 long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
 {
@@ -134,6 +142,7 @@ long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
 	spin_unlock_irqrestore(lock, flags);
 	return val;
 }
+EXPORT_SYMBOL(atomic64_cmpxchg);
 
 long long atomic64_xchg(atomic64_t *v, long long new)
 {
@@ -147,6 +156,7 @@ long long atomic64_xchg(atomic64_t *v, long long new)
 	spin_unlock_irqrestore(lock, flags);
 	return val;
 }
+EXPORT_SYMBOL(atomic64_xchg);
 
 int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 {
@@ -162,6 +172,7 @@ int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 	spin_unlock_irqrestore(lock, flags);
 	return ret;
 }
+EXPORT_SYMBOL(atomic64_add_unless);
 
 static int init_atomic64_lock(void)
 {
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 35a1f7ff414..702565821c9 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -179,14 +179,16 @@ void __bitmap_shift_left(unsigned long *dst,
 }
 EXPORT_SYMBOL(__bitmap_shift_left);
 
-void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
 				const unsigned long *bitmap2, int bits)
 {
 	int k;
 	int nr = BITS_TO_LONGS(bits);
+	unsigned long result = 0;
 
 	for (k = 0; k < nr; k++)
-		dst[k] = bitmap1[k] & bitmap2[k];
+		result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+	return result != 0;
 }
 EXPORT_SYMBOL(__bitmap_and);
 
@@ -212,14 +214,16 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
 }
 EXPORT_SYMBOL(__bitmap_xor);
 
-void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
 				const unsigned long *bitmap2, int bits)
 {
 	int k;
 	int nr = BITS_TO_LONGS(bits);
+	unsigned long result = 0;
 
 	for (k = 0; k < nr; k++)
-		dst[k] = bitmap1[k] & ~bitmap2[k];
+		result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+	return result != 0;
 }
 EXPORT_SYMBOL(__bitmap_andnot);
 
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index 708e2a86d87..600f473a561 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -45,12 +45,14 @@
 */
 
 
-#ifndef STATIC
+#ifdef STATIC
+#define PREBOOT
+#else
 #include <linux/decompress/bunzip2.h>
-#endif /* !STATIC */
+#include <linux/slab.h>
+#endif /* STATIC */
 
 #include <linux/decompress/mm.h>
-#include <linux/slab.h>
 
 #ifndef INT_MAX
 #define INT_MAX 0x7fffffff
@@ -681,9 +683,7 @@ STATIC int INIT bunzip2(unsigned char *buf, int len,
 	set_error_fn(error_fn);
 	if (flush)
 		outbuf = malloc(BZIP2_IOBUF_SIZE);
-	else
-		len -= 4; /* Uncompressed size hack active in pre-boot
-			     environment */
+
 	if (!outbuf) {
 		error("Could not allocate output bufer");
 		return -1;
@@ -733,4 +733,14 @@ exit_0:
 	return i;
 }
 
-#define decompress bunzip2
+#ifdef PREBOOT
+STATIC int INIT decompress(unsigned char *buf, int len,
+			int(*fill)(void*, unsigned int),
+			int(*flush)(void*, unsigned int),
+			unsigned char *outbuf,
+			int *pos,
+			void(*error_fn)(char *x))
+{
+	return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error_fn);
+}
+#endif
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
index e36b296fc9f..68dfce59c1b 100644
--- a/lib/decompress_inflate.c
+++ b/lib/decompress_inflate.c
@@ -19,13 +19,13 @@
 #include "zlib_inflate/inflate.h"
 
 #include "zlib_inflate/infutil.h"
+#include <linux/slab.h>
 
 #endif /* STATIC */
 
 #include <linux/decompress/mm.h>
-#include <linux/slab.h>
 
-#define INBUF_LEN (16*1024)
+#define GZIP_IOBUF_SIZE (16*1024)
 
 /* Included from initramfs et al code */
 STATIC int INIT gunzip(unsigned char *buf, int len,
@@ -55,7 +55,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
 	if (buf)
 		zbuf = buf;
 	else {
-		zbuf = malloc(INBUF_LEN);
+		zbuf = malloc(GZIP_IOBUF_SIZE);
 		len = 0;
 	}
 	if (!zbuf) {
@@ -77,7 +77,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
 	}
 
 	if (len == 0)
-		len = fill(zbuf, INBUF_LEN);
+		len = fill(zbuf, GZIP_IOBUF_SIZE);
 
 	/* verify the gzip header */
 	if (len < 10 ||
@@ -113,7 +113,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
 	while (rc == Z_OK) {
 		if (strm->avail_in == 0) {
 			/* TODO: handle case where both pos and fill are set */
-			len = fill(zbuf, INBUF_LEN);
+			len = fill(zbuf, GZIP_IOBUF_SIZE);
 			if (len < 0) {
 				rc = -1;
 				error("read error");
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index 32123a1340e..0b954e04bd3 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -29,12 +29,14 @@
  *Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-#ifndef STATIC
+#ifdef STATIC
+#define PREBOOT
+#else
 #include <linux/decompress/unlzma.h>
+#include <linux/slab.h>
 #endif /* STATIC */
 
 #include <linux/decompress/mm.h>
-#include <linux/slab.h>
 
 #define	MIN(a, b) (((a) < (b)) ? (a) : (b))
 
@@ -543,9 +545,7 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
 	int ret = -1;
 
 	set_error_fn(error_fn);
-	if (!flush)
-		in_len -= 4; /* Uncompressed size hack active in pre-boot
-				environment */
+
 	if (buf)
 		inbuf = buf;
 	else
@@ -645,4 +645,15 @@ exit_0:
 	return ret;
 }
 
-#define decompress unlzma
+#ifdef PREBOOT
+STATIC int INIT decompress(unsigned char *buf, int in_len,
+			      int(*fill)(void*, unsigned int),
+			      int(*flush)(void*, unsigned int),
+			      unsigned char *output,
+			      int *posp,
+			      void(*error_fn)(char *x)
+	)
+{
+	return unlzma(buf, in_len - 4, fill, flush, output, posp, error_fn);
+}
+#endif
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 3b93129a968..58a9f9fc609 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -156,9 +156,13 @@ static bool driver_filter(struct device *dev)
 		return true;
 
 	/* driver filter on and initialized */
-	if (current_driver && dev->driver == current_driver)
+	if (current_driver && dev && dev->driver == current_driver)
 		return true;
 
+	/* driver filter on, but we can't filter on a NULL device... */
+	if (!dev)
+		return false;
+
 	if (current_driver || !current_driver_name[0])
 		return false;
 
@@ -183,17 +187,17 @@ static bool driver_filter(struct device *dev)
 	return ret;
 }
 
-#define err_printk(dev, entry, format, arg...) do {		\
-		error_count += 1;				\
-		if (driver_filter(dev) &&			\
-		    (show_all_errors || show_num_errors > 0)) {	\
-			WARN(1, "%s %s: " format,		\
-			     dev_driver_string(dev),		\
-			     dev_name(dev) , ## arg);		\
-			dump_entry_trace(entry);		\
-		}						\
-		if (!show_all_errors && show_num_errors > 0)	\
-			show_num_errors -= 1;			\
+#define err_printk(dev, entry, format, arg...) do {			\
+		error_count += 1;					\
+		if (driver_filter(dev) &&				\
+		    (show_all_errors || show_num_errors > 0)) {		\
+			WARN(1, "%s %s: " format,			\
+			     dev ? dev_driver_string(dev) : "NULL",	\
+			     dev ? dev_name(dev) : "NULL", ## arg);	\
+			dump_entry_trace(entry);			\
+		}							\
+		if (!show_all_errors && show_num_errors > 0)		\
+			show_num_errors -= 1;				\
 	} while (0);
 
 /*
@@ -716,7 +720,7 @@ void dma_debug_init(u32 num_entries)
 
 	for (i = 0; i < HASH_SIZE; ++i) {
 		INIT_LIST_HEAD(&dma_entry_hash[i].list);
-		dma_entry_hash[i].lock = SPIN_LOCK_UNLOCKED;
+		spin_lock_init(&dma_entry_hash[i].lock);
 	}
 
 	if (dma_debug_fs_init() != 0) {
@@ -856,22 +860,21 @@ static void check_for_stack(struct device *dev, void *addr)
 				"stack [addr=%p]\n", addr);
 }
 
-static inline bool overlap(void *addr, u64 size, void *start, void *end)
+static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
 {
-	void *addr2 = (char *)addr + size;
+	unsigned long a1 = (unsigned long)addr;
+	unsigned long b1 = a1 + len;
+	unsigned long a2 = (unsigned long)start;
+	unsigned long b2 = (unsigned long)end;
 
-	return ((addr >= start && addr < end) ||
-		(addr2 >= start && addr2 < end) ||
-		((addr < start) && (addr2 >= end)));
+	return !(b1 <= a2 || a1 >= b2);
 }
 
-static void check_for_illegal_area(struct device *dev, void *addr, u64 size)
+static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len)
 {
-	if (overlap(addr, size, _text, _etext) ||
-	    overlap(addr, size, __start_rodata, __end_rodata))
-		err_printk(dev, NULL, "DMA-API: device driver maps "
-				"memory from kernel text or rodata "
-				"[addr=%p] [size=%llu]\n", addr, size);
+	if (overlap(addr, len, _text, _etext) ||
+	    overlap(addr, len, __start_rodata, __end_rodata))
+		err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len);
 }
 
 static void check_sync(struct device *dev,
@@ -969,7 +972,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 		entry->type = dma_debug_single;
 
 	if (!PageHighMem(page)) {
-		void *addr = ((char *)page_address(page)) + offset;
+		void *addr = page_address(page) + offset;
+
 		check_for_stack(dev, addr);
 		check_for_illegal_area(dev, addr, size);
 	}
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 833139ce1e2..e22c148e4b7 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -164,7 +164,7 @@ static void ddebug_change(const struct ddebug_query *query,
 
 			if (!newflags)
 				dt->num_enabled--;
-			else if (!dp-flags)
+			else if (!dp->flags)
 				dt->num_enabled++;
 			dp->flags = newflags;
 			if (newflags) {
diff --git a/lib/flex_array.c b/lib/flex_array.c
new file mode 100644
index 00000000000..7baed2fc3bc
--- /dev/null
+++ b/lib/flex_array.c
@@ -0,0 +1,268 @@
+/*
+ * Flexible array managed in PAGE_SIZE parts
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2009
+ *
+ * Author: Dave Hansen <dave@linux.vnet.ibm.com>
+ */
+
+#include <linux/flex_array.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+
+struct flex_array_part {
+	char elements[FLEX_ARRAY_PART_SIZE];
+};
+
+static inline int __elements_per_part(int element_size)
+{
+	return FLEX_ARRAY_PART_SIZE / element_size;
+}
+
+static inline int bytes_left_in_base(void)
+{
+	int element_offset = offsetof(struct flex_array, parts);
+	int bytes_left = FLEX_ARRAY_BASE_SIZE - element_offset;
+	return bytes_left;
+}
+
+static inline int nr_base_part_ptrs(void)
+{
+	return bytes_left_in_base() / sizeof(struct flex_array_part *);
+}
+
+/*
+ * If a user requests an allocation which is small
+ * enough, we may simply use the space in the
+ * flex_array->parts[] array to store the user
+ * data.
+ */
+static inline int elements_fit_in_base(struct flex_array *fa)
+{
+	int data_size = fa->element_size * fa->total_nr_elements;
+	if (data_size <= bytes_left_in_base())
+		return 1;
+	return 0;
+}
+
+/**
+ * flex_array_alloc - allocate a new flexible array
+ * @element_size:	the size of individual elements in the array
+ * @total:		total number of elements that this should hold
+ *
+ * Note: all locking must be provided by the caller.
+ *
+ * @total is used to size internal structures.  If the user ever
+ * accesses any array indexes >=@total, it will produce errors.
+ *
+ * The maximum number of elements is defined as: the number of
+ * elements that can be stored in a page times the number of
+ * page pointers that we can fit in the base structure or (using
+ * integer math):
+ *
+ * 	(PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *)
+ *
+ * Here's a table showing example capacities.  Note that the maximum
+ * index that the get/put() functions accept is just nr_objects-1.  This
+ * basically means that you get 4MB of storage on 32-bit and 2MB on
+ * 64-bit.
+ *
+ *
+ * Element size | Objects | Objects |
+ * PAGE_SIZE=4k |  32-bit |  64-bit |
+ * ---------------------------------|
+ *      1 bytes | 4186112 | 2093056 |
+ *      2 bytes | 2093056 | 1046528 |
+ *      3 bytes | 1395030 |  697515 |
+ *      4 bytes | 1046528 |  523264 |
+ *     32 bytes |  130816 |   65408 |
+ *     33 bytes |  126728 |   63364 |
+ *   2048 bytes |    2044 |    1022 |
+ *   2049 bytes |    1022 |     511 |
+ *       void * | 1046528 |  261632 |
+ *
+ * Since 64-bit pointers are twice the size, we lose half the
+ * capacity in the base structure.  Also note that no effort is made
+ * to efficiently pack objects across page boundaries.
+ */
+struct flex_array *flex_array_alloc(int element_size, unsigned int total,
+					gfp_t flags)
+{
+	struct flex_array *ret;
+	int max_size = nr_base_part_ptrs() * __elements_per_part(element_size);
+
+	/* max_size will end up 0 if element_size > PAGE_SIZE */
+	if (total > max_size)
+		return NULL;
+	ret = kzalloc(sizeof(struct flex_array), flags);
+	if (!ret)
+		return NULL;
+	ret->element_size = element_size;
+	ret->total_nr_elements = total;
+	return ret;
+}
+
+static int fa_element_to_part_nr(struct flex_array *fa,
+					unsigned int element_nr)
+{
+	return element_nr / __elements_per_part(fa->element_size);
+}
+
+/**
+ * flex_array_free_parts - just free the second-level pages
+ *
+ * This is to be used in cases where the base 'struct flex_array'
+ * has been statically allocated and should not be freed.
+ */
+void flex_array_free_parts(struct flex_array *fa)
+{
+	int part_nr;
+	int max_part = nr_base_part_ptrs();
+
+	if (elements_fit_in_base(fa))
+		return;
+	for (part_nr = 0; part_nr < max_part; part_nr++)
+		kfree(fa->parts[part_nr]);
+}
+
+void flex_array_free(struct flex_array *fa)
+{
+	flex_array_free_parts(fa);
+	kfree(fa);
+}
+
+static unsigned int index_inside_part(struct flex_array *fa,
+					unsigned int element_nr)
+{
+	unsigned int part_offset;
+
+	part_offset = element_nr % __elements_per_part(fa->element_size);
+	return part_offset * fa->element_size;
+}
+
+static struct flex_array_part *
+__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
+{
+	struct flex_array_part *part = fa->parts[part_nr];
+	if (!part) {
+		/*
+		 * This leaves the part pages uninitialized
+		 * and with potentially random data, just
+		 * as if the user had kmalloc()'d the whole.
+		 * __GFP_ZERO can be used to zero it.
+		 */
+		part = kmalloc(FLEX_ARRAY_PART_SIZE, flags);
+		if (!part)
+			return NULL;
+		fa->parts[part_nr] = part;
+	}
+	return part;
+}
+
+/**
+ * flex_array_put - copy data into the array at @element_nr
+ * @src:	address of data to copy into the array
+ * @element_nr:	index of the position in which to insert
+ * 		the new element.
+ *
+ * Note that this *copies* the contents of @src into
+ * the array.  If you are trying to store an array of
+ * pointers, make sure to pass in &ptr instead of ptr.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
+			gfp_t flags)
+{
+	int part_nr = fa_element_to_part_nr(fa, element_nr);
+	struct flex_array_part *part;
+	void *dst;
+
+	if (element_nr >= fa->total_nr_elements)
+		return -ENOSPC;
+	if (elements_fit_in_base(fa))
+		part = (struct flex_array_part *)&fa->parts[0];
+	else {
+		part = __fa_get_part(fa, part_nr, flags);
+		if (!part)
+			return -ENOMEM;
+	}
+	dst = &part->elements[index_inside_part(fa, element_nr)];
+	memcpy(dst, src, fa->element_size);
+	return 0;
+}
+
+/**
+ * flex_array_prealloc - guarantee that array space exists
+ * @start:	index of first array element for which space is allocated
+ * @end:	index of last (inclusive) element for which space is allocated
+ *
+ * This will guarantee that no future calls to flex_array_put()
+ * will allocate memory.  It can be used if you are expecting to
+ * be holding a lock or in some atomic context while writing
+ * data into the array.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_prealloc(struct flex_array *fa, unsigned int start,
+			unsigned int end, gfp_t flags)
+{
+	int start_part;
+	int end_part;
+	int part_nr;
+	struct flex_array_part *part;
+
+	if (start >= fa->total_nr_elements || end >= fa->total_nr_elements)
+		return -ENOSPC;
+	if (elements_fit_in_base(fa))
+		return 0;
+	start_part = fa_element_to_part_nr(fa, start);
+	end_part = fa_element_to_part_nr(fa, end);
+	for (part_nr = start_part; part_nr <= end_part; part_nr++) {
+		part = __fa_get_part(fa, part_nr, flags);
+		if (!part)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+/**
+ * flex_array_get - pull data back out of the array
+ * @element_nr:	index of the element to fetch from the array
+ *
+ * Returns a pointer to the data at index @element_nr.  Note
+ * that this is a copy of the data that was passed in.  If you
+ * are using this to store pointers, you'll get back &ptr.
+ *
+ * Locking must be provided by the caller.
+ */
+void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
+{
+	int part_nr = fa_element_to_part_nr(fa, element_nr);
+	struct flex_array_part *part;
+
+	if (element_nr >= fa->total_nr_elements)
+		return NULL;
+	if (elements_fit_in_base(fa))
+		part = (struct flex_array_part *)&fa->parts[0];
+	else {
+		part = fa->parts[part_nr];
+		if (!part)
+			return NULL;
+	}
+	return &part->elements[index_inside_part(fa, element_nr)];
+}
diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c
index f1ed2fe76c6..bd2bea96336 100644
--- a/lib/is_single_threaded.c
+++ b/lib/is_single_threaded.c
@@ -12,34 +12,47 @@
 
 #include <linux/sched.h>
 
-/**
- * is_single_threaded - Determine if a thread group is single-threaded or not
- * @p: A task in the thread group in question
- *
- * This returns true if the thread group to which a task belongs is single
- * threaded, false if it is not.
+/*
+ * Returns true if the task does not share ->mm with another thread/process.
  */
-bool is_single_threaded(struct task_struct *p)
+bool current_is_single_threaded(void)
 {
-	struct task_struct *g, *t;
-	struct mm_struct *mm = p->mm;
+	struct task_struct *task = current;
+	struct mm_struct *mm = task->mm;
+	struct task_struct *p, *t;
+	bool ret;
 
-	if (atomic_read(&p->signal->count) != 1)
-		goto no;
+	if (atomic_read(&task->signal->live) != 1)
+		return false;
 
-	if (atomic_read(&p->mm->mm_users) != 1) {
-		read_lock(&tasklist_lock);
-		do_each_thread(g, t) {
-			if (t->mm == mm && t != p)
-				goto no_unlock;
-		} while_each_thread(g, t);
-		read_unlock(&tasklist_lock);
-	}
+	if (atomic_read(&mm->mm_users) == 1)
+		return true;
 
-	return true;
+	ret = false;
+	rcu_read_lock();
+	for_each_process(p) {
+		if (unlikely(p->flags & PF_KTHREAD))
+			continue;
+		if (unlikely(p == task->group_leader))
+			continue;
+
+		t = p;
+		do {
+			if (unlikely(t->mm == mm))
+				goto found;
+			if (likely(t->mm))
+				break;
+			/*
+			 * t->mm == NULL. Make sure next_thread/next_task
+			 * will see other CLONE_VM tasks which might be
+			 * forked before exiting.
+			 */
+			smp_rmb();
+		} while_each_thread(p, t);
+	}
+	ret = true;
+found:
+	rcu_read_unlock();
 
-no_unlock:
-	read_unlock(&tasklist_lock);
-no:
-	return false;
+	return ret;
 }
diff --git a/lib/lmb.c b/lib/lmb.c
index e4a6482d8b2..0343c05609f 100644
--- a/lib/lmb.c
+++ b/lib/lmb.c
@@ -429,7 +429,7 @@ u64 __init lmb_phys_mem_size(void)
 	return lmb.memory.size;
 }
 
-u64 __init lmb_end_of_DRAM(void)
+u64 lmb_end_of_DRAM(void)
 {
 	int idx = lmb.memory.cnt - 1;
 
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index a295e404e90..0d475d8167b 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -314,6 +314,7 @@ void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
 	miter->__sg = sgl;
 	miter->__nents = nents;
 	miter->__offset = 0;
+	WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG)));
 	miter->__flags = flags;
 }
 EXPORT_SYMBOL(sg_miter_start);
@@ -394,6 +395,9 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
 	if (miter->addr) {
 		miter->__offset += miter->consumed;
 
+		if (miter->__flags & SG_MITER_TO_SG)
+			flush_kernel_dcache_page(miter->page);
+
 		if (miter->__flags & SG_MITER_ATOMIC) {
 			WARN_ON(!irqs_disabled());
 			kunmap_atomic(miter->addr, KM_BIO_SRC_IRQ);
@@ -426,8 +430,14 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
 	unsigned int offset = 0;
 	struct sg_mapping_iter miter;
 	unsigned long flags;
+	unsigned int sg_flags = SG_MITER_ATOMIC;
+
+	if (to_buffer)
+		sg_flags |= SG_MITER_FROM_SG;
+	else
+		sg_flags |= SG_MITER_TO_SG;
 
-	sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC);
+	sg_miter_start(&miter, sgl, nents, sg_flags);
 
 	local_irq_save(flags);
 
@@ -438,10 +448,8 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
 
 		if (to_buffer)
 			memcpy(buf + offset, miter.addr, len);
-		else {
+		else
 			memcpy(miter.addr, buf + offset, len);
-			flush_kernel_dcache_page(miter.page);
-		}
 
 		offset += len;
 	}
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index bffe6d7ef9d..ac25cd28e80 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -114,46 +114,11 @@ setup_io_tlb_npages(char *str)
 __setup("swiotlb=", setup_io_tlb_npages);
 /* make io_tlb_overflow tunable too? */
 
-void * __weak __init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
-{
-	return alloc_bootmem_low_pages(size);
-}
-
-void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs)
-{
-	return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
-}
-
-dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
-{
-	return paddr;
-}
-
-phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
-{
-	return baddr;
-}
-
+/* Note that this doesn't work with highmem pages */
 static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
 				      volatile void *address)
 {
-	return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
-}
-
-void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
-{
-	return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
-}
-
-int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
-					       dma_addr_t addr, size_t size)
-{
-	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
-}
-
-int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
-{
-	return 0;
+	return phys_to_dma(hwdev, virt_to_phys(address));
 }
 
 static void swiotlb_print_info(unsigned long bytes)
@@ -189,7 +154,7 @@ swiotlb_init_with_default_size(size_t default_size)
 	/*
 	 * Get IO TLB memory from the low pages
 	 */
-	io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs);
+	io_tlb_start = alloc_bootmem_low_pages(bytes);
 	if (!io_tlb_start)
 		panic("Cannot allocate SWIOTLB buffer");
 	io_tlb_end = io_tlb_start + bytes;
@@ -245,7 +210,8 @@ swiotlb_late_init_with_default_size(size_t default_size)
 	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 
 	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-		io_tlb_start = swiotlb_alloc(order, io_tlb_nslabs);
+		io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
+							order);
 		if (io_tlb_start)
 			break;
 		order--;
@@ -315,20 +281,10 @@ cleanup1:
 	return -ENOMEM;
 }
 
-static inline int
-address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
+static int is_swiotlb_buffer(phys_addr_t paddr)
 {
-	return swiotlb_arch_address_needs_mapping(hwdev, addr, size);
-}
-
-static inline int range_needs_mapping(phys_addr_t paddr, size_t size)
-{
-	return swiotlb_force || swiotlb_arch_range_needs_mapping(paddr, size);
-}
-
-static int is_swiotlb_buffer(char *addr)
-{
-	return addr >= io_tlb_start && addr < io_tlb_end;
+	return paddr >= virt_to_phys(io_tlb_start) &&
+		paddr < virt_to_phys(io_tlb_end);
 }
 
 /*
@@ -561,9 +517,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		dma_mask = hwdev->coherent_dma_mask;
 
 	ret = (void *)__get_free_pages(flags, order);
-	if (ret &&
-	    !is_buffer_dma_capable(dma_mask, swiotlb_virt_to_bus(hwdev, ret),
-				   size)) {
+	if (ret && swiotlb_virt_to_bus(hwdev, ret) + size > dma_mask) {
 		/*
 		 * The allocated memory isn't reachable by the device.
 		 */
@@ -585,7 +539,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	dev_addr = swiotlb_virt_to_bus(hwdev, ret);
 
 	/* Confirm address can be DMA'd by device */
-	if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) {
+	if (dev_addr + size > dma_mask) {
 		printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
 		       (unsigned long long)dma_mask,
 		       (unsigned long long)dev_addr);
@@ -601,11 +555,13 @@ EXPORT_SYMBOL(swiotlb_alloc_coherent);
 
 void
 swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
-		      dma_addr_t dma_handle)
+		      dma_addr_t dev_addr)
 {
+	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
+
 	WARN_ON(irqs_disabled());
-	if (!is_swiotlb_buffer(vaddr))
-		free_pages((unsigned long) vaddr, get_order(size));
+	if (!is_swiotlb_buffer(paddr))
+		free_pages((unsigned long)vaddr, get_order(size));
 	else
 		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
 		do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
@@ -625,12 +581,15 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
 	printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at "
 	       "device %s\n", size, dev ? dev_name(dev) : "?");
 
-	if (size > io_tlb_overflow && do_panic) {
-		if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
-			panic("DMA: Memory would be corrupted\n");
-		if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-			panic("DMA: Random memory would be DMAed\n");
-	}
+	if (size <= io_tlb_overflow || !do_panic)
+		return;
+
+	if (dir == DMA_BIDIRECTIONAL)
+		panic("DMA: Random memory could be DMA accessed\n");
+	if (dir == DMA_FROM_DEVICE)
+		panic("DMA: Random memory could be DMA written\n");
+	if (dir == DMA_TO_DEVICE)
+		panic("DMA: Random memory could be DMA read\n");
 }
 
 /*
@@ -646,7 +605,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 			    struct dma_attrs *attrs)
 {
 	phys_addr_t phys = page_to_phys(page) + offset;
-	dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys);
+	dma_addr_t dev_addr = phys_to_dma(dev, phys);
 	void *map;
 
 	BUG_ON(dir == DMA_NONE);
@@ -655,8 +614,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
-	if (!address_needs_mapping(dev, dev_addr, size) &&
-	    !range_needs_mapping(phys, size))
+	if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
 		return dev_addr;
 
 	/*
@@ -673,7 +631,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	/*
 	 * Ensure that the address returned is DMA'ble
 	 */
-	if (address_needs_mapping(dev, dev_addr, size))
+	if (!dma_capable(dev, dev_addr, size))
 		panic("map_single: bounce buffer is not DMA'ble");
 
 	return dev_addr;
@@ -691,19 +649,25 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
 static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 			 size_t size, int dir)
 {
-	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 
-	if (is_swiotlb_buffer(dma_addr)) {
-		do_unmap_single(hwdev, dma_addr, size, dir);
+	if (is_swiotlb_buffer(paddr)) {
+		do_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
 		return;
 	}
 
 	if (dir != DMA_FROM_DEVICE)
 		return;
 
-	dma_mark_clean(dma_addr, size);
+	/*
+	 * phys_to_virt doesn't work with a highmem page but we could
+	 * call dma_mark_clean() with a highmem page here. However, we
+	 * are fine since dma_mark_clean() is null on POWERPC. We can
+	 * make dma_mark_clean() take a physical address if necessary.
+	 */
+	dma_mark_clean(phys_to_virt(paddr), size);
 }
 
 void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
@@ -728,19 +692,19 @@ static void
 swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 		    size_t size, int dir, int target)
 {
-	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 
-	if (is_swiotlb_buffer(dma_addr)) {
-		sync_single(hwdev, dma_addr, size, dir, target);
+	if (is_swiotlb_buffer(paddr)) {
+		sync_single(hwdev, phys_to_virt(paddr), size, dir, target);
 		return;
 	}
 
 	if (dir != DMA_FROM_DEVICE)
 		return;
 
-	dma_mark_clean(dma_addr, size);
+	dma_mark_clean(phys_to_virt(paddr), size);
 }
 
 void
@@ -817,10 +781,10 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 
 	for_each_sg(sgl, sg, nelems, i) {
 		phys_addr_t paddr = sg_phys(sg);
-		dma_addr_t dev_addr = swiotlb_phys_to_bus(hwdev, paddr);
+		dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
 
-		if (range_needs_mapping(paddr, sg->length) ||
-		    address_needs_mapping(hwdev, dev_addr, sg->length)) {
+		if (swiotlb_force ||
+		    !dma_capable(hwdev, dev_addr, sg->length)) {
 			void *map = map_single(hwdev, sg_phys(sg),
 					       sg->length, dir);
 			if (!map) {