12 files changed, 492 insertions, 209 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 0ee67e08ad3..3a55a43c43e 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -201,6 +201,10 @@ config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
        bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS
        depends on EXPERIMENTAL && BROKEN
 
+config CPU_RMAP
+	bool
+	depends on SMP
+
 #
 # Netlink attribute parsing support is select'ed if needed
 #
@@ -217,6 +221,13 @@ config LRU_CACHE
 	tristate
 
 config AVERAGE
-	bool
+	bool "Averaging functions"
+	help
+	  This option is provided for the case where no in-kernel-tree
+	  modules require averaging functions, but a module built outside
+	  the kernel tree does. Such modules that use library averaging
+	  functions require Y here.
+
+	  If unsure, say N.
 
 endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 3967c2356e3..6f440d82b58 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -470,15 +470,6 @@ config DEBUG_MUTEXES
 	 This feature allows mutex semantics violations to be detected and
 	 reported.
 
-config BKL
-	bool "Big Kernel Lock" if (SMP || PREEMPT)
-	default y
-	help
-	  This is the traditional lock that is used in old code instead
-	  of proper locking. All drivers that use the BKL should depend
-	  on this symbol.
-	  Say Y here unless you are working on removing the BKL.
-
 config DEBUG_LOCK_ALLOC
 	bool "Lock debugging: detect incorrect freeing of live locks"
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
@@ -805,7 +796,7 @@ config ARCH_WANT_FRAME_POINTERS
 config FRAME_POINTER
 	bool "Compile the kernel with frame pointers"
 	depends on DEBUG_KERNEL && \
-		(CRIS || M68K || M68KNOMMU || FRV || UML || \
+		(CRIS || M68K || FRV || UML || \
 		 AVR32 || SUPERH || BLACKFIN || MN10300) || \
 		ARCH_WANT_FRAME_POINTERS
 	default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS
diff --git a/lib/Makefile b/lib/Makefile
index cbb774f7d41..ef7ed71a6ff 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -43,7 +43,6 @@ obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o
 CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 
-obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
 obj-$(CONFIG_BTREE) += btree.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
@@ -110,6 +109,8 @@ obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o
 
 obj-$(CONFIG_AVERAGE) += average.o
 
+obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c
new file mode 100644
index 00000000000..987acfafeb8
--- /dev/null
+++ b/lib/cpu_rmap.c
@@ -0,0 +1,269 @@
+/*
+ * cpu_rmap.c: CPU affinity reverse-map support
+ * Copyright 2011 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/cpu_rmap.h>
+#ifdef CONFIG_GENERIC_HARDIRQS
+#include <linux/interrupt.h>
+#endif
+#include <linux/module.h>
+
+/*
+ * These functions maintain a mapping from CPUs to some ordered set of
+ * objects with CPU affinities.  This can be seen as a reverse-map of
+ * CPU affinity.  However, we do not assume that the object affinities
+ * cover all CPUs in the system.  For those CPUs not directly covered
+ * by object affinities, we attempt to find a nearest object based on
+ * CPU topology.
+ */
+
+/**
+ * alloc_cpu_rmap - allocate CPU affinity reverse-map
+ * @size: Number of objects to be mapped
+ * @flags: Allocation flags e.g. %GFP_KERNEL
+ */
+struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
+{
+	struct cpu_rmap *rmap;
+	unsigned int cpu;
+	size_t obj_offset;
+
+	/* This is a silly number of objects, and we use u16 indices. */
+	if (size > 0xffff)
+		return NULL;
+
+	/* Offset of object pointer array from base structure */
+	obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]),
+			   sizeof(void *));
+
+	rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags);
+	if (!rmap)
+		return NULL;
+
+	rmap->obj = (void **)((char *)rmap + obj_offset);
+
+	/* Initially assign CPUs to objects on a rota, since we have
+	 * no idea where the objects are.  Use infinite distance, so
+	 * any object with known distance is preferable.  Include the
+	 * CPUs that are not present/online, since we definitely want
+	 * any newly-hotplugged CPUs to have some object assigned.
+	 */
+	for_each_possible_cpu(cpu) {
+		rmap->near[cpu].index = cpu % size;
+		rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
+	}
+
+	rmap->size = size;
+	return rmap;
+}
+EXPORT_SYMBOL(alloc_cpu_rmap);
+
+/* Reevaluate nearest object for given CPU, comparing with the given
+ * neighbours at the given distance.
+ */
+static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu,
+				const struct cpumask *mask, u16 dist)
+{
+	int neigh;
+
+	for_each_cpu(neigh, mask) {
+		if (rmap->near[cpu].dist > dist &&
+		    rmap->near[neigh].dist <= dist) {
+			rmap->near[cpu].index = rmap->near[neigh].index;
+			rmap->near[cpu].dist = dist;
+			return true;
+		}
+	}
+	return false;
+}
+
+#ifdef DEBUG
+static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
+{
+	unsigned index;
+	unsigned int cpu;
+
+	pr_info("cpu_rmap %p, %s:\n", rmap, prefix);
+
+	for_each_possible_cpu(cpu) {
+		index = rmap->near[cpu].index;
+		pr_info("cpu %d -> obj %u (distance %u)\n",
+			cpu, index, rmap->near[cpu].dist);
+	}
+}
+#else
+static inline void
+debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
+{
+}
+#endif
+
+/**
+ * cpu_rmap_add - add object to a rmap
+ * @rmap: CPU rmap allocated with alloc_cpu_rmap()
+ * @obj: Object to add to rmap
+ *
+ * Return index of object.
+ */
+int cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
+{
+	u16 index;
+
+	BUG_ON(rmap->used >= rmap->size);
+	index = rmap->used++;
+	rmap->obj[index] = obj;
+	return index;
+}
+EXPORT_SYMBOL(cpu_rmap_add);
+
+/**
+ * cpu_rmap_update - update CPU rmap following a change of object affinity
+ * @rmap: CPU rmap to update
+ * @index: Index of object whose affinity changed
+ * @affinity: New CPU affinity of object
+ */
+int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
+		    const struct cpumask *affinity)
+{
+	cpumask_var_t update_mask;
+	unsigned int cpu;
+
+	if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL)))
+		return -ENOMEM;
+
+	/* Invalidate distance for all CPUs for which this used to be
+	 * the nearest object.  Mark those CPUs for update.
+	 */
+	for_each_online_cpu(cpu) {
+		if (rmap->near[cpu].index == index) {
+			rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
+			cpumask_set_cpu(cpu, update_mask);
+		}
+	}
+
+	debug_print_rmap(rmap, "after invalidating old distances");
+
+	/* Set distance to 0 for all CPUs in the new affinity mask.
+	 * Mark all CPUs within their NUMA nodes for update.
+	 */
+	for_each_cpu(cpu, affinity) {
+		rmap->near[cpu].index = index;
+		rmap->near[cpu].dist = 0;
+		cpumask_or(update_mask, update_mask,
+			   cpumask_of_node(cpu_to_node(cpu)));
+	}
+
+	debug_print_rmap(rmap, "after updating neighbours");
+
+	/* Update distances based on topology */
+	for_each_cpu(cpu, update_mask) {
+		if (cpu_rmap_copy_neigh(rmap, cpu,
+					topology_thread_cpumask(cpu), 1))
+			continue;
+		if (cpu_rmap_copy_neigh(rmap, cpu,
+					topology_core_cpumask(cpu), 2))
+			continue;
+		if (cpu_rmap_copy_neigh(rmap, cpu,
+					cpumask_of_node(cpu_to_node(cpu)), 3))
+			continue;
+		/* We could continue into NUMA node distances, but for now
+		 * we give up.
+		 */
+	}
+
+	debug_print_rmap(rmap, "after copying neighbours");
+
+	free_cpumask_var(update_mask);
+	return 0;
+}
+EXPORT_SYMBOL(cpu_rmap_update);
+
+#ifdef CONFIG_GENERIC_HARDIRQS
+
+/* Glue between IRQ affinity notifiers and CPU rmaps */
+
+struct irq_glue {
+	struct irq_affinity_notify notify;
+	struct cpu_rmap *rmap;
+	u16 index;
+};
+
+/**
+ * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
+ * @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL
+ *
+ * Must be called in process context, before freeing the IRQs, and
+ * without holding any locks required by global workqueue items.
+ */
+void free_irq_cpu_rmap(struct cpu_rmap *rmap)
+{
+	struct irq_glue *glue;
+	u16 index;
+
+	if (!rmap)
+		return;
+
+	for (index = 0; index < rmap->used; index++) {
+		glue = rmap->obj[index];
+		irq_set_affinity_notifier(glue->notify.irq, NULL);
+	}
+	irq_run_affinity_notifiers();
+
+	kfree(rmap);
+}
+EXPORT_SYMBOL(free_irq_cpu_rmap);
+
+static void
+irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
+{
+	struct irq_glue *glue =
+		container_of(notify, struct irq_glue, notify);
+	int rc;
+
+	rc = cpu_rmap_update(glue->rmap, glue->index, mask);
+	if (rc)
+		pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc);
+}
+
+static void irq_cpu_rmap_release(struct kref *ref)
+{
+	struct irq_glue *glue =
+		container_of(ref, struct irq_glue, notify.kref);
+	kfree(glue);
+}
+
+/**
+ * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
+ * @rmap: The reverse-map
+ * @irq: The IRQ number
+ *
+ * This adds an IRQ affinity notifier that will update the reverse-map
+ * automatically.
+ *
+ * Must be called in process context, after the IRQ is allocated but
+ * before it is bound with request_irq().
+ */
+int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
+{
+	struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL);
+	int rc;
+
+	if (!glue)
+		return -ENOMEM;
+	glue->notify.notify = irq_cpu_rmap_notify;
+	glue->notify.release = irq_cpu_rmap_release;
+	glue->rmap = rmap;
+	glue->index = cpu_rmap_add(rmap, glue);
+	rc = irq_set_affinity_notifier(irq, &glue->notify);
+	if (rc)
+		kfree(glue);
+	return rc;
+}
+EXPORT_SYMBOL(irq_cpu_rmap_add);
+
+#endif /* CONFIG_GENERIC_HARDIRQS */
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index deebcc57d4e..9d86e45086f 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -249,14 +249,17 @@ static struct debug_bucket *get_bucket(unsigned long addr)
 
 static void debug_print_object(struct debug_obj *obj, char *msg)
 {
+	struct debug_obj_descr *descr = obj->descr;
 	static int limit;
 
-	if (limit < 5 && obj->descr != descr_test) {
+	if (limit < 5 && descr != descr_test) {
+		void *hint = descr->debug_hint ?
+			descr->debug_hint(obj->object) : NULL;
 		limit++;
 		WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
-				 "object type: %s\n",
+				 "object type: %s hint: %pS\n",
 			msg, obj_states[obj->state], obj->astate,
-			obj->descr->name);
+			descr->name, hint);
 	}
 	debug_objects_warnings++;
 }
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index b335acb43be..75ca78f3a8c 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -7,6 +7,7 @@
  * Copyright (C) 2008 Jason Baron <jbaron@redhat.com>
  * By Greg Banks <gnb@melbourne.sgi.com>
  * Copyright (c) 2008 Silicon Graphics Inc.  All Rights Reserved.
+ * Copyright (C) 2011 Bart Van Assche.  All Rights Reserved.
  */
 
 #include <linux/kernel.h>
@@ -27,6 +28,8 @@
 #include <linux/debugfs.h>
 #include <linux/slab.h>
 #include <linux/jump_label.h>
+#include <linux/hardirq.h>
+#include <linux/sched.h>
 
 extern struct _ddebug __start___verbose[];
 extern struct _ddebug __stop___verbose[];
@@ -63,15 +66,25 @@ static inline const char *basename(const char *path)
 	return tail ? tail+1 : path;
 }
 
+static struct { unsigned flag:8; char opt_char; } opt_array[] = {
+	{ _DPRINTK_FLAGS_PRINT, 'p' },
+	{ _DPRINTK_FLAGS_INCL_MODNAME, 'm' },
+	{ _DPRINTK_FLAGS_INCL_FUNCNAME, 'f' },
+	{ _DPRINTK_FLAGS_INCL_LINENO, 'l' },
+	{ _DPRINTK_FLAGS_INCL_TID, 't' },
+};
+
 /* format a string into buf[] which describes the _ddebug's flags */
 static char *ddebug_describe_flags(struct _ddebug *dp, char *buf,
 				    size_t maxlen)
 {
 	char *p = buf;
+	int i;
 
 	BUG_ON(maxlen < 4);
-	if (dp->flags & _DPRINTK_FLAGS_PRINT)
-		*p++ = 'p';
+	for (i = 0; i < ARRAY_SIZE(opt_array); ++i)
+		if (dp->flags & opt_array[i].flag)
+			*p++ = opt_array[i].opt_char;
 	if (p == buf)
 		*p++ = '-';
 	*p = '\0';
@@ -343,7 +356,7 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
 			       unsigned int *maskp)
 {
 	unsigned flags = 0;
-	int op = '=';
+	int op = '=', i;
 
 	switch (*str) {
 	case '+':
@@ -358,13 +371,14 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
 		printk(KERN_INFO "%s: op='%c'\n", __func__, op);
 
 	for ( ; *str ; ++str) {
-		switch (*str) {
-		case 'p':
-			flags |= _DPRINTK_FLAGS_PRINT;
-			break;
-		default:
-			return -EINVAL;
+		for (i = ARRAY_SIZE(opt_array) - 1; i >= 0; i--) {
+			if (*str == opt_array[i].opt_char) {
+				flags |= opt_array[i].flag;
+				break;
+			}
 		}
+		if (i < 0)
+			return -EINVAL;
 	}
 	if (flags == 0)
 		return -EINVAL;
@@ -413,6 +427,35 @@ static int ddebug_exec_query(char *query_string)
 	return 0;
 }
 
+int __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
+{
+	va_list args;
+	int res;
+
+	BUG_ON(!descriptor);
+	BUG_ON(!fmt);
+
+	va_start(args, fmt);
+	res = printk(KERN_DEBUG);
+	if (descriptor->flags & _DPRINTK_FLAGS_INCL_TID) {
+		if (in_interrupt())
+			res += printk(KERN_CONT "<intr> ");
+		else
+			res += printk(KERN_CONT "[%d] ", task_pid_vnr(current));
+	}
+	if (descriptor->flags & _DPRINTK_FLAGS_INCL_MODNAME)
+		res += printk(KERN_CONT "%s:", descriptor->modname);
+	if (descriptor->flags & _DPRINTK_FLAGS_INCL_FUNCNAME)
+		res += printk(KERN_CONT "%s:", descriptor->function);
+	if (descriptor->flags & _DPRINTK_FLAGS_INCL_LINENO)
+		res += printk(KERN_CONT "%d ", descriptor->lineno);
+	res += vprintk(fmt, args);
+	va_end(args);
+
+	return res;
+}
+EXPORT_SYMBOL(__dynamic_pr_debug);
+
 static __initdata char ddebug_setup_string[1024];
 static __init int ddebug_setup_query(char *str)
 {
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
deleted file mode 100644
index b135d04aa48..00000000000
--- a/lib/kernel_lock.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * lib/kernel_lock.c
- *
- * This is the traditional BKL - big kernel lock. Largely
- * relegated to obsolescence, but used by various less
- * important (or lazy) subsystems.
- */
-#include <linux/module.h>
-#include <linux/kallsyms.h>
-#include <linux/semaphore.h>
-#include <linux/smp_lock.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/bkl.h>
-
-/*
- * The 'big kernel lock'
- *
- * This spinlock is taken and released recursively by lock_kernel()
- * and unlock_kernel().  It is transparently dropped and reacquired
- * over schedule().  It is used to protect legacy code that hasn't
- * been migrated to a proper locking design yet.
- *
- * Don't use in new code.
- */
-static  __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(kernel_flag);
-
-
-/*
- * Acquire/release the underlying lock from the scheduler.
- *
- * This is called with preemption disabled, and should
- * return an error value if it cannot get the lock and
- * TIF_NEED_RESCHED gets set.
- *
- * If it successfully gets the lock, it should increment
- * the preemption count like any spinlock does.
- *
- * (This works on UP too - do_raw_spin_trylock will never
- * return false in that case)
- */
-int __lockfunc __reacquire_kernel_lock(void)
-{
-	while (!do_raw_spin_trylock(&kernel_flag)) {
-		if (need_resched())
-			return -EAGAIN;
-		cpu_relax();
-	}
-	preempt_disable();
-	return 0;
-}
-
-void __lockfunc __release_kernel_lock(void)
-{
-	do_raw_spin_unlock(&kernel_flag);
-	preempt_enable_no_resched();
-}
-
-/*
- * These are the BKL spinlocks - we try to be polite about preemption.
- * If SMP is not on (ie UP preemption), this all goes away because the
- * do_raw_spin_trylock() will always succeed.
- */
-#ifdef CONFIG_PREEMPT
-static inline void __lock_kernel(void)
-{
-	preempt_disable();
-	if (unlikely(!do_raw_spin_trylock(&kernel_flag))) {
-		/*
-		 * If preemption was disabled even before this
-		 * was called, there's nothing we can be polite
-		 * about - just spin.
-		 */
-		if (preempt_count() > 1) {
-			do_raw_spin_lock(&kernel_flag);
-			return;
-		}
-
-		/*
-		 * Otherwise, let's wait for the kernel lock
-		 * with preemption enabled..
-		 */
-		do {
-			preempt_enable();
-			while (raw_spin_is_locked(&kernel_flag))
-				cpu_relax();
-			preempt_disable();
-		} while (!do_raw_spin_trylock(&kernel_flag));
-	}
-}
-
-#else
-
-/*
- * Non-preemption case - just get the spinlock
- */
-static inline void __lock_kernel(void)
-{
-	do_raw_spin_lock(&kernel_flag);
-}
-#endif
-
-static inline void __unlock_kernel(void)
-{
-	/*
-	 * the BKL is not covered by lockdep, so we open-code the
-	 * unlocking sequence (and thus avoid the dep-chain ops):
-	 */
-	do_raw_spin_unlock(&kernel_flag);
-	preempt_enable();
-}
-
-/*
- * Getting the big kernel lock.
- *
- * This cannot happen asynchronously, so we only need to
- * worry about other CPU's.
- */
-void __lockfunc _lock_kernel(const char *func, const char *file, int line)
-{
-	int depth = current->lock_depth + 1;
-
-	trace_lock_kernel(func, file, line);
-
-	if (likely(!depth)) {
-		might_sleep();
-		__lock_kernel();
-	}
-	current->lock_depth = depth;
-}
-
-void __lockfunc _unlock_kernel(const char *func, const char *file, int line)
-{
-	BUG_ON(current->lock_depth < 0);
-	if (likely(--current->lock_depth < 0))
-		__unlock_kernel();
-
-	trace_unlock_kernel(func, file, line);
-}
-
-EXPORT_SYMBOL(_lock_kernel);
-EXPORT_SYMBOL(_unlock_kernel);
-
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 344c710d16c..b8029a5583f 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -35,6 +35,31 @@ void __list_add(struct list_head *new,
 }
 EXPORT_SYMBOL(__list_add);
 
+void __list_del_entry(struct list_head *entry)
+{
+	struct list_head *prev, *next;
+
+	prev = entry->prev;
+	next = entry->next;
+
+	if (WARN(next == LIST_POISON1,
+		"list_del corruption, %p->next is LIST_POISON1 (%p)\n",
+		entry, LIST_POISON1) ||
+	    WARN(prev == LIST_POISON2,
+		"list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
+		entry, LIST_POISON2) ||
+	    WARN(prev->next != entry,
+		"list_del corruption. prev->next should be %p, "
+		"but was %p\n", entry, prev->next) ||
+	    WARN(next->prev != entry,
+		"list_del corruption. next->prev should be %p, "
+		"but was %p\n", entry, next->prev))
+		return;
+
+	__list_del(prev, next);
+}
+EXPORT_SYMBOL(__list_del_entry);
+
 /**
  * list_del - deletes entry from list.
  * @entry: the element to delete from the list.
@@ -43,19 +68,7 @@ EXPORT_SYMBOL(__list_add);
  */
 void list_del(struct list_head *entry)
 {
-	WARN(entry->next == LIST_POISON1,
-		"list_del corruption, next is LIST_POISON1 (%p)\n",
-		LIST_POISON1);
-	WARN(entry->next != LIST_POISON1 && entry->prev == LIST_POISON2,
-		"list_del corruption, prev is LIST_POISON2 (%p)\n",
-		LIST_POISON2);
-	WARN(entry->prev->next != entry,
-		"list_del corruption. prev->next should be %p, "
-		"but was %p\n", entry, entry->prev->next);
-	WARN(entry->next->prev != entry,
-		"list_del corruption. next->prev should be %p, "
-		"but was %p\n", entry, entry->next->prev);
-	__list_del(entry->prev, entry->next);
+	__list_del_entry(entry);
 	entry->next = LIST_POISON1;
 	entry->prev = LIST_POISON2;
 }
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 5021cbc3441..ac09f2226dc 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -148,7 +148,7 @@ nla_policy_len(const struct nla_policy *p, int n)
 {
 	int i, len = 0;
 
-	for (i = 0; i < n; i++) {
+	for (i = 0; i < n; i++, p++) {
 		if (p->len)
 			len += nla_total_size(p->len);
 		else if (nla_attr_minlen[p->type])
diff --git a/lib/plist.c b/lib/plist.c
index 1471988d919..0ae7e643172 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -28,6 +28,8 @@
 
 #ifdef CONFIG_DEBUG_PI_LIST
 
+static struct plist_head test_head;
+
 static void plist_check_prev_next(struct list_head *t, struct list_head *p,
 				  struct list_head *n)
 {
@@ -54,12 +56,13 @@ static void plist_check_list(struct list_head *top)
 
 static void plist_check_head(struct plist_head *head)
 {
-	WARN_ON(!head->rawlock && !head->spinlock);
+	WARN_ON(head != &test_head && !head->rawlock && !head->spinlock);
 	if (head->rawlock)
 		WARN_ON_SMP(!raw_spin_is_locked(head->rawlock));
 	if (head->spinlock)
 		WARN_ON_SMP(!spin_is_locked(head->spinlock));
-	plist_check_list(&head->prio_list);
+	if (!plist_head_empty(head))
+		plist_check_list(&plist_first(head)->prio_list);
 	plist_check_list(&head->node_list);
 }
 
@@ -75,25 +78,33 @@ static void plist_check_head(struct plist_head *head)
  */
 void plist_add(struct plist_node *node, struct plist_head *head)
 {
-	struct plist_node *iter;
+	struct plist_node *first, *iter, *prev = NULL;
+	struct list_head *node_next = &head->node_list;
 
 	plist_check_head(head);
 	WARN_ON(!plist_node_empty(node));
+	WARN_ON(!list_empty(&node->prio_list));
+
+	if (plist_head_empty(head))
+		goto ins_node;
 
-	list_for_each_entry(iter, &head->prio_list, plist.prio_list) {
-		if (node->prio < iter->prio)
-			goto lt_prio;
-		else if (node->prio == iter->prio) {
-			iter = list_entry(iter->plist.prio_list.next,
-					struct plist_node, plist.prio_list);
-			goto eq_prio;
+	first = iter = plist_first(head);
+
+	do {
+		if (node->prio < iter->prio) {
+			node_next = &iter->node_list;
+			break;
 		}
-	}
 
-lt_prio:
-	list_add_tail(&node->plist.prio_list, &iter->plist.prio_list);
-eq_prio:
-	list_add_tail(&node->plist.node_list, &iter->plist.node_list);
+		prev = iter;
+		iter = list_entry(iter->prio_list.next,
+				struct plist_node, prio_list);
+	} while (iter != first);
+
+	if (!prev || prev->prio != node->prio)
+		list_add_tail(&node->prio_list, &iter->prio_list);
+ins_node:
+	list_add_tail(&node->node_list, node_next);
 
 	plist_check_head(head);
 }
@@ -108,14 +119,98 @@ void plist_del(struct plist_node *node, struct plist_head *head)
 {
 	plist_check_head(head);
 
-	if (!list_empty(&node->plist.prio_list)) {
-		struct plist_node *next = plist_first(&node->plist);
+	if (!list_empty(&node->prio_list)) {
+		if (node->node_list.next != &head->node_list) {
+			struct plist_node *next;
+
+			next = list_entry(node->node_list.next,
+					struct plist_node, node_list);
 
-		list_move_tail(&next->plist.prio_list, &node->plist.prio_list);
-		list_del_init(&node->plist.prio_list);
+			/* add the next plist_node into prio_list */
+			if (list_empty(&next->prio_list))
+				list_add(&next->prio_list, &node->prio_list);
+		}
+		list_del_init(&node->prio_list);
 	}
 
-	list_del_init(&node->plist.node_list);
+	list_del_init(&node->node_list);
 
 	plist_check_head(head);
 }
+
+#ifdef CONFIG_DEBUG_PI_LIST
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+static struct plist_node __initdata test_node[241];
+
+static void __init plist_test_check(int nr_expect)
+{
+	struct plist_node *first, *prio_pos, *node_pos;
+
+	if (plist_head_empty(&test_head)) {
+		BUG_ON(nr_expect != 0);
+		return;
+	}
+
+	prio_pos = first = plist_first(&test_head);
+	plist_for_each(node_pos, &test_head) {
+		if (nr_expect-- < 0)
+			break;
+		if (node_pos == first)
+			continue;
+		if (node_pos->prio == prio_pos->prio) {
+			BUG_ON(!list_empty(&node_pos->prio_list));
+			continue;
+		}
+
+		BUG_ON(prio_pos->prio > node_pos->prio);
+		BUG_ON(prio_pos->prio_list.next != &node_pos->prio_list);
+		prio_pos = node_pos;
+	}
+
+	BUG_ON(nr_expect != 0);
+	BUG_ON(prio_pos->prio_list.next != &first->prio_list);
+}
+
+static int  __init plist_test(void)
+{
+	int nr_expect = 0, i, loop;
+	unsigned int r = local_clock();
+
+	printk(KERN_INFO "start plist test\n");
+	plist_head_init(&test_head, NULL);
+	for (i = 0; i < ARRAY_SIZE(test_node); i++)
+		plist_node_init(test_node + i, 0);
+
+	for (loop = 0; loop < 1000; loop++) {
+		r = r * 193939 % 47629;
+		i = r % ARRAY_SIZE(test_node);
+		if (plist_node_empty(test_node + i)) {
+			r = r * 193939 % 47629;
+			test_node[i].prio = r % 99;
+			plist_add(test_node + i, &test_head);
+			nr_expect++;
+		} else {
+			plist_del(test_node + i, &test_head);
+			nr_expect--;
+		}
+		plist_test_check(nr_expect);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(test_node); i++) {
+		if (plist_node_empty(test_node + i))
+			continue;
+		plist_del(test_node + i, &test_head);
+		nr_expect--;
+		plist_test_check(nr_expect);
+	}
+
+	printk(KERN_INFO "end plist test\n");
+	return 0;
+}
+
+module_init(plist_test);
+
+#endif
diff --git a/lib/rwsem.c b/lib/rwsem.c
index f236d7cd5cf..aa7c3052261 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -222,8 +222,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
 /*
  * wait for the read lock to be granted
  */
-asmregparm struct rw_semaphore __sched *
-rwsem_down_read_failed(struct rw_semaphore *sem)
+struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 {
 	return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
 					-RWSEM_ACTIVE_READ_BIAS);
@@ -232,8 +231,7 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
 /*
  * wait for the write lock to be granted
  */
-asmregparm struct rw_semaphore __sched *
-rwsem_down_write_failed(struct rw_semaphore *sem)
+struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 {
 	return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
 					-RWSEM_ACTIVE_WRITE_BIAS);
@@ -243,7 +241,7 @@ rwsem_down_write_failed(struct rw_semaphore *sem)
  * handle waking up a waiter on the semaphore
  * - up_read/up_write has decremented the active part of count if we come here
  */
-asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
+struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
 {
 	unsigned long flags;
 
@@ -263,7 +261,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
  * - caller incremented waiting part of count and discovered it still negative
  * - just wake up any readers at the front of the queue
  */
-asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
+struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
 {
 	unsigned long flags;
 
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index c47bbe11b80..93ca08b8a45 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -686,8 +686,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	/*
 	 * Ensure that the address returned is DMA'ble
 	 */
-	if (!dma_capable(dev, dev_addr, size))
-		panic("map_single: bounce buffer is not DMA'ble");
+	if (!dma_capable(dev, dev_addr, size)) {
+		swiotlb_tbl_unmap_single(dev, map, size, dir);
+		dev_addr = swiotlb_virt_to_bus(dev, io_tlb_overflow_buffer);
+	}
 
 	return dev_addr;
 }