16 files changed, 1897 insertions, 1113 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 75485e163ca..991c98bc4a3 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -51,13 +51,6 @@ config PERCPU_RWSEM
 config ARCH_USE_CMPXCHG_LOCKREF
 	bool
 
-config CMPXCHG_LOCKREF
-	def_bool y if ARCH_USE_CMPXCHG_LOCKREF
-	depends on SMP
-	depends on !GENERIC_LOCKBREAK
-	depends on !DEBUG_SPINLOCK
-	depends on !DEBUG_LOCK_ALLOC
-
 config CRC_CCITT
 	tristate "CRC-CCITT functions"
 	help
@@ -329,6 +322,20 @@ config TEXTSEARCH_FSM
 config BTREE
 	boolean
 
+config ASSOCIATIVE_ARRAY
+	bool
+	help
+	  Generic associative array.  Can be searched and iterated over whilst
+	  it is being modified.  It is also reasonably quick to search and
+	  modify.  The algorithms are non-recursive, and the trees are highly
+	  capacious.
+
+	  See:
+
+		Documentation/assoc_array.txt
+
+	  for more information.
+
 config HAS_IOMEM
 	boolean
 	depends on !NO_IOMEM
diff --git a/lib/Makefile b/lib/Makefile
index bb016e116ba..a459c31e8c6 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -13,7 +13,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
-	 earlycpio.o percpu-refcount.o percpu_ida.o
+	 earlycpio.o
 
 obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
 lib-$(CONFIG_MMU) += ioremap.o
@@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
 	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
-	 percpu_ida.o
+	 percpu-refcount.o percpu_ida.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += kstrtox.o
@@ -42,15 +42,12 @@ obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o
 obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
 obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
 obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
-obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
-lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
-lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
-lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
 
 CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 
 obj-$(CONFIG_BTREE) += btree.o
+obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
diff --git a/lib/assoc_array.c b/lib/assoc_array.c
new file mode 100644
index 00000000000..17edeaf1918
--- /dev/null
+++ b/lib/assoc_array.c
@@ -0,0 +1,1746 @@
+/* Generic associative array implementation.
+ *
+ * See Documentation/assoc_array.txt for information.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+//#define DEBUG
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/assoc_array_priv.h>
+
+/*
+ * Iterate over an associative array.  The caller must hold the RCU read lock
+ * or better.
+ */
+static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root,
+				       const struct assoc_array_ptr *stop,
+				       int (*iterator)(const void *leaf,
+						       void *iterator_data),
+				       void *iterator_data)
+{
+	const struct assoc_array_shortcut *shortcut;
+	const struct assoc_array_node *node;
+	const struct assoc_array_ptr *cursor, *ptr, *parent;
+	unsigned long has_meta;
+	int slot, ret;
+
+	cursor = root;
+
+begin_node:
+	if (assoc_array_ptr_is_shortcut(cursor)) {
+		/* Descend through a shortcut */
+		shortcut = assoc_array_ptr_to_shortcut(cursor);
+		smp_read_barrier_depends();
+		cursor = ACCESS_ONCE(shortcut->next_node);
+	}
+
+	node = assoc_array_ptr_to_node(cursor);
+	smp_read_barrier_depends();
+	slot = 0;
+
+	/* We perform two passes of each node.
+	 *
+	 * The first pass does all the leaves in this node.  This means we
+	 * don't miss any leaves if the node is split up by insertion whilst
+	 * we're iterating over the branches rooted here (we may, however, see
+	 * some leaves twice).
+	 */
+	has_meta = 0;
+	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+		ptr = ACCESS_ONCE(node->slots[slot]);
+		has_meta |= (unsigned long)ptr;
+		if (ptr && assoc_array_ptr_is_leaf(ptr)) {
+			/* We need a barrier between the read of the pointer
+			 * and dereferencing the pointer - but only if we are
+			 * actually going to dereference it.
+			 */
+			smp_read_barrier_depends();
+
+			/* Invoke the callback */
+			ret = iterator(assoc_array_ptr_to_leaf(ptr),
+				       iterator_data);
+			if (ret)
+				return ret;
+		}
+	}
+
+	/* The second pass attends to all the metadata pointers.  If we follow
+	 * one of these we may find that we don't come back here, but rather go
+	 * back to a replacement node with the leaves in a different layout.
+	 *
+	 * We are guaranteed to make progress, however, as the slot number for
+	 * a particular portion of the key space cannot change - and we
+	 * continue at the back pointer + 1.
+	 */
+	if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE))
+		goto finished_node;
+	slot = 0;
+
+continue_node:
+	node = assoc_array_ptr_to_node(cursor);
+	smp_read_barrier_depends();
+
+	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+		ptr = ACCESS_ONCE(node->slots[slot]);
+		if (assoc_array_ptr_is_meta(ptr)) {
+			cursor = ptr;
+			goto begin_node;
+		}
+	}
+
+finished_node:
+	/* Move up to the parent (may need to skip back over a shortcut) */
+	parent = ACCESS_ONCE(node->back_pointer);
+	slot = node->parent_slot;
+	if (parent == stop)
+		return 0;
+
+	if (assoc_array_ptr_is_shortcut(parent)) {
+		shortcut = assoc_array_ptr_to_shortcut(parent);
+		smp_read_barrier_depends();
+		cursor = parent;
+		parent = ACCESS_ONCE(shortcut->back_pointer);
+		slot = shortcut->parent_slot;
+		if (parent == stop)
+			return 0;
+	}
+
+	/* Ascend to next slot in parent node */
+	cursor = parent;
+	slot++;
+	goto continue_node;
+}
+
+/**
+ * assoc_array_iterate - Pass all objects in the array to a callback
+ * @array: The array to iterate over.
+ * @iterator: The callback function.
+ * @iterator_data: Private data for the callback function.
+ *
+ * Iterate over all the objects in an associative array.  Each one will be
+ * presented to the iterator function.
+ *
+ * If the array is being modified concurrently with the iteration then it is
+ * possible that some objects in the array will be passed to the iterator
+ * callback more than once - though every object should be passed at least
+ * once.  If this is undesirable then the caller must lock against modification
+ * for the duration of this function.
+ *
+ * The function will return 0 if no objects were in the array or else it will
+ * return the result of the last iterator function called.  Iteration stops
+ * immediately if any call to the iteration function results in a non-zero
+ * return.
+ *
+ * The caller should hold the RCU read lock or better if concurrent
+ * modification is possible.
+ */
+int assoc_array_iterate(const struct assoc_array *array,
+			int (*iterator)(const void *object,
+					void *iterator_data),
+			void *iterator_data)
+{
+	struct assoc_array_ptr *root = ACCESS_ONCE(array->root);
+
+	if (!root)
+		return 0;
+	return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data);
+}
+
+enum assoc_array_walk_status {
+	assoc_array_walk_tree_empty,
+	assoc_array_walk_found_terminal_node,
+	assoc_array_walk_found_wrong_shortcut,
+} status;
+
+struct assoc_array_walk_result {
+	struct {
+		struct assoc_array_node	*node;	/* Node in which leaf might be found */
+		int		level;
+		int		slot;
+	} terminal_node;
+	struct {
+		struct assoc_array_shortcut *shortcut;
+		int		level;
+		int		sc_level;
+		unsigned long	sc_segments;
+		unsigned long	dissimilarity;
+	} wrong_shortcut;
+};
+
+/*
+ * Navigate through the internal tree looking for the closest node to the key.
+ */
+static enum assoc_array_walk_status
+assoc_array_walk(const struct assoc_array *array,
+		 const struct assoc_array_ops *ops,
+		 const void *index_key,
+		 struct assoc_array_walk_result *result)
+{
+	struct assoc_array_shortcut *shortcut;
+	struct assoc_array_node *node;
+	struct assoc_array_ptr *cursor, *ptr;
+	unsigned long sc_segments, dissimilarity;
+	unsigned long segments;
+	int level, sc_level, next_sc_level;
+	int slot;
+
+	pr_devel("-->%s()\n", __func__);
+
+	cursor = ACCESS_ONCE(array->root);
+	if (!cursor)
+		return assoc_array_walk_tree_empty;
+
+	level = 0;
+
+	/* Use segments from the key for the new leaf to navigate through the
+	 * internal tree, skipping through nodes and shortcuts that are on
+	 * route to the destination.  Eventually we'll come to a slot that is
+	 * either empty or contains a leaf at which point we've found a node in
+	 * which the leaf we're looking for might be found or into which it
+	 * should be inserted.
+	 */
+jumped:
+	segments = ops->get_key_chunk(index_key, level);
+	pr_devel("segments[%d]: %lx\n", level, segments);
+
+	if (assoc_array_ptr_is_shortcut(cursor))
+		goto follow_shortcut;
+
+consider_node:
+	node = assoc_array_ptr_to_node(cursor);
+	smp_read_barrier_depends();
+
+	slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
+	slot &= ASSOC_ARRAY_FAN_MASK;
+	ptr = ACCESS_ONCE(node->slots[slot]);
+
+	pr_devel("consider slot %x [ix=%d type=%lu]\n",
+		 slot, level, (unsigned long)ptr & 3);
+
+	if (!assoc_array_ptr_is_meta(ptr)) {
+		/* The node doesn't have a node/shortcut pointer in the slot
+		 * corresponding to the index key that we have to follow.
+		 */
+		result->terminal_node.node = node;
+		result->terminal_node.level = level;
+		result->terminal_node.slot = slot;
+		pr_devel("<--%s() = terminal_node\n", __func__);
+		return assoc_array_walk_found_terminal_node;
+	}
+
+	if (assoc_array_ptr_is_node(ptr)) {
+		/* There is a pointer to a node in the slot corresponding to
+		 * this index key segment, so we need to follow it.
+		 */
+		cursor = ptr;
+		level += ASSOC_ARRAY_LEVEL_STEP;
+		if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0)
+			goto consider_node;
+		goto jumped;
+	}
+
+	/* There is a shortcut in the slot corresponding to the index key
+	 * segment.  We follow the shortcut if its partial index key matches
+	 * this leaf's.  Otherwise we need to split the shortcut.
+	 */
+	cursor = ptr;
+follow_shortcut:
+	shortcut = assoc_array_ptr_to_shortcut(cursor);
+	smp_read_barrier_depends();
+	pr_devel("shortcut to %d\n", shortcut->skip_to_level);
+	sc_level = level + ASSOC_ARRAY_LEVEL_STEP;
+	BUG_ON(sc_level > shortcut->skip_to_level);
+
+	do {
+		/* Check the leaf against the shortcut's index key a word at a
+		 * time, trimming the final word (the shortcut stores the index
+		 * key completely from the root to the shortcut's target).
+		 */
+		if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0)
+			segments = ops->get_key_chunk(index_key, sc_level);
+
+		sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT];
+		dissimilarity = segments ^ sc_segments;
+
+		if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) {
+			/* Trim segments that are beyond the shortcut */
+			int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+			dissimilarity &= ~(ULONG_MAX << shift);
+			next_sc_level = shortcut->skip_to_level;
+		} else {
+			next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE;
+			next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+		}
+
+		if (dissimilarity != 0) {
+			/* This shortcut points elsewhere */
+			result->wrong_shortcut.shortcut = shortcut;
+			result->wrong_shortcut.level = level;
+			result->wrong_shortcut.sc_level = sc_level;
+			result->wrong_shortcut.sc_segments = sc_segments;
+			result->wrong_shortcut.dissimilarity = dissimilarity;
+			return assoc_array_walk_found_wrong_shortcut;
+		}
+
+		sc_level = next_sc_level;
+	} while (sc_level < shortcut->skip_to_level);
+
+	/* The shortcut matches the leaf's index to this point. */
+	cursor = ACCESS_ONCE(shortcut->next_node);
+	if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) {
+		level = sc_level;
+		goto jumped;
+	} else {
+		level = sc_level;
+		goto consider_node;
+	}
+}
+
+/**
+ * assoc_array_find - Find an object by index key
+ * @array: The associative array to search.
+ * @ops: The operations to use.
+ * @index_key: The key to the object.
+ *
+ * Find an object in an associative array by walking through the internal tree
+ * to the node that should contain the object and then searching the leaves
+ * there.  NULL is returned if the requested object was not found in the array.
+ *
+ * The caller must hold the RCU read lock or better.
+ */
+void *assoc_array_find(const struct assoc_array *array,
+		       const struct assoc_array_ops *ops,
+		       const void *index_key)
+{
+	struct assoc_array_walk_result result;
+	const struct assoc_array_node *node;
+	const struct assoc_array_ptr *ptr;
+	const void *leaf;
+	int slot;
+
+	if (assoc_array_walk(array, ops, index_key, &result) !=
+	    assoc_array_walk_found_terminal_node)
+		return NULL;
+
+	node = result.terminal_node.node;
+	smp_read_barrier_depends();
+
+	/* If the target key is available to us, it's has to be pointed to by
+	 * the terminal node.
+	 */
+	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+		ptr = ACCESS_ONCE(node->slots[slot]);
+		if (ptr && assoc_array_ptr_is_leaf(ptr)) {
+			/* We need a barrier between the read of the pointer
+			 * and dereferencing the pointer - but only if we are
+			 * actually going to dereference it.
+			 */
+			leaf = assoc_array_ptr_to_leaf(ptr);
+			smp_read_barrier_depends();
+			if (ops->compare_object(leaf, index_key))
+				return (void *)leaf;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Destructively iterate over an associative array.  The caller must prevent
+ * other simultaneous accesses.
+ */
+static void assoc_array_destroy_subtree(struct assoc_array_ptr *root,
+					const struct assoc_array_ops *ops)
+{
+	struct assoc_array_shortcut *shortcut;
+	struct assoc_array_node *node;
+	struct assoc_array_ptr *cursor, *parent = NULL;
+	int slot = -1;
+
+	pr_devel("-->%s()\n", __func__);
+
+	cursor = root;
+	if (!cursor) {
+		pr_devel("empty\n");
+		return;
+	}
+
+move_to_meta:
+	if (assoc_array_ptr_is_shortcut(cursor)) {
+		/* Descend through a shortcut */
+		pr_devel("[%d] shortcut\n", slot);
+		BUG_ON(!assoc_array_ptr_is_shortcut(cursor));
+		shortcut = assoc_array_ptr_to_shortcut(cursor);
+		BUG_ON(shortcut->back_pointer != parent);
+		BUG_ON(slot != -1 && shortcut->parent_slot != slot);
+		parent = cursor;
+		cursor = shortcut->next_node;
+		slot = -1;
+		BUG_ON(!assoc_array_ptr_is_node(cursor));
+	}
+
+	pr_devel("[%d] node\n", slot);
+	node = assoc_array_ptr_to_node(cursor);
+	BUG_ON(node->back_pointer != parent);
+	BUG_ON(slot != -1 && node->parent_slot != slot);
+	slot = 0;
+
+continue_node:
+	pr_devel("Node %p [back=%p]\n", node, node->back_pointer);
+	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+		struct assoc_array_ptr *ptr = node->slots[slot];
+		if (!ptr)
+			continue;
+		if (assoc_array_ptr_is_meta(ptr)) {
+			parent = cursor;
+			cursor = ptr;
+			goto move_to_meta;
+		}
+
+		if (ops) {
+			pr_devel("[%d] free leaf\n", slot);
+			ops->free_object(assoc_array_ptr_to_leaf(ptr));
+		}
+	}
+
+	parent = node->back_pointer;
+	slot = node->parent_slot;
+	pr_devel("free node\n");
+	kfree(node);
+	if (!parent)
+		return; /* Done */
+
+	/* Move back up to the parent (may need to free a shortcut on
+	 * the way up) */
+	if (assoc_array_ptr_is_shortcut(parent)) {
+		shortcut = assoc_array_ptr_to_shortcut(parent);
+		BUG_ON(shortcut->next_node != cursor);
+		cursor = parent;
+		parent = shortcut->back_pointer;
+		slot = shortcut->parent_slot;
+		pr_devel("free shortcut\n");
+		kfree(shortcut);
+		if (!parent)
+			return;
+
+		BUG_ON(!assoc_array_ptr_is_node(parent));
+	}
+
+	/* Ascend to next slot in parent node */
+	pr_devel("ascend to %p[%d]\n", parent, slot);
+	cursor = parent;
+	node = assoc_array_ptr_to_node(cursor);
+	slot++;
+	goto continue_node;
+}
+
+/**
+ * assoc_array_destroy - Destroy an associative array
+ * @array: The array to destroy.
+ * @ops: The operations to use.
+ *
+ * Discard all metadata and free all objects in an associative array.  The
+ * array will be empty and ready to use again upon completion.  This function
+ * cannot fail.
+ *
+ * The caller must prevent all other accesses whilst this takes place as no
+ * attempt is made to adjust pointers gracefully to permit RCU readlock-holding
+ * accesses to continue.  On the other hand, no memory allocation is required.
+ */
+void assoc_array_destroy(struct assoc_array *array,
+			 const struct assoc_array_ops *ops)
+{
+	assoc_array_destroy_subtree(array->root, ops);
+	array->root = NULL;
+}
+
+/*
+ * Handle insertion into an empty tree.
+ */
+static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit)
+{
+	struct assoc_array_node *new_n0;
+
+	pr_devel("-->%s()\n", __func__);
+
+	new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+	if (!new_n0)
+		return false;
+
+	edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+	edit->leaf_p = &new_n0->slots[0];
+	edit->adjust_count_on = new_n0;
+	edit->set[0].ptr = &edit->array->root;
+	edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+
+	pr_devel("<--%s() = ok [no root]\n", __func__);
+	return true;
+}
+
+/*
+ * Handle insertion into a terminal node.
+ */
+static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
+						  const struct assoc_array_ops *ops,
+						  const void *index_key,
+						  struct assoc_array_walk_result *result)
+{
+	struct assoc_array_shortcut *shortcut, *new_s0;
+	struct assoc_array_node *node, *new_n0, *new_n1, *side;
+	struct assoc_array_ptr *ptr;
+	unsigned long dissimilarity, base_seg, blank;
+	size_t keylen;
+	bool have_meta;
+	int level, diff;
+	int slot, next_slot, free_slot, i, j;
+
+	node	= result->terminal_node.node;
+	level	= result->terminal_node.level;
+	edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot;
+
+	pr_devel("-->%s()\n", __func__);
+
+	/* We arrived at a node which doesn't have an onward node or shortcut
+	 * pointer that we have to follow.  This means that (a) the leaf we
+	 * want must go here (either by insertion or replacement) or (b) we
+	 * need to split this node and insert in one of the fragments.
+	 */
+	free_slot = -1;
+
+	/* Firstly, we have to check the leaves in this node to see if there's
+	 * a matching one we should replace in place.
+	 */
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+		ptr = node->slots[i];
+		if (!ptr) {
+			free_slot = i;
+			continue;
+		}
+		if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) {
+			pr_devel("replace in slot %d\n", i);
+			edit->leaf_p = &node->slots[i];
+			edit->dead_leaf = node->slots[i];
+			pr_devel("<--%s() = ok [replace]\n", __func__);
+			return true;
+		}
+	}
+
+	/* If there is a free slot in this node then we can just insert the
+	 * leaf here.
+	 */
+	if (free_slot >= 0) {
+		pr_devel("insert in free slot %d\n", free_slot);
+		edit->leaf_p = &node->slots[free_slot];
+		edit->adjust_count_on = node;
+		pr_devel("<--%s() = ok [insert]\n", __func__);
+		return true;
+	}
+
+	/* The node has no spare slots - so we're either going to have to split
+	 * it or insert another node before it.
+	 *
+	 * Whatever, we're going to need at least two new nodes - so allocate
+	 * those now.  We may also need a new shortcut, but we deal with that
+	 * when we need it.
+	 */
+	new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+	if (!new_n0)
+		return false;
+	edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+	new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+	if (!new_n1)
+		return false;
+	edit->new_meta[1] = assoc_array_node_to_ptr(new_n1);
+
+	/* We need to find out how similar the leaves are. */
+	pr_devel("no spare slots\n");
+	have_meta = false;
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+		ptr = node->slots[i];
+		if (assoc_array_ptr_is_meta(ptr)) {
+			edit->segment_cache[i] = 0xff;
+			have_meta = true;
+			continue;
+		}
+		base_seg = ops->get_object_key_chunk(
+			assoc_array_ptr_to_leaf(ptr), level);
+		base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+		edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
+	}
+
+	if (have_meta) {
+		pr_devel("have meta\n");
+		goto split_node;
+	}
+
+	/* The node contains only leaves */
+	dissimilarity = 0;
+	base_seg = edit->segment_cache[0];
+	for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++)
+		dissimilarity |= edit->segment_cache[i] ^ base_seg;
+
+	pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity);
+
+	if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) {
+		/* The old leaves all cluster in the same slot.  We will need
+		 * to insert a shortcut if the new node wants to cluster with them.
+		 */
+		if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0)
+			goto all_leaves_cluster_together;
+
+		/* Otherwise we can just insert a new node ahead of the old
+		 * one.
+		 */
+		goto present_leaves_cluster_but_not_new_leaf;
+	}
+
+split_node:
+	pr_devel("split node\n");
+
+	/* We need to split the current node; we know that the node doesn't
+	 * simply contain a full set of leaves that cluster together (it
+	 * contains meta pointers and/or non-clustering leaves).
+	 *
+	 * We need to expel at least two leaves out of a set consisting of the
+	 * leaves in the node and the new leaf.
+	 *
+	 * We need a new node (n0) to replace the current one and a new node to
+	 * take the expelled nodes (n1).
+	 */
+	edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+	new_n0->back_pointer = node->back_pointer;
+	new_n0->parent_slot = node->parent_slot;
+	new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
+	new_n1->parent_slot = -1; /* Need to calculate this */
+
+do_split_node:
+	pr_devel("do_split_node\n");
+
+	new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
+	new_n1->nr_leaves_on_branch = 0;
+
+	/* Begin by finding two matching leaves.  There have to be at least two
+	 * that match - even if there are meta pointers - because any leaf that
+	 * would match a slot with a meta pointer in it must be somewhere
+	 * behind that meta pointer and cannot be here.  Further, given N
+	 * remaining leaf slots, we now have N+1 leaves to go in them.
+	 */
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+		slot = edit->segment_cache[i];
+		if (slot != 0xff)
+			for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++)
+				if (edit->segment_cache[j] == slot)
+					goto found_slot_for_multiple_occupancy;
+	}
+found_slot_for_multiple_occupancy:
+	pr_devel("same slot: %x %x [%02x]\n", i, j, slot);
+	BUG_ON(i >= ASSOC_ARRAY_FAN_OUT);
+	BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1);
+	BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT);
+
+	new_n1->parent_slot = slot;
+
+	/* Metadata pointers cannot change slot */
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
+		if (assoc_array_ptr_is_meta(node->slots[i]))
+			new_n0->slots[i] = node->slots[i];
+		else
+			new_n0->slots[i] = NULL;
+	BUG_ON(new_n0->slots[slot] != NULL);
+	new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1);
+
+	/* Filter the leaf pointers between the new nodes */
+	free_slot = -1;
+	next_slot = 0;
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+		if (assoc_array_ptr_is_meta(node->slots[i]))
+			continue;
+		if (edit->segment_cache[i] == slot) {
+			new_n1->slots[next_slot++] = node->slots[i];
+			new_n1->nr_leaves_on_branch++;
+		} else {
+			do {
+				free_slot++;
+			} while (new_n0->slots[free_slot] != NULL);
+			new_n0->slots[free_slot] = node->slots[i];
+		}
+	}
+
+	pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot);
+
+	if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) {
+		do {
+			free_slot++;
+		} while (new_n0->slots[free_slot] != NULL);
+		edit->leaf_p = &new_n0->slots[free_slot];
+		edit->adjust_count_on = new_n0;
+	} else {
+		edit->leaf_p = &new_n1->slots[next_slot++];
+		edit->adjust_count_on = new_n1;
+	}
+
+	BUG_ON(next_slot <= 1);
+
+	edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0);
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+		if (edit->segment_cache[i] == 0xff) {
+			ptr = node->slots[i];
+			BUG_ON(assoc_array_ptr_is_leaf(ptr));
+			if (assoc_array_ptr_is_node(ptr)) {
+				side = assoc_array_ptr_to_node(ptr);
+				edit->set_backpointers[i] = &side->back_pointer;
+			} else {
+				shortcut = assoc_array_ptr_to_shortcut(ptr);
+				edit->set_backpointers[i] = &shortcut->back_pointer;
+			}
+		}
+	}
+
+	ptr = node->back_pointer;
+	if (!ptr)
+		edit->set[0].ptr = &edit->array->root;
+	else if (assoc_array_ptr_is_node(ptr))
+		edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
+	else
+		edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
+	edit->excised_meta[0] = assoc_array_node_to_ptr(node);
+	pr_devel("<--%s() = ok [split node]\n", __func__);
+	return true;
+
+present_leaves_cluster_but_not_new_leaf:
+	/* All the old leaves cluster in the same slot, but the new leaf wants
+	 * to go into a different slot, so we create a new node to hold the new
+	 * leaf and a pointer to a new node holding all the old leaves.
+	 */
+	pr_devel("present leaves cluster but not new leaf\n");
+
+	new_n0->back_pointer = node->back_pointer;
+	new_n0->parent_slot = node->parent_slot;
+	new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
+	new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
+	new_n1->parent_slot = edit->segment_cache[0];
+	new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch;
+	edit->adjust_count_on = new_n0;
+
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
+		new_n1->slots[i] = node->slots[i];
+
+	new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n0);
+	edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]];
+
+	edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot];
+	edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+	edit->excised_meta[0] = assoc_array_node_to_ptr(node);
+	pr_devel("<--%s() = ok [insert node before]\n", __func__);
+	return true;
+
+all_leaves_cluster_together:
+	/* All the leaves, new and old, want to cluster together in this node
+	 * in the same slot, so we have to replace this node with a shortcut to
+	 * skip over the identical parts of the key and then place a pair of
+	 * nodes, one inside the other, at the end of the shortcut and
+	 * distribute the keys between them.
+	 *
+	 * Firstly we need to work out where the leaves start diverging as a
+	 * bit position into their keys so that we know how big the shortcut
+	 * needs to be.
+	 *
+	 * We only need to make a single pass of N of the N+1 leaves because if
+	 * any keys differ between themselves at bit X then at least one of
+	 * them must also differ with the base key at bit X or before.
+	 */
+	pr_devel("all leaves cluster together\n");
+	diff = INT_MAX;
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+		int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf),
+					  assoc_array_ptr_to_leaf(node->slots[i]));
+		if (x < diff) {
+			BUG_ON(x < 0);
+			diff = x;
+		}
+	}
+	BUG_ON(diff == INT_MAX);
+	BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP);
+
+	keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+	keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+	new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
+			 keylen * sizeof(unsigned long), GFP_KERNEL);
+	if (!new_s0)
+		return false;
+	edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0);
+
+	edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
+	new_s0->back_pointer = node->back_pointer;
+	new_s0->parent_slot = node->parent_slot;
+	new_s0->next_node = assoc_array_node_to_ptr(new_n0);
+	new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
+	new_n0->parent_slot = 0;
+	new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
+	new_n1->parent_slot = -1; /* Need to calculate this */
+
+	new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK;
+	pr_devel("skip_to_level = %d [diff %d]\n", level, diff);
+	BUG_ON(level <= 0);
+
+	for (i = 0; i < keylen; i++)
+		new_s0->index_key[i] =
+			ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE);
+
+	blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
+	pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank);
+	new_s0->index_key[keylen - 1] &= ~blank;
+
+	/* This now reduces to a node splitting exercise for which we'll need
+	 * to regenerate the disparity table.
+	 */
+	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+		ptr = node->slots[i];
+		base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr),
+						     level);
+		base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+		edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
+	}
+
+	base_seg = ops->get_key_chunk(index_key, level);
+	base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+	edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK;
+	goto do_split_node;
+}
+
+/*
+ * Handle insertion into the middle of a shortcut.
+ */
+static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
+					    const struct assoc_array_ops *ops,
+					    struct assoc_array_walk_result *result)
+{
+	struct assoc_array_shortcut *shortcut, *new_s0, *new_s1;
+	struct assoc_array_node *node, *new_n0, *side;
+	unsigned long sc_segments, dissimilarity, blank;
+	size_t keylen;
+	int level, sc_level, diff;
+	int sc_slot;
+
+	shortcut	= result->wrong_shortcut.shortcut;
+	level		= result->wrong_shortcut.level;
+	sc_level	= result->wrong_shortcut.sc_level;
+	sc_segments	= result->wrong_shortcut.sc_segments;
+	dissimilarity	= result->wrong_shortcut.dissimilarity;
+
+	pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n",
+		 __func__, level, dissimilarity, sc_level);
+
+	/* We need to split a shortcut and insert a node between the two
+	 * pieces.  Zero-length pieces will be dispensed with entirely.
+	 *
+	 * First of all, we need to find out in which level the first
+	 * difference was.
+	 */
+	diff = __ffs(dissimilarity);
+	diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK;
+	diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK;
+	pr_devel("diff=%d\n", diff);
+
+	if (!shortcut->back_pointer) {
+		edit->set[0].ptr = &edit->array->root;
+	} else if (assoc_array_ptr_is_node(shortcut->back_pointer)) {
+		node = assoc_array_ptr_to_node(shortcut->back_pointer);
+		edit->set[0].ptr = &node->slots[shortcut->parent_slot];
+	} else {
+		BUG();
+	}
+
+	edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut);
+
+	/* Create a new node now since we're going to need it anyway */
+	new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+	if (!new_n0)
+		return false;
+	edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+	edit->adjust_count_on = new_n0;
+
+	/* Insert a new shortcut before the new node if this segment isn't of
+	 * zero length - otherwise we just connect the new node directly to the
+	 * parent.
+	 */
+	level += ASSOC_ARRAY_LEVEL_STEP;
+	if (diff > level) {
+		pr_devel("pre-shortcut %d...%d\n", level, diff);
+		keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+		keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+		new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
+				 keylen * sizeof(unsigned long), GFP_KERNEL);
+		if (!new_s0)
+			return false;
+		edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0);
+		edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
+		new_s0->back_pointer = shortcut->back_pointer;
+		new_s0->parent_slot = shortcut->parent_slot;
+		new_s0->next_node = assoc_array_node_to_ptr(new_n0);
+		new_s0->skip_to_level = diff;
+
+		new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
+		new_n0->parent_slot = 0;
+
+		memcpy(new_s0->index_key, shortcut->index_key,
+		       keylen * sizeof(unsigned long));
+
+		blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
+		pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank);
+		new_s0->index_key[keylen - 1] &= ~blank;
+	} else {
+		pr_devel("no pre-shortcut\n");
+		edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+		new_n0->back_pointer = shortcut->back_pointer;
+		new_n0->parent_slot = shortcut->parent_slot;
+	}
+
+	side = assoc_array_ptr_to_node(shortcut->next_node);
+	new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch;
+
+	/* We need to know which slot in the new node is going to take a
+	 * metadata pointer.
+	 */
+	sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
+	sc_slot &= ASSOC_ARRAY_FAN_MASK;
+
+	pr_devel("new slot %lx >> %d -> %d\n",
+		 sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot);
+
+	/* Determine whether we need to follow the new node with a replacement
+	 * for the current shortcut.  We could in theory reuse the current
+	 * shortcut if its parent slot number doesn't change - but that's a
+	 * 1-in-16 chance so not worth expending the code upon.
+	 */
+	level = diff + ASSOC_ARRAY_LEVEL_STEP;
+	if (level < shortcut->skip_to_level) {
+		pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level);
+		keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+		keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+		new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) +
+				 keylen * sizeof(unsigned long), GFP_KERNEL);
+		if (!new_s1)
+			return false;
+		edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1);
+
+		new_s1->back_pointer = assoc_array_node_to_ptr(new_n0);
+		new_s1->parent_slot = sc_slot;
+		new_s1->next_node = shortcut->next_node;
+		new_s1->skip_to_level = shortcut->skip_to_level;
+
+		new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1);
+
+		memcpy(new_s1->index_key, shortcut->index_key,
+		       keylen * sizeof(unsigned long));
+
+		edit->set[1].ptr = &side->back_pointer;
+		edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1);
+	} else {
+		pr_devel("no post-shortcut\n");
+
+		/* We don't have to replace the pointed-to node as long as we
+		 * use memory barriers to make sure the parent slot number is
+		 * changed before the back pointer (the parent slot number is
+		 * irrelevant to the old parent shortcut).
+		 */
+		new_n0->slots[sc_slot] = shortcut->next_node;
+		edit->set_parent_slot[0].p = &side->parent_slot;
+		edit->set_parent_slot[0].to = sc_slot;
+		edit->set[1].ptr = &side->back_pointer;
+		edit->set[1].to = assoc_array_node_to_ptr(new_n0);
+	}
+
+	/* Install the new leaf in a spare slot in the new node. */
+	if (sc_slot == 0)
+		edit->leaf_p = &new_n0->slots[1];
+	else
+		edit->leaf_p = &new_n0->slots[0];
+
+	pr_devel("<--%s() = ok [split shortcut]\n", __func__);
+	return edit;
+}
+
+/**
+ * assoc_array_insert - Script insertion of an object into an associative array
+ * @array: The array to insert into.
+ * @ops: The operations to use.
+ * @index_key: The key to insert at.
+ * @object: The object to insert.
+ *
+ * Precalculate and preallocate a script for the insertion or replacement of an
+ * object in an associative array.  This results in an edit script that can
+ * either be applied or cancelled.
+ *
+ * The function returns a pointer to an edit script or -ENOMEM.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
+					    const struct assoc_array_ops *ops,
+					    const void *index_key,
+					    void *object)
+{
+	struct assoc_array_walk_result result;
+	struct assoc_array_edit *edit;
+
+	pr_devel("-->%s()\n", __func__);
+
+	/* The leaf pointer we're given must not have the bottom bit set as we
+	 * use those for type-marking the pointer.  NULL pointers are also not
+	 * allowed as they indicate an empty slot but we have to allow them
+	 * here as they can be updated later.
+	 */
+	BUG_ON(assoc_array_ptr_is_meta(object));
+
+	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+	if (!edit)
+		return ERR_PTR(-ENOMEM);
+	edit->array = array;
+	edit->ops = ops;
+	edit->leaf = assoc_array_leaf_to_ptr(object);
+	edit->adjust_count_by = 1;
+
+	switch (assoc_array_walk(array, ops, index_key, &result)) {
+	case assoc_array_walk_tree_empty:
+		/* Allocate a root node if there isn't one yet */
+		if (!assoc_array_insert_in_empty_tree(edit))
+			goto enomem;
+		return edit;
+
+	case assoc_array_walk_found_terminal_node:
+		/* We found a node that doesn't have a node/shortcut pointer in
+		 * the slot corresponding to the index key that we have to
+		 * follow.
+		 */
+		if (!assoc_array_insert_into_terminal_node(edit, ops, index_key,
+							   &result))
+			goto enomem;
+		return edit;
+
+	case assoc_array_walk_found_wrong_shortcut:
+		/* We found a shortcut that didn't match our key in a slot we
+		 * needed to follow.
+		 */
+		if (!assoc_array_insert_mid_shortcut(edit, ops, &result))
+			goto enomem;
+		return edit;
+	}
+
+enomem:
+	/* Clean up after an out of memory error */
+	pr_devel("enomem\n");
+	assoc_array_cancel_edit(edit);
+	return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * assoc_array_insert_set_object - Set the new object pointer in an edit script
+ * @edit: The edit script to modify.
+ * @object: The object pointer to set.
+ *
+ * Change the object to be inserted in an edit script.  The object pointed to
+ * by the old object is not freed.  This must be done prior to applying the
+ * script.
+ */
+void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object)
+{
+	BUG_ON(!object);
+	edit->leaf = assoc_array_leaf_to_ptr(object);
+}
+
+struct assoc_array_delete_collapse_context {
+	struct assoc_array_node	*node;
+	const void		*skip_leaf;
+	int			slot;
+};
+
+/*
+ * Subtree collapse to node iterator.
+ */
+static int assoc_array_delete_collapse_iterator(const void *leaf,
+						void *iterator_data)
+{
+	struct assoc_array_delete_collapse_context *collapse = iterator_data;
+
+	if (leaf == collapse->skip_leaf)
+		return 0;
+
+	BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT);
+
+	collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf);
+	return 0;
+}
+
+/**
+ * assoc_array_delete - Script deletion of an object from an associative array
+ * @array: The array to search.
+ * @ops: The operations to use.
+ * @index_key: The key to the object.
+ *
+ * Precalculate and preallocate a script for the deletion of an object from an
+ * associative array.  This results in an edit script that can either be
+ * applied or cancelled.
+ *
+ * The function returns a pointer to an edit script if the object was found,
+ * NULL if the object was not found or -ENOMEM.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
+					    const struct assoc_array_ops *ops,
+					    const void *index_key)
+{
+	struct assoc_array_delete_collapse_context collapse;
+	struct assoc_array_walk_result result;
+	struct assoc_array_node *node, *new_n0;
+	struct assoc_array_edit *edit;
+	struct assoc_array_ptr *ptr;
+	bool has_meta;
+	int slot, i;
+
+	pr_devel("-->%s()\n", __func__);
+
+	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+	if (!edit)
+		return ERR_PTR(-ENOMEM);
+	edit->array = array;
+	edit->ops = ops;
+	edit->adjust_count_by = -1;
+
+	switch (assoc_array_walk(array, ops, index_key, &result)) {
+	case assoc_array_walk_found_terminal_node:
+		/* We found a node that should contain the leaf we've been
+		 * asked to remove - *if* it's in the tree.
+		 */
+		pr_devel("terminal_node\n");
+		node = result.terminal_node.node;
+
+		for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+			ptr = node->slots[slot];
+			if (ptr &&
+			    assoc_array_ptr_is_leaf(ptr) &&
+			    ops->compare_object(assoc_array_ptr_to_leaf(ptr),
+						index_key))
+				goto found_leaf;
+		}
+	case assoc_array_walk_tree_empty:
+	case assoc_array_walk_found_wrong_shortcut:
+	default:
+		assoc_array_cancel_edit(edit);
+		pr_devel("not found\n");
+		return NULL;
+	}
+
+found_leaf:
+	BUG_ON(array->nr_leaves_on_tree <= 0);
+
+	/* In the simplest form of deletion we just clear the slot and release
+	 * the leaf after a suitable interval.
+	 */
+	edit->dead_leaf = node->slots[slot];
+	edit->set[0].ptr = &node->slots[slot];
+	edit->set[0].to = NULL;
+	edit->adjust_count_on = node;
+
+	/* If that concludes erasure of the last leaf, then delete the entire
+	 * internal array.
+	 */
+	if (array->nr_leaves_on_tree == 1) {
+		edit->set[1].ptr = &array->root;
+		edit->set[1].to = NULL;
+		edit->adjust_count_on = NULL;
+		edit->excised_subtree = array->root;
+		pr_devel("all gone\n");
+		return edit;
+	}
+
+	/* However, we'd also like to clear up some metadata blocks if we
+	 * possibly can.
+	 *
+	 * We go for a simple algorithm of: if this node has FAN_OUT or fewer
+	 * leaves in it, then attempt to collapse it - and attempt to
+	 * recursively collapse up the tree.
+	 *
+	 * We could also try and collapse in partially filled subtrees to take
+	 * up space in this node.
+	 */
+	if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
+		struct assoc_array_node *parent, *grandparent;
+		struct assoc_array_ptr *ptr;
+
+		/* First of all, we need to know if this node has metadata so
+		 * that we don't try collapsing if all the leaves are already
+		 * here.
+		 */
+		has_meta = false;
+		for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+			ptr = node->slots[i];
+			if (assoc_array_ptr_is_meta(ptr)) {
+				has_meta = true;
+				break;
+			}
+		}
+
+		pr_devel("leaves: %ld [m=%d]\n",
+			 node->nr_leaves_on_branch - 1, has_meta);
+
+		/* Look further up the tree to see if we can collapse this node
+		 * into a more proximal node too.
+		 */
+		parent = node;
+	collapse_up:
+		pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch);
+
+		ptr = parent->back_pointer;
+		if (!ptr)
+			goto do_collapse;
+		if (assoc_array_ptr_is_shortcut(ptr)) {
+			struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr);
+			ptr = s->back_pointer;
+			if (!ptr)
+				goto do_collapse;
+		}
+
+		grandparent = assoc_array_ptr_to_node(ptr);
+		if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
+			parent = grandparent;
+			goto collapse_up;
+		}
+
+	do_collapse:
+		/* There's no point collapsing if the original node has no meta
+		 * pointers to discard and if we didn't merge into one of that
+		 * node's ancestry.
+		 */
+		if (has_meta || parent != node) {
+			node = parent;
+
+			/* Create a new node to collapse into */
+			new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+			if (!new_n0)
+				goto enomem;
+			edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+
+			new_n0->back_pointer = node->back_pointer;
+			new_n0->parent_slot = node->parent_slot;
+			new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
+			edit->adjust_count_on = new_n0;
+
+			collapse.node = new_n0;
+			collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf);
+			collapse.slot = 0;
+			assoc_array_subtree_iterate(assoc_array_node_to_ptr(node),
+						    node->back_pointer,
+						    assoc_array_delete_collapse_iterator,
+						    &collapse);
+			pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch);
+			BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1);
+
+			if (!node->back_pointer) {
+				edit->set[1].ptr = &array->root;
+			} else if (assoc_array_ptr_is_leaf(node->back_pointer)) {
+				BUG();
+			} else if (assoc_array_ptr_is_node(node->back_pointer)) {
+				struct assoc_array_node *p =
+					assoc_array_ptr_to_node(node->back_pointer);
+				edit->set[1].ptr = &p->slots[node->parent_slot];
+			} else if (assoc_array_ptr_is_shortcut(node->back_pointer)) {
+				struct assoc_array_shortcut *s =
+					assoc_array_ptr_to_shortcut(node->back_pointer);
+				edit->set[1].ptr = &s->next_node;
+			}
+			edit->set[1].to = assoc_array_node_to_ptr(new_n0);
+			edit->excised_subtree = assoc_array_node_to_ptr(node);
+		}
+	}
+
+	return edit;
+
+enomem:
+	/* Clean up after an out of memory error */
+	pr_devel("enomem\n");
+	assoc_array_cancel_edit(edit);
+	return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * assoc_array_clear - Script deletion of all objects from an associative array
+ * @array: The array to clear.
+ * @ops: The operations to use.
+ *
+ * Precalculate and preallocate a script for the deletion of all the objects
+ * from an associative array.  This results in an edit script that can either
+ * be applied or cancelled.
+ *
+ * The function returns a pointer to an edit script if there are objects to be
+ * deleted, NULL if there are no objects in the array or -ENOMEM.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
+					   const struct assoc_array_ops *ops)
+{
+	struct assoc_array_edit *edit;
+
+	pr_devel("-->%s()\n", __func__);
+
+	if (!array->root)
+		return NULL;
+
+	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+	if (!edit)
+		return ERR_PTR(-ENOMEM);
+	edit->array = array;
+	edit->ops = ops;
+	edit->set[1].ptr = &array->root;
+	edit->set[1].to = NULL;
+	edit->excised_subtree = array->root;
+	edit->ops_for_excised_subtree = ops;
+	pr_devel("all gone\n");
+	return edit;
+}
+
+/*
+ * Handle the deferred destruction after an applied edit.
+ */
+static void assoc_array_rcu_cleanup(struct rcu_head *head)
+{
+	struct assoc_array_edit *edit =
+		container_of(head, struct assoc_array_edit, rcu);
+	int i;
+
+	pr_devel("-->%s()\n", __func__);
+
+	if (edit->dead_leaf)
+		edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf));
+	for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++)
+		if (edit->excised_meta[i])
+			kfree(assoc_array_ptr_to_node(edit->excised_meta[i]));
+
+	if (edit->excised_subtree) {
+		BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree));
+		if (assoc_array_ptr_is_node(edit->excised_subtree)) {
+			struct assoc_array_node *n =
+				assoc_array_ptr_to_node(edit->excised_subtree);
+			n->back_pointer = NULL;
+		} else {
+			struct assoc_array_shortcut *s =
+				assoc_array_ptr_to_shortcut(edit->excised_subtree);
+			s->back_pointer = NULL;
+		}
+		assoc_array_destroy_subtree(edit->excised_subtree,
+					    edit->ops_for_excised_subtree);
+	}
+
+	kfree(edit);
+}
+
+/**
+ * assoc_array_apply_edit - Apply an edit script to an associative array
+ * @edit: The script to apply.
+ *
+ * Apply an edit script to an associative array to effect an insertion,
+ * deletion or clearance.  As the edit script includes preallocated memory,
+ * this is guaranteed not to fail.
+ *
+ * The edit script, dead objects and dead metadata will be scheduled for
+ * destruction after an RCU grace period to permit those doing read-only
+ * accesses on the array to continue to do so under the RCU read lock whilst
+ * the edit is taking place.
+ */
+void assoc_array_apply_edit(struct assoc_array_edit *edit)
+{
+	struct assoc_array_shortcut *shortcut;
+	struct assoc_array_node *node;
+	struct assoc_array_ptr *ptr;
+	int i;
+
+	pr_devel("-->%s()\n", __func__);
+
+	smp_wmb();
+	if (edit->leaf_p)
+		*edit->leaf_p = edit->leaf;
+
+	smp_wmb();
+	for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++)
+		if (edit->set_parent_slot[i].p)
+			*edit->set_parent_slot[i].p = edit->set_parent_slot[i].to;
+
+	smp_wmb();
+	for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++)
+		if (edit->set_backpointers[i])
+			*edit->set_backpointers[i] = edit->set_backpointers_to;
+
+	smp_wmb();
+	for (i = 0; i < ARRAY_SIZE(edit->set); i++)
+		if (edit->set[i].ptr)
+			*edit->set[i].ptr = edit->set[i].to;
+
+	if (edit->array->root == NULL) {
+		edit->array->nr_leaves_on_tree = 0;
+	} else if (edit->adjust_count_on) {
+		node = edit->adjust_count_on;
+		for (;;) {
+			node->nr_leaves_on_branch += edit->adjust_count_by;
+
+			ptr = node->back_pointer;
+			if (!ptr)
+				break;
+			if (assoc_array_ptr_is_shortcut(ptr)) {
+				shortcut = assoc_array_ptr_to_shortcut(ptr);
+				ptr = shortcut->back_pointer;
+				if (!ptr)
+					break;
+			}
+			BUG_ON(!assoc_array_ptr_is_node(ptr));
+			node = assoc_array_ptr_to_node(ptr);
+		}
+
+		edit->array->nr_leaves_on_tree += edit->adjust_count_by;
+	}
+
+	call_rcu(&edit->rcu, assoc_array_rcu_cleanup);
+}
+
+/**
+ * assoc_array_cancel_edit - Discard an edit script.
+ * @edit: The script to discard.
+ *
+ * Free an edit script and all the preallocated data it holds without making
+ * any changes to the associative array it was intended for.
+ *
+ * NOTE!  In the case of an insertion script, this does _not_ release the leaf
+ * that was to be inserted.  That is left to the caller.
+ */
+void assoc_array_cancel_edit(struct assoc_array_edit *edit)
+{
+	struct assoc_array_ptr *ptr;
+	int i;
+
+	pr_devel("-->%s()\n", __func__);
+
+	/* Clean up after an out of memory error */
+	for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) {
+		ptr = edit->new_meta[i];
+		if (ptr) {
+			if (assoc_array_ptr_is_node(ptr))
+				kfree(assoc_array_ptr_to_node(ptr));
+			else
+				kfree(assoc_array_ptr_to_shortcut(ptr));
+		}
+	}
+	kfree(edit);
+}
+
+/**
+ * assoc_array_gc - Garbage collect an associative array.
+ * @array: The array to clean.
+ * @ops: The operations to use.
+ * @iterator: A callback function to pass judgement on each object.
+ * @iterator_data: Private data for the callback function.
+ *
+ * Collect garbage from an associative array and pack down the internal tree to
+ * save memory.
+ *
+ * The iterator function is asked to pass judgement upon each object in the
+ * array.  If it returns false, the object is discard and if it returns true,
+ * the object is kept.  If it returns true, it must increment the object's
+ * usage count (or whatever it needs to do to retain it) before returning.
+ *
+ * This function returns 0 if successful or -ENOMEM if out of memory.  In the
+ * latter case, the array is not changed.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+int assoc_array_gc(struct assoc_array *array,
+		   const struct assoc_array_ops *ops,
+		   bool (*iterator)(void *object, void *iterator_data),
+		   void *iterator_data)
+{
+	struct assoc_array_shortcut *shortcut, *new_s;
+	struct assoc_array_node *node, *new_n;
+	struct assoc_array_edit *edit;
+	struct assoc_array_ptr *cursor, *ptr;
+	struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
+	unsigned long nr_leaves_on_tree;
+	int keylen, slot, nr_free, next_slot, i;
+
+	pr_devel("-->%s()\n", __func__);
+
+	if (!array->root)
+		return 0;
+
+	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+	if (!edit)
+		return -ENOMEM;
+	edit->array = array;
+	edit->ops = ops;
+	edit->ops_for_excised_subtree = ops;
+	edit->set[0].ptr = &array->root;
+	edit->excised_subtree = array->root;
+
+	new_root = new_parent = NULL;
+	new_ptr_pp = &new_root;
+	cursor = array->root;
+
+descend:
+	/* If this point is a shortcut, then we need to duplicate it and
+	 * advance the target cursor.
+	 */
+	if (assoc_array_ptr_is_shortcut(cursor)) {
+		shortcut = assoc_array_ptr_to_shortcut(cursor);
+		keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+		keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+		new_s = kmalloc(sizeof(struct assoc_array_shortcut) +
+				keylen * sizeof(unsigned long), GFP_KERNEL);
+		if (!new_s)
+			goto enomem;
+		pr_devel("dup shortcut %p -> %p\n", shortcut, new_s);
+		memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) +
+					 keylen * sizeof(unsigned long)));
+		new_s->back_pointer = new_parent;
+		new_s->parent_slot = shortcut->parent_slot;
+		*new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s);
+		new_ptr_pp = &new_s->next_node;
+		cursor = shortcut->next_node;
+	}
+
+	/* Duplicate the node at this position */
+	node = assoc_array_ptr_to_node(cursor);
+	new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+	if (!new_n)
+		goto enomem;
+	pr_devel("dup node %p -> %p\n", node, new_n);
+	new_n->back_pointer = new_parent;
+	new_n->parent_slot = node->parent_slot;
+	*new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n);
+	new_ptr_pp = NULL;
+	slot = 0;
+
+continue_node:
+	/* Filter across any leaves and gc any subtrees */
+	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+		ptr = node->slots[slot];
+		if (!ptr)
+			continue;
+
+		if (assoc_array_ptr_is_leaf(ptr)) {
+			if (iterator(assoc_array_ptr_to_leaf(ptr),
+				     iterator_data))
+				/* The iterator will have done any reference
+				 * counting on the object for us.
+				 */
+				new_n->slots[slot] = ptr;
+			continue;
+		}
+
+		new_ptr_pp = &new_n->slots[slot];
+		cursor = ptr;
+		goto descend;
+	}
+
+	pr_devel("-- compress node %p --\n", new_n);
+
+	/* Count up the number of empty slots in this node and work out the
+	 * subtree leaf count.
+	 */
+	new_n->nr_leaves_on_branch = 0;
+	nr_free = 0;
+	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+		ptr = new_n->slots[slot];
+		if (!ptr)
+			nr_free++;
+		else if (assoc_array_ptr_is_leaf(ptr))
+			new_n->nr_leaves_on_branch++;
+	}
+	pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);
+
+	/* See what we can fold in */
+	next_slot = 0;
+	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+		struct assoc_array_shortcut *s;
+		struct assoc_array_node *child;
+
+		ptr = new_n->slots[slot];
+		if (!ptr || assoc_array_ptr_is_leaf(ptr))
+			continue;
+
+		s = NULL;
+		if (assoc_array_ptr_is_shortcut(ptr)) {
+			s = assoc_array_ptr_to_shortcut(ptr);
+			ptr = s->next_node;
+		}
+
+		child = assoc_array_ptr_to_node(ptr);
+		new_n->nr_leaves_on_branch += child->nr_leaves_on_branch;
+
+		if (child->nr_leaves_on_branch <= nr_free + 1) {
+			/* Fold the child node into this one */
+			pr_devel("[%d] fold node %lu/%d [nx %d]\n",
+				 slot, child->nr_leaves_on_branch, nr_free + 1,
+				 next_slot);
+
+			/* We would already have reaped an intervening shortcut
+			 * on the way back up the tree.
+			 */
+			BUG_ON(s);
+
+			new_n->slots[slot] = NULL;
+			nr_free++;
+			if (slot < next_slot)
+				next_slot = slot;
+			for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+				struct assoc_array_ptr *p = child->slots[i];
+				if (!p)
+					continue;
+				BUG_ON(assoc_array_ptr_is_meta(p));
+				while (new_n->slots[next_slot])
+					next_slot++;
+				BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT);
+				new_n->slots[next_slot++] = p;
+				nr_free--;
+			}
+			kfree(child);
+		} else {
+			pr_devel("[%d] retain node %lu/%d [nx %d]\n",
+				 slot, child->nr_leaves_on_branch, nr_free + 1,
+				 next_slot);
+		}
+	}
+
+	pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);
+
+	nr_leaves_on_tree = new_n->nr_leaves_on_branch;
+
+	/* Excise this node if it is singly occupied by a shortcut */
+	if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) {
+		for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++)
+			if ((ptr = new_n->slots[slot]))
+				break;
+
+		if (assoc_array_ptr_is_meta(ptr) &&
+		    assoc_array_ptr_is_shortcut(ptr)) {
+			pr_devel("excise node %p with 1 shortcut\n", new_n);
+			new_s = assoc_array_ptr_to_shortcut(ptr);
+			new_parent = new_n->back_pointer;
+			slot = new_n->parent_slot;
+			kfree(new_n);
+			if (!new_parent) {
+				new_s->back_pointer = NULL;
+				new_s->parent_slot = 0;
+				new_root = ptr;
+				goto gc_complete;
+			}
+
+			if (assoc_array_ptr_is_shortcut(new_parent)) {
+				/* We can discard any preceding shortcut also */
+				struct assoc_array_shortcut *s =
+					assoc_array_ptr_to_shortcut(new_parent);
+
+				pr_devel("excise preceding shortcut\n");
+
+				new_parent = new_s->back_pointer = s->back_pointer;
+				slot = new_s->parent_slot = s->parent_slot;
+				kfree(s);
+				if (!new_parent) {
+					new_s->back_pointer = NULL;
+					new_s->parent_slot = 0;
+					new_root = ptr;
+					goto gc_complete;
+				}
+			}
+
+			new_s->back_pointer = new_parent;
+			new_s->parent_slot = slot;
+			new_n = assoc_array_ptr_to_node(new_parent);
+			new_n->slots[slot] = ptr;
+			goto ascend_old_tree;
+		}
+	}
+
+	/* Excise any shortcuts we might encounter that point to nodes that
+	 * only contain leaves.
+	 */
+	ptr = new_n->back_pointer;
+	if (!ptr)
+		goto gc_complete;
+
+	if (assoc_array_ptr_is_shortcut(ptr)) {
+		new_s = assoc_array_ptr_to_shortcut(ptr);
+		new_parent = new_s->back_pointer;
+		slot = new_s->parent_slot;
+
+		if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
+			struct assoc_array_node *n;
+
+			pr_devel("excise shortcut\n");
+			new_n->back_pointer = new_parent;
+			new_n->parent_slot = slot;
+			kfree(new_s);
+			if (!new_parent) {
+				new_root = assoc_array_node_to_ptr(new_n);
+				goto gc_complete;
+			}
+
+			n = assoc_array_ptr_to_node(new_parent);
+			n->slots[slot] = assoc_array_node_to_ptr(new_n);
+		}
+	} else {
+		new_parent = ptr;
+	}
+	new_n = assoc_array_ptr_to_node(new_parent);
+
+ascend_old_tree:
+	ptr = node->back_pointer;
+	if (assoc_array_ptr_is_shortcut(ptr)) {
+		shortcut = assoc_array_ptr_to_shortcut(ptr);
+		slot = shortcut->parent_slot;
+		cursor = shortcut->back_pointer;
+	} else {
+		slot = node->parent_slot;
+		cursor = ptr;
+	}
+	BUG_ON(!ptr);
+	node = assoc_array_ptr_to_node(cursor);
+	slot++;
+	goto continue_node;
+
+gc_complete:
+	edit->set[0].to = new_root;
+	assoc_array_apply_edit(edit);
+	edit->array->nr_leaves_on_tree = nr_leaves_on_tree;
+	return 0;
+
+enomem:
+	pr_devel("enomem\n");
+	assoc_array_destroy_subtree(new_root, edit->ops);
+	kfree(edit);
+	return -ENOMEM;
+}
diff --git a/lib/kfifo.c b/lib/kfifo.c
index 7b7f83027b7..d79b9d22206 100644
--- a/lib/kfifo.c
+++ b/lib/kfifo.c
@@ -215,7 +215,7 @@ static unsigned long kfifo_copy_from_user(struct __kfifo *fifo,
 	 * incrementing the fifo->in index counter
 	 */
 	smp_wmb();
-	*copied = len - ret;
+	*copied = len - ret * esize;
 	/* return the number of elements which are not copied */
 	return ret;
 }
@@ -275,7 +275,7 @@ static unsigned long kfifo_copy_to_user(struct __kfifo *fifo, void __user *to,
 	 * incrementing the fifo->out index counter
 	 */
 	smp_wmb();
-	*copied = len - ret;
+	*copied = len - ret * esize;
 	/* return the number of elements which are not copied */
 	return ret;
 }
diff --git a/lib/llist.c b/lib/llist.c
index 4a70d120138..f76196d0740 100644
--- a/lib/llist.c
+++ b/lib/llist.c
@@ -81,3 +81,25 @@ struct llist_node *llist_del_first(struct llist_head *head)
 	return entry;
 }
 EXPORT_SYMBOL_GPL(llist_del_first);
+
+/**
+ * llist_reverse_order - reverse order of a llist chain
+ * @head:	first item of the list to be reversed
+ *
+ * Reverse the order of a chain of llist entries and return the
+ * new first entry.
+ */
+struct llist_node *llist_reverse_order(struct llist_node *head)
+{
+	struct llist_node *new_head = NULL;
+
+	while (head) {
+		struct llist_node *tmp = head;
+		head = head->next;
+		tmp->next = new_head;
+		new_head = tmp;
+	}
+
+	return new_head;
+}
+EXPORT_SYMBOL_GPL(llist_reverse_order);
diff --git a/lib/lockref.c b/lib/lockref.c
index af6e95d0bed..d2b123f8456 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -1,7 +1,7 @@
 #include <linux/export.h>
 #include <linux/lockref.h>
 
-#ifdef CONFIG_CMPXCHG_LOCKREF
+#if USE_CMPXCHG_LOCKREF
 
 /*
  * Allow weakly-ordered memory architectures to provide barrier-less
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
index 657979f71be..bf076d281d4 100644
--- a/lib/mpi/mpiutil.c
+++ b/lib/mpi/mpiutil.c
@@ -121,3 +121,6 @@ void mpi_free(MPI a)
 	kfree(a);
 }
 EXPORT_SYMBOL_GPL(mpi_free);
+
+MODULE_DESCRIPTION("Multiprecision maths library");
+MODULE_LICENSE("GPL");
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
deleted file mode 100644
index 652a8ee8efe..00000000000
--- a/lib/percpu-rwsem.c
+++ /dev/null
@@ -1,165 +0,0 @@
-#include <linux/atomic.h>
-#include <linux/rwsem.h>
-#include <linux/percpu.h>
-#include <linux/wait.h>
-#include <linux/lockdep.h>
-#include <linux/percpu-rwsem.h>
-#include <linux/rcupdate.h>
-#include <linux/sched.h>
-#include <linux/errno.h>
-
-int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
-			const char *name, struct lock_class_key *rwsem_key)
-{
-	brw->fast_read_ctr = alloc_percpu(int);
-	if (unlikely(!brw->fast_read_ctr))
-		return -ENOMEM;
-
-	/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
-	__init_rwsem(&brw->rw_sem, name, rwsem_key);
-	atomic_set(&brw->write_ctr, 0);
-	atomic_set(&brw->slow_read_ctr, 0);
-	init_waitqueue_head(&brw->write_waitq);
-	return 0;
-}
-
-void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
-{
-	free_percpu(brw->fast_read_ctr);
-	brw->fast_read_ctr = NULL; /* catch use after free bugs */
-}
-
-/*
- * This is the fast-path for down_read/up_read, it only needs to ensure
- * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
- * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
- * serialize with the preempt-disabled section below.
- *
- * The nontrivial part is that we should guarantee acquire/release semantics
- * in case when
- *
- *	R_W: down_write() comes after up_read(), the writer should see all
- *	     changes done by the reader
- * or
- *	W_R: down_read() comes after up_write(), the reader should see all
- *	     changes done by the writer
- *
- * If this helper fails the callers rely on the normal rw_semaphore and
- * atomic_dec_and_test(), so in this case we have the necessary barriers.
- *
- * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
- * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
- * reader inside the critical section. See the comments in down_write and
- * up_write below.
- */
-static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
-{
-	bool success = false;
-
-	preempt_disable();
-	if (likely(!atomic_read(&brw->write_ctr))) {
-		__this_cpu_add(*brw->fast_read_ctr, val);
-		success = true;
-	}
-	preempt_enable();
-
-	return success;
-}
-
-/*
- * Like the normal down_read() this is not recursive, the writer can
- * come after the first percpu_down_read() and create the deadlock.
- *
- * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep,
- * percpu_up_read() does rwsem_release(). This pairs with the usage
- * of ->rw_sem in percpu_down/up_write().
- */
-void percpu_down_read(struct percpu_rw_semaphore *brw)
-{
-	might_sleep();
-	if (likely(update_fast_ctr(brw, +1))) {
-		rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
-		return;
-	}
-
-	down_read(&brw->rw_sem);
-	atomic_inc(&brw->slow_read_ctr);
-	/* avoid up_read()->rwsem_release() */
-	__up_read(&brw->rw_sem);
-}
-
-void percpu_up_read(struct percpu_rw_semaphore *brw)
-{
-	rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
-
-	if (likely(update_fast_ctr(brw, -1)))
-		return;
-
-	/* false-positive is possible but harmless */
-	if (atomic_dec_and_test(&brw->slow_read_ctr))
-		wake_up_all(&brw->write_waitq);
-}
-
-static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
-{
-	unsigned int sum = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		sum += per_cpu(*brw->fast_read_ctr, cpu);
-		per_cpu(*brw->fast_read_ctr, cpu) = 0;
-	}
-
-	return sum;
-}
-
-/*
- * A writer increments ->write_ctr to force the readers to switch to the
- * slow mode, note the atomic_read() check in update_fast_ctr().
- *
- * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
- * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
- * counter it represents the number of active readers.
- *
- * Finally the writer takes ->rw_sem for writing and blocks the new readers,
- * then waits until the slow counter becomes zero.
- */
-void percpu_down_write(struct percpu_rw_semaphore *brw)
-{
-	/* tell update_fast_ctr() there is a pending writer */
-	atomic_inc(&brw->write_ctr);
-	/*
-	 * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
-	 *    so that update_fast_ctr() can't succeed.
-	 *
-	 * 2. Ensures we see the result of every previous this_cpu_add() in
-	 *    update_fast_ctr().
-	 *
-	 * 3. Ensures that if any reader has exited its critical section via
-	 *    fast-path, it executes a full memory barrier before we return.
-	 *    See R_W case in the comment above update_fast_ctr().
-	 */
-	synchronize_sched_expedited();
-
-	/* exclude other writers, and block the new readers completely */
-	down_write(&brw->rw_sem);
-
-	/* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
-	atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
-
-	/* wait for all readers to complete their percpu_up_read() */
-	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
-}
-
-void percpu_up_write(struct percpu_rw_semaphore *brw)
-{
-	/* release the lock, but the readers can't use the fast-path */
-	up_write(&brw->rw_sem);
-	/*
-	 * Insert the barrier before the next fast-path in down_read,
-	 * see W_R case in the comment above update_fast_ctr().
-	 */
-	synchronize_sched_expedited();
-	/* the last writer unblocks update_fast_ctr() */
-	atomic_dec(&brw->write_ctr);
-}
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 93c5d5ecff4..7473ee3b4ee 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -60,14 +60,15 @@ static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
 {
 	int cpu;
+	unsigned long flags;
 
-	raw_spin_lock(&fbc->lock);
+	raw_spin_lock_irqsave(&fbc->lock, flags);
 	for_each_possible_cpu(cpu) {
 		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
 		*pcount = 0;
 	}
 	fbc->count = amount;
-	raw_spin_unlock(&fbc->lock);
+	raw_spin_unlock_irqrestore(&fbc->lock, flags);
 }
 EXPORT_SYMBOL(percpu_counter_set);
 
@@ -78,9 +79,10 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
 	preempt_disable();
 	count = __this_cpu_read(*fbc->counters) + amount;
 	if (count >= batch || count <= -batch) {
-		raw_spin_lock(&fbc->lock);
+		unsigned long flags;
+		raw_spin_lock_irqsave(&fbc->lock, flags);
 		fbc->count += count;
-		raw_spin_unlock(&fbc->lock);
+		raw_spin_unlock_irqrestore(&fbc->lock, flags);
 		__this_cpu_write(*fbc->counters, 0);
 	} else {
 		__this_cpu_write(*fbc->counters, count);
@@ -97,14 +99,15 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
 {
 	s64 ret;
 	int cpu;
+	unsigned long flags;
 
-	raw_spin_lock(&fbc->lock);
+	raw_spin_lock_irqsave(&fbc->lock, flags);
 	ret = fbc->count;
 	for_each_online_cpu(cpu) {
 		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
 		ret += *pcount;
 	}
-	raw_spin_unlock(&fbc->lock);
+	raw_spin_unlock_irqrestore(&fbc->lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(__percpu_counter_sum);
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index bab1ba2a4c7..9d054bf91d0 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -30,15 +30,6 @@
 #include <linux/spinlock.h>
 #include <linux/percpu_ida.h>
 
-/*
- * Number of tags we move between the percpu freelist and the global freelist at
- * a time
- */
-#define IDA_PCPU_BATCH_MOVE	32U
-
-/* Max size of percpu freelist, */
-#define IDA_PCPU_SIZE		((IDA_PCPU_BATCH_MOVE * 3) / 2)
-
 struct percpu_ida_cpu {
 	/*
 	 * Even though this is percpu, we need a lock for tag stealing by remote
@@ -78,7 +69,7 @@ static inline void steal_tags(struct percpu_ida *pool,
 	struct percpu_ida_cpu *remote;
 
 	for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags);
-	     cpus_have_tags * IDA_PCPU_SIZE > pool->nr_tags / 2;
+	     cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2;
 	     cpus_have_tags--) {
 		cpu = cpumask_next(cpu, &pool->cpus_have_tags);
 
@@ -123,11 +114,10 @@ static inline void alloc_global_tags(struct percpu_ida *pool,
 {
 	move_tags(tags->freelist, &tags->nr_free,
 		  pool->freelist, &pool->nr_free,
-		  min(pool->nr_free, IDA_PCPU_BATCH_MOVE));
+		  min(pool->nr_free, pool->percpu_batch_size));
 }
 
-static inline unsigned alloc_local_tag(struct percpu_ida *pool,
-				       struct percpu_ida_cpu *tags)
+static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags)
 {
 	int tag = -ENOSPC;
 
@@ -168,7 +158,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp)
 	tags = this_cpu_ptr(pool->tag_cpu);
 
 	/* Fastpath */
-	tag = alloc_local_tag(pool, tags);
+	tag = alloc_local_tag(tags);
 	if (likely(tag >= 0)) {
 		local_irq_restore(flags);
 		return tag;
@@ -245,17 +235,17 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
 		wake_up(&pool->wait);
 	}
 
-	if (nr_free == IDA_PCPU_SIZE) {
+	if (nr_free == pool->percpu_max_size) {
 		spin_lock(&pool->lock);
 
 		/*
 		 * Global lock held and irqs disabled, don't need percpu
 		 * lock
 		 */
-		if (tags->nr_free == IDA_PCPU_SIZE) {
+		if (tags->nr_free == pool->percpu_max_size) {
 			move_tags(pool->freelist, &pool->nr_free,
 				  tags->freelist, &tags->nr_free,
-				  IDA_PCPU_BATCH_MOVE);
+				  pool->percpu_batch_size);
 
 			wake_up(&pool->wait);
 		}
@@ -292,7 +282,8 @@ EXPORT_SYMBOL_GPL(percpu_ida_destroy);
  * Allocation is percpu, but sharding is limited by nr_tags - for best
  * performance, the workload should not span more cpus than nr_tags / 128.
  */
-int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
+int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
+	unsigned long max_size, unsigned long batch_size)
 {
 	unsigned i, cpu, order;
 
@@ -301,6 +292,8 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
 	init_waitqueue_head(&pool->wait);
 	spin_lock_init(&pool->lock);
 	pool->nr_tags = nr_tags;
+	pool->percpu_max_size = max_size;
+	pool->percpu_batch_size = batch_size;
 
 	/* Guard against overflow */
 	if (nr_tags > (unsigned) INT_MAX + 1) {
@@ -319,7 +312,7 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
 	pool->nr_free = nr_tags;
 
 	pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) +
-				       IDA_PCPU_SIZE * sizeof(unsigned),
+				       pool->percpu_max_size * sizeof(unsigned),
 				       sizeof(unsigned));
 	if (!pool->tag_cpu)
 		goto err;
@@ -332,4 +325,65 @@ err:
 	percpu_ida_destroy(pool);
 	return -ENOMEM;
 }
-EXPORT_SYMBOL_GPL(percpu_ida_init);
+EXPORT_SYMBOL_GPL(__percpu_ida_init);
+
+/**
+ * percpu_ida_for_each_free - iterate free ids of a pool
+ * @pool: pool to iterate
+ * @fn: interate callback function
+ * @data: parameter for @fn
+ *
+ * Note, this doesn't guarantee to iterate all free ids restrictly. Some free
+ * ids might be missed, some might be iterated duplicated, and some might
+ * be iterated and not free soon.
+ */
+int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
+	void *data)
+{
+	unsigned long flags;
+	struct percpu_ida_cpu *remote;
+	unsigned cpu, i, err = 0;
+
+	local_irq_save(flags);
+	for_each_possible_cpu(cpu) {
+		remote = per_cpu_ptr(pool->tag_cpu, cpu);
+		spin_lock(&remote->lock);
+		for (i = 0; i < remote->nr_free; i++) {
+			err = fn(remote->freelist[i], data);
+			if (err)
+				break;
+		}
+		spin_unlock(&remote->lock);
+		if (err)
+			goto out;
+	}
+
+	spin_lock(&pool->lock);
+	for (i = 0; i < pool->nr_free; i++) {
+		err = fn(pool->freelist[i], data);
+		if (err)
+			break;
+	}
+	spin_unlock(&pool->lock);
+out:
+	local_irq_restore(flags);
+	return err;
+}
+EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
+
+/**
+ * percpu_ida_free_tags - return free tags number of a specific cpu or global pool
+ * @pool: pool related
+ * @cpu: specific cpu or global pool if @cpu == nr_cpu_ids
+ *
+ * Note: this just returns a snapshot of free tags number.
+ */
+unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu)
+{
+	struct percpu_ida_cpu *remote;
+	if (cpu == nr_cpu_ids)
+		return pool->nr_free;
+	remote = per_cpu_ptr(pool->tag_cpu, cpu);
+	return remote->nr_free;
+}
+EXPORT_SYMBOL_GPL(percpu_ida_free_tags);
diff --git a/lib/random32.c b/lib/random32.c
index 82da4f4c348..1e5b2df4429 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -214,18 +214,22 @@ static DEFINE_TIMER(seed_timer, __prandom_timer, 0, 0);
 static void __prandom_timer(unsigned long dontcare)
 {
 	u32 entropy;
+	unsigned long expires;
 
 	get_random_bytes(&entropy, sizeof(entropy));
 	prandom_seed(entropy);
+
 	/* reseed every ~60 seconds, in [40 .. 80) interval with slack */
-	seed_timer.expires = jiffies + (40 * HZ + (prandom_u32() % (40 * HZ)));
+	expires = 40 + (prandom_u32() % 40);
+	seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC);
+
 	add_timer(&seed_timer);
 }
 
-static void prandom_start_seed_timer(void)
+static void __init __prandom_start_seed_timer(void)
 {
 	set_timer_slack(&seed_timer, HZ);
-	seed_timer.expires = jiffies + 40 * HZ;
+	seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC);
 	add_timer(&seed_timer);
 }
 
@@ -270,7 +274,7 @@ void prandom_reseed_late(void)
 static int __init prandom_reseed(void)
 {
 	__prandom_reseed(false);
-	prandom_start_seed_timer();
+	__prandom_start_seed_timer();
 	return 0;
 }
 late_initcall(prandom_reseed);
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
deleted file mode 100644
index 9be8a914497..00000000000
--- a/lib/rwsem-spinlock.c
+++ /dev/null
@@ -1,296 +0,0 @@
-/* rwsem-spinlock.c: R/W semaphores: contention handling functions for
- * generic spinlock implementation
- *
- * Copyright (c) 2001   David Howells (dhowells@redhat.com).
- * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
- * - Derived also from comments by Linus
- */
-#include <linux/rwsem.h>
-#include <linux/sched.h>
-#include <linux/export.h>
-
-enum rwsem_waiter_type {
-	RWSEM_WAITING_FOR_WRITE,
-	RWSEM_WAITING_FOR_READ
-};
-
-struct rwsem_waiter {
-	struct list_head list;
-	struct task_struct *task;
-	enum rwsem_waiter_type type;
-};
-
-int rwsem_is_locked(struct rw_semaphore *sem)
-{
-	int ret = 1;
-	unsigned long flags;
-
-	if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
-		ret = (sem->activity != 0);
-		raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-	}
-	return ret;
-}
-EXPORT_SYMBOL(rwsem_is_locked);
-
-/*
- * initialise the semaphore
- */
-void __init_rwsem(struct rw_semaphore *sem, const char *name,
-		  struct lock_class_key *key)
-{
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	/*
-	 * Make sure we are not reinitializing a held semaphore:
-	 */
-	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
-	lockdep_init_map(&sem->dep_map, name, key, 0);
-#endif
-	sem->activity = 0;
-	raw_spin_lock_init(&sem->wait_lock);
-	INIT_LIST_HEAD(&sem->wait_list);
-}
-EXPORT_SYMBOL(__init_rwsem);
-
-/*
- * handle the lock release when processes blocked on it that can now run
- * - if we come here, then:
- *   - the 'active count' _reached_ zero
- *   - the 'waiting count' is non-zero
- * - the spinlock must be held by the caller
- * - woken process blocks are discarded from the list after having task zeroed
- * - writers are only woken if wakewrite is non-zero
- */
-static inline struct rw_semaphore *
-__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
-{
-	struct rwsem_waiter *waiter;
-	struct task_struct *tsk;
-	int woken;
-
-	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
-
-	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
-		if (wakewrite)
-			/* Wake up a writer. Note that we do not grant it the
-			 * lock - it will have to acquire it when it runs. */
-			wake_up_process(waiter->task);
-		goto out;
-	}
-
-	/* grant an infinite number of read locks to the front of the queue */
-	woken = 0;
-	do {
-		struct list_head *next = waiter->list.next;
-
-		list_del(&waiter->list);
-		tsk = waiter->task;
-		smp_mb();
-		waiter->task = NULL;
-		wake_up_process(tsk);
-		put_task_struct(tsk);
-		woken++;
-		if (next == &sem->wait_list)
-			break;
-		waiter = list_entry(next, struct rwsem_waiter, list);
-	} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
-
-	sem->activity += woken;
-
- out:
-	return sem;
-}
-
-/*
- * wake a single writer
- */
-static inline struct rw_semaphore *
-__rwsem_wake_one_writer(struct rw_semaphore *sem)
-{
-	struct rwsem_waiter *waiter;
-
-	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
-	wake_up_process(waiter->task);
-
-	return sem;
-}
-
-/*
- * get a read lock on the semaphore
- */
-void __sched __down_read(struct rw_semaphore *sem)
-{
-	struct rwsem_waiter waiter;
-	struct task_struct *tsk;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
-	if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
-		/* granted */
-		sem->activity++;
-		raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-		goto out;
-	}
-
-	tsk = current;
-	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-
-	/* set up my own style of waitqueue */
-	waiter.task = tsk;
-	waiter.type = RWSEM_WAITING_FOR_READ;
-	get_task_struct(tsk);
-
-	list_add_tail(&waiter.list, &sem->wait_list);
-
-	/* we don't need to touch the semaphore struct anymore */
-	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-	/* wait to be given the lock */
-	for (;;) {
-		if (!waiter.task)
-			break;
-		schedule();
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	}
-
-	tsk->state = TASK_RUNNING;
- out:
-	;
-}
-
-/*
- * trylock for reading -- returns 1 if successful, 0 if contention
- */
-int __down_read_trylock(struct rw_semaphore *sem)
-{
-	unsigned long flags;
-	int ret = 0;
-
-
-	raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
-	if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
-		/* granted */
-		sem->activity++;
-		ret = 1;
-	}
-
-	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-	return ret;
-}
-
-/*
- * get a write lock on the semaphore
- */
-void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
-{
-	struct rwsem_waiter waiter;
-	struct task_struct *tsk;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
-	/* set up my own style of waitqueue */
-	tsk = current;
-	waiter.task = tsk;
-	waiter.type = RWSEM_WAITING_FOR_WRITE;
-	list_add_tail(&waiter.list, &sem->wait_list);
-
-	/* wait for someone to release the lock */
-	for (;;) {
-		/*
-		 * That is the key to support write lock stealing: allows the
-		 * task already on CPU to get the lock soon rather than put
-		 * itself into sleep and waiting for system woke it or someone
-		 * else in the head of the wait list up.
-		 */
-		if (sem->activity == 0)
-			break;
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-		raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-		schedule();
-		raw_spin_lock_irqsave(&sem->wait_lock, flags);
-	}
-	/* got the lock */
-	sem->activity = -1;
-	list_del(&waiter.list);
-
-	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-}
-
-void __sched __down_write(struct rw_semaphore *sem)
-{
-	__down_write_nested(sem, 0);
-}
-
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-int __down_write_trylock(struct rw_semaphore *sem)
-{
-	unsigned long flags;
-	int ret = 0;
-
-	raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
-	if (sem->activity == 0) {
-		/* got the lock */
-		sem->activity = -1;
-		ret = 1;
-	}
-
-	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-	return ret;
-}
-
-/*
- * release a read lock on the semaphore
- */
-void __up_read(struct rw_semaphore *sem)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
-	if (--sem->activity == 0 && !list_empty(&sem->wait_list))
-		sem = __rwsem_wake_one_writer(sem);
-
-	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-}
-
-/*
- * release a write lock on the semaphore
- */
-void __up_write(struct rw_semaphore *sem)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
-	sem->activity = 0;
-	if (!list_empty(&sem->wait_list))
-		sem = __rwsem_do_wake(sem, 1);
-
-	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-}
-
-/*
- * downgrade a write lock into a read lock
- * - just wake up any readers at the front of the queue
- */
-void __downgrade_write(struct rw_semaphore *sem)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
-	sem->activity = 1;
-	if (!list_empty(&sem->wait_list))
-		sem = __rwsem_do_wake(sem, 0);
-
-	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-}
-
diff --git a/lib/rwsem.c b/lib/rwsem.c
deleted file mode 100644
index 19c5fa95e0b..00000000000
--- a/lib/rwsem.c
+++ /dev/null
@@ -1,293 +0,0 @@
-/* rwsem.c: R/W semaphores: contention handling functions
- *
- * Written by David Howells (dhowells@redhat.com).
- * Derived from arch/i386/kernel/semaphore.c
- *
- * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
- * and Michel Lespinasse <walken@google.com>
- */
-#include <linux/rwsem.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/export.h>
-
-/*
- * Initialize an rwsem:
- */
-void __init_rwsem(struct rw_semaphore *sem, const char *name,
-		  struct lock_class_key *key)
-{
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	/*
-	 * Make sure we are not reinitializing a held semaphore:
-	 */
-	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
-	lockdep_init_map(&sem->dep_map, name, key, 0);
-#endif
-	sem->count = RWSEM_UNLOCKED_VALUE;
-	raw_spin_lock_init(&sem->wait_lock);
-	INIT_LIST_HEAD(&sem->wait_list);
-}
-
-EXPORT_SYMBOL(__init_rwsem);
-
-enum rwsem_waiter_type {
-	RWSEM_WAITING_FOR_WRITE,
-	RWSEM_WAITING_FOR_READ
-};
-
-struct rwsem_waiter {
-	struct list_head list;
-	struct task_struct *task;
-	enum rwsem_waiter_type type;
-};
-
-enum rwsem_wake_type {
-	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
-	RWSEM_WAKE_READERS,	/* Wake readers only */
-	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
-};
-
-/*
- * handle the lock release when processes blocked on it that can now run
- * - if we come here from up_xxxx(), then:
- *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
- *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
- * - there must be someone on the queue
- * - the spinlock must be held by the caller
- * - woken process blocks are discarded from the list after having task zeroed
- * - writers are only woken if downgrading is false
- */
-static struct rw_semaphore *
-__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
-{
-	struct rwsem_waiter *waiter;
-	struct task_struct *tsk;
-	struct list_head *next;
-	long oldcount, woken, loop, adjustment;
-
-	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
-	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
-		if (wake_type == RWSEM_WAKE_ANY)
-			/* Wake writer at the front of the queue, but do not
-			 * grant it the lock yet as we want other writers
-			 * to be able to steal it.  Readers, on the other hand,
-			 * will block as they will notice the queued writer.
-			 */
-			wake_up_process(waiter->task);
-		goto out;
-	}
-
-	/* Writers might steal the lock before we grant it to the next reader.
-	 * We prefer to do the first reader grant before counting readers
-	 * so we can bail out early if a writer stole the lock.
-	 */
-	adjustment = 0;
-	if (wake_type != RWSEM_WAKE_READ_OWNED) {
-		adjustment = RWSEM_ACTIVE_READ_BIAS;
- try_reader_grant:
-		oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
-		if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
-			/* A writer stole the lock. Undo our reader grant. */
-			if (rwsem_atomic_update(-adjustment, sem) &
-						RWSEM_ACTIVE_MASK)
-				goto out;
-			/* Last active locker left. Retry waking readers. */
-			goto try_reader_grant;
-		}
-	}
-
-	/* Grant an infinite number of read locks to the readers at the front
-	 * of the queue.  Note we increment the 'active part' of the count by
-	 * the number of readers before waking any processes up.
-	 */
-	woken = 0;
-	do {
-		woken++;
-
-		if (waiter->list.next == &sem->wait_list)
-			break;
-
-		waiter = list_entry(waiter->list.next,
-					struct rwsem_waiter, list);
-
-	} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
-
-	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
-	if (waiter->type != RWSEM_WAITING_FOR_WRITE)
-		/* hit end of list above */
-		adjustment -= RWSEM_WAITING_BIAS;
-
-	if (adjustment)
-		rwsem_atomic_add(adjustment, sem);
-
-	next = sem->wait_list.next;
-	loop = woken;
-	do {
-		waiter = list_entry(next, struct rwsem_waiter, list);
-		next = waiter->list.next;
-		tsk = waiter->task;
-		smp_mb();
-		waiter->task = NULL;
-		wake_up_process(tsk);
-		put_task_struct(tsk);
-	} while (--loop);
-
-	sem->wait_list.next = next;
-	next->prev = &sem->wait_list;
-
- out:
-	return sem;
-}
-
-/*
- * wait for the read lock to be granted
- */
-struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
-{
-	long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
-	struct rwsem_waiter waiter;
-	struct task_struct *tsk = current;
-
-	/* set up my own style of waitqueue */
-	waiter.task = tsk;
-	waiter.type = RWSEM_WAITING_FOR_READ;
-	get_task_struct(tsk);
-
-	raw_spin_lock_irq(&sem->wait_lock);
-	if (list_empty(&sem->wait_list))
-		adjustment += RWSEM_WAITING_BIAS;
-	list_add_tail(&waiter.list, &sem->wait_list);
-
-	/* we're now waiting on the lock, but no longer actively locking */
-	count = rwsem_atomic_update(adjustment, sem);
-
-	/* If there are no active locks, wake the front queued process(es).
-	 *
-	 * If there are no writers and we are first in the queue,
-	 * wake our own waiter to join the existing active readers !
-	 */
-	if (count == RWSEM_WAITING_BIAS ||
-	    (count > RWSEM_WAITING_BIAS &&
-	     adjustment != -RWSEM_ACTIVE_READ_BIAS))
-		sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
-
-	raw_spin_unlock_irq(&sem->wait_lock);
-
-	/* wait to be given the lock */
-	while (true) {
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-		if (!waiter.task)
-			break;
-		schedule();
-	}
-
-	tsk->state = TASK_RUNNING;
-
-	return sem;
-}
-
-/*
- * wait until we successfully acquire the write lock
- */
-struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
-{
-	long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS;
-	struct rwsem_waiter waiter;
-	struct task_struct *tsk = current;
-
-	/* set up my own style of waitqueue */
-	waiter.task = tsk;
-	waiter.type = RWSEM_WAITING_FOR_WRITE;
-
-	raw_spin_lock_irq(&sem->wait_lock);
-	if (list_empty(&sem->wait_list))
-		adjustment += RWSEM_WAITING_BIAS;
-	list_add_tail(&waiter.list, &sem->wait_list);
-
-	/* we're now waiting on the lock, but no longer actively locking */
-	count = rwsem_atomic_update(adjustment, sem);
-
-	/* If there were already threads queued before us and there are no
-	 * active writers, the lock must be read owned; so we try to wake
-	 * any read locks that were queued ahead of us. */
-	if (count > RWSEM_WAITING_BIAS &&
-	    adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
-		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
-
-	/* wait until we successfully acquire the lock */
-	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	while (true) {
-		if (!(count & RWSEM_ACTIVE_MASK)) {
-			/* Try acquiring the write lock. */
-			count = RWSEM_ACTIVE_WRITE_BIAS;
-			if (!list_is_singular(&sem->wait_list))
-				count += RWSEM_WAITING_BIAS;
-
-			if (sem->count == RWSEM_WAITING_BIAS &&
-			    cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) ==
-							RWSEM_WAITING_BIAS)
-				break;
-		}
-
-		raw_spin_unlock_irq(&sem->wait_lock);
-
-		/* Block until there are no active lockers. */
-		do {
-			schedule();
-			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-		} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
-
-		raw_spin_lock_irq(&sem->wait_lock);
-	}
-
-	list_del(&waiter.list);
-	raw_spin_unlock_irq(&sem->wait_lock);
-	tsk->state = TASK_RUNNING;
-
-	return sem;
-}
-
-/*
- * handle waking up a waiter on the semaphore
- * - up_read/up_write has decremented the active part of count if we come here
- */
-struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
-	/* do nothing if list empty */
-	if (!list_empty(&sem->wait_list))
-		sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
-
-	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-	return sem;
-}
-
-/*
- * downgrade a write lock into a read lock
- * - caller incremented waiting part of count and discovered it still negative
- * - just wake up any readers at the front of the queue
- */
-struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
-	/* do nothing if list empty */
-	if (!list_empty(&sem->wait_list))
-		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
-
-	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-	return sem;
-}
-
-EXPORT_SYMBOL(rwsem_down_read_failed);
-EXPORT_SYMBOL(rwsem_down_write_failed);
-EXPORT_SYMBOL(rwsem_wake);
-EXPORT_SYMBOL(rwsem_downgrade_wake);
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
deleted file mode 100644
index 0374a596cff..00000000000
--- a/lib/spinlock_debug.c
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Copyright 2005, Red Hat, Inc., Ingo Molnar
- * Released under the General Public License (GPL).
- *
- * This file contains the spinlock/rwlock implementations for
- * DEBUG_SPINLOCK.
- */
-
-#include <linux/spinlock.h>
-#include <linux/nmi.h>
-#include <linux/interrupt.h>
-#include <linux/debug_locks.h>
-#include <linux/delay.h>
-#include <linux/export.h>
-
-void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
-			  struct lock_class_key *key)
-{
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	/*
-	 * Make sure we are not reinitializing a held lock:
-	 */
-	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
-	lockdep_init_map(&lock->dep_map, name, key, 0);
-#endif
-	lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
-	lock->magic = SPINLOCK_MAGIC;
-	lock->owner = SPINLOCK_OWNER_INIT;
-	lock->owner_cpu = -1;
-}
-
-EXPORT_SYMBOL(__raw_spin_lock_init);
-
-void __rwlock_init(rwlock_t *lock, const char *name,
-		   struct lock_class_key *key)
-{
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	/*
-	 * Make sure we are not reinitializing a held lock:
-	 */
-	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
-	lockdep_init_map(&lock->dep_map, name, key, 0);
-#endif
-	lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED;
-	lock->magic = RWLOCK_MAGIC;
-	lock->owner = SPINLOCK_OWNER_INIT;
-	lock->owner_cpu = -1;
-}
-
-EXPORT_SYMBOL(__rwlock_init);
-
-static void spin_dump(raw_spinlock_t *lock, const char *msg)
-{
-	struct task_struct *owner = NULL;
-
-	if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
-		owner = lock->owner;
-	printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
-		msg, raw_smp_processor_id(),
-		current->comm, task_pid_nr(current));
-	printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, "
-			".owner_cpu: %d\n",
-		lock, lock->magic,
-		owner ? owner->comm : "<none>",
-		owner ? task_pid_nr(owner) : -1,
-		lock->owner_cpu);
-	dump_stack();
-}
-
-static void spin_bug(raw_spinlock_t *lock, const char *msg)
-{
-	if (!debug_locks_off())
-		return;
-
-	spin_dump(lock, msg);
-}
-
-#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
-
-static inline void
-debug_spin_lock_before(raw_spinlock_t *lock)
-{
-	SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
-	SPIN_BUG_ON(lock->owner == current, lock, "recursion");
-	SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
-							lock, "cpu recursion");
-}
-
-static inline void debug_spin_lock_after(raw_spinlock_t *lock)
-{
-	lock->owner_cpu = raw_smp_processor_id();
-	lock->owner = current;
-}
-
-static inline void debug_spin_unlock(raw_spinlock_t *lock)
-{
-	SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
-	SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked");
-	SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
-	SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
-							lock, "wrong CPU");
-	lock->owner = SPINLOCK_OWNER_INIT;
-	lock->owner_cpu = -1;
-}
-
-static void __spin_lock_debug(raw_spinlock_t *lock)
-{
-	u64 i;
-	u64 loops = loops_per_jiffy * HZ;
-
-	for (i = 0; i < loops; i++) {
-		if (arch_spin_trylock(&lock->raw_lock))
-			return;
-		__delay(1);
-	}
-	/* lockup suspected: */
-	spin_dump(lock, "lockup suspected");
-#ifdef CONFIG_SMP
-	trigger_all_cpu_backtrace();
-#endif
-
-	/*
-	 * The trylock above was causing a livelock.  Give the lower level arch
-	 * specific lock code a chance to acquire the lock. We have already
-	 * printed a warning/backtrace at this point. The non-debug arch
-	 * specific code might actually succeed in acquiring the lock.  If it is
-	 * not successful, the end-result is the same - there is no forward
-	 * progress.
-	 */
-	arch_spin_lock(&lock->raw_lock);
-}
-
-void do_raw_spin_lock(raw_spinlock_t *lock)
-{
-	debug_spin_lock_before(lock);
-	if (unlikely(!arch_spin_trylock(&lock->raw_lock)))
-		__spin_lock_debug(lock);
-	debug_spin_lock_after(lock);
-}
-
-int do_raw_spin_trylock(raw_spinlock_t *lock)
-{
-	int ret = arch_spin_trylock(&lock->raw_lock);
-
-	if (ret)
-		debug_spin_lock_after(lock);
-#ifndef CONFIG_SMP
-	/*
-	 * Must not happen on UP:
-	 */
-	SPIN_BUG_ON(!ret, lock, "trylock failure on UP");
-#endif
-	return ret;
-}
-
-void do_raw_spin_unlock(raw_spinlock_t *lock)
-{
-	debug_spin_unlock(lock);
-	arch_spin_unlock(&lock->raw_lock);
-}
-
-static void rwlock_bug(rwlock_t *lock, const char *msg)
-{
-	if (!debug_locks_off())
-		return;
-
-	printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n",
-		msg, raw_smp_processor_id(), current->comm,
-		task_pid_nr(current), lock);
-	dump_stack();
-}
-
-#define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg)
-
-#if 0		/* __write_lock_debug() can lock up - maybe this can too? */
-static void __read_lock_debug(rwlock_t *lock)
-{
-	u64 i;
-	u64 loops = loops_per_jiffy * HZ;
-	int print_once = 1;
-
-	for (;;) {
-		for (i = 0; i < loops; i++) {
-			if (arch_read_trylock(&lock->raw_lock))
-				return;
-			__delay(1);
-		}
-		/* lockup suspected: */
-		if (print_once) {
-			print_once = 0;
-			printk(KERN_EMERG "BUG: read-lock lockup on CPU#%d, "
-					"%s/%d, %p\n",
-				raw_smp_processor_id(), current->comm,
-				current->pid, lock);
-			dump_stack();
-		}
-	}
-}
-#endif
-
-void do_raw_read_lock(rwlock_t *lock)
-{
-	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
-	arch_read_lock(&lock->raw_lock);
-}
-
-int do_raw_read_trylock(rwlock_t *lock)
-{
-	int ret = arch_read_trylock(&lock->raw_lock);
-
-#ifndef CONFIG_SMP
-	/*
-	 * Must not happen on UP:
-	 */
-	RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP");
-#endif
-	return ret;
-}
-
-void do_raw_read_unlock(rwlock_t *lock)
-{
-	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
-	arch_read_unlock(&lock->raw_lock);
-}
-
-static inline void debug_write_lock_before(rwlock_t *lock)
-{
-	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
-	RWLOCK_BUG_ON(lock->owner == current, lock, "recursion");
-	RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
-							lock, "cpu recursion");
-}
-
-static inline void debug_write_lock_after(rwlock_t *lock)
-{
-	lock->owner_cpu = raw_smp_processor_id();
-	lock->owner = current;
-}
-
-static inline void debug_write_unlock(rwlock_t *lock)
-{
-	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
-	RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner");
-	RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
-							lock, "wrong CPU");
-	lock->owner = SPINLOCK_OWNER_INIT;
-	lock->owner_cpu = -1;
-}
-
-#if 0		/* This can cause lockups */
-static void __write_lock_debug(rwlock_t *lock)
-{
-	u64 i;
-	u64 loops = loops_per_jiffy * HZ;
-	int print_once = 1;
-
-	for (;;) {
-		for (i = 0; i < loops; i++) {
-			if (arch_write_trylock(&lock->raw_lock))
-				return;
-			__delay(1);
-		}
-		/* lockup suspected: */
-		if (print_once) {
-			print_once = 0;
-			printk(KERN_EMERG "BUG: write-lock lockup on CPU#%d, "
-					"%s/%d, %p\n",
-				raw_smp_processor_id(), current->comm,
-				current->pid, lock);
-			dump_stack();
-		}
-	}
-}
-#endif
-
-void do_raw_write_lock(rwlock_t *lock)
-{
-	debug_write_lock_before(lock);
-	arch_write_lock(&lock->raw_lock);
-	debug_write_lock_after(lock);
-}
-
-int do_raw_write_trylock(rwlock_t *lock)
-{
-	int ret = arch_write_trylock(&lock->raw_lock);
-
-	if (ret)
-		debug_write_lock_after(lock);
-#ifndef CONFIG_SMP
-	/*
-	 * Must not happen on UP:
-	 */
-	RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP");
-#endif
-	return ret;
-}
-
-void do_raw_write_unlock(rwlock_t *lock)
-{
-	debug_write_unlock(lock);
-	arch_write_unlock(&lock->raw_lock);
-}
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 4e8686c7e5a..e4399fa65ad 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -38,6 +38,9 @@
 #include <linux/bootmem.h>
 #include <linux/iommu-helper.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/swiotlb.h>
+
 #define OFFSET(val,align) ((unsigned long)	\
 	                   ( (val) & ( (align) - 1)))
 
@@ -502,6 +505,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 
 not_found:
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
+	dev_warn(hwdev, "swiotlb buffer is full\n");
 	return SWIOTLB_MAP_ERROR;
 found:
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
@@ -726,6 +730,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
 		return dev_addr;
 
+	trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
+
 	/* Oh well, have to allocate and map a bounce buffer. */
 	map = map_single(dev, phys, size, dir);
 	if (map == SWIOTLB_MAP_ERROR) {
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 48586ac3a62..10909c57149 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1712,18 +1712,16 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 			break;
 
 		case FORMAT_TYPE_NRCHARS: {
-			u8 qualifier = spec.qualifier;
+			/*
+			 * Since %n poses a greater security risk than
+			 * utility, ignore %n and skip its argument.
+			 */
+			void *skip_arg;
 
-			if (qualifier == 'l') {
-				long *ip = va_arg(args, long *);
-				*ip = (str - buf);
-			} else if (_tolower(qualifier) == 'z') {
-				size_t *ip = va_arg(args, size_t *);
-				*ip = (str - buf);
-			} else {
-				int *ip = va_arg(args, int *);
-				*ip = (str - buf);
-			}
+			WARN_ONCE(1, "Please remove ignored %%n in '%s'\n",
+					old_fmt);
+
+			skip_arg = va_arg(args, void *);
 			break;
 		}