275 files changed, 7918 insertions, 2211 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 1964f98e74b..b85efa77394 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -594,21 +594,21 @@ static int __init init_v9fs(void)
 	int err;
 	pr_info("Installing v9fs 9p2000 file system support\n");
 	/* TODO: Setup list of registered trasnport modules */
-	err = register_filesystem(&v9fs_fs_type);
-	if (err < 0) {
-		pr_err("Failed to register filesystem\n");
-		return err;
-	}
 
 	err = v9fs_cache_register();
 	if (err < 0) {
 		pr_err("Failed to register v9fs for caching\n");
-		goto out_fs_unreg;
+		return err;
 	}
 
 	err = v9fs_sysfs_init();
 	if (err < 0) {
 		pr_err("Failed to register with sysfs\n");
+		goto out_cache;
+	}
+	err = register_filesystem(&v9fs_fs_type);
+	if (err < 0) {
+		pr_err("Failed to register filesystem\n");
 		goto out_sysfs_cleanup;
 	}
 
@@ -617,8 +617,8 @@ static int __init init_v9fs(void)
 out_sysfs_cleanup:
 	v9fs_sysfs_cleanup();
 
-out_fs_unreg:
-	unregister_filesystem(&v9fs_fs_type);
+out_cache:
+	v9fs_cache_unregister();
 
 	return err;
 }
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 7b0cd87b07c..10b7d3c9dba 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -155,9 +155,8 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
 		goto release_sb;
 	}
 
-	root = d_alloc_root(inode);
+	root = d_make_root(inode);
 	if (!root) {
-		iput(inode);
 		retval = -ENOMEM;
 		goto release_sb;
 	}
diff --git a/fs/Kconfig b/fs/Kconfig
index d621f02a3f9..f95ae3a027f 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -4,6 +4,10 @@
 
 menu "File systems"
 
+# Use unaligned word dcache accesses
+config DCACHE_WORD_ACCESS
+       bool
+
 if BLOCK
 
 source "fs/ext2/Kconfig"
@@ -210,6 +214,7 @@ source "fs/minix/Kconfig"
 source "fs/omfs/Kconfig"
 source "fs/hpfs/Kconfig"
 source "fs/qnx4/Kconfig"
+source "fs/qnx6/Kconfig"
 source "fs/romfs/Kconfig"
 source "fs/pstore/Kconfig"
 source "fs/sysv/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 93804d4d66e..2fb97793467 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_UBIFS_FS)		+= ubifs/
 obj-$(CONFIG_AFFS_FS)		+= affs/
 obj-$(CONFIG_ROMFS_FS)		+= romfs/
 obj-$(CONFIG_QNX4FS_FS)		+= qnx4/
+obj-$(CONFIG_QNX6FS_FS)		+= qnx6/
 obj-$(CONFIG_AUTOFS4_FS)	+= autofs4/
 obj-$(CONFIG_ADFS_FS)		+= adfs/
 obj-$(CONFIG_FUSE_FS)		+= fuse/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 8e3b36ace30..06fdcc9382c 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -483,10 +483,9 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
 
 	sb->s_d_op = &adfs_dentry_operations;
 	root = adfs_iget(sb, &root_obj);
-	sb->s_root = d_alloc_root(root);
+	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
 		int i;
-		iput(root);
 		for (i = 0; i < asb->s_map_size; i++)
 			brelse(asb->s_map[i].dm_bh);
 		kfree(asb->s_map);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 8ba73fed796..0782653a05a 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -473,7 +473,7 @@ got_root:
 	root_inode = affs_iget(sb, root_block);
 	if (IS_ERR(root_inode)) {
 		ret = PTR_ERR(root_inode);
-		goto out_error_noinode;
+		goto out_error;
 	}
 
 	if (AFFS_SB(sb)->s_flags & SF_INTL)
@@ -481,7 +481,7 @@ got_root:
 	else
 		sb->s_d_op = &affs_dentry_operations;
 
-	sb->s_root = d_alloc_root(root_inode);
+	sb->s_root = d_make_root(root_inode);
 	if (!sb->s_root) {
 		printk(KERN_ERR "AFFS: Get root inode failed\n");
 		goto out_error;
@@ -494,9 +494,6 @@ got_root:
 	 * Begin the cascaded cleanup ...
 	 */
 out_error:
-	if (root_inode)
-		iput(root_inode);
-out_error_noinode:
 	kfree(sbi->s_bitmap);
 	affs_brelse(root_bh);
 	kfree(sbi->s_prefix);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 14d89fa58fe..8f6e9234d56 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -251,7 +251,7 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
 	ASSERT(key != NULL);
 
 	vnode = AFS_FS_I(mapping->host);
-	if (vnode->flags & AFS_VNODE_DELETED) {
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
 		_leave(" = -ESTALE");
 		return -ESTALE;
 	}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 2f213d109c2..b960ff05ea0 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -365,10 +365,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
 		_debug("extract data");
 		if (call->count > 0) {
 			page = call->reply3;
-			buffer = kmap_atomic(page, KM_USER0);
+			buffer = kmap_atomic(page);
 			ret = afs_extract_data(call, skb, last, buffer,
 					       call->count);
-			kunmap_atomic(buffer, KM_USER0);
+			kunmap_atomic(buffer);
 			switch (ret) {
 			case 0:		break;
 			case -EAGAIN:	return 0;
@@ -411,9 +411,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
 	if (call->count < PAGE_SIZE) {
 		_debug("clear");
 		page = call->reply3;
-		buffer = kmap_atomic(page, KM_USER0);
+		buffer = kmap_atomic(page);
 		memset(buffer + call->count, 0, PAGE_SIZE - call->count);
-		kunmap_atomic(buffer, KM_USER0);
+		kunmap_atomic(buffer);
 	}
 
 	_leave(" = 0 [done]");
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index d2b0888126d..a306bb6d88d 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -109,7 +109,7 @@ struct afs_call {
 	unsigned		reply_size;	/* current size of reply */
 	unsigned		first_offset;	/* offset into mapping[first] */
 	unsigned		last_to;	/* amount of mapping[last] */
-	unsigned short		offset;		/* offset into received data store */
+	unsigned		offset;		/* offset into received data store */
 	unsigned char		unmarshall;	/* unmarshalling phase */
 	bool			incoming;	/* T if incoming call */
 	bool			send_pages;	/* T if data from mapping should be sent */
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 8f4ce2658b7..298cf8919ec 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -200,9 +200,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 		if (PageError(page))
 			goto error;
 
-		buf = kmap_atomic(page, KM_USER0);
+		buf = kmap_atomic(page);
 		memcpy(devname, buf, size);
-		kunmap_atomic(buf, KM_USER0);
+		kunmap_atomic(buf);
 		page_cache_release(page);
 		page = NULL;
 	}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index e45a323aebb..8ad8c2a0703 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -314,6 +314,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	struct msghdr msg;
 	struct kvec iov[1];
 	int ret;
+	struct sk_buff *skb;
 
 	_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
 
@@ -380,6 +381,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 
 error_do_abort:
 	rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
+	while ((skb = skb_dequeue(&call->rx_queue)))
+		afs_free_skb(skb);
 	rxrpc_kernel_end_call(rxcall);
 	call->rxcall = NULL;
 error_kill_call:
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 983ec59fc80..f02b31e7e64 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -301,7 +301,6 @@ static int afs_fill_super(struct super_block *sb,
 {
 	struct afs_super_info *as = sb->s_fs_info;
 	struct afs_fid fid;
-	struct dentry *root = NULL;
 	struct inode *inode = NULL;
 	int ret;
 
@@ -327,18 +326,16 @@ static int afs_fill_super(struct super_block *sb,
 		set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
 
 	ret = -ENOMEM;
-	root = d_alloc_root(inode);
-	if (!root)
+	sb->s_root = d_make_root(inode);
+	if (!sb->s_root)
 		goto error;
 
 	sb->s_d_op = &afs_fs_dentry_operations;
-	sb->s_root = root;
 
 	_leave(" = 0");
 	return 0;
 
 error:
-	iput(inode);
 	_leave(" = %d", ret);
 	return ret;
 }
diff --git a/fs/aio.c b/fs/aio.c
index 969beb0e223..c7acaf3167a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -160,7 +160,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 
 	info->nr = nr_events;		/* trusted copy */
 
-	ring = kmap_atomic(info->ring_pages[0], KM_USER0);
+	ring = kmap_atomic(info->ring_pages[0]);
 	ring->nr = nr_events;	/* user copy */
 	ring->id = ctx->user_id;
 	ring->head = ring->tail = 0;
@@ -168,47 +168,38 @@ static int aio_setup_ring(struct kioctx *ctx)
 	ring->compat_features = AIO_RING_COMPAT_FEATURES;
 	ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
 	ring->header_length = sizeof(struct aio_ring);
-	kunmap_atomic(ring, KM_USER0);
+	kunmap_atomic(ring);
 
 	return 0;
 }
 
 
 /* aio_ring_event: returns a pointer to the event at the given index from
- * kmap_atomic(, km).  Release the pointer with put_aio_ring_event();
+ * kmap_atomic().  Release the pointer with put_aio_ring_event();
  */
 #define AIO_EVENTS_PER_PAGE	(PAGE_SIZE / sizeof(struct io_event))
 #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
 #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
 
-#define aio_ring_event(info, nr, km) ({					\
+#define aio_ring_event(info, nr) ({					\
 	unsigned pos = (nr) + AIO_EVENTS_OFFSET;			\
 	struct io_event *__event;					\
 	__event = kmap_atomic(						\
-			(info)->ring_pages[pos / AIO_EVENTS_PER_PAGE], km); \
+			(info)->ring_pages[pos / AIO_EVENTS_PER_PAGE]); \
 	__event += pos % AIO_EVENTS_PER_PAGE;				\
 	__event;							\
 })
 
-#define put_aio_ring_event(event, km) do {	\
+#define put_aio_ring_event(event) do {		\
 	struct io_event *__event = (event);	\
 	(void)__event;				\
-	kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
+	kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK)); \
 } while(0)
 
 static void ctx_rcu_free(struct rcu_head *head)
 {
 	struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
-	unsigned nr_events = ctx->max_reqs;
-
 	kmem_cache_free(kioctx_cachep, ctx);
-
-	if (nr_events) {
-		spin_lock(&aio_nr_lock);
-		BUG_ON(aio_nr - nr_events > aio_nr);
-		aio_nr -= nr_events;
-		spin_unlock(&aio_nr_lock);
-	}
 }
 
 /* __put_ioctx
@@ -217,23 +208,23 @@ static void ctx_rcu_free(struct rcu_head *head)
  */
 static void __put_ioctx(struct kioctx *ctx)
 {
+	unsigned nr_events = ctx->max_reqs;
 	BUG_ON(ctx->reqs_active);
 
-	cancel_delayed_work(&ctx->wq);
-	cancel_work_sync(&ctx->wq.work);
+	cancel_delayed_work_sync(&ctx->wq);
 	aio_free_ring(ctx);
 	mmdrop(ctx->mm);
 	ctx->mm = NULL;
+	if (nr_events) {
+		spin_lock(&aio_nr_lock);
+		BUG_ON(aio_nr - nr_events > aio_nr);
+		aio_nr -= nr_events;
+		spin_unlock(&aio_nr_lock);
+	}
 	pr_debug("__put_ioctx: freeing %p\n", ctx);
 	call_rcu(&ctx->rcu_head, ctx_rcu_free);
 }
 
-static inline void get_ioctx(struct kioctx *kioctx)
-{
-	BUG_ON(atomic_read(&kioctx->users) <= 0);
-	atomic_inc(&kioctx->users);
-}
-
 static inline int try_get_ioctx(struct kioctx *kioctx)
 {
 	return atomic_inc_not_zero(&kioctx->users);
@@ -253,7 +244,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 {
 	struct mm_struct *mm;
 	struct kioctx *ctx;
-	int did_sync = 0;
+	int err = -ENOMEM;
 
 	/* Prevent overflows */
 	if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
@@ -262,7 +253,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 		return ERR_PTR(-EINVAL);
 	}
 
-	if ((unsigned long)nr_events > aio_max_nr)
+	if (!nr_events || (unsigned long)nr_events > aio_max_nr)
 		return ERR_PTR(-EAGAIN);
 
 	ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
@@ -273,7 +264,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	mm = ctx->mm = current->mm;
 	atomic_inc(&mm->mm_count);
 
-	atomic_set(&ctx->users, 1);
+	atomic_set(&ctx->users, 2);
 	spin_lock_init(&ctx->ctx_lock);
 	spin_lock_init(&ctx->ring_info.ring_lock);
 	init_waitqueue_head(&ctx->wait);
@@ -286,25 +277,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 		goto out_freectx;
 
 	/* limit the number of system wide aios */
-	do {
-		spin_lock_bh(&aio_nr_lock);
-		if (aio_nr + nr_events > aio_max_nr ||
-		    aio_nr + nr_events < aio_nr)
-			ctx->max_reqs = 0;
-		else
-			aio_nr += ctx->max_reqs;
-		spin_unlock_bh(&aio_nr_lock);
-		if (ctx->max_reqs || did_sync)
-			break;
-
-		/* wait for rcu callbacks to have completed before giving up */
-		synchronize_rcu();
-		did_sync = 1;
-		ctx->max_reqs = nr_events;
-	} while (1);
-
-	if (ctx->max_reqs == 0)
+	spin_lock(&aio_nr_lock);
+	if (aio_nr + nr_events > aio_max_nr ||
+	    aio_nr + nr_events < aio_nr) {
+		spin_unlock(&aio_nr_lock);
 		goto out_cleanup;
+	}
+	aio_nr += ctx->max_reqs;
+	spin_unlock(&aio_nr_lock);
 
 	/* now link into global list. */
 	spin_lock(&mm->ioctx_lock);
@@ -316,16 +296,13 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	return ctx;
 
 out_cleanup:
-	__put_ioctx(ctx);
-	return ERR_PTR(-EAGAIN);
-
+	err = -EAGAIN;
+	aio_free_ring(ctx);
 out_freectx:
 	mmdrop(mm);
 	kmem_cache_free(kioctx_cachep, ctx);
-	ctx = ERR_PTR(-ENOMEM);
-
-	dprintk("aio: error allocating ioctx %p\n", ctx);
-	return ctx;
+	dprintk("aio: error allocating ioctx %d\n", err);
+	return ERR_PTR(err);
 }
 
 /* aio_cancel_all
@@ -413,10 +390,6 @@ void exit_aio(struct mm_struct *mm)
 		aio_cancel_all(ctx);
 
 		wait_for_all_aios(ctx);
-		/*
-		 * Ensure we don't leave the ctx on the aio_wq
-		 */
-		cancel_work_sync(&ctx->wq.work);
 
 		if (1 != atomic_read(&ctx->users))
 			printk(KERN_DEBUG
@@ -490,6 +463,8 @@ static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch)
 		kmem_cache_free(kiocb_cachep, req);
 		ctx->reqs_active--;
 	}
+	if (unlikely(!ctx->reqs_active && ctx->dead))
+		wake_up_all(&ctx->wait);
 	spin_unlock_irq(&ctx->ctx_lock);
 }
 
@@ -607,11 +582,16 @@ static void aio_fput_routine(struct work_struct *data)
 			fput(req->ki_filp);
 
 		/* Link the iocb into the context's free list */
+		rcu_read_lock();
 		spin_lock_irq(&ctx->ctx_lock);
 		really_put_req(ctx, req);
+		/*
+		 * at that point ctx might've been killed, but actual
+		 * freeing is RCU'd
+		 */
 		spin_unlock_irq(&ctx->ctx_lock);
+		rcu_read_unlock();
 
-		put_ioctx(ctx);
 		spin_lock_irq(&fput_lock);
 	}
 	spin_unlock_irq(&fput_lock);
@@ -642,7 +622,6 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 	 * this function will be executed w/out any aio kthread wakeup.
 	 */
 	if (unlikely(!fput_atomic(req->ki_filp))) {
-		get_ioctx(ctx);
 		spin_lock(&fput_lock);
 		list_add(&req->ki_list, &fput_head);
 		spin_unlock(&fput_lock);
@@ -920,7 +899,7 @@ static void aio_kick_handler(struct work_struct *work)
  	unuse_mm(mm);
 	set_fs(oldfs);
 	/*
-	 * we're in a worker thread already, don't use queue_delayed_work,
+	 * we're in a worker thread already; no point using non-zero delay
 	 */
 	if (requeue)
 		queue_delayed_work(aio_wq, &ctx->wq, 0);
@@ -1019,10 +998,10 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
 	if (kiocbIsCancelled(iocb))
 		goto put_rq;
 
-	ring = kmap_atomic(info->ring_pages[0], KM_IRQ1);
+	ring = kmap_atomic(info->ring_pages[0]);
 
 	tail = info->tail;
-	event = aio_ring_event(info, tail, KM_IRQ0);
+	event = aio_ring_event(info, tail);
 	if (++tail >= info->nr)
 		tail = 0;
 
@@ -1043,8 +1022,8 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
 	info->tail = tail;
 	ring->tail = tail;
 
-	put_aio_ring_event(event, KM_IRQ0);
-	kunmap_atomic(ring, KM_IRQ1);
+	put_aio_ring_event(event);
+	kunmap_atomic(ring);
 
 	pr_debug("added to ring %p at [%lu]\n", iocb, tail);
 
@@ -1089,7 +1068,7 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
 	unsigned long head;
 	int ret = 0;
 
-	ring = kmap_atomic(info->ring_pages[0], KM_USER0);
+	ring = kmap_atomic(info->ring_pages[0]);
 	dprintk("in aio_read_evt h%lu t%lu m%lu\n",
 		 (unsigned long)ring->head, (unsigned long)ring->tail,
 		 (unsigned long)ring->nr);
@@ -1101,18 +1080,18 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
 
 	head = ring->head % info->nr;
 	if (head != ring->tail) {
-		struct io_event *evp = aio_ring_event(info, head, KM_USER1);
+		struct io_event *evp = aio_ring_event(info, head);
 		*ent = *evp;
 		head = (head + 1) % info->nr;
 		smp_mb(); /* finish reading the event before updatng the head */
 		ring->head = head;
 		ret = 1;
-		put_aio_ring_event(evp, KM_USER1);
+		put_aio_ring_event(evp);
 	}
 	spin_unlock(&info->ring_lock);
 
 out:
-	kunmap_atomic(ring, KM_USER0);
+	kunmap_atomic(ring);
 	dprintk("leaving aio_read_evt: %d  h%lu t%lu\n", ret,
 		 (unsigned long)ring->head, (unsigned long)ring->tail);
 	return ret;
@@ -1336,10 +1315,10 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 	ret = PTR_ERR(ioctx);
 	if (!IS_ERR(ioctx)) {
 		ret = put_user(ioctx->user_id, ctxp);
-		if (!ret)
+		if (!ret) {
+			put_ioctx(ioctx);
 			return 0;
-
-		get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
+		}
 		io_destroy(ioctx);
 	}
 
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index f11e43ed907..28d39fb84ae 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -39,19 +39,6 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
 	.d_dname	= anon_inodefs_dname,
 };
 
-static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
-				int flags, const char *dev_name, void *data)
-{
-	return mount_pseudo(fs_type, "anon_inode:", NULL,
-			&anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
-}
-
-static struct file_system_type anon_inode_fs_type = {
-	.name		= "anon_inodefs",
-	.mount		= anon_inodefs_mount,
-	.kill_sb	= kill_anon_super,
-};
-
 /*
  * nop .set_page_dirty method so that people can use .page_mkwrite on
  * anon inodes.
@@ -65,6 +52,62 @@ static const struct address_space_operations anon_aops = {
 	.set_page_dirty = anon_set_page_dirty,
 };
 
+/*
+ * A single inode exists for all anon_inode files. Contrary to pipes,
+ * anon_inode inodes have no associated per-instance data, so we need
+ * only allocate one of them.
+ */
+static struct inode *anon_inode_mkinode(struct super_block *s)
+{
+	struct inode *inode = new_inode_pseudo(s);
+
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	inode->i_ino = get_next_ino();
+	inode->i_fop = &anon_inode_fops;
+
+	inode->i_mapping->a_ops = &anon_aops;
+
+	/*
+	 * Mark the inode dirty from the very beginning,
+	 * that way it will never be moved to the dirty
+	 * list because mark_inode_dirty() will think
+	 * that it already _is_ on the dirty list.
+	 */
+	inode->i_state = I_DIRTY;
+	inode->i_mode = S_IRUSR | S_IWUSR;
+	inode->i_uid = current_fsuid();
+	inode->i_gid = current_fsgid();
+	inode->i_flags |= S_PRIVATE;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	return inode;
+}
+
+static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
+				int flags, const char *dev_name, void *data)
+{
+	struct dentry *root;
+	root = mount_pseudo(fs_type, "anon_inode:", NULL,
+			&anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
+	if (!IS_ERR(root)) {
+		struct super_block *s = root->d_sb;
+		anon_inode_inode = anon_inode_mkinode(s);
+		if (IS_ERR(anon_inode_inode)) {
+			dput(root);
+			deactivate_locked_super(s);
+			root = ERR_CAST(anon_inode_inode);
+		}
+	}
+	return root;
+}
+
+static struct file_system_type anon_inode_fs_type = {
+	.name		= "anon_inodefs",
+	.mount		= anon_inodefs_mount,
+	.kill_sb	= kill_anon_super,
+};
+
 /**
  * anon_inode_getfile - creates a new file instance by hooking it up to an
  *                      anonymous inode, and a dentry that describe the "class"
@@ -180,38 +223,6 @@ err_put_unused_fd:
 }
 EXPORT_SYMBOL_GPL(anon_inode_getfd);
 
-/*
- * A single inode exists for all anon_inode files. Contrary to pipes,
- * anon_inode inodes have no associated per-instance data, so we need
- * only allocate one of them.
- */
-static struct inode *anon_inode_mkinode(void)
-{
-	struct inode *inode = new_inode_pseudo(anon_inode_mnt->mnt_sb);
-
-	if (!inode)
-		return ERR_PTR(-ENOMEM);
-
-	inode->i_ino = get_next_ino();
-	inode->i_fop = &anon_inode_fops;
-
-	inode->i_mapping->a_ops = &anon_aops;
-
-	/*
-	 * Mark the inode dirty from the very beginning,
-	 * that way it will never be moved to the dirty
-	 * list because mark_inode_dirty() will think
-	 * that it already _is_ on the dirty list.
-	 */
-	inode->i_state = I_DIRTY;
-	inode->i_mode = S_IRUSR | S_IWUSR;
-	inode->i_uid = current_fsuid();
-	inode->i_gid = current_fsgid();
-	inode->i_flags |= S_PRIVATE;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	return inode;
-}
-
 static int __init anon_inode_init(void)
 {
 	int error;
@@ -224,16 +235,8 @@ static int __init anon_inode_init(void)
 		error = PTR_ERR(anon_inode_mnt);
 		goto err_unregister_filesystem;
 	}
-	anon_inode_inode = anon_inode_mkinode();
-	if (IS_ERR(anon_inode_inode)) {
-		error = PTR_ERR(anon_inode_inode);
-		goto err_mntput;
-	}
-
 	return 0;
 
-err_mntput:
-	kern_unmount(anon_inode_mnt);
 err_unregister_filesystem:
 	unregister_filesystem(&anon_inode_fs_type);
 err_exit:
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index d8d8e7ba6a1..eb1cc92cd67 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -110,6 +110,7 @@ struct autofs_sb_info {
 	int sub_version;
 	int min_proto;
 	int max_proto;
+	int compat_daemon;
 	unsigned long exp_timeout;
 	unsigned int type;
 	int reghost_enabled;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 76741d8d778..85f1fcdb30e 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -385,6 +385,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
 		sbi->pipefd = pipefd;
 		sbi->pipe = pipe;
 		sbi->catatonic = 0;
+		sbi->compat_daemon = is_compat_task();
 	}
 out:
 	mutex_unlock(&sbi->wq_mutex);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 450f529a4ea..1feb68ecef9 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -124,6 +124,7 @@ start:
 	/* Negative dentry - try next */
 	if (!simple_positive(q)) {
 		spin_unlock(&p->d_lock);
+		lock_set_subclass(&q->d_lock.dep_map, 0, _RET_IP_);
 		p = q;
 		goto again;
 	}
@@ -186,6 +187,7 @@ again:
 	/* Negative dentry - try next */
 	if (!simple_positive(ret)) {
 		spin_unlock(&p->d_lock);
+		lock_set_subclass(&ret->d_lock.dep_map, 0, _RET_IP_);
 		p = ret;
 		goto again;
 	}
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index c038727b405..cddc74b9cdb 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -31,11 +31,11 @@ static int __init init_autofs4_fs(void)
 {
 	int err;
 
+	autofs_dev_ioctl_init();
+
 	err = register_filesystem(&autofs_fs_type);
 	if (err)
-		return err;
-
-	autofs_dev_ioctl_init();
+		autofs_dev_ioctl_exit();
 
 	return err;
 }
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index e16980b00b8..d8dc002e9cc 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -19,6 +19,7 @@
 #include <linux/parser.h>
 #include <linux/bitops.h>
 #include <linux/magic.h>
+#include <linux/compat.h>
 #include "autofs_i.h"
 #include <linux/module.h>
 
@@ -224,6 +225,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	set_autofs_type_indirect(&sbi->type);
 	sbi->min_proto = 0;
 	sbi->max_proto = 0;
+	sbi->compat_daemon = is_compat_task();
 	mutex_init(&sbi->wq_mutex);
 	mutex_init(&sbi->pipe_mutex);
 	spin_lock_init(&sbi->fs_lock);
@@ -245,12 +247,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	if (!ino)
 		goto fail_free;
 	root_inode = autofs4_get_inode(s, S_IFDIR | 0755);
-	if (!root_inode)
-		goto fail_ino;
-
-	root = d_alloc_root(root_inode);
+	root = d_make_root(root_inode);
 	if (!root)
-		goto fail_iput;
+		goto fail_ino;
 	pipe = NULL;
 
 	root->d_fsdata = ino;
@@ -315,9 +314,6 @@ fail_fput:
 fail_dput:
 	dput(root);
 	goto fail_free;
-fail_iput:
-	printk("autofs: get root dentry failed\n");
-	iput(root_inode);
 fail_ino:
 	kfree(ino);
 fail_free:
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index da8876d38a7..9c098db4334 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -91,7 +91,24 @@ static int autofs4_write(struct autofs_sb_info *sbi,
 
 	return (bytes > 0);
 }
-	
+
+/*
+ * The autofs_v5 packet was misdesigned.
+ *
+ * The packets are identical on x86-32 and x86-64, but have different
+ * alignment. Which means that 'sizeof()' will give different results.
+ * Fix it up for the case of running 32-bit user mode on a 64-bit kernel.
+ */
+static noinline size_t autofs_v5_packet_size(struct autofs_sb_info *sbi)
+{
+	size_t pktsz = sizeof(struct autofs_v5_packet);
+#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT)
+	if (sbi->compat_daemon > 0)
+		pktsz -= 4;
+#endif
+	return pktsz;
+}
+
 static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 				 struct autofs_wait_queue *wq,
 				 int type)
@@ -155,8 +172,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 	{
 		struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet;
 
-		pktsz = sizeof(*packet);
-
+		pktsz = autofs_v5_packet_size(sbi);
 		packet->wait_queue_token = wq->wait_queue_token;
 		packet->len = wq->name.len;
 		memcpy(packet->name, wq->name.name, wq->name.len);
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 6e6d536767f..e18da23d42b 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -852,9 +852,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
 		ret = PTR_ERR(root);
 		goto unacquire_priv_sbp;
 	}
-	sb->s_root = d_alloc_root(root);
+	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
-		iput(root);
 		befs_error(sb, "get root inode failed");
 		goto unacquire_priv_sbp;
 	}
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index b0391bc402b..e23dc7c8b88 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -367,9 +367,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 		ret = PTR_ERR(inode);
 		goto out2;
 	}
-	s->s_root = d_alloc_root(inode);
+	s->s_root = d_make_root(inode);
 	if (!s->s_root) {
-		iput(inode);
 		ret = -ENOMEM;
 		goto out2;
 	}
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index a6395bdb26a..4d5e6d26578 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -259,8 +259,14 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	current->mm->free_area_cache = current->mm->mmap_base;
 	current->mm->cached_hole_size = 0;
 
+	retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
+	if (retval < 0) {
+		/* Someone check-me: is this error path enough? */
+		send_sig(SIGKILL, current, 0);
+		return retval;
+	}
+
 	install_exec_creds(bprm);
- 	current->flags &= ~PF_FORKNOEXEC;
 
 	if (N_MAGIC(ex) == OMAGIC) {
 		unsigned long text_addr, map_size;
@@ -352,13 +358,6 @@ beyond_if:
 		return retval;
 	}
 
-	retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
-	if (retval < 0) { 
-		/* Someone check-me: is this error path enough? */ 
-		send_sig(SIGKILL, current, 0); 
-		return retval;
-	}
-
 	current->mm->start_stack =
 		(unsigned long) create_aout_tables((char __user *) bprm->p, bprm);
 #ifdef __alpha__
@@ -454,7 +453,8 @@ out:
 
 static int __init init_aout_binfmt(void)
 {
-	return register_binfmt(&aout_format);
+	register_binfmt(&aout_format);
+	return 0;
 }
 
 static void __exit exit_aout_binfmt(void)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index bcb884e2d61..81878b78c9d 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -712,7 +712,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		goto out_free_dentry;
 
 	/* OK, This is the point of no return */
-	current->flags &= ~PF_FORKNOEXEC;
 	current->mm->def_flags = def_flags;
 
 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
@@ -934,7 +933,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 
 	install_exec_creds(bprm);
-	current->flags &= ~PF_FORKNOEXEC;
 	retval = create_elf_tables(bprm, &loc->elf_ex,
 			  load_addr, interp_load_addr);
 	if (retval < 0) {
@@ -1421,7 +1419,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 	for (i = 1; i < view->n; ++i) {
 		const struct user_regset *regset = &view->regsets[i];
 		do_thread_regset_writeback(t->task, regset);
-		if (regset->core_note_type &&
+		if (regset->core_note_type && regset->get &&
 		    (!regset->active || regset->active(t->task, regset))) {
 			int ret;
 			size_t size = regset->n * regset->size;
@@ -2077,7 +2075,8 @@ out:
 
 static int __init init_elf_binfmt(void)
 {
-	return register_binfmt(&elf_format);
+	register_binfmt(&elf_format);
+	return 0;
 }
 
 static void __exit exit_elf_binfmt(void)
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 30745f459fa..c64bf5ee2df 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -91,7 +91,8 @@ static struct linux_binfmt elf_fdpic_format = {
 
 static int __init init_elf_fdpic_binfmt(void)
 {
-	return register_binfmt(&elf_fdpic_format);
+	register_binfmt(&elf_fdpic_format);
+	return 0;
 }
 
 static void __exit exit_elf_fdpic_binfmt(void)
@@ -334,8 +335,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 	current->mm->context.exec_fdpic_loadmap = 0;
 	current->mm->context.interp_fdpic_loadmap = 0;
 
-	current->flags &= ~PF_FORKNOEXEC;
-
 #ifdef CONFIG_MMU
 	elf_fdpic_arch_lay_out_mm(&exec_params,
 				  &interp_params,
@@ -413,7 +412,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 #endif
 
 	install_exec_creds(bprm);
-	current->flags &= ~PF_FORKNOEXEC;
 	if (create_elf_fdpic_tables(bprm, current->mm,
 				    &exec_params, &interp_params) < 0)
 		goto error_kill;
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index b8e8b0acf9b..2790c7e1912 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -100,7 +100,8 @@ static struct linux_binfmt em86_format = {
 
 static int __init init_em86_binfmt(void)
 {
-	return register_binfmt(&em86_format);
+	register_binfmt(&em86_format);
+	return 0;
 }
 
 static void __exit exit_em86_binfmt(void)
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 1bffbe0ed77..04f61f0bdfd 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -902,7 +902,6 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 						libinfo.lib_list[j].start_data:UNLOADED_LIB;
 
 	install_exec_creds(bprm);
- 	current->flags &= ~PF_FORKNOEXEC;
 
 	set_binfmt(&flat_format);
 
@@ -950,7 +949,8 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 
 static int __init init_flat_binfmt(void)
 {
-	return register_binfmt(&flat_format);
+	register_binfmt(&flat_format);
+	return 0;
 }
 
 /****************************************************************************/
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index a9198dfd5f8..1ffb60355ca 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -726,11 +726,8 @@ static struct file_system_type bm_fs_type = {
 static int __init init_misc_binfmt(void)
 {
 	int err = register_filesystem(&bm_fs_type);
-	if (!err) {
-		err = insert_binfmt(&misc_format);
-		if (err)
-			unregister_filesystem(&bm_fs_type);
-	}
+	if (!err)
+		insert_binfmt(&misc_format);
 	return err;
 }
 
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 396a9884591..d3b8c1f6315 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -105,7 +105,8 @@ static struct linux_binfmt script_format = {
 
 static int __init init_script_binfmt(void)
 {
-	return register_binfmt(&script_format);
+	register_binfmt(&script_format);
+	return 0;
 }
 
 static void __exit exit_script_binfmt(void)
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index cc8560f6c9b..e4fc746629a 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -225,7 +225,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 		goto out_free;
 
 	/* OK, This is the point of no return */
-	current->flags &= ~PF_FORKNOEXEC;
 	current->personality = PER_HPUX;
 	setup_new_exec(bprm);
 
@@ -289,7 +288,8 @@ static int load_som_library(struct file *f)
 
 static int __init init_som_binfmt(void)
 {
-	return register_binfmt(&som_format);
+	register_binfmt(&som_format);
+	return 0;
 }
 
 static void __exit exit_som_binfmt(void)
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index c2183f3917c..e85c04b9f61 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -357,7 +357,7 @@ static void bio_integrity_generate(struct bio *bio)
 	bix.sector_size = bi->sector_size;
 
 	bio_for_each_segment(bv, bio, i) {
-		void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
+		void *kaddr = kmap_atomic(bv->bv_page);
 		bix.data_buf = kaddr + bv->bv_offset;
 		bix.data_size = bv->bv_len;
 		bix.prot_buf = prot_buf;
@@ -371,7 +371,7 @@ static void bio_integrity_generate(struct bio *bio)
 		total += sectors * bi->tuple_size;
 		BUG_ON(total > bio->bi_integrity->bip_size);
 
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 	}
 }
 
@@ -498,7 +498,7 @@ static int bio_integrity_verify(struct bio *bio)
 	bix.sector_size = bi->sector_size;
 
 	bio_for_each_segment(bv, bio, i) {
-		void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
+		void *kaddr = kmap_atomic(bv->bv_page);
 		bix.data_buf = kaddr + bv->bv_offset;
 		bix.data_size = bv->bv_len;
 		bix.prot_buf = prot_buf;
@@ -507,7 +507,7 @@ static int bio_integrity_verify(struct bio *bio)
 		ret = bi->verify_fn(&bix);
 
 		if (ret) {
-			kunmap_atomic(kaddr, KM_USER0);
+			kunmap_atomic(kaddr);
 			return ret;
 		}
 
@@ -517,7 +517,7 @@ static int bio_integrity_verify(struct bio *bio)
 		total += sectors * bi->tuple_size;
 		BUG_ON(total > bio->bi_integrity->bip_size);
 
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 	}
 
 	return ret;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 0e575d1304b..5e9f198f771 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1183,8 +1183,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			 * The latter is necessary to prevent ghost
 			 * partitions on a removed medium.
 			 */
-			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
-				rescan_partitions(disk, bdev);
+			if (bdev->bd_invalidated) {
+				if (!ret)
+					rescan_partitions(disk, bdev);
+				else if (ret == -ENOMEDIUM)
+					invalidate_partitions(disk, bdev);
+			}
 			if (ret)
 				goto out_clear;
 		} else {
@@ -1214,8 +1218,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			if (bdev->bd_disk->fops->open)
 				ret = bdev->bd_disk->fops->open(bdev, mode);
 			/* the same as first opener case, read comment there */
-			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
-				rescan_partitions(bdev->bd_disk, bdev);
+			if (bdev->bd_invalidated) {
+				if (!ret)
+					rescan_partitions(bdev->bd_disk, bdev);
+				else if (ret == -ENOMEDIUM)
+					invalidate_partitions(bdev->bd_disk, bdev);
+			}
 			if (ret)
 				goto out_unlock_bdev;
 		}
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 633c701a287..0436c12da8c 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -583,7 +583,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	struct btrfs_path *path;
 	struct btrfs_key info_key = { 0 };
 	struct btrfs_delayed_ref_root *delayed_refs = NULL;
-	struct btrfs_delayed_ref_head *head = NULL;
+	struct btrfs_delayed_ref_head *head;
 	int info_level = 0;
 	int ret;
 	struct list_head prefs_delayed;
@@ -607,6 +607,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	 * at a specified point in time
 	 */
 again:
+	head = NULL;
+
 	ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
 	if (ret < 0)
 		goto out;
@@ -635,8 +637,10 @@ again:
 			goto again;
 		}
 		ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed);
-		if (ret)
+		if (ret) {
+			spin_unlock(&delayed_refs->lock);
 			goto out;
+		}
 	}
 	spin_unlock(&delayed_refs->lock);
 
@@ -892,6 +896,8 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 		if (eb != eb_in)
 			free_extent_buffer(eb);
 		ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
+		if (ret > 0)
+			ret = -ENOENT;
 		if (ret)
 			break;
 		next_inum = found_key.offset;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index b669a7d8e49..c053e90f200 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -89,7 +89,6 @@
 #include "disk-io.h"
 #include "transaction.h"
 #include "extent_io.h"
-#include "disk-io.h"
 #include "volumes.h"
 #include "print-tree.h"
 #include "locking.h"
@@ -644,7 +643,7 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
 static int btrfsic_process_superblock(struct btrfsic_state *state,
 				      struct btrfs_fs_devices *fs_devices)
 {
-	int ret;
+	int ret = 0;
 	struct btrfs_super_block *selected_super;
 	struct list_head *dev_head = &fs_devices->devices;
 	struct btrfs_device *device;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 14f1c5a0b2d..b805afb37fa 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -120,10 +120,10 @@ static int check_compressed_csum(struct inode *inode,
 		page = cb->compressed_pages[i];
 		csum = ~(u32)0;
 
-		kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = kmap_atomic(page);
 		csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
 		btrfs_csum_final(csum, (char *)&csum);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 
 		if (csum != *cb_sum) {
 			printk(KERN_INFO "btrfs csum failed ino %llu "
@@ -521,10 +521,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 			if (zero_offset) {
 				int zeros;
 				zeros = PAGE_CACHE_SIZE - zero_offset;
-				userpage = kmap_atomic(page, KM_USER0);
+				userpage = kmap_atomic(page);
 				memset(userpage + zero_offset, 0, zeros);
 				flush_dcache_page(page);
-				kunmap_atomic(userpage, KM_USER0);
+				kunmap_atomic(userpage);
 			}
 		}
 
@@ -588,6 +588,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				   page_offset(bio->bi_io_vec->bv_page),
 				   PAGE_CACHE_SIZE);
 	read_unlock(&em_tree->lock);
+	if (!em)
+		return -EIO;
 
 	compressed_len = em->block_len;
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
@@ -991,9 +993,9 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 		bytes = min(PAGE_CACHE_SIZE - *pg_offset,
 			    PAGE_CACHE_SIZE - buf_offset);
 		bytes = min(bytes, working_bytes);
-		kaddr = kmap_atomic(page_out, KM_USER0);
+		kaddr = kmap_atomic(page_out);
 		memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		flush_dcache_page(page_out);
 
 		*pg_offset += bytes;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 27ebe61d3cc..80b6486fd5e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -886,7 +886,7 @@ struct btrfs_block_rsv {
 	u64 reserved;
 	struct btrfs_space_info *space_info;
 	spinlock_t lock;
-	unsigned int full:1;
+	unsigned int full;
 };
 
 /*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 811d9f918b1..534266fe505 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2260,6 +2260,12 @@ int open_ctree(struct super_block *sb,
 		goto fail_sb_buffer;
 	}
 
+	if (sectorsize < PAGE_SIZE) {
+		printk(KERN_WARNING "btrfs: Incompatible sector size "
+		       "found on %s\n", sb->s_id);
+		goto fail_sb_buffer;
+	}
+
 	mutex_lock(&fs_info->chunk_mutex);
 	ret = btrfs_read_sys_array(tree_root);
 	mutex_unlock(&fs_info->chunk_mutex);
@@ -2301,6 +2307,12 @@ int open_ctree(struct super_block *sb,
 
 	btrfs_close_extra_devices(fs_devices);
 
+	if (!fs_devices->latest_bdev) {
+		printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
+		       sb->s_id);
+		goto fail_tree_roots;
+	}
+
 retry_root_backup:
 	blocksize = btrfs_level_size(tree_root,
 				     btrfs_super_root_level(disk_super));
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 283af7a676a..37e0a800d34 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3312,7 +3312,8 @@ commit_trans:
 	}
 	data_sinfo->bytes_may_use += bytes;
 	trace_btrfs_space_reservation(root->fs_info, "space_info",
-				      (u64)data_sinfo, bytes, 1);
+				      (u64)(unsigned long)data_sinfo,
+				      bytes, 1);
 	spin_unlock(&data_sinfo->lock);
 
 	return 0;
@@ -3333,7 +3334,8 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
 	spin_lock(&data_sinfo->lock);
 	data_sinfo->bytes_may_use -= bytes;
 	trace_btrfs_space_reservation(root->fs_info, "space_info",
-				      (u64)data_sinfo, bytes, 0);
+				      (u64)(unsigned long)data_sinfo,
+				      bytes, 0);
 	spin_unlock(&data_sinfo->lock);
 }
 
@@ -3611,12 +3613,15 @@ static int may_commit_transaction(struct btrfs_root *root,
 	if (space_info != delayed_rsv->space_info)
 		return -ENOSPC;
 
+	spin_lock(&space_info->lock);
 	spin_lock(&delayed_rsv->lock);
-	if (delayed_rsv->size < bytes) {
+	if (space_info->bytes_pinned + delayed_rsv->size < bytes) {
 		spin_unlock(&delayed_rsv->lock);
+		spin_unlock(&space_info->lock);
 		return -ENOSPC;
 	}
 	spin_unlock(&delayed_rsv->lock);
+	spin_unlock(&space_info->lock);
 
 commit:
 	trans = btrfs_join_transaction(root);
@@ -3695,9 +3700,9 @@ again:
 		if (used + orig_bytes <= space_info->total_bytes) {
 			space_info->bytes_may_use += orig_bytes;
 			trace_btrfs_space_reservation(root->fs_info,
-						      "space_info",
-						      (u64)space_info,
-						      orig_bytes, 1);
+					      "space_info",
+					      (u64)(unsigned long)space_info,
+					      orig_bytes, 1);
 			ret = 0;
 		} else {
 			/*
@@ -3766,9 +3771,9 @@ again:
 		if (used + num_bytes < space_info->total_bytes + avail) {
 			space_info->bytes_may_use += orig_bytes;
 			trace_btrfs_space_reservation(root->fs_info,
-						      "space_info",
-						      (u64)space_info,
-						      orig_bytes, 1);
+					      "space_info",
+					      (u64)(unsigned long)space_info,
+					      orig_bytes, 1);
 			ret = 0;
 		} else {
 			wait_ordered = true;
@@ -3913,8 +3918,8 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
 			spin_lock(&space_info->lock);
 			space_info->bytes_may_use -= num_bytes;
 			trace_btrfs_space_reservation(fs_info, "space_info",
-						      (u64)space_info,
-						      num_bytes, 0);
+					      (u64)(unsigned long)space_info,
+					      num_bytes, 0);
 			space_info->reservation_progress++;
 			spin_unlock(&space_info->lock);
 		}
@@ -4105,7 +4110,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
 	num_bytes += div64_u64(data_used + meta_used, 50);
 
 	if (num_bytes * 3 > meta_used)
-		num_bytes = div64_u64(meta_used, 3);
+		num_bytes = div64_u64(meta_used, 3) * 2;
 
 	return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10);
 }
@@ -4132,14 +4137,14 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
 		block_rsv->reserved += num_bytes;
 		sinfo->bytes_may_use += num_bytes;
 		trace_btrfs_space_reservation(fs_info, "space_info",
-					      (u64)sinfo, num_bytes, 1);
+				      (u64)(unsigned long)sinfo, num_bytes, 1);
 	}
 
 	if (block_rsv->reserved >= block_rsv->size) {
 		num_bytes = block_rsv->reserved - block_rsv->size;
 		sinfo->bytes_may_use -= num_bytes;
 		trace_btrfs_space_reservation(fs_info, "space_info",
-					      (u64)sinfo, num_bytes, 0);
+				      (u64)(unsigned long)sinfo, num_bytes, 0);
 		sinfo->reservation_progress++;
 		block_rsv->reserved = block_rsv->size;
 		block_rsv->full = 1;
@@ -4192,7 +4197,8 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
 	if (!trans->bytes_reserved)
 		return;
 
-	trace_btrfs_space_reservation(root->fs_info, "transaction", (u64)trans,
+	trace_btrfs_space_reservation(root->fs_info, "transaction",
+				      (u64)(unsigned long)trans,
 				      trans->bytes_reserved, 0);
 	btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
 	trans->bytes_reserved = 0;
@@ -4710,9 +4716,9 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
 			space_info->bytes_reserved += num_bytes;
 			if (reserve == RESERVE_ALLOC) {
 				trace_btrfs_space_reservation(cache->fs_info,
-							      "space_info",
-							      (u64)space_info,
-							      num_bytes, 0);
+					      "space_info",
+					      (u64)(unsigned long)space_info,
+					      num_bytes, 0);
 				space_info->bytes_may_use -= num_bytes;
 			}
 		}
@@ -7886,9 +7892,16 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
 	u64 start;
 	u64 end;
 	u64 trimmed = 0;
+	u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
 	int ret = 0;
 
-	cache = btrfs_lookup_block_group(fs_info, range->start);
+	/*
+	 * try to trim all FS space, our block group may start from non-zero.
+	 */
+	if (range->len == total_bytes)
+		cache = btrfs_lookup_first_block_group(fs_info, range->start);
+	else
+		cache = btrfs_lookup_block_group(fs_info, range->start);
 
 	while (cache) {
 		if (cache->key.objectid >= (range->start + range->len)) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fcf77e1ded4..2862454bcdb 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -513,6 +513,15 @@ hit_next:
 	WARN_ON(state->end < start);
 	last_end = state->end;
 
+	if (state->end < end && !need_resched())
+		next_node = rb_next(&state->rb_node);
+	else
+		next_node = NULL;
+
+	/* the state doesn't have the wanted bits, go ahead */
+	if (!(state->state & bits))
+		goto next;
+
 	/*
 	 *     | ---- desired range ---- |
 	 *  | state | or
@@ -565,20 +574,15 @@ hit_next:
 		goto out;
 	}
 
-	if (state->end < end && prealloc && !need_resched())
-		next_node = rb_next(&state->rb_node);
-	else
-		next_node = NULL;
-
 	set |= clear_state_bit(tree, state, &bits, wake);
+next:
 	if (last_end == (u64)-1)
 		goto out;
 	start = last_end + 1;
 	if (start <= end && next_node) {
 		state = rb_entry(next_node, struct extent_state,
 				 rb_node);
-		if (state->start == start)
-			goto hit_next;
+		goto hit_next;
 	}
 	goto search_again;
 
@@ -961,8 +965,6 @@ hit_next:
 
 		set_state_bits(tree, state, &bits);
 		clear_state_bit(tree, state, &clear_bits, 0);
-
-		merge_state(tree, state);
 		if (last_end == (u64)-1)
 			goto out;
 
@@ -1007,7 +1009,6 @@ hit_next:
 		if (state->end <= end) {
 			set_state_bits(tree, state, &bits);
 			clear_state_bit(tree, state, &clear_bits, 0);
-			merge_state(tree, state);
 			if (last_end == (u64)-1)
 				goto out;
 			start = last_end + 1;
@@ -1068,8 +1069,6 @@ hit_next:
 
 		set_state_bits(tree, prealloc, &bits);
 		clear_state_bit(tree, prealloc, &clear_bits, 0);
-
-		merge_state(tree, prealloc);
 		prealloc = NULL;
 		goto out;
 	}
@@ -2154,13 +2153,46 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 		 "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode,
 		 failrec->this_mirror, num_copies, failrec->in_validation);
 
-	tree->ops->submit_bio_hook(inode, read_mode, bio, failrec->this_mirror,
-					failrec->bio_flags, 0);
-	return 0;
+	ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
+					 failrec->this_mirror,
+					 failrec->bio_flags, 0);
+	return ret;
 }
 
 /* lots and lots of room for performance fixes in the end_bio funcs */
 
+int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
+{
+	int uptodate = (err == 0);
+	struct extent_io_tree *tree;
+	int ret;
+
+	tree = &BTRFS_I(page->mapping->host)->io_tree;
+
+	if (tree->ops && tree->ops->writepage_end_io_hook) {
+		ret = tree->ops->writepage_end_io_hook(page, start,
+					       end, NULL, uptodate);
+		if (ret)
+			uptodate = 0;
+	}
+
+	if (!uptodate && tree->ops &&
+	    tree->ops->writepage_io_failed_hook) {
+		ret = tree->ops->writepage_io_failed_hook(NULL, page,
+						 start, end, NULL);
+		/* Writeback already completed */
+		if (ret == 0)
+			return 1;
+	}
+
+	if (!uptodate) {
+		clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
+		ClearPageUptodate(page);
+		SetPageError(page);
+	}
+	return 0;
+}
+
 /*
  * after a writepage IO is done, we need to:
  * clear the uptodate bits on error
@@ -2172,13 +2204,11 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
  */
 static void end_bio_extent_writepage(struct bio *bio, int err)
 {
-	int uptodate = err == 0;
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 	struct extent_io_tree *tree;
 	u64 start;
 	u64 end;
 	int whole_page;
-	int ret;
 
 	do {
 		struct page *page = bvec->bv_page;
@@ -2195,28 +2225,9 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
 
 		if (--bvec >= bio->bi_io_vec)
 			prefetchw(&bvec->bv_page->flags);
-		if (tree->ops && tree->ops->writepage_end_io_hook) {
-			ret = tree->ops->writepage_end_io_hook(page, start,
-						       end, NULL, uptodate);
-			if (ret)
-				uptodate = 0;
-		}
-
-		if (!uptodate && tree->ops &&
-		    tree->ops->writepage_io_failed_hook) {
-			ret = tree->ops->writepage_io_failed_hook(bio, page,
-							 start, end, NULL);
-			if (ret == 0) {
-				uptodate = (err == 0);
-				continue;
-			}
-		}
 
-		if (!uptodate) {
-			clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
-			ClearPageUptodate(page);
-			SetPageError(page);
-		}
+		if (end_extent_writepage(page, err, start, end))
+			continue;
 
 		if (whole_page)
 			end_page_writeback(page);
@@ -2535,10 +2546,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 
 		if (zero_offset) {
 			iosize = PAGE_CACHE_SIZE - zero_offset;
-			userpage = kmap_atomic(page, KM_USER0);
+			userpage = kmap_atomic(page);
 			memset(userpage + zero_offset, 0, iosize);
 			flush_dcache_page(page);
-			kunmap_atomic(userpage, KM_USER0);
+			kunmap_atomic(userpage);
 		}
 	}
 	while (cur <= end) {
@@ -2547,10 +2558,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 			struct extent_state *cached = NULL;
 
 			iosize = PAGE_CACHE_SIZE - pg_offset;
-			userpage = kmap_atomic(page, KM_USER0);
+			userpage = kmap_atomic(page);
 			memset(userpage + pg_offset, 0, iosize);
 			flush_dcache_page(page);
-			kunmap_atomic(userpage, KM_USER0);
+			kunmap_atomic(userpage);
 			set_extent_uptodate(tree, cur, cur + iosize - 1,
 					    &cached, GFP_NOFS);
 			unlock_extent_cached(tree, cur, cur + iosize - 1,
@@ -2596,10 +2607,10 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 			char *userpage;
 			struct extent_state *cached = NULL;
 
-			userpage = kmap_atomic(page, KM_USER0);
+			userpage = kmap_atomic(page);
 			memset(userpage + pg_offset, 0, iosize);
 			flush_dcache_page(page);
-			kunmap_atomic(userpage, KM_USER0);
+			kunmap_atomic(userpage);
 
 			set_extent_uptodate(tree, cur, cur + iosize - 1,
 					    &cached, GFP_NOFS);
@@ -2745,10 +2756,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	if (page->index == end_index) {
 		char *userpage;
 
-		userpage = kmap_atomic(page, KM_USER0);
+		userpage = kmap_atomic(page);
 		memset(userpage + pg_offset, 0,
 		       PAGE_CACHE_SIZE - pg_offset);
-		kunmap_atomic(userpage, KM_USER0);
+		kunmap_atomic(userpage);
 		flush_dcache_page(page);
 	}
 	pg_offset = 0;
@@ -2779,9 +2790,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 				delalloc_start = delalloc_end + 1;
 				continue;
 			}
-			tree->ops->fill_delalloc(inode, page, delalloc_start,
-						 delalloc_end, &page_started,
-						 &nr_written);
+			ret = tree->ops->fill_delalloc(inode, page,
+						       delalloc_start,
+						       delalloc_end,
+						       &page_started,
+						       &nr_written);
+			BUG_ON(ret);
 			/*
 			 * delalloc_end is already one less than the total
 			 * length, so we don't subtract one from
@@ -2818,8 +2832,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	if (tree->ops && tree->ops->writepage_start_hook) {
 		ret = tree->ops->writepage_start_hook(page, start,
 						      page_end);
-		if (ret == -EAGAIN) {
-			redirty_page_for_writepage(wbc, page);
+		if (ret) {
+			/* Fixup worker will requeue */
+			if (ret == -EBUSY)
+				wbc->pages_skipped++;
+			else
+				redirty_page_for_writepage(wbc, page);
 			update_nr_written(page, wbc, nr_written);
 			unlock_page(page);
 			ret = 0;
@@ -3289,7 +3307,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
 			len = end - start + 1;
 			write_lock(&map->lock);
 			em = lookup_extent_mapping(map, start, len);
-			if (IS_ERR_OR_NULL(em)) {
+			if (!em) {
 				write_unlock(&map->lock);
 				break;
 			}
@@ -3853,10 +3871,9 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
 	num_pages = num_extent_pages(eb->start, eb->len);
 	clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
 
-	if (eb_straddles_pages(eb)) {
-		clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-				      cached_state, GFP_NOFS);
-	}
+	clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+			      cached_state, GFP_NOFS);
+
 	for (i = 0; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
 		if (page)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index bc6a042cb6f..cecc3518c12 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -319,4 +319,5 @@ struct btrfs_mapping_tree;
 int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
 			u64 length, u64 logical, struct page *page,
 			int mirror_num);
+int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
 #endif
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 33a7890b1f4..1195f09761f 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -26,8 +26,8 @@ struct extent_map {
 	unsigned long flags;
 	struct block_device *bdev;
 	atomic_t refs;
-	unsigned int in_tree:1;
-	unsigned int compress_type:4;
+	unsigned int in_tree;
+	unsigned int compress_type;
 };
 
 struct extent_map_tree {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index c7fb3a4247d..078b4fd5450 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -447,13 +447,13 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
 			sums->bytenr = ordered->start;
 		}
 
-		data = kmap_atomic(bvec->bv_page, KM_USER0);
+		data = kmap_atomic(bvec->bv_page);
 		sector_sum->sum = ~(u32)0;
 		sector_sum->sum = btrfs_csum_data(root,
 						  data + bvec->bv_offset,
 						  sector_sum->sum,
 						  bvec->bv_len);
-		kunmap_atomic(data, KM_USER0);
+		kunmap_atomic(data);
 		btrfs_csum_final(sector_sum->sum,
 				 (char *)&sector_sum->sum);
 		sector_sum->bytenr = disk_bytenr;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 859ba2dd889..e8d06b6b919 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1605,6 +1605,14 @@ static long btrfs_fallocate(struct file *file, int mode,
 		return -EOPNOTSUPP;
 
 	/*
+	 * Make sure we have enough space before we do the
+	 * allocation.
+	 */
+	ret = btrfs_check_data_free_space(inode, len);
+	if (ret)
+		return ret;
+
+	/*
 	 * wait for ordered IO before we have any locks.  We'll loop again
 	 * below with the locks held.
 	 */
@@ -1667,27 +1675,12 @@ static long btrfs_fallocate(struct file *file, int mode,
 		if (em->block_start == EXTENT_MAP_HOLE ||
 		    (cur_offset >= inode->i_size &&
 		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
-
-			/*
-			 * Make sure we have enough space before we do the
-			 * allocation.
-			 */
-			ret = btrfs_check_data_free_space(inode, last_byte -
-							  cur_offset);
-			if (ret) {
-				free_extent_map(em);
-				break;
-			}
-
 			ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
 							last_byte - cur_offset,
 							1 << inode->i_blkbits,
 							offset + len,
 							&alloc_hint);
 
-			/* Let go of our reservation. */
-			btrfs_free_reserved_data_space(inode, last_byte -
-						       cur_offset);
 			if (ret < 0) {
 				free_extent_map(em);
 				break;
@@ -1715,6 +1708,8 @@ static long btrfs_fallocate(struct file *file, int mode,
 			     &cached_state, GFP_NOFS);
 out:
 	mutex_unlock(&inode->i_mutex);
+	/* Let go of our reservation. */
+	btrfs_free_reserved_data_space(inode, len);
 	return ret;
 }
 
@@ -1761,7 +1756,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
 						     start - root->sectorsize,
 						     root->sectorsize, 0);
 		if (IS_ERR(em)) {
-			ret = -ENXIO;
+			ret = PTR_ERR(em);
 			goto out;
 		}
 		last_end = em->start + em->len;
@@ -1773,7 +1768,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
 	while (1) {
 		em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
 		if (IS_ERR(em)) {
-			ret = -ENXIO;
+			ret = PTR_ERR(em);
 			break;
 		}
 
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index c2f20594c9f..b02e379b14c 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -777,6 +777,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
 	spin_lock(&block_group->lock);
 	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
 		spin_unlock(&block_group->lock);
+		btrfs_free_path(path);
 		goto out;
 	}
 	spin_unlock(&block_group->lock);
@@ -1067,7 +1068,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		spin_unlock(&block_group->lock);
 		ret = 0;
 #ifdef DEBUG
-		printk(KERN_ERR "btrfs: failed to write free space cace "
+		printk(KERN_ERR "btrfs: failed to write free space cache "
 		       "for block group %llu\n", block_group->key.objectid);
 #endif
 	}
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 213ffa86ce1..ee15d88b33d 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -438,7 +438,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
 					  trans->bytes_reserved);
 	if (ret)
 		goto out;
-	trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans,
+	trace_btrfs_space_reservation(root->fs_info, "ino_cache",
+				      (u64)(unsigned long)trans,
 				      trans->bytes_reserved, 1);
 again:
 	inode = lookup_free_ino_inode(root, path);
@@ -500,7 +501,8 @@ again:
 out_put:
 	iput(inode);
 out_release:
-	trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans,
+	trace_btrfs_space_reservation(root->fs_info, "ino_cache",
+				      (u64)(unsigned long)trans,
 				      trans->bytes_reserved, 0);
 	btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
 out:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 32214fe0f7e..3a0b5c1f9d3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -173,9 +173,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 			cur_size = min_t(unsigned long, compressed_size,
 				       PAGE_CACHE_SIZE);
 
-			kaddr = kmap_atomic(cpage, KM_USER0);
+			kaddr = kmap_atomic(cpage);
 			write_extent_buffer(leaf, kaddr, ptr, cur_size);
-			kunmap_atomic(kaddr, KM_USER0);
+			kunmap_atomic(kaddr);
 
 			i++;
 			ptr += cur_size;
@@ -187,10 +187,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 		page = find_get_page(inode->i_mapping,
 				     start >> PAGE_CACHE_SHIFT);
 		btrfs_set_file_extent_compression(leaf, ei, 0);
-		kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = kmap_atomic(page);
 		offset = start & (PAGE_CACHE_SIZE - 1);
 		write_extent_buffer(leaf, kaddr + offset, ptr, size);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		page_cache_release(page);
 	}
 	btrfs_mark_buffer_dirty(leaf);
@@ -422,10 +422,10 @@ again:
 			 * sending it down to disk
 			 */
 			if (offset) {
-				kaddr = kmap_atomic(page, KM_USER0);
+				kaddr = kmap_atomic(page);
 				memset(kaddr + offset, 0,
 				       PAGE_CACHE_SIZE - offset);
-				kunmap_atomic(kaddr, KM_USER0);
+				kunmap_atomic(kaddr);
 			}
 			will_compress = 1;
 		}
@@ -1555,6 +1555,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
 	struct inode *inode;
 	u64 page_start;
 	u64 page_end;
+	int ret;
 
 	fixup = container_of(work, struct btrfs_writepage_fixup, work);
 	page = fixup->page;
@@ -1582,12 +1583,21 @@ again:
 				     page_end, &cached_state, GFP_NOFS);
 		unlock_page(page);
 		btrfs_start_ordered_extent(inode, ordered, 1);
+		btrfs_put_ordered_extent(ordered);
 		goto again;
 	}
 
-	BUG();
+	ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
+	if (ret) {
+		mapping_set_error(page->mapping, ret);
+		end_extent_writepage(page, ret, page_start, page_end);
+		ClearPageChecked(page);
+		goto out;
+	 }
+
 	btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
 	ClearPageChecked(page);
+	set_page_dirty(page);
 out:
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
 			     &cached_state, GFP_NOFS);
@@ -1630,7 +1640,7 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
 	fixup->work.func = btrfs_writepage_fixup_worker;
 	fixup->page = page;
 	btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work);
-	return -EAGAIN;
+	return -EBUSY;
 }
 
 static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
@@ -1863,7 +1873,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 	} else {
 		ret = get_state_private(io_tree, start, &private);
 	}
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 	if (ret)
 		goto zeroit;
 
@@ -1872,7 +1882,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 	if (csum != private)
 		goto zeroit;
 
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 good:
 	return 0;
 
@@ -1884,7 +1894,7 @@ zeroit:
 		       (unsigned long long)private);
 	memset(kaddr + offset, 1, end - start + 1);
 	flush_dcache_page(page);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 	if (private == 0)
 		return 0;
 	return -EIO;
@@ -4575,7 +4585,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
 		ret = btrfs_insert_dir_item(trans, root, name, name_len,
 					    parent_inode, &key,
 					    btrfs_inode_type(inode), index);
-		BUG_ON(ret);
+		if (ret)
+			goto fail_dir_item;
 
 		btrfs_i_size_write(parent_inode, parent_inode->i_size +
 				   name_len * 2);
@@ -4583,6 +4594,23 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
 		ret = btrfs_update_inode(trans, root, parent_inode);
 	}
 	return ret;
+
+fail_dir_item:
+	if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
+		u64 local_index;
+		int err;
+		err = btrfs_del_root_ref(trans, root->fs_info->tree_root,
+				 key.objectid, root->root_key.objectid,
+				 parent_ino, &local_index, name, name_len);
+
+	} else if (add_backref) {
+		u64 local_index;
+		int err;
+
+		err = btrfs_del_inode_ref(trans, root, name, name_len,
+					  ino, parent_ino, &local_index);
+	}
+	return ret;
 }
 
 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
@@ -4909,12 +4937,12 @@ static noinline int uncompress_inline(struct btrfs_path *path,
 	ret = btrfs_decompress(compress_type, tmp, page,
 			       extent_offset, inline_size, max_size);
 	if (ret) {
-		char *kaddr = kmap_atomic(page, KM_USER0);
+		char *kaddr = kmap_atomic(page);
 		unsigned long copy_size = min_t(u64,
 				  PAGE_CACHE_SIZE - pg_offset,
 				  max_size - extent_offset);
 		memset(kaddr + pg_offset, 0, copy_size);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 	}
 	kfree(tmp);
 	return 0;
@@ -5691,11 +5719,11 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
 			unsigned long flags;
 
 			local_irq_save(flags);
-			kaddr = kmap_atomic(page, KM_IRQ0);
+			kaddr = kmap_atomic(page);
 			csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
 					       csum, bvec->bv_len);
 			btrfs_csum_final(csum, (char *)&csum);
-			kunmap_atomic(kaddr, KM_IRQ0);
+			kunmap_atomic(kaddr);
 			local_irq_restore(flags);
 
 			flush_dcache_page(bvec->bv_page);
@@ -6696,8 +6724,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
 	int err;
 	u64 index = 0;
 
-	inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
-				new_dirid, S_IFDIR | 0700, &index);
+	inode = btrfs_new_inode(trans, new_root, NULL, "..", 2,
+				new_dirid, new_dirid,
+				S_IFDIR | (~current_umask() & S_IRWXUGO),
+				&index);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 	inode->i_op = &btrfs_dir_inode_operations;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 03bb62a9ee2..d8b54715c2d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -861,6 +861,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
 	int i_done;
 	struct btrfs_ordered_extent *ordered;
 	struct extent_state *cached_state = NULL;
+	struct extent_io_tree *tree;
 	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
 
 	if (isize == 0)
@@ -871,18 +872,34 @@ static int cluster_pages_for_defrag(struct inode *inode,
 					   num_pages << PAGE_CACHE_SHIFT);
 	if (ret)
 		return ret;
-again:
-	ret = 0;
 	i_done = 0;
+	tree = &BTRFS_I(inode)->io_tree;
 
 	/* step one, lock all the pages */
 	for (i = 0; i < num_pages; i++) {
 		struct page *page;
+again:
 		page = find_or_create_page(inode->i_mapping,
-					    start_index + i, mask);
+					   start_index + i, mask);
 		if (!page)
 			break;
 
+		page_start = page_offset(page);
+		page_end = page_start + PAGE_CACHE_SIZE - 1;
+		while (1) {
+			lock_extent(tree, page_start, page_end, GFP_NOFS);
+			ordered = btrfs_lookup_ordered_extent(inode,
+							      page_start);
+			unlock_extent(tree, page_start, page_end, GFP_NOFS);
+			if (!ordered)
+				break;
+
+			unlock_page(page);
+			btrfs_start_ordered_extent(inode, ordered, 1);
+			btrfs_put_ordered_extent(ordered);
+			lock_page(page);
+		}
+
 		if (!PageUptodate(page)) {
 			btrfs_readpage(NULL, page);
 			lock_page(page);
@@ -893,15 +910,22 @@ again:
 				break;
 			}
 		}
+
 		isize = i_size_read(inode);
 		file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
-		if (!isize || page->index > file_end ||
-		    page->mapping != inode->i_mapping) {
+		if (!isize || page->index > file_end) {
 			/* whoops, we blew past eof, skip this page */
 			unlock_page(page);
 			page_cache_release(page);
 			break;
 		}
+
+		if (page->mapping != inode->i_mapping) {
+			unlock_page(page);
+			page_cache_release(page);
+			goto again;
+		}
+
 		pages[i] = page;
 		i_done++;
 	}
@@ -924,25 +948,6 @@ again:
 	lock_extent_bits(&BTRFS_I(inode)->io_tree,
 			 page_start, page_end - 1, 0, &cached_state,
 			 GFP_NOFS);
-	ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1);
-	if (ordered &&
-	    ordered->file_offset + ordered->len > page_start &&
-	    ordered->file_offset < page_end) {
-		btrfs_put_ordered_extent(ordered);
-		unlock_extent_cached(&BTRFS_I(inode)->io_tree,
-				     page_start, page_end - 1,
-				     &cached_state, GFP_NOFS);
-		for (i = 0; i < i_done; i++) {
-			unlock_page(pages[i]);
-			page_cache_release(pages[i]);
-		}
-		btrfs_wait_ordered_range(inode, page_start,
-					 page_end - page_start);
-		goto again;
-	}
-	if (ordered)
-		btrfs_put_ordered_extent(ordered);
-
 	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
 			  page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
 			  EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
@@ -1327,6 +1332,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
 		goto out;
 	}
 
+	if (name[0] == '.' &&
+	   (namelen == 1 || (name[1] == '.' && namelen == 2))) {
+		ret = -EEXIST;
+		goto out;
+	}
+
 	if (subvol) {
 		ret = btrfs_mksubvol(&file->f_path, name, namelen,
 				     NULL, transid, readonly);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index a178f5ebea7..743b86fa4fc 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -411,9 +411,9 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
 
 	bytes = min_t(unsigned long, destlen, out_len - start_byte);
 
-	kaddr = kmap_atomic(dest_page, KM_USER0);
+	kaddr = kmap_atomic(dest_page);
 	memcpy(kaddr, workspace->buf + start_byte, bytes);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 out:
 	return ret;
 }
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 2373b39a132..22db04550f6 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -305,7 +305,7 @@ again:
 
 	spin_lock(&fs_info->reada_lock);
 	ret = radix_tree_insert(&dev->reada_zones,
-				(unsigned long)zone->end >> PAGE_CACHE_SHIFT,
+				(unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
 				zone);
 	spin_unlock(&fs_info->reada_lock);
 
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 9770cc5bfb7..390e7102b0f 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -591,7 +591,7 @@ static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
 	u64 flags = sbio->spag[ix].flags;
 
 	page = sbio->bio->bi_io_vec[ix].bv_page;
-	buffer = kmap_atomic(page, KM_USER0);
+	buffer = kmap_atomic(page);
 	if (flags & BTRFS_EXTENT_FLAG_DATA) {
 		ret = scrub_checksum_data(sbio->sdev,
 					  sbio->spag + ix, buffer);
@@ -603,7 +603,7 @@ static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
 	} else {
 		WARN_ON(1);
 	}
-	kunmap_atomic(buffer, KM_USER0);
+	kunmap_atomic(buffer);
 
 	return ret;
 }
@@ -792,7 +792,7 @@ static void scrub_checksum(struct btrfs_work *work)
 	}
 	for (i = 0; i < sbio->count; ++i) {
 		page = sbio->bio->bi_io_vec[i].bv_page;
-		buffer = kmap_atomic(page, KM_USER0);
+		buffer = kmap_atomic(page);
 		flags = sbio->spag[i].flags;
 		logical = sbio->logical + i * PAGE_SIZE;
 		ret = 0;
@@ -807,7 +807,7 @@ static void scrub_checksum(struct btrfs_work *work)
 		} else {
 			WARN_ON(1);
 		}
-		kunmap_atomic(buffer, KM_USER0);
+		kunmap_atomic(buffer);
 		if (ret) {
 			ret = scrub_recheck_error(sbio, i);
 			if (!ret) {
@@ -1367,7 +1367,8 @@ out:
 }
 
 static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
-	u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length)
+	u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length,
+	u64 dev_offset)
 {
 	struct btrfs_mapping_tree *map_tree =
 		&sdev->dev->dev_root->fs_info->mapping_tree;
@@ -1391,7 +1392,8 @@ static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
 		goto out;
 
 	for (i = 0; i < map->num_stripes; ++i) {
-		if (map->stripes[i].dev == sdev->dev) {
+		if (map->stripes[i].dev == sdev->dev &&
+		    map->stripes[i].physical == dev_offset) {
 			ret = scrub_stripe(sdev, map, i, chunk_offset, length);
 			if (ret)
 				goto out;
@@ -1487,7 +1489,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
 			break;
 		}
 		ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
-				  chunk_offset, length);
+				  chunk_offset, length, found_key.offset);
 		btrfs_put_block_group(cache);
 		if (ret)
 			break;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3ce97b217cb..81df3fec6a6 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -629,7 +629,6 @@ static int btrfs_fill_super(struct super_block *sb,
 			    void *data, int silent)
 {
 	struct inode *inode;
-	struct dentry *root_dentry;
 	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
 	struct btrfs_key key;
 	int err;
@@ -660,15 +659,12 @@ static int btrfs_fill_super(struct super_block *sb,
 		goto fail_close;
 	}
 
-	root_dentry = d_alloc_root(inode);
-	if (!root_dentry) {
-		iput(inode);
+	sb->s_root = d_make_root(inode);
+	if (!sb->s_root) {
 		err = -ENOMEM;
 		goto fail_close;
 	}
 
-	sb->s_root = root_dentry;
-
 	save_mount_options(sb, data);
 	cleancache_init_fs(sb);
 	sb->s_flags |= MS_ACTIVE;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 287a6728b1a..04b77e3ceb7 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -327,7 +327,8 @@ again:
 
 	if (num_bytes) {
 		trace_btrfs_space_reservation(root->fs_info, "transaction",
-					      (u64)h, num_bytes, 1);
+					      (u64)(unsigned long)h,
+					      num_bytes, 1);
 		h->block_rsv = &root->fs_info->trans_block_rsv;
 		h->bytes_reserved = num_bytes;
 	}
@@ -915,7 +916,11 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 				dentry->d_name.name, dentry->d_name.len,
 				parent_inode, &key,
 				BTRFS_FT_DIR, index);
-	BUG_ON(ret);
+	if (ret) {
+		pending->error = -EEXIST;
+		dput(parent);
+		goto fail;
+	}
 
 	btrfs_i_size_write(parent_inode, parent_inode->i_size +
 					 dentry->d_name.len * 2);
@@ -993,12 +998,9 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_pending_snapshot *pending;
 	struct list_head *head = &trans->transaction->pending_snapshots;
-	int ret;
 
-	list_for_each_entry(pending, head, list) {
-		ret = create_pending_snapshot(trans, fs_info, pending);
-		BUG_ON(ret);
-	}
+	list_for_each_entry(pending, head, list)
+		create_pending_snapshot(trans, fs_info, pending);
 	return 0;
 }
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0b4e2af7954..ef41f285a47 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -459,12 +459,23 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
 {
 	struct btrfs_device *device, *next;
 
+	struct block_device *latest_bdev = NULL;
+	u64 latest_devid = 0;
+	u64 latest_transid = 0;
+
 	mutex_lock(&uuid_mutex);
 again:
 	/* This is the initialized path, it is safe to release the devices. */
 	list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
-		if (device->in_fs_metadata)
+		if (device->in_fs_metadata) {
+			if (!latest_transid ||
+			    device->generation > latest_transid) {
+				latest_devid = device->devid;
+				latest_transid = device->generation;
+				latest_bdev = device->bdev;
+			}
 			continue;
+		}
 
 		if (device->bdev) {
 			blkdev_put(device->bdev, device->mode);
@@ -487,6 +498,10 @@ again:
 		goto again;
 	}
 
+	fs_devices->latest_bdev = latest_bdev;
+	fs_devices->latest_devid = latest_devid;
+	fs_devices->latest_trans = latest_transid;
+
 	mutex_unlock(&uuid_mutex);
 	return 0;
 }
@@ -1953,7 +1968,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
 	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
 	read_unlock(&em_tree->lock);
 
-	BUG_ON(em->start > chunk_offset ||
+	BUG_ON(!em || em->start > chunk_offset ||
 	       em->start + em->len < chunk_offset);
 	map = (struct map_lookup *)em->bdev;
 
@@ -4356,6 +4371,20 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 		return -ENOMEM;
 	btrfs_set_buffer_uptodate(sb);
 	btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
+	/*
+	 * The sb extent buffer is artifical and just used to read the system array.
+	 * btrfs_set_buffer_uptodate() call does not properly mark all it's
+	 * pages up-to-date when the page is larger: extent does not cover the
+	 * whole page and consequently check_page_uptodate does not find all
+	 * the page's extents up-to-date (the hole beyond sb),
+	 * write_extent_buffer then triggers a WARN_ON.
+	 *
+	 * Regular short extents go through mark_extent_buffer_dirty/writeback cycle,
+	 * but sb spans only this function. Add an explicit SetPageUptodate call
+	 * to silence the warning eg. on PowerPC 64.
+	 */
+	if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE)
+		SetPageUptodate(sb->first_page);
 
 	write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
 	array_size = btrfs_super_sys_array_size(super_copy);
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index faccd47c6c4..92c20654cc5 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -370,9 +370,9 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
 			    PAGE_CACHE_SIZE - buf_offset);
 		bytes = min(bytes, bytes_left);
 
-		kaddr = kmap_atomic(dest_page, KM_USER0);
+		kaddr = kmap_atomic(dest_page);
 		memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 
 		pg_offset += bytes;
 		bytes_left -= bytes;
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index a0358c2189c..7f0771d3894 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -646,7 +646,8 @@ lookup_again:
 		 * (this is used to keep track of culling, and atimes are only
 		 * updated by read, write and readdir but not lookup or
 		 * open) */
-		touch_atime(cache->mnt, next);
+		path.dentry = next;
+		touch_atime(&path);
 	}
 
 	/* open a file interface onto a data file */
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 00de2c9568c..256f8522192 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -655,9 +655,8 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
 		dout("open_root_inode success\n");
 		if (ceph_ino(inode) == CEPH_INO_ROOT &&
 		    fsc->sb->s_root == NULL) {
-			root = d_alloc_root(inode);
+			root = d_make_root(inode);
 			if (!root) {
-				iput(inode);
 				root = ERR_PTR(-ENOMEM);
 				goto out;
 			}
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index c1b25448738..3cc1b251ca0 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -556,6 +556,7 @@ init_cifs_idmap(void)
 
 	/* instruct request_key() to use this special keyring as a cache for
 	 * the results it looks up */
+	set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
 	cred->thread_keyring = keyring;
 	cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
 	root_cred = cred;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index b1fd382d195..418fc42fb8b 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -119,12 +119,10 @@ cifs_read_super(struct super_block *sb)
 
 	if (IS_ERR(inode)) {
 		rc = PTR_ERR(inode);
-		inode = NULL;
 		goto out_no_root;
 	}
 
-	sb->s_root = d_alloc_root(inode);
-
+	sb->s_root = d_make_root(inode);
 	if (!sb->s_root) {
 		rc = -ENOMEM;
 		goto out_no_root;
@@ -147,9 +145,6 @@ cifs_read_super(struct super_block *sb)
 
 out_no_root:
 	cERROR(1, "cifs_read_super: get root inode failed");
-	if (inode)
-		iput(inode);
-
 	return rc;
 }
 
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 63a196b97d5..bc7e24420ac 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -584,10 +584,26 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 			 * If either that or op not supported returned, follow
 			 * the normal lookup.
 			 */
-			if ((rc == 0) || (rc == -ENOENT))
+			switch (rc) {
+			case 0:
+				/*
+				 * The server may allow us to open things like
+				 * FIFOs, but the client isn't set up to deal
+				 * with that. If it's not a regular file, just
+				 * close it and proceed as if it were a normal
+				 * lookup.
+				 */
+				if (newInode && !S_ISREG(newInode->i_mode)) {
+					CIFSSMBClose(xid, pTcon, fileHandle);
+					break;
+				}
+			case -ENOENT:
 				posix_open = true;
-			else if ((rc == -EINVAL) || (rc != -EOPNOTSUPP))
+			case -EOPNOTSUPP:
+				break;
+			default:
 				pTcon->broken_posix_open = true;
+			}
 		}
 		if (!posix_open)
 			rc = cifs_get_inode_info_unix(&newInode, full_path,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 4dd9283885e..5e64748a291 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -920,16 +920,26 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 	for (lockp = &inode->i_flock; *lockp != NULL; \
 	     lockp = &(*lockp)->fl_next)
 
+struct lock_to_push {
+	struct list_head llist;
+	__u64 offset;
+	__u64 length;
+	__u32 pid;
+	__u16 netfid;
+	__u8 type;
+};
+
 static int
 cifs_push_posix_locks(struct cifsFileInfo *cfile)
 {
 	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 	struct file_lock *flock, **before;
-	struct cifsLockInfo *lck, *tmp;
+	unsigned int count = 0, i = 0;
 	int rc = 0, xid, type;
+	struct list_head locks_to_send, *el;
+	struct lock_to_push *lck, *tmp;
 	__u64 length;
-	struct list_head locks_to_send;
 
 	xid = GetXid();
 
@@ -940,29 +950,55 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
 		return rc;
 	}
 
+	lock_flocks();
+	cifs_for_each_lock(cfile->dentry->d_inode, before) {
+		if ((*before)->fl_flags & FL_POSIX)
+			count++;
+	}
+	unlock_flocks();
+
 	INIT_LIST_HEAD(&locks_to_send);
 
+	/*
+	 * Allocating count locks is enough because no locks can be added to
+	 * the list while we are holding cinode->lock_mutex that protects
+	 * locking operations of this inode.
+	 */
+	for (; i < count; i++) {
+		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
+		if (!lck) {
+			rc = -ENOMEM;
+			goto err_out;
+		}
+		list_add_tail(&lck->llist, &locks_to_send);
+	}
+
+	i = 0;
+	el = locks_to_send.next;
 	lock_flocks();
 	cifs_for_each_lock(cfile->dentry->d_inode, before) {
+		if (el == &locks_to_send) {
+			/* something is really wrong */
+			cERROR(1, "Can't push all brlocks!");
+			break;
+		}
 		flock = *before;
+		if ((flock->fl_flags & FL_POSIX) == 0)
+			continue;
 		length = 1 + flock->fl_end - flock->fl_start;
 		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
 			type = CIFS_RDLCK;
 		else
 			type = CIFS_WRLCK;
-
-		lck = cifs_lock_init(flock->fl_start, length, type,
-				     cfile->netfid);
-		if (!lck) {
-			rc = -ENOMEM;
-			goto send_locks;
-		}
+		lck = list_entry(el, struct lock_to_push, llist);
 		lck->pid = flock->fl_pid;
-
-		list_add_tail(&lck->llist, &locks_to_send);
+		lck->netfid = cfile->netfid;
+		lck->length = length;
+		lck->type = type;
+		lck->offset = flock->fl_start;
+		i++;
+		el = el->next;
 	}
-
-send_locks:
 	unlock_flocks();
 
 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
@@ -979,11 +1015,18 @@ send_locks:
 		kfree(lck);
 	}
 
+out:
 	cinode->can_cache_brlcks = false;
 	mutex_unlock(&cinode->lock_mutex);
 
 	FreeXid(xid);
 	return rc;
+err_out:
+	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
+		list_del(&lck->llist);
+		kfree(lck);
+	}
+	goto out;
 }
 
 static int
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a5f54b7d982..745da3d0653 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -534,6 +534,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
 	if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
 		fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
 		fattr->cf_dtype = DT_DIR;
+		/*
+		 * Server can return wrong NumberOfLinks value for directories
+		 * when Unix extensions are disabled - fake it.
+		 */
+		fattr->cf_nlink = 2;
 	} else {
 		fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
 		fattr->cf_dtype = DT_REG;
@@ -541,9 +546,9 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
 		/* clear write bits if ATTR_READONLY is set */
 		if (fattr->cf_cifsattrs & ATTR_READONLY)
 			fattr->cf_mode &= ~(S_IWUGO);
-	}
 
-	fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
+		fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
+	}
 
 	fattr->cf_uid = cifs_sb->mnt_uid;
 	fattr->cf_gid = cifs_sb->mnt_gid;
@@ -1322,7 +1327,6 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
 			}
 /*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need
 	to set uid/gid */
-			inc_nlink(inode);
 
 			cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
 			cifs_fill_uniqueid(inode->i_sb, &fattr);
@@ -1355,7 +1359,6 @@ mkdir_retry_old:
 		d_drop(direntry);
 	} else {
 mkdir_get_info:
-		inc_nlink(inode);
 		if (pTcon->unix_ext)
 			rc = cifs_get_inode_info_unix(&newinode, full_path,
 						      inode->i_sb, xid);
@@ -1436,6 +1439,11 @@ mkdir_get_info:
 		}
 	}
 mkdir_out:
+	/*
+	 * Force revalidate to get parent dir info when needed since cached
+	 * attributes are invalid now.
+	 */
+	CIFS_I(inode)->time = 0;
 	kfree(full_path);
 	FreeXid(xid);
 	cifs_put_tlink(tlink);
@@ -1475,7 +1483,6 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
 	cifs_put_tlink(tlink);
 
 	if (!rc) {
-		drop_nlink(inode);
 		spin_lock(&direntry->d_inode->i_lock);
 		i_size_write(direntry->d_inode, 0);
 		clear_nlink(direntry->d_inode);
@@ -1483,12 +1490,15 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
 	}
 
 	cifsInode = CIFS_I(direntry->d_inode);
-	cifsInode->time = 0;	/* force revalidate to go get info when
-				   needed */
+	/* force revalidate to go get info when needed */
+	cifsInode->time = 0;
 
 	cifsInode = CIFS_I(inode);
-	cifsInode->time = 0;	/* force revalidate to get parent dir info
-				   since cached search results now invalid */
+	/*
+	 * Force revalidate to get parent dir info when needed since cached
+	 * attributes are invalid now.
+	 */
+	cifsInode->time = 0;
 
 	direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime =
 		current_fs_time(inode->i_sb);
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 45f07c46f3e..10d92cf57ab 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -105,7 +105,6 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
 	struct cifs_tcon *pTcon;
 	struct super_block *sb;
 	char *full_path;
-	struct cifs_ntsd *pacl;
 
 	if (direntry == NULL)
 		return -EIO;
@@ -164,23 +163,24 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
 			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
 	} else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
 			strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
+#ifdef CONFIG_CIFS_ACL
+		struct cifs_ntsd *pacl;
 		pacl = kmalloc(value_size, GFP_KERNEL);
 		if (!pacl) {
 			cFYI(1, "%s: Can't allocate memory for ACL",
 					__func__);
 			rc = -ENOMEM;
 		} else {
-#ifdef CONFIG_CIFS_ACL
 			memcpy(pacl, ea_value, value_size);
 			rc = set_cifs_acl(pacl, value_size,
 				direntry->d_inode, full_path, CIFS_ACL_DACL);
 			if (rc == 0) /* force revalidate of the inode */
 				CIFS_I(direntry->d_inode)->time = 0;
 			kfree(pacl);
+		}
 #else
 			cFYI(1, "Set CIFS ACL not supported yet");
 #endif /* CONFIG_CIFS_ACL */
-		}
 	} else {
 		int temp;
 		temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 5e2e1b3f068..05156c17b55 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -208,13 +208,12 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
         if (IS_ERR(root)) {
 		error = PTR_ERR(root);
 		printk("Failure of coda_cnode_make for root: error %d\n", error);
-		root = NULL;
 		goto error;
 	} 
 
 	printk("coda_read_super: rootinode is %ld dev %s\n", 
 	       root->i_ino, root->i_sb->s_id);
-	sb->s_root = d_alloc_root(root);
+	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
 		error = -EINVAL;
 		goto error;
@@ -222,9 +221,6 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
 	return 0;
 
 error:
-	if (root)
-		iput(root);
-
 	mutex_lock(&vc->vc_mutex);
 	bdi_destroy(&vc->bdi);
 	vc->vc_sb = NULL;
diff --git a/fs/compat.c b/fs/compat.c
index fa9d721ecfe..07880bae28a 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -131,41 +131,35 @@ asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_tim
 
 static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
 {
-	compat_ino_t ino = stat->ino;
-	typeof(ubuf->st_uid) uid = 0;
-	typeof(ubuf->st_gid) gid = 0;
-	int err;
+	struct compat_stat tmp;
 
-	SET_UID(uid, stat->uid);
-	SET_GID(gid, stat->gid);
+	if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
+		return -EOVERFLOW;
 
-	if ((u64) stat->size > MAX_NON_LFS ||
-	    !old_valid_dev(stat->dev) ||
-	    !old_valid_dev(stat->rdev))
+	memset(&tmp, 0, sizeof(tmp));
+	tmp.st_dev = old_encode_dev(stat->dev);
+	tmp.st_ino = stat->ino;
+	if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
 		return -EOVERFLOW;
-	if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino)
+	tmp.st_mode = stat->mode;
+	tmp.st_nlink = stat->nlink;
+	if (tmp.st_nlink != stat->nlink)
 		return -EOVERFLOW;
-
-	if (clear_user(ubuf, sizeof(*ubuf)))
-		return -EFAULT;
-
-	err  = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev);
-	err |= __put_user(ino, &ubuf->st_ino);
-	err |= __put_user(stat->mode, &ubuf->st_mode);
-	err |= __put_user(stat->nlink, &ubuf->st_nlink);
-	err |= __put_user(uid, &ubuf->st_uid);
-	err |= __put_user(gid, &ubuf->st_gid);
-	err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev);
-	err |= __put_user(stat->size, &ubuf->st_size);
-	err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime);
-	err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec);
-	err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime);
-	err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec);
-	err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime);
-	err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec);
-	err |= __put_user(stat->blksize, &ubuf->st_blksize);
-	err |= __put_user(stat->blocks, &ubuf->st_blocks);
-	return err;
+	SET_UID(tmp.st_uid, stat->uid);
+	SET_GID(tmp.st_gid, stat->gid);
+	tmp.st_rdev = old_encode_dev(stat->rdev);
+	if ((u64) stat->size > MAX_NON_LFS)
+		return -EOVERFLOW;
+	tmp.st_size = stat->size;
+	tmp.st_atime = stat->atime.tv_sec;
+	tmp.st_atime_nsec = stat->atime.tv_nsec;
+	tmp.st_mtime = stat->mtime.tv_sec;
+	tmp.st_mtime_nsec = stat->mtime.tv_nsec;
+	tmp.st_ctime = stat->ctime.tv_sec;
+	tmp.st_ctime_nsec = stat->ctime.tv_nsec;
+	tmp.st_blocks = stat->blocks;
+	tmp.st_blksize = stat->blksize;
+	return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0;
 }
 
 asmlinkage long compat_sys_newstat(const char __user * filename,
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a26bea10e81..10d8cd90ca6 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -34,7 +34,7 @@
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/ppp_defs.h>
-#include <linux/if_ppp.h>
+#include <linux/ppp-ioctl.h>
 #include <linux/if_pppox.h>
 #include <linux/mtio.h>
 #include <linux/auto_fs.h>
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index ede857d20a0..b5f0a3b91f1 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -58,12 +58,11 @@ struct configfs_dirent {
 extern struct mutex configfs_symlink_mutex;
 extern spinlock_t configfs_dirent_lock;
 
-extern struct vfsmount * configfs_mount;
 extern struct kmem_cache *configfs_dir_cachep;
 
 extern int configfs_is_root(struct config_item *item);
 
-extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *);
+extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *, struct super_block *);
 extern int configfs_create(struct dentry *, umode_t mode, int (*init)(struct inode *));
 extern int configfs_inode_init(void);
 extern void configfs_inode_exit(void);
@@ -80,15 +79,15 @@ extern const unsigned char * configfs_get_name(struct configfs_dirent *sd);
 extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent);
 extern int configfs_setattr(struct dentry *dentry, struct iattr *iattr);
 
-extern int configfs_pin_fs(void);
+extern struct dentry *configfs_pin_fs(void);
 extern void configfs_release_fs(void);
 
 extern struct rw_semaphore configfs_rename_sem;
-extern struct super_block * configfs_sb;
 extern const struct file_operations configfs_dir_operations;
 extern const struct file_operations configfs_file_operations;
 extern const struct file_operations bin_fops;
 extern const struct inode_operations configfs_dir_inode_operations;
+extern const struct inode_operations configfs_root_inode_operations;
 extern const struct inode_operations configfs_symlink_inode_operations;
 extern const struct dentry_operations configfs_dentry_ops;
 
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 5ddd7ebd9dc..7e6c52d8a20 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -264,11 +264,13 @@ static int init_symlink(struct inode * inode)
 	return 0;
 }
 
-static int create_dir(struct config_item * k, struct dentry * p,
-		      struct dentry * d)
+static int create_dir(struct config_item *k, struct dentry *d)
 {
 	int error;
 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
+	struct dentry *p = d->d_parent;
+
+	BUG_ON(!k);
 
 	error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
 	if (!error)
@@ -304,19 +306,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
 
 static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
 {
-	struct dentry * parent;
-	int error = 0;
-
-	BUG_ON(!item);
-
-	if (item->ci_parent)
-		parent = item->ci_parent->ci_dentry;
-	else if (configfs_mount)
-		parent = configfs_mount->mnt_root;
-	else
-		return -EFAULT;
-
-	error = create_dir(item,parent,dentry);
+	int error = create_dir(item, dentry);
 	if (!error)
 		item->ci_dentry = dentry;
 	return error;
@@ -1079,23 +1069,24 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
 	int ret;
 	struct configfs_dirent *p, *root_sd, *subsys_sd = NULL;
 	struct config_item *s_item = &subsys->su_group.cg_item;
+	struct dentry *root;
 
 	/*
 	 * Pin the configfs filesystem.  This means we can safely access
 	 * the root of the configfs filesystem.
 	 */
-	ret = configfs_pin_fs();
-	if (ret)
-		return ret;
+	root = configfs_pin_fs();
+	if (IS_ERR(root))
+		return PTR_ERR(root);
 
 	/*
 	 * Next, lock the root directory.  We're going to check that the
 	 * subsystem is really registered, and so we need to lock out
 	 * configfs_[un]register_subsystem().
 	 */
-	mutex_lock(&configfs_sb->s_root->d_inode->i_mutex);
+	mutex_lock(&root->d_inode->i_mutex);
 
-	root_sd = configfs_sb->s_root->d_fsdata;
+	root_sd = root->d_fsdata;
 
 	list_for_each_entry(p, &root_sd->s_children, s_sibling) {
 		if (p->s_type & CONFIGFS_DIR) {
@@ -1129,7 +1120,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
 out_unlock_dirent_lock:
 	spin_unlock(&configfs_dirent_lock);
 out_unlock_fs:
-	mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
+	mutex_unlock(&root->d_inode->i_mutex);
 
 	/*
 	 * If we succeeded, the fs is pinned via other methods.  If not,
@@ -1183,11 +1174,6 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 	struct module *subsys_owner = NULL, *new_item_owner = NULL;
 	char *name;
 
-	if (dentry->d_parent == configfs_sb->s_root) {
-		ret = -EPERM;
-		goto out;
-	}
-
 	sd = dentry->d_parent->d_fsdata;
 
 	/*
@@ -1359,9 +1345,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 	struct module *subsys_owner = NULL, *dead_item_owner = NULL;
 	int ret;
 
-	if (dentry->d_parent == configfs_sb->s_root)
-		return -EPERM;
-
 	sd = dentry->d_fsdata;
 	if (sd->s_type & CONFIGFS_USET_DEFAULT)
 		return -EPERM;
@@ -1459,6 +1442,11 @@ const struct inode_operations configfs_dir_inode_operations = {
 	.setattr	= configfs_setattr,
 };
 
+const struct inode_operations configfs_root_inode_operations = {
+	.lookup		= configfs_lookup,
+	.setattr	= configfs_setattr,
+};
+
 #if 0
 int configfs_rename_dir(struct config_item * item, const char *new_name)
 {
@@ -1546,6 +1534,7 @@ static inline unsigned char dt_type(struct configfs_dirent *sd)
 static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
 	struct dentry *dentry = filp->f_path.dentry;
+	struct super_block *sb = dentry->d_sb;
 	struct configfs_dirent * parent_sd = dentry->d_fsdata;
 	struct configfs_dirent *cursor = filp->private_data;
 	struct list_head *p, *q = &cursor->s_sibling;
@@ -1608,7 +1597,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
 					ino = inode->i_ino;
 				spin_unlock(&configfs_dirent_lock);
 				if (!inode)
-					ino = iunique(configfs_sb, 2);
+					ino = iunique(sb, 2);
 
 				if (filldir(dirent, name, len, filp->f_pos, ino,
 						 dt_type(next)) < 0)
@@ -1680,27 +1669,27 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
 	struct config_group *group = &subsys->su_group;
 	struct qstr name;
 	struct dentry *dentry;
+	struct dentry *root;
 	struct configfs_dirent *sd;
 
-	err = configfs_pin_fs();
-	if (err)
-		return err;
+	root = configfs_pin_fs();
+	if (IS_ERR(root))
+		return PTR_ERR(root);
 
 	if (!group->cg_item.ci_name)
 		group->cg_item.ci_name = group->cg_item.ci_namebuf;
 
-	sd = configfs_sb->s_root->d_fsdata;
+	sd = root->d_fsdata;
 	link_group(to_config_group(sd->s_element), group);
 
-	mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex,
-			I_MUTEX_PARENT);
+	mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT);
 
 	name.name = group->cg_item.ci_name;
 	name.len = strlen(name.name);
 	name.hash = full_name_hash(name.name, name.len);
 
 	err = -ENOMEM;
-	dentry = d_alloc(configfs_sb->s_root, &name);
+	dentry = d_alloc(root, &name);
 	if (dentry) {
 		d_add(dentry, NULL);
 
@@ -1717,7 +1706,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
 		}
 	}
 
-	mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
+	mutex_unlock(&root->d_inode->i_mutex);
 
 	if (err) {
 		unlink_group(group);
@@ -1731,13 +1720,14 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
 {
 	struct config_group *group = &subsys->su_group;
 	struct dentry *dentry = group->cg_item.ci_dentry;
+	struct dentry *root = dentry->d_sb->s_root;
 
-	if (dentry->d_parent != configfs_sb->s_root) {
+	if (dentry->d_parent != root) {
 		printk(KERN_ERR "configfs: Tried to unregister non-subsystem!\n");
 		return;
 	}
 
-	mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex,
+	mutex_lock_nested(&root->d_inode->i_mutex,
 			  I_MUTEX_PARENT);
 	mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
 	mutex_lock(&configfs_symlink_mutex);
@@ -1754,7 +1744,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
 
 	d_delete(dentry);
 
-	mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
+	mutex_unlock(&root->d_inode->i_mutex);
 
 	dput(dentry);
 
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 3ee36d41886..0074362d9f7 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -44,8 +44,6 @@
 static struct lock_class_key default_group_class[MAX_LOCK_DEPTH];
 #endif
 
-extern struct super_block * configfs_sb;
-
 static const struct address_space_operations configfs_aops = {
 	.readpage	= simple_readpage,
 	.write_begin	= simple_write_begin,
@@ -132,9 +130,10 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
 	inode->i_ctime = iattr->ia_ctime;
 }
 
-struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent * sd)
+struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent *sd,
+				 struct super_block *s)
 {
-	struct inode * inode = new_inode(configfs_sb);
+	struct inode * inode = new_inode(s);
 	if (inode) {
 		inode->i_ino = get_next_ino();
 		inode->i_mapping->a_ops = &configfs_aops;
@@ -188,36 +187,35 @@ static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
 int configfs_create(struct dentry * dentry, umode_t mode, int (*init)(struct inode *))
 {
 	int error = 0;
-	struct inode * inode = NULL;
-	if (dentry) {
-		if (!dentry->d_inode) {
-			struct configfs_dirent *sd = dentry->d_fsdata;
-			if ((inode = configfs_new_inode(mode, sd))) {
-				if (dentry->d_parent && dentry->d_parent->d_inode) {
-					struct inode *p_inode = dentry->d_parent->d_inode;
-					p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
-				}
-				configfs_set_inode_lock_class(sd, inode);
-				goto Proceed;
-			}
-			else
-				error = -ENOMEM;
-		} else
-			error = -EEXIST;
-	} else
-		error = -ENOENT;
-	goto Done;
+	struct inode *inode = NULL;
+	struct configfs_dirent *sd;
+	struct inode *p_inode;
+
+	if (!dentry)
+		return -ENOENT;
+
+	if (dentry->d_inode)
+		return -EEXIST;
 
- Proceed:
-	if (init)
+	sd = dentry->d_fsdata;
+	inode = configfs_new_inode(mode, sd, dentry->d_sb);
+	if (!inode)
+		return -ENOMEM;
+
+	p_inode = dentry->d_parent->d_inode;
+	p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
+	configfs_set_inode_lock_class(sd, inode);
+
+	if (init) {
 		error = init(inode);
-	if (!error) {
-		d_instantiate(dentry, inode);
-		if (S_ISDIR(mode) || S_ISLNK(mode))
-			dget(dentry);  /* pin link and directory dentries in core */
-	} else
-		iput(inode);
- Done:
+		if (error) {
+			iput(inode);
+			return error;
+		}
+	}
+	d_instantiate(dentry, inode);
+	if (S_ISDIR(mode) || S_ISLNK(mode))
+		dget(dentry);  /* pin link and directory dentries in core */
 	return error;
 }
 
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 276e15cafd5..aee0a7ebbd8 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -37,8 +37,7 @@
 /* Random magic number */
 #define CONFIGFS_MAGIC 0x62656570
 
-struct vfsmount * configfs_mount = NULL;
-struct super_block * configfs_sb = NULL;
+static struct vfsmount *configfs_mount = NULL;
 struct kmem_cache *configfs_dir_cachep;
 static int configfs_mnt_count = 0;
 
@@ -77,12 +76,11 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_magic = CONFIGFS_MAGIC;
 	sb->s_op = &configfs_ops;
 	sb->s_time_gran = 1;
-	configfs_sb = sb;
 
 	inode = configfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
-				   &configfs_root);
+				   &configfs_root, sb);
 	if (inode) {
-		inode->i_op = &configfs_dir_inode_operations;
+		inode->i_op = &configfs_root_inode_operations;
 		inode->i_fop = &configfs_dir_operations;
 		/* directory inodes start off with i_nlink == 2 (for "." entry) */
 		inc_nlink(inode);
@@ -91,10 +89,9 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
 		return -ENOMEM;
 	}
 
-	root = d_alloc_root(inode);
+	root = d_make_root(inode);
 	if (!root) {
 		pr_debug("%s: could not get root dentry!\n",__func__);
-		iput(inode);
 		return -ENOMEM;
 	}
 	config_group_init(&configfs_root_group);
@@ -118,10 +115,11 @@ static struct file_system_type configfs_fs_type = {
 	.kill_sb	= kill_litter_super,
 };
 
-int configfs_pin_fs(void)
+struct dentry *configfs_pin_fs(void)
 {
-	return simple_pin_fs(&configfs_fs_type, &configfs_mount,
+	int err = simple_pin_fs(&configfs_fs_type, &configfs_mount,
 			     &configfs_mnt_count);
+	return err ? ERR_PTR(err) : configfs_mount->mnt_root;
 }
 
 void configfs_release_fs(void)
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 0f3eb41d920..cc9f2546ea4 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -110,13 +110,13 @@ out:
 
 
 static int get_target(const char *symname, struct path *path,
-		      struct config_item **target)
+		      struct config_item **target, struct super_block *sb)
 {
 	int ret;
 
 	ret = kern_path(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, path);
 	if (!ret) {
-		if (path->dentry->d_sb == configfs_sb) {
+		if (path->dentry->d_sb == sb) {
 			*target = configfs_get_config_item(path->dentry);
 			if (!*target) {
 				ret = -ENOENT;
@@ -141,10 +141,6 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
 	struct config_item *target_item = NULL;
 	struct config_item_type *type;
 
-	ret = -EPERM;  /* What lack-of-symlink returns */
-	if (dentry->d_parent == configfs_sb->s_root)
-		goto out;
-
 	sd = dentry->d_parent->d_fsdata;
 	/*
 	 * Fake invisibility if dir belongs to a group/default groups hierarchy
@@ -162,7 +158,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
 	    !type->ct_item_ops->allow_link)
 		goto out_put;
 
-	ret = get_target(symname, &path, &target_item);
+	ret = get_target(symname, &path, &target_item, dentry->d_sb);
 	if (ret)
 		goto out_put;
 
@@ -198,8 +194,6 @@ int configfs_unlink(struct inode *dir, struct dentry *dentry)
 	if (!(sd->s_type & CONFIGFS_ITEM_LINK))
 		goto out;
 
-	BUG_ON(dentry->d_parent == configfs_sb->s_root);
-
 	sl = sd->s_element;
 
 	parent_item = configfs_get_config_item(dentry->d_parent);
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index a2ee8f9f5a3..d013c46402e 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -257,10 +257,10 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
 
 	/* Do sanity checks on the superblock */
 	if (super.magic != CRAMFS_MAGIC) {
-		/* check for wrong endianess */
+		/* check for wrong endianness */
 		if (super.magic == CRAMFS_MAGIC_WEND) {
 			if (!silent)
-				printk(KERN_ERR "cramfs: wrong endianess\n");
+				printk(KERN_ERR "cramfs: wrong endianness\n");
 			goto out;
 		}
 
@@ -270,7 +270,7 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
 		mutex_unlock(&read_mutex);
 		if (super.magic != CRAMFS_MAGIC) {
 			if (super.magic == CRAMFS_MAGIC_WEND && !silent)
-				printk(KERN_ERR "cramfs: wrong endianess\n");
+				printk(KERN_ERR "cramfs: wrong endianness\n");
 			else if (!silent)
 				printk(KERN_ERR "cramfs: wrong magic\n");
 			goto out;
@@ -318,11 +318,9 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
 	root = get_cramfs_inode(sb, &super.root, 0);
 	if (IS_ERR(root))
 		goto out;
-	sb->s_root = d_alloc_root(root);
-	if (!sb->s_root) {
-		iput(root);
+	sb->s_root = d_make_root(root);
+	if (!sb->s_root)
 		goto out;
-	}
 	return 0;
 out:
 	kfree(sbi);
diff --git a/fs/dcache.c b/fs/dcache.c
index 16a53cc2cc0..e441941c834 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -104,11 +104,11 @@ static unsigned int d_hash_shift __read_mostly;
 
 static struct hlist_bl_head *dentry_hashtable __read_mostly;
 
-static inline struct hlist_bl_head *d_hash(struct dentry *parent,
-					unsigned long hash)
+static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
+					unsigned int hash)
 {
-	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
-	hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
+	hash += (unsigned long) parent / L1_CACHE_BYTES;
+	hash = hash + (hash >> D_HASHBITS);
 	return dentry_hashtable + (hash & D_HASHMASK);
 }
 
@@ -137,6 +137,49 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
 }
 #endif
 
+/*
+ * Compare 2 name strings, return 0 if they match, otherwise non-zero.
+ * The strings are both count bytes long, and count is non-zero.
+ */
+static inline int dentry_cmp(const unsigned char *cs, size_t scount,
+				const unsigned char *ct, size_t tcount)
+{
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+	unsigned long a,b,mask;
+
+	if (unlikely(scount != tcount))
+		return 1;
+
+	for (;;) {
+		a = *(unsigned long *)cs;
+		b = *(unsigned long *)ct;
+		if (tcount < sizeof(unsigned long))
+			break;
+		if (unlikely(a != b))
+			return 1;
+		cs += sizeof(unsigned long);
+		ct += sizeof(unsigned long);
+		tcount -= sizeof(unsigned long);
+		if (!tcount)
+			return 0;
+	}
+	mask = ~(~0ul << tcount*8);
+	return unlikely(!!((a ^ b) & mask));
+#else
+	if (scount != tcount)
+		return 1;
+
+	do {
+		if (*cs != *ct)
+			return 1;
+		cs++;
+		ct++;
+		tcount--;
+	} while (tcount);
+	return 0;
+#endif
+}
+
 static void __d_free(struct rcu_head *head)
 {
 	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
@@ -1423,30 +1466,6 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 
 EXPORT_SYMBOL(d_instantiate_unique);
 
-/**
- * d_alloc_root - allocate root dentry
- * @root_inode: inode to allocate the root for
- *
- * Allocate a root ("/") dentry for the inode given. The inode is
- * instantiated and returned. %NULL is returned if there is insufficient
- * memory or the inode passed is %NULL.
- */
- 
-struct dentry * d_alloc_root(struct inode * root_inode)
-{
-	struct dentry *res = NULL;
-
-	if (root_inode) {
-		static const struct qstr name = { .name = "/", .len = 1 };
-
-		res = __d_alloc(root_inode->i_sb, &name);
-		if (res)
-			d_instantiate(res, root_inode);
-	}
-	return res;
-}
-EXPORT_SYMBOL(d_alloc_root);
-
 struct dentry *d_make_root(struct inode *root_inode)
 {
 	struct dentry *res = NULL;
@@ -1717,8 +1736,9 @@ EXPORT_SYMBOL(d_add_ci);
  * child is looked up. Thus, an interlocking stepping of sequence lock checks
  * is formed, giving integrity down the path walk.
  */
-struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
-				unsigned *seq, struct inode **inode)
+struct dentry *__d_lookup_rcu(const struct dentry *parent,
+				const struct qstr *name,
+				unsigned *seqp, struct inode **inode)
 {
 	unsigned int len = name->len;
 	unsigned int hash = name->hash;
@@ -1748,6 +1768,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
 	 * See Documentation/filesystems/path-lookup.txt for more details.
 	 */
 	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
+		unsigned seq;
 		struct inode *i;
 		const char *tname;
 		int tlen;
@@ -1756,7 +1777,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
 			continue;
 
 seqretry:
-		*seq = read_seqcount_begin(&dentry->d_seq);
+		seq = read_seqcount_begin(&dentry->d_seq);
 		if (dentry->d_parent != parent)
 			continue;
 		if (d_unhashed(dentry))
@@ -1771,7 +1792,7 @@ seqretry:
 		 * edge of memory when walking. If we could load this
 		 * atomically some other way, we could drop this check.
 		 */
-		if (read_seqcount_retry(&dentry->d_seq, *seq))
+		if (read_seqcount_retry(&dentry->d_seq, seq))
 			goto seqretry;
 		if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
 			if (parent->d_op->d_compare(parent, *inode,
@@ -1788,6 +1809,7 @@ seqretry:
 		 * order to do anything useful with the returned dentry
 		 * anyway.
 		 */
+		*seqp = seq;
 		*inode = i;
 		return dentry;
 	}
@@ -2968,7 +2990,7 @@ __setup("dhash_entries=", set_dhash_entries);
 
 static void __init dcache_init_early(void)
 {
-	int loop;
+	unsigned int loop;
 
 	/* If hashes are distributed across NUMA nodes, defer
 	 * hash allocation until vmalloc space is available.
@@ -2986,13 +3008,13 @@ static void __init dcache_init_early(void)
 					&d_hash_mask,
 					0);
 
-	for (loop = 0; loop < (1 << d_hash_shift); loop++)
+	for (loop = 0; loop < (1U << d_hash_shift); loop++)
 		INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
 }
 
 static void __init dcache_init(void)
 {
-	int loop;
+	unsigned int loop;
 
 	/* 
 	 * A constructor could be added for stable state like the lists,
@@ -3016,7 +3038,7 @@ static void __init dcache_init(void)
 					&d_hash_mask,
 					0);
 
-	for (loop = 0; loop < (1 << d_hash_shift); loop++)
+	for (loop = 0; loop < (1U << d_hash_shift); loop++)
 		INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
 }
 
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index ef023eef046..21e93605161 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -611,7 +611,7 @@ static const struct file_operations fops_regset32 = {
  * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
  * code.
  */
-struct dentry *debugfs_create_regset32(const char *name, mode_t mode,
+struct dentry *debugfs_create_regset32(const char *name, umode_t mode,
 				       struct dentry *parent,
 				       struct debugfs_regset32 *regset)
 {
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 956d5ddddf6..b80bc846a15 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -23,9 +23,13 @@
 #include <linux/debugfs.h>
 #include <linux/fsnotify.h>
 #include <linux/string.h>
+#include <linux/seq_file.h>
+#include <linux/parser.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
 
+#define DEBUGFS_DEFAULT_MODE	0755
+
 static struct vfsmount *debugfs_mount;
 static int debugfs_mount_count;
 static bool debugfs_registered;
@@ -125,11 +129,154 @@ static inline int debugfs_positive(struct dentry *dentry)
 	return dentry->d_inode && !d_unhashed(dentry);
 }
 
+struct debugfs_mount_opts {
+	uid_t uid;
+	gid_t gid;
+	umode_t mode;
+};
+
+enum {
+	Opt_uid,
+	Opt_gid,
+	Opt_mode,
+	Opt_err
+};
+
+static const match_table_t tokens = {
+	{Opt_uid, "uid=%u"},
+	{Opt_gid, "gid=%u"},
+	{Opt_mode, "mode=%o"},
+	{Opt_err, NULL}
+};
+
+struct debugfs_fs_info {
+	struct debugfs_mount_opts mount_opts;
+};
+
+static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
+{
+	substring_t args[MAX_OPT_ARGS];
+	int option;
+	int token;
+	char *p;
+
+	opts->mode = DEBUGFS_DEFAULT_MODE;
+
+	while ((p = strsep(&data, ",")) != NULL) {
+		if (!*p)
+			continue;
+
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case Opt_uid:
+			if (match_int(&args[0], &option))
+				return -EINVAL;
+			opts->uid = option;
+			break;
+		case Opt_gid:
+			if (match_octal(&args[0], &option))
+				return -EINVAL;
+			opts->gid = option;
+			break;
+		case Opt_mode:
+			if (match_octal(&args[0], &option))
+				return -EINVAL;
+			opts->mode = option & S_IALLUGO;
+			break;
+		/*
+		 * We might like to report bad mount options here;
+		 * but traditionally debugfs has ignored all mount options
+		 */
+		}
+	}
+
+	return 0;
+}
+
+static int debugfs_apply_options(struct super_block *sb)
+{
+	struct debugfs_fs_info *fsi = sb->s_fs_info;
+	struct inode *inode = sb->s_root->d_inode;
+	struct debugfs_mount_opts *opts = &fsi->mount_opts;
+
+	inode->i_mode &= ~S_IALLUGO;
+	inode->i_mode |= opts->mode;
+
+	inode->i_uid = opts->uid;
+	inode->i_gid = opts->gid;
+
+	return 0;
+}
+
+static int debugfs_remount(struct super_block *sb, int *flags, char *data)
+{
+	int err;
+	struct debugfs_fs_info *fsi = sb->s_fs_info;
+
+	err = debugfs_parse_options(data, &fsi->mount_opts);
+	if (err)
+		goto fail;
+
+	debugfs_apply_options(sb);
+
+fail:
+	return err;
+}
+
+static int debugfs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct debugfs_fs_info *fsi = root->d_sb->s_fs_info;
+	struct debugfs_mount_opts *opts = &fsi->mount_opts;
+
+	if (opts->uid != 0)
+		seq_printf(m, ",uid=%u", opts->uid);
+	if (opts->gid != 0)
+		seq_printf(m, ",gid=%u", opts->gid);
+	if (opts->mode != DEBUGFS_DEFAULT_MODE)
+		seq_printf(m, ",mode=%o", opts->mode);
+
+	return 0;
+}
+
+static const struct super_operations debugfs_super_operations = {
+	.statfs		= simple_statfs,
+	.remount_fs	= debugfs_remount,
+	.show_options	= debugfs_show_options,
+};
+
 static int debug_fill_super(struct super_block *sb, void *data, int silent)
 {
 	static struct tree_descr debug_files[] = {{""}};
+	struct debugfs_fs_info *fsi;
+	int err;
+
+	save_mount_options(sb, data);
+
+	fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL);
+	sb->s_fs_info = fsi;
+	if (!fsi) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	err = debugfs_parse_options(data, &fsi->mount_opts);
+	if (err)
+		goto fail;
+
+	err  =  simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
+	if (err)
+		goto fail;
+
+	sb->s_op = &debugfs_super_operations;
+
+	debugfs_apply_options(sb);
+
+	return 0;
 
-	return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
+fail:
+	kfree(fsi);
+	sb->s_fs_info = NULL;
+	return err;
 }
 
 static struct dentry *debug_mount(struct file_system_type *fs_type,
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index c4e2a58a2e8..10f5e0b484d 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -36,7 +36,61 @@
 #define DEVPTS_DEFAULT_PTMX_MODE 0000
 #define PTMX_MINOR	2
 
-extern int pty_limit;			/* Config limit on Unix98 ptys */
+/*
+ * sysctl support for setting limits on the number of Unix98 ptys allocated.
+ * Otherwise one can eat up all kernel memory by opening /dev/ptmx repeatedly.
+ */
+static int pty_limit = NR_UNIX98_PTY_DEFAULT;
+static int pty_reserve = NR_UNIX98_PTY_RESERVE;
+static int pty_limit_min;
+static int pty_limit_max = INT_MAX;
+static int pty_count;
+
+static struct ctl_table pty_table[] = {
+	{
+		.procname	= "max",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.data		= &pty_limit,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &pty_limit_min,
+		.extra2		= &pty_limit_max,
+	}, {
+		.procname	= "reserve",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.data		= &pty_reserve,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &pty_limit_min,
+		.extra2		= &pty_limit_max,
+	}, {
+		.procname	= "nr",
+		.maxlen		= sizeof(int),
+		.mode		= 0444,
+		.data		= &pty_count,
+		.proc_handler	= proc_dointvec,
+	},
+	{}
+};
+
+static struct ctl_table pty_kern_table[] = {
+	{
+		.procname	= "pty",
+		.mode		= 0555,
+		.child		= pty_table,
+	},
+	{}
+};
+
+static struct ctl_table pty_root_table[] = {
+	{
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= pty_kern_table,
+	},
+	{}
+};
+
 static DEFINE_MUTEX(allocated_ptys_lock);
 
 static struct vfsmount *devpts_mnt;
@@ -49,10 +103,11 @@ struct pts_mount_opts {
 	umode_t mode;
 	umode_t ptmxmode;
 	int newinstance;
+	int max;
 };
 
 enum {
-	Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance,
+	Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance,  Opt_max,
 	Opt_err
 };
 
@@ -63,6 +118,7 @@ static const match_table_t tokens = {
 #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
 	{Opt_ptmxmode, "ptmxmode=%o"},
 	{Opt_newinstance, "newinstance"},
+	{Opt_max, "max=%d"},
 #endif
 	{Opt_err, NULL}
 };
@@ -109,6 +165,7 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
 	opts->gid     = 0;
 	opts->mode    = DEVPTS_DEFAULT_MODE;
 	opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
+	opts->max     = NR_UNIX98_PTY_MAX;
 
 	/* newinstance makes sense only on initial mount */
 	if (op == PARSE_MOUNT)
@@ -152,6 +209,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
 			if (op == PARSE_MOUNT)
 				opts->newinstance = 1;
 			break;
+		case Opt_max:
+			if (match_int(&args[0], &option) ||
+			    option < 0 || option > NR_UNIX98_PTY_MAX)
+				return -EINVAL;
+			opts->max = option;
+			break;
 #endif
 		default:
 			printk(KERN_ERR "devpts: called with bogus options\n");
@@ -258,6 +321,8 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root)
 	seq_printf(seq, ",mode=%03o", opts->mode);
 #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
 	seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
+	if (opts->max < NR_UNIX98_PTY_MAX)
+		seq_printf(seq, ",max=%d", opts->max);
 #endif
 
 	return 0;
@@ -309,12 +374,11 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
 	inode->i_fop = &simple_dir_operations;
 	set_nlink(inode, 2);
 
-	s->s_root = d_alloc_root(inode);
+	s->s_root = d_make_root(inode);
 	if (s->s_root)
 		return 0;
 
 	printk(KERN_ERR "devpts: get root dentry failed\n");
-	iput(inode);
 
 fail:
 	return -ENOMEM;
@@ -438,6 +502,12 @@ retry:
 		return -ENOMEM;
 
 	mutex_lock(&allocated_ptys_lock);
+	if (pty_count >= pty_limit -
+			(fsi->mount_opts.newinstance ? pty_reserve : 0)) {
+		mutex_unlock(&allocated_ptys_lock);
+		return -ENOSPC;
+	}
+
 	ida_ret = ida_get_new(&fsi->allocated_ptys, &index);
 	if (ida_ret < 0) {
 		mutex_unlock(&allocated_ptys_lock);
@@ -446,11 +516,12 @@ retry:
 		return -EIO;
 	}
 
-	if (index >= pty_limit) {
+	if (index >= fsi->mount_opts.max) {
 		ida_remove(&fsi->allocated_ptys, index);
 		mutex_unlock(&allocated_ptys_lock);
-		return -EIO;
+		return -ENOSPC;
 	}
+	pty_count++;
 	mutex_unlock(&allocated_ptys_lock);
 	return index;
 }
@@ -462,6 +533,7 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
 
 	mutex_lock(&allocated_ptys_lock);
 	ida_remove(&fsi->allocated_ptys, idx);
+	pty_count--;
 	mutex_unlock(&allocated_ptys_lock);
 }
 
@@ -558,11 +630,15 @@ void devpts_pty_kill(struct tty_struct *tty)
 static int __init init_devpts_fs(void)
 {
 	int err = register_filesystem(&devpts_fs_type);
+	struct ctl_table_header *table;
+
 	if (!err) {
+		table = register_sysctl_table(pty_root_table);
 		devpts_mnt = kern_mount(&devpts_fs_type);
 		if (IS_ERR(devpts_mnt)) {
 			err = PTR_ERR(devpts_mnt);
 			unregister_filesystem(&devpts_fs_type);
+			unregister_sysctl_table(table);
 		}
 	}
 	return err;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 4a588dbd11b..f4aadd15b61 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -173,7 +173,7 @@ void inode_dio_wait(struct inode *inode)
 	if (atomic_read(&inode->i_dio_count))
 		__inode_dio_wait(inode);
 }
-EXPORT_SYMBOL_GPL(inode_dio_wait);
+EXPORT_SYMBOL(inode_dio_wait);
 
 /*
  * inode_dio_done - signal finish of a direct I/O requests
@@ -187,7 +187,7 @@ void inode_dio_done(struct inode *inode)
 	if (atomic_dec_and_test(&inode->i_dio_count))
 		wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
 }
-EXPORT_SYMBOL_GPL(inode_dio_done);
+EXPORT_SYMBOL(inode_dio_done);
 
 /*
  * How many pages are in the queue?
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index 83641574b01..dc5eb598b81 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -351,11 +351,28 @@ int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen,
 static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len)
 {
 	struct dlm_rsb *r;
+	uint32_t hash, bucket;
+	int rv;
+
+	hash = jhash(name, len, 0);
+	bucket = hash & (ls->ls_rsbtbl_size - 1);
+
+	spin_lock(&ls->ls_rsbtbl[bucket].lock);
+	rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, 0, &r);
+	if (rv)
+		rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss,
+					 name, len, 0, &r);
+	spin_unlock(&ls->ls_rsbtbl[bucket].lock);
+
+	if (!rv)
+		return r;
 
 	down_read(&ls->ls_root_sem);
 	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
 		if (len == r->res_length && !memcmp(name, r->res_name, len)) {
 			up_read(&ls->ls_root_sem);
+			log_error(ls, "find_rsb_root revert to root_list %s",
+				  r->res_name);
 			return r;
 		}
 	}
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index d47183043c5..fa5c07d51dc 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -411,8 +411,8 @@ static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen)
 	return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN);
 }
 
-static int search_rsb_tree(struct rb_root *tree, char *name, int len,
-			   unsigned int flags, struct dlm_rsb **r_ret)
+int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
+			unsigned int flags, struct dlm_rsb **r_ret)
 {
 	struct rb_node *node = tree->rb_node;
 	struct dlm_rsb *r;
@@ -474,12 +474,12 @@ static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b,
 	struct dlm_rsb *r;
 	int error;
 
-	error = search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r);
+	error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r);
 	if (!error) {
 		kref_get(&r->res_ref);
 		goto out;
 	}
-	error = search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r);
+	error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r);
 	if (error)
 		goto out;
 
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 265017a7c3e..1a255307f6f 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -28,6 +28,9 @@ void dlm_scan_waiters(struct dlm_ls *ls);
 void dlm_scan_timeout(struct dlm_ls *ls);
 void dlm_adjust_timeouts(struct dlm_ls *ls);
 
+int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
+			unsigned int flags, struct dlm_rsb **r_ret);
+
 int dlm_purge_locks(struct dlm_ls *ls);
 void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
 void dlm_grant_after_purge(struct dlm_ls *ls);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 0b3109ee425..133ef6dc7cb 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -52,6 +52,7 @@
 #include <linux/mutex.h>
 #include <linux/sctp.h>
 #include <linux/slab.h>
+#include <net/sctp/sctp.h>
 #include <net/sctp/user.h>
 #include <net/ipv6.h>
 
@@ -474,9 +475,6 @@ static void process_sctp_notification(struct connection *con,
 			int prim_len, ret;
 			int addr_len;
 			struct connection *new_con;
-			sctp_peeloff_arg_t parg;
-			int parglen = sizeof(parg);
-			int err;
 
 			/*
 			 * We get this before any data for an association.
@@ -525,23 +523,19 @@ static void process_sctp_notification(struct connection *con,
 				return;
 
 			/* Peel off a new sock */
-			parg.associd = sn->sn_assoc_change.sac_assoc_id;
-			ret = kernel_getsockopt(con->sock, IPPROTO_SCTP,
-						SCTP_SOCKOPT_PEELOFF,
-						(void *)&parg, &parglen);
+			sctp_lock_sock(con->sock->sk);
+			ret = sctp_do_peeloff(con->sock->sk,
+				sn->sn_assoc_change.sac_assoc_id,
+				&new_con->sock);
+			sctp_release_sock(con->sock->sk);
 			if (ret < 0) {
 				log_print("Can't peel off a socket for "
 					  "connection %d to node %d: err=%d",
-					  parg.associd, nodeid, ret);
-				return;
-			}
-			new_con->sock = sockfd_lookup(parg.sd, &err);
-			if (!new_con->sock) {
-				log_print("sockfd_lookup error %d", err);
+					  (int)sn->sn_assoc_change.sac_assoc_id,
+					  nodeid, ret);
 				return;
 			}
 			add_sock(new_con->sock, new_con);
-			sockfd_put(new_con->sock);
 
 			log_print("connecting to %d sctp association %d",
 				 nodeid, (int)sn->sn_assoc_change.sac_assoc_id);
@@ -1082,7 +1076,7 @@ static void init_local(void)
 	int i;
 
 	dlm_local_count = 0;
-	for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) {
+	for (i = 0; i < DLM_MAX_ADDR_COUNT; i++) {
 		if (dlm_our_addr(&sas, i))
 			break;
 
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index d3f95f941c4..2b17f2f9b12 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -48,8 +48,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
 				unsigned long nr_segs, loff_t pos)
 {
 	ssize_t rc;
-	struct dentry *lower_dentry;
-	struct vfsmount *lower_vfsmount;
+	struct path lower;
 	struct file *file = iocb->ki_filp;
 
 	rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
@@ -60,9 +59,9 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
 	if (-EIOCBQUEUED == rc)
 		rc = wait_on_sync_kiocb(iocb);
 	if (rc >= 0) {
-		lower_dentry = ecryptfs_dentry_to_lower(file->f_path.dentry);
-		lower_vfsmount = ecryptfs_dentry_to_lower_mnt(file->f_path.dentry);
-		touch_atime(lower_vfsmount, lower_dentry);
+		lower.dentry = ecryptfs_dentry_to_lower(file->f_path.dentry);
+		lower.mnt = ecryptfs_dentry_to_lower_mnt(file->f_path.dentry);
+		touch_atime(&lower);
 	}
 	return rc;
 }
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index b4a6befb121..68954937a07 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -550,9 +550,8 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 	if (IS_ERR(inode))
 		goto out_free;
 
-	s->s_root = d_alloc_root(inode);
+	s->s_root = d_make_root(inode);
 	if (!s->s_root) {
-		iput(inode);
 		rc = -ENOMEM;
 		goto out_free;
 	}
@@ -795,15 +794,10 @@ static int __init ecryptfs_init(void)
 		       "Failed to allocate one or more kmem_cache objects\n");
 		goto out;
 	}
-	rc = register_filesystem(&ecryptfs_fs_type);
-	if (rc) {
-		printk(KERN_ERR "Failed to register filesystem\n");
-		goto out_free_kmem_caches;
-	}
 	rc = do_sysfs_registration();
 	if (rc) {
 		printk(KERN_ERR "sysfs registration failed\n");
-		goto out_unregister_filesystem;
+		goto out_free_kmem_caches;
 	}
 	rc = ecryptfs_init_kthread();
 	if (rc) {
@@ -824,19 +818,24 @@ static int __init ecryptfs_init(void)
 		       "rc = [%d]\n", rc);
 		goto out_release_messaging;
 	}
+	rc = register_filesystem(&ecryptfs_fs_type);
+	if (rc) {
+		printk(KERN_ERR "Failed to register filesystem\n");
+		goto out_destroy_crypto;
+	}
 	if (ecryptfs_verbosity > 0)
 		printk(KERN_CRIT "eCryptfs verbosity set to %d. Secret values "
 			"will be written to the syslog!\n", ecryptfs_verbosity);
 
 	goto out;
+out_destroy_crypto:
+	ecryptfs_destroy_crypto();
 out_release_messaging:
 	ecryptfs_release_messaging();
 out_destroy_kthread:
 	ecryptfs_destroy_kthread();
 out_do_sysfs_unregistration:
 	do_sysfs_unregistration();
-out_unregister_filesystem:
-	unregister_filesystem(&ecryptfs_fs_type);
 out_free_kmem_caches:
 	ecryptfs_free_kmem_caches();
 out:
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 349209dc6a9..3a06f4043df 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -429,7 +429,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
 		goto memdup;
 	} else if (count < MIN_MSG_PKT_SIZE || count > MAX_MSG_PKT_SIZE) {
 		printk(KERN_WARNING "%s: Acceptable packet size range is "
-		       "[%d-%lu], but amount of data written is [%zu].",
+		       "[%d-%zu], but amount of data written is [%zu].",
 		       __func__, MIN_MSG_PKT_SIZE, MAX_MSG_PKT_SIZE, count);
 		return -EINVAL;
 	}
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index cf152823bbf..2dd946b636d 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -184,7 +184,6 @@ static int ecryptfs_show_options(struct seq_file *m, struct dentry *root)
 const struct super_operations ecryptfs_sops = {
 	.alloc_inode = ecryptfs_alloc_inode,
 	.destroy_inode = ecryptfs_destroy_inode,
-	.drop_inode = generic_drop_inode,
 	.statfs = ecryptfs_statfs,
 	.remount_fs = NULL,
 	.evict_inode = ecryptfs_evict_inode,
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 981106429a9..e755ec746c6 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -317,10 +317,9 @@ static int efs_fill_super(struct super_block *s, void *d, int silent)
 		goto out_no_fs;
 	}
 
-	s->s_root = d_alloc_root(root);
+	s->s_root = d_make_root(root);
 	if (!(s->s_root)) {
 		printk(KERN_ERR "EFS: get root dentry failed\n");
-		iput(root);
 		ret = -ENOMEM;
 		goto out_no_fs;
 	}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index aabdfc38cf2..4d9d3a45e35 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -320,6 +320,11 @@ static inline int ep_is_linked(struct list_head *p)
 	return !list_empty(p);
 }
 
+static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p)
+{
+	return container_of(p, struct eppoll_entry, wait);
+}
+
 /* Get the "struct epitem" from a wait queue pointer */
 static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
 {
@@ -467,6 +472,18 @@ static void ep_poll_safewake(wait_queue_head_t *wq)
 	put_cpu();
 }
 
+static void ep_remove_wait_queue(struct eppoll_entry *pwq)
+{
+	wait_queue_head_t *whead;
+
+	rcu_read_lock();
+	/* If it is cleared by POLLFREE, it should be rcu-safe */
+	whead = rcu_dereference(pwq->whead);
+	if (whead)
+		remove_wait_queue(whead, &pwq->wait);
+	rcu_read_unlock();
+}
+
 /*
  * This function unregisters poll callbacks from the associated file
  * descriptor.  Must be called with "mtx" held (or "epmutex" if called from
@@ -481,7 +498,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
 		pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
 
 		list_del(&pwq->llink);
-		remove_wait_queue(pwq->whead, &pwq->wait);
+		ep_remove_wait_queue(pwq);
 		kmem_cache_free(pwq_cache, pwq);
 	}
 }
@@ -842,6 +859,17 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 	struct epitem *epi = ep_item_from_wait(wait);
 	struct eventpoll *ep = epi->ep;
 
+	if ((unsigned long)key & POLLFREE) {
+		ep_pwq_from_wait(wait)->whead = NULL;
+		/*
+		 * whead = NULL above can race with ep_remove_wait_queue()
+		 * which can do another remove_wait_queue() after us, so we
+		 * can't use __remove_wait_queue(). whead->lock is held by
+		 * the caller.
+		 */
+		list_del_init(&wait->task_list);
+	}
+
 	spin_lock_irqsave(&ep->lock, flags);
 
 	/*
@@ -960,6 +988,10 @@ static int path_count[PATH_ARR_SIZE];
 
 static int path_count_inc(int nests)
 {
+	/* Allow an arbitrary number of depth 1 paths */
+	if (nests == 0)
+		return 0;
+
 	if (++path_count[nests] > path_limits[nests])
 		return -1;
 	return 0;
diff --git a/fs/exec.c b/fs/exec.c
index 92ce83a11e9..23559c227d9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -63,6 +63,8 @@
 #include <trace/events/task.h>
 #include "internal.h"
 
+#include <trace/events/sched.h>
+
 int core_uses_pid;
 char core_pattern[CORENAME_MAX_SIZE] = "core";
 unsigned int core_pipe_limit;
@@ -79,15 +81,13 @@ static atomic_t call_count = ATOMIC_INIT(1);
 static LIST_HEAD(formats);
 static DEFINE_RWLOCK(binfmt_lock);
 
-int __register_binfmt(struct linux_binfmt * fmt, int insert)
+void __register_binfmt(struct linux_binfmt * fmt, int insert)
 {
-	if (!fmt)
-		return -EINVAL;
+	BUG_ON(!fmt);
 	write_lock(&binfmt_lock);
 	insert ? list_add(&fmt->lh, &formats) :
 		 list_add_tail(&fmt->lh, &formats);
 	write_unlock(&binfmt_lock);
-	return 0;	
 }
 
 EXPORT_SYMBOL(__register_binfmt);
@@ -822,7 +822,7 @@ static int exec_mmap(struct mm_struct *mm)
 	/* Notify parent that we're no longer interested in the old VM */
 	tsk = current;
 	old_mm = current->mm;
-	sync_mm_rss(tsk, old_mm);
+	sync_mm_rss(old_mm);
 	mm_release(tsk, old_mm);
 
 	if (old_mm) {
@@ -848,6 +848,7 @@ static int exec_mmap(struct mm_struct *mm)
 	if (old_mm) {
 		up_read(&old_mm->mmap_sem);
 		BUG_ON(active_mm != old_mm);
+		setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
 		mm_update_next_owner(old_mm);
 		mmput(old_mm);
 		return 0;
@@ -975,8 +976,8 @@ static int de_thread(struct task_struct *tsk)
 	sig->notify_count = 0;
 
 no_thread_group:
-	if (current->mm)
-		setmax_mm_hiwater_rss(&sig->maxrss, current->mm);
+	/* we have changed execution domain */
+	tsk->exit_signal = SIGCHLD;
 
 	exit_itimers(sig);
 	flush_itimer_signals();
@@ -1112,7 +1113,7 @@ int flush_old_exec(struct linux_binprm * bprm)
 	bprm->mm = NULL;		/* We're using it now */
 
 	set_fs(USER_DS);
-	current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD);
+	current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD);
 	flush_thread();
 	current->personality &= ~bprm->per_clear;
 
@@ -1339,13 +1340,13 @@ int remove_arg_zero(struct linux_binprm *bprm)
 			ret = -EFAULT;
 			goto out;
 		}
-		kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = kmap_atomic(page);
 
 		for (; offset < PAGE_SIZE && kaddr[offset];
 				offset++, bprm->p++)
 			;
 
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		put_arg_page(page);
 
 		if (offset == PAGE_SIZE)
@@ -1402,9 +1403,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
 			 */
 			bprm->recursion_depth = depth;
 			if (retval >= 0) {
-				if (depth == 0)
-					ptrace_event(PTRACE_EVENT_EXEC,
-							old_pid);
+				if (depth == 0) {
+					trace_sched_process_exec(current, old_pid, bprm);
+					ptrace_event(PTRACE_EVENT_EXEC, old_pid);
+				}
 				put_binfmt(fmt);
 				allow_write_access(bprm->file);
 				if (bprm->file)
@@ -1915,7 +1917,6 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
 {
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
-	struct completion *vfork_done;
 	int core_waiters = -EBUSY;
 
 	init_completion(&core_state->startup);
@@ -1927,22 +1928,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
 		core_waiters = zap_threads(tsk, mm, core_state, exit_code);
 	up_write(&mm->mmap_sem);
 
-	if (unlikely(core_waiters < 0))
-		goto fail;
-
-	/*
-	 * Make sure nobody is waiting for us to release the VM,
-	 * otherwise we can deadlock when we wait on each other
-	 */
-	vfork_done = tsk->vfork_done;
-	if (vfork_done) {
-		tsk->vfork_done = NULL;
-		complete(vfork_done);
-	}
-
-	if (core_waiters)
+	if (core_waiters > 0)
 		wait_for_completion(&core_state->startup);
-fail:
+
 	return core_waiters;
 }
 
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 80405836ba6..c61e62ac231 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -597,7 +597,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent)
 		goto fail;
 	}
 
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 	de = (struct exofs_dir_entry *)kaddr;
 	de->name_len = 1;
 	de->rec_len = cpu_to_le16(EXOFS_DIR_REC_LEN(1));
@@ -611,7 +611,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent)
 	de->inode_no = cpu_to_le64(parent->i_ino);
 	memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR));
 	exofs_set_de_type(de, inode);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 	err = exofs_commit_chunk(page, 0, chunk_size);
 fail:
 	page_cache_release(page);
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 9dbf0c30103..fc7161d6bf6 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -143,9 +143,6 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
 {
 	struct inode *inode = old_dentry->d_inode;
 
-	if (inode->i_nlink >= EXOFS_LINK_MAX)
-		return -EMLINK;
-
 	inode->i_ctime = CURRENT_TIME;
 	inode_inc_link_count(inode);
 	ihold(inode);
@@ -156,10 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
 static int exofs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct inode *inode;
-	int err = -EMLINK;
-
-	if (dir->i_nlink >= EXOFS_LINK_MAX)
-		goto out;
+	int err;
 
 	inode_inc_link_count(dir);
 
@@ -275,11 +269,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		if (err)
 			goto out_dir;
 	} else {
-		if (dir_de) {
-			err = -EMLINK;
-			if (new_dir->i_nlink >= EXOFS_LINK_MAX)
-				goto out_dir;
-		}
 		err = exofs_add_link(new_dentry, old_inode);
 		if (err)
 			goto out_dir;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index d22cd168c6e..7f2b590a36b 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -754,6 +754,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_blocksize = EXOFS_BLKSIZE;
 	sb->s_blocksize_bits = EXOFS_BLKSHIFT;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_max_links = EXOFS_LINK_MAX;
 	atomic_set(&sbi->s_curr_pending, 0);
 	sb->s_bdev = NULL;
 	sb->s_dev = 0;
@@ -818,9 +819,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 		ret = PTR_ERR(root);
 		goto free_sbi;
 	}
-	sb->s_root = d_alloc_root(root);
+	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
-		iput(root);
 		EXOFS_ERR("ERROR: get root inode failed\n");
 		ret = -ENOMEM;
 		goto free_sbi;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index d37df352d32..0f4f5c92925 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -645,7 +645,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
 		unlock_page(page);
 		goto fail;
 	}
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 	memset(kaddr, 0, chunk_size);
 	de = (struct ext2_dir_entry_2 *)kaddr;
 	de->name_len = 1;
@@ -660,7 +660,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
 	de->inode = cpu_to_le32(parent->i_ino);
 	memcpy (de->name, "..\0", 4);
 	ext2_set_de_type (de, inode);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 	err = ext2_commit_chunk(page, 0, chunk_size);
 fail:
 	page_cache_release(page);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 080419814ba..dffb8653628 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -195,9 +195,6 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
 	struct inode *inode = old_dentry->d_inode;
 	int err;
 
-	if (inode->i_nlink >= EXT2_LINK_MAX)
-		return -EMLINK;
-
 	dquot_initialize(dir);
 
 	inode->i_ctime = CURRENT_TIME_SEC;
@@ -217,10 +214,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
 static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
 {
 	struct inode * inode;
-	int err = -EMLINK;
-
-	if (dir->i_nlink >= EXT2_LINK_MAX)
-		goto out;
+	int err;
 
 	dquot_initialize(dir);
 
@@ -346,11 +340,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 			drop_nlink(new_inode);
 		inode_dec_link_count(new_inode);
 	} else {
-		if (dir_de) {
-			err = -EMLINK;
-			if (new_dir->i_nlink >= EXT2_LINK_MAX)
-				goto out_dir;
-		}
 		err = ext2_add_link(new_dentry, old_inode);
 		if (err)
 			goto out_dir;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 0090595beb2..e1025c7a437 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -919,6 +919,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits);
+	sb->s_max_links = EXT2_LINK_MAX;
 
 	if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) {
 		sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE;
@@ -1087,9 +1088,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 		goto failed_mount3;
 	}
 
-	sb->s_root = d_alloc_root(root);
+	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
-		iput(root);
 		ext2_msg(sb, KERN_ERR, "error: get root inode failed");
 		ret = -ENOMEM;
 		goto failed_mount3;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 726c7ef6cdf..e0b45b93327 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2046,10 +2046,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
 		goto failed_mount3;
 	}
-	sb->s_root = d_alloc_root(root);
+	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
 		ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
-		iput(root);
 		ret = -ENOMEM;
 		goto failed_mount3;
 	}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 502c61fd739..933900909ed 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3735,9 +3735,8 @@ no_journal:
 		iput(root);
 		goto failed_mount4;
 	}
-	sb->s_root = d_alloc_root(root);
+	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
-		iput(root);
 		ext4_msg(sb, KERN_ERR, "get root dentry failed");
 		ret = -ENOMEM;
 		goto failed_mount4;
@@ -5056,6 +5055,9 @@ static int __init ext4_init_fs(void)
 {
 	int i, err;
 
+	ext4_li_info = NULL;
+	mutex_init(&ext4_li_mtx);
+
 	ext4_check_flag_values();
 
 	for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
@@ -5094,8 +5096,6 @@ static int __init ext4_init_fs(void)
 	if (err)
 		goto out;
 
-	ext4_li_info = NULL;
-	mutex_init(&ext4_li_mtx);
 	return 0;
 out:
 	unregister_as_ext2();
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 3ab841054d5..21687e31acc 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1496,11 +1496,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
 	root_inode->i_ino = MSDOS_ROOT_INO;
 	root_inode->i_version = 1;
 	error = fat_read_root(root_inode);
-	if (error < 0)
+	if (error < 0) {
+		iput(root_inode);
 		goto out_fail;
+	}
 	error = -ENOMEM;
 	insert_inode_hash(root_inode);
-	sb->s_root = d_alloc_root(root_inode);
+	sb->s_root = d_make_root(root_inode);
 	if (!sb->s_root) {
 		fat_msg(sb, KERN_ERR, "get root inode failed");
 		goto out_fail;
@@ -1516,8 +1518,6 @@ out_invalid:
 out_fail:
 	if (fat_inode)
 		iput(fat_inode);
-	if (root_inode)
-		iput(root_inode);
 	unload_nls(sbi->nls_io);
 	unload_nls(sbi->nls_disk);
 	if (sbi->options.iocharset != fat_default_iocharset)
diff --git a/fs/file_table.c b/fs/file_table.c
index 20002e39754..70f2a0fd6ae 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -204,7 +204,7 @@ EXPORT_SYMBOL(alloc_file);
  * to write to @file, along with access to write through
  * its vfsmount.
  */
-void drop_file_write_access(struct file *file)
+static void drop_file_write_access(struct file *file)
 {
 	struct vfsmount *mnt = file->f_path.mnt;
 	struct dentry *dentry = file->f_path.dentry;
@@ -219,7 +219,6 @@ void drop_file_write_access(struct file *file)
 	mnt_drop_write(mnt);
 	file_release_write(file);
 }
-EXPORT_SYMBOL_GPL(drop_file_write_access);
 
 /* the real guts of fput() - releasing the last reference to file
  */
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 9d1c9955838..d4fabd26084 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -224,9 +224,8 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent)
 		ret = PTR_ERR(root);
 		goto out;
 	}
-	sbp->s_root = d_alloc_root(root);
+	sbp->s_root = d_make_root(root);
 	if (!sbp->s_root) {
-		iput(root);
 		printk(KERN_WARNING "vxfs: unable to get root dentry.\n");
 		goto out_free_ilist;
 	}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5b4a9362d5a..77b535ac713 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1284,7 +1284,7 @@ int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason)
 EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
 
 /**
- * writeback_inodes_sb_if_idle	-	start writeback if none underway
+ * writeback_inodes_sb_nr_if_idle	-	start writeback if none underway
  * @sb: the superblock
  * @nr: the number of pages to write
  * @reason: reason why some writeback work was initiated
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 78b519c1353..6324c427495 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -26,11 +26,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
 {
 	struct path old_root;
 
+	path_get_longterm(path);
 	spin_lock(&fs->lock);
 	write_seqcount_begin(&fs->seq);
 	old_root = fs->root;
 	fs->root = *path;
-	path_get_longterm(path);
 	write_seqcount_end(&fs->seq);
 	spin_unlock(&fs->lock);
 	if (old_root.dentry)
@@ -45,11 +45,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
 {
 	struct path old_pwd;
 
+	path_get_longterm(path);
 	spin_lock(&fs->lock);
 	write_seqcount_begin(&fs->seq);
 	old_pwd = fs->pwd;
 	fs->pwd = *path;
-	path_get_longterm(path);
 	write_seqcount_end(&fs->seq);
 	spin_unlock(&fs->lock);
 
@@ -57,6 +57,14 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
 		path_put_longterm(&old_pwd);
 }
 
+static inline int replace_path(struct path *p, const struct path *old, const struct path *new)
+{
+	if (likely(p->dentry != old->dentry || p->mnt != old->mnt))
+		return 0;
+	*p = *new;
+	return 1;
+}
+
 void chroot_fs_refs(struct path *old_root, struct path *new_root)
 {
 	struct task_struct *g, *p;
@@ -68,21 +76,16 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
 		task_lock(p);
 		fs = p->fs;
 		if (fs) {
+			int hits = 0;
 			spin_lock(&fs->lock);
 			write_seqcount_begin(&fs->seq);
-			if (fs->root.dentry == old_root->dentry
-			    && fs->root.mnt == old_root->mnt) {
-				path_get_longterm(new_root);
-				fs->root = *new_root;
+			hits += replace_path(&fs->root, old_root, new_root);
+			hits += replace_path(&fs->pwd, old_root, new_root);
+			write_seqcount_end(&fs->seq);
+			while (hits--) {
 				count++;
-			}
-			if (fs->pwd.dentry == old_root->dentry
-			    && fs->pwd.mnt == old_root->mnt) {
 				path_get_longterm(new_root);
-				fs->pwd = *new_root;
-				count++;
 			}
-			write_seqcount_end(&fs->seq);
 			spin_unlock(&fs->lock);
 		}
 		task_unlock(p);
@@ -107,10 +110,8 @@ void exit_fs(struct task_struct *tsk)
 		int kill;
 		task_lock(tsk);
 		spin_lock(&fs->lock);
-		write_seqcount_begin(&fs->seq);
 		tsk->fs = NULL;
 		kill = !--fs->users;
-		write_seqcount_end(&fs->seq);
 		spin_unlock(&fs->lock);
 		task_unlock(tsk);
 		if (kill)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 5f3368ab0fa..7df2b5e8fbe 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -838,10 +838,10 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
 			}
 		}
 		if (page) {
-			void *mapaddr = kmap_atomic(page, KM_USER0);
+			void *mapaddr = kmap_atomic(page);
 			void *buf = mapaddr + offset;
 			offset += fuse_copy_do(cs, &buf, &count);
-			kunmap_atomic(mapaddr, KM_USER0);
+			kunmap_atomic(mapaddr);
 		} else
 			offset += fuse_copy_do(cs, NULL, &count);
 	}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 4a199fd93fb..a841868bf9c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1887,11 +1887,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 		    in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
 			goto out;
 
-		vaddr = kmap_atomic(pages[0], KM_USER0);
+		vaddr = kmap_atomic(pages[0]);
 		err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
 					    transferred, in_iovs + out_iovs,
 					    (flags & FUSE_IOCTL_COMPAT) != 0);
-		kunmap_atomic(vaddr, KM_USER0);
+		kunmap_atomic(vaddr);
 		if (err)
 			goto out;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 64cf8d07393..4aec5995867 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -988,14 +988,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
 	err = -ENOMEM;
 	root = fuse_get_root_inode(sb, d.rootmode);
-	if (!root)
+	root_dentry = d_make_root(root);
+	if (!root_dentry)
 		goto err_put_conn;
-
-	root_dentry = d_alloc_root(root);
-	if (!root_dentry) {
-		iput(root);
-		goto err_put_conn;
-	}
 	/* only now - we want root dentry with NULL ->d_op */
 	sb->s_d_op = &fuse_dentry_operations;
 
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 501e5cba09b..38b7a74a0f9 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -434,12 +434,12 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
 	if (error)
 		return error;
 
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 	if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
 		dsize = (dibh->b_size - sizeof(struct gfs2_dinode));
 	memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
 	memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 	flush_dcache_page(page);
 	brelse(dibh);
 	SetPageUptodate(page);
@@ -542,9 +542,9 @@ int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
 		page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
 		if (IS_ERR(page))
 			return PTR_ERR(page);
-		p = kmap_atomic(page, KM_USER0);
+		p = kmap_atomic(page);
 		memcpy(buf + copied, p + offset, amt);
-		kunmap_atomic(p, KM_USER0);
+		kunmap_atomic(p);
 		mark_page_accessed(page);
 		page_cache_release(page);
 		copied += amt;
@@ -788,11 +788,11 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
 	unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
 
 	BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 	memcpy(buf + pos, kaddr + pos, copied);
 	memset(kaddr + pos + copied, 0, len - copied);
 	flush_dcache_page(page);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	if (!PageUptodate(page))
 		SetPageUptodate(page);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 14a70401597..197c5c47e57 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -60,7 +60,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
 	int release = 0;
 
 	if (!page || page->index) {
-		page = grab_cache_page(inode->i_mapping, 0);
+		page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
 		if (!page)
 			return -ENOMEM;
 		release = 1;
@@ -930,7 +930,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
 	struct page *page;
 	int err;
 
-	page = grab_cache_page(mapping, index);
+	page = find_or_create_page(mapping, index, GFP_NOFS);
 	if (!page)
 		return 0;
 
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index c5fb3597f69..76834587a8a 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -313,6 +313,8 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return gfs2_get_flags(filp, (u32 __user *)arg);
 	case FS_IOC_SETFLAGS:
 		return gfs2_set_flags(filp, (u32 __user *)arg);
+	case FITRIM:
+		return gfs2_fitrim(filp, (void __user *)arg);
 	}
 	return -ENOTTY;
 }
@@ -674,6 +676,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct buffer_head *dibh;
 	int error;
+	loff_t size = len;
 	unsigned int nr_blks;
 	sector_t lblock = offset >> inode->i_blkbits;
 
@@ -707,8 +710,8 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 			goto out;
 		}
 	}
-	if (offset + len > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE))
-		i_size_write(inode, offset + len);
+	if (offset + size > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE))
+		i_size_write(inode, offset + size);
 
 	mark_inode_dirty(inode);
 
@@ -777,12 +780,14 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 	if (unlikely(error))
 		goto out_uninit;
 
-	if (!gfs2_write_alloc_required(ip, offset, len))
-		goto out_unlock;
-
 	while (len > 0) {
 		if (len < bytes)
 			bytes = len;
+		if (!gfs2_write_alloc_required(ip, offset, bytes)) {
+			len -= bytes;
+			offset += bytes;
+			continue;
+		}
 		qa = gfs2_qadata_get(ip);
 		if (!qa) {
 			error = -ENOMEM;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 376816fcd04..dab2526071c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -29,6 +29,7 @@
 #include <linux/rcupdate.h>
 #include <linux/rculist_bl.h>
 #include <linux/bit_spinlock.h>
+#include <linux/percpu.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -167,14 +168,19 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
 	spin_unlock(&lru_lock);
 }
 
-static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
+static void __gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
 {
-	spin_lock(&lru_lock);
 	if (!list_empty(&gl->gl_lru)) {
 		list_del_init(&gl->gl_lru);
 		atomic_dec(&lru_count);
 		clear_bit(GLF_LRU, &gl->gl_flags);
 	}
+}
+
+static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
+{
+	spin_lock(&lru_lock);
+	__gfs2_glock_remove_from_lru(gl);
 	spin_unlock(&lru_lock);
 }
 
@@ -217,11 +223,12 @@ void gfs2_glock_put(struct gfs2_glock *gl)
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct address_space *mapping = gfs2_glock2aspace(gl);
 
-	if (atomic_dec_and_test(&gl->gl_ref)) {
+	if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) {
+		__gfs2_glock_remove_from_lru(gl);
+		spin_unlock(&lru_lock);
 		spin_lock_bucket(gl->gl_hash);
 		hlist_bl_del_rcu(&gl->gl_list);
 		spin_unlock_bucket(gl->gl_hash);
-		gfs2_glock_remove_from_lru(gl);
 		GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
 		GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
 		trace_gfs2_glock_put(gl);
@@ -537,6 +544,11 @@ __acquires(&gl->gl_spin)
 		do_error(gl, 0); /* Fail queued try locks */
 	}
 	gl->gl_req = target;
+	set_bit(GLF_BLOCKING, &gl->gl_flags);
+	if ((gl->gl_req == LM_ST_UNLOCKED) ||
+	    (gl->gl_state == LM_ST_EXCLUSIVE) ||
+	    (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
+		clear_bit(GLF_BLOCKING, &gl->gl_flags);
 	spin_unlock(&gl->gl_spin);
 	if (glops->go_xmote_th)
 		glops->go_xmote_th(gl);
@@ -738,6 +750,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 		return -ENOMEM;
 
 	atomic_inc(&sdp->sd_glock_disposal);
+	gl->gl_sbd = sdp;
 	gl->gl_flags = 0;
 	gl->gl_name = name;
 	atomic_set(&gl->gl_ref, 1);
@@ -746,12 +759,17 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 	gl->gl_demote_state = LM_ST_EXCLUSIVE;
 	gl->gl_hash = hash;
 	gl->gl_ops = glops;
-	snprintf(gl->gl_strname, GDLM_STRNAME_BYTES, "%8x%16llx", name.ln_type, (unsigned long long)number);
+	gl->gl_dstamp = ktime_set(0, 0);
+	preempt_disable();
+	/* We use the global stats to estimate the initial per-glock stats */
+	gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type];
+	preempt_enable();
+	gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
+	gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
 	memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
 	gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
 	gl->gl_tchange = jiffies;
 	gl->gl_object = NULL;
-	gl->gl_sbd = sdp;
 	gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
 	INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
 	INIT_WORK(&gl->gl_delete, delete_work_func);
@@ -993,6 +1011,8 @@ fail:
 	}
 	set_bit(GLF_QUEUED, &gl->gl_flags);
 	trace_gfs2_glock_queue(gh, 1);
+	gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
+	gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
 	if (likely(insert_pt == NULL)) {
 		list_add_tail(&gh->gh_list, &gl->gl_holders);
 		if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
@@ -1652,6 +1672,8 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
 		*p++ = 'L';
 	if (gl->gl_object)
 		*p++ = 'o';
+	if (test_bit(GLF_BLOCKING, gflags))
+		*p++ = 'b';
 	*p = 0;
 	return buf;
 }
@@ -1708,8 +1730,78 @@ out:
 	return error;
 }
 
+static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
+{
+	struct gfs2_glock *gl = iter_ptr;
+
+	seq_printf(seq, "G: n:%u/%llx rtt:%lld/%lld rttb:%lld/%lld irt:%lld/%lld dcnt: %lld qcnt: %lld\n",
+		   gl->gl_name.ln_type,
+		   (unsigned long long)gl->gl_name.ln_number,
+		   (long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
+		   (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
+		   (long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
+		   (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
+		   (long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
+		   (long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
+		   (long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
+		   (long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
+	return 0;
+}
+
+static const char *gfs2_gltype[] = {
+	"type",
+	"reserved",
+	"nondisk",
+	"inode",
+	"rgrp",
+	"meta",
+	"iopen",
+	"flock",
+	"plock",
+	"quota",
+	"journal",
+};
+
+static const char *gfs2_stype[] = {
+	[GFS2_LKS_SRTT]		= "srtt",
+	[GFS2_LKS_SRTTVAR]	= "srttvar",
+	[GFS2_LKS_SRTTB]	= "srttb",
+	[GFS2_LKS_SRTTVARB]	= "srttvarb",
+	[GFS2_LKS_SIRT]		= "sirt",
+	[GFS2_LKS_SIRTVAR]	= "sirtvar",
+	[GFS2_LKS_DCOUNT]	= "dlm",
+	[GFS2_LKS_QCOUNT]	= "queue",
+};
+
+#define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype))
+
+static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
+{
+	struct gfs2_glock_iter *gi = seq->private;
+	struct gfs2_sbd *sdp = gi->sdp;
+	unsigned index = gi->hash >> 3;
+	unsigned subindex = gi->hash & 0x07;
+	s64 value;
+	int i;
+
+	if (index == 0 && subindex != 0)
+		return 0;
 
+	seq_printf(seq, "%-10s %8s:", gfs2_gltype[index],
+		   (index == 0) ? "cpu": gfs2_stype[subindex]);
 
+	for_each_possible_cpu(i) {
+                const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i);
+		if (index == 0) {
+			value = i;
+		} else {
+			value = lkstats->lkstats[index - 1].stats[subindex];
+		}
+		seq_printf(seq, " %15lld", (long long)value);
+	}
+	seq_putc(seq, '\n');
+	return 0;
+}
 
 int __init gfs2_glock_init(void)
 {
@@ -1822,6 +1914,35 @@ static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
 	return dump_glock(seq, iter_ptr);
 }
 
+static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct gfs2_glock_iter *gi = seq->private;
+
+	gi->hash = *pos;
+	if (*pos >= GFS2_NR_SBSTATS)
+		return NULL;
+	preempt_disable();
+	return SEQ_START_TOKEN;
+}
+
+static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr,
+				   loff_t *pos)
+{
+	struct gfs2_glock_iter *gi = seq->private;
+	(*pos)++;
+	gi->hash++;
+	if (gi->hash >= GFS2_NR_SBSTATS) {
+		preempt_enable();
+		return NULL;
+	}
+	return SEQ_START_TOKEN;
+}
+
+static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr)
+{
+	preempt_enable();
+}
+
 static const struct seq_operations gfs2_glock_seq_ops = {
 	.start = gfs2_glock_seq_start,
 	.next  = gfs2_glock_seq_next,
@@ -1829,7 +1950,21 @@ static const struct seq_operations gfs2_glock_seq_ops = {
 	.show  = gfs2_glock_seq_show,
 };
 
-static int gfs2_debugfs_open(struct inode *inode, struct file *file)
+static const struct seq_operations gfs2_glstats_seq_ops = {
+	.start = gfs2_glock_seq_start,
+	.next  = gfs2_glock_seq_next,
+	.stop  = gfs2_glock_seq_stop,
+	.show  = gfs2_glstats_seq_show,
+};
+
+static const struct seq_operations gfs2_sbstats_seq_ops = {
+	.start = gfs2_sbstats_seq_start,
+	.next  = gfs2_sbstats_seq_next,
+	.stop  = gfs2_sbstats_seq_stop,
+	.show  = gfs2_sbstats_seq_show,
+};
+
+static int gfs2_glocks_open(struct inode *inode, struct file *file)
 {
 	int ret = seq_open_private(file, &gfs2_glock_seq_ops,
 				   sizeof(struct gfs2_glock_iter));
@@ -1841,9 +1976,49 @@ static int gfs2_debugfs_open(struct inode *inode, struct file *file)
 	return ret;
 }
 
-static const struct file_operations gfs2_debug_fops = {
+static int gfs2_glstats_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open_private(file, &gfs2_glstats_seq_ops,
+				   sizeof(struct gfs2_glock_iter));
+	if (ret == 0) {
+		struct seq_file *seq = file->private_data;
+		struct gfs2_glock_iter *gi = seq->private;
+		gi->sdp = inode->i_private;
+	}
+	return ret;
+}
+
+static int gfs2_sbstats_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open_private(file, &gfs2_sbstats_seq_ops,
+				   sizeof(struct gfs2_glock_iter));
+	if (ret == 0) {
+		struct seq_file *seq = file->private_data;
+		struct gfs2_glock_iter *gi = seq->private;
+		gi->sdp = inode->i_private;
+	}
+	return ret;
+}
+
+static const struct file_operations gfs2_glocks_fops = {
+	.owner   = THIS_MODULE,
+	.open    = gfs2_glocks_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_private,
+};
+
+static const struct file_operations gfs2_glstats_fops = {
 	.owner   = THIS_MODULE,
-	.open    = gfs2_debugfs_open,
+	.open    = gfs2_glstats_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_private,
+};
+
+static const struct file_operations gfs2_sbstats_fops = {
+	.owner   = THIS_MODULE,
+	.open	 = gfs2_sbstats_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
 	.release = seq_release_private,
@@ -1857,20 +2032,45 @@ int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
 	sdp->debugfs_dentry_glocks = debugfs_create_file("glocks",
 							 S_IFREG | S_IRUGO,
 							 sdp->debugfs_dir, sdp,
-							 &gfs2_debug_fops);
+							 &gfs2_glocks_fops);
 	if (!sdp->debugfs_dentry_glocks)
-		return -ENOMEM;
+		goto fail;
+
+	sdp->debugfs_dentry_glstats = debugfs_create_file("glstats",
+							S_IFREG | S_IRUGO,
+							sdp->debugfs_dir, sdp,
+							&gfs2_glstats_fops);
+	if (!sdp->debugfs_dentry_glstats)
+		goto fail;
+
+	sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats",
+							S_IFREG | S_IRUGO,
+							sdp->debugfs_dir, sdp,
+							&gfs2_sbstats_fops);
+	if (!sdp->debugfs_dentry_sbstats)
+		goto fail;
 
 	return 0;
+fail:
+	gfs2_delete_debugfs_file(sdp);
+	return -ENOMEM;
 }
 
 void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
 {
-	if (sdp && sdp->debugfs_dir) {
+	if (sdp->debugfs_dir) {
 		if (sdp->debugfs_dentry_glocks) {
 			debugfs_remove(sdp->debugfs_dentry_glocks);
 			sdp->debugfs_dentry_glocks = NULL;
 		}
+		if (sdp->debugfs_dentry_glstats) {
+			debugfs_remove(sdp->debugfs_dentry_glstats);
+			sdp->debugfs_dentry_glstats = NULL;
+		}
+		if (sdp->debugfs_dentry_sbstats) {
+			debugfs_remove(sdp->debugfs_dentry_sbstats);
+			sdp->debugfs_dentry_sbstats = NULL;
+		}
 		debugfs_remove(sdp->debugfs_dir);
 		sdp->debugfs_dir = NULL;
 	}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 97742a7ea9c..47d0bda5ac2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -19,6 +19,8 @@
 #include <linux/rculist_bl.h>
 #include <linux/completion.h>
 #include <linux/rbtree.h>
+#include <linux/ktime.h>
+#include <linux/percpu.h>
 
 #define DIO_WAIT	0x00000010
 #define DIO_METADATA	0x00000020
@@ -205,6 +207,22 @@ struct gfs2_glock_operations {
 };
 
 enum {
+	GFS2_LKS_SRTT = 0,	/* Non blocking smoothed round trip time */
+	GFS2_LKS_SRTTVAR = 1,	/* Non blocking smoothed variance */
+	GFS2_LKS_SRTTB = 2,	/* Blocking smoothed round trip time */
+	GFS2_LKS_SRTTVARB = 3,	/* Blocking smoothed variance */
+	GFS2_LKS_SIRT = 4,	/* Smoothed Inter-request time */
+	GFS2_LKS_SIRTVAR = 5,	/* Smoothed Inter-request variance */
+	GFS2_LKS_DCOUNT = 6,	/* Count of dlm requests */
+	GFS2_LKS_QCOUNT = 7,	/* Count of gfs2_holder queues */
+	GFS2_NR_LKSTATS
+};
+
+struct gfs2_lkstats {
+	s64 stats[GFS2_NR_LKSTATS];
+};
+
+enum {
 	/* States */
 	HIF_HOLDER		= 6,  /* Set for gh that "holds" the glock */
 	HIF_FIRST		= 7,
@@ -238,10 +256,12 @@ enum {
 	GLF_QUEUED			= 12,
 	GLF_LRU				= 13,
 	GLF_OBJECT			= 14, /* Used only for tracing */
+	GLF_BLOCKING			= 15,
 };
 
 struct gfs2_glock {
 	struct hlist_bl_node gl_list;
+	struct gfs2_sbd *gl_sbd;
 	unsigned long gl_flags;		/* GLF_... */
 	struct lm_lockname gl_name;
 	atomic_t gl_ref;
@@ -261,16 +281,14 @@ struct gfs2_glock {
 	struct list_head gl_holders;
 
 	const struct gfs2_glock_operations *gl_ops;
-	char gl_strname[GDLM_STRNAME_BYTES];
+	ktime_t gl_dstamp;
+	struct gfs2_lkstats gl_stats;
 	struct dlm_lksb gl_lksb;
 	char gl_lvb[32];
 	unsigned long gl_tchange;
 	void *gl_object;
 
 	struct list_head gl_lru;
-
-	struct gfs2_sbd *gl_sbd;
-
 	struct list_head gl_ail_list;
 	atomic_t gl_ail_count;
 	atomic_t gl_revokes;
@@ -560,8 +578,14 @@ struct lm_lockstruct {
 	uint32_t *ls_recover_result; /* result of last jid recovery */
 };
 
+struct gfs2_pcpu_lkstats {
+	/* One struct for each glock type */
+	struct gfs2_lkstats lkstats[10];
+};
+
 struct gfs2_sbd {
 	struct super_block *sd_vfs;
+	struct gfs2_pcpu_lkstats __percpu *sd_lkstats;
 	struct kobject sd_kobj;
 	unsigned long sd_flags;	/* SDF_... */
 	struct gfs2_sb_host sd_sb;
@@ -620,7 +644,6 @@ struct gfs2_sbd {
 
 	int sd_rindex_uptodate;
 	spinlock_t sd_rindex_spin;
-	struct mutex sd_rindex_mutex;
 	struct rb_root sd_rindex_tree;
 	unsigned int sd_rgrps;
 	unsigned int sd_max_rg_data;
@@ -725,8 +748,23 @@ struct gfs2_sbd {
 
 	unsigned long sd_last_warning;
 	struct dentry *debugfs_dir;    /* debugfs directory */
-	struct dentry *debugfs_dentry_glocks; /* for debugfs */
+	struct dentry *debugfs_dentry_glocks;
+	struct dentry *debugfs_dentry_glstats;
+	struct dentry *debugfs_dentry_sbstats;
 };
 
+static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which)
+{
+	gl->gl_stats.stats[which]++;
+}
+
+static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which)
+{
+	const struct gfs2_sbd *sdp = gl->gl_sbd;
+	preempt_disable();
+	this_cpu_ptr(sdp->sd_lkstats)->lkstats[gl->gl_name.ln_type].stats[which]++;
+	preempt_enable();
+}
+
 #endif /* __INCORE_DOT_H__ */
 
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index a7d611b93f0..c98a60ee6df 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -391,10 +391,6 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
 	int error;
 	int dblocks = 1;
 
-	error = gfs2_rindex_update(sdp);
-	if (error)
-		fs_warn(sdp, "rindex update returns %d\n", error);
-
 	error = gfs2_inplace_reserve(dip, RES_DINODE);
 	if (error)
 		goto out;
@@ -1040,9 +1036,10 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
 	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
 	gfs2_holder_init(ip->i_gl,  LM_ST_EXCLUSIVE, 0, ghs + 1);
 
-	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
+	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
 	if (!rgd)
 		goto out_inodes;
+
 	gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
 
 
@@ -1258,7 +1255,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		 * this is the case of the target file already existing
 		 * so we unlink before doing the rename
 		 */
-		nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
+		nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr, 1);
 		if (nrgd)
 			gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
 	}
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 8944d1e32ab..f8411bd1b80 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -18,14 +18,106 @@
 #include "glock.h"
 #include "util.h"
 #include "sys.h"
+#include "trace_gfs2.h"
 
 extern struct workqueue_struct *gfs2_control_wq;
 
+/**
+ * gfs2_update_stats - Update time based stats
+ * @mv: Pointer to mean/variance structure to update
+ * @sample: New data to include
+ *
+ * @delta is the difference between the current rtt sample and the
+ * running average srtt. We add 1/8 of that to the srtt in order to
+ * update the current srtt estimate. The varience estimate is a bit
+ * more complicated. We subtract the abs value of the @delta from
+ * the current variance estimate and add 1/4 of that to the running
+ * total.
+ *
+ * Note that the index points at the array entry containing the smoothed
+ * mean value, and the variance is always in the following entry
+ *
+ * Reference: TCP/IP Illustrated, vol 2, p. 831,832
+ * All times are in units of integer nanoseconds. Unlike the TCP/IP case,
+ * they are not scaled fixed point.
+ */
+
+static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
+				     s64 sample)
+{
+	s64 delta = sample - s->stats[index];
+	s->stats[index] += (delta >> 3);
+	index++;
+	s->stats[index] += ((abs64(delta) - s->stats[index]) >> 2);
+}
+
+/**
+ * gfs2_update_reply_times - Update locking statistics
+ * @gl: The glock to update
+ *
+ * This assumes that gl->gl_dstamp has been set earlier.
+ *
+ * The rtt (lock round trip time) is an estimate of the time
+ * taken to perform a dlm lock request. We update it on each
+ * reply from the dlm.
+ *
+ * The blocking flag is set on the glock for all dlm requests
+ * which may potentially block due to lock requests from other nodes.
+ * DLM requests where the current lock state is exclusive, the
+ * requested state is null (or unlocked) or where the TRY or
+ * TRY_1CB flags are set are classified as non-blocking. All
+ * other DLM requests are counted as (potentially) blocking.
+ */
+static inline void gfs2_update_reply_times(struct gfs2_glock *gl)
+{
+	struct gfs2_pcpu_lkstats *lks;
+	const unsigned gltype = gl->gl_name.ln_type;
+	unsigned index = test_bit(GLF_BLOCKING, &gl->gl_flags) ?
+			 GFS2_LKS_SRTTB : GFS2_LKS_SRTT;
+	s64 rtt;
+
+	preempt_disable();
+	rtt = ktime_to_ns(ktime_sub(ktime_get_real(), gl->gl_dstamp));
+	lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats);
+	gfs2_update_stats(&gl->gl_stats, index, rtt);		/* Local */
+	gfs2_update_stats(&lks->lkstats[gltype], index, rtt);	/* Global */
+	preempt_enable();
+
+	trace_gfs2_glock_lock_time(gl, rtt);
+}
+
+/**
+ * gfs2_update_request_times - Update locking statistics
+ * @gl: The glock to update
+ *
+ * The irt (lock inter-request times) measures the average time
+ * between requests to the dlm. It is updated immediately before
+ * each dlm call.
+ */
+
+static inline void gfs2_update_request_times(struct gfs2_glock *gl)
+{
+	struct gfs2_pcpu_lkstats *lks;
+	const unsigned gltype = gl->gl_name.ln_type;
+	ktime_t dstamp;
+	s64 irt;
+
+	preempt_disable();
+	dstamp = gl->gl_dstamp;
+	gl->gl_dstamp = ktime_get_real();
+	irt = ktime_to_ns(ktime_sub(gl->gl_dstamp, dstamp));
+	lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats);
+	gfs2_update_stats(&gl->gl_stats, GFS2_LKS_SIRT, irt);		/* Local */
+	gfs2_update_stats(&lks->lkstats[gltype], GFS2_LKS_SIRT, irt);	/* Global */
+	preempt_enable();
+}
+ 
 static void gdlm_ast(void *arg)
 {
 	struct gfs2_glock *gl = arg;
 	unsigned ret = gl->gl_state;
 
+	gfs2_update_reply_times(gl);
 	BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
 
 	if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID)
@@ -111,7 +203,7 @@ static int make_mode(const unsigned int lmstate)
 static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
 		      const int req)
 {
-	u32 lkf = 0;
+	u32 lkf = DLM_LKF_VALBLK;
 
 	if (gfs_flags & LM_FLAG_TRY)
 		lkf |= DLM_LKF_NOQUEUE;
@@ -138,26 +230,43 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
 	if (lkid != 0) 
 		lkf |= DLM_LKF_CONVERT;
 
-	lkf |= DLM_LKF_VALBLK;
-
 	return lkf;
 }
 
+static void gfs2_reverse_hex(char *c, u64 value)
+{
+	while (value) {
+		*c-- = hex_asc[value & 0x0f];
+		value >>= 4;
+	}
+}
+
 static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
 		     unsigned int flags)
 {
 	struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
 	int req;
 	u32 lkf;
+	char strname[GDLM_STRNAME_BYTES] = "";
 
 	req = make_mode(req_state);
 	lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req);
-
+	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
+	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
+	if (gl->gl_lksb.sb_lkid) {
+		gfs2_update_request_times(gl);
+	} else {
+		memset(strname, ' ', GDLM_STRNAME_BYTES - 1);
+		strname[GDLM_STRNAME_BYTES - 1] = '\0';
+		gfs2_reverse_hex(strname + 7, gl->gl_name.ln_type);
+		gfs2_reverse_hex(strname + 23, gl->gl_name.ln_number);
+		gl->gl_dstamp = ktime_get_real();
+	}
 	/*
 	 * Submit the actual lock request.
 	 */
 
-	return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname,
+	return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
 			GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
 }
 
@@ -172,6 +281,10 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
 		return;
 	}
 
+	clear_bit(GLF_BLOCKING, &gl->gl_flags);
+	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
+	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
+	gfs2_update_request_times(gl);
 	error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
 			   NULL, gl);
 	if (error) {
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 756fae9eaf8..4752eadc7f6 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -19,6 +19,7 @@
 #include <linux/freezer.h>
 #include <linux/bio.h>
 #include <linux/writeback.h>
+#include <linux/list_sort.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -358,7 +359,7 @@ retry:
 	return 0;
 }
 
-static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
+u64 gfs2_log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
 {
 	struct gfs2_journal_extent *je;
 
@@ -467,8 +468,8 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
 
 void gfs2_log_incr_head(struct gfs2_sbd *sdp)
 {
-	if (sdp->sd_log_flush_head == sdp->sd_log_tail)
-		BUG_ON(sdp->sd_log_flush_head != sdp->sd_log_head);
+	BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
+	       (sdp->sd_log_flush_head != sdp->sd_log_head));
 
 	if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
 		sdp->sd_log_flush_head = 0;
@@ -476,99 +477,6 @@ void gfs2_log_incr_head(struct gfs2_sbd *sdp)
 	}
 }
 
-/**
- * gfs2_log_write_endio - End of I/O for a log buffer
- * @bh: The buffer head
- * @uptodate: I/O Status
- *
- */
-
-static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate)
-{
-	struct gfs2_sbd *sdp = bh->b_private;
-	bh->b_private = NULL;
-
-	end_buffer_write_sync(bh, uptodate);
-	if (atomic_dec_and_test(&sdp->sd_log_in_flight))
-		wake_up(&sdp->sd_log_flush_wait);
-}
-
-/**
- * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
- * @sdp: The GFS2 superblock
- *
- * Returns: the buffer_head
- */
-
-struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
-{
-	u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
-	struct buffer_head *bh;
-
-	bh = sb_getblk(sdp->sd_vfs, blkno);
-	lock_buffer(bh);
-	memset(bh->b_data, 0, bh->b_size);
-	set_buffer_uptodate(bh);
-	clear_buffer_dirty(bh);
-	gfs2_log_incr_head(sdp);
-	atomic_inc(&sdp->sd_log_in_flight);
-	bh->b_private = sdp;
-	bh->b_end_io = gfs2_log_write_endio;
-
-	return bh;
-}
-
-/**
- * gfs2_fake_write_endio - 
- * @bh: The buffer head
- * @uptodate: The I/O Status
- *
- */
-
-static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate)
-{
-	struct buffer_head *real_bh = bh->b_private;
-	struct gfs2_bufdata *bd = real_bh->b_private;
-	struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd;
-
-	end_buffer_write_sync(bh, uptodate);
-	free_buffer_head(bh);
-	unlock_buffer(real_bh);
-	brelse(real_bh);
-	if (atomic_dec_and_test(&sdp->sd_log_in_flight))
-		wake_up(&sdp->sd_log_flush_wait);
-}
-
-/**
- * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
- * @sdp: the filesystem
- * @data: the data the buffer_head should point to
- *
- * Returns: the log buffer descriptor
- */
-
-struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
-				      struct buffer_head *real)
-{
-	u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
-	struct buffer_head *bh;
-
-	bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
-	atomic_set(&bh->b_count, 1);
-	bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
-	set_bh_page(bh, real->b_page, bh_offset(real));
-	bh->b_blocknr = blkno;
-	bh->b_size = sdp->sd_sb.sb_bsize;
-	bh->b_bdev = sdp->sd_vfs->s_bdev;
-	bh->b_private = real;
-	bh->b_end_io = gfs2_fake_write_endio;
-
-	gfs2_log_incr_head(sdp);
-	atomic_inc(&sdp->sd_log_in_flight);
-
-	return bh;
-}
-
 static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
 {
 	unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
@@ -583,66 +491,8 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
 	sdp->sd_log_tail = new_tail;
 }
 
-/**
- * log_write_header - Get and initialize a journal header buffer
- * @sdp: The GFS2 superblock
- *
- * Returns: the initialized log buffer descriptor
- */
 
-static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
-{
-	u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
-	struct buffer_head *bh;
-	struct gfs2_log_header *lh;
-	unsigned int tail;
-	u32 hash;
-
-	bh = sb_getblk(sdp->sd_vfs, blkno);
-	lock_buffer(bh);
-	memset(bh->b_data, 0, bh->b_size);
-	set_buffer_uptodate(bh);
-	clear_buffer_dirty(bh);
-
-	gfs2_ail1_empty(sdp);
-	tail = current_tail(sdp);
-
-	lh = (struct gfs2_log_header *)bh->b_data;
-	memset(lh, 0, sizeof(struct gfs2_log_header));
-	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
-	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
-	lh->lh_header.__pad0 = cpu_to_be64(0);
-	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
-	lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
-	lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
-	lh->lh_flags = cpu_to_be32(flags);
-	lh->lh_tail = cpu_to_be32(tail);
-	lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
-	hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
-	lh->lh_hash = cpu_to_be32(hash);
-
-	bh->b_end_io = end_buffer_write_sync;
-	get_bh(bh);
-	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
-		submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
-	else
-		submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
-	wait_on_buffer(bh);
-
-	if (!buffer_uptodate(bh))
-		gfs2_io_error_bh(sdp, bh);
-	brelse(bh);
-
-	if (sdp->sd_log_tail != tail)
-		log_pull_tail(sdp, tail);
-	else
-		gfs2_assert_withdraw(sdp, !pull);
-
-	sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
-	gfs2_log_incr_head(sdp);
-}
-
-static void log_flush_commit(struct gfs2_sbd *sdp)
+static void log_flush_wait(struct gfs2_sbd *sdp)
 {
 	DEFINE_WAIT(wait);
 
@@ -655,8 +505,20 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
 		} while(atomic_read(&sdp->sd_log_in_flight));
 		finish_wait(&sdp->sd_log_flush_wait, &wait);
 	}
+}
+
+static int bd_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct gfs2_bufdata *bda, *bdb;
 
-	log_write_header(sdp, 0, 0);
+	bda = list_entry(a, struct gfs2_bufdata, bd_le.le_list);
+	bdb = list_entry(b, struct gfs2_bufdata, bd_le.le_list);
+
+	if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
+		return -1;
+	if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr)
+		return 1;
+	return 0;
 }
 
 static void gfs2_ordered_write(struct gfs2_sbd *sdp)
@@ -666,6 +528,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
 	LIST_HEAD(written);
 
 	gfs2_log_lock(sdp);
+	list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp);
 	while (!list_empty(&sdp->sd_log_le_ordered)) {
 		bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list);
 		list_move(&bd->bd_le.le_list, &written);
@@ -711,6 +574,68 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
 }
 
 /**
+ * log_write_header - Get and initialize a journal header buffer
+ * @sdp: The GFS2 superblock
+ *
+ * Returns: the initialized log buffer descriptor
+ */
+
+static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
+{
+	u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
+	struct buffer_head *bh;
+	struct gfs2_log_header *lh;
+	unsigned int tail;
+	u32 hash;
+
+	bh = sb_getblk(sdp->sd_vfs, blkno);
+	lock_buffer(bh);
+	memset(bh->b_data, 0, bh->b_size);
+	set_buffer_uptodate(bh);
+	clear_buffer_dirty(bh);
+
+	gfs2_ail1_empty(sdp);
+	tail = current_tail(sdp);
+
+	lh = (struct gfs2_log_header *)bh->b_data;
+	memset(lh, 0, sizeof(struct gfs2_log_header));
+	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
+	lh->lh_header.__pad0 = cpu_to_be64(0);
+	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
+	lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
+	lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
+	lh->lh_flags = cpu_to_be32(flags);
+	lh->lh_tail = cpu_to_be32(tail);
+	lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
+	hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
+	lh->lh_hash = cpu_to_be32(hash);
+
+	bh->b_end_io = end_buffer_write_sync;
+	get_bh(bh);
+	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
+		gfs2_ordered_wait(sdp);
+		log_flush_wait(sdp);
+		submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
+	} else {
+		submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
+	}
+	wait_on_buffer(bh);
+
+	if (!buffer_uptodate(bh))
+		gfs2_io_error_bh(sdp, bh);
+	brelse(bh);
+
+	if (sdp->sd_log_tail != tail)
+		log_pull_tail(sdp, tail);
+	else
+		gfs2_assert_withdraw(sdp, !pull);
+
+	sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
+	gfs2_log_incr_head(sdp);
+}
+
+/**
  * gfs2_log_flush - flush incore transaction(s)
  * @sdp: the filesystem
  * @gl: The glock structure to flush.  If NULL, flush the whole incore log
@@ -753,11 +678,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 
 	gfs2_ordered_write(sdp);
 	lops_before_commit(sdp);
-	gfs2_ordered_wait(sdp);
 
-	if (sdp->sd_log_head != sdp->sd_log_flush_head)
-		log_flush_commit(sdp);
-	else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
+	if (sdp->sd_log_head != sdp->sd_log_flush_head) {
+		log_write_header(sdp, 0, 0);
+	} else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
 		gfs2_log_lock(sdp);
 		atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
 		trace_gfs2_log_blocks(sdp, -1);
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index ab0621698b7..ff07454b582 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -53,10 +53,7 @@ extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
 
 extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
 extern void gfs2_log_incr_head(struct gfs2_sbd *sdp);
-
-extern struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
-extern struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
-				      struct buffer_head *real);
+extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp, unsigned int lbn);
 extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
 extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
 extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 0301be655b1..6b1efb594d9 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -12,6 +12,7 @@
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
+#include <linux/mempool.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/bio.h>
 #include <linux/fs.h>
@@ -76,7 +77,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
 	if (bi->bi_clone == 0)
 		return;
 	if (sdp->sd_args.ar_discard)
-		gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi);
+		gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
 	memcpy(bi->bi_clone + bi->bi_offset,
 	       bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
 	clear_bit(GBF_FULL, &bi->bi_flags);
@@ -143,6 +144,98 @@ static inline __be64 *bh_ptr_end(struct buffer_head *bh)
 	return (__force __be64 *)(bh->b_data + bh->b_size);
 }
 
+/**
+ * gfs2_log_write_endio - End of I/O for a log buffer
+ * @bh: The buffer head
+ * @uptodate: I/O Status
+ *
+ */
+
+static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate)
+{
+	struct gfs2_sbd *sdp = bh->b_private;
+	bh->b_private = NULL;
+
+	end_buffer_write_sync(bh, uptodate);
+	if (atomic_dec_and_test(&sdp->sd_log_in_flight))
+		wake_up(&sdp->sd_log_flush_wait);
+}
+
+/**
+ * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
+ * @sdp: The GFS2 superblock
+ *
+ * tReturns: the buffer_head
+ */
+
+static struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
+{
+	u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
+	struct buffer_head *bh;
+
+	bh = sb_getblk(sdp->sd_vfs, blkno);
+	lock_buffer(bh);
+	memset(bh->b_data, 0, bh->b_size);
+	set_buffer_uptodate(bh);
+	clear_buffer_dirty(bh);
+	gfs2_log_incr_head(sdp);
+	atomic_inc(&sdp->sd_log_in_flight);
+	bh->b_private = sdp;
+	bh->b_end_io = gfs2_log_write_endio;
+
+	return bh;
+}
+
+/**
+ * gfs2_fake_write_endio - 
+ * @bh: The buffer head
+ * @uptodate: The I/O Status
+ *
+ */
+
+static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate)
+{
+	struct buffer_head *real_bh = bh->b_private;
+	struct gfs2_bufdata *bd = real_bh->b_private;
+	struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd;
+
+	end_buffer_write_sync(bh, uptodate);
+	mempool_free(bh, gfs2_bh_pool);
+	unlock_buffer(real_bh);
+	brelse(real_bh);
+	if (atomic_dec_and_test(&sdp->sd_log_in_flight))
+		wake_up(&sdp->sd_log_flush_wait);
+}
+
+/**
+ * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
+ * @sdp: the filesystem
+ * @data: the data the buffer_head should point to
+ *
+ * Returns: the log buffer descriptor
+ */
+
+static struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
+				      struct buffer_head *real)
+{
+	u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
+	struct buffer_head *bh;
+
+	bh = mempool_alloc(gfs2_bh_pool, GFP_NOFS);
+	atomic_set(&bh->b_count, 1);
+	bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
+	set_bh_page(bh, real->b_page, bh_offset(real));
+	bh->b_blocknr = blkno;
+	bh->b_size = sdp->sd_sb.sb_bsize;
+	bh->b_bdev = sdp->sd_vfs->s_bdev;
+	bh->b_private = real;
+	bh->b_end_io = gfs2_fake_write_endio;
+
+	gfs2_log_incr_head(sdp);
+	atomic_inc(&sdp->sd_log_in_flight);
+
+	return bh;
+}
 
 static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
 {
@@ -553,11 +646,11 @@ static void gfs2_check_magic(struct buffer_head *bh)
 	__be32 *ptr;
 
 	clear_buffer_escaped(bh);
-	kaddr = kmap_atomic(bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(bh->b_page);
 	ptr = kaddr + bh_offset(bh);
 	if (*ptr == cpu_to_be32(GFS2_MAGIC))
 		set_buffer_escaped(bh);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 }
 
 static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -594,10 +687,10 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
 		if (buffer_escaped(bd->bd_bh)) {
 			void *kaddr;
 			bh1 = gfs2_log_get_buf(sdp);
-			kaddr = kmap_atomic(bd->bd_bh->b_page, KM_USER0);
+			kaddr = kmap_atomic(bd->bd_bh->b_page);
 			memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh),
 			       bh1->b_size);
-			kunmap_atomic(kaddr, KM_USER0);
+			kunmap_atomic(kaddr);
 			*(__be32 *)bh1->b_data = 0;
 			clear_buffer_escaped(bd->bd_bh);
 			unlock_buffer(bd->bd_bh);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index a8d9bcd0e19..754426b1e52 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -17,6 +17,7 @@
 #include <linux/rcupdate.h>
 #include <linux/rculist_bl.h>
 #include <linux/atomic.h>
+#include <linux/mempool.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -69,6 +70,16 @@ static void gfs2_init_gl_aspace_once(void *foo)
 	address_space_init_once(mapping);
 }
 
+static void *gfs2_bh_alloc(gfp_t mask, void *data)
+{
+	return alloc_buffer_head(mask);
+}
+
+static void gfs2_bh_free(void *ptr, void *data)
+{
+	return free_buffer_head(ptr);
+}
+
 /**
  * init_gfs2_fs - Register GFS2 as a filesystem
  *
@@ -151,6 +162,10 @@ static int __init init_gfs2_fs(void)
 	gfs2_control_wq = alloc_workqueue("gfs2_control",
 			       WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0);
 	if (!gfs2_control_wq)
+		goto fail_recovery;
+
+	gfs2_bh_pool = mempool_create(1024, gfs2_bh_alloc, gfs2_bh_free, NULL);
+	if (!gfs2_bh_pool)
 		goto fail_control;
 
 	gfs2_register_debugfs();
@@ -160,6 +175,8 @@ static int __init init_gfs2_fs(void)
 	return 0;
 
 fail_control:
+	destroy_workqueue(gfs2_control_wq);
+fail_recovery:
 	destroy_workqueue(gfs_recovery_wq);
 fail_wq:
 	unregister_filesystem(&gfs2meta_fs_type);
@@ -208,6 +225,7 @@ static void __exit exit_gfs2_fs(void)
 
 	rcu_barrier();
 
+	mempool_destroy(gfs2_bh_pool);
 	kmem_cache_destroy(gfs2_quotad_cachep);
 	kmem_cache_destroy(gfs2_rgrpd_cachep);
 	kmem_cache_destroy(gfs2_bufdata_cachep);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 6aacf3f230a..6f3a18f9e17 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -68,6 +68,12 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 
 	sb->s_fs_info = sdp;
 	sdp->sd_vfs = sb;
+	sdp->sd_lkstats = alloc_percpu(struct gfs2_pcpu_lkstats);
+	if (!sdp->sd_lkstats) {
+		kfree(sdp);
+		return NULL;
+	}
+
 	set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
 	gfs2_tune_init(&sdp->sd_tune);
 
@@ -77,7 +83,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 	spin_lock_init(&sdp->sd_statfs_spin);
 
 	spin_lock_init(&sdp->sd_rindex_spin);
-	mutex_init(&sdp->sd_rindex_mutex);
 	sdp->sd_rindex_tree.rb_node = NULL;
 
 	INIT_LIST_HEAD(&sdp->sd_jindex_list);
@@ -431,10 +436,9 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
 		fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
 		return PTR_ERR(inode);
 	}
-	dentry = d_alloc_root(inode);
+	dentry = d_make_root(inode);
 	if (!dentry) {
 		fs_err(sdp, "can't alloc %s dentry\n", name);
-		iput(inode);
 		return -ENOMEM;
 	}
 	*dptr = dentry;
@@ -800,6 +804,11 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
 		fs_err(sdp, "can't get quota file inode: %d\n", error);
 		goto fail_rindex;
 	}
+
+	error = gfs2_rindex_update(sdp);
+	if (error)
+		goto fail_qinode;
+
 	return 0;
 
 fail_qinode:
@@ -1216,6 +1225,7 @@ fail_sys:
 	gfs2_sys_fs_del(sdp);
 fail:
 	gfs2_delete_debugfs_file(sdp);
+	free_percpu(sdp->sd_lkstats);
 	kfree(sdp);
 	sb->s_fs_info = NULL;
 	return error;
@@ -1388,6 +1398,7 @@ static void gfs2_kill_sb(struct super_block *sb)
 	shrink_dcache_sb(sb);
 	kill_block_super(sb);
 	gfs2_delete_debugfs_file(sdp);
+	free_percpu(sdp->sd_lkstats);
 	kfree(sdp);
 }
 
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a45b21b0391..6019da3dcae 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -681,7 +681,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 	ptr = qp;
 	nbytes = sizeof(struct gfs2_quota);
 get_a_page:
-	page = grab_cache_page(mapping, index);
+	page = find_or_create_page(mapping, index, GFP_NOFS);
 	if (!page)
 		return -ENOMEM;
 
@@ -720,12 +720,12 @@ get_a_page:
 
 	gfs2_trans_add_bh(ip->i_gl, bh, 0);
 
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 	if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE)
 		nbytes = PAGE_CACHE_SIZE - offset;
 	memcpy(kaddr + offset, ptr, nbytes);
 	flush_dcache_page(page);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 	unlock_page(page);
 	page_cache_release(page);
 
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 981bfa32121..19bde40b486 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -327,23 +327,34 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
  * Returns: The resource group, or NULL if not found
  */
 
-struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
+struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
 {
-	struct rb_node **newn;
+	struct rb_node *n, *next;
 	struct gfs2_rgrpd *cur;
 
+	if (gfs2_rindex_update(sdp))
+		return NULL;
+
 	spin_lock(&sdp->sd_rindex_spin);
-	newn = &sdp->sd_rindex_tree.rb_node;
-	while (*newn) {
-		cur = rb_entry(*newn, struct gfs2_rgrpd, rd_node);
+	n = sdp->sd_rindex_tree.rb_node;
+	while (n) {
+		cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
+		next = NULL;
 		if (blk < cur->rd_addr)
-			newn = &((*newn)->rb_left);
+			next = n->rb_left;
 		else if (blk >= cur->rd_data0 + cur->rd_data)
-			newn = &((*newn)->rb_right);
-		else {
+			next = n->rb_right;
+		if (next == NULL) {
 			spin_unlock(&sdp->sd_rindex_spin);
+			if (exact) {
+				if (blk < cur->rd_addr)
+					return NULL;
+				if (blk >= cur->rd_data0 + cur->rd_data)
+					return NULL;
+			}
 			return cur;
 		}
+		n = next;
 	}
 	spin_unlock(&sdp->sd_rindex_spin);
 
@@ -532,7 +543,6 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
 	struct file_ra_state ra_state;
 	int error, rgrps;
 
-	mutex_lock(&sdp->sd_rindex_mutex);
 	file_ra_state_init(&ra_state, inode->i_mapping);
 	for (rgrps = 0;; rgrps++) {
 		loff_t pos = rgrps * sizeof(struct gfs2_rindex);
@@ -545,11 +555,10 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
 			break;
 		total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
 	}
-	mutex_unlock(&sdp->sd_rindex_mutex);
 	return total_data;
 }
 
-static void rgd_insert(struct gfs2_rgrpd *rgd)
+static int rgd_insert(struct gfs2_rgrpd *rgd)
 {
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 	struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
@@ -565,11 +574,13 @@ static void rgd_insert(struct gfs2_rgrpd *rgd)
 		else if (rgd->rd_addr > cur->rd_addr)
 			newn = &((*newn)->rb_right);
 		else
-			return;
+			return -EEXIST;
 	}
 
 	rb_link_node(&rgd->rd_node, parent, newn);
 	rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
+	sdp->sd_rgrps++;
+	return 0;
 }
 
 /**
@@ -623,10 +634,12 @@ static int read_rindex_entry(struct gfs2_inode *ip,
 	if (rgd->rd_data > sdp->sd_max_rg_data)
 		sdp->sd_max_rg_data = rgd->rd_data;
 	spin_lock(&sdp->sd_rindex_spin);
-	rgd_insert(rgd);
-	sdp->sd_rgrps++;
+	error = rgd_insert(rgd);
 	spin_unlock(&sdp->sd_rindex_spin);
-	return error;
+	if (!error)
+		return 0;
+
+	error = 0; /* someone else read in the rgrp; free it and ignore it */
 
 fail:
 	kfree(rgd->rd_bits);
@@ -683,20 +696,22 @@ int gfs2_rindex_update(struct gfs2_sbd *sdp)
 	struct gfs2_glock *gl = ip->i_gl;
 	struct gfs2_holder ri_gh;
 	int error = 0;
+	int unlock_required = 0;
 
 	/* Read new copy from disk if we don't have the latest */
 	if (!sdp->sd_rindex_uptodate) {
-		mutex_lock(&sdp->sd_rindex_mutex);
-		error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
-		if (error)
-			return error;
+		if (!gfs2_glock_is_locked_by_me(gl)) {
+			error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
+			if (error)
+				return error;
+			unlock_required = 1;
+		}
 		if (!sdp->sd_rindex_uptodate)
 			error = gfs2_ri_update(ip);
-		gfs2_glock_dq_uninit(&ri_gh);
-		mutex_unlock(&sdp->sd_rindex_mutex);
+		if (unlock_required)
+			gfs2_glock_dq_uninit(&ri_gh);
 	}
 
-
 	return error;
 }
 
@@ -805,9 +820,9 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
 
 }
 
-void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
+int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 			     struct buffer_head *bh,
-			     const struct gfs2_bitmap *bi)
+			     const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
 {
 	struct super_block *sb = sdp->sd_vfs;
 	struct block_device *bdev = sb->s_bdev;
@@ -818,11 +833,19 @@ void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 	sector_t nr_sects = 0;
 	int rv;
 	unsigned int x;
+	u32 trimmed = 0;
+	u8 diff;
 
 	for (x = 0; x < bi->bi_len; x++) {
-		const u8 *orig = bh->b_data + bi->bi_offset + x;
-		const u8 *clone = bi->bi_clone + bi->bi_offset + x;
-		u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
+		const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
+		clone += bi->bi_offset;
+		clone += x;
+		if (bh) {
+			const u8 *orig = bh->b_data + bi->bi_offset + x;
+			diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
+		} else {
+			diff = ~(*clone | (*clone >> 1));
+		}
 		diff &= 0x55;
 		if (diff == 0)
 			continue;
@@ -833,11 +856,14 @@ void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 				if (nr_sects == 0)
 					goto start_new_extent;
 				if ((start + nr_sects) != blk) {
-					rv = blkdev_issue_discard(bdev, start,
-							    nr_sects, GFP_NOFS,
-							    0);
-					if (rv)
-						goto fail;
+					if (nr_sects >= minlen) {
+						rv = blkdev_issue_discard(bdev,
+							start, nr_sects,
+							GFP_NOFS, 0);
+						if (rv)
+							goto fail;
+						trimmed += nr_sects;
+					}
 					nr_sects = 0;
 start_new_extent:
 					start = blk;
@@ -848,15 +874,104 @@ start_new_extent:
 			blk += sects_per_blk;
 		}
 	}
-	if (nr_sects) {
+	if (nr_sects >= minlen) {
 		rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0);
 		if (rv)
 			goto fail;
+		trimmed += nr_sects;
 	}
-	return;
+	if (ptrimmed)
+		*ptrimmed = trimmed;
+	return 0;
+
 fail:
-	fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
+	if (sdp->sd_args.ar_discard)
+		fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
 	sdp->sd_args.ar_discard = 0;
+	return -EIO;
+}
+
+/**
+ * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
+ * @filp: Any file on the filesystem
+ * @argp: Pointer to the arguments (also used to pass result)
+ *
+ * Returns: 0 on success, otherwise error code
+ */
+
+int gfs2_fitrim(struct file *filp, void __user *argp)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
+	struct buffer_head *bh;
+	struct gfs2_rgrpd *rgd;
+	struct gfs2_rgrpd *rgd_end;
+	struct gfs2_holder gh;
+	struct fstrim_range r;
+	int ret = 0;
+	u64 amt;
+	u64 trimmed = 0;
+	unsigned int x;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!blk_queue_discard(q))
+		return -EOPNOTSUPP;
+
+	if (argp == NULL) {
+		r.start = 0;
+		r.len = ULLONG_MAX;
+		r.minlen = 0;
+	} else if (copy_from_user(&r, argp, sizeof(r)))
+		return -EFAULT;
+
+	rgd = gfs2_blk2rgrpd(sdp, r.start, 0);
+	rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0);
+
+	while (1) {
+
+		ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
+		if (ret)
+			goto out;
+
+		if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
+			/* Trim each bitmap in the rgrp */
+			for (x = 0; x < rgd->rd_length; x++) {
+				struct gfs2_bitmap *bi = rgd->rd_bits + x;
+				ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt);
+				if (ret) {
+					gfs2_glock_dq_uninit(&gh);
+					goto out;
+				}
+				trimmed += amt;
+			}
+
+			/* Mark rgrp as having been trimmed */
+			ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
+			if (ret == 0) {
+				bh = rgd->rd_bits[0].bi_bh;
+				rgd->rd_flags |= GFS2_RGF_TRIMMED;
+				gfs2_trans_add_bh(rgd->rd_gl, bh, 1);
+				gfs2_rgrp_out(rgd, bh->b_data);
+				gfs2_trans_end(sdp);
+			}
+		}
+		gfs2_glock_dq_uninit(&gh);
+
+		if (rgd == rgd_end)
+			break;
+
+		rgd = gfs2_rgrpd_get_next(rgd);
+	}
+
+out:
+	r.len = trimmed << 9;
+	if (argp && copy_to_user(argp, &r, sizeof(r)))
+		return -EFAULT;
+
+	return ret;
 }
 
 /**
@@ -1003,7 +1118,7 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
 		rgd = begin = ip->i_rgd;
 	else
-		rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal);
+		rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
 
 	if (rgd == NULL)
 		return -EBADSLT;
@@ -1288,7 +1403,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
 	u32 length, rgrp_blk, buf_blk;
 	unsigned int buf;
 
-	rgd = gfs2_blk2rgrpd(sdp, bstart);
+	rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
 	if (!rgd) {
 		if (gfs2_consist(sdp))
 			fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
@@ -1469,7 +1584,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
 		return;
 	trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
 	rgd->rd_free += blen;
-
+	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
 
@@ -1555,14 +1670,9 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
 {
 	struct gfs2_rgrpd *rgd;
 	struct gfs2_holder rgd_gh;
-	int error;
-
-	error = gfs2_rindex_update(sdp);
-	if (error)
-		return error;
+	int error = -EINVAL;
 
-	error = -EINVAL;
-	rgd = gfs2_blk2rgrpd(sdp, no_addr);
+	rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
 	if (!rgd)
 		goto fail;
 
@@ -1605,7 +1715,7 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
 	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
 		rgd = ip->i_rgd;
 	else
-		rgd = gfs2_blk2rgrpd(sdp, block);
+		rgd = gfs2_blk2rgrpd(sdp, block, 1);
 	if (!rgd) {
 		fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
 		return;
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index ceec9106cdf..b4b10f4de25 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -11,6 +11,7 @@
 #define __RGRP_DOT_H__
 
 #include <linux/slab.h>
+#include <linux/uaccess.h>
 
 struct gfs2_rgrpd;
 struct gfs2_sbd;
@@ -18,7 +19,7 @@ struct gfs2_holder;
 
 extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
 
-extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk);
+extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact);
 extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
 extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
 
@@ -62,8 +63,9 @@ extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state);
 extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
 extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
 extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl);
-extern void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
-				    struct buffer_head *bh,
-				    const struct gfs2_bitmap *bi);
+extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
+				   struct buffer_head *bh,
+				   const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
+extern int gfs2_fitrim(struct file *filp, void __user *argp);
 
 #endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4553ce515f6..6172fa77ad5 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1417,7 +1417,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
 	if (error)
 		goto out;
 
-	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
+	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
 	if (!rgd) {
 		gfs2_consist_inode(ip);
 		error = -EIO;
@@ -1557,6 +1557,7 @@ out:
 	end_writeback(inode);
 	gfs2_dir_hash_inval(ip);
 	ip->i_gl->gl_object = NULL;
+	flush_delayed_work_sync(&ip->i_gl->gl_work);
 	gfs2_glock_add_to_lru(ip->i_gl);
 	gfs2_glock_put(ip->i_gl);
 	ip->i_gl = NULL;
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 5d07609ec57..dfa89cd7553 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -11,6 +11,7 @@
 #include <linux/dlmconstants.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/writeback.h>
+#include <linux/ktime.h>
 #include "incore.h"
 #include "glock.h"
 
@@ -43,7 +44,8 @@
 	{(1UL << GLF_FROZEN),			"F" },		\
 	{(1UL << GLF_QUEUED),			"q" },		\
 	{(1UL << GLF_LRU),			"L" },		\
-	{(1UL << GLF_OBJECT),			"o" })
+	{(1UL << GLF_OBJECT),			"o" },		\
+	{(1UL << GLF_BLOCKING),			"b" })
 
 #ifndef NUMPTY
 #define NUMPTY
@@ -236,6 +238,62 @@ TRACE_EVENT(gfs2_glock_queue,
 		  glock_trace_name(__entry->state))
 );
 
+/* DLM sends a reply to GFS2 */
+TRACE_EVENT(gfs2_glock_lock_time,
+
+	TP_PROTO(const struct gfs2_glock *gl, s64 tdiff),
+
+	TP_ARGS(gl, tdiff),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev		)
+		__field(	u64,	glnum		)
+		__field(	u32,	gltype		)
+		__field(	int,	status		)
+		__field(	char,	flags		)
+		__field(	s64,	tdiff		)
+		__field(	s64,	srtt		)
+		__field(	s64,	srttvar		)
+		__field(	s64,	srttb		)
+		__field(	s64,	srttvarb	)
+		__field(	s64,	sirt		)
+		__field(	s64,	sirtvar		)
+		__field(	s64,	dcount		)
+		__field(	s64,	qcount		)
+	),
+
+	TP_fast_assign(
+		__entry->dev            = gl->gl_sbd->sd_vfs->s_dev;
+		__entry->glnum          = gl->gl_name.ln_number;
+		__entry->gltype         = gl->gl_name.ln_type;
+		__entry->status		= gl->gl_lksb.sb_status;
+		__entry->flags		= gl->gl_lksb.sb_flags;
+		__entry->tdiff		= tdiff;
+		__entry->srtt		= gl->gl_stats.stats[GFS2_LKS_SRTT];
+		__entry->srttvar	= gl->gl_stats.stats[GFS2_LKS_SRTTVAR];
+		__entry->srttb		= gl->gl_stats.stats[GFS2_LKS_SRTTB];
+		__entry->srttvarb	= gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
+		__entry->sirt		= gl->gl_stats.stats[GFS2_LKS_SIRT];
+		__entry->sirtvar	= gl->gl_stats.stats[GFS2_LKS_SIRTVAR];
+		__entry->dcount		= gl->gl_stats.stats[GFS2_LKS_DCOUNT];
+		__entry->qcount		= gl->gl_stats.stats[GFS2_LKS_QCOUNT];
+	),
+
+	TP_printk("%u,%u glock %d:%lld status:%d flags:%02x tdiff:%lld srtt:%lld/%lld srttb:%lld/%lld sirt:%lld/%lld dcnt:%lld qcnt:%lld",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+		  (unsigned long long)__entry->glnum,
+		  __entry->status, __entry->flags,
+		  (long long)__entry->tdiff,
+		  (long long)__entry->srtt,
+		  (long long)__entry->srttvar,
+		  (long long)__entry->srttb,
+		  (long long)__entry->srttvarb,
+		  (long long)__entry->sirt,
+		  (long long)__entry->sirtvar,
+		  (long long)__entry->dcount,
+		  (long long)__entry->qcount)
+);
+
 /* Section 2 - Log/journal
  *
  * Objectives:
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 53511291fe3..9e7765e8e7b 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -25,6 +25,7 @@ struct kmem_cache *gfs2_inode_cachep __read_mostly;
 struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
 struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
 struct kmem_cache *gfs2_quotad_cachep __read_mostly;
+mempool_t *gfs2_bh_pool __read_mostly;
 
 void gfs2_assert_i(struct gfs2_sbd *sdp)
 {
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index b432e04600d..a4ce76c67db 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -10,6 +10,8 @@
 #ifndef __UTIL_DOT_H__
 #define __UTIL_DOT_H__
 
+#include <linux/mempool.h>
+
 #include "incore.h"
 
 #define fs_printk(level, fs, fmt, arg...) \
@@ -150,6 +152,7 @@ extern struct kmem_cache *gfs2_inode_cachep;
 extern struct kmem_cache *gfs2_bufdata_cachep;
 extern struct kmem_cache *gfs2_rgrpd_cachep;
 extern struct kmem_cache *gfs2_quotad_cachep;
+extern mempool_t *gfs2_bh_pool;
 
 static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
 					   unsigned int *p)
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index e9636591b5d..2e5ba425cae 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -251,7 +251,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 	if (!blks)
 		return 0;
 
-	rgd = gfs2_blk2rgrpd(sdp, bn);
+	rgd = gfs2_blk2rgrpd(sdp, bn, 1);
 	if (!rgd) {
 		gfs2_consist_inode(ip);
 		return -EIO;
@@ -1439,7 +1439,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
 	struct gfs2_holder gh;
 	int error;
 
-	rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr);
+	rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr, 1);
 	if (!rgd) {
 		gfs2_consist_inode(ip);
 		return -EIO;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 8137fb3e678..7b4c537d6e1 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -430,15 +430,13 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
 
 	sb->s_d_op = &hfs_dentry_operations;
 	res = -ENOMEM;
-	sb->s_root = d_alloc_root(root_inode);
+	sb->s_root = d_make_root(root_inode);
 	if (!sb->s_root)
-		goto bail_iput;
+		goto bail_no_root;
 
 	/* everything's okay */
 	return 0;
 
-bail_iput:
-	iput(root_inode);
 bail_no_root:
 	printk(KERN_ERR "hfs: get root inode failed.\n");
 bail:
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 21a5b7fc6db..4e75ac646fe 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -317,6 +317,11 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
 
 
 /*
+ * hfs+-specific ioctl for making the filesystem bootable
+ */
+#define HFSPLUS_IOC_BLESS _IO('h', 0x80)
+
+/*
  * Functions in any *.c used in other files
  */
 
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 927cdd6d5bf..921967e5abb 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -117,7 +117,7 @@ struct hfsplus_vh {
 	__be32 write_count;
 	__be64 encodings_bmp;
 
-	u8 finder_info[32];
+	u32 finder_info[8];
 
 	struct hfsplus_fork_raw alloc_file;
 	struct hfsplus_fork_raw ext_file;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 6643b242bdd..82b69ee4dac 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -193,6 +193,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir,
 	mutex_init(&hip->extents_lock);
 	hip->extent_state = 0;
 	hip->flags = 0;
+	hip->userflags = 0;
 	set_bit(HFSPLUS_I_RSRC, &hip->flags);
 
 	err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
@@ -400,6 +401,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
 	atomic_set(&hip->opencnt, 0);
 	hip->extent_state = 0;
 	hip->flags = 0;
+	hip->userflags = 0;
 	memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec));
 	memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec));
 	hip->alloc_blocks = 0;
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index f66c7655b3f..c640ba57074 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -20,6 +20,38 @@
 #include <asm/uaccess.h>
 #include "hfsplus_fs.h"
 
+/*
+ * "Blessing" an HFS+ filesystem writes metadata to the superblock informing
+ * the platform firmware which file to boot from
+ */
+static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
+{
+	struct dentry *dentry = file->f_path.dentry;
+	struct inode *inode = dentry->d_inode;
+	struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
+	struct hfsplus_vh *vh = sbi->s_vhdr;
+	struct hfsplus_vh *bvh = sbi->s_backup_vhdr;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	mutex_lock(&sbi->vh_mutex);
+
+	/* Directory containing the bootable system */
+	vh->finder_info[0] = bvh->finder_info[0] =
+		cpu_to_be32(parent_ino(dentry));
+
+	/* Bootloader */
+	vh->finder_info[1] = bvh->finder_info[1] = cpu_to_be32(inode->i_ino);
+
+	/* Per spec, the OS X system folder - same as finder_info[0] here */
+	vh->finder_info[5] = bvh->finder_info[5] =
+		cpu_to_be32(parent_ino(dentry));
+
+	mutex_unlock(&sbi->vh_mutex);
+	return 0;
+}
+
 static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
@@ -108,6 +140,8 @@ long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		return hfsplus_ioctl_getflags(file, argp);
 	case HFSPLUS_IOC_EXT2_SETFLAGS:
 		return hfsplus_ioctl_setflags(file, argp);
+	case HFSPLUS_IOC_BLESS:
+		return hfsplus_ioctl_bless(file, argp);
 	default:
 		return -ENOTTY;
 	}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 427682ca9e4..ceb1c281eef 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -465,6 +465,13 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 		goto out_put_alloc_file;
 	}
 
+	sb->s_d_op = &hfsplus_dentry_operations;
+	sb->s_root = d_make_root(root);
+	if (!sb->s_root) {
+		err = -ENOMEM;
+		goto out_put_alloc_file;
+	}
+
 	str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
 	str.name = HFSP_HIDDENDIR_NAME;
 	err = hfs_find_init(sbi->cat_tree, &fd);
@@ -515,13 +522,6 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 		}
 	}
 
-	sb->s_d_op = &hfsplus_dentry_operations;
-	sb->s_root = d_alloc_root(root);
-	if (!sb->s_root) {
-		err = -ENOMEM;
-		goto out_put_hidden_dir;
-	}
-
 	unload_nls(sbi->nls);
 	sbi->nls = nls;
 	return 0;
@@ -529,7 +529,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 out_put_hidden_dir:
 	iput(sbi->hidden_dir);
 out_put_root:
-	iput(root);
+	dput(sb->s_root);
+	sb->s_root = NULL;
 out_put_alloc_file:
 	iput(sbi->alloc_file);
 out_close_cat_tree:
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index e130bd46d67..588d45885a6 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -966,9 +966,9 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 	}
 
 	err = -ENOMEM;
-	sb->s_root = d_alloc_root(root_inode);
+	sb->s_root = d_make_root(root_inode);
 	if (sb->s_root == NULL)
-		goto out_put;
+		goto out;
 
 	return 0;
 
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 3690467c944..54f6eccb79d 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -625,11 +625,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
 	hpfs_init_inode(root);
 	hpfs_read_inode(root);
 	unlock_new_inode(root);
-	s->s_root = d_alloc_root(root);
-	if (!s->s_root) {
-		iput(root);
+	s->s_root = d_make_root(root);
+	if (!s->s_root)
 		goto bail0;
-	}
 
 	/*
 	 * find the root directory's . pointer & finish filling in the inode
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index d92f4ce8092..a80e45a690a 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -726,17 +726,12 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
 
 	err = -ENOMEM;
 	root_inode = get_inode(sb, dget(proc_mnt->mnt_root));
-	if (!root_inode)
-		goto out_mntput;
-
-	sb->s_root = d_alloc_root(root_inode);
+	sb->s_root = d_make_root(root_inode);
 	if (!sb->s_root)
-		goto out_iput;
+		goto out_mntput;
 
 	return 0;
 
- out_iput:
-	iput(root_inode);
  out_mntput:
 	mntput(proc_mnt);
  out:
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 1e85a7ac021..ea251749d9d 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -41,6 +41,25 @@ const struct file_operations hugetlbfs_file_operations;
 static const struct inode_operations hugetlbfs_dir_inode_operations;
 static const struct inode_operations hugetlbfs_inode_operations;
 
+struct hugetlbfs_config {
+	uid_t   uid;
+	gid_t   gid;
+	umode_t mode;
+	long	nr_blocks;
+	long	nr_inodes;
+	struct hstate *hstate;
+};
+
+struct hugetlbfs_inode_info {
+	struct shared_policy policy;
+	struct inode vfs_inode;
+};
+
+static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
+{
+	return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
+}
+
 static struct backing_dev_info hugetlbfs_backing_dev_info = {
 	.name		= "hugetlbfs",
 	.ra_pages	= 0,	/* No readahead */
@@ -154,10 +173,12 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 			return addr;
 	}
 
-	start_addr = mm->free_area_cache;
-
-	if (len <= mm->cached_hole_size)
+	if (len > mm->cached_hole_size)
+		start_addr = mm->free_area_cache;
+	else {
 		start_addr = TASK_UNMAPPED_BASE;
+		mm->cached_hole_size = 0;
+	}
 
 full_search:
 	addr = ALIGN(start_addr, huge_page_size(h));
@@ -171,13 +192,18 @@ full_search:
 			 */
 			if (start_addr != TASK_UNMAPPED_BASE) {
 				start_addr = TASK_UNMAPPED_BASE;
+				mm->cached_hole_size = 0;
 				goto full_search;
 			}
 			return -ENOMEM;
 		}
 
-		if (!vma || addr + len <= vma->vm_start)
+		if (!vma || addr + len <= vma->vm_start) {
+			mm->free_area_cache = addr + len;
 			return addr;
+		}
+		if (addr + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - addr;
 		addr = ALIGN(vma->vm_end, huge_page_size(h));
 	}
 }
@@ -238,17 +264,10 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
 	loff_t isize;
 	ssize_t retval = 0;
 
-	mutex_lock(&inode->i_mutex);
-
 	/* validate length */
 	if (len == 0)
 		goto out;
 
-	isize = i_size_read(inode);
-	if (!isize)
-		goto out;
-
-	end_index = (isize - 1) >> huge_page_shift(h);
 	for (;;) {
 		struct page *page;
 		unsigned long nr, ret;
@@ -256,18 +275,21 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
 
 		/* nr is the maximum number of bytes to copy from this page */
 		nr = huge_page_size(h);
+		isize = i_size_read(inode);
+		if (!isize)
+			goto out;
+		end_index = (isize - 1) >> huge_page_shift(h);
 		if (index >= end_index) {
 			if (index > end_index)
 				goto out;
 			nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
-			if (nr <= offset) {
+			if (nr <= offset)
 				goto out;
-			}
 		}
 		nr = nr - offset;
 
 		/* Find the page */
-		page = find_get_page(mapping, index);
+		page = find_lock_page(mapping, index);
 		if (unlikely(page == NULL)) {
 			/*
 			 * We have a HOLE, zero out the user-buffer for the
@@ -279,17 +301,18 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
 			else
 				ra = 0;
 		} else {
+			unlock_page(page);
+
 			/*
 			 * We have the page, copy it to user space buffer.
 			 */
 			ra = hugetlbfs_read_actor(page, offset, buf, len, nr);
 			ret = ra;
+			page_cache_release(page);
 		}
 		if (ra < 0) {
 			if (retval == 0)
 				retval = ra;
-			if (page)
-				page_cache_release(page);
 			goto out;
 		}
 
@@ -299,16 +322,12 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
 		index += offset >> huge_page_shift(h);
 		offset &= ~huge_page_mask(h);
 
-		if (page)
-			page_cache_release(page);
-
 		/* short read or no more work */
 		if ((ret != nr) || (len == 0))
 			break;
 	}
 out:
 	*ppos = ((loff_t)index << huge_page_shift(h)) + offset;
-	mutex_unlock(&inode->i_mutex);
 	return retval;
 }
 
@@ -607,9 +626,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 		spin_lock(&sbinfo->stat_lock);
 		/* If no limits set, just report 0 for max/free/used
 		 * blocks, like simple_statfs() */
-		if (sbinfo->max_blocks >= 0) {
-			buf->f_blocks = sbinfo->max_blocks;
-			buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
+		if (sbinfo->spool) {
+			long free_pages;
+
+			spin_lock(&sbinfo->spool->lock);
+			buf->f_blocks = sbinfo->spool->max_hpages;
+			free_pages = sbinfo->spool->max_hpages
+				- sbinfo->spool->used_hpages;
+			buf->f_bavail = buf->f_bfree = free_pages;
+			spin_unlock(&sbinfo->spool->lock);
 			buf->f_files = sbinfo->max_inodes;
 			buf->f_ffree = sbinfo->free_inodes;
 		}
@@ -625,6 +650,10 @@ static void hugetlbfs_put_super(struct super_block *sb)
 
 	if (sbi) {
 		sb->s_fs_info = NULL;
+
+		if (sbi->spool)
+			hugepage_put_subpool(sbi->spool);
+
 		kfree(sbi);
 	}
 }
@@ -831,8 +860,6 @@ bad_val:
 static int
 hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 {
-	struct inode * inode;
-	struct dentry * root;
 	int ret;
 	struct hugetlbfs_config config;
 	struct hugetlbfs_sb_info *sbinfo;
@@ -855,60 +882,31 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_fs_info = sbinfo;
 	sbinfo->hstate = config.hstate;
 	spin_lock_init(&sbinfo->stat_lock);
-	sbinfo->max_blocks = config.nr_blocks;
-	sbinfo->free_blocks = config.nr_blocks;
 	sbinfo->max_inodes = config.nr_inodes;
 	sbinfo->free_inodes = config.nr_inodes;
+	sbinfo->spool = NULL;
+	if (config.nr_blocks != -1) {
+		sbinfo->spool = hugepage_new_subpool(config.nr_blocks);
+		if (!sbinfo->spool)
+			goto out_free;
+	}
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_blocksize = huge_page_size(config.hstate);
 	sb->s_blocksize_bits = huge_page_shift(config.hstate);
 	sb->s_magic = HUGETLBFS_MAGIC;
 	sb->s_op = &hugetlbfs_ops;
 	sb->s_time_gran = 1;
-	inode = hugetlbfs_get_root(sb, &config);
-	if (!inode)
-		goto out_free;
-
-	root = d_alloc_root(inode);
-	if (!root) {
-		iput(inode);
+	sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config));
+	if (!sb->s_root)
 		goto out_free;
-	}
-	sb->s_root = root;
 	return 0;
 out_free:
+	if (sbinfo->spool)
+		kfree(sbinfo->spool);
 	kfree(sbinfo);
 	return -ENOMEM;
 }
 
-int hugetlb_get_quota(struct address_space *mapping, long delta)
-{
-	int ret = 0;
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
-
-	if (sbinfo->free_blocks > -1) {
-		spin_lock(&sbinfo->stat_lock);
-		if (sbinfo->free_blocks - delta >= 0)
-			sbinfo->free_blocks -= delta;
-		else
-			ret = -ENOMEM;
-		spin_unlock(&sbinfo->stat_lock);
-	}
-
-	return ret;
-}
-
-void hugetlb_put_quota(struct address_space *mapping, long delta)
-{
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
-
-	if (sbinfo->free_blocks > -1) {
-		spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_blocks += delta;
-		spin_unlock(&sbinfo->stat_lock);
-	}
-}
-
 static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
 {
@@ -928,8 +926,8 @@ static int can_do_hugetlb_shm(void)
 	return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
 }
 
-struct file *hugetlb_file_setup(const char *name, size_t size,
-				vm_flags_t acctflag,
+struct file *hugetlb_file_setup(const char *name, unsigned long addr,
+				size_t size, vm_flags_t acctflag,
 				struct user_struct **user, int creat_flags)
 {
 	int error = -ENOMEM;
@@ -938,6 +936,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 	struct path path;
 	struct dentry *root;
 	struct qstr quick_string;
+	struct hstate *hstate;
+	unsigned long num_pages;
 
 	*user = NULL;
 	if (!hugetlbfs_vfsmount)
@@ -946,7 +946,11 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 	if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
 		*user = current_user();
 		if (user_shm_lock(size, *user)) {
-			printk_once(KERN_WARNING "Using mlock ulimits for SHM_HUGETLB is deprecated\n");
+			task_lock(current);
+			printk_once(KERN_WARNING
+				"%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n",
+				current->comm, current->pid);
+			task_unlock(current);
 		} else {
 			*user = NULL;
 			return ERR_PTR(-EPERM);
@@ -967,10 +971,12 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 	if (!inode)
 		goto out_dentry;
 
+	hstate = hstate_inode(inode);
+	size += addr & ~huge_page_mask(hstate);
+	num_pages = ALIGN(size, huge_page_size(hstate)) >>
+			huge_page_shift(hstate);
 	error = -ENOMEM;
-	if (hugetlb_reserve_pages(inode, 0,
-			size >> huge_page_shift(hstate_inode(inode)), NULL,
-			acctflag))
+	if (hugetlb_reserve_pages(inode, 0, num_pages, NULL, acctflag))
 		goto out_inode;
 
 	d_instantiate(path.dentry, inode);
@@ -1006,6 +1012,7 @@ static int __init init_hugetlbfs_fs(void)
 	if (error)
 		return error;
 
+	error = -ENOMEM;
 	hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache",
 					sizeof(struct hugetlbfs_inode_info),
 					0, 0, init_once);
@@ -1024,10 +1031,10 @@ static int __init init_hugetlbfs_fs(void)
 	}
 
 	error = PTR_ERR(vfsmount);
+	unregister_filesystem(&hugetlbfs_fs_type);
 
  out:
-	if (error)
-		kmem_cache_destroy(hugetlbfs_inode_cachep);
+	kmem_cache_destroy(hugetlbfs_inode_cachep);
  out2:
 	bdi_destroy(&hugetlbfs_backing_dev_info);
 	return error;
diff --git a/fs/inode.c b/fs/inode.c
index fb10d86ffad..9f4f5fecc09 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2,29 +2,19 @@
  * (C) 1997 Linus Torvalds
  * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
  */
+#include <linux/export.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
-#include <linux/dcache.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/writeback.h>
-#include <linux/module.h>
 #include <linux/backing-dev.h>
-#include <linux/wait.h>
-#include <linux/rwsem.h>
 #include <linux/hash.h>
 #include <linux/swap.h>
 #include <linux/security.h>
-#include <linux/pagemap.h>
 #include <linux/cdev.h>
 #include <linux/bootmem.h>
 #include <linux/fsnotify.h>
 #include <linux/mount.h>
-#include <linux/async.h>
 #include <linux/posix_acl.h>
 #include <linux/prefetch.h>
-#include <linux/ima.h>
-#include <linux/cred.h>
 #include <linux/buffer_head.h> /* for inode_has_buffers */
 #include <linux/ratelimit.h>
 #include "internal.h"
@@ -938,8 +928,7 @@ void lockdep_annotate_inode_mutex_key(struct inode *inode)
 		struct file_system_type *type = inode->i_sb->s_type;
 
 		/* Set new key only if filesystem hasn't already changed it */
-		if (!lockdep_match_class(&inode->i_mutex,
-		    &type->i_mutex_key)) {
+		if (lockdep_match_class(&inode->i_mutex, &type->i_mutex_key)) {
 			/*
 			 * ensure nobody is actually holding i_mutex
 			 */
@@ -966,6 +955,7 @@ void unlock_new_inode(struct inode *inode)
 	spin_lock(&inode->i_lock);
 	WARN_ON(!(inode->i_state & I_NEW));
 	inode->i_state &= ~I_NEW;
+	smp_mb();
 	wake_up_bit(&inode->i_state, __I_NEW);
 	spin_unlock(&inode->i_lock);
 }
@@ -1369,17 +1359,6 @@ int generic_delete_inode(struct inode *inode)
 EXPORT_SYMBOL(generic_delete_inode);
 
 /*
- * Normal UNIX filesystem behaviour: delete the
- * inode when the usage count drops to zero, and
- * i_nlink is zero.
- */
-int generic_drop_inode(struct inode *inode)
-{
-	return !inode->i_nlink || inode_unhashed(inode);
-}
-EXPORT_SYMBOL_GPL(generic_drop_inode);
-
-/*
  * Called when we're dropping the last reference
  * to an inode.
  *
@@ -1510,9 +1489,10 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
  *	This function automatically handles read only file systems and media,
  *	as well as the "noatime" flag and inode specific "noatime" markers.
  */
-void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
+void touch_atime(struct path *path)
 {
-	struct inode *inode = dentry->d_inode;
+	struct vfsmount *mnt = path->mnt;
+	struct inode *inode = path->dentry->d_inode;
 	struct timespec now;
 
 	if (inode->i_flags & S_NOATIME)
@@ -1651,7 +1631,7 @@ __setup("ihash_entries=", set_ihash_entries);
  */
 void __init inode_init_early(void)
 {
-	int loop;
+	unsigned int loop;
 
 	/* If hashes are distributed across NUMA nodes, defer
 	 * hash allocation until vmalloc space is available.
@@ -1669,13 +1649,13 @@ void __init inode_init_early(void)
 					&i_hash_mask,
 					0);
 
-	for (loop = 0; loop < (1 << i_hash_shift); loop++)
+	for (loop = 0; loop < (1U << i_hash_shift); loop++)
 		INIT_HLIST_HEAD(&inode_hashtable[loop]);
 }
 
 void __init inode_init(void)
 {
-	int loop;
+	unsigned int loop;
 
 	/* inode slab cache */
 	inode_cachep = kmem_cache_create("inode_cache",
@@ -1699,7 +1679,7 @@ void __init inode_init(void)
 					&i_hash_mask,
 					0);
 
-	for (loop = 0; loop < (1 << i_hash_shift); loop++)
+	for (loop = 0; loop < (1U << i_hash_shift); loop++)
 		INIT_HLIST_HEAD(&inode_hashtable[loop]);
 }
 
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index bd62c76fb5d..29037c365ba 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -947,9 +947,8 @@ root_found:
 	s->s_d_op = &isofs_dentry_ops[table];
 
 	/* get the root dentry */
-	s->s_root = d_alloc_root(inode);
+	s->s_root = d_make_root(inode);
 	if (!(s->s_root)) {
-		iput(inode);
 		error = -ENOMEM;
 		goto out_no_inode;
 	}
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 59c09f9541b..0971e921780 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -129,6 +129,8 @@ static int kjournald(void *arg)
 	setup_timer(&journal->j_commit_timer, commit_timeout,
 			(unsigned long)current);
 
+	set_freezable();
+
 	/* Record that the journal thread is running */
 	journal->j_task = current;
 	wake_up(&journal->j_wait_done_commit);
@@ -328,7 +330,7 @@ repeat:
 		new_offset = offset_in_page(jh2bh(jh_in)->b_data);
 	}
 
-	mapped_data = kmap_atomic(new_page, KM_USER0);
+	mapped_data = kmap_atomic(new_page);
 	/*
 	 * Check for escaping
 	 */
@@ -337,7 +339,7 @@ repeat:
 		need_copy_out = 1;
 		do_escape = 1;
 	}
-	kunmap_atomic(mapped_data, KM_USER0);
+	kunmap_atomic(mapped_data);
 
 	/*
 	 * Do we need to do a data copy?
@@ -354,9 +356,9 @@ repeat:
 		}
 
 		jh_in->b_frozen_data = tmp;
-		mapped_data = kmap_atomic(new_page, KM_USER0);
+		mapped_data = kmap_atomic(new_page);
 		memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
-		kunmap_atomic(mapped_data, KM_USER0);
+		kunmap_atomic(mapped_data);
 
 		new_page = virt_to_page(tmp);
 		new_offset = offset_in_page(tmp);
@@ -368,9 +370,9 @@ repeat:
 	 * copying, we can finally do so.
 	 */
 	if (do_escape) {
-		mapped_data = kmap_atomic(new_page, KM_USER0);
+		mapped_data = kmap_atomic(new_page);
 		*((unsigned int *)(mapped_data + new_offset)) = 0;
-		kunmap_atomic(mapped_data, KM_USER0);
+		kunmap_atomic(mapped_data);
 	}
 
 	set_bh_page(new_bh, new_page, new_offset);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 7fce94b04bc..b2a7e5244e3 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -718,9 +718,9 @@ done:
 			    "Possible IO failure.\n");
 		page = jh2bh(jh)->b_page;
 		offset = offset_in_page(jh2bh(jh)->b_data);
-		source = kmap_atomic(page, KM_USER0);
+		source = kmap_atomic(page);
 		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
-		kunmap_atomic(source, KM_USER0);
+		kunmap_atomic(source);
 	}
 	jbd_unlock_bh_state(bh);
 
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 5069b847515..c067a8cae63 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -286,10 +286,10 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
 	char *addr;
 	__u32 checksum;
 
-	addr = kmap_atomic(page, KM_USER0);
+	addr = kmap_atomic(page);
 	checksum = crc32_be(crc32_sum,
 		(void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
-	kunmap_atomic(addr, KM_USER0);
+	kunmap_atomic(addr);
 
 	return checksum;
 }
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c0a5f9f1b12..839377e3d62 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -139,6 +139,8 @@ static int kjournald2(void *arg)
 	setup_timer(&journal->j_commit_timer, commit_timeout,
 			(unsigned long)current);
 
+	set_freezable();
+
 	/* Record that the journal thread is running */
 	journal->j_task = current;
 	wake_up(&journal->j_wait_done_commit);
@@ -345,7 +347,7 @@ repeat:
 		new_offset = offset_in_page(jh2bh(jh_in)->b_data);
 	}
 
-	mapped_data = kmap_atomic(new_page, KM_USER0);
+	mapped_data = kmap_atomic(new_page);
 	/*
 	 * Fire data frozen trigger if data already wasn't frozen.  Do this
 	 * before checking for escaping, as the trigger may modify the magic
@@ -364,7 +366,7 @@ repeat:
 		need_copy_out = 1;
 		do_escape = 1;
 	}
-	kunmap_atomic(mapped_data, KM_USER0);
+	kunmap_atomic(mapped_data);
 
 	/*
 	 * Do we need to do a data copy?
@@ -385,9 +387,9 @@ repeat:
 		}
 
 		jh_in->b_frozen_data = tmp;
-		mapped_data = kmap_atomic(new_page, KM_USER0);
+		mapped_data = kmap_atomic(new_page);
 		memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
-		kunmap_atomic(mapped_data, KM_USER0);
+		kunmap_atomic(mapped_data);
 
 		new_page = virt_to_page(tmp);
 		new_offset = offset_in_page(tmp);
@@ -406,9 +408,9 @@ repeat:
 	 * copying, we can finally do so.
 	 */
 	if (do_escape) {
-		mapped_data = kmap_atomic(new_page, KM_USER0);
+		mapped_data = kmap_atomic(new_page);
 		*((unsigned int *)(mapped_data + new_offset)) = 0;
-		kunmap_atomic(mapped_data, KM_USER0);
+		kunmap_atomic(mapped_data);
 	}
 
 	set_bh_page(new_bh, new_page, new_offset);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 35ae096bed5..e5aba56e1fd 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -783,12 +783,12 @@ done:
 			    "Possible IO failure.\n");
 		page = jh2bh(jh)->b_page;
 		offset = offset_in_page(jh2bh(jh)->b_data);
-		source = kmap_atomic(page, KM_USER0);
+		source = kmap_atomic(page);
 		/* Fire data frozen trigger just before we copy the data */
 		jbd2_buffer_frozen_trigger(jh, source + offset,
 					   jh->b_triggers);
 		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
-		kunmap_atomic(source, KM_USER0);
+		kunmap_atomic(source);
 
 		/*
 		 * Now that the frozen data is saved off, we need to store
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 5b6c9d1a2fb..96ed3c9ec3f 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -340,7 +340,7 @@ int jffs2_unregister_compressor(struct jffs2_compressor *comp)
 
 	if (comp->usecount) {
 		spin_unlock(&jffs2_compressor_list_lock);
-		printk(KERN_WARNING "JFFS2: Compressor modul is in use. Unregister failed.\n");
+		printk(KERN_WARNING "JFFS2: Compressor module is in use. Unregister failed.\n");
 		return -1;
 	}
 	list_del(&comp->list);
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 2e0123867cb..c0d5c9d770d 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -561,9 +561,9 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
 	ret = -ENOMEM;
 
 	D1(printk(KERN_DEBUG "jffs2_do_fill_super(): d_alloc_root()\n"));
-	sb->s_root = d_alloc_root(root_i);
+	sb->s_root = d_make_root(root_i);
 	if (!sb->s_root)
-		goto out_root_i;
+		goto out_root;
 
 	sb->s_maxbytes = 0xFFFFFFFF;
 	sb->s_blocksize = PAGE_CACHE_SIZE;
@@ -573,8 +573,6 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
 		jffs2_start_garbage_collect_thread(c);
 	return 0;
 
- out_root_i:
-	iput(root_i);
 out_root:
 	jffs2_free_ino_caches(c);
 	jffs2_free_raw_node_refs(c);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 5f7c160ea64..07c91ca6017 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -220,12 +220,6 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
 
 	dquot_initialize(dip);
 
-	/* link count overflow on parent directory ? */
-	if (dip->i_nlink == JFS_LINK_MAX) {
-		rc = -EMLINK;
-		goto out1;
-	}
-
 	/*
 	 * search parent directory for entry/freespace
 	 * (dtSearch() returns parent directory page pinned)
@@ -806,9 +800,6 @@ static int jfs_link(struct dentry *old_dentry,
 	jfs_info("jfs_link: %s %s", old_dentry->d_name.name,
 		 dentry->d_name.name);
 
-	if (ip->i_nlink == JFS_LINK_MAX)
-		return -EMLINK;
-
 	dquot_initialize(dir);
 
 	tid = txBegin(ip->i_sb, 0);
@@ -1138,10 +1129,6 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 				rc = -ENOTEMPTY;
 				goto out3;
 			}
-		} else if ((new_dir != old_dir) &&
-			   (new_dir->i_nlink == JFS_LINK_MAX)) {
-			rc = -EMLINK;
-			goto out3;
 		}
 	} else if (new_ip) {
 		IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 682bca642f3..4a82950f412 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -441,6 +441,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 		return -ENOMEM;
 
 	sb->s_fs_info = sbi;
+	sb->s_max_links = JFS_LINK_MAX;
 	sbi->sb = sb;
 	sbi->uid = sbi->gid = sbi->umask = -1;
 
@@ -521,7 +522,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 		ret = PTR_ERR(inode);
 		goto out_no_rw;
 	}
-	sb->s_root = d_alloc_root(inode);
+	sb->s_root = d_make_root(inode);
 	if (!sb->s_root)
 		goto out_no_root;
 
@@ -539,7 +540,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 
 out_no_root:
 	jfs_err("jfs_read_super: get root dentry failed");
-	iput(inode);
 
 out_no_rw:
 	rc = jfs_umount(sb);
@@ -860,8 +860,14 @@ static int __init init_jfs_fs(void)
 	jfs_proc_init();
 #endif
 
-	return register_filesystem(&jfs_fs_type);
+	rc = register_filesystem(&jfs_fs_type);
+	if (!rc)
+		return 0;
 
+#ifdef PROC_FS_JFS
+	jfs_proc_clean();
+#endif
+	kthread_stop(jfsSyncThread);
 kill_committask:
 	for (i = 0; i < commit_threads; i++)
 		kthread_stop(jfsCommitThread[i]);
diff --git a/fs/libfs.c b/fs/libfs.c
index 5b2dbb3ba4f..722e0d5ba18 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -491,11 +491,9 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
 	inode->i_op = &simple_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
 	set_nlink(inode, 2);
-	root = d_alloc_root(inode);
-	if (!root) {
-		iput(inode);
+	root = d_make_root(inode);
+	if (!root)
 		return -ENOMEM;
-	}
 	for (i = 0; !files->name || files->name[0]; i++, files++) {
 		if (!files->name)
 			continue;
@@ -536,7 +534,7 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c
 	spin_lock(&pin_fs_lock);
 	if (unlikely(!*mount)) {
 		spin_unlock(&pin_fs_lock);
-		mnt = vfs_kern_mount(type, 0, type->name, NULL);
+		mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, NULL);
 		if (IS_ERR(mnt))
 			return PTR_ERR(mnt);
 		spin_lock(&pin_fs_lock);
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 3de7a32cadb..bea5d1b9954 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -177,17 +177,17 @@ static struct page *logfs_get_dd_page(struct inode *dir, struct dentry *dentry)
 				(filler_t *)logfs_readpage, NULL);
 		if (IS_ERR(page))
 			return page;
-		dd = kmap_atomic(page, KM_USER0);
+		dd = kmap_atomic(page);
 		BUG_ON(dd->namelen == 0);
 
 		if (name->len != be16_to_cpu(dd->namelen) ||
 				memcmp(name->name, dd->name, name->len)) {
-			kunmap_atomic(dd, KM_USER0);
+			kunmap_atomic(dd);
 			page_cache_release(page);
 			continue;
 		}
 
-		kunmap_atomic(dd, KM_USER0);
+		kunmap_atomic(dd);
 		return page;
 	}
 	return NULL;
@@ -365,9 +365,9 @@ static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry,
 		return NULL;
 	}
 	index = page->index;
-	dd = kmap_atomic(page, KM_USER0);
+	dd = kmap_atomic(page);
 	ino = be64_to_cpu(dd->ino);
-	kunmap_atomic(dd, KM_USER0);
+	kunmap_atomic(dd);
 	page_cache_release(page);
 
 	inode = logfs_iget(dir->i_sb, ino);
@@ -402,12 +402,12 @@ static int logfs_write_dir(struct inode *dir, struct dentry *dentry,
 		if (!page)
 			return -ENOMEM;
 
-		dd = kmap_atomic(page, KM_USER0);
+		dd = kmap_atomic(page);
 		memset(dd, 0, sizeof(*dd));
 		dd->ino = cpu_to_be64(inode->i_ino);
 		dd->type = logfs_type(inode);
 		logfs_set_name(dd, &dentry->d_name);
-		kunmap_atomic(dd, KM_USER0);
+		kunmap_atomic(dd);
 
 		err = logfs_write_buf(dir, page, WF_LOCK);
 		unlock_page(page);
@@ -558,9 +558,6 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir,
 {
 	struct inode *inode = old_dentry->d_inode;
 
-	if (inode->i_nlink >= LOGFS_LINK_MAX)
-		return -EMLINK;
-
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
 	ihold(inode);
 	inc_nlink(inode);
@@ -579,9 +576,9 @@ static int logfs_get_dd(struct inode *dir, struct dentry *dentry,
 	if (IS_ERR(page))
 		return PTR_ERR(page);
 	*pos = page->index;
-	map = kmap_atomic(page, KM_USER0);
+	map = kmap_atomic(page);
 	memcpy(dd, map, sizeof(*dd));
-	kunmap_atomic(map, KM_USER0);
+	kunmap_atomic(map);
 	page_cache_release(page);
 	return 0;
 }
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 4153e65b014..e3ab5e5a904 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -517,9 +517,9 @@ static int indirect_write_alias(struct super_block *sb,
 
 		ino = page->mapping->host->i_ino;
 		logfs_unpack_index(page->index, &bix, &level);
-		child = kmap_atomic(page, KM_USER0);
+		child = kmap_atomic(page);
 		val = child[pos];
-		kunmap_atomic(child, KM_USER0);
+		kunmap_atomic(child);
 		err = write_one_alias(sb, ino, bix, level, pos, val);
 		if (err)
 			return err;
@@ -673,9 +673,9 @@ static void alloc_indirect_block(struct inode *inode, struct page *page,
 	alloc_data_block(inode, page);
 
 	block = logfs_block(page);
-	array = kmap_atomic(page, KM_USER0);
+	array = kmap_atomic(page);
 	initialize_block_counters(page, block, array, page_is_empty);
-	kunmap_atomic(array, KM_USER0);
+	kunmap_atomic(array);
 }
 
 static void block_set_pointer(struct page *page, int index, u64 ptr)
@@ -685,10 +685,10 @@ static void block_set_pointer(struct page *page, int index, u64 ptr)
 	u64 oldptr;
 
 	BUG_ON(!block);
-	array = kmap_atomic(page, KM_USER0);
+	array = kmap_atomic(page);
 	oldptr = be64_to_cpu(array[index]);
 	array[index] = cpu_to_be64(ptr);
-	kunmap_atomic(array, KM_USER0);
+	kunmap_atomic(array);
 	SetPageUptodate(page);
 
 	block->full += !!(ptr & LOGFS_FULLY_POPULATED)
@@ -701,9 +701,9 @@ static u64 block_get_pointer(struct page *page, int index)
 	__be64 *block;
 	u64 ptr;
 
-	block = kmap_atomic(page, KM_USER0);
+	block = kmap_atomic(page);
 	ptr = be64_to_cpu(block[index]);
-	kunmap_atomic(block, KM_USER0);
+	kunmap_atomic(block);
 	return ptr;
 }
 
@@ -850,7 +850,7 @@ static u64 seek_holedata_loop(struct inode *inode, u64 bix, int data)
 		}
 
 		slot = get_bits(bix, SUBLEVEL(level));
-		rblock = kmap_atomic(page, KM_USER0);
+		rblock = kmap_atomic(page);
 		while (slot < LOGFS_BLOCK_FACTOR) {
 			if (data && (rblock[slot] != 0))
 				break;
@@ -861,12 +861,12 @@ static u64 seek_holedata_loop(struct inode *inode, u64 bix, int data)
 			bix &= ~(increment - 1);
 		}
 		if (slot >= LOGFS_BLOCK_FACTOR) {
-			kunmap_atomic(rblock, KM_USER0);
+			kunmap_atomic(rblock);
 			logfs_put_read_page(page);
 			return bix;
 		}
 		bofs = be64_to_cpu(rblock[slot]);
-		kunmap_atomic(rblock, KM_USER0);
+		kunmap_atomic(rblock);
 		logfs_put_read_page(page);
 		if (!bofs) {
 			BUG_ON(data);
@@ -1961,9 +1961,9 @@ int logfs_read_inode(struct inode *inode)
 	if (IS_ERR(page))
 		return PTR_ERR(page);
 
-	di = kmap_atomic(page, KM_USER0);
+	di = kmap_atomic(page);
 	logfs_disk_to_inode(di, inode);
-	kunmap_atomic(di, KM_USER0);
+	kunmap_atomic(di);
 	move_page_to_inode(inode, page);
 	page_cache_release(page);
 	return 0;
@@ -1982,9 +1982,9 @@ static struct page *inode_to_page(struct inode *inode)
 	if (!page)
 		return NULL;
 
-	di = kmap_atomic(page, KM_USER0);
+	di = kmap_atomic(page);
 	logfs_inode_to_disk(inode, di);
-	kunmap_atomic(di, KM_USER0);
+	kunmap_atomic(di);
 	move_inode_to_page(page, inode);
 	return page;
 }
@@ -2041,13 +2041,13 @@ static void logfs_mod_segment_entry(struct super_block *sb, u32 segno,
 
 	if (write)
 		alloc_indirect_block(inode, page, 0);
-	se = kmap_atomic(page, KM_USER0);
+	se = kmap_atomic(page);
 	change_se(se + child_no, arg);
 	if (write) {
 		logfs_set_alias(sb, logfs_block(page), child_no);
 		BUG_ON((int)be32_to_cpu(se[child_no].valid) > super->s_segsize);
 	}
-	kunmap_atomic(se, KM_USER0);
+	kunmap_atomic(se);
 
 	logfs_put_write_page(page);
 }
@@ -2245,10 +2245,10 @@ int logfs_inode_write(struct inode *inode, const void *buf, size_t count,
 	if (!page)
 		return -ENOMEM;
 
-	pagebuf = kmap_atomic(page, KM_USER0);
+	pagebuf = kmap_atomic(page);
 	memcpy(pagebuf, buf, count);
 	flush_dcache_page(page);
-	kunmap_atomic(pagebuf, KM_USER0);
+	kunmap_atomic(pagebuf);
 
 	if (i_size_read(inode) < pos + LOGFS_BLOCKSIZE)
 		i_size_write(inode, pos + LOGFS_BLOCKSIZE);
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index ab798ed1cc8..e28d090c98d 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -543,9 +543,9 @@ void move_page_to_btree(struct page *page)
 		BUG_ON(!item); /* mempool empty */
 		memset(item, 0, sizeof(*item));
 
-		child = kmap_atomic(page, KM_USER0);
+		child = kmap_atomic(page);
 		item->val = child[pos];
-		kunmap_atomic(child, KM_USER0);
+		kunmap_atomic(child);
 		item->child_no = pos;
 		list_add(&item->list, &block->item_list);
 	}
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index c9ee7f5d1ca..97bca623d89 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -315,11 +315,9 @@ static int logfs_get_sb_final(struct super_block *sb)
 	if (IS_ERR(rootdir))
 		goto fail;
 
-	sb->s_root = d_alloc_root(rootdir);
-	if (!sb->s_root) {
-		iput(rootdir);
+	sb->s_root = d_make_root(rootdir);
+	if (!sb->s_root)
 		goto fail;
-	}
 
 	/* at that point we know that ->put_super() will be called */
 	super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
@@ -542,6 +540,7 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super,
 	 * the filesystem incompatible with 32bit systems.
 	 */
 	sb->s_maxbytes	= (1ull << 43) - 1;
+	sb->s_max_links = LOGFS_LINK_MAX;
 	sb->s_op	= &logfs_super_operations;
 	sb->s_flags	= flags | MS_NOATIME;
 
@@ -627,7 +626,10 @@ static int __init logfs_init(void)
 	if (ret)
 		goto out2;
 
-	return register_filesystem(&logfs_fs_type);
+	ret = register_filesystem(&logfs_fs_type);
+	if (!ret)
+		return 0;
+	logfs_destroy_inode_cache();
 out2:
 	logfs_compr_exit();
 out1:
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 085a9262c69..685b2d981b8 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -335,7 +335,7 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
 		goto fail;
 	}
 
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 	memset(kaddr, 0, PAGE_CACHE_SIZE);
 
 	if (sbi->s_version == MINIX_V3) {
@@ -355,7 +355,7 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
 		de->inode = dir->i_ino;
 		strcpy(de->name, "..");
 	}
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	err = dir_commit_chunk(page, 0, 2 * sbi->s_dirsize);
 fail:
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index fa8b612b8ce..fcb05d2c6b5 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -190,24 +190,24 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 		sbi->s_version = MINIX_V1;
 		sbi->s_dirsize = 16;
 		sbi->s_namelen = 14;
-		sbi->s_link_max = MINIX_LINK_MAX;
+		s->s_max_links = MINIX_LINK_MAX;
 	} else if (s->s_magic == MINIX_SUPER_MAGIC2) {
 		sbi->s_version = MINIX_V1;
 		sbi->s_dirsize = 32;
 		sbi->s_namelen = 30;
-		sbi->s_link_max = MINIX_LINK_MAX;
+		s->s_max_links = MINIX_LINK_MAX;
 	} else if (s->s_magic == MINIX2_SUPER_MAGIC) {
 		sbi->s_version = MINIX_V2;
 		sbi->s_nzones = ms->s_zones;
 		sbi->s_dirsize = 16;
 		sbi->s_namelen = 14;
-		sbi->s_link_max = MINIX2_LINK_MAX;
+		s->s_max_links = MINIX2_LINK_MAX;
 	} else if (s->s_magic == MINIX2_SUPER_MAGIC2) {
 		sbi->s_version = MINIX_V2;
 		sbi->s_nzones = ms->s_zones;
 		sbi->s_dirsize = 32;
 		sbi->s_namelen = 30;
-		sbi->s_link_max = MINIX2_LINK_MAX;
+		s->s_max_links = MINIX2_LINK_MAX;
 	} else if ( *(__u16 *)(bh->b_data + 24) == MINIX3_SUPER_MAGIC) {
 		m3s = (struct minix3_super_block *) bh->b_data;
 		s->s_magic = m3s->s_magic;
@@ -221,9 +221,9 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 		sbi->s_dirsize = 64;
 		sbi->s_namelen = 60;
 		sbi->s_version = MINIX_V3;
-		sbi->s_link_max = MINIX2_LINK_MAX;
 		sbi->s_mount_state = MINIX_VALID_FS;
 		sb_set_blocksize(s, m3s->s_blocksize);
+		s->s_max_links = MINIX2_LINK_MAX;
 	} else
 		goto out_no_fs;
 
@@ -254,14 +254,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 	minix_set_bit(0,sbi->s_imap[0]->b_data);
 	minix_set_bit(0,sbi->s_zmap[0]->b_data);
 
-	/* set up enough so that it can read an inode */
-	s->s_op = &minix_sops;
-	root_inode = minix_iget(s, MINIX_ROOT_INO);
-	if (IS_ERR(root_inode)) {
-		ret = PTR_ERR(root_inode);
-		goto out_no_root;
-	}
-
 	/* Apparently minix can create filesystems that allocate more blocks for
 	 * the bitmaps than needed.  We simply ignore that, but verify it didn't
 	 * create one with not enough blocks and bail out if so.
@@ -270,7 +262,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 	if (sbi->s_imap_blocks < block) {
 		printk("MINIX-fs: file system does not have enough "
 				"imap blocks allocated.  Refusing to mount\n");
-		goto out_iput;
+		goto out_no_bitmap;
 	}
 
 	block = minix_blocks_needed(
@@ -279,13 +271,21 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 	if (sbi->s_zmap_blocks < block) {
 		printk("MINIX-fs: file system does not have enough "
 				"zmap blocks allocated.  Refusing to mount.\n");
-		goto out_iput;
+		goto out_no_bitmap;
+	}
+
+	/* set up enough so that it can read an inode */
+	s->s_op = &minix_sops;
+	root_inode = minix_iget(s, MINIX_ROOT_INO);
+	if (IS_ERR(root_inode)) {
+		ret = PTR_ERR(root_inode);
+		goto out_no_root;
 	}
 
 	ret = -ENOMEM;
-	s->s_root = d_alloc_root(root_inode);
+	s->s_root = d_make_root(root_inode);
 	if (!s->s_root)
-		goto out_iput;
+		goto out_no_root;
 
 	if (!(s->s_flags & MS_RDONLY)) {
 		if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
@@ -301,10 +301,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 
 	return 0;
 
-out_iput:
-	iput(root_inode);
-	goto out_freemap;
-
 out_no_root:
 	if (!silent)
 		printk("MINIX-fs: get root inode failed\n");
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index c889ef0aa57..1ebd1185462 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -34,7 +34,6 @@ struct minix_sb_info {
 	unsigned long s_max_size;
 	int s_dirsize;
 	int s_namelen;
-	int s_link_max;
 	struct buffer_head ** s_imap;
 	struct buffer_head ** s_zmap;
 	struct buffer_head * s_sbh;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 2f76e38c206..2d0ee178630 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -94,9 +94,6 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
 {
 	struct inode *inode = old_dentry->d_inode;
 
-	if (inode->i_nlink >= minix_sb(inode->i_sb)->s_link_max)
-		return -EMLINK;
-
 	inode->i_ctime = CURRENT_TIME_SEC;
 	inode_inc_link_count(inode);
 	ihold(inode);
@@ -106,10 +103,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
 static int minix_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
 {
 	struct inode * inode;
-	int err = -EMLINK;
-
-	if (dir->i_nlink >= minix_sb(dir->i_sb)->s_link_max)
-		goto out;
+	int err;
 
 	inode_inc_link_count(dir);
 
@@ -181,7 +175,6 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry)
 static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
 			   struct inode * new_dir, struct dentry *new_dentry)
 {
-	struct minix_sb_info * info = minix_sb(old_dir->i_sb);
 	struct inode * old_inode = old_dentry->d_inode;
 	struct inode * new_inode = new_dentry->d_inode;
 	struct page * dir_page = NULL;
@@ -219,11 +212,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
 			drop_nlink(new_inode);
 		inode_dec_link_count(new_inode);
 	} else {
-		if (dir_de) {
-			err = -EMLINK;
-			if (new_dir->i_nlink >= info->s_link_max)
-				goto out_dir;
-		}
 		err = minix_add_link(new_dentry, old_inode);
 		if (err)
 			goto out_dir;
diff --git a/fs/namei.c b/fs/namei.c
index 208c6aa4a98..a94a7f9a03e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -642,7 +642,7 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
 	cond_resched();
 	current->total_link_count++;
 
-	touch_atime(link->mnt, dentry);
+	touch_atime(link);
 	nd_set_link(nd, NULL);
 
 	error = security_inode_follow_link(link->dentry, nd);
@@ -1095,8 +1095,10 @@ static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentr
 	struct dentry *old;
 
 	/* Don't create child dentry for a dead directory. */
-	if (unlikely(IS_DEADDIR(inode)))
+	if (unlikely(IS_DEADDIR(inode))) {
+		dput(dentry);
 		return ERR_PTR(-ENOENT);
+	}
 
 	old = inode->i_op->lookup(inode, dentry, nd);
 	if (unlikely(old)) {
@@ -1373,6 +1375,162 @@ static inline int can_lookup(struct inode *inode)
 }
 
 /*
+ * We can do the critical dentry name comparison and hashing
+ * operations one word at a time, but we are limited to:
+ *
+ * - Architectures with fast unaligned word accesses. We could
+ *   do a "get_unaligned()" if this helps and is sufficiently
+ *   fast.
+ *
+ * - Little-endian machines (so that we can generate the mask
+ *   of low bytes efficiently). Again, we *could* do a byte
+ *   swapping load on big-endian architectures if that is not
+ *   expensive enough to make the optimization worthless.
+ *
+ * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
+ *   do not trap on the (extremely unlikely) case of a page
+ *   crossing operation.
+ *
+ * - Furthermore, we need an efficient 64-bit compile for the
+ *   64-bit case in order to generate the "number of bytes in
+ *   the final mask". Again, that could be replaced with a
+ *   efficient population count instruction or similar.
+ */
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+
+#ifdef CONFIG_64BIT
+
+/*
+ * Jan Achrenius on G+: microoptimized version of
+ * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
+ * that works for the bytemasks without having to
+ * mask them first.
+ */
+static inline long count_masked_bytes(unsigned long mask)
+{
+	return mask*0x0001020304050608 >> 56;
+}
+
+static inline unsigned int fold_hash(unsigned long hash)
+{
+	hash += hash >> (8*sizeof(int));
+	return hash;
+}
+
+#else	/* 32-bit case */
+
+/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
+static inline long count_masked_bytes(long mask)
+{
+	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+	long a = (0x0ff0001+mask) >> 23;
+	/* Fix the 1 for 00 case */
+	return a & mask;
+}
+
+#define fold_hash(x) (x)
+
+#endif
+
+unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+{
+	unsigned long a, mask;
+	unsigned long hash = 0;
+
+	for (;;) {
+		a = *(unsigned long *)name;
+		hash *= 9;
+		if (len < sizeof(unsigned long))
+			break;
+		hash += a;
+		name += sizeof(unsigned long);
+		len -= sizeof(unsigned long);
+		if (!len)
+			goto done;
+	}
+	mask = ~(~0ul << len*8);
+	hash += mask & a;
+done:
+	return fold_hash(hash);
+}
+EXPORT_SYMBOL(full_name_hash);
+
+#ifdef CONFIG_64BIT
+#define ONEBYTES	0x0101010101010101ul
+#define SLASHBYTES	0x2f2f2f2f2f2f2f2ful
+#define HIGHBITS	0x8080808080808080ul
+#else
+#define ONEBYTES	0x01010101ul
+#define SLASHBYTES	0x2f2f2f2ful
+#define HIGHBITS	0x80808080ul
+#endif
+
+/* Return the high bit set in the first byte that is a zero */
+static inline unsigned long has_zero(unsigned long a)
+{
+	return ((a - ONEBYTES) & ~a) & HIGHBITS;
+}
+
+/*
+ * Calculate the length and hash of the path component, and
+ * return the length of the component;
+ */
+static inline unsigned long hash_name(const char *name, unsigned int *hashp)
+{
+	unsigned long a, mask, hash, len;
+
+	hash = a = 0;
+	len = -sizeof(unsigned long);
+	do {
+		hash = (hash + a) * 9;
+		len += sizeof(unsigned long);
+		a = *(unsigned long *)(name+len);
+		/* Do we have any NUL or '/' bytes in this word? */
+		mask = has_zero(a) | has_zero(a ^ SLASHBYTES);
+	} while (!mask);
+
+	/* The mask *below* the first high bit set */
+	mask = (mask - 1) & ~mask;
+	mask >>= 7;
+	hash += a & mask;
+	*hashp = fold_hash(hash);
+
+	return len + count_masked_bytes(mask);
+}
+
+#else
+
+unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+{
+	unsigned long hash = init_name_hash();
+	while (len--)
+		hash = partial_name_hash(*name++, hash);
+	return end_name_hash(hash);
+}
+EXPORT_SYMBOL(full_name_hash);
+
+/*
+ * We know there's a real path component here of at least
+ * one character.
+ */
+static inline unsigned long hash_name(const char *name, unsigned int *hashp)
+{
+	unsigned long hash = init_name_hash();
+	unsigned long len = 0, c;
+
+	c = (unsigned char)*name;
+	do {
+		len++;
+		hash = partial_name_hash(c, hash);
+		c = (unsigned char)name[len];
+	} while (c && c != '/');
+	*hashp = end_name_hash(hash);
+	return len;
+}
+
+#endif
+
+/*
  * Name resolution.
  * This is the basic name resolution function, turning a pathname into
  * the final dentry. We expect 'base' to be positive and a directory.
@@ -1392,31 +1550,22 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 
 	/* At this point we know we have a real path component. */
 	for(;;) {
-		unsigned long hash;
 		struct qstr this;
-		unsigned int c;
+		long len;
 		int type;
 
 		err = may_lookup(nd);
  		if (err)
 			break;
 
+		len = hash_name(name, &this.hash);
 		this.name = name;
-		c = *(const unsigned char *)name;
-
-		hash = init_name_hash();
-		do {
-			name++;
-			hash = partial_name_hash(c, hash);
-			c = *(const unsigned char *)name;
-		} while (c && (c != '/'));
-		this.len = name - (const char *) this.name;
-		this.hash = end_name_hash(hash);
+		this.len = len;
 
 		type = LAST_NORM;
-		if (this.name[0] == '.') switch (this.len) {
+		if (name[0] == '.') switch (len) {
 			case 2:
-				if (this.name[1] == '.') {
+				if (name[1] == '.') {
 					type = LAST_DOTDOT;
 					nd->flags |= LOOKUP_JUMPED;
 				}
@@ -1435,12 +1584,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 			}
 		}
 
-		/* remove trailing slashes? */
-		if (!c)
+		if (!name[len])
 			goto last_component;
-		while (*++name == '/');
-		if (!*name)
+		/*
+		 * If it wasn't NUL, we know it was '/'. Skip that
+		 * slash, and continue until no more slashes.
+		 */
+		do {
+			len++;
+		} while (unlikely(name[len] == '/'));
+		if (!name[len])
 			goto last_component;
+		name += len;
 
 		err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
 		if (err < 0)
@@ -1773,24 +1928,21 @@ static struct dentry *lookup_hash(struct nameidata *nd)
 struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
 {
 	struct qstr this;
-	unsigned long hash;
 	unsigned int c;
 
 	WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
 
 	this.name = name;
 	this.len = len;
+	this.hash = full_name_hash(name, len);
 	if (!len)
 		return ERR_PTR(-EACCES);
 
-	hash = init_name_hash();
 	while (len--) {
 		c = *(const unsigned char *)name++;
 		if (c == '/' || c == '\0')
 			return ERR_PTR(-EACCES);
-		hash = partial_name_hash(c, hash);
 	}
-	this.hash = end_name_hash(hash);
 	/*
 	 * See if the low-level filesystem might want
 	 * to use its own hash..
@@ -2138,7 +2290,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 		/* sayonara */
 		error = complete_walk(nd);
 		if (error)
-			return ERR_PTR(-ECHILD);
+			return ERR_PTR(error);
 
 		error = -ENOTDIR;
 		if (nd->flags & LOOKUP_DIRECTORY) {
@@ -2237,7 +2389,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	/* Why this, you ask?  _Now_ we might have grown LOOKUP_JUMPED... */
 	error = complete_walk(nd);
 	if (error)
-		goto exit;
+		return ERR_PTR(error);
 	error = -EISDIR;
 	if (S_ISDIR(nd->inode->i_mode))
 		goto exit;
@@ -2545,6 +2697,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
 int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	int error = may_create(dir, dentry);
+	unsigned max_links = dir->i_sb->s_max_links;
 
 	if (error)
 		return error;
@@ -2557,6 +2710,9 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	if (error)
 		return error;
 
+	if (max_links && dir->i_nlink >= max_links)
+		return -EMLINK;
+
 	error = dir->i_op->mkdir(dir, dentry, mode);
 	if (!error)
 		fsnotify_mkdir(dir, dentry);
@@ -2887,6 +3043,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
 {
 	struct inode *inode = old_dentry->d_inode;
+	unsigned max_links = dir->i_sb->s_max_links;
 	int error;
 
 	if (!inode)
@@ -2917,6 +3074,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
 	/* Make sure we don't allow creating hardlink to an unlinked file */
 	if (inode->i_nlink == 0)
 		error =  -ENOENT;
+	else if (max_links && inode->i_nlink >= max_links)
+		error = -EMLINK;
 	else
 		error = dir->i_op->link(old_dentry, dir, new_dentry);
 	mutex_unlock(&inode->i_mutex);
@@ -3026,6 +3185,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 {
 	int error = 0;
 	struct inode *target = new_dentry->d_inode;
+	unsigned max_links = new_dir->i_sb->s_max_links;
 
 	/*
 	 * If we are going to change the parent - check write permissions,
@@ -3049,6 +3209,11 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 	if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
 		goto out;
 
+	error = -EMLINK;
+	if (max_links && !target && new_dir != old_dir &&
+	    new_dir->i_nlink >= max_links)
+		goto out;
+
 	if (target)
 		shrink_dcache_parent(new_dentry);
 	error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
@@ -3347,9 +3512,9 @@ retry:
 	if (err)
 		goto fail;
 
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 	memcpy(kaddr, symname, len-1);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
 							page, fsdata);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 3d1e34f8a68..49df0e7f837 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -716,13 +716,11 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
         if (!root_inode)
 		goto out_disconnect;
 	DPRINTK("ncp_fill_super: root vol=%d\n", NCP_FINFO(root_inode)->volNumber);
-	sb->s_root = d_alloc_root(root_inode);
+	sb->s_root = d_make_root(root_inode);
         if (!sb->s_root)
-		goto out_no_root;
+		goto out_disconnect;
 	return 0;
 
-out_no_root:
-	iput(root_inode);
 out_disconnect:
 	ncp_lock_server(server);
 	ncp_disconnect(server);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 31778f74357..d4f772ebd1e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -36,6 +36,7 @@
 #include <linux/inet.h>
 #include <linux/in6.h>
 #include <linux/slab.h>
+#include <linux/idr.h>
 #include <net/ipv6.h>
 #include <linux/nfs_xdr.h>
 #include <linux/sunrpc/bc_xprt.h>
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index fd9a872fada..32aa6917265 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -260,10 +260,10 @@ void nfs_readdir_clear_array(struct page *page)
 	struct nfs_cache_array *array;
 	int i;
 
-	array = kmap_atomic(page, KM_USER0);
+	array = kmap_atomic(page);
 	for (i = 0; i < array->size; i++)
 		kfree(array->array[i].string.name);
-	kunmap_atomic(array, KM_USER0);
+	kunmap_atomic(array);
 }
 
 /*
@@ -1870,11 +1870,11 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
 	if (!page)
 		return -ENOMEM;
 
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 	memcpy(kaddr, symname, pathlen);
 	if (pathlen < PAGE_SIZE)
 		memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
 	if (error != 0) {
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index dcb61548887..801d6d83078 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -49,11 +49,9 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
 {
 	/* The mntroot acts as the dummy root dentry for this superblock */
 	if (sb->s_root == NULL) {
-		sb->s_root = d_alloc_root(inode);
-		if (sb->s_root == NULL) {
-			iput(inode);
+		sb->s_root = d_make_root(inode);
+		if (sb->s_root == NULL)
 			return -ENOMEM;
-		}
 		ihold(inode);
 		/*
 		 * Ensure that this dentry is invisible to d_find_alias().
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 2c05f1991e1..a1bbf7780df 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -198,6 +198,7 @@ int nfs_idmap_init(void)
 	if (ret < 0)
 		goto failed_put_key;
 
+	set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
 	cred->thread_keyring = keyring;
 	cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
 	id_resolver_cache = cred;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f0c849c98fe..caf92d05c3a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -193,7 +193,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
 	 * when talking to the server, we always send cookie 0
 	 * instead of 1 or 2.
 	 */
-	start = p = kmap_atomic(*readdir->pages, KM_USER0);
+	start = p = kmap_atomic(*readdir->pages);
 	
 	if (cookie == 0) {
 		*p++ = xdr_one;                                  /* next */
@@ -221,7 +221,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
 
 	readdir->pgbase = (char *)p - (char *)start;
 	readdir->count -= readdir->pgbase;
-	kunmap_atomic(start, KM_USER0);
+	kunmap_atomic(start);
 }
 
 static int nfs4_wait_clnt_recover(struct nfs_client *clp)
@@ -3575,8 +3575,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 	}
 	if (npages > 1) {
 		/* for decoding across pages */
-		args.acl_scratch = alloc_page(GFP_KERNEL);
-		if (!args.acl_scratch)
+		res.acl_scratch = alloc_page(GFP_KERNEL);
+		if (!res.acl_scratch)
 			goto out_free;
 	}
 	args.acl_len = npages * PAGE_SIZE;
@@ -3612,8 +3612,8 @@ out_free:
 	for (i = 0; i < npages; i++)
 		if (pages[i])
 			__free_page(pages[i]);
-	if (args.acl_scratch)
-		__free_page(args.acl_scratch);
+	if (res.acl_scratch)
+		__free_page(res.acl_scratch);
 	return ret;
 }
 
@@ -4883,8 +4883,10 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 				clp->cl_rpcclient->cl_auth->au_flavor);
 
 	res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
-	if (unlikely(!res.server_scope))
-		return -ENOMEM;
+	if (unlikely(!res.server_scope)) {
+		status = -ENOMEM;
+		goto out;
+	}
 
 	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
 	if (!status)
@@ -4901,12 +4903,13 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 			clp->server_scope = NULL;
 		}
 
-		if (!clp->server_scope)
+		if (!clp->server_scope) {
 			clp->server_scope = res.server_scope;
-		else
-			kfree(res.server_scope);
+			goto out;
+		}
 	}
-
+	kfree(res.server_scope);
+out:
 	dprintk("<-- %s status= %d\n", __func__, status);
 	return status;
 }
@@ -5008,37 +5011,53 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
 	return status;
 }
 
+static struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags)
+{
+	return kcalloc(max_slots, sizeof(struct nfs4_slot), gfp_flags);
+}
+
+static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
+		struct nfs4_slot *new,
+		u32 max_slots,
+		u32 ivalue)
+{
+	struct nfs4_slot *old = NULL;
+	u32 i;
+
+	spin_lock(&tbl->slot_tbl_lock);
+	if (new) {
+		old = tbl->slots;
+		tbl->slots = new;
+		tbl->max_slots = max_slots;
+	}
+	tbl->highest_used_slotid = -1;	/* no slot is currently used */
+	for (i = 0; i < tbl->max_slots; i++)
+		tbl->slots[i].seq_nr = ivalue;
+	spin_unlock(&tbl->slot_tbl_lock);
+	kfree(old);
+}
+
 /*
- * Reset a slot table
+ * (re)Initialise a slot table
  */
-static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
-				 int ivalue)
+static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
+				 u32 ivalue)
 {
 	struct nfs4_slot *new = NULL;
-	int i;
-	int ret = 0;
+	int ret = -ENOMEM;
 
 	dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__,
 		max_reqs, tbl->max_slots);
 
 	/* Does the newly negotiated max_reqs match the existing slot table? */
 	if (max_reqs != tbl->max_slots) {
-		ret = -ENOMEM;
-		new = kmalloc(max_reqs * sizeof(struct nfs4_slot),
-			      GFP_NOFS);
+		new = nfs4_alloc_slots(max_reqs, GFP_NOFS);
 		if (!new)
 			goto out;
-		ret = 0;
-		kfree(tbl->slots);
 	}
-	spin_lock(&tbl->slot_tbl_lock);
-	if (new) {
-		tbl->slots = new;
-		tbl->max_slots = max_reqs;
-	}
-	for (i = 0; i < tbl->max_slots; ++i)
-		tbl->slots[i].seq_nr = ivalue;
-	spin_unlock(&tbl->slot_tbl_lock);
+	ret = 0;
+
+	nfs4_add_and_init_slots(tbl, new, max_reqs, ivalue);
 	dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
 		tbl, tbl->slots, tbl->max_slots);
 out:
@@ -5061,36 +5080,6 @@ static void nfs4_destroy_slot_tables(struct nfs4_session *session)
 }
 
 /*
- * Initialize slot table
- */
-static int nfs4_init_slot_table(struct nfs4_slot_table *tbl,
-		int max_slots, int ivalue)
-{
-	struct nfs4_slot *slot;
-	int ret = -ENOMEM;
-
-	BUG_ON(max_slots > NFS4_MAX_SLOT_TABLE);
-
-	dprintk("--> %s: max_reqs=%u\n", __func__, max_slots);
-
-	slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_NOFS);
-	if (!slot)
-		goto out;
-	ret = 0;
-
-	spin_lock(&tbl->slot_tbl_lock);
-	tbl->max_slots = max_slots;
-	tbl->slots = slot;
-	tbl->highest_used_slotid = -1;  /* no slot is currently used */
-	spin_unlock(&tbl->slot_tbl_lock);
-	dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
-		tbl, tbl->slots, tbl->max_slots);
-out:
-	dprintk("<-- %s: return %d\n", __func__, ret);
-	return ret;
-}
-
-/*
  * Initialize or reset the forechannel and backchannel tables
  */
 static int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
@@ -5101,25 +5090,16 @@ static int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
 	dprintk("--> %s\n", __func__);
 	/* Fore channel */
 	tbl = &ses->fc_slot_table;
-	if (tbl->slots == NULL) {
-		status = nfs4_init_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
-		if (status) /* -ENOMEM */
-			return status;
-	} else {
-		status = nfs4_reset_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
-		if (status)
-			return status;
-	}
+	status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
+	if (status) /* -ENOMEM */
+		return status;
 	/* Back channel */
 	tbl = &ses->bc_slot_table;
-	if (tbl->slots == NULL) {
-		status = nfs4_init_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
-		if (status)
-			/* Fore and back channel share a connection so get
-			 * both slot tables or neither */
-			nfs4_destroy_slot_tables(ses);
-	} else
-		status = nfs4_reset_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
+	status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
+	if (status && tbl->slots == NULL)
+		/* Fore and back channel share a connection so get
+		 * both slot tables or neither */
+		nfs4_destroy_slot_tables(ses);
 	return status;
 }
 
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a53f33b4ac3..45392032e7b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1132,6 +1132,8 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4
 {
 	struct nfs_client *clp = server->nfs_client;
 
+	if (test_and_clear_bit(NFS_DELEGATED_STATE, &state->flags))
+		nfs_async_inode_return_delegation(state->inode, &state->stateid);
 	nfs4_state_mark_reclaim_nograce(clp, state);
 	nfs4_schedule_state_manager(clp);
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 95e92e43840..33bd8d0f745 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2522,7 +2522,6 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
 
 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
 		args->acl_pages, args->acl_pgbase, args->acl_len);
-	xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE);
 
 	encode_nops(&hdr);
 }
@@ -6032,6 +6031,10 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	struct compound_hdr hdr;
 	int status;
 
+	if (res->acl_scratch != NULL) {
+		void *p = page_address(res->acl_scratch);
+		xdr_set_scratch_buffer(xdr, p, PAGE_SIZE);
+	}
 	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index ce7f0758d84..9559ce46873 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -72,7 +72,7 @@ int nfsd_fault_inject_init(void)
 {
 	unsigned int i;
 	struct nfsd_fault_inject_op *op;
-	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
 
 	debug_dir = debugfs_create_dir("nfsd", NULL);
 	if (!debug_dir)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index edf6d3ed877..e59f71d0cf7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1541,30 +1541,31 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 __be32
 nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
 {
-	struct dentry	*dentry;
 	struct inode	*inode;
 	mm_segment_t	oldfs;
 	__be32		err;
 	int		host_err;
+	struct path path;
 
 	err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
 	if (err)
 		goto out;
 
-	dentry = fhp->fh_dentry;
-	inode = dentry->d_inode;
+	path.mnt = fhp->fh_export->ex_path.mnt;
+	path.dentry = fhp->fh_dentry;
+	inode = path.dentry->d_inode;
 
 	err = nfserr_inval;
 	if (!inode->i_op->readlink)
 		goto out;
 
-	touch_atime(fhp->fh_export->ex_path.mnt, dentry);
+	touch_atime(&path);
 	/* N.B. Why does this call need a get_fs()??
 	 * Remove the set_fs and watch the fireworks:-) --okir
 	 */
 
 	oldfs = get_fs(); set_fs(KERNEL_DS);
-	host_err = inode->i_op->readlink(dentry, buf, *lenp);
+	host_err = inode->i_op->readlink(path.dentry, buf, *lenp);
 	set_fs(oldfs);
 
 	if (host_err < 0)
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index c9b342c8b50..dab5c4c6dfa 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -218,11 +218,11 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
 								 kaddr, 1);
 		mark_buffer_dirty(cp_bh);
 
-		kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
+		kaddr = kmap_atomic(header_bh->b_page);
 		header = nilfs_cpfile_block_get_header(cpfile, header_bh,
 						       kaddr);
 		le64_add_cpu(&header->ch_ncheckpoints, 1);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		mark_buffer_dirty(header_bh);
 		nilfs_mdt_mark_dirty(cpfile);
 	}
@@ -313,7 +313,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
 			continue;
 		}
 
-		kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
+		kaddr = kmap_atomic(cp_bh->b_page);
 		cp = nilfs_cpfile_block_get_checkpoint(
 			cpfile, cno, cp_bh, kaddr);
 		nicps = 0;
@@ -334,7 +334,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
 						cpfile, cp_bh, kaddr, nicps);
 				if (count == 0) {
 					/* make hole */
-					kunmap_atomic(kaddr, KM_USER0);
+					kunmap_atomic(kaddr);
 					brelse(cp_bh);
 					ret =
 					  nilfs_cpfile_delete_checkpoint_block(
@@ -349,18 +349,18 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
 			}
 		}
 
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		brelse(cp_bh);
 	}
 
 	if (tnicps > 0) {
-		kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
+		kaddr = kmap_atomic(header_bh->b_page);
 		header = nilfs_cpfile_block_get_header(cpfile, header_bh,
 						       kaddr);
 		le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
 		mark_buffer_dirty(header_bh);
 		nilfs_mdt_mark_dirty(cpfile);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 	}
 
 	brelse(header_bh);
@@ -408,7 +408,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
 			continue; /* skip hole */
 		}
 
-		kaddr = kmap_atomic(bh->b_page, KM_USER0);
+		kaddr = kmap_atomic(bh->b_page);
 		cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
 		for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) {
 			if (!nilfs_checkpoint_invalid(cp)) {
@@ -418,7 +418,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
 				n++;
 			}
 		}
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		brelse(bh);
 	}
 
@@ -451,10 +451,10 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
 		ret = nilfs_cpfile_get_header_block(cpfile, &bh);
 		if (ret < 0)
 			goto out;
-		kaddr = kmap_atomic(bh->b_page, KM_USER0);
+		kaddr = kmap_atomic(bh->b_page);
 		header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
 		curr = le64_to_cpu(header->ch_snapshot_list.ssl_next);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		brelse(bh);
 		if (curr == 0) {
 			ret = 0;
@@ -472,7 +472,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
 			ret = 0; /* No snapshots (started from a hole block) */
 		goto out;
 	}
-	kaddr = kmap_atomic(bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(bh->b_page);
 	while (n < nci) {
 		cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr);
 		curr = ~(__u64)0; /* Terminator */
@@ -488,7 +488,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
 
 		next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next);
 		if (curr_blkoff != next_blkoff) {
-			kunmap_atomic(kaddr, KM_USER0);
+			kunmap_atomic(kaddr);
 			brelse(bh);
 			ret = nilfs_cpfile_get_checkpoint_block(cpfile, next,
 								0, &bh);
@@ -496,12 +496,12 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
 				WARN_ON(ret == -ENOENT);
 				goto out;
 			}
-			kaddr = kmap_atomic(bh->b_page, KM_USER0);
+			kaddr = kmap_atomic(bh->b_page);
 		}
 		curr = next;
 		curr_blkoff = next_blkoff;
 	}
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 	brelse(bh);
 	*cnop = curr;
 	ret = n;
@@ -592,24 +592,24 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
 	ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
 	if (ret < 0)
 		goto out_sem;
-	kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(cp_bh->b_page);
 	cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
 	if (nilfs_checkpoint_invalid(cp)) {
 		ret = -ENOENT;
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		goto out_cp;
 	}
 	if (nilfs_checkpoint_snapshot(cp)) {
 		ret = 0;
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		goto out_cp;
 	}
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
 	if (ret < 0)
 		goto out_cp;
-	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(header_bh->b_page);
 	header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
 	list = &header->ch_snapshot_list;
 	curr_bh = header_bh;
@@ -621,13 +621,13 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
 		prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev);
 		curr = prev;
 		if (curr_blkoff != prev_blkoff) {
-			kunmap_atomic(kaddr, KM_USER0);
+			kunmap_atomic(kaddr);
 			brelse(curr_bh);
 			ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr,
 								0, &curr_bh);
 			if (ret < 0)
 				goto out_header;
-			kaddr = kmap_atomic(curr_bh->b_page, KM_USER0);
+			kaddr = kmap_atomic(curr_bh->b_page);
 		}
 		curr_blkoff = prev_blkoff;
 		cp = nilfs_cpfile_block_get_checkpoint(
@@ -635,7 +635,7 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
 		list = &cp->cp_snapshot_list;
 		prev = le64_to_cpu(list->ssl_prev);
 	}
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	if (prev != 0) {
 		ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
@@ -647,29 +647,29 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
 		get_bh(prev_bh);
 	}
 
-	kaddr = kmap_atomic(curr_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(curr_bh->b_page);
 	list = nilfs_cpfile_block_get_snapshot_list(
 		cpfile, curr, curr_bh, kaddr);
 	list->ssl_prev = cpu_to_le64(cno);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
-	kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(cp_bh->b_page);
 	cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
 	cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr);
 	cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev);
 	nilfs_checkpoint_set_snapshot(cp);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
-	kaddr = kmap_atomic(prev_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(prev_bh->b_page);
 	list = nilfs_cpfile_block_get_snapshot_list(
 		cpfile, prev, prev_bh, kaddr);
 	list->ssl_next = cpu_to_le64(cno);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
-	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(header_bh->b_page);
 	header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
 	le64_add_cpu(&header->ch_nsnapshots, 1);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	mark_buffer_dirty(prev_bh);
 	mark_buffer_dirty(curr_bh);
@@ -710,23 +710,23 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
 	ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
 	if (ret < 0)
 		goto out_sem;
-	kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(cp_bh->b_page);
 	cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
 	if (nilfs_checkpoint_invalid(cp)) {
 		ret = -ENOENT;
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		goto out_cp;
 	}
 	if (!nilfs_checkpoint_snapshot(cp)) {
 		ret = 0;
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		goto out_cp;
 	}
 
 	list = &cp->cp_snapshot_list;
 	next = le64_to_cpu(list->ssl_next);
 	prev = le64_to_cpu(list->ssl_prev);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
 	if (ret < 0)
@@ -750,29 +750,29 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
 		get_bh(prev_bh);
 	}
 
-	kaddr = kmap_atomic(next_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(next_bh->b_page);
 	list = nilfs_cpfile_block_get_snapshot_list(
 		cpfile, next, next_bh, kaddr);
 	list->ssl_prev = cpu_to_le64(prev);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
-	kaddr = kmap_atomic(prev_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(prev_bh->b_page);
 	list = nilfs_cpfile_block_get_snapshot_list(
 		cpfile, prev, prev_bh, kaddr);
 	list->ssl_next = cpu_to_le64(next);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
-	kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(cp_bh->b_page);
 	cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
 	cp->cp_snapshot_list.ssl_next = cpu_to_le64(0);
 	cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0);
 	nilfs_checkpoint_clear_snapshot(cp);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
-	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(header_bh->b_page);
 	header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
 	le64_add_cpu(&header->ch_nsnapshots, -1);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	mark_buffer_dirty(next_bh);
 	mark_buffer_dirty(prev_bh);
@@ -829,13 +829,13 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
 	ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
 	if (ret < 0)
 		goto out;
-	kaddr = kmap_atomic(bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(bh->b_page);
 	cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
 	if (nilfs_checkpoint_invalid(cp))
 		ret = -ENOENT;
 	else
 		ret = nilfs_checkpoint_snapshot(cp);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 	brelse(bh);
 
  out:
@@ -912,12 +912,12 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
 	ret = nilfs_cpfile_get_header_block(cpfile, &bh);
 	if (ret < 0)
 		goto out_sem;
-	kaddr = kmap_atomic(bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(bh->b_page);
 	header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
 	cpstat->cs_cno = nilfs_mdt_cno(cpfile);
 	cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints);
 	cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 	brelse(bh);
 
  out_sem:
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index fcc2f869af1..b5c13f3576b 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -85,13 +85,13 @@ void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req)
 	struct nilfs_dat_entry *entry;
 	void *kaddr;
 
-	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(req->pr_entry_bh->b_page);
 	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
 					     req->pr_entry_bh, kaddr);
 	entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
 	entry->de_end = cpu_to_le64(NILFS_CNO_MAX);
 	entry->de_blocknr = cpu_to_le64(0);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	nilfs_palloc_commit_alloc_entry(dat, req);
 	nilfs_dat_commit_entry(dat, req);
@@ -109,13 +109,13 @@ static void nilfs_dat_commit_free(struct inode *dat,
 	struct nilfs_dat_entry *entry;
 	void *kaddr;
 
-	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(req->pr_entry_bh->b_page);
 	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
 					     req->pr_entry_bh, kaddr);
 	entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
 	entry->de_end = cpu_to_le64(NILFS_CNO_MIN);
 	entry->de_blocknr = cpu_to_le64(0);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	nilfs_dat_commit_entry(dat, req);
 	nilfs_palloc_commit_free_entry(dat, req);
@@ -136,12 +136,12 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
 	struct nilfs_dat_entry *entry;
 	void *kaddr;
 
-	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(req->pr_entry_bh->b_page);
 	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
 					     req->pr_entry_bh, kaddr);
 	entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat));
 	entry->de_blocknr = cpu_to_le64(blocknr);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	nilfs_dat_commit_entry(dat, req);
 }
@@ -160,12 +160,12 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
 		return ret;
 	}
 
-	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(req->pr_entry_bh->b_page);
 	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
 					     req->pr_entry_bh, kaddr);
 	start = le64_to_cpu(entry->de_start);
 	blocknr = le64_to_cpu(entry->de_blocknr);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	if (blocknr == 0) {
 		ret = nilfs_palloc_prepare_free_entry(dat, req);
@@ -186,7 +186,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
 	sector_t blocknr;
 	void *kaddr;
 
-	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(req->pr_entry_bh->b_page);
 	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
 					     req->pr_entry_bh, kaddr);
 	end = start = le64_to_cpu(entry->de_start);
@@ -196,7 +196,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
 	}
 	entry->de_end = cpu_to_le64(end);
 	blocknr = le64_to_cpu(entry->de_blocknr);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	if (blocknr == 0)
 		nilfs_dat_commit_free(dat, req);
@@ -211,12 +211,12 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
 	sector_t blocknr;
 	void *kaddr;
 
-	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(req->pr_entry_bh->b_page);
 	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
 					     req->pr_entry_bh, kaddr);
 	start = le64_to_cpu(entry->de_start);
 	blocknr = le64_to_cpu(entry->de_blocknr);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	if (start == nilfs_mdt_cno(dat) && blocknr == 0)
 		nilfs_palloc_abort_free_entry(dat, req);
@@ -346,20 +346,20 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
 		}
 	}
 
-	kaddr = kmap_atomic(entry_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(entry_bh->b_page);
 	entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
 	if (unlikely(entry->de_blocknr == cpu_to_le64(0))) {
 		printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__,
 		       (unsigned long long)vblocknr,
 		       (unsigned long long)le64_to_cpu(entry->de_start),
 		       (unsigned long long)le64_to_cpu(entry->de_end));
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		brelse(entry_bh);
 		return -EINVAL;
 	}
 	WARN_ON(blocknr == 0);
 	entry->de_blocknr = cpu_to_le64(blocknr);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	mark_buffer_dirty(entry_bh);
 	nilfs_mdt_mark_dirty(dat);
@@ -409,7 +409,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
 		}
 	}
 
-	kaddr = kmap_atomic(entry_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(entry_bh->b_page);
 	entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
 	blocknr = le64_to_cpu(entry->de_blocknr);
 	if (blocknr == 0) {
@@ -419,7 +419,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
 	*blocknrp = blocknr;
 
  out:
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 	brelse(entry_bh);
 	return ret;
 }
@@ -440,7 +440,7 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
 						   0, &entry_bh);
 		if (ret < 0)
 			return ret;
-		kaddr = kmap_atomic(entry_bh->b_page, KM_USER0);
+		kaddr = kmap_atomic(entry_bh->b_page);
 		/* last virtual block number in this block */
 		first = vinfo->vi_vblocknr;
 		do_div(first, entries_per_block);
@@ -456,7 +456,7 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
 			vinfo->vi_end = le64_to_cpu(entry->de_end);
 			vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr);
 		}
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		brelse(entry_bh);
 	}
 
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index ca35b3a46d1..df1a7fb238d 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -602,7 +602,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
 		unlock_page(page);
 		goto fail;
 	}
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 	memset(kaddr, 0, chunk_size);
 	de = (struct nilfs_dir_entry *)kaddr;
 	de->name_len = 1;
@@ -617,7 +617,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
 	de->inode = cpu_to_le64(parent->i_ino);
 	memcpy(de->name, "..\0", 4);
 	nilfs_set_de_type(de, inode);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 	nilfs_commit_chunk(page, mapping, 0, chunk_size);
 fail:
 	page_cache_release(page);
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index 684d76300a8..5a48df79d67 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -122,11 +122,11 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
 		return ret;
 	}
 
-	kaddr = kmap_atomic(req.pr_entry_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(req.pr_entry_bh->b_page);
 	raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr,
 						 req.pr_entry_bh, kaddr);
 	raw_inode->i_flags = 0;
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	mark_buffer_dirty(req.pr_entry_bh);
 	brelse(req.pr_entry_bh);
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 800e8d78a83..f9897d09c69 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -58,12 +58,12 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
 
 	set_buffer_mapped(bh);
 
-	kaddr = kmap_atomic(bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(bh->b_page);
 	memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits);
 	if (init_block)
 		init_block(inode, bh, kaddr);
 	flush_dcache_page(bh->b_page);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	set_buffer_uptodate(bh);
 	mark_buffer_dirty(bh);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 1cd3f624dff..fce2bbee66d 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -193,9 +193,6 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
 	struct nilfs_transaction_info ti;
 	int err;
 
-	if (inode->i_nlink >= NILFS_LINK_MAX)
-		return -EMLINK;
-
 	err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
 	if (err)
 		return err;
@@ -219,9 +216,6 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	struct nilfs_transaction_info ti;
 	int err;
 
-	if (dir->i_nlink >= NILFS_LINK_MAX)
-		return -EMLINK;
-
 	err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
 	if (err)
 		return err;
@@ -400,11 +394,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		drop_nlink(new_inode);
 		nilfs_mark_inode_dirty(new_inode);
 	} else {
-		if (dir_de) {
-			err = -EMLINK;
-			if (new_dir->i_nlink >= NILFS_LINK_MAX)
-				goto out_dir;
-		}
 		err = nilfs_add_link(new_dentry, old_inode);
 		if (err)
 			goto out_dir;
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 65221a04c6f..3e7b2a0dc0c 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -119,11 +119,11 @@ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
 	struct page *spage = sbh->b_page, *dpage = dbh->b_page;
 	struct buffer_head *bh;
 
-	kaddr0 = kmap_atomic(spage, KM_USER0);
-	kaddr1 = kmap_atomic(dpage, KM_USER1);
+	kaddr0 = kmap_atomic(spage);
+	kaddr1 = kmap_atomic(dpage);
 	memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
-	kunmap_atomic(kaddr1, KM_USER1);
-	kunmap_atomic(kaddr0, KM_USER0);
+	kunmap_atomic(kaddr1);
+	kunmap_atomic(kaddr0);
 
 	dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
 	dbh->b_blocknr = sbh->b_blocknr;
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index a604ac0331b..f1626f5011c 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -493,9 +493,9 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
 	if (unlikely(!bh_org))
 		return -EIO;
 
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 	memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 	brelse(bh_org);
 	return 0;
 }
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 850a7c0228f..dc9a913784a 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -227,9 +227,9 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
 		crc = crc32_le(crc, bh->b_data, bh->b_size);
 	}
 	list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
-		kaddr = kmap_atomic(bh->b_page, KM_USER0);
+		kaddr = kmap_atomic(bh->b_page);
 		crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 	}
 	raw_sum->ss_datasum = cpu_to_le32(crc);
 }
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 0a0aba617d8..c5b7653a439 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -111,11 +111,11 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
 	struct nilfs_sufile_header *header;
 	void *kaddr;
 
-	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(header_bh->b_page);
 	header = kaddr + bh_offset(header_bh);
 	le64_add_cpu(&header->sh_ncleansegs, ncleanadd);
 	le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	mark_buffer_dirty(header_bh);
 }
@@ -319,11 +319,11 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
 	ret = nilfs_sufile_get_header_block(sufile, &header_bh);
 	if (ret < 0)
 		goto out_sem;
-	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(header_bh->b_page);
 	header = kaddr + bh_offset(header_bh);
 	ncleansegs = le64_to_cpu(header->sh_ncleansegs);
 	last_alloc = le64_to_cpu(header->sh_last_alloc);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	nsegments = nilfs_sufile_get_nsegments(sufile);
 	maxsegnum = sui->allocmax;
@@ -356,7 +356,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
 							   &su_bh);
 		if (ret < 0)
 			goto out_header;
-		kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
+		kaddr = kmap_atomic(su_bh->b_page);
 		su = nilfs_sufile_block_get_segment_usage(
 			sufile, segnum, su_bh, kaddr);
 
@@ -367,14 +367,14 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
 				continue;
 			/* found a clean segment */
 			nilfs_segment_usage_set_dirty(su);
-			kunmap_atomic(kaddr, KM_USER0);
+			kunmap_atomic(kaddr);
 
-			kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
+			kaddr = kmap_atomic(header_bh->b_page);
 			header = kaddr + bh_offset(header_bh);
 			le64_add_cpu(&header->sh_ncleansegs, -1);
 			le64_add_cpu(&header->sh_ndirtysegs, 1);
 			header->sh_last_alloc = cpu_to_le64(segnum);
-			kunmap_atomic(kaddr, KM_USER0);
+			kunmap_atomic(kaddr);
 
 			sui->ncleansegs--;
 			mark_buffer_dirty(header_bh);
@@ -385,7 +385,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
 			goto out_header;
 		}
 
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		brelse(su_bh);
 	}
 
@@ -407,16 +407,16 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
 	struct nilfs_segment_usage *su;
 	void *kaddr;
 
-	kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(su_bh->b_page);
 	su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
 	if (unlikely(!nilfs_segment_usage_clean(su))) {
 		printk(KERN_WARNING "%s: segment %llu must be clean\n",
 		       __func__, (unsigned long long)segnum);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		return;
 	}
 	nilfs_segment_usage_set_dirty(su);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	nilfs_sufile_mod_counter(header_bh, -1, 1);
 	NILFS_SUI(sufile)->ncleansegs--;
@@ -433,11 +433,11 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
 	void *kaddr;
 	int clean, dirty;
 
-	kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(su_bh->b_page);
 	su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
 	if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) &&
 	    su->su_nblocks == cpu_to_le32(0)) {
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		return;
 	}
 	clean = nilfs_segment_usage_clean(su);
@@ -447,7 +447,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
 	su->su_lastmod = cpu_to_le64(0);
 	su->su_nblocks = cpu_to_le32(0);
 	su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
 	NILFS_SUI(sufile)->ncleansegs -= clean;
@@ -464,12 +464,12 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
 	void *kaddr;
 	int sudirty;
 
-	kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(su_bh->b_page);
 	su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
 	if (nilfs_segment_usage_clean(su)) {
 		printk(KERN_WARNING "%s: segment %llu is already clean\n",
 		       __func__, (unsigned long long)segnum);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		return;
 	}
 	WARN_ON(nilfs_segment_usage_error(su));
@@ -477,7 +477,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
 
 	sudirty = nilfs_segment_usage_dirty(su);
 	nilfs_segment_usage_set_clean(su);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 	mark_buffer_dirty(su_bh);
 
 	nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
@@ -525,13 +525,13 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
 	if (ret < 0)
 		goto out_sem;
 
-	kaddr = kmap_atomic(bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(bh->b_page);
 	su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
 	WARN_ON(nilfs_segment_usage_error(su));
 	if (modtime)
 		su->su_lastmod = cpu_to_le64(modtime);
 	su->su_nblocks = cpu_to_le32(nblocks);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	mark_buffer_dirty(bh);
 	nilfs_mdt_mark_dirty(sufile);
@@ -572,7 +572,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
 	if (ret < 0)
 		goto out_sem;
 
-	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(header_bh->b_page);
 	header = kaddr + bh_offset(header_bh);
 	sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile);
 	sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs);
@@ -582,7 +582,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
 	spin_lock(&nilfs->ns_last_segment_lock);
 	sustat->ss_prot_seq = nilfs->ns_prot_seq;
 	spin_unlock(&nilfs->ns_last_segment_lock);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 	brelse(header_bh);
 
  out_sem:
@@ -598,15 +598,15 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
 	void *kaddr;
 	int suclean;
 
-	kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(su_bh->b_page);
 	su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
 	if (nilfs_segment_usage_error(su)) {
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		return;
 	}
 	suclean = nilfs_segment_usage_clean(su);
 	nilfs_segment_usage_set_error(su);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	if (suclean) {
 		nilfs_sufile_mod_counter(header_bh, -1, 0);
@@ -675,7 +675,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
 			/* hole */
 			continue;
 		}
-		kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
+		kaddr = kmap_atomic(su_bh->b_page);
 		su = nilfs_sufile_block_get_segment_usage(
 			sufile, segnum, su_bh, kaddr);
 		su2 = su;
@@ -684,7 +684,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
 			     ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) ||
 			    nilfs_segment_is_active(nilfs, segnum + j)) {
 				ret = -EBUSY;
-				kunmap_atomic(kaddr, KM_USER0);
+				kunmap_atomic(kaddr);
 				brelse(su_bh);
 				goto out_header;
 			}
@@ -696,7 +696,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
 				nc++;
 			}
 		}
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		if (nc > 0) {
 			mark_buffer_dirty(su_bh);
 			ncleaned += nc;
@@ -772,10 +772,10 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
 		sui->ncleansegs -= nsegs - newnsegs;
 	}
 
-	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(header_bh->b_page);
 	header = kaddr + bh_offset(header_bh);
 	header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	mark_buffer_dirty(header_bh);
 	nilfs_mdt_mark_dirty(sufile);
@@ -840,7 +840,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
 			continue;
 		}
 
-		kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
+		kaddr = kmap_atomic(su_bh->b_page);
 		su = nilfs_sufile_block_get_segment_usage(
 			sufile, segnum, su_bh, kaddr);
 		for (j = 0; j < n;
@@ -853,7 +853,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
 				si->sui_flags |=
 					(1UL << NILFS_SEGMENT_USAGE_ACTIVE);
 		}
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		brelse(su_bh);
 	}
 	ret = nsegs;
@@ -902,10 +902,10 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
 		goto failed;
 
 	sui = NILFS_SUI(sufile);
-	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
+	kaddr = kmap_atomic(header_bh->b_page);
 	header = kaddr + bh_offset(header_bh);
 	sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 	brelse(header_bh);
 
 	sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 08e3d4f9df1..1099a76cee5 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -917,9 +917,8 @@ static int nilfs_get_root_dentry(struct super_block *sb,
 	if (root->cno == NILFS_CPTREE_CURRENT_CNO) {
 		dentry = d_find_alias(inode);
 		if (!dentry) {
-			dentry = d_alloc_root(inode);
+			dentry = d_make_root(inode);
 			if (!dentry) {
-				iput(inode);
 				ret = -ENOMEM;
 				goto failed_dentry;
 			}
@@ -1059,6 +1058,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_export_op = &nilfs_export_ops;
 	sb->s_root = NULL;
 	sb->s_time_gran = 1;
+	sb->s_max_links = NILFS_LINK_MAX;
 
 	bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
 	sb->s_bdi = bdi ? : &default_backing_dev_info;
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index d3271409437..501b7f8b739 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -409,6 +409,12 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
 	nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
 	nilfs->ns_r_segments_percentage =
 		le32_to_cpu(sbp->s_r_segments_percentage);
+	if (nilfs->ns_r_segments_percentage < 1 ||
+	    nilfs->ns_r_segments_percentage > 99) {
+		printk(KERN_ERR "NILFS: invalid reserved segments percentage.\n");
+		return -EINVAL;
+	}
+
 	nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
 	nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
 	return 0;
@@ -515,6 +521,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
 		brelse(sbh[1]);
 		sbh[1] = NULL;
 		sbp[1] = NULL;
+		valid[1] = 0;
 		swp = 0;
 	}
 	if (!valid[swp]) {
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 0b1e885b8cf..fa9c05f97af 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -94,11 +94,11 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 			if (file_ofs < init_size)
 				ofs = init_size - file_ofs;
 			local_irq_save(flags);
-			kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+			kaddr = kmap_atomic(page);
 			memset(kaddr + bh_offset(bh) + ofs, 0,
 					bh->b_size - ofs);
 			flush_dcache_page(page);
-			kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
+			kunmap_atomic(kaddr);
 			local_irq_restore(flags);
 		}
 	} else {
@@ -147,11 +147,11 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 		/* Should have been verified before we got here... */
 		BUG_ON(!recs);
 		local_irq_save(flags);
-		kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+		kaddr = kmap_atomic(page);
 		for (i = 0; i < recs; i++)
 			post_read_mst_fixup((NTFS_RECORD*)(kaddr +
 					i * rec_size), rec_size);
-		kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
+		kunmap_atomic(kaddr);
 		local_irq_restore(flags);
 		flush_dcache_page(page);
 		if (likely(page_uptodate && !PageError(page)))
@@ -504,7 +504,7 @@ retry_readpage:
 		/* Race with shrinking truncate. */
 		attr_len = i_size;
 	}
-	addr = kmap_atomic(page, KM_USER0);
+	addr = kmap_atomic(page);
 	/* Copy the data to the page. */
 	memcpy(addr, (u8*)ctx->attr +
 			le16_to_cpu(ctx->attr->data.resident.value_offset),
@@ -512,7 +512,7 @@ retry_readpage:
 	/* Zero the remainder of the page. */
 	memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
 	flush_dcache_page(page);
-	kunmap_atomic(addr, KM_USER0);
+	kunmap_atomic(addr);
 put_unm_err_out:
 	ntfs_attr_put_search_ctx(ctx);
 unm_err_out:
@@ -746,14 +746,14 @@ lock_retry_remap:
 			unsigned long *bpos, *bend;
 
 			/* Check if the buffer is zero. */
-			kaddr = kmap_atomic(page, KM_USER0);
+			kaddr = kmap_atomic(page);
 			bpos = (unsigned long *)(kaddr + bh_offset(bh));
 			bend = (unsigned long *)((u8*)bpos + blocksize);
 			do {
 				if (unlikely(*bpos))
 					break;
 			} while (likely(++bpos < bend));
-			kunmap_atomic(kaddr, KM_USER0);
+			kunmap_atomic(kaddr);
 			if (bpos == bend) {
 				/*
 				 * Buffer is zero and sparse, no need to write
@@ -1495,14 +1495,14 @@ retry_writepage:
 		/* Shrinking cannot fail. */
 		BUG_ON(err);
 	}
-	addr = kmap_atomic(page, KM_USER0);
+	addr = kmap_atomic(page);
 	/* Copy the data from the page to the mft record. */
 	memcpy((u8*)ctx->attr +
 			le16_to_cpu(ctx->attr->data.resident.value_offset),
 			addr, attr_len);
 	/* Zero out of bounds area in the page cache page. */
 	memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
-	kunmap_atomic(addr, KM_USER0);
+	kunmap_atomic(addr);
 	flush_dcache_page(page);
 	flush_dcache_mft_record_page(ctx->ntfs_ino);
 	/* We are done with the page. */
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index f14fde2b03d..a27e3fecefa 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1,7 +1,7 @@
 /**
  * attrib.c - NTFS attribute operations.  Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2007 Anton Altaparmakov
+ * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
  * Copyright (c) 2002 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
@@ -345,10 +345,10 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
 	unsigned long flags;
 	bool is_retry = false;
 
+	BUG_ON(!ni);
 	ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
 			ni->mft_no, (unsigned long long)vcn,
 			write_locked ? "write" : "read");
-	BUG_ON(!ni);
 	BUG_ON(!NInoNonResident(ni));
 	BUG_ON(vcn < 0);
 	if (!ni->runlist.rl) {
@@ -469,9 +469,9 @@ runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
 	int err = 0;
 	bool is_retry = false;
 
+	BUG_ON(!ni);
 	ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, with%s ctx.",
 			ni->mft_no, (unsigned long long)vcn, ctx ? "" : "out");
-	BUG_ON(!ni);
 	BUG_ON(!NInoNonResident(ni));
 	BUG_ON(vcn < 0);
 	if (!ni->runlist.rl) {
@@ -1656,12 +1656,12 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
 	attr_size = le32_to_cpu(a->data.resident.value_length);
 	BUG_ON(attr_size != data_size);
 	if (page && !PageUptodate(page)) {
-		kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = kmap_atomic(page);
 		memcpy(kaddr, (u8*)a +
 				le16_to_cpu(a->data.resident.value_offset),
 				attr_size);
 		memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		flush_dcache_page(page);
 		SetPageUptodate(page);
 	}
@@ -1806,9 +1806,9 @@ undo_err_out:
 			sizeof(a->data.resident.reserved));
 	/* Copy the data from the page back to the attribute value. */
 	if (page) {
-		kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = kmap_atomic(page);
 		memcpy((u8*)a + mp_ofs, kaddr, attr_size);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 	}
 	/* Setup the allocated size in the ntfs inode in case it changed. */
 	write_lock_irqsave(&ni->size_lock, flags);
@@ -2540,10 +2540,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
 		size = PAGE_CACHE_SIZE;
 		if (idx == end)
 			size = end_ofs;
-		kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = kmap_atomic(page);
 		memset(kaddr + start_ofs, val, size - start_ofs);
 		flush_dcache_page(page);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		set_page_dirty(page);
 		page_cache_release(page);
 		balance_dirty_pages_ratelimited(mapping);
@@ -2561,10 +2561,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
 					"page (index 0x%lx).", idx);
 			return -ENOMEM;
 		}
-		kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = kmap_atomic(page);
 		memset(kaddr, val, PAGE_CACHE_SIZE);
 		flush_dcache_page(page);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		/*
 		 * If the page has buffers, mark them uptodate since buffer
 		 * state and not page state is definitive in 2.6 kernels.
@@ -2598,10 +2598,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
 					"(error, index 0x%lx).", idx);
 			return PTR_ERR(page);
 		}
-		kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = kmap_atomic(page);
 		memset(kaddr, val, end_ofs);
 		flush_dcache_page(page);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		set_page_dirty(page);
 		page_cache_release(page);
 		balance_dirty_pages_ratelimited(mapping);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index c587e2d2718..8639169221c 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -704,7 +704,7 @@ map_buffer_cached:
 				u8 *kaddr;
 				unsigned pofs;
 					
-				kaddr = kmap_atomic(page, KM_USER0);
+				kaddr = kmap_atomic(page);
 				if (bh_pos < pos) {
 					pofs = bh_pos & ~PAGE_CACHE_MASK;
 					memset(kaddr + pofs, 0, pos - bh_pos);
@@ -713,7 +713,7 @@ map_buffer_cached:
 					pofs = end & ~PAGE_CACHE_MASK;
 					memset(kaddr + pofs, 0, bh_end - end);
 				}
-				kunmap_atomic(kaddr, KM_USER0);
+				kunmap_atomic(kaddr);
 				flush_dcache_page(page);
 			}
 			continue;
@@ -1287,9 +1287,9 @@ static inline size_t ntfs_copy_from_user(struct page **pages,
 		len = PAGE_CACHE_SIZE - ofs;
 		if (len > bytes)
 			len = bytes;
-		addr = kmap_atomic(*pages, KM_USER0);
+		addr = kmap_atomic(*pages);
 		left = __copy_from_user_inatomic(addr + ofs, buf, len);
-		kunmap_atomic(addr, KM_USER0);
+		kunmap_atomic(addr);
 		if (unlikely(left)) {
 			/* Do it the slow way. */
 			addr = kmap(*pages);
@@ -1401,10 +1401,10 @@ static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
 		len = PAGE_CACHE_SIZE - ofs;
 		if (len > bytes)
 			len = bytes;
-		addr = kmap_atomic(*pages, KM_USER0);
+		addr = kmap_atomic(*pages);
 		copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
 				*iov, *iov_ofs, len);
-		kunmap_atomic(addr, KM_USER0);
+		kunmap_atomic(addr);
 		if (unlikely(copied != len)) {
 			/* Do it the slow way. */
 			addr = kmap(*pages);
@@ -1691,7 +1691,7 @@ static int ntfs_commit_pages_after_write(struct page **pages,
 	BUG_ON(end > le32_to_cpu(a->length) -
 			le16_to_cpu(a->data.resident.value_offset));
 	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 	/* Copy the received data from the page to the mft record. */
 	memcpy(kattr + pos, kaddr + pos, bytes);
 	/* Update the attribute length if necessary. */
@@ -1713,7 +1713,7 @@ static int ntfs_commit_pages_after_write(struct page **pages,
 		flush_dcache_page(page);
 		SetPageUptodate(page);
 	}
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 	/* Update initialized_size/i_size if necessary. */
 	read_lock_irqsave(&ni->size_lock, flags);
 	initialized_size = ni->initialized_size;
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index faece719086..809c0e6d8e0 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -2008,14 +2008,14 @@ typedef struct {
  *
  * When a directory is small enough to fit inside the index root then this
  * is the only attribute describing the directory. When the directory is too
- * large to fit in the index root, on the other hand, two aditional attributes
+ * large to fit in the index root, on the other hand, two additional attributes
  * are present: an index allocation attribute, containing sub-nodes of the B+
  * directory tree (see below), and a bitmap attribute, describing which virtual
  * cluster numbers (vcns) in the index allocation attribute are in use by an
  * index block.
  *
  * NOTE: The root directory (FILE_root) contains an entry for itself. Other
- * dircetories do not contain entries for themselves, though.
+ * directories do not contain entries for themselves, though.
  */
 typedef struct {
 	ATTR_TYPE type;			/* Type of the indexed attribute. Is
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 382857f9c7d..3014a36a255 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1,7 +1,7 @@
 /**
  * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc.
+ * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
  * Copyright (c) 2002 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
@@ -1367,7 +1367,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
 			ntfs_error(vol->sb, "Failed to merge runlists for mft "
 					"bitmap.");
 			if (ntfs_cluster_free_from_rl(vol, rl2)) {
-				ntfs_error(vol->sb, "Failed to dealocate "
+				ntfs_error(vol->sb, "Failed to deallocate "
 						"allocated cluster.%s", es);
 				NVolSetErrors(vol);
 			}
@@ -1805,7 +1805,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 		ntfs_error(vol->sb, "Failed to merge runlists for mft data "
 				"attribute.");
 		if (ntfs_cluster_free_from_rl(vol, rl2)) {
-			ntfs_error(vol->sb, "Failed to dealocate clusters "
+			ntfs_error(vol->sb, "Failed to deallocate clusters "
 					"from the mft data attribute.%s", es);
 			NVolSetErrors(vol);
 		}
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 5a4a8af5c40..b341492542c 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1,7 +1,7 @@
 /*
  * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc.
+ * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
  * Copyright (c) 2001,2002 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
@@ -1239,7 +1239,6 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
 {
 	MFT_REF mref;
 	struct inode *vi;
-	ntfs_inode *ni;
 	struct page *page;
 	u32 *kaddr, *kend;
 	ntfs_name *name = NULL;
@@ -1290,7 +1289,6 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
 				"is not the system volume.", i_size_read(vi));
 		goto iput_out;
 	}
-	ni = NTFS_I(vi);
 	page = ntfs_map_page(vi->i_mapping, 0);
 	if (IS_ERR(page)) {
 		ntfs_error(vol->sb, "Failed to read from hiberfil.sys.");
@@ -2475,7 +2473,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
 			nr_free -= PAGE_CACHE_SIZE * 8;
 			continue;
 		}
-		kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = kmap_atomic(page);
 		/*
 		 * Subtract the number of set bits. If this
 		 * is the last page and it is partial we don't really care as
@@ -2485,7 +2483,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
 		 */
 		nr_free -= bitmap_weight(kaddr,
 					PAGE_CACHE_SIZE * BITS_PER_BYTE);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		page_cache_release(page);
 	}
 	ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1);
@@ -2546,7 +2544,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
 			nr_free -= PAGE_CACHE_SIZE * 8;
 			continue;
 		}
-		kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = kmap_atomic(page);
 		/*
 		 * Subtract the number of set bits. If this
 		 * is the last page and it is partial we don't really care as
@@ -2556,7 +2554,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
 		 */
 		nr_free -= bitmap_weight(kaddr,
 					PAGE_CACHE_SIZE * BITS_PER_BYTE);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		page_cache_release(page);
 	}
 	ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.",
@@ -2910,9 +2908,10 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 		ntfs_error(sb, "Failed to load system files.");
 		goto unl_upcase_iput_tmp_ino_err_out_now;
 	}
-	if ((sb->s_root = d_alloc_root(vol->root_ino))) {
-		/* We grab a reference, simulating an ntfs_iget(). */
-		ihold(vol->root_ino);
+
+	/* We grab a reference, simulating an ntfs_iget(). */
+	ihold(vol->root_ino);
+	if ((sb->s_root = d_make_root(vol->root_ino))) {
 		ntfs_debug("Exiting, status successful.");
 		/* Release the default upcase if it has no users. */
 		mutex_lock(&ntfs_lock);
@@ -3160,6 +3159,8 @@ static int __init init_ntfs_fs(void)
 	}
 	printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n");
 
+	/* Unregister the ntfs sysctls. */
+	ntfs_sysctl(0);
 sysctl_err_out:
 	kmem_cache_destroy(ntfs_big_inode_cache);
 big_inode_err_out:
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 78b68af3b0e..657743254eb 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -102,7 +102,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
 		 * copy, the data is still good. */
 		if (buffer_jbd(buffer_cache_bh)
 		    && ocfs2_inode_is_new(inode)) {
-			kaddr = kmap_atomic(bh_result->b_page, KM_USER0);
+			kaddr = kmap_atomic(bh_result->b_page);
 			if (!kaddr) {
 				mlog(ML_ERROR, "couldn't kmap!\n");
 				goto bail;
@@ -110,7 +110,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
 			memcpy(kaddr + (bh_result->b_size * iblock),
 			       buffer_cache_bh->b_data,
 			       bh_result->b_size);
-			kunmap_atomic(kaddr, KM_USER0);
+			kunmap_atomic(kaddr);
 			set_buffer_uptodate(bh_result);
 		}
 		brelse(buffer_cache_bh);
@@ -236,13 +236,13 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
 		return -EROFS;
 	}
 
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 	if (size)
 		memcpy(kaddr, di->id2.i_data.id_data, size);
 	/* Clear the remaining part of the page */
 	memset(kaddr + size, 0, PAGE_CACHE_SIZE - size);
 	flush_dcache_page(page);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	SetPageUptodate(page);
 
@@ -689,7 +689,7 @@ static void ocfs2_clear_page_regions(struct page *page,
 
 	ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end);
 
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 
 	if (from || to) {
 		if (from > cluster_start)
@@ -700,7 +700,7 @@ static void ocfs2_clear_page_regions(struct page *page,
 		memset(kaddr + cluster_start, 0, cluster_end - cluster_start);
 	}
 
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 }
 
 /*
@@ -1981,9 +1981,9 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
 		}
 	}
 
-	kaddr = kmap_atomic(wc->w_target_page, KM_USER0);
+	kaddr = kmap_atomic(wc->w_target_page);
 	memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	trace_ocfs2_write_end_inline(
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index abfac0d7ae9..3b5825ef319 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -582,24 +582,14 @@ static int dlmfs_fill_super(struct super_block * sb,
 			    void * data,
 			    int silent)
 {
-	struct inode * inode;
-	struct dentry * root;
-
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_blocksize = PAGE_CACHE_SIZE;
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
 	sb->s_magic = DLMFS_MAGIC;
 	sb->s_op = &dlmfs_ops;
-	inode = dlmfs_get_root_inode(sb);
-	if (!inode)
-		return -ENOMEM;
-
-	root = d_alloc_root(inode);
-	if (!root) {
-		iput(inode);
+	sb->s_root = d_make_root(dlmfs_get_root_inode(sb));
+	if (!sb->s_root)
 		return -ENOMEM;
-	}
-	sb->s_root = root;
 	return 0;
 }
 
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index be244692550..a9856e3eaaf 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1053,7 +1053,7 @@ static int ocfs2_rename(struct inode *old_dir,
 	handle_t *handle = NULL;
 	struct buffer_head *old_dir_bh = NULL;
 	struct buffer_head *new_dir_bh = NULL;
-	nlink_t old_dir_nlink = old_dir->i_nlink;
+	u32 old_dir_nlink = old_dir->i_nlink;
 	struct ocfs2_dinode *old_di;
 	struct ocfs2_dir_lookup_result old_inode_dot_dot_res = { NULL, };
 	struct ocfs2_dir_lookup_result target_lookup_res = { NULL, };
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 604e12c4e97..68f4541c2db 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1154,19 +1154,19 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	status = ocfs2_mount_volume(sb);
-	if (osb->root_inode)
-		inode = igrab(osb->root_inode);
-
 	if (status < 0)
 		goto read_super_error;
 
+	if (osb->root_inode)
+		inode = igrab(osb->root_inode);
+
 	if (!inode) {
 		status = -EIO;
 		mlog_errno(status);
 		goto read_super_error;
 	}
 
-	root = d_alloc_root(inode);
+	root = d_make_root(inode);
 	if (!root) {
 		status = -ENOMEM;
 		mlog_errno(status);
@@ -1220,9 +1220,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 read_super_error:
 	brelse(bh);
 
-	if (inode)
-		iput(inode);
-
 	if (osb) {
 		atomic_set(&osb->vol_state, VOLUME_DISABLED);
 		wake_up(&osb->osb_mount_event);
@@ -1627,21 +1624,17 @@ static int __init ocfs2_init(void)
 		init_waitqueue_head(&ocfs2__ioend_wq[i]);
 
 	status = init_ocfs2_uptodate_cache();
-	if (status < 0) {
-		mlog_errno(status);
-		goto leave;
-	}
+	if (status < 0)
+		goto out1;
 
 	status = ocfs2_initialize_mem_caches();
-	if (status < 0) {
-		mlog_errno(status);
-		goto leave;
-	}
+	if (status < 0)
+		goto out2;
 
 	ocfs2_wq = create_singlethread_workqueue("ocfs2_wq");
 	if (!ocfs2_wq) {
 		status = -ENOMEM;
-		goto leave;
+		goto out3;
 	}
 
 	ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
@@ -1653,17 +1646,23 @@ static int __init ocfs2_init(void)
 	ocfs2_set_locking_protocol();
 
 	status = register_quota_format(&ocfs2_quota_format);
-leave:
-	if (status < 0) {
-		ocfs2_free_mem_caches();
-		exit_ocfs2_uptodate_cache();
-		mlog_errno(status);
-	}
+	if (status < 0)
+		goto out4;
+	status = register_filesystem(&ocfs2_fs_type);
+	if (!status)
+		return 0;
 
-	if (status >= 0) {
-		return register_filesystem(&ocfs2_fs_type);
-	} else
-		return -1;
+	unregister_quota_format(&ocfs2_quota_format);
+out4:
+	destroy_workqueue(ocfs2_wq);
+	debugfs_remove(ocfs2_debugfs_root);
+out3:
+	ocfs2_free_mem_caches();
+out2:
+	exit_ocfs2_uptodate_cache();
+out1:
+	mlog_errno(status);
+	return status;
 }
 
 static void __exit ocfs2_exit(void)
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 6065bb0ba20..dbc84222258 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -539,11 +539,9 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
 		goto out_brelse_bh2;
 	}
 
-	sb->s_root = d_alloc_root(root);
-	if (!sb->s_root) {
-		iput(root);
+	sb->s_root = d_make_root(root);
+	if (!sb->s_root)
 		goto out_brelse_bh2;
-	}
 	printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name);
 
 	ret = 0;
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index a88c03bc749..bc49c975d50 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -408,13 +408,12 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent)
 	oi->type = op_inode_node;
 	oi->u.node = of_find_node_by_path("/");
 
-	s->s_root = d_alloc_root(root_inode);
+	s->s_root = d_make_root(root_inode);
 	if (!s->s_root)
 		goto out_no_root_dentry;
 	return 0;
 
 out_no_root_dentry:
-	iput(root_inode);
 	ret = -ENOMEM;
 out_no_root:
 	printk("openprom_fill_super: get root inode failed\n");
diff --git a/fs/pipe.c b/fs/pipe.c
index a932ced92a1..fe0502f9beb 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -230,7 +230,7 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
 {
 	if (atomic) {
 		buf->flags |= PIPE_BUF_FLAG_ATOMIC;
-		return kmap_atomic(buf->page, KM_USER0);
+		return kmap_atomic(buf->page);
 	}
 
 	return kmap(buf->page);
@@ -251,7 +251,7 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
 {
 	if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
 		buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
-		kunmap_atomic(map_data, KM_USER0);
+		kunmap_atomic(map_data);
 	} else
 		kunmap(buf->page);
 }
@@ -565,14 +565,14 @@ redo1:
 			iov_fault_in_pages_read(iov, chars);
 redo2:
 			if (atomic)
-				src = kmap_atomic(page, KM_USER0);
+				src = kmap_atomic(page);
 			else
 				src = kmap(page);
 
 			error = pipe_iov_copy_from_user(src, iov, chars,
 							atomic);
 			if (atomic)
-				kunmap_atomic(src, KM_USER0);
+				kunmap_atomic(src);
 			else
 				kunmap(page);
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d4548dd49b0..3b42c1418f3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1310,8 +1310,7 @@ sched_autogroup_write(struct file *file, const char __user *buf,
 	if (!p)
 		return -ESRCH;
 
-	err = nice;
-	err = proc_sched_autogroup_set_nice(p, &err);
+	err = proc_sched_autogroup_set_nice(p, nice);
 	if (err)
 		count = err;
 
@@ -2990,9 +2989,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 	INF("cmdline",    S_IRUGO, proc_pid_cmdline),
 	ONE("stat",       S_IRUGO, proc_tgid_stat),
 	ONE("statm",      S_IRUGO, proc_pid_statm),
-	REG("maps",       S_IRUGO, proc_maps_operations),
+	REG("maps",       S_IRUGO, proc_pid_maps_operations),
 #ifdef CONFIG_NUMA
-	REG("numa_maps",  S_IRUGO, proc_numa_maps_operations),
+	REG("numa_maps",  S_IRUGO, proc_pid_numa_maps_operations),
 #endif
 	REG("mem",        S_IRUSR|S_IWUSR, proc_mem_operations),
 	LNK("cwd",        proc_cwd_link),
@@ -3003,7 +3002,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("mountstats", S_IRUSR, proc_mountstats_operations),
 #ifdef CONFIG_PROC_PAGE_MONITOR
 	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
-	REG("smaps",      S_IRUGO, proc_smaps_operations),
+	REG("smaps",      S_IRUGO, proc_pid_smaps_operations),
 	REG("pagemap",    S_IRUGO, proc_pagemap_operations),
 #endif
 #ifdef CONFIG_SECURITY
@@ -3349,9 +3348,9 @@ static const struct pid_entry tid_base_stuff[] = {
 	INF("cmdline",   S_IRUGO, proc_pid_cmdline),
 	ONE("stat",      S_IRUGO, proc_tid_stat),
 	ONE("statm",     S_IRUGO, proc_pid_statm),
-	REG("maps",      S_IRUGO, proc_maps_operations),
+	REG("maps",      S_IRUGO, proc_tid_maps_operations),
 #ifdef CONFIG_NUMA
-	REG("numa_maps", S_IRUGO, proc_numa_maps_operations),
+	REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations),
 #endif
 	REG("mem",       S_IRUSR|S_IWUSR, proc_mem_operations),
 	LNK("cwd",       proc_cwd_link),
@@ -3361,7 +3360,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
 #ifdef CONFIG_PROC_PAGE_MONITOR
 	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
-	REG("smaps",     S_IRUGO, proc_smaps_operations),
+	REG("smaps",     S_IRUGO, proc_tid_smaps_operations),
 	REG("pagemap",    S_IRUGO, proc_pagemap_operations),
 #endif
 #ifdef CONFIG_SECURITY
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 84fd3235a59..8461a7b82fd 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -486,8 +486,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
 
 int proc_fill_super(struct super_block *s)
 {
-	struct inode * root_inode;
-
 	s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
 	s->s_blocksize = 1024;
 	s->s_blocksize_bits = 10;
@@ -496,19 +494,11 @@ int proc_fill_super(struct super_block *s)
 	s->s_time_gran = 1;
 	
 	pde_get(&proc_root);
-	root_inode = proc_get_inode(s, &proc_root);
-	if (!root_inode)
-		goto out_no_root;
-	root_inode->i_uid = 0;
-	root_inode->i_gid = 0;
-	s->s_root = d_alloc_root(root_inode);
-	if (!s->s_root)
-		goto out_no_root;
-	return 0;
+	s->s_root = d_make_root(proc_get_inode(s, &proc_root));
+	if (s->s_root)
+		return 0;
 
-out_no_root:
 	printk("proc_read_super: get root inode failed\n");
-	iput(root_inode);
 	pde_put(&proc_root);
 	return -ENOMEM;
 }
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 292577531ad..c44efe19798 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -53,9 +53,12 @@ extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
 				struct pid *pid, struct task_struct *task);
 extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
 
-extern const struct file_operations proc_maps_operations;
-extern const struct file_operations proc_numa_maps_operations;
-extern const struct file_operations proc_smaps_operations;
+extern const struct file_operations proc_pid_maps_operations;
+extern const struct file_operations proc_tid_maps_operations;
+extern const struct file_operations proc_pid_numa_maps_operations;
+extern const struct file_operations proc_tid_numa_maps_operations;
+extern const struct file_operations proc_pid_smaps_operations;
+extern const struct file_operations proc_tid_smaps_operations;
 extern const struct file_operations proc_clear_refs_operations;
 extern const struct file_operations proc_pagemap_operations;
 extern const struct file_operations proc_net_operations;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index d245cb23dd7..e5e69aff6c6 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -513,7 +513,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 
 				n = copy_to_user(buffer, (char *)start, tsz);
 				/*
-				 * We cannot distingush between fault on source
+				 * We cannot distinguish between fault on source
 				 * and fault on destination. When this happens
 				 * we clear too and hope it will trigger the
 				 * EFAULT again.
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 6d8e6a9e93a..7fcd0d60a96 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -115,6 +115,8 @@ u64 stable_page_flags(struct page *page)
 		u |= 1 << KPF_COMPOUND_TAIL;
 	if (PageHuge(page))
 		u |= 1 << KPF_HUGE;
+	else if (PageTransCompound(page))
+		u |= 1 << KPF_THP;
 
 	/*
 	 * Caveats on high order pages: page->_count will only be set
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index a6b62173d4c..67bbf6e4e19 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -6,7 +6,9 @@
 #include <linux/poll.h>
 #include <linux/proc_fs.h>
 #include <linux/security.h>
+#include <linux/sched.h>
 #include <linux/namei.h>
+#include <linux/mm.h>
 #include "internal.h"
 
 static const struct dentry_operations proc_sys_dentry_operations;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7dcd2a25049..9694cc28351 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -209,16 +209,20 @@ static int do_maps_open(struct inode *inode, struct file *file,
 	return ret;
 }
 
-static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
+static void
+show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct file *file = vma->vm_file;
+	struct proc_maps_private *priv = m->private;
+	struct task_struct *task = priv->task;
 	vm_flags_t flags = vma->vm_flags;
 	unsigned long ino = 0;
 	unsigned long long pgoff = 0;
 	unsigned long start, end;
 	dev_t dev = 0;
 	int len;
+	const char *name = NULL;
 
 	if (file) {
 		struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
@@ -252,36 +256,57 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 	if (file) {
 		pad_len_spaces(m, len);
 		seq_path(m, &file->f_path, "\n");
-	} else {
-		const char *name = arch_vma_name(vma);
-		if (!name) {
-			if (mm) {
-				if (vma->vm_start <= mm->brk &&
-						vma->vm_end >= mm->start_brk) {
-					name = "[heap]";
-				} else if (vma->vm_start <= mm->start_stack &&
-					   vma->vm_end >= mm->start_stack) {
-					name = "[stack]";
-				}
+		goto done;
+	}
+
+	name = arch_vma_name(vma);
+	if (!name) {
+		pid_t tid;
+
+		if (!mm) {
+			name = "[vdso]";
+			goto done;
+		}
+
+		if (vma->vm_start <= mm->brk &&
+		    vma->vm_end >= mm->start_brk) {
+			name = "[heap]";
+			goto done;
+		}
+
+		tid = vm_is_stack(task, vma, is_pid);
+
+		if (tid != 0) {
+			/*
+			 * Thread stack in /proc/PID/task/TID/maps or
+			 * the main process stack.
+			 */
+			if (!is_pid || (vma->vm_start <= mm->start_stack &&
+			    vma->vm_end >= mm->start_stack)) {
+				name = "[stack]";
 			} else {
-				name = "[vdso]";
+				/* Thread stack in /proc/PID/maps */
+				pad_len_spaces(m, len);
+				seq_printf(m, "[stack:%d]", tid);
 			}
 		}
-		if (name) {
-			pad_len_spaces(m, len);
-			seq_puts(m, name);
-		}
+	}
+
+done:
+	if (name) {
+		pad_len_spaces(m, len);
+		seq_puts(m, name);
 	}
 	seq_putc(m, '\n');
 }
 
-static int show_map(struct seq_file *m, void *v)
+static int show_map(struct seq_file *m, void *v, int is_pid)
 {
 	struct vm_area_struct *vma = v;
 	struct proc_maps_private *priv = m->private;
 	struct task_struct *task = priv->task;
 
-	show_map_vma(m, vma);
+	show_map_vma(m, vma, is_pid);
 
 	if (m->count < m->size)  /* vma is copied successfully */
 		m->version = (vma != get_gate_vma(task->mm))
@@ -289,20 +314,49 @@ static int show_map(struct seq_file *m, void *v)
 	return 0;
 }
 
+static int show_pid_map(struct seq_file *m, void *v)
+{
+	return show_map(m, v, 1);
+}
+
+static int show_tid_map(struct seq_file *m, void *v)
+{
+	return show_map(m, v, 0);
+}
+
 static const struct seq_operations proc_pid_maps_op = {
 	.start	= m_start,
 	.next	= m_next,
 	.stop	= m_stop,
-	.show	= show_map
+	.show	= show_pid_map
 };
 
-static int maps_open(struct inode *inode, struct file *file)
+static const struct seq_operations proc_tid_maps_op = {
+	.start	= m_start,
+	.next	= m_next,
+	.stop	= m_stop,
+	.show	= show_tid_map
+};
+
+static int pid_maps_open(struct inode *inode, struct file *file)
 {
 	return do_maps_open(inode, file, &proc_pid_maps_op);
 }
 
-const struct file_operations proc_maps_operations = {
-	.open		= maps_open,
+static int tid_maps_open(struct inode *inode, struct file *file)
+{
+	return do_maps_open(inode, file, &proc_tid_maps_op);
+}
+
+const struct file_operations proc_pid_maps_operations = {
+	.open		= pid_maps_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+
+const struct file_operations proc_tid_maps_operations = {
+	.open		= tid_maps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
 	.release	= seq_release_private,
@@ -394,21 +448,15 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	pte_t *pte;
 	spinlock_t *ptl;
 
-	spin_lock(&walk->mm->page_table_lock);
-	if (pmd_trans_huge(*pmd)) {
-		if (pmd_trans_splitting(*pmd)) {
-			spin_unlock(&walk->mm->page_table_lock);
-			wait_split_huge_page(vma->anon_vma, pmd);
-		} else {
-			smaps_pte_entry(*(pte_t *)pmd, addr,
-					HPAGE_PMD_SIZE, walk);
-			spin_unlock(&walk->mm->page_table_lock);
-			mss->anonymous_thp += HPAGE_PMD_SIZE;
-			return 0;
-		}
-	} else {
+	if (pmd_trans_huge_lock(pmd, vma) == 1) {
+		smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
 		spin_unlock(&walk->mm->page_table_lock);
+		mss->anonymous_thp += HPAGE_PMD_SIZE;
+		return 0;
 	}
+
+	if (pmd_trans_unstable(pmd))
+		return 0;
 	/*
 	 * The mmap_sem held all the way back in m_start() is what
 	 * keeps khugepaged out of here and from collapsing things
@@ -422,7 +470,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	return 0;
 }
 
-static int show_smap(struct seq_file *m, void *v)
+static int show_smap(struct seq_file *m, void *v, int is_pid)
 {
 	struct proc_maps_private *priv = m->private;
 	struct task_struct *task = priv->task;
@@ -440,7 +488,7 @@ static int show_smap(struct seq_file *m, void *v)
 	if (vma->vm_mm && !is_vm_hugetlb_page(vma))
 		walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
 
-	show_map_vma(m, vma);
+	show_map_vma(m, vma, is_pid);
 
 	seq_printf(m,
 		   "Size:           %8lu kB\n"
@@ -479,20 +527,49 @@ static int show_smap(struct seq_file *m, void *v)
 	return 0;
 }
 
+static int show_pid_smap(struct seq_file *m, void *v)
+{
+	return show_smap(m, v, 1);
+}
+
+static int show_tid_smap(struct seq_file *m, void *v)
+{
+	return show_smap(m, v, 0);
+}
+
 static const struct seq_operations proc_pid_smaps_op = {
 	.start	= m_start,
 	.next	= m_next,
 	.stop	= m_stop,
-	.show	= show_smap
+	.show	= show_pid_smap
+};
+
+static const struct seq_operations proc_tid_smaps_op = {
+	.start	= m_start,
+	.next	= m_next,
+	.stop	= m_stop,
+	.show	= show_tid_smap
 };
 
-static int smaps_open(struct inode *inode, struct file *file)
+static int pid_smaps_open(struct inode *inode, struct file *file)
 {
 	return do_maps_open(inode, file, &proc_pid_smaps_op);
 }
 
-const struct file_operations proc_smaps_operations = {
-	.open		= smaps_open,
+static int tid_smaps_open(struct inode *inode, struct file *file)
+{
+	return do_maps_open(inode, file, &proc_tid_smaps_op);
+}
+
+const struct file_operations proc_pid_smaps_operations = {
+	.open		= pid_smaps_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+
+const struct file_operations proc_tid_smaps_operations = {
+	.open		= tid_smaps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
 	.release	= seq_release_private,
@@ -507,6 +584,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 	struct page *page;
 
 	split_huge_page_pmd(walk->mm, pmd);
+	if (pmd_trans_unstable(pmd))
+		return 0;
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -598,11 +677,18 @@ const struct file_operations proc_clear_refs_operations = {
 	.llseek		= noop_llseek,
 };
 
+typedef struct {
+	u64 pme;
+} pagemap_entry_t;
+
 struct pagemapread {
 	int pos, len;
-	u64 *buffer;
+	pagemap_entry_t *buffer;
 };
 
+#define PAGEMAP_WALK_SIZE	(PMD_SIZE)
+#define PAGEMAP_WALK_MASK	(PMD_MASK)
+
 #define PM_ENTRY_BYTES      sizeof(u64)
 #define PM_STATUS_BITS      3
 #define PM_STATUS_OFFSET    (64 - PM_STATUS_BITS)
@@ -620,10 +706,15 @@ struct pagemapread {
 #define PM_NOT_PRESENT      PM_PSHIFT(PAGE_SHIFT)
 #define PM_END_OF_BUFFER    1
 
-static int add_to_pagemap(unsigned long addr, u64 pfn,
+static inline pagemap_entry_t make_pme(u64 val)
+{
+	return (pagemap_entry_t) { .pme = val };
+}
+
+static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
 			  struct pagemapread *pm)
 {
-	pm->buffer[pm->pos++] = pfn;
+	pm->buffer[pm->pos++] = *pme;
 	if (pm->pos >= pm->len)
 		return PM_END_OF_BUFFER;
 	return 0;
@@ -635,8 +726,10 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
 	struct pagemapread *pm = walk->private;
 	unsigned long addr;
 	int err = 0;
+	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
+
 	for (addr = start; addr < end; addr += PAGE_SIZE) {
-		err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
+		err = add_to_pagemap(addr, &pme, pm);
 		if (err)
 			break;
 	}
@@ -649,17 +742,35 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte)
 	return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
 }
 
-static u64 pte_to_pagemap_entry(pte_t pte)
+static void pte_to_pagemap_entry(pagemap_entry_t *pme, pte_t pte)
 {
-	u64 pme = 0;
 	if (is_swap_pte(pte))
-		pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte))
-			| PM_PSHIFT(PAGE_SHIFT) | PM_SWAP;
+		*pme = make_pme(PM_PFRAME(swap_pte_to_pagemap_entry(pte))
+				| PM_PSHIFT(PAGE_SHIFT) | PM_SWAP);
 	else if (pte_present(pte))
-		pme = PM_PFRAME(pte_pfn(pte))
-			| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
-	return pme;
+		*pme = make_pme(PM_PFRAME(pte_pfn(pte))
+				| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
+					pmd_t pmd, int offset)
+{
+	/*
+	 * Currently pmd for thp is always present because thp can not be
+	 * swapped-out, migrated, or HWPOISONed (split in such cases instead.)
+	 * This if-check is just to prepare for future implementation.
+	 */
+	if (pmd_present(pmd))
+		*pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
+				| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
 }
+#else
+static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
+						pmd_t pmd, int offset)
+{
+}
+#endif
 
 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			     struct mm_walk *walk)
@@ -668,13 +779,30 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	struct pagemapread *pm = walk->private;
 	pte_t *pte;
 	int err = 0;
+	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
 
-	split_huge_page_pmd(walk->mm, pmd);
+	if (pmd_trans_unstable(pmd))
+		return 0;
 
 	/* find the first VMA at or above 'addr' */
 	vma = find_vma(walk->mm, addr);
+	spin_lock(&walk->mm->page_table_lock);
+	if (pmd_trans_huge_lock(pmd, vma) == 1) {
+		for (; addr != end; addr += PAGE_SIZE) {
+			unsigned long offset;
+
+			offset = (addr & ~PAGEMAP_WALK_MASK) >>
+					PAGE_SHIFT;
+			thp_pmd_to_pagemap_entry(&pme, *pmd, offset);
+			err = add_to_pagemap(addr, &pme, pm);
+			if (err)
+				break;
+		}
+		spin_unlock(&walk->mm->page_table_lock);
+		return err;
+	}
+
 	for (; addr != end; addr += PAGE_SIZE) {
-		u64 pfn = PM_NOT_PRESENT;
 
 		/* check to see if we've left 'vma' behind
 		 * and need a new, higher one */
@@ -686,11 +814,11 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 		if (vma && (vma->vm_start <= addr) &&
 		    !is_vm_hugetlb_page(vma)) {
 			pte = pte_offset_map(pmd, addr);
-			pfn = pte_to_pagemap_entry(*pte);
+			pte_to_pagemap_entry(&pme, *pte);
 			/* unmap before userspace copy */
 			pte_unmap(pte);
 		}
-		err = add_to_pagemap(addr, pfn, pm);
+		err = add_to_pagemap(addr, &pme, pm);
 		if (err)
 			return err;
 	}
@@ -701,13 +829,12 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
-static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset)
+static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme,
+					pte_t pte, int offset)
 {
-	u64 pme = 0;
 	if (pte_present(pte))
-		pme = PM_PFRAME(pte_pfn(pte) + offset)
-			| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
-	return pme;
+		*pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)
+				| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
 }
 
 /* This function walks within one hugetlb entry in the single call */
@@ -717,12 +844,12 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
 {
 	struct pagemapread *pm = walk->private;
 	int err = 0;
-	u64 pfn;
+	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
 
 	for (; addr != end; addr += PAGE_SIZE) {
 		int offset = (addr & ~hmask) >> PAGE_SHIFT;
-		pfn = huge_pte_to_pagemap_entry(*pte, offset);
-		err = add_to_pagemap(addr, pfn, pm);
+		huge_pte_to_pagemap_entry(&pme, *pte, offset);
+		err = add_to_pagemap(addr, &pme, pm);
 		if (err)
 			return err;
 	}
@@ -757,8 +884,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
  * determine which areas of memory are actually mapped and llseek to
  * skip over unmapped regions.
  */
-#define PAGEMAP_WALK_SIZE	(PMD_SIZE)
-#define PAGEMAP_WALK_MASK	(PMD_MASK)
 static ssize_t pagemap_read(struct file *file, char __user *buf,
 			    size_t count, loff_t *ppos)
 {
@@ -941,26 +1066,21 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 	pte_t *pte;
 
 	md = walk->private;
-	spin_lock(&walk->mm->page_table_lock);
-	if (pmd_trans_huge(*pmd)) {
-		if (pmd_trans_splitting(*pmd)) {
-			spin_unlock(&walk->mm->page_table_lock);
-			wait_split_huge_page(md->vma->anon_vma, pmd);
-		} else {
-			pte_t huge_pte = *(pte_t *)pmd;
-			struct page *page;
-
-			page = can_gather_numa_stats(huge_pte, md->vma, addr);
-			if (page)
-				gather_stats(page, md, pte_dirty(huge_pte),
-						HPAGE_PMD_SIZE/PAGE_SIZE);
-			spin_unlock(&walk->mm->page_table_lock);
-			return 0;
-		}
-	} else {
+
+	if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
+		pte_t huge_pte = *(pte_t *)pmd;
+		struct page *page;
+
+		page = can_gather_numa_stats(huge_pte, md->vma, addr);
+		if (page)
+			gather_stats(page, md, pte_dirty(huge_pte),
+				     HPAGE_PMD_SIZE/PAGE_SIZE);
 		spin_unlock(&walk->mm->page_table_lock);
+		return 0;
 	}
 
+	if (pmd_trans_unstable(pmd))
+		return 0;
 	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
 	do {
 		struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
@@ -1002,7 +1122,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
 /*
  * Display pages allocated per node and memory policy via /proc.
  */
-static int show_numa_map(struct seq_file *m, void *v)
+static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 {
 	struct numa_maps_private *numa_priv = m->private;
 	struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
@@ -1039,9 +1159,19 @@ static int show_numa_map(struct seq_file *m, void *v)
 		seq_path(m, &file->f_path, "\n\t= ");
 	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
 		seq_printf(m, " heap");
-	} else if (vma->vm_start <= mm->start_stack &&
-			vma->vm_end >= mm->start_stack) {
-		seq_printf(m, " stack");
+	} else {
+		pid_t tid = vm_is_stack(proc_priv->task, vma, is_pid);
+		if (tid != 0) {
+			/*
+			 * Thread stack in /proc/PID/task/TID/maps or
+			 * the main process stack.
+			 */
+			if (!is_pid || (vma->vm_start <= mm->start_stack &&
+			    vma->vm_end >= mm->start_stack))
+				seq_printf(m, " stack");
+			else
+				seq_printf(m, " stack:%d", tid);
+		}
 	}
 
 	if (is_vm_hugetlb_page(vma))
@@ -1084,21 +1214,39 @@ out:
 	return 0;
 }
 
+static int show_pid_numa_map(struct seq_file *m, void *v)
+{
+	return show_numa_map(m, v, 1);
+}
+
+static int show_tid_numa_map(struct seq_file *m, void *v)
+{
+	return show_numa_map(m, v, 0);
+}
+
 static const struct seq_operations proc_pid_numa_maps_op = {
-        .start  = m_start,
-        .next   = m_next,
-        .stop   = m_stop,
-        .show   = show_numa_map,
+	.start  = m_start,
+	.next   = m_next,
+	.stop   = m_stop,
+	.show   = show_pid_numa_map,
+};
+
+static const struct seq_operations proc_tid_numa_maps_op = {
+	.start  = m_start,
+	.next   = m_next,
+	.stop   = m_stop,
+	.show   = show_tid_numa_map,
 };
 
-static int numa_maps_open(struct inode *inode, struct file *file)
+static int numa_maps_open(struct inode *inode, struct file *file,
+			  const struct seq_operations *ops)
 {
 	struct numa_maps_private *priv;
 	int ret = -ENOMEM;
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (priv) {
 		priv->proc_maps.pid = proc_pid(inode);
-		ret = seq_open(file, &proc_pid_numa_maps_op);
+		ret = seq_open(file, ops);
 		if (!ret) {
 			struct seq_file *m = file->private_data;
 			m->private = priv;
@@ -1109,8 +1257,25 @@ static int numa_maps_open(struct inode *inode, struct file *file)
 	return ret;
 }
 
-const struct file_operations proc_numa_maps_operations = {
-	.open		= numa_maps_open,
+static int pid_numa_maps_open(struct inode *inode, struct file *file)
+{
+	return numa_maps_open(inode, file, &proc_pid_numa_maps_op);
+}
+
+static int tid_numa_maps_open(struct inode *inode, struct file *file)
+{
+	return numa_maps_open(inode, file, &proc_tid_numa_maps_op);
+}
+
+const struct file_operations proc_pid_numa_maps_operations = {
+	.open		= pid_numa_maps_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+
+const struct file_operations proc_tid_numa_maps_operations = {
+	.open		= tid_numa_maps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
 	.release	= seq_release_private,
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 980de547c07..74fe164d1b2 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -134,9 +134,11 @@ static void pad_len_spaces(struct seq_file *m, int len)
 /*
  * display a single VMA to a sequenced file
  */
-static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
+static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
+			  int is_pid)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct proc_maps_private *priv = m->private;
 	unsigned long ino = 0;
 	struct file *file;
 	dev_t dev = 0;
@@ -168,10 +170,19 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
 		pad_len_spaces(m, len);
 		seq_path(m, &file->f_path, "");
 	} else if (mm) {
-		if (vma->vm_start <= mm->start_stack &&
-			vma->vm_end >= mm->start_stack) {
+		pid_t tid = vm_is_stack(priv->task, vma, is_pid);
+
+		if (tid != 0) {
 			pad_len_spaces(m, len);
-			seq_puts(m, "[stack]");
+			/*
+			 * Thread stack in /proc/PID/task/TID/maps or
+			 * the main process stack.
+			 */
+			if (!is_pid || (vma->vm_start <= mm->start_stack &&
+			    vma->vm_end >= mm->start_stack))
+				seq_printf(m, "[stack]");
+			else
+				seq_printf(m, "[stack:%d]", tid);
 		}
 	}
 
@@ -182,11 +193,22 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
 /*
  * display mapping lines for a particular process's /proc/pid/maps
  */
-static int show_map(struct seq_file *m, void *_p)
+static int show_map(struct seq_file *m, void *_p, int is_pid)
 {
 	struct rb_node *p = _p;
 
-	return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb));
+	return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb),
+			      is_pid);
+}
+
+static int show_pid_map(struct seq_file *m, void *_p)
+{
+	return show_map(m, _p, 1);
+}
+
+static int show_tid_map(struct seq_file *m, void *_p)
+{
+	return show_map(m, _p, 0);
 }
 
 static void *m_start(struct seq_file *m, loff_t *pos)
@@ -240,10 +262,18 @@ static const struct seq_operations proc_pid_maps_ops = {
 	.start	= m_start,
 	.next	= m_next,
 	.stop	= m_stop,
-	.show	= show_map
+	.show	= show_pid_map
+};
+
+static const struct seq_operations proc_tid_maps_ops = {
+	.start	= m_start,
+	.next	= m_next,
+	.stop	= m_stop,
+	.show	= show_tid_map
 };
 
-static int maps_open(struct inode *inode, struct file *file)
+static int maps_open(struct inode *inode, struct file *file,
+		     const struct seq_operations *ops)
 {
 	struct proc_maps_private *priv;
 	int ret = -ENOMEM;
@@ -251,7 +281,7 @@ static int maps_open(struct inode *inode, struct file *file)
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (priv) {
 		priv->pid = proc_pid(inode);
-		ret = seq_open(file, &proc_pid_maps_ops);
+		ret = seq_open(file, ops);
 		if (!ret) {
 			struct seq_file *m = file->private_data;
 			m->private = priv;
@@ -262,8 +292,25 @@ static int maps_open(struct inode *inode, struct file *file)
 	return ret;
 }
 
-const struct file_operations proc_maps_operations = {
-	.open		= maps_open,
+static int pid_maps_open(struct inode *inode, struct file *file)
+{
+	return maps_open(inode, file, &proc_pid_maps_ops);
+}
+
+static int tid_maps_open(struct inode *inode, struct file *file)
+{
+	return maps_open(inode, file, &proc_tid_maps_ops);
+}
+
+const struct file_operations proc_pid_maps_operations = {
+	.open		= pid_maps_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+
+const struct file_operations proc_tid_maps_operations = {
+	.open		= tid_maps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
 	.release	= seq_release_private,
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index b0f450a2bb7..0d5071d2998 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -700,3 +700,26 @@ static int __init vmcore_init(void)
 	return 0;
 }
 module_init(vmcore_init)
+
+/* Cleanup function for vmcore module. */
+void vmcore_cleanup(void)
+{
+	struct list_head *pos, *next;
+
+	if (proc_vmcore) {
+		remove_proc_entry(proc_vmcore->name, proc_vmcore->parent);
+		proc_vmcore = NULL;
+	}
+
+	/* clear the vmcore list. */
+	list_for_each_safe(pos, next, &vmcore_list) {
+		struct vmcore *m;
+
+		m = list_entry(pos, struct vmcore, list);
+		list_del(&m->list);
+		kfree(m);
+	}
+	kfree(elfcorebuf);
+	elfcorebuf = NULL;
+}
+EXPORT_SYMBOL_GPL(vmcore_cleanup);
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index b3b426edb2f..f37c32b9452 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -278,9 +278,7 @@ fail:
 
 int pstore_fill_super(struct super_block *sb, void *data, int silent)
 {
-	struct inode *inode = NULL;
-	struct dentry *root;
-	int err;
+	struct inode *inode;
 
 	save_mount_options(sb, data);
 
@@ -296,26 +294,17 @@ int pstore_fill_super(struct super_block *sb, void *data, int silent)
 	parse_options(data);
 
 	inode = pstore_get_inode(sb, NULL, S_IFDIR | 0755, 0);
-	if (!inode) {
-		err = -ENOMEM;
-		goto fail;
-	}
-	/* override ramfs "dir" options so we catch unlink(2) */
-	inode->i_op = &pstore_dir_inode_operations;
-
-	root = d_alloc_root(inode);
-	sb->s_root = root;
-	if (!root) {
-		err = -ENOMEM;
-		goto fail;
+	if (inode) {
+		/* override ramfs "dir" options so we catch unlink(2) */
+		inode->i_op = &pstore_dir_inode_operations;
 	}
+	sb->s_root = d_make_root(inode);
+	if (!sb->s_root)
+		return -ENOMEM;
 
 	pstore_get_records(0);
 
 	return 0;
-fail:
-	iput(inode);
-	return err;
 }
 
 static struct dentry *pstore_mount(struct file_system_type *fs_type,
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 6b009548d2e..552e994e3aa 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -52,38 +52,6 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data)
 	return 0;
 }
 
-static struct buffer_head *qnx4_getblk(struct inode *inode, int nr,
-				       int create)
-{
-	struct buffer_head *result = NULL;
-
-	if ( nr >= 0 )
-		nr = qnx4_block_map( inode, nr );
-	if (nr) {
-		result = sb_getblk(inode->i_sb, nr);
-		return result;
-	}
-	return NULL;
-}
-
-struct buffer_head *qnx4_bread(struct inode *inode, int block, int create)
-{
-	struct buffer_head *bh;
-
-	bh = qnx4_getblk(inode, block, create);
-	if (!bh || buffer_uptodate(bh)) {
-		return bh;
-	}
-	ll_rw_block(READ, 1, &bh);
-	wait_on_buffer(bh);
-	if (buffer_uptodate(bh)) {
-		return bh;
-	}
-	brelse(bh);
-
-	return NULL;
-}
-
 static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_head *bh, int create )
 {
 	unsigned long phys;
@@ -98,23 +66,31 @@ static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_h
 	return 0;
 }
 
+static inline u32 try_extent(qnx4_xtnt_t *extent, u32 *offset)
+{
+	u32 size = le32_to_cpu(extent->xtnt_size);
+	if (*offset < size)
+		return le32_to_cpu(extent->xtnt_blk) + *offset - 1;
+	*offset -= size;
+	return 0;
+}
+
 unsigned long qnx4_block_map( struct inode *inode, long iblock )
 {
 	int ix;
-	long offset, i_xblk;
-	unsigned long block = 0;
+	long i_xblk;
 	struct buffer_head *bh = NULL;
 	struct qnx4_xblk *xblk = NULL;
 	struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode);
 	u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts);
+	u32 offset = iblock;
+	u32 block = try_extent(&qnx4_inode->di_first_xtnt, &offset);
 
-	if ( iblock < le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size) ) {
+	if (block) {
 		// iblock is in the first extent. This is easy.
-		block = le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_blk) + iblock - 1;
 	} else {
 		// iblock is beyond first extent. We have to follow the extent chain.
 		i_xblk = le32_to_cpu(qnx4_inode->di_xblk);
-		offset = iblock - le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size);
 		ix = 0;
 		while ( --nxtnt > 0 ) {
 			if ( ix == 0 ) {
@@ -130,12 +106,11 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
 					return -EIO;
 				}
 			}
-			if ( offset < le32_to_cpu(xblk->xblk_xtnts[ix].xtnt_size) ) {
+			block = try_extent(&xblk->xblk_xtnts[ix], &offset);
+			if (block) {
 				// got it!
-				block = le32_to_cpu(xblk->xblk_xtnts[ix].xtnt_blk) + offset - 1;
 				break;
 			}
-			offset -= le32_to_cpu(xblk->xblk_xtnts[ix].xtnt_size);
 			if ( ++ix >= xblk->xblk_num_xtnts ) {
 				i_xblk = le32_to_cpu(xblk->xblk_next_xblk);
 				ix = 0;
@@ -260,15 +235,13 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
  	}
 
 	ret = -ENOMEM;
- 	s->s_root = d_alloc_root(root);
+ 	s->s_root = d_make_root(root);
  	if (s->s_root == NULL)
- 		goto outi;
+ 		goto outb;
 
 	brelse(bh);
 	return 0;
 
-      outi:
-	iput(root);
       outb:
 	kfree(qs->BitMap);
       out:
@@ -288,44 +261,17 @@ static void qnx4_put_super(struct super_block *sb)
 	return;
 }
 
-static int qnx4_writepage(struct page *page, struct writeback_control *wbc)
-{
-	return block_write_full_page(page,qnx4_get_block, wbc);
-}
-
 static int qnx4_readpage(struct file *file, struct page *page)
 {
 	return block_read_full_page(page,qnx4_get_block);
 }
 
-static int qnx4_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata)
-{
-	struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host);
-	int ret;
-
-	*pagep = NULL;
-	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				qnx4_get_block,
-				&qnx4_inode->mmu_private);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
-
-	return ret;
-}
 static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping,block,qnx4_get_block);
 }
 static const struct address_space_operations qnx4_aops = {
 	.readpage	= qnx4_readpage,
-	.writepage	= qnx4_writepage,
-	.write_begin	= qnx4_write_begin,
-	.write_end	= generic_write_end,
 	.bmap		= qnx4_bmap
 };
 
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index 275327b5615..a512c0b30e8 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -39,10 +39,6 @@ static int qnx4_match(int len, const char *name,
 	} else {
 		namelen = QNX4_SHORT_NAME_MAX;
 	}
-	/* "" means "." ---> so paths like "/usr/lib//libc.a" work */
-	if (!len && (de->di_fname[0] == '.') && (de->di_fname[1] == '\0')) {
-		return 1;
-	}
 	thislen = strlen( de->di_fname );
 	if ( thislen > namelen )
 		thislen = namelen;
@@ -72,7 +68,9 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
 	block = offset = blkofs = 0;
 	while (blkofs * QNX4_BLOCK_SIZE + offset < dir->i_size) {
 		if (!bh) {
-			bh = qnx4_bread(dir, blkofs, 0);
+			block = qnx4_block_map(dir, blkofs);
+			if (block)
+				bh = sb_bread(dir->i_sb, block);
 			if (!bh) {
 				blkofs++;
 				continue;
@@ -80,7 +78,6 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
 		}
 		*res_dir = (struct qnx4_inode_entry *) (bh->b_data + offset);
 		if (qnx4_match(len, name, bh, &offset)) {
-			block = qnx4_block_map( dir, blkofs );
 			*ino = block * QNX4_INODES_PER_BLOCK +
 			    (offset / QNX4_DIR_ENTRY_SIZE) - 1;
 			return bh;
diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h
index 33a60858203..244d4620189 100644
--- a/fs/qnx4/qnx4.h
+++ b/fs/qnx4/qnx4.h
@@ -27,8 +27,6 @@ extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, stru
 extern unsigned long qnx4_count_free_blocks(struct super_block *sb);
 extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
 
-extern struct buffer_head *qnx4_bread(struct inode *, int, int);
-
 extern const struct inode_operations qnx4_dir_inode_operations;
 extern const struct file_operations qnx4_dir_operations;
 extern int qnx4_is_free(struct super_block *sb, long block);
diff --git a/fs/qnx6/Kconfig b/fs/qnx6/Kconfig
new file mode 100644
index 00000000000..edbba5c17cc
--- /dev/null
+++ b/fs/qnx6/Kconfig
@@ -0,0 +1,26 @@
+config QNX6FS_FS
+	tristate "QNX6 file system support (read only)"
+	depends on BLOCK && CRC32
+	help
+	  This is the file system used by the real-time operating systems
+	  QNX 6 (also called QNX RTP).
+	  Further information is available at <http://www.qnx.com/>.
+	  Say Y if you intend to mount QNX hard disks or floppies formatted
+          with a mkqnx6fs.
+	  However, keep in mind that this currently is a readonly driver!
+
+	  To compile this file system support as a module, choose M here: the
+	  module will be called qnx6.
+
+	  If you don't know whether you need it, then you don't need it:
+	  answer N.
+
+config QNX6FS_DEBUG
+	bool "QNX6 debugging information"
+	depends on QNX6FS_FS
+	help
+	  Turns on extended debugging output.
+
+	  If you are not a developer working on the QNX6FS, you probably don't
+	  want this:
+	  answer N.
diff --git a/fs/qnx6/Makefile b/fs/qnx6/Makefile
new file mode 100644
index 00000000000..9dd06199afc
--- /dev/null
+++ b/fs/qnx6/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the linux qnx4-filesystem routines.
+#
+
+obj-$(CONFIG_QNX6FS_FS) += qnx6.o
+
+qnx6-objs := inode.o dir.o namei.o super_mmi.o
diff --git a/fs/qnx6/README b/fs/qnx6/README
new file mode 100644
index 00000000000..116d622026c
--- /dev/null
+++ b/fs/qnx6/README
@@ -0,0 +1,8 @@
+
+  This is a snapshot of the QNX6 filesystem for Linux.
+  Please send diffs and remarks to <chaosman@ontika.net> .
+
+Credits :
+
+Al Viro		<viro@ZenIV.linux.org.uk> (endless patience with me & support ;))
+Kai Bankett	<chaosman@ontika.net> (Maintainer)
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
new file mode 100644
index 00000000000..dc597353db3
--- /dev/null
+++ b/fs/qnx6/dir.c
@@ -0,0 +1,291 @@
+/*
+ * QNX6 file system, Linux implementation.
+ *
+ * Version : 1.0.0
+ *
+ * History :
+ *
+ * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
+ * 16-02-2012 pagemap extension by Al Viro
+ *
+ */
+
+#include "qnx6.h"
+
+static unsigned qnx6_lfile_checksum(char *name, unsigned size)
+{
+	unsigned crc = 0;
+	char *end = name + size;
+	while (name < end) {
+		crc = ((crc >> 1) + *(name++)) ^
+			((crc & 0x00000001) ? 0x80000000 : 0);
+	}
+	return crc;
+}
+
+static struct page *qnx6_get_page(struct inode *dir, unsigned long n)
+{
+	struct address_space *mapping = dir->i_mapping;
+	struct page *page = read_mapping_page(mapping, n, NULL);
+	if (!IS_ERR(page))
+		kmap(page);
+	return page;
+}
+
+static inline unsigned long dir_pages(struct inode *inode)
+{
+	return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
+}
+
+static unsigned last_entry(struct inode *inode, unsigned long page_nr)
+{
+	unsigned long last_byte = inode->i_size;
+	last_byte -= page_nr << PAGE_CACHE_SHIFT;
+	if (last_byte > PAGE_CACHE_SIZE)
+		last_byte = PAGE_CACHE_SIZE;
+	return last_byte / QNX6_DIR_ENTRY_SIZE;
+}
+
+static struct qnx6_long_filename *qnx6_longname(struct super_block *sb,
+					 struct qnx6_long_dir_entry *de,
+					 struct page **p)
+{
+	struct qnx6_sb_info *sbi = QNX6_SB(sb);
+	u32 s = fs32_to_cpu(sbi, de->de_long_inode); /* in block units */
+	u32 n = s >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); /* in pages */
+	/* within page */
+	u32 offs = (s << sb->s_blocksize_bits) & ~PAGE_CACHE_MASK;
+	struct address_space *mapping = sbi->longfile->i_mapping;
+	struct page *page = read_mapping_page(mapping, n, NULL);
+	if (IS_ERR(page))
+		return ERR_CAST(page);
+	kmap(*p = page);
+	return (struct qnx6_long_filename *)(page_address(page) + offs);
+}
+
+static int qnx6_dir_longfilename(struct inode *inode,
+			struct qnx6_long_dir_entry *de,
+			void *dirent, loff_t pos,
+			unsigned de_inode, filldir_t filldir)
+{
+	struct qnx6_long_filename *lf;
+	struct super_block *s = inode->i_sb;
+	struct qnx6_sb_info *sbi = QNX6_SB(s);
+	struct page *page;
+	int lf_size;
+
+	if (de->de_size != 0xff) {
+		/* error - long filename entries always have size 0xff
+		   in direntry */
+		printk(KERN_ERR "qnx6: invalid direntry size (%i).\n",
+				de->de_size);
+		return 0;
+	}
+	lf = qnx6_longname(s, de, &page);
+	if (IS_ERR(lf)) {
+		printk(KERN_ERR "qnx6:Error reading longname\n");
+		return 0;
+	}
+
+	lf_size = fs16_to_cpu(sbi, lf->lf_size);
+
+	if (lf_size > QNX6_LONG_NAME_MAX) {
+		QNX6DEBUG((KERN_INFO "file %s\n", lf->lf_fname));
+		printk(KERN_ERR "qnx6:Filename too long (%i)\n", lf_size);
+		qnx6_put_page(page);
+		return 0;
+	}
+
+	/* calc & validate longfilename checksum
+	   mmi 3g filesystem does not have that checksum */
+	if (!test_opt(s, MMI_FS) && fs32_to_cpu(sbi, de->de_checksum) !=
+			qnx6_lfile_checksum(lf->lf_fname, lf_size))
+		printk(KERN_INFO "qnx6: long filename checksum error.\n");
+
+	QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s inode:%u\n",
+					lf_size, lf->lf_fname, de_inode));
+	if (filldir(dirent, lf->lf_fname, lf_size, pos, de_inode,
+			DT_UNKNOWN) < 0) {
+		qnx6_put_page(page);
+		return 0;
+	}
+
+	qnx6_put_page(page);
+	/* success */
+	return 1;
+}
+
+static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct super_block *s = inode->i_sb;
+	struct qnx6_sb_info *sbi = QNX6_SB(s);
+	loff_t pos = filp->f_pos & (QNX6_DIR_ENTRY_SIZE - 1);
+	unsigned long npages = dir_pages(inode);
+	unsigned long n = pos >> PAGE_CACHE_SHIFT;
+	unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE;
+	bool done = false;
+
+	if (filp->f_pos >= inode->i_size)
+		return 0;
+
+	for ( ; !done && n < npages; n++, start = 0) {
+		struct page *page = qnx6_get_page(inode, n);
+		int limit = last_entry(inode, n);
+		struct qnx6_dir_entry *de;
+		int i = start;
+
+		if (IS_ERR(page)) {
+			printk(KERN_ERR "qnx6_readdir: read failed\n");
+			filp->f_pos = (n + 1) << PAGE_CACHE_SHIFT;
+			return PTR_ERR(page);
+		}
+		de = ((struct qnx6_dir_entry *)page_address(page)) + start;
+		for (; i < limit; i++, de++, pos += QNX6_DIR_ENTRY_SIZE) {
+			int size = de->de_size;
+			u32 no_inode = fs32_to_cpu(sbi, de->de_inode);
+
+			if (!no_inode || !size)
+				continue;
+
+			if (size > QNX6_SHORT_NAME_MAX) {
+				/* long filename detected
+				   get the filename from long filename
+				   structure / block */
+				if (!qnx6_dir_longfilename(inode,
+					(struct qnx6_long_dir_entry *)de,
+					dirent, pos, no_inode,
+					filldir)) {
+					done = true;
+					break;
+				}
+			} else {
+				QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s"
+				   " inode:%u\n", size, de->de_fname,
+							no_inode));
+				if (filldir(dirent, de->de_fname, size,
+				      pos, no_inode, DT_UNKNOWN)
+					< 0) {
+					done = true;
+					break;
+				}
+			}
+		}
+		qnx6_put_page(page);
+	}
+	filp->f_pos = pos;
+	return 0;
+}
+
+/*
+ * check if the long filename is correct.
+ */
+static unsigned qnx6_long_match(int len, const char *name,
+			struct qnx6_long_dir_entry *de, struct inode *dir)
+{
+	struct super_block *s = dir->i_sb;
+	struct qnx6_sb_info *sbi = QNX6_SB(s);
+	struct page *page;
+	int thislen;
+	struct qnx6_long_filename *lf = qnx6_longname(s, de, &page);
+
+	if (IS_ERR(lf))
+		return 0;
+
+	thislen = fs16_to_cpu(sbi, lf->lf_size);
+	if (len != thislen) {
+		qnx6_put_page(page);
+		return 0;
+	}
+	if (memcmp(name, lf->lf_fname, len) == 0) {
+		qnx6_put_page(page);
+		return fs32_to_cpu(sbi, de->de_inode);
+	}
+	qnx6_put_page(page);
+	return 0;
+}
+
+/*
+ * check if the filename is correct.
+ */
+static unsigned qnx6_match(struct super_block *s, int len, const char *name,
+			struct qnx6_dir_entry *de)
+{
+	struct qnx6_sb_info *sbi = QNX6_SB(s);
+	if (memcmp(name, de->de_fname, len) == 0)
+		return fs32_to_cpu(sbi, de->de_inode);
+	return 0;
+}
+
+
+unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
+			 struct page **res_page)
+{
+	struct super_block *s = dir->i_sb;
+	struct qnx6_inode_info *ei = QNX6_I(dir);
+	struct page *page = NULL;
+	unsigned long start, n;
+	unsigned long npages = dir_pages(dir);
+	unsigned ino;
+	struct qnx6_dir_entry *de;
+	struct qnx6_long_dir_entry *lde;
+
+	*res_page = NULL;
+
+	if (npages == 0)
+		return 0;
+	start = ei->i_dir_start_lookup;
+	if (start >= npages)
+		start = 0;
+	n = start;
+
+	do {
+		page = qnx6_get_page(dir, n);
+		if (!IS_ERR(page)) {
+			int limit = last_entry(dir, n);
+			int i;
+
+			de = (struct qnx6_dir_entry *)page_address(page);
+			for (i = 0; i < limit; i++, de++) {
+				if (len <= QNX6_SHORT_NAME_MAX) {
+					/* short filename */
+					if (len != de->de_size)
+						continue;
+					ino = qnx6_match(s, len, name, de);
+					if (ino)
+						goto found;
+				} else if (de->de_size == 0xff) {
+					/* deal with long filename */
+					lde = (struct qnx6_long_dir_entry *)de;
+					ino = qnx6_long_match(len,
+								name, lde, dir);
+					if (ino)
+						goto found;
+				} else
+					printk(KERN_ERR "qnx6: undefined "
+						"filename size in inode.\n");
+			}
+			qnx6_put_page(page);
+		}
+
+		if (++n >= npages)
+			n = 0;
+	} while (n != start);
+	return 0;
+
+found:
+	*res_page = page;
+	ei->i_dir_start_lookup = n;
+	return ino;
+}
+
+const struct file_operations qnx6_dir_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+	.readdir	= qnx6_readdir,
+	.fsync		= generic_file_fsync,
+};
+
+const struct inode_operations qnx6_dir_inode_operations = {
+	.lookup		= qnx6_lookup,
+};
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
new file mode 100644
index 00000000000..e44012dc564
--- /dev/null
+++ b/fs/qnx6/inode.c
@@ -0,0 +1,698 @@
+/*
+ * QNX6 file system, Linux implementation.
+ *
+ * Version : 1.0.0
+ *
+ * History :
+ *
+ * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
+ * 16-02-2012 pagemap extension by Al Viro
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/highuid.h>
+#include <linux/pagemap.h>
+#include <linux/buffer_head.h>
+#include <linux/writeback.h>
+#include <linux/statfs.h>
+#include <linux/parser.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/crc32.h>
+#include <linux/mpage.h>
+#include "qnx6.h"
+
+static const struct super_operations qnx6_sops;
+
+static void qnx6_put_super(struct super_block *sb);
+static struct inode *qnx6_alloc_inode(struct super_block *sb);
+static void qnx6_destroy_inode(struct inode *inode);
+static int qnx6_remount(struct super_block *sb, int *flags, char *data);
+static int qnx6_statfs(struct dentry *dentry, struct kstatfs *buf);
+static int qnx6_show_options(struct seq_file *seq, struct dentry *root);
+
+static const struct super_operations qnx6_sops = {
+	.alloc_inode	= qnx6_alloc_inode,
+	.destroy_inode	= qnx6_destroy_inode,
+	.put_super	= qnx6_put_super,
+	.statfs		= qnx6_statfs,
+	.remount_fs	= qnx6_remount,
+	.show_options	= qnx6_show_options,
+};
+
+static int qnx6_show_options(struct seq_file *seq, struct dentry *root)
+{
+	struct super_block *sb = root->d_sb;
+	struct qnx6_sb_info *sbi = QNX6_SB(sb);
+
+	if (sbi->s_mount_opt & QNX6_MOUNT_MMI_FS)
+		seq_puts(seq, ",mmi_fs");
+	return 0;
+}
+
+static int qnx6_remount(struct super_block *sb, int *flags, char *data)
+{
+	*flags |= MS_RDONLY;
+	return 0;
+}
+
+static unsigned qnx6_get_devblock(struct super_block *sb, __fs32 block)
+{
+	struct qnx6_sb_info *sbi = QNX6_SB(sb);
+	return fs32_to_cpu(sbi, block) + sbi->s_blks_off;
+}
+
+static unsigned qnx6_block_map(struct inode *inode, unsigned iblock);
+
+static int qnx6_get_block(struct inode *inode, sector_t iblock,
+			struct buffer_head *bh, int create)
+{
+	unsigned phys;
+
+	QNX6DEBUG((KERN_INFO "qnx6: qnx6_get_block inode=[%ld] iblock=[%ld]\n",
+			inode->i_ino, (unsigned long)iblock));
+
+	phys = qnx6_block_map(inode, iblock);
+	if (phys) {
+		/* logical block is before EOF */
+		map_bh(bh, inode->i_sb, phys);
+	}
+	return 0;
+}
+
+static int qnx6_check_blockptr(__fs32 ptr)
+{
+	if (ptr == ~(__fs32)0) {
+		printk(KERN_ERR "qnx6: hit unused blockpointer.\n");
+		return 0;
+	}
+	return 1;
+}
+
+static int qnx6_readpage(struct file *file, struct page *page)
+{
+	return mpage_readpage(page, qnx6_get_block);
+}
+
+static int qnx6_readpages(struct file *file, struct address_space *mapping,
+		   struct list_head *pages, unsigned nr_pages)
+{
+	return mpage_readpages(mapping, pages, nr_pages, qnx6_get_block);
+}
+
+/*
+ * returns the block number for the no-th element in the tree
+ * inodebits requred as there are multiple inodes in one inode block
+ */
+static unsigned qnx6_block_map(struct inode *inode, unsigned no)
+{
+	struct super_block *s = inode->i_sb;
+	struct qnx6_sb_info *sbi = QNX6_SB(s);
+	struct qnx6_inode_info *ei = QNX6_I(inode);
+	unsigned block = 0;
+	struct buffer_head *bh;
+	__fs32 ptr;
+	int levelptr;
+	int ptrbits = sbi->s_ptrbits;
+	int bitdelta;
+	u32 mask = (1 << ptrbits) - 1;
+	int depth = ei->di_filelevels;
+	int i;
+
+	bitdelta = ptrbits * depth;
+	levelptr = no >> bitdelta;
+
+	if (levelptr > QNX6_NO_DIRECT_POINTERS - 1) {
+		printk(KERN_ERR "qnx6:Requested file block number (%u) too big.",
+				no);
+		return 0;
+	}
+
+	block = qnx6_get_devblock(s, ei->di_block_ptr[levelptr]);
+
+	for (i = 0; i < depth; i++) {
+		bh = sb_bread(s, block);
+		if (!bh) {
+			printk(KERN_ERR "qnx6:Error reading block (%u)\n",
+					block);
+			return 0;
+		}
+		bitdelta -= ptrbits;
+		levelptr = (no >> bitdelta) & mask;
+		ptr = ((__fs32 *)bh->b_data)[levelptr];
+
+		if (!qnx6_check_blockptr(ptr))
+			return 0;
+
+		block = qnx6_get_devblock(s, ptr);
+		brelse(bh);
+	}
+	return block;
+}
+
+static int qnx6_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct qnx6_sb_info *sbi = QNX6_SB(sb);
+	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+
+	buf->f_type    = sb->s_magic;
+	buf->f_bsize   = sb->s_blocksize;
+	buf->f_blocks  = fs32_to_cpu(sbi, sbi->sb->sb_num_blocks);
+	buf->f_bfree   = fs32_to_cpu(sbi, sbi->sb->sb_free_blocks);
+	buf->f_files   = fs32_to_cpu(sbi, sbi->sb->sb_num_inodes);
+	buf->f_ffree   = fs32_to_cpu(sbi, sbi->sb->sb_free_inodes);
+	buf->f_bavail  = buf->f_bfree;
+	buf->f_namelen = QNX6_LONG_NAME_MAX;
+	buf->f_fsid.val[0] = (u32)id;
+	buf->f_fsid.val[1] = (u32)(id >> 32);
+
+	return 0;
+}
+
+/*
+ * Check the root directory of the filesystem to make sure
+ * it really _is_ a qnx6 filesystem, and to check the size
+ * of the directory entry.
+ */
+static const char *qnx6_checkroot(struct super_block *s)
+{
+	static char match_root[2][3] = {".\0\0", "..\0"};
+	int i, error = 0;
+	struct qnx6_dir_entry *dir_entry;
+	struct inode *root = s->s_root->d_inode;
+	struct address_space *mapping = root->i_mapping;
+	struct page *page = read_mapping_page(mapping, 0, NULL);
+	if (IS_ERR(page))
+		return "error reading root directory";
+	kmap(page);
+	dir_entry = page_address(page);
+	for (i = 0; i < 2; i++) {
+		/* maximum 3 bytes - due to match_root limitation */
+		if (strncmp(dir_entry[i].de_fname, match_root[i], 3))
+			error = 1;
+	}
+	qnx6_put_page(page);
+	if (error)
+		return "error reading root directory.";
+	return NULL;
+}
+
+#ifdef CONFIG_QNX6FS_DEBUG
+void qnx6_superblock_debug(struct qnx6_super_block *sb, struct super_block *s)
+{
+	struct qnx6_sb_info *sbi = QNX6_SB(s);
+
+	QNX6DEBUG((KERN_INFO "magic: %08x\n",
+				fs32_to_cpu(sbi, sb->sb_magic)));
+	QNX6DEBUG((KERN_INFO "checksum: %08x\n",
+				fs32_to_cpu(sbi, sb->sb_checksum)));
+	QNX6DEBUG((KERN_INFO "serial: %llx\n",
+				fs64_to_cpu(sbi, sb->sb_serial)));
+	QNX6DEBUG((KERN_INFO "flags: %08x\n",
+				fs32_to_cpu(sbi, sb->sb_flags)));
+	QNX6DEBUG((KERN_INFO "blocksize: %08x\n",
+				fs32_to_cpu(sbi, sb->sb_blocksize)));
+	QNX6DEBUG((KERN_INFO "num_inodes: %08x\n",
+				fs32_to_cpu(sbi, sb->sb_num_inodes)));
+	QNX6DEBUG((KERN_INFO "free_inodes: %08x\n",
+				fs32_to_cpu(sbi, sb->sb_free_inodes)));
+	QNX6DEBUG((KERN_INFO "num_blocks: %08x\n",
+				fs32_to_cpu(sbi, sb->sb_num_blocks)));
+	QNX6DEBUG((KERN_INFO "free_blocks: %08x\n",
+				fs32_to_cpu(sbi, sb->sb_free_blocks)));
+	QNX6DEBUG((KERN_INFO "inode_levels: %02x\n",
+				sb->Inode.levels));
+}
+#endif
+
+enum {
+	Opt_mmifs,
+	Opt_err
+};
+
+static const match_table_t tokens = {
+	{Opt_mmifs, "mmi_fs"},
+	{Opt_err, NULL}
+};
+
+static int qnx6_parse_options(char *options, struct super_block *sb)
+{
+	char *p;
+	struct qnx6_sb_info *sbi = QNX6_SB(sb);
+	substring_t args[MAX_OPT_ARGS];
+
+	if (!options)
+		return 1;
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
+		if (!*p)
+			continue;
+
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case Opt_mmifs:
+			set_opt(sbi->s_mount_opt, MMI_FS);
+			break;
+		default:
+			return 0;
+		}
+	}
+	return 1;
+}
+
+static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
+				int offset, int silent)
+{
+	struct qnx6_sb_info *sbi = QNX6_SB(s);
+	struct buffer_head *bh;
+	struct qnx6_super_block *sb;
+
+	/* Check the superblock signatures
+	   start with the first superblock */
+	bh = sb_bread(s, offset);
+	if (!bh) {
+		printk(KERN_ERR "qnx6: unable to read the first superblock\n");
+		return NULL;
+	}
+	sb = (struct qnx6_super_block *)bh->b_data;
+	if (fs32_to_cpu(sbi, sb->sb_magic) != QNX6_SUPER_MAGIC) {
+		sbi->s_bytesex = BYTESEX_BE;
+		if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
+			/* we got a big endian fs */
+			QNX6DEBUG((KERN_INFO "qnx6: fs got different"
+					" endianess.\n"));
+			return bh;
+		} else
+			sbi->s_bytesex = BYTESEX_LE;
+		if (!silent) {
+			if (offset == 0) {
+				printk(KERN_ERR "qnx6: wrong signature (magic)"
+					" in superblock #1.\n");
+			} else {
+				printk(KERN_INFO "qnx6: wrong signature (magic)"
+					" at position (0x%lx) - will try"
+					" alternative position (0x0000).\n",
+						offset * s->s_blocksize);
+			}
+		}
+		brelse(bh);
+		return NULL;
+	}
+	return bh;
+}
+
+static struct inode *qnx6_private_inode(struct super_block *s,
+					struct qnx6_root_node *p);
+
+static int qnx6_fill_super(struct super_block *s, void *data, int silent)
+{
+	struct buffer_head *bh1 = NULL, *bh2 = NULL;
+	struct qnx6_super_block *sb1 = NULL, *sb2 = NULL;
+	struct qnx6_sb_info *sbi;
+	struct inode *root;
+	const char *errmsg;
+	struct qnx6_sb_info *qs;
+	int ret = -EINVAL;
+	u64 offset;
+	int bootblock_offset = QNX6_BOOTBLOCK_SIZE;
+
+	qs = kzalloc(sizeof(struct qnx6_sb_info), GFP_KERNEL);
+	if (!qs)
+		return -ENOMEM;
+	s->s_fs_info = qs;
+
+	/* Superblock always is 512 Byte long */
+	if (!sb_set_blocksize(s, QNX6_SUPERBLOCK_SIZE)) {
+		printk(KERN_ERR "qnx6: unable to set blocksize\n");
+		goto outnobh;
+	}
+
+	/* parse the mount-options */
+	if (!qnx6_parse_options((char *) data, s)) {
+		printk(KERN_ERR "qnx6: invalid mount options.\n");
+		goto outnobh;
+	}
+	if (test_opt(s, MMI_FS)) {
+		sb1 = qnx6_mmi_fill_super(s, silent);
+		if (sb1)
+			goto mmi_success;
+		else
+			goto outnobh;
+	}
+	sbi = QNX6_SB(s);
+	sbi->s_bytesex = BYTESEX_LE;
+	/* Check the superblock signatures
+	   start with the first superblock */
+	bh1 = qnx6_check_first_superblock(s,
+		bootblock_offset / QNX6_SUPERBLOCK_SIZE, silent);
+	if (!bh1) {
+		/* try again without bootblock offset */
+		bh1 = qnx6_check_first_superblock(s, 0, silent);
+		if (!bh1) {
+			printk(KERN_ERR "qnx6: unable to read the first superblock\n");
+			goto outnobh;
+		}
+		/* seems that no bootblock at partition start */
+		bootblock_offset = 0;
+	}
+	sb1 = (struct qnx6_super_block *)bh1->b_data;
+
+#ifdef CONFIG_QNX6FS_DEBUG
+	qnx6_superblock_debug(sb1, s);
+#endif
+
+	/* checksum check - start at byte 8 and end at byte 512 */
+	if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
+			crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
+		printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+		goto out;
+	}
+
+	/* set new blocksize */
+	if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
+		printk(KERN_ERR "qnx6: unable to set blocksize\n");
+		goto out;
+	}
+	/* blocksize invalidates bh - pull it back in */
+	brelse(bh1);
+	bh1 = sb_bread(s, bootblock_offset >> s->s_blocksize_bits);
+	if (!bh1)
+		goto outnobh;
+	sb1 = (struct qnx6_super_block *)bh1->b_data;
+
+	/* calculate second superblock blocknumber */
+	offset = fs32_to_cpu(sbi, sb1->sb_num_blocks) +
+		(bootblock_offset >> s->s_blocksize_bits) +
+		(QNX6_SUPERBLOCK_AREA >> s->s_blocksize_bits);
+
+	/* set bootblock offset */
+	sbi->s_blks_off = (bootblock_offset >> s->s_blocksize_bits) +
+			  (QNX6_SUPERBLOCK_AREA >> s->s_blocksize_bits);
+
+	/* next the second superblock */
+	bh2 = sb_bread(s, offset);
+	if (!bh2) {
+		printk(KERN_ERR "qnx6: unable to read the second superblock\n");
+		goto out;
+	}
+	sb2 = (struct qnx6_super_block *)bh2->b_data;
+	if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
+		if (!silent)
+			printk(KERN_ERR "qnx6: wrong signature (magic)"
+					" in superblock #2.\n");
+		goto out;
+	}
+
+	/* checksum check - start at byte 8 and end at byte 512 */
+	if (fs32_to_cpu(sbi, sb2->sb_checksum) !=
+				crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
+		printk(KERN_ERR "qnx6: superblock #2 checksum error\n");
+		goto out;
+	}
+
+	if (fs64_to_cpu(sbi, sb1->sb_serial) >=
+					fs64_to_cpu(sbi, sb2->sb_serial)) {
+		/* superblock #1 active */
+		sbi->sb_buf = bh1;
+		sbi->sb = (struct qnx6_super_block *)bh1->b_data;
+		brelse(bh2);
+		printk(KERN_INFO "qnx6: superblock #1 active\n");
+	} else {
+		/* superblock #2 active */
+		sbi->sb_buf = bh2;
+		sbi->sb = (struct qnx6_super_block *)bh2->b_data;
+		brelse(bh1);
+		printk(KERN_INFO "qnx6: superblock #2 active\n");
+	}
+mmi_success:
+	/* sanity check - limit maximum indirect pointer levels */
+	if (sb1->Inode.levels > QNX6_PTR_MAX_LEVELS) {
+		printk(KERN_ERR "qnx6: too many inode levels (max %i, sb %i)\n",
+			QNX6_PTR_MAX_LEVELS, sb1->Inode.levels);
+		goto out;
+	}
+	if (sb1->Longfile.levels > QNX6_PTR_MAX_LEVELS) {
+		printk(KERN_ERR "qnx6: too many longfilename levels"
+				" (max %i, sb %i)\n",
+			QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
+		goto out;
+	}
+	s->s_op = &qnx6_sops;
+	s->s_magic = QNX6_SUPER_MAGIC;
+	s->s_flags |= MS_RDONLY;        /* Yup, read-only yet */
+
+	/* ease the later tree level calculations */
+	sbi = QNX6_SB(s);
+	sbi->s_ptrbits = ilog2(s->s_blocksize / 4);
+	sbi->inodes = qnx6_private_inode(s, &sb1->Inode);
+	if (!sbi->inodes)
+		goto out;
+	sbi->longfile = qnx6_private_inode(s, &sb1->Longfile);
+	if (!sbi->longfile)
+		goto out1;
+
+	/* prefetch root inode */
+	root = qnx6_iget(s, QNX6_ROOT_INO);
+	if (IS_ERR(root)) {
+		printk(KERN_ERR "qnx6: get inode failed\n");
+		ret = PTR_ERR(root);
+		goto out2;
+	}
+
+	ret = -ENOMEM;
+	s->s_root = d_make_root(root);
+	if (!s->s_root)
+		goto out2;
+
+	ret = -EINVAL;
+	errmsg = qnx6_checkroot(s);
+	if (errmsg != NULL) {
+		if (!silent)
+			printk(KERN_ERR "qnx6: %s\n", errmsg);
+		goto out3;
+	}
+	return 0;
+
+out3:
+	dput(s->s_root);
+	s->s_root = NULL;
+out2:
+	iput(sbi->longfile);
+out1:
+	iput(sbi->inodes);
+out:
+	if (bh1)
+		brelse(bh1);
+	if (bh2)
+		brelse(bh2);
+outnobh:
+	kfree(qs);
+	s->s_fs_info = NULL;
+	return ret;
+}
+
+static void qnx6_put_super(struct super_block *sb)
+{
+	struct qnx6_sb_info *qs = QNX6_SB(sb);
+	brelse(qs->sb_buf);
+	iput(qs->longfile);
+	iput(qs->inodes);
+	kfree(qs);
+	sb->s_fs_info = NULL;
+	return;
+}
+
+static sector_t qnx6_bmap(struct address_space *mapping, sector_t block)
+{
+	return generic_block_bmap(mapping, block, qnx6_get_block);
+}
+static const struct address_space_operations qnx6_aops = {
+	.readpage	= qnx6_readpage,
+	.readpages	= qnx6_readpages,
+	.bmap		= qnx6_bmap
+};
+
+static struct inode *qnx6_private_inode(struct super_block *s,
+					struct qnx6_root_node *p)
+{
+	struct inode *inode = new_inode(s);
+	if (inode) {
+		struct qnx6_inode_info *ei = QNX6_I(inode);
+		struct qnx6_sb_info *sbi = QNX6_SB(s);
+		inode->i_size = fs64_to_cpu(sbi, p->size);
+		memcpy(ei->di_block_ptr, p->ptr, sizeof(p->ptr));
+		ei->di_filelevels = p->levels;
+		inode->i_mode = S_IFREG | S_IRUSR; /* probably wrong */
+		inode->i_mapping->a_ops = &qnx6_aops;
+	}
+	return inode;
+}
+
+struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
+{
+	struct qnx6_sb_info *sbi = QNX6_SB(sb);
+	struct qnx6_inode_entry *raw_inode;
+	struct inode *inode;
+	struct qnx6_inode_info	*ei;
+	struct address_space *mapping;
+	struct page *page;
+	u32 n, offs;
+
+	inode = iget_locked(sb, ino);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+	if (!(inode->i_state & I_NEW))
+		return inode;
+
+	ei = QNX6_I(inode);
+
+	inode->i_mode = 0;
+
+	if (ino == 0) {
+		printk(KERN_ERR "qnx6: bad inode number on dev %s: %u is "
+				"out of range\n",
+		       sb->s_id, ino);
+		iget_failed(inode);
+		return ERR_PTR(-EIO);
+	}
+	n = (ino - 1) >> (PAGE_CACHE_SHIFT - QNX6_INODE_SIZE_BITS);
+	offs = (ino - 1) & (~PAGE_CACHE_MASK >> QNX6_INODE_SIZE_BITS);
+	mapping = sbi->inodes->i_mapping;
+	page = read_mapping_page(mapping, n, NULL);
+	if (IS_ERR(page)) {
+		printk(KERN_ERR "qnx6: major problem: unable to read inode from "
+		       "dev %s\n", sb->s_id);
+		iget_failed(inode);
+		return ERR_CAST(page);
+	}
+	kmap(page);
+	raw_inode = ((struct qnx6_inode_entry *)page_address(page)) + offs;
+
+	inode->i_mode    = fs16_to_cpu(sbi, raw_inode->di_mode);
+	inode->i_uid     = (uid_t)fs32_to_cpu(sbi, raw_inode->di_uid);
+	inode->i_gid     = (gid_t)fs32_to_cpu(sbi, raw_inode->di_gid);
+	inode->i_size    = fs64_to_cpu(sbi, raw_inode->di_size);
+	inode->i_mtime.tv_sec   = fs32_to_cpu(sbi, raw_inode->di_mtime);
+	inode->i_mtime.tv_nsec = 0;
+	inode->i_atime.tv_sec   = fs32_to_cpu(sbi, raw_inode->di_atime);
+	inode->i_atime.tv_nsec = 0;
+	inode->i_ctime.tv_sec   = fs32_to_cpu(sbi, raw_inode->di_ctime);
+	inode->i_ctime.tv_nsec = 0;
+
+	/* calc blocks based on 512 byte blocksize */
+	inode->i_blocks = (inode->i_size + 511) >> 9;
+
+	memcpy(&ei->di_block_ptr, &raw_inode->di_block_ptr,
+				sizeof(raw_inode->di_block_ptr));
+	ei->di_filelevels = raw_inode->di_filelevels;
+
+	if (S_ISREG(inode->i_mode)) {
+		inode->i_fop = &generic_ro_fops;
+		inode->i_mapping->a_ops = &qnx6_aops;
+	} else if (S_ISDIR(inode->i_mode)) {
+		inode->i_op = &qnx6_dir_inode_operations;
+		inode->i_fop = &qnx6_dir_operations;
+		inode->i_mapping->a_ops = &qnx6_aops;
+	} else if (S_ISLNK(inode->i_mode)) {
+		inode->i_op = &page_symlink_inode_operations;
+		inode->i_mapping->a_ops = &qnx6_aops;
+	} else
+		init_special_inode(inode, inode->i_mode, 0);
+	qnx6_put_page(page);
+	unlock_new_inode(inode);
+	return inode;
+}
+
+static struct kmem_cache *qnx6_inode_cachep;
+
+static struct inode *qnx6_alloc_inode(struct super_block *sb)
+{
+	struct qnx6_inode_info *ei;
+	ei = kmem_cache_alloc(qnx6_inode_cachep, GFP_KERNEL);
+	if (!ei)
+		return NULL;
+	return &ei->vfs_inode;
+}
+
+static void qnx6_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(qnx6_inode_cachep, QNX6_I(inode));
+}
+
+static void qnx6_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, qnx6_i_callback);
+}
+
+static void init_once(void *foo)
+{
+	struct qnx6_inode_info *ei = (struct qnx6_inode_info *) foo;
+
+	inode_init_once(&ei->vfs_inode);
+}
+
+static int init_inodecache(void)
+{
+	qnx6_inode_cachep = kmem_cache_create("qnx6_inode_cache",
+					     sizeof(struct qnx6_inode_info),
+					     0, (SLAB_RECLAIM_ACCOUNT|
+						SLAB_MEM_SPREAD),
+					     init_once);
+	if (!qnx6_inode_cachep)
+		return -ENOMEM;
+	return 0;
+}
+
+static void destroy_inodecache(void)
+{
+	kmem_cache_destroy(qnx6_inode_cachep);
+}
+
+static struct dentry *qnx6_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
+{
+	return mount_bdev(fs_type, flags, dev_name, data, qnx6_fill_super);
+}
+
+static struct file_system_type qnx6_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "qnx6",
+	.mount		= qnx6_mount,
+	.kill_sb	= kill_block_super,
+	.fs_flags	= FS_REQUIRES_DEV,
+};
+
+static int __init init_qnx6_fs(void)
+{
+	int err;
+
+	err = init_inodecache();
+	if (err)
+		return err;
+
+	err = register_filesystem(&qnx6_fs_type);
+	if (err) {
+		destroy_inodecache();
+		return err;
+	}
+
+	printk(KERN_INFO "QNX6 filesystem 1.0.0 registered.\n");
+	return 0;
+}
+
+static void __exit exit_qnx6_fs(void)
+{
+	unregister_filesystem(&qnx6_fs_type);
+	destroy_inodecache();
+}
+
+module_init(init_qnx6_fs)
+module_exit(exit_qnx6_fs)
+MODULE_LICENSE("GPL");
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
new file mode 100644
index 00000000000..8a97289e04a
--- /dev/null
+++ b/fs/qnx6/namei.c
@@ -0,0 +1,42 @@
+/*
+ * QNX6 file system, Linux implementation.
+ *
+ * Version : 1.0.0
+ *
+ * History :
+ *
+ * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
+ * 16-02-2012 pagemap extension by Al Viro
+ *
+ */
+
+#include "qnx6.h"
+
+struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
+				struct nameidata *nd)
+{
+	unsigned ino;
+	struct page *page;
+	struct inode *foundinode = NULL;
+	const char *name = dentry->d_name.name;
+	int len = dentry->d_name.len;
+
+	if (len > QNX6_LONG_NAME_MAX)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	ino = qnx6_find_entry(len, dir, name, &page);
+	if (ino) {
+		foundinode = qnx6_iget(dir->i_sb, ino);
+		qnx6_put_page(page);
+		if (IS_ERR(foundinode)) {
+			QNX6DEBUG((KERN_ERR "qnx6: lookup->iget -> "
+				" error %ld\n", PTR_ERR(foundinode)));
+			return ERR_CAST(foundinode);
+		}
+	} else {
+		QNX6DEBUG((KERN_INFO "qnx6_lookup: not found %s\n", name));
+		return NULL;
+	}
+	d_add(dentry, foundinode);
+	return NULL;
+}
diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h
new file mode 100644
index 00000000000..6c5e02a0b6a
--- /dev/null
+++ b/fs/qnx6/qnx6.h
@@ -0,0 +1,135 @@
+/*
+ * QNX6 file system, Linux implementation.
+ *
+ * Version : 1.0.0
+ *
+ * History :
+ *
+ * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
+ * 16-02-2012 page map extension by Al Viro
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+
+typedef __u16 __bitwise __fs16;
+typedef __u32 __bitwise __fs32;
+typedef __u64 __bitwise __fs64;
+
+#include <linux/qnx6_fs.h>
+
+#ifdef CONFIG_QNX6FS_DEBUG
+#define QNX6DEBUG(X) printk X
+#else
+#define QNX6DEBUG(X) (void) 0
+#endif
+
+struct qnx6_sb_info {
+	struct buffer_head	*sb_buf;	/* superblock buffer */
+	struct qnx6_super_block	*sb;		/* our superblock */
+	int			s_blks_off;	/* blkoffset fs-startpoint */
+	int			s_ptrbits;	/* indirect pointer bitfield */
+	unsigned long		s_mount_opt;	/* all mount options */
+	int			s_bytesex;	/* holds endianess info */
+	struct inode *		inodes;
+	struct inode *		longfile;
+};
+
+struct qnx6_inode_info {
+	__fs32			di_block_ptr[QNX6_NO_DIRECT_POINTERS];
+	__u8			di_filelevels;
+	__u32			i_dir_start_lookup;
+	struct inode		vfs_inode;
+};
+
+extern struct inode *qnx6_iget(struct super_block *sb, unsigned ino);
+extern struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
+					struct nameidata *nd);
+
+#ifdef CONFIG_QNX6FS_DEBUG
+extern void qnx6_superblock_debug(struct qnx6_super_block *,
+						struct super_block *);
+#endif
+
+extern const struct inode_operations qnx6_dir_inode_operations;
+extern const struct file_operations qnx6_dir_operations;
+
+static inline struct qnx6_sb_info *QNX6_SB(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+static inline struct qnx6_inode_info *QNX6_I(struct inode *inode)
+{
+	return container_of(inode, struct qnx6_inode_info, vfs_inode);
+}
+
+#define clear_opt(o, opt)		(o &= ~(QNX6_MOUNT_##opt))
+#define set_opt(o, opt)			(o |= (QNX6_MOUNT_##opt))
+#define test_opt(sb, opt)		(QNX6_SB(sb)->s_mount_opt & \
+					 QNX6_MOUNT_##opt)
+enum {
+	BYTESEX_LE,
+	BYTESEX_BE,
+};
+
+static inline __u64 fs64_to_cpu(struct qnx6_sb_info *sbi, __fs64 n)
+{
+	if (sbi->s_bytesex == BYTESEX_LE)
+		return le64_to_cpu((__force __le64)n);
+	else
+		return be64_to_cpu((__force __be64)n);
+}
+
+static inline __fs64 cpu_to_fs64(struct qnx6_sb_info *sbi, __u64 n)
+{
+	if (sbi->s_bytesex == BYTESEX_LE)
+		return (__force __fs64)cpu_to_le64(n);
+	else
+		return (__force __fs64)cpu_to_be64(n);
+}
+
+static inline __u32 fs32_to_cpu(struct qnx6_sb_info *sbi, __fs32 n)
+{
+	if (sbi->s_bytesex == BYTESEX_LE)
+		return le32_to_cpu((__force __le32)n);
+	else
+		return be32_to_cpu((__force __be32)n);
+}
+
+static inline __fs32 cpu_to_fs32(struct qnx6_sb_info *sbi, __u32 n)
+{
+	if (sbi->s_bytesex == BYTESEX_LE)
+		return (__force __fs32)cpu_to_le32(n);
+	else
+		return (__force __fs32)cpu_to_be32(n);
+}
+
+static inline __u16 fs16_to_cpu(struct qnx6_sb_info *sbi, __fs16 n)
+{
+	if (sbi->s_bytesex == BYTESEX_LE)
+		return le16_to_cpu((__force __le16)n);
+	else
+		return be16_to_cpu((__force __be16)n);
+}
+
+static inline __fs16 cpu_to_fs16(struct qnx6_sb_info *sbi, __u16 n)
+{
+	if (sbi->s_bytesex == BYTESEX_LE)
+		return (__force __fs16)cpu_to_le16(n);
+	else
+		return (__force __fs16)cpu_to_be16(n);
+}
+
+extern struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s,
+						    int silent);
+
+static inline void qnx6_put_page(struct page *page)
+{
+	kunmap(page);
+	page_cache_release(page);
+}
+
+extern unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
+				struct page **res_page);
diff --git a/fs/qnx6/super_mmi.c b/fs/qnx6/super_mmi.c
new file mode 100644
index 00000000000..29c32cba62d
--- /dev/null
+++ b/fs/qnx6/super_mmi.c
@@ -0,0 +1,150 @@
+/*
+ * QNX6 file system, Linux implementation.
+ *
+ * Version : 1.0.0
+ *
+ * History :
+ *
+ * 01-02-2012 by Kai Bankett (chaosman@ontika.net) : first release.
+ *
+ */
+
+#include <linux/buffer_head.h>
+#include <linux/slab.h>
+#include <linux/crc32.h>
+#include "qnx6.h"
+
+static void qnx6_mmi_copy_sb(struct qnx6_super_block *qsb,
+		struct qnx6_mmi_super_block *sb)
+{
+	qsb->sb_magic = sb->sb_magic;
+	qsb->sb_checksum = sb->sb_checksum;
+	qsb->sb_serial = sb->sb_serial;
+	qsb->sb_blocksize = sb->sb_blocksize;
+	qsb->sb_num_inodes = sb->sb_num_inodes;
+	qsb->sb_free_inodes = sb->sb_free_inodes;
+	qsb->sb_num_blocks = sb->sb_num_blocks;
+	qsb->sb_free_blocks = sb->sb_free_blocks;
+
+	/* the rest of the superblock is the same */
+	memcpy(&qsb->Inode, &sb->Inode, sizeof(sb->Inode));
+	memcpy(&qsb->Bitmap, &sb->Bitmap, sizeof(sb->Bitmap));
+	memcpy(&qsb->Longfile, &sb->Longfile, sizeof(sb->Longfile));
+}
+
+struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
+{
+	struct buffer_head *bh1, *bh2 = NULL;
+	struct qnx6_mmi_super_block *sb1, *sb2;
+	struct qnx6_super_block *qsb = NULL;
+	struct qnx6_sb_info *sbi;
+	__u64 offset;
+
+	/* Check the superblock signatures
+	   start with the first superblock */
+	bh1 = sb_bread(s, 0);
+	if (!bh1) {
+		printk(KERN_ERR "qnx6: Unable to read first mmi superblock\n");
+		return NULL;
+	}
+	sb1 = (struct qnx6_mmi_super_block *)bh1->b_data;
+	sbi = QNX6_SB(s);
+	if (fs32_to_cpu(sbi, sb1->sb_magic) != QNX6_SUPER_MAGIC) {
+		if (!silent) {
+			printk(KERN_ERR "qnx6: wrong signature (magic) in"
+					" superblock #1.\n");
+			goto out;
+		}
+	}
+
+	/* checksum check - start at byte 8 and end at byte 512 */
+	if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
+				crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
+		printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+		goto out;
+	}
+
+	/* calculate second superblock blocknumber */
+	offset = fs32_to_cpu(sbi, sb1->sb_num_blocks) + QNX6_SUPERBLOCK_AREA /
+					fs32_to_cpu(sbi, sb1->sb_blocksize);
+
+	/* set new blocksize */
+	if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
+		printk(KERN_ERR "qnx6: unable to set blocksize\n");
+		goto out;
+	}
+	/* blocksize invalidates bh - pull it back in */
+	brelse(bh1);
+	bh1 = sb_bread(s, 0);
+	if (!bh1)
+		goto out;
+	sb1 = (struct qnx6_mmi_super_block *)bh1->b_data;
+
+	/* read second superblock */
+	bh2 = sb_bread(s, offset);
+	if (!bh2) {
+		printk(KERN_ERR "qnx6: unable to read the second superblock\n");
+		goto out;
+	}
+	sb2 = (struct qnx6_mmi_super_block *)bh2->b_data;
+	if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
+		if (!silent)
+			printk(KERN_ERR "qnx6: wrong signature (magic) in"
+					" superblock #2.\n");
+		goto out;
+	}
+
+	/* checksum check - start at byte 8 and end at byte 512 */
+	if (fs32_to_cpu(sbi, sb2->sb_checksum)
+			!= crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
+		printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+		goto out;
+	}
+
+	qsb = kmalloc(sizeof(*qsb), GFP_KERNEL);
+	if (!qsb) {
+		printk(KERN_ERR "qnx6: unable to allocate memory.\n");
+		goto out;
+	}
+
+	if (fs64_to_cpu(sbi, sb1->sb_serial) >
+					fs64_to_cpu(sbi, sb2->sb_serial)) {
+		/* superblock #1 active */
+		qnx6_mmi_copy_sb(qsb, sb1);
+#ifdef CONFIG_QNX6FS_DEBUG
+		qnx6_superblock_debug(qsb, s);
+#endif
+		memcpy(bh1->b_data, qsb, sizeof(struct qnx6_super_block));
+
+		sbi->sb_buf = bh1;
+		sbi->sb = (struct qnx6_super_block *)bh1->b_data;
+		brelse(bh2);
+		printk(KERN_INFO "qnx6: superblock #1 active\n");
+	} else {
+		/* superblock #2 active */
+		qnx6_mmi_copy_sb(qsb, sb2);
+#ifdef CONFIG_QNX6FS_DEBUG
+		qnx6_superblock_debug(qsb, s);
+#endif
+		memcpy(bh2->b_data, qsb, sizeof(struct qnx6_super_block));
+
+		sbi->sb_buf = bh2;
+		sbi->sb = (struct qnx6_super_block *)bh2->b_data;
+		brelse(bh1);
+		printk(KERN_INFO "qnx6: superblock #2 active\n");
+	}
+	kfree(qsb);
+
+	/* offset for mmi_fs is just SUPERBLOCK_AREA bytes */
+	sbi->s_blks_off = QNX6_SUPERBLOCK_AREA / s->s_blocksize;
+
+	/* success */
+	return sbi->sb;
+
+out:
+	if (bh1 != NULL)
+		brelse(bh1);
+	if (bh2 != NULL)
+		brelse(bh2);
+	return NULL;
+}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 46741970371..8b4f12b33f5 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -71,6 +71,7 @@
 #include <linux/module.h>
 #include <linux/proc_fs.h>
 #include <linux/security.h>
+#include <linux/sched.h>
 #include <linux/kmod.h>
 #include <linux/namei.h>
 #include <linux/capability.h>
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 7898cd688a0..fc2c4388d12 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -292,11 +292,26 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 	}
 }
 
+/* Return 1 if 'cmd' will block on frozen filesystem */
+static int quotactl_cmd_write(int cmd)
+{
+	switch (cmd) {
+	case Q_GETFMT:
+	case Q_GETINFO:
+	case Q_SYNC:
+	case Q_XGETQSTAT:
+	case Q_XGETQUOTA:
+	case Q_XQUOTASYNC:
+		return 0;
+	}
+	return 1;
+}
+
 /*
  * look up a superblock on which quota ops will be performed
  * - use the name of a block device to find the superblock thereon
  */
-static struct super_block *quotactl_block(const char __user *special)
+static struct super_block *quotactl_block(const char __user *special, int cmd)
 {
 #ifdef CONFIG_BLOCK
 	struct block_device *bdev;
@@ -309,7 +324,10 @@ static struct super_block *quotactl_block(const char __user *special)
 	putname(tmp);
 	if (IS_ERR(bdev))
 		return ERR_CAST(bdev);
-	sb = get_super(bdev);
+	if (quotactl_cmd_write(cmd))
+		sb = get_super_thawed(bdev);
+	else
+		sb = get_super(bdev);
 	bdput(bdev);
 	if (!sb)
 		return ERR_PTR(-ENODEV);
@@ -361,7 +379,7 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
 			pathp = &path;
 	}
 
-	sb = quotactl_block(special);
+	sb = quotactl_block(special, cmds);
 	if (IS_ERR(sb)) {
 		ret = PTR_ERR(sb);
 		goto out;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index aec766abe3a..a1fdabe21de 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -209,22 +209,19 @@ static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts)
 int ramfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct ramfs_fs_info *fsi;
-	struct inode *inode = NULL;
-	struct dentry *root;
+	struct inode *inode;
 	int err;
 
 	save_mount_options(sb, data);
 
 	fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL);
 	sb->s_fs_info = fsi;
-	if (!fsi) {
-		err = -ENOMEM;
-		goto fail;
-	}
+	if (!fsi)
+		return -ENOMEM;
 
 	err = ramfs_parse_options(data, &fsi->mount_opts);
 	if (err)
-		goto fail;
+		return err;
 
 	sb->s_maxbytes		= MAX_LFS_FILESIZE;
 	sb->s_blocksize		= PAGE_CACHE_SIZE;
@@ -234,24 +231,11 @@ int ramfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_time_gran		= 1;
 
 	inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0);
-	if (!inode) {
-		err = -ENOMEM;
-		goto fail;
-	}
-
-	root = d_alloc_root(inode);
-	sb->s_root = root;
-	if (!root) {
-		err = -ENOMEM;
-		goto fail;
-	}
+	sb->s_root = d_make_root(inode);
+	if (!sb->s_root)
+		return -ENOMEM;
 
 	return 0;
-fail:
-	kfree(fsi);
-	sb->s_fs_info = NULL;
-	iput(inode);
-	return err;
 }
 
 struct dentry *ramfs_mount(struct file_system_type *fs_type,
diff --git a/fs/reiserfs/acl.h b/fs/reiserfs/acl.h
new file mode 100644
index 00000000000..f096b80e73d
--- /dev/null
+++ b/fs/reiserfs/acl.h
@@ -0,0 +1,76 @@
+#include <linux/init.h>
+#include <linux/posix_acl.h>
+
+#define REISERFS_ACL_VERSION	0x0001
+
+typedef struct {
+	__le16 e_tag;
+	__le16 e_perm;
+	__le32 e_id;
+} reiserfs_acl_entry;
+
+typedef struct {
+	__le16 e_tag;
+	__le16 e_perm;
+} reiserfs_acl_entry_short;
+
+typedef struct {
+	__le32 a_version;
+} reiserfs_acl_header;
+
+static inline size_t reiserfs_acl_size(int count)
+{
+	if (count <= 4) {
+		return sizeof(reiserfs_acl_header) +
+		    count * sizeof(reiserfs_acl_entry_short);
+	} else {
+		return sizeof(reiserfs_acl_header) +
+		    4 * sizeof(reiserfs_acl_entry_short) +
+		    (count - 4) * sizeof(reiserfs_acl_entry);
+	}
+}
+
+static inline int reiserfs_acl_count(size_t size)
+{
+	ssize_t s;
+	size -= sizeof(reiserfs_acl_header);
+	s = size - 4 * sizeof(reiserfs_acl_entry_short);
+	if (s < 0) {
+		if (size % sizeof(reiserfs_acl_entry_short))
+			return -1;
+		return size / sizeof(reiserfs_acl_entry_short);
+	} else {
+		if (s % sizeof(reiserfs_acl_entry))
+			return -1;
+		return s / sizeof(reiserfs_acl_entry) + 4;
+	}
+}
+
+#ifdef CONFIG_REISERFS_FS_POSIX_ACL
+struct posix_acl *reiserfs_get_acl(struct inode *inode, int type);
+int reiserfs_acl_chmod(struct inode *inode);
+int reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
+				 struct inode *dir, struct dentry *dentry,
+				 struct inode *inode);
+int reiserfs_cache_default_acl(struct inode *dir);
+extern const struct xattr_handler reiserfs_posix_acl_default_handler;
+extern const struct xattr_handler reiserfs_posix_acl_access_handler;
+
+#else
+
+#define reiserfs_cache_default_acl(inode) 0
+#define reiserfs_get_acl NULL
+
+static inline int reiserfs_acl_chmod(struct inode *inode)
+{
+	return 0;
+}
+
+static inline int
+reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
+			     const struct inode *dir, struct dentry *dentry,
+			     struct inode *inode)
+{
+	return 0;
+}
+#endif
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 70de42f09f1..4c0c7d163d1 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -4,14 +4,12 @@
 /* Reiserfs block (de)allocator, bitmap-based. */
 
 #include <linux/time.h>
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <linux/errno.h>
 #include <linux/buffer_head.h>
 #include <linux/kernel.h>
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
-#include <linux/reiserfs_fs_sb.h>
-#include <linux/reiserfs_fs_i.h>
 #include <linux/quotaops.h>
 #include <linux/seq_file.h>
 
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 133e9355dc6..66c53b642a8 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -5,7 +5,7 @@
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <linux/stat.h>
 #include <linux/buffer_head.h>
 #include <linux/slab.h>
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 60c08044066..2b7882b508d 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -17,7 +17,7 @@
 
 #include <asm/uaccess.h>
 #include <linux/time.h>
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <linux/buffer_head.h>
 #include <linux/kernel.h>
 
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index ace635053a3..8375c922c0d 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -3,9 +3,9 @@
  */
 
 #include <linux/time.h>
-#include <linux/reiserfs_fs.h>
-#include <linux/reiserfs_acl.h>
-#include <linux/reiserfs_xattr.h>
+#include "reiserfs.h"
+#include "acl.h"
+#include "xattr.h"
 #include <asm/uaccess.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 1e4250bc3a6..430e0658704 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -37,7 +37,7 @@
 #include <linux/time.h>
 #include <linux/slab.h>
 #include <linux/string.h>
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <linux/buffer_head.h>
 
 /* To make any changes in the tree we find a node, that contains item
diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c
index 6471c670743..91b0cc1242a 100644
--- a/fs/reiserfs/hashes.c
+++ b/fs/reiserfs/hashes.c
@@ -19,7 +19,7 @@
 //
 
 #include <linux/kernel.h>
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <asm/types.h>
 
 #define DELTA 0x9E3779B9
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index 2074fd95046..e1978fd895f 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -5,7 +5,7 @@
 #include <asm/uaccess.h>
 #include <linux/string.h>
 #include <linux/time.h>
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <linux/buffer_head.h>
 
 /* this is one and only function that is used outside (do_balance.c) */
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9e8cd5acd79..494c315c741 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -4,9 +4,9 @@
 
 #include <linux/time.h>
 #include <linux/fs.h>
-#include <linux/reiserfs_fs.h>
-#include <linux/reiserfs_acl.h>
-#include <linux/reiserfs_xattr.h>
+#include "reiserfs.h"
+#include "acl.h"
+#include "xattr.h"
 #include <linux/exportfs.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 950e3d1b5c9..0c2185042d5 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -5,7 +5,7 @@
 #include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <linux/time.h>
 #include <asm/uaccess.h>
 #include <linux/pagemap.h>
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index 72cb1cc51b8..ee382ef3d30 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -3,7 +3,7 @@
  */
 
 #include <linux/time.h>
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 
 // this contains item handlers for old item types: sd, direct,
 // indirect, directory
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c3cf54fd4de..cf9f4de00a9 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -37,7 +37,7 @@
 #include <linux/time.h>
 #include <linux/semaphore.h>
 #include <linux/vmalloc.h>
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/fcntl.h>
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 03d85cbf90b..79e5a8b4c22 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -5,7 +5,7 @@
 #include <asm/uaccess.h>
 #include <linux/string.h>
 #include <linux/time.h>
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <linux/buffer_head.h>
 
 /* these are used in do_balance.c */
@@ -975,7 +975,7 @@ static int leaf_cut_entries(struct buffer_head *bh,
 	   remove */
 	RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item");
 	RFALSE(I_ENTRY_COUNT(ih) < from + del_count,
-	       "10185: item contains not enough entries: entry_cout = %d, from = %d, to delete = %d",
+	       "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d",
 	       I_ENTRY_COUNT(ih), from, del_count);
 
 	if (del_count == 0)
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index 7df1ce48203..d735bc8470e 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -1,4 +1,4 @@
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <linux/mutex.h>
 
 /*
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 14637886523..84e8a69cee9 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -14,9 +14,9 @@
 #include <linux/time.h>
 #include <linux/bitops.h>
 #include <linux/slab.h>
-#include <linux/reiserfs_fs.h>
-#include <linux/reiserfs_acl.h>
-#include <linux/reiserfs_xattr.h>
+#include "reiserfs.h"
+#include "acl.h"
+#include "xattr.h"
 #include <linux/quotaops.h>
 
 #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); }
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index 3a6de810bd6..f732d6a5251 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c
@@ -5,8 +5,7 @@
 #include <linux/string.h>
 #include <linux/random.h>
 #include <linux/time.h>
-#include <linux/reiserfs_fs.h>
-#include <linux/reiserfs_fs_sb.h>
+#include "reiserfs.h"
 
 // find where objectid map starts
 #define objectid_map(s,rs) (old_format_only (s) ? \
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 45de98b5946..c0b1112ab7e 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -4,7 +4,7 @@
 
 #include <linux/time.h>
 #include <linux/fs.h>
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <linux/string.h>
 #include <linux/buffer_head.h>
 
@@ -329,7 +329,7 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
     Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it
     pointless complexity):
 
-    panics in reiserfs_fs.h have numbers from 1000 to 1999
+    panics in reiserfs.h have numbers from 1000 to 1999
     super.c				        2000 to 2999
     preserve.c (unused)			    3000 to 3999
     bitmap.c				    4000 to 4999
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 7a9981196c1..2c1ade692cc 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -12,8 +12,7 @@
 #include <linux/time.h>
 #include <linux/seq_file.h>
 #include <asm/uaccess.h>
-#include <linux/reiserfs_fs.h>
-#include <linux/reiserfs_fs_sb.h>
+#include "reiserfs.h"
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
new file mode 100644
index 00000000000..445d768eea4
--- /dev/null
+++ b/fs/reiserfs/reiserfs.h
@@ -0,0 +1,2922 @@
+/*
+ * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details
+ */
+
+#include <linux/reiserfs_fs.h>
+
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include <asm/unaligned.h>
+#include <linux/bitops.h>
+#include <linux/proc_fs.h>
+#include <linux/buffer_head.h>
+
+/* the 32 bit compat definitions with int argument */
+#define REISERFS_IOC32_UNPACK		_IOW(0xCD, 1, int)
+#define REISERFS_IOC32_GETFLAGS		FS_IOC32_GETFLAGS
+#define REISERFS_IOC32_SETFLAGS		FS_IOC32_SETFLAGS
+#define REISERFS_IOC32_GETVERSION	FS_IOC32_GETVERSION
+#define REISERFS_IOC32_SETVERSION	FS_IOC32_SETVERSION
+
+struct reiserfs_journal_list;
+
+/** bitmasks for i_flags field in reiserfs-specific part of inode */
+typedef enum {
+    /** this says what format of key do all items (but stat data) of
+      an object have.  If this is set, that format is 3.6 otherwise
+      - 3.5 */
+	i_item_key_version_mask = 0x0001,
+    /** If this is unset, object has 3.5 stat data, otherwise, it has
+      3.6 stat data with 64bit size, 32bit nlink etc. */
+	i_stat_data_version_mask = 0x0002,
+    /** file might need tail packing on close */
+	i_pack_on_close_mask = 0x0004,
+    /** don't pack tail of file */
+	i_nopack_mask = 0x0008,
+    /** If those is set, "safe link" was created for this file during
+      truncate or unlink. Safe link is used to avoid leakage of disk
+      space on crash with some files open, but unlinked. */
+	i_link_saved_unlink_mask = 0x0010,
+	i_link_saved_truncate_mask = 0x0020,
+	i_has_xattr_dir = 0x0040,
+	i_data_log = 0x0080,
+} reiserfs_inode_flags;
+
+struct reiserfs_inode_info {
+	__u32 i_key[4];		/* key is still 4 32 bit integers */
+    /** transient inode flags that are never stored on disk. Bitmasks
+      for this field are defined above. */
+	__u32 i_flags;
+
+	__u32 i_first_direct_byte;	// offset of first byte stored in direct item.
+
+	/* copy of persistent inode flags read from sd_attrs. */
+	__u32 i_attrs;
+
+	int i_prealloc_block;	/* first unused block of a sequence of unused blocks */
+	int i_prealloc_count;	/* length of that sequence */
+	struct list_head i_prealloc_list;	/* per-transaction list of inodes which
+						 * have preallocated blocks */
+
+	unsigned new_packing_locality:1;	/* new_packig_locality is created; new blocks
+						 * for the contents of this directory should be
+						 * displaced */
+
+	/* we use these for fsync or O_SYNC to decide which transaction
+	 ** needs to be committed in order for this inode to be properly
+	 ** flushed */
+	unsigned int i_trans_id;
+	struct reiserfs_journal_list *i_jl;
+	atomic_t openers;
+	struct mutex tailpack;
+#ifdef CONFIG_REISERFS_FS_XATTR
+	struct rw_semaphore i_xattr_sem;
+#endif
+	struct inode vfs_inode;
+};
+
+typedef enum {
+	reiserfs_attrs_cleared = 0x00000001,
+} reiserfs_super_block_flags;
+
+/* struct reiserfs_super_block accessors/mutators
+ * since this is a disk structure, it will always be in
+ * little endian format. */
+#define sb_block_count(sbp)         (le32_to_cpu((sbp)->s_v1.s_block_count))
+#define set_sb_block_count(sbp,v)   ((sbp)->s_v1.s_block_count = cpu_to_le32(v))
+#define sb_free_blocks(sbp)         (le32_to_cpu((sbp)->s_v1.s_free_blocks))
+#define set_sb_free_blocks(sbp,v)   ((sbp)->s_v1.s_free_blocks = cpu_to_le32(v))
+#define sb_root_block(sbp)          (le32_to_cpu((sbp)->s_v1.s_root_block))
+#define set_sb_root_block(sbp,v)    ((sbp)->s_v1.s_root_block = cpu_to_le32(v))
+
+#define sb_jp_journal_1st_block(sbp)  \
+              (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_1st_block))
+#define set_sb_jp_journal_1st_block(sbp,v) \
+              ((sbp)->s_v1.s_journal.jp_journal_1st_block = cpu_to_le32(v))
+#define sb_jp_journal_dev(sbp) \
+              (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_dev))
+#define set_sb_jp_journal_dev(sbp,v) \
+              ((sbp)->s_v1.s_journal.jp_journal_dev = cpu_to_le32(v))
+#define sb_jp_journal_size(sbp) \
+              (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_size))
+#define set_sb_jp_journal_size(sbp,v) \
+              ((sbp)->s_v1.s_journal.jp_journal_size = cpu_to_le32(v))
+#define sb_jp_journal_trans_max(sbp) \
+              (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_trans_max))
+#define set_sb_jp_journal_trans_max(sbp,v) \
+              ((sbp)->s_v1.s_journal.jp_journal_trans_max = cpu_to_le32(v))
+#define sb_jp_journal_magic(sbp) \
+              (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_magic))
+#define set_sb_jp_journal_magic(sbp,v) \
+              ((sbp)->s_v1.s_journal.jp_journal_magic = cpu_to_le32(v))
+#define sb_jp_journal_max_batch(sbp) \
+              (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_batch))
+#define set_sb_jp_journal_max_batch(sbp,v) \
+              ((sbp)->s_v1.s_journal.jp_journal_max_batch = cpu_to_le32(v))
+#define sb_jp_jourmal_max_commit_age(sbp) \
+              (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_commit_age))
+#define set_sb_jp_journal_max_commit_age(sbp,v) \
+              ((sbp)->s_v1.s_journal.jp_journal_max_commit_age = cpu_to_le32(v))
+
+#define sb_blocksize(sbp)          (le16_to_cpu((sbp)->s_v1.s_blocksize))
+#define set_sb_blocksize(sbp,v)    ((sbp)->s_v1.s_blocksize = cpu_to_le16(v))
+#define sb_oid_maxsize(sbp)        (le16_to_cpu((sbp)->s_v1.s_oid_maxsize))
+#define set_sb_oid_maxsize(sbp,v)  ((sbp)->s_v1.s_oid_maxsize = cpu_to_le16(v))
+#define sb_oid_cursize(sbp)        (le16_to_cpu((sbp)->s_v1.s_oid_cursize))
+#define set_sb_oid_cursize(sbp,v)  ((sbp)->s_v1.s_oid_cursize = cpu_to_le16(v))
+#define sb_umount_state(sbp)       (le16_to_cpu((sbp)->s_v1.s_umount_state))
+#define set_sb_umount_state(sbp,v) ((sbp)->s_v1.s_umount_state = cpu_to_le16(v))
+#define sb_fs_state(sbp)           (le16_to_cpu((sbp)->s_v1.s_fs_state))
+#define set_sb_fs_state(sbp,v)     ((sbp)->s_v1.s_fs_state = cpu_to_le16(v))
+#define sb_hash_function_code(sbp) \
+              (le32_to_cpu((sbp)->s_v1.s_hash_function_code))
+#define set_sb_hash_function_code(sbp,v) \
+              ((sbp)->s_v1.s_hash_function_code = cpu_to_le32(v))
+#define sb_tree_height(sbp)        (le16_to_cpu((sbp)->s_v1.s_tree_height))
+#define set_sb_tree_height(sbp,v)  ((sbp)->s_v1.s_tree_height = cpu_to_le16(v))
+#define sb_bmap_nr(sbp)            (le16_to_cpu((sbp)->s_v1.s_bmap_nr))
+#define set_sb_bmap_nr(sbp,v)      ((sbp)->s_v1.s_bmap_nr = cpu_to_le16(v))
+#define sb_version(sbp)            (le16_to_cpu((sbp)->s_v1.s_version))
+#define set_sb_version(sbp,v)      ((sbp)->s_v1.s_version = cpu_to_le16(v))
+
+#define sb_mnt_count(sbp)	   (le16_to_cpu((sbp)->s_mnt_count))
+#define set_sb_mnt_count(sbp, v)   ((sbp)->s_mnt_count = cpu_to_le16(v))
+
+#define sb_reserved_for_journal(sbp) \
+              (le16_to_cpu((sbp)->s_v1.s_reserved_for_journal))
+#define set_sb_reserved_for_journal(sbp,v) \
+              ((sbp)->s_v1.s_reserved_for_journal = cpu_to_le16(v))
+
+/* LOGGING -- */
+
+/* These all interelate for performance.
+**
+** If the journal block count is smaller than n transactions, you lose speed.
+** I don't know what n is yet, I'm guessing 8-16.
+**
+** typical transaction size depends on the application, how often fsync is
+** called, and how many metadata blocks you dirty in a 30 second period.
+** The more small files (<16k) you use, the larger your transactions will
+** be.
+**
+** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal
+** to wrap, which slows things down.  If you need high speed meta data updates, the journal should be big enough
+** to prevent wrapping before dirty meta blocks get to disk.
+**
+** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal
+** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping.
+**
+** The large the batch max age, the better the speed, and the more meta data changes you'll lose after a crash.
+**
+*/
+
+/* don't mess with these for a while */
+				/* we have a node size define somewhere in reiserfs_fs.h. -Hans */
+#define JOURNAL_BLOCK_SIZE  4096	/* BUG gotta get rid of this */
+#define JOURNAL_MAX_CNODE   1500	/* max cnodes to allocate. */
+#define JOURNAL_HASH_SIZE 8192
+#define JOURNAL_NUM_BITMAPS 5	/* number of copies of the bitmaps to have floating.  Must be >= 2 */
+
+/* One of these for every block in every transaction
+** Each one is in two hash tables.  First, a hash of the current transaction, and after journal_end, a
+** hash of all the in memory transactions.
+** next and prev are used by the current transaction (journal_hash).
+** hnext and hprev are used by journal_list_hash.  If a block is in more than one transaction, the journal_list_hash
+** links it in multiple times.  This allows flush_journal_list to remove just the cnode belonging
+** to a given transaction.
+*/
+struct reiserfs_journal_cnode {
+	struct buffer_head *bh;	/* real buffer head */
+	struct super_block *sb;	/* dev of real buffer head */
+	__u32 blocknr;		/* block number of real buffer head, == 0 when buffer on disk */
+	unsigned long state;
+	struct reiserfs_journal_list *jlist;	/* journal list this cnode lives in */
+	struct reiserfs_journal_cnode *next;	/* next in transaction list */
+	struct reiserfs_journal_cnode *prev;	/* prev in transaction list */
+	struct reiserfs_journal_cnode *hprev;	/* prev in hash list */
+	struct reiserfs_journal_cnode *hnext;	/* next in hash list */
+};
+
+struct reiserfs_bitmap_node {
+	int id;
+	char *data;
+	struct list_head list;
+};
+
+struct reiserfs_list_bitmap {
+	struct reiserfs_journal_list *journal_list;
+	struct reiserfs_bitmap_node **bitmaps;
+};
+
+/*
+** one of these for each transaction.  The most important part here is the j_realblock.
+** this list of cnodes is used to hash all the blocks in all the commits, to mark all the
+** real buffer heads dirty once all the commits hit the disk,
+** and to make sure every real block in a transaction is on disk before allowing the log area
+** to be overwritten */
+struct reiserfs_journal_list {
+	unsigned long j_start;
+	unsigned long j_state;
+	unsigned long j_len;
+	atomic_t j_nonzerolen;
+	atomic_t j_commit_left;
+	atomic_t j_older_commits_done;	/* all commits older than this on disk */
+	struct mutex j_commit_mutex;
+	unsigned int j_trans_id;
+	time_t j_timestamp;
+	struct reiserfs_list_bitmap *j_list_bitmap;
+	struct buffer_head *j_commit_bh;	/* commit buffer head */
+	struct reiserfs_journal_cnode *j_realblock;
+	struct reiserfs_journal_cnode *j_freedlist;	/* list of buffers that were freed during this trans.  free each of these on flush */
+	/* time ordered list of all active transactions */
+	struct list_head j_list;
+
+	/* time ordered list of all transactions we haven't tried to flush yet */
+	struct list_head j_working_list;
+
+	/* list of tail conversion targets in need of flush before commit */
+	struct list_head j_tail_bh_list;
+	/* list of data=ordered buffers in need of flush before commit */
+	struct list_head j_bh_list;
+	int j_refcount;
+};
+
+struct reiserfs_journal {
+	struct buffer_head **j_ap_blocks;	/* journal blocks on disk */
+	struct reiserfs_journal_cnode *j_last;	/* newest journal block */
+	struct reiserfs_journal_cnode *j_first;	/*  oldest journal block.  start here for traverse */
+
+	struct block_device *j_dev_bd;
+	fmode_t j_dev_mode;
+	int j_1st_reserved_block;	/* first block on s_dev of reserved area journal */
+
+	unsigned long j_state;
+	unsigned int j_trans_id;
+	unsigned long j_mount_id;
+	unsigned long j_start;	/* start of current waiting commit (index into j_ap_blocks) */
+	unsigned long j_len;	/* length of current waiting commit */
+	unsigned long j_len_alloc;	/* number of buffers requested by journal_begin() */
+	atomic_t j_wcount;	/* count of writers for current commit */
+	unsigned long j_bcount;	/* batch count. allows turning X transactions into 1 */
+	unsigned long j_first_unflushed_offset;	/* first unflushed transactions offset */
+	unsigned j_last_flush_trans_id;	/* last fully flushed journal timestamp */
+	struct buffer_head *j_header_bh;
+
+	time_t j_trans_start_time;	/* time this transaction started */
+	struct mutex j_mutex;
+	struct mutex j_flush_mutex;
+	wait_queue_head_t j_join_wait;	/* wait for current transaction to finish before starting new one */
+	atomic_t j_jlock;	/* lock for j_join_wait */
+	int j_list_bitmap_index;	/* number of next list bitmap to use */
+	int j_must_wait;	/* no more journal begins allowed. MUST sleep on j_join_wait */
+	int j_next_full_flush;	/* next journal_end will flush all journal list */
+	int j_next_async_flush;	/* next journal_end will flush all async commits */
+
+	int j_cnode_used;	/* number of cnodes on the used list */
+	int j_cnode_free;	/* number of cnodes on the free list */
+
+	unsigned int j_trans_max;	/* max number of blocks in a transaction.  */
+	unsigned int j_max_batch;	/* max number of blocks to batch into a trans */
+	unsigned int j_max_commit_age;	/* in seconds, how old can an async commit be */
+	unsigned int j_max_trans_age;	/* in seconds, how old can a transaction be */
+	unsigned int j_default_max_commit_age;	/* the default for the max commit age */
+
+	struct reiserfs_journal_cnode *j_cnode_free_list;
+	struct reiserfs_journal_cnode *j_cnode_free_orig;	/* orig pointer returned from vmalloc */
+
+	struct reiserfs_journal_list *j_current_jl;
+	int j_free_bitmap_nodes;
+	int j_used_bitmap_nodes;
+
+	int j_num_lists;	/* total number of active transactions */
+	int j_num_work_lists;	/* number that need attention from kreiserfsd */
+
+	/* debugging to make sure things are flushed in order */
+	unsigned int j_last_flush_id;
+
+	/* debugging to make sure things are committed in order */
+	unsigned int j_last_commit_id;
+
+	struct list_head j_bitmap_nodes;
+	struct list_head j_dirty_buffers;
+	spinlock_t j_dirty_buffers_lock;	/* protects j_dirty_buffers */
+
+	/* list of all active transactions */
+	struct list_head j_journal_list;
+	/* lists that haven't been touched by writeback attempts */
+	struct list_head j_working_list;
+
+	struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS];	/* array of bitmaps to record the deleted blocks */
+	struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE];	/* hash table for real buffer heads in current trans */
+	struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE];	/* hash table for all the real buffer heads in all
+										   the transactions */
+	struct list_head j_prealloc_list;	/* list of inodes which have preallocated blocks */
+	int j_persistent_trans;
+	unsigned long j_max_trans_size;
+	unsigned long j_max_batch_size;
+
+	int j_errno;
+
+	/* when flushing ordered buffers, throttle new ordered writers */
+	struct delayed_work j_work;
+	struct super_block *j_work_sb;
+	atomic_t j_async_throttle;
+};
+
+enum journal_state_bits {
+	J_WRITERS_BLOCKED = 1,	/* set when new writers not allowed */
+	J_WRITERS_QUEUED,	/* set when log is full due to too many writers */
+	J_ABORTED,		/* set when log is aborted */
+};
+
+#define JOURNAL_DESC_MAGIC "ReIsErLB"	/* ick.  magic string to find desc blocks in the journal */
+
+typedef __u32(*hashf_t) (const signed char *, int);
+
+struct reiserfs_bitmap_info {
+	__u32 free_count;
+};
+
+struct proc_dir_entry;
+
+#if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO )
+typedef unsigned long int stat_cnt_t;
+typedef struct reiserfs_proc_info_data {
+	spinlock_t lock;
+	int exiting;
+	int max_hash_collisions;
+
+	stat_cnt_t breads;
+	stat_cnt_t bread_miss;
+	stat_cnt_t search_by_key;
+	stat_cnt_t search_by_key_fs_changed;
+	stat_cnt_t search_by_key_restarted;
+
+	stat_cnt_t insert_item_restarted;
+	stat_cnt_t paste_into_item_restarted;
+	stat_cnt_t cut_from_item_restarted;
+	stat_cnt_t delete_solid_item_restarted;
+	stat_cnt_t delete_item_restarted;
+
+	stat_cnt_t leaked_oid;
+	stat_cnt_t leaves_removable;
+
+	/* balances per level. Use explicit 5 as MAX_HEIGHT is not visible yet. */
+	stat_cnt_t balance_at[5];	/* XXX */
+	/* sbk == search_by_key */
+	stat_cnt_t sbk_read_at[5];	/* XXX */
+	stat_cnt_t sbk_fs_changed[5];
+	stat_cnt_t sbk_restarted[5];
+	stat_cnt_t items_at[5];	/* XXX */
+	stat_cnt_t free_at[5];	/* XXX */
+	stat_cnt_t can_node_be_removed[5];	/* XXX */
+	long int lnum[5];	/* XXX */
+	long int rnum[5];	/* XXX */
+	long int lbytes[5];	/* XXX */
+	long int rbytes[5];	/* XXX */
+	stat_cnt_t get_neighbors[5];
+	stat_cnt_t get_neighbors_restart[5];
+	stat_cnt_t need_l_neighbor[5];
+	stat_cnt_t need_r_neighbor[5];
+
+	stat_cnt_t free_block;
+	struct __scan_bitmap_stats {
+		stat_cnt_t call;
+		stat_cnt_t wait;
+		stat_cnt_t bmap;
+		stat_cnt_t retry;
+		stat_cnt_t in_journal_hint;
+		stat_cnt_t in_journal_nohint;
+		stat_cnt_t stolen;
+	} scan_bitmap;
+	struct __journal_stats {
+		stat_cnt_t in_journal;
+		stat_cnt_t in_journal_bitmap;
+		stat_cnt_t in_journal_reusable;
+		stat_cnt_t lock_journal;
+		stat_cnt_t lock_journal_wait;
+		stat_cnt_t journal_being;
+		stat_cnt_t journal_relock_writers;
+		stat_cnt_t journal_relock_wcount;
+		stat_cnt_t mark_dirty;
+		stat_cnt_t mark_dirty_already;
+		stat_cnt_t mark_dirty_notjournal;
+		stat_cnt_t restore_prepared;
+		stat_cnt_t prepare;
+		stat_cnt_t prepare_retry;
+	} journal;
+} reiserfs_proc_info_data_t;
+#else
+typedef struct reiserfs_proc_info_data {
+} reiserfs_proc_info_data_t;
+#endif
+
+/* reiserfs union of in-core super block data */
+struct reiserfs_sb_info {
+	struct buffer_head *s_sbh;	/* Buffer containing the super block */
+	/* both the comment and the choice of
+	   name are unclear for s_rs -Hans */
+	struct reiserfs_super_block *s_rs;	/* Pointer to the super block in the buffer */
+	struct reiserfs_bitmap_info *s_ap_bitmap;
+	struct reiserfs_journal *s_journal;	/* pointer to journal information */
+	unsigned short s_mount_state;	/* reiserfs state (valid, invalid) */
+
+	/* Serialize writers access, replace the old bkl */
+	struct mutex lock;
+	/* Owner of the lock (can be recursive) */
+	struct task_struct *lock_owner;
+	/* Depth of the lock, start from -1 like the bkl */
+	int lock_depth;
+
+	/* Comment? -Hans */
+	void (*end_io_handler) (struct buffer_head *, int);
+	hashf_t s_hash_function;	/* pointer to function which is used
+					   to sort names in directory. Set on
+					   mount */
+	unsigned long s_mount_opt;	/* reiserfs's mount options are set
+					   here (currently - NOTAIL, NOLOG,
+					   REPLAYONLY) */
+
+	struct {		/* This is a structure that describes block allocator options */
+		unsigned long bits;	/* Bitfield for enable/disable kind of options */
+		unsigned long large_file_size;	/* size started from which we consider file to be a large one(in blocks) */
+		int border;	/* percentage of disk, border takes */
+		int preallocmin;	/* Minimal file size (in blocks) starting from which we do preallocations */
+		int preallocsize;	/* Number of blocks we try to prealloc when file
+					   reaches preallocmin size (in blocks) or
+					   prealloc_list is empty. */
+	} s_alloc_options;
+
+	/* Comment? -Hans */
+	wait_queue_head_t s_wait;
+	/* To be obsoleted soon by per buffer seals.. -Hans */
+	atomic_t s_generation_counter;	// increased by one every time the
+	// tree gets re-balanced
+	unsigned long s_properties;	/* File system properties. Currently holds
+					   on-disk FS format */
+
+	/* session statistics */
+	int s_disk_reads;
+	int s_disk_writes;
+	int s_fix_nodes;
+	int s_do_balance;
+	int s_unneeded_left_neighbor;
+	int s_good_search_by_key_reada;
+	int s_bmaps;
+	int s_bmaps_without_search;
+	int s_direct2indirect;
+	int s_indirect2direct;
+	/* set up when it's ok for reiserfs_read_inode2() to read from
+	   disk inode with nlink==0. Currently this is only used during
+	   finish_unfinished() processing at mount time */
+	int s_is_unlinked_ok;
+	reiserfs_proc_info_data_t s_proc_info_data;
+	struct proc_dir_entry *procdir;
+	int reserved_blocks;	/* amount of blocks reserved for further allocations */
+	spinlock_t bitmap_lock;	/* this lock on now only used to protect reserved_blocks variable */
+	struct dentry *priv_root;	/* root of /.reiserfs_priv */
+	struct dentry *xattr_root;	/* root of /.reiserfs_priv/xattrs */
+	int j_errno;
+#ifdef CONFIG_QUOTA
+	char *s_qf_names[MAXQUOTAS];
+	int s_jquota_fmt;
+#endif
+	char *s_jdev;		/* Stored jdev for mount option showing */
+#ifdef CONFIG_REISERFS_CHECK
+
+	struct tree_balance *cur_tb;	/*
+					 * Detects whether more than one
+					 * copy of tb exists per superblock
+					 * as a means of checking whether
+					 * do_balance is executing concurrently
+					 * against another tree reader/writer
+					 * on a same mount point.
+					 */
+#endif
+};
+
+/* Definitions of reiserfs on-disk properties: */
+#define REISERFS_3_5 0
+#define REISERFS_3_6 1
+#define REISERFS_OLD_FORMAT 2
+
+enum reiserfs_mount_options {
+/* Mount options */
+	REISERFS_LARGETAIL,	/* large tails will be created in a session */
+	REISERFS_SMALLTAIL,	/* small (for files less than block size) tails will be created in a session */
+	REPLAYONLY,		/* replay journal and return 0. Use by fsck */
+	REISERFS_CONVERT,	/* -o conv: causes conversion of old
+				   format super block to the new
+				   format. If not specified - old
+				   partition will be dealt with in a
+				   manner of 3.5.x */
+
+/* -o hash={tea, rupasov, r5, detect} is meant for properly mounting
+** reiserfs disks from 3.5.19 or earlier.  99% of the time, this option
+** is not required.  If the normal autodection code can't determine which
+** hash to use (because both hashes had the same value for a file)
+** use this option to force a specific hash.  It won't allow you to override
+** the existing hash on the FS, so if you have a tea hash disk, and mount
+** with -o hash=rupasov, the mount will fail.
+*/
+	FORCE_TEA_HASH,		/* try to force tea hash on mount */
+	FORCE_RUPASOV_HASH,	/* try to force rupasov hash on mount */
+	FORCE_R5_HASH,		/* try to force rupasov hash on mount */
+	FORCE_HASH_DETECT,	/* try to detect hash function on mount */
+
+	REISERFS_DATA_LOG,
+	REISERFS_DATA_ORDERED,
+	REISERFS_DATA_WRITEBACK,
+
+/* used for testing experimental features, makes benchmarking new
+   features with and without more convenient, should never be used by
+   users in any code shipped to users (ideally) */
+
+	REISERFS_NO_BORDER,
+	REISERFS_NO_UNHASHED_RELOCATION,
+	REISERFS_HASHED_RELOCATION,
+	REISERFS_ATTRS,
+	REISERFS_XATTRS_USER,
+	REISERFS_POSIXACL,
+	REISERFS_EXPOSE_PRIVROOT,
+	REISERFS_BARRIER_NONE,
+	REISERFS_BARRIER_FLUSH,
+
+	/* Actions on error */
+	REISERFS_ERROR_PANIC,
+	REISERFS_ERROR_RO,
+	REISERFS_ERROR_CONTINUE,
+
+	REISERFS_USRQUOTA,	/* User quota option specified */
+	REISERFS_GRPQUOTA,	/* Group quota option specified */
+
+	REISERFS_TEST1,
+	REISERFS_TEST2,
+	REISERFS_TEST3,
+	REISERFS_TEST4,
+	REISERFS_UNSUPPORTED_OPT,
+};
+
+#define reiserfs_r5_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_R5_HASH))
+#define reiserfs_rupasov_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_RUPASOV_HASH))
+#define reiserfs_tea_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_TEA_HASH))
+#define reiserfs_hash_detect(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_HASH_DETECT))
+#define reiserfs_no_border(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_BORDER))
+#define reiserfs_no_unhashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION))
+#define reiserfs_hashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_HASHED_RELOCATION))
+#define reiserfs_test4(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TEST4))
+
+#define have_large_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_LARGETAIL))
+#define have_small_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_SMALLTAIL))
+#define replay_only(s) (REISERFS_SB(s)->s_mount_opt & (1 << REPLAYONLY))
+#define reiserfs_attrs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ATTRS))
+#define old_format_only(s) (REISERFS_SB(s)->s_properties & (1 << REISERFS_3_5))
+#define convert_reiserfs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_CONVERT))
+#define reiserfs_data_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_LOG))
+#define reiserfs_data_ordered(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_ORDERED))
+#define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK))
+#define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER))
+#define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL))
+#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT))
+#define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s))
+#define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE))
+#define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH))
+
+#define reiserfs_error_panic(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_PANIC))
+#define reiserfs_error_ro(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_RO))
+
+void reiserfs_file_buffer(struct buffer_head *bh, int list);
+extern struct file_system_type reiserfs_fs_type;
+int reiserfs_resize(struct super_block *, unsigned long);
+
+#define CARRY_ON                0
+#define SCHEDULE_OCCURRED       1
+
+#define SB_BUFFER_WITH_SB(s) (REISERFS_SB(s)->s_sbh)
+#define SB_JOURNAL(s) (REISERFS_SB(s)->s_journal)
+#define SB_JOURNAL_1st_RESERVED_BLOCK(s) (SB_JOURNAL(s)->j_1st_reserved_block)
+#define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free)
+#define SB_AP_BITMAP(s) (REISERFS_SB(s)->s_ap_bitmap)
+
+#define SB_DISK_JOURNAL_HEAD(s) (SB_JOURNAL(s)->j_header_bh->)
+
+/* A safe version of the "bdevname", which returns the "s_id" field of
+ * a superblock or else "Null superblock" if the super block is NULL.
+ */
+static inline char *reiserfs_bdevname(struct super_block *s)
+{
+	return (s == NULL) ? "Null superblock" : s->s_id;
+}
+
+#define reiserfs_is_journal_aborted(journal) (unlikely (__reiserfs_is_journal_aborted (journal)))
+static inline int __reiserfs_is_journal_aborted(struct reiserfs_journal
+						*journal)
+{
+	return test_bit(J_ABORTED, &journal->j_state);
+}
+
+/*
+ * Locking primitives. The write lock is a per superblock
+ * special mutex that has properties close to the Big Kernel Lock
+ * which was used in the previous locking scheme.
+ */
+void reiserfs_write_lock(struct super_block *s);
+void reiserfs_write_unlock(struct super_block *s);
+int reiserfs_write_lock_once(struct super_block *s);
+void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
+
+#ifdef CONFIG_REISERFS_CHECK
+void reiserfs_lock_check_recursive(struct super_block *s);
+#else
+static inline void reiserfs_lock_check_recursive(struct super_block *s) { }
+#endif
+
+/*
+ * Several mutexes depend on the write lock.
+ * However sometimes we want to relax the write lock while we hold
+ * these mutexes, according to the release/reacquire on schedule()
+ * properties of the Bkl that were used.
+ * Reiserfs performances and locking were based on this scheme.
+ * Now that the write lock is a mutex and not the bkl anymore, doing so
+ * may result in a deadlock:
+ *
+ * A acquire write_lock
+ * A acquire j_commit_mutex
+ * A release write_lock and wait for something
+ * B acquire write_lock
+ * B can't acquire j_commit_mutex and sleep
+ * A can't acquire write lock anymore
+ * deadlock
+ *
+ * What we do here is avoiding such deadlock by playing the same game
+ * than the Bkl: if we can't acquire a mutex that depends on the write lock,
+ * we release the write lock, wait a bit and then retry.
+ *
+ * The mutexes concerned by this hack are:
+ * - The commit mutex of a journal list
+ * - The flush mutex
+ * - The journal lock
+ * - The inode mutex
+ */
+static inline void reiserfs_mutex_lock_safe(struct mutex *m,
+			       struct super_block *s)
+{
+	reiserfs_lock_check_recursive(s);
+	reiserfs_write_unlock(s);
+	mutex_lock(m);
+	reiserfs_write_lock(s);
+}
+
+static inline void
+reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
+			       struct super_block *s)
+{
+	reiserfs_lock_check_recursive(s);
+	reiserfs_write_unlock(s);
+	mutex_lock_nested(m, subclass);
+	reiserfs_write_lock(s);
+}
+
+static inline void
+reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
+{
+	reiserfs_lock_check_recursive(s);
+	reiserfs_write_unlock(s);
+	down_read(sem);
+	reiserfs_write_lock(s);
+}
+
+/*
+ * When we schedule, we usually want to also release the write lock,
+ * according to the previous bkl based locking scheme of reiserfs.
+ */
+static inline void reiserfs_cond_resched(struct super_block *s)
+{
+	if (need_resched()) {
+		reiserfs_write_unlock(s);
+		schedule();
+		reiserfs_write_lock(s);
+	}
+}
+
+struct fid;
+
+/* in reading the #defines, it may help to understand that they employ
+   the following abbreviations:
+
+   B = Buffer
+   I = Item header
+   H = Height within the tree (should be changed to LEV)
+   N = Number of the item in the node
+   STAT = stat data
+   DEH = Directory Entry Header
+   EC = Entry Count
+   E = Entry number
+   UL = Unsigned Long
+   BLKH = BLocK Header
+   UNFM = UNForMatted node
+   DC = Disk Child
+   P = Path
+
+   These #defines are named by concatenating these abbreviations,
+   where first comes the arguments, and last comes the return value,
+   of the macro.
+
+*/
+
+#define USE_INODE_GENERATION_COUNTER
+
+#define REISERFS_PREALLOCATE
+#define DISPLACE_NEW_PACKING_LOCALITIES
+#define PREALLOCATION_SIZE 9
+
+/* n must be power of 2 */
+#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u))
+
+// to be ok for alpha and others we have to align structures to 8 byte
+// boundary.
+// FIXME: do not change 4 by anything else: there is code which relies on that
+#define ROUND_UP(x) _ROUND_UP(x,8LL)
+
+/* debug levels.  Right now, CONFIG_REISERFS_CHECK means print all debug
+** messages.
+*/
+#define REISERFS_DEBUG_CODE 5	/* extra messages to help find/debug errors */
+
+void __reiserfs_warning(struct super_block *s, const char *id,
+			 const char *func, const char *fmt, ...);
+#define reiserfs_warning(s, id, fmt, args...) \
+	 __reiserfs_warning(s, id, __func__, fmt, ##args)
+/* assertions handling */
+
+/** always check a condition and panic if it's false. */
+#define __RASSERT(cond, scond, format, args...)			\
+do {									\
+	if (!(cond))							\
+		reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \
+			       __FILE__ ":%i:%s: " format "\n",		\
+			       in_interrupt() ? -1 : task_pid_nr(current), \
+			       __LINE__, __func__ , ##args);		\
+} while (0)
+
+#define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args)
+
+#if defined( CONFIG_REISERFS_CHECK )
+#define RFALSE(cond, format, args...) __RASSERT(!(cond), "!(" #cond ")", format, ##args)
+#else
+#define RFALSE( cond, format, args... ) do {;} while( 0 )
+#endif
+
+#define CONSTF __attribute_const__
+/*
+ * Disk Data Structures
+ */
+
+/***************************************************************************/
+/*                             SUPER BLOCK                                 */
+/***************************************************************************/
+
+/*
+ * Structure of super block on disk, a version of which in RAM is often accessed as REISERFS_SB(s)->s_rs
+ * the version in RAM is part of a larger structure containing fields never written to disk.
+ */
+#define UNSET_HASH 0		// read_super will guess about, what hash names
+		     // in directories were sorted with
+#define TEA_HASH  1
+#define YURA_HASH 2
+#define R5_HASH   3
+#define DEFAULT_HASH R5_HASH
+
+struct journal_params {
+	__le32 jp_journal_1st_block;	/* where does journal start from on its
+					 * device */
+	__le32 jp_journal_dev;	/* journal device st_rdev */
+	__le32 jp_journal_size;	/* size of the journal */
+	__le32 jp_journal_trans_max;	/* max number of blocks in a transaction. */
+	__le32 jp_journal_magic;	/* random value made on fs creation (this
+					 * was sb_journal_block_count) */
+	__le32 jp_journal_max_batch;	/* max number of blocks to batch into a
+					 * trans */
+	__le32 jp_journal_max_commit_age;	/* in seconds, how old can an async
+						 * commit be */
+	__le32 jp_journal_max_trans_age;	/* in seconds, how old can a transaction
+						 * be */
+};
+
+/* this is the super from 3.5.X, where X >= 10 */
+struct reiserfs_super_block_v1 {
+	__le32 s_block_count;	/* blocks count         */
+	__le32 s_free_blocks;	/* free blocks count    */
+	__le32 s_root_block;	/* root block number    */
+	struct journal_params s_journal;
+	__le16 s_blocksize;	/* block size */
+	__le16 s_oid_maxsize;	/* max size of object id array, see
+				 * get_objectid() commentary  */
+	__le16 s_oid_cursize;	/* current size of object id array */
+	__le16 s_umount_state;	/* this is set to 1 when filesystem was
+				 * umounted, to 2 - when not */
+	char s_magic[10];	/* reiserfs magic string indicates that
+				 * file system is reiserfs:
+				 * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */
+	__le16 s_fs_state;	/* it is set to used by fsck to mark which
+				 * phase of rebuilding is done */
+	__le32 s_hash_function_code;	/* indicate, what hash function is being use
+					 * to sort names in a directory*/
+	__le16 s_tree_height;	/* height of disk tree */
+	__le16 s_bmap_nr;	/* amount of bitmap blocks needed to address
+				 * each block of file system */
+	__le16 s_version;	/* this field is only reliable on filesystem
+				 * with non-standard journal */
+	__le16 s_reserved_for_journal;	/* size in blocks of journal area on main
+					 * device, we need to keep after
+					 * making fs with non-standard journal */
+} __attribute__ ((__packed__));
+
+#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1))
+
+/* this is the on disk super block */
+struct reiserfs_super_block {
+	struct reiserfs_super_block_v1 s_v1;
+	__le32 s_inode_generation;
+	__le32 s_flags;		/* Right now used only by inode-attributes, if enabled */
+	unsigned char s_uuid[16];	/* filesystem unique identifier */
+	unsigned char s_label[16];	/* filesystem volume label */
+	__le16 s_mnt_count;		/* Count of mounts since last fsck */
+	__le16 s_max_mnt_count;		/* Maximum mounts before check */
+	__le32 s_lastcheck;		/* Timestamp of last fsck */
+	__le32 s_check_interval;	/* Interval between checks */
+	char s_unused[76];	/* zero filled by mkreiserfs and
+				 * reiserfs_convert_objectid_map_v1()
+				 * so any additions must be updated
+				 * there as well. */
+} __attribute__ ((__packed__));
+
+#define SB_SIZE (sizeof(struct reiserfs_super_block))
+
+#define REISERFS_VERSION_1 0
+#define REISERFS_VERSION_2 2
+
+// on-disk super block fields converted to cpu form
+#define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs)
+#define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1))
+#define SB_BLOCKSIZE(s) \
+        le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_blocksize))
+#define SB_BLOCK_COUNT(s) \
+        le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_block_count))
+#define SB_FREE_BLOCKS(s) \
+        le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks))
+#define SB_REISERFS_MAGIC(s) \
+        (SB_V1_DISK_SUPER_BLOCK(s)->s_magic)
+#define SB_ROOT_BLOCK(s) \
+        le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_root_block))
+#define SB_TREE_HEIGHT(s) \
+        le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height))
+#define SB_REISERFS_STATE(s) \
+        le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state))
+#define SB_VERSION(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_version))
+#define SB_BMAP_NR(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr))
+
+#define PUT_SB_BLOCK_COUNT(s, val) \
+   do { SB_V1_DISK_SUPER_BLOCK(s)->s_block_count = cpu_to_le32(val); } while (0)
+#define PUT_SB_FREE_BLOCKS(s, val) \
+   do { SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks = cpu_to_le32(val); } while (0)
+#define PUT_SB_ROOT_BLOCK(s, val) \
+   do { SB_V1_DISK_SUPER_BLOCK(s)->s_root_block = cpu_to_le32(val); } while (0)
+#define PUT_SB_TREE_HEIGHT(s, val) \
+   do { SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height = cpu_to_le16(val); } while (0)
+#define PUT_SB_REISERFS_STATE(s, val) \
+   do { SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state = cpu_to_le16(val); } while (0)
+#define PUT_SB_VERSION(s, val) \
+   do { SB_V1_DISK_SUPER_BLOCK(s)->s_version = cpu_to_le16(val); } while (0)
+#define PUT_SB_BMAP_NR(s, val) \
+   do { SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr = cpu_to_le16 (val); } while (0)
+
+#define SB_ONDISK_JP(s) (&SB_V1_DISK_SUPER_BLOCK(s)->s_journal)
+#define SB_ONDISK_JOURNAL_SIZE(s) \
+         le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_size))
+#define SB_ONDISK_JOURNAL_1st_BLOCK(s) \
+         le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_1st_block))
+#define SB_ONDISK_JOURNAL_DEVICE(s) \
+         le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_dev))
+#define SB_ONDISK_RESERVED_FOR_JOURNAL(s) \
+         le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_reserved_for_journal))
+
+#define is_block_in_log_or_reserved_area(s, block) \
+         block >= SB_JOURNAL_1st_RESERVED_BLOCK(s) \
+         && block < SB_JOURNAL_1st_RESERVED_BLOCK(s) +  \
+         ((!is_reiserfs_jr(SB_DISK_SUPER_BLOCK(s)) ? \
+         SB_ONDISK_JOURNAL_SIZE(s) + 1 : SB_ONDISK_RESERVED_FOR_JOURNAL(s)))
+
+int is_reiserfs_3_5(struct reiserfs_super_block *rs);
+int is_reiserfs_3_6(struct reiserfs_super_block *rs);
+int is_reiserfs_jr(struct reiserfs_super_block *rs);
+
+/* ReiserFS leaves the first 64k unused, so that partition labels have
+   enough space.  If someone wants to write a fancy bootloader that
+   needs more than 64k, let us know, and this will be increased in size.
+   This number must be larger than than the largest block size on any
+   platform, or code will break.  -Hans */
+#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
+#define REISERFS_FIRST_BLOCK unused_define
+#define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES
+
+/* the spot for the super in versions 3.5 - 3.5.10 (inclusive) */
+#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)
+
+/* reiserfs internal error code (used by search_by_key and fix_nodes)) */
+#define CARRY_ON      0
+#define REPEAT_SEARCH -1
+#define IO_ERROR      -2
+#define NO_DISK_SPACE -3
+#define NO_BALANCING_NEEDED  (-4)
+#define NO_MORE_UNUSED_CONTIGUOUS_BLOCKS (-5)
+#define QUOTA_EXCEEDED -6
+
+typedef __u32 b_blocknr_t;
+typedef __le32 unp_t;
+
+struct unfm_nodeinfo {
+	unp_t unfm_nodenum;
+	unsigned short unfm_freespace;
+};
+
+/* there are two formats of keys: 3.5 and 3.6
+ */
+#define KEY_FORMAT_3_5 0
+#define KEY_FORMAT_3_6 1
+
+/* there are two stat datas */
+#define STAT_DATA_V1 0
+#define STAT_DATA_V2 1
+
+static inline struct reiserfs_inode_info *REISERFS_I(const struct inode *inode)
+{
+	return container_of(inode, struct reiserfs_inode_info, vfs_inode);
+}
+
+static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+/* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16
+ * which overflows on large file systems. */
+static inline __u32 reiserfs_bmap_count(struct super_block *sb)
+{
+	return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1;
+}
+
+static inline int bmap_would_wrap(unsigned bmap_nr)
+{
+	return bmap_nr > ((1LL << 16) - 1);
+}
+
+/** this says about version of key of all items (but stat data) the
+    object consists of */
+#define get_inode_item_key_version( inode )                                    \
+    ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5)
+
+#define set_inode_item_key_version( inode, version )                           \
+         ({ if((version)==KEY_FORMAT_3_6)                                      \
+                REISERFS_I(inode)->i_flags |= i_item_key_version_mask;      \
+            else                                                               \
+                REISERFS_I(inode)->i_flags &= ~i_item_key_version_mask; })
+
+#define get_inode_sd_version(inode)                                            \
+    ((REISERFS_I(inode)->i_flags & i_stat_data_version_mask) ? STAT_DATA_V2 : STAT_DATA_V1)
+
+#define set_inode_sd_version(inode, version)                                   \
+         ({ if((version)==STAT_DATA_V2)                                        \
+                REISERFS_I(inode)->i_flags |= i_stat_data_version_mask;     \
+            else                                                               \
+                REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; })
+
+/* This is an aggressive tail suppression policy, I am hoping it
+   improves our benchmarks. The principle behind it is that percentage
+   space saving is what matters, not absolute space saving.  This is
+   non-intuitive, but it helps to understand it if you consider that the
+   cost to access 4 blocks is not much more than the cost to access 1
+   block, if you have to do a seek and rotate.  A tail risks a
+   non-linear disk access that is significant as a percentage of total
+   time cost for a 4 block file and saves an amount of space that is
+   less significant as a percentage of space, or so goes the hypothesis.
+   -Hans */
+#define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \
+(\
+  (!(n_tail_size)) || \
+  (((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \
+   ( (n_file_size) >= (n_block_size) * 4 ) || \
+   ( ( (n_file_size) >= (n_block_size) * 3 ) && \
+     ( (n_tail_size) >=   (MAX_DIRECT_ITEM_LEN(n_block_size))/4) ) || \
+   ( ( (n_file_size) >= (n_block_size) * 2 ) && \
+     ( (n_tail_size) >=   (MAX_DIRECT_ITEM_LEN(n_block_size))/2) ) || \
+   ( ( (n_file_size) >= (n_block_size) ) && \
+     ( (n_tail_size) >=   (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \
+)
+
+/* Another strategy for tails, this one means only create a tail if all the
+   file would fit into one DIRECT item.
+   Primary intention for this one is to increase performance by decreasing
+   seeking.
+*/
+#define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \
+(\
+  (!(n_tail_size)) || \
+  (((n_file_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) ) \
+)
+
+/*
+ * values for s_umount_state field
+ */
+#define REISERFS_VALID_FS    1
+#define REISERFS_ERROR_FS    2
+
+//
+// there are 5 item types currently
+//
+#define TYPE_STAT_DATA 0
+#define TYPE_INDIRECT 1
+#define TYPE_DIRECT 2
+#define TYPE_DIRENTRY 3
+#define TYPE_MAXTYPE 3
+#define TYPE_ANY 15		// FIXME: comment is required
+
+/***************************************************************************/
+/*                       KEY & ITEM HEAD                                   */
+/***************************************************************************/
+
+//
+// directories use this key as well as old files
+//
+struct offset_v1 {
+	__le32 k_offset;
+	__le32 k_uniqueness;
+} __attribute__ ((__packed__));
+
+struct offset_v2 {
+	__le64 v;
+} __attribute__ ((__packed__));
+
+static inline __u16 offset_v2_k_type(const struct offset_v2 *v2)
+{
+	__u8 type = le64_to_cpu(v2->v) >> 60;
+	return (type <= TYPE_MAXTYPE) ? type : TYPE_ANY;
+}
+
+static inline void set_offset_v2_k_type(struct offset_v2 *v2, int type)
+{
+	v2->v =
+	    (v2->v & cpu_to_le64(~0ULL >> 4)) | cpu_to_le64((__u64) type << 60);
+}
+
+static inline loff_t offset_v2_k_offset(const struct offset_v2 *v2)
+{
+	return le64_to_cpu(v2->v) & (~0ULL >> 4);
+}
+
+static inline void set_offset_v2_k_offset(struct offset_v2 *v2, loff_t offset)
+{
+	offset &= (~0ULL >> 4);
+	v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset);
+}
+
+/* Key of an item determines its location in the S+tree, and
+   is composed of 4 components */
+struct reiserfs_key {
+	__le32 k_dir_id;	/* packing locality: by default parent
+				   directory object id */
+	__le32 k_objectid;	/* object identifier */
+	union {
+		struct offset_v1 k_offset_v1;
+		struct offset_v2 k_offset_v2;
+	} __attribute__ ((__packed__)) u;
+} __attribute__ ((__packed__));
+
+struct in_core_key {
+	__u32 k_dir_id;		/* packing locality: by default parent
+				   directory object id */
+	__u32 k_objectid;	/* object identifier */
+	__u64 k_offset;
+	__u8 k_type;
+};
+
+struct cpu_key {
+	struct in_core_key on_disk_key;
+	int version;
+	int key_length;		/* 3 in all cases but direct2indirect and
+				   indirect2direct conversion */
+};
+
+/* Our function for comparing keys can compare keys of different
+   lengths.  It takes as a parameter the length of the keys it is to
+   compare.  These defines are used in determining what is to be passed
+   to it as that parameter. */
+#define REISERFS_FULL_KEY_LEN     4
+#define REISERFS_SHORT_KEY_LEN    2
+
+/* The result of the key compare */
+#define FIRST_GREATER 1
+#define SECOND_GREATER -1
+#define KEYS_IDENTICAL 0
+#define KEY_FOUND 1
+#define KEY_NOT_FOUND 0
+
+#define KEY_SIZE (sizeof(struct reiserfs_key))
+#define SHORT_KEY_SIZE (sizeof (__u32) + sizeof (__u32))
+
+/* return values for search_by_key and clones */
+#define ITEM_FOUND 1
+#define ITEM_NOT_FOUND 0
+#define ENTRY_FOUND 1
+#define ENTRY_NOT_FOUND 0
+#define DIRECTORY_NOT_FOUND -1
+#define REGULAR_FILE_FOUND -2
+#define DIRECTORY_FOUND -3
+#define BYTE_FOUND 1
+#define BYTE_NOT_FOUND 0
+#define FILE_NOT_FOUND -1
+
+#define POSITION_FOUND 1
+#define POSITION_NOT_FOUND 0
+
+// return values for reiserfs_find_entry and search_by_entry_key
+#define NAME_FOUND 1
+#define NAME_NOT_FOUND 0
+#define GOTO_PREVIOUS_ITEM 2
+#define NAME_FOUND_INVISIBLE 3
+
+/*  Everything in the filesystem is stored as a set of items.  The
+    item head contains the key of the item, its free space (for
+    indirect items) and specifies the location of the item itself
+    within the block.  */
+
+struct item_head {
+	/* Everything in the tree is found by searching for it based on
+	 * its key.*/
+	struct reiserfs_key ih_key;
+	union {
+		/* The free space in the last unformatted node of an
+		   indirect item if this is an indirect item.  This
+		   equals 0xFFFF iff this is a direct item or stat data
+		   item. Note that the key, not this field, is used to
+		   determine the item type, and thus which field this
+		   union contains. */
+		__le16 ih_free_space_reserved;
+		/* Iff this is a directory item, this field equals the
+		   number of directory entries in the directory item. */
+		__le16 ih_entry_count;
+	} __attribute__ ((__packed__)) u;
+	__le16 ih_item_len;	/* total size of the item body */
+	__le16 ih_item_location;	/* an offset to the item body
+					 * within the block */
+	__le16 ih_version;	/* 0 for all old items, 2 for new
+				   ones. Highest bit is set by fsck
+				   temporary, cleaned after all
+				   done */
+} __attribute__ ((__packed__));
+/* size of item header     */
+#define IH_SIZE (sizeof(struct item_head))
+
+#define ih_free_space(ih)            le16_to_cpu((ih)->u.ih_free_space_reserved)
+#define ih_version(ih)               le16_to_cpu((ih)->ih_version)
+#define ih_entry_count(ih)           le16_to_cpu((ih)->u.ih_entry_count)
+#define ih_location(ih)              le16_to_cpu((ih)->ih_item_location)
+#define ih_item_len(ih)              le16_to_cpu((ih)->ih_item_len)
+
+#define put_ih_free_space(ih, val)   do { (ih)->u.ih_free_space_reserved = cpu_to_le16(val); } while(0)
+#define put_ih_version(ih, val)      do { (ih)->ih_version = cpu_to_le16(val); } while (0)
+#define put_ih_entry_count(ih, val)  do { (ih)->u.ih_entry_count = cpu_to_le16(val); } while (0)
+#define put_ih_location(ih, val)     do { (ih)->ih_item_location = cpu_to_le16(val); } while (0)
+#define put_ih_item_len(ih, val)     do { (ih)->ih_item_len = cpu_to_le16(val); } while (0)
+
+#define unreachable_item(ih) (ih_version(ih) & (1 << 15))
+
+#define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih))
+#define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val)))
+
+/* these operate on indirect items, where you've got an array of ints
+** at a possibly unaligned location.  These are a noop on ia32
+** 
+** p is the array of __u32, i is the index into the array, v is the value
+** to store there.
+*/
+#define get_block_num(p, i) get_unaligned_le32((p) + (i))
+#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i))
+
+//
+// in old version uniqueness field shows key type
+//
+#define V1_SD_UNIQUENESS 0
+#define V1_INDIRECT_UNIQUENESS 0xfffffffe
+#define V1_DIRECT_UNIQUENESS 0xffffffff
+#define V1_DIRENTRY_UNIQUENESS 500
+#define V1_ANY_UNIQUENESS 555	// FIXME: comment is required
+
+//
+// here are conversion routines
+//
+static inline int uniqueness2type(__u32 uniqueness) CONSTF;
+static inline int uniqueness2type(__u32 uniqueness)
+{
+	switch ((int)uniqueness) {
+	case V1_SD_UNIQUENESS:
+		return TYPE_STAT_DATA;
+	case V1_INDIRECT_UNIQUENESS:
+		return TYPE_INDIRECT;
+	case V1_DIRECT_UNIQUENESS:
+		return TYPE_DIRECT;
+	case V1_DIRENTRY_UNIQUENESS:
+		return TYPE_DIRENTRY;
+	case V1_ANY_UNIQUENESS:
+	default:
+		return TYPE_ANY;
+	}
+}
+
+static inline __u32 type2uniqueness(int type) CONSTF;
+static inline __u32 type2uniqueness(int type)
+{
+	switch (type) {
+	case TYPE_STAT_DATA:
+		return V1_SD_UNIQUENESS;
+	case TYPE_INDIRECT:
+		return V1_INDIRECT_UNIQUENESS;
+	case TYPE_DIRECT:
+		return V1_DIRECT_UNIQUENESS;
+	case TYPE_DIRENTRY:
+		return V1_DIRENTRY_UNIQUENESS;
+	case TYPE_ANY:
+	default:
+		return V1_ANY_UNIQUENESS;
+	}
+}
+
+//
+// key is pointer to on disk key which is stored in le, result is cpu,
+// there is no way to get version of object from key, so, provide
+// version to these defines
+//
+static inline loff_t le_key_k_offset(int version,
+				     const struct reiserfs_key *key)
+{
+	return (version == KEY_FORMAT_3_5) ?
+	    le32_to_cpu(key->u.k_offset_v1.k_offset) :
+	    offset_v2_k_offset(&(key->u.k_offset_v2));
+}
+
+static inline loff_t le_ih_k_offset(const struct item_head *ih)
+{
+	return le_key_k_offset(ih_version(ih), &(ih->ih_key));
+}
+
+static inline loff_t le_key_k_type(int version, const struct reiserfs_key *key)
+{
+	return (version == KEY_FORMAT_3_5) ?
+	    uniqueness2type(le32_to_cpu(key->u.k_offset_v1.k_uniqueness)) :
+	    offset_v2_k_type(&(key->u.k_offset_v2));
+}
+
+static inline loff_t le_ih_k_type(const struct item_head *ih)
+{
+	return le_key_k_type(ih_version(ih), &(ih->ih_key));
+}
+
+static inline void set_le_key_k_offset(int version, struct reiserfs_key *key,
+				       loff_t offset)
+{
+	(version == KEY_FORMAT_3_5) ? (void)(key->u.k_offset_v1.k_offset = cpu_to_le32(offset)) :	/* jdm check */
+	    (void)(set_offset_v2_k_offset(&(key->u.k_offset_v2), offset));
+}
+
+static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset)
+{
+	set_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset);
+}
+
+static inline void set_le_key_k_type(int version, struct reiserfs_key *key,
+				     int type)
+{
+	(version == KEY_FORMAT_3_5) ?
+	    (void)(key->u.k_offset_v1.k_uniqueness =
+		   cpu_to_le32(type2uniqueness(type)))
+	    : (void)(set_offset_v2_k_type(&(key->u.k_offset_v2), type));
+}
+
+static inline void set_le_ih_k_type(struct item_head *ih, int type)
+{
+	set_le_key_k_type(ih_version(ih), &(ih->ih_key), type);
+}
+
+static inline int is_direntry_le_key(int version, struct reiserfs_key *key)
+{
+	return le_key_k_type(version, key) == TYPE_DIRENTRY;
+}
+
+static inline int is_direct_le_key(int version, struct reiserfs_key *key)
+{
+	return le_key_k_type(version, key) == TYPE_DIRECT;
+}
+
+static inline int is_indirect_le_key(int version, struct reiserfs_key *key)
+{
+	return le_key_k_type(version, key) == TYPE_INDIRECT;
+}
+
+static inline int is_statdata_le_key(int version, struct reiserfs_key *key)
+{
+	return le_key_k_type(version, key) == TYPE_STAT_DATA;
+}
+
+//
+// item header has version.
+//
+static inline int is_direntry_le_ih(struct item_head *ih)
+{
+	return is_direntry_le_key(ih_version(ih), &ih->ih_key);
+}
+
+static inline int is_direct_le_ih(struct item_head *ih)
+{
+	return is_direct_le_key(ih_version(ih), &ih->ih_key);
+}
+
+static inline int is_indirect_le_ih(struct item_head *ih)
+{
+	return is_indirect_le_key(ih_version(ih), &ih->ih_key);
+}
+
+static inline int is_statdata_le_ih(struct item_head *ih)
+{
+	return is_statdata_le_key(ih_version(ih), &ih->ih_key);
+}
+
+//
+// key is pointer to cpu key, result is cpu
+//
+static inline loff_t cpu_key_k_offset(const struct cpu_key *key)
+{
+	return key->on_disk_key.k_offset;
+}
+
+static inline loff_t cpu_key_k_type(const struct cpu_key *key)
+{
+	return key->on_disk_key.k_type;
+}
+
+static inline void set_cpu_key_k_offset(struct cpu_key *key, loff_t offset)
+{
+	key->on_disk_key.k_offset = offset;
+}
+
+static inline void set_cpu_key_k_type(struct cpu_key *key, int type)
+{
+	key->on_disk_key.k_type = type;
+}
+
+static inline void cpu_key_k_offset_dec(struct cpu_key *key)
+{
+	key->on_disk_key.k_offset--;
+}
+
+#define is_direntry_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRENTRY)
+#define is_direct_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRECT)
+#define is_indirect_cpu_key(key) (cpu_key_k_type (key) == TYPE_INDIRECT)
+#define is_statdata_cpu_key(key) (cpu_key_k_type (key) == TYPE_STAT_DATA)
+
+/* are these used ? */
+#define is_direntry_cpu_ih(ih) (is_direntry_cpu_key (&((ih)->ih_key)))
+#define is_direct_cpu_ih(ih) (is_direct_cpu_key (&((ih)->ih_key)))
+#define is_indirect_cpu_ih(ih) (is_indirect_cpu_key (&((ih)->ih_key)))
+#define is_statdata_cpu_ih(ih) (is_statdata_cpu_key (&((ih)->ih_key)))
+
+#define I_K_KEY_IN_ITEM(ih, key, n_blocksize) \
+    (!COMP_SHORT_KEYS(ih, key) && \
+	  I_OFF_BYTE_IN_ITEM(ih, k_offset(key), n_blocksize))
+
+/* maximal length of item */
+#define MAX_ITEM_LEN(block_size) (block_size - BLKH_SIZE - IH_SIZE)
+#define MIN_ITEM_LEN 1
+
+/* object identifier for root dir */
+#define REISERFS_ROOT_OBJECTID 2
+#define REISERFS_ROOT_PARENT_OBJECTID 1
+
+extern struct reiserfs_key root_key;
+
+/* 
+ * Picture represents a leaf of the S+tree
+ *  ______________________________________________________
+ * |      |  Array of     |                   |           |
+ * |Block |  Object-Item  |      F r e e      |  Objects- |
+ * | head |  Headers      |     S p a c e     |   Items   |
+ * |______|_______________|___________________|___________|
+ */
+
+/* Header of a disk block.  More precisely, header of a formatted leaf
+   or internal node, and not the header of an unformatted node. */
+struct block_head {
+	__le16 blk_level;	/* Level of a block in the tree. */
+	__le16 blk_nr_item;	/* Number of keys/items in a block. */
+	__le16 blk_free_space;	/* Block free space in bytes. */
+	__le16 blk_reserved;
+	/* dump this in v4/planA */
+	struct reiserfs_key blk_right_delim_key;	/* kept only for compatibility */
+};
+
+#define BLKH_SIZE                     (sizeof(struct block_head))
+#define blkh_level(p_blkh)            (le16_to_cpu((p_blkh)->blk_level))
+#define blkh_nr_item(p_blkh)          (le16_to_cpu((p_blkh)->blk_nr_item))
+#define blkh_free_space(p_blkh)       (le16_to_cpu((p_blkh)->blk_free_space))
+#define blkh_reserved(p_blkh)         (le16_to_cpu((p_blkh)->blk_reserved))
+#define set_blkh_level(p_blkh,val)    ((p_blkh)->blk_level = cpu_to_le16(val))
+#define set_blkh_nr_item(p_blkh,val)  ((p_blkh)->blk_nr_item = cpu_to_le16(val))
+#define set_blkh_free_space(p_blkh,val) ((p_blkh)->blk_free_space = cpu_to_le16(val))
+#define set_blkh_reserved(p_blkh,val) ((p_blkh)->blk_reserved = cpu_to_le16(val))
+#define blkh_right_delim_key(p_blkh)  ((p_blkh)->blk_right_delim_key)
+#define set_blkh_right_delim_key(p_blkh,val)  ((p_blkh)->blk_right_delim_key = val)
+
+/*
+ * values for blk_level field of the struct block_head
+ */
+
+#define FREE_LEVEL 0		/* when node gets removed from the tree its
+				   blk_level is set to FREE_LEVEL. It is then
+				   used to see whether the node is still in the
+				   tree */
+
+#define DISK_LEAF_NODE_LEVEL  1	/* Leaf node level. */
+
+/* Given the buffer head of a formatted node, resolve to the block head of that node. */
+#define B_BLK_HEAD(bh)			((struct block_head *)((bh)->b_data))
+/* Number of items that are in buffer. */
+#define B_NR_ITEMS(bh)			(blkh_nr_item(B_BLK_HEAD(bh)))
+#define B_LEVEL(bh)			(blkh_level(B_BLK_HEAD(bh)))
+#define B_FREE_SPACE(bh)		(blkh_free_space(B_BLK_HEAD(bh)))
+
+#define PUT_B_NR_ITEMS(bh, val)		do { set_blkh_nr_item(B_BLK_HEAD(bh), val); } while (0)
+#define PUT_B_LEVEL(bh, val)		do { set_blkh_level(B_BLK_HEAD(bh), val); } while (0)
+#define PUT_B_FREE_SPACE(bh, val)	do { set_blkh_free_space(B_BLK_HEAD(bh), val); } while (0)
+
+/* Get right delimiting key. -- little endian */
+#define B_PRIGHT_DELIM_KEY(bh)		(&(blk_right_delim_key(B_BLK_HEAD(bh))))
+
+/* Does the buffer contain a disk leaf. */
+#define B_IS_ITEMS_LEVEL(bh)		(B_LEVEL(bh) == DISK_LEAF_NODE_LEVEL)
+
+/* Does the buffer contain a disk internal node */
+#define B_IS_KEYS_LEVEL(bh)      (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \
+					    && B_LEVEL(bh) <= MAX_HEIGHT)
+
+/***************************************************************************/
+/*                             STAT DATA                                   */
+/***************************************************************************/
+
+//
+// old stat data is 32 bytes long. We are going to distinguish new one by
+// different size
+//
+struct stat_data_v1 {
+	__le16 sd_mode;		/* file type, permissions */
+	__le16 sd_nlink;	/* number of hard links */
+	__le16 sd_uid;		/* owner */
+	__le16 sd_gid;		/* group */
+	__le32 sd_size;		/* file size */
+	__le32 sd_atime;	/* time of last access */
+	__le32 sd_mtime;	/* time file was last modified  */
+	__le32 sd_ctime;	/* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */
+	union {
+		__le32 sd_rdev;
+		__le32 sd_blocks;	/* number of blocks file uses */
+	} __attribute__ ((__packed__)) u;
+	__le32 sd_first_direct_byte;	/* first byte of file which is stored
+					   in a direct item: except that if it
+					   equals 1 it is a symlink and if it
+					   equals ~(__u32)0 there is no
+					   direct item.  The existence of this
+					   field really grates on me. Let's
+					   replace it with a macro based on
+					   sd_size and our tail suppression
+					   policy.  Someday.  -Hans */
+} __attribute__ ((__packed__));
+
+#define SD_V1_SIZE              (sizeof(struct stat_data_v1))
+#define stat_data_v1(ih)        (ih_version (ih) == KEY_FORMAT_3_5)
+#define sd_v1_mode(sdp)         (le16_to_cpu((sdp)->sd_mode))
+#define set_sd_v1_mode(sdp,v)   ((sdp)->sd_mode = cpu_to_le16(v))
+#define sd_v1_nlink(sdp)        (le16_to_cpu((sdp)->sd_nlink))
+#define set_sd_v1_nlink(sdp,v)  ((sdp)->sd_nlink = cpu_to_le16(v))
+#define sd_v1_uid(sdp)          (le16_to_cpu((sdp)->sd_uid))
+#define set_sd_v1_uid(sdp,v)    ((sdp)->sd_uid = cpu_to_le16(v))
+#define sd_v1_gid(sdp)          (le16_to_cpu((sdp)->sd_gid))
+#define set_sd_v1_gid(sdp,v)    ((sdp)->sd_gid = cpu_to_le16(v))
+#define sd_v1_size(sdp)         (le32_to_cpu((sdp)->sd_size))
+#define set_sd_v1_size(sdp,v)   ((sdp)->sd_size = cpu_to_le32(v))
+#define sd_v1_atime(sdp)        (le32_to_cpu((sdp)->sd_atime))
+#define set_sd_v1_atime(sdp,v)  ((sdp)->sd_atime = cpu_to_le32(v))
+#define sd_v1_mtime(sdp)        (le32_to_cpu((sdp)->sd_mtime))
+#define set_sd_v1_mtime(sdp,v)  ((sdp)->sd_mtime = cpu_to_le32(v))
+#define sd_v1_ctime(sdp)        (le32_to_cpu((sdp)->sd_ctime))
+#define set_sd_v1_ctime(sdp,v)  ((sdp)->sd_ctime = cpu_to_le32(v))
+#define sd_v1_rdev(sdp)         (le32_to_cpu((sdp)->u.sd_rdev))
+#define set_sd_v1_rdev(sdp,v)   ((sdp)->u.sd_rdev = cpu_to_le32(v))
+#define sd_v1_blocks(sdp)       (le32_to_cpu((sdp)->u.sd_blocks))
+#define set_sd_v1_blocks(sdp,v) ((sdp)->u.sd_blocks = cpu_to_le32(v))
+#define sd_v1_first_direct_byte(sdp) \
+                                (le32_to_cpu((sdp)->sd_first_direct_byte))
+#define set_sd_v1_first_direct_byte(sdp,v) \
+                                ((sdp)->sd_first_direct_byte = cpu_to_le32(v))
+
+/* inode flags stored in sd_attrs (nee sd_reserved) */
+
+/* we want common flags to have the same values as in ext2,
+   so chattr(1) will work without problems */
+#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL
+#define REISERFS_APPEND_FL    FS_APPEND_FL
+#define REISERFS_SYNC_FL      FS_SYNC_FL
+#define REISERFS_NOATIME_FL   FS_NOATIME_FL
+#define REISERFS_NODUMP_FL    FS_NODUMP_FL
+#define REISERFS_SECRM_FL     FS_SECRM_FL
+#define REISERFS_UNRM_FL      FS_UNRM_FL
+#define REISERFS_COMPR_FL     FS_COMPR_FL
+#define REISERFS_NOTAIL_FL    FS_NOTAIL_FL
+
+/* persistent flags that file inherits from the parent directory */
+#define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL |	\
+				REISERFS_SYNC_FL |	\
+				REISERFS_NOATIME_FL |	\
+				REISERFS_NODUMP_FL |	\
+				REISERFS_SECRM_FL |	\
+				REISERFS_COMPR_FL |	\
+				REISERFS_NOTAIL_FL )
+
+/* Stat Data on disk (reiserfs version of UFS disk inode minus the
+   address blocks) */
+struct stat_data {
+	__le16 sd_mode;		/* file type, permissions */
+	__le16 sd_attrs;	/* persistent inode flags */
+	__le32 sd_nlink;	/* number of hard links */
+	__le64 sd_size;		/* file size */
+	__le32 sd_uid;		/* owner */
+	__le32 sd_gid;		/* group */
+	__le32 sd_atime;	/* time of last access */
+	__le32 sd_mtime;	/* time file was last modified  */
+	__le32 sd_ctime;	/* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */
+	__le32 sd_blocks;
+	union {
+		__le32 sd_rdev;
+		__le32 sd_generation;
+		//__le32 sd_first_direct_byte;
+		/* first byte of file which is stored in a
+		   direct item: except that if it equals 1
+		   it is a symlink and if it equals
+		   ~(__u32)0 there is no direct item.  The
+		   existence of this field really grates
+		   on me. Let's replace it with a macro
+		   based on sd_size and our tail
+		   suppression policy? */
+	} __attribute__ ((__packed__)) u;
+} __attribute__ ((__packed__));
+//
+// this is 44 bytes long
+//
+#define SD_SIZE (sizeof(struct stat_data))
+#define SD_V2_SIZE              SD_SIZE
+#define stat_data_v2(ih)        (ih_version (ih) == KEY_FORMAT_3_6)
+#define sd_v2_mode(sdp)         (le16_to_cpu((sdp)->sd_mode))
+#define set_sd_v2_mode(sdp,v)   ((sdp)->sd_mode = cpu_to_le16(v))
+/* sd_reserved */
+/* set_sd_reserved */
+#define sd_v2_nlink(sdp)        (le32_to_cpu((sdp)->sd_nlink))
+#define set_sd_v2_nlink(sdp,v)  ((sdp)->sd_nlink = cpu_to_le32(v))
+#define sd_v2_size(sdp)         (le64_to_cpu((sdp)->sd_size))
+#define set_sd_v2_size(sdp,v)   ((sdp)->sd_size = cpu_to_le64(v))
+#define sd_v2_uid(sdp)          (le32_to_cpu((sdp)->sd_uid))
+#define set_sd_v2_uid(sdp,v)    ((sdp)->sd_uid = cpu_to_le32(v))
+#define sd_v2_gid(sdp)          (le32_to_cpu((sdp)->sd_gid))
+#define set_sd_v2_gid(sdp,v)    ((sdp)->sd_gid = cpu_to_le32(v))
+#define sd_v2_atime(sdp)        (le32_to_cpu((sdp)->sd_atime))
+#define set_sd_v2_atime(sdp,v)  ((sdp)->sd_atime = cpu_to_le32(v))
+#define sd_v2_mtime(sdp)        (le32_to_cpu((sdp)->sd_mtime))
+#define set_sd_v2_mtime(sdp,v)  ((sdp)->sd_mtime = cpu_to_le32(v))
+#define sd_v2_ctime(sdp)        (le32_to_cpu((sdp)->sd_ctime))
+#define set_sd_v2_ctime(sdp,v)  ((sdp)->sd_ctime = cpu_to_le32(v))
+#define sd_v2_blocks(sdp)       (le32_to_cpu((sdp)->sd_blocks))
+#define set_sd_v2_blocks(sdp,v) ((sdp)->sd_blocks = cpu_to_le32(v))
+#define sd_v2_rdev(sdp)         (le32_to_cpu((sdp)->u.sd_rdev))
+#define set_sd_v2_rdev(sdp,v)   ((sdp)->u.sd_rdev = cpu_to_le32(v))
+#define sd_v2_generation(sdp)   (le32_to_cpu((sdp)->u.sd_generation))
+#define set_sd_v2_generation(sdp,v) ((sdp)->u.sd_generation = cpu_to_le32(v))
+#define sd_v2_attrs(sdp)         (le16_to_cpu((sdp)->sd_attrs))
+#define set_sd_v2_attrs(sdp,v)   ((sdp)->sd_attrs = cpu_to_le16(v))
+
+/***************************************************************************/
+/*                      DIRECTORY STRUCTURE                                */
+/***************************************************************************/
+/* 
+   Picture represents the structure of directory items
+   ________________________________________________
+   |  Array of     |   |     |        |       |   |
+   | directory     |N-1| N-2 | ....   |   1st |0th|
+   | entry headers |   |     |        |       |   |
+   |_______________|___|_____|________|_______|___|
+                    <----   directory entries         ------>
+
+ First directory item has k_offset component 1. We store "." and ".."
+ in one item, always, we never split "." and ".." into differing
+ items.  This makes, among other things, the code for removing
+ directories simpler. */
+#define SD_OFFSET  0
+#define SD_UNIQUENESS 0
+#define DOT_OFFSET 1
+#define DOT_DOT_OFFSET 2
+#define DIRENTRY_UNIQUENESS 500
+
+/* */
+#define FIRST_ITEM_OFFSET 1
+
+/*
+   Q: How to get key of object pointed to by entry from entry?  
+
+   A: Each directory entry has its header. This header has deh_dir_id and deh_objectid fields, those are key
+      of object, entry points to */
+
+/* NOT IMPLEMENTED:   
+   Directory will someday contain stat data of object */
+
+struct reiserfs_de_head {
+	__le32 deh_offset;	/* third component of the directory entry key */
+	__le32 deh_dir_id;	/* objectid of the parent directory of the object, that is referenced
+				   by directory entry */
+	__le32 deh_objectid;	/* objectid of the object, that is referenced by directory entry */
+	__le16 deh_location;	/* offset of name in the whole item */
+	__le16 deh_state;	/* whether 1) entry contains stat data (for future), and 2) whether
+				   entry is hidden (unlinked) */
+} __attribute__ ((__packed__));
+#define DEH_SIZE                  sizeof(struct reiserfs_de_head)
+#define deh_offset(p_deh)         (le32_to_cpu((p_deh)->deh_offset))
+#define deh_dir_id(p_deh)         (le32_to_cpu((p_deh)->deh_dir_id))
+#define deh_objectid(p_deh)       (le32_to_cpu((p_deh)->deh_objectid))
+#define deh_location(p_deh)       (le16_to_cpu((p_deh)->deh_location))
+#define deh_state(p_deh)          (le16_to_cpu((p_deh)->deh_state))
+
+#define put_deh_offset(p_deh,v)   ((p_deh)->deh_offset = cpu_to_le32((v)))
+#define put_deh_dir_id(p_deh,v)   ((p_deh)->deh_dir_id = cpu_to_le32((v)))
+#define put_deh_objectid(p_deh,v) ((p_deh)->deh_objectid = cpu_to_le32((v)))
+#define put_deh_location(p_deh,v) ((p_deh)->deh_location = cpu_to_le16((v)))
+#define put_deh_state(p_deh,v)    ((p_deh)->deh_state = cpu_to_le16((v)))
+
+/* empty directory contains two entries "." and ".." and their headers */
+#define EMPTY_DIR_SIZE \
+(DEH_SIZE * 2 + ROUND_UP (strlen (".")) + ROUND_UP (strlen ("..")))
+
+/* old format directories have this size when empty */
+#define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3)
+
+#define DEH_Statdata 0		/* not used now */
+#define DEH_Visible 2
+
+/* 64 bit systems (and the S/390) need to be aligned explicitly -jdm */
+#if BITS_PER_LONG == 64 || defined(__s390__) || defined(__hppa__)
+#   define ADDR_UNALIGNED_BITS  (3)
+#endif
+
+/* These are only used to manipulate deh_state.
+ * Because of this, we'll use the ext2_ bit routines,
+ * since they are little endian */
+#ifdef ADDR_UNALIGNED_BITS
+
+#   define aligned_address(addr)           ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1)))
+#   define unaligned_offset(addr)          (((int)((long)(addr) & ((1 << ADDR_UNALIGNED_BITS) - 1))) << 3)
+
+#   define set_bit_unaligned(nr, addr)	\
+	__test_and_set_bit_le((nr) + unaligned_offset(addr), aligned_address(addr))
+#   define clear_bit_unaligned(nr, addr)	\
+	__test_and_clear_bit_le((nr) + unaligned_offset(addr), aligned_address(addr))
+#   define test_bit_unaligned(nr, addr)	\
+	test_bit_le((nr) + unaligned_offset(addr), aligned_address(addr))
+
+#else
+
+#   define set_bit_unaligned(nr, addr)	__test_and_set_bit_le(nr, addr)
+#   define clear_bit_unaligned(nr, addr)	__test_and_clear_bit_le(nr, addr)
+#   define test_bit_unaligned(nr, addr)	test_bit_le(nr, addr)
+
+#endif
+
+#define mark_de_with_sd(deh)        set_bit_unaligned (DEH_Statdata, &((deh)->deh_state))
+#define mark_de_without_sd(deh)     clear_bit_unaligned (DEH_Statdata, &((deh)->deh_state))
+#define mark_de_visible(deh)	    set_bit_unaligned (DEH_Visible, &((deh)->deh_state))
+#define mark_de_hidden(deh)	    clear_bit_unaligned (DEH_Visible, &((deh)->deh_state))
+
+#define de_with_sd(deh)		    test_bit_unaligned (DEH_Statdata, &((deh)->deh_state))
+#define de_visible(deh)	    	    test_bit_unaligned (DEH_Visible, &((deh)->deh_state))
+#define de_hidden(deh)	    	    !test_bit_unaligned (DEH_Visible, &((deh)->deh_state))
+
+extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
+				   __le32 par_dirid, __le32 par_objid);
+extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
+				__le32 par_dirid, __le32 par_objid);
+
+/* array of the entry headers */
+ /* get item body */
+#define B_I_PITEM(bh,ih) ( (bh)->b_data + ih_location(ih) )
+#define B_I_DEH(bh,ih) ((struct reiserfs_de_head *)(B_I_PITEM(bh,ih)))
+
+/* length of the directory entry in directory item. This define
+   calculates length of i-th directory entry using directory entry
+   locations from dir entry head. When it calculates length of 0-th
+   directory entry, it uses length of whole item in place of entry
+   location of the non-existent following entry in the calculation.
+   See picture above.*/
+/*
+#define I_DEH_N_ENTRY_LENGTH(ih,deh,i) \
+((i) ? (deh_location((deh)-1) - deh_location((deh))) : (ih_item_len((ih)) - deh_location((deh))))
+*/
+static inline int entry_length(const struct buffer_head *bh,
+			       const struct item_head *ih, int pos_in_item)
+{
+	struct reiserfs_de_head *deh;
+
+	deh = B_I_DEH(bh, ih) + pos_in_item;
+	if (pos_in_item)
+		return deh_location(deh - 1) - deh_location(deh);
+
+	return ih_item_len(ih) - deh_location(deh);
+}
+
+/* number of entries in the directory item, depends on ENTRY_COUNT being at the start of directory dynamic data. */
+#define I_ENTRY_COUNT(ih) (ih_entry_count((ih)))
+
+/* name by bh, ih and entry_num */
+#define B_I_E_NAME(bh,ih,entry_num) ((char *)(bh->b_data + ih_location(ih) + deh_location(B_I_DEH(bh,ih)+(entry_num))))
+
+// two entries per block (at least)
+#define REISERFS_MAX_NAME(block_size) 255
+
+/* this structure is used for operations on directory entries. It is
+   not a disk structure. */
+/* When reiserfs_find_entry or search_by_entry_key find directory
+   entry, they return filled reiserfs_dir_entry structure */
+struct reiserfs_dir_entry {
+	struct buffer_head *de_bh;
+	int de_item_num;
+	struct item_head *de_ih;
+	int de_entry_num;
+	struct reiserfs_de_head *de_deh;
+	int de_entrylen;
+	int de_namelen;
+	char *de_name;
+	unsigned long *de_gen_number_bit_string;
+
+	__u32 de_dir_id;
+	__u32 de_objectid;
+
+	struct cpu_key de_entry_key;
+};
+
+/* these defines are useful when a particular member of a reiserfs_dir_entry is needed */
+
+/* pointer to file name, stored in entry */
+#define B_I_DEH_ENTRY_FILE_NAME(bh,ih,deh) (B_I_PITEM (bh, ih) + deh_location(deh))
+
+/* length of name */
+#define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \
+(I_DEH_N_ENTRY_LENGTH (ih, deh, entry_num) - (de_with_sd (deh) ? SD_SIZE : 0))
+
+/* hash value occupies bits from 7 up to 30 */
+#define GET_HASH_VALUE(offset) ((offset) & 0x7fffff80LL)
+/* generation number occupies 7 bits starting from 0 up to 6 */
+#define GET_GENERATION_NUMBER(offset) ((offset) & 0x7fLL)
+#define MAX_GENERATION_NUMBER  127
+
+#define SET_GENERATION_NUMBER(offset,gen_number) (GET_HASH_VALUE(offset)|(gen_number))
+
+/*
+ * Picture represents an internal node of the reiserfs tree
+ *  ______________________________________________________
+ * |      |  Array of     |  Array of         |  Free     |
+ * |block |    keys       |  pointers         | space     |
+ * | head |      N        |      N+1          |           |
+ * |______|_______________|___________________|___________|
+ */
+
+/***************************************************************************/
+/*                      DISK CHILD                                         */
+/***************************************************************************/
+/* Disk child pointer: The pointer from an internal node of the tree
+   to a node that is on disk. */
+struct disk_child {
+	__le32 dc_block_number;	/* Disk child's block number. */
+	__le16 dc_size;		/* Disk child's used space.   */
+	__le16 dc_reserved;
+};
+
+#define DC_SIZE (sizeof(struct disk_child))
+#define dc_block_number(dc_p)	(le32_to_cpu((dc_p)->dc_block_number))
+#define dc_size(dc_p)		(le16_to_cpu((dc_p)->dc_size))
+#define put_dc_block_number(dc_p, val)   do { (dc_p)->dc_block_number = cpu_to_le32(val); } while(0)
+#define put_dc_size(dc_p, val)   do { (dc_p)->dc_size = cpu_to_le16(val); } while(0)
+
+/* Get disk child by buffer header and position in the tree node. */
+#define B_N_CHILD(bh, n_pos)  ((struct disk_child *)\
+((bh)->b_data + BLKH_SIZE + B_NR_ITEMS(bh) * KEY_SIZE + DC_SIZE * (n_pos)))
+
+/* Get disk child number by buffer header and position in the tree node. */
+#define B_N_CHILD_NUM(bh, n_pos) (dc_block_number(B_N_CHILD(bh, n_pos)))
+#define PUT_B_N_CHILD_NUM(bh, n_pos, val) \
+				(put_dc_block_number(B_N_CHILD(bh, n_pos), val))
+
+ /* maximal value of field child_size in structure disk_child */
+ /* child size is the combined size of all items and their headers */
+#define MAX_CHILD_SIZE(bh) ((int)( (bh)->b_size - BLKH_SIZE ))
+
+/* amount of used space in buffer (not including block head) */
+#define B_CHILD_SIZE(cur) (MAX_CHILD_SIZE(cur)-(B_FREE_SPACE(cur)))
+
+/* max and min number of keys in internal node */
+#define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) )
+#define MIN_NR_KEY(bh)    (MAX_NR_KEY(bh)/2)
+
+/***************************************************************************/
+/*                      PATH STRUCTURES AND DEFINES                        */
+/***************************************************************************/
+
+/* Search_by_key fills up the path from the root to the leaf as it descends the tree looking for the
+   key.  It uses reiserfs_bread to try to find buffers in the cache given their block number.  If it
+   does not find them in the cache it reads them from disk.  For each node search_by_key finds using
+   reiserfs_bread it then uses bin_search to look through that node.  bin_search will find the
+   position of the block_number of the next node if it is looking through an internal node.  If it
+   is looking through a leaf node bin_search will find the position of the item which has key either
+   equal to given key, or which is the maximal key less than the given key. */
+
+struct path_element {
+	struct buffer_head *pe_buffer;	/* Pointer to the buffer at the path in the tree. */
+	int pe_position;	/* Position in the tree node which is placed in the */
+	/* buffer above.                                  */
+};
+
+#define MAX_HEIGHT 5		/* maximal height of a tree. don't change this without changing JOURNAL_PER_BALANCE_CNT */
+#define EXTENDED_MAX_HEIGHT         7	/* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */
+#define FIRST_PATH_ELEMENT_OFFSET   2	/* Must be equal to at least 2. */
+
+#define ILLEGAL_PATH_ELEMENT_OFFSET 1	/* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */
+#define MAX_FEB_SIZE 6		/* this MUST be MAX_HEIGHT + 1. See about FEB below */
+
+/* We need to keep track of who the ancestors of nodes are.  When we
+   perform a search we record which nodes were visited while
+   descending the tree looking for the node we searched for. This list
+   of nodes is called the path.  This information is used while
+   performing balancing.  Note that this path information may become
+   invalid, and this means we must check it when using it to see if it
+   is still valid. You'll need to read search_by_key and the comments
+   in it, especially about decrement_counters_in_path(), to understand
+   this structure.  
+
+Paths make the code so much harder to work with and debug.... An
+enormous number of bugs are due to them, and trying to write or modify
+code that uses them just makes my head hurt.  They are based on an
+excessive effort to avoid disturbing the precious VFS code.:-( The
+gods only know how we are going to SMP the code that uses them.
+znodes are the way! */
+
+#define PATH_READA	0x1	/* do read ahead */
+#define PATH_READA_BACK 0x2	/* read backwards */
+
+struct treepath {
+	int path_length;	/* Length of the array above.   */
+	int reada;
+	struct path_element path_elements[EXTENDED_MAX_HEIGHT];	/* Array of the path elements.  */
+	int pos_in_item;
+};
+
+#define pos_in_item(path) ((path)->pos_in_item)
+
+#define INITIALIZE_PATH(var) \
+struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,}
+
+/* Get path element by path and path position. */
+#define PATH_OFFSET_PELEMENT(path, n_offset)  ((path)->path_elements + (n_offset))
+
+/* Get buffer header at the path by path and path position. */
+#define PATH_OFFSET_PBUFFER(path, n_offset)   (PATH_OFFSET_PELEMENT(path, n_offset)->pe_buffer)
+
+/* Get position in the element at the path by path and path position. */
+#define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position)
+
+#define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length))
+				/* you know, to the person who didn't
+				   write this the macro name does not
+				   at first suggest what it does.
+				   Maybe POSITION_FROM_PATH_END? Or
+				   maybe we should just focus on
+				   dumping paths... -Hans */
+#define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length))
+
+#define PATH_PITEM_HEAD(path)    B_N_PITEM_HEAD(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path))
+
+/* in do_balance leaf has h == 0 in contrast with path structure,
+   where root has level == 0. That is why we need these defines */
+#define PATH_H_PBUFFER(path, h) PATH_OFFSET_PBUFFER (path, path->path_length - (h))	/* tb->S[h] */
+#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1)	/* tb->F[h] or tb->S[0]->b_parent */
+#define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, path->path_length - (h))
+#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1)	/* tb->S[h]->b_item_order */
+
+#define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h))
+
+#define get_last_bh(path) PATH_PLAST_BUFFER(path)
+#define get_ih(path) PATH_PITEM_HEAD(path)
+#define get_item_pos(path) PATH_LAST_POSITION(path)
+#define get_item(path) ((void *)B_N_PITEM(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION (path)))
+#define item_moved(ih,path) comp_items(ih, path)
+#define path_changed(ih,path) comp_items (ih, path)
+
+/***************************************************************************/
+/*                       MISC                                              */
+/***************************************************************************/
+
+/* Size of pointer to the unformatted node. */
+#define UNFM_P_SIZE (sizeof(unp_t))
+#define UNFM_P_SHIFT 2
+
+// in in-core inode key is stored on le form
+#define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key))
+
+#define MAX_UL_INT 0xffffffff
+#define MAX_INT    0x7ffffff
+#define MAX_US_INT 0xffff
+
+// reiserfs version 2 has max offset 60 bits. Version 1 - 32 bit offset
+#define U32_MAX (~(__u32)0)
+
+static inline loff_t max_reiserfs_offset(struct inode *inode)
+{
+	if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5)
+		return (loff_t) U32_MAX;
+
+	return (loff_t) ((~(__u64) 0) >> 4);
+}
+
+/*#define MAX_KEY_UNIQUENESS	MAX_UL_INT*/
+#define MAX_KEY_OBJECTID	MAX_UL_INT
+
+#define MAX_B_NUM  MAX_UL_INT
+#define MAX_FC_NUM MAX_US_INT
+
+/* the purpose is to detect overflow of an unsigned short */
+#define REISERFS_LINK_MAX (MAX_US_INT - 1000)
+
+/* The following defines are used in reiserfs_insert_item and reiserfs_append_item  */
+#define REISERFS_KERNEL_MEM		0	/* reiserfs kernel memory mode  */
+#define REISERFS_USER_MEM		1	/* reiserfs user memory mode            */
+
+#define fs_generation(s) (REISERFS_SB(s)->s_generation_counter)
+#define get_generation(s) atomic_read (&fs_generation(s))
+#define FILESYSTEM_CHANGED_TB(tb)  (get_generation((tb)->tb_sb) != (tb)->fs_gen)
+#define __fs_changed(gen,s) (gen != get_generation (s))
+#define fs_changed(gen,s)		\
+({					\
+	reiserfs_cond_resched(s);	\
+	__fs_changed(gen, s);		\
+})
+
+/***************************************************************************/
+/*                  FIXATE NODES                                           */
+/***************************************************************************/
+
+#define VI_TYPE_LEFT_MERGEABLE 1
+#define VI_TYPE_RIGHT_MERGEABLE 2
+
+/* To make any changes in the tree we always first find node, that
+   contains item to be changed/deleted or place to insert a new
+   item. We call this node S. To do balancing we need to decide what
+   we will shift to left/right neighbor, or to a new node, where new
+   item will be etc. To make this analysis simpler we build virtual
+   node. Virtual node is an array of items, that will replace items of
+   node S. (For instance if we are going to delete an item, virtual
+   node does not contain it). Virtual node keeps information about
+   item sizes and types, mergeability of first and last items, sizes
+   of all entries in directory item. We use this array of items when
+   calculating what we can shift to neighbors and how many nodes we
+   have to have if we do not any shiftings, if we shift to left/right
+   neighbor or to both. */
+struct virtual_item {
+	int vi_index;		// index in the array of item operations
+	unsigned short vi_type;	// left/right mergeability
+	unsigned short vi_item_len;	/* length of item that it will have after balancing */
+	struct item_head *vi_ih;
+	const char *vi_item;	// body of item (old or new)
+	const void *vi_new_data;	// 0 always but paste mode
+	void *vi_uarea;		// item specific area
+};
+
+struct virtual_node {
+	char *vn_free_ptr;	/* this is a pointer to the free space in the buffer */
+	unsigned short vn_nr_item;	/* number of items in virtual node */
+	short vn_size;		/* size of node , that node would have if it has unlimited size and no balancing is performed */
+	short vn_mode;		/* mode of balancing (paste, insert, delete, cut) */
+	short vn_affected_item_num;
+	short vn_pos_in_item;
+	struct item_head *vn_ins_ih;	/* item header of inserted item, 0 for other modes */
+	const void *vn_data;
+	struct virtual_item *vn_vi;	/* array of items (including a new one, excluding item to be deleted) */
+};
+
+/* used by directory items when creating virtual nodes */
+struct direntry_uarea {
+	int flags;
+	__u16 entry_count;
+	__u16 entry_sizes[1];
+} __attribute__ ((__packed__));
+
+/***************************************************************************/
+/*                  TREE BALANCE                                           */
+/***************************************************************************/
+
+/* This temporary structure is used in tree balance algorithms, and
+   constructed as we go to the extent that its various parts are
+   needed.  It contains arrays of nodes that can potentially be
+   involved in the balancing of node S, and parameters that define how
+   each of the nodes must be balanced.  Note that in these algorithms
+   for balancing the worst case is to need to balance the current node
+   S and the left and right neighbors and all of their parents plus
+   create a new node.  We implement S1 balancing for the leaf nodes
+   and S0 balancing for the internal nodes (S1 and S0 are defined in
+   our papers.)*/
+
+#define MAX_FREE_BLOCK 7	/* size of the array of buffers to free at end of do_balance */
+
+/* maximum number of FEB blocknrs on a single level */
+#define MAX_AMOUNT_NEEDED 2
+
+/* someday somebody will prefix every field in this struct with tb_ */
+struct tree_balance {
+	int tb_mode;
+	int need_balance_dirty;
+	struct super_block *tb_sb;
+	struct reiserfs_transaction_handle *transaction_handle;
+	struct treepath *tb_path;
+	struct buffer_head *L[MAX_HEIGHT];	/* array of left neighbors of nodes in the path */
+	struct buffer_head *R[MAX_HEIGHT];	/* array of right neighbors of nodes in the path */
+	struct buffer_head *FL[MAX_HEIGHT];	/* array of fathers of the left  neighbors      */
+	struct buffer_head *FR[MAX_HEIGHT];	/* array of fathers of the right neighbors      */
+	struct buffer_head *CFL[MAX_HEIGHT];	/* array of common parents of center node and its left neighbor  */
+	struct buffer_head *CFR[MAX_HEIGHT];	/* array of common parents of center node and its right neighbor */
+
+	struct buffer_head *FEB[MAX_FEB_SIZE];	/* array of empty buffers. Number of buffers in array equals
+						   cur_blknum. */
+	struct buffer_head *used[MAX_FEB_SIZE];
+	struct buffer_head *thrown[MAX_FEB_SIZE];
+	int lnum[MAX_HEIGHT];	/* array of number of items which must be
+				   shifted to the left in order to balance the
+				   current node; for leaves includes item that
+				   will be partially shifted; for internal
+				   nodes, it is the number of child pointers
+				   rather than items. It includes the new item
+				   being created. The code sometimes subtracts
+				   one to get the number of wholly shifted
+				   items for other purposes. */
+	int rnum[MAX_HEIGHT];	/* substitute right for left in comment above */
+	int lkey[MAX_HEIGHT];	/* array indexed by height h mapping the key delimiting L[h] and
+				   S[h] to its item number within the node CFL[h] */
+	int rkey[MAX_HEIGHT];	/* substitute r for l in comment above */
+	int insert_size[MAX_HEIGHT];	/* the number of bytes by we are trying to add or remove from
+					   S[h]. A negative value means removing.  */
+	int blknum[MAX_HEIGHT];	/* number of nodes that will replace node S[h] after
+				   balancing on the level h of the tree.  If 0 then S is
+				   being deleted, if 1 then S is remaining and no new nodes
+				   are being created, if 2 or 3 then 1 or 2 new nodes is
+				   being created */
+
+	/* fields that are used only for balancing leaves of the tree */
+	int cur_blknum;		/* number of empty blocks having been already allocated                 */
+	int s0num;		/* number of items that fall into left most  node when S[0] splits     */
+	int s1num;		/* number of items that fall into first  new node when S[0] splits     */
+	int s2num;		/* number of items that fall into second new node when S[0] splits     */
+	int lbytes;		/* number of bytes which can flow to the left neighbor from the        left    */
+	/* most liquid item that cannot be shifted from S[0] entirely         */
+	/* if -1 then nothing will be partially shifted */
+	int rbytes;		/* number of bytes which will flow to the right neighbor from the right        */
+	/* most liquid item that cannot be shifted from S[0] entirely         */
+	/* if -1 then nothing will be partially shifted                           */
+	int s1bytes;		/* number of bytes which flow to the first  new node when S[0] splits   */
+	/* note: if S[0] splits into 3 nodes, then items do not need to be cut  */
+	int s2bytes;
+	struct buffer_head *buf_to_free[MAX_FREE_BLOCK];	/* buffers which are to be freed after do_balance finishes by unfix_nodes */
+	char *vn_buf;		/* kmalloced memory. Used to create
+				   virtual node and keep map of
+				   dirtied bitmap blocks */
+	int vn_buf_size;	/* size of the vn_buf */
+	struct virtual_node *tb_vn;	/* VN starts after bitmap of bitmap blocks */
+
+	int fs_gen;		/* saved value of `reiserfs_generation' counter
+				   see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */
+#ifdef DISPLACE_NEW_PACKING_LOCALITIES
+	struct in_core_key key;	/* key pointer, to pass to block allocator or
+				   another low-level subsystem */
+#endif
+};
+
+/* These are modes of balancing */
+
+/* When inserting an item. */
+#define M_INSERT	'i'
+/* When inserting into (directories only) or appending onto an already
+   existent item. */
+#define M_PASTE		'p'
+/* When deleting an item. */
+#define M_DELETE	'd'
+/* When truncating an item or removing an entry from a (directory) item. */
+#define M_CUT 		'c'
+
+/* used when balancing on leaf level skipped (in reiserfsck) */
+#define M_INTERNAL	'n'
+
+/* When further balancing is not needed, then do_balance does not need
+   to be called. */
+#define M_SKIP_BALANCING 		's'
+#define M_CONVERT	'v'
+
+/* modes of leaf_move_items */
+#define LEAF_FROM_S_TO_L 0
+#define LEAF_FROM_S_TO_R 1
+#define LEAF_FROM_R_TO_L 2
+#define LEAF_FROM_L_TO_R 3
+#define LEAF_FROM_S_TO_SNEW 4
+
+#define FIRST_TO_LAST 0
+#define LAST_TO_FIRST 1
+
+/* used in do_balance for passing parent of node information that has
+   been gotten from tb struct */
+struct buffer_info {
+	struct tree_balance *tb;
+	struct buffer_head *bi_bh;
+	struct buffer_head *bi_parent;
+	int bi_position;
+};
+
+static inline struct super_block *sb_from_tb(struct tree_balance *tb)
+{
+	return tb ? tb->tb_sb : NULL;
+}
+
+static inline struct super_block *sb_from_bi(struct buffer_info *bi)
+{
+	return bi ? sb_from_tb(bi->tb) : NULL;
+}
+
+/* there are 4 types of items: stat data, directory item, indirect, direct.
++-------------------+------------+--------------+------------+
+|	            |  k_offset  | k_uniqueness | mergeable? |
++-------------------+------------+--------------+------------+
+|     stat data     |	0        |      0       |   no       |
++-------------------+------------+--------------+------------+
+| 1st directory item| DOT_OFFSET |DIRENTRY_UNIQUENESS|   no       | 
+| non 1st directory | hash value |              |   yes      |
+|     item          |            |              |            |
++-------------------+------------+--------------+------------+
+| indirect item     | offset + 1 |TYPE_INDIRECT |   if this is not the first indirect item of the object
++-------------------+------------+--------------+------------+
+| direct item       | offset + 1 |TYPE_DIRECT   | if not this is not the first direct item of the object
++-------------------+------------+--------------+------------+
+*/
+
+struct item_operations {
+	int (*bytes_number) (struct item_head * ih, int block_size);
+	void (*decrement_key) (struct cpu_key *);
+	int (*is_left_mergeable) (struct reiserfs_key * ih,
+				  unsigned long bsize);
+	void (*print_item) (struct item_head *, char *item);
+	void (*check_item) (struct item_head *, char *item);
+
+	int (*create_vi) (struct virtual_node * vn, struct virtual_item * vi,
+			  int is_affected, int insert_size);
+	int (*check_left) (struct virtual_item * vi, int free,
+			   int start_skip, int end_skip);
+	int (*check_right) (struct virtual_item * vi, int free);
+	int (*part_size) (struct virtual_item * vi, int from, int to);
+	int (*unit_num) (struct virtual_item * vi);
+	void (*print_vi) (struct virtual_item * vi);
+};
+
+extern struct item_operations *item_ops[TYPE_ANY + 1];
+
+#define op_bytes_number(ih,bsize)                    item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize)
+#define op_is_left_mergeable(key,bsize)              item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize)
+#define op_print_item(ih,item)                       item_ops[le_ih_k_type (ih)]->print_item (ih, item)
+#define op_check_item(ih,item)                       item_ops[le_ih_k_type (ih)]->check_item (ih, item)
+#define op_create_vi(vn,vi,is_affected,insert_size)  item_ops[le_ih_k_type ((vi)->vi_ih)]->create_vi (vn,vi,is_affected,insert_size)
+#define op_check_left(vi,free,start_skip,end_skip) item_ops[(vi)->vi_index]->check_left (vi, free, start_skip, end_skip)
+#define op_check_right(vi,free)                      item_ops[(vi)->vi_index]->check_right (vi, free)
+#define op_part_size(vi,from,to)                     item_ops[(vi)->vi_index]->part_size (vi, from, to)
+#define op_unit_num(vi)				     item_ops[(vi)->vi_index]->unit_num (vi)
+#define op_print_vi(vi)                              item_ops[(vi)->vi_index]->print_vi (vi)
+
+#define COMP_SHORT_KEYS comp_short_keys
+
+/* number of blocks pointed to by the indirect item */
+#define I_UNFM_NUM(ih)	(ih_item_len(ih) / UNFM_P_SIZE)
+
+/* the used space within the unformatted node corresponding to pos within the item pointed to by ih */
+#define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size))
+
+/* number of bytes contained by the direct item or the unformatted nodes the indirect item points to */
+
+/* get the item header */
+#define B_N_PITEM_HEAD(bh,item_num) ( (struct item_head * )((bh)->b_data + BLKH_SIZE) + (item_num) )
+
+/* get key */
+#define B_N_PDELIM_KEY(bh,item_num) ( (struct reiserfs_key * )((bh)->b_data + BLKH_SIZE) + (item_num) )
+
+/* get the key */
+#define B_N_PKEY(bh,item_num) ( &(B_N_PITEM_HEAD(bh,item_num)->ih_key) )
+
+/* get item body */
+#define B_N_PITEM(bh,item_num) ( (bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(item_num))))
+
+/* get the stat data by the buffer header and the item order */
+#define B_N_STAT_DATA(bh,nr) \
+( (struct stat_data *)((bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(nr))) ) )
+
+    /* following defines use reiserfs buffer header and item header */
+
+/* get stat-data */
+#define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) )
+
+// this is 3976 for size==4096
+#define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE)
+
+/* indirect items consist of entries which contain blocknrs, pos
+   indicates which entry, and B_I_POS_UNFM_POINTER resolves to the
+   blocknr contained by the entry pos points to */
+#define B_I_POS_UNFM_POINTER(bh,ih,pos) le32_to_cpu(*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)))
+#define PUT_B_I_POS_UNFM_POINTER(bh,ih,pos, val) do {*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)) = cpu_to_le32(val); } while (0)
+
+struct reiserfs_iget_args {
+	__u32 objectid;
+	__u32 dirid;
+};
+
+/***************************************************************************/
+/*                    FUNCTION DECLARATIONS                                */
+/***************************************************************************/
+
+#define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12)
+
+#define journal_trans_half(blocksize) \
+	((blocksize - sizeof (struct reiserfs_journal_desc) + sizeof (__u32) - 12) / sizeof (__u32))
+
+/* journal.c see journal.c for all the comments here */
+
+/* first block written in a commit.  */
+struct reiserfs_journal_desc {
+	__le32 j_trans_id;	/* id of commit */
+	__le32 j_len;		/* length of commit. len +1 is the commit block */
+	__le32 j_mount_id;	/* mount id of this trans */
+	__le32 j_realblock[1];	/* real locations for each block */
+};
+
+#define get_desc_trans_id(d)   le32_to_cpu((d)->j_trans_id)
+#define get_desc_trans_len(d)  le32_to_cpu((d)->j_len)
+#define get_desc_mount_id(d)   le32_to_cpu((d)->j_mount_id)
+
+#define set_desc_trans_id(d,val)       do { (d)->j_trans_id = cpu_to_le32 (val); } while (0)
+#define set_desc_trans_len(d,val)      do { (d)->j_len = cpu_to_le32 (val); } while (0)
+#define set_desc_mount_id(d,val)       do { (d)->j_mount_id = cpu_to_le32 (val); } while (0)
+
+/* last block written in a commit */
+struct reiserfs_journal_commit {
+	__le32 j_trans_id;	/* must match j_trans_id from the desc block */
+	__le32 j_len;		/* ditto */
+	__le32 j_realblock[1];	/* real locations for each block */
+};
+
+#define get_commit_trans_id(c) le32_to_cpu((c)->j_trans_id)
+#define get_commit_trans_len(c)        le32_to_cpu((c)->j_len)
+#define get_commit_mount_id(c) le32_to_cpu((c)->j_mount_id)
+
+#define set_commit_trans_id(c,val)     do { (c)->j_trans_id = cpu_to_le32 (val); } while (0)
+#define set_commit_trans_len(c,val)    do { (c)->j_len = cpu_to_le32 (val); } while (0)
+
+/* this header block gets written whenever a transaction is considered fully flushed, and is more recent than the
+** last fully flushed transaction.  fully flushed means all the log blocks and all the real blocks are on disk,
+** and this transaction does not need to be replayed.
+*/
+struct reiserfs_journal_header {
+	__le32 j_last_flush_trans_id;	/* id of last fully flushed transaction */
+	__le32 j_first_unflushed_offset;	/* offset in the log of where to start replay after a crash */
+	__le32 j_mount_id;
+	/* 12 */ struct journal_params jh_journal;
+};
+
+/* biggest tunable defines are right here */
+#define JOURNAL_BLOCK_COUNT 8192	/* number of blocks in the journal */
+#define JOURNAL_TRANS_MAX_DEFAULT 1024	/* biggest possible single transaction, don't change for now (8/3/99) */
+#define JOURNAL_TRANS_MIN_DEFAULT 256
+#define JOURNAL_MAX_BATCH_DEFAULT   900	/* max blocks to batch into one transaction, don't make this any bigger than 900 */
+#define JOURNAL_MIN_RATIO 2
+#define JOURNAL_MAX_COMMIT_AGE 30
+#define JOURNAL_MAX_TRANS_AGE 30
+#define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9)
+#define JOURNAL_BLOCKS_PER_OBJECT(sb)  (JOURNAL_PER_BALANCE_CNT * 3 + \
+					 2 * (REISERFS_QUOTA_INIT_BLOCKS(sb) + \
+					      REISERFS_QUOTA_TRANS_BLOCKS(sb)))
+
+#ifdef CONFIG_QUOTA
+#define REISERFS_QUOTA_OPTS ((1 << REISERFS_USRQUOTA) | (1 << REISERFS_GRPQUOTA))
+/* We need to update data and inode (atime) */
+#define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? 2 : 0)
+/* 1 balancing, 1 bitmap, 1 data per write + stat data update */
+#define REISERFS_QUOTA_INIT_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \
+(DQUOT_INIT_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_INIT_REWRITE+1) : 0)
+/* same as with INIT */
+#define REISERFS_QUOTA_DEL_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \
+(DQUOT_DEL_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_DEL_REWRITE+1) : 0)
+#else
+#define REISERFS_QUOTA_TRANS_BLOCKS(s) 0
+#define REISERFS_QUOTA_INIT_BLOCKS(s) 0
+#define REISERFS_QUOTA_DEL_BLOCKS(s) 0
+#endif
+
+/* both of these can be as low as 1, or as high as you want.  The min is the
+** number of 4k bitmap nodes preallocated on mount. New nodes are allocated
+** as needed, and released when transactions are committed.  On release, if 
+** the current number of nodes is > max, the node is freed, otherwise, 
+** it is put on a free list for faster use later.
+*/
+#define REISERFS_MIN_BITMAP_NODES 10
+#define REISERFS_MAX_BITMAP_NODES 100
+
+#define JBH_HASH_SHIFT 13	/* these are based on journal hash size of 8192 */
+#define JBH_HASH_MASK 8191
+
+#define _jhashfn(sb,block)	\
+	(((unsigned long)sb>>L1_CACHE_SHIFT) ^ \
+	 (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12))))
+#define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK])
+
+// We need these to make journal.c code more readable
+#define journal_find_get_block(s, block) __find_get_block(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
+#define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
+#define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
+
+enum reiserfs_bh_state_bits {
+	BH_JDirty = BH_PrivateStart,	/* buffer is in current transaction */
+	BH_JDirty_wait,
+	BH_JNew,		/* disk block was taken off free list before
+				 * being in a finished transaction, or
+				 * written to disk. Can be reused immed. */
+	BH_JPrepared,
+	BH_JRestore_dirty,
+	BH_JTest,		// debugging only will go away
+};
+
+BUFFER_FNS(JDirty, journaled);
+TAS_BUFFER_FNS(JDirty, journaled);
+BUFFER_FNS(JDirty_wait, journal_dirty);
+TAS_BUFFER_FNS(JDirty_wait, journal_dirty);
+BUFFER_FNS(JNew, journal_new);
+TAS_BUFFER_FNS(JNew, journal_new);
+BUFFER_FNS(JPrepared, journal_prepared);
+TAS_BUFFER_FNS(JPrepared, journal_prepared);
+BUFFER_FNS(JRestore_dirty, journal_restore_dirty);
+TAS_BUFFER_FNS(JRestore_dirty, journal_restore_dirty);
+BUFFER_FNS(JTest, journal_test);
+TAS_BUFFER_FNS(JTest, journal_test);
+
+/*
+** transaction handle which is passed around for all journal calls
+*/
+struct reiserfs_transaction_handle {
+	struct super_block *t_super;	/* super for this FS when journal_begin was
+					   called. saves calls to reiserfs_get_super
+					   also used by nested transactions to make
+					   sure they are nesting on the right FS
+					   _must_ be first in the handle
+					 */
+	int t_refcount;
+	int t_blocks_logged;	/* number of blocks this writer has logged */
+	int t_blocks_allocated;	/* number of blocks this writer allocated */
+	unsigned int t_trans_id;	/* sanity check, equals the current trans id */
+	void *t_handle_save;	/* save existing current->journal_info */
+	unsigned displace_new_blocks:1;	/* if new block allocation occurres, that block
+					   should be displaced from others */
+	struct list_head t_list;
+};
+
+/* used to keep track of ordered and tail writes, attached to the buffer
+ * head through b_journal_head.
+ */
+struct reiserfs_jh {
+	struct reiserfs_journal_list *jl;
+	struct buffer_head *bh;
+	struct list_head list;
+};
+
+void reiserfs_free_jh(struct buffer_head *bh);
+int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh);
+int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh);
+int journal_mark_dirty(struct reiserfs_transaction_handle *,
+		       struct super_block *, struct buffer_head *bh);
+
+static inline int reiserfs_file_data_log(struct inode *inode)
+{
+	if (reiserfs_data_log(inode->i_sb) ||
+	    (REISERFS_I(inode)->i_flags & i_data_log))
+		return 1;
+	return 0;
+}
+
+static inline int reiserfs_transaction_running(struct super_block *s)
+{
+	struct reiserfs_transaction_handle *th = current->journal_info;
+	if (th && th->t_super == s)
+		return 1;
+	if (th && th->t_super == NULL)
+		BUG();
+	return 0;
+}
+
+static inline int reiserfs_transaction_free_space(struct reiserfs_transaction_handle *th)
+{
+	return th->t_blocks_allocated - th->t_blocks_logged;
+}
+
+struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
+								    super_block
+								    *,
+								    int count);
+int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *);
+int reiserfs_commit_page(struct inode *inode, struct page *page,
+			 unsigned from, unsigned to);
+int reiserfs_flush_old_commits(struct super_block *);
+int reiserfs_commit_for_inode(struct inode *);
+int reiserfs_inode_needs_commit(struct inode *);
+void reiserfs_update_inode_transaction(struct inode *);
+void reiserfs_wait_on_write_block(struct super_block *s);
+void reiserfs_block_writes(struct reiserfs_transaction_handle *th);
+void reiserfs_allow_writes(struct super_block *s);
+void reiserfs_check_lock_depth(struct super_block *s, char *caller);
+int reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh,
+				 int wait);
+void reiserfs_restore_prepared_buffer(struct super_block *,
+				      struct buffer_head *bh);
+int journal_init(struct super_block *, const char *j_dev_name, int old_format,
+		 unsigned int);
+int journal_release(struct reiserfs_transaction_handle *, struct super_block *);
+int journal_release_error(struct reiserfs_transaction_handle *,
+			  struct super_block *);
+int journal_end(struct reiserfs_transaction_handle *, struct super_block *,
+		unsigned long);
+int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *,
+		     unsigned long);
+int journal_mark_freed(struct reiserfs_transaction_handle *,
+		       struct super_block *, b_blocknr_t blocknr);
+int journal_transaction_should_end(struct reiserfs_transaction_handle *, int);
+int reiserfs_in_journal(struct super_block *sb, unsigned int bmap_nr,
+			 int bit_nr, int searchall, b_blocknr_t *next);
+int journal_begin(struct reiserfs_transaction_handle *,
+		  struct super_block *sb, unsigned long);
+int journal_join_abort(struct reiserfs_transaction_handle *,
+		       struct super_block *sb, unsigned long);
+void reiserfs_abort_journal(struct super_block *sb, int errno);
+void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...);
+int reiserfs_allocate_list_bitmaps(struct super_block *s,
+				   struct reiserfs_list_bitmap *, unsigned int);
+
+void add_save_link(struct reiserfs_transaction_handle *th,
+		   struct inode *inode, int truncate);
+int remove_save_link(struct inode *inode, int truncate);
+
+/* objectid.c */
+__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th);
+void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
+			       __u32 objectid_to_release);
+int reiserfs_convert_objectid_map_v1(struct super_block *);
+
+/* stree.c */
+int B_IS_IN_TREE(const struct buffer_head *);
+extern void copy_item_head(struct item_head *to,
+			   const struct item_head *from);
+
+// first key is in cpu form, second - le
+extern int comp_short_keys(const struct reiserfs_key *le_key,
+			   const struct cpu_key *cpu_key);
+extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from);
+
+// both are in le form
+extern int comp_le_keys(const struct reiserfs_key *,
+			const struct reiserfs_key *);
+extern int comp_short_le_keys(const struct reiserfs_key *,
+			      const struct reiserfs_key *);
+
+//
+// get key version from on disk key - kludge
+//
+static inline int le_key_version(const struct reiserfs_key *key)
+{
+	int type;
+
+	type = offset_v2_k_type(&(key->u.k_offset_v2));
+	if (type != TYPE_DIRECT && type != TYPE_INDIRECT
+	    && type != TYPE_DIRENTRY)
+		return KEY_FORMAT_3_5;
+
+	return KEY_FORMAT_3_6;
+
+}
+
+static inline void copy_key(struct reiserfs_key *to,
+			    const struct reiserfs_key *from)
+{
+	memcpy(to, from, KEY_SIZE);
+}
+
+int comp_items(const struct item_head *stored_ih, const struct treepath *path);
+const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
+				    const struct super_block *sb);
+int search_by_key(struct super_block *, const struct cpu_key *,
+		  struct treepath *, int);
+#define search_item(s,key,path) search_by_key (s, key, path, DISK_LEAF_NODE_LEVEL)
+int search_for_position_by_key(struct super_block *sb,
+			       const struct cpu_key *cpu_key,
+			       struct treepath *search_path);
+extern void decrement_bcount(struct buffer_head *bh);
+void decrement_counters_in_path(struct treepath *search_path);
+void pathrelse(struct treepath *search_path);
+int reiserfs_check_path(struct treepath *p);
+void pathrelse_and_restore(struct super_block *s, struct treepath *search_path);
+
+int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
+			 struct treepath *path,
+			 const struct cpu_key *key,
+			 struct item_head *ih,
+			 struct inode *inode, const char *body);
+
+int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
+			     struct treepath *path,
+			     const struct cpu_key *key,
+			     struct inode *inode,
+			     const char *body, int paste_size);
+
+int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
+			   struct treepath *path,
+			   struct cpu_key *key,
+			   struct inode *inode,
+			   struct page *page, loff_t new_file_size);
+
+int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
+			 struct treepath *path,
+			 const struct cpu_key *key,
+			 struct inode *inode, struct buffer_head *un_bh);
+
+void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
+				struct inode *inode, struct reiserfs_key *key);
+int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
+			   struct inode *inode);
+int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
+			 struct inode *inode, struct page *,
+			 int update_timestamps);
+
+#define i_block_size(inode) ((inode)->i_sb->s_blocksize)
+#define file_size(inode) ((inode)->i_size)
+#define tail_size(inode) (file_size (inode) & (i_block_size (inode) - 1))
+
+#define tail_has_to_be_packed(inode) (have_large_tails ((inode)->i_sb)?\
+!STORE_TAIL_IN_UNFM_S1(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):have_small_tails ((inode)->i_sb)?!STORE_TAIL_IN_UNFM_S2(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):0 )
+
+void padd_item(char *item, int total_length, int length);
+
+/* inode.c */
+/* args for the create parameter of reiserfs_get_block */
+#define GET_BLOCK_NO_CREATE 0	/* don't create new blocks or convert tails */
+#define GET_BLOCK_CREATE 1	/* add anything you need to find block */
+#define GET_BLOCK_NO_HOLE 2	/* return -ENOENT for file holes */
+#define GET_BLOCK_READ_DIRECT 4	/* read the tail if indirect item not found */
+#define GET_BLOCK_NO_IMUX     8	/* i_mutex is not held, don't preallocate */
+#define GET_BLOCK_NO_DANGLE   16	/* don't leave any transactions running */
+
+void reiserfs_read_locked_inode(struct inode *inode,
+				struct reiserfs_iget_args *args);
+int reiserfs_find_actor(struct inode *inode, void *p);
+int reiserfs_init_locked_inode(struct inode *inode, void *p);
+void reiserfs_evict_inode(struct inode *inode);
+int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc);
+int reiserfs_get_block(struct inode *inode, sector_t block,
+		       struct buffer_head *bh_result, int create);
+struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+				     int fh_len, int fh_type);
+struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
+				     int fh_len, int fh_type);
+int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
+		       int connectable);
+
+int reiserfs_truncate_file(struct inode *, int update_timestamps);
+void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset,
+		  int type, int key_length);
+void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
+		       int version,
+		       loff_t offset, int type, int length, int entry_count);
+struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key);
+
+struct reiserfs_security_handle;
+int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
+		       struct inode *dir, umode_t mode,
+		       const char *symname, loff_t i_size,
+		       struct dentry *dentry, struct inode *inode,
+		       struct reiserfs_security_handle *security);
+
+void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
+			     struct inode *inode, loff_t size);
+
+static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th,
+				      struct inode *inode)
+{
+	reiserfs_update_sd_size(th, inode, inode->i_size);
+}
+
+void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode);
+void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs);
+int reiserfs_setattr(struct dentry *dentry, struct iattr *attr);
+
+int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len);
+
+/* namei.c */
+void set_de_name_and_namelen(struct reiserfs_dir_entry *de);
+int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
+			struct treepath *path, struct reiserfs_dir_entry *de);
+struct dentry *reiserfs_get_parent(struct dentry *);
+
+#ifdef CONFIG_REISERFS_PROC_INFO
+int reiserfs_proc_info_init(struct super_block *sb);
+int reiserfs_proc_info_done(struct super_block *sb);
+int reiserfs_proc_info_global_init(void);
+int reiserfs_proc_info_global_done(void);
+
+#define PROC_EXP( e )   e
+
+#define __PINFO( sb ) REISERFS_SB(sb) -> s_proc_info_data
+#define PROC_INFO_MAX( sb, field, value )								\
+    __PINFO( sb ).field =												\
+        max( REISERFS_SB( sb ) -> s_proc_info_data.field, value )
+#define PROC_INFO_INC( sb, field ) ( ++ ( __PINFO( sb ).field ) )
+#define PROC_INFO_ADD( sb, field, val ) ( __PINFO( sb ).field += ( val ) )
+#define PROC_INFO_BH_STAT( sb, bh, level )							\
+    PROC_INFO_INC( sb, sbk_read_at[ ( level ) ] );						\
+    PROC_INFO_ADD( sb, free_at[ ( level ) ], B_FREE_SPACE( bh ) );	\
+    PROC_INFO_ADD( sb, items_at[ ( level ) ], B_NR_ITEMS( bh ) )
+#else
+static inline int reiserfs_proc_info_init(struct super_block *sb)
+{
+	return 0;
+}
+
+static inline int reiserfs_proc_info_done(struct super_block *sb)
+{
+	return 0;
+}
+
+static inline int reiserfs_proc_info_global_init(void)
+{
+	return 0;
+}
+
+static inline int reiserfs_proc_info_global_done(void)
+{
+	return 0;
+}
+
+#define PROC_EXP( e )
+#define VOID_V ( ( void ) 0 )
+#define PROC_INFO_MAX( sb, field, value ) VOID_V
+#define PROC_INFO_INC( sb, field ) VOID_V
+#define PROC_INFO_ADD( sb, field, val ) VOID_V
+#define PROC_INFO_BH_STAT(sb, bh, n_node_level) VOID_V
+#endif
+
+/* dir.c */
+extern const struct inode_operations reiserfs_dir_inode_operations;
+extern const struct inode_operations reiserfs_symlink_inode_operations;
+extern const struct inode_operations reiserfs_special_inode_operations;
+extern const struct file_operations reiserfs_dir_operations;
+int reiserfs_readdir_dentry(struct dentry *, void *, filldir_t, loff_t *);
+
+/* tail_conversion.c */
+int direct2indirect(struct reiserfs_transaction_handle *, struct inode *,
+		    struct treepath *, struct buffer_head *, loff_t);
+int indirect2direct(struct reiserfs_transaction_handle *, struct inode *,
+		    struct page *, struct treepath *, const struct cpu_key *,
+		    loff_t, char *);
+void reiserfs_unmap_buffer(struct buffer_head *);
+
+/* file.c */
+extern const struct inode_operations reiserfs_file_inode_operations;
+extern const struct file_operations reiserfs_file_operations;
+extern const struct address_space_operations reiserfs_address_space_operations;
+
+/* fix_nodes.c */
+
+int fix_nodes(int n_op_mode, struct tree_balance *tb,
+	      struct item_head *ins_ih, const void *);
+void unfix_nodes(struct tree_balance *);
+
+/* prints.c */
+void __reiserfs_panic(struct super_block *s, const char *id,
+		      const char *function, const char *fmt, ...)
+    __attribute__ ((noreturn));
+#define reiserfs_panic(s, id, fmt, args...) \
+	__reiserfs_panic(s, id, __func__, fmt, ##args)
+void __reiserfs_error(struct super_block *s, const char *id,
+		      const char *function, const char *fmt, ...);
+#define reiserfs_error(s, id, fmt, args...) \
+	 __reiserfs_error(s, id, __func__, fmt, ##args)
+void reiserfs_info(struct super_block *s, const char *fmt, ...);
+void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...);
+void print_indirect_item(struct buffer_head *bh, int item_num);
+void store_print_tb(struct tree_balance *tb);
+void print_cur_tb(char *mes);
+void print_de(struct reiserfs_dir_entry *de);
+void print_bi(struct buffer_info *bi, char *mes);
+#define PRINT_LEAF_ITEMS 1	/* print all items */
+#define PRINT_DIRECTORY_ITEMS 2	/* print directory items */
+#define PRINT_DIRECT_ITEMS 4	/* print contents of direct items */
+void print_block(struct buffer_head *bh, ...);
+void print_bmap(struct super_block *s, int silent);
+void print_bmap_block(int i, char *data, int size, int silent);
+/*void print_super_block (struct super_block * s, char * mes);*/
+void print_objectid_map(struct super_block *s);
+void print_block_head(struct buffer_head *bh, char *mes);
+void check_leaf(struct buffer_head *bh);
+void check_internal(struct buffer_head *bh);
+void print_statistics(struct super_block *s);
+char *reiserfs_hashname(int code);
+
+/* lbalance.c */
+int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num,
+		    int mov_bytes, struct buffer_head *Snew);
+int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes);
+int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes);
+void leaf_delete_items(struct buffer_info *cur_bi, int last_first, int first,
+		       int del_num, int del_bytes);
+void leaf_insert_into_buf(struct buffer_info *bi, int before,
+			  struct item_head *inserted_item_ih,
+			  const char *inserted_item_body, int zeros_number);
+void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num,
+			  int pos_in_item, int paste_size, const char *body,
+			  int zeros_number);
+void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
+			  int pos_in_item, int cut_size);
+void leaf_paste_entries(struct buffer_info *bi, int item_num, int before,
+			int new_entry_count, struct reiserfs_de_head *new_dehs,
+			const char *records, int paste_size);
+/* ibalance.c */
+int balance_internal(struct tree_balance *, int, int, struct item_head *,
+		     struct buffer_head **);
+
+/* do_balance.c */
+void do_balance_mark_leaf_dirty(struct tree_balance *tb,
+				struct buffer_head *bh, int flag);
+#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
+#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
+
+void do_balance(struct tree_balance *tb, struct item_head *ih,
+		const char *body, int flag);
+void reiserfs_invalidate_buffer(struct tree_balance *tb,
+				struct buffer_head *bh);
+
+int get_left_neighbor_position(struct tree_balance *tb, int h);
+int get_right_neighbor_position(struct tree_balance *tb, int h);
+void replace_key(struct tree_balance *tb, struct buffer_head *, int,
+		 struct buffer_head *, int);
+void make_empty_node(struct buffer_info *);
+struct buffer_head *get_FEB(struct tree_balance *);
+
+/* bitmap.c */
+
+/* structure contains hints for block allocator, and it is a container for
+ * arguments, such as node, search path, transaction_handle, etc. */
+struct __reiserfs_blocknr_hint {
+	struct inode *inode;	/* inode passed to allocator, if we allocate unf. nodes */
+	sector_t block;		/* file offset, in blocks */
+	struct in_core_key key;
+	struct treepath *path;	/* search path, used by allocator to deternine search_start by
+				 * various ways */
+	struct reiserfs_transaction_handle *th;	/* transaction handle is needed to log super blocks and
+						 * bitmap blocks changes  */
+	b_blocknr_t beg, end;
+	b_blocknr_t search_start;	/* a field used to transfer search start value (block number)
+					 * between different block allocator procedures
+					 * (determine_search_start() and others) */
+	int prealloc_size;	/* is set in determine_prealloc_size() function, used by underlayed
+				 * function that do actual allocation */
+
+	unsigned formatted_node:1;	/* the allocator uses different polices for getting disk space for
+					 * formatted/unformatted blocks with/without preallocation */
+	unsigned preallocate:1;
+};
+
+typedef struct __reiserfs_blocknr_hint reiserfs_blocknr_hint_t;
+
+int reiserfs_parse_alloc_options(struct super_block *, char *);
+void reiserfs_init_alloc_options(struct super_block *s);
+
+/*
+ * given a directory, this will tell you what packing locality
+ * to use for a new object underneat it.  The locality is returned
+ * in disk byte order (le).
+ */
+__le32 reiserfs_choose_packing(struct inode *dir);
+
+int reiserfs_init_bitmap_cache(struct super_block *sb);
+void reiserfs_free_bitmap_cache(struct super_block *sb);
+void reiserfs_cache_bitmap_metadata(struct super_block *sb, struct buffer_head *bh, struct reiserfs_bitmap_info *info);
+struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, unsigned int bitmap);
+int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
+void reiserfs_free_block(struct reiserfs_transaction_handle *th, struct inode *,
+			 b_blocknr_t, int for_unformatted);
+int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t *, int,
+			       int);
+static inline int reiserfs_new_form_blocknrs(struct tree_balance *tb,
+					     b_blocknr_t * new_blocknrs,
+					     int amount_needed)
+{
+	reiserfs_blocknr_hint_t hint = {
+		.th = tb->transaction_handle,
+		.path = tb->tb_path,
+		.inode = NULL,
+		.key = tb->key,
+		.block = 0,
+		.formatted_node = 1
+	};
+	return reiserfs_allocate_blocknrs(&hint, new_blocknrs, amount_needed,
+					  0);
+}
+
+static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle
+					    *th, struct inode *inode,
+					    b_blocknr_t * new_blocknrs,
+					    struct treepath *path,
+					    sector_t block)
+{
+	reiserfs_blocknr_hint_t hint = {
+		.th = th,
+		.path = path,
+		.inode = inode,
+		.block = block,
+		.formatted_node = 0,
+		.preallocate = 0
+	};
+	return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0);
+}
+
+#ifdef REISERFS_PREALLOCATE
+static inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle
+					     *th, struct inode *inode,
+					     b_blocknr_t * new_blocknrs,
+					     struct treepath *path,
+					     sector_t block)
+{
+	reiserfs_blocknr_hint_t hint = {
+		.th = th,
+		.path = path,
+		.inode = inode,
+		.block = block,
+		.formatted_node = 0,
+		.preallocate = 1
+	};
+	return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0);
+}
+
+void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
+			       struct inode *inode);
+void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th);
+#endif
+
+/* hashes.c */
+__u32 keyed_hash(const signed char *msg, int len);
+__u32 yura_hash(const signed char *msg, int len);
+__u32 r5_hash(const signed char *msg, int len);
+
+#define reiserfs_set_le_bit		__set_bit_le
+#define reiserfs_test_and_set_le_bit	__test_and_set_bit_le
+#define reiserfs_clear_le_bit		__clear_bit_le
+#define reiserfs_test_and_clear_le_bit	__test_and_clear_bit_le
+#define reiserfs_test_le_bit		test_bit_le
+#define reiserfs_find_next_zero_le_bit	find_next_zero_bit_le
+
+/* sometimes reiserfs_truncate may require to allocate few new blocks
+   to perform indirect2direct conversion. People probably used to
+   think, that truncate should work without problems on a filesystem
+   without free disk space. They may complain that they can not
+   truncate due to lack of free disk space. This spare space allows us
+   to not worry about it. 500 is probably too much, but it should be
+   absolutely safe */
+#define SPARE_SPACE 500
+
+/* prototypes from ioctl.c */
+long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+long reiserfs_compat_ioctl(struct file *filp,
+		   unsigned int cmd, unsigned long arg);
+int reiserfs_unpack(struct inode *inode, struct file *filp);
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 7483279b482..9a17f63c3fd 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -13,8 +13,7 @@
 #include <linux/vmalloc.h>
 #include <linux/string.h>
 #include <linux/errno.h>
-#include <linux/reiserfs_fs.h>
-#include <linux/reiserfs_fs_sb.h>
+#include "reiserfs.h"
 #include <linux/buffer_head.h>
 
 int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 313d39d639e..f8afa4b162b 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -51,7 +51,7 @@
 #include <linux/time.h>
 #include <linux/string.h>
 #include <linux/pagemap.h>
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <linux/buffer_head.h>
 #include <linux/quotaops.h>
 
@@ -1284,12 +1284,12 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
 		 ** -clm
 		 */
 
-		data = kmap_atomic(un_bh->b_page, KM_USER0);
+		data = kmap_atomic(un_bh->b_page);
 		off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
 		memcpy(data + off,
 		       B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih),
 		       ret_value);
-		kunmap_atomic(data, KM_USER0);
+		kunmap_atomic(data);
 	}
 	/* Perform balancing after all resources have been collected at once. */
 	do_balance(&s_del_balance, NULL, NULL, M_DELETE);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e12d8b97cd4..8b7616ef06d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -16,9 +16,9 @@
 #include <linux/vmalloc.h>
 #include <linux/time.h>
 #include <asm/uaccess.h>
-#include <linux/reiserfs_fs.h>
-#include <linux/reiserfs_acl.h>
-#include <linux/reiserfs_xattr.h>
+#include "reiserfs.h"
+#include "acl.h"
+#include "xattr.h"
 #include <linux/init.h>
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
@@ -1874,11 +1874,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 		unlock_new_inode(root_inode);
 	}
 
-	s->s_root = d_alloc_root(root_inode);
-	if (!s->s_root) {
-		iput(root_inode);
+	s->s_root = d_make_root(root_inode);
+	if (!s->s_root)
 		goto error;
-	}
 	// define and initialize hash function
 	sbi->s_hash_function = hash_function(s);
 	if (sbi->s_hash_function == NULL) {
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index d7f6e51bef2..5e2624d12f7 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -5,7 +5,7 @@
 #include <linux/time.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 
 /* access to tail : when one is going to read tail it must make sure, that is not running.
  direct2indirect and indirect2direct can not run concurrently */
@@ -128,9 +128,9 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
 	if (up_to_date_bh) {
 		unsigned pgoff =
 		    (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1);
-		char *kaddr = kmap_atomic(up_to_date_bh->b_page, KM_USER0);
+		char *kaddr = kmap_atomic(up_to_date_bh->b_page);
 		memset(kaddr + pgoff, 0, blk_size - total_tail);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 	}
 
 	REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index c24deda8a8b..46fc1c20a6b 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -33,7 +33,7 @@
  * The xattrs themselves are protected by the xattr_sem.
  */
 
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <linux/capability.h>
 #include <linux/dcache.h>
 #include <linux/namei.h>
@@ -43,8 +43,8 @@
 #include <linux/file.h>
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
-#include <linux/reiserfs_xattr.h>
-#include <linux/reiserfs_acl.h>
+#include "xattr.h"
+#include "acl.h"
 #include <asm/uaccess.h>
 #include <net/checksum.h>
 #include <linux/stat.h>
diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h
new file mode 100644
index 00000000000..f59626c5d33
--- /dev/null
+++ b/fs/reiserfs/xattr.h
@@ -0,0 +1,122 @@
+#include <linux/reiserfs_xattr.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rwsem.h>
+
+struct inode;
+struct dentry;
+struct iattr;
+struct super_block;
+struct nameidata;
+
+int reiserfs_xattr_register_handlers(void) __init;
+void reiserfs_xattr_unregister_handlers(void);
+int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
+int reiserfs_lookup_privroot(struct super_block *sb);
+int reiserfs_delete_xattrs(struct inode *inode);
+int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
+int reiserfs_permission(struct inode *inode, int mask);
+
+#ifdef CONFIG_REISERFS_FS_XATTR
+#define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir)
+ssize_t reiserfs_getxattr(struct dentry *dentry, const char *name,
+			  void *buffer, size_t size);
+int reiserfs_setxattr(struct dentry *dentry, const char *name,
+		      const void *value, size_t size, int flags);
+ssize_t reiserfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
+int reiserfs_removexattr(struct dentry *dentry, const char *name);
+
+int reiserfs_xattr_get(struct inode *, const char *, void *, size_t);
+int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int);
+int reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *,
+			      struct inode *, const char *, const void *,
+			      size_t, int);
+
+extern const struct xattr_handler reiserfs_xattr_user_handler;
+extern const struct xattr_handler reiserfs_xattr_trusted_handler;
+extern const struct xattr_handler reiserfs_xattr_security_handler;
+#ifdef CONFIG_REISERFS_FS_SECURITY
+int reiserfs_security_init(struct inode *dir, struct inode *inode,
+			   const struct qstr *qstr,
+			   struct reiserfs_security_handle *sec);
+int reiserfs_security_write(struct reiserfs_transaction_handle *th,
+			    struct inode *inode,
+			    struct reiserfs_security_handle *sec);
+void reiserfs_security_free(struct reiserfs_security_handle *sec);
+#endif
+
+static inline int reiserfs_xattrs_initialized(struct super_block *sb)
+{
+	return REISERFS_SB(sb)->priv_root != NULL;
+}
+
+#define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header))
+static inline loff_t reiserfs_xattr_nblocks(struct inode *inode, loff_t size)
+{
+	loff_t ret = 0;
+	if (reiserfs_file_data_log(inode)) {
+		ret = _ROUND_UP(xattr_size(size), inode->i_sb->s_blocksize);
+		ret >>= inode->i_sb->s_blocksize_bits;
+	}
+	return ret;
+}
+
+/* We may have to create up to 3 objects: xattr root, xattr dir, xattr file.
+ * Let's try to be smart about it.
+ * xattr root: We cache it. If it's not cached, we may need to create it.
+ * xattr dir: If anything has been loaded for this inode, we can set a flag
+ *            saying so.
+ * xattr file: Since we don't cache xattrs, we can't tell. We always include
+ *             blocks for it.
+ *
+ * However, since root and dir can be created between calls - YOU MUST SAVE
+ * THIS VALUE.
+ */
+static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode)
+{
+	size_t nblocks = JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
+
+	if ((REISERFS_I(inode)->i_flags & i_has_xattr_dir) == 0) {
+		nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
+		if (!REISERFS_SB(inode->i_sb)->xattr_root->d_inode)
+			nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
+	}
+
+	return nblocks;
+}
+
+static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
+{
+	init_rwsem(&REISERFS_I(inode)->i_xattr_sem);
+}
+
+#else
+
+#define reiserfs_getxattr NULL
+#define reiserfs_setxattr NULL
+#define reiserfs_listxattr NULL
+#define reiserfs_removexattr NULL
+
+static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
+{
+}
+#endif  /*  CONFIG_REISERFS_FS_XATTR  */
+
+#ifndef CONFIG_REISERFS_FS_SECURITY
+static inline int reiserfs_security_init(struct inode *dir,
+					 struct inode *inode,
+					 const struct qstr *qstr,
+					 struct reiserfs_security_handle *sec)
+{
+	return 0;
+}
+static inline int
+reiserfs_security_write(struct reiserfs_transaction_handle *th,
+			struct inode *inode,
+			struct reiserfs_security_handle *sec)
+{
+	return 0;
+}
+static inline void reiserfs_security_free(struct reiserfs_security_handle *sec)
+{}
+#endif
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 6da0396e505..44474f9b990 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -1,14 +1,14 @@
 #include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/posix_acl.h>
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <linux/errno.h>
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
 #include <linux/slab.h>
 #include <linux/posix_acl_xattr.h>
-#include <linux/reiserfs_xattr.h>
-#include <linux/reiserfs_acl.h>
+#include "xattr.h"
+#include "acl.h"
 #include <asm/uaccess.h>
 
 static int reiserfs_set_acl(struct reiserfs_transaction_handle *th,
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 534668fa41b..800a3cef6f6 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -1,10 +1,10 @@
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
 #include <linux/slab.h>
-#include <linux/reiserfs_xattr.h>
+#include "xattr.h"
 #include <linux/security.h>
 #include <asm/uaccess.h>
 
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 9883736ce3e..a0035719f66 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -1,10 +1,10 @@
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
-#include <linux/reiserfs_xattr.h>
+#include "xattr.h"
 #include <asm/uaccess.h>
 
 static int
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 45ae1a00013..8667491ae7c 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -1,9 +1,9 @@
-#include <linux/reiserfs_fs.h>
+#include "reiserfs.h"
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
-#include <linux/reiserfs_xattr.h>
+#include "xattr.h"
 #include <asm/uaccess.h>
 
 static int
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index bb36ab74eb4..e64f6b5f7ae 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -538,14 +538,12 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
 	if (IS_ERR(root))
 		goto error;
 
-	sb->s_root = d_alloc_root(root);
+	sb->s_root = d_make_root(root);
 	if (!sb->s_root)
-		goto error_i;
+		goto error;
 
 	return 0;
 
-error_i:
-	iput(root);
 error:
 	return -EINVAL;
 error_rsb_inval:
diff --git a/fs/select.c b/fs/select.c
index d33418fdc85..e782258d0de 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -912,7 +912,7 @@ static long do_restart_poll(struct restart_block *restart_block)
 }
 
 SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
-		long, timeout_msecs)
+		int, timeout_msecs)
 {
 	struct timespec end_time, *to = NULL;
 	int ret;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 4023d6be939..aa242dc9937 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -140,9 +140,21 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
 
 	mutex_lock(&m->lock);
 
+	/*
+	 * seq_file->op->..m_start/m_stop/m_next may do special actions
+	 * or optimisations based on the file->f_version, so we want to
+	 * pass the file->f_version to those methods.
+	 *
+	 * seq_file->version is just copy of f_version, and seq_file
+	 * methods can treat it simply as file version.
+	 * It is copied in first and copied out after all operations.
+	 * It is convenient to have it as  part of structure to avoid the
+	 * need of passing another argument to all the seq_file methods.
+	 */
+	m->version = file->f_version;
+
 	/* Don't assume *ppos is where we left it */
 	if (unlikely(*ppos != m->read_pos)) {
-		m->read_pos = *ppos;
 		while ((err = traverse(m, *ppos)) == -EAGAIN)
 			;
 		if (err) {
@@ -152,21 +164,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
 			m->index = 0;
 			m->count = 0;
 			goto Done;
+		} else {
+			m->read_pos = *ppos;
 		}
 	}
 
-	/*
-	 * seq_file->op->..m_start/m_stop/m_next may do special actions
-	 * or optimisations based on the file->f_version, so we want to
-	 * pass the file->f_version to those methods.
-	 *
-	 * seq_file->version is just copy of f_version, and seq_file
-	 * methods can treat it simply as file version.
-	 * It is copied in first and copied out after all operations.
-	 * It is convenient to have it as  part of structure to avoid the
-	 * need of passing another argument to all the seq_file methods.
-	 */
-	m->version = file->f_version;
 	/* grab buffer if we didn't have one */
 	if (!m->buf) {
 		m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 492465b451d..7ae2a574cb2 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -30,6 +30,21 @@
 #include <linux/signalfd.h>
 #include <linux/syscalls.h>
 
+void signalfd_cleanup(struct sighand_struct *sighand)
+{
+	wait_queue_head_t *wqh = &sighand->signalfd_wqh;
+	/*
+	 * The lockless check can race with remove_wait_queue() in progress,
+	 * but in this case its caller should run under rcu_read_lock() and
+	 * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return.
+	 */
+	if (likely(!waitqueue_active(wqh)))
+		return;
+
+	/* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
+	wake_up_poll(wqh, POLLHUP | POLLFREE);
+}
+
 struct signalfd_ctx {
 	sigset_t sigmask;
 };
diff --git a/fs/splice.c b/fs/splice.c
index 1ec0493266b..f16402ed915 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -737,15 +737,12 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 		goto out;
 
 	if (buf->page != page) {
-		/*
-		 * Careful, ->map() uses KM_USER0!
-		 */
 		char *src = buf->ops->map(pipe, buf, 1);
-		char *dst = kmap_atomic(page, KM_USER1);
+		char *dst = kmap_atomic(page);
 
 		memcpy(dst + offset, src + buf->offset, this_len);
 		flush_dcache_page(page);
-		kunmap_atomic(dst, KM_USER1);
+		kunmap_atomic(dst);
 		buf->ops->unmap(pipe, buf, src);
 	}
 	ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 38bb1c64055..8ca62c28fe1 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -464,10 +464,10 @@ static int squashfs_readpage(struct file *file, struct page *page)
 		if (PageUptodate(push_page))
 			goto skip_page;
 
-		pageaddr = kmap_atomic(push_page, KM_USER0);
+		pageaddr = kmap_atomic(push_page);
 		squashfs_copy_data(pageaddr, buffer, offset, avail);
 		memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
-		kunmap_atomic(pageaddr, KM_USER0);
+		kunmap_atomic(pageaddr);
 		flush_dcache_page(push_page);
 		SetPageUptodate(push_page);
 skip_page:
@@ -484,9 +484,9 @@ skip_page:
 error_out:
 	SetPageError(page);
 out:
-	pageaddr = kmap_atomic(page, KM_USER0);
+	pageaddr = kmap_atomic(page);
 	memset(pageaddr, 0, PAGE_CACHE_SIZE);
-	kunmap_atomic(pageaddr, KM_USER0);
+	kunmap_atomic(pageaddr);
 	flush_dcache_page(page);
 	if (!PageError(page))
 		SetPageUptodate(page);
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index ecaa2f7bdb8..970b1167e7c 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -316,11 +316,10 @@ check_directory_table:
 	}
 	insert_inode_hash(root);
 
-	sb->s_root = d_alloc_root(root);
+	sb->s_root = d_make_root(root);
 	if (sb->s_root == NULL) {
 		ERROR("Root inode create failed\n");
 		err = -ENOMEM;
-		iput(root);
 		goto failed_mount;
 	}
 
diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c
index 1191817264c..12806dffb34 100644
--- a/fs/squashfs/symlink.c
+++ b/fs/squashfs/symlink.c
@@ -90,14 +90,14 @@ static int squashfs_symlink_readpage(struct file *file, struct page *page)
 			goto error_out;
 		}
 
-		pageaddr = kmap_atomic(page, KM_USER0);
+		pageaddr = kmap_atomic(page);
 		copied = squashfs_copy_data(pageaddr + bytes, entry, offset,
 								length - bytes);
 		if (copied == length - bytes)
 			memset(pageaddr + length, 0, PAGE_CACHE_SIZE - length);
 		else
 			block = entry->next_index;
-		kunmap_atomic(pageaddr, KM_USER0);
+		kunmap_atomic(pageaddr);
 		squashfs_cache_put(entry);
 	}
 
diff --git a/fs/stat.c b/fs/stat.c
index 8806b8997d2..86f13563a46 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -307,7 +307,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
 		if (inode->i_op->readlink) {
 			error = security_inode_readlink(path.dentry);
 			if (!error) {
-				touch_atime(path.mnt, path.dentry);
+				touch_atime(&path);
 				error = inode->i_op->readlink(path.dentry,
 							      buf, bufsiz);
 			}
diff --git a/fs/super.c b/fs/super.c
index 6015c02296b..d90e900a8a0 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -32,6 +32,7 @@
 #include <linux/backing-dev.h>
 #include <linux/rculist_bl.h>
 #include <linux/cleancache.h>
+#include <linux/fsnotify.h>
 #include "internal.h"
 
 
@@ -634,6 +635,28 @@ rescan:
 EXPORT_SYMBOL(get_super);
 
 /**
+ *	get_super_thawed - get thawed superblock of a device
+ *	@bdev: device to get the superblock for
+ *
+ *	Scans the superblock list and finds the superblock of the file system
+ *	mounted on the device. The superblock is returned once it is thawed
+ *	(or immediately if it was not frozen). %NULL is returned if no match
+ *	is found.
+ */
+struct super_block *get_super_thawed(struct block_device *bdev)
+{
+	while (1) {
+		struct super_block *s = get_super(bdev);
+		if (!s || s->s_frozen == SB_UNFROZEN)
+			return s;
+		up_read(&s->s_umount);
+		vfs_check_frozen(s, SB_FREEZE_WRITE);
+		put_super(s);
+	}
+}
+EXPORT_SYMBOL(get_super_thawed);
+
+/**
  * get_active_super - get an active reference to the superblock of a device
  * @bdev: device to get the superblock for
  *
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 7fdf6a7b743..2a7a3f5d1ca 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -22,76 +22,103 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/security.h>
+#include <linux/hash.h>
 #include "sysfs.h"
 
 DEFINE_MUTEX(sysfs_mutex);
 DEFINE_SPINLOCK(sysfs_assoc_lock);
 
+#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb);
+
 static DEFINE_SPINLOCK(sysfs_ino_lock);
 static DEFINE_IDA(sysfs_ino_ida);
 
 /**
- *	sysfs_link_sibling - link sysfs_dirent into sibling list
+ *	sysfs_name_hash
+ *	@ns:   Namespace tag to hash
+ *	@name: Null terminated string to hash
+ *
+ *	Returns 31 bit hash of ns + name (so it fits in an off_t )
+ */
+static unsigned int sysfs_name_hash(const void *ns, const char *name)
+{
+	unsigned long hash = init_name_hash();
+	unsigned int len = strlen(name);
+	while (len--)
+		hash = partial_name_hash(*name++, hash);
+	hash = ( end_name_hash(hash) ^ hash_ptr( (void *)ns, 31 ) );
+	hash &= 0x7fffffffU;
+	/* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
+	if (hash < 1)
+		hash += 2;
+	if (hash >= INT_MAX)
+		hash = INT_MAX - 1;
+	return hash;
+}
+
+static int sysfs_name_compare(unsigned int hash, const void *ns,
+	const char *name, const struct sysfs_dirent *sd)
+{
+	if (hash != sd->s_hash)
+		return hash - sd->s_hash;
+	if (ns != sd->s_ns)
+		return ns - sd->s_ns;
+	return strcmp(name, sd->s_name);
+}
+
+static int sysfs_sd_compare(const struct sysfs_dirent *left,
+			    const struct sysfs_dirent *right)
+{
+	return sysfs_name_compare(left->s_hash, left->s_ns, left->s_name,
+				  right);
+}
+
+/**
+ *	sysfs_link_subling - link sysfs_dirent into sibling rbtree
  *	@sd: sysfs_dirent of interest
  *
- *	Link @sd into its sibling list which starts from
+ *	Link @sd into its sibling rbtree which starts from
  *	sd->s_parent->s_dir.children.
  *
  *	Locking:
  *	mutex_lock(sysfs_mutex)
+ *
+ *	RETURNS:
+ *	0 on susccess -EEXIST on failure.
  */
-static void sysfs_link_sibling(struct sysfs_dirent *sd)
+static int sysfs_link_sibling(struct sysfs_dirent *sd)
 {
-	struct sysfs_dirent *parent_sd = sd->s_parent;
-
-	struct rb_node **p;
-	struct rb_node *parent;
+	struct rb_node **node = &sd->s_parent->s_dir.children.rb_node;
+	struct rb_node *parent = NULL;
 
 	if (sysfs_type(sd) == SYSFS_DIR)
-		parent_sd->s_dir.subdirs++;
-
-	p = &parent_sd->s_dir.inode_tree.rb_node;
-	parent = NULL;
-	while (*p) {
-		parent = *p;
-#define node	rb_entry(parent, struct sysfs_dirent, inode_node)
-		if (sd->s_ino < node->s_ino) {
-			p = &node->inode_node.rb_left;
-		} else if (sd->s_ino > node->s_ino) {
-			p = &node->inode_node.rb_right;
-		} else {
-			printk(KERN_CRIT "sysfs: inserting duplicate inode '%lx'\n",
-			       (unsigned long) sd->s_ino);
-			BUG();
-		}
-#undef node
-	}
-	rb_link_node(&sd->inode_node, parent, p);
-	rb_insert_color(&sd->inode_node, &parent_sd->s_dir.inode_tree);
-
-	p = &parent_sd->s_dir.name_tree.rb_node;
-	parent = NULL;
-	while (*p) {
-		int c;
-		parent = *p;
-#define node	rb_entry(parent, struct sysfs_dirent, name_node)
-		c = strcmp(sd->s_name, node->s_name);
-		if (c < 0) {
-			p = &node->name_node.rb_left;
-		} else {
-			p = &node->name_node.rb_right;
-		}
-#undef node
+		sd->s_parent->s_dir.subdirs++;
+
+	while (*node) {
+		struct sysfs_dirent *pos;
+		int result;
+
+		pos = to_sysfs_dirent(*node);
+		parent = *node;
+		result = sysfs_sd_compare(sd, pos);
+		if (result < 0)
+			node = &pos->s_rb.rb_left;
+		else if (result > 0)
+			node = &pos->s_rb.rb_right;
+		else
+			return -EEXIST;
 	}
-	rb_link_node(&sd->name_node, parent, p);
-	rb_insert_color(&sd->name_node, &parent_sd->s_dir.name_tree);
+	/* add new node and rebalance the tree */
+	rb_link_node(&sd->s_rb, parent, node);
+	rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children);
+	return 0;
 }
 
 /**
- *	sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
+ *	sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree
  *	@sd: sysfs_dirent of interest
  *
- *	Unlink @sd from its sibling list which starts from
+ *	Unlink @sd from its sibling rbtree which starts from
  *	sd->s_parent->s_dir.children.
  *
  *	Locking:
@@ -102,8 +129,7 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
 	if (sysfs_type(sd) == SYSFS_DIR)
 		sd->s_parent->s_dir.subdirs--;
 
-	rb_erase(&sd->inode_node, &sd->s_parent->s_dir.inode_tree);
-	rb_erase(&sd->name_node, &sd->s_parent->s_dir.name_tree);
+	rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
 }
 
 /**
@@ -198,7 +224,7 @@ static void sysfs_deactivate(struct sysfs_dirent *sd)
 	rwsem_release(&sd->dep_map, 1, _RET_IP_);
 }
 
-static int sysfs_alloc_ino(ino_t *pino)
+static int sysfs_alloc_ino(unsigned int *pino)
 {
 	int ino, rc;
 
@@ -217,7 +243,7 @@ static int sysfs_alloc_ino(ino_t *pino)
 	return rc;
 }
 
-static void sysfs_free_ino(ino_t ino)
+static void sysfs_free_ino(unsigned int ino)
 {
 	spin_lock(&sysfs_ino_lock);
 	ida_remove(&sysfs_ino_ida, ino);
@@ -402,6 +428,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
 int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
 {
 	struct sysfs_inode_attrs *ps_iattr;
+	int ret;
 
 	if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) {
 		WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
@@ -410,12 +437,12 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
 		return -EINVAL;
 	}
 
-	if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name))
-		return -EEXIST;
-
+	sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name);
 	sd->s_parent = sysfs_get(acxt->parent_sd);
 
-	sysfs_link_sibling(sd);
+	ret = sysfs_link_sibling(sd);
+	if (ret)
+		return ret;
 
 	/* Update timestamps on the parent */
 	ps_iattr = acxt->parent_sd->s_iattr;
@@ -565,8 +592,8 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
 				       const void *ns,
 				       const unsigned char *name)
 {
-	struct rb_node *p = parent_sd->s_dir.name_tree.rb_node;
-	struct sysfs_dirent *found = NULL;
+	struct rb_node *node = parent_sd->s_dir.children.rb_node;
+	unsigned int hash;
 
 	if (!!sysfs_ns_type(parent_sd) != !!ns) {
 		WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
@@ -575,33 +602,21 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
 		return NULL;
 	}
 
-	while (p) {
-		int c;
-#define node	rb_entry(p, struct sysfs_dirent, name_node)
-		c = strcmp(name, node->s_name);
-		if (c < 0) {
-			p = node->name_node.rb_left;
-		} else if (c > 0) {
-			p = node->name_node.rb_right;
-		} else {
-			found = node;
-			p = node->name_node.rb_left;
-		}
-#undef node
-	}
-
-	if (found) {
-		while (found->s_ns != ns) {
-			p = rb_next(&found->name_node);
-			if (!p)
-				return NULL;
-			found = rb_entry(p, struct sysfs_dirent, name_node);
-			if (strcmp(name, found->s_name))
-				return NULL;
-		}
+	hash = sysfs_name_hash(ns, name);
+	while (node) {
+		struct sysfs_dirent *sd;
+		int result;
+
+		sd = to_sysfs_dirent(node);
+		result = sysfs_name_compare(hash, ns, name, sd);
+		if (result < 0)
+			node = node->rb_left;
+		else if (result > 0)
+			node = node->rb_right;
+		else
+			return sd;
 	}
-
-	return found;
+	return NULL;
 }
 
 /**
@@ -804,9 +819,9 @@ static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
 
 	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
 	sysfs_addrm_start(&acxt, dir_sd);
-	pos = rb_first(&dir_sd->s_dir.inode_tree);
+	pos = rb_first(&dir_sd->s_dir.children);
 	while (pos) {
-		struct sysfs_dirent *sd = rb_entry(pos, struct sysfs_dirent, inode_node);
+		struct sysfs_dirent *sd = to_sysfs_dirent(pos);
 		pos = rb_next(pos);
 		if (sysfs_type(sd) != SYSFS_DIR)
 			sysfs_remove_one(&acxt, sd);
@@ -863,6 +878,7 @@ int sysfs_rename(struct sysfs_dirent *sd,
 
 		dup_name = sd->s_name;
 		sd->s_name = new_name;
+		sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name);
 	}
 
 	/* Move to the appropriate place in the appropriate directories rbtree. */
@@ -919,38 +935,36 @@ static int sysfs_dir_release(struct inode *inode, struct file *filp)
 }
 
 static struct sysfs_dirent *sysfs_dir_pos(const void *ns,
-	struct sysfs_dirent *parent_sd,	ino_t ino, struct sysfs_dirent *pos)
+	struct sysfs_dirent *parent_sd,	loff_t hash, struct sysfs_dirent *pos)
 {
 	if (pos) {
 		int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) &&
 			pos->s_parent == parent_sd &&
-			ino == pos->s_ino;
+			hash == pos->s_hash;
 		sysfs_put(pos);
 		if (!valid)
 			pos = NULL;
 	}
-	if (!pos && (ino > 1) && (ino < INT_MAX)) {
-		struct rb_node *p = parent_sd->s_dir.inode_tree.rb_node;
-		while (p) {
-#define node	rb_entry(p, struct sysfs_dirent, inode_node)
-			if (ino < node->s_ino) {
-				pos = node;
-				p = node->inode_node.rb_left;
-			} else if (ino > node->s_ino) {
-				p = node->inode_node.rb_right;
-			} else {
-				pos = node;
+	if (!pos && (hash > 1) && (hash < INT_MAX)) {
+		struct rb_node *node = parent_sd->s_dir.children.rb_node;
+		while (node) {
+			pos = to_sysfs_dirent(node);
+
+			if (hash < pos->s_hash)
+				node = node->rb_left;
+			else if (hash > pos->s_hash)
+				node = node->rb_right;
+			else
 				break;
-			}
-#undef node
 		}
 	}
+	/* Skip over entries in the wrong namespace */
 	while (pos && pos->s_ns != ns) {
-		struct rb_node *p = rb_next(&pos->inode_node);
-		if (!p)
+		struct rb_node *node = rb_next(&pos->s_rb);
+		if (!node)
 			pos = NULL;
 		else
-			pos = rb_entry(p, struct sysfs_dirent, inode_node);
+			pos = to_sysfs_dirent(node);
 	}
 	return pos;
 }
@@ -960,11 +974,11 @@ static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns,
 {
 	pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
 	if (pos) do {
-		struct rb_node *p = rb_next(&pos->inode_node);
-		if (!p)
+		struct rb_node *node = rb_next(&pos->s_rb);
+		if (!node)
 			pos = NULL;
 		else
-			pos = rb_entry(p, struct sysfs_dirent, inode_node);
+			pos = to_sysfs_dirent(node);
 	} while (pos && pos->s_ns != ns);
 	return pos;
 }
@@ -1006,7 +1020,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
 		len = strlen(name);
 		ino = pos->s_ino;
 		type = dt_type(pos);
-		filp->f_pos = ino;
+		filp->f_pos = pos->s_hash;
 		filp->private_data = sysfs_get(pos);
 
 		mutex_unlock(&sysfs_mutex);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 85eb81683a2..feb2d69396c 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -136,12 +136,13 @@ static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, u32 *sec
 	void *old_secdata;
 	size_t old_secdata_len;
 
-	iattrs = sd->s_iattr;
-	if (!iattrs)
-		iattrs = sysfs_init_inode_attrs(sd);
-	if (!iattrs)
-		return -ENOMEM;
+	if (!sd->s_iattr) {
+		sd->s_iattr = sysfs_init_inode_attrs(sd);
+		if (!sd->s_iattr)
+			return -ENOMEM;
+	}
 
+	iattrs = sd->s_iattr;
 	old_secdata = iattrs->ia_secdata;
 	old_secdata_len = iattrs->ia_secdata_len;
 
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index e34f0d99ea4..52c3bdb66a8 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -36,7 +36,7 @@ struct sysfs_dirent sysfs_root = {
 	.s_name		= "",
 	.s_count	= ATOMIC_INIT(1),
 	.s_flags	= SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT),
-	.s_mode		= S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
+	.s_mode		= S_IFDIR | S_IRUGO | S_IXUGO,
 	.s_ino		= 1,
 };
 
@@ -61,10 +61,9 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	/* instantiate and link root dentry */
-	root = d_alloc_root(inode);
+	root = d_make_root(inode);
 	if (!root) {
 		pr_debug("%s: could not get root dentry!\n",__func__);
-		iput(inode);
 		return -ENOMEM;
 	}
 	root->d_fsdata = &sysfs_root;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 7484a36ee67..661a9639570 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -20,9 +20,8 @@ struct sysfs_elem_dir {
 	struct kobject		*kobj;
 
 	unsigned long		subdirs;
-
-	struct rb_root		inode_tree;
-	struct rb_root		name_tree;
+	/* children rbtree starts here and goes through sd->s_rb */
+	struct rb_root		children;
 };
 
 struct sysfs_elem_symlink {
@@ -62,8 +61,7 @@ struct sysfs_dirent {
 	struct sysfs_dirent	*s_parent;
 	const char		*s_name;
 
-	struct rb_node		inode_node;
-	struct rb_node		name_node;
+	struct rb_node		s_rb;
 
 	union {
 		struct completion	*completion;
@@ -71,6 +69,7 @@ struct sysfs_dirent {
 	} u;
 
 	const void		*s_ns; /* namespace tag */
+	unsigned int		s_hash; /* ns + name hash */
 	union {
 		struct sysfs_elem_dir		s_dir;
 		struct sysfs_elem_symlink	s_symlink;
@@ -78,9 +77,9 @@ struct sysfs_dirent {
 		struct sysfs_elem_bin_attr	s_bin_attr;
 	};
 
-	unsigned int		s_flags;
+	unsigned short		s_flags;
 	umode_t 		s_mode;
-	ino_t			s_ino;
+	unsigned int		s_ino;
 	struct sysfs_inode_attrs *s_iattr;
 };
 
@@ -95,11 +94,11 @@ struct sysfs_dirent {
 #define SYSFS_ACTIVE_REF		(SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR)
 
 /* identify any namespace tag on sysfs_dirents */
-#define SYSFS_NS_TYPE_MASK		0xff00
+#define SYSFS_NS_TYPE_MASK		0xf00
 #define SYSFS_NS_TYPE_SHIFT		8
 
 #define SYSFS_FLAG_MASK			~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK)
-#define SYSFS_FLAG_REMOVED		0x020000
+#define SYSFS_FLAG_REMOVED		0x02000
 
 static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
 {
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b217797e621..d7466e29361 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -121,9 +121,6 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
 {
 	struct inode *inode = old_dentry->d_inode;
 
-	if (inode->i_nlink >= SYSV_SB(inode->i_sb)->s_link_max)
-		return -EMLINK;
-
 	inode->i_ctime = CURRENT_TIME_SEC;
 	inode_inc_link_count(inode);
 	ihold(inode);
@@ -134,10 +131,8 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
 static int sysv_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
 {
 	struct inode * inode;
-	int err = -EMLINK;
+	int err;
 
-	if (dir->i_nlink >= SYSV_SB(dir->i_sb)->s_link_max) 
-		goto out;
 	inode_inc_link_count(dir);
 
 	inode = sysv_new_inode(dir, S_IFDIR|mode);
@@ -251,11 +246,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
 			drop_nlink(new_inode);
 		inode_dec_link_count(new_inode);
 	} else {
-		if (dir_de) {
-			err = -EMLINK;
-			if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
-				goto out_dir;
-		}
 		err = sysv_add_link(new_dentry, old_inode);
 		if (err)
 			goto out_dir;
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index f60c196913e..7491c33b646 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -44,7 +44,7 @@ enum {
 	JAN_1_1980 = (10*365 + 2) * 24 * 60 * 60
 };
 
-static void detected_xenix(struct sysv_sb_info *sbi)
+static void detected_xenix(struct sysv_sb_info *sbi, unsigned *max_links)
 {
 	struct buffer_head *bh1 = sbi->s_bh1;
 	struct buffer_head *bh2 = sbi->s_bh2;
@@ -59,7 +59,7 @@ static void detected_xenix(struct sysv_sb_info *sbi)
 		sbd2 = (struct xenix_super_block *) (bh2->b_data - 512);
 	}
 
-	sbi->s_link_max = XENIX_LINK_MAX;
+	*max_links = XENIX_LINK_MAX;
 	sbi->s_fic_size = XENIX_NICINOD;
 	sbi->s_flc_size = XENIX_NICFREE;
 	sbi->s_sbd1 = (char *)sbd1;
@@ -75,7 +75,7 @@ static void detected_xenix(struct sysv_sb_info *sbi)
 	sbi->s_nzones = fs32_to_cpu(sbi, sbd1->s_fsize);
 }
 
-static void detected_sysv4(struct sysv_sb_info *sbi)
+static void detected_sysv4(struct sysv_sb_info *sbi, unsigned *max_links)
 {
 	struct sysv4_super_block * sbd;
 	struct buffer_head *bh1 = sbi->s_bh1;
@@ -86,7 +86,7 @@ static void detected_sysv4(struct sysv_sb_info *sbi)
 	else
 		sbd = (struct sysv4_super_block *) bh2->b_data;
 
-	sbi->s_link_max = SYSV_LINK_MAX;
+	*max_links = SYSV_LINK_MAX;
 	sbi->s_fic_size = SYSV_NICINOD;
 	sbi->s_flc_size = SYSV_NICFREE;
 	sbi->s_sbd1 = (char *)sbd;
@@ -103,7 +103,7 @@ static void detected_sysv4(struct sysv_sb_info *sbi)
 	sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize);
 }
 
-static void detected_sysv2(struct sysv_sb_info *sbi)
+static void detected_sysv2(struct sysv_sb_info *sbi, unsigned *max_links)
 {
 	struct sysv2_super_block *sbd;
 	struct buffer_head *bh1 = sbi->s_bh1;
@@ -114,7 +114,7 @@ static void detected_sysv2(struct sysv_sb_info *sbi)
 	else
 		sbd = (struct sysv2_super_block *) bh2->b_data;
 
-	sbi->s_link_max = SYSV_LINK_MAX;
+	*max_links = SYSV_LINK_MAX;
 	sbi->s_fic_size = SYSV_NICINOD;
 	sbi->s_flc_size = SYSV_NICFREE;
 	sbi->s_sbd1 = (char *)sbd;
@@ -131,14 +131,14 @@ static void detected_sysv2(struct sysv_sb_info *sbi)
 	sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize);
 }
 
-static void detected_coherent(struct sysv_sb_info *sbi)
+static void detected_coherent(struct sysv_sb_info *sbi, unsigned *max_links)
 {
 	struct coh_super_block * sbd;
 	struct buffer_head *bh1 = sbi->s_bh1;
 
 	sbd = (struct coh_super_block *) bh1->b_data;
 
-	sbi->s_link_max = COH_LINK_MAX;
+	*max_links = COH_LINK_MAX;
 	sbi->s_fic_size = COH_NICINOD;
 	sbi->s_flc_size = COH_NICFREE;
 	sbi->s_sbd1 = (char *)sbd;
@@ -154,12 +154,12 @@ static void detected_coherent(struct sysv_sb_info *sbi)
 	sbi->s_nzones = fs32_to_cpu(sbi, sbd->s_fsize);
 }
 
-static void detected_v7(struct sysv_sb_info *sbi)
+static void detected_v7(struct sysv_sb_info *sbi, unsigned *max_links)
 {
 	struct buffer_head *bh2 = sbi->s_bh2;
 	struct v7_super_block *sbd = (struct v7_super_block *)bh2->b_data;
 
-	sbi->s_link_max = V7_LINK_MAX;
+	*max_links = V7_LINK_MAX;
 	sbi->s_fic_size = V7_NICINOD;
 	sbi->s_flc_size = V7_NICFREE;
 	sbi->s_sbd1 = (char *)sbd;
@@ -290,7 +290,7 @@ static char *flavour_names[] = {
 	[FSTYPE_AFS]	= "AFS",
 };
 
-static void (*flavour_setup[])(struct sysv_sb_info *) = {
+static void (*flavour_setup[])(struct sysv_sb_info *, unsigned *) = {
 	[FSTYPE_XENIX]	= detected_xenix,
 	[FSTYPE_SYSV4]	= detected_sysv4,
 	[FSTYPE_SYSV2]	= detected_sysv2,
@@ -310,7 +310,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
 
 	sbi->s_firstinodezone = 2;
 
-	flavour_setup[sbi->s_type](sbi);
+	flavour_setup[sbi->s_type](sbi, &sb->s_max_links);
 	
 	sbi->s_truncate = 1;
 	sbi->s_ndatazones = sbi->s_nzones - sbi->s_firstdatazone;
@@ -341,9 +341,8 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
 		printk("SysV FS: get root inode failed\n");
 		return 0;
 	}
-	sb->s_root = d_alloc_root(root_inode);
+	sb->s_root = d_make_root(root_inode);
 	if (!sb->s_root) {
-		iput(root_inode);
 		printk("SysV FS: get root dentry failed\n");
 		return 0;
 	}
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 0e4b821c569..11b07672f6c 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -24,7 +24,6 @@ struct sysv_sb_info {
 	char	       s_bytesex;	/* bytesex (le/be/pdp) */
 	char	       s_truncate;	/* if 1: names > SYSV_NAMELEN chars are truncated */
 					/* if 0: they are disallowed (ENAMETOOLONG) */
-	nlink_t        s_link_max;	/* max number of hard links to a file */
 	unsigned int   s_inodes_per_block;	/* number of inodes per block */
 	unsigned int   s_inodes_per_block_1;	/* inodes_per_block - 1 */
 	unsigned int   s_inodes_per_block_bits;	/* log2(inodes_per_block) */
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index f9c234bf33d..5c8f6dc1d28 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1042,10 +1042,10 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
 	 * the page size, the remaining memory is zeroed when mapped, and
 	 * writes to that region are not written out to the file."
 	 */
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 	memset(kaddr + len, 0, PAGE_CACHE_SIZE - len);
 	flush_dcache_page(page);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	if (i_size > synced_i_size) {
 		err = inode->i_sb->s_op->write_inode(inode, NULL);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 63765d58445..76e4e0566ad 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2076,15 +2076,13 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
 		goto out_umount;
 	}
 
-	sb->s_root = d_alloc_root(root);
+	sb->s_root = d_make_root(root);
 	if (!sb->s_root)
-		goto out_iput;
+		goto out_umount;
 
 	mutex_unlock(&c->umount_mutex);
 	return 0;
 
-out_iput:
-	iput(root);
 out_umount:
 	ubifs_umount(c);
 out_unlock:
diff --git a/fs/udf/file.c b/fs/udf/file.c
index dca0c3881e8..7f3f7ba3df6 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -87,10 +87,10 @@ static int udf_adinicb_write_end(struct file *file,
 	char *kaddr;
 	struct udf_inode_info *iinfo = UDF_I(inode);
 
-	kaddr = kmap_atomic(page, KM_USER0);
+	kaddr = kmap_atomic(page);
 	memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + offset,
 		kaddr + offset, copied);
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 
 	return simple_write_end(file, mapping, pos, len, copied, page, fsdata);
 }
@@ -201,12 +201,10 @@ out:
 static int udf_release_file(struct inode *inode, struct file *filp)
 {
 	if (filp->f_mode & FMODE_WRITE) {
-		mutex_lock(&inode->i_mutex);
 		down_write(&UDF_I(inode)->i_data_sem);
 		udf_discard_prealloc(inode);
 		udf_truncate_tail_extent(inode);
 		up_write(&UDF_I(inode)->i_data_sem);
-		mutex_unlock(&inode->i_mutex);
 	}
 	return 0;
 }
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 08bf46edf9c..38de8f234b9 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,8 +32,6 @@
 #include <linux/crc-itu-t.h>
 #include <linux/exportfs.h>
 
-enum { UDF_MAX_LINKS = 0xffff };
-
 static inline int udf_match(int len1, const unsigned char *name1, int len2,
 			    const unsigned char *name2)
 {
@@ -649,10 +647,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	struct udf_inode_info *dinfo = UDF_I(dir);
 	struct udf_inode_info *iinfo;
 
-	err = -EMLINK;
-	if (dir->i_nlink >= UDF_MAX_LINKS)
-		goto out;
-
 	err = -EIO;
 	inode = udf_new_inode(dir, S_IFDIR | mode, &err);
 	if (!inode)
@@ -1032,9 +1026,6 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
 	struct fileIdentDesc cfi, *fi;
 	int err;
 
-	if (inode->i_nlink >= UDF_MAX_LINKS)
-		return -EMLINK;
-
 	fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
 	if (!fi) {
 		return err;
@@ -1126,10 +1117,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
 		if (udf_get_lb_pblock(old_inode->i_sb, &tloc, 0) !=
 				old_dir->i_ino)
 			goto end_rename;
-
-		retval = -EMLINK;
-		if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS)
-			goto end_rename;
 	}
 	if (!nfi) {
 		nfi = udf_add_entry(new_dir, new_dentry, &nfibh, &ncfi,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index c09a84daaf5..85067b4c7e1 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -75,6 +75,8 @@
 
 #define UDF_DEFAULT_BLOCKSIZE 2048
 
+enum { UDF_MAX_LINKS = 0xffff };
+
 /* These are the "meat" - everything else is stuffing */
 static int udf_fill_super(struct super_block *, void *, int);
 static void udf_put_super(struct super_block *);
@@ -2035,13 +2037,13 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	}
 
 	/* Allocate a dentry for the root inode */
-	sb->s_root = d_alloc_root(inode);
+	sb->s_root = d_make_root(inode);
 	if (!sb->s_root) {
 		udf_err(sb, "Couldn't allocate root dentry\n");
-		iput(inode);
 		goto error_out;
 	}
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_max_links = UDF_MAX_LINKS;
 	return 0;
 
 error_out:
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 38cac199edf..a2281cadefa 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -166,10 +166,6 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
 	int error;
 
 	lock_ufs(dir->i_sb);
-	if (inode->i_nlink >= UFS_LINK_MAX) {
-		unlock_ufs(dir->i_sb);
-		return -EMLINK;
-	}
 
 	inode->i_ctime = CURRENT_TIME_SEC;
 	inode_inc_link_count(inode);
@@ -183,10 +179,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
 static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
 {
 	struct inode * inode;
-	int err = -EMLINK;
-
-	if (dir->i_nlink >= UFS_LINK_MAX)
-		goto out;
+	int err;
 
 	lock_ufs(dir->i_sb);
 	inode_inc_link_count(dir);
@@ -305,11 +298,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			drop_nlink(new_inode);
 		inode_dec_link_count(new_inode);
 	} else {
-		if (dir_de) {
-			err = -EMLINK;
-			if (new_dir->i_nlink >= UFS_LINK_MAX)
-				goto out_dir;
-		}
 		err = ufs_add_link(new_dentry, old_inode);
 		if (err)
 			goto out_dir;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 5246ee3e560..f636f6b460d 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1157,16 +1157,17 @@ magic_found:
 			    "fast symlink size (%u)\n", uspi->s_maxsymlinklen);
 		uspi->s_maxsymlinklen = maxsymlen;
 	}
+	sb->s_max_links = UFS_LINK_MAX;
 
 	inode = ufs_iget(sb, UFS_ROOTINO);
 	if (IS_ERR(inode)) {
 		ret = PTR_ERR(inode);
 		goto failed;
 	}
-	sb->s_root = d_alloc_root(inode);
+	sb->s_root = d_make_root(inode);
 	if (!sb->s_root) {
 		ret = -ENOMEM;
-		goto dalloc_failed;
+		goto failed;
 	}
 
 	ufs_setup_cstotal(sb);
@@ -1180,8 +1181,6 @@ magic_found:
 	UFSD("EXIT\n");
 	return 0;
 
-dalloc_failed:
-	iput(inode);
 failed:
 	if (ubh)
 		ubh_brelse_uspi (uspi);
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index cbcb7bea38e..53db20ee3e7 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -139,10 +139,10 @@ xfs_qm_adjust_dqtimers(
 
 	if (!d->d_btimer) {
 		if ((d->d_blk_softlimit &&
-		     (be64_to_cpu(d->d_bcount) >=
+		     (be64_to_cpu(d->d_bcount) >
 		      be64_to_cpu(d->d_blk_softlimit))) ||
 		    (d->d_blk_hardlimit &&
-		     (be64_to_cpu(d->d_bcount) >=
+		     (be64_to_cpu(d->d_bcount) >
 		      be64_to_cpu(d->d_blk_hardlimit)))) {
 			d->d_btimer = cpu_to_be32(get_seconds() +
 					mp->m_quotainfo->qi_btimelimit);
@@ -151,10 +151,10 @@ xfs_qm_adjust_dqtimers(
 		}
 	} else {
 		if ((!d->d_blk_softlimit ||
-		     (be64_to_cpu(d->d_bcount) <
+		     (be64_to_cpu(d->d_bcount) <=
 		      be64_to_cpu(d->d_blk_softlimit))) &&
 		    (!d->d_blk_hardlimit ||
-		    (be64_to_cpu(d->d_bcount) <
+		    (be64_to_cpu(d->d_bcount) <=
 		     be64_to_cpu(d->d_blk_hardlimit)))) {
 			d->d_btimer = 0;
 		}
@@ -162,10 +162,10 @@ xfs_qm_adjust_dqtimers(
 
 	if (!d->d_itimer) {
 		if ((d->d_ino_softlimit &&
-		     (be64_to_cpu(d->d_icount) >=
+		     (be64_to_cpu(d->d_icount) >
 		      be64_to_cpu(d->d_ino_softlimit))) ||
 		    (d->d_ino_hardlimit &&
-		     (be64_to_cpu(d->d_icount) >=
+		     (be64_to_cpu(d->d_icount) >
 		      be64_to_cpu(d->d_ino_hardlimit)))) {
 			d->d_itimer = cpu_to_be32(get_seconds() +
 					mp->m_quotainfo->qi_itimelimit);
@@ -174,10 +174,10 @@ xfs_qm_adjust_dqtimers(
 		}
 	} else {
 		if ((!d->d_ino_softlimit ||
-		     (be64_to_cpu(d->d_icount) <
+		     (be64_to_cpu(d->d_icount) <=
 		      be64_to_cpu(d->d_ino_softlimit)))  &&
 		    (!d->d_ino_hardlimit ||
-		     (be64_to_cpu(d->d_icount) <
+		     (be64_to_cpu(d->d_icount) <=
 		      be64_to_cpu(d->d_ino_hardlimit)))) {
 			d->d_itimer = 0;
 		}
@@ -185,10 +185,10 @@ xfs_qm_adjust_dqtimers(
 
 	if (!d->d_rtbtimer) {
 		if ((d->d_rtb_softlimit &&
-		     (be64_to_cpu(d->d_rtbcount) >=
+		     (be64_to_cpu(d->d_rtbcount) >
 		      be64_to_cpu(d->d_rtb_softlimit))) ||
 		    (d->d_rtb_hardlimit &&
-		     (be64_to_cpu(d->d_rtbcount) >=
+		     (be64_to_cpu(d->d_rtbcount) >
 		      be64_to_cpu(d->d_rtb_hardlimit)))) {
 			d->d_rtbtimer = cpu_to_be32(get_seconds() +
 					mp->m_quotainfo->qi_rtbtimelimit);
@@ -197,10 +197,10 @@ xfs_qm_adjust_dqtimers(
 		}
 	} else {
 		if ((!d->d_rtb_softlimit ||
-		     (be64_to_cpu(d->d_rtbcount) <
+		     (be64_to_cpu(d->d_rtbcount) <=
 		      be64_to_cpu(d->d_rtb_softlimit))) &&
 		    (!d->d_rtb_hardlimit ||
-		     (be64_to_cpu(d->d_rtbcount) <
+		     (be64_to_cpu(d->d_rtbcount) <=
 		      be64_to_cpu(d->d_rtb_hardlimit)))) {
 			d->d_rtbtimer = 0;
 		}
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 15ff5392fb6..0ed9ee77937 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1981,7 +1981,7 @@ xfs_qm_dqcheck(
 
 	if (!errs && ddq->d_id) {
 		if (ddq->d_blk_softlimit &&
-		    be64_to_cpu(ddq->d_bcount) >=
+		    be64_to_cpu(ddq->d_bcount) >
 				be64_to_cpu(ddq->d_blk_softlimit)) {
 			if (!ddq->d_btimer) {
 				if (flags & XFS_QMOPT_DOWARN)
@@ -1992,7 +1992,7 @@ xfs_qm_dqcheck(
 			}
 		}
 		if (ddq->d_ino_softlimit &&
-		    be64_to_cpu(ddq->d_icount) >=
+		    be64_to_cpu(ddq->d_icount) >
 				be64_to_cpu(ddq->d_ino_softlimit)) {
 			if (!ddq->d_itimer) {
 				if (flags & XFS_QMOPT_DOWARN)
@@ -2003,7 +2003,7 @@ xfs_qm_dqcheck(
 			}
 		}
 		if (ddq->d_rtb_softlimit &&
-		    be64_to_cpu(ddq->d_rtbcount) >=
+		    be64_to_cpu(ddq->d_rtbcount) >
 				be64_to_cpu(ddq->d_rtb_softlimit)) {
 			if (!ddq->d_rtbtimer) {
 				if (flags & XFS_QMOPT_DOWARN)
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index eafbcff81f3..711a86e39ff 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -813,11 +813,11 @@ xfs_qm_export_dquot(
 	     (XFS_IS_OQUOTA_ENFORCED(mp) &&
 			(dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
 	    dst->d_id != 0) {
-		if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
+		if (((int) dst->d_bcount > (int) dst->d_blk_softlimit) &&
 		    (dst->d_blk_softlimit > 0)) {
 			ASSERT(dst->d_btimer != 0);
 		}
-		if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) &&
+		if (((int) dst->d_icount > (int) dst->d_ino_softlimit) &&
 		    (dst->d_ino_softlimit > 0)) {
 			ASSERT(dst->d_itimer != 0);
 		}
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 866de277079..e44ef7ee8ce 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -118,17 +118,6 @@ xfs_rename(
 	new_parent = (src_dp != target_dp);
 	src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
 
-	if (src_is_directory) {
-		/*
-		 * Check for link count overflow on target_dp
-		 */
-		if (target_ip == NULL && new_parent &&
-		    target_dp->i_d.di_nlink >= XFS_MAXLINK) {
-			error = XFS_ERROR(EMLINK);
-			goto std_return;
-		}
-	}
-
 	xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
 				inodes, &num_inodes);
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ee5b695c99a..baf40e378d3 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1341,6 +1341,7 @@ xfs_fs_fill_super(
 	sb->s_blocksize = mp->m_sb.sb_blocksize;
 	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
 	sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
+	sb->s_max_links = XFS_MAXLINK;
 	sb->s_time_gran = 1;
 	set_posix_acl_flag(sb);
 
@@ -1361,10 +1362,10 @@ xfs_fs_fill_super(
 		error = EINVAL;
 		goto out_syncd_stop;
 	}
-	sb->s_root = d_alloc_root(root);
+	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
 		error = ENOMEM;
-		goto out_iput;
+		goto out_syncd_stop;
 	}
 
 	return 0;
@@ -1383,8 +1384,6 @@ xfs_fs_fill_super(
  out:
 	return -error;
 
- out_iput:
-	iput(root);
  out_syncd_stop:
 	xfs_syncd_stop(mp);
  out_unmount:
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 329b06aba1c..7adcdf15ae0 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1151,8 +1151,8 @@ xfs_trans_add_item(
 {
 	struct xfs_log_item_desc *lidp;
 
-	ASSERT(lip->li_mountp = tp->t_mountp);
-	ASSERT(lip->li_ailp = tp->t_mountp->m_ail);
+	ASSERT(lip->li_mountp == tp->t_mountp);
+	ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
 
 	lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS);
 
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 4d00ee67792..c4ba366d24e 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -649,12 +649,12 @@ xfs_trans_dqresv(
 			 * nblks.
 			 */
 			if (hardlimit > 0ULL &&
-			    hardlimit <= nblks + *resbcountp) {
+			    hardlimit < nblks + *resbcountp) {
 				xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
 				goto error_return;
 			}
 			if (softlimit > 0ULL &&
-			    softlimit <= nblks + *resbcountp) {
+			    softlimit < nblks + *resbcountp) {
 				if ((timer != 0 && get_seconds() > timer) ||
 				    (warns != 0 && warns >= warnlimit)) {
 					xfs_quota_warn(mp, dqp,
@@ -677,11 +677,13 @@ xfs_trans_dqresv(
 			if (!softlimit)
 				softlimit = q->qi_isoftlimit;
 
-			if (hardlimit > 0ULL && count >= hardlimit) {
+			if (hardlimit > 0ULL &&
+			    hardlimit < ninos + count) {
 				xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
 				goto error_return;
 			}
-			if (softlimit > 0ULL && count >= softlimit) {
+			if (softlimit > 0ULL &&
+			    softlimit < ninos + count) {
 				if  ((timer != 0 && get_seconds() > timer) ||
 				     (warns != 0 && warns >= warnlimit)) {
 					xfs_quota_warn(mp, dqp,
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 89dbb4a5087..79c05ac85bf 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -296,8 +296,6 @@ xfs_bumplink(
 	xfs_trans_t *tp,
 	xfs_inode_t *ip)
 {
-	if (ip->i_d.di_nlink >= XFS_MAXLINK)
-		return XFS_ERROR(EMLINK);
 	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
 
 	ASSERT(ip->i_d.di_nlink > 0);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index ebdb88840a4..64981d7e737 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -917,14 +917,6 @@ xfs_create(
 	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
 	unlock_dp_on_error = B_TRUE;
 
-	/*
-	 * Check for directory link count overflow.
-	 */
-	if (is_dir && dp->i_d.di_nlink >= XFS_MAXLINK) {
-		error = XFS_ERROR(EMLINK);
-		goto out_trans_cancel;
-	}
-
 	xfs_bmap_init(&free_list, &first_block);
 
 	/*
@@ -1429,14 +1421,6 @@ xfs_link(
 	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
 
 	/*
-	 * If the source has too many links, we can't make any more to it.
-	 */
-	if (sip->i_d.di_nlink >= XFS_MAXLINK) {
-		error = XFS_ERROR(EMLINK);
-		goto error_return;
-	}
-
-	/*
 	 * If we are using project inheritance, we only allow hard link
 	 * creation in our tree when the project IDs are the same; else
 	 * the tree quota mechanism could be circumvented.