From dd4c4f17d722ffeb2515bf781400675a30fcead7 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Fri, 28 May 2010 16:32:14 -0400
Subject: suspend: Move NVS save/restore code to generic suspend functionality

Saving platform non-volatile state may be required for suspend to RAM as
well as hibernation. Move it to more generic code.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Tested-by: Maxim Levitsky <maximlevitsky@gmail.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/suspend.h | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 5e781d824e6..bc7d6bb4cd8 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -256,22 +256,22 @@ static inline int hibernate(void) { return -ENOSYS; }
 static inline bool system_entering_hibernation(void) { return false; }
 #endif /* CONFIG_HIBERNATION */
 
-#ifdef CONFIG_HIBERNATION_NVS
-extern int hibernate_nvs_register(unsigned long start, unsigned long size);
-extern int hibernate_nvs_alloc(void);
-extern void hibernate_nvs_free(void);
-extern void hibernate_nvs_save(void);
-extern void hibernate_nvs_restore(void);
-#else /* CONFIG_HIBERNATION_NVS */
-static inline int hibernate_nvs_register(unsigned long a, unsigned long b)
+#ifdef CONFIG_SUSPEND_NVS
+extern int suspend_nvs_register(unsigned long start, unsigned long size);
+extern int suspend_nvs_alloc(void);
+extern void suspend_nvs_free(void);
+extern void suspend_nvs_save(void);
+extern void suspend_nvs_restore(void);
+#else /* CONFIG_SUSPEND_NVS */
+static inline int suspend_nvs_register(unsigned long a, unsigned long b)
 {
 	return 0;
 }
-static inline int hibernate_nvs_alloc(void) { return 0; }
-static inline void hibernate_nvs_free(void) {}
-static inline void hibernate_nvs_save(void) {}
-static inline void hibernate_nvs_restore(void) {}
-#endif /* CONFIG_HIBERNATION_NVS */
+static inline int suspend_nvs_alloc(void) { return 0; }
+static inline void suspend_nvs_free(void) {}
+static inline void suspend_nvs_save(void) {}
+static inline void suspend_nvs_restore(void) {}
+#endif /* CONFIG_SUSPEND_NVS */
 
 #ifdef CONFIG_PM_SLEEP
 void save_processor_state(void);
-- 
cgit v1.2.3-70-g09d2


From c5444198ca210498e8ac0ba121b4cd3537aa12f7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Jun 2010 18:15:15 +0200
Subject: writeback: simplify and split bdi_start_writeback

bdi_start_writeback now never gets a superblock passed, so we can just remove
that case.  And to further untangle the code and flatten the call stack
split it into two trivial helpers for it's two callers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c           | 32 ++++++++++++++++++++------------
 include/linux/backing-dev.h |  4 ++--
 mm/page-writeback.c         |  5 ++---
 3 files changed, 24 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 4fcca4f7494..0079bf59b58 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -200,7 +200,6 @@ static void bdi_queue_work_onstack(struct wb_writeback_args *args)
 /**
  * bdi_start_writeback - start writeback
  * @bdi: the backing device to write from
- * @sb: write inodes from this super_block
  * @nr_pages: the number of pages to write
  *
  * Description:
@@ -209,25 +208,34 @@ static void bdi_queue_work_onstack(struct wb_writeback_args *args)
  *   completion. Caller need not hold sb s_umount semaphore.
  *
  */
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
-			 long nr_pages)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
 {
 	struct wb_writeback_args args = {
-		.sb		= sb,
 		.sync_mode	= WB_SYNC_NONE,
 		.nr_pages	= nr_pages,
 		.range_cyclic	= 1,
 	};
 
-	/*
-	 * We treat @nr_pages=0 as the special case to do background writeback,
-	 * ie. to sync pages until the background dirty threshold is reached.
-	 */
-	if (!nr_pages) {
-		args.nr_pages = LONG_MAX;
-		args.for_background = 1;
-	}
+	bdi_alloc_queue_work(bdi, &args);
+}
 
+/**
+ * bdi_start_background_writeback - start background writeback
+ * @bdi: the backing device to write from
+ *
+ * Description:
+ *   This does WB_SYNC_NONE background writeback. The IO is only
+ *   started when this function returns, we make no guarentees on
+ *   completion. Caller need not hold sb s_umount semaphore.
+ */
+void bdi_start_background_writeback(struct backing_dev_info *bdi)
+{
+	struct wb_writeback_args args = {
+		.sync_mode	= WB_SYNC_NONE,
+		.nr_pages	= LONG_MAX,
+		.for_background = 1,
+		.range_cyclic	= 1,
+	};
 	bdi_alloc_queue_work(bdi, &args);
 }
 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index aee5f6ce166..9ae2889096b 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -105,8 +105,8 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
 int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
-				long nr_pages);
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages);
+void bdi_start_background_writeback(struct backing_dev_info *bdi);
 int bdi_writeback_task(struct bdi_writeback *wb);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
 void bdi_arm_supers_timer(void);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index bbd396ac954..54f28bd493d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -597,7 +597,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 	    (!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
 			       + global_page_state(NR_UNSTABLE_NFS))
 					  > background_thresh)))
-		bdi_start_writeback(bdi, NULL, 0);
+		bdi_start_background_writeback(bdi);
 }
 
 void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -705,9 +705,8 @@ void laptop_mode_timer_fn(unsigned long data)
 	 * We want to write everything out, not just down to the dirty
 	 * threshold
 	 */
-
 	if (bdi_has_dirty_io(&q->backing_dev_info))
-		bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages);
+		bdi_start_writeback(&q->backing_dev_info, nr_pages);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From dc66c74de6f4238020db3e2041d4aca5c5b3e9bc Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Wed, 2 Jun 2010 14:31:29 +0200
Subject: drbd: Fixed a race between disk-attach and unexpected state changes

This was a very hard to trigger race condition.

If we got a state packet from the peer, after drbd_nl_disk() has
already changed the disk state to D_NEGOTIATING but
after_state_ch() was not yet run by the worker, then receive_state()
might called drbd_sync_handshake(), which in turn crashed
when accessing p_uuid.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_main.c | 2 --
 drivers/block/drbd/drbd_nl.c   | 6 ++++++
 include/linux/drbd.h           | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 6b077f93acc..7258c95e895 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1236,8 +1236,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	/* Last part of the attaching process ... */
 	if (ns.conn >= C_CONNECTED &&
 	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
-		kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */
-		mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */
 		drbd_send_sizes(mdev, 0, 0);  /* to start sync... */
 		drbd_send_uuids(mdev);
 		drbd_send_state(mdev);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 632e3245d1b..2151f18b21d 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1114,6 +1114,12 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 		mdev->new_state_tmp.i = ns.i;
 		ns.i = os.i;
 		ns.disk = D_NEGOTIATING;
+
+		/* We expect to receive up-to-date UUIDs soon.
+		   To avoid a race in receive_state, free p_uuid while
+		   holding req_lock. I.e. atomic with the state change */
+		kfree(mdev->p_uuid);
+		mdev->p_uuid = NULL;
 	}
 
 	rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 30da4ae4897..b8d2516668a 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -53,7 +53,7 @@
 
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.8rc2"
+#define REL_VERSION "8.3.8"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 94
-- 
cgit v1.2.3-70-g09d2


From da931a931da85218add949266238c54b5fecd37f Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 15 Jun 2010 09:52:37 +1000
Subject: agp: drop vmalloc flag.

Since the code that was too ugly to live is upstream, we can use it now,
instead of rolling our own.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/char/agp/generic.c  | 4 +---
 include/linux/agp_backend.h | 1 -
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index 4b51982fd23..4e414417730 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -97,20 +97,18 @@ EXPORT_SYMBOL(agp_flush_chipset);
 void agp_alloc_page_array(size_t size, struct agp_memory *mem)
 {
 	mem->pages = NULL;
-	mem->vmalloc_flag = false;
 
 	if (size <= 2*PAGE_SIZE)
 		mem->pages = kmalloc(size, GFP_KERNEL | __GFP_NORETRY);
 	if (mem->pages == NULL) {
 		mem->pages = vmalloc(size);
-		mem->vmalloc_flag = true;
 	}
 }
 EXPORT_SYMBOL(agp_alloc_page_array);
 
 void agp_free_page_array(struct agp_memory *mem)
 {
-	if (mem->vmalloc_flag) {
+	if (is_vmalloc_addr(mem->pages)) {
 		vfree(mem->pages);
 	} else {
 		kfree(mem->pages);
diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h
index 9101ed64f80..09ea4a1e950 100644
--- a/include/linux/agp_backend.h
+++ b/include/linux/agp_backend.h
@@ -79,7 +79,6 @@ struct agp_memory {
 	u32 physical;
 	bool is_bound;
 	bool is_flushed;
-	bool vmalloc_flag;
 	/* list of agp_memory mapped to the aperture */
 	struct list_head mapped_list;
 	/* DMA-mapped addresses */
-- 
cgit v1.2.3-70-g09d2


From b70e4f0529c089b00d0a6da13106db4de1ada4c7 Mon Sep 17 00:00:00 2001
From: Wu Zhangjin <wuzhangjin@gmail.com>
Date: Mon, 21 Jun 2010 19:09:09 +0800
Subject: tracing: Fix undeclared ENOSYS in include/linux/tracepoint.h

The header file include/linux/tracepoint.h may be included without
include/linux/errno.h and then the compiler will fail on building for
undelcared ENOSYS. This patch fixes this problem via including <linux/errno.h>
to include/linux/tracepoint.h.

Signed-off-by: Wu Zhangjin <wuzhangjin@gmail.com>
LKML-Reference: <1277118549-622-1-git-send-email-wuzhangjin@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 9a59d1f98cd..103d1b61aac 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -14,6 +14,7 @@
  * See the file COPYING for more details.
  */
 
+#include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/rcupdate.h>
 
-- 
cgit v1.2.3-70-g09d2


From bf988435bd5b53529f4408a8efb1f433f6ddfda9 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 28 Jun 2010 08:45:58 +0000
Subject: ethtool: Fix potential user buffer overflow for ETHTOOL_{G, S}RXFH

struct ethtool_rxnfc was originally defined in 2.6.27 for the
ETHTOOL_{G,S}RXFH command with only the cmd, flow_type and data
fields.  It was then extended in 2.6.30 to support various additional
commands.  These commands should have been defined to use a new
structure, but it is too late to change that now.

Since user-space may still be using the old structure definition
for the ETHTOOL_{G,S}RXFH commands, and since they do not need the
additional fields, only copy the originally defined fields to and
from user-space.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Cc: stable@kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h |  2 ++
 net/core/ethtool.c      | 36 +++++++++++++++++++++++++++---------
 2 files changed, 29 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 276b40a1683..b4207ca3ad5 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -379,6 +379,8 @@ struct ethtool_rxnfc {
 	__u32				flow_type;
 	/* The rx flow hash value or the rule DB size */
 	__u64				data;
+	/* The following fields are not valid and must not be used for
+	 * the ETHTOOL_{G,X}RXFH commands. */
 	struct ethtool_rx_flow_spec	fs;
 	__u32				rule_cnt;
 	__u32				rule_locs[0];
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a3a7e9a48df..75e4ffeb8cc 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -318,23 +318,33 @@ out:
 }
 
 static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
-						void __user *useraddr)
+						u32 cmd, void __user *useraddr)
 {
-	struct ethtool_rxnfc cmd;
+	struct ethtool_rxnfc info;
+	size_t info_size = sizeof(info);
 
 	if (!dev->ethtool_ops->set_rxnfc)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+	/* struct ethtool_rxnfc was originally defined for
+	 * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
+	 * members.  User-space might still be using that
+	 * definition. */
+	if (cmd == ETHTOOL_SRXFH)
+		info_size = (offsetof(struct ethtool_rxnfc, data) +
+			     sizeof(info.data));
+
+	if (copy_from_user(&info, useraddr, info_size))
 		return -EFAULT;
 
-	return dev->ethtool_ops->set_rxnfc(dev, &cmd);
+	return dev->ethtool_ops->set_rxnfc(dev, &info);
 }
 
 static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
-						void __user *useraddr)
+						u32 cmd, void __user *useraddr)
 {
 	struct ethtool_rxnfc info;
+	size_t info_size = sizeof(info);
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 	int ret;
 	void *rule_buf = NULL;
@@ -342,7 +352,15 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
 	if (!ops->get_rxnfc)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&info, useraddr, sizeof(info)))
+	/* struct ethtool_rxnfc was originally defined for
+	 * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
+	 * members.  User-space might still be using that
+	 * definition. */
+	if (cmd == ETHTOOL_GRXFH)
+		info_size = (offsetof(struct ethtool_rxnfc, data) +
+			     sizeof(info.data));
+
+	if (copy_from_user(&info, useraddr, info_size))
 		return -EFAULT;
 
 	if (info.cmd == ETHTOOL_GRXCLSRLALL) {
@@ -360,7 +378,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
 		goto err_out;
 
 	ret = -EFAULT;
-	if (copy_to_user(useraddr, &info, sizeof(info)))
+	if (copy_to_user(useraddr, &info, info_size))
 		goto err_out;
 
 	if (rule_buf) {
@@ -1517,12 +1535,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GRXCLSRLCNT:
 	case ETHTOOL_GRXCLSRULE:
 	case ETHTOOL_GRXCLSRLALL:
-		rc = ethtool_get_rxnfc(dev, useraddr);
+		rc = ethtool_get_rxnfc(dev, ethcmd, useraddr);
 		break;
 	case ETHTOOL_SRXFH:
 	case ETHTOOL_SRXCLSRLDEL:
 	case ETHTOOL_SRXCLSRLINS:
-		rc = ethtool_set_rxnfc(dev, useraddr);
+		rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
 		break;
 	case ETHTOOL_GGRO:
 		rc = ethtool_get_gro(dev, useraddr);
-- 
cgit v1.2.3-70-g09d2


From 57439f878afafefad8836ebf5c49da2a0a746105 Mon Sep 17 00:00:00 2001
From: "npiggin@suse.de" <npiggin@suse.de>
Date: Thu, 24 Jun 2010 13:02:14 +1000
Subject: fs: fix superblock iteration race

list_for_each_entry_safe is not suitable to protect against concurrent
modification of the list. 6754af6 introduced a race in sb walking.

list_for_each_entry can use the trick of pinning the current entry in
the list before we drop and retake the lock because it subsequently
follows cur->next. However list_for_each_entry_safe saves n=cur->next
for following before entering the loop body, so when the lock is
dropped, n may be deleted.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: John Stultz <johnstul@us.ibm.com>
Cc: Frank Mayhar <fmayhar@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dcache.c          |  2 ++
 fs/super.c           |  6 ++++++
 include/linux/list.h | 15 +++++++++++++++
 3 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index d96047b4a63..c8c78ba0782 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -590,6 +590,8 @@ static void prune_dcache(int count)
 			up_read(&sb->s_umount);
 		}
 		spin_lock(&sb_lock);
+		/* lock was dropped, must reset next */
+		list_safe_reset_next(sb, n, s_list);
 		count -= pruned;
 		__put_super(sb);
 		/* more work left to do? */
diff --git a/fs/super.c b/fs/super.c
index 5c35bc7a499..938119ab8dc 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -374,6 +374,8 @@ void sync_supers(void)
 			up_read(&sb->s_umount);
 
 			spin_lock(&sb_lock);
+			/* lock was dropped, must reset next */
+			list_safe_reset_next(sb, n, s_list);
 			__put_super(sb);
 		}
 	}
@@ -405,6 +407,8 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
 		up_read(&sb->s_umount);
 
 		spin_lock(&sb_lock);
+		/* lock was dropped, must reset next */
+		list_safe_reset_next(sb, n, s_list);
 		__put_super(sb);
 	}
 	spin_unlock(&sb_lock);
@@ -585,6 +589,8 @@ static void do_emergency_remount(struct work_struct *work)
 		}
 		up_write(&sb->s_umount);
 		spin_lock(&sb_lock);
+		/* lock was dropped, must reset next */
+		list_safe_reset_next(sb, n, s_list);
 		__put_super(sb);
 	}
 	spin_unlock(&sb_lock);
diff --git a/include/linux/list.h b/include/linux/list.h
index 8392884a297..5d57a3a1fa1 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -544,6 +544,21 @@ static inline void list_splice_tail_init(struct list_head *list,
 	     &pos->member != (head); 					\
 	     pos = n, n = list_entry(n->member.prev, typeof(*n), member))
 
+/**
+ * list_safe_reset_next - reset a stale list_for_each_entry_safe loop
+ * @pos:	the loop cursor used in the list_for_each_entry_safe loop
+ * @n:		temporary storage used in list_for_each_entry_safe
+ * @member:	the name of the list_struct within the struct.
+ *
+ * list_safe_reset_next is not safe to use in general if the list may be
+ * modified concurrently (eg. the lock is dropped in the loop body). An
+ * exception to this is if the cursor element (pos) is pinned in the list,
+ * and list_safe_reset_next is called after re-taking the lock and before
+ * completing the current iteration of the loop body.
+ */
+#define list_safe_reset_next(pos, n, member)				\
+	n = list_entry(pos->member.next, typeof(*pos), member)
+
 /*
  * Double linked lists with a single pointer list head.
  * Mostly useful for hash tables where the two pointer list head is
-- 
cgit v1.2.3-70-g09d2


From 9c695203a7ddbe49dba5f22f4c941d24f47475df Mon Sep 17 00:00:00 2001
From: Mikael Pettersson <mikpe@it.uu.se>
Date: Tue, 29 Jun 2010 15:05:25 -0700
Subject: compiler-gcc.h: gcc-4.5 needs noclone and noinline on __naked
 functions

A __naked function is defined in C but with a body completely implemented
by asm(), including any prologue and epilogue.  These asm() bodies expect
standard calling conventions for parameter passing.  Older GCCs implement
that correctly, but 4.[56] currently do not, see GCC PR44290.  In the
Linux kernel this breaks ARM, causing most arch/arm/mm/copypage-*.c
modules to get miscompiled, resulting in kernel crashes during bootup.

Part of the kernel fix is to augment the __naked function attribute to
also imply noinline and noclone.  This patch implements that, and has been
verified to fix boot failures with gcc-4.5 compiled 2.6.34 and 2.6.35-rc1
kernels.  The patch is a no-op with older GCCs.

Signed-off-by: Mikael Pettersson <mikpe@it.uu.se>
Signed-off-by: Khem Raj <raj.khem@gmail.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc.h  | 10 +++++++++-
 include/linux/compiler-gcc4.h |  4 ++++
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 73dcf804bc9..0da5b187f12 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -58,8 +58,12 @@
  * naked functions because then mcount is called without stack and frame pointer
  * being set up and there is no chance to restore the lr register to the value
  * before mcount was called.
+ *
+ * The asm() bodies of naked functions often depend on standard calling conventions,
+ * therefore they must be noinline and noclone.  GCC 4.[56] currently fail to enforce
+ * this, so we must do so ourselves.  See GCC PR44290.
  */
-#define __naked				__attribute__((naked)) notrace
+#define __naked				__attribute__((naked)) noinline __noclone notrace
 
 #define __noreturn			__attribute__((noreturn))
 
@@ -85,3 +89,7 @@
 #define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h)
 #define gcc_header(x) _gcc_header(x)
 #include gcc_header(__GNUC__)
+
+#if !defined(__noclone)
+#define __noclone	/* not needed */
+#endif
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 94dea3ffbfa..fcfa5b9a431 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -48,6 +48,10 @@
  * unreleased.  Really, we need to have autoconf for the kernel.
  */
 #define unreachable() __builtin_unreachable()
+
+/* Mark a function definition as prohibited from being cloned. */
+#define __noclone	__attribute__((__noclone__))
+
 #endif
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From c59690fa484c04ab96fe932241b569a09755a4d2 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Wed, 30 Jun 2010 00:53:53 -0700
Subject: Input: i8042 - mark stubs in i8042.h "static inline"

Otherwise we may run into following:

drivers/platform/built-in.o: In function `i8042_lock_chip':
/home/test/ws2/projects/linux-2.6/include/linux/i8042.h:50: multiple definition of `i8042_lock_chip'
drivers/input/serio/built-in.o:/home/test/ws2/projects/linux-2.6/include/linux/i8042.h:50: first defined here
...
make[1]: *** [drivers/built-in.o] Error 1
make: *** [drivers] Error 2

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/i8042.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i8042.h b/include/linux/i8042.h
index 9bf6870ee5f..a986ff58894 100644
--- a/include/linux/i8042.h
+++ b/include/linux/i8042.h
@@ -46,31 +46,31 @@ int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str,
 
 #else
 
-void i8042_lock_chip(void)
+static inline void i8042_lock_chip(void)
 {
 }
 
-void i8042_unlock_chip(void)
+static inline void i8042_unlock_chip(void)
 {
 }
 
-int i8042_command(unsigned char *param, int command)
+static inline int i8042_command(unsigned char *param, int command)
 {
 	return -ENODEV;
 }
 
-bool i8042_check_port_owner(const struct serio *serio)
+static inline bool i8042_check_port_owner(const struct serio *serio)
 {
 	return false;
 }
 
-int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str,
+static inline int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str,
 					struct serio *serio))
 {
 	return -ENODEV;
 }
 
-int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str,
+static inline int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str,
 				       struct serio *serio))
 {
 	return -ENODEV;
-- 
cgit v1.2.3-70-g09d2


From 9b2c2ff7a1c04e69842254dd4afe0f8ad4efa439 Mon Sep 17 00:00:00 2001
From: Saeed Bishara <saeed@marvell.com>
Date: Sun, 27 Jun 2010 00:26:43 +0000
Subject: mv643xx_eth: use sw csum for big packets

Some controllers (KW, Dove) limits the TX IP/layer4 checksum offloading to a max size.

Signed-off-by: Saeed Bishara <saeed@marvell.com>
Acked-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mv643xx_eth.c   | 9 +++++++--
 include/linux/mv643xx_eth.h | 5 +++++
 2 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index e345ec8cb47..73bb8ea6f54 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -289,6 +289,7 @@ struct mv643xx_eth_shared_private {
 	unsigned int t_clk;
 	int extended_rx_coal_limit;
 	int tx_bw_control;
+	int tx_csum_limit;
 };
 
 #define TX_BW_CONTROL_ABSENT		0
@@ -776,13 +777,16 @@ static int txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb)
 	l4i_chk = 0;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		int hdr_len;
 		int tag_bytes;
 
 		BUG_ON(skb->protocol != htons(ETH_P_IP) &&
 		       skb->protocol != htons(ETH_P_8021Q));
 
-		tag_bytes = (void *)ip_hdr(skb) - (void *)skb->data - ETH_HLEN;
-		if (unlikely(tag_bytes & ~12)) {
+		hdr_len = (void *)ip_hdr(skb) - (void *)skb->data;
+		tag_bytes = hdr_len - ETH_HLEN;
+		if (skb->len - hdr_len > mp->shared->tx_csum_limit ||
+		    unlikely(tag_bytes & ~12)) {
 			if (skb_checksum_help(skb) == 0)
 				goto no_csum;
 			kfree_skb(skb);
@@ -2666,6 +2670,7 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev)
 	 * Detect hardware parameters.
 	 */
 	msp->t_clk = (pd != NULL && pd->t_clk != 0) ? pd->t_clk : 133000000;
+	msp->tx_csum_limit = pd->tx_csum_limit ? pd->tx_csum_limit : 9 * 1024;
 	infer_hw_params(msp);
 
 	platform_set_drvdata(pdev, msp);
diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h
index cbbbe9bfeca..30b0c4e78f9 100644
--- a/include/linux/mv643xx_eth.h
+++ b/include/linux/mv643xx_eth.h
@@ -19,6 +19,11 @@ struct mv643xx_eth_shared_platform_data {
 	struct mbus_dram_target_info	*dram;
 	struct platform_device	*shared_smi;
 	unsigned int		t_clk;
+	/*
+	 * Max packet size for Tx IP/Layer 4 checksum, when set to 0, default
+	 * limit of 9KiB will be used.
+	 */
+	int			tx_csum_limit;
 };
 
 #define MV643XX_ETH_PHY_ADDR_DEFAULT	0
-- 
cgit v1.2.3-70-g09d2


From b26c949755c06ec79e55a75817210083bd78fc9a Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 23 Jun 2010 11:35:41 +1000
Subject: fb: fix colliding defines for fb flags.

When I added the flags I must have been using a 25 line terminal and missed the following flags.

The collided with flag has one user in staging despite being in-tree for 5 years.

I'm happy to push this via my drm tree unless someone really wants to do it.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Cc: stable@kernel.org
---
 include/linux/fb.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index 907ace3a64c..8e5a9dfb76b 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -786,8 +786,6 @@ struct fb_tile_ops {
 #define FBINFO_MISC_USEREVENT          0x10000 /* event request
 						  from userspace */
 #define FBINFO_MISC_TILEBLITTING       0x20000 /* use tile blitting */
-#define FBINFO_MISC_FIRMWARE           0x40000 /* a replaceable firmware
-						  inited framebuffer */
 
 /* A driver may set this flag to indicate that it does want a set_par to be
  * called every time when fbcon_switch is executed. The advantage is that with
@@ -801,6 +799,8 @@ struct fb_tile_ops {
  */
 #define FBINFO_MISC_ALWAYS_SETPAR   0x40000
 
+/* where the fb is a firmware driver, and can be replaced with a proper one */
+#define FBINFO_MISC_FIRMWARE        0x80000
 /*
  * Host and GPU endianness differ.
  */
-- 
cgit v1.2.3-70-g09d2


From 8c215bd3890c347dfb6a2db4779755f8b9c298a9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 1 Jul 2010 09:07:17 +0200
Subject: sched: Cure nr_iowait_cpu() users

Commit 0224cf4c5e (sched: Intoduce get_cpu_iowait_time_us())
broke things by not making sure preemption was indeed disabled
by the callers of nr_iowait_cpu() which took the iowait value of
the current cpu.

This resulted in a heap of preempt warnings. Cure this by making
nr_iowait_cpu() take a cpu number and fix up the callers to pass
in the right number.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Maxim Levitsky <maximlevitsky@gmail.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: linux-pm@lists.linux-foundation.org
LKML-Reference: <1277968037.1868.120.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/cpuidle/governors/menu.c |  4 ++--
 include/linux/sched.h            |  2 +-
 kernel/sched.c                   |  4 ++--
 kernel/time/tick-sched.c         | 16 ++++++++--------
 4 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 52ff8aa63f8..1b128702d30 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -143,7 +143,7 @@ static inline int which_bucket(unsigned int duration)
 	 * This allows us to calculate
 	 * E(duration)|iowait
 	 */
-	if (nr_iowait_cpu())
+	if (nr_iowait_cpu(smp_processor_id()))
 		bucket = BUCKETS/2;
 
 	if (duration < 10)
@@ -175,7 +175,7 @@ static inline int performance_multiplier(void)
 	mult += 2 * get_loadavg();
 
 	/* for IO wait tasks (per cpu!) we add 5x each */
-	mult += 10 * nr_iowait_cpu();
+	mult += 10 * nr_iowait_cpu(smp_processor_id());
 
 	return mult;
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f118809c953..747fcaedddb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -139,7 +139,7 @@ extern int nr_processes(void);
 extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_iowait(void);
-extern unsigned long nr_iowait_cpu(void);
+extern unsigned long nr_iowait_cpu(int cpu);
 extern unsigned long this_cpu_load(void);
 
 
diff --git a/kernel/sched.c b/kernel/sched.c
index a24d6d5d83f..f87abe3b017 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2864,9 +2864,9 @@ unsigned long nr_iowait(void)
 	return sum;
 }
 
-unsigned long nr_iowait_cpu(void)
+unsigned long nr_iowait_cpu(int cpu)
 {
-	struct rq *this = this_rq();
+	struct rq *this = cpu_rq(cpu);
 	return atomic_read(&this->nr_iowait);
 }
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1d7b9bc1c03..1a6f828e57a 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -154,14 +154,14 @@ static void tick_nohz_update_jiffies(ktime_t now)
  * Updates the per cpu time idle statistics counters
  */
 static void
-update_ts_time_stats(struct tick_sched *ts, ktime_t now, u64 *last_update_time)
+update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
 {
 	ktime_t delta;
 
 	if (ts->idle_active) {
 		delta = ktime_sub(now, ts->idle_entrytime);
 		ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
-		if (nr_iowait_cpu() > 0)
+		if (nr_iowait_cpu(cpu) > 0)
 			ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
 		ts->idle_entrytime = now;
 	}
@@ -175,19 +175,19 @@ static void tick_nohz_stop_idle(int cpu, ktime_t now)
 {
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 
-	update_ts_time_stats(ts, now, NULL);
+	update_ts_time_stats(cpu, ts, now, NULL);
 	ts->idle_active = 0;
 
 	sched_clock_idle_wakeup_event(0);
 }
 
-static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
+static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts)
 {
 	ktime_t now;
 
 	now = ktime_get();
 
-	update_ts_time_stats(ts, now, NULL);
+	update_ts_time_stats(cpu, ts, now, NULL);
 
 	ts->idle_entrytime = now;
 	ts->idle_active = 1;
@@ -216,7 +216,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
 	if (!tick_nohz_enabled)
 		return -1;
 
-	update_ts_time_stats(ts, ktime_get(), last_update_time);
+	update_ts_time_stats(cpu, ts, ktime_get(), last_update_time);
 
 	return ktime_to_us(ts->idle_sleeptime);
 }
@@ -242,7 +242,7 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
 	if (!tick_nohz_enabled)
 		return -1;
 
-	update_ts_time_stats(ts, ktime_get(), last_update_time);
+	update_ts_time_stats(cpu, ts, ktime_get(), last_update_time);
 
 	return ktime_to_us(ts->iowait_sleeptime);
 }
@@ -284,7 +284,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 	 */
 	ts->inidle = 1;
 
-	now = tick_nohz_start_idle(ts);
+	now = tick_nohz_start_idle(cpu, ts);
 
 	/*
 	 * If this cpu is offline and it is the one which updates
-- 
cgit v1.2.3-70-g09d2


From c6353b4520788e34098bbf61c73fb9618ca7fdd6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 17 Jun 2010 11:42:22 +0200
Subject: ahci,ata_generic: let ata_generic handle new MBP w/ MCP89
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For yet unknown reason, MCP89 on MBP 7,1 doesn't work w/ ahci under
linux but the controller doesn't require explicit mode setting and
works fine with ata_generic.  Make ahci ignore the controller on MBP
7,1 and let ata_generic take it for now.

Reported in bko#15923.

  https://bugzilla.kernel.org/show_bug.cgi?id=15923

NVIDIA is investigating why ahci mode doesn't work.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Peer Chen <pchen@nvidia.com>
Cc: stable@kernel.org
Reported-by: Anders Østhus <grapz666@gmail.com>
Reported-by: Andreas Graf <andreas_graf@csgraf.de>
Reported-by: Benoit Gschwind <gschwind@gnu-log.net>
Reported-by: Damien Cassou <damien.cassou@gmail.com>
Reported-by: tixetsal@juno.com
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/ahci.c        | 10 ++++++++++
 drivers/ata/ata_generic.c |  6 ++++++
 include/linux/pci_ids.h   |  1 +
 3 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 8ca16f54e1e..f2522534ae6 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1053,6 +1053,16 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (pdev->vendor == PCI_VENDOR_ID_MARVELL && !marvell_enable)
 		return -ENODEV;
 
+	/*
+	 * For some reason, MCP89 on MacBook 7,1 doesn't work with
+	 * ahci, use ata_generic instead.
+	 */
+	if (pdev->vendor == PCI_VENDOR_ID_NVIDIA &&
+	    pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP89_SATA &&
+	    pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE &&
+	    pdev->subsystem_device == 0xcb89)
+		return -ENODEV;
+
 	/* Promise's PDC42819 is a SAS/SATA controller that has an AHCI mode.
 	 * At the moment, we can only use the AHCI mode. Let the users know
 	 * that for SAS drives they're out of luck.
diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c
index 573158a9668..d4ccf74c4c9 100644
--- a/drivers/ata/ata_generic.c
+++ b/drivers/ata/ata_generic.c
@@ -168,6 +168,12 @@ static struct pci_device_id ata_generic[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_VIA,    PCI_DEVICE_ID_VIA_82C561), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_OPTI,   PCI_DEVICE_ID_OPTI_82C558), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_CENATEK,PCI_DEVICE_ID_CENATEK_IDE), },
+	/*
+	 * For some reason, MCP89 on MacBook 7,1 doesn't work with
+	 * ahci, use ata_generic instead.
+	 */
+	{ PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP89_SATA,
+	  PCI_VENDOR_ID_APPLE, 0xcb89, },
 #if !defined(CONFIG_PATA_TOSHIBA) && !defined(CONFIG_PATA_TOSHIBA_MODULE)
 	{ PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2),  },
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 4eb467910a4..3bedcc149c8 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1261,6 +1261,7 @@
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE       0x0759
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_SMBUS     0x07D8
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP79_SMBUS     0x0AA2
+#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP89_SATA	    0x0D85
 
 #define PCI_VENDOR_ID_IMS		0x10e0
 #define PCI_DEVICE_ID_IMS_TT128		0x9128
-- 
cgit v1.2.3-70-g09d2


From f0796d5c73e59786d09a1e617689d1d415f2db44 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Thu, 1 Jul 2010 13:21:57 +0000
Subject: net: decreasing real_num_tx_queues needs to flush qdisc

Reducing real_num_queues needs to flush the qdisc otherwise
skbs with queue_mappings greater then real_num_tx_queues can
be sent to the underlying driver.

The flow for this is,

dev_queue_xmit()
	dev_pick_tx()
		skb_tx_hash()  => hash using real_num_tx_queues
		skb_set_queue_mapping()
	...
	qdisc_enqueue_root() => enqueue skb on txq from hash
...
dev->real_num_tx_queues -= n
...
sch_direct_xmit()
	dev_hard_start_xmit()
		ndo_start_xmit(skb,dev) => skb queue set with old hash

skbs are enqueued on the qdisc with skb->queue_mapping set
0 < queue_mappings < real_num_tx_queues.  When the driver
decreases real_num_tx_queues skb's may be dequeued from the
qdisc with a queue_mapping greater then real_num_tx_queues.

This fixes a case in ixgbe where this was occurring with DCB
and FCoE. Because the driver is using queue_mapping to map
skbs to tx descriptor rings we can potentially map skbs to
rings that no longer exist.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe_main.c |  2 +-
 include/linux/netdevice.h      |  3 +++
 include/net/sch_generic.h      | 12 ++++++++----
 net/core/dev.c                 | 18 ++++++++++++++++++
 4 files changed, 30 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index a0b33165b98..7b5d9764f31 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -4001,7 +4001,7 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
 
 done:
 	/* Notify the stack of the (possibly) reduced Tx Queue count. */
-	adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
+	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
 }
 
 static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 40291f37502..5e6188d9f01 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1656,6 +1656,9 @@ static inline int netif_is_multiqueue(const struct net_device *dev)
 	return (dev->num_tx_queues > 1);
 }
 
+extern void netif_set_real_num_tx_queues(struct net_device *dev,
+					 unsigned int txq);
+
 /* Use this variant when it is known for sure that it
  * is executing from hardware interrupt context or with hardware interrupts
  * disabled.
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ba749be1e35..433604bb3fe 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -313,13 +313,12 @@ extern void qdisc_calculate_pkt_len(struct sk_buff *skb,
 extern void tcf_destroy(struct tcf_proto *tp);
 extern void tcf_destroy_chain(struct tcf_proto **fl);
 
-/* Reset all TX qdiscs of a device.  */
-static inline void qdisc_reset_all_tx(struct net_device *dev)
+/* Reset all TX qdiscs greater then index of a device.  */
+static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
 {
-	unsigned int i;
 	struct Qdisc *qdisc;
 
-	for (i = 0; i < dev->num_tx_queues; i++) {
+	for (; i < dev->num_tx_queues; i++) {
 		qdisc = netdev_get_tx_queue(dev, i)->qdisc;
 		if (qdisc) {
 			spin_lock_bh(qdisc_lock(qdisc));
@@ -329,6 +328,11 @@ static inline void qdisc_reset_all_tx(struct net_device *dev)
 	}
 }
 
+static inline void qdisc_reset_all_tx(struct net_device *dev)
+{
+	qdisc_reset_all_tx_gt(dev, 0);
+}
+
 /* Are all TX queues of the device empty?  */
 static inline bool qdisc_all_tx_empty(const struct net_device *dev)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index 2b3bf53bc68..723a34710ad 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1553,6 +1553,24 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 }
 
+/*
+ * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
+ * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ */
+void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+{
+	unsigned int real_num = dev->real_num_tx_queues;
+
+	if (unlikely(txq > dev->num_tx_queues))
+		;
+	else if (txq > real_num)
+		dev->real_num_tx_queues = txq;
+	else if (txq < real_num) {
+		dev->real_num_tx_queues = txq;
+		qdisc_reset_all_tx_gt(dev, txq);
+	}
+}
+EXPORT_SYMBOL(netif_set_real_num_tx_queues);
 
 static inline void __netif_reschedule(struct Qdisc *q)
 {
-- 
cgit v1.2.3-70-g09d2


From e2aec372ff4b7e78e79c308104a860ae0ed20950 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 1 Jul 2010 13:18:58 +0000
Subject: linux/net.h: fix kernel-doc warnings

Fix kernel-doc warnings in linux/net.h:

Warning(include/linux/net.h:151): No description found for parameter 'wq'
Warning(include/linux/net.h:151): Excess struct/union/enum/typedef member 'fasync_list' description in 'socket'
Warning(include/linux/net.h:151): Excess struct/union/enum/typedef member 'wait' description in 'socket'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 2b4deeeb864..dee0b11a875 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -129,10 +129,9 @@ struct socket_wq {
  *  @type: socket type (%SOCK_STREAM, etc)
  *  @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc)
  *  @ops: protocol specific socket operations
- *  @fasync_list: Asynchronous wake up list
  *  @file: File back pointer for gc
  *  @sk: internal networking protocol agnostic socket representation
- *  @wait: wait queue for several uses
+ *  @wq: wait queue for several uses
  */
 struct socket {
 	socket_state		state;
-- 
cgit v1.2.3-70-g09d2


From ff49d74ad383f54041378144ca1a229ee9aeaa59 Mon Sep 17 00:00:00 2001
From: Yehuda Sadeh <yehuda@hq.newdream.net>
Date: Sat, 3 Jul 2010 13:07:35 +1000
Subject: module: initialize module dynamic debug later

We should initialize the module dynamic debug datastructures
only after determining that the module is not loaded yet. This
fixes a bug that introduced in 2.6.35-rc2, where when a trying
to load a module twice, we also load it's dynamic printing data
twice which causes all sorts of nasty issues. Also handle
the dynamic debug cleanup later on failure.

Signed-off-by: Yehuda Sadeh <yehuda@hq.newdream.net>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (removed a #ifdef)
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dynamic_debug.h |  4 ++--
 kernel/module.c               | 23 +++++++++++++++--------
 lib/dynamic_debug.c           |  2 +-
 3 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index b3cd4de9432..52c0da4bdd1 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -40,7 +40,7 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n,
 				const char *modname);
 
 #if defined(CONFIG_DYNAMIC_DEBUG)
-extern int ddebug_remove_module(char *mod_name);
+extern int ddebug_remove_module(const char *mod_name);
 
 #define __dynamic_dbg_enabled(dd)  ({	     \
 	int __ret = 0;							     \
@@ -73,7 +73,7 @@ extern int ddebug_remove_module(char *mod_name);
 
 #else
 
-static inline int ddebug_remove_module(char *mod)
+static inline int ddebug_remove_module(const char *mod)
 {
 	return 0;
 }
diff --git a/kernel/module.c b/kernel/module.c
index 8c6b42840dd..5d2d28197c8 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2062,6 +2062,12 @@ static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num)
 #endif
 }
 
+static void dynamic_debug_remove(struct _ddebug *debug)
+{
+	if (debug)
+		ddebug_remove_module(debug->modname);
+}
+
 static void *module_alloc_update_bounds(unsigned long size)
 {
 	void *ret = module_alloc(size);
@@ -2124,6 +2130,8 @@ static noinline struct module *load_module(void __user *umod,
 	void *ptr = NULL; /* Stops spurious gcc warning */
 	unsigned long symoffs, stroffs, *strmap;
 	void __percpu *percpu;
+	struct _ddebug *debug = NULL;
+	unsigned int num_debug = 0;
 
 	mm_segment_t old_fs;
 
@@ -2476,15 +2484,9 @@ static noinline struct module *load_module(void __user *umod,
 	kfree(strmap);
 	strmap = NULL;
 
-	if (!mod->taints) {
-		struct _ddebug *debug;
-		unsigned int num_debug;
-
+	if (!mod->taints)
 		debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
 				     sizeof(*debug), &num_debug);
-		if (debug)
-			dynamic_debug_setup(debug, num_debug);
-	}
 
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
@@ -2526,10 +2528,13 @@ static noinline struct module *load_module(void __user *umod,
 		goto unlock;
 	}
 
+	if (debug)
+		dynamic_debug_setup(debug, num_debug);
+
 	/* Find duplicate symbols */
 	err = verify_export_symbols(mod);
 	if (err < 0)
-		goto unlock;
+		goto ddebug;
 
 	list_add_rcu(&mod->list, &modules);
 	mutex_unlock(&module_mutex);
@@ -2557,6 +2562,8 @@ static noinline struct module *load_module(void __user *umod,
 	mutex_lock(&module_mutex);
 	/* Unlink carefully: kallsyms could be walking list. */
 	list_del_rcu(&mod->list);
+ ddebug:
+	dynamic_debug_remove(debug);
  unlock:
 	mutex_unlock(&module_mutex);
 	synchronize_sched();
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 3df8eb17a60..02afc253372 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -692,7 +692,7 @@ static void ddebug_table_free(struct ddebug_table *dt)
  * Called in response to a module being unloaded.  Removes
  * any ddebug_table's which point at the module.
  */
-int ddebug_remove_module(char *mod_name)
+int ddebug_remove_module(const char *mod_name)
 {
 	struct ddebug_table *dt, *nextdt;
 	int ret = -ENOENT;
-- 
cgit v1.2.3-70-g09d2


From b945d6b2554d550fe95caadc61e521c0ad71fb9c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Sat, 29 May 2010 15:31:43 +0200
Subject: rbtree: Undo augmented trees performance damage and regression

Reimplement augmented RB-trees without sprinkling extra branches
all over the RB-tree code (which lives in the scheduler hot path).

This approach is 'borrowed' from Fabio's BFQ implementation and
relies on traversing the rebalance path after the RB-tree-op to
correct the heap property for insertion/removal and make up for
the damage done by the tree rotations.

For insertion the rebalance path is trivially that from the new
node upwards to the root, for removal it is that from the deepest
node in the path from the to be removed node that will still
be around after the removal.

[ This patch also fixes a video driver regression reported by
  Ali Gholami Rudi - the memtype->subtree_max_end was updated
  incorrectly. ]

Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Ali Gholami Rudi <ali@rudi.ir>
Cc: Fabio Checconi <fabio@gandalf.sssup.it>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1275414172.27810.27961.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pat_rbtree.c |  34 +++-----------
 include/linux/rbtree.h   |  13 ++++--
 lib/rbtree.c             | 116 +++++++++++++++++++++++++++++------------------
 3 files changed, 87 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index f20eeec85a8..8acaddd0fb2 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -34,8 +34,7 @@
  * memtype_lock protects the rbtree.
  */
 
-static void memtype_rb_augment_cb(struct rb_node *node);
-static struct rb_root memtype_rbroot = RB_AUGMENT_ROOT(&memtype_rb_augment_cb);
+static struct rb_root memtype_rbroot = RB_ROOT;
 
 static int is_node_overlap(struct memtype *node, u64 start, u64 end)
 {
@@ -56,7 +55,7 @@ static u64 get_subtree_max_end(struct rb_node *node)
 }
 
 /* Update 'subtree_max_end' for a node, based on node and its children */
-static void update_node_max_end(struct rb_node *node)
+static void memtype_rb_augment_cb(struct rb_node *node, void *__unused)
 {
 	struct memtype *data;
 	u64 max_end, child_max_end;
@@ -78,25 +77,6 @@ static void update_node_max_end(struct rb_node *node)
 	data->subtree_max_end = max_end;
 }
 
-/* Update 'subtree_max_end' for a node and all its ancestors */
-static void update_path_max_end(struct rb_node *node)
-{
-	u64 old_max_end, new_max_end;
-
-	while (node) {
-		struct memtype *data = container_of(node, struct memtype, rb);
-
-		old_max_end = data->subtree_max_end;
-		update_node_max_end(node);
-		new_max_end = data->subtree_max_end;
-
-		if (new_max_end == old_max_end)
-			break;
-
-		node = rb_parent(node);
-	}
-}
-
 /* Find the first (lowest start addr) overlapping range from rb tree */
 static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
 				u64 start, u64 end)
@@ -190,12 +170,6 @@ failure:
 	return -EBUSY;
 }
 
-static void memtype_rb_augment_cb(struct rb_node *node)
-{
-	if (node)
-		update_path_max_end(node);
-}
-
 static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
 {
 	struct rb_node **node = &(root->rb_node);
@@ -213,6 +187,7 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
 
 	rb_link_node(&newdata->rb, parent, node);
 	rb_insert_color(&newdata->rb, root);
+	rb_augment_insert(&newdata->rb, memtype_rb_augment_cb, NULL);
 }
 
 int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
@@ -234,13 +209,16 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
 
 struct memtype *rbt_memtype_erase(u64 start, u64 end)
 {
+	struct rb_node *deepest;
 	struct memtype *data;
 
 	data = memtype_rb_exact_match(&memtype_rbroot, start, end);
 	if (!data)
 		goto out;
 
+	deepest = rb_augment_erase_begin(&data->rb);
 	rb_erase(&data->rb, &memtype_rbroot);
+	rb_augment_erase_end(deepest, memtype_rb_augment_cb, NULL);
 out:
 	return data;
 }
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index fe1872e5b37..7066acb2c53 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -110,7 +110,6 @@ struct rb_node
 struct rb_root
 {
 	struct rb_node *rb_node;
-	void (*augment_cb)(struct rb_node *node);
 };
 
 
@@ -130,9 +129,7 @@ static inline void rb_set_color(struct rb_node *rb, int color)
 	rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
 }
 
-#define RB_ROOT	(struct rb_root) { NULL, NULL, }
-#define RB_AUGMENT_ROOT(x)	(struct rb_root) { NULL, x}
-
+#define RB_ROOT	(struct rb_root) { NULL, }
 #define	rb_entry(ptr, type, member) container_of(ptr, type, member)
 
 #define RB_EMPTY_ROOT(root)	((root)->rb_node == NULL)
@@ -142,6 +139,14 @@ static inline void rb_set_color(struct rb_node *rb, int color)
 extern void rb_insert_color(struct rb_node *, struct rb_root *);
 extern void rb_erase(struct rb_node *, struct rb_root *);
 
+typedef void (*rb_augment_f)(struct rb_node *node, void *data);
+
+extern void rb_augment_insert(struct rb_node *node,
+			      rb_augment_f func, void *data);
+extern struct rb_node *rb_augment_erase_begin(struct rb_node *node);
+extern void rb_augment_erase_end(struct rb_node *node,
+				 rb_augment_f func, void *data);
+
 /* Find logical next and previous nodes in a tree */
 extern struct rb_node *rb_next(const struct rb_node *);
 extern struct rb_node *rb_prev(const struct rb_node *);
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 15e10b1afdd..4693f79195d 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -44,11 +44,6 @@ static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
 	else
 		root->rb_node = right;
 	rb_set_parent(node, right);
-
-	if (root->augment_cb) {
-		root->augment_cb(node);
-		root->augment_cb(right);
-	}
 }
 
 static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
@@ -72,20 +67,12 @@ static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
 	else
 		root->rb_node = left;
 	rb_set_parent(node, left);
-
-	if (root->augment_cb) {
-		root->augment_cb(node);
-		root->augment_cb(left);
-	}
 }
 
 void rb_insert_color(struct rb_node *node, struct rb_root *root)
 {
 	struct rb_node *parent, *gparent;
 
-	if (root->augment_cb)
-		root->augment_cb(node);
-
 	while ((parent = rb_parent(node)) && rb_is_red(parent))
 	{
 		gparent = rb_parent(parent);
@@ -240,15 +227,12 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 	else
 	{
 		struct rb_node *old = node, *left;
-		int old_parent_cb = 0;
-		int successor_parent_cb = 0;
 
 		node = node->rb_right;
 		while ((left = node->rb_left) != NULL)
 			node = left;
 
 		if (rb_parent(old)) {
-			old_parent_cb = 1;
 			if (rb_parent(old)->rb_left == old)
 				rb_parent(old)->rb_left = node;
 			else
@@ -263,10 +247,8 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 		if (parent == old) {
 			parent = node;
 		} else {
-			successor_parent_cb = 1;
 			if (child)
 				rb_set_parent(child, parent);
-
 			parent->rb_left = child;
 
 			node->rb_right = old->rb_right;
@@ -277,24 +259,6 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 		node->rb_left = old->rb_left;
 		rb_set_parent(old->rb_left, node);
 
-		if (root->augment_cb) {
-			/*
-			 * Here, three different nodes can have new children.
-			 * The parent of the successor node that was selected
-			 * to replace the node to be erased.
-			 * The node that is getting erased and is now replaced
-			 * by its successor.
-			 * The parent of the node getting erased-replaced.
-			 */
-			if (successor_parent_cb)
-				root->augment_cb(parent);
-
-			root->augment_cb(node);
-
-			if (old_parent_cb)
-				root->augment_cb(rb_parent(old));
-		}
-
 		goto color;
 	}
 
@@ -303,19 +267,15 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 
 	if (child)
 		rb_set_parent(child, parent);
-
-	if (parent) {
+	if (parent)
+	{
 		if (parent->rb_left == node)
 			parent->rb_left = child;
 		else
 			parent->rb_right = child;
-
-		if (root->augment_cb)
-			root->augment_cb(parent);
-
-	} else {
-		root->rb_node = child;
 	}
+	else
+		root->rb_node = child;
 
  color:
 	if (color == RB_BLACK)
@@ -323,6 +283,74 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 }
 EXPORT_SYMBOL(rb_erase);
 
+static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data)
+{
+	struct rb_node *parent;
+
+up:
+	func(node, data);
+	parent = rb_parent(node);
+	if (!parent)
+		return;
+
+	if (node == parent->rb_left && parent->rb_right)
+		func(parent->rb_right, data);
+	else if (parent->rb_left)
+		func(parent->rb_left, data);
+
+	node = parent;
+	goto up;
+}
+
+/*
+ * after inserting @node into the tree, update the tree to account for
+ * both the new entry and any damage done by rebalance
+ */
+void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data)
+{
+	if (node->rb_left)
+		node = node->rb_left;
+	else if (node->rb_right)
+		node = node->rb_right;
+
+	rb_augment_path(node, func, data);
+}
+
+/*
+ * before removing the node, find the deepest node on the rebalance path
+ * that will still be there after @node gets removed
+ */
+struct rb_node *rb_augment_erase_begin(struct rb_node *node)
+{
+	struct rb_node *deepest;
+
+	if (!node->rb_right && !node->rb_left)
+		deepest = rb_parent(node);
+	else if (!node->rb_right)
+		deepest = node->rb_left;
+	else if (!node->rb_left)
+		deepest = node->rb_right;
+	else {
+		deepest = rb_next(node);
+		if (deepest->rb_right)
+			deepest = deepest->rb_right;
+		else if (rb_parent(deepest) != node)
+			deepest = rb_parent(deepest);
+	}
+
+	return deepest;
+}
+
+/*
+ * after removal, update the tree to account for the removed entry
+ * and any rebalance damage.
+ */
+void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data)
+{
+	if (node)
+		rb_augment_path(node, func, data);
+}
+
 /*
  * This function returns the first node (in sort order) of the tree.
  */
-- 
cgit v1.2.3-70-g09d2


From bcfcc450baaaa44afc1d3c51ef96a53338ff0eb2 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 2 Jul 2010 07:08:44 +0000
Subject: net: Fix definition of netif_vdbg() when VERBOSE_DEBUG is defined

netif_vdbg() was originally defined as entirely equivalent to
netdev_vdbg(), but I assume that it was intended to take the same
parameters as netif_dbg() etc.  (Currently it is only used by the
sfc driver, in which I worked on that assumption.)

In commit a4ed89c I changed the definition used when VERBOSE_DEBUG is
not defined, but I failed to notice that the definition used when
VERBOSE_DEBUG is defined was also not as I expected.  Change that to
match netif_dbg() as well.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5e6188d9f01..b21e4054c12 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2332,7 +2332,7 @@ do {								\
 #endif
 
 #if defined(VERBOSE_DEBUG)
-#define netif_vdbg	netdev_dbg
+#define netif_vdbg	netif_dbg
 #else
 #define netif_vdbg(priv, type, dev, format, args...)		\
 ({								\
-- 
cgit v1.2.3-70-g09d2


From 9c3a8ee8a1d72c5c0d7fbdf426d80e270ddfa54c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 10 Jun 2010 12:07:27 +0200
Subject: writeback: remove writeback_inodes_wbc

This was just an odd wrapper around writeback_inodes_wb.  Removing this
also allows to get rid of the bdi member of struct writeback_control
which was rather out of place there.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/afs/write.c            |  1 -
 fs/btrfs/extent_io.c      |  2 --
 fs/fs-writeback.c         | 12 ++----------
 include/linux/writeback.h |  5 ++---
 mm/backing-dev.c          |  3 +--
 mm/page-writeback.c       |  3 +--
 6 files changed, 6 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3dab9e9948d..722743b152d 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode)
 {
 	struct address_space *mapping = vnode->vfs_inode.i_mapping;
 	struct writeback_control wbc = {
-		.bdi		= mapping->backing_dev_info,
 		.sync_mode	= WB_SYNC_ALL,
 		.nr_to_write	= LONG_MAX,
 		.range_cyclic	= 1,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a4080c21ec5..d74e6af9b53 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 	};
 	struct writeback_control wbc_writepages = {
-		.bdi		= wbc->bdi,
 		.sync_mode	= wbc->sync_mode,
 		.older_than_this = NULL,
 		.nr_to_write	= 64,
@@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
 		.sync_io = mode == WB_SYNC_ALL,
 	};
 	struct writeback_control wbc_writepages = {
-		.bdi		= inode->i_mapping->backing_dev_info,
 		.sync_mode	= mode,
 		.older_than_this = NULL,
 		.nr_to_write	= nr_pages * 2,
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6981e4b7c14..94a602e98bb 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -614,8 +614,8 @@ static int writeback_sb_inodes(struct super_block *sb,
 	return 1;
 }
 
-static void writeback_inodes_wb(struct bdi_writeback *wb,
-				struct writeback_control *wbc)
+void writeback_inodes_wb(struct bdi_writeback *wb,
+		struct writeback_control *wbc)
 {
 	int ret = 0;
 
@@ -660,13 +660,6 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 	/* Leave any unwritten inodes on b_io */
 }
 
-void writeback_inodes_wbc(struct writeback_control *wbc)
-{
-	struct backing_dev_info *bdi = wbc->bdi;
-
-	writeback_inodes_wb(&bdi->wb, wbc);
-}
-
 /*
  * The maximum number of pages to writeout in a single bdi flush/kupdate
  * operation.  We do this so we don't hold I_SYNC against an inode for
@@ -705,7 +698,6 @@ static long wb_writeback(struct bdi_writeback *wb,
 			 struct wb_writeback_args *args)
 {
 	struct writeback_control wbc = {
-		.bdi			= wb->bdi,
 		.sb			= args->sb,
 		.sync_mode		= args->sync_mode,
 		.older_than_this	= NULL,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index d63ef8f9609..f6756f6a610 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -27,8 +27,6 @@ enum writeback_sync_modes {
  * in a manner such that unspecified fields are set to zero.
  */
 struct writeback_control {
-	struct backing_dev_info *bdi;	/* If !NULL, only write back this
-					   queue */
 	struct super_block *sb;		/* if !NULL, only write inodes from
 					   this super_block */
 	enum writeback_sync_modes sync_mode;
@@ -66,7 +64,8 @@ int inode_wait(void *);
 void writeback_inodes_sb(struct super_block *);
 int writeback_inodes_sb_if_idle(struct super_block *);
 void sync_inodes_sb(struct super_block *);
-void writeback_inodes_wbc(struct writeback_control *wbc);
+void writeback_inodes_wb(struct bdi_writeback *wb,
+		struct writeback_control *wbc);
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
 void wakeup_flusher_threads(long nr_pages);
 
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 660a87a2251..6e0b09a1ec2 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -340,14 +340,13 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
 static void bdi_flush_io(struct backing_dev_info *bdi)
 {
 	struct writeback_control wbc = {
-		.bdi			= bdi,
 		.sync_mode		= WB_SYNC_NONE,
 		.older_than_this	= NULL,
 		.range_cyclic		= 1,
 		.nr_to_write		= 1024,
 	};
 
-	writeback_inodes_wbc(&wbc);
+	writeback_inodes_wb(&bdi->wb, &wbc);
 }
 
 /*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 54f28bd493d..37498ef6154 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -495,7 +495,6 @@ static void balance_dirty_pages(struct address_space *mapping,
 
 	for (;;) {
 		struct writeback_control wbc = {
-			.bdi		= bdi,
 			.sync_mode	= WB_SYNC_NONE,
 			.older_than_this = NULL,
 			.nr_to_write	= write_chunk,
@@ -537,7 +536,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 * up.
 		 */
 		if (bdi_nr_reclaimable > bdi_thresh) {
-			writeback_inodes_wbc(&wbc);
+			writeback_inodes_wb(&bdi->wb, &wbc);
 			pages_written += write_chunk - wbc.nr_to_write;
 			get_dirty_limits(&background_thresh, &dirty_thresh,
 				       &bdi_thresh, bdi);
-- 
cgit v1.2.3-70-g09d2


From edadfb10ba35da7253541e4155aa92eff758ebe6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 10 Jun 2010 12:07:54 +0200
Subject: writeback: split writeback_inodes_wb

The case where we have a superblock doesn't require a loop here as we scan
over all inodes in writeback_sb_inodes. Split it out into a separate helper
to make the code simpler.  This also allows to get rid of the sb member in
struct writeback_control, which was rather out of place there.

Also update the comments in writeback_sb_inodes that explain the handling
of inodes from wrong superblocks.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c         | 82 ++++++++++++++++++++++++++---------------------
 include/linux/writeback.h |  2 --
 2 files changed, 46 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 94a602e98bb..8cc06d5432b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -554,29 +554,41 @@ static bool pin_sb_for_writeback(struct super_block *sb)
 
 /*
  * Write a portion of b_io inodes which belong to @sb.
- * If @wbc->sb != NULL, then find and write all such
+ *
+ * If @only_this_sb is true, then find and write all such
  * inodes. Otherwise write only ones which go sequentially
  * in reverse order.
+ *
  * Return 1, if the caller writeback routine should be
  * interrupted. Otherwise return 0.
  */
-static int writeback_sb_inodes(struct super_block *sb,
-			       struct bdi_writeback *wb,
-			       struct writeback_control *wbc)
+static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
+		struct writeback_control *wbc, bool only_this_sb)
 {
 	while (!list_empty(&wb->b_io)) {
 		long pages_skipped;
 		struct inode *inode = list_entry(wb->b_io.prev,
 						 struct inode, i_list);
-		if (wbc->sb && sb != inode->i_sb) {
-			/* super block given and doesn't
-			   match, skip this inode */
-			redirty_tail(inode);
-			continue;
-		}
-		if (sb != inode->i_sb)
-			/* finish with this superblock */
+
+		if (inode->i_sb != sb) {
+			if (only_this_sb) {
+				/*
+				 * We only want to write back data for this
+				 * superblock, move all inodes not belonging
+				 * to it back onto the dirty list.
+				 */
+				redirty_tail(inode);
+				continue;
+			}
+
+			/*
+			 * The inode belongs to a different superblock.
+			 * Bounce back to the caller to unpin this and
+			 * pin the next superblock.
+			 */
 			return 0;
+		}
+
 		if (inode->i_state & (I_NEW | I_WILL_FREE)) {
 			requeue_io(inode);
 			continue;
@@ -629,29 +641,12 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
 						 struct inode, i_list);
 		struct super_block *sb = inode->i_sb;
 
-		if (wbc->sb) {
-			/*
-			 * We are requested to write out inodes for a specific
-			 * superblock.  This means we already have s_umount
-			 * taken by the caller which also waits for us to
-			 * complete the writeout.
-			 */
-			if (sb != wbc->sb) {
-				redirty_tail(inode);
-				continue;
-			}
-
-			WARN_ON(!rwsem_is_locked(&sb->s_umount));
-
-			ret = writeback_sb_inodes(sb, wb, wbc);
-		} else {
-			if (!pin_sb_for_writeback(sb)) {
-				requeue_io(inode);
-				continue;
-			}
-			ret = writeback_sb_inodes(sb, wb, wbc);
-			drop_super(sb);
+		if (!pin_sb_for_writeback(sb)) {
+			requeue_io(inode);
+			continue;
 		}
+		ret = writeback_sb_inodes(sb, wb, wbc, false);
+		drop_super(sb);
 
 		if (ret)
 			break;
@@ -660,6 +655,19 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
 	/* Leave any unwritten inodes on b_io */
 }
 
+static void __writeback_inodes_sb(struct super_block *sb,
+		struct bdi_writeback *wb, struct writeback_control *wbc)
+{
+	WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+	wbc->wb_start = jiffies; /* livelock avoidance */
+	spin_lock(&inode_lock);
+	if (!wbc->for_kupdate || list_empty(&wb->b_io))
+		queue_io(wb, wbc->older_than_this);
+	writeback_sb_inodes(sb, wb, wbc, true);
+	spin_unlock(&inode_lock);
+}
+
 /*
  * The maximum number of pages to writeout in a single bdi flush/kupdate
  * operation.  We do this so we don't hold I_SYNC against an inode for
@@ -698,7 +706,6 @@ static long wb_writeback(struct bdi_writeback *wb,
 			 struct wb_writeback_args *args)
 {
 	struct writeback_control wbc = {
-		.sb			= args->sb,
 		.sync_mode		= args->sync_mode,
 		.older_than_this	= NULL,
 		.for_kupdate		= args->for_kupdate,
@@ -736,7 +743,10 @@ static long wb_writeback(struct bdi_writeback *wb,
 		wbc.more_io = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		wbc.pages_skipped = 0;
-		writeback_inodes_wb(wb, &wbc);
+		if (args->sb)
+			__writeback_inodes_sb(args->sb, wb, &wbc);
+		else
+			writeback_inodes_wb(wb, &wbc);
 		args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index f6756f6a610..c24eca71e80 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -27,8 +27,6 @@ enum writeback_sync_modes {
  * in a manner such that unspecified fields are set to zero.
  */
 struct writeback_control {
-	struct super_block *sb;		/* if !NULL, only write inodes from
-					   this super_block */
 	enum writeback_sync_modes sync_mode;
 	unsigned long *older_than_this;	/* If !NULL, only write back inodes
 					   older than this */
-- 
cgit v1.2.3-70-g09d2


From 83ba7b071f30f7c01f72518ad72d5cd203c27502 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 Jul 2010 08:59:53 +0200
Subject: writeback: simplify the write back thread queue

First remove items from work_list as soon as we start working on them.  This
means we don't have to track any pending or visited state and can get
rid of all the RCU magic freeing the work items - we can simply free
them once the operation has finished.  Second use a real completion for
tracking synchronous requests - if the caller sets the completion pointer
we complete it, otherwise use it as a boolean indicator that we can free
the work item directly.  Third unify struct wb_writeback_args and struct
bdi_work into a single data structure, wb_writeback_work.  Previous we
set all parameters into a struct wb_writeback_args, copied it into
struct bdi_work, copied it again on the stack to use it there.  Instead
of just allocate one structure dynamically or on the stack and use it
all the way through the stack.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c           | 253 ++++++++++++--------------------------------
 include/linux/backing-dev.h |   2 -
 mm/backing-dev.c            |  14 +--
 3 files changed, 72 insertions(+), 197 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8cc06d5432b..d5be1693ac9 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -38,43 +38,18 @@ int nr_pdflush_threads;
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
-struct wb_writeback_args {
+struct wb_writeback_work {
 	long nr_pages;
 	struct super_block *sb;
 	enum writeback_sync_modes sync_mode;
 	unsigned int for_kupdate:1;
 	unsigned int range_cyclic:1;
 	unsigned int for_background:1;
-};
 
-/*
- * Work items for the bdi_writeback threads
- */
-struct bdi_work {
 	struct list_head list;		/* pending work list */
-	struct rcu_head rcu_head;	/* for RCU free/clear of work */
-
-	unsigned long seen;		/* threads that have seen this work */
-	atomic_t pending;		/* number of threads still to do work */
-
-	struct wb_writeback_args args;	/* writeback arguments */
-
-	unsigned long state;		/* flag bits, see WS_* */
-};
-
-enum {
-	WS_INPROGRESS = 0,
-	WS_ONSTACK,
+	struct completion *done;	/* set if the caller waits */
 };
 
-static inline void bdi_work_init(struct bdi_work *work,
-				 struct wb_writeback_args *args)
-{
-	INIT_RCU_HEAD(&work->rcu_head);
-	work->args = *args;
-	__set_bit(WS_INPROGRESS, &work->state);
-}
-
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
@@ -87,49 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 	return !list_empty(&bdi->work_list);
 }
 
-static void bdi_work_free(struct rcu_head *head)
-{
-	struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
-
-	clear_bit(WS_INPROGRESS, &work->state);
-	smp_mb__after_clear_bit();
-	wake_up_bit(&work->state, WS_INPROGRESS);
-
-	if (!test_bit(WS_ONSTACK, &work->state))
-		kfree(work);
-}
-
-static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
-{
-	/*
-	 * The caller has retrieved the work arguments from this work,
-	 * drop our reference. If this is the last ref, delete and free it
-	 */
-	if (atomic_dec_and_test(&work->pending)) {
-		struct backing_dev_info *bdi = wb->bdi;
-
-		spin_lock(&bdi->wb_lock);
-		list_del_rcu(&work->list);
-		spin_unlock(&bdi->wb_lock);
-
-		call_rcu(&work->rcu_head, bdi_work_free);
-	}
-}
-
-static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
+static void bdi_queue_work(struct backing_dev_info *bdi,
+		struct wb_writeback_work *work)
 {
-	work->seen = bdi->wb_mask;
-	BUG_ON(!work->seen);
-	atomic_set(&work->pending, bdi->wb_cnt);
-	BUG_ON(!bdi->wb_cnt);
-
-	/*
-	 * list_add_tail_rcu() contains the necessary barriers to
-	 * make sure the above stores are seen before the item is
-	 * noticed on the list
-	 */
 	spin_lock(&bdi->wb_lock);
-	list_add_tail_rcu(&work->list, &bdi->work_list);
+	list_add_tail(&work->list, &bdi->work_list);
 	spin_unlock(&bdi->wb_lock);
 
 	/*
@@ -146,55 +83,29 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
 	}
 }
 
-/*
- * Used for on-stack allocated work items. The caller needs to wait until
- * the wb threads have acked the work before it's safe to continue.
- */
-static void bdi_wait_on_work_done(struct bdi_work *work)
-{
-	wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait,
-		    TASK_UNINTERRUPTIBLE);
-}
-
-static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
-				 struct wb_writeback_args *args)
+static void
+__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+		bool range_cyclic, bool for_background)
 {
-	struct bdi_work *work;
+	struct wb_writeback_work *work;
 
 	/*
 	 * This is WB_SYNC_NONE writeback, so if allocation fails just
 	 * wakeup the thread for old dirty data writeback
 	 */
-	work = kmalloc(sizeof(*work), GFP_ATOMIC);
-	if (work) {
-		bdi_work_init(work, args);
-		bdi_queue_work(bdi, work);
-	} else {
-		struct bdi_writeback *wb = &bdi->wb;
-
-		if (wb->task)
-			wake_up_process(wb->task);
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work) {
+		if (bdi->wb.task)
+			wake_up_process(bdi->wb.task);
+		return;
 	}
-}
 
-/**
- * bdi_queue_work_onstack - start and wait for writeback
- * @args: parameters to control the work queue writeback
- *
- * Description:
- *   This function initiates writeback and waits for the operation to
- *   complete. Callers must hold the sb s_umount semaphore for
- *   reading, to avoid having the super disappear before we are done.
- */
-static void bdi_queue_work_onstack(struct wb_writeback_args *args)
-{
-	struct bdi_work work;
+	work->sync_mode	= WB_SYNC_NONE;
+	work->nr_pages	= nr_pages;
+	work->range_cyclic = range_cyclic;
+	work->for_background = for_background;
 
-	bdi_work_init(&work, args);
-	__set_bit(WS_ONSTACK, &work.state);
-
-	bdi_queue_work(args->sb->s_bdi, &work);
-	bdi_wait_on_work_done(&work);
+	bdi_queue_work(bdi, work);
 }
 
 /**
@@ -210,13 +121,7 @@ static void bdi_queue_work_onstack(struct wb_writeback_args *args)
  */
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
 {
-	struct wb_writeback_args args = {
-		.sync_mode	= WB_SYNC_NONE,
-		.nr_pages	= nr_pages,
-		.range_cyclic	= 1,
-	};
-
-	bdi_alloc_queue_work(bdi, &args);
+	__bdi_start_writeback(bdi, nr_pages, true, false);
 }
 
 /**
@@ -230,13 +135,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
  */
 void bdi_start_background_writeback(struct backing_dev_info *bdi)
 {
-	struct wb_writeback_args args = {
-		.sync_mode	= WB_SYNC_NONE,
-		.nr_pages	= LONG_MAX,
-		.for_background = 1,
-		.range_cyclic	= 1,
-	};
-	bdi_alloc_queue_work(bdi, &args);
+	__bdi_start_writeback(bdi, LONG_MAX, true, true);
 }
 
 /*
@@ -703,14 +602,14 @@ static inline bool over_bground_thresh(void)
  * all dirty pages if they are all attached to "old" mappings.
  */
 static long wb_writeback(struct bdi_writeback *wb,
-			 struct wb_writeback_args *args)
+			 struct wb_writeback_work *work)
 {
 	struct writeback_control wbc = {
-		.sync_mode		= args->sync_mode,
+		.sync_mode		= work->sync_mode,
 		.older_than_this	= NULL,
-		.for_kupdate		= args->for_kupdate,
-		.for_background		= args->for_background,
-		.range_cyclic		= args->range_cyclic,
+		.for_kupdate		= work->for_kupdate,
+		.for_background		= work->for_background,
+		.range_cyclic		= work->range_cyclic,
 	};
 	unsigned long oldest_jif;
 	long wrote = 0;
@@ -730,24 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb,
 		/*
 		 * Stop writeback when nr_pages has been consumed
 		 */
-		if (args->nr_pages <= 0)
+		if (work->nr_pages <= 0)
 			break;
 
 		/*
 		 * For background writeout, stop when we are below the
 		 * background dirty threshold
 		 */
-		if (args->for_background && !over_bground_thresh())
+		if (work->for_background && !over_bground_thresh())
 			break;
 
 		wbc.more_io = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		wbc.pages_skipped = 0;
-		if (args->sb)
-			__writeback_inodes_sb(args->sb, wb, &wbc);
+		if (work->sb)
+			__writeback_inodes_sb(work->sb, wb, &wbc);
 		else
 			writeback_inodes_wb(wb, &wbc);
-		args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+		work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
 		/*
@@ -783,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb,
 }
 
 /*
- * Return the next bdi_work struct that hasn't been processed by this
- * wb thread yet. ->seen is initially set for each thread that exists
- * for this device, when a thread first notices a piece of work it
- * clears its bit. Depending on writeback type, the thread will notify
- * completion on either receiving the work (WB_SYNC_NONE) or after
- * it is done (WB_SYNC_ALL).
+ * Return the next wb_writeback_work struct that hasn't been processed yet.
  */
-static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
-					   struct bdi_writeback *wb)
+static struct wb_writeback_work *
+get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
 {
-	struct bdi_work *work, *ret = NULL;
-
-	rcu_read_lock();
-
-	list_for_each_entry_rcu(work, &bdi->work_list, list) {
-		if (!test_bit(wb->nr, &work->seen))
-			continue;
-		clear_bit(wb->nr, &work->seen);
+	struct wb_writeback_work *work = NULL;
 
-		ret = work;
-		break;
+	spin_lock(&bdi->wb_lock);
+	if (!list_empty(&bdi->work_list)) {
+		work = list_entry(bdi->work_list.next,
+				  struct wb_writeback_work, list);
+		list_del_init(&work->list);
 	}
-
-	rcu_read_unlock();
-	return ret;
+	spin_unlock(&bdi->wb_lock);
+	return work;
 }
 
 static long wb_check_old_data_flush(struct bdi_writeback *wb)
@@ -832,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 
 	if (nr_pages) {
-		struct wb_writeback_args args = {
+		struct wb_writeback_work work = {
 			.nr_pages	= nr_pages,
 			.sync_mode	= WB_SYNC_NONE,
 			.for_kupdate	= 1,
 			.range_cyclic	= 1,
 		};
 
-		return wb_writeback(wb, &args);
+		return wb_writeback(wb, &work);
 	}
 
 	return 0;
@@ -851,33 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 {
 	struct backing_dev_info *bdi = wb->bdi;
-	struct bdi_work *work;
+	struct wb_writeback_work *work;
 	long wrote = 0;
 
 	while ((work = get_next_work_item(bdi, wb)) != NULL) {
-		struct wb_writeback_args args = work->args;
-
 		/*
 		 * Override sync mode, in case we must wait for completion
+		 * because this thread is exiting now.
 		 */
 		if (force_wait)
-			work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
+			work->sync_mode = WB_SYNC_ALL;
 
-		/*
-		 * If this isn't a data integrity operation, just notify
-		 * that we have seen this work and we are now starting it.
-		 */
-		if (!test_bit(WS_ONSTACK, &work->state))
-			wb_clear_pending(wb, work);
-
-		wrote += wb_writeback(wb, &args);
+		wrote += wb_writeback(wb, work);
 
 		/*
-		 * This is a data integrity writeback, so only do the
-		 * notification when we have completed the work.
+		 * Notify the caller of completion if this is a synchronous
+		 * work item, otherwise just free it.
 		 */
-		if (test_bit(WS_ONSTACK, &work->state))
-			wb_clear_pending(wb, work);
+		if (work->done)
+			complete(work->done);
+		else
+			kfree(work);
 	}
 
 	/*
@@ -940,14 +823,9 @@ int bdi_writeback_task(struct bdi_writeback *wb)
 void wakeup_flusher_threads(long nr_pages)
 {
 	struct backing_dev_info *bdi;
-	struct wb_writeback_args args = {
-		.sync_mode	= WB_SYNC_NONE,
-	};
 
-	if (nr_pages) {
-		args.nr_pages = nr_pages;
-	} else {
-		args.nr_pages = global_page_state(NR_FILE_DIRTY) +
+	if (!nr_pages) {
+		nr_pages = global_page_state(NR_FILE_DIRTY) +
 				global_page_state(NR_UNSTABLE_NFS);
 	}
 
@@ -955,7 +833,7 @@ void wakeup_flusher_threads(long nr_pages)
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
-		bdi_alloc_queue_work(bdi, &args);
+		__bdi_start_writeback(bdi, nr_pages, false, false);
 	}
 	rcu_read_unlock();
 }
@@ -1164,17 +1042,20 @@ void writeback_inodes_sb(struct super_block *sb)
 {
 	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
 	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
-	struct wb_writeback_args args = {
+	DECLARE_COMPLETION_ONSTACK(done);
+	struct wb_writeback_work work = {
 		.sb		= sb,
 		.sync_mode	= WB_SYNC_NONE,
+		.done		= &done,
 	};
 
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	args.nr_pages = nr_dirty + nr_unstable +
+	work.nr_pages = nr_dirty + nr_unstable +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 
-	bdi_queue_work_onstack(&args);
+	bdi_queue_work(sb->s_bdi, &work);
+	wait_for_completion(&done);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
@@ -1206,16 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
  */
 void sync_inodes_sb(struct super_block *sb)
 {
-	struct wb_writeback_args args = {
+	DECLARE_COMPLETION_ONSTACK(done);
+	struct wb_writeback_work work = {
 		.sb		= sb,
 		.sync_mode	= WB_SYNC_ALL,
 		.nr_pages	= LONG_MAX,
 		.range_cyclic	= 0,
+		.done		= &done,
 	};
 
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	bdi_queue_work_onstack(&args);
+	bdi_queue_work(sb->s_bdi, &work);
+	wait_for_completion(&done);
+
 	wait_sb_inodes(sb);
 }
 EXPORT_SYMBOL(sync_inodes_sb);
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 9ae2889096b..e9aec0d099d 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -82,8 +82,6 @@ struct backing_dev_info {
 	struct bdi_writeback wb;  /* default writeback info for this bdi */
 	spinlock_t wb_lock;	  /* protects update side of wb_list */
 	struct list_head wb_list; /* the flusher threads hanging off this bdi */
-	unsigned long wb_mask;	  /* bitmask of registered tasks */
-	unsigned int wb_cnt;	  /* number of registered tasks */
 
 	struct list_head work_list;
 
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 6e0b09a1ec2..123bcef13e5 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -104,15 +104,13 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 		   "b_more_io:        %8lu\n"
 		   "bdi_list:         %8u\n"
 		   "state:            %8lx\n"
-		   "wb_mask:          %8lx\n"
-		   "wb_list:          %8u\n"
-		   "wb_cnt:           %8u\n",
+		   "wb_list:          %8u\n",
 		   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
 		   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
 		   K(bdi_thresh), K(dirty_thresh),
 		   K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io,
-		   !list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask,
-		   !list_empty(&bdi->wb_list), bdi->wb_cnt);
+		   !list_empty(&bdi->bdi_list), bdi->state,
+		   !list_empty(&bdi->wb_list));
 #undef K
 
 	return 0;
@@ -674,12 +672,6 @@ int bdi_init(struct backing_dev_info *bdi)
 
 	bdi_wb_init(&bdi->wb, bdi);
 
-	/*
-	 * Just one thread support for now, hard code mask and count
-	 */
-	bdi->wb_mask = 1;
-	bdi->wb_cnt = 1;
-
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
 		if (err)
-- 
cgit v1.2.3-70-g09d2


From 140236b4b1c749c9b795ea3d11558a0eb5a3a080 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Thu, 10 Jun 2010 13:56:33 +0300
Subject: VFS: introduce s_dirty accessors

This patch introduces 3 VFS accessors: 'sb_mark_dirty()',
'sb_mark_clean()', and 'sb_is_dirty()'. They simply
set 'sb->s_dirt' or test 'sb->s_dirt'. The plan is to make
every FS use these accessors later instead of manipulating
the 'sb->s_dirt' flag directly.

Ultimately, this change is a preparation for the periodic
superblock synchronization optimization which is about
preventing the "sync_supers" kernel thread from waking up
even if there is nothing to synchronize.

This patch does not do any functional change, just adds
accessor functions.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 471e1ff5079..68ca1b0491a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1783,6 +1783,19 @@ extern int get_sb_pseudo(struct file_system_type *, char *,
 	struct vfsmount *mnt);
 extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);
 
+static inline void sb_mark_dirty(struct super_block *sb)
+{
+	sb->s_dirt = 1;
+}
+static inline void sb_mark_clean(struct super_block *sb)
+{
+	sb->s_dirt = 0;
+}
+static inline int sb_is_dirty(struct super_block *sb)
+{
+	return sb->s_dirt;
+}
+
 /* Alas, no aliases. Too much hassle with bringing module.h everywhere */
 #define fops_get(fops) \
 	(((fops) && try_module_get((fops)->owner) ? (fops) : NULL))
-- 
cgit v1.2.3-70-g09d2


From 44a54f787c0abcf75a2ed49b8ec8b2b512468f73 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 9 Jul 2010 15:41:44 -0400
Subject: tracing: Add alignment to syscall metadata declarations

For some reason if we declare a static variable and then assign it
later, and the assignment contains a __attribute__((__aligned__(#))),
some versions of gcc will ignore it.

This caused the syscall meta data to not be compact in its section
and caused a kernel oops when the section was being read.

The fix for these versions of gcc seems to be to add the aligned
attribute to the declaration as well.

This fixes the BZ regression:

  https://bugzilla.kernel.org/show_bug.cgi?id=16353

Reported-by: Zeev Tarantov <zeev.tarantov@gmail.com>
Tested-by: Zeev Tarantov <zeev.tarantov@gmail.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <AANLkTinkKVmB0fpVeqUkMeqe3ZYeXJdI8xDuzJEOjYwh@mail.gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/syscalls.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 7f614ce274a..13ebb5413a7 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -124,7 +124,8 @@ extern struct trace_event_functions enter_syscall_print_funcs;
 extern struct trace_event_functions exit_syscall_print_funcs;
 
 #define SYSCALL_TRACE_ENTER_EVENT(sname)				\
-	static struct syscall_metadata __syscall_meta_##sname;		\
+	static struct syscall_metadata					\
+	__attribute__((__aligned__(4))) __syscall_meta_##sname;		\
 	static struct ftrace_event_call					\
 	__attribute__((__aligned__(4))) event_enter_##sname;		\
 	static struct ftrace_event_call __used				\
@@ -138,7 +139,8 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 	}
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)					\
-	static struct syscall_metadata __syscall_meta_##sname;		\
+	static struct syscall_metadata					\
+	__attribute__((__aligned__(4))) __syscall_meta_##sname;		\
 	static struct ftrace_event_call					\
 	__attribute__((__aligned__(4))) event_exit_##sname;		\
 	static struct ftrace_event_call __used				\
-- 
cgit v1.2.3-70-g09d2


From 95f72d1ed41a66f1c1c29c24d479de81a0bea36f Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 12 Jul 2010 14:36:09 +1000
Subject: lmb: rename to memblock

via following scripts

      FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')

      sed -i \
        -e 's/lmb/memblock/g' \
        -e 's/LMB/MEMBLOCK/g' \
        $FILES

      for N in $(find . -name lmb.[ch]); do
        M=$(echo $N | sed 's/lmb/memblock/g')
        mv $N $M
      done

and remove some wrong change like lmbench and dlmb etc.

also move memblock.c from lib/ to mm/

Suggested-by: Ingo Molnar <mingo@elte.hu>
Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 Documentation/kernel-parameters.txt             |   2 +-
 arch/microblaze/Kconfig                         |   2 +-
 arch/microblaze/include/asm/lmb.h               |  17 -
 arch/microblaze/include/asm/memblock.h          |  17 +
 arch/microblaze/kernel/prom.c                   |  14 +-
 arch/microblaze/mm/init.c                       |  40 +-
 arch/powerpc/Kconfig                            |   2 +-
 arch/powerpc/include/asm/abs_addr.h             |   2 +-
 arch/powerpc/include/asm/lmb.h                  |  15 -
 arch/powerpc/include/asm/memblock.h             |  15 +
 arch/powerpc/kernel/btext.c                     |   2 +-
 arch/powerpc/kernel/crash.c                     |   2 +-
 arch/powerpc/kernel/crash_dump.c                |   4 +-
 arch/powerpc/kernel/dma-swiotlb.c               |   2 +-
 arch/powerpc/kernel/dma.c                       |   4 +-
 arch/powerpc/kernel/machine_kexec.c             |  12 +-
 arch/powerpc/kernel/paca.c                      |   8 +-
 arch/powerpc/kernel/prom.c                      |  62 +--
 arch/powerpc/kernel/rtas.c                      |   6 +-
 arch/powerpc/kernel/setup-common.c              |   2 +-
 arch/powerpc/kernel/setup_32.c                  |  16 +-
 arch/powerpc/kernel/setup_64.c                  |  20 +-
 arch/powerpc/kernel/vdso.c                      |   4 +-
 arch/powerpc/mm/40x_mmu.c                       |   2 +-
 arch/powerpc/mm/hash_utils_64.c                 |  26 +-
 arch/powerpc/mm/init_32.c                       |  16 +-
 arch/powerpc/mm/init_64.c                       |   2 +-
 arch/powerpc/mm/mem.c                           |  78 ++--
 arch/powerpc/mm/numa.c                          |  84 ++--
 arch/powerpc/mm/pgtable_32.c                    |   6 +-
 arch/powerpc/mm/pgtable_64.c                    |   4 +-
 arch/powerpc/mm/ppc_mmu_32.c                    |   4 +-
 arch/powerpc/mm/stab.c                          |   4 +-
 arch/powerpc/mm/tlb_nohash.c                    |   4 +-
 arch/powerpc/platforms/85xx/corenet_ds.c        |   4 +-
 arch/powerpc/platforms/85xx/mpc8536_ds.c        |   4 +-
 arch/powerpc/platforms/85xx/mpc85xx_ds.c        |   4 +-
 arch/powerpc/platforms/85xx/mpc85xx_mds.c       |   4 +-
 arch/powerpc/platforms/86xx/mpc86xx_hpcn.c      |   4 +-
 arch/powerpc/platforms/cell/iommu.c             |  10 +-
 arch/powerpc/platforms/embedded6xx/wii.c        |  12 +-
 arch/powerpc/platforms/maple/setup.c            |   2 +-
 arch/powerpc/platforms/pasemi/iommu.c           |   4 +-
 arch/powerpc/platforms/powermac/setup.c         |   4 +-
 arch/powerpc/platforms/ps3/htab.c               |   2 +-
 arch/powerpc/platforms/ps3/mm.c                 |   6 +-
 arch/powerpc/platforms/ps3/os-area.c            |   4 +-
 arch/powerpc/platforms/pseries/hotplug-memory.c |  38 +-
 arch/powerpc/platforms/pseries/iommu.c          |   2 +-
 arch/powerpc/platforms/pseries/phyp_dump.c      |   4 +-
 arch/powerpc/sysdev/dart_iommu.c                |   8 +-
 arch/powerpc/sysdev/fsl_pci.c                   |   4 +-
 arch/sh/Kconfig                                 |   2 +-
 arch/sh/include/asm/lmb.h                       |   6 -
 arch/sh/include/asm/memblock.h                  |   6 +
 arch/sh/kernel/machine_kexec.c                  |  18 +-
 arch/sh/kernel/setup.c                          |   8 +-
 arch/sh/mm/init.c                               |  40 +-
 arch/sh/mm/numa.c                               |   8 +-
 arch/sparc/Kconfig                              |   2 +-
 arch/sparc/include/asm/lmb.h                    |  10 -
 arch/sparc/include/asm/memblock.h               |  10 +
 arch/sparc/kernel/mdesc.c                       |  16 +-
 arch/sparc/kernel/prom_64.c                     |   4 +-
 arch/sparc/mm/init_64.c                         |  54 +--
 include/linux/lmb.h                             |  89 ----
 include/linux/memblock.h                        |  89 ++++
 lib/Kconfig                                     |   3 -
 lib/Makefile                                    |   2 -
 lib/lmb.c                                       | 541 ------------------------
 mm/Kconfig                                      |   3 +
 mm/Makefile                                     |   2 +
 mm/memblock.c                                   | 541 ++++++++++++++++++++++++
 73 files changed, 1037 insertions(+), 1037 deletions(-)
 delete mode 100644 arch/microblaze/include/asm/lmb.h
 create mode 100644 arch/microblaze/include/asm/memblock.h
 delete mode 100644 arch/powerpc/include/asm/lmb.h
 create mode 100644 arch/powerpc/include/asm/memblock.h
 delete mode 100644 arch/sh/include/asm/lmb.h
 create mode 100644 arch/sh/include/asm/memblock.h
 delete mode 100644 arch/sparc/include/asm/lmb.h
 create mode 100644 arch/sparc/include/asm/memblock.h
 delete mode 100644 include/linux/lmb.h
 create mode 100644 include/linux/memblock.h
 delete mode 100644 lib/lmb.c
 create mode 100644 mm/memblock.c

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 82d6aeb5228..4ddb58df081 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1265,7 +1265,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			If there are multiple matching configurations changing
 			the same attribute, the last one is used.
 
-	lmb=debug	[KNL] Enable lmb debug messages.
+	memblock=debug	[KNL] Enable memblock debug messages.
 
 	load_ramdisk=	[RAM] List of ramdisks to load from floppy
 			See Documentation/blockdev/ramdisk.txt.
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 76818f92653..505a0859242 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -5,7 +5,7 @@ mainmenu "Linux/Microblaze Kernel Configuration"
 
 config MICROBLAZE
 	def_bool y
-	select HAVE_LMB
+	select HAVE_MEMBLOCK
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_FUNCTION_GRAPH_TRACER
diff --git a/arch/microblaze/include/asm/lmb.h b/arch/microblaze/include/asm/lmb.h
deleted file mode 100644
index a0a0a929c29..00000000000
--- a/arch/microblaze/include/asm/lmb.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (C) 2008 Michal Simek <monstr@monstr.eu>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef _ASM_MICROBLAZE_LMB_H
-#define _ASM_MICROBLAZE_LMB_H
-
-/* LMB limit is OFF */
-#define LMB_REAL_LIMIT	0xFFFFFFFF
-
-#endif /* _ASM_MICROBLAZE_LMB_H */
-
-
diff --git a/arch/microblaze/include/asm/memblock.h b/arch/microblaze/include/asm/memblock.h
new file mode 100644
index 00000000000..f9c2fa331d2
--- /dev/null
+++ b/arch/microblaze/include/asm/memblock.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2008 Michal Simek <monstr@monstr.eu>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef _ASM_MICROBLAZE_MEMBLOCK_H
+#define _ASM_MICROBLAZE_MEMBLOCK_H
+
+/* MEMBLOCK limit is OFF */
+#define MEMBLOCK_REAL_LIMIT	0xFFFFFFFF
+
+#endif /* _ASM_MICROBLAZE_MEMBLOCK_H */
+
+
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index a15ef6d67ca..427b13b4740 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -29,7 +29,7 @@
 #include <linux/kexec.h>
 #include <linux/debugfs.h>
 #include <linux/irq.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/prom.h>
 #include <asm/page.h>
@@ -49,12 +49,12 @@ void __init early_init_dt_scan_chosen_arch(unsigned long node)
 
 void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 {
-	lmb_add(base, size);
+	memblock_add(base, size);
 }
 
 u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 {
-	return lmb_alloc(size, align);
+	return memblock_alloc(size, align);
 }
 
 #ifdef CONFIG_EARLY_PRINTK
@@ -104,8 +104,8 @@ void __init early_init_devtree(void *params)
 	 */
 	of_scan_flat_dt(early_init_dt_scan_chosen, NULL);
 
-	/* Scan memory nodes and rebuild LMBs */
-	lmb_init();
+	/* Scan memory nodes and rebuild MEMBLOCKs */
+	memblock_init();
 	of_scan_flat_dt(early_init_dt_scan_root, NULL);
 	of_scan_flat_dt(early_init_dt_scan_memory, NULL);
 
@@ -113,9 +113,9 @@ void __init early_init_devtree(void *params)
 	strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
 	parse_early_param();
 
-	lmb_analyze();
+	memblock_analyze();
 
-	pr_debug("Phys. mem: %lx\n", (unsigned long) lmb_phys_mem_size());
+	pr_debug("Phys. mem: %lx\n", (unsigned long) memblock_phys_mem_size());
 
 	pr_debug(" <- early_init_devtree()\n");
 }
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index cca3579d426..db593498992 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -10,7 +10,7 @@
 #include <linux/bootmem.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/mm.h> /* mem_init */
 #include <linux/initrd.h>
 #include <linux/pagemap.h>
@@ -76,10 +76,10 @@ void __init setup_memory(void)
 	u32 kernel_align_start, kernel_align_size;
 
 	/* Find main memory where is the kernel */
-	for (i = 0; i < lmb.memory.cnt; i++) {
-		memory_start = (u32) lmb.memory.region[i].base;
-		memory_end = (u32) lmb.memory.region[i].base
-				+ (u32) lmb.memory.region[i].size;
+	for (i = 0; i < memblock.memory.cnt; i++) {
+		memory_start = (u32) memblock.memory.region[i].base;
+		memory_end = (u32) memblock.memory.region[i].base
+				+ (u32) memblock.memory.region[i].size;
 		if ((memory_start <= (u32)_text) &&
 					((u32)_text <= memory_end)) {
 			memory_size = memory_end - memory_start;
@@ -100,7 +100,7 @@ void __init setup_memory(void)
 	kernel_align_start = PAGE_DOWN((u32)_text);
 	/* ALIGN can be remove because _end in vmlinux.lds.S is align */
 	kernel_align_size = PAGE_UP((u32)klimit) - kernel_align_start;
-	lmb_reserve(kernel_align_start, kernel_align_size);
+	memblock_reserve(kernel_align_start, kernel_align_size);
 	printk(KERN_INFO "%s: kernel addr=0x%08x-0x%08x size=0x%08x\n",
 		__func__, kernel_align_start, kernel_align_start
 			+ kernel_align_size, kernel_align_size);
@@ -141,18 +141,18 @@ void __init setup_memory(void)
 	map_size = init_bootmem_node(&contig_page_data,
 		PFN_UP(TOPHYS((u32)klimit)), min_low_pfn, max_low_pfn);
 #endif
-	lmb_reserve(PFN_UP(TOPHYS((u32)klimit)) << PAGE_SHIFT, map_size);
+	memblock_reserve(PFN_UP(TOPHYS((u32)klimit)) << PAGE_SHIFT, map_size);
 
 	/* free bootmem is whole main memory */
 	free_bootmem(memory_start, memory_size);
 
 	/* reserve allocate blocks */
-	for (i = 0; i < lmb.reserved.cnt; i++) {
+	for (i = 0; i < memblock.reserved.cnt; i++) {
 		pr_debug("reserved %d - 0x%08x-0x%08x\n", i,
-			(u32) lmb.reserved.region[i].base,
-			(u32) lmb_size_bytes(&lmb.reserved, i));
-		reserve_bootmem(lmb.reserved.region[i].base,
-			lmb_size_bytes(&lmb.reserved, i) - 1, BOOTMEM_DEFAULT);
+			(u32) memblock.reserved.region[i].base,
+			(u32) memblock_size_bytes(&memblock.reserved, i));
+		reserve_bootmem(memblock.reserved.region[i].base,
+			memblock_size_bytes(&memblock.reserved, i) - 1, BOOTMEM_DEFAULT);
 	}
 #ifdef CONFIG_MMU
 	init_bootmem_done = 1;
@@ -235,7 +235,7 @@ static void mm_cmdline_setup(void)
 		if (maxmem && memory_size > maxmem) {
 			memory_size = maxmem;
 			memory_end = memory_start + memory_size;
-			lmb.memory.region[0].size = memory_size;
+			memblock.memory.region[0].size = memory_size;
 		}
 	}
 }
@@ -273,19 +273,19 @@ asmlinkage void __init mmu_init(void)
 {
 	unsigned int kstart, ksize;
 
-	if (!lmb.reserved.cnt) {
+	if (!memblock.reserved.cnt) {
 		printk(KERN_EMERG "Error memory count\n");
 		machine_restart(NULL);
 	}
 
-	if ((u32) lmb.memory.region[0].size < 0x1000000) {
+	if ((u32) memblock.memory.region[0].size < 0x1000000) {
 		printk(KERN_EMERG "Memory must be greater than 16MB\n");
 		machine_restart(NULL);
 	}
 	/* Find main memory where the kernel is */
-	memory_start = (u32) lmb.memory.region[0].base;
-	memory_end = (u32) lmb.memory.region[0].base +
-				(u32) lmb.memory.region[0].size;
+	memory_start = (u32) memblock.memory.region[0].base;
+	memory_end = (u32) memblock.memory.region[0].base +
+				(u32) memblock.memory.region[0].size;
 	memory_size = memory_end - memory_start;
 
 	mm_cmdline_setup(); /* FIXME parse args from command line - not used */
@@ -297,7 +297,7 @@ asmlinkage void __init mmu_init(void)
 	kstart = __pa(CONFIG_KERNEL_START); /* kernel start */
 	/* kernel size */
 	ksize = PAGE_ALIGN(((u32)_end - (u32)CONFIG_KERNEL_START));
-	lmb_reserve(kstart, ksize);
+	memblock_reserve(kstart, ksize);
 
 #if defined(CONFIG_BLK_DEV_INITRD)
 	/* Remove the init RAM disk from the available memory. */
@@ -335,7 +335,7 @@ void __init *early_get_page(void)
 		 * Mem start + 32MB -> here is limit
 		 * because of mem mapping from head.S
 		 */
-		p = __va(lmb_alloc_base(PAGE_SIZE, PAGE_SIZE,
+		p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE,
 					memory_start + 0x2000000));
 	}
 	return p;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6506bf4fbff..2031a284686 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -132,7 +132,7 @@ config PPC
 	select HAVE_ARCH_KGDB
 	select HAVE_KRETPROBES
 	select HAVE_ARCH_TRACEHOOK
-	select HAVE_LMB
+	select HAVE_MEMBLOCK
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_API_DEBUG
 	select USE_GENERIC_SMP_HELPERS if SMP
diff --git a/arch/powerpc/include/asm/abs_addr.h b/arch/powerpc/include/asm/abs_addr.h
index 98324c5a828..9a846efe638 100644
--- a/arch/powerpc/include/asm/abs_addr.h
+++ b/arch/powerpc/include/asm/abs_addr.h
@@ -12,7 +12,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/types.h>
 #include <asm/page.h>
diff --git a/arch/powerpc/include/asm/lmb.h b/arch/powerpc/include/asm/lmb.h
deleted file mode 100644
index 6f5fdf0a19a..00000000000
--- a/arch/powerpc/include/asm/lmb.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef _ASM_POWERPC_LMB_H
-#define _ASM_POWERPC_LMB_H
-
-#include <asm/udbg.h>
-
-#define LMB_DBG(fmt...) udbg_printf(fmt)
-
-#ifdef CONFIG_PPC32
-extern phys_addr_t lowmem_end_addr;
-#define LMB_REAL_LIMIT	lowmem_end_addr
-#else
-#define LMB_REAL_LIMIT	0
-#endif
-
-#endif /* _ASM_POWERPC_LMB_H */
diff --git a/arch/powerpc/include/asm/memblock.h b/arch/powerpc/include/asm/memblock.h
new file mode 100644
index 00000000000..3c29728b56b
--- /dev/null
+++ b/arch/powerpc/include/asm/memblock.h
@@ -0,0 +1,15 @@
+#ifndef _ASM_POWERPC_MEMBLOCK_H
+#define _ASM_POWERPC_MEMBLOCK_H
+
+#include <asm/udbg.h>
+
+#define MEMBLOCK_DBG(fmt...) udbg_printf(fmt)
+
+#ifdef CONFIG_PPC32
+extern phys_addr_t lowmem_end_addr;
+#define MEMBLOCK_REAL_LIMIT	lowmem_end_addr
+#else
+#define MEMBLOCK_REAL_LIMIT	0
+#endif
+
+#endif /* _ASM_POWERPC_MEMBLOCK_H */
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 26e58630ed7..625942ae558 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -7,7 +7,7 @@
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/sections.h>
 #include <asm/prom.h>
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 29df48f2b61..417f7b05a9c 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -24,7 +24,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/types.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/processor.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 5fb667a6089..40f524643ba 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -13,7 +13,7 @@
 
 #include <linux/crash_dump.h>
 #include <linux/bootmem.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <asm/code-patching.h>
 #include <asm/kdump.h>
 #include <asm/prom.h>
@@ -33,7 +33,7 @@ unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
 #ifndef CONFIG_RELOCATABLE
 void __init reserve_kdump_trampoline(void)
 {
-	lmb_reserve(0, KDUMP_RESERVE_LIMIT);
+	memblock_reserve(0, KDUMP_RESERVE_LIMIT);
 }
 
 static void __init create_trampoline(unsigned long addr)
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index e7fe218b869..02f724f3675 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -71,7 +71,7 @@ static int ppc_swiotlb_bus_notify(struct notifier_block *nb,
 	sd->max_direct_dma_addr = 0;
 
 	/* May need to bounce if the device can't address all of DRAM */
-	if ((dma_get_mask(dev) + 1) < lmb_end_of_DRAM())
+	if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM())
 		set_dma_ops(dev, &swiotlb_dma_ops);
 
 	return NOTIFY_DONE;
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 8d1de6f31d5..84d6367ec00 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -9,7 +9,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/dma-debug.h>
 #include <linux/gfp.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <asm/bug.h>
 #include <asm/abs_addr.h>
 
@@ -89,7 +89,7 @@ static int dma_direct_dma_supported(struct device *dev, u64 mask)
 	/* Could be improved so platforms can set the limit in case
 	 * they have limited DMA windows
 	 */
-	return mask >= (lmb_end_of_DRAM() - 1);
+	return mask >= (memblock_end_of_DRAM() - 1);
 #else
 	return 1;
 #endif
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index bb3d893a835..89f005116aa 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -12,7 +12,7 @@
 #include <linux/kexec.h>
 #include <linux/reboot.h>
 #include <linux/threads.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/of.h>
 #include <asm/machdep.h>
 #include <asm/prom.h>
@@ -66,11 +66,11 @@ void __init reserve_crashkernel(void)
 	unsigned long long crash_size, crash_base;
 	int ret;
 
-	/* this is necessary because of lmb_phys_mem_size() */
-	lmb_analyze();
+	/* this is necessary because of memblock_phys_mem_size() */
+	memblock_analyze();
 
 	/* use common parsing */
-	ret = parse_crashkernel(boot_command_line, lmb_phys_mem_size(),
+	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
 			&crash_size, &crash_base);
 	if (ret == 0 && crash_size > 0) {
 		crashk_res.start = crash_base;
@@ -133,9 +133,9 @@ void __init reserve_crashkernel(void)
 			"for crashkernel (System RAM: %ldMB)\n",
 			(unsigned long)(crash_size >> 20),
 			(unsigned long)(crashk_res.start >> 20),
-			(unsigned long)(lmb_phys_mem_size() >> 20));
+			(unsigned long)(memblock_phys_mem_size() >> 20));
 
-	lmb_reserve(crashk_res.start, crash_size);
+	memblock_reserve(crashk_res.start, crash_size);
 }
 
 int overlaps_crashkernel(unsigned long start, unsigned long size)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index f88acf0218d..139a773853f 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -9,7 +9,7 @@
 
 #include <linux/threads.h>
 #include <linux/module.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/firmware.h>
 #include <asm/lppaca.h>
@@ -117,7 +117,7 @@ void __init allocate_pacas(void)
 	 * the first segment. On iSeries they must be within the area mapped
 	 * by the HV, which is HvPagesToMap * HVPAGESIZE bytes.
 	 */
-	limit = min(0x10000000ULL, lmb.rmo_size);
+	limit = min(0x10000000ULL, memblock.rmo_size);
 	if (firmware_has_feature(FW_FEATURE_ISERIES))
 		limit = min(limit, HvPagesToMap * HVPAGESIZE);
 
@@ -128,7 +128,7 @@ void __init allocate_pacas(void)
 
 	paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus);
 
-	paca = __va(lmb_alloc_base(paca_size, PAGE_SIZE, limit));
+	paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));
 	memset(paca, 0, paca_size);
 
 	printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n",
@@ -148,7 +148,7 @@ void __init free_unused_pacas(void)
 	if (new_size >= paca_size)
 		return;
 
-	lmb_free(__pa(paca) + new_size, paca_size - new_size);
+	memblock_free(__pa(paca) + new_size, paca_size - new_size);
 
 	printk(KERN_DEBUG "Freed %u bytes for unused pacas\n",
 		paca_size - new_size);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 05131d634e7..9d3953983fb 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -31,7 +31,7 @@
 #include <linux/kexec.h>
 #include <linux/debugfs.h>
 #include <linux/irq.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/prom.h>
 #include <asm/rtas.h>
@@ -98,7 +98,7 @@ static void __init move_device_tree(void)
 
 	if ((memory_limit && (start + size) > memory_limit) ||
 			overlaps_crashkernel(start, size)) {
-		p = __va(lmb_alloc_base(size, PAGE_SIZE, lmb.rmo_size));
+		p = __va(memblock_alloc_base(size, PAGE_SIZE, memblock.rmo_size));
 		memcpy(p, initial_boot_params, size);
 		initial_boot_params = (struct boot_param_header *)p;
 		DBG("Moved device tree to 0x%p\n", p);
@@ -411,13 +411,13 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
 {
 	__be32 *dm, *ls, *usm;
 	unsigned long l, n, flags;
-	u64 base, size, lmb_size;
+	u64 base, size, memblock_size;
 	unsigned int is_kexec_kdump = 0, rngs;
 
-	ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
+	ls = of_get_flat_dt_prop(node, "ibm,memblock-size", &l);
 	if (ls == NULL || l < dt_root_size_cells * sizeof(__be32))
 		return 0;
-	lmb_size = dt_mem_next_cell(dt_root_size_cells, &ls);
+	memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls);
 
 	dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
 	if (dm == NULL || l < sizeof(__be32))
@@ -442,11 +442,11 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
 		   or if the block is not assigned to this partition (0x8) */
 		if ((flags & 0x80) || !(flags & 0x8))
 			continue;
-		size = lmb_size;
+		size = memblock_size;
 		rngs = 1;
 		if (is_kexec_kdump) {
 			/*
-			 * For each lmb in ibm,dynamic-memory, a corresponding
+			 * For each memblock in ibm,dynamic-memory, a corresponding
 			 * entry in linux,drconf-usable-memory property contains
 			 * a counter 'p' followed by 'p' (base, size) duple.
 			 * Now read the counter from
@@ -469,10 +469,10 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
 				if ((base + size) > 0x80000000ul)
 					size = 0x80000000ul - base;
 			}
-			lmb_add(base, size);
+			memblock_add(base, size);
 		} while (--rngs);
 	}
-	lmb_dump_all();
+	memblock_dump_all();
 	return 0;
 }
 #else
@@ -501,14 +501,14 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 	}
 #endif
 
-	lmb_add(base, size);
+	memblock_add(base, size);
 
 	memstart_addr = min((u64)memstart_addr, base);
 }
 
 u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 {
-	return lmb_alloc(size, align);
+	return memblock_alloc(size, align);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -534,12 +534,12 @@ static void __init early_reserve_mem(void)
 	/* before we do anything, lets reserve the dt blob */
 	self_base = __pa((unsigned long)initial_boot_params);
 	self_size = initial_boot_params->totalsize;
-	lmb_reserve(self_base, self_size);
+	memblock_reserve(self_base, self_size);
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	/* then reserve the initrd, if any */
 	if (initrd_start && (initrd_end > initrd_start))
-		lmb_reserve(__pa(initrd_start), initrd_end - initrd_start);
+		memblock_reserve(__pa(initrd_start), initrd_end - initrd_start);
 #endif /* CONFIG_BLK_DEV_INITRD */
 
 #ifdef CONFIG_PPC32
@@ -560,7 +560,7 @@ static void __init early_reserve_mem(void)
 			if (base_32 == self_base && size_32 == self_size)
 				continue;
 			DBG("reserving: %x -> %x\n", base_32, size_32);
-			lmb_reserve(base_32, size_32);
+			memblock_reserve(base_32, size_32);
 		}
 		return;
 	}
@@ -571,7 +571,7 @@ static void __init early_reserve_mem(void)
 		if (size == 0)
 			break;
 		DBG("reserving: %llx -> %llx\n", base, size);
-		lmb_reserve(base, size);
+		memblock_reserve(base, size);
 	}
 }
 
@@ -594,7 +594,7 @@ static inline unsigned long phyp_dump_calculate_reserve_size(void)
 		return phyp_dump_info->reserve_bootvar;
 
 	/* divide by 20 to get 5% of value */
-	tmp = lmb_end_of_DRAM();
+	tmp = memblock_end_of_DRAM();
 	do_div(tmp, 20);
 
 	/* round it down in multiples of 256 */
@@ -633,11 +633,11 @@ static void __init phyp_dump_reserve_mem(void)
 	if (phyp_dump_info->phyp_dump_is_active) {
 		/* Reserve *everything* above RMR.Area freed by userland tools*/
 		base = variable_reserve_size;
-		size = lmb_end_of_DRAM() - base;
+		size = memblock_end_of_DRAM() - base;
 
 		/* XXX crashed_ram_end is wrong, since it may be beyond
 		 * the memory_limit, it will need to be adjusted. */
-		lmb_reserve(base, size);
+		memblock_reserve(base, size);
 
 		phyp_dump_info->init_reserve_start = base;
 		phyp_dump_info->init_reserve_size = size;
@@ -645,8 +645,8 @@ static void __init phyp_dump_reserve_mem(void)
 		size = phyp_dump_info->cpu_state_size +
 			phyp_dump_info->hpte_region_size +
 			variable_reserve_size;
-		base = lmb_end_of_DRAM() - size;
-		lmb_reserve(base, size);
+		base = memblock_end_of_DRAM() - size;
+		memblock_reserve(base, size);
 		phyp_dump_info->init_reserve_start = base;
 		phyp_dump_info->init_reserve_size = size;
 	}
@@ -681,8 +681,8 @@ void __init early_init_devtree(void *params)
 	 */
 	of_scan_flat_dt(early_init_dt_scan_chosen, NULL);
 
-	/* Scan memory nodes and rebuild LMBs */
-	lmb_init();
+	/* Scan memory nodes and rebuild MEMBLOCKs */
+	memblock_init();
 	of_scan_flat_dt(early_init_dt_scan_root, NULL);
 	of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
 
@@ -690,11 +690,11 @@ void __init early_init_devtree(void *params)
 	strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
 	parse_early_param();
 
-	/* Reserve LMB regions used by kernel, initrd, dt, etc... */
-	lmb_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START);
+	/* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */
+	memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START);
 	/* If relocatable, reserve first 32k for interrupt vectors etc. */
 	if (PHYSICAL_START > MEMORY_START)
-		lmb_reserve(MEMORY_START, 0x8000);
+		memblock_reserve(MEMORY_START, 0x8000);
 	reserve_kdump_trampoline();
 	reserve_crashkernel();
 	early_reserve_mem();
@@ -706,17 +706,17 @@ void __init early_init_devtree(void *params)
 
 		/* Ensure that total memory size is page-aligned, because
 		 * otherwise mark_bootmem() gets upset. */
-		lmb_analyze();
-		memsize = lmb_phys_mem_size();
+		memblock_analyze();
+		memsize = memblock_phys_mem_size();
 		if ((memsize & PAGE_MASK) != memsize)
 			limit = memsize & PAGE_MASK;
 	}
-	lmb_enforce_memory_limit(limit);
+	memblock_enforce_memory_limit(limit);
 
-	lmb_analyze();
-	lmb_dump_all();
+	memblock_analyze();
+	memblock_dump_all();
 
-	DBG("Phys. mem: %llx\n", lmb_phys_mem_size());
+	DBG("Phys. mem: %llx\n", memblock_phys_mem_size());
 
 	/* We may need to relocate the flat tree, do it now.
 	 * FIXME .. and the initrd too? */
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 0e1ec6f746f..d0516dbee76 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -22,7 +22,7 @@
 #include <linux/smp.h>
 #include <linux/completion.h>
 #include <linux/cpumask.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/slab.h>
 
 #include <asm/prom.h>
@@ -934,11 +934,11 @@ void __init rtas_initialize(void)
 	 */
 #ifdef CONFIG_PPC64
 	if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) {
-		rtas_region = min(lmb.rmo_size, RTAS_INSTANTIATE_MAX);
+		rtas_region = min(memblock.rmo_size, RTAS_INSTANTIATE_MAX);
 		ibm_suspend_me_token = rtas_token("ibm,suspend-me");
 	}
 #endif
-	rtas_rmo_buf = lmb_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region);
+	rtas_rmo_buf = memblock_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region);
 
 #ifdef CONFIG_RTAS_ERROR_LOGGING
 	rtas_last_error_token = rtas_token("rtas-last-error");
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 5e4d852f640..b7e6c7e193a 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -33,7 +33,7 @@
 #include <linux/serial_8250.h>
 #include <linux/debugfs.h>
 #include <linux/percpu.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/of_platform.h>
 #include <asm/io.h>
 #include <asm/paca.h>
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 7d84b210f16..a10ffc85ada 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -16,7 +16,7 @@
 #include <linux/root_dev.h>
 #include <linux/cpu.h>
 #include <linux/console.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/io.h>
 #include <asm/prom.h>
@@ -246,12 +246,12 @@ static void __init irqstack_early_init(void)
 	unsigned int i;
 
 	/* interrupt stacks must be in lowmem, we get that for free on ppc32
-	 * as the lmb is limited to lowmem by LMB_REAL_LIMIT */
+	 * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */
 	for_each_possible_cpu(i) {
 		softirq_ctx[i] = (struct thread_info *)
-			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
 		hardirq_ctx[i] = (struct thread_info *)
-			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
 	}
 }
 
@@ -261,15 +261,15 @@ static void __init exc_lvl_early_init(void)
 	unsigned int i;
 
 	/* interrupt stacks must be in lowmem, we get that for free on ppc32
-	 * as the lmb is limited to lowmem by LMB_REAL_LIMIT */
+	 * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */
 	for_each_possible_cpu(i) {
 		critirq_ctx[i] = (struct thread_info *)
-			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
 #ifdef CONFIG_BOOKE
 		dbgirq_ctx[i] = (struct thread_info *)
-			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
 		mcheckirq_ctx[i] = (struct thread_info *)
-			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
 #endif
 	}
 }
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 643dcac40fc..d135f93cb0f 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -34,7 +34,7 @@
 #include <linux/bootmem.h>
 #include <linux/pci.h>
 #include <linux/lockdep.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <asm/io.h>
 #include <asm/kdump.h>
 #include <asm/prom.h>
@@ -158,7 +158,7 @@ static void __init setup_paca(struct paca_struct *new_paca)
  * the CPU that ignores the top 2 bits of the address in real
  * mode so we can access kernel globals normally provided we
  * only toy with things in the RMO region. From here, we do
- * some early parsing of the device-tree to setup out LMB
+ * some early parsing of the device-tree to setup out MEMBLOCK
  * data structures, and allocate & initialize the hash table
  * and segment tables so we can start running with translation
  * enabled.
@@ -404,7 +404,7 @@ void __init setup_system(void)
 
 	printk("-----------------------------------------------------\n");
 	printk("ppc64_pft_size                = 0x%llx\n", ppc64_pft_size);
-	printk("physicalMemorySize            = 0x%llx\n", lmb_phys_mem_size());
+	printk("physicalMemorySize            = 0x%llx\n", memblock_phys_mem_size());
 	if (ppc64_caches.dline_size != 0x80)
 		printk("ppc64_caches.dcache_line_size = 0x%x\n",
 		       ppc64_caches.dline_size);
@@ -443,10 +443,10 @@ static void __init irqstack_early_init(void)
 	 */
 	for_each_possible_cpu(i) {
 		softirq_ctx[i] = (struct thread_info *)
-			__va(lmb_alloc_base(THREAD_SIZE,
+			__va(memblock_alloc_base(THREAD_SIZE,
 					    THREAD_SIZE, limit));
 		hardirq_ctx[i] = (struct thread_info *)
-			__va(lmb_alloc_base(THREAD_SIZE,
+			__va(memblock_alloc_base(THREAD_SIZE,
 					    THREAD_SIZE, limit));
 	}
 }
@@ -458,11 +458,11 @@ static void __init exc_lvl_early_init(void)
 
 	for_each_possible_cpu(i) {
 		critirq_ctx[i] = (struct thread_info *)
-			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
 		dbgirq_ctx[i] = (struct thread_info *)
-			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
 		mcheckirq_ctx[i] = (struct thread_info *)
-			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
 	}
 }
 #else
@@ -487,11 +487,11 @@ static void __init emergency_stack_init(void)
 	 * bringup, we need to get at them in real mode. This means they
 	 * must also be within the RMO region.
 	 */
-	limit = min(slb0_limit(), lmb.rmo_size);
+	limit = min(slb0_limit(), memblock.rmo_size);
 
 	for_each_possible_cpu(i) {
 		unsigned long sp;
-		sp  = lmb_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
+		sp  = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
 		sp += THREAD_SIZE;
 		paca[i].emergency_sp = __va(sp);
 	}
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index d84d19224a9..13002fe206e 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -22,7 +22,7 @@
 #include <linux/elf.h>
 #include <linux/security.h>
 #include <linux/bootmem.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -734,7 +734,7 @@ static int __init vdso_init(void)
 	vdso_data->platform = machine_is(iseries) ? 0x200 : 0x100;
 	if (firmware_has_feature(FW_FEATURE_LPAR))
 		vdso_data->platform |= 1;
-	vdso_data->physicalMemorySize = lmb_phys_mem_size();
+	vdso_data->physicalMemorySize = memblock_phys_mem_size();
 	vdso_data->dcache_size = ppc64_caches.dsize;
 	vdso_data->dcache_line_size = ppc64_caches.dline_size;
 	vdso_data->icache_size = ppc64_caches.isize;
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c
index 65abfcfaaa9..1dc2fa5ce1b 100644
--- a/arch/powerpc/mm/40x_mmu.c
+++ b/arch/powerpc/mm/40x_mmu.c
@@ -135,7 +135,7 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
 	/* If the size of RAM is not an exact power of two, we may not
 	 * have covered RAM in its entirety with 16 and 4 MiB
 	 * pages. Consequently, restrict the top end of RAM currently
-	 * allocable so that calls to the LMB to allocate PTEs for "tail"
+	 * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail"
 	 * coverage with normal-sized pages (or other reasons) do not
 	 * attempt to allocate outside the allowed range.
 	 */
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 3ecdcec0a39..98f262de558 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -31,7 +31,7 @@
 #include <linux/cache.h>
 #include <linux/init.h>
 #include <linux/signal.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/processor.h>
 #include <asm/pgtable.h>
@@ -384,8 +384,8 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
 	printk(KERN_INFO "Huge page(16GB) memory: "
 			"addr = 0x%lX size = 0x%lX pages = %d\n",
 			phys_addr, block_size, expected_pages);
-	if (phys_addr + (16 * GB) <= lmb_end_of_DRAM()) {
-		lmb_reserve(phys_addr, block_size * expected_pages);
+	if (phys_addr + (16 * GB) <= memblock_end_of_DRAM()) {
+		memblock_reserve(phys_addr, block_size * expected_pages);
 		add_gpage(phys_addr, block_size, expected_pages);
 	}
 	return 0;
@@ -458,7 +458,7 @@ static void __init htab_init_page_sizes(void)
 	 * and we have at least 1G of RAM at boot
 	 */
 	if (mmu_psize_defs[MMU_PAGE_16M].shift &&
-	    lmb_phys_mem_size() >= 0x40000000)
+	    memblock_phys_mem_size() >= 0x40000000)
 		mmu_vmemmap_psize = MMU_PAGE_16M;
 	else if (mmu_psize_defs[MMU_PAGE_64K].shift)
 		mmu_vmemmap_psize = MMU_PAGE_64K;
@@ -520,7 +520,7 @@ static unsigned long __init htab_get_table_size(void)
 		return 1UL << ppc64_pft_size;
 
 	/* round mem_size up to next power of 2 */
-	mem_size = lmb_phys_mem_size();
+	mem_size = memblock_phys_mem_size();
 	rnd_mem_size = 1UL << __ilog2(mem_size);
 	if (rnd_mem_size < mem_size)
 		rnd_mem_size <<= 1;
@@ -627,7 +627,7 @@ static void __init htab_initialize(void)
 		else
 			limit = 0;
 
-		table = lmb_alloc_base(htab_size_bytes, htab_size_bytes, limit);
+		table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit);
 
 		DBG("Hash table allocated at %lx, size: %lx\n", table,
 		    htab_size_bytes);
@@ -647,9 +647,9 @@ static void __init htab_initialize(void)
 	prot = pgprot_val(PAGE_KERNEL);
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
-	linear_map_hash_count = lmb_end_of_DRAM() >> PAGE_SHIFT;
-	linear_map_hash_slots = __va(lmb_alloc_base(linear_map_hash_count,
-						    1, lmb.rmo_size));
+	linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
+	linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count,
+						    1, memblock.rmo_size));
 	memset(linear_map_hash_slots, 0, linear_map_hash_count);
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
@@ -659,16 +659,16 @@ static void __init htab_initialize(void)
 	 */
 
 	/* create bolted the linear mapping in the hash table */
-	for (i=0; i < lmb.memory.cnt; i++) {
-		base = (unsigned long)__va(lmb.memory.region[i].base);
-		size = lmb.memory.region[i].size;
+	for (i=0; i < memblock.memory.cnt; i++) {
+		base = (unsigned long)__va(memblock.memory.region[i].base);
+		size = memblock.memory.region[i].size;
 
 		DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
 		    base, size, prot);
 
 #ifdef CONFIG_U3_DART
 		/* Do not map the DART space. Fortunately, it will be aligned
-		 * in such a way that it will not cross two lmb regions and
+		 * in such a way that it will not cross two memblock regions and
 		 * will fit within a single 16Mb page.
 		 * The DART space is assumed to be a full 16Mb region even if
 		 * we only use 2Mb of that space. We will use more of it later
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 767333005eb..6a6975dc265 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -30,7 +30,7 @@
 #include <linux/highmem.h>
 #include <linux/initrd.h>
 #include <linux/pagemap.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/gfp.h>
 
 #include <asm/pgalloc.h>
@@ -136,17 +136,17 @@ void __init MMU_init(void)
 	/* parse args from command line */
 	MMU_setup();
 
-	if (lmb.memory.cnt > 1) {
+	if (memblock.memory.cnt > 1) {
 #ifndef CONFIG_WII
-		lmb.memory.cnt = 1;
-		lmb_analyze();
+		memblock.memory.cnt = 1;
+		memblock_analyze();
 		printk(KERN_WARNING "Only using first contiguous memory region");
 #else
 		wii_memory_fixups();
 #endif
 	}
 
-	total_lowmem = total_memory = lmb_end_of_DRAM() - memstart_addr;
+	total_lowmem = total_memory = memblock_end_of_DRAM() - memstart_addr;
 	lowmem_end_addr = memstart_addr + total_lowmem;
 
 #ifdef CONFIG_FSL_BOOKE
@@ -161,8 +161,8 @@ void __init MMU_init(void)
 		lowmem_end_addr = memstart_addr + total_lowmem;
 #ifndef CONFIG_HIGHMEM
 		total_memory = total_lowmem;
-		lmb_enforce_memory_limit(lowmem_end_addr);
-		lmb_analyze();
+		memblock_enforce_memory_limit(lowmem_end_addr);
+		memblock_analyze();
 #endif /* CONFIG_HIGHMEM */
 	}
 
@@ -200,7 +200,7 @@ void __init *early_get_page(void)
 	if (init_bootmem_done) {
 		p = alloc_bootmem_pages(PAGE_SIZE);
 	} else {
-		p = __va(lmb_alloc_base(PAGE_SIZE, PAGE_SIZE,
+		p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE,
 					__initial_memory_limit_addr));
 	}
 	return p;
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index e267f223fdf..71f1415e247 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -40,7 +40,7 @@
 #include <linux/nodemask.h>
 #include <linux/module.h>
 #include <linux/poison.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
 
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 0f594d774bf..1a84a8d0000 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -32,7 +32,7 @@
 #include <linux/initrd.h>
 #include <linux/pagemap.h>
 #include <linux/suspend.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/hugetlb.h>
 
 #include <asm/pgalloc.h>
@@ -83,13 +83,13 @@ int page_is_ram(unsigned long pfn)
 #else
 	unsigned long paddr = (pfn << PAGE_SHIFT);
 	int i;
-	for (i=0; i < lmb.memory.cnt; i++) {
+	for (i=0; i < memblock.memory.cnt; i++) {
 		unsigned long base;
 
-		base = lmb.memory.region[i].base;
+		base = memblock.memory.region[i].base;
 
 		if ((paddr >= base) &&
-			(paddr < (base + lmb.memory.region[i].size))) {
+			(paddr < (base + memblock.memory.region[i].size))) {
 			return 1;
 		}
 	}
@@ -142,14 +142,14 @@ int arch_add_memory(int nid, u64 start, u64 size)
 /*
  * walk_memory_resource() needs to make sure there is no holes in a given
  * memory range.  PPC64 does not maintain the memory layout in /proc/iomem.
- * Instead it maintains it in lmb.memory structures.  Walk through the
+ * Instead it maintains it in memblock.memory structures.  Walk through the
  * memory regions, find holes and callback for contiguous regions.
  */
 int
 walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
 		void *arg, int (*func)(unsigned long, unsigned long, void *))
 {
-	struct lmb_property res;
+	struct memblock_property res;
 	unsigned long pfn, len;
 	u64 end;
 	int ret = -1;
@@ -158,7 +158,7 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
 	res.size = (u64) nr_pages << PAGE_SHIFT;
 
 	end = res.base + res.size - 1;
-	while ((res.base < end) && (lmb_find(&res) >= 0)) {
+	while ((res.base < end) && (memblock_find(&res) >= 0)) {
 		pfn = (unsigned long)(res.base >> PAGE_SHIFT);
 		len = (unsigned long)(res.size >> PAGE_SHIFT);
 		ret = (*func)(pfn, len, arg);
@@ -184,8 +184,8 @@ void __init do_init_bootmem(void)
 	unsigned long total_pages;
 	int boot_mapsize;
 
-	max_low_pfn = max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
-	total_pages = (lmb_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT;
+	max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
+	total_pages = (memblock_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT;
 #ifdef CONFIG_HIGHMEM
 	total_pages = total_lowmem >> PAGE_SHIFT;
 	max_low_pfn = lowmem_end_addr >> PAGE_SHIFT;
@@ -198,16 +198,16 @@ void __init do_init_bootmem(void)
 	 */
 	bootmap_pages = bootmem_bootmap_pages(total_pages);
 
-	start = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE);
+	start = memblock_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE);
 
 	min_low_pfn = MEMORY_START >> PAGE_SHIFT;
 	boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn);
 
 	/* Add active regions with valid PFNs */
-	for (i = 0; i < lmb.memory.cnt; i++) {
+	for (i = 0; i < memblock.memory.cnt; i++) {
 		unsigned long start_pfn, end_pfn;
-		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
-		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
+		start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT;
+		end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i);
 		add_active_range(0, start_pfn, end_pfn);
 	}
 
@@ -218,17 +218,17 @@ void __init do_init_bootmem(void)
 	free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT);
 
 	/* reserve the sections we're already using */
-	for (i = 0; i < lmb.reserved.cnt; i++) {
-		unsigned long addr = lmb.reserved.region[i].base +
-				     lmb_size_bytes(&lmb.reserved, i) - 1;
+	for (i = 0; i < memblock.reserved.cnt; i++) {
+		unsigned long addr = memblock.reserved.region[i].base +
+				     memblock_size_bytes(&memblock.reserved, i) - 1;
 		if (addr < lowmem_end_addr)
-			reserve_bootmem(lmb.reserved.region[i].base,
-					lmb_size_bytes(&lmb.reserved, i),
+			reserve_bootmem(memblock.reserved.region[i].base,
+					memblock_size_bytes(&memblock.reserved, i),
 					BOOTMEM_DEFAULT);
-		else if (lmb.reserved.region[i].base < lowmem_end_addr) {
+		else if (memblock.reserved.region[i].base < lowmem_end_addr) {
 			unsigned long adjusted_size = lowmem_end_addr -
-				      lmb.reserved.region[i].base;
-			reserve_bootmem(lmb.reserved.region[i].base,
+				      memblock.reserved.region[i].base;
+			reserve_bootmem(memblock.reserved.region[i].base,
 					adjusted_size, BOOTMEM_DEFAULT);
 		}
 	}
@@ -236,9 +236,9 @@ void __init do_init_bootmem(void)
 	free_bootmem_with_active_regions(0, max_pfn);
 
 	/* reserve the sections we're already using */
-	for (i = 0; i < lmb.reserved.cnt; i++)
-		reserve_bootmem(lmb.reserved.region[i].base,
-				lmb_size_bytes(&lmb.reserved, i),
+	for (i = 0; i < memblock.reserved.cnt; i++)
+		reserve_bootmem(memblock.reserved.region[i].base,
+				memblock_size_bytes(&memblock.reserved, i),
 				BOOTMEM_DEFAULT);
 
 #endif
@@ -251,20 +251,20 @@ void __init do_init_bootmem(void)
 /* mark pages that don't exist as nosave */
 static int __init mark_nonram_nosave(void)
 {
-	unsigned long lmb_next_region_start_pfn,
-		      lmb_region_max_pfn;
+	unsigned long memblock_next_region_start_pfn,
+		      memblock_region_max_pfn;
 	int i;
 
-	for (i = 0; i < lmb.memory.cnt - 1; i++) {
-		lmb_region_max_pfn =
-			(lmb.memory.region[i].base >> PAGE_SHIFT) +
-			(lmb.memory.region[i].size >> PAGE_SHIFT);
-		lmb_next_region_start_pfn =
-			lmb.memory.region[i+1].base >> PAGE_SHIFT;
+	for (i = 0; i < memblock.memory.cnt - 1; i++) {
+		memblock_region_max_pfn =
+			(memblock.memory.region[i].base >> PAGE_SHIFT) +
+			(memblock.memory.region[i].size >> PAGE_SHIFT);
+		memblock_next_region_start_pfn =
+			memblock.memory.region[i+1].base >> PAGE_SHIFT;
 
-		if (lmb_region_max_pfn < lmb_next_region_start_pfn)
-			register_nosave_region(lmb_region_max_pfn,
-					       lmb_next_region_start_pfn);
+		if (memblock_region_max_pfn < memblock_next_region_start_pfn)
+			register_nosave_region(memblock_region_max_pfn,
+					       memblock_next_region_start_pfn);
 	}
 
 	return 0;
@@ -275,8 +275,8 @@ static int __init mark_nonram_nosave(void)
  */
 void __init paging_init(void)
 {
-	unsigned long total_ram = lmb_phys_mem_size();
-	phys_addr_t top_of_ram = lmb_end_of_DRAM();
+	unsigned long total_ram = memblock_phys_mem_size();
+	phys_addr_t top_of_ram = memblock_end_of_DRAM();
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 
 #ifdef CONFIG_PPC32
@@ -327,7 +327,7 @@ void __init mem_init(void)
 		swiotlb_init(1);
 #endif
 
-	num_physpages = lmb.memory.size >> PAGE_SHIFT;
+	num_physpages = memblock.memory.size >> PAGE_SHIFT;
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -364,7 +364,7 @@ void __init mem_init(void)
 		highmem_mapnr = lowmem_end_addr >> PAGE_SHIFT;
 		for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) {
 			struct page *page = pfn_to_page(pfn);
-			if (lmb_is_reserved(pfn << PAGE_SHIFT))
+			if (memblock_is_reserved(pfn << PAGE_SHIFT))
 				continue;
 			ClearPageReserved(page);
 			init_page_count(page);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 80d110635d2..f47364585ec 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -17,7 +17,7 @@
 #include <linux/nodemask.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/of.h>
 #include <linux/pfn.h>
 #include <asm/sparsemem.h>
@@ -351,7 +351,7 @@ struct of_drconf_cell {
 #define DRCONF_MEM_RESERVED	0x00000080
 
 /*
- * Read the next lmb list entry from the ibm,dynamic-memory property
+ * Read the next memblock list entry from the ibm,dynamic-memory property
  * and return the information in the provided of_drconf_cell structure.
  */
 static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp)
@@ -372,8 +372,8 @@ static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp)
 /*
  * Retreive and validate the ibm,dynamic-memory property of the device tree.
  *
- * The layout of the ibm,dynamic-memory property is a number N of lmb
- * list entries followed by N lmb list entries.  Each lmb list entry
+ * The layout of the ibm,dynamic-memory property is a number N of memblock
+ * list entries followed by N memblock list entries.  Each memblock list entry
  * contains information as layed out in the of_drconf_cell struct above.
  */
 static int of_get_drconf_memory(struct device_node *memory, const u32 **dm)
@@ -398,15 +398,15 @@ static int of_get_drconf_memory(struct device_node *memory, const u32 **dm)
 }
 
 /*
- * Retreive and validate the ibm,lmb-size property for drconf memory
+ * Retreive and validate the ibm,memblock-size property for drconf memory
  * from the device tree.
  */
-static u64 of_get_lmb_size(struct device_node *memory)
+static u64 of_get_memblock_size(struct device_node *memory)
 {
 	const u32 *prop;
 	u32 len;
 
-	prop = of_get_property(memory, "ibm,lmb-size", &len);
+	prop = of_get_property(memory, "ibm,memblock-size", &len);
 	if (!prop || len < sizeof(unsigned int))
 		return 0;
 
@@ -540,19 +540,19 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start,
 						      unsigned long size)
 {
 	/*
-	 * We use lmb_end_of_DRAM() in here instead of memory_limit because
+	 * We use memblock_end_of_DRAM() in here instead of memory_limit because
 	 * we've already adjusted it for the limit and it takes care of
 	 * having memory holes below the limit.  Also, in the case of
 	 * iommu_is_off, memory_limit is not set but is implicitly enforced.
 	 */
 
-	if (start + size <= lmb_end_of_DRAM())
+	if (start + size <= memblock_end_of_DRAM())
 		return size;
 
-	if (start >= lmb_end_of_DRAM())
+	if (start >= memblock_end_of_DRAM())
 		return 0;
 
-	return lmb_end_of_DRAM() - start;
+	return memblock_end_of_DRAM() - start;
 }
 
 /*
@@ -562,7 +562,7 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start,
 static inline int __init read_usm_ranges(const u32 **usm)
 {
 	/*
-	 * For each lmb in ibm,dynamic-memory a corresponding
+	 * For each memblock in ibm,dynamic-memory a corresponding
 	 * entry in linux,drconf-usable-memory property contains
 	 * a counter followed by that many (base, size) duple.
 	 * read the counter from linux,drconf-usable-memory
@@ -578,7 +578,7 @@ static void __init parse_drconf_memory(struct device_node *memory)
 {
 	const u32 *dm, *usm;
 	unsigned int n, rc, ranges, is_kexec_kdump = 0;
-	unsigned long lmb_size, base, size, sz;
+	unsigned long memblock_size, base, size, sz;
 	int nid;
 	struct assoc_arrays aa;
 
@@ -586,8 +586,8 @@ static void __init parse_drconf_memory(struct device_node *memory)
 	if (!n)
 		return;
 
-	lmb_size = of_get_lmb_size(memory);
-	if (!lmb_size)
+	memblock_size = of_get_memblock_size(memory);
+	if (!memblock_size)
 		return;
 
 	rc = of_get_assoc_arrays(memory, &aa);
@@ -611,7 +611,7 @@ static void __init parse_drconf_memory(struct device_node *memory)
 			continue;
 
 		base = drmem.base_addr;
-		size = lmb_size;
+		size = memblock_size;
 		ranges = 1;
 
 		if (is_kexec_kdump) {
@@ -731,7 +731,7 @@ new_range:
 	}
 
 	/*
-	 * Now do the same thing for each LMB listed in the ibm,dynamic-memory
+	 * Now do the same thing for each MEMBLOCK listed in the ibm,dynamic-memory
 	 * property in the ibm,dynamic-reconfiguration-memory node.
 	 */
 	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
@@ -743,8 +743,8 @@ new_range:
 
 static void __init setup_nonnuma(void)
 {
-	unsigned long top_of_ram = lmb_end_of_DRAM();
-	unsigned long total_ram = lmb_phys_mem_size();
+	unsigned long top_of_ram = memblock_end_of_DRAM();
+	unsigned long total_ram = memblock_phys_mem_size();
 	unsigned long start_pfn, end_pfn;
 	unsigned int i, nid = 0;
 
@@ -753,9 +753,9 @@ static void __init setup_nonnuma(void)
 	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
 	       (top_of_ram - total_ram) >> 20);
 
-	for (i = 0; i < lmb.memory.cnt; ++i) {
-		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
-		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
+	for (i = 0; i < memblock.memory.cnt; ++i) {
+		start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT;
+		end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i);
 
 		fake_numa_create_new_node(end_pfn, &nid);
 		add_active_range(nid, start_pfn, end_pfn);
@@ -813,7 +813,7 @@ static void __init dump_numa_memory_topology(void)
 
 		count = 0;
 
-		for (i = 0; i < lmb_end_of_DRAM();
+		for (i = 0; i < memblock_end_of_DRAM();
 		     i += (1 << SECTION_SIZE_BITS)) {
 			if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
 				if (count == 0)
@@ -833,7 +833,7 @@ static void __init dump_numa_memory_topology(void)
 }
 
 /*
- * Allocate some memory, satisfying the lmb or bootmem allocator where
+ * Allocate some memory, satisfying the memblock or bootmem allocator where
  * required. nid is the preferred node and end is the physical address of
  * the highest address in the node.
  *
@@ -847,11 +847,11 @@ static void __init *careful_zallocation(int nid, unsigned long size,
 	int new_nid;
 	unsigned long ret_paddr;
 
-	ret_paddr = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
+	ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT);
 
 	/* retry over all memory */
 	if (!ret_paddr)
-		ret_paddr = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
+		ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM());
 
 	if (!ret_paddr)
 		panic("numa.c: cannot allocate %lu bytes for node %d",
@@ -861,14 +861,14 @@ static void __init *careful_zallocation(int nid, unsigned long size,
 
 	/*
 	 * We initialize the nodes in numeric order: 0, 1, 2...
-	 * and hand over control from the LMB allocator to the
+	 * and hand over control from the MEMBLOCK allocator to the
 	 * bootmem allocator.  If this function is called for
 	 * node 5, then we know that all nodes <5 are using the
-	 * bootmem allocator instead of the LMB allocator.
+	 * bootmem allocator instead of the MEMBLOCK allocator.
 	 *
 	 * So, check the nid from which this allocation came
 	 * and double check to see if we need to use bootmem
-	 * instead of the LMB.  We don't free the LMB memory
+	 * instead of the MEMBLOCK.  We don't free the MEMBLOCK memory
 	 * since it would be useless.
 	 */
 	new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
@@ -893,9 +893,9 @@ static void mark_reserved_regions_for_nid(int nid)
 	struct pglist_data *node = NODE_DATA(nid);
 	int i;
 
-	for (i = 0; i < lmb.reserved.cnt; i++) {
-		unsigned long physbase = lmb.reserved.region[i].base;
-		unsigned long size = lmb.reserved.region[i].size;
+	for (i = 0; i < memblock.reserved.cnt; i++) {
+		unsigned long physbase = memblock.reserved.region[i].base;
+		unsigned long size = memblock.reserved.region[i].size;
 		unsigned long start_pfn = physbase >> PAGE_SHIFT;
 		unsigned long end_pfn = PFN_UP(physbase + size);
 		struct node_active_region node_ar;
@@ -903,7 +903,7 @@ static void mark_reserved_regions_for_nid(int nid)
 					     node->node_spanned_pages;
 
 		/*
-		 * Check to make sure that this lmb.reserved area is
+		 * Check to make sure that this memblock.reserved area is
 		 * within the bounds of the node that we care about.
 		 * Checking the nid of the start and end points is not
 		 * sufficient because the reserved area could span the
@@ -961,7 +961,7 @@ void __init do_init_bootmem(void)
 	int nid;
 
 	min_low_pfn = 0;
-	max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
+	max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
 	max_pfn = max_low_pfn;
 
 	if (parse_numa_properties())
@@ -1038,7 +1038,7 @@ void __init paging_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-	max_zone_pfns[ZONE_DMA] = lmb_end_of_DRAM() >> PAGE_SHIFT;
+	max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT;
 	free_area_init_nodes(max_zone_pfns);
 }
 
@@ -1072,7 +1072,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory,
 {
 	const u32 *dm;
 	unsigned int drconf_cell_cnt, rc;
-	unsigned long lmb_size;
+	unsigned long memblock_size;
 	struct assoc_arrays aa;
 	int nid = -1;
 
@@ -1080,8 +1080,8 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory,
 	if (!drconf_cell_cnt)
 		return -1;
 
-	lmb_size = of_get_lmb_size(memory);
-	if (!lmb_size)
+	memblock_size = of_get_memblock_size(memory);
+	if (!memblock_size)
 		return -1;
 
 	rc = of_get_assoc_arrays(memory, &aa);
@@ -1100,7 +1100,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory,
 			continue;
 
 		if ((scn_addr < drmem.base_addr)
-		    || (scn_addr >= (drmem.base_addr + lmb_size)))
+		    || (scn_addr >= (drmem.base_addr + memblock_size)))
 			continue;
 
 		nid = of_drconf_to_nid_single(&drmem, &aa);
@@ -1113,7 +1113,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory,
 /*
  * Find the node associated with a hot added memory section for memory
  * represented in the device tree as a node (i.e. memory@XXXX) for
- * each lmb.
+ * each memblock.
  */
 int hot_add_node_scn_to_nid(unsigned long scn_addr)
 {
@@ -1154,8 +1154,8 @@ int hot_add_node_scn_to_nid(unsigned long scn_addr)
 
 /*
  * Find the node associated with a hot added memory section.  Section
- * corresponds to a SPARSEMEM section, not an LMB.  It is assumed that
- * sections are fully contained within a single LMB.
+ * corresponds to a SPARSEMEM section, not an MEMBLOCK.  It is assumed that
+ * sections are fully contained within a single MEMBLOCK.
  */
 int hot_add_scn_to_nid(unsigned long scn_addr)
 {
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 34347b2e7e3..a87ead0138b 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -26,7 +26,7 @@
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/slab.h>
 
 #include <asm/pgtable.h>
@@ -198,7 +198,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
 	 * mem_init() sets high_memory so only do the check after that.
 	 */
 	if (mem_init_done && (p < virt_to_phys(high_memory)) &&
-	    !(__allow_ioremap_reserved && lmb_is_region_reserved(p, size))) {
+	    !(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) {
 		printk("__ioremap(): phys addr 0x%llx is RAM lr %p\n",
 		       (unsigned long long)p, __builtin_return_address(0));
 		return NULL;
@@ -331,7 +331,7 @@ void __init mapin_ram(void)
 		s = mmu_mapin_ram(top);
 		__mapin_ram_chunk(s, top);
 
-		top = lmb_end_of_DRAM();
+		top = memblock_end_of_DRAM();
 		s = wii_mmu_mapin_mem2(top);
 		__mapin_ram_chunk(s, top);
 	}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index d050fc8d971..21d6dfab794 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -34,7 +34,7 @@
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/slab.h>
 
 #include <asm/pgalloc.h>
@@ -67,7 +67,7 @@ static void *early_alloc_pgtable(unsigned long size)
 	if (init_bootmem_done)
 		pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS));
 	else
-		pt = __va(lmb_alloc_base(size, size,
+		pt = __va(memblock_alloc_base(size, size,
 					 __pa(MAX_DMA_ADDRESS)));
 	memset(pt, 0, size);
 
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index f11c2cdcb0f..f8a01829d64 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -26,7 +26,7 @@
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/prom.h>
 #include <asm/mmu.h>
@@ -223,7 +223,7 @@ void __init MMU_init_hw(void)
 	 * Find some memory for the hash table.
 	 */
 	if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
-	Hash = __va(lmb_alloc_base(Hash_size, Hash_size,
+	Hash = __va(memblock_alloc_base(Hash_size, Hash_size,
 				   __initial_memory_limit_addr));
 	cacheable_memzero(Hash, Hash_size);
 	_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 687fddaa24c..446a01842a7 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -12,7 +12,7 @@
  *      2 of the License, or (at your option) any later version.
  */
 
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
@@ -252,7 +252,7 @@ void __init stabs_alloc(void)
 		if (cpu == 0)
 			continue; /* stab for CPU 0 is statically allocated */
 
-		newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
+		newstab = memblock_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
 					 1<<SID_SHIFT);
 		newstab = (unsigned long)__va(newstab);
 
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index e81d5d67f83..d8695b02a96 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -34,7 +34,7 @@
 #include <linux/pagemap.h>
 #include <linux/preempt.h>
 #include <linux/spinlock.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
@@ -426,7 +426,7 @@ static void __early_init_mmu(int boot_cpu)
 	/* Set the global containing the top of the linear mapping
 	 * for use by the TLB miss code
 	 */
-	linear_map_top = lmb_end_of_DRAM();
+	linear_map_top = memblock_end_of_DRAM();
 
 	/* A sync won't hurt us after mucking around with
 	 * the MMU configuration
diff --git a/arch/powerpc/platforms/85xx/corenet_ds.c b/arch/powerpc/platforms/85xx/corenet_ds.c
index 534c2ecc89d..2ab338c9ac3 100644
--- a/arch/powerpc/platforms/85xx/corenet_ds.c
+++ b/arch/powerpc/platforms/85xx/corenet_ds.c
@@ -16,7 +16,7 @@
 #include <linux/kdev_t.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/system.h>
 #include <asm/time.h>
@@ -100,7 +100,7 @@ void __init corenet_ds_setup_arch(void)
 #endif
 
 #ifdef CONFIG_SWIOTLB
-	if (lmb_end_of_DRAM() > max) {
+	if (memblock_end_of_DRAM() > max) {
 		ppc_swiotlb_enable = 1;
 		set_pci_dma_ops(&swiotlb_dma_ops);
 		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c
index 004b7d36cdb..f79f2f10214 100644
--- a/arch/powerpc/platforms/85xx/mpc8536_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
@@ -17,7 +17,7 @@
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
 #include <linux/of_platform.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/system.h>
 #include <asm/time.h>
@@ -94,7 +94,7 @@ static void __init mpc8536_ds_setup_arch(void)
 #endif
 
 #ifdef CONFIG_SWIOTLB
-	if (lmb_end_of_DRAM() > max) {
+	if (memblock_end_of_DRAM() > max) {
 		ppc_swiotlb_enable = 1;
 		set_pci_dma_ops(&swiotlb_dma_ops);
 		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index 544011a562f..8190bc25bf2 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -20,7 +20,7 @@
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
 #include <linux/of_platform.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/system.h>
 #include <asm/time.h>
@@ -190,7 +190,7 @@ static void __init mpc85xx_ds_setup_arch(void)
 #endif
 
 #ifdef CONFIG_SWIOTLB
-	if (lmb_end_of_DRAM() > max) {
+	if (memblock_end_of_DRAM() > max) {
 		ppc_swiotlb_enable = 1;
 		set_pci_dma_ops(&swiotlb_dma_ops);
 		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index 8fe87fc6148..494513682d7 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -33,7 +33,7 @@
 #include <linux/of_platform.h>
 #include <linux/of_device.h>
 #include <linux/phy.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/system.h>
 #include <asm/atomic.h>
@@ -325,7 +325,7 @@ static void __init mpc85xx_mds_setup_arch(void)
 #endif	/* CONFIG_QUICC_ENGINE */
 
 #ifdef CONFIG_SWIOTLB
-	if (lmb_end_of_DRAM() > max) {
+	if (memblock_end_of_DRAM() > max) {
 		ppc_swiotlb_enable = 1;
 		set_pci_dma_ops(&swiotlb_dma_ops);
 		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
index 2aa69a69bcc..b11c3535f35 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
@@ -19,7 +19,7 @@
 #include <linux/delay.h>
 #include <linux/seq_file.h>
 #include <linux/of_platform.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/system.h>
 #include <asm/time.h>
@@ -103,7 +103,7 @@ mpc86xx_hpcn_setup_arch(void)
 #endif
 
 #ifdef CONFIG_SWIOTLB
-	if (lmb_end_of_DRAM() > max) {
+	if (memblock_end_of_DRAM() > max) {
 		ppc_swiotlb_enable = 1;
 		set_pci_dma_ops(&swiotlb_dma_ops);
 		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 4326b737d91..3712900471b 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -29,7 +29,7 @@
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/slab.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/prom.h>
 #include <asm/iommu.h>
@@ -845,10 +845,10 @@ static int __init cell_iommu_init_disabled(void)
 	/* If we found a DMA window, we check if it's big enough to enclose
 	 * all of physical memory. If not, we force enable IOMMU
 	 */
-	if (np && size < lmb_end_of_DRAM()) {
+	if (np && size < memblock_end_of_DRAM()) {
 		printk(KERN_WARNING "iommu: force-enabled, dma window"
 		       " (%ldMB) smaller than total memory (%lldMB)\n",
-		       size >> 20, lmb_end_of_DRAM() >> 20);
+		       size >> 20, memblock_end_of_DRAM() >> 20);
 		return -ENODEV;
 	}
 
@@ -1064,7 +1064,7 @@ static int __init cell_iommu_fixed_mapping_init(void)
 	}
 
 	fbase = _ALIGN_UP(fbase, 1 << IO_SEGMENT_SHIFT);
-	fsize = lmb_phys_mem_size();
+	fsize = memblock_phys_mem_size();
 
 	if ((fbase + fsize) <= 0x800000000ul)
 		hbase = 0; /* use the device tree window */
@@ -1169,7 +1169,7 @@ static int __init cell_iommu_init(void)
 	 * Note: should we make sure we have the IOMMU actually disabled ?
 	 */
 	if (iommu_is_off ||
-	    (!iommu_force_on && lmb_end_of_DRAM() <= 0x80000000ull))
+	    (!iommu_force_on && memblock_end_of_DRAM() <= 0x80000000ull))
 		if (cell_iommu_init_disabled() == 0)
 			goto bail;
 
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
index 174a04ac480..5cdcc7c8d97 100644
--- a/arch/powerpc/platforms/embedded6xx/wii.c
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -20,7 +20,7 @@
 #include <linux/seq_file.h>
 #include <linux/kexec.h>
 #include <linux/of_platform.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <mm/mmu_decl.h>
 
 #include <asm/io.h>
@@ -65,7 +65,7 @@ static int __init page_aligned(unsigned long x)
 
 void __init wii_memory_fixups(void)
 {
-	struct lmb_property *p = lmb.memory.region;
+	struct memblock_property *p = memblock.memory.region;
 
 	/*
 	 * This is part of a workaround to allow the use of two
@@ -77,7 +77,7 @@ void __init wii_memory_fixups(void)
 	 * between both ranges.
 	 */
 
-	BUG_ON(lmb.memory.cnt != 2);
+	BUG_ON(memblock.memory.cnt != 2);
 	BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base));
 
 	p[0].size = _ALIGN_DOWN(p[0].size, PAGE_SIZE);
@@ -92,11 +92,11 @@ void __init wii_memory_fixups(void)
 
 	p[0].size += wii_hole_size + p[1].size;
 
-	lmb.memory.cnt = 1;
-	lmb_analyze();
+	memblock.memory.cnt = 1;
+	memblock_analyze();
 
 	/* reserve the hole */
-	lmb_reserve(wii_hole_start, wii_hole_size);
+	memblock_reserve(wii_hole_start, wii_hole_size);
 
 	/* allow ioremapping the address space in the hole */
 	__allow_ioremap_reserved = 1;
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index 39df70529d2..3fff8d979b4 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -41,7 +41,7 @@
 #include <linux/smp.h>
 #include <linux/bitops.h>
 #include <linux/of_device.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/processor.h>
 #include <asm/sections.h>
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 7b1d608ea3c..1f9fb2c5776 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -204,7 +204,7 @@ int __init iob_init(struct device_node *dn)
 	pr_debug(" -> %s\n", __func__);
 
 	/* Allocate a spare page to map all invalid IOTLB pages. */
-	tmp = lmb_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE);
+	tmp = memblock_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE);
 	if (!tmp)
 		panic("IOBMAP: Cannot allocate spare page!");
 	/* Empty l1 is marked invalid */
@@ -275,7 +275,7 @@ void __init alloc_iobmap_l2(void)
 	return;
 #endif
 	/* For 2G space, 8x64 pages (2^21 bytes) is max total l2 size */
-	iob_l2_base = (u32 *)abs_to_virt(lmb_alloc_base(1UL<<21, 1UL<<21, 0x80000000));
+	iob_l2_base = (u32 *)abs_to_virt(memblock_alloc_base(1UL<<21, 1UL<<21, 0x80000000));
 
 	printk(KERN_INFO "IOBMAP L2 allocated at: %p\n", iob_l2_base);
 }
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index f1d0132ebcc..9deb274841f 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -51,7 +51,7 @@
 #include <linux/suspend.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/reg.h>
 #include <asm/sections.h>
@@ -619,7 +619,7 @@ static int __init pmac_probe(void)
 	 * driver needs that. We have to allocate it now. We allocate 4k
 	 * (1 small page) for now.
 	 */
-	smu_cmdbuf_abs = lmb_alloc_base(4096, 4096, 0x80000000UL);
+	smu_cmdbuf_abs = memblock_alloc_base(4096, 4096, 0x80000000UL);
 #endif /* CONFIG_PMAC_SMU */
 
 	return 1;
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index 1e8a1e39dfe..2c0ed87f202 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -19,7 +19,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/machdep.h>
 #include <asm/prom.h>
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 7925751e464..c2045880e67 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -21,7 +21,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/memory_hotplug.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/slab.h>
 
 #include <asm/cell-regs.h>
@@ -318,8 +318,8 @@ static int __init ps3_mm_add_memory(void)
 		return result;
 	}
 
-	lmb_add(start_addr, map.r1.size);
-	lmb_analyze();
+	memblock_add(start_addr, map.r1.size);
+	memblock_analyze();
 
 	result = online_pages(start_pfn, nr_pages);
 
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
index dd521a181f2..5b759b66959 100644
--- a/arch/powerpc/platforms/ps3/os-area.c
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -24,7 +24,7 @@
 #include <linux/fs.h>
 #include <linux/syscalls.h>
 #include <linux/ctype.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/of.h>
 #include <linux/slab.h>
 
@@ -723,7 +723,7 @@ static void os_area_queue_work(void)
  * flash to a high address in the boot memory region and then puts that RAM
  * address and the byte count into the repository for retrieval by the guest.
  * We copy the data we want into a static variable and allow the memory setup
- * by the HV to be claimed by the lmb manager.
+ * by the HV to be claimed by the memblock manager.
  *
  * The os area mirror will not be available to a second stage kernel, and
  * the header verify will fail.  In this case, the saved_params values will
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 01e7b5bb3c1..deab5f94609 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -10,14 +10,14 @@
  */
 
 #include <linux/of.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/vmalloc.h>
 #include <asm/firmware.h>
 #include <asm/machdep.h>
 #include <asm/pSeries_reconfig.h>
 #include <asm/sparsemem.h>
 
-static int pseries_remove_lmb(unsigned long base, unsigned int lmb_size)
+static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
 {
 	unsigned long start, start_pfn;
 	struct zone *zone;
@@ -26,7 +26,7 @@ static int pseries_remove_lmb(unsigned long base, unsigned int lmb_size)
 	start_pfn = base >> PAGE_SHIFT;
 
 	if (!pfn_valid(start_pfn)) {
-		lmb_remove(base, lmb_size);
+		memblock_remove(base, memblock_size);
 		return 0;
 	}
 
@@ -41,20 +41,20 @@ static int pseries_remove_lmb(unsigned long base, unsigned int lmb_size)
 	 * to sysfs "state" file and we can't remove sysfs entries
 	 * while writing to it. So we have to defer it to here.
 	 */
-	ret = __remove_pages(zone, start_pfn, lmb_size >> PAGE_SHIFT);
+	ret = __remove_pages(zone, start_pfn, memblock_size >> PAGE_SHIFT);
 	if (ret)
 		return ret;
 
 	/*
 	 * Update memory regions for memory remove
 	 */
-	lmb_remove(base, lmb_size);
+	memblock_remove(base, memblock_size);
 
 	/*
 	 * Remove htab bolted mappings for this section of memory
 	 */
 	start = (unsigned long)__va(base);
-	ret = remove_section_mapping(start, start + lmb_size);
+	ret = remove_section_mapping(start, start + memblock_size);
 
 	/* Ensure all vmalloc mappings are flushed in case they also
 	 * hit that section of memory
@@ -69,7 +69,7 @@ static int pseries_remove_memory(struct device_node *np)
 	const char *type;
 	const unsigned int *regs;
 	unsigned long base;
-	unsigned int lmb_size;
+	unsigned int memblock_size;
 	int ret = -EINVAL;
 
 	/*
@@ -80,16 +80,16 @@ static int pseries_remove_memory(struct device_node *np)
 		return 0;
 
 	/*
-	 * Find the bae address and size of the lmb
+	 * Find the bae address and size of the memblock
 	 */
 	regs = of_get_property(np, "reg", NULL);
 	if (!regs)
 		return ret;
 
 	base = *(unsigned long *)regs;
-	lmb_size = regs[3];
+	memblock_size = regs[3];
 
-	ret = pseries_remove_lmb(base, lmb_size);
+	ret = pseries_remove_memblock(base, memblock_size);
 	return ret;
 }
 
@@ -98,7 +98,7 @@ static int pseries_add_memory(struct device_node *np)
 	const char *type;
 	const unsigned int *regs;
 	unsigned long base;
-	unsigned int lmb_size;
+	unsigned int memblock_size;
 	int ret = -EINVAL;
 
 	/*
@@ -109,43 +109,43 @@ static int pseries_add_memory(struct device_node *np)
 		return 0;
 
 	/*
-	 * Find the base and size of the lmb
+	 * Find the base and size of the memblock
 	 */
 	regs = of_get_property(np, "reg", NULL);
 	if (!regs)
 		return ret;
 
 	base = *(unsigned long *)regs;
-	lmb_size = regs[3];
+	memblock_size = regs[3];
 
 	/*
 	 * Update memory region to represent the memory add
 	 */
-	ret = lmb_add(base, lmb_size);
+	ret = memblock_add(base, memblock_size);
 	return (ret < 0) ? -EINVAL : 0;
 }
 
 static int pseries_drconf_memory(unsigned long *base, unsigned int action)
 {
 	struct device_node *np;
-	const unsigned long *lmb_size;
+	const unsigned long *memblock_size;
 	int rc;
 
 	np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
 	if (!np)
 		return -EINVAL;
 
-	lmb_size = of_get_property(np, "ibm,lmb-size", NULL);
-	if (!lmb_size) {
+	memblock_size = of_get_property(np, "ibm,memblock-size", NULL);
+	if (!memblock_size) {
 		of_node_put(np);
 		return -EINVAL;
 	}
 
 	if (action == PSERIES_DRCONF_MEM_ADD) {
-		rc = lmb_add(*base, *lmb_size);
+		rc = memblock_add(*base, *memblock_size);
 		rc = (rc < 0) ? -EINVAL : 0;
 	} else if (action == PSERIES_DRCONF_MEM_REMOVE) {
-		rc = pseries_remove_lmb(*base, *lmb_size);
+		rc = pseries_remove_memblock(*base, *memblock_size);
 	} else {
 		rc = -EINVAL;
 	}
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index d26182d42cb..395848e30c5 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -66,7 +66,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
 	tcep = ((u64 *)tbl->it_base) + index;
 
 	while (npages--) {
-		/* can't move this out since we might cross LMB boundary */
+		/* can't move this out since we might cross MEMBLOCK boundary */
 		rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
 		*tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
 
diff --git a/arch/powerpc/platforms/pseries/phyp_dump.c b/arch/powerpc/platforms/pseries/phyp_dump.c
index 7ebd9e88d36..6e7742da007 100644
--- a/arch/powerpc/platforms/pseries/phyp_dump.c
+++ b/arch/powerpc/platforms/pseries/phyp_dump.c
@@ -255,12 +255,12 @@ void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr)
 
 /* ------------------------------------------------- */
 /**
- * release_memory_range -- release memory previously lmb_reserved
+ * release_memory_range -- release memory previously memblock_reserved
  * @start_pfn: starting physical frame number
  * @nr_pages: number of pages to free.
  *
  * This routine will release memory that had been previously
- * lmb_reserved in early boot. The released memory becomes
+ * memblock_reserved in early boot. The released memory becomes
  * available for genreal use.
  */
 static void release_memory_range(unsigned long start_pfn,
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index c8b96ed7c01..559db2b846a 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -36,7 +36,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/vmalloc.h>
 #include <linux/suspend.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/gfp.h>
 #include <asm/io.h>
 #include <asm/prom.h>
@@ -232,7 +232,7 @@ static int __init dart_init(struct device_node *dart_node)
 	 * that to work around what looks like a problem with the HT bridge
 	 * prefetching into invalid pages and corrupting data
 	 */
-	tmp = lmb_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);
+	tmp = memblock_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);
 	dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
 					 DARTMAP_RPNMASK);
 
@@ -407,7 +407,7 @@ void __init alloc_dart_table(void)
 	if (iommu_is_off)
 		return;
 
-	if (!iommu_force_on && lmb_end_of_DRAM() <= 0x40000000ull)
+	if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull)
 		return;
 
 	/* 512 pages (2MB) is max DART tablesize. */
@@ -416,7 +416,7 @@ void __init alloc_dart_table(void)
 	 * will blow up an entire large page anyway in the kernel mapping
 	 */
 	dart_tablebase = (unsigned long)
-		abs_to_virt(lmb_alloc_base(1UL<<24, 1UL<<24, 0x80000000L));
+		abs_to_virt(memblock_alloc_base(1UL<<24, 1UL<<24, 0x80000000L));
 
 	printk(KERN_INFO "DART table allocated at: %lx\n", dart_tablebase);
 }
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index a14760fe513..356c6a0e1b2 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -23,7 +23,7 @@
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/log2.h>
 #include <linux/slab.h>
 
@@ -190,7 +190,7 @@ static void __init setup_pci_atmu(struct pci_controller *hose,
 	pr_info("%s: PCICSRBAR @ 0x%x\n", name, pcicsrbar);
 
 	/* Setup inbound mem window */
-	mem = lmb_end_of_DRAM();
+	mem = memblock_end_of_DRAM();
 	sz = min(mem, paddr_lo);
 	mem_log = __ilog2_u64(sz);
 
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 573fca1fbd9..82868fee21f 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -10,7 +10,7 @@ config SUPERH
 	select EMBEDDED
 	select HAVE_CLK
 	select HAVE_IDE if HAS_IOPORT
-	select HAVE_LMB
+	select HAVE_MEMBLOCK
 	select HAVE_OPROFILE
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_ARCH_TRACEHOOK
diff --git a/arch/sh/include/asm/lmb.h b/arch/sh/include/asm/lmb.h
deleted file mode 100644
index 9b437f657ff..00000000000
--- a/arch/sh/include/asm/lmb.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_SH_LMB_H
-#define __ASM_SH_LMB_H
-
-#define LMB_REAL_LIMIT	0
-
-#endif /* __ASM_SH_LMB_H */
diff --git a/arch/sh/include/asm/memblock.h b/arch/sh/include/asm/memblock.h
new file mode 100644
index 00000000000..dfe683b8807
--- /dev/null
+++ b/arch/sh/include/asm/memblock.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_SH_MEMBLOCK_H
+#define __ASM_SH_MEMBLOCK_H
+
+#define MEMBLOCK_REAL_LIMIT	0
+
+#endif /* __ASM_SH_MEMBLOCK_H */
diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c
index 5a559e666eb..e2a3af31ff9 100644
--- a/arch/sh/kernel/machine_kexec.c
+++ b/arch/sh/kernel/machine_kexec.c
@@ -15,7 +15,7 @@
 #include <linux/numa.h>
 #include <linux/ftrace.h>
 #include <linux/suspend.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
@@ -157,10 +157,10 @@ void __init reserve_crashkernel(void)
 	unsigned long long crash_size, crash_base;
 	int ret;
 
-	/* this is necessary because of lmb_phys_mem_size() */
-	lmb_analyze();
+	/* this is necessary because of memblock_phys_mem_size() */
+	memblock_analyze();
 
-	ret = parse_crashkernel(boot_command_line, lmb_phys_mem_size(),
+	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
 			&crash_size, &crash_base);
 	if (ret == 0 && crash_size > 0) {
 		crashk_res.start = crash_base;
@@ -172,14 +172,14 @@ void __init reserve_crashkernel(void)
 
 	crash_size = PAGE_ALIGN(crashk_res.end - crashk_res.start + 1);
 	if (!crashk_res.start) {
-		unsigned long max = lmb_end_of_DRAM() - memory_limit;
-		crashk_res.start = __lmb_alloc_base(crash_size, PAGE_SIZE, max);
+		unsigned long max = memblock_end_of_DRAM() - memory_limit;
+		crashk_res.start = __memblock_alloc_base(crash_size, PAGE_SIZE, max);
 		if (!crashk_res.start) {
 			pr_err("crashkernel allocation failed\n");
 			goto disable;
 		}
 	} else {
-		ret = lmb_reserve(crashk_res.start, crash_size);
+		ret = memblock_reserve(crashk_res.start, crash_size);
 		if (unlikely(ret < 0)) {
 			pr_err("crashkernel reservation failed - "
 			       "memory is in use\n");
@@ -192,7 +192,7 @@ void __init reserve_crashkernel(void)
 	/*
 	 * Crash kernel trumps memory limit
 	 */
-	if ((lmb_end_of_DRAM() - memory_limit) <= crashk_res.end) {
+	if ((memblock_end_of_DRAM() - memory_limit) <= crashk_res.end) {
 		memory_limit = 0;
 		pr_info("Disabled memory limit for crashkernel\n");
 	}
@@ -201,7 +201,7 @@ void __init reserve_crashkernel(void)
 		"for crashkernel (System RAM: %ldMB)\n",
 		(unsigned long)(crash_size >> 20),
 		(unsigned long)(crashk_res.start),
-		(unsigned long)(lmb_phys_mem_size() >> 20));
+		(unsigned long)(memblock_phys_mem_size() >> 20));
 
 	return;
 
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 272734681d2..e769401a78b 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -30,7 +30,7 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/platform_device.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/page.h>
@@ -141,10 +141,10 @@ void __init check_for_initrd(void)
 		goto disable;
 	}
 
-	if (unlikely(end > lmb_end_of_DRAM())) {
+	if (unlikely(end > memblock_end_of_DRAM())) {
 		pr_err("initrd extends beyond end of memory "
 		       "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
-		       end, (unsigned long)lmb_end_of_DRAM());
+		       end, (unsigned long)memblock_end_of_DRAM());
 		goto disable;
 	}
 
@@ -161,7 +161,7 @@ void __init check_for_initrd(void)
 	initrd_start = (unsigned long)__va(__pa(start));
 	initrd_end = initrd_start + INITRD_SIZE;
 
-	lmb_reserve(__pa(initrd_start), INITRD_SIZE);
+	memblock_reserve(__pa(initrd_start), INITRD_SIZE);
 
 	return;
 
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 46f84de6246..d0e249100e9 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -16,7 +16,7 @@
 #include <linux/pagemap.h>
 #include <linux/percpu.h>
 #include <linux/io.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/dma-mapping.h>
 #include <asm/mmu_context.h>
 #include <asm/mmzone.h>
@@ -33,7 +33,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
 void __init generic_mem_init(void)
 {
-	lmb_add(__MEMORY_START, __MEMORY_SIZE);
+	memblock_add(__MEMORY_START, __MEMORY_SIZE);
 }
 
 void __init __weak plat_mem_setup(void)
@@ -176,12 +176,12 @@ void __init allocate_pgdat(unsigned int nid)
 	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
-	phys = __lmb_alloc_base(sizeof(struct pglist_data),
+	phys = __memblock_alloc_base(sizeof(struct pglist_data),
 				SMP_CACHE_BYTES, end_pfn << PAGE_SHIFT);
 	/* Retry with all of system memory */
 	if (!phys)
-		phys = __lmb_alloc_base(sizeof(struct pglist_data),
-					SMP_CACHE_BYTES, lmb_end_of_DRAM());
+		phys = __memblock_alloc_base(sizeof(struct pglist_data),
+					SMP_CACHE_BYTES, memblock_end_of_DRAM());
 	if (!phys)
 		panic("Can't allocate pgdat for node %d\n", nid);
 
@@ -212,7 +212,7 @@ static void __init bootmem_init_one_node(unsigned int nid)
 
 	total_pages = bootmem_bootmap_pages(p->node_spanned_pages);
 
-	paddr = lmb_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE);
+	paddr = memblock_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE);
 	if (!paddr)
 		panic("Can't allocate bootmap for nid[%d]\n", nid);
 
@@ -227,9 +227,9 @@ static void __init bootmem_init_one_node(unsigned int nid)
 	 */
 	if (nid == 0) {
 		/* Reserve the sections we're already using. */
-		for (i = 0; i < lmb.reserved.cnt; i++)
-			reserve_bootmem(lmb.reserved.region[i].base,
-					lmb_size_bytes(&lmb.reserved, i),
+		for (i = 0; i < memblock.reserved.cnt; i++)
+			reserve_bootmem(memblock.reserved.region[i].base,
+					memblock_size_bytes(&memblock.reserved, i),
 					BOOTMEM_DEFAULT);
 	}
 
@@ -241,10 +241,10 @@ static void __init do_init_bootmem(void)
 	int i;
 
 	/* Add active regions with valid PFNs. */
-	for (i = 0; i < lmb.memory.cnt; i++) {
+	for (i = 0; i < memblock.memory.cnt; i++) {
 		unsigned long start_pfn, end_pfn;
-		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
-		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
+		start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT;
+		end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i);
 		__add_active_range(0, start_pfn, end_pfn);
 	}
 
@@ -276,7 +276,7 @@ static void __init early_reserve_mem(void)
 	 * this catches the (definitely buggy) case of us accidentally
 	 * initializing the bootmem allocator with an invalid RAM area.
 	 */
-	lmb_reserve(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET,
+	memblock_reserve(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET,
 		    (PFN_PHYS(start_pfn) + PAGE_SIZE - 1) -
 		    (__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET));
 
@@ -284,7 +284,7 @@ static void __init early_reserve_mem(void)
 	 * Reserve physical pages below CONFIG_ZERO_PAGE_OFFSET.
 	 */
 	if (CONFIG_ZERO_PAGE_OFFSET != 0)
-		lmb_reserve(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET);
+		memblock_reserve(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET);
 
 	/*
 	 * Handle additional early reservations
@@ -299,27 +299,27 @@ void __init paging_init(void)
 	unsigned long vaddr, end;
 	int nid;
 
-	lmb_init();
+	memblock_init();
 
 	sh_mv.mv_mem_init();
 
 	early_reserve_mem();
 
-	lmb_enforce_memory_limit(memory_limit);
-	lmb_analyze();
+	memblock_enforce_memory_limit(memory_limit);
+	memblock_analyze();
 
-	lmb_dump_all();
+	memblock_dump_all();
 
 	/*
 	 * Determine low and high memory ranges:
 	 */
-	max_low_pfn = max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
+	max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
 	min_low_pfn = __MEMORY_START >> PAGE_SHIFT;
 
 	nodes_clear(node_online_map);
 
 	memory_start = (unsigned long)__va(__MEMORY_START);
-	memory_end = memory_start + (memory_limit ?: lmb_phys_mem_size());
+	memory_end = memory_start + (memory_limit ?: memblock_phys_mem_size());
 
 	uncached_init();
 	pmb_init();
diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c
index a2e645f64a3..3d85225b9e9 100644
--- a/arch/sh/mm/numa.c
+++ b/arch/sh/mm/numa.c
@@ -9,7 +9,7 @@
  */
 #include <linux/module.h>
 #include <linux/bootmem.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/numa.h>
 #include <linux/pfn.h>
@@ -39,12 +39,12 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 	pmb_bolt_mapping((unsigned long)__va(start), start, end - start,
 			 PAGE_KERNEL);
 
-	lmb_add(start, end - start);
+	memblock_add(start, end - start);
 
 	__add_active_range(nid, start_pfn, end_pfn);
 
 	/* Node-local pgdat */
-	NODE_DATA(nid) = __va(lmb_alloc_base(sizeof(struct pglist_data),
+	NODE_DATA(nid) = __va(memblock_alloc_base(sizeof(struct pglist_data),
 					     SMP_CACHE_BYTES, end));
 	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
@@ -54,7 +54,7 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 
 	/* Node-local bootmap */
 	bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-	bootmem_paddr = lmb_alloc_base(bootmap_pages << PAGE_SHIFT,
+	bootmem_paddr = memblock_alloc_base(bootmap_pages << PAGE_SHIFT,
 				       PAGE_SIZE, end);
 	init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
 			  start_pfn, end_pfn);
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 6f1470baa31..c0015db247b 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -42,7 +42,7 @@ config SPARC64
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_KRETPROBES
 	select HAVE_KPROBES
-	select HAVE_LMB
+	select HAVE_MEMBLOCK
 	select HAVE_SYSCALL_WRAPPERS
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
diff --git a/arch/sparc/include/asm/lmb.h b/arch/sparc/include/asm/lmb.h
deleted file mode 100644
index 6a352cbcf52..00000000000
--- a/arch/sparc/include/asm/lmb.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _SPARC64_LMB_H
-#define _SPARC64_LMB_H
-
-#include <asm/oplib.h>
-
-#define LMB_DBG(fmt...) prom_printf(fmt)
-
-#define LMB_REAL_LIMIT	0
-
-#endif /* !(_SPARC64_LMB_H) */
diff --git a/arch/sparc/include/asm/memblock.h b/arch/sparc/include/asm/memblock.h
new file mode 100644
index 00000000000..f12af880649
--- /dev/null
+++ b/arch/sparc/include/asm/memblock.h
@@ -0,0 +1,10 @@
+#ifndef _SPARC64_MEMBLOCK_H
+#define _SPARC64_MEMBLOCK_H
+
+#include <asm/oplib.h>
+
+#define MEMBLOCK_DBG(fmt...) prom_printf(fmt)
+
+#define MEMBLOCK_REAL_LIMIT	0
+
+#endif /* !(_SPARC64_MEMBLOCK_H) */
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index cdc91d919e9..83e85c2e802 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -4,7 +4,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/types.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/log2.h>
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -86,7 +86,7 @@ static void mdesc_handle_init(struct mdesc_handle *hp,
 	hp->handle_size = handle_size;
 }
 
-static struct mdesc_handle * __init mdesc_lmb_alloc(unsigned int mdesc_size)
+static struct mdesc_handle * __init mdesc_memblock_alloc(unsigned int mdesc_size)
 {
 	unsigned int handle_size, alloc_size;
 	struct mdesc_handle *hp;
@@ -97,7 +97,7 @@ static struct mdesc_handle * __init mdesc_lmb_alloc(unsigned int mdesc_size)
 		       mdesc_size);
 	alloc_size = PAGE_ALIGN(handle_size);
 
-	paddr = lmb_alloc(alloc_size, PAGE_SIZE);
+	paddr = memblock_alloc(alloc_size, PAGE_SIZE);
 
 	hp = NULL;
 	if (paddr) {
@@ -107,7 +107,7 @@ static struct mdesc_handle * __init mdesc_lmb_alloc(unsigned int mdesc_size)
 	return hp;
 }
 
-static void mdesc_lmb_free(struct mdesc_handle *hp)
+static void mdesc_memblock_free(struct mdesc_handle *hp)
 {
 	unsigned int alloc_size;
 	unsigned long start;
@@ -120,9 +120,9 @@ static void mdesc_lmb_free(struct mdesc_handle *hp)
 	free_bootmem_late(start, alloc_size);
 }
 
-static struct mdesc_mem_ops lmb_mdesc_ops = {
-	.alloc = mdesc_lmb_alloc,
-	.free  = mdesc_lmb_free,
+static struct mdesc_mem_ops memblock_mdesc_ops = {
+	.alloc = mdesc_memblock_alloc,
+	.free  = mdesc_memblock_free,
 };
 
 static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size)
@@ -914,7 +914,7 @@ void __init sun4v_mdesc_init(void)
 
 	printk("MDESC: Size is %lu bytes.\n", len);
 
-	hp = mdesc_alloc(len, &lmb_mdesc_ops);
+	hp = mdesc_alloc(len, &memblock_mdesc_ops);
 	if (hp == NULL) {
 		prom_printf("MDESC: alloc of %lu bytes failed.\n", len);
 		prom_halt();
diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c
index fb06ac2bd38..466a32763ea 100644
--- a/arch/sparc/kernel/prom_64.c
+++ b/arch/sparc/kernel/prom_64.c
@@ -20,7 +20,7 @@
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/of_device.h>
 
 #include <asm/prom.h>
@@ -34,7 +34,7 @@
 
 void * __init prom_early_alloc(unsigned long size)
 {
-	unsigned long paddr = lmb_alloc(size, SMP_CACHE_BYTES);
+	unsigned long paddr = memblock_alloc(size, SMP_CACHE_BYTES);
 	void *ret;
 
 	if (!paddr) {
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index b2831dc3c12..f0434513df1 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -23,7 +23,7 @@
 #include <linux/cache.h>
 #include <linux/sort.h>
 #include <linux/percpu.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/mmzone.h>
 #include <linux/gfp.h>
 
@@ -726,7 +726,7 @@ static void __init find_ramdisk(unsigned long phys_base)
 		initrd_start = ramdisk_image;
 		initrd_end = ramdisk_image + sparc_ramdisk_size;
 
-		lmb_reserve(initrd_start, sparc_ramdisk_size);
+		memblock_reserve(initrd_start, sparc_ramdisk_size);
 
 		initrd_start += PAGE_OFFSET;
 		initrd_end += PAGE_OFFSET;
@@ -822,7 +822,7 @@ static void __init allocate_node_data(int nid)
 	struct pglist_data *p;
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
-	paddr = lmb_alloc_nid(sizeof(struct pglist_data),
+	paddr = memblock_alloc_nid(sizeof(struct pglist_data),
 			      SMP_CACHE_BYTES, nid, nid_range);
 	if (!paddr) {
 		prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid);
@@ -843,7 +843,7 @@ static void __init allocate_node_data(int nid)
 	if (p->node_spanned_pages) {
 		num_pages = bootmem_bootmap_pages(p->node_spanned_pages);
 
-		paddr = lmb_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid,
+		paddr = memblock_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid,
 				      nid_range);
 		if (!paddr) {
 			prom_printf("Cannot allocate bootmap for nid[%d]\n",
@@ -974,11 +974,11 @@ static void __init add_node_ranges(void)
 {
 	int i;
 
-	for (i = 0; i < lmb.memory.cnt; i++) {
-		unsigned long size = lmb_size_bytes(&lmb.memory, i);
+	for (i = 0; i < memblock.memory.cnt; i++) {
+		unsigned long size = memblock_size_bytes(&memblock.memory, i);
 		unsigned long start, end;
 
-		start = lmb.memory.region[i].base;
+		start = memblock.memory.region[i].base;
 		end = start + size;
 		while (start < end) {
 			unsigned long this_end;
@@ -1010,7 +1010,7 @@ static int __init grab_mlgroups(struct mdesc_handle *md)
 	if (!count)
 		return -ENOENT;
 
-	paddr = lmb_alloc(count * sizeof(struct mdesc_mlgroup),
+	paddr = memblock_alloc(count * sizeof(struct mdesc_mlgroup),
 			  SMP_CACHE_BYTES);
 	if (!paddr)
 		return -ENOMEM;
@@ -1051,7 +1051,7 @@ static int __init grab_mblocks(struct mdesc_handle *md)
 	if (!count)
 		return -ENOENT;
 
-	paddr = lmb_alloc(count * sizeof(struct mdesc_mblock),
+	paddr = memblock_alloc(count * sizeof(struct mdesc_mblock),
 			  SMP_CACHE_BYTES);
 	if (!paddr)
 		return -ENOMEM;
@@ -1279,8 +1279,8 @@ static int bootmem_init_numa(void)
 
 static void __init bootmem_init_nonnuma(void)
 {
-	unsigned long top_of_ram = lmb_end_of_DRAM();
-	unsigned long total_ram = lmb_phys_mem_size();
+	unsigned long top_of_ram = memblock_end_of_DRAM();
+	unsigned long total_ram = memblock_phys_mem_size();
 	unsigned int i;
 
 	numadbg("bootmem_init_nonnuma()\n");
@@ -1292,15 +1292,15 @@ static void __init bootmem_init_nonnuma(void)
 
 	init_node_masks_nonnuma();
 
-	for (i = 0; i < lmb.memory.cnt; i++) {
-		unsigned long size = lmb_size_bytes(&lmb.memory, i);
+	for (i = 0; i < memblock.memory.cnt; i++) {
+		unsigned long size = memblock_size_bytes(&memblock.memory, i);
 		unsigned long start_pfn, end_pfn;
 
 		if (!size)
 			continue;
 
-		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
-		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
+		start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT;
+		end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i);
 		add_active_range(0, start_pfn, end_pfn);
 	}
 
@@ -1338,9 +1338,9 @@ static void __init trim_reserved_in_node(int nid)
 
 	numadbg("  trim_reserved_in_node(%d)\n", nid);
 
-	for (i = 0; i < lmb.reserved.cnt; i++) {
-		unsigned long start = lmb.reserved.region[i].base;
-		unsigned long size = lmb_size_bytes(&lmb.reserved, i);
+	for (i = 0; i < memblock.reserved.cnt; i++) {
+		unsigned long start = memblock.reserved.region[i].base;
+		unsigned long size = memblock_size_bytes(&memblock.reserved, i);
 		unsigned long end = start + size;
 
 		reserve_range_in_node(nid, start, end);
@@ -1384,7 +1384,7 @@ static unsigned long __init bootmem_init(unsigned long phys_base)
 	unsigned long end_pfn;
 	int nid;
 
-	end_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
+	end_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
 	max_pfn = max_low_pfn = end_pfn;
 	min_low_pfn = (phys_base >> PAGE_SHIFT);
 
@@ -1734,7 +1734,7 @@ void __init paging_init(void)
 		sun4v_ktsb_init();
 	}
 
-	lmb_init();
+	memblock_init();
 
 	/* Find available physical memory...
 	 *
@@ -1752,17 +1752,17 @@ void __init paging_init(void)
 	phys_base = 0xffffffffffffffffUL;
 	for (i = 0; i < pavail_ents; i++) {
 		phys_base = min(phys_base, pavail[i].phys_addr);
-		lmb_add(pavail[i].phys_addr, pavail[i].reg_size);
+		memblock_add(pavail[i].phys_addr, pavail[i].reg_size);
 	}
 
-	lmb_reserve(kern_base, kern_size);
+	memblock_reserve(kern_base, kern_size);
 
 	find_ramdisk(phys_base);
 
-	lmb_enforce_memory_limit(cmdline_memory_size);
+	memblock_enforce_memory_limit(cmdline_memory_size);
 
-	lmb_analyze();
-	lmb_dump_all();
+	memblock_analyze();
+	memblock_dump_all();
 
 	set_bit(0, mmu_context_bmap);
 
@@ -1816,8 +1816,8 @@ void __init paging_init(void)
 	 */
 	for_each_possible_cpu(i) {
 		/* XXX Use node local allocations... XXX */
-		softirq_stack[i] = __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
-		hardirq_stack[i] = __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+		softirq_stack[i] = __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+		hardirq_stack[i] = __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
 	}
 
 	/* Setup bootmem... */
diff --git a/include/linux/lmb.h b/include/linux/lmb.h
deleted file mode 100644
index f3d14333ebe..00000000000
--- a/include/linux/lmb.h
+++ /dev/null
@@ -1,89 +0,0 @@
-#ifndef _LINUX_LMB_H
-#define _LINUX_LMB_H
-#ifdef __KERNEL__
-
-/*
- * Logical memory blocks.
- *
- * Copyright (C) 2001 Peter Bergner, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/init.h>
-#include <linux/mm.h>
-
-#define MAX_LMB_REGIONS 128
-
-struct lmb_property {
-	u64 base;
-	u64 size;
-};
-
-struct lmb_region {
-	unsigned long cnt;
-	u64 size;
-	struct lmb_property region[MAX_LMB_REGIONS+1];
-};
-
-struct lmb {
-	unsigned long debug;
-	u64 rmo_size;
-	struct lmb_region memory;
-	struct lmb_region reserved;
-};
-
-extern struct lmb lmb;
-
-extern void __init lmb_init(void);
-extern void __init lmb_analyze(void);
-extern long lmb_add(u64 base, u64 size);
-extern long lmb_remove(u64 base, u64 size);
-extern long __init lmb_free(u64 base, u64 size);
-extern long __init lmb_reserve(u64 base, u64 size);
-extern u64 __init lmb_alloc_nid(u64 size, u64 align, int nid,
-				u64 (*nid_range)(u64, u64, int *));
-extern u64 __init lmb_alloc(u64 size, u64 align);
-extern u64 __init lmb_alloc_base(u64 size,
-		u64, u64 max_addr);
-extern u64 __init __lmb_alloc_base(u64 size,
-		u64 align, u64 max_addr);
-extern u64 __init lmb_phys_mem_size(void);
-extern u64 lmb_end_of_DRAM(void);
-extern void __init lmb_enforce_memory_limit(u64 memory_limit);
-extern int __init lmb_is_reserved(u64 addr);
-extern int lmb_is_region_reserved(u64 base, u64 size);
-extern int lmb_find(struct lmb_property *res);
-
-extern void lmb_dump_all(void);
-
-static inline u64
-lmb_size_bytes(struct lmb_region *type, unsigned long region_nr)
-{
-	return type->region[region_nr].size;
-}
-static inline u64
-lmb_size_pages(struct lmb_region *type, unsigned long region_nr)
-{
-	return lmb_size_bytes(type, region_nr) >> PAGE_SHIFT;
-}
-static inline u64
-lmb_start_pfn(struct lmb_region *type, unsigned long region_nr)
-{
-	return type->region[region_nr].base >> PAGE_SHIFT;
-}
-static inline u64
-lmb_end_pfn(struct lmb_region *type, unsigned long region_nr)
-{
-	return lmb_start_pfn(type, region_nr) +
-	       lmb_size_pages(type, region_nr);
-}
-
-#include <asm/lmb.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_LMB_H */
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
new file mode 100644
index 00000000000..a59faf2b5ed
--- /dev/null
+++ b/include/linux/memblock.h
@@ -0,0 +1,89 @@
+#ifndef _LINUX_MEMBLOCK_H
+#define _LINUX_MEMBLOCK_H
+#ifdef __KERNEL__
+
+/*
+ * Logical memory blocks.
+ *
+ * Copyright (C) 2001 Peter Bergner, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+
+#define MAX_MEMBLOCK_REGIONS 128
+
+struct memblock_property {
+	u64 base;
+	u64 size;
+};
+
+struct memblock_region {
+	unsigned long cnt;
+	u64 size;
+	struct memblock_property region[MAX_MEMBLOCK_REGIONS+1];
+};
+
+struct memblock {
+	unsigned long debug;
+	u64 rmo_size;
+	struct memblock_region memory;
+	struct memblock_region reserved;
+};
+
+extern struct memblock memblock;
+
+extern void __init memblock_init(void);
+extern void __init memblock_analyze(void);
+extern long memblock_add(u64 base, u64 size);
+extern long memblock_remove(u64 base, u64 size);
+extern long __init memblock_free(u64 base, u64 size);
+extern long __init memblock_reserve(u64 base, u64 size);
+extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid,
+				u64 (*nid_range)(u64, u64, int *));
+extern u64 __init memblock_alloc(u64 size, u64 align);
+extern u64 __init memblock_alloc_base(u64 size,
+		u64, u64 max_addr);
+extern u64 __init __memblock_alloc_base(u64 size,
+		u64 align, u64 max_addr);
+extern u64 __init memblock_phys_mem_size(void);
+extern u64 memblock_end_of_DRAM(void);
+extern void __init memblock_enforce_memory_limit(u64 memory_limit);
+extern int __init memblock_is_reserved(u64 addr);
+extern int memblock_is_region_reserved(u64 base, u64 size);
+extern int memblock_find(struct memblock_property *res);
+
+extern void memblock_dump_all(void);
+
+static inline u64
+memblock_size_bytes(struct memblock_region *type, unsigned long region_nr)
+{
+	return type->region[region_nr].size;
+}
+static inline u64
+memblock_size_pages(struct memblock_region *type, unsigned long region_nr)
+{
+	return memblock_size_bytes(type, region_nr) >> PAGE_SHIFT;
+}
+static inline u64
+memblock_start_pfn(struct memblock_region *type, unsigned long region_nr)
+{
+	return type->region[region_nr].base >> PAGE_SHIFT;
+}
+static inline u64
+memblock_end_pfn(struct memblock_region *type, unsigned long region_nr)
+{
+	return memblock_start_pfn(type, region_nr) +
+	       memblock_size_pages(type, region_nr);
+}
+
+#include <asm/memblock.h>
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_MEMBLOCK_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 170d8ca901d..5b916bc0fba 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -181,9 +181,6 @@ config HAS_DMA
 config CHECK_SIGNATURE
 	bool
 
-config HAVE_LMB
-	boolean
-
 config CPUMASK_OFFSTACK
 	bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
 	help
diff --git a/lib/Makefile b/lib/Makefile
index 3f1062cbbff..0bfabba1bb3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -89,8 +89,6 @@ obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
 
 lib-$(CONFIG_GENERIC_BUG) += bug.o
 
-obj-$(CONFIG_HAVE_LMB) += lmb.o
-
 obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
 
 obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o
diff --git a/lib/lmb.c b/lib/lmb.c
deleted file mode 100644
index b1fc5260652..00000000000
--- a/lib/lmb.c
+++ /dev/null
@@ -1,541 +0,0 @@
-/*
- * Procedures for maintaining information about logical memory blocks.
- *
- * Peter Bergner, IBM Corp.	June 2001.
- * Copyright (C) 2001 Peter Bergner.
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <linux/lmb.h>
-
-#define LMB_ALLOC_ANYWHERE	0
-
-struct lmb lmb;
-
-static int lmb_debug;
-
-static int __init early_lmb(char *p)
-{
-	if (p && strstr(p, "debug"))
-		lmb_debug = 1;
-	return 0;
-}
-early_param("lmb", early_lmb);
-
-static void lmb_dump(struct lmb_region *region, char *name)
-{
-	unsigned long long base, size;
-	int i;
-
-	pr_info(" %s.cnt  = 0x%lx\n", name, region->cnt);
-
-	for (i = 0; i < region->cnt; i++) {
-		base = region->region[i].base;
-		size = region->region[i].size;
-
-		pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
-		    name, i, base, base + size - 1, size);
-	}
-}
-
-void lmb_dump_all(void)
-{
-	if (!lmb_debug)
-		return;
-
-	pr_info("LMB configuration:\n");
-	pr_info(" rmo_size    = 0x%llx\n", (unsigned long long)lmb.rmo_size);
-	pr_info(" memory.size = 0x%llx\n", (unsigned long long)lmb.memory.size);
-
-	lmb_dump(&lmb.memory, "memory");
-	lmb_dump(&lmb.reserved, "reserved");
-}
-
-static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2,
-					u64 size2)
-{
-	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
-}
-
-static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2)
-{
-	if (base2 == base1 + size1)
-		return 1;
-	else if (base1 == base2 + size2)
-		return -1;
-
-	return 0;
-}
-
-static long lmb_regions_adjacent(struct lmb_region *rgn,
-		unsigned long r1, unsigned long r2)
-{
-	u64 base1 = rgn->region[r1].base;
-	u64 size1 = rgn->region[r1].size;
-	u64 base2 = rgn->region[r2].base;
-	u64 size2 = rgn->region[r2].size;
-
-	return lmb_addrs_adjacent(base1, size1, base2, size2);
-}
-
-static void lmb_remove_region(struct lmb_region *rgn, unsigned long r)
-{
-	unsigned long i;
-
-	for (i = r; i < rgn->cnt - 1; i++) {
-		rgn->region[i].base = rgn->region[i + 1].base;
-		rgn->region[i].size = rgn->region[i + 1].size;
-	}
-	rgn->cnt--;
-}
-
-/* Assumption: base addr of region 1 < base addr of region 2 */
-static void lmb_coalesce_regions(struct lmb_region *rgn,
-		unsigned long r1, unsigned long r2)
-{
-	rgn->region[r1].size += rgn->region[r2].size;
-	lmb_remove_region(rgn, r2);
-}
-
-void __init lmb_init(void)
-{
-	/* Create a dummy zero size LMB which will get coalesced away later.
-	 * This simplifies the lmb_add() code below...
-	 */
-	lmb.memory.region[0].base = 0;
-	lmb.memory.region[0].size = 0;
-	lmb.memory.cnt = 1;
-
-	/* Ditto. */
-	lmb.reserved.region[0].base = 0;
-	lmb.reserved.region[0].size = 0;
-	lmb.reserved.cnt = 1;
-}
-
-void __init lmb_analyze(void)
-{
-	int i;
-
-	lmb.memory.size = 0;
-
-	for (i = 0; i < lmb.memory.cnt; i++)
-		lmb.memory.size += lmb.memory.region[i].size;
-}
-
-static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
-{
-	unsigned long coalesced = 0;
-	long adjacent, i;
-
-	if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) {
-		rgn->region[0].base = base;
-		rgn->region[0].size = size;
-		return 0;
-	}
-
-	/* First try and coalesce this LMB with another. */
-	for (i = 0; i < rgn->cnt; i++) {
-		u64 rgnbase = rgn->region[i].base;
-		u64 rgnsize = rgn->region[i].size;
-
-		if ((rgnbase == base) && (rgnsize == size))
-			/* Already have this region, so we're done */
-			return 0;
-
-		adjacent = lmb_addrs_adjacent(base, size, rgnbase, rgnsize);
-		if (adjacent > 0) {
-			rgn->region[i].base -= size;
-			rgn->region[i].size += size;
-			coalesced++;
-			break;
-		} else if (adjacent < 0) {
-			rgn->region[i].size += size;
-			coalesced++;
-			break;
-		}
-	}
-
-	if ((i < rgn->cnt - 1) && lmb_regions_adjacent(rgn, i, i+1)) {
-		lmb_coalesce_regions(rgn, i, i+1);
-		coalesced++;
-	}
-
-	if (coalesced)
-		return coalesced;
-	if (rgn->cnt >= MAX_LMB_REGIONS)
-		return -1;
-
-	/* Couldn't coalesce the LMB, so add it to the sorted table. */
-	for (i = rgn->cnt - 1; i >= 0; i--) {
-		if (base < rgn->region[i].base) {
-			rgn->region[i+1].base = rgn->region[i].base;
-			rgn->region[i+1].size = rgn->region[i].size;
-		} else {
-			rgn->region[i+1].base = base;
-			rgn->region[i+1].size = size;
-			break;
-		}
-	}
-
-	if (base < rgn->region[0].base) {
-		rgn->region[0].base = base;
-		rgn->region[0].size = size;
-	}
-	rgn->cnt++;
-
-	return 0;
-}
-
-long lmb_add(u64 base, u64 size)
-{
-	struct lmb_region *_rgn = &lmb.memory;
-
-	/* On pSeries LPAR systems, the first LMB is our RMO region. */
-	if (base == 0)
-		lmb.rmo_size = size;
-
-	return lmb_add_region(_rgn, base, size);
-
-}
-
-static long __lmb_remove(struct lmb_region *rgn, u64 base, u64 size)
-{
-	u64 rgnbegin, rgnend;
-	u64 end = base + size;
-	int i;
-
-	rgnbegin = rgnend = 0; /* supress gcc warnings */
-
-	/* Find the region where (base, size) belongs to */
-	for (i=0; i < rgn->cnt; i++) {
-		rgnbegin = rgn->region[i].base;
-		rgnend = rgnbegin + rgn->region[i].size;
-
-		if ((rgnbegin <= base) && (end <= rgnend))
-			break;
-	}
-
-	/* Didn't find the region */
-	if (i == rgn->cnt)
-		return -1;
-
-	/* Check to see if we are removing entire region */
-	if ((rgnbegin == base) && (rgnend == end)) {
-		lmb_remove_region(rgn, i);
-		return 0;
-	}
-
-	/* Check to see if region is matching at the front */
-	if (rgnbegin == base) {
-		rgn->region[i].base = end;
-		rgn->region[i].size -= size;
-		return 0;
-	}
-
-	/* Check to see if the region is matching at the end */
-	if (rgnend == end) {
-		rgn->region[i].size -= size;
-		return 0;
-	}
-
-	/*
-	 * We need to split the entry -  adjust the current one to the
-	 * beginging of the hole and add the region after hole.
-	 */
-	rgn->region[i].size = base - rgn->region[i].base;
-	return lmb_add_region(rgn, end, rgnend - end);
-}
-
-long lmb_remove(u64 base, u64 size)
-{
-	return __lmb_remove(&lmb.memory, base, size);
-}
-
-long __init lmb_free(u64 base, u64 size)
-{
-	return __lmb_remove(&lmb.reserved, base, size);
-}
-
-long __init lmb_reserve(u64 base, u64 size)
-{
-	struct lmb_region *_rgn = &lmb.reserved;
-
-	BUG_ON(0 == size);
-
-	return lmb_add_region(_rgn, base, size);
-}
-
-long lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size)
-{
-	unsigned long i;
-
-	for (i = 0; i < rgn->cnt; i++) {
-		u64 rgnbase = rgn->region[i].base;
-		u64 rgnsize = rgn->region[i].size;
-		if (lmb_addrs_overlap(base, size, rgnbase, rgnsize))
-			break;
-	}
-
-	return (i < rgn->cnt) ? i : -1;
-}
-
-static u64 lmb_align_down(u64 addr, u64 size)
-{
-	return addr & ~(size - 1);
-}
-
-static u64 lmb_align_up(u64 addr, u64 size)
-{
-	return (addr + (size - 1)) & ~(size - 1);
-}
-
-static u64 __init lmb_alloc_nid_unreserved(u64 start, u64 end,
-					   u64 size, u64 align)
-{
-	u64 base, res_base;
-	long j;
-
-	base = lmb_align_down((end - size), align);
-	while (start <= base) {
-		j = lmb_overlaps_region(&lmb.reserved, base, size);
-		if (j < 0) {
-			/* this area isn't reserved, take it */
-			if (lmb_add_region(&lmb.reserved, base, size) < 0)
-				base = ~(u64)0;
-			return base;
-		}
-		res_base = lmb.reserved.region[j].base;
-		if (res_base < size)
-			break;
-		base = lmb_align_down(res_base - size, align);
-	}
-
-	return ~(u64)0;
-}
-
-static u64 __init lmb_alloc_nid_region(struct lmb_property *mp,
-				       u64 (*nid_range)(u64, u64, int *),
-				       u64 size, u64 align, int nid)
-{
-	u64 start, end;
-
-	start = mp->base;
-	end = start + mp->size;
-
-	start = lmb_align_up(start, align);
-	while (start < end) {
-		u64 this_end;
-		int this_nid;
-
-		this_end = nid_range(start, end, &this_nid);
-		if (this_nid == nid) {
-			u64 ret = lmb_alloc_nid_unreserved(start, this_end,
-							   size, align);
-			if (ret != ~(u64)0)
-				return ret;
-		}
-		start = this_end;
-	}
-
-	return ~(u64)0;
-}
-
-u64 __init lmb_alloc_nid(u64 size, u64 align, int nid,
-			 u64 (*nid_range)(u64 start, u64 end, int *nid))
-{
-	struct lmb_region *mem = &lmb.memory;
-	int i;
-
-	BUG_ON(0 == size);
-
-	size = lmb_align_up(size, align);
-
-	for (i = 0; i < mem->cnt; i++) {
-		u64 ret = lmb_alloc_nid_region(&mem->region[i],
-					       nid_range,
-					       size, align, nid);
-		if (ret != ~(u64)0)
-			return ret;
-	}
-
-	return lmb_alloc(size, align);
-}
-
-u64 __init lmb_alloc(u64 size, u64 align)
-{
-	return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE);
-}
-
-u64 __init lmb_alloc_base(u64 size, u64 align, u64 max_addr)
-{
-	u64 alloc;
-
-	alloc = __lmb_alloc_base(size, align, max_addr);
-
-	if (alloc == 0)
-		panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
-		      (unsigned long long) size, (unsigned long long) max_addr);
-
-	return alloc;
-}
-
-u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr)
-{
-	long i, j;
-	u64 base = 0;
-	u64 res_base;
-
-	BUG_ON(0 == size);
-
-	size = lmb_align_up(size, align);
-
-	/* On some platforms, make sure we allocate lowmem */
-	/* Note that LMB_REAL_LIMIT may be LMB_ALLOC_ANYWHERE */
-	if (max_addr == LMB_ALLOC_ANYWHERE)
-		max_addr = LMB_REAL_LIMIT;
-
-	for (i = lmb.memory.cnt - 1; i >= 0; i--) {
-		u64 lmbbase = lmb.memory.region[i].base;
-		u64 lmbsize = lmb.memory.region[i].size;
-
-		if (lmbsize < size)
-			continue;
-		if (max_addr == LMB_ALLOC_ANYWHERE)
-			base = lmb_align_down(lmbbase + lmbsize - size, align);
-		else if (lmbbase < max_addr) {
-			base = min(lmbbase + lmbsize, max_addr);
-			base = lmb_align_down(base - size, align);
-		} else
-			continue;
-
-		while (base && lmbbase <= base) {
-			j = lmb_overlaps_region(&lmb.reserved, base, size);
-			if (j < 0) {
-				/* this area isn't reserved, take it */
-				if (lmb_add_region(&lmb.reserved, base, size) < 0)
-					return 0;
-				return base;
-			}
-			res_base = lmb.reserved.region[j].base;
-			if (res_base < size)
-				break;
-			base = lmb_align_down(res_base - size, align);
-		}
-	}
-	return 0;
-}
-
-/* You must call lmb_analyze() before this. */
-u64 __init lmb_phys_mem_size(void)
-{
-	return lmb.memory.size;
-}
-
-u64 lmb_end_of_DRAM(void)
-{
-	int idx = lmb.memory.cnt - 1;
-
-	return (lmb.memory.region[idx].base + lmb.memory.region[idx].size);
-}
-
-/* You must call lmb_analyze() after this. */
-void __init lmb_enforce_memory_limit(u64 memory_limit)
-{
-	unsigned long i;
-	u64 limit;
-	struct lmb_property *p;
-
-	if (!memory_limit)
-		return;
-
-	/* Truncate the lmb regions to satisfy the memory limit. */
-	limit = memory_limit;
-	for (i = 0; i < lmb.memory.cnt; i++) {
-		if (limit > lmb.memory.region[i].size) {
-			limit -= lmb.memory.region[i].size;
-			continue;
-		}
-
-		lmb.memory.region[i].size = limit;
-		lmb.memory.cnt = i + 1;
-		break;
-	}
-
-	if (lmb.memory.region[0].size < lmb.rmo_size)
-		lmb.rmo_size = lmb.memory.region[0].size;
-
-	memory_limit = lmb_end_of_DRAM();
-
-	/* And truncate any reserves above the limit also. */
-	for (i = 0; i < lmb.reserved.cnt; i++) {
-		p = &lmb.reserved.region[i];
-
-		if (p->base > memory_limit)
-			p->size = 0;
-		else if ((p->base + p->size) > memory_limit)
-			p->size = memory_limit - p->base;
-
-		if (p->size == 0) {
-			lmb_remove_region(&lmb.reserved, i);
-			i--;
-		}
-	}
-}
-
-int __init lmb_is_reserved(u64 addr)
-{
-	int i;
-
-	for (i = 0; i < lmb.reserved.cnt; i++) {
-		u64 upper = lmb.reserved.region[i].base +
-			lmb.reserved.region[i].size - 1;
-		if ((addr >= lmb.reserved.region[i].base) && (addr <= upper))
-			return 1;
-	}
-	return 0;
-}
-
-int lmb_is_region_reserved(u64 base, u64 size)
-{
-	return lmb_overlaps_region(&lmb.reserved, base, size);
-}
-
-/*
- * Given a <base, len>, find which memory regions belong to this range.
- * Adjust the request and return a contiguous chunk.
- */
-int lmb_find(struct lmb_property *res)
-{
-	int i;
-	u64 rstart, rend;
-
-	rstart = res->base;
-	rend = rstart + res->size - 1;
-
-	for (i = 0; i < lmb.memory.cnt; i++) {
-		u64 start = lmb.memory.region[i].base;
-		u64 end = start + lmb.memory.region[i].size - 1;
-
-		if (start > rend)
-			return -1;
-
-		if ((end >= rstart) && (start < rend)) {
-			/* adjust the request */
-			if (rstart < start)
-				rstart = start;
-			if (rend > end)
-				rend = end;
-			res->base = rstart;
-			res->size = rend - rstart + 1;
-			return 0;
-		}
-	}
-	return -1;
-}
diff --git a/mm/Kconfig b/mm/Kconfig
index 527136b2238..f4e516e9c37 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -128,6 +128,9 @@ config SPARSEMEM_VMEMMAP
 	 pfn_to_page and page_to_pfn operations.  This is the most
 	 efficient option when sufficient kernel resources are available.
 
+config HAVE_MEMBLOCK
+	boolean
+
 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
diff --git a/mm/Makefile b/mm/Makefile
index 8982504bd03..34b2546a9e3 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -15,6 +15,8 @@ obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
 			   $(mmu-y)
 obj-y += init-mm.o
 
+obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
+
 obj-$(CONFIG_BOUNCE)	+= bounce.o
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
 obj-$(CONFIG_HAS_DMA)	+= dmapool.o
diff --git a/mm/memblock.c b/mm/memblock.c
new file mode 100644
index 00000000000..3024eb30fc2
--- /dev/null
+++ b/mm/memblock.c
@@ -0,0 +1,541 @@
+/*
+ * Procedures for maintaining information about logical memory blocks.
+ *
+ * Peter Bergner, IBM Corp.	June 2001.
+ * Copyright (C) 2001 Peter Bergner.
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/memblock.h>
+
+#define MEMBLOCK_ALLOC_ANYWHERE	0
+
+struct memblock memblock;
+
+static int memblock_debug;
+
+static int __init early_memblock(char *p)
+{
+	if (p && strstr(p, "debug"))
+		memblock_debug = 1;
+	return 0;
+}
+early_param("memblock", early_memblock);
+
+static void memblock_dump(struct memblock_region *region, char *name)
+{
+	unsigned long long base, size;
+	int i;
+
+	pr_info(" %s.cnt  = 0x%lx\n", name, region->cnt);
+
+	for (i = 0; i < region->cnt; i++) {
+		base = region->region[i].base;
+		size = region->region[i].size;
+
+		pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
+		    name, i, base, base + size - 1, size);
+	}
+}
+
+void memblock_dump_all(void)
+{
+	if (!memblock_debug)
+		return;
+
+	pr_info("MEMBLOCK configuration:\n");
+	pr_info(" rmo_size    = 0x%llx\n", (unsigned long long)memblock.rmo_size);
+	pr_info(" memory.size = 0x%llx\n", (unsigned long long)memblock.memory.size);
+
+	memblock_dump(&memblock.memory, "memory");
+	memblock_dump(&memblock.reserved, "reserved");
+}
+
+static unsigned long memblock_addrs_overlap(u64 base1, u64 size1, u64 base2,
+					u64 size2)
+{
+	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
+}
+
+static long memblock_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2)
+{
+	if (base2 == base1 + size1)
+		return 1;
+	else if (base1 == base2 + size2)
+		return -1;
+
+	return 0;
+}
+
+static long memblock_regions_adjacent(struct memblock_region *rgn,
+		unsigned long r1, unsigned long r2)
+{
+	u64 base1 = rgn->region[r1].base;
+	u64 size1 = rgn->region[r1].size;
+	u64 base2 = rgn->region[r2].base;
+	u64 size2 = rgn->region[r2].size;
+
+	return memblock_addrs_adjacent(base1, size1, base2, size2);
+}
+
+static void memblock_remove_region(struct memblock_region *rgn, unsigned long r)
+{
+	unsigned long i;
+
+	for (i = r; i < rgn->cnt - 1; i++) {
+		rgn->region[i].base = rgn->region[i + 1].base;
+		rgn->region[i].size = rgn->region[i + 1].size;
+	}
+	rgn->cnt--;
+}
+
+/* Assumption: base addr of region 1 < base addr of region 2 */
+static void memblock_coalesce_regions(struct memblock_region *rgn,
+		unsigned long r1, unsigned long r2)
+{
+	rgn->region[r1].size += rgn->region[r2].size;
+	memblock_remove_region(rgn, r2);
+}
+
+void __init memblock_init(void)
+{
+	/* Create a dummy zero size MEMBLOCK which will get coalesced away later.
+	 * This simplifies the memblock_add() code below...
+	 */
+	memblock.memory.region[0].base = 0;
+	memblock.memory.region[0].size = 0;
+	memblock.memory.cnt = 1;
+
+	/* Ditto. */
+	memblock.reserved.region[0].base = 0;
+	memblock.reserved.region[0].size = 0;
+	memblock.reserved.cnt = 1;
+}
+
+void __init memblock_analyze(void)
+{
+	int i;
+
+	memblock.memory.size = 0;
+
+	for (i = 0; i < memblock.memory.cnt; i++)
+		memblock.memory.size += memblock.memory.region[i].size;
+}
+
+static long memblock_add_region(struct memblock_region *rgn, u64 base, u64 size)
+{
+	unsigned long coalesced = 0;
+	long adjacent, i;
+
+	if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) {
+		rgn->region[0].base = base;
+		rgn->region[0].size = size;
+		return 0;
+	}
+
+	/* First try and coalesce this MEMBLOCK with another. */
+	for (i = 0; i < rgn->cnt; i++) {
+		u64 rgnbase = rgn->region[i].base;
+		u64 rgnsize = rgn->region[i].size;
+
+		if ((rgnbase == base) && (rgnsize == size))
+			/* Already have this region, so we're done */
+			return 0;
+
+		adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize);
+		if (adjacent > 0) {
+			rgn->region[i].base -= size;
+			rgn->region[i].size += size;
+			coalesced++;
+			break;
+		} else if (adjacent < 0) {
+			rgn->region[i].size += size;
+			coalesced++;
+			break;
+		}
+	}
+
+	if ((i < rgn->cnt - 1) && memblock_regions_adjacent(rgn, i, i+1)) {
+		memblock_coalesce_regions(rgn, i, i+1);
+		coalesced++;
+	}
+
+	if (coalesced)
+		return coalesced;
+	if (rgn->cnt >= MAX_MEMBLOCK_REGIONS)
+		return -1;
+
+	/* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */
+	for (i = rgn->cnt - 1; i >= 0; i--) {
+		if (base < rgn->region[i].base) {
+			rgn->region[i+1].base = rgn->region[i].base;
+			rgn->region[i+1].size = rgn->region[i].size;
+		} else {
+			rgn->region[i+1].base = base;
+			rgn->region[i+1].size = size;
+			break;
+		}
+	}
+
+	if (base < rgn->region[0].base) {
+		rgn->region[0].base = base;
+		rgn->region[0].size = size;
+	}
+	rgn->cnt++;
+
+	return 0;
+}
+
+long memblock_add(u64 base, u64 size)
+{
+	struct memblock_region *_rgn = &memblock.memory;
+
+	/* On pSeries LPAR systems, the first MEMBLOCK is our RMO region. */
+	if (base == 0)
+		memblock.rmo_size = size;
+
+	return memblock_add_region(_rgn, base, size);
+
+}
+
+static long __memblock_remove(struct memblock_region *rgn, u64 base, u64 size)
+{
+	u64 rgnbegin, rgnend;
+	u64 end = base + size;
+	int i;
+
+	rgnbegin = rgnend = 0; /* supress gcc warnings */
+
+	/* Find the region where (base, size) belongs to */
+	for (i=0; i < rgn->cnt; i++) {
+		rgnbegin = rgn->region[i].base;
+		rgnend = rgnbegin + rgn->region[i].size;
+
+		if ((rgnbegin <= base) && (end <= rgnend))
+			break;
+	}
+
+	/* Didn't find the region */
+	if (i == rgn->cnt)
+		return -1;
+
+	/* Check to see if we are removing entire region */
+	if ((rgnbegin == base) && (rgnend == end)) {
+		memblock_remove_region(rgn, i);
+		return 0;
+	}
+
+	/* Check to see if region is matching at the front */
+	if (rgnbegin == base) {
+		rgn->region[i].base = end;
+		rgn->region[i].size -= size;
+		return 0;
+	}
+
+	/* Check to see if the region is matching at the end */
+	if (rgnend == end) {
+		rgn->region[i].size -= size;
+		return 0;
+	}
+
+	/*
+	 * We need to split the entry -  adjust the current one to the
+	 * beginging of the hole and add the region after hole.
+	 */
+	rgn->region[i].size = base - rgn->region[i].base;
+	return memblock_add_region(rgn, end, rgnend - end);
+}
+
+long memblock_remove(u64 base, u64 size)
+{
+	return __memblock_remove(&memblock.memory, base, size);
+}
+
+long __init memblock_free(u64 base, u64 size)
+{
+	return __memblock_remove(&memblock.reserved, base, size);
+}
+
+long __init memblock_reserve(u64 base, u64 size)
+{
+	struct memblock_region *_rgn = &memblock.reserved;
+
+	BUG_ON(0 == size);
+
+	return memblock_add_region(_rgn, base, size);
+}
+
+long memblock_overlaps_region(struct memblock_region *rgn, u64 base, u64 size)
+{
+	unsigned long i;
+
+	for (i = 0; i < rgn->cnt; i++) {
+		u64 rgnbase = rgn->region[i].base;
+		u64 rgnsize = rgn->region[i].size;
+		if (memblock_addrs_overlap(base, size, rgnbase, rgnsize))
+			break;
+	}
+
+	return (i < rgn->cnt) ? i : -1;
+}
+
+static u64 memblock_align_down(u64 addr, u64 size)
+{
+	return addr & ~(size - 1);
+}
+
+static u64 memblock_align_up(u64 addr, u64 size)
+{
+	return (addr + (size - 1)) & ~(size - 1);
+}
+
+static u64 __init memblock_alloc_nid_unreserved(u64 start, u64 end,
+					   u64 size, u64 align)
+{
+	u64 base, res_base;
+	long j;
+
+	base = memblock_align_down((end - size), align);
+	while (start <= base) {
+		j = memblock_overlaps_region(&memblock.reserved, base, size);
+		if (j < 0) {
+			/* this area isn't reserved, take it */
+			if (memblock_add_region(&memblock.reserved, base, size) < 0)
+				base = ~(u64)0;
+			return base;
+		}
+		res_base = memblock.reserved.region[j].base;
+		if (res_base < size)
+			break;
+		base = memblock_align_down(res_base - size, align);
+	}
+
+	return ~(u64)0;
+}
+
+static u64 __init memblock_alloc_nid_region(struct memblock_property *mp,
+				       u64 (*nid_range)(u64, u64, int *),
+				       u64 size, u64 align, int nid)
+{
+	u64 start, end;
+
+	start = mp->base;
+	end = start + mp->size;
+
+	start = memblock_align_up(start, align);
+	while (start < end) {
+		u64 this_end;
+		int this_nid;
+
+		this_end = nid_range(start, end, &this_nid);
+		if (this_nid == nid) {
+			u64 ret = memblock_alloc_nid_unreserved(start, this_end,
+							   size, align);
+			if (ret != ~(u64)0)
+				return ret;
+		}
+		start = this_end;
+	}
+
+	return ~(u64)0;
+}
+
+u64 __init memblock_alloc_nid(u64 size, u64 align, int nid,
+			 u64 (*nid_range)(u64 start, u64 end, int *nid))
+{
+	struct memblock_region *mem = &memblock.memory;
+	int i;
+
+	BUG_ON(0 == size);
+
+	size = memblock_align_up(size, align);
+
+	for (i = 0; i < mem->cnt; i++) {
+		u64 ret = memblock_alloc_nid_region(&mem->region[i],
+					       nid_range,
+					       size, align, nid);
+		if (ret != ~(u64)0)
+			return ret;
+	}
+
+	return memblock_alloc(size, align);
+}
+
+u64 __init memblock_alloc(u64 size, u64 align)
+{
+	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE);
+}
+
+u64 __init memblock_alloc_base(u64 size, u64 align, u64 max_addr)
+{
+	u64 alloc;
+
+	alloc = __memblock_alloc_base(size, align, max_addr);
+
+	if (alloc == 0)
+		panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
+		      (unsigned long long) size, (unsigned long long) max_addr);
+
+	return alloc;
+}
+
+u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr)
+{
+	long i, j;
+	u64 base = 0;
+	u64 res_base;
+
+	BUG_ON(0 == size);
+
+	size = memblock_align_up(size, align);
+
+	/* On some platforms, make sure we allocate lowmem */
+	/* Note that MEMBLOCK_REAL_LIMIT may be MEMBLOCK_ALLOC_ANYWHERE */
+	if (max_addr == MEMBLOCK_ALLOC_ANYWHERE)
+		max_addr = MEMBLOCK_REAL_LIMIT;
+
+	for (i = memblock.memory.cnt - 1; i >= 0; i--) {
+		u64 memblockbase = memblock.memory.region[i].base;
+		u64 memblocksize = memblock.memory.region[i].size;
+
+		if (memblocksize < size)
+			continue;
+		if (max_addr == MEMBLOCK_ALLOC_ANYWHERE)
+			base = memblock_align_down(memblockbase + memblocksize - size, align);
+		else if (memblockbase < max_addr) {
+			base = min(memblockbase + memblocksize, max_addr);
+			base = memblock_align_down(base - size, align);
+		} else
+			continue;
+
+		while (base && memblockbase <= base) {
+			j = memblock_overlaps_region(&memblock.reserved, base, size);
+			if (j < 0) {
+				/* this area isn't reserved, take it */
+				if (memblock_add_region(&memblock.reserved, base, size) < 0)
+					return 0;
+				return base;
+			}
+			res_base = memblock.reserved.region[j].base;
+			if (res_base < size)
+				break;
+			base = memblock_align_down(res_base - size, align);
+		}
+	}
+	return 0;
+}
+
+/* You must call memblock_analyze() before this. */
+u64 __init memblock_phys_mem_size(void)
+{
+	return memblock.memory.size;
+}
+
+u64 memblock_end_of_DRAM(void)
+{
+	int idx = memblock.memory.cnt - 1;
+
+	return (memblock.memory.region[idx].base + memblock.memory.region[idx].size);
+}
+
+/* You must call memblock_analyze() after this. */
+void __init memblock_enforce_memory_limit(u64 memory_limit)
+{
+	unsigned long i;
+	u64 limit;
+	struct memblock_property *p;
+
+	if (!memory_limit)
+		return;
+
+	/* Truncate the memblock regions to satisfy the memory limit. */
+	limit = memory_limit;
+	for (i = 0; i < memblock.memory.cnt; i++) {
+		if (limit > memblock.memory.region[i].size) {
+			limit -= memblock.memory.region[i].size;
+			continue;
+		}
+
+		memblock.memory.region[i].size = limit;
+		memblock.memory.cnt = i + 1;
+		break;
+	}
+
+	if (memblock.memory.region[0].size < memblock.rmo_size)
+		memblock.rmo_size = memblock.memory.region[0].size;
+
+	memory_limit = memblock_end_of_DRAM();
+
+	/* And truncate any reserves above the limit also. */
+	for (i = 0; i < memblock.reserved.cnt; i++) {
+		p = &memblock.reserved.region[i];
+
+		if (p->base > memory_limit)
+			p->size = 0;
+		else if ((p->base + p->size) > memory_limit)
+			p->size = memory_limit - p->base;
+
+		if (p->size == 0) {
+			memblock_remove_region(&memblock.reserved, i);
+			i--;
+		}
+	}
+}
+
+int __init memblock_is_reserved(u64 addr)
+{
+	int i;
+
+	for (i = 0; i < memblock.reserved.cnt; i++) {
+		u64 upper = memblock.reserved.region[i].base +
+			memblock.reserved.region[i].size - 1;
+		if ((addr >= memblock.reserved.region[i].base) && (addr <= upper))
+			return 1;
+	}
+	return 0;
+}
+
+int memblock_is_region_reserved(u64 base, u64 size)
+{
+	return memblock_overlaps_region(&memblock.reserved, base, size);
+}
+
+/*
+ * Given a <base, len>, find which memory regions belong to this range.
+ * Adjust the request and return a contiguous chunk.
+ */
+int memblock_find(struct memblock_property *res)
+{
+	int i;
+	u64 rstart, rend;
+
+	rstart = res->base;
+	rend = rstart + res->size - 1;
+
+	for (i = 0; i < memblock.memory.cnt; i++) {
+		u64 start = memblock.memory.region[i].base;
+		u64 end = start + memblock.memory.region[i].size - 1;
+
+		if (start > rend)
+			return -1;
+
+		if ((end >= rstart) && (start < rend)) {
+			/* adjust the request */
+			if (rstart < start)
+				rstart = start;
+			if (rend > end)
+				rend = end;
+			res->base = rstart;
+			res->size = rend - rstart + 1;
+			return 0;
+		}
+	}
+	return -1;
+}
-- 
cgit v1.2.3-70-g09d2


From 13ceef099edd2b70c5a6f3a9ef5d6d97cda2e096 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 14 Jul 2010 07:56:33 +0200
Subject: jbd2/ocfs2: Fix block checksumming when a buffer is used in several
 transactions

OCFS2 uses t_commit trigger to compute and store checksum of the just
committed blocks. When a buffer has b_frozen_data, checksum is computed
for it instead of b_data but this can result in an old checksum being
written to the filesystem in the following scenario:

1) transaction1 is opened
2) handle1 is opened
3) journal_access(handle1, bh)
    - This sets jh->b_transaction to transaction1
4) modify(bh)
5) journal_dirty(handle1, bh)
6) handle1 is closed
7) start committing transaction1, opening transaction2
8) handle2 is opened
9) journal_access(handle2, bh)
    - This copies off b_frozen_data to make it safe for transaction1 to commit.
      jh->b_next_transaction is set to transaction2.
10) jbd2_journal_write_metadata() checksums b_frozen_data
11) the journal correctly writes b_frozen_data to the disk journal
12) handle2 is closed
    - There was no dirty call for the bh on handle2, so it is never queued for
      any more journal operation
13) Checkpointing finally happens, and it just spools the bh via normal buffer
writeback.  This will write b_data, which was never triggered on and thus
contains a wrong (old) checksum.

This patch fixes the problem by calling the trigger at the moment data is
frozen for journal commit - i.e., either when b_frozen_data is created by
do_get_write_access or just before we write a buffer to the log if
b_frozen_data does not exist. We also rename the trigger to t_frozen as
that better describes when it is called.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/jbd2/journal.c     | 15 +++++++--------
 fs/jbd2/transaction.c |  9 ++++++---
 fs/ocfs2/journal.c    | 24 ++++++++++++------------
 include/linux/jbd2.h  | 11 ++++++-----
 4 files changed, 31 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index bc2ff593276..036880895bf 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -297,7 +297,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	struct page *new_page;
 	unsigned int new_offset;
 	struct buffer_head *bh_in = jh2bh(jh_in);
-	struct jbd2_buffer_trigger_type *triggers;
 	journal_t *journal = transaction->t_journal;
 
 	/*
@@ -328,21 +327,21 @@ repeat:
 		done_copy_out = 1;
 		new_page = virt_to_page(jh_in->b_frozen_data);
 		new_offset = offset_in_page(jh_in->b_frozen_data);
-		triggers = jh_in->b_frozen_triggers;
 	} else {
 		new_page = jh2bh(jh_in)->b_page;
 		new_offset = offset_in_page(jh2bh(jh_in)->b_data);
-		triggers = jh_in->b_triggers;
 	}
 
 	mapped_data = kmap_atomic(new_page, KM_USER0);
 	/*
-	 * Fire any commit trigger.  Do this before checking for escaping,
-	 * as the trigger may modify the magic offset.  If a copy-out
-	 * happens afterwards, it will have the correct data in the buffer.
+	 * Fire data frozen trigger if data already wasn't frozen.  Do this
+	 * before checking for escaping, as the trigger may modify the magic
+	 * offset.  If a copy-out happens afterwards, it will have the correct
+	 * data in the buffer.
 	 */
-	jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
-				   triggers);
+	if (!done_copy_out)
+		jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
+					   jh_in->b_triggers);
 
 	/*
 	 * Check for escaping
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e214d68620a..b8e0806681b 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -725,6 +725,9 @@ done:
 		page = jh2bh(jh)->b_page;
 		offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
 		source = kmap_atomic(page, KM_USER0);
+		/* Fire data frozen trigger just before we copy the data */
+		jbd2_buffer_frozen_trigger(jh, source + offset,
+					   jh->b_triggers);
 		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
 		kunmap_atomic(source, KM_USER0);
 
@@ -963,15 +966,15 @@ void jbd2_journal_set_triggers(struct buffer_head *bh,
 	jh->b_triggers = type;
 }
 
-void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
+void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
 				struct jbd2_buffer_trigger_type *triggers)
 {
 	struct buffer_head *bh = jh2bh(jh);
 
-	if (!triggers || !triggers->t_commit)
+	if (!triggers || !triggers->t_frozen)
 		return;
 
-	triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
+	triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
 }
 
 void jbd2_buffer_abort_trigger(struct journal_head *jh,
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 39113b5e79e..625de9d7088 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -472,7 +472,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger
 	return container_of(triggers, struct ocfs2_triggers, ot_triggers);
 }
 
-static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
 				 struct buffer_head *bh,
 				 void *data, size_t size)
 {
@@ -491,7 +491,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
  * Quota blocks have their own trigger because the struct ocfs2_block_check
  * offset depends on the blocksize.
  */
-static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
 				 struct buffer_head *bh,
 				 void *data, size_t size)
 {
@@ -511,7 +511,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
  * Directory blocks also have their own trigger because the
  * struct ocfs2_block_check offset depends on the blocksize.
  */
-static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
 				 struct buffer_head *bh,
 				 void *data, size_t size)
 {
@@ -544,7 +544,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
 
 static struct ocfs2_triggers di_triggers = {
 	.ot_triggers = {
-		.t_commit = ocfs2_commit_trigger,
+		.t_frozen = ocfs2_frozen_trigger,
 		.t_abort = ocfs2_abort_trigger,
 	},
 	.ot_offset	= offsetof(struct ocfs2_dinode, i_check),
@@ -552,7 +552,7 @@ static struct ocfs2_triggers di_triggers = {
 
 static struct ocfs2_triggers eb_triggers = {
 	.ot_triggers = {
-		.t_commit = ocfs2_commit_trigger,
+		.t_frozen = ocfs2_frozen_trigger,
 		.t_abort = ocfs2_abort_trigger,
 	},
 	.ot_offset	= offsetof(struct ocfs2_extent_block, h_check),
@@ -560,7 +560,7 @@ static struct ocfs2_triggers eb_triggers = {
 
 static struct ocfs2_triggers rb_triggers = {
 	.ot_triggers = {
-		.t_commit = ocfs2_commit_trigger,
+		.t_frozen = ocfs2_frozen_trigger,
 		.t_abort = ocfs2_abort_trigger,
 	},
 	.ot_offset	= offsetof(struct ocfs2_refcount_block, rf_check),
@@ -568,7 +568,7 @@ static struct ocfs2_triggers rb_triggers = {
 
 static struct ocfs2_triggers gd_triggers = {
 	.ot_triggers = {
-		.t_commit = ocfs2_commit_trigger,
+		.t_frozen = ocfs2_frozen_trigger,
 		.t_abort = ocfs2_abort_trigger,
 	},
 	.ot_offset	= offsetof(struct ocfs2_group_desc, bg_check),
@@ -576,14 +576,14 @@ static struct ocfs2_triggers gd_triggers = {
 
 static struct ocfs2_triggers db_triggers = {
 	.ot_triggers = {
-		.t_commit = ocfs2_db_commit_trigger,
+		.t_frozen = ocfs2_db_frozen_trigger,
 		.t_abort = ocfs2_abort_trigger,
 	},
 };
 
 static struct ocfs2_triggers xb_triggers = {
 	.ot_triggers = {
-		.t_commit = ocfs2_commit_trigger,
+		.t_frozen = ocfs2_frozen_trigger,
 		.t_abort = ocfs2_abort_trigger,
 	},
 	.ot_offset	= offsetof(struct ocfs2_xattr_block, xb_check),
@@ -591,14 +591,14 @@ static struct ocfs2_triggers xb_triggers = {
 
 static struct ocfs2_triggers dq_triggers = {
 	.ot_triggers = {
-		.t_commit = ocfs2_dq_commit_trigger,
+		.t_frozen = ocfs2_dq_frozen_trigger,
 		.t_abort = ocfs2_abort_trigger,
 	},
 };
 
 static struct ocfs2_triggers dr_triggers = {
 	.ot_triggers = {
-		.t_commit = ocfs2_commit_trigger,
+		.t_frozen = ocfs2_frozen_trigger,
 		.t_abort = ocfs2_abort_trigger,
 	},
 	.ot_offset	= offsetof(struct ocfs2_dx_root_block, dr_check),
@@ -606,7 +606,7 @@ static struct ocfs2_triggers dr_triggers = {
 
 static struct ocfs2_triggers dl_triggers = {
 	.ot_triggers = {
-		.t_commit = ocfs2_commit_trigger,
+		.t_frozen = ocfs2_frozen_trigger,
 		.t_abort = ocfs2_abort_trigger,
 	},
 	.ot_offset	= offsetof(struct ocfs2_dx_leaf, dl_check),
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index a4d2e9f7088..adf832dec3f 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1026,11 +1026,12 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
 struct jbd2_buffer_trigger_type {
 	/*
-	 * Fired just before a buffer is written to the journal.
-	 * mapped_data is a mapped buffer that is the frozen data for
-	 * commit.
+	 * Fired a the moment data to write to the journal are known to be
+	 * stable - so either at the moment b_frozen_data is created or just
+	 * before a buffer is written to the journal.  mapped_data is a mapped
+	 * buffer that is the frozen data for commit.
 	 */
-	void (*t_commit)(struct jbd2_buffer_trigger_type *type,
+	void (*t_frozen)(struct jbd2_buffer_trigger_type *type,
 			 struct buffer_head *bh, void *mapped_data,
 			 size_t size);
 
@@ -1042,7 +1043,7 @@ struct jbd2_buffer_trigger_type {
 			struct buffer_head *bh);
 };
 
-extern void jbd2_buffer_commit_trigger(struct journal_head *jh,
+extern void jbd2_buffer_frozen_trigger(struct journal_head *jh,
 				       void *mapped_data,
 				       struct jbd2_buffer_trigger_type *triggers);
 extern void jbd2_buffer_abort_trigger(struct journal_head *jh,
-- 
cgit v1.2.3-70-g09d2


From 58c84eda07560a6b75b03e8d3b26d6eddfc14011 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Thu, 15 Jul 2010 09:41:42 -0600
Subject: PCI: fall back to original BIOS BAR addresses

If we fail to assign resources to a PCI BAR, this patch makes us try the
original address from BIOS rather than leaving it disabled.

Linux tries to make sure all PCI device BARs are inside the upstream
PCI host bridge or P2P bridge apertures, reassigning BARs if necessary.
Windows does similar reassignment.

Before this patch, if we could not move a BAR into an aperture, we left
the resource unassigned, i.e., at address zero.  Windows leaves such BARs
at the original BIOS addresses, and this patch makes Linux do the same.

This is a bit ugly because we disable the resource long before we try to
reassign it, so we have to keep track of the BIOS BAR address somewhere.
For lack of a better place, I put it in the struct pci_dev.

I think it would be cleaner to attempt the assignment immediately when the
claim fails, so we could easily remember the original address.  But we
currently claim motherboard resources in the middle, after attempting to
claim PCI resources and before assigning new PCI resources, and changing
that is a fairly big job.

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=16263

Reported-by: Andrew <nitr0@seti.kr.ua>
Tested-by: Andrew <nitr0@seti.kr.ua>
Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/i386.c     |  1 +
 drivers/pci/setup-res.c | 32 ++++++++++++++++++++++++++++++++
 include/linux/pci.h     |  1 +
 3 files changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 6fdb3ec30c3..55253095be8 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -184,6 +184,7 @@ static void __init pcibios_allocate_resources(int pass)
 					idx, r, disabled, pass);
 				if (pci_claim_resource(dev, idx) < 0) {
 					/* We'll assign a new address later */
+					dev->fw_addr[idx] = r->start;
 					r->end -= r->start;
 					r->start = 0;
 				}
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 92379e2d37e..2aaa13150de 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -156,6 +156,38 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
 					     pcibios_align_resource, dev);
 	}
 
+	if (ret < 0 && dev->fw_addr[resno]) {
+		struct resource *root, *conflict;
+		resource_size_t start, end;
+
+		/*
+		 * If we failed to assign anything, let's try the address
+		 * where firmware left it.  That at least has a chance of
+		 * working, which is better than just leaving it disabled.
+		 */
+
+		if (res->flags & IORESOURCE_IO)
+			root = &ioport_resource;
+		else
+			root = &iomem_resource;
+
+		start = res->start;
+		end = res->end;
+		res->start = dev->fw_addr[resno];
+		res->end = res->start + size - 1;
+		dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n",
+			 resno, res);
+		conflict = request_resource_conflict(root, res);
+		if (conflict) {
+			dev_info(&dev->dev,
+				 "BAR %d: %pR conflicts with %s %pR\n", resno,
+				 res, conflict->name, conflict);
+			res->start = start;
+			res->end = end;
+		} else
+			ret = 0;
+	}
+
 	if (!ret) {
 		res->flags &= ~IORESOURCE_STARTALIGN;
 		dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7cb00845f15..f26fda76b87 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -288,6 +288,7 @@ struct pci_dev {
 	 */
 	unsigned int	irq;
 	struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
+	resource_size_t	fw_addr[DEVICE_COUNT_RESOURCE]; /* FW-assigned addr */
 
 	/* These fields are used by common fixups */
 	unsigned int	transparent:1;	/* Transparent PCI bridge */
-- 
cgit v1.2.3-70-g09d2


From 7f8275d0d660c146de6ee3017e1e2e594c49e820 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Mon, 19 Jul 2010 14:56:17 +1000
Subject: mm: add context argument to shrinker callback

The current shrinker implementation requires the registered callback
to have global state to work from. This makes it difficult to shrink
caches that are not global (e.g. per-filesystem caches). Pass the shrinker
structure to the callback so that users can embed the shrinker structure
in the context the shrinker needs to operate on and get back to it in the
callback via container_of().

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 arch/x86/kvm/mmu.c              | 2 +-
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 fs/dcache.c                     | 2 +-
 fs/gfs2/glock.c                 | 2 +-
 fs/gfs2/quota.c                 | 2 +-
 fs/gfs2/quota.h                 | 2 +-
 fs/inode.c                      | 2 +-
 fs/mbcache.c                    | 5 +++--
 fs/nfs/dir.c                    | 2 +-
 fs/nfs/internal.h               | 3 ++-
 fs/quota/dquot.c                | 2 +-
 fs/ubifs/shrinker.c             | 2 +-
 fs/ubifs/ubifs.h                | 2 +-
 fs/xfs/linux-2.6/xfs_buf.c      | 5 +++--
 fs/xfs/linux-2.6/xfs_sync.c     | 1 +
 fs/xfs/quota/xfs_qm.c           | 7 +++++--
 include/linux/mm.h              | 2 +-
 mm/vmscan.c                     | 8 +++++---
 18 files changed, 31 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3699613e883..b1ed0a1a591 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2926,7 +2926,7 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm)
 	return kvm_mmu_zap_page(kvm, page) + 1;
 }
 
-static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
+static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
 	struct kvm *kvm;
 	struct kvm *kvm_freed = NULL;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8757ecf6e96..e7018708cc3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4978,7 +4978,7 @@ i915_gpu_is_active(struct drm_device *dev)
 }
 
 static int
-i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask)
+i915_gem_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
 	drm_i915_private_t *dev_priv, *next_dev;
 	struct drm_i915_gem_object *obj_priv, *next_obj;
diff --git a/fs/dcache.c b/fs/dcache.c
index c8c78ba0782..86d4db15473 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -896,7 +896,7 @@ EXPORT_SYMBOL(shrink_dcache_parent);
  *
  * In this case we return -1 to tell the caller that we baled.
  */
-static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index dbab3fdc258..0898f3ec821 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1358,7 +1358,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
 }
 
 
-static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
+static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
 	struct gfs2_glock *gl;
 	int may_demote;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index b256d6f2428..8f02d3db8f4 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -77,7 +77,7 @@ static LIST_HEAD(qd_lru_list);
 static atomic_t qd_lru_count = ATOMIC_INIT(0);
 static DEFINE_SPINLOCK(qd_lru_lock);
 
-int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask)
+int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
 	struct gfs2_quota_data *qd;
 	struct gfs2_sbd *sdp;
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 195f60c8bd1..e7d236ca48b 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -51,7 +51,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
 	return ret;
 }
 
-extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask);
+extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask);
 extern const struct quotactl_ops gfs2_quotactl_ops;
 
 #endif /* __QUOTA_DOT_H__ */
diff --git a/fs/inode.c b/fs/inode.c
index 2bee20ae3d6..722860b323a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -512,7 +512,7 @@ static void prune_icache(int nr_to_scan)
  * This function is passed the number of inodes to scan, and it returns the
  * total number of remaining possibly-reclaimable inodes.
  */
-static int shrink_icache_memory(int nr, gfp_t gfp_mask)
+static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
 	if (nr) {
 		/*
diff --git a/fs/mbcache.c b/fs/mbcache.c
index ec88ff3d04a..e28f21b9534 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -115,7 +115,7 @@ mb_cache_indexes(struct mb_cache *cache)
  * What the mbcache registers as to get shrunk dynamically.
  */
 
-static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask);
+static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
 
 static struct shrinker mb_cache_shrinker = {
 	.shrink = mb_cache_shrink_fn,
@@ -191,13 +191,14 @@ forget:
  * This function is called by the kernel memory management when memory
  * gets low.
  *
+ * @shrink: (ignored)
  * @nr_to_scan: Number of objects to scan
  * @gfp_mask: (ignored)
  *
  * Returns the number of objects which are present in the cache.
  */
 static int
-mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask)
+mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
 	LIST_HEAD(free_list);
 	struct list_head *l, *ltmp;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 782b431ef91..e60416d3f81 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1710,7 +1710,7 @@ static void nfs_access_free_list(struct list_head *head)
 	}
 }
 
-int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
 	LIST_HEAD(head);
 	struct nfs_inode *nfsi;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d8bd619e386..e70f44b9b3f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -205,7 +205,8 @@ extern struct rpc_procinfo nfs4_procedures[];
 void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
 
 /* dir.c */
-extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
+extern int nfs_access_cache_shrinker(struct shrinker *shrink,
+					int nr_to_scan, gfp_t gfp_mask);
 
 /* inode.c */
 extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 12c233da1b6..437d2ca2de9 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -676,7 +676,7 @@ static void prune_dqcache(int count)
  * This is called from kswapd when we think we need some
  * more memory
  */
-static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
 	if (nr) {
 		spin_lock(&dq_list_lock);
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 02feb59cefc..0b201114a5a 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -277,7 +277,7 @@ static int kick_a_thread(void)
 	return 0;
 }
 
-int ubifs_shrinker(int nr, gfp_t gfp_mask)
+int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
 	int freed, contention = 0;
 	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 2eef553d50c..04310878f44 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1575,7 +1575,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
 int ubifs_tnc_end_commit(struct ubifs_info *c);
 
 /* shrinker.c */
-int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask);
+int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
 
 /* commit.c */
 int ubifs_bg_thread(void *info);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 649ade8ef59..2ee3f7a6016 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -45,7 +45,7 @@
 
 static kmem_zone_t *xfs_buf_zone;
 STATIC int xfsbufd(void *);
-STATIC int xfsbufd_wakeup(int, gfp_t);
+STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
 STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
 static struct shrinker xfs_buf_shake = {
 	.shrink = xfsbufd_wakeup,
@@ -340,7 +340,7 @@ _xfs_buf_lookup_pages(
 					__func__, gfp_mask);
 
 			XFS_STATS_INC(xb_page_retries);
-			xfsbufd_wakeup(0, gfp_mask);
+			xfsbufd_wakeup(NULL, 0, gfp_mask);
 			congestion_wait(BLK_RW_ASYNC, HZ/50);
 			goto retry;
 		}
@@ -1762,6 +1762,7 @@ xfs_buf_runall_queues(
 
 STATIC int
 xfsbufd_wakeup(
+	struct shrinker		*shrink,
 	int			priority,
 	gfp_t			mask)
 {
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index ef7f0218bcc..be375827af9 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -838,6 +838,7 @@ static struct rw_semaphore xfs_mount_list_lock;
 
 static int
 xfs_reclaim_inode_shrink(
+	struct shrinker	*shrink,
 	int		nr_to_scan,
 	gfp_t		gfp_mask)
 {
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 8c117ff2e3a..67c018392d6 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -69,7 +69,7 @@ STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);
 
 STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
 STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int	xfs_qm_shake(int, gfp_t);
+STATIC int	xfs_qm_shake(struct shrinker *, int, gfp_t);
 
 static struct shrinker xfs_qm_shaker = {
 	.shrink = xfs_qm_shake,
@@ -2117,7 +2117,10 @@ xfs_qm_shake_freelist(
  */
 /* ARGSUSED */
 STATIC int
-xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
+xfs_qm_shake(
+	struct shrinker	*shrink,
+	int		nr_to_scan,
+	gfp_t		gfp_mask)
 {
 	int	ndqused, nfree, n;
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b969efb0378..a2b48041b91 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -999,7 +999,7 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
  * querying the cache size, so a fastpath for that case is appropriate.
  */
 struct shrinker {
-	int (*shrink)(int nr_to_scan, gfp_t gfp_mask);
+	int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask);
 	int seeks;	/* seeks to recreate an obj */
 
 	/* These are for internal use */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9c7e57cc63a..199fa436c0d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -213,8 +213,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 	list_for_each_entry(shrinker, &shrinker_list, list) {
 		unsigned long long delta;
 		unsigned long total_scan;
-		unsigned long max_pass = (*shrinker->shrink)(0, gfp_mask);
+		unsigned long max_pass;
 
+		max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
 		delta = (4 * scanned) / shrinker->seeks;
 		delta *= max_pass;
 		do_div(delta, lru_pages + 1);
@@ -242,8 +243,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 			int shrink_ret;
 			int nr_before;
 
-			nr_before = (*shrinker->shrink)(0, gfp_mask);
-			shrink_ret = (*shrinker->shrink)(this_scan, gfp_mask);
+			nr_before = (*shrinker->shrink)(shrinker, 0, gfp_mask);
+			shrink_ret = (*shrinker->shrink)(shrinker, this_scan,
+								gfp_mask);
 			if (shrink_ret == -1)
 				break;
 			if (shrink_ret < nr_before)
-- 
cgit v1.2.3-70-g09d2


From 772a2f9b488f4d27c314da5eeabde750b9ead41b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Thu, 15 Jul 2010 10:39:47 +0200
Subject: fb: handle allocation failure in alloc_apertures()

If the kzalloc() fails we should return NULL.  All the places that call
alloc_apertures() check for this already.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: James Simmons <jsimmons@infradead.org>
Acked-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/fb.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index 8e5a9dfb76b..e7445df44d6 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -873,6 +873,8 @@ struct fb_info {
 static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
 	struct apertures_struct *a = kzalloc(sizeof(struct apertures_struct)
 			+ max_num * sizeof(struct aperture), GFP_KERNEL);
+	if (!a)
+		return NULL;
 	a->count = max_num;
 	return a;
 }
-- 
cgit v1.2.3-70-g09d2


From 844b9a8707f1fcf0482e0c52f44a555e799ccda6 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 20 Jul 2010 13:24:34 -0700
Subject: vfs: fix RCU-lockdep false positive due to /proc

If a single-threaded process does a file-descriptor operation, and some
other process accesses that same file descriptor via /proc, the current
rcu_dereference_check_fdtable() can give a false-positive RCU-lockdep
splat due to the reference count being increased by the /proc access after
the reference-count check in fget_light() but before the check in
rcu_dereference_check_fdtable().

This commit prevents this false positive by checking for a single-threaded
process.  To avoid #include hell, this commit uses the wrapper for
thread_group_empty(current) defined by rcu_my_thread_group_empty()
provided in a separate commit.

Located-by: Miles Lane <miles.lane@gmail.com>
Located-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fdtable.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 013dc529e95..d147461bc27 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -61,7 +61,8 @@ struct files_struct {
 	(rcu_dereference_check((fdtfd), \
 			       rcu_read_lock_held() || \
 			       lockdep_is_held(&(files)->file_lock) || \
-			       atomic_read(&(files)->count) == 1))
+			       atomic_read(&(files)->count) == 1 || \
+			       rcu_my_thread_group_empty()))
 
 #define files_fdtable(files) \
 		(rcu_dereference_check_fdtable((files), (files)->fdt))
-- 
cgit v1.2.3-70-g09d2


From a6a1a095ec8ace2912fc280d371eee8ff5da5736 Mon Sep 17 00:00:00 2001
From: Doug Goldstein <cardoe@gentoo.org>
Date: Tue, 20 Jul 2010 15:22:25 -0700
Subject: include/linux/vgaarb.h: add missing part of include guard

vgaarb.h was missing the #define of the #ifndef at the top for the guard
to prevent multiple #include's from causing re-define errors

Signed-off-by: Doug Goldstein <cardoe@gentoo.org>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/vgaarb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h
index c9a97597699..814f294d4cd 100644
--- a/include/linux/vgaarb.h
+++ b/include/linux/vgaarb.h
@@ -29,6 +29,7 @@
  */
 
 #ifndef LINUX_VGA_H
+#define LINUX_VGA_H
 
 #include <asm/vga.h>
 
-- 
cgit v1.2.3-70-g09d2