182 files changed, 3097 insertions, 2031 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 4b0669cbb3b..2505740b81d 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -30,6 +30,10 @@ config OPROFILE_EVENT_MULTIPLEX
 config HAVE_OPROFILE
 	bool
 
+config OPROFILE_NMI_TIMER
+	def_bool y
+	depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+
 config KPROBES
 	bool "Kprobes"
 	depends on MODULES
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 3d0c6fb74ae..e8e8fe505df 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -183,7 +183,8 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		leds_event(led_idle_start);
 		while (!need_resched()) {
 #ifdef CONFIG_HOTPLUG_CPU
@@ -213,7 +214,8 @@ void cpu_idle(void)
 			}
 		}
 		leds_event(led_idle_end);
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 8fc2c8fcbdc..c0b59bff6be 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -52,6 +52,7 @@
 #include <asm/mach/time.h>
 #include <asm/traps.h>
 #include <asm/unwind.h>
+#include <asm/memblock.h>
 
 #if defined(CONFIG_DEPRECATED_PARAM_STRUCT)
 #include "compat.h"
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index fbdd12ea3a5..7c38474e533 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -32,6 +32,7 @@
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
+#include <asm/memblock.h>
 
 #include "mm.h"
 
@@ -332,7 +333,6 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
 
 	sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL);
 
-	memblock_init();
 	for (i = 0; i < mi->nr_banks; i++)
 		memblock_add(mi->bank[i].start, mi->bank[i].size);
 
@@ -371,7 +371,7 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
 	if (mdesc->reserve)
 		mdesc->reserve();
 
-	memblock_analyze();
+	memblock_allow_resize();
 	memblock_dump_all();
 }
 
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index ef5a2a08fcc..ea339575032 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -34,10 +34,12 @@ void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched())
 			cpu_idle_sleep();
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 6a80a9e9fc4..8dd0416673c 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -88,10 +88,12 @@ void cpu_idle(void)
 #endif
 		if (!idle)
 			idle = default_idle;
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched())
 			idle();
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c
index bb978ede898..6773fc83a67 100644
--- a/arch/cris/arch-v32/kernel/time.c
+++ b/arch/cris/arch-v32/kernel/time.c
@@ -47,14 +47,12 @@ static struct clocksource cont_rotime = {
 	.rating = 300,
 	.read   = read_cont_rotime,
 	.mask   = CLOCKSOURCE_MASK(32),
-	.shift  = 10,
 	.flags  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 static int __init etrax_init_cont_rotime(void)
 {
-	cont_rotime.mult = clocksource_khz2mult(100000, cont_rotime.shift);
-	clocksource_register(&cont_rotime);
+	clocksource_register_khz(&cont_rotime, 100000);
 	return 0;
 }
 arch_initcall(etrax_init_cont_rotime);
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 27489b6dd53..3b7a7c48378 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -23,6 +23,9 @@ config IA64
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DMA_API_DEBUG
 	select HAVE_GENERIC_HARDIRQS
+	select HAVE_MEMBLOCK
+	select HAVE_MEMBLOCK_NODE_MAP
+	select ARCH_DISCARD_MEMBLOCK
 	select GENERIC_IRQ_PROBE
 	select GENERIC_PENDING_IRQ if SMP
 	select IRQ_PER_CPU
@@ -474,9 +477,6 @@ config NODES_SHIFT
 	  MAX_NUMNODES will be 2^(This value).
 	  If in doubt, use the default.
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 # VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent.
 # VIRTUAL_MEM_MAP has been retained for historical reasons.
 config VIRTUAL_MEM_MAP
diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
index 5a274af31b2..3deac956d32 100644
--- a/arch/ia64/include/asm/cputime.h
+++ b/arch/ia64/include/asm/cputime.h
@@ -26,60 +26,53 @@
 #include <linux/jiffies.h>
 #include <asm/processor.h>
 
-typedef u64 cputime_t;
-typedef u64 cputime64_t;
+typedef u64 __nocast cputime_t;
+typedef u64 __nocast cputime64_t;
 
-#define cputime_zero			((cputime_t)0)
 #define cputime_one_jiffy		jiffies_to_cputime(1)
-#define cputime_max			((~((cputime_t)0) >> 1) - 1)
-#define cputime_add(__a, __b)		((__a) +  (__b))
-#define cputime_sub(__a, __b)		((__a) -  (__b))
-#define cputime_div(__a, __n)		((__a) /  (__n))
-#define cputime_halve(__a)		((__a) >> 1)
-#define cputime_eq(__a, __b)		((__a) == (__b))
-#define cputime_gt(__a, __b)		((__a) >  (__b))
-#define cputime_ge(__a, __b)		((__a) >= (__b))
-#define cputime_lt(__a, __b)		((__a) <  (__b))
-#define cputime_le(__a, __b)		((__a) <= (__b))
-
-#define cputime64_zero			((cputime64_t)0)
-#define cputime64_add(__a, __b)		((__a) + (__b))
-#define cputime64_sub(__a, __b)		((__a) - (__b))
-#define cputime_to_cputime64(__ct)	(__ct)
 
 /*
  * Convert cputime <-> jiffies (HZ)
  */
-#define cputime_to_jiffies(__ct)	((__ct) / (NSEC_PER_SEC / HZ))
-#define jiffies_to_cputime(__jif)	((__jif) * (NSEC_PER_SEC / HZ))
-#define cputime64_to_jiffies64(__ct)	((__ct) / (NSEC_PER_SEC / HZ))
-#define jiffies64_to_cputime64(__jif)	((__jif) * (NSEC_PER_SEC / HZ))
+#define cputime_to_jiffies(__ct)	\
+	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies_to_cputime(__jif)	\
+	(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
+#define cputime64_to_jiffies64(__ct)	\
+	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies64_to_cputime64(__jif)	\
+	(__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
 
 /*
  * Convert cputime <-> microseconds
  */
-#define cputime_to_usecs(__ct)		((__ct) / NSEC_PER_USEC)
-#define usecs_to_cputime(__usecs)	((__usecs) * NSEC_PER_USEC)
-#define usecs_to_cputime64(__usecs)	usecs_to_cputime(__usecs)
+#define cputime_to_usecs(__ct)		\
+	((__force u64)(__ct) / NSEC_PER_USEC)
+#define usecs_to_cputime(__usecs)	\
+	(__force cputime_t)((__usecs) * NSEC_PER_USEC)
+#define usecs_to_cputime64(__usecs)	\
+	(__force cputime64_t)((__usecs) * NSEC_PER_USEC)
 
 /*
  * Convert cputime <-> seconds
  */
-#define cputime_to_secs(__ct)		((__ct) / NSEC_PER_SEC)
-#define secs_to_cputime(__secs)		((__secs) * NSEC_PER_SEC)
+#define cputime_to_secs(__ct)		\
+	((__force u64)(__ct) / NSEC_PER_SEC)
+#define secs_to_cputime(__secs)		\
+	(__force cputime_t)((__secs) * NSEC_PER_SEC)
 
 /*
  * Convert cputime <-> timespec (nsec)
  */
 static inline cputime_t timespec_to_cputime(const struct timespec *val)
 {
-	cputime_t ret = val->tv_sec * NSEC_PER_SEC;
-	return (ret + val->tv_nsec);
+	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
+	return (__force cputime_t) ret;
 }
 static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
 {
-	val->tv_sec  = ct / NSEC_PER_SEC;
-	val->tv_nsec = ct % NSEC_PER_SEC;
+	val->tv_sec  = (__force u64) ct / NSEC_PER_SEC;
+	val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
 }
 
 /*
@@ -87,25 +80,28 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
  */
 static inline cputime_t timeval_to_cputime(struct timeval *val)
 {
-	cputime_t ret = val->tv_sec * NSEC_PER_SEC;
-	return (ret + val->tv_usec * NSEC_PER_USEC);
+	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
+	return (__force cputime_t) ret;
 }
 static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
 {
-	val->tv_sec = ct / NSEC_PER_SEC;
-	val->tv_usec = (ct % NSEC_PER_SEC) / NSEC_PER_USEC;
+	val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
+	val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
 }
 
 /*
  * Convert cputime <-> clock (USER_HZ)
  */
-#define cputime_to_clock_t(__ct)	((__ct) / (NSEC_PER_SEC / USER_HZ))
-#define clock_t_to_cputime(__x)		((__x) * (NSEC_PER_SEC / USER_HZ))
+#define cputime_to_clock_t(__ct)	\
+	((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
+#define clock_t_to_cputime(__x)		\
+	(__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
 
 /*
  * Convert cputime64 to clock.
  */
-#define cputime64_to_clock_t(__ct)      cputime_to_clock_t((cputime_t)__ct)
+#define cputime64_to_clock_t(__ct)	\
+	cputime_to_clock_t((__force cputime_t)__ct)
 
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 #endif /* __IA64_CPUTIME_H */
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index f114a3b14c6..1516d1dc11f 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -16,6 +16,7 @@
  */
 #include <linux/bootmem.h>
 #include <linux/efi.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/nmi.h>
 #include <linux/swap.h>
@@ -348,7 +349,7 @@ paging_init (void)
 		printk("Virtual mem_map starts at 0x%p\n", mem_map);
 	}
 #else /* !CONFIG_VIRTUAL_MEM_MAP */
-	add_active_range(0, 0, max_low_pfn);
+	memblock_add_node(0, PFN_PHYS(max_low_pfn), 0);
 	free_area_init_nodes(max_zone_pfns);
 #endif /* !CONFIG_VIRTUAL_MEM_MAP */
 	zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 00cb0e26c64..13df239dbed 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -10,6 +10,7 @@
 #include <linux/bootmem.h>
 #include <linux/efi.h>
 #include <linux/elf.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/module.h>
@@ -557,8 +558,7 @@ int __init register_active_ranges(u64 start, u64 len, int nid)
 #endif
 
 	if (start < end)
-		add_active_range(nid, __pa(start) >> PAGE_SHIFT,
-			__pa(end) >> PAGE_SHIFT);
+		memblock_add_node(__pa(start), end - start, nid);
 	return 0;
 }
 
diff --git a/arch/m68k/platform/68328/timers.c b/arch/m68k/platform/68328/timers.c
index 309f725995b..f2678866067 100644
--- a/arch/m68k/platform/68328/timers.c
+++ b/arch/m68k/platform/68328/timers.c
@@ -93,7 +93,6 @@ static struct clocksource m68328_clk = {
 	.name	= "timer",
 	.rating	= 250,
 	.read	= m68328_read_clk,
-	.shift	= 20,
 	.mask	= CLOCKSOURCE_MASK(32),
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
@@ -115,8 +114,7 @@ void hw_timer_init(void)
 
 	/* Enable timer 1 */
 	TCTL |= TCTL_TEN;
-	m68328_clk.mult = clocksource_hz2mult(TICKS_PER_JIFFY*HZ, m68328_clk.shift);
-	clocksource_register(&m68328_clk);
+	clocksource_register_hz(&m68328_clk, TICKS_PER_JIFFY*HZ);
 }
 
 /***************************************************************************/
diff --git a/arch/m68k/platform/coldfire/dma_timer.c b/arch/m68k/platform/coldfire/dma_timer.c
index a5f562823d7..235ad57c470 100644
--- a/arch/m68k/platform/coldfire/dma_timer.c
+++ b/arch/m68k/platform/coldfire/dma_timer.c
@@ -44,7 +44,6 @@ static struct clocksource clocksource_cf_dt = {
 	.rating		= 200,
 	.read		= cf_dt_get_cycles,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 20,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -60,9 +59,7 @@ static int __init  init_cf_dt_clocksource(void)
 	__raw_writeb(0x00, DTER0);
 	__raw_writel(0x00000000, DTRR0);
 	__raw_writew(DMA_DTMR_CLK_DIV_16 | DMA_DTMR_ENABLE, DTMR0);
-	clocksource_cf_dt.mult = clocksource_hz2mult(DMA_FREQ,
-						     clocksource_cf_dt.shift);
-	return clocksource_register(&clocksource_cf_dt);
+	return clocksource_register_hz(&clocksource_cf_dt, DMA_FREQ);
 }
 
 arch_initcall(init_cf_dt_clocksource);
diff --git a/arch/m68k/platform/coldfire/pit.c b/arch/m68k/platform/coldfire/pit.c
index c2b980926be..02663d25822 100644
--- a/arch/m68k/platform/coldfire/pit.c
+++ b/arch/m68k/platform/coldfire/pit.c
@@ -144,7 +144,6 @@ static struct clocksource pit_clk = {
 	.name	= "pit",
 	.rating	= 100,
 	.read	= pit_read_clk,
-	.shift	= 20,
 	.mask	= CLOCKSOURCE_MASK(32),
 };
 
@@ -162,8 +161,7 @@ void hw_timer_init(void)
 
 	setup_irq(MCFINT_VECBASE + MCFINT_PIT1, &pit_irq);
 
-	pit_clk.mult = clocksource_hz2mult(FREQ, pit_clk.shift);
-	clocksource_register(&pit_clk);
+	clocksource_register_hz(&pit_clk, FREQ);
 }
 
 /***************************************************************************/
diff --git a/arch/m68k/platform/coldfire/sltimers.c b/arch/m68k/platform/coldfire/sltimers.c
index 6a85daf9a7f..b7f822b552b 100644
--- a/arch/m68k/platform/coldfire/sltimers.c
+++ b/arch/m68k/platform/coldfire/sltimers.c
@@ -114,7 +114,6 @@ static struct clocksource mcfslt_clk = {
 	.name	= "slt",
 	.rating	= 250,
 	.read	= mcfslt_read_clk,
-	.shift	= 20,
 	.mask	= CLOCKSOURCE_MASK(32),
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
@@ -136,8 +135,7 @@ void hw_timer_init(void)
 
 	setup_irq(MCF_IRQ_TIMER, &mcfslt_timer_irq);
 
-	mcfslt_clk.mult = clocksource_hz2mult(MCF_BUSCLK, mcfslt_clk.shift);
-	clocksource_register(&mcfslt_clk);
+	clocksource_register_hz(&mcfslt_clk, MCF_BUSCLK);
 
 #ifdef CONFIG_HIGHPROFILE
 	mcfslt_profile_init();
diff --git a/arch/m68k/platform/coldfire/timers.c b/arch/m68k/platform/coldfire/timers.c
index 60242f65fea..0d90da32fcd 100644
--- a/arch/m68k/platform/coldfire/timers.c
+++ b/arch/m68k/platform/coldfire/timers.c
@@ -88,7 +88,6 @@ static struct clocksource mcftmr_clk = {
 	.name	= "tmr",
 	.rating	= 250,
 	.read	= mcftmr_read_clk,
-	.shift	= 20,
 	.mask	= CLOCKSOURCE_MASK(32),
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
@@ -109,8 +108,7 @@ void hw_timer_init(void)
 	__raw_writew(MCFTIMER_TMR_ENORI | MCFTIMER_TMR_CLK16 |
 		MCFTIMER_TMR_RESTART | MCFTIMER_TMR_ENABLE, TA(MCFTIMER_TMR));
 
-	mcftmr_clk.mult = clocksource_hz2mult(FREQ, mcftmr_clk.shift);
-	clocksource_register(&mcftmr_clk);
+	clocksource_register_hz(&mcftmr_clk, FREQ);
 
 	setup_irq(MCF_IRQ_TIMER, &mcftmr_timer_irq);
 
diff --git a/arch/microblaze/include/asm/memblock.h b/arch/microblaze/include/asm/memblock.h
deleted file mode 100644
index 20a8e257c77..00000000000
--- a/arch/microblaze/include/asm/memblock.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (C) 2008 Michal Simek <monstr@monstr.eu>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef _ASM_MICROBLAZE_MEMBLOCK_H
-#define _ASM_MICROBLAZE_MEMBLOCK_H
-
-#endif /* _ASM_MICROBLAZE_MEMBLOCK_H */
-
-
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 95cc295976a..7dcb5bfffb7 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -103,10 +103,12 @@ void cpu_idle(void)
 		if (!idle)
 			idle = default_idle;
 
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched())
 			idle();
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 
 		preempt_enable_no_resched();
 		schedule();
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 977484add21..80d314e8190 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -122,7 +122,6 @@ void __init early_init_devtree(void *params)
 	of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line);
 
 	/* Scan memory nodes and rebuild MEMBLOCKs */
-	memblock_init();
 	of_scan_flat_dt(early_init_dt_scan_root, NULL);
 	of_scan_flat_dt(early_init_dt_scan_memory, NULL);
 
@@ -130,7 +129,7 @@ void __init early_init_devtree(void *params)
 	strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
 	parse_early_param();
 
-	memblock_analyze();
+	memblock_allow_resize();
 
 	pr_debug("Phys. mem: %lx\n", (unsigned long) memblock_phys_mem_size());
 
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index d46f1da18a3..9c652eb68aa 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -25,6 +25,9 @@ config MIPS
 	select GENERIC_IRQ_SHOW
 	select HAVE_ARCH_JUMP_LABEL
 	select IRQ_FORCED_THREADING
+	select HAVE_MEMBLOCK
+	select HAVE_MEMBLOCK_NODE_MAP
+	select ARCH_DISCARD_MEMBLOCK
 
 menu "Machine selection"
 
@@ -2064,9 +2067,6 @@ config ARCH_DISCONTIGMEM_ENABLE
 	  or have huge holes in the physical address space for other reasons.
 	  See <file:Documentation/vm/numa> for more.
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 config ARCH_SPARSEMEM_ENABLE
 	bool
 	select SPARSEMEM_STATIC
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index c47f96e453c..7955409051c 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -56,7 +56,8 @@ void __noreturn cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched() && cpu_online(cpu)) {
 #ifdef CONFIG_MIPS_MT_SMTC
 			extern void smtc_idle_loop_hook(void);
@@ -77,7 +78,8 @@ void __noreturn cpu_idle(void)
 		     system_state == SYSTEM_BOOTING))
 			play_dead();
 #endif
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 84af26ab221..b1cb8f87d7b 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -14,6 +14,7 @@
 #include <linux/ioport.h>
 #include <linux/export.h>
 #include <linux/screen_info.h>
+#include <linux/memblock.h>
 #include <linux/bootmem.h>
 #include <linux/initrd.h>
 #include <linux/root_dev.h>
@@ -352,7 +353,7 @@ static void __init bootmem_init(void)
 			continue;
 #endif
 
-		add_active_range(0, start, end);
+		memblock_add_node(PFN_PHYS(start), PFN_PHYS(end - start), 0);
 	}
 
 	/*
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index bc1297109cc..b105eca3c02 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -12,6 +12,7 @@
  */
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/module.h>
@@ -381,8 +382,8 @@ static void __init szmem(void)
 				continue;
 			}
 			num_physpages += slot_psize;
-			add_active_range(node, slot_getbasepfn(node, slot),
-					 slot_getbasepfn(node, slot) + slot_psize);
+			memblock_add_node(PFN_PHYS(slot_getbasepfn(node, slot)),
+					  PFN_PHYS(slot_psize), node);
 		}
 	}
 }
diff --git a/arch/openrisc/include/asm/memblock.h b/arch/openrisc/include/asm/memblock.h
deleted file mode 100644
index bbe5a1c788c..00000000000
--- a/arch/openrisc/include/asm/memblock.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * OpenRISC Linux
- *
- * Linux architectural port borrowing liberally from similar works of
- * others.  All original copyrights apply as per the original source
- * declaration.
- *
- * OpenRISC implementation:
- * Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com>
- * Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se>
- * et al.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef __ASM_OPENRISC_MEMBLOCK_H
-#define __ASM_OPENRISC_MEMBLOCK_H
-
-/* empty */
-
-#endif /* __ASM_OPENRISC_MEMBLOCK_H */
diff --git a/arch/openrisc/kernel/idle.c b/arch/openrisc/kernel/idle.c
index d5bc5f813e8..e5fc7887783 100644
--- a/arch/openrisc/kernel/idle.c
+++ b/arch/openrisc/kernel/idle.c
@@ -51,7 +51,8 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 
 		while (!need_resched()) {
 			check_pgt_cache();
@@ -69,7 +70,8 @@ void cpu_idle(void)
 			set_thread_flag(TIF_POLLING_NRFLAG);
 		}
 
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/openrisc/kernel/prom.c b/arch/openrisc/kernel/prom.c
index 1bb58ba89af..3d4478f6c94 100644
--- a/arch/openrisc/kernel/prom.c
+++ b/arch/openrisc/kernel/prom.c
@@ -76,14 +76,13 @@ void __init early_init_devtree(void *params)
 	of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line);
 
 	/* Scan memory nodes and rebuild MEMBLOCKs */
-	memblock_init();
 	of_scan_flat_dt(early_init_dt_scan_root, NULL);
 	of_scan_flat_dt(early_init_dt_scan_memory, NULL);
 
 	/* Save command line for /proc/cmdline and then parse parameters */
 	strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
 
-	memblock_analyze();
+	memblock_allow_resize();
 
 	/* We must copy the flattend device tree from init memory to regular
 	 * memory because the device tree references the strings in it
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 45b7389d77a..7c0774397b8 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -198,8 +198,6 @@ static struct clocksource clocksource_cr16 = {
 	.rating			= 300,
 	.read			= read_cr16,
 	.mask			= CLOCKSOURCE_MASK(BITS_PER_LONG),
-	.mult			= 0, /* to be set */
-	.shift			= 22,
 	.flags			= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -270,7 +268,5 @@ void __init time_init(void)
 
 	/* register at clocksource framework */
 	current_cr16_khz = PAGE0->mem_10msec/10;  /* kHz */
-	clocksource_cr16.mult = clocksource_khz2mult(current_cr16_khz,
-						clocksource_cr16.shift);
-	clocksource_register(&clocksource_cr16);
+	clocksource_register_khz(&clocksource_cr16, current_cr16_khz);
 }
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 951e18f5335..ead0bc68439 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -117,6 +117,7 @@ config PPC
 	select HAVE_KRETPROBES
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_MEMBLOCK
+	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_API_DEBUG
 	select USE_GENERIC_SMP_HELPERS if SMP
@@ -421,9 +422,6 @@ config ARCH_SPARSEMEM_DEFAULT
 	def_bool y
 	depends on (SMP && PPC_PSERIES) || PPC_PS3
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 config SYS_SUPPORTS_HUGETLBFS
 	bool
 
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 98b7c4b49c9..6ec1c380a4d 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -29,25 +29,8 @@ static inline void setup_cputime_one_jiffy(void) { }
 #include <asm/time.h>
 #include <asm/param.h>
 
-typedef u64 cputime_t;
-typedef u64 cputime64_t;
-
-#define cputime_zero			((cputime_t)0)
-#define cputime_max			((~((cputime_t)0) >> 1) - 1)
-#define cputime_add(__a, __b)		((__a) +  (__b))
-#define cputime_sub(__a, __b)		((__a) -  (__b))
-#define cputime_div(__a, __n)		((__a) /  (__n))
-#define cputime_halve(__a)		((__a) >> 1)
-#define cputime_eq(__a, __b)		((__a) == (__b))
-#define cputime_gt(__a, __b)		((__a) >  (__b))
-#define cputime_ge(__a, __b)		((__a) >= (__b))
-#define cputime_lt(__a, __b)		((__a) <  (__b))
-#define cputime_le(__a, __b)		((__a) <= (__b))
-
-#define cputime64_zero			((cputime64_t)0)
-#define cputime64_add(__a, __b)		((__a) + (__b))
-#define cputime64_sub(__a, __b)		((__a) - (__b))
-#define cputime_to_cputime64(__ct)	(__ct)
+typedef u64 __nocast cputime_t;
+typedef u64 __nocast cputime64_t;
 
 #ifdef __KERNEL__
 
@@ -65,7 +48,7 @@ DECLARE_PER_CPU(unsigned long, cputime_scaled_last_delta);
 
 static inline unsigned long cputime_to_jiffies(const cputime_t ct)
 {
-	return mulhdu(ct, __cputime_jiffies_factor);
+	return mulhdu((__force u64) ct, __cputime_jiffies_factor);
 }
 
 /* Estimate the scaled cputime by scaling the real cputime based on
@@ -74,14 +57,15 @@ static inline cputime_t cputime_to_scaled(const cputime_t ct)
 {
 	if (cpu_has_feature(CPU_FTR_SPURR) &&
 	    __get_cpu_var(cputime_last_delta))
-		return ct * __get_cpu_var(cputime_scaled_last_delta) /
-			    __get_cpu_var(cputime_last_delta);
+		return (__force u64) ct *
+			__get_cpu_var(cputime_scaled_last_delta) /
+			__get_cpu_var(cputime_last_delta);
 	return ct;
 }
 
 static inline cputime_t jiffies_to_cputime(const unsigned long jif)
 {
-	cputime_t ct;
+	u64 ct;
 	unsigned long sec;
 
 	/* have to be a little careful about overflow */
@@ -93,7 +77,7 @@ static inline cputime_t jiffies_to_cputime(const unsigned long jif)
 	}
 	if (sec)
 		ct += (cputime_t) sec * tb_ticks_per_sec;
-	return ct;
+	return (__force cputime_t) ct;
 }
 
 static inline void setup_cputime_one_jiffy(void)
@@ -103,7 +87,7 @@ static inline void setup_cputime_one_jiffy(void)
 
 static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
 {
-	cputime_t ct;
+	u64 ct;
 	u64 sec;
 
 	/* have to be a little careful about overflow */
@@ -114,13 +98,13 @@ static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
 		do_div(ct, HZ);
 	}
 	if (sec)
-		ct += (cputime_t) sec * tb_ticks_per_sec;
-	return ct;
+		ct += (u64) sec * tb_ticks_per_sec;
+	return (__force cputime64_t) ct;
 }
 
 static inline u64 cputime64_to_jiffies64(const cputime_t ct)
 {
-	return mulhdu(ct, __cputime_jiffies_factor);
+	return mulhdu((__force u64) ct, __cputime_jiffies_factor);
 }
 
 /*
@@ -130,12 +114,12 @@ extern u64 __cputime_msec_factor;
 
 static inline unsigned long cputime_to_usecs(const cputime_t ct)
 {
-	return mulhdu(ct, __cputime_msec_factor) * USEC_PER_MSEC;
+	return mulhdu((__force u64) ct, __cputime_msec_factor) * USEC_PER_MSEC;
 }
 
 static inline cputime_t usecs_to_cputime(const unsigned long us)
 {
-	cputime_t ct;
+	u64 ct;
 	unsigned long sec;
 
 	/* have to be a little careful about overflow */
@@ -147,7 +131,7 @@ static inline cputime_t usecs_to_cputime(const unsigned long us)
 	}
 	if (sec)
 		ct += (cputime_t) sec * tb_ticks_per_sec;
-	return ct;
+	return (__force cputime_t) ct;
 }
 
 #define usecs_to_cputime64(us)		usecs_to_cputime(us)
@@ -159,12 +143,12 @@ extern u64 __cputime_sec_factor;
 
 static inline unsigned long cputime_to_secs(const cputime_t ct)
 {
-	return mulhdu(ct, __cputime_sec_factor);
+	return mulhdu((__force u64) ct, __cputime_sec_factor);
 }
 
 static inline cputime_t secs_to_cputime(const unsigned long sec)
 {
-	return (cputime_t) sec * tb_ticks_per_sec;
+	return (__force cputime_t)((u64) sec * tb_ticks_per_sec);
 }
 
 /*
@@ -172,7 +156,7 @@ static inline cputime_t secs_to_cputime(const unsigned long sec)
  */
 static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p)
 {
-	u64 x = ct;
+	u64 x = (__force u64) ct;
 	unsigned int frac;
 
 	frac = do_div(x, tb_ticks_per_sec);
@@ -184,11 +168,11 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p)
 
 static inline cputime_t timespec_to_cputime(const struct timespec *p)
 {
-	cputime_t ct;
+	u64 ct;
 
 	ct = (u64) p->tv_nsec * tb_ticks_per_sec;
 	do_div(ct, 1000000000);
-	return ct + (u64) p->tv_sec * tb_ticks_per_sec;
+	return (__force cputime_t)(ct + (u64) p->tv_sec * tb_ticks_per_sec);
 }
 
 /*
@@ -196,7 +180,7 @@ static inline cputime_t timespec_to_cputime(const struct timespec *p)
  */
 static inline void cputime_to_timeval(const cputime_t ct, struct timeval *p)
 {
-	u64 x = ct;
+	u64 x = (__force u64) ct;
 	unsigned int frac;
 
 	frac = do_div(x, tb_ticks_per_sec);
@@ -208,11 +192,11 @@ static inline void cputime_to_timeval(const cputime_t ct, struct timeval *p)
 
 static inline cputime_t timeval_to_cputime(const struct timeval *p)
 {
-	cputime_t ct;
+	u64 ct;
 
 	ct = (u64) p->tv_usec * tb_ticks_per_sec;
 	do_div(ct, 1000000);
-	return ct + (u64) p->tv_sec * tb_ticks_per_sec;
+	return (__force cputime_t)(ct + (u64) p->tv_sec * tb_ticks_per_sec);
 }
 
 /*
@@ -222,12 +206,12 @@ extern u64 __cputime_clockt_factor;
 
 static inline unsigned long cputime_to_clock_t(const cputime_t ct)
 {
-	return mulhdu(ct, __cputime_clockt_factor);
+	return mulhdu((__force u64) ct, __cputime_clockt_factor);
 }
 
 static inline cputime_t clock_t_to_cputime(const unsigned long clk)
 {
-	cputime_t ct;
+	u64 ct;
 	unsigned long sec;
 
 	/* have to be a little careful about overflow */
@@ -238,8 +222,8 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
 		do_div(ct, USER_HZ);
 	}
 	if (sec)
-		ct += (cputime_t) sec * tb_ticks_per_sec;
-	return ct;
+		ct += (u64) sec * tb_ticks_per_sec;
+	return (__force cputime_t) ct;
 }
 
 #define cputime64_to_clock_t(ct)	cputime_to_clock_t((cputime_t)(ct))
diff --git a/arch/powerpc/include/asm/memblock.h b/arch/powerpc/include/asm/memblock.h
deleted file mode 100644
index 43efc345065..00000000000
--- a/arch/powerpc/include/asm/memblock.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _ASM_POWERPC_MEMBLOCK_H
-#define _ASM_POWERPC_MEMBLOCK_H
-
-#include <asm/udbg.h>
-
-#define MEMBLOCK_DBG(fmt...) udbg_printf(fmt)
-
-#endif /* _ASM_POWERPC_MEMBLOCK_H */
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 39a2baa6ad5..9c3cd490b1b 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -46,6 +46,12 @@ static int __init powersave_off(char *arg)
 }
 __setup("powersave=off", powersave_off);
 
+#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_TRACEPOINTS)
+static const bool idle_uses_rcu = 1;
+#else
+static const bool idle_uses_rcu;
+#endif
+
 /*
  * The body of the idle task.
  */
@@ -56,7 +62,10 @@ void cpu_idle(void)
 
 	set_thread_flag(TIF_POLLING_NRFLAG);
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		if (!idle_uses_rcu)
+			rcu_idle_enter();
+
 		while (!need_resched() && !cpu_should_die()) {
 			ppc64_runlatch_off();
 
@@ -93,7 +102,9 @@ void cpu_idle(void)
 
 		HMT_medium();
 		ppc64_runlatch_on();
-		tick_nohz_restart_sched_tick();
+		if (!idle_uses_rcu)
+			rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		if (cpu_should_die())
 			cpu_die();
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 9ce1672afb5..a2158a395d9 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -107,9 +107,6 @@ void __init reserve_crashkernel(void)
 	unsigned long long crash_size, crash_base;
 	int ret;
 
-	/* this is necessary because of memblock_phys_mem_size() */
-	memblock_analyze();
-
 	/* use common parsing */
 	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
 			&crash_size, &crash_base);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index fa1235b0503..abe405dab34 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -733,8 +733,6 @@ void __init early_init_devtree(void *params)
 	of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line);
 
 	/* Scan memory nodes and rebuild MEMBLOCKs */
-	memblock_init();
-
 	of_scan_flat_dt(early_init_dt_scan_root, NULL);
 	of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
 
@@ -756,20 +754,14 @@ void __init early_init_devtree(void *params)
 	early_reserve_mem();
 	phyp_dump_reserve_mem();
 
-	limit = memory_limit;
-	if (! limit) {
-		phys_addr_t memsize;
-
-		/* Ensure that total memory size is page-aligned, because
-		 * otherwise mark_bootmem() gets upset. */
-		memblock_analyze();
-		memsize = memblock_phys_mem_size();
-		if ((memsize & PAGE_MASK) != memsize)
-			limit = memsize & PAGE_MASK;
-	}
+	/*
+	 * Ensure that total memory size is page-aligned, because otherwise
+	 * mark_bootmem() gets upset.
+	 */
+	limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE);
 	memblock_enforce_memory_limit(limit);
 
-	memblock_analyze();
+	memblock_allow_resize();
 	memblock_dump_all();
 
 	DBG("Phys. mem: %llx\n", memblock_phys_mem_size());
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 161cefde5c1..58861fa1220 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -134,8 +134,7 @@ void __init MMU_init(void)
 
 	if (memblock.memory.cnt > 1) {
 #ifndef CONFIG_WII
-		memblock.memory.cnt = 1;
-		memblock_analyze();
+		memblock_enforce_memory_limit(memblock.memory.regions[0].size);
 		printk(KERN_WARNING "Only using first contiguous memory region");
 #else
 		wii_memory_fixups();
@@ -158,7 +157,6 @@ void __init MMU_init(void)
 #ifndef CONFIG_HIGHMEM
 		total_memory = total_lowmem;
 		memblock_enforce_memory_limit(total_lowmem);
-		memblock_analyze();
 #endif /* CONFIG_HIGHMEM */
 	}
 
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 2dd6bdd31fe..8e2eb6611b0 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -199,7 +199,7 @@ void __init do_init_bootmem(void)
 		unsigned long start_pfn, end_pfn;
 		start_pfn = memblock_region_memory_base_pfn(reg);
 		end_pfn = memblock_region_memory_end_pfn(reg);
-		add_active_range(0, start_pfn, end_pfn);
+		memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
 	}
 
 	/* Add all physical memory to the bootmem map, mark each area
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b22a83a91cb..e6eea0ac80c 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -127,45 +127,25 @@ static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
 }
 
 /*
- * get_active_region_work_fn - A helper function for get_node_active_region
- *	Returns datax set to the start_pfn and end_pfn if they contain
- *	the initial value of datax->start_pfn between them
- * @start_pfn: start page(inclusive) of region to check
- * @end_pfn: end page(exclusive) of region to check
- * @datax: comes in with ->start_pfn set to value to search for and
- *	goes out with active range if it contains it
- * Returns 1 if search value is in range else 0
- */
-static int __init get_active_region_work_fn(unsigned long start_pfn,
-					unsigned long end_pfn, void *datax)
-{
-	struct node_active_region *data;
-	data = (struct node_active_region *)datax;
-
-	if (start_pfn <= data->start_pfn && end_pfn > data->start_pfn) {
-		data->start_pfn = start_pfn;
-		data->end_pfn = end_pfn;
-		return 1;
-	}
-	return 0;
-
-}
-
-/*
- * get_node_active_region - Return active region containing start_pfn
+ * get_node_active_region - Return active region containing pfn
  * Active range returned is empty if none found.
- * @start_pfn: The page to return the region for.
- * @node_ar: Returned set to the active region containing start_pfn
+ * @pfn: The page to return the region for
+ * @node_ar: Returned set to the active region containing @pfn
  */
-static void __init get_node_active_region(unsigned long start_pfn,
-		       struct node_active_region *node_ar)
+static void __init get_node_active_region(unsigned long pfn,
+					  struct node_active_region *node_ar)
 {
-	int nid = early_pfn_to_nid(start_pfn);
+	unsigned long start_pfn, end_pfn;
+	int i, nid;
 
-	node_ar->nid = nid;
-	node_ar->start_pfn = start_pfn;
-	node_ar->end_pfn = start_pfn;
-	work_with_active_regions(nid, get_active_region_work_fn, node_ar);
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
+		if (pfn >= start_pfn && pfn < end_pfn) {
+			node_ar->nid = nid;
+			node_ar->start_pfn = start_pfn;
+			node_ar->end_pfn = end_pfn;
+			break;
+		}
+	}
 }
 
 static void map_cpu_to_node(int cpu, int node)
@@ -710,9 +690,7 @@ static void __init parse_drconf_memory(struct device_node *memory)
 			node_set_online(nid);
 			sz = numa_enforce_memory_limit(base, size);
 			if (sz)
-				add_active_range(nid, base >> PAGE_SHIFT,
-						 (base >> PAGE_SHIFT)
-						 + (sz >> PAGE_SHIFT));
+				memblock_set_node(base, sz, nid);
 		} while (--ranges);
 	}
 }
@@ -802,8 +780,7 @@ new_range:
 				continue;
 		}
 
-		add_active_range(nid, start >> PAGE_SHIFT,
-				(start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
+		memblock_set_node(start, size, nid);
 
 		if (--ranges)
 			goto new_range;
@@ -839,7 +816,8 @@ static void __init setup_nonnuma(void)
 		end_pfn = memblock_region_memory_end_pfn(reg);
 
 		fake_numa_create_new_node(end_pfn, &nid);
-		add_active_range(nid, start_pfn, end_pfn);
+		memblock_set_node(PFN_PHYS(start_pfn),
+				  PFN_PHYS(end_pfn - start_pfn), nid);
 		node_set_online(nid);
 	}
 }
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 4e13d6f9023..573ba3b69d1 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -615,7 +615,6 @@ static void __early_init_mmu(int boot_cpu)
 
 		/* limit memory so we dont have linear faults */
 		memblock_enforce_memory_limit(linear_map_top);
-		memblock_analyze();
 
 		patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
 		patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e);
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
index 1b5dc1a2e14..6d8dadf19f0 100644
--- a/arch/powerpc/platforms/embedded6xx/wii.c
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -79,24 +79,19 @@ void __init wii_memory_fixups(void)
 	BUG_ON(memblock.memory.cnt != 2);
 	BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base));
 
-	p[0].size = _ALIGN_DOWN(p[0].size, PAGE_SIZE);
-	p[1].size = _ALIGN_DOWN(p[1].size, PAGE_SIZE);
+	/* trim unaligned tail */
+	memblock_remove(ALIGN(p[1].base + p[1].size, PAGE_SIZE),
+			(phys_addr_t)ULLONG_MAX);
 
-	wii_hole_start = p[0].base + p[0].size;
+	/* determine hole, add & reserve them */
+	wii_hole_start = ALIGN(p[0].base + p[0].size, PAGE_SIZE);
 	wii_hole_size = p[1].base - wii_hole_start;
-
-	pr_info("MEM1: <%08llx %08llx>\n", p[0].base, p[0].size);
-	pr_info("HOLE: <%08lx %08lx>\n", wii_hole_start, wii_hole_size);
-	pr_info("MEM2: <%08llx %08llx>\n", p[1].base, p[1].size);
-
-	p[0].size += wii_hole_size + p[1].size;
-
-	memblock.memory.cnt = 1;
-	memblock_analyze();
-
-	/* reserve the hole */
+	memblock_add(wii_hole_start, wii_hole_size);
 	memblock_reserve(wii_hole_start, wii_hole_size);
 
+	BUG_ON(memblock.memory.cnt != 1);
+	__memblock_dump_all();
+
 	/* allow ioremapping the address space in the hole */
 	__allow_ioremap_reserved = 1;
 }
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index ea0acbd8966..8fc62586a97 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -563,7 +563,8 @@ static void yield_shared_processor(void)
 static void iseries_shared_idle(void)
 {
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched() && !hvlpevent_is_pending()) {
 			local_irq_disable();
 			ppc64_runlatch_off();
@@ -577,7 +578,8 @@ static void iseries_shared_idle(void)
 		}
 
 		ppc64_runlatch_on();
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 
 		if (hvlpevent_is_pending())
 			process_iSeries_events();
@@ -593,7 +595,8 @@ static void iseries_dedicated_idle(void)
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		if (!need_resched()) {
 			while (!need_resched()) {
 				ppc64_runlatch_off();
@@ -610,7 +613,8 @@ static void iseries_dedicated_idle(void)
 		}
 
 		ppc64_runlatch_on();
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 72714ad2784..8bd6ba54269 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -319,7 +319,6 @@ static int __init ps3_mm_add_memory(void)
 	}
 
 	memblock_add(start_addr, map.r1.size);
-	memblock_analyze();
 
 	result = online_pages(start_pfn, nr_pages);
 
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 27a49508b41..52d429be6c7 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -555,6 +555,8 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
 
 	(*depth)++;
 	trace_hcall_entry(opcode, args);
+	if (opcode == H_CEDE)
+		rcu_idle_enter();
 	(*depth)--;
 
 out:
@@ -575,6 +577,8 @@ void __trace_hcall_exit(long opcode, unsigned long retval,
 		goto out;
 
 	(*depth)++;
+	if (opcode == H_CEDE)
+		rcu_idle_exit();
 	trace_hcall_exit(opcode, retval, retbuf);
 	(*depth)--;
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 373679b3744..d48ede33443 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -92,6 +92,9 @@ config S390
 	select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
 	select HAVE_RCU_TABLE_FREE if SMP
 	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
+	select HAVE_MEMBLOCK
+	select HAVE_MEMBLOCK_NODE_MAP
+	select ARCH_DISCARD_MEMBLOCK
 	select ARCH_INLINE_SPIN_TRYLOCK
 	select ARCH_INLINE_SPIN_TRYLOCK_BH
 	select ARCH_INLINE_SPIN_LOCK
@@ -345,9 +348,6 @@ config WARN_DYNAMIC_STACK
 
 	  Say N if you are unsure.
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 comment "Kernel preemption"
 
 source "kernel/Kconfig.preempt"
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 92f1cb745d6..4de031d6b76 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -115,21 +115,21 @@ static void appldata_get_os_data(void *data)
 	j = 0;
 	for_each_online_cpu(i) {
 		os_data->os_cpu[j].per_cpu_user =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.user);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
 		os_data->os_cpu[j].per_cpu_nice =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.nice);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
 		os_data->os_cpu[j].per_cpu_system =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.system);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
 		os_data->os_cpu[j].per_cpu_idle =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.idle);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
 		os_data->os_cpu[j].per_cpu_irq =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.irq);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
 		os_data->os_cpu[j].per_cpu_softirq =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.softirq);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
 		os_data->os_cpu[j].per_cpu_iowait =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.iowait);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
 		os_data->os_cpu[j].per_cpu_steal =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.steal);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
 		os_data->os_cpu[j].cpu_id = i;
 		j++;
 	}
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index b9acaaa175d..c23c3900c30 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -16,75 +16,60 @@
 
 /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
 
-typedef unsigned long long cputime_t;
-typedef unsigned long long cputime64_t;
+typedef unsigned long long __nocast cputime_t;
+typedef unsigned long long __nocast cputime64_t;
 
-#ifndef __s390x__
-
-static inline unsigned int
-__div(unsigned long long n, unsigned int base)
+static inline unsigned long __div(unsigned long long n, unsigned long base)
 {
+#ifndef __s390x__
 	register_pair rp;
 
 	rp.pair = n >> 1;
 	asm ("dr %0,%1" : "+d" (rp) : "d" (base >> 1));
 	return rp.subreg.odd;
+#else /* __s390x__ */
+	return n / base;
+#endif /* __s390x__ */
 }
 
-#else /* __s390x__ */
+#define cputime_one_jiffy		jiffies_to_cputime(1)
 
-static inline unsigned int
-__div(unsigned long long n, unsigned int base)
+/*
+ * Convert cputime to jiffies and back.
+ */
+static inline unsigned long cputime_to_jiffies(const cputime_t cputime)
 {
-	return n / base;
+	return __div((__force unsigned long long) cputime, 4096000000ULL / HZ);
 }
 
-#endif /* __s390x__ */
+static inline cputime_t jiffies_to_cputime(const unsigned int jif)
+{
+	return (__force cputime_t)(jif * (4096000000ULL / HZ));
+}
 
-#define cputime_zero			(0ULL)
-#define cputime_one_jiffy		jiffies_to_cputime(1)
-#define cputime_max			((~0UL >> 1) - 1)
-#define cputime_add(__a, __b)		((__a) +  (__b))
-#define cputime_sub(__a, __b)		((__a) -  (__b))
-#define cputime_div(__a, __n) ({		\
-	unsigned long long __div = (__a);	\
-	do_div(__div,__n);			\
-	__div;					\
-})
-#define cputime_halve(__a)		((__a) >> 1)
-#define cputime_eq(__a, __b)		((__a) == (__b))
-#define cputime_gt(__a, __b)		((__a) >  (__b))
-#define cputime_ge(__a, __b)		((__a) >= (__b))
-#define cputime_lt(__a, __b)		((__a) <  (__b))
-#define cputime_le(__a, __b)		((__a) <= (__b))
-#define cputime_to_jiffies(__ct)	(__div((__ct), 4096000000ULL / HZ))
-#define cputime_to_scaled(__ct)		(__ct)
-#define jiffies_to_cputime(__hz)	((cputime_t)(__hz) * (4096000000ULL / HZ))
-
-#define cputime64_zero			(0ULL)
-#define cputime64_add(__a, __b)		((__a) + (__b))
-#define cputime_to_cputime64(__ct)	(__ct)
-
-static inline u64
-cputime64_to_jiffies64(cputime64_t cputime)
-{
-	do_div(cputime, 4096000000ULL / HZ);
-	return cputime;
+static inline u64 cputime64_to_jiffies64(cputime64_t cputime)
+{
+	unsigned long long jif = (__force unsigned long long) cputime;
+	do_div(jif, 4096000000ULL / HZ);
+	return jif;
+}
+
+static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
+{
+	return (__force cputime64_t)(jif * (4096000000ULL / HZ));
 }
 
 /*
  * Convert cputime to microseconds and back.
  */
-static inline unsigned int
-cputime_to_usecs(const cputime_t cputime)
+static inline unsigned int cputime_to_usecs(const cputime_t cputime)
 {
-	return cputime_div(cputime, 4096);
+	return (__force unsigned long long) cputime >> 12;
 }
 
-static inline cputime_t
-usecs_to_cputime(const unsigned int m)
+static inline cputime_t usecs_to_cputime(const unsigned int m)
 {
-	return (cputime_t) m * 4096;
+	return (__force cputime_t)(m * 4096ULL);
 }
 
 #define usecs_to_cputime64(m)		usecs_to_cputime(m)
@@ -92,40 +77,39 @@ usecs_to_cputime(const unsigned int m)
 /*
  * Convert cputime to milliseconds and back.
  */
-static inline unsigned int
-cputime_to_secs(const cputime_t cputime)
+static inline unsigned int cputime_to_secs(const cputime_t cputime)
 {
-	return __div(cputime, 2048000000) >> 1;
+	return __div((__force unsigned long long) cputime, 2048000000) >> 1;
 }
 
-static inline cputime_t
-secs_to_cputime(const unsigned int s)
+static inline cputime_t secs_to_cputime(const unsigned int s)
 {
-	return (cputime_t) s * 4096000000ULL;
+	return (__force cputime_t)(s * 4096000000ULL);
 }
 
 /*
  * Convert cputime to timespec and back.
  */
-static inline cputime_t
-timespec_to_cputime(const struct timespec *value)
+static inline cputime_t timespec_to_cputime(const struct timespec *value)
 {
-	return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL;
+	unsigned long long ret = value->tv_sec * 4096000000ULL;
+	return (__force cputime_t)(ret + value->tv_nsec * 4096 / 1000);
 }
 
-static inline void
-cputime_to_timespec(const cputime_t cputime, struct timespec *value)
+static inline void cputime_to_timespec(const cputime_t cputime,
+				       struct timespec *value)
 {
+	unsigned long long __cputime = (__force unsigned long long) cputime;
 #ifndef __s390x__
 	register_pair rp;
 
-	rp.pair = cputime >> 1;
+	rp.pair = __cputime >> 1;
 	asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
 	value->tv_nsec = rp.subreg.even * 1000 / 4096;
 	value->tv_sec = rp.subreg.odd;
 #else
-	value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096;
-	value->tv_sec = cputime / 4096000000ULL;
+	value->tv_nsec = (__cputime % 4096000000ULL) * 1000 / 4096;
+	value->tv_sec = __cputime / 4096000000ULL;
 #endif
 }
 
@@ -134,50 +118,52 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
  * Since cputime and timeval have the same resolution (microseconds)
  * this is easy.
  */
-static inline cputime_t
-timeval_to_cputime(const struct timeval *value)
+static inline cputime_t timeval_to_cputime(const struct timeval *value)
 {
-	return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL;
+	unsigned long long ret = value->tv_sec * 4096000000ULL;
+	return (__force cputime_t)(ret + value->tv_usec * 4096ULL);
 }
 
-static inline void
-cputime_to_timeval(const cputime_t cputime, struct timeval *value)
+static inline void cputime_to_timeval(const cputime_t cputime,
+				      struct timeval *value)
 {
+	unsigned long long __cputime = (__force unsigned long long) cputime;
 #ifndef __s390x__
 	register_pair rp;
 
-	rp.pair = cputime >> 1;
+	rp.pair = __cputime >> 1;
 	asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
 	value->tv_usec = rp.subreg.even / 4096;
 	value->tv_sec = rp.subreg.odd;
 #else
-	value->tv_usec = (cputime % 4096000000ULL) / 4096;
-	value->tv_sec = cputime / 4096000000ULL;
+	value->tv_usec = (__cputime % 4096000000ULL) / 4096;
+	value->tv_sec = __cputime / 4096000000ULL;
 #endif
 }
 
 /*
  * Convert cputime to clock and back.
  */
-static inline clock_t
-cputime_to_clock_t(cputime_t cputime)
+static inline clock_t cputime_to_clock_t(cputime_t cputime)
 {
-	return cputime_div(cputime, 4096000000ULL / USER_HZ);
+	unsigned long long clock = (__force unsigned long long) cputime;
+	do_div(clock, 4096000000ULL / USER_HZ);
+	return clock;
 }
 
-static inline cputime_t
-clock_t_to_cputime(unsigned long x)
+static inline cputime_t clock_t_to_cputime(unsigned long x)
 {
-	return (cputime_t) x * (4096000000ULL / USER_HZ);
+	return (__force cputime_t)(x * (4096000000ULL / USER_HZ));
 }
 
 /*
  * Convert cputime64 to clock.
  */
-static inline clock_t
-cputime64_to_clock_t(cputime64_t cputime)
+static inline clock_t cputime64_to_clock_t(cputime64_t cputime)
 {
-       return cputime_div(cputime, 4096000000ULL / USER_HZ);
+	unsigned long long clock = (__force unsigned long long) cputime;
+	do_div(clock, 4096000000ULL / USER_HZ);
+	return clock;
 }
 
 struct s390_idle_data {
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 9451b210a1b..3201ae44799 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -91,10 +91,12 @@ static void default_idle(void)
 void cpu_idle(void)
 {
 	for (;;) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched())
 			default_idle();
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index e54c4ff8aba..f11d1b037c5 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
@@ -820,7 +821,8 @@ setup_memory(void)
 		end_chunk = min(end_chunk, end_pfn);
 		if (start_chunk >= end_chunk)
 			continue;
-		add_active_range(0, start_chunk, end_chunk);
+		memblock_add_node(PFN_PHYS(start_chunk),
+				  PFN_PHYS(end_chunk - start_chunk), 0);
 		pfn = max(start_chunk, start_pfn);
 		for (; pfn < end_chunk; pfn++)
 			page_set_storage_key(PFN_PHYS(pfn),
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index f43c0e4282a..9daee91e6c3 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -22,6 +22,7 @@
 #include <asm/irq.h>
 
 #include "hwsampler.h"
+#include "op_counter.h"
 
 #define MAX_NUM_SDB 511
 #define MIN_NUM_SDB 1
@@ -896,6 +897,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
 		if (sample_data_ptr->P == 1) {
 			/* userspace sample */
 			unsigned int pid = sample_data_ptr->prim_asn;
+			if (!counter_config.user)
+				goto skip_sample;
 			rcu_read_lock();
 			tsk = pid_task(find_vpid(pid), PIDTYPE_PID);
 			if (tsk)
@@ -903,6 +906,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
 			rcu_read_unlock();
 		} else {
 			/* kernelspace sample */
+			if (!counter_config.kernel)
+				goto skip_sample;
 			regs = task_pt_regs(current);
 		}
 
@@ -910,7 +915,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
 		oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0,
 				!sample_data_ptr->P, tsk);
 		mutex_unlock(&hws_sem);
-
+	skip_sample:
 		sample_data_ptr++;
 	}
 }
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index bd58b72454c..2297be406c6 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -2,10 +2,11 @@
  * arch/s390/oprofile/init.c
  *
  * S390 Version
- *   Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ *   Copyright (C) 2002-2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
  *   Author(s): Thomas Spatzier (tspat@de.ibm.com)
  *   Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
  *   Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
+ *   Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
  *
  * @remark Copyright 2002-2011 OProfile authors
  */
@@ -14,6 +15,8 @@
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/module.h>
+#include <asm/processor.h>
 
 #include "../../../drivers/oprofile/oprof.h"
 
@@ -22,6 +25,7 @@ extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
 #ifdef CONFIG_64BIT
 
 #include "hwsampler.h"
+#include "op_counter.h"
 
 #define DEFAULT_INTERVAL	4127518
 
@@ -35,16 +39,41 @@ static unsigned long oprofile_max_interval;
 static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
 static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
 
-static int hwsampler_file;
+static int hwsampler_enabled;
 static int hwsampler_running;	/* start_mutex must be held to change */
+static int hwsampler_available;
 
 static struct oprofile_operations timer_ops;
 
+struct op_counter_config counter_config;
+
+enum __force_cpu_type {
+	reserved = 0,		/* do not force */
+	timer,
+};
+static int force_cpu_type;
+
+static int set_cpu_type(const char *str, struct kernel_param *kp)
+{
+	if (!strcmp(str, "timer")) {
+		force_cpu_type = timer;
+		printk(KERN_INFO "oprofile: forcing timer to be returned "
+		                 "as cpu type\n");
+	} else {
+		force_cpu_type = 0;
+	}
+
+	return 0;
+}
+module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
+MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
+		           "(report cpu_type \"timer\"");
+
 static int oprofile_hwsampler_start(void)
 {
 	int retval;
 
-	hwsampler_running = hwsampler_file;
+	hwsampler_running = hwsampler_enabled;
 
 	if (!hwsampler_running)
 		return timer_ops.start();
@@ -72,10 +101,16 @@ static void oprofile_hwsampler_stop(void)
 	return;
 }
 
+/*
+ * File ops used for:
+ * /dev/oprofile/0/enabled
+ * /dev/oprofile/hwsampling/hwsampler  (cpu_type = timer)
+ */
+
 static ssize_t hwsampler_read(struct file *file, char __user *buf,
 		size_t count, loff_t *offset)
 {
-	return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset);
+	return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
 }
 
 static ssize_t hwsampler_write(struct file *file, char const __user *buf,
@@ -91,6 +126,9 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
 	if (retval <= 0)
 		return retval;
 
+	if (val != 0 && val != 1)
+		return -EINVAL;
+
 	if (oprofile_started)
 		/*
 		 * save to do without locking as we set
@@ -99,7 +137,7 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
 		 */
 		return -EBUSY;
 
-	hwsampler_file = val;
+	hwsampler_enabled = val;
 
 	return count;
 }
@@ -109,38 +147,311 @@ static const struct file_operations hwsampler_fops = {
 	.write		= hwsampler_write,
 };
 
+/*
+ * File ops used for:
+ * /dev/oprofile/0/count
+ * /dev/oprofile/hwsampling/hw_interval  (cpu_type = timer)
+ *
+ * Make sure that the value is within the hardware range.
+ */
+
+static ssize_t hw_interval_read(struct file *file, char __user *buf,
+				size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
+					count, offset);
+}
+
+static ssize_t hw_interval_write(struct file *file, char const __user *buf,
+				 size_t count, loff_t *offset)
+{
+	unsigned long val;
+	int retval;
+
+	if (*offset)
+		return -EINVAL;
+	retval = oprofilefs_ulong_from_user(&val, buf, count);
+	if (retval)
+		return retval;
+	if (val < oprofile_min_interval)
+		oprofile_hw_interval = oprofile_min_interval;
+	else if (val > oprofile_max_interval)
+		oprofile_hw_interval = oprofile_max_interval;
+	else
+		oprofile_hw_interval = val;
+
+	return count;
+}
+
+static const struct file_operations hw_interval_fops = {
+	.read		= hw_interval_read,
+	.write		= hw_interval_write,
+};
+
+/*
+ * File ops used for:
+ * /dev/oprofile/0/event
+ * Only a single event with number 0 is supported with this counter.
+ *
+ * /dev/oprofile/0/unit_mask
+ * This is a dummy file needed by the user space tools.
+ * No value other than 0 is accepted or returned.
+ */
+
+static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
+				    size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(0, buf, count, offset);
+}
+
+static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
+				     size_t count, loff_t *offset)
+{
+	unsigned long val;
+	int retval;
+
+	if (*offset)
+		return -EINVAL;
+
+	retval = oprofilefs_ulong_from_user(&val, buf, count);
+	if (retval)
+		return retval;
+	if (val != 0)
+		return -EINVAL;
+	return count;
+}
+
+static const struct file_operations zero_fops = {
+	.read		= hwsampler_zero_read,
+	.write		= hwsampler_zero_write,
+};
+
+/* /dev/oprofile/0/kernel file ops.  */
+
+static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
+				     size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(counter_config.kernel,
+					buf, count, offset);
+}
+
+static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
+				      size_t count, loff_t *offset)
+{
+	unsigned long val;
+	int retval;
+
+	if (*offset)
+		return -EINVAL;
+
+	retval = oprofilefs_ulong_from_user(&val, buf, count);
+	if (retval)
+		return retval;
+
+	if (val != 0 && val != 1)
+		return -EINVAL;
+
+	counter_config.kernel = val;
+
+	return count;
+}
+
+static const struct file_operations kernel_fops = {
+	.read		= hwsampler_kernel_read,
+	.write		= hwsampler_kernel_write,
+};
+
+/* /dev/oprofile/0/user file ops. */
+
+static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
+				   size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(counter_config.user,
+					buf, count, offset);
+}
+
+static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
+				    size_t count, loff_t *offset)
+{
+	unsigned long val;
+	int retval;
+
+	if (*offset)
+		return -EINVAL;
+
+	retval = oprofilefs_ulong_from_user(&val, buf, count);
+	if (retval)
+		return retval;
+
+	if (val != 0 && val != 1)
+		return -EINVAL;
+
+	counter_config.user = val;
+
+	return count;
+}
+
+static const struct file_operations user_fops = {
+	.read		= hwsampler_user_read,
+	.write		= hwsampler_user_write,
+};
+
+
+/*
+ * File ops used for: /dev/oprofile/timer/enabled
+ * The value always has to be the inverted value of hwsampler_enabled. So
+ * no separate variable is created. That way we do not need locking.
+ */
+
+static ssize_t timer_enabled_read(struct file *file, char __user *buf,
+				  size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
+}
+
+static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
+				   size_t count, loff_t *offset)
+{
+	unsigned long val;
+	int retval;
+
+	if (*offset)
+		return -EINVAL;
+
+	retval = oprofilefs_ulong_from_user(&val, buf, count);
+	if (retval)
+		return retval;
+
+	if (val != 0 && val != 1)
+		return -EINVAL;
+
+	/* Timer cannot be disabled without having hardware sampling.  */
+	if (val == 0 && !hwsampler_available)
+		return -EINVAL;
+
+	if (oprofile_started)
+		/*
+		 * save to do without locking as we set
+		 * hwsampler_running in start() when start_mutex is
+		 * held
+		 */
+		return -EBUSY;
+
+	hwsampler_enabled = !val;
+
+	return count;
+}
+
+static const struct file_operations timer_enabled_fops = {
+	.read		= timer_enabled_read,
+	.write		= timer_enabled_write,
+};
+
+
 static int oprofile_create_hwsampling_files(struct super_block *sb,
-						struct dentry *root)
+					    struct dentry *root)
 {
-	struct dentry *hw_dir;
+	struct dentry *dir;
+
+	dir = oprofilefs_mkdir(sb, root, "timer");
+	if (!dir)
+		return -EINVAL;
+
+	oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops);
+
+	if (!hwsampler_available)
+		return 0;
 
 	/* reinitialize default values */
-	hwsampler_file = 1;
+	hwsampler_enabled = 1;
+	counter_config.kernel = 1;
+	counter_config.user = 1;
 
-	hw_dir = oprofilefs_mkdir(sb, root, "hwsampling");
-	if (!hw_dir)
-		return -EINVAL;
+	if (!force_cpu_type) {
+		/*
+		 * Create the counter file system.  A single virtual
+		 * counter is created which can be used to
+		 * enable/disable hardware sampling dynamically from
+		 * user space.  The user space will configure a single
+		 * counter with a single event.  The value of 'event'
+		 * and 'unit_mask' are not evaluated by the kernel code
+		 * and can only be set to 0.
+		 */
+
+		dir = oprofilefs_mkdir(sb, root, "0");
+		if (!dir)
+			return -EINVAL;
 
-	oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops);
-	oprofilefs_create_ulong(sb, hw_dir, "hw_interval",
-				&oprofile_hw_interval);
-	oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval",
-				&oprofile_min_interval);
-	oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval",
-				&oprofile_max_interval);
-	oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks",
-				&oprofile_sdbt_blocks);
+		oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops);
+		oprofilefs_create_file(sb, dir, "event", &zero_fops);
+		oprofilefs_create_file(sb, dir, "count", &hw_interval_fops);
+		oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops);
+		oprofilefs_create_file(sb, dir, "kernel", &kernel_fops);
+		oprofilefs_create_file(sb, dir, "user", &user_fops);
+		oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
+					&oprofile_sdbt_blocks);
 
+	} else {
+		/*
+		 * Hardware sampling can be used but the cpu_type is
+		 * forced to timer in order to deal with legacy user
+		 * space tools.  The /dev/oprofile/hwsampling fs is
+		 * provided in that case.
+		 */
+		dir = oprofilefs_mkdir(sb, root, "hwsampling");
+		if (!dir)
+			return -EINVAL;
+
+		oprofilefs_create_file(sb, dir, "hwsampler",
+				       &hwsampler_fops);
+		oprofilefs_create_file(sb, dir, "hw_interval",
+				       &hw_interval_fops);
+		oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval",
+					   &oprofile_min_interval);
+		oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval",
+					   &oprofile_max_interval);
+		oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
+					&oprofile_sdbt_blocks);
+	}
 	return 0;
 }
 
 static int oprofile_hwsampler_init(struct oprofile_operations *ops)
 {
+	/*
+	 * Initialize the timer mode infrastructure as well in order
+	 * to be able to switch back dynamically.  oprofile_timer_init
+	 * is not supposed to fail.
+	 */
+	if (oprofile_timer_init(ops))
+		BUG();
+
+	memcpy(&timer_ops, ops, sizeof(timer_ops));
+	ops->create_files = oprofile_create_hwsampling_files;
+
+	/*
+	 * If the user space tools do not support newer cpu types,
+	 * the force_cpu_type module parameter
+	 * can be used to always return \"timer\" as cpu type.
+	 */
+	if (force_cpu_type != timer) {
+		struct cpuid id;
+
+		get_cpu_id (&id);
+
+		switch (id.machine) {
+		case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
+		case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
+		default: return -ENODEV;
+		}
+	}
+
 	if (hwsampler_setup())
 		return -ENODEV;
 
 	/*
-	 * create hwsampler files only if hwsampler_setup() succeeds.
+	 * Query the range for the sampling interval from the
+	 * hardware.
 	 */
 	oprofile_min_interval = hwsampler_query_min_interval();
 	if (oprofile_min_interval == 0)
@@ -155,23 +466,17 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
 	if (oprofile_hw_interval > oprofile_max_interval)
 		oprofile_hw_interval = oprofile_max_interval;
 
-	if (oprofile_timer_init(ops))
-		return -ENODEV;
-
-	printk(KERN_INFO "oprofile: using hardware sampling\n");
-
-	memcpy(&timer_ops, ops, sizeof(timer_ops));
+	printk(KERN_INFO "oprofile: System z hardware sampling "
+	       "facility found.\n");
 
 	ops->start = oprofile_hwsampler_start;
 	ops->stop = oprofile_hwsampler_stop;
-	ops->create_files = oprofile_create_hwsampling_files;
 
 	return 0;
 }
 
 static void oprofile_hwsampler_exit(void)
 {
-	oprofile_timer_exit();
 	hwsampler_shutdown();
 }
 
@@ -182,7 +487,15 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 	ops->backtrace = s390_backtrace;
 
 #ifdef CONFIG_64BIT
-	return oprofile_hwsampler_init(ops);
+
+	/*
+	 * -ENODEV is not reported to the caller.  The module itself
+         * will use the timer mode sampling as fallback and this is
+         * always available.
+	 */
+	hwsampler_available = oprofile_hwsampler_init(ops) == 0;
+
+	return 0;
 #else
 	return -ENODEV;
 #endif
diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h
new file mode 100644
index 00000000000..1a8d3ca0901
--- /dev/null
+++ b/arch/s390/oprofile/op_counter.h
@@ -0,0 +1,23 @@
+/**
+ * arch/s390/oprofile/op_counter.h
+ *
+ *   Copyright (C) 2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ *   Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
+ *
+ * @remark Copyright 2011 OProfile authors
+ */
+
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+
+struct op_counter_config {
+	/* `enabled' maps to the hwsampler_file variable.  */
+	/* `count' maps to the oprofile_hw_interval variable.  */
+	/* `event' and `unit_mask' are unused. */
+	unsigned long kernel;
+	unsigned long user;
+};
+
+extern struct op_counter_config counter_config;
+
+#endif /* OP_COUNTER_H */
diff --git a/arch/score/Kconfig b/arch/score/Kconfig
index df169e84db4..8b0c9464aa9 100644
--- a/arch/score/Kconfig
+++ b/arch/score/Kconfig
@@ -4,6 +4,9 @@ config SCORE
        def_bool y
        select HAVE_GENERIC_HARDIRQS
        select GENERIC_IRQ_SHOW
+       select HAVE_MEMBLOCK
+       select HAVE_MEMBLOCK_NODE_MAP
+       select ARCH_DISCARD_MEMBLOCK
 
 choice
 	prompt "System type"
@@ -60,9 +63,6 @@ config 32BIT
 config ARCH_FLATMEM_ENABLE
 	def_bool y
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 source "mm/Kconfig"
 
 config MEMORY_START
diff --git a/arch/score/kernel/setup.c b/arch/score/kernel/setup.c
index 6f898c05787..b48459afefd 100644
--- a/arch/score/kernel/setup.c
+++ b/arch/score/kernel/setup.c
@@ -26,6 +26,7 @@
 #include <linux/bootmem.h>
 #include <linux/initrd.h>
 #include <linux/ioport.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/seq_file.h>
 #include <linux/screen_info.h>
@@ -54,7 +55,8 @@ static void __init bootmem_init(void)
 	/* Initialize the boot-time allocator with low memory only. */
 	bootmap_size = init_bootmem_node(NODE_DATA(0), start_pfn,
 					 min_low_pfn, max_low_pfn);
-	add_active_range(0, min_low_pfn, max_low_pfn);
+	memblock_add_node(PFN_PHYS(min_low_pfn),
+			  PFN_PHYS(max_low_pfn - min_low_pfn), 0);
 
 	free_bootmem(PFN_PHYS(start_pfn),
 		     (max_low_pfn - start_pfn) << PAGE_SHIFT);
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 5629e209913..47a2f1c2cb0 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -4,6 +4,7 @@ config SUPERH
 	select CLKDEV_LOOKUP
 	select HAVE_IDE if HAS_IOPORT
 	select HAVE_MEMBLOCK
+	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_OPROFILE
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_ARCH_TRACEHOOK
diff --git a/arch/sh/include/asm/memblock.h b/arch/sh/include/asm/memblock.h
deleted file mode 100644
index e87063fad2e..00000000000
--- a/arch/sh/include/asm/memblock.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __ASM_SH_MEMBLOCK_H
-#define __ASM_SH_MEMBLOCK_H
-
-#endif /* __ASM_SH_MEMBLOCK_H */
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index db4ecd731a0..406508d4ce7 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -89,7 +89,8 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 
 		while (!need_resched()) {
 			check_pgt_cache();
@@ -111,7 +112,8 @@ void cpu_idle(void)
 			start_critical_timings();
 		}
 
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c
index c5a33f007f8..9fea49f6e66 100644
--- a/arch/sh/kernel/machine_kexec.c
+++ b/arch/sh/kernel/machine_kexec.c
@@ -157,9 +157,6 @@ void __init reserve_crashkernel(void)
 	unsigned long long crash_size, crash_base;
 	int ret;
 
-	/* this is necessary because of memblock_phys_mem_size() */
-	memblock_analyze();
-
 	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
 			&crash_size, &crash_base);
 	if (ret == 0 && crash_size > 0) {
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 1a0e946679a..7b57bf1dc85 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -230,7 +230,8 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
 	pmb_bolt_mapping((unsigned long)__va(start), start, end - start,
 			 PAGE_KERNEL);
 
-	add_active_range(nid, start_pfn, end_pfn);
+	memblock_set_node(PFN_PHYS(start_pfn),
+			  PFN_PHYS(end_pfn - start_pfn), nid);
 }
 
 void __init __weak plat_early_device_setup(void)
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index c3e61b36649..cb8f9920f4d 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -143,9 +143,6 @@ config MAX_ACTIVE_REGIONS
 		       CPU_SUBTYPE_SH7785)
 	default "1"
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 939ca0f356f..82cc576fab1 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -324,7 +324,6 @@ void __init paging_init(void)
 	unsigned long vaddr, end;
 	int nid;
 
-	memblock_init();
 	sh_mv.mv_mem_init();
 
 	early_reserve_mem();
@@ -337,7 +336,7 @@ void __init paging_init(void)
 		sh_mv.mv_mem_reserve();
 
 	memblock_enforce_memory_limit(memory_limit);
-	memblock_analyze();
+	memblock_allow_resize();
 
 	memblock_dump_all();
 
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index f92602e8660..70ae9d81870 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -43,6 +43,7 @@ config SPARC64
 	select HAVE_KPROBES
 	select HAVE_RCU_TABLE_FREE if SMP
 	select HAVE_MEMBLOCK
+	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_SYSCALL_WRAPPERS
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
@@ -352,9 +353,6 @@ config NODES_SPAN_OTHER_NODES
 	def_bool y
 	depends on NEED_MULTIPLE_NODES
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y if SPARC64
-
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y if SPARC64
 
diff --git a/arch/sparc/include/asm/memblock.h b/arch/sparc/include/asm/memblock.h
deleted file mode 100644
index c67b047ef85..00000000000
--- a/arch/sparc/include/asm/memblock.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _SPARC64_MEMBLOCK_H
-#define _SPARC64_MEMBLOCK_H
-
-#include <asm/oplib.h>
-
-#define MEMBLOCK_DBG(fmt...) prom_printf(fmt)
-
-#endif /* !(_SPARC64_MEMBLOCK_H) */
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 3739a06a76c..39d8b05201a 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -95,12 +95,14 @@ void cpu_idle(void)
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	while(1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 
 		while (!need_resched() && !cpu_is_offline(cpu))
 			sparc64_yield(cpu);
 
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 
 		preempt_enable_no_resched();
 
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index fe1e3fc31bc..ffb883ddd0f 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c
@@ -84,7 +84,7 @@ static void prom_sync_me(void)
 
 	prom_printf("PROM SYNC COMMAND...\n");
 	show_free_areas(0);
-	if(current->pid != 0) {
+	if (!is_idle_task(current)) {
 		local_irq_enable();
 		sys_sync();
 		local_irq_disable();
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 8e073d80213..b3f5e7dfea5 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -790,7 +790,7 @@ static int find_node(unsigned long addr)
 	return -1;
 }
 
-u64 memblock_nid_range(u64 start, u64 end, int *nid)
+static u64 memblock_nid_range(u64 start, u64 end, int *nid)
 {
 	*nid = find_node(start);
 	start += PAGE_SIZE;
@@ -808,7 +808,7 @@ u64 memblock_nid_range(u64 start, u64 end, int *nid)
 	return start;
 }
 #else
-u64 memblock_nid_range(u64 start, u64 end, int *nid)
+static u64 memblock_nid_range(u64 start, u64 end, int *nid)
 {
 	*nid = 0;
 	return end;
@@ -816,7 +816,7 @@ u64 memblock_nid_range(u64 start, u64 end, int *nid)
 #endif
 
 /* This must be invoked after performing all of the necessary
- * add_active_range() calls for 'nid'.  We need to be able to get
+ * memblock_set_node() calls for 'nid'.  We need to be able to get
  * correct data from get_pfn_range_for_nid().
  */
 static void __init allocate_node_data(int nid)
@@ -987,14 +987,11 @@ static void __init add_node_ranges(void)
 
 			this_end = memblock_nid_range(start, end, &nid);
 
-			numadbg("Adding active range nid[%d] "
+			numadbg("Setting memblock NUMA node nid[%d] "
 				"start[%lx] end[%lx]\n",
 				nid, start, this_end);
 
-			add_active_range(nid,
-					 start >> PAGE_SHIFT,
-					 this_end >> PAGE_SHIFT);
-
+			memblock_set_node(start, this_end - start, nid);
 			start = this_end;
 		}
 	}
@@ -1282,7 +1279,6 @@ static void __init bootmem_init_nonnuma(void)
 {
 	unsigned long top_of_ram = memblock_end_of_DRAM();
 	unsigned long total_ram = memblock_phys_mem_size();
-	struct memblock_region *reg;
 
 	numadbg("bootmem_init_nonnuma()\n");
 
@@ -1292,20 +1288,8 @@ static void __init bootmem_init_nonnuma(void)
 	       (top_of_ram - total_ram) >> 20);
 
 	init_node_masks_nonnuma();
-
-	for_each_memblock(memory, reg) {
-		unsigned long start_pfn, end_pfn;
-
-		if (!reg->size)
-			continue;
-
-		start_pfn = memblock_region_memory_base_pfn(reg);
-		end_pfn = memblock_region_memory_end_pfn(reg);
-		add_active_range(0, start_pfn, end_pfn);
-	}
-
+	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
 	allocate_node_data(0);
-
 	node_set_online(0);
 }
 
@@ -1769,8 +1753,6 @@ void __init paging_init(void)
 		sun4v_ktsb_init();
 	}
 
-	memblock_init();
-
 	/* Find available physical memory...
 	 *
 	 * Read it twice in order to work around a bug in openfirmware.
@@ -1796,7 +1778,7 @@ void __init paging_init(void)
 
 	memblock_enforce_memory_limit(cmdline_memory_size);
 
-	memblock_analyze();
+	memblock_allow_resize();
 	memblock_dump_all();
 
 	set_bit(0, mmu_context_bmap);
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 9c45d8bbdf5..4c1ac6e5347 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -85,7 +85,8 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched()) {
 			if (cpu_is_offline(cpu))
 				BUG();  /* no HOTPLUG_CPU */
@@ -105,7 +106,8 @@ void cpu_idle(void)
 				local_irq_enable();
 			current_thread_info()->status |= TS_POLLING;
 		}
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 25b7b90fd62..c1eaaa1fcc2 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -54,7 +54,7 @@ static noinline void force_sig_info_fault(const char *type, int si_signo,
 	if (unlikely(tsk->pid < 2)) {
 		panic("Signal %d (code %d) at %#lx sent to %s!",
 		      si_signo, si_code & 0xffff, address,
-		      tsk->pid ? "init" : "the idle task");
+		      is_idle_task(tsk) ? "the idle task" : "init");
 	}
 
 	info.si_signo = si_signo;
@@ -515,7 +515,7 @@ no_context:
 
 	if (unlikely(tsk->pid < 2)) {
 		panic("Kernel page fault running %s!",
-		      tsk->pid ? "init" : "the idle task");
+		      is_idle_task(tsk) ? "the idle task" : "init");
 	}
 
 	/*
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index c5338351aec..69f24905abd 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -246,10 +246,12 @@ void default_idle(void)
 		if (need_resched())
 			schedule();
 
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		nsecs = disable_timer();
 		idle_sleep(nsecs);
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 	}
 }
 
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index a08d9fab81f..82a6e22f1f3 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -75,8 +75,6 @@ static struct clocksource itimer_clocksource = {
 	.rating		= 300,
 	.read		= itimer_read,
 	.mask		= CLOCKSOURCE_MASK(64),
-	.mult		= 1000,
-	.shift		= 0,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -94,9 +92,9 @@ static void __init setup_itimer(void)
 		clockevent_delta2ns(60 * HZ, &itimer_clockevent);
 	itimer_clockevent.min_delta_ns =
 		clockevent_delta2ns(1, &itimer_clockevent);
-	err = clocksource_register(&itimer_clocksource);
+	err = clocksource_register_hz(&itimer_clocksource, USEC_PER_SEC);
 	if (err) {
-		printk(KERN_ERR "clocksource_register returned %d\n", err);
+		printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
 		return;
 	}
 	clockevents_register_device(&itimer_clockevent);
diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c
index ba401df971e..52edc2b6287 100644
--- a/arch/unicore32/kernel/process.c
+++ b/arch/unicore32/kernel/process.c
@@ -55,7 +55,8 @@ void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched()) {
 			local_irq_disable();
 			stop_critical_timings();
@@ -63,7 +64,8 @@ void cpu_idle(void)
 			local_irq_enable();
 			start_critical_timings();
 		}
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c
index 471b6bca8da..673d7a89d8f 100644
--- a/arch/unicore32/kernel/setup.c
+++ b/arch/unicore32/kernel/setup.c
@@ -37,6 +37,7 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/traps.h>
+#include <asm/memblock.h>
 
 #include "setup.h"
 
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index 3b379cddbc6..de186bde897 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -26,6 +26,7 @@
 #include <asm/setup.h>
 #include <asm/sizes.h>
 #include <asm/tlb.h>
+#include <asm/memblock.h>
 #include <mach/map.h>
 
 #include "mm.h"
@@ -245,7 +246,6 @@ void __init uc32_memblock_init(struct meminfo *mi)
 	sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]),
 		meminfo_cmp, NULL);
 
-	memblock_init();
 	for (i = 0; i < mi->nr_banks; i++)
 		memblock_add(mi->bank[i].start, mi->bank[i].size);
 
@@ -264,7 +264,7 @@ void __init uc32_memblock_init(struct meminfo *mi)
 
 	uc32_mm_memblock_reserve();
 
-	memblock_analyze();
+	memblock_allow_resize();
 	memblock_dump_all();
 }
 
diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c
index 3e5c3e5a0b4..43c20b40e44 100644
--- a/arch/unicore32/mm/mmu.c
+++ b/arch/unicore32/mm/mmu.c
@@ -25,6 +25,7 @@
 #include <asm/setup.h>
 #include <asm/sizes.h>
 #include <asm/tlb.h>
+#include <asm/memblock.h>
 
 #include <mach/map.h>
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index efb42949cc0..5731eb70e0a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -26,6 +26,8 @@ config X86
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
 	select HAVE_MEMBLOCK
+	select HAVE_MEMBLOCK_NODE_MAP
+	select ARCH_DISCARD_MEMBLOCK
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_WANT_FRAME_POINTERS
 	select HAVE_DMA_ATTRS
@@ -204,9 +206,6 @@ config ZONE_DMA32
 	bool
 	default X86_64
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 config AUDIT_ARCH
 	bool
 	default X86_64
@@ -343,6 +342,7 @@ config X86_EXTENDED_PLATFORM
 
 	  If you enable this option then you'll be able to select support
 	  for the following (non-PC) 64 bit x86 platforms:
+		Numascale NumaChip
 		ScaleMP vSMP
 		SGI Ultraviolet
 
@@ -351,6 +351,18 @@ config X86_EXTENDED_PLATFORM
 endif
 # This is an alphabetically sorted list of 64 bit extended platforms
 # Please maintain the alphabetic order if and when there are additions
+config X86_NUMACHIP
+	bool "Numascale NumaChip"
+	depends on X86_64
+	depends on X86_EXTENDED_PLATFORM
+	depends on NUMA
+	depends on SMP
+	depends on X86_X2APIC
+	depends on !EDAC_AMD64
+	---help---
+	  Adds support for Numascale NumaChip large-SMP systems. Needed to
+	  enable more than ~168 cores.
+	  If you don't have one of these, you should say N here.
 
 config X86_VSMP
 	bool "ScaleMP vSMP"
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a6253ec1b28..3e274564f6b 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -134,7 +134,7 @@ ENTRY(ia32_sysenter_target)
 	CFI_REL_OFFSET rsp,0
 	pushfq_cfi
 	/*CFI_REL_OFFSET rflags,0*/
-	movl	8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d
+	movl	TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
 	CFI_REGISTER rip,r10
 	pushq_cfi $__USER32_CS
 	/*CFI_REL_OFFSET cs,0*/
@@ -150,9 +150,8 @@ ENTRY(ia32_sysenter_target)
  	.section __ex_table,"a"
  	.quad 1b,ia32_badarg
  	.previous	
-	GET_THREAD_INFO(%r10)
-	orl    $TS_COMPAT,TI_status(%r10)
-	testl  $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	CFI_REMEMBER_STATE
 	jnz  sysenter_tracesys
 	cmpq	$(IA32_NR_syscalls-1),%rax
@@ -162,13 +161,12 @@ sysenter_do_call:
 sysenter_dispatch:
 	call	*ia32_sys_call_table(,%rax,8)
 	movq	%rax,RAX-ARGOFFSET(%rsp)
-	GET_THREAD_INFO(%r10)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK,TI_flags(%r10)
+	testl	$_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz	sysexit_audit
 sysexit_from_sys_call:
-	andl    $~TS_COMPAT,TI_status(%r10)
+	andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	/* clear IF, that popfq doesn't enable interrupts early */
 	andl  $~0x200,EFLAGS-R11(%rsp) 
 	movl	RIP-R11(%rsp),%edx		/* User %eip */
@@ -205,7 +203,7 @@ sysexit_from_sys_call:
 	.endm
 
 	.macro auditsys_exit exit
-	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz ia32_ret_from_sys_call
 	TRACE_IRQS_ON
 	sti
@@ -215,12 +213,11 @@ sysexit_from_sys_call:
 	movzbl %al,%edi		/* zero-extend that into %edi */
 	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
 	call audit_syscall_exit
-	GET_THREAD_INFO(%r10)
 	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall return value */
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	cli
 	TRACE_IRQS_OFF
-	testl %edi,TI_flags(%r10)
+	testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jz \exit
 	CLEAR_RREGS -ARGOFFSET
 	jmp int_with_check
@@ -238,7 +235,7 @@ sysexit_audit:
 
 sysenter_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jz	sysenter_auditsys
 #endif
 	SAVE_REST
@@ -309,9 +306,8 @@ ENTRY(ia32_cstar_target)
 	.section __ex_table,"a"
 	.quad 1b,ia32_badarg
 	.previous	
-	GET_THREAD_INFO(%r10)
-	orl   $TS_COMPAT,TI_status(%r10)
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	CFI_REMEMBER_STATE
 	jnz   cstar_tracesys
 	cmpq $IA32_NR_syscalls-1,%rax
@@ -321,13 +317,12 @@ cstar_do_call:
 cstar_dispatch:
 	call *ia32_sys_call_table(,%rax,8)
 	movq %rax,RAX-ARGOFFSET(%rsp)
-	GET_THREAD_INFO(%r10)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
+	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz sysretl_audit
 sysretl_from_sys_call:
-	andl $~TS_COMPAT,TI_status(%r10)
+	andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	RESTORE_ARGS 0,-ARG_SKIP,0,0,0
 	movl RIP-ARGOFFSET(%rsp),%ecx
 	CFI_REGISTER rip,rcx
@@ -355,7 +350,7 @@ sysretl_audit:
 
 cstar_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jz cstar_auditsys
 #endif
 	xchgl %r9d,%ebp
@@ -420,9 +415,8 @@ ENTRY(ia32_syscall)
 	/* note the registers are not zero extended to the sf.
 	   this could be a problem. */
 	SAVE_ARGS 0,1,0
-	GET_THREAD_INFO(%r10)
-	orl   $TS_COMPAT,TI_status(%r10)
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+	orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz ia32_tracesys
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
@@ -459,8 +453,8 @@ quiet_ni_syscall:
 	CFI_ENDPROC
 	
 	.macro PTREGSCALL label, func, arg
-	.globl \label
-\label:
+	ALIGN
+GLOBAL(\label)
 	leaq \func(%rip),%rax
 	leaq -ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
 	jmp  ia32_ptregs_common	
@@ -477,7 +471,8 @@ quiet_ni_syscall:
 	PTREGSCALL stub32_vfork, sys_vfork, %rdi
 	PTREGSCALL stub32_iopl, sys_iopl, %rsi
 
-ENTRY(ia32_ptregs_common)
+	ALIGN
+ia32_ptregs_common:
 	popq %r11
 	CFI_ENDPROC
 	CFI_STARTPROC32	simple
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 091508b533b..952bd0100c5 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -4,10 +4,10 @@
 
 #ifdef CONFIG_SMP
 	.macro LOCK_PREFIX
-1:	lock
+672:	lock
 	.section .smp_locks,"a"
 	.balign 4
-	.long 1b - .
+	.long 672b - .
 	.previous
 	.endm
 #else
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 1a6c09af048..3ab9bdd87e7 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -176,6 +176,7 @@ static inline u64 native_x2apic_icr_read(void)
 }
 
 extern int x2apic_phys;
+extern int x2apic_preenabled;
 extern void check_x2apic(void);
 extern void enable_x2apic(void);
 extern void x2apic_icr_write(u32 low, u32 id);
@@ -198,6 +199,9 @@ static inline void x2apic_force_phys(void)
 	x2apic_phys = 1;
 }
 #else
+static inline void disable_x2apic(void)
+{
+}
 static inline void check_x2apic(void)
 {
 }
@@ -212,6 +216,7 @@ static inline void x2apic_force_phys(void)
 {
 }
 
+#define	nox2apic	0
 #define	x2apic_preenabled 0
 #define	x2apic_supported()	0
 #endif
@@ -410,6 +415,7 @@ extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
+
 static inline u32 apic_read(u32 reg)
 {
 	return apic->read(reg);
diff --git a/arch/x86/include/asm/apic_flat_64.h b/arch/x86/include/asm/apic_flat_64.h
new file mode 100644
index 00000000000..a2d31279644
--- /dev/null
+++ b/arch/x86/include/asm/apic_flat_64.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_X86_APIC_FLAT_64_H
+#define _ASM_X86_APIC_FLAT_64_H
+
+extern void flat_init_apic_ldr(void);
+
+#endif
+
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 3925d800786..134bba00df0 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -144,6 +144,7 @@
 
 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
 #define APIC_BASE_MSR	0x800
+#define XAPIC_ENABLE	(1UL << 11)
 #define X2APIC_ENABLE	(1UL << 10)
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 1775d6e5920..b97596e2b68 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -380,6 +380,8 @@ static inline unsigned long __fls(unsigned long word)
 	return word;
 }
 
+#undef ADDR
+
 #ifdef __KERNEL__
 /**
  * ffs - find first set bit in word
@@ -395,10 +397,25 @@ static inline unsigned long __fls(unsigned long word)
 static inline int ffs(int x)
 {
 	int r;
-#ifdef CONFIG_X86_CMOV
+
+#ifdef CONFIG_X86_64
+	/*
+	 * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
+	 * dest reg is undefined if x==0, but their CPU architect says its
+	 * value is written to set it to the same as before, except that the
+	 * top 32 bits will be cleared.
+	 *
+	 * We cannot do this on 32 bits because at the very least some
+	 * 486 CPUs did not behave this way.
+	 */
+	long tmp = -1;
+	asm("bsfl %1,%0"
+	    : "=r" (r)
+	    : "rm" (x), "0" (tmp));
+#elif defined(CONFIG_X86_CMOV)
 	asm("bsfl %1,%0\n\t"
 	    "cmovzl %2,%0"
-	    : "=r" (r) : "rm" (x), "r" (-1));
+	    : "=&r" (r) : "rm" (x), "r" (-1));
 #else
 	asm("bsfl %1,%0\n\t"
 	    "jnz 1f\n\t"
@@ -422,7 +439,22 @@ static inline int ffs(int x)
 static inline int fls(int x)
 {
 	int r;
-#ifdef CONFIG_X86_CMOV
+
+#ifdef CONFIG_X86_64
+	/*
+	 * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
+	 * dest reg is undefined if x==0, but their CPU architect says its
+	 * value is written to set it to the same as before, except that the
+	 * top 32 bits will be cleared.
+	 *
+	 * We cannot do this on 32 bits because at the very least some
+	 * 486 CPUs did not behave this way.
+	 */
+	long tmp = -1;
+	asm("bsrl %1,%0"
+	    : "=r" (r)
+	    : "rm" (x), "0" (tmp));
+#elif defined(CONFIG_X86_CMOV)
 	asm("bsrl %1,%0\n\t"
 	    "cmovzl %2,%0"
 	    : "=&r" (r) : "rm" (x), "rm" (-1));
@@ -434,11 +466,35 @@ static inline int fls(int x)
 #endif
 	return r + 1;
 }
-#endif /* __KERNEL__ */
-
-#undef ADDR
 
-#ifdef __KERNEL__
+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @x: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffsll, but returns the position of the most significant set bit.
+ *
+ * fls64(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 64.
+ */
+#ifdef CONFIG_X86_64
+static __always_inline int fls64(__u64 x)
+{
+	long bitpos = -1;
+	/*
+	 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
+	 * dest reg is undefined if x==0, but their CPU architect says its
+	 * value is written to set it to the same as before.
+	 */
+	asm("bsrq %1,%0"
+	    : "+r" (bitpos)
+	    : "rm" (x));
+	return bitpos + 1;
+}
+#else
+#include <asm-generic/bitops/fls64.h>
+#endif
 
 #include <asm-generic/bitops/find.h>
 
@@ -450,12 +506,6 @@ static inline int fls(int x)
 
 #include <asm-generic/bitops/const_hweight.h>
 
-#endif /* __KERNEL__ */
-
-#include <asm-generic/bitops/fls64.h>
-
-#ifdef __KERNEL__
-
 #include <asm-generic/bitops/le.h>
 
 #include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 5d3acdf5a7a..0c9fa2745f1 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -14,6 +14,8 @@ extern void __cmpxchg_wrong_size(void)
 	__compiletime_error("Bad argument size for cmpxchg");
 extern void __xadd_wrong_size(void)
 	__compiletime_error("Bad argument size for xadd");
+extern void __add_wrong_size(void)
+	__compiletime_error("Bad argument size for add");
 
 /*
  * Constants for operation sizes. On 32-bit, the 64-bit size it set to
@@ -31,60 +33,47 @@ extern void __xadd_wrong_size(void)
 #define	__X86_CASE_Q	-1		/* sizeof will never return -1 */
 #endif
 
+/* 
+ * An exchange-type operation, which takes a value and a pointer, and
+ * returns a the old value.
+ */
+#define __xchg_op(ptr, arg, op, lock)					\
+	({								\
+	        __typeof__ (*(ptr)) __ret = (arg);			\
+		switch (sizeof(*(ptr))) {				\
+		case __X86_CASE_B:					\
+			asm volatile (lock #op "b %b0, %1\n"		\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_W:					\
+			asm volatile (lock #op "w %w0, %1\n"		\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_L:					\
+			asm volatile (lock #op "l %0, %1\n"		\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_Q:					\
+			asm volatile (lock #op "q %q0, %1\n"		\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		default:						\
+			__ ## op ## _wrong_size();			\
+		}							\
+		__ret;							\
+	})
+
 /*
  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
  * Since this is generally used to protect other memory information, we
  * use "asm volatile" and "memory" clobbers to prevent gcc from moving
  * information around.
  */
-#define __xchg(x, ptr, size)						\
-({									\
-	__typeof(*(ptr)) __x = (x);					\
-	switch (size) {							\
-	case __X86_CASE_B:						\
-	{								\
-		volatile u8 *__ptr = (volatile u8 *)(ptr);		\
-		asm volatile("xchgb %0,%1"				\
-			     : "=q" (__x), "+m" (*__ptr)		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	}								\
-	case __X86_CASE_W:						\
-	{								\
-		volatile u16 *__ptr = (volatile u16 *)(ptr);		\
-		asm volatile("xchgw %0,%1"				\
-			     : "=r" (__x), "+m" (*__ptr)		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	}								\
-	case __X86_CASE_L:						\
-	{								\
-		volatile u32 *__ptr = (volatile u32 *)(ptr);		\
-		asm volatile("xchgl %0,%1"				\
-			     : "=r" (__x), "+m" (*__ptr)		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	}								\
-	case __X86_CASE_Q:						\
-	{								\
-		volatile u64 *__ptr = (volatile u64 *)(ptr);		\
-		asm volatile("xchgq %0,%1"				\
-			     : "=r" (__x), "+m" (*__ptr)		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	}								\
-	default:							\
-		__xchg_wrong_size();					\
-	}								\
-	__x;								\
-})
-
-#define xchg(ptr, v)							\
-	__xchg((v), (ptr), sizeof(*ptr))
+#define xchg(ptr, v)	__xchg_op((ptr), (v), xchg, "")
 
 /*
  * Atomic compare and exchange.  Compare OLD with MEM, if identical,
@@ -165,46 +154,80 @@ extern void __xadd_wrong_size(void)
 	__cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
 #endif
 
-#define __xadd(ptr, inc, lock)						\
+/*
+ * xadd() adds "inc" to "*ptr" and atomically returns the previous
+ * value of "*ptr".
+ *
+ * xadd() is locked when multiple CPUs are online
+ * xadd_sync() is always locked
+ * xadd_local() is never locked
+ */
+#define __xadd(ptr, inc, lock)	__xchg_op((ptr), (inc), xadd, lock)
+#define xadd(ptr, inc)		__xadd((ptr), (inc), LOCK_PREFIX)
+#define xadd_sync(ptr, inc)	__xadd((ptr), (inc), "lock; ")
+#define xadd_local(ptr, inc)	__xadd((ptr), (inc), "")
+
+#define __add(ptr, inc, lock)						\
 	({								\
 	        __typeof__ (*(ptr)) __ret = (inc);			\
 		switch (sizeof(*(ptr))) {				\
 		case __X86_CASE_B:					\
-			asm volatile (lock "xaddb %b0, %1\n"		\
-				      : "+r" (__ret), "+m" (*(ptr))	\
-				      : : "memory", "cc");		\
+			asm volatile (lock "addb %b1, %0\n"		\
+				      : "+m" (*(ptr)) : "ri" (inc)	\
+				      : "memory", "cc");		\
 			break;						\
 		case __X86_CASE_W:					\
-			asm volatile (lock "xaddw %w0, %1\n"		\
-				      : "+r" (__ret), "+m" (*(ptr))	\
-				      : : "memory", "cc");		\
+			asm volatile (lock "addw %w1, %0\n"		\
+				      : "+m" (*(ptr)) : "ri" (inc)	\
+				      : "memory", "cc");		\
 			break;						\
 		case __X86_CASE_L:					\
-			asm volatile (lock "xaddl %0, %1\n"		\
-				      : "+r" (__ret), "+m" (*(ptr))	\
-				      : : "memory", "cc");		\
+			asm volatile (lock "addl %1, %0\n"		\
+				      : "+m" (*(ptr)) : "ri" (inc)	\
+				      : "memory", "cc");		\
 			break;						\
 		case __X86_CASE_Q:					\
-			asm volatile (lock "xaddq %q0, %1\n"		\
-				      : "+r" (__ret), "+m" (*(ptr))	\
-				      : : "memory", "cc");		\
+			asm volatile (lock "addq %1, %0\n"		\
+				      : "+m" (*(ptr)) : "ri" (inc)	\
+				      : "memory", "cc");		\
 			break;						\
 		default:						\
-			__xadd_wrong_size();				\
+			__add_wrong_size();				\
 		}							\
 		__ret;							\
 	})
 
 /*
- * xadd() adds "inc" to "*ptr" and atomically returns the previous
- * value of "*ptr".
+ * add_*() adds "inc" to "*ptr"
  *
- * xadd() is locked when multiple CPUs are online
- * xadd_sync() is always locked
- * xadd_local() is never locked
+ * __add() takes a lock prefix
+ * add_smp() is locked when multiple CPUs are online
+ * add_sync() is always locked
  */
-#define xadd(ptr, inc)		__xadd((ptr), (inc), LOCK_PREFIX)
-#define xadd_sync(ptr, inc)	__xadd((ptr), (inc), "lock; ")
-#define xadd_local(ptr, inc)	__xadd((ptr), (inc), "")
+#define add_smp(ptr, inc)	__add((ptr), (inc), LOCK_PREFIX)
+#define add_sync(ptr, inc)	__add((ptr), (inc), "lock; ")
+
+#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2)			\
+({									\
+	bool __ret;							\
+	__typeof__(*(p1)) __old1 = (o1), __new1 = (n1);			\
+	__typeof__(*(p2)) __old2 = (o2), __new2 = (n2);			\
+	BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long));			\
+	BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long));			\
+	VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long)));		\
+	VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2));	\
+	asm volatile(pfx "cmpxchg%c4b %2; sete %0"			\
+		     : "=a" (__ret), "+d" (__old2),			\
+		       "+m" (*(p1)), "+m" (*(p2))			\
+		     : "i" (2 * sizeof(long)), "a" (__old1),		\
+		       "b" (__new1), "c" (__new2));			\
+	__ret;								\
+})
+
+#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
+	__cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2)
+
+#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
+	__cmpxchg_double(, p1, p2, o1, o2, n1, n2)
 
 #endif	/* ASM_X86_CMPXCHG_H */
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index fbebb07dd80..53f4b219336 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -166,52 +166,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
 
 #endif
 
-#define cmpxchg8b(ptr, o1, o2, n1, n2)				\
-({								\
-	char __ret;						\
-	__typeof__(o2) __dummy;					\
-	__typeof__(*(ptr)) __old1 = (o1);			\
-	__typeof__(o2) __old2 = (o2);				\
-	__typeof__(*(ptr)) __new1 = (n1);			\
-	__typeof__(o2) __new2 = (n2);				\
-	asm volatile(LOCK_PREFIX "cmpxchg8b %2; setz %1"	\
-		       : "=d"(__dummy), "=a" (__ret), "+m" (*ptr)\
-		       : "a" (__old1), "d"(__old2),		\
-		         "b" (__new1), "c" (__new2)		\
-		       : "memory");				\
-	__ret; })
-
-
-#define cmpxchg8b_local(ptr, o1, o2, n1, n2)			\
-({								\
-	char __ret;						\
-	__typeof__(o2) __dummy;					\
-	__typeof__(*(ptr)) __old1 = (o1);			\
-	__typeof__(o2) __old2 = (o2);				\
-	__typeof__(*(ptr)) __new1 = (n1);			\
-	__typeof__(o2) __new2 = (n2);				\
-	asm volatile("cmpxchg8b %2; setz %1"			\
-		       : "=d"(__dummy), "=a"(__ret), "+m" (*ptr)\
-		       : "a" (__old), "d"(__old2),		\
-		         "b" (__new1), "c" (__new2),		\
-		       : "memory");				\
-	__ret; })
-
-
-#define cmpxchg_double(ptr, o1, o2, n1, n2)				\
-({									\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 4);				\
-	VM_BUG_ON((unsigned long)(ptr) % 8);				\
-	cmpxchg8b((ptr), (o1), (o2), (n1), (n2));			\
-})
-
-#define cmpxchg_double_local(ptr, o1, o2, n1, n2)			\
-({									\
-       BUILD_BUG_ON(sizeof(*(ptr)) != 4);				\
-       VM_BUG_ON((unsigned long)(ptr) % 8);				\
-       cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2));			\
-})
-
 #define system_has_cmpxchg_double() cpu_has_cx8
 
 #endif /* _ASM_X86_CMPXCHG_32_H */
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 285da02c38f..614be87f1a9 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -20,49 +20,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
 	cmpxchg_local((ptr), (o), (n));					\
 })
 
-#define cmpxchg16b(ptr, o1, o2, n1, n2)				\
-({								\
-	char __ret;						\
-	__typeof__(o2) __junk;					\
-	__typeof__(*(ptr)) __old1 = (o1);			\
-	__typeof__(o2) __old2 = (o2);				\
-	__typeof__(*(ptr)) __new1 = (n1);			\
-	__typeof__(o2) __new2 = (n2);				\
-	asm volatile(LOCK_PREFIX "cmpxchg16b %2;setz %1"	\
-		       : "=d"(__junk), "=a"(__ret), "+m" (*ptr)	\
-		       : "b"(__new1), "c"(__new2),		\
-		         "a"(__old1), "d"(__old2));		\
-	__ret; })
-
-
-#define cmpxchg16b_local(ptr, o1, o2, n1, n2)			\
-({								\
-	char __ret;						\
-	__typeof__(o2) __junk;					\
-	__typeof__(*(ptr)) __old1 = (o1);			\
-	__typeof__(o2) __old2 = (o2);				\
-	__typeof__(*(ptr)) __new1 = (n1);			\
-	__typeof__(o2) __new2 = (n2);				\
-	asm volatile("cmpxchg16b %2;setz %1"			\
-		       : "=d"(__junk), "=a"(__ret), "+m" (*ptr)	\
-		       : "b"(__new1), "c"(__new2),		\
-		         "a"(__old1), "d"(__old2));		\
-	__ret; })
-
-#define cmpxchg_double(ptr, o1, o2, n1, n2)				\
-({									\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	VM_BUG_ON((unsigned long)(ptr) % 16);				\
-	cmpxchg16b((ptr), (o1), (o2), (n1), (n2));			\
-})
-
-#define cmpxchg_double_local(ptr, o1, o2, n1, n2)			\
-({									\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	VM_BUG_ON((unsigned long)(ptr) % 16);				\
-	cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2));		\
-})
-
 #define system_has_cmpxchg_double() cpu_has_cx16
 
 #endif /* _ASM_X86_CMPXCHG_64_H */
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 9a2d644c08e..ced283ac79d 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -4,6 +4,7 @@
 #ifdef CONFIG_X86_32
 
 #include <linux/types.h>
+#include <linux/log2.h>
 
 /*
  * do_div() is NOT a C function. It wants to return
@@ -21,15 +22,20 @@
 ({								\
 	unsigned long __upper, __low, __high, __mod, __base;	\
 	__base = (base);					\
-	asm("":"=a" (__low), "=d" (__high) : "A" (n));		\
-	__upper = __high;					\
-	if (__high) {						\
-		__upper = __high % (__base);			\
-		__high = __high / (__base);			\
+	if (__builtin_constant_p(__base) && is_power_of_2(__base)) { \
+		__mod = n & (__base - 1);			\
+		n >>= ilog2(__base);				\
+	} else {						\
+		asm("" : "=a" (__low), "=d" (__high) : "A" (n));\
+		__upper = __high;				\
+		if (__high) {					\
+			__upper = __high % (__base);		\
+			__high = __high / (__base);		\
+		}						\
+		asm("divl %2" : "=a" (__low), "=d" (__mod)	\
+			: "rm" (__base), "0" (__low), "1" (__upper));	\
+		asm("" : "=A" (n) : "a" (__low), "d" (__high));	\
 	}							\
-	asm("divl %2":"=a" (__low), "=d" (__mod)		\
-	    : "rm" (__base), "0" (__low), "1" (__upper));	\
-	asm("":"=A" (n) : "a" (__low), "d" (__high));		\
 	__mod;							\
 })
 
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 908b96957d8..37782566af2 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -117,7 +117,7 @@ static inline void early_memtest(unsigned long start, unsigned long end)
 
 extern unsigned long e820_end_of_ram_pfn(void);
 extern unsigned long e820_end_of_low_ram_pfn(void);
-extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
+extern u64 early_reserve_e820(u64 sizet, u64 align);
 
 void memblock_x86_fill(void);
 void memblock_find_dma_reserve(void);
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 55e4de613f0..da0b3ca815b 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -11,6 +11,7 @@ typedef struct {
 #ifdef CONFIG_X86_LOCAL_APIC
 	unsigned int apic_timer_irqs;	/* arch dependent */
 	unsigned int irq_spurious_count;
+	unsigned int icr_read_retry_count;
 #endif
 	unsigned int x86_platform_ipis;	/* arch dependent */
 	unsigned int apic_perf_irqs;
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index c9e09ea0564..6919e936345 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -218,7 +218,7 @@ static inline void fpu_fxsave(struct fpu *fpu)
 #ifdef CONFIG_SMP
 #define safe_address (__per_cpu_offset[0])
 #else
-#define safe_address (kstat_cpu(0).cpustat.user)
+#define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER])
 #endif
 
 /*
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 88c765e1641..74df3f1eddf 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -137,6 +137,13 @@ static inline int insn_is_avx(struct insn *insn)
 	return (insn->vex_prefix.value != 0);
 }
 
+/* Ensure this instruction is decoded completely */
+static inline int insn_complete(struct insn *insn)
+{
+	return insn->opcode.got && insn->modrm.got && insn->sib.got &&
+		insn->displacement.got && insn->immediate.got;
+}
+
 static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
 {
 	if (insn->vex_prefix.nbytes == 2)	/* 2 bytes VEX */
diff --git a/arch/x86/include/asm/mach_timer.h b/arch/x86/include/asm/mach_timer.h
index 853728519ae..88d0c3c74c1 100644
--- a/arch/x86/include/asm/mach_timer.h
+++ b/arch/x86/include/asm/mach_timer.h
@@ -15,7 +15,7 @@
 
 #define CALIBRATE_TIME_MSEC 30 /* 30 msecs */
 #define CALIBRATE_LATCH	\
-	((CLOCK_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000)
+	((PIT_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000)
 
 static inline void mach_prepare_counter(void)
 {
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index 01fdf5674e2..0e8e85bb7c5 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -81,8 +81,8 @@ static inline unsigned char current_lock_cmos_reg(void)
 #else
 #define lock_cmos_prefix(reg) do {} while (0)
 #define lock_cmos_suffix(reg) do {} while (0)
-#define lock_cmos(reg)
-#define unlock_cmos()
+#define lock_cmos(reg) do { } while (0)
+#define unlock_cmos() do { } while (0)
 #define do_i_have_lock_cmos() 0
 #define current_lock_cmos_reg() 0
 #endif
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 0e8ae57d365..6add827381c 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -50,10 +50,11 @@
 #define MCJ_CTX_MASK		3
 #define MCJ_CTX(flags)		((flags) & MCJ_CTX_MASK)
 #define MCJ_CTX_RANDOM		0    /* inject context: random */
-#define MCJ_CTX_PROCESS		1    /* inject context: process */
-#define MCJ_CTX_IRQ		2    /* inject context: IRQ */
-#define MCJ_NMI_BROADCAST	4    /* do NMI broadcasting */
-#define MCJ_EXCEPTION		8    /* raise as exception */
+#define MCJ_CTX_PROCESS		0x1  /* inject context: process */
+#define MCJ_CTX_IRQ		0x2  /* inject context: IRQ */
+#define MCJ_NMI_BROADCAST	0x4  /* do NMI broadcasting */
+#define MCJ_EXCEPTION		0x8  /* raise as exception */
+#define MCJ_IRQ_BRAODCAST	0x10 /* do IRQ broadcasting */
 
 /* Fields are zero when not available */
 struct mce {
@@ -120,7 +121,8 @@ struct mce_log {
 
 #ifdef __KERNEL__
 
-extern struct atomic_notifier_head x86_mce_decoder_chain;
+extern void mce_register_decode_chain(struct notifier_block *nb);
+extern void mce_unregister_decode_chain(struct notifier_block *nb);
 
 #include <linux/percpu.h>
 #include <linux/init.h>
diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h
deleted file mode 100644
index 0cd3800f33b..00000000000
--- a/arch/x86/include/asm/memblock.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef _X86_MEMBLOCK_H
-#define _X86_MEMBLOCK_H
-
-#define ARCH_DISCARD_MEMBLOCK
-
-u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align);
-
-void memblock_x86_reserve_range(u64 start, u64 end, char *name);
-void memblock_x86_free_range(u64 start, u64 end);
-struct range;
-int __get_free_all_memory_range(struct range **range, int nodeid,
-			 unsigned long start_pfn, unsigned long end_pfn);
-int get_free_all_memory_range(struct range **rangep, int nodeid);
-
-void memblock_x86_register_active_regions(int nid, unsigned long start_pfn,
-					 unsigned long last_pfn);
-u64 memblock_x86_hole_size(u64 start, u64 end);
-u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align);
-u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit);
-u64 memblock_x86_memory_in_range(u64 addr, u64 limit);
-bool memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align);
-
-#endif
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 24215072d0e..4ebe157bf73 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -48,6 +48,7 @@ static inline struct microcode_ops * __init init_intel_microcode(void)
 
 #ifdef CONFIG_MICROCODE_AMD
 extern struct microcode_ops * __init init_amd_microcode(void);
+extern void __exit exit_amd_microcode(void);
 
 static inline void get_ucode_data(void *to, const u8 *from, size_t n)
 {
@@ -59,6 +60,7 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
 {
 	return NULL;
 }
+static inline void __exit exit_amd_microcode(void) {}
 #endif
 
 #endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/numachip/numachip_csr.h b/arch/x86/include/asm/numachip/numachip_csr.h
new file mode 100644
index 00000000000..660f843df92
--- /dev/null
+++ b/arch/x86/include/asm/numachip/numachip_csr.h
@@ -0,0 +1,167 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-Specific Header file
+ *
+ * Copyright (C) 2011 Numascale AS. All rights reserved.
+ *
+ * Send feedback to <support@numascale.com>
+ *
+ */
+
+#ifndef _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
+#define _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
+
+#include <linux/numa.h>
+#include <linux/percpu.h>
+#include <linux/io.h>
+#include <linux/swab.h>
+#include <asm/types.h>
+#include <asm/processor.h>
+
+#define CSR_NODE_SHIFT		16
+#define CSR_NODE_BITS(p)	(((unsigned long)(p)) << CSR_NODE_SHIFT)
+#define CSR_NODE_MASK		0x0fff		/* 4K nodes */
+
+/* 32K CSR space, b15 indicates geo/non-geo */
+#define CSR_OFFSET_MASK	0x7fffUL
+
+/* Global CSR space covers all 4K possible nodes with 64K CSR space per node */
+#define NUMACHIP_GCSR_BASE	0x3fff00000000ULL
+#define NUMACHIP_GCSR_LIM	0x3fff0fffffffULL
+#define NUMACHIP_GCSR_SIZE	(NUMACHIP_GCSR_LIM - NUMACHIP_GCSR_BASE + 1)
+
+/*
+ * Local CSR space starts in global CSR space with "nodeid" = 0xfff0, however
+ * when using the direct mapping on x86_64, both start and size needs to be
+ * aligned with PMD_SIZE which is 2M
+ */
+#define NUMACHIP_LCSR_BASE	0x3ffffe000000ULL
+#define NUMACHIP_LCSR_LIM	0x3fffffffffffULL
+#define NUMACHIP_LCSR_SIZE	(NUMACHIP_LCSR_LIM - NUMACHIP_LCSR_BASE + 1)
+
+static inline void *gcsr_address(int node, unsigned long offset)
+{
+	return __va(NUMACHIP_GCSR_BASE | (1UL << 15) |
+		CSR_NODE_BITS(node & CSR_NODE_MASK) | (offset & CSR_OFFSET_MASK));
+}
+
+static inline void *lcsr_address(unsigned long offset)
+{
+	return __va(NUMACHIP_LCSR_BASE | (1UL << 15) |
+		CSR_NODE_BITS(0xfff0) | (offset & CSR_OFFSET_MASK));
+}
+
+static inline unsigned int read_gcsr(int node, unsigned long offset)
+{
+	return swab32(readl(gcsr_address(node, offset)));
+}
+
+static inline void write_gcsr(int node, unsigned long offset, unsigned int val)
+{
+	writel(swab32(val), gcsr_address(node, offset));
+}
+
+static inline unsigned int read_lcsr(unsigned long offset)
+{
+	return swab32(readl(lcsr_address(offset)));
+}
+
+static inline void write_lcsr(unsigned long offset, unsigned int val)
+{
+	writel(swab32(val), lcsr_address(offset));
+}
+
+/* ========================================================================= */
+/*                   CSR_G0_STATE_CLEAR                                      */
+/* ========================================================================= */
+
+#define CSR_G0_STATE_CLEAR (0x000 + (0 << 12))
+union numachip_csr_g0_state_clear {
+	unsigned int v;
+	struct numachip_csr_g0_state_clear_s {
+		unsigned int _state:2;
+		unsigned int _rsvd_2_6:5;
+		unsigned int _lost:1;
+		unsigned int _rsvd_8_31:24;
+	} s;
+};
+
+/* ========================================================================= */
+/*                   CSR_G0_NODE_IDS                                         */
+/* ========================================================================= */
+
+#define CSR_G0_NODE_IDS (0x008 + (0 << 12))
+union numachip_csr_g0_node_ids {
+	unsigned int v;
+	struct numachip_csr_g0_node_ids_s {
+		unsigned int _initialid:16;
+		unsigned int _nodeid:12;
+		unsigned int _rsvd_28_31:4;
+	} s;
+};
+
+/* ========================================================================= */
+/*                   CSR_G3_EXT_IRQ_GEN                                      */
+/* ========================================================================= */
+
+#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12))
+union numachip_csr_g3_ext_irq_gen {
+	unsigned int v;
+	struct numachip_csr_g3_ext_irq_gen_s {
+		unsigned int _vector:8;
+		unsigned int _msgtype:3;
+		unsigned int _index:5;
+		unsigned int _destination_apic_id:16;
+	} s;
+};
+
+/* ========================================================================= */
+/*                   CSR_G3_EXT_IRQ_STATUS                                   */
+/* ========================================================================= */
+
+#define CSR_G3_EXT_IRQ_STATUS (0x034 + (3 << 12))
+union numachip_csr_g3_ext_irq_status {
+	unsigned int v;
+	struct numachip_csr_g3_ext_irq_status_s {
+		unsigned int _result:32;
+	} s;
+};
+
+/* ========================================================================= */
+/*                   CSR_G3_EXT_IRQ_DEST                                     */
+/* ========================================================================= */
+
+#define CSR_G3_EXT_IRQ_DEST (0x038 + (3 << 12))
+union numachip_csr_g3_ext_irq_dest {
+	unsigned int v;
+	struct numachip_csr_g3_ext_irq_dest_s {
+		unsigned int _irq:8;
+		unsigned int _rsvd_8_31:24;
+	} s;
+};
+
+/* ========================================================================= */
+/*                   CSR_G3_NC_ATT_MAP_SELECT                                */
+/* ========================================================================= */
+
+#define CSR_G3_NC_ATT_MAP_SELECT (0x7fc + (3 << 12))
+union numachip_csr_g3_nc_att_map_select {
+	unsigned int v;
+	struct numachip_csr_g3_nc_att_map_select_s {
+		unsigned int _upper_address_bits:4;
+		unsigned int _select_ram:4;
+		unsigned int _rsvd_8_31:24;
+	} s;
+};
+
+/* ========================================================================= */
+/*                   CSR_G3_NC_ATT_MAP_SELECT_0-255                          */
+/* ========================================================================= */
+
+#define CSR_G3_NC_ATT_MAP_SELECT_0 (0x800 + (3 << 12))
+
+#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */
+
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 3470c9d0ebb..529bf07e806 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -451,23 +451,20 @@ do {									\
 #endif /* !CONFIG_M386 */
 
 #ifdef CONFIG_X86_CMPXCHG64
-#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)			\
+#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2)		\
 ({									\
-	char __ret;							\
-	typeof(o1) __o1 = o1;						\
-	typeof(o1) __n1 = n1;						\
-	typeof(o2) __o2 = o2;						\
-	typeof(o2) __n2 = n2;						\
-	typeof(o2) __dummy = n2;					\
+	bool __ret;							\
+	typeof(pcp1) __o1 = (o1), __n1 = (n1);				\
+	typeof(pcp2) __o2 = (o2), __n2 = (n2);				\
 	asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t"	\
-		    : "=a"(__ret), "=m" (pcp1), "=d"(__dummy)		\
-		    :  "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2));	\
+		    : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \
+		    :  "b" (__n1), "c" (__n2), "a" (__o1));		\
 	__ret;								\
 })
 
-#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)		percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
-#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)		percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
-#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)	percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#define __this_cpu_cmpxchg_double_4	percpu_cmpxchg8b_double
+#define this_cpu_cmpxchg_double_4	percpu_cmpxchg8b_double
+#define irqsafe_cpu_cmpxchg_double_4	percpu_cmpxchg8b_double
 #endif /* CONFIG_X86_CMPXCHG64 */
 
 /*
@@ -508,31 +505,23 @@ do {									\
  * it in software.  The address used in the cmpxchg16 instruction must be
  * aligned to a 16 byte boundary.
  */
-#ifdef CONFIG_SMP
-#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP3
-#else
-#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP2
-#endif
-#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)			\
+#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2)		\
 ({									\
-	char __ret;							\
-	typeof(o1) __o1 = o1;						\
-	typeof(o1) __n1 = n1;						\
-	typeof(o2) __o2 = o2;						\
-	typeof(o2) __n2 = n2;						\
-	typeof(o2) __dummy;						\
-	alternative_io(CMPXCHG16B_EMU_CALL,				\
-		       "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t",	\
+	bool __ret;							\
+	typeof(pcp1) __o1 = (o1), __n1 = (n1);				\
+	typeof(pcp2) __o2 = (o2), __n2 = (n2);				\
+	alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \
+		       "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t",	\
 		       X86_FEATURE_CX16,				\
-		       ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)),		\
-		       "S" (&pcp1), "b"(__n1), "c"(__n2),		\
-		       "a"(__o1), "d"(__o2) : "memory");		\
+		       ASM_OUTPUT2("=a" (__ret), "+m" (pcp1),		\
+				   "+m" (pcp2), "+d" (__o2)),		\
+		       "b" (__n1), "c" (__n2), "a" (__o1) : "rsi");	\
 	__ret;								\
 })
 
-#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)		percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
-#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)		percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
-#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)	percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+#define __this_cpu_cmpxchg_double_8	percpu_cmpxchg16b_double
+#define this_cpu_cmpxchg_double_8	percpu_cmpxchg16b_double
+#define irqsafe_cpu_cmpxchg_double_8	percpu_cmpxchg16b_double
 
 #endif
 
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f61c62f7d5d..096c975e099 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -57,6 +57,7 @@
 		(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
 
 #define ARCH_PERFMON_BRANCH_MISSES_RETIRED		6
+#define ARCH_PERFMON_EVENTS_COUNT			7
 
 /*
  * Intel "Architectural Performance Monitoring" CPUID
@@ -72,6 +73,19 @@ union cpuid10_eax {
 	unsigned int full;
 };
 
+union cpuid10_ebx {
+	struct {
+		unsigned int no_unhalted_core_cycles:1;
+		unsigned int no_instructions_retired:1;
+		unsigned int no_unhalted_reference_cycles:1;
+		unsigned int no_llc_reference:1;
+		unsigned int no_llc_misses:1;
+		unsigned int no_branch_instruction_retired:1;
+		unsigned int no_branch_misses_retired:1;
+	} split;
+	unsigned int full;
+};
+
 union cpuid10_edx {
 	struct {
 		unsigned int num_counters_fixed:5;
@@ -81,6 +95,15 @@ union cpuid10_edx {
 	unsigned int full;
 };
 
+struct x86_pmu_capability {
+	int		version;
+	int		num_counters_gp;
+	int		num_counters_fixed;
+	int		bit_width_gp;
+	int		bit_width_fixed;
+	unsigned int	events_mask;
+	int		events_mask_len;
+};
 
 /*
  * Fixed-purpose performance events:
@@ -89,23 +112,24 @@ union cpuid10_edx {
 /*
  * All 3 fixed-mode PMCs are configured via this single MSR:
  */
-#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL			0x38d
+#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL	0x38d
 
 /*
  * The counts are available in three separate MSRs:
  */
 
 /* Instr_Retired.Any: */
-#define MSR_ARCH_PERFMON_FIXED_CTR0			0x309
-#define X86_PMC_IDX_FIXED_INSTRUCTIONS			(X86_PMC_IDX_FIXED + 0)
+#define MSR_ARCH_PERFMON_FIXED_CTR0	0x309
+#define X86_PMC_IDX_FIXED_INSTRUCTIONS	(X86_PMC_IDX_FIXED + 0)
 
 /* CPU_CLK_Unhalted.Core: */
-#define MSR_ARCH_PERFMON_FIXED_CTR1			0x30a
-#define X86_PMC_IDX_FIXED_CPU_CYCLES			(X86_PMC_IDX_FIXED + 1)
+#define MSR_ARCH_PERFMON_FIXED_CTR1	0x30a
+#define X86_PMC_IDX_FIXED_CPU_CYCLES	(X86_PMC_IDX_FIXED + 1)
 
 /* CPU_CLK_Unhalted.Ref: */
-#define MSR_ARCH_PERFMON_FIXED_CTR2			0x30b
-#define X86_PMC_IDX_FIXED_BUS_CYCLES			(X86_PMC_IDX_FIXED + 2)
+#define MSR_ARCH_PERFMON_FIXED_CTR2	0x30b
+#define X86_PMC_IDX_FIXED_REF_CYCLES	(X86_PMC_IDX_FIXED + 2)
+#define X86_PMC_MSK_FIXED_REF_CYCLES	(1ULL << X86_PMC_IDX_FIXED_REF_CYCLES)
 
 /*
  * We model BTS tracing as another fixed-mode PMC.
@@ -202,6 +226,7 @@ struct perf_guest_switch_msr {
 };
 
 extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
+extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
 #else
 static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
 {
@@ -209,6 +234,11 @@ static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
 	return NULL;
 }
 
+static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
+{
+	memset(cap, 0, sizeof(*cap));
+}
+
 static inline void perf_events_lapic_init(void)	{ }
 #endif
 
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 18601c86fab..49afb3f41eb 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -703,7 +703,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
 	pte_update(mm, addr, ptep);
 }
 
-#define flush_tlb_fix_spurious_fault(vma, address)
+#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
 
 #define mk_pmd(page, pgprot)   pfn_pmd(page_to_pfn(page), (pgprot))
 
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 2dddb317bb3..f8ab3eaad12 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -6,6 +6,7 @@
  * EFLAGS bits
  */
 #define X86_EFLAGS_CF	0x00000001 /* Carry Flag */
+#define X86_EFLAGS_BIT1	0x00000002 /* Bit 1 - always on */
 #define X86_EFLAGS_PF	0x00000004 /* Parity Flag */
 #define X86_EFLAGS_AF	0x00000010 /* Auxiliary carry Flag */
 #define X86_EFLAGS_ZF	0x00000040 /* Zero Flag */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b650435ffb5..aa9088c2693 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -99,7 +99,6 @@ struct cpuinfo_x86 {
 	u16			apicid;
 	u16			initial_apicid;
 	u16			x86_clflush_size;
-#ifdef CONFIG_SMP
 	/* number of cores as seen by the OS: */
 	u16			booted_cores;
 	/* Physical processor id: */
@@ -110,7 +109,6 @@ struct cpuinfo_x86 {
 	u8			compute_unit_id;
 	/* Index into per_cpu list: */
 	u16			cpu_index;
-#endif
 	u32			microcode;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 972c260919a..a82c2bf504b 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -79,23 +79,10 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
 	return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
 }
 
-#if (NR_CPUS < 256)
 static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
-	asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
-		     : "+m" (lock->head_tail)
-		     :
-		     : "memory", "cc");
+	__add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
 }
-#else
-static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
-{
-	asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
-		     : "+m" (lock->head_tail)
-		     :
-		     : "memory", "cc");
-}
-#endif
 
 static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
 {
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a1fe5c127b5..185b719ec61 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -40,7 +40,8 @@ struct thread_info {
 						*/
 	__u8			supervisor_stack[0];
 #endif
-	int			uaccess_err;
+	int			sig_on_uaccess_error:1;
+	int			uaccess_err:1;	/* uaccess failed */
 };
 
 #define INIT_THREAD_INFO(tsk)			\
@@ -231,6 +232,12 @@ static inline struct thread_info *current_thread_info(void)
 	movq PER_CPU_VAR(kernel_stack),reg ; \
 	subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
 
+/*
+ * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
+ * a certain register (to be used in assembler memory operands).
+ */
+#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
+
 #endif
 
 #endif /* !X86_32 */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index c00692476e9..800f77c6005 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -130,10 +130,8 @@ extern void setup_node_to_cpumask_map(void);
 	.balance_interval	= 1,					\
 }
 
-#ifdef CONFIG_X86_64
 extern int __node_distance(int, int);
 #define node_distance(a, b) __node_distance(a, b)
-#endif
 
 #else /* !CONFIG_NUMA */
 
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 83e2efd181e..15d99153a96 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -51,6 +51,8 @@ extern int unsynchronized_tsc(void);
 extern int check_tsc_unstable(void);
 extern unsigned long native_calibrate_tsc(void);
 
+extern int tsc_clocksource_reliable;
+
 /*
  * Boot-time check whether the TSCs are synchronized across
  * all CPUs/cores:
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 36361bf6fdd..8be5f54d936 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -462,7 +462,7 @@ struct __large_struct { unsigned long buf[100]; };
 	barrier();
 
 #define uaccess_catch(err)						\
-	(err) |= current_thread_info()->uaccess_err;			\
+	(err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0);	\
 	current_thread_info()->uaccess_err = prev_err;			\
 } while (0)
 
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 1971e652d24..1ac860a0984 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -7,6 +7,7 @@
 struct mpc_bus;
 struct mpc_cpu;
 struct mpc_table;
+struct cpuinfo_x86;
 
 /**
  * struct x86_init_mpparse - platform specific mpparse ops
@@ -147,6 +148,7 @@ struct x86_init_ops {
  */
 struct x86_cpuinit_ops {
 	void (*setup_percpu_clockev)(void);
+	void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
 };
 
 /**
@@ -186,5 +188,6 @@ extern struct x86_msi_ops x86_msi;
 
 extern void x86_init_noop(void);
 extern void x86_init_uint_noop(unsigned int unused);
+extern void x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node);
 
 #endif
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 4558f0d0822..ce664f33ea8 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -219,6 +219,8 @@ static int __init
 acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
 {
 	struct acpi_madt_local_x2apic *processor = NULL;
+	int apic_id;
+	u8 enabled;
 
 	processor = (struct acpi_madt_local_x2apic *)header;
 
@@ -227,6 +229,8 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
 
 	acpi_table_print_madt_entry(header);
 
+	apic_id = processor->local_apic_id;
+	enabled = processor->lapic_flags & ACPI_MADT_ENABLED;
 #ifdef CONFIG_X86_X2APIC
 	/*
 	 * We need to register disabled CPU as well to permit
@@ -235,8 +239,10 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
 	 * to not preallocating memory for all NR_CPUS
 	 * when we use CPU hotplug.
 	 */
-	acpi_register_lapic(processor->local_apic_id,	/* APIC ID */
-			    processor->lapic_flags & ACPI_MADT_ENABLED);
+	if (!cpu_has_x2apic && (apic_id >= 0xff) && enabled)
+		printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
+	else
+		acpi_register_lapic(apic_id, enabled);
 #else
 	printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
 #endif
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 4c39baa8fac..013c1810ce7 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -123,16 +123,14 @@ int amd_get_subcaches(int cpu)
 {
 	struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
 	unsigned int mask;
-	int cuid = 0;
+	int cuid;
 
 	if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 		return 0;
 
 	pci_read_config_dword(link, 0x1d4, &mask);
 
-#ifdef CONFIG_SMP
 	cuid = cpu_data(cpu).compute_unit_id;
-#endif
 	return (mask >> (4 * cuid)) & 0xf;
 }
 
@@ -141,7 +139,7 @@ int amd_set_subcaches(int cpu, int mask)
 	static unsigned int reset, ban;
 	struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
 	unsigned int reg;
-	int cuid = 0;
+	int cuid;
 
 	if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf)
 		return -EINVAL;
@@ -159,9 +157,7 @@ int amd_set_subcaches(int cpu, int mask)
 		pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
 	}
 
-#ifdef CONFIG_SMP
 	cuid = cpu_data(cpu).compute_unit_id;
-#endif
 	mask <<= 4 * cuid;
 	mask |= (0xf ^ (1 << cuid)) << 26;
 
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 3d2661ca654..6e76c191a83 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -88,13 +88,13 @@ static u32 __init allocate_aperture(void)
 	 */
 	addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
 				      aper_size, aper_size);
-	if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) {
+	if (!addr || addr + aper_size > GART_MAX_ADDR) {
 		printk(KERN_ERR
 			"Cannot allocate aperture memory hole (%lx,%uK)\n",
 				addr, aper_size>>10);
 		return 0;
 	}
-	memblock_x86_reserve_range(addr, addr + aper_size, "aperture64");
+	memblock_reserve(addr, aper_size);
 	/*
 	 * Kmemleak should not scan this block as it may not be mapped via the
 	 * kernel direct mapping.
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 767fd04f284..0ae0323b1f9 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_SMP)		+= ipi.o
 
 ifeq ($(CONFIG_X86_64),y)
 # APIC probe will depend on the listing order here
+obj-$(CONFIG_X86_NUMACHIP)	+= apic_numachip.o
 obj-$(CONFIG_X86_UV)		+= x2apic_uv_x.o
 obj-$(CONFIG_X86_X2APIC)	+= x2apic_phys.o
 obj-$(CONFIG_X86_X2APIC)	+= x2apic_cluster.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f98d84caf94..2eec05b6d1b 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -146,16 +146,26 @@ __setup("apicpmtimer", setup_apicpmtimer);
 int x2apic_mode;
 #ifdef CONFIG_X86_X2APIC
 /* x2apic enabled before OS handover */
-static int x2apic_preenabled;
+int x2apic_preenabled;
+static int x2apic_disabled;
+static int nox2apic;
 static __init int setup_nox2apic(char *str)
 {
 	if (x2apic_enabled()) {
-		pr_warning("Bios already enabled x2apic, "
-			   "can't enforce nox2apic");
-		return 0;
-	}
+		int apicid = native_apic_msr_read(APIC_ID);
+
+		if (apicid >= 255) {
+			pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
+				   apicid);
+			return 0;
+		}
+
+		pr_warning("x2apic already enabled. will disable it\n");
+	} else
+		setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+
+	nox2apic = 1;
 
-	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
 	return 0;
 }
 early_param("nox2apic", setup_nox2apic);
@@ -250,6 +260,7 @@ u32 native_safe_apic_wait_icr_idle(void)
 		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
 		if (!send_status)
 			break;
+		inc_irq_stat(icr_read_retry_count);
 		udelay(100);
 	} while (timeout++ < 1000);
 
@@ -876,8 +887,8 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
 	 * Besides, if we don't timer interrupts ignore the global
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
-	exit_idle();
 	irq_enter();
+	exit_idle();
 	local_apic_timer_interrupt();
 	irq_exit();
 
@@ -1431,6 +1442,45 @@ void __init bsp_end_local_APIC_setup(void)
 }
 
 #ifdef CONFIG_X86_X2APIC
+/*
+ * Need to disable xapic and x2apic at the same time and then enable xapic mode
+ */
+static inline void __disable_x2apic(u64 msr)
+{
+	wrmsrl(MSR_IA32_APICBASE,
+	       msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
+	wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
+}
+
+static __init void disable_x2apic(void)
+{
+	u64 msr;
+
+	if (!cpu_has_x2apic)
+		return;
+
+	rdmsrl(MSR_IA32_APICBASE, msr);
+	if (msr & X2APIC_ENABLE) {
+		u32 x2apic_id = read_apic_id();
+
+		if (x2apic_id >= 255)
+			panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
+
+		pr_info("Disabling x2apic\n");
+		__disable_x2apic(msr);
+
+		if (nox2apic) {
+			clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC);
+			setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+		}
+
+		x2apic_disabled = 1;
+		x2apic_mode = 0;
+
+		register_lapic_address(mp_lapic_addr);
+	}
+}
+
 void check_x2apic(void)
 {
 	if (x2apic_enabled()) {
@@ -1441,15 +1491,20 @@ void check_x2apic(void)
 
 void enable_x2apic(void)
 {
-	int msr, msr2;
+	u64 msr;
+
+	rdmsrl(MSR_IA32_APICBASE, msr);
+	if (x2apic_disabled) {
+		__disable_x2apic(msr);
+		return;
+	}
 
 	if (!x2apic_mode)
 		return;
 
-	rdmsr(MSR_IA32_APICBASE, msr, msr2);
 	if (!(msr & X2APIC_ENABLE)) {
 		printk_once(KERN_INFO "Enabling x2apic\n");
-		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, msr2);
+		wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
 	}
 }
 #endif /* CONFIG_X86_X2APIC */
@@ -1486,25 +1541,34 @@ void __init enable_IR_x2apic(void)
 	ret = save_ioapic_entries();
 	if (ret) {
 		pr_info("Saving IO-APIC state failed: %d\n", ret);
-		goto out;
+		return;
 	}
 
 	local_irq_save(flags);
 	legacy_pic->mask_all();
 	mask_ioapic_entries();
 
+	if (x2apic_preenabled && nox2apic)
+		disable_x2apic();
+
 	if (dmar_table_init_ret)
 		ret = -1;
 	else
 		ret = enable_IR();
 
+	if (!x2apic_supported())
+		goto skip_x2apic;
+
 	if (ret < 0) {
 		/* IR is required if there is APIC ID > 255 even when running
 		 * under KVM
 		 */
 		if (max_physical_apicid > 255 ||
-		    !hypervisor_x2apic_available())
-			goto nox2apic;
+		    !hypervisor_x2apic_available()) {
+			if (x2apic_preenabled)
+				disable_x2apic();
+			goto skip_x2apic;
+		}
 		/*
 		 * without IR all CPUs can be addressed by IOAPIC/MSI
 		 * only in physical mode
@@ -1512,8 +1576,10 @@ void __init enable_IR_x2apic(void)
 		x2apic_force_phys();
 	}
 
-	if (ret == IRQ_REMAP_XAPIC_MODE)
-		goto nox2apic;
+	if (ret == IRQ_REMAP_XAPIC_MODE) {
+		pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
+		goto skip_x2apic;
+	}
 
 	x2apic_enabled = 1;
 
@@ -1523,22 +1589,11 @@ void __init enable_IR_x2apic(void)
 		pr_info("Enabled x2apic\n");
 	}
 
-nox2apic:
+skip_x2apic:
 	if (ret < 0) /* IR enabling failed */
 		restore_ioapic_entries();
 	legacy_pic->restore_mask();
 	local_irq_restore(flags);
-
-out:
-	if (x2apic_enabled || !x2apic_supported())
-		return;
-
-	if (x2apic_preenabled)
-		panic("x2apic: enabled by BIOS but kernel init failed.");
-	else if (ret == IRQ_REMAP_XAPIC_MODE)
-		pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
-	else if (ret < 0)
-		pr_info("x2apic not enabled, IRQ remapping init failed\n");
 }
 
 #ifdef CONFIG_X86_64
@@ -1809,8 +1864,8 @@ void smp_spurious_interrupt(struct pt_regs *regs)
 {
 	u32 v;
 
-	exit_idle();
 	irq_enter();
+	exit_idle();
 	/*
 	 * Check if this really is a spurious interrupt and ACK it
 	 * if it is a vectored one.  Just in case...
@@ -1846,8 +1901,8 @@ void smp_error_interrupt(struct pt_regs *regs)
 		"Illegal register address",	/* APIC Error Bit 7 */
 	};
 
-	exit_idle();
 	irq_enter();
+	exit_idle();
 	/* First tickle the hardware, only then report what went on. -- REW */
 	v0 = apic_read(APIC_ESR);
 	apic_write(APIC_ESR, 0);
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index f7a41e4cae4..8c3cdded6f2 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -62,7 +62,7 @@ static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
  * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
  * document number 292116).  So here it goes...
  */
-static void flat_init_apic_ldr(void)
+void flat_init_apic_ldr(void)
 {
 	unsigned long val;
 	unsigned long num, id;
@@ -171,9 +171,14 @@ static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
 	return initial_apic_id >> index_msb;
 }
 
+static int flat_probe(void)
+{
+	return 1;
+}
+
 static struct apic apic_flat =  {
 	.name				= "flat",
-	.probe				= NULL,
+	.probe				= flat_probe,
 	.acpi_madt_oem_check		= flat_acpi_madt_oem_check,
 	.apic_id_registered		= flat_apic_id_registered,
 
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
new file mode 100644
index 00000000000..09d3d8c1cd9
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -0,0 +1,294 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-Specific APIC Code
+ *
+ * Copyright (C) 2011 Numascale AS. All rights reserved.
+ *
+ * Send feedback to <support@numascale.com>
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/hardirq.h>
+#include <linux/delay.h>
+
+#include <asm/numachip/numachip_csr.h>
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/apic_flat_64.h>
+
+static int numachip_system __read_mostly;
+
+static struct apic apic_numachip __read_mostly;
+
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned long value;
+	unsigned int id;
+
+	rdmsrl(MSR_FAM10H_NODE_ID, value);
+	id = ((x >> 24) & 0xffU) | ((value << 2) & 0x3f00U);
+
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	x = ((id & 0xffU) << 24);
+	return x;
+}
+
+static unsigned int read_xapic_id(void)
+{
+	return get_apic_id(apic_read(APIC_ID));
+}
+
+static int numachip_apic_id_registered(void)
+{
+	return physid_isset(read_xapic_id(), phys_cpu_present_map);
+}
+
+static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
+{
+	return initial_apic_id >> index_msb;
+}
+
+static const struct cpumask *numachip_target_cpus(void)
+{
+	return cpu_online_mask;
+}
+
+static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
+}
+
+static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+{
+	union numachip_csr_g3_ext_irq_gen int_gen;
+
+	int_gen.s._destination_apic_id = phys_apicid;
+	int_gen.s._vector = 0;
+	int_gen.s._msgtype = APIC_DM_INIT >> 8;
+	int_gen.s._index = 0;
+
+	write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+
+	int_gen.s._msgtype = APIC_DM_STARTUP >> 8;
+	int_gen.s._vector = start_rip >> 12;
+
+	write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+
+	atomic_set(&init_deasserted, 1);
+	return 0;
+}
+
+static void numachip_send_IPI_one(int cpu, int vector)
+{
+	union numachip_csr_g3_ext_irq_gen int_gen;
+	int apicid = per_cpu(x86_cpu_to_apicid, cpu);
+
+	int_gen.s._destination_apic_id = apicid;
+	int_gen.s._vector = vector;
+	int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8;
+	int_gen.s._index = 0;
+
+	write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+}
+
+static void numachip_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask)
+		numachip_send_IPI_one(cpu, vector);
+}
+
+static void numachip_send_IPI_mask_allbutself(const struct cpumask *mask,
+						int vector)
+{
+	unsigned int this_cpu = smp_processor_id();
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask) {
+		if (cpu != this_cpu)
+			numachip_send_IPI_one(cpu, vector);
+	}
+}
+
+static void numachip_send_IPI_allbutself(int vector)
+{
+	unsigned int this_cpu = smp_processor_id();
+	unsigned int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (cpu != this_cpu)
+			numachip_send_IPI_one(cpu, vector);
+	}
+}
+
+static void numachip_send_IPI_all(int vector)
+{
+	numachip_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static void numachip_send_IPI_self(int vector)
+{
+	__default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+}
+
+static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	cpu = cpumask_first(cpumask);
+	if (likely((unsigned)cpu < nr_cpu_ids))
+		return per_cpu(x86_cpu_to_apicid, cpu);
+
+	return BAD_APICID;
+}
+
+static unsigned int
+numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+				const struct cpumask *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	for_each_cpu_and(cpu, cpumask, andmask) {
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
+	}
+	return per_cpu(x86_cpu_to_apicid, cpu);
+}
+
+static int __init numachip_probe(void)
+{
+	return apic == &apic_numachip;
+}
+
+static void __init map_csrs(void)
+{
+	printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n",
+		NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_BASE + NUMACHIP_LCSR_SIZE - 1);
+	init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
+
+	printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n",
+		NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_BASE + NUMACHIP_GCSR_SIZE - 1);
+	init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE);
+}
+
+static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
+{
+	c->phys_proc_id = node;
+	per_cpu(cpu_llc_id, smp_processor_id()) = node;
+}
+
+static int __init numachip_system_init(void)
+{
+	unsigned int val;
+
+	if (!numachip_system)
+		return 0;
+
+	x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+
+	map_csrs();
+
+	val = read_lcsr(CSR_G0_NODE_IDS);
+	printk(KERN_INFO "NumaChip: Local NodeID = %08x\n", val);
+
+	return 0;
+}
+early_initcall(numachip_system_init);
+
+static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	if (!strncmp(oem_id, "NUMASC", 6)) {
+		numachip_system = 1;
+		return 1;
+	}
+
+	return 0;
+}
+
+static struct apic apic_numachip __refconst = {
+
+	.name				= "NumaConnect system",
+	.probe				= numachip_probe,
+	.acpi_madt_oem_check		= numachip_acpi_madt_oem_check,
+	.apic_id_registered		= numachip_apic_id_registered,
+
+	.irq_delivery_mode		= dest_Fixed,
+	.irq_dest_mode			= 0, /* physical */
+
+	.target_cpus			= numachip_target_cpus,
+	.disable_esr			= 0,
+	.dest_logical			= 0,
+	.check_apicid_used		= NULL,
+	.check_apicid_present		= NULL,
+
+	.vector_allocation_domain	= numachip_vector_allocation_domain,
+	.init_apic_ldr			= flat_init_apic_ldr,
+
+	.ioapic_phys_id_map		= NULL,
+	.setup_apic_routing		= NULL,
+	.multi_timer_check		= NULL,
+	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
+	.apicid_to_cpu_present		= NULL,
+	.setup_portio_remap		= NULL,
+	.check_phys_apicid_present	= default_check_phys_apicid_present,
+	.enable_apic_mode		= NULL,
+	.phys_pkg_id			= numachip_phys_pkg_id,
+	.mps_oem_check			= NULL,
+
+	.get_apic_id			= get_apic_id,
+	.set_apic_id			= set_apic_id,
+	.apic_id_mask			= 0xffU << 24,
+
+	.cpu_mask_to_apicid		= numachip_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and		= numachip_cpu_mask_to_apicid_and,
+
+	.send_IPI_mask			= numachip_send_IPI_mask,
+	.send_IPI_mask_allbutself	= numachip_send_IPI_mask_allbutself,
+	.send_IPI_allbutself		= numachip_send_IPI_allbutself,
+	.send_IPI_all			= numachip_send_IPI_all,
+	.send_IPI_self			= numachip_send_IPI_self,
+
+	.wakeup_secondary_cpu		= numachip_wakeup_secondary,
+	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
+	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
+	.wait_for_init_deassert		= NULL,
+	.smp_callin_clear_local_apic	= NULL,
+	.inquire_remote_apic		= NULL, /* REMRD not supported */
+
+	.read				= native_apic_mem_read,
+	.write				= native_apic_mem_write,
+	.icr_read			= native_apic_icr_read,
+	.icr_write			= native_apic_icr_write,
+	.wait_icr_idle			= native_apic_wait_icr_idle,
+	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+};
+apic_driver(apic_numachip);
+
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6d939d7847e..fb072754bc1 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2421,8 +2421,8 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 	unsigned vector, me;
 
 	ack_APIC_irq();
-	exit_idle();
 	irq_enter();
+	exit_idle();
 
 	me = smp_processor_id();
 	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
@@ -2948,6 +2948,10 @@ static inline void __init check_timer(void)
 	}
 	local_irq_disable();
 	apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
+	if (x2apic_preenabled)
+		apic_printk(APIC_QUIET, KERN_INFO
+			    "Perhaps problem with the pre-enabled x2apic mode\n"
+			    "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
 	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
 		"report.  Then try booting with the 'noapic' option.\n");
 out:
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 452932d3473..5da1269e8dd 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -62,7 +62,8 @@ early_param("memory_corruption_check_size", set_corruption_check_size);
 
 void __init setup_bios_corruption_check(void)
 {
-	u64 addr = PAGE_SIZE;	/* assume first page is reserved anyway */
+	phys_addr_t start, end;
+	u64 i;
 
 	if (memory_corruption_check == -1) {
 		memory_corruption_check =
@@ -82,28 +83,23 @@ void __init setup_bios_corruption_check(void)
 
 	corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
 
-	while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
-		u64 size;
-		addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE);
+	for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
+		start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE),
+				PAGE_SIZE, corruption_check_size);
+		end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE),
+			      PAGE_SIZE, corruption_check_size);
+		if (start >= end)
+			continue;
 
-		if (addr == MEMBLOCK_ERROR)
-			break;
-
-		if (addr >= corruption_check_size)
-			break;
-
-		if ((addr + size) > corruption_check_size)
-			size = corruption_check_size - addr;
-
-		memblock_x86_reserve_range(addr, addr + size, "SCAN RAM");
-		scan_areas[num_scan_areas].addr = addr;
-		scan_areas[num_scan_areas].size = size;
-		num_scan_areas++;
+		memblock_reserve(start, end - start);
+		scan_areas[num_scan_areas].addr = start;
+		scan_areas[num_scan_areas].size = end - start;
 
 		/* Assume we've already mapped this early memory */
-		memset(__va(addr), 0, size);
+		memset(__va(start), 0, end - start);
 
-		addr += size;
+		if (++num_scan_areas >= MAX_SCAN_AREAS)
+			break;
 	}
 
 	if (num_scan_areas)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 0bab2b18bb2..f4773f4aae3 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -148,7 +148,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
 
 static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_SMP
 	/* calling is from identify_secondary_cpu() ? */
 	if (!c->cpu_index)
 		return;
@@ -192,7 +191,6 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
 
 valid_k7:
 	;
-#endif
 }
 
 static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
@@ -353,6 +351,13 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 	if (node == NUMA_NO_NODE)
 		node = per_cpu(cpu_llc_id, cpu);
 
+	/*
+	 * If core numbers are inconsistent, it's likely a multi-fabric platform,
+	 * so invoke platform-specific handler
+	 */
+	if (c->phys_proc_id != node)
+		x86_cpuinit.fixup_cpu_id(c, node);
+
 	if (!node_online(node)) {
 		/*
 		 * Two possibilities here:
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index e58d978e075..159103c0b1f 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -278,7 +278,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
 	}
 #ifdef CONFIG_X86_32
 	/* Cyrix III family needs CX8 & PGE explicitly enabled. */
-	if (c->x86_model >= 6 && c->x86_model <= 9) {
+	if (c->x86_model >= 6 && c->x86_model <= 13) {
 		rdmsr(MSR_VIA_FCR, lo, hi);
 		lo |= (1<<1 | 1<<7);
 		wrmsr(MSR_VIA_FCR, lo, hi);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index aa003b13a83..850f2963a42 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -676,9 +676,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	if (this_cpu->c_early_init)
 		this_cpu->c_early_init(c);
 
-#ifdef CONFIG_SMP
 	c->cpu_index = 0;
-#endif
 	filter_cpuid_features(c, false);
 
 	setup_smep(c);
@@ -764,10 +762,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
 		c->apicid = c->initial_apicid;
 # endif
 #endif
-
-#ifdef CONFIG_X86_HT
 		c->phys_proc_id = c->initial_apicid;
-#endif
 	}
 
 	setup_smep(c);
@@ -1141,6 +1136,15 @@ static void dbg_restore_debug_regs(void)
 #endif /* ! CONFIG_KGDB */
 
 /*
+ * Prints an error where the NUMA and configured core-number mismatch and the
+ * platform didn't override this to fix it up
+ */
+void __cpuinit x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node)
+{
+	pr_err("NUMA core number %d differs from configured core number %d\n", node, c->phys_proc_id);
+}
+
+/*
  * cpu_init() initializes state that is per-CPU. Some data is already
  * initialized (naturally) in the bootstrap process, such as the GDT
  * and IDT. We reload them nevertheless, this function acts as a
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 1b22dcc51af..8bacc7826fb 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -1,5 +1,4 @@
 #ifndef ARCH_X86_CPU_H
-
 #define ARCH_X86_CPU_H
 
 struct cpu_model_info {
@@ -35,6 +34,4 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
 
 extern void get_cpu_cap(struct cpuinfo_x86 *c);
 extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
-extern void get_cpu_cap(struct cpuinfo_x86 *c);
-
-#endif
+#endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 523131213f0..3e6ff6cbf42 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -181,7 +181,6 @@ static void __cpuinit trap_init_f00f_bug(void)
 
 static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_SMP
 	/* calling is from identify_secondary_cpu() ? */
 	if (!c->cpu_index)
 		return;
@@ -198,7 +197,6 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
 		WARN_ONCE(1, "WARNING: SMP operation may be unreliable"
 				    "with B stepping processors.\n");
 	}
-#endif
 }
 
 static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 319882ef848..fc4beb39357 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/fs.h>
+#include <linux/preempt.h>
 #include <linux/smp.h>
 #include <linux/notifier.h>
 #include <linux/kdebug.h>
@@ -92,6 +93,18 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
 	return NMI_HANDLED;
 }
 
+static void mce_irq_ipi(void *info)
+{
+	int cpu = smp_processor_id();
+	struct mce *m = &__get_cpu_var(injectm);
+
+	if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
+			m->inject_flags & MCJ_EXCEPTION) {
+		cpumask_clear_cpu(cpu, mce_inject_cpumask);
+		raise_exception(m, NULL);
+	}
+}
+
 /* Inject mce on current CPU */
 static int raise_local(void)
 {
@@ -139,9 +152,10 @@ static void raise_mce(struct mce *m)
 		return;
 
 #ifdef CONFIG_X86_LOCAL_APIC
-	if (m->inject_flags & MCJ_NMI_BROADCAST) {
+	if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) {
 		unsigned long start;
 		int cpu;
+
 		get_online_cpus();
 		cpumask_copy(mce_inject_cpumask, cpu_online_mask);
 		cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
@@ -151,13 +165,25 @@ static void raise_mce(struct mce *m)
 			    MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
 				cpumask_clear_cpu(cpu, mce_inject_cpumask);
 		}
-		if (!cpumask_empty(mce_inject_cpumask))
-			apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR);
+		if (!cpumask_empty(mce_inject_cpumask)) {
+			if (m->inject_flags & MCJ_IRQ_BRAODCAST) {
+				/*
+				 * don't wait because mce_irq_ipi is necessary
+				 * to be sync with following raise_local
+				 */
+				preempt_disable();
+				smp_call_function_many(mce_inject_cpumask,
+					mce_irq_ipi, NULL, 0);
+				preempt_enable();
+			} else if (m->inject_flags & MCJ_NMI_BROADCAST)
+				apic->send_IPI_mask(mce_inject_cpumask,
+						NMI_VECTOR);
+		}
 		start = jiffies;
 		while (!cpumask_empty(mce_inject_cpumask)) {
 			if (!time_before(jiffies, start + 2*HZ)) {
 				printk(KERN_ERR
-				"Timeout waiting for mce inject NMI %lx\n",
+				"Timeout waiting for mce inject %lx\n",
 					*cpumask_bits(mce_inject_cpumask));
 				break;
 			}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2af127d4c3d..cbe82b5918c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -95,13 +95,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int			cpu_missing;
 
-/*
- * CPU/chipset specific EDAC code can register a notifier call here to print
- * MCE errors in a human-readable form.
- */
-ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
-EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
-
 /* MCA banks polled by the period polling timer for corrected events */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
 	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
@@ -109,6 +102,12 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
 
 static DEFINE_PER_CPU(struct work_struct, mce_work);
 
+/*
+ * CPU/chipset specific EDAC code can register a notifier call here to print
+ * MCE errors in a human-readable form.
+ */
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+
 /* Do initial initialization of a struct mce */
 void mce_setup(struct mce *m)
 {
@@ -119,9 +118,7 @@ void mce_setup(struct mce *m)
 	m->time = get_seconds();
 	m->cpuvendor = boot_cpu_data.x86_vendor;
 	m->cpuid = cpuid_eax(1);
-#ifdef CONFIG_SMP
 	m->socketid = cpu_data(m->extcpu).phys_proc_id;
-#endif
 	m->apicid = cpu_data(m->extcpu).initial_apicid;
 	rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
 }
@@ -190,6 +187,57 @@ void mce_log(struct mce *mce)
 	set_bit(0, &mce_need_notify);
 }
 
+static void drain_mcelog_buffer(void)
+{
+	unsigned int next, i, prev = 0;
+
+	next = rcu_dereference_check_mce(mcelog.next);
+
+	do {
+		struct mce *m;
+
+		/* drain what was logged during boot */
+		for (i = prev; i < next; i++) {
+			unsigned long start = jiffies;
+			unsigned retries = 1;
+
+			m = &mcelog.entry[i];
+
+			while (!m->finished) {
+				if (time_after_eq(jiffies, start + 2*retries))
+					retries++;
+
+				cpu_relax();
+
+				if (!m->finished && retries >= 4) {
+					pr_err("MCE: skipping error being logged currently!\n");
+					break;
+				}
+			}
+			smp_rmb();
+			atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
+		}
+
+		memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m));
+		prev = next;
+		next = cmpxchg(&mcelog.next, prev, 0);
+	} while (next != prev);
+}
+
+
+void mce_register_decode_chain(struct notifier_block *nb)
+{
+	atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
+	drain_mcelog_buffer();
+}
+EXPORT_SYMBOL_GPL(mce_register_decode_chain);
+
+void mce_unregister_decode_chain(struct notifier_block *nb)
+{
+	atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
+}
+EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
+
 static void print_mce(struct mce *m)
 {
 	int ret = 0;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f5474218cff..1d76872b6a4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -64,11 +64,9 @@ struct threshold_bank {
 };
 static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
 
-#ifdef CONFIG_SMP
 static unsigned char shared_bank[NR_BANKS] = {
 	0, 0, 0, 0, 1
 };
-#endif
 
 static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */
 
@@ -202,10 +200,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 
 			if (!block)
 				per_cpu(bank_map, cpu) |= (1 << bank);
-#ifdef CONFIG_SMP
 			if (shared_bank[bank] && c->cpu_core_id)
 				break;
-#endif
+
 			offset = setup_APIC_mce(offset,
 						(high & MASK_LVTOFF_HI) >> 20);
 
@@ -531,7 +528,6 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 
 	sprintf(name, "threshold_bank%i", bank);
 
-#ifdef CONFIG_SMP
 	if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) {	/* symlink */
 		i = cpumask_first(cpu_llc_shared_mask(cpu));
 
@@ -558,7 +554,6 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 
 		goto out;
 	}
-#endif
 
 	b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
 	if (!b) {
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 787e06c84ea..39c6089891e 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -323,17 +323,6 @@ device_initcall(thermal_throttle_init_device);
 
 #endif /* CONFIG_SYSFS */
 
-/*
- * Set up the most two significant bit to notify mce log that this thermal
- * event type.
- * This is a temp solution. May be changed in the future with mce log
- * infrasture.
- */
-#define CORE_THROTTLED		(0)
-#define CORE_POWER_LIMIT	((__u64)1 << 62)
-#define PACKAGE_THROTTLED	((__u64)2 << 62)
-#define PACKAGE_POWER_LIMIT	((__u64)3 << 62)
-
 static void notify_thresholds(__u64 msr_val)
 {
 	/* check whether the interrupt handler is defined;
@@ -363,27 +352,23 @@ static void intel_thermal_interrupt(void)
 	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
 				THERMAL_THROTTLING_EVENT,
 				CORE_LEVEL) != 0)
-		mce_log_therm_throt_event(CORE_THROTTLED | msr_val);
+		mce_log_therm_throt_event(msr_val);
 
 	if (this_cpu_has(X86_FEATURE_PLN))
-		if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
+		therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
-					CORE_LEVEL) != 0)
-			mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
+					CORE_LEVEL);
 
 	if (this_cpu_has(X86_FEATURE_PTS)) {
 		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
-		if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
+		therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
 					THERMAL_THROTTLING_EVENT,
-					PACKAGE_LEVEL) != 0)
-			mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
+					PACKAGE_LEVEL);
 		if (this_cpu_has(X86_FEATURE_PLN))
-			if (therm_throt_process(msr_val &
+			therm_throt_process(msr_val &
 					PACKAGE_THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
-					PACKAGE_LEVEL) != 0)
-				mce_log_therm_throt_event(PACKAGE_POWER_LIMIT
-							  | msr_val);
+					PACKAGE_LEVEL);
 	}
 }
 
@@ -397,8 +382,8 @@ static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
 
 asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
 {
-	exit_idle();
 	irq_enter();
+	exit_idle();
 	inc_irq_stat(irq_thermal_count);
 	smp_thermal_vector();
 	irq_exit();
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index d746df2909c..aa578cadb94 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -19,8 +19,8 @@ void (*mce_threshold_vector)(void) = default_threshold_interrupt;
 
 asmlinkage void smp_threshold_interrupt(void)
 {
-	exit_idle();
 	irq_enter();
+	exit_idle();
 	inc_irq_stat(irq_threshold_count);
 	mce_threshold_vector();
 	irq_exit();
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bda212a001..5adce1040b1 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,18 +484,195 @@ static inline int is_x86_event(struct perf_event *event)
 	return event->pmu == &pmu;
 }
 
+/*
+ * Event scheduler state:
+ *
+ * Assign events iterating over all events and counters, beginning
+ * with events with least weights first. Keep the current iterator
+ * state in struct sched_state.
+ */
+struct sched_state {
+	int	weight;
+	int	event;		/* event index */
+	int	counter;	/* counter index */
+	int	unassigned;	/* number of events to be assigned left */
+	unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+};
+
+/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
+#define	SCHED_STATES_MAX	2
+
+struct perf_sched {
+	int			max_weight;
+	int			max_events;
+	struct event_constraint	**constraints;
+	struct sched_state	state;
+	int			saved_states;
+	struct sched_state	saved[SCHED_STATES_MAX];
+};
+
+/*
+ * Initialize interator that runs through all events and counters.
+ */
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+			    int num, int wmin, int wmax)
+{
+	int idx;
+
+	memset(sched, 0, sizeof(*sched));
+	sched->max_events	= num;
+	sched->max_weight	= wmax;
+	sched->constraints	= c;
+
+	for (idx = 0; idx < num; idx++) {
+		if (c[idx]->weight == wmin)
+			break;
+	}
+
+	sched->state.event	= idx;		/* start with min weight */
+	sched->state.weight	= wmin;
+	sched->state.unassigned	= num;
+}
+
+static void perf_sched_save_state(struct perf_sched *sched)
+{
+	if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
+		return;
+
+	sched->saved[sched->saved_states] = sched->state;
+	sched->saved_states++;
+}
+
+static bool perf_sched_restore_state(struct perf_sched *sched)
+{
+	if (!sched->saved_states)
+		return false;
+
+	sched->saved_states--;
+	sched->state = sched->saved[sched->saved_states];
+
+	/* continue with next counter: */
+	clear_bit(sched->state.counter++, sched->state.used);
+
+	return true;
+}
+
+/*
+ * Select a counter for the current event to schedule. Return true on
+ * success.
+ */
+static bool __perf_sched_find_counter(struct perf_sched *sched)
+{
+	struct event_constraint *c;
+	int idx;
+
+	if (!sched->state.unassigned)
+		return false;
+
+	if (sched->state.event >= sched->max_events)
+		return false;
+
+	c = sched->constraints[sched->state.event];
+
+	/* Prefer fixed purpose counters */
+	if (x86_pmu.num_counters_fixed) {
+		idx = X86_PMC_IDX_FIXED;
+		for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+			if (!__test_and_set_bit(idx, sched->state.used))
+				goto done;
+		}
+	}
+	/* Grab the first unused counter starting with idx */
+	idx = sched->state.counter;
+	for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
+		if (!__test_and_set_bit(idx, sched->state.used))
+			goto done;
+	}
+
+	return false;
+
+done:
+	sched->state.counter = idx;
+
+	if (c->overlap)
+		perf_sched_save_state(sched);
+
+	return true;
+}
+
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+	while (!__perf_sched_find_counter(sched)) {
+		if (!perf_sched_restore_state(sched))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Go through all unassigned events and find the next one to schedule.
+ * Take events with the least weight first. Return true on success.
+ */
+static bool perf_sched_next_event(struct perf_sched *sched)
+{
+	struct event_constraint *c;
+
+	if (!sched->state.unassigned || !--sched->state.unassigned)
+		return false;
+
+	do {
+		/* next event */
+		sched->state.event++;
+		if (sched->state.event >= sched->max_events) {
+			/* next weight */
+			sched->state.event = 0;
+			sched->state.weight++;
+			if (sched->state.weight > sched->max_weight)
+				return false;
+		}
+		c = sched->constraints[sched->state.event];
+	} while (c->weight != sched->state.weight);
+
+	sched->state.counter = 0;	/* start with first counter */
+
+	return true;
+}
+
+/*
+ * Assign a counter for each event.
+ */
+static int perf_assign_events(struct event_constraint **constraints, int n,
+			      int wmin, int wmax, int *assign)
+{
+	struct perf_sched sched;
+
+	perf_sched_init(&sched, constraints, n, wmin, wmax);
+
+	do {
+		if (!perf_sched_find_counter(&sched))
+			break;	/* failed */
+		if (assign)
+			assign[sched.state.event] = sched.state.counter;
+	} while (perf_sched_next_event(&sched));
+
+	return sched.state.unassigned;
+}
+
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
 	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	int i, j, w, wmax, num = 0;
+	int i, wmin, wmax, num = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
-	for (i = 0; i < n; i++) {
+	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
 		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
 		constraints[i] = c;
+		wmin = min(wmin, c->weight);
+		wmax = max(wmax, c->weight);
 	}
 
 	/*
@@ -521,60 +698,12 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		if (assign)
 			assign[i] = hwc->idx;
 	}
-	if (i == n)
-		goto done;
-
-	/*
-	 * begin slow path
-	 */
-
-	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
-	/*
-	 * weight = number of possible counters
-	 *
-	 * 1    = most constrained, only works on one counter
-	 * wmax = least constrained, works on any counter
-	 *
-	 * assign events to counters starting with most
-	 * constrained events.
-	 */
-	wmax = x86_pmu.num_counters;
+	/* slow path */
+	if (i != n)
+		num = perf_assign_events(constraints, n, wmin, wmax, assign);
 
 	/*
-	 * when fixed event counters are present,
-	 * wmax is incremented by 1 to account
-	 * for one more choice
-	 */
-	if (x86_pmu.num_counters_fixed)
-		wmax++;
-
-	for (w = 1, num = n; num && w <= wmax; w++) {
-		/* for each event */
-		for (i = 0; num && i < n; i++) {
-			c = constraints[i];
-			hwc = &cpuc->event_list[i]->hw;
-
-			if (c->weight != w)
-				continue;
-
-			for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
-				if (!test_bit(j, used_mask))
-					break;
-			}
-
-			if (j == X86_PMC_IDX_MAX)
-				break;
-
-			__set_bit(j, used_mask);
-
-			if (assign)
-				assign[i] = j;
-			num--;
-		}
-	}
-done:
-	/*
 	 * scheduling failed or is just a simulation,
 	 * free resources if necessary
 	 */
@@ -1119,6 +1248,7 @@ static void __init pmu_check_apic(void)
 
 static int __init init_hw_perf_events(void)
 {
+	struct x86_pmu_quirk *quirk;
 	struct event_constraint *c;
 	int err;
 
@@ -1147,8 +1277,8 @@ static int __init init_hw_perf_events(void)
 
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
-	if (x86_pmu.quirks)
-		x86_pmu.quirks();
+	for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
+		quirk->func();
 
 	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
 		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
@@ -1171,12 +1301,18 @@ static int __init init_hw_perf_events(void)
 
 	unconstrained = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
-				   0, x86_pmu.num_counters);
+				   0, x86_pmu.num_counters, 0);
 
 	if (x86_pmu.event_constraints) {
+		/*
+		 * event on fixed counter2 (REF_CYCLES) only works on this
+		 * counter, so do not extend mask to generic counters
+		 */
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if (c->cmask != X86_RAW_EVENT_MASK)
+			if (c->cmask != X86_RAW_EVENT_MASK
+			    || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) {
 				continue;
+			}
 
 			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
 			c->weight += x86_pmu.num_counters;
@@ -1566,3 +1702,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
 
 	return misc;
 }
+
+void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
+{
+	cap->version		= x86_pmu.version;
+	cap->num_counters_gp	= x86_pmu.num_counters;
+	cap->num_counters_fixed	= x86_pmu.num_counters_fixed;
+	cap->bit_width_gp	= x86_pmu.cntval_bits;
+	cap->bit_width_fixed	= x86_pmu.cntval_bits;
+	cap->events_mask	= (unsigned int)x86_pmu.events_maskl;
+	cap->events_mask_len	= x86_pmu.events_mask_len;
+}
+EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index b9698d40ac4..8944062f46e 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -45,6 +45,7 @@ struct event_constraint {
 	u64	code;
 	u64	cmask;
 	int	weight;
+	int	overlap;
 };
 
 struct amd_nb {
@@ -151,15 +152,40 @@ struct cpu_hw_events {
 	void				*kfree_on_online;
 };
 
-#define __EVENT_CONSTRAINT(c, n, m, w) {\
+#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
 	{ .idxmsk64 = (n) },		\
 	.code = (c),			\
 	.cmask = (m),			\
 	.weight = (w),			\
+	.overlap = (o),			\
 }
 
 #define EVENT_CONSTRAINT(c, n, m)	\
-	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
+
+/*
+ * The overlap flag marks event constraints with overlapping counter
+ * masks. This is the case if the counter mask of such an event is not
+ * a subset of any other counter mask of a constraint with an equal or
+ * higher weight, e.g.:
+ *
+ *  c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+ *  c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
+ *  c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
+ *
+ * The event scheduler may not select the correct counter in the first
+ * cycle because it needs to know which subsequent events will be
+ * scheduled. It may fail to schedule the events then. So we set the
+ * overlap flag for such constraints to give the scheduler a hint which
+ * events to select for counter rescheduling.
+ *
+ * Care must be taken as the rescheduling algorithm is O(n!) which
+ * will increase scheduling cycles for an over-commited system
+ * dramatically.  The number of such EVENT_CONSTRAINT_OVERLAP() macros
+ * and its counter masks must be kept at a minimum.
+ */
+#define EVENT_CONSTRAINT_OVERLAP(c, n, m)	\
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
 
 /*
  * Constraint on the Event code.
@@ -235,6 +261,11 @@ union perf_capabilities {
 	u64	capabilities;
 };
 
+struct x86_pmu_quirk {
+	struct x86_pmu_quirk *next;
+	void (*func)(void);
+};
+
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -259,6 +290,11 @@ struct x86_pmu {
 	int		num_counters_fixed;
 	int		cntval_bits;
 	u64		cntval_mask;
+	union {
+			unsigned long events_maskl;
+			unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
+	};
+	int		events_mask_len;
 	int		apic;
 	u64		max_period;
 	struct event_constraint *
@@ -268,7 +304,7 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 	struct event_constraint *event_constraints;
-	void		(*quirks)(void);
+	struct x86_pmu_quirk *quirks;
 	int		perfctr_second_write;
 
 	int		(*cpu_prepare)(int cpu);
@@ -309,6 +345,15 @@ struct x86_pmu {
 	struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
 };
 
+#define x86_add_quirk(func_)						\
+do {									\
+	static struct x86_pmu_quirk __quirk __initdata = {		\
+		.func = func_,						\
+	};								\
+	__quirk.next = x86_pmu.quirks;					\
+	x86_pmu.quirks = &__quirk;					\
+} while (0)
+
 #define ERF_NO_HT_SHARING	1
 #define ERF_HAS_RSP_1		2
 
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index aeefd45697a..0397b23be8e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = {
 static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
 static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
 static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
-static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
 static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
 static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 121f1be4da1..3bd37bdf1b8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -28,6 +28,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
   [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
   [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
+  [PERF_COUNT_HW_REF_CPU_CYCLES]	= 0x0300, /* pseudo-encoding */
 };
 
 static struct event_constraint intel_core_event_constraints[] __read_mostly =
@@ -45,12 +46,7 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-	/*
-	 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
-	 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
-	 * ratio between these counters.
-	 */
-	/* FIXED_EVENT_CONSTRAINT(0x013c, 2),  CPU_CLK_UNHALTED.REF */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 	INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
 	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
 	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -68,7 +64,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
 	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
 	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
@@ -90,7 +86,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
 	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
 	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -102,7 +98,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
 	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -125,7 +121,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 	EVENT_CONSTRAINT_END
 };
 
@@ -1519,7 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.guest_get_msrs		= intel_guest_get_msrs,
 };
 
-static void intel_clovertown_quirks(void)
+static __init void intel_clovertown_quirk(void)
 {
 	/*
 	 * PEBS is unreliable due to:
@@ -1545,19 +1541,60 @@ static void intel_clovertown_quirks(void)
 	x86_pmu.pebs_constraints = NULL;
 }
 
-static void intel_sandybridge_quirks(void)
+static __init void intel_sandybridge_quirk(void)
 {
 	printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
 	x86_pmu.pebs = 0;
 	x86_pmu.pebs_constraints = NULL;
 }
 
+static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
+	{ PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
+	{ PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
+	{ PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
+	{ PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
+	{ PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
+	{ PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
+	{ PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
+};
+
+static __init void intel_arch_events_quirk(void)
+{
+	int bit;
+
+	/* disable event that reported as not presend by cpuid */
+	for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
+		intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
+		printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n",
+				intel_arch_events_map[bit].name);
+	}
+}
+
+static __init void intel_nehalem_quirk(void)
+{
+	union cpuid10_ebx ebx;
+
+	ebx.full = x86_pmu.events_maskl;
+	if (ebx.split.no_branch_misses_retired) {
+		/*
+		 * Erratum AAJ80 detected, we work it around by using
+		 * the BR_MISP_EXEC.ANY event. This will over-count
+		 * branch-misses, but it's still much better than the
+		 * architectural event which is often completely bogus:
+		 */
+		intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+		ebx.split.no_branch_misses_retired = 0;
+		x86_pmu.events_maskl = ebx.full;
+		printk(KERN_INFO "CPU erratum AAJ80 worked around\n");
+	}
+}
+
 __init int intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
 	union cpuid10_eax eax;
+	union cpuid10_ebx ebx;
 	unsigned int unused;
-	unsigned int ebx;
 	int version;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -1574,8 +1611,8 @@ __init int intel_pmu_init(void)
 	 * Check whether the Architectural PerfMon supports
 	 * Branch Misses Retired hw_event or not.
 	 */
-	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
-	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
+	cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
+	if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
 		return -ENODEV;
 
 	version = eax.split.version_id;
@@ -1589,6 +1626,9 @@ __init int intel_pmu_init(void)
 	x86_pmu.cntval_bits		= eax.split.bit_width;
 	x86_pmu.cntval_mask		= (1ULL << eax.split.bit_width) - 1;
 
+	x86_pmu.events_maskl		= ebx.full;
+	x86_pmu.events_mask_len		= eax.split.mask_length;
+
 	/*
 	 * Quirk: v2 perfmon does not report fixed-purpose events, so
 	 * assume at least 3 events:
@@ -1608,6 +1648,8 @@ __init int intel_pmu_init(void)
 
 	intel_ds_init();
 
+	x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
+
 	/*
 	 * Install the hw-cache-events table:
 	 */
@@ -1617,7 +1659,7 @@ __init int intel_pmu_init(void)
 		break;
 
 	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
-		x86_pmu.quirks = intel_clovertown_quirks;
+		x86_add_quirk(intel_clovertown_quirk);
 	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
 	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
 	case 29: /* six-core 45 nm xeon "Dunnington" */
@@ -1651,17 +1693,8 @@ __init int intel_pmu_init(void)
 		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
 
-		if (ebx & 0x40) {
-			/*
-			 * Erratum AAJ80 detected, we work it around by using
-			 * the BR_MISP_EXEC.ANY event. This will over-count
-			 * branch-misses, but it's still much better than the
-			 * architectural event which is often completely bogus:
-			 */
-			intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+		x86_add_quirk(intel_nehalem_quirk);
 
-			pr_cont("erratum AAJ80 worked around, ");
-		}
 		pr_cont("Nehalem events, ");
 		break;
 
@@ -1701,7 +1734,7 @@ __init int intel_pmu_init(void)
 		break;
 
 	case 42: /* SandyBridge */
-		x86_pmu.quirks = intel_sandybridge_quirks;
+		x86_add_quirk(intel_sandybridge_quirk);
 	case 45: /* SandyBridge, "Romely-EP" */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
@@ -1738,5 +1771,6 @@ __init int intel_pmu_init(void)
 			break;
 		}
 	}
+
 	return 0;
 }
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
index 5abbea297e0..7b3fe56b1c2 100644
--- a/arch/x86/kernel/cpu/powerflags.c
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -16,5 +16,6 @@ const char *const x86_power_flags[32] = {
 	"100mhzsteps",
 	"hwpstate",
 	"",	/* tsc invariant mapped to constant_tsc */
-		/* nothing */
+	"cpb",  /* core performance boost */
+	"eff_freq_ro", /* Readonly aperf/mperf */
 };
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 14b23140e81..8022c668148 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -64,12 +64,10 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
 	struct cpuinfo_x86 *c = v;
-	unsigned int cpu = 0;
+	unsigned int cpu;
 	int i;
 
-#ifdef CONFIG_SMP
 	cpu = c->cpu_index;
-#endif
 	seq_printf(m, "processor\t: %u\n"
 		   "vendor_id\t: %s\n"
 		   "cpu family\t: %d\n"
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 303a0e48f07..8071e2f3d6e 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -738,35 +738,17 @@ core_initcall(e820_mark_nvs_memory);
 /*
  * pre allocated 4k and reserved it in memblock and e820_saved
  */
-u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
+u64 __init early_reserve_e820(u64 size, u64 align)
 {
-	u64 size = 0;
 	u64 addr;
-	u64 start;
 
-	for (start = startt; ; start += size) {
-		start = memblock_x86_find_in_range_size(start, &size, align);
-		if (start == MEMBLOCK_ERROR)
-			return 0;
-		if (size >= sizet)
-			break;
+	addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
+	if (addr) {
+		e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED);
+		printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
+		update_e820_saved();
 	}
 
-#ifdef CONFIG_X86_32
-	if (start >= MAXMEM)
-		return 0;
-	if (start + size > MAXMEM)
-		size = MAXMEM - start;
-#endif
-
-	addr = round_down(start + size - sizet, align);
-	if (addr < start)
-		return 0;
-	memblock_x86_reserve_range(addr, addr + sizet, "new next");
-	e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
-	printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
-	update_e820_saved();
-
 	return addr;
 }
 
@@ -1090,7 +1072,7 @@ void __init memblock_x86_fill(void)
 	 * We are safe to enable resizing, beause memblock_x86_fill()
 	 * is rather later for x86
 	 */
-	memblock_can_resize = 1;
+	memblock_allow_resize();
 
 	for (i = 0; i < e820.nr_map; i++) {
 		struct e820entry *ei = &e820.map[i];
@@ -1105,22 +1087,36 @@ void __init memblock_x86_fill(void)
 		memblock_add(ei->addr, ei->size);
 	}
 
-	memblock_analyze();
 	memblock_dump_all();
 }
 
 void __init memblock_find_dma_reserve(void)
 {
 #ifdef CONFIG_X86_64
-	u64 free_size_pfn;
-	u64 mem_size_pfn;
+	u64 nr_pages = 0, nr_free_pages = 0;
+	unsigned long start_pfn, end_pfn;
+	phys_addr_t start, end;
+	int i;
+	u64 u;
+
 	/*
 	 * need to find out used area below MAX_DMA_PFN
 	 * need to use memblock to get free size in [0, MAX_DMA_PFN]
 	 * at first, and assume boot_mem will not take below MAX_DMA_PFN
 	 */
-	mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
-	free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
-	set_dma_reserve(mem_size_pfn - free_size_pfn);
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
+		start_pfn = min_t(unsigned long, start_pfn, MAX_DMA_PFN);
+		end_pfn = min_t(unsigned long, end_pfn, MAX_DMA_PFN);
+		nr_pages += end_pfn - start_pfn;
+	}
+
+	for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) {
+		start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN);
+		end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN);
+		if (start_pfn < end_pfn)
+			nr_free_pages += end_pfn - start_pfn;
+	}
+
+	set_dma_reserve(nr_pages - nr_free_pages);
 #endif
 }
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f3f6f534400..22d0e21b4dd 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -625,6 +625,8 @@ work_notifysig:				# deal with pending signals and
 	movl %esp, %eax
 	jne work_notifysig_v86		# returning to kernel-space or
 					# vm86-space
+	TRACE_IRQS_ON
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	xorl %edx, %edx
 	call do_notify_resume
 	jmp resume_userspace_sig
@@ -638,6 +640,8 @@ work_notifysig_v86:
 #else
 	movl %esp, %eax
 #endif
+	TRACE_IRQS_ON
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	xorl %edx, %edx
 	call do_notify_resume
 	jmp resume_userspace_sig
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index faf8d5e74b0..a20e1cb9dc8 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -221,7 +221,7 @@ ENDPROC(native_usergs_sysret64)
 	/*CFI_REL_OFFSET	ss,0*/
 	pushq_cfi %rax /* rsp */
 	CFI_REL_OFFSET	rsp,0
-	pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
+	pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
 	/*CFI_REL_OFFSET	rflags,0*/
 	pushq_cfi $__KERNEL_CS /* cs */
 	/*CFI_REL_OFFSET	cs,0*/
@@ -411,7 +411,7 @@ ENTRY(ret_from_fork)
 	RESTORE_REST
 
 	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
-	je   int_ret_from_sys_call
+	jz   retint_restore_args
 
 	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
 	jnz  int_ret_from_sys_call
@@ -465,7 +465,7 @@ ENTRY(system_call)
 	 * after the swapgs, so that it can do the swapgs
 	 * for the guest and jump here on syscall.
 	 */
-ENTRY(system_call_after_swapgs)
+GLOBAL(system_call_after_swapgs)
 
 	movq	%rsp,PER_CPU_VAR(old_rsp)
 	movq	PER_CPU_VAR(kernel_stack),%rsp
@@ -478,8 +478,7 @@ ENTRY(system_call_after_swapgs)
 	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
 	movq  %rcx,RIP-ARGOFFSET(%rsp)
 	CFI_REL_OFFSET rip,RIP-ARGOFFSET
-	GET_THREAD_INFO(%rcx)
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz tracesys
 system_call_fastpath:
 	cmpq $__NR_syscall_max,%rax
@@ -496,10 +495,9 @@ ret_from_sys_call:
 	/* edi:	flagmask */
 sysret_check:
 	LOCKDEP_SYS_EXIT
-	GET_THREAD_INFO(%rcx)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	movl TI_flags(%rcx),%edx
+	movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
 	andl %edi,%edx
 	jnz  sysret_careful
 	CFI_REMEMBER_STATE
@@ -583,7 +581,7 @@ sysret_audit:
 	/* Do syscall tracing */
 tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jz auditsys
 #endif
 	SAVE_REST
@@ -612,8 +610,6 @@ tracesys:
 GLOBAL(int_ret_from_sys_call)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl $3,CS-ARGOFFSET(%rsp)
-	je retint_restore_args
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi:	mask to check */
 GLOBAL(int_with_check)
@@ -953,6 +949,7 @@ END(common_interrupt)
 ENTRY(\sym)
 	INTR_FRAME
 	pushq_cfi $~(\num)
+.Lcommon_\sym:
 	interrupt \do_sym
 	jmp ret_from_intr
 	CFI_ENDPROC
@@ -976,13 +973,21 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
 	x86_platform_ipi smp_x86_platform_ipi
 
 #ifdef CONFIG_SMP
-.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+	ALIGN
+	INTR_FRAME
+.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
 	16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
 .if NUM_INVALIDATE_TLB_VECTORS > \idx
-apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
-	invalidate_interrupt\idx smp_invalidate_interrupt
+ENTRY(invalidate_interrupt\idx)
+	pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx)
+	jmp .Lcommon_invalidate_interrupt0
+	CFI_ADJUST_CFA_OFFSET -8
+END(invalidate_interrupt\idx)
 .endif
 .endr
+	CFI_ENDPROC
+apicinterrupt INVALIDATE_TLB_VECTOR_START, \
+	invalidate_interrupt0, smp_invalidate_interrupt
 #endif
 
 apicinterrupt THRESHOLD_APIC_VECTOR \
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index af0699ba48c..48d9d4ea102 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -52,5 +52,5 @@ void __init reserve_ebda_region(void)
 		lowmem = 0x9f000;
 
 	/* reserve all memory between lowmem and the 1MB mark */
-	memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved");
+	memblock_reserve(lowmem, 0x100000 - lowmem);
 }
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 3bb08509a7a..51ff18616d5 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -31,9 +31,8 @@ static void __init i386_default_early_setup(void)
 
 void __init i386_start_kernel(void)
 {
-	memblock_init();
-
-	memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
+	memblock_reserve(__pa_symbol(&_text),
+			 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	/* Reserve INITRD */
@@ -42,7 +41,7 @@ void __init i386_start_kernel(void)
 		u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 		u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
 		u64 ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);
-		memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK");
+		memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
 	}
 #endif
 
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 5655c2272ad..3a3b779f41d 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -98,9 +98,8 @@ void __init x86_64_start_reservations(char *real_mode_data)
 {
 	copy_bootdata(__va(real_mode_data));
 
-	memblock_init();
-
-	memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
+	memblock_reserve(__pa_symbol(&_text),
+			 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	/* Reserve INITRD */
@@ -109,7 +108,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
 		unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
 		unsigned long ramdisk_size  = boot_params.hdr.ramdisk_size;
 		unsigned long ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);
-		memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK");
+		memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
 	}
 #endif
 
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 1bb0bf4d92c..07b0a56a754 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -32,8 +32,6 @@
 #define HPET_MIN_CYCLES			128
 #define HPET_MIN_PROG_DELTA		(HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
 
-#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
-
 /*
  * HPET address is set in acpi/boot.c, when an ACPI entry exists
  */
@@ -55,6 +53,11 @@ struct hpet_dev {
 	char				name[10];
 };
 
+inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
+{
+	return container_of(evtdev, struct hpet_dev, evt);
+}
+
 inline unsigned int hpet_readl(unsigned int a)
 {
 	return readl(hpet_virt_address + a);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 429e0c92924..7943e0c21bd 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -74,6 +74,10 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
 	seq_printf(p, "  IRQ work interrupts\n");
+	seq_printf(p, "%*s: ", prec, "RTR");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
+	seq_printf(p, "  APIC ICR read retries\n");
 #endif
 	if (x86_platform_ipi_callback) {
 		seq_printf(p, "%*s: ", prec, "PLT");
@@ -136,6 +140,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += irq_stats(cpu)->irq_spurious_count;
 	sum += irq_stats(cpu)->apic_perf_irqs;
 	sum += irq_stats(cpu)->apic_irq_work_irqs;
+	sum += irq_stats(cpu)->icr_read_retry_count;
 #endif
 	if (x86_platform_ipi_callback)
 		sum += irq_stats(cpu)->x86_platform_ipis;
@@ -181,8 +186,8 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
 	unsigned vector = ~regs->orig_ax;
 	unsigned irq;
 
-	exit_idle();
 	irq_enter();
+	exit_idle();
 
 	irq = __this_cpu_read(vector_irq[vector]);
 
@@ -209,10 +214,10 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
 
 	ack_APIC_irq();
 
-	exit_idle();
-
 	irq_enter();
 
+	exit_idle();
+
 	inc_irq_stat(x86_platform_ipis);
 
 	if (x86_platform_ipi_callback)
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index ea9d5f2f13e..2889b3d4388 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -50,7 +50,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 	put_online_cpus();
 }
 
-void arch_jump_label_transform_static(struct jump_entry *entry,
+__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
 				      enum jump_label_type type)
 {
 	__jump_label_transform(entry, type, text_poke_early);
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index d494799aafc..fe86493f3ed 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -1,14 +1,18 @@
 /*
  *  AMD CPU Microcode Update Driver for Linux
- *  Copyright (C) 2008 Advanced Micro Devices Inc.
+ *  Copyright (C) 2008-2011 Advanced Micro Devices Inc.
  *
  *  Author: Peter Oruba <peter.oruba@amd.com>
  *
  *  Based on work by:
  *  Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
  *
- *  This driver allows to upgrade microcode on AMD
- *  family 0x10 and 0x11 processors.
+ *  Maintainers:
+ *  Andreas Herrmann <andreas.herrmann3@amd.com>
+ *  Borislav Petkov <borislav.petkov@amd.com>
+ *
+ *  This driver allows to upgrade microcode on F10h AMD
+ *  CPUs and later.
  *
  *  Licensed under the terms of the GNU General Public
  *  License version 2. See file COPYING for details.
@@ -71,6 +75,9 @@ struct microcode_amd {
 
 static struct equiv_cpu_entry *equiv_cpu_table;
 
+/* page-sized ucode patch buffer */
+void *patch;
+
 static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -86,27 +93,76 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 	return 0;
 }
 
-static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr,
-				  int rev)
+static unsigned int verify_ucode_size(int cpu, u32 patch_size,
+				      unsigned int size)
 {
-	unsigned int current_cpu_id;
-	u16 equiv_cpu_id = 0;
-	unsigned int i = 0;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	u32 max_size;
+
+#define F1XH_MPB_MAX_SIZE 2048
+#define F14H_MPB_MAX_SIZE 1824
+#define F15H_MPB_MAX_SIZE 4096
+
+	switch (c->x86) {
+	case 0x14:
+		max_size = F14H_MPB_MAX_SIZE;
+		break;
+	case 0x15:
+		max_size = F15H_MPB_MAX_SIZE;
+		break;
+	default:
+		max_size = F1XH_MPB_MAX_SIZE;
+		break;
+	}
+
+	if (patch_size > min_t(u32, size, max_size)) {
+		pr_err("patch size mismatch\n");
+		return 0;
+	}
+
+	return patch_size;
+}
+
+static u16 find_equiv_id(void)
+{
+	unsigned int current_cpu_id, i = 0;
 
 	BUG_ON(equiv_cpu_table == NULL);
+
 	current_cpu_id = cpuid_eax(0x00000001);
 
 	while (equiv_cpu_table[i].installed_cpu != 0) {
-		if (current_cpu_id == equiv_cpu_table[i].installed_cpu) {
-			equiv_cpu_id = equiv_cpu_table[i].equiv_cpu;
-			break;
-		}
+		if (current_cpu_id == equiv_cpu_table[i].installed_cpu)
+			return equiv_cpu_table[i].equiv_cpu;
+
 		i++;
 	}
+	return 0;
+}
 
+/*
+ * we signal a good patch is found by returning its size > 0
+ */
+static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
+				  unsigned int leftover_size, int rev,
+				  unsigned int *current_size)
+{
+	struct microcode_header_amd *mc_hdr;
+	unsigned int actual_size;
+	u16 equiv_cpu_id;
+
+	/* size of the current patch we're staring at */
+	*current_size = *(u32 *)(ucode_ptr + 4) + SECTION_HDR_SIZE;
+
+	equiv_cpu_id = find_equiv_id();
 	if (!equiv_cpu_id)
 		return 0;
 
+	/*
+	 * let's look at the patch header itself now
+	 */
+	mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE);
+
 	if (mc_hdr->processor_rev_id != equiv_cpu_id)
 		return 0;
 
@@ -120,7 +176,20 @@ static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr,
 	if (mc_hdr->patch_id <= rev)
 		return 0;
 
-	return 1;
+	/*
+	 * now that the header looks sane, verify its size
+	 */
+	actual_size = verify_ucode_size(cpu, *current_size, leftover_size);
+	if (!actual_size)
+		return 0;
+
+	/* clear the patch buffer */
+	memset(patch, 0, PAGE_SIZE);
+
+	/* all looks ok, get the binary patch */
+	get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size);
+
+	return actual_size;
 }
 
 static int apply_microcode_amd(int cpu)
@@ -155,63 +224,6 @@ static int apply_microcode_amd(int cpu)
 	return 0;
 }
 
-static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size)
-{
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-	u32 max_size, actual_size;
-
-#define F1XH_MPB_MAX_SIZE 2048
-#define F14H_MPB_MAX_SIZE 1824
-#define F15H_MPB_MAX_SIZE 4096
-
-	switch (c->x86) {
-	case 0x14:
-		max_size = F14H_MPB_MAX_SIZE;
-		break;
-	case 0x15:
-		max_size = F15H_MPB_MAX_SIZE;
-		break;
-	default:
-		max_size = F1XH_MPB_MAX_SIZE;
-		break;
-	}
-
-	actual_size = *(u32 *)(buf + 4);
-
-	if (actual_size + SECTION_HDR_SIZE > size || actual_size > max_size) {
-		pr_err("section size mismatch\n");
-		return 0;
-	}
-
-	return actual_size;
-}
-
-static struct microcode_header_amd *
-get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size)
-{
-	struct microcode_header_amd *mc = NULL;
-	unsigned int actual_size = 0;
-
-	if (*(u32 *)buf != UCODE_UCODE_TYPE) {
-		pr_err("invalid type field in container file section header\n");
-		goto out;
-	}
-
-	actual_size = verify_ucode_size(cpu, buf, size);
-	if (!actual_size)
-		goto out;
-
-	mc = vzalloc(actual_size);
-	if (!mc)
-		goto out;
-
-	get_ucode_data(mc, buf + SECTION_HDR_SIZE, actual_size);
-	*mc_size = actual_size + SECTION_HDR_SIZE;
-
-out:
-	return mc;
-}
-
 static int install_equiv_cpu_table(const u8 *buf)
 {
 	unsigned int *ibuf = (unsigned int *)buf;
@@ -247,36 +259,38 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	struct microcode_header_amd *mc_hdr = NULL;
-	unsigned int mc_size, leftover;
+	unsigned int mc_size, leftover, current_size = 0;
 	int offset;
 	const u8 *ucode_ptr = data;
 	void *new_mc = NULL;
 	unsigned int new_rev = uci->cpu_sig.rev;
-	enum ucode_state state = UCODE_OK;
+	enum ucode_state state = UCODE_ERROR;
 
 	offset = install_equiv_cpu_table(ucode_ptr);
 	if (offset < 0) {
 		pr_err("failed to create equivalent cpu table\n");
-		return UCODE_ERROR;
+		goto out;
 	}
-
 	ucode_ptr += offset;
 	leftover = size - offset;
 
-	while (leftover) {
-		mc_hdr = get_next_ucode(cpu, ucode_ptr, leftover, &mc_size);
-		if (!mc_hdr)
-			break;
+	if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) {
+		pr_err("invalid type field in container file section header\n");
+		goto free_table;
+	}
 
-		if (get_matching_microcode(cpu, mc_hdr, new_rev)) {
-			vfree(new_mc);
+	while (leftover) {
+		mc_size = get_matching_microcode(cpu, ucode_ptr, leftover,
+						 new_rev, &current_size);
+		if (mc_size) {
+			mc_hdr  = patch;
+			new_mc  = patch;
 			new_rev = mc_hdr->patch_id;
-			new_mc  = mc_hdr;
-		} else
-			vfree(mc_hdr);
+			goto out_ok;
+		}
 
-		ucode_ptr += mc_size;
-		leftover  -= mc_size;
+		ucode_ptr += current_size;
+		leftover  -= current_size;
 	}
 
 	if (!new_mc) {
@@ -284,19 +298,16 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
 		goto free_table;
 	}
 
-	if (!leftover) {
-		vfree(uci->mc);
-		uci->mc = new_mc;
-		pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
-			 cpu, uci->cpu_sig.rev, new_rev);
-	} else {
-		vfree(new_mc);
-		state = UCODE_ERROR;
-	}
+out_ok:
+	uci->mc = new_mc;
+	state = UCODE_OK;
+	pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
+		 cpu, uci->cpu_sig.rev, new_rev);
 
 free_table:
 	free_equiv_cpu_table();
 
+out:
 	return state;
 }
 
@@ -337,7 +348,6 @@ static void microcode_fini_cpu_amd(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
-	vfree(uci->mc);
 	uci->mc = NULL;
 }
 
@@ -351,5 +361,14 @@ static struct microcode_ops microcode_amd_ops = {
 
 struct microcode_ops * __init init_amd_microcode(void)
 {
+	patch = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!patch)
+		return NULL;
+
 	return &microcode_amd_ops;
 }
+
+void __exit exit_amd_microcode(void)
+{
+	free_page((unsigned long)patch);
+}
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 9d46f5e43b5..9302e2d0eb4 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -563,6 +563,8 @@ module_init(microcode_init);
 
 static void __exit microcode_exit(void)
 {
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
 	microcode_dev_exit();
 
 	unregister_hotcpu_notifier(&mc_cpu_notifier);
@@ -580,6 +582,9 @@ static void __exit microcode_exit(void)
 
 	microcode_ops = NULL;
 
+	if (c->x86_vendor == X86_VENDOR_AMD)
+		exit_amd_microcode();
+
 	pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
 }
 module_exit(microcode_exit);
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 0741b062a30..ca470e4c92d 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -564,9 +564,7 @@ void __init default_get_smp_config(unsigned int early)
 
 static void __init smp_reserve_memory(struct mpf_intel *mpf)
 {
-	unsigned long size = get_mpc_size(mpf->physptr);
-
-	memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc");
+	memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr));
 }
 
 static int __init smp_scan_config(unsigned long base, unsigned long length)
@@ -595,7 +593,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
 			       mpf, (u64)virt_to_phys(mpf));
 
 			mem = virt_to_phys(mpf);
-			memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf");
+			memblock_reserve(mem, sizeof(*mpf));
 			if (mpf->physptr)
 				smp_reserve_memory(mpf);
 
@@ -836,10 +834,8 @@ early_param("alloc_mptable", parse_alloc_mptable_opt);
 
 void __init early_reserve_e820_mpc_new(void)
 {
-	if (enable_update_mptable && alloc_mptable) {
-		u64 startt = 0;
-		mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
-	}
+	if (enable_update_mptable && alloc_mptable)
+		mpc_new_phys = early_reserve_e820(mpc_new_length, 4);
 }
 
 static int __init update_mp_table(void)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ee5d4fbd53b..15763af7bfe 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -293,7 +293,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 	regs.orig_ax = -1;
 	regs.ip = (unsigned long) kernel_thread_helper;
 	regs.cs = __KERNEL_CS | get_kernel_rpl();
-	regs.flags = X86_EFLAGS_IF | 0x2;
+	regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
 
 	/* Ok, create the new process.. */
 	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 795b79f984c..485204f58cd 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -99,7 +99,8 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched()) {
 
 			check_pgt_cache();
@@ -116,7 +117,8 @@ void cpu_idle(void)
 				pm_idle();
 			start_critical_timings();
 		}
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3bd7e6eebf3..9b9fe4a85c8 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -122,7 +122,7 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
 		while (!need_resched()) {
 
 			rmb();
@@ -139,8 +139,14 @@ void cpu_idle(void)
 			enter_idle();
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
+
+			/* enter_idle() needs rcu for notifiers */
+			rcu_idle_enter();
+
 			if (cpuidle_idle_call())
 				pm_idle();
+
+			rcu_idle_exit();
 			start_critical_timings();
 
 			/* In many cases the interrupt that ended idle
@@ -149,7 +155,7 @@ void cpu_idle(void)
 			__exit_idle();
 		}
 
-		tick_nohz_restart_sched_tick();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
@@ -293,13 +299,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
-		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
+						  IO_BITMAP_BYTES, GFP_KERNEL);
 		if (!p->thread.io_bitmap_ptr) {
 			p->thread.io_bitmap_max = 0;
 			return -ENOMEM;
 		}
-		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
-				IO_BITMAP_BYTES);
 		set_tsk_thread_flag(p, TIF_IO_BITMAP);
 	}
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 82528799c5d..89a04c7b5bb 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -749,7 +749,8 @@ put:
 /*
  * Handle PTRACE_POKEUSR calls for the debug register area.
  */
-int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
+static int ptrace_set_debugreg(struct task_struct *tsk, int n,
+			       unsigned long val)
 {
 	struct thread_struct *thread = &(tsk->thread);
 	int rc = 0;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cf0ef986cb6..d05444ac2ae 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -306,7 +306,8 @@ static void __init cleanup_highmap(void)
 static void __init reserve_brk(void)
 {
 	if (_brk_end > _brk_start)
-		memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK");
+		memblock_reserve(__pa(_brk_start),
+				 __pa(_brk_end) - __pa(_brk_start));
 
 	/* Mark brk area as locked down and no longer taking any
 	   new allocations */
@@ -331,13 +332,13 @@ static void __init relocate_initrd(void)
 	ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
 					 PAGE_SIZE);
 
-	if (ramdisk_here == MEMBLOCK_ERROR)
+	if (!ramdisk_here)
 		panic("Cannot find place for new RAMDISK of size %lld\n",
 			 ramdisk_size);
 
 	/* Note: this includes all the lowmem currently occupied by
 	   the initrd, we rely on that fact to keep the data intact. */
-	memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK");
+	memblock_reserve(ramdisk_here, area_size);
 	initrd_start = ramdisk_here + PAGE_OFFSET;
 	initrd_end   = initrd_start + ramdisk_size;
 	printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
@@ -393,7 +394,7 @@ static void __init reserve_initrd(void)
 	initrd_start = 0;
 
 	if (ramdisk_size >= (end_of_lowmem>>1)) {
-		memblock_x86_free_range(ramdisk_image, ramdisk_end);
+		memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
 		printk(KERN_ERR "initrd too large to handle, "
 		       "disabling initrd\n");
 		return;
@@ -416,7 +417,7 @@ static void __init reserve_initrd(void)
 
 	relocate_initrd();
 
-	memblock_x86_free_range(ramdisk_image, ramdisk_end);
+	memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
 }
 #else
 static void __init reserve_initrd(void)
@@ -490,15 +491,13 @@ static void __init memblock_x86_reserve_range_setup_data(void)
 {
 	struct setup_data *data;
 	u64 pa_data;
-	char buf[32];
 
 	if (boot_params.hdr.version < 0x0209)
 		return;
 	pa_data = boot_params.hdr.setup_data;
 	while (pa_data) {
 		data = early_memremap(pa_data, sizeof(*data));
-		sprintf(buf, "setup data %x", data->type);
-		memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf);
+		memblock_reserve(pa_data, sizeof(*data) + data->len);
 		pa_data = data->next;
 		early_iounmap(data, sizeof(*data));
 	}
@@ -554,7 +553,7 @@ static void __init reserve_crashkernel(void)
 		crash_base = memblock_find_in_range(alignment,
 			       CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
 
-		if (crash_base == MEMBLOCK_ERROR) {
+		if (!crash_base) {
 			pr_info("crashkernel reservation failed - No suitable area found.\n");
 			return;
 		}
@@ -568,7 +567,7 @@ static void __init reserve_crashkernel(void)
 			return;
 		}
 	}
-	memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL");
+	memblock_reserve(crash_base, crash_size);
 
 	printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
 			"for crashkernel (System RAM: %ldMB)\n",
@@ -626,7 +625,7 @@ static __init void reserve_ibft_region(void)
 	addr = find_ibft_region(&size);
 
 	if (size)
-		memblock_x86_reserve_range(addr, addr + size, "* ibft");
+		memblock_reserve(addr, size);
 }
 
 static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9f548cb4a95..e38e21754ee 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -840,7 +840,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 	pr_debug("++++++++++++++++++++=_---CPU UP  %u\n", cpu);
 
 	if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
-	    !physid_isset(apicid, phys_cpu_present_map)) {
+	    !physid_isset(apicid, phys_cpu_present_map) ||
+	    (!x2apic_mode && apicid >= 255)) {
 		printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu);
 		return -EINVAL;
 	}
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index a91ae7709b4..a73b61055ad 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -14,11 +14,11 @@ void __init setup_trampolines(void)
 
 	/* Has to be in very low memory so we can execute real-mode AP code. */
 	mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
-	if (mem == MEMBLOCK_ERROR)
+	if (!mem)
 		panic("Cannot allocate trampoline\n");
 
 	x86_trampoline_base = __va(mem);
-	memblock_x86_reserve_range(mem, mem + size, "TRAMPOLINE");
+	memblock_reserve(mem, size);
 
 	printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
 	       x86_trampoline_base, (unsigned long long)mem, size);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a8e3eb83466..fa1191fb679 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -306,15 +306,10 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 			== NOTIFY_STOP)
 		return;
 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
-#ifdef CONFIG_KPROBES
+
 	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
 			== NOTIFY_STOP)
 		return;
-#else
-	if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
-			== NOTIFY_STOP)
-		return;
-#endif
 
 	preempt_conditional_sti(regs);
 	do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index db483369f10..2c9cf0fd78f 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -35,7 +35,7 @@ static int __read_mostly tsc_unstable;
    erroneous rdtsc usage on !cpu_has_tsc processors */
 static int __read_mostly tsc_disabled = -1;
 
-static int tsc_clocksource_reliable;
+int tsc_clocksource_reliable;
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
@@ -178,11 +178,11 @@ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
 }
 
 #define CAL_MS		10
-#define CAL_LATCH	(CLOCK_TICK_RATE / (1000 / CAL_MS))
+#define CAL_LATCH	(PIT_TICK_RATE / (1000 / CAL_MS))
 #define CAL_PIT_LOOPS	1000
 
 #define CAL2_MS		50
-#define CAL2_LATCH	(CLOCK_TICK_RATE / (1000 / CAL2_MS))
+#define CAL2_LATCH	(PIT_TICK_RATE / (1000 / CAL2_MS))
 #define CAL2_PIT_LOOPS	5000
 
 
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 0aa5fed8b9e..9eba29b46cb 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -113,7 +113,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
 	if (unsynchronized_tsc())
 		return;
 
-	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
+	if (tsc_clocksource_reliable) {
 		if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
 			pr_info(
 			"Skipped synchronization checks as TSC is reliable.\n");
@@ -172,7 +172,7 @@ void __cpuinit check_tsc_sync_target(void)
 {
 	int cpus = 2;
 
-	if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
+	if (unsynchronized_tsc() || tsc_clocksource_reliable)
 		return;
 
 	/*
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index e4d4a22e8b9..b07ba939356 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -57,7 +57,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
 	.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
 };
 
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE;
+static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
 
 static int __init vsyscall_setup(char *str)
 {
@@ -140,11 +140,40 @@ static int addr_to_vsyscall_nr(unsigned long addr)
 	return nr;
 }
 
+static bool write_ok_or_segv(unsigned long ptr, size_t size)
+{
+	/*
+	 * XXX: if access_ok, get_user, and put_user handled
+	 * sig_on_uaccess_error, this could go away.
+	 */
+
+	if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) {
+		siginfo_t info;
+		struct thread_struct *thread = &current->thread;
+
+		thread->error_code	= 6;  /* user fault, no page, write */
+		thread->cr2		= ptr;
+		thread->trap_no		= 14;
+
+		memset(&info, 0, sizeof(info));
+		info.si_signo		= SIGSEGV;
+		info.si_errno		= 0;
+		info.si_code		= SEGV_MAPERR;
+		info.si_addr		= (void __user *)ptr;
+
+		force_sig_info(SIGSEGV, &info, current);
+		return false;
+	} else {
+		return true;
+	}
+}
+
 bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 {
 	struct task_struct *tsk;
 	unsigned long caller;
 	int vsyscall_nr;
+	int prev_sig_on_uaccess_error;
 	long ret;
 
 	/*
@@ -180,35 +209,65 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 	if (seccomp_mode(&tsk->seccomp))
 		do_exit(SIGKILL);
 
+	/*
+	 * With a real vsyscall, page faults cause SIGSEGV.  We want to
+	 * preserve that behavior to make writing exploits harder.
+	 */
+	prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
+	current_thread_info()->sig_on_uaccess_error = 1;
+
+	/*
+	 * 0 is a valid user pointer (in the access_ok sense) on 32-bit and
+	 * 64-bit, so we don't need to special-case it here.  For all the
+	 * vsyscalls, 0 means "don't write anything" not "write it at
+	 * address 0".
+	 */
+	ret = -EFAULT;
 	switch (vsyscall_nr) {
 	case 0:
+		if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
+		    !write_ok_or_segv(regs->si, sizeof(struct timezone)))
+			break;
+
 		ret = sys_gettimeofday(
 			(struct timeval __user *)regs->di,
 			(struct timezone __user *)regs->si);
 		break;
 
 	case 1:
+		if (!write_ok_or_segv(regs->di, sizeof(time_t)))
+			break;
+
 		ret = sys_time((time_t __user *)regs->di);
 		break;
 
 	case 2:
+		if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
+		    !write_ok_or_segv(regs->si, sizeof(unsigned)))
+			break;
+
 		ret = sys_getcpu((unsigned __user *)regs->di,
 				 (unsigned __user *)regs->si,
 				 0);
 		break;
 	}
 
+	current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
+
 	if (ret == -EFAULT) {
-		/*
-		 * Bad news -- userspace fed a bad pointer to a vsyscall.
-		 *
-		 * With a real vsyscall, that would have caused SIGSEGV.
-		 * To make writing reliable exploits using the emulated
-		 * vsyscalls harder, generate SIGSEGV here as well.
-		 */
+		/* Bad news -- userspace fed a bad pointer to a vsyscall. */
 		warn_bad_vsyscall(KERN_INFO, regs,
 				  "vsyscall fault (exploit attempt?)");
-		goto sigsegv;
+
+		/*
+		 * If we failed to generate a signal for any reason,
+		 * generate one here.  (This should be impossible.)
+		 */
+		if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
+				 !sigismember(&tsk->pending.signal, SIGSEGV)))
+			goto sigsegv;
+
+		return true;  /* Don't emulate the ret. */
 	}
 
 	regs->ax = ret;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index c1d6cd54939..91f83e21b98 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -92,6 +92,7 @@ struct x86_init_ops x86_init __initdata = {
 
 struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
 	.setup_percpu_clockev		= setup_secondary_APIC_clock,
+	.fixup_cpu_id			= x86_default_fixup_cpu_id,
 };
 
 static void default_nmi_init(void) { };
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
index 46fc4ee09fc..88ad5fbda6e 100644
--- a/arch/x86/lib/inat.c
+++ b/arch/x86/lib/inat.c
@@ -82,9 +82,16 @@ insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
 	const insn_attr_t *table;
 	if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
 		return 0;
-	table = inat_avx_tables[vex_m][vex_p];
+	/* At first, this checks the master table */
+	table = inat_avx_tables[vex_m][0];
 	if (!table)
 		return 0;
+	if (!inat_is_group(table[opcode]) && vex_p) {
+		/* If this is not a group, get attribute directly */
+		table = inat_avx_tables[vex_m][vex_p];
+		if (!table)
+			return 0;
+	}
 	return table[opcode];
 }
 
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 374562ed670..5a1f9f3e3fb 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -202,7 +202,7 @@ void insn_get_opcode(struct insn *insn)
 		m = insn_vex_m_bits(insn);
 		p = insn_vex_p_bits(insn);
 		insn->attr = inat_get_avx_attribute(op, m, p);
-		if (!inat_accept_vex(insn->attr))
+		if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
 			insn->attr = 0;	/* This instruction is bad */
 		goto end;	/* VEX has only 1 byte for opcode */
 	}
@@ -249,6 +249,8 @@ void insn_get_modrm(struct insn *insn)
 			pfx = insn_last_prefix(insn);
 			insn->attr = inat_get_group_attribute(mod, pfx,
 							      insn->attr);
+			if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
+				insn->attr = 0;	/* This is bad */
 		}
 	}
 
diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c
index 82004d2bf05..bd59090825d 100644
--- a/arch/x86/lib/string_32.c
+++ b/arch/x86/lib/string_32.c
@@ -164,15 +164,13 @@ EXPORT_SYMBOL(strchr);
 size_t strlen(const char *s)
 {
 	int d0;
-	int res;
+	size_t res;
 	asm volatile("repne\n\t"
-		"scasb\n\t"
-		"notl %0\n\t"
-		"decl %0"
+		"scasb"
 		: "=c" (res), "=&D" (d0)
 		: "1" (s), "a" (0), "0" (0xffffffffu)
 		: "memory");
-	return res;
+	return ~res - 1;
 }
 EXPORT_SYMBOL(strlen);
 #endif
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index a793da5e560..5b83c51c12e 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -1,5 +1,11 @@
 # x86 Opcode Maps
 #
+# This is (mostly) based on following documentations.
+# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2
+#   (#325383-040US, October 2011)
+# - Intel(R) Advanced Vector Extensions Programming Reference
+#   (#319433-011,JUNE 2011).
+#
 #<Opcode maps>
 # Table: table-name
 # Referrer: escaped-name
@@ -15,10 +21,13 @@
 # EndTable
 #
 # AVX Superscripts
-#  (VEX): this opcode can accept VEX prefix.
-#  (oVEX): this opcode requires VEX prefix.
-#  (o128): this opcode only supports 128bit VEX.
-#  (o256): this opcode only supports 256bit VEX.
+#  (v): this opcode requires VEX prefix.
+#  (v1): this opcode only supports 128bit VEX.
+#
+# Last Prefix Superscripts
+#  - (66): the last prefix is 0x66
+#  - (F3): the last prefix is 0xF3
+#  - (F2): the last prefix is 0xF2
 #
 
 Table: one byte opcode
@@ -199,8 +208,8 @@ a0: MOV AL,Ob
 a1: MOV rAX,Ov
 a2: MOV Ob,AL
 a3: MOV Ov,rAX
-a4: MOVS/B Xb,Yb
-a5: MOVS/W/D/Q Xv,Yv
+a4: MOVS/B Yb,Xb
+a5: MOVS/W/D/Q Yv,Xv
 a6: CMPS/B Xb,Yb
 a7: CMPS/W/D Xv,Yv
 a8: TEST AL,Ib
@@ -233,8 +242,8 @@ c0: Grp2 Eb,Ib (1A)
 c1: Grp2 Ev,Ib (1A)
 c2: RETN Iw (f64)
 c3: RETN
-c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix)
-c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix)
+c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
+c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
 c6: Grp11 Eb,Ib (1A)
 c7: Grp11 Ev,Iz (1A)
 c8: ENTER Iw,Ib
@@ -320,14 +329,19 @@ AVXcode: 1
 # 3DNow! uses the last imm byte as opcode extension.
 0f: 3DNow! Pq,Qq,Ib
 # 0x0f 0x10-0x1f
-10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128)
-11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128)
-12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX)
-13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128)
-14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX)
-15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX)
-16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX)
-17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128)
+# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands
+# but it actually has operands. And also, vmovss and vmovsd only accept 128bit.
+# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form.
+# Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming
+# Reference A.1
+10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)
+11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1)
+12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2)
+13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1)
+14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66)
+15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66)
+16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3)
+17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
 18: Grp16 (1A)
 19:
 1a:
@@ -345,14 +359,14 @@ AVXcode: 1
 25:
 26:
 27:
-28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX)
-29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX)
-2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128)
-2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX)
-2c: cvttps2pi Ppi,Wps | cvttss2si  Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128)
-2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128)
-2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd  Vsd,Wsd (66),(VEX),(o128)
-2f: comiss Vss,Wss (VEX),(o128) | comisd  Vsd,Wsd (66),(VEX),(o128)
+28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66)
+29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66)
+2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1)
+2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66)
+2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1)
+2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1)
+2e: vucomiss Vss,Wss (v1) | vucomisd  Vsd,Wsd (66),(v1)
+2f: vcomiss Vss,Wss (v1) | vcomisd  Vsd,Wsd (66),(v1)
 # 0x0f 0x30-0x3f
 30: WRMSR
 31: RDTSC
@@ -388,65 +402,66 @@ AVXcode: 1
 4e: CMOVLE/NG Gv,Ev
 4f: CMOVNLE/G Gv,Ev
 # 0x0f 0x50-0x5f
-50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX)
-51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128)
-52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128)
-53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128)
-54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX)
-55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX)
-56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX)
-57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX)
-58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128)
-59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128)
-5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128)
-5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX)
-5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128)
-5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128)
-5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128)
-5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128)
+50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66)
+51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1)
+52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1)
+53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1)
+54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66)
+55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66)
+56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66)
+57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66)
+58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
+59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
+5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
+5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
+5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
+5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
+5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
+5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1)
 # 0x0f 0x60-0x6f
-60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128)
-61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128)
-62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128)
-63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128)
-64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128)
-65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128)
-66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128)
-67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128)
-68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128)
-69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128)
-6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128)
-6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128)
-6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128)
-6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128)
-6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128)
-6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX)
+60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1)
+61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1)
+62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1)
+63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1)
+64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1)
+65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1)
+66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1)
+67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1)
+68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1)
+69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1)
+6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1)
+6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1)
+6c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
+6d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
+6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
+6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3)
 # 0x0f 0x70-0x7f
-70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128)
+70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
 71: Grp12 (1A)
 72: Grp13 (1A)
 73: Grp14 (1A)
-74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128)
-75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128)
-76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128)
-77: emms/vzeroupper/vzeroall (VEX)
-78: VMREAD Ed/q,Gd/q
-79: VMWRITE Gd/q,Ed/q
+74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1)
+75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1)
+76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
+# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX.
+77: emms | vzeroupper | vzeroall
+78: VMREAD Ey,Gy
+79: VMWRITE Gy,Ey
 7a:
 7b:
-7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX)
-7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX)
-7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128)
-7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX)
+7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
+7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
+7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
+7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
 # 0x0f 0x80-0x8f
 80: JO Jz (f64)
 81: JNO Jz (f64)
-82: JB/JNAE/JC Jz (f64)
-83: JNB/JAE/JNC Jz (f64)
-84: JZ/JE Jz (f64)
-85: JNZ/JNE Jz (f64)
+82: JB/JC/JNAE Jz (f64)
+83: JAE/JNB/JNC Jz (f64)
+84: JE/JZ Jz (f64)
+85: JNE/JNZ Jz (f64)
 86: JBE/JNA Jz (f64)
-87: JNBE/JA Jz (f64)
+87: JA/JNBE Jz (f64)
 88: JS Jz (f64)
 89: JNS Jz (f64)
 8a: JP/JPE Jz (f64)
@@ -502,18 +517,18 @@ b8: JMPE | POPCNT Gv,Ev (F3)
 b9: Grp10 (1A)
 ba: Grp8 Ev,Ib (1A)
 bb: BTC Ev,Gv
-bc: BSF Gv,Ev
-bd: BSR Gv,Ev
+bc: BSF Gv,Ev | TZCNT Gv,Ev (F3)
+bd: BSR Gv,Ev | LZCNT Gv,Ev (F3)
 be: MOVSX Gv,Eb
 bf: MOVSX Gv,Ew
 # 0x0f 0xc0-0xcf
 c0: XADD Eb,Gb
 c1: XADD Ev,Gv
-c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX)
-c3: movnti Md/q,Gd/q
-c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128)
-c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128)
-c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX)
+c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1)
+c3: movnti My,Gy
+c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1)
+c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1)
+c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66)
 c7: Grp9 (1A)
 c8: BSWAP RAX/EAX/R8/R8D
 c9: BSWAP RCX/ECX/R9/R9D
@@ -524,55 +539,55 @@ cd: BSWAP RBP/EBP/R13/R13D
 ce: BSWAP RSI/ESI/R14/R14D
 cf: BSWAP RDI/EDI/R15/R15D
 # 0x0f 0xd0-0xdf
-d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX)
-d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128)
-d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128)
-d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128)
-d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128)
-d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128)
-d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
-d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128)
-d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128)
-d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128)
-da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128)
-db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128)
-dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128)
-dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128)
-de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128)
-df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128)
+d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2)
+d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1)
+d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1)
+d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1)
+d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1)
+d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1)
+d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
+d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
+d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
+d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
+da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
+db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1)
+dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
+dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
+de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
+df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1)
 # 0x0f 0xe0-0xef
-e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128)
-e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128)
-e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128)
-e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128)
-e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128)
-e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128)
-e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX)
-e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX)
-e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128)
-e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128)
-ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128)
-eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128)
-ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128)
-ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128)
-ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128)
-ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128)
+e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
+e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
+e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
+e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
+e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
+e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
+e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2)
+e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
+e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
+e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
+ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
+eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1)
+ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
+ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
+ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
+ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1)
 # 0x0f 0xf0-0xff
-f0: lddqu Vdq,Mdq (F2),(VEX)
-f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128)
-f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128)
-f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128)
-f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128)
-f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128)
-f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128)
-f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128)
-f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128)
-f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128)
-fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128)
-fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128)
-fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128)
-fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128)
-fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128)
+f0: vlddqu Vx,Mx (F2)
+f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
+f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1)
+f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1)
+f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1)
+f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1)
+f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1)
+f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1)
+f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1)
+f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1)
+fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1)
+fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
+fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
+fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
+fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
 ff:
 EndTable
 
@@ -580,155 +595,193 @@ Table: 3-byte opcode 1 (0x0f 0x38)
 Referrer: 3-byte escape 1
 AVXcode: 2
 # 0x0f 0x38 0x00-0x0f
-00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128)
-01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128)
-02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128)
-03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128)
-04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128)
-05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128)
-06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128)
-07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128)
-08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128)
-09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128)
-0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128)
-0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128)
-0c: Vpermilps /r (66),(oVEX)
-0d: Vpermilpd /r (66),(oVEX)
-0e: vtestps /r (66),(oVEX)
-0f: vtestpd /r (66),(oVEX)
+00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1)
+01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1)
+02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1)
+03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1)
+04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1)
+05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1)
+06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1)
+07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1)
+08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1)
+09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1)
+0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1)
+0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1)
+0c: vpermilps Vx,Hx,Wx (66),(v)
+0d: vpermilpd Vx,Hx,Wx (66),(v)
+0e: vtestps Vx,Wx (66),(v)
+0f: vtestpd Vx,Wx (66),(v)
 # 0x0f 0x38 0x10-0x1f
 10: pblendvb Vdq,Wdq (66)
 11:
 12:
-13:
+13: vcvtph2ps Vx,Wx,Ib (66),(v)
 14: blendvps Vdq,Wdq (66)
 15: blendvpd Vdq,Wdq (66)
-16:
-17: ptest Vdq,Wdq (66),(VEX)
-18: vbroadcastss /r (66),(oVEX)
-19: vbroadcastsd /r (66),(oVEX),(o256)
-1a: vbroadcastf128 /r (66),(oVEX),(o256)
+16: vpermps Vqq,Hqq,Wqq (66),(v)
+17: vptest Vx,Wx (66)
+18: vbroadcastss Vx,Wd (66),(v)
+19: vbroadcastsd Vqq,Wq (66),(v)
+1a: vbroadcastf128 Vqq,Mdq (66),(v)
 1b:
-1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128)
-1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128)
-1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128)
+1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
+1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
+1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
 1f:
 # 0x0f 0x38 0x20-0x2f
-20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128)
-21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128)
-22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128)
-23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128)
-24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128)
-25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128)
+20: vpmovsxbw Vx,Ux/Mq (66),(v1)
+21: vpmovsxbd Vx,Ux/Md (66),(v1)
+22: vpmovsxbq Vx,Ux/Mw (66),(v1)
+23: vpmovsxwd Vx,Ux/Mq (66),(v1)
+24: vpmovsxwq Vx,Ux/Md (66),(v1)
+25: vpmovsxdq Vx,Ux/Mq (66),(v1)
 26:
 27:
-28: pmuldq Vdq,Wdq (66),(VEX),(o128)
-29: pcmpeqq Vdq,Wdq (66),(VEX),(o128)
-2a: movntdqa Vdq,Mdq (66),(VEX),(o128)
-2b: packusdw Vdq,Wdq (66),(VEX),(o128)
-2c: vmaskmovps(ld) /r (66),(oVEX)
-2d: vmaskmovpd(ld) /r (66),(oVEX)
-2e: vmaskmovps(st) /r (66),(oVEX)
-2f: vmaskmovpd(st) /r (66),(oVEX)
+28: vpmuldq Vx,Hx,Wx (66),(v1)
+29: vpcmpeqq Vx,Hx,Wx (66),(v1)
+2a: vmovntdqa Vx,Mx (66),(v1)
+2b: vpackusdw Vx,Hx,Wx (66),(v1)
+2c: vmaskmovps Vx,Hx,Mx (66),(v)
+2d: vmaskmovpd Vx,Hx,Mx (66),(v)
+2e: vmaskmovps Mx,Hx,Vx (66),(v)
+2f: vmaskmovpd Mx,Hx,Vx (66),(v)
 # 0x0f 0x38 0x30-0x3f
-30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128)
-31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128)
-32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128)
-33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128)
-34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128)
-35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128)
-36:
-37: pcmpgtq Vdq,Wdq (66),(VEX),(o128)
-38: pminsb Vdq,Wdq (66),(VEX),(o128)
-39: pminsd Vdq,Wdq (66),(VEX),(o128)
-3a: pminuw Vdq,Wdq (66),(VEX),(o128)
-3b: pminud Vdq,Wdq (66),(VEX),(o128)
-3c: pmaxsb Vdq,Wdq (66),(VEX),(o128)
-3d: pmaxsd Vdq,Wdq (66),(VEX),(o128)
-3e: pmaxuw Vdq,Wdq (66),(VEX),(o128)
-3f: pmaxud Vdq,Wdq (66),(VEX),(o128)
+30: vpmovzxbw Vx,Ux/Mq (66),(v1)
+31: vpmovzxbd Vx,Ux/Md (66),(v1)
+32: vpmovzxbq Vx,Ux/Mw (66),(v1)
+33: vpmovzxwd Vx,Ux/Mq (66),(v1)
+34: vpmovzxwq Vx,Ux/Md (66),(v1)
+35: vpmovzxdq Vx,Ux/Mq (66),(v1)
+36: vpermd Vqq,Hqq,Wqq (66),(v)
+37: vpcmpgtq Vx,Hx,Wx (66),(v1)
+38: vpminsb Vx,Hx,Wx (66),(v1)
+39: vpminsd Vx,Hx,Wx (66),(v1)
+3a: vpminuw Vx,Hx,Wx (66),(v1)
+3b: vpminud Vx,Hx,Wx (66),(v1)
+3c: vpmaxsb Vx,Hx,Wx (66),(v1)
+3d: vpmaxsd Vx,Hx,Wx (66),(v1)
+3e: vpmaxuw Vx,Hx,Wx (66),(v1)
+3f: vpmaxud Vx,Hx,Wx (66),(v1)
 # 0x0f 0x38 0x40-0x8f
-40: pmulld Vdq,Wdq (66),(VEX),(o128)
-41: phminposuw Vdq,Wdq (66),(VEX),(o128)
-80: INVEPT Gd/q,Mdq (66)
-81: INVPID Gd/q,Mdq (66)
+40: vpmulld Vx,Hx,Wx (66),(v1)
+41: vphminposuw Vdq,Wdq (66),(v1)
+42:
+43:
+44:
+45: vpsrlvd/q Vx,Hx,Wx (66),(v)
+46: vpsravd Vx,Hx,Wx (66),(v)
+47: vpsllvd/q Vx,Hx,Wx (66),(v)
+# Skip 0x48-0x57
+58: vpbroadcastd Vx,Wx (66),(v)
+59: vpbroadcastq Vx,Wx (66),(v)
+5a: vbroadcasti128 Vqq,Mdq (66),(v)
+# Skip 0x5b-0x77
+78: vpbroadcastb Vx,Wx (66),(v)
+79: vpbroadcastw Vx,Wx (66),(v)
+# Skip 0x7a-0x7f
+80: INVEPT Gy,Mdq (66)
+81: INVPID Gy,Mdq (66)
+82: INVPCID Gy,Mdq (66)
+8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
+8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
 # 0x0f 0x38 0x90-0xbf (FMA)
-96: vfmaddsub132pd/ps /r (66),(VEX)
-97: vfmsubadd132pd/ps /r (66),(VEX)
-98: vfmadd132pd/ps /r (66),(VEX)
-99: vfmadd132sd/ss /r (66),(VEX),(o128)
-9a: vfmsub132pd/ps /r (66),(VEX)
-9b: vfmsub132sd/ss /r (66),(VEX),(o128)
-9c: vfnmadd132pd/ps /r (66),(VEX)
-9d: vfnmadd132sd/ss /r (66),(VEX),(o128)
-9e: vfnmsub132pd/ps /r (66),(VEX)
-9f: vfnmsub132sd/ss /r (66),(VEX),(o128)
-a6: vfmaddsub213pd/ps /r (66),(VEX)
-a7: vfmsubadd213pd/ps /r (66),(VEX)
-a8: vfmadd213pd/ps /r (66),(VEX)
-a9: vfmadd213sd/ss /r (66),(VEX),(o128)
-aa: vfmsub213pd/ps /r (66),(VEX)
-ab: vfmsub213sd/ss /r (66),(VEX),(o128)
-ac: vfnmadd213pd/ps /r (66),(VEX)
-ad: vfnmadd213sd/ss /r (66),(VEX),(o128)
-ae: vfnmsub213pd/ps /r (66),(VEX)
-af: vfnmsub213sd/ss /r (66),(VEX),(o128)
-b6: vfmaddsub231pd/ps /r (66),(VEX)
-b7: vfmsubadd231pd/ps /r (66),(VEX)
-b8: vfmadd231pd/ps /r (66),(VEX)
-b9: vfmadd231sd/ss /r (66),(VEX),(o128)
-ba: vfmsub231pd/ps /r (66),(VEX)
-bb: vfmsub231sd/ss /r (66),(VEX),(o128)
-bc: vfnmadd231pd/ps /r (66),(VEX)
-bd: vfnmadd231sd/ss /r (66),(VEX),(o128)
-be: vfnmsub231pd/ps /r (66),(VEX)
-bf: vfnmsub231sd/ss /r (66),(VEX),(o128)
+90: vgatherdd/q Vx,Hx,Wx (66),(v)
+91: vgatherqd/q Vx,Hx,Wx (66),(v)
+92: vgatherdps/d Vx,Hx,Wx (66),(v)
+93: vgatherqps/d Vx,Hx,Wx (66),(v)
+94:
+95:
+96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v)
+97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
+98: vfmadd132ps/d Vx,Hx,Wx (66),(v)
+99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
+9a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
+9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
+9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
+9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
+9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
+a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
+a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
+a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
+aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
+ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
+ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
+ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
+af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
+b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
+b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
+b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
+ba: vfmsub231ps/d Vx,Hx,Wx (66),(v)
+bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
+bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v)
+bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
+be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
+bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
 # 0x0f 0x38 0xc0-0xff
-db: aesimc Vdq,Wdq (66),(VEX),(o128)
-dc: aesenc Vdq,Wdq (66),(VEX),(o128)
-dd: aesenclast Vdq,Wdq (66),(VEX),(o128)
-de: aesdec Vdq,Wdq (66),(VEX),(o128)
-df: aesdeclast Vdq,Wdq (66),(VEX),(o128)
-f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2)
-f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2)
+db: VAESIMC Vdq,Wdq (66),(v1)
+dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
+dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
+de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
+df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
+f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2)
+f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2)
+f3: ANDN Gy,By,Ey (v)
+f4: Grp17 (1A)
+f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
+f6: MULX By,Gy,rDX,Ey (F2),(v)
+f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
 EndTable
 
 Table: 3-byte opcode 2 (0x0f 0x3a)
 Referrer: 3-byte escape 2
 AVXcode: 3
 # 0x0f 0x3a 0x00-0xff
-04: vpermilps /r,Ib (66),(oVEX)
-05: vpermilpd /r,Ib (66),(oVEX)
-06: vperm2f128 /r,Ib (66),(oVEX),(o256)
-08: roundps Vdq,Wdq,Ib (66),(VEX)
-09: roundpd Vdq,Wdq,Ib (66),(VEX)
-0a: roundss Vss,Wss,Ib (66),(VEX),(o128)
-0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128)
-0c: blendps Vdq,Wdq,Ib (66),(VEX)
-0d: blendpd Vdq,Wdq,Ib (66),(VEX)
-0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128)
-0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128)
-14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128)
-15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128)
-16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128)
-17: extractps Ed,Vdq,Ib (66),(VEX),(o128)
-18: vinsertf128 /r,Ib (66),(oVEX),(o256)
-19: vextractf128 /r,Ib (66),(oVEX),(o256)
-20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128)
-21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128)
-22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128)
-40: dpps Vdq,Wdq,Ib (66),(VEX)
-41: dppd Vdq,Wdq,Ib (66),(VEX),(o128)
-42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128)
-44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128)
-4a: vblendvps /r,Ib (66),(oVEX)
-4b: vblendvpd /r,Ib (66),(oVEX)
-4c: vpblendvb /r,Ib (66),(oVEX),(o128)
-60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128)
-61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128)
-62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128)
-63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128)
-df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128)
+00: vpermq Vqq,Wqq,Ib (66),(v)
+01: vpermpd Vqq,Wqq,Ib (66),(v)
+02: vpblendd Vx,Hx,Wx,Ib (66),(v)
+03:
+04: vpermilps Vx,Wx,Ib (66),(v)
+05: vpermilpd Vx,Wx,Ib (66),(v)
+06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
+07:
+08: vroundps Vx,Wx,Ib (66)
+09: vroundpd Vx,Wx,Ib (66)
+0a: vroundss Vss,Wss,Ib (66),(v1)
+0b: vroundsd Vsd,Wsd,Ib (66),(v1)
+0c: vblendps Vx,Hx,Wx,Ib (66)
+0d: vblendpd Vx,Hx,Wx,Ib (66)
+0e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
+0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1)
+14: vpextrb Rd/Mb,Vdq,Ib (66),(v1)
+15: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
+16: vpextrd/q Ey,Vdq,Ib (66),(v1)
+17: vextractps Ed,Vdq,Ib (66),(v1)
+18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v)
+19: vextractf128 Wdq,Vqq,Ib (66),(v)
+1d: vcvtps2ph Wx,Vx,Ib (66),(v)
+20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
+21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
+22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
+38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v)
+39: vextracti128 Wdq,Vqq,Ib (66),(v)
+40: vdpps Vx,Hx,Wx,Ib (66)
+41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
+42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1)
+44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
+46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
+4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
+4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
+4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
+60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
+61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
+62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
+63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
+df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
+f0: RORX Gy,Ey,Ib (F2),(v)
 EndTable
 
 GrpTable: Grp1
@@ -790,7 +843,7 @@ GrpTable: Grp5
 2: CALLN Ev (f64)
 3: CALLF Ep
 4: JMPN Ev (f64)
-5: JMPF Ep
+5: JMPF Mp
 6: PUSH Ev (d64)
 7:
 EndTable
@@ -807,7 +860,7 @@ EndTable
 GrpTable: Grp7
 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
 1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001)
-2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B)
+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B)
 3: LIDT Ms
 4: SMSW Mw/Rv
 5:
@@ -824,44 +877,45 @@ EndTable
 
 GrpTable: Grp9
 1: CMPXCHG8B/16B Mq/Mdq
-6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3)
-7: VMPTRST Mq
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
+7: VMPTRST Mq | VMPTRST Mq (F3)
 EndTable
 
 GrpTable: Grp10
 EndTable
 
 GrpTable: Grp11
+# Note: the operands are given by group opcode
 0: MOV
 EndTable
 
 GrpTable: Grp12
-2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128)
-4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128)
-6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128)
+2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1)
+4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1)
+6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1)
 EndTable
 
 GrpTable: Grp13
-2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128)
-4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128)
-6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128)
+2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
+4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1)
+6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
 EndTable
 
 GrpTable: Grp14
-2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128)
-3: psrldq Udq,Ib (66),(11B),(VEX),(o128)
-6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128)
-7: pslldq Udq,Ib (66),(11B),(VEX),(o128)
+2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1)
+3: vpsrldq Hx,Ux,Ib (66),(11B),(v1)
+6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1)
+7: vpslldq Hx,Ux,Ib (66),(11B),(v1)
 EndTable
 
 GrpTable: Grp15
-0: fxsave
-1: fxstor
-2: ldmxcsr (VEX)
-3: stmxcsr (VEX)
+0: fxsave | RDFSBASE Ry (F3),(11B)
+1: fxstor | RDGSBASE Ry (F3),(11B)
+2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
+3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
 4: XSAVE
 5: XRSTOR | lfence (11B)
-6: mfence (11B)
+6: XSAVEOPT | mfence (11B)
 7: clflush | sfence (11B)
 EndTable
 
@@ -872,6 +926,12 @@ GrpTable: Grp16
 3: prefetch T2
 EndTable
 
+GrpTable: Grp17
+1: BLSR By,Ey (v)
+2: BLSMSK By,Ey (v)
+3: BLSI By,Ey (v)
+EndTable
+
 # AMD's Prefetch Group
 GrpTable: GrpP
 0: PREFETCH
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 3d11327c9ab..23d8e5fecf7 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -27,6 +27,4 @@ obj-$(CONFIG_AMD_NUMA)		+= amdtopology.o
 obj-$(CONFIG_ACPI_NUMA)		+= srat.o
 obj-$(CONFIG_NUMA_EMU)		+= numa_emulation.o
 
-obj-$(CONFIG_HAVE_MEMBLOCK)		+= memblock.o
-
 obj-$(CONFIG_MEMTEST)		+= memtest.o
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index d0474ad2a6e..1fb85dbe390 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -25,7 +25,7 @@ int fixup_exception(struct pt_regs *regs)
 	if (fixup) {
 		/* If fixup is less than 16, it means uaccess error */
 		if (fixup->fixup < 16) {
-			current_thread_info()->uaccess_err = -EFAULT;
+			current_thread_info()->uaccess_err = 1;
 			regs->ip += fixup->fixup;
 			return 1;
 		}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 5db0490deb0..9d74824a708 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -626,7 +626,7 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code,
 
 static noinline void
 no_context(struct pt_regs *regs, unsigned long error_code,
-	   unsigned long address)
+	   unsigned long address, int signal, int si_code)
 {
 	struct task_struct *tsk = current;
 	unsigned long *stackend;
@@ -634,8 +634,17 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	int sig;
 
 	/* Are we prepared to handle this kernel fault? */
-	if (fixup_exception(regs))
+	if (fixup_exception(regs)) {
+		if (current_thread_info()->sig_on_uaccess_error && signal) {
+			tsk->thread.trap_no = 14;
+			tsk->thread.error_code = error_code | PF_USER;
+			tsk->thread.cr2 = address;
+
+			/* XXX: hwpoison faults will set the wrong code. */
+			force_sig_info_fault(signal, si_code, address, tsk, 0);
+		}
 		return;
+	}
 
 	/*
 	 * 32-bit:
@@ -755,7 +764,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 	if (is_f00f_bug(regs, address))
 		return;
 
-	no_context(regs, error_code, address);
+	no_context(regs, error_code, address, SIGSEGV, si_code);
 }
 
 static noinline void
@@ -819,7 +828,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 
 	/* Kernel mode? Handle exceptions or die: */
 	if (!(error_code & PF_USER)) {
-		no_context(regs, error_code, address);
+		no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
 		return;
 	}
 
@@ -854,7 +863,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 		if (!(fault & VM_FAULT_RETRY))
 			up_read(&current->mm->mmap_sem);
 		if (!(error_code & PF_USER))
-			no_context(regs, error_code, address);
+			no_context(regs, error_code, address, 0, 0);
 		return 1;
 	}
 	if (!(fault & VM_FAULT_ERROR))
@@ -864,7 +873,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 		/* Kernel mode? Handle exceptions or die: */
 		if (!(error_code & PF_USER)) {
 			up_read(&current->mm->mmap_sem);
-			no_context(regs, error_code, address);
+			no_context(regs, error_code, address,
+				   SIGSEGV, SEGV_MAPERR);
 			return 1;
 		}
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 87488b93a65..a298914058f 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -67,7 +67,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
 	good_end = max_pfn_mapped << PAGE_SHIFT;
 
 	base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
-	if (base == MEMBLOCK_ERROR)
+	if (!base)
 		panic("Cannot find space for the kernel page tables");
 
 	pgt_buf_start = base >> PAGE_SHIFT;
@@ -80,7 +80,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
 
 void __init native_pagetable_reserve(u64 start, u64 end)
 {
-	memblock_x86_reserve_range(start, end, "PGTABLE");
+	memblock_reserve(start, end - start);
 }
 
 struct map_range {
@@ -279,8 +279,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 	 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
 	 * so that they can be reused for other purposes.
 	 *
-	 * On native it just means calling memblock_x86_reserve_range, on Xen it
-	 * also means marking RW the pagetable pages that we allocated before
+	 * On native it just means calling memblock_reserve, on Xen it also
+	 * means marking RW the pagetable pages that we allocated before
 	 * but that haven't been used.
 	 *
 	 * In fact on xen we mark RO the whole range pgt_buf_start -
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 29f7c6d9817..0c1da394a63 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -427,23 +427,17 @@ static void __init add_one_highpage_init(struct page *page)
 void __init add_highpages_with_active_regions(int nid,
 			 unsigned long start_pfn, unsigned long end_pfn)
 {
-	struct range *range;
-	int nr_range;
-	int i;
-
-	nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn);
-
-	for (i = 0; i < nr_range; i++) {
-		struct page *page;
-		int node_pfn;
-
-		for (node_pfn = range[i].start; node_pfn < range[i].end;
-		     node_pfn++) {
-			if (!pfn_valid(node_pfn))
-				continue;
-			page = pfn_to_page(node_pfn);
-			add_one_highpage_init(page);
-		}
+	phys_addr_t start, end;
+	u64 i;
+
+	for_each_free_mem_range(i, nid, &start, &end, NULL) {
+		unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
+					    start_pfn, end_pfn);
+		unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
+					      start_pfn, end_pfn);
+		for ( ; pfn < e_pfn; pfn++)
+			if (pfn_valid(pfn))
+				add_one_highpage_init(pfn_to_page(pfn));
 	}
 }
 #else
@@ -650,18 +644,18 @@ void __init initmem_init(void)
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > max_low_pfn)
 		highstart_pfn = max_low_pfn;
-	memblock_x86_register_active_regions(0, 0, highend_pfn);
-	sparse_memory_present_with_active_regions(0);
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 		pages_to_mb(highend_pfn - highstart_pfn));
 	num_physpages = highend_pfn;
 	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
-	memblock_x86_register_active_regions(0, 0, max_low_pfn);
-	sparse_memory_present_with_active_regions(0);
 	num_physpages = max_low_pfn;
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
+
+	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
+	sparse_memory_present_with_active_regions(0);
+
 #ifdef CONFIG_FLATMEM
 	max_mapnr = num_physpages;
 #endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bbaaa005bf0..a8a56ce3a96 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -608,7 +608,7 @@ kernel_physical_mapping_init(unsigned long start,
 #ifndef CONFIG_NUMA
 void __init initmem_init(void)
 {
-	memblock_x86_register_active_regions(0, 0, max_pfn);
+	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
 }
 #endif
 
diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c
deleted file mode 100644
index 992da5ec5a6..00000000000
--- a/arch/x86/mm/memblock.c
+++ /dev/null
@@ -1,348 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <linux/memblock.h>
-#include <linux/bootmem.h>
-#include <linux/mm.h>
-#include <linux/range.h>
-
-/* Check for already reserved areas */
-bool __init memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align)
-{
-	struct memblock_region *r;
-	u64 addr = *addrp, last;
-	u64 size = *sizep;
-	bool changed = false;
-
-again:
-	last = addr + size;
-	for_each_memblock(reserved, r) {
-		if (last > r->base && addr < r->base) {
-			size = r->base - addr;
-			changed = true;
-			goto again;
-		}
-		if (last > (r->base + r->size) && addr < (r->base + r->size)) {
-			addr = round_up(r->base + r->size, align);
-			size = last - addr;
-			changed = true;
-			goto again;
-		}
-		if (last <= (r->base + r->size) && addr >= r->base) {
-			*sizep = 0;
-			return false;
-		}
-	}
-	if (changed) {
-		*addrp = addr;
-		*sizep = size;
-	}
-	return changed;
-}
-
-/*
- * Find next free range after start, and size is returned in *sizep
- */
-u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align)
-{
-	struct memblock_region *r;
-
-	for_each_memblock(memory, r) {
-		u64 ei_start = r->base;
-		u64 ei_last = ei_start + r->size;
-		u64 addr;
-
-		addr = round_up(ei_start, align);
-		if (addr < start)
-			addr = round_up(start, align);
-		if (addr >= ei_last)
-			continue;
-		*sizep = ei_last - addr;
-		while (memblock_x86_check_reserved_size(&addr, sizep, align))
-			;
-
-		if (*sizep)
-			return addr;
-	}
-
-	return MEMBLOCK_ERROR;
-}
-
-static __init struct range *find_range_array(int count)
-{
-	u64 end, size, mem;
-	struct range *range;
-
-	size = sizeof(struct range) * count;
-	end = memblock.current_limit;
-
-	mem = memblock_find_in_range(0, end, size, sizeof(struct range));
-	if (mem == MEMBLOCK_ERROR)
-		panic("can not find more space for range array");
-
-	/*
-	 * This range is tempoaray, so don't reserve it, it will not be
-	 * overlapped because We will not alloccate new buffer before
-	 * We discard this one
-	 */
-	range = __va(mem);
-	memset(range, 0, size);
-
-	return range;
-}
-
-static void __init memblock_x86_subtract_reserved(struct range *range, int az)
-{
-	u64 final_start, final_end;
-	struct memblock_region *r;
-
-	/* Take out region array itself at first*/
-	memblock_free_reserved_regions();
-
-	memblock_dbg("Subtract (%ld early reservations)\n", memblock.reserved.cnt);
-
-	for_each_memblock(reserved, r) {
-		memblock_dbg("  [%010llx-%010llx]\n", (u64)r->base, (u64)r->base + r->size - 1);
-		final_start = PFN_DOWN(r->base);
-		final_end = PFN_UP(r->base + r->size);
-		if (final_start >= final_end)
-			continue;
-		subtract_range(range, az, final_start, final_end);
-	}
-
-	/* Put region array back ? */
-	memblock_reserve_reserved_regions();
-}
-
-struct count_data {
-	int nr;
-};
-
-static int __init count_work_fn(unsigned long start_pfn,
-				unsigned long end_pfn, void *datax)
-{
-	struct count_data *data = datax;
-
-	data->nr++;
-
-	return 0;
-}
-
-static int __init count_early_node_map(int nodeid)
-{
-	struct count_data data;
-
-	data.nr = 0;
-	work_with_active_regions(nodeid, count_work_fn, &data);
-
-	return data.nr;
-}
-
-int __init __get_free_all_memory_range(struct range **rangep, int nodeid,
-			 unsigned long start_pfn, unsigned long end_pfn)
-{
-	int count;
-	struct range *range;
-	int nr_range;
-
-	count = (memblock.reserved.cnt + count_early_node_map(nodeid)) * 2;
-
-	range = find_range_array(count);
-	nr_range = 0;
-
-	/*
-	 * Use early_node_map[] and memblock.reserved.region to get range array
-	 * at first
-	 */
-	nr_range = add_from_early_node_map(range, count, nr_range, nodeid);
-	subtract_range(range, count, 0, start_pfn);
-	subtract_range(range, count, end_pfn, -1ULL);
-
-	memblock_x86_subtract_reserved(range, count);
-	nr_range = clean_sort_range(range, count);
-
-	*rangep = range;
-	return nr_range;
-}
-
-int __init get_free_all_memory_range(struct range **rangep, int nodeid)
-{
-	unsigned long end_pfn = -1UL;
-
-#ifdef CONFIG_X86_32
-	end_pfn = max_low_pfn;
-#endif
-	return __get_free_all_memory_range(rangep, nodeid, 0, end_pfn);
-}
-
-static u64 __init __memblock_x86_memory_in_range(u64 addr, u64 limit, bool get_free)
-{
-	int i, count;
-	struct range *range;
-	int nr_range;
-	u64 final_start, final_end;
-	u64 free_size;
-	struct memblock_region *r;
-
-	count = (memblock.reserved.cnt + memblock.memory.cnt) * 2;
-
-	range = find_range_array(count);
-	nr_range = 0;
-
-	addr = PFN_UP(addr);
-	limit = PFN_DOWN(limit);
-
-	for_each_memblock(memory, r) {
-		final_start = PFN_UP(r->base);
-		final_end = PFN_DOWN(r->base + r->size);
-		if (final_start >= final_end)
-			continue;
-		if (final_start >= limit || final_end <= addr)
-			continue;
-
-		nr_range = add_range(range, count, nr_range, final_start, final_end);
-	}
-	subtract_range(range, count, 0, addr);
-	subtract_range(range, count, limit, -1ULL);
-
-	/* Subtract memblock.reserved.region in range ? */
-	if (!get_free)
-		goto sort_and_count_them;
-	for_each_memblock(reserved, r) {
-		final_start = PFN_DOWN(r->base);
-		final_end = PFN_UP(r->base + r->size);
-		if (final_start >= final_end)
-			continue;
-		if (final_start >= limit || final_end <= addr)
-			continue;
-
-		subtract_range(range, count, final_start, final_end);
-	}
-
-sort_and_count_them:
-	nr_range = clean_sort_range(range, count);
-
-	free_size = 0;
-	for (i = 0; i < nr_range; i++)
-		free_size += range[i].end - range[i].start;
-
-	return free_size << PAGE_SHIFT;
-}
-
-u64 __init memblock_x86_free_memory_in_range(u64 addr, u64 limit)
-{
-	return __memblock_x86_memory_in_range(addr, limit, true);
-}
-
-u64 __init memblock_x86_memory_in_range(u64 addr, u64 limit)
-{
-	return __memblock_x86_memory_in_range(addr, limit, false);
-}
-
-void __init memblock_x86_reserve_range(u64 start, u64 end, char *name)
-{
-	if (start == end)
-		return;
-
-	if (WARN_ONCE(start > end, "memblock_x86_reserve_range: wrong range [%#llx, %#llx)\n", start, end))
-		return;
-
-	memblock_dbg("    memblock_x86_reserve_range: [%#010llx-%#010llx] %16s\n", start, end - 1, name);
-
-	memblock_reserve(start, end - start);
-}
-
-void __init memblock_x86_free_range(u64 start, u64 end)
-{
-	if (start == end)
-		return;
-
-	if (WARN_ONCE(start > end, "memblock_x86_free_range: wrong range [%#llx, %#llx)\n", start, end))
-		return;
-
-	memblock_dbg("       memblock_x86_free_range: [%#010llx-%#010llx]\n", start, end - 1);
-
-	memblock_free(start, end - start);
-}
-
-/*
- * Need to call this function after memblock_x86_register_active_regions,
- * so early_node_map[] is filled already.
- */
-u64 __init memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align)
-{
-	u64 addr;
-	addr = find_memory_core_early(nid, size, align, start, end);
-	if (addr != MEMBLOCK_ERROR)
-		return addr;
-
-	/* Fallback, should already have start end within node range */
-	return memblock_find_in_range(start, end, size, align);
-}
-
-/*
- * Finds an active region in the address range from start_pfn to last_pfn and
- * returns its range in ei_startpfn and ei_endpfn for the memblock entry.
- */
-static int __init memblock_x86_find_active_region(const struct memblock_region *ei,
-				  unsigned long start_pfn,
-				  unsigned long last_pfn,
-				  unsigned long *ei_startpfn,
-				  unsigned long *ei_endpfn)
-{
-	u64 align = PAGE_SIZE;
-
-	*ei_startpfn = round_up(ei->base, align) >> PAGE_SHIFT;
-	*ei_endpfn = round_down(ei->base + ei->size, align) >> PAGE_SHIFT;
-
-	/* Skip map entries smaller than a page */
-	if (*ei_startpfn >= *ei_endpfn)
-		return 0;
-
-	/* Skip if map is outside the node */
-	if (*ei_endpfn <= start_pfn || *ei_startpfn >= last_pfn)
-		return 0;
-
-	/* Check for overlaps */
-	if (*ei_startpfn < start_pfn)
-		*ei_startpfn = start_pfn;
-	if (*ei_endpfn > last_pfn)
-		*ei_endpfn = last_pfn;
-
-	return 1;
-}
-
-/* Walk the memblock.memory map and register active regions within a node */
-void __init memblock_x86_register_active_regions(int nid, unsigned long start_pfn,
-					 unsigned long last_pfn)
-{
-	unsigned long ei_startpfn;
-	unsigned long ei_endpfn;
-	struct memblock_region *r;
-
-	for_each_memblock(memory, r)
-		if (memblock_x86_find_active_region(r, start_pfn, last_pfn,
-					   &ei_startpfn, &ei_endpfn))
-			add_active_range(nid, ei_startpfn, ei_endpfn);
-}
-
-/*
- * Find the hole size (in bytes) in the memory range.
- * @start: starting address of the memory range to scan
- * @end: ending address of the memory range to scan
- */
-u64 __init memblock_x86_hole_size(u64 start, u64 end)
-{
-	unsigned long start_pfn = start >> PAGE_SHIFT;
-	unsigned long last_pfn = end >> PAGE_SHIFT;
-	unsigned long ei_startpfn, ei_endpfn, ram = 0;
-	struct memblock_region *r;
-
-	for_each_memblock(memory, r)
-		if (memblock_x86_find_active_region(r, start_pfn, last_pfn,
-					   &ei_startpfn, &ei_endpfn))
-			ram += ei_endpfn - ei_startpfn;
-
-	return end - start - ((u64)ram << PAGE_SHIFT);
-}
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 92faf3a1c53..c80b9fb9573 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -34,7 +34,7 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
 	       (unsigned long long) pattern,
 	       (unsigned long long) start_bad,
 	       (unsigned long long) end_bad);
-	memblock_x86_reserve_range(start_bad, end_bad, "BAD RAM");
+	memblock_reserve(start_bad, end_bad - start_bad);
 }
 
 static void __init memtest(u64 pattern, u64 start_phys, u64 size)
@@ -70,24 +70,19 @@ static void __init memtest(u64 pattern, u64 start_phys, u64 size)
 
 static void __init do_one_pass(u64 pattern, u64 start, u64 end)
 {
-	u64 size = 0;
-
-	while (start < end) {
-		start = memblock_x86_find_in_range_size(start, &size, 1);
-
-		/* done ? */
-		if (start >= end)
-			break;
-		if (start + size > end)
-			size = end - start;
-
-		printk(KERN_INFO "  %010llx - %010llx pattern %016llx\n",
-		       (unsigned long long) start,
-		       (unsigned long long) start + size,
-		       (unsigned long long) cpu_to_be64(pattern));
-		memtest(pattern, start, size);
-
-		start += size;
+	u64 i;
+	phys_addr_t this_start, this_end;
+
+	for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) {
+		this_start = clamp_t(phys_addr_t, this_start, start, end);
+		this_end = clamp_t(phys_addr_t, this_end, start, end);
+		if (this_start < this_end) {
+			printk(KERN_INFO "  %010llx - %010llx pattern %016llx\n",
+			       (unsigned long long)this_start,
+			       (unsigned long long)this_end,
+			       (unsigned long long)cpu_to_be64(pattern));
+			memtest(pattern, this_start, this_end - this_start);
+		}
 	}
 }
 
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index fbeaaf41661..496f494593b 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -192,8 +192,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
 /* Initialize NODE_DATA for a node on the local memory */
 static void __init setup_node_data(int nid, u64 start, u64 end)
 {
-	const u64 nd_low = PFN_PHYS(MAX_DMA_PFN);
-	const u64 nd_high = PFN_PHYS(max_pfn_mapped);
 	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
 	bool remapped = false;
 	u64 nd_pa;
@@ -224,17 +222,12 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
 		nd_pa = __pa(nd);
 		remapped = true;
 	} else {
-		nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high,
-						nd_size, SMP_CACHE_BYTES);
-		if (nd_pa == MEMBLOCK_ERROR)
-			nd_pa = memblock_find_in_range(nd_low, nd_high,
-						nd_size, SMP_CACHE_BYTES);
-		if (nd_pa == MEMBLOCK_ERROR) {
+		nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
+		if (!nd_pa) {
 			pr_err("Cannot find %zu bytes in node %d\n",
 			       nd_size, nid);
 			return;
 		}
-		memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
 		nd = __va(nd_pa);
 	}
 
@@ -371,8 +364,7 @@ void __init numa_reset_distance(void)
 
 	/* numa_distance could be 1LU marking allocation failure, test cnt */
 	if (numa_distance_cnt)
-		memblock_x86_free_range(__pa(numa_distance),
-					__pa(numa_distance) + size);
+		memblock_free(__pa(numa_distance), size);
 	numa_distance_cnt = 0;
 	numa_distance = NULL;	/* enable table creation */
 }
@@ -395,13 +387,13 @@ static int __init numa_alloc_distance(void)
 
 	phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
 				      size, PAGE_SIZE);
-	if (phys == MEMBLOCK_ERROR) {
+	if (!phys) {
 		pr_warning("NUMA: Warning: can't allocate distance table!\n");
 		/* don't retry until explicitly reset */
 		numa_distance = (void *)1LU;
 		return -ENOMEM;
 	}
-	memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
+	memblock_reserve(phys, size);
 
 	numa_distance = __va(phys);
 	numa_distance_cnt = cnt;
@@ -482,8 +474,8 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
 			numaram = 0;
 	}
 
-	e820ram = max_pfn - (memblock_x86_hole_size(0,
-					PFN_PHYS(max_pfn)) >> PAGE_SHIFT);
+	e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
+
 	/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
 	if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
 		printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
@@ -505,13 +497,10 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 	if (WARN_ON(nodes_empty(node_possible_map)))
 		return -EINVAL;
 
-	for (i = 0; i < mi->nr_blks; i++)
-		memblock_x86_register_active_regions(mi->blk[i].nid,
-					mi->blk[i].start >> PAGE_SHIFT,
-					mi->blk[i].end >> PAGE_SHIFT);
-
-	/* for out of order entries */
-	sort_node_map();
+	for (i = 0; i < mi->nr_blks; i++) {
+		struct numa_memblk *mb = &mi->blk[i];
+		memblock_set_node(mb->start, mb->end - mb->start, mb->nid);
+	}
 
 	/*
 	 * If sections array is gonna be used for pfn -> nid mapping, check
@@ -545,6 +534,8 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 			setup_node_data(nid, start, end);
 	}
 
+	/* Dump memblock with node info and return. */
+	memblock_dump_all();
 	return 0;
 }
 
@@ -582,7 +573,7 @@ static int __init numa_init(int (*init_func)(void))
 	nodes_clear(node_possible_map);
 	nodes_clear(node_online_map);
 	memset(&numa_meminfo, 0, sizeof(numa_meminfo));
-	remove_all_active_ranges();
+	WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
 	numa_reset_distance();
 
 	ret = init_func();
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 3adebe7e536..534255a36b6 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -199,23 +199,23 @@ void __init init_alloc_remap(int nid, u64 start, u64 end)
 
 	/* allocate node memory and the lowmem remap area */
 	node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES);
-	if (node_pa == MEMBLOCK_ERROR) {
+	if (!node_pa) {
 		pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
 			   size, nid);
 		return;
 	}
-	memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
+	memblock_reserve(node_pa, size);
 
 	remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
 					  max_low_pfn << PAGE_SHIFT,
 					  size, LARGE_PAGE_BYTES);
-	if (remap_pa == MEMBLOCK_ERROR) {
+	if (!remap_pa) {
 		pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
 			   size, nid);
-		memblock_x86_free_range(node_pa, node_pa + size);
+		memblock_free(node_pa, size);
 		return;
 	}
-	memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG");
+	memblock_reserve(remap_pa, size);
 	remap_va = phys_to_virt(remap_pa);
 
 	/* perform actual remap */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index dd27f401f0a..92e27119ee1 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -19,7 +19,7 @@ unsigned long __init numa_free_all_bootmem(void)
 	for_each_online_node(i)
 		pages += free_all_bootmem_node(NODE_DATA(i));
 
-	pages += free_all_memory_core_early(MAX_NUMNODES);
+	pages += free_low_memory_core_early(MAX_NUMNODES);
 
 	return pages;
 }
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index d0ed086b624..46db56845f1 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -28,6 +28,16 @@ static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
 	return -ENOENT;
 }
 
+static u64 mem_hole_size(u64 start, u64 end)
+{
+	unsigned long start_pfn = PFN_UP(start);
+	unsigned long end_pfn = PFN_DOWN(end);
+
+	if (start_pfn < end_pfn)
+		return PFN_PHYS(absent_pages_in_range(start_pfn, end_pfn));
+	return 0;
+}
+
 /*
  * Sets up nid to range from @start to @end.  The return value is -errno if
  * something went wrong, 0 otherwise.
@@ -89,7 +99,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 	 * Calculate target node size.  x86_32 freaks on __udivdi3() so do
 	 * the division in ulong number of pages and convert back.
 	 */
-	size = max_addr - addr - memblock_x86_hole_size(addr, max_addr);
+	size = max_addr - addr - mem_hole_size(addr, max_addr);
 	size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes);
 
 	/*
@@ -135,8 +145,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 			 * Continue to add memory to this fake node if its
 			 * non-reserved memory is less than the per-node size.
 			 */
-			while (end - start -
-			       memblock_x86_hole_size(start, end) < size) {
+			while (end - start - mem_hole_size(start, end) < size) {
 				end += FAKE_NODE_MIN_SIZE;
 				if (end > limit) {
 					end = limit;
@@ -150,7 +159,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 			 * this one must extend to the boundary.
 			 */
 			if (end < dma32_end && dma32_end - end -
-			    memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
+			    mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
 				end = dma32_end;
 
 			/*
@@ -158,8 +167,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 			 * next node, this one must extend to the end of the
 			 * physical node.
 			 */
-			if (limit - end -
-			    memblock_x86_hole_size(end, limit) < size)
+			if (limit - end - mem_hole_size(end, limit) < size)
 				end = limit;
 
 			ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
@@ -180,7 +188,7 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
 {
 	u64 end = start + size;
 
-	while (end - start - memblock_x86_hole_size(start, end) < size) {
+	while (end - start - mem_hole_size(start, end) < size) {
 		end += FAKE_NODE_MIN_SIZE;
 		if (end > max_addr) {
 			end = max_addr;
@@ -211,8 +219,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 	 * creates a uniform distribution of node sizes across the entire
 	 * machine (but not necessarily over physical nodes).
 	 */
-	min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) /
-						MAX_NUMNODES;
+	min_size = (max_addr - addr - mem_hole_size(addr, max_addr)) / MAX_NUMNODES;
 	min_size = max(min_size, FAKE_NODE_MIN_SIZE);
 	if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
 		min_size = (min_size + FAKE_NODE_MIN_SIZE) &
@@ -252,7 +259,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 			 * this one must extend to the boundary.
 			 */
 			if (end < dma32_end && dma32_end - end -
-			    memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
+			    mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
 				end = dma32_end;
 
 			/*
@@ -260,8 +267,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 			 * next node, this one must extend to the end of the
 			 * physical node.
 			 */
-			if (limit - end -
-			    memblock_x86_hole_size(end, limit) < size)
+			if (limit - end - mem_hole_size(end, limit) < size)
 				end = limit;
 
 			ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
@@ -351,11 +357,11 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 
 		phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
 					      phys_size, PAGE_SIZE);
-		if (phys == MEMBLOCK_ERROR) {
+		if (!phys) {
 			pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
 			goto no_emu;
 		}
-		memblock_x86_reserve_range(phys, phys + phys_size, "TMP NUMA DIST");
+		memblock_reserve(phys, phys_size);
 		phys_dist = __va(phys);
 
 		for (i = 0; i < numa_dist_cnt; i++)
@@ -424,7 +430,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 
 	/* free the copied physical distance table */
 	if (phys_dist)
-		memblock_x86_free_range(__pa(phys_dist), __pa(phys_dist) + phys_size);
+		memblock_free(__pa(phys_dist), phys_size);
 	return;
 
 no_emu:
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f9e526742fa..eda2acbb6e8 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -998,7 +998,7 @@ out_err:
 }
 EXPORT_SYMBOL(set_memory_uc);
 
-int _set_memory_array(unsigned long *addr, int addrinarray,
+static int _set_memory_array(unsigned long *addr, int addrinarray,
 		unsigned long new_type)
 {
 	int i, j;
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 81dbfdeb080..fd61b3fb734 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -69,6 +69,12 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 	if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
 		return;
 	pxm = pa->proximity_domain;
+	apic_id = pa->apic_id;
+	if (!cpu_has_x2apic && (apic_id >= 0xff)) {
+		printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n",
+			 pxm, apic_id);
+		return;
+	}
 	node = setup_node(pxm);
 	if (node < 0) {
 		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
@@ -76,7 +82,6 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 		return;
 	}
 
-	apic_id = pa->apic_id;
 	if (apic_id >= MAX_LOCAL_APIC) {
 		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
 		return;
diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile
index 446902b2a6b..1599f568f0e 100644
--- a/arch/x86/oprofile/Makefile
+++ b/arch/x86/oprofile/Makefile
@@ -4,9 +4,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 		oprof.o cpu_buffer.o buffer_sync.o \
 		event_buffer.o oprofile_files.o \
 		oprofilefs.o oprofile_stats.o  \
-		timer_int.o )
+		timer_int.o nmi_timer_int.o )
 
 oprofile-y				:= $(DRIVER_OBJS) init.o backtrace.o
 oprofile-$(CONFIG_X86_LOCAL_APIC) 	+= nmi_int.o op_model_amd.o \
 					   op_model_ppro.o op_model_p4.o
-oprofile-$(CONFIG_X86_IO_APIC)		+= nmi_timer_int.o
diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c
index f148cf65267..9e138d00ad3 100644
--- a/arch/x86/oprofile/init.c
+++ b/arch/x86/oprofile/init.c
@@ -16,37 +16,23 @@
  * with the NMI mode driver.
  */
 
+#ifdef CONFIG_X86_LOCAL_APIC
 extern int op_nmi_init(struct oprofile_operations *ops);
-extern int op_nmi_timer_init(struct oprofile_operations *ops);
 extern void op_nmi_exit(void);
-extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
+#else
+static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; }
+static void op_nmi_exit(void) { }
+#endif
 
-static int nmi_timer;
+extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
 
 int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
-	int ret;
-
-	ret = -ENODEV;
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	ret = op_nmi_init(ops);
-#endif
-	nmi_timer = (ret != 0);
-#ifdef CONFIG_X86_IO_APIC
-	if (nmi_timer)
-		ret = op_nmi_timer_init(ops);
-#endif
 	ops->backtrace = x86_backtrace;
-
-	return ret;
+	return op_nmi_init(ops);
 }
 
-
 void oprofile_arch_exit(void)
 {
-#ifdef CONFIG_X86_LOCAL_APIC
-	if (!nmi_timer)
-		op_nmi_exit();
-#endif
+	op_nmi_exit();
 }
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 75f9528e037..26b8a8514ee 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -595,24 +595,36 @@ static int __init p4_init(char **cpu_type)
 	return 0;
 }
 
-static int force_arch_perfmon;
-static int force_cpu_type(const char *str, struct kernel_param *kp)
+enum __force_cpu_type {
+	reserved = 0,		/* do not force */
+	timer,
+	arch_perfmon,
+};
+
+static int force_cpu_type;
+
+static int set_cpu_type(const char *str, struct kernel_param *kp)
 {
-	if (!strcmp(str, "arch_perfmon")) {
-		force_arch_perfmon = 1;
+	if (!strcmp(str, "timer")) {
+		force_cpu_type = timer;
+		printk(KERN_INFO "oprofile: forcing NMI timer mode\n");
+	} else if (!strcmp(str, "arch_perfmon")) {
+		force_cpu_type = arch_perfmon;
 		printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
+	} else {
+		force_cpu_type = 0;
 	}
 
 	return 0;
 }
-module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
+module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
 
 static int __init ppro_init(char **cpu_type)
 {
 	__u8 cpu_model = boot_cpu_data.x86_model;
 	struct op_x86_model_spec *spec = &op_ppro_spec;	/* default */
 
-	if (force_arch_perfmon && cpu_has_arch_perfmon)
+	if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon)
 		return 0;
 
 	/*
@@ -679,6 +691,9 @@ int __init op_nmi_init(struct oprofile_operations *ops)
 	if (!cpu_has_apic)
 		return -ENODEV;
 
+	if (force_cpu_type == timer)
+		return -ENODEV;
+
 	switch (vendor) {
 	case X86_VENDOR_AMD:
 		/* Needs to be at least an Athlon (or hammer in 32bit mode) */
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
deleted file mode 100644
index 7f8052cd662..00000000000
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
- * @file nmi_timer_int.c
- *
- * @remark Copyright 2003 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Zwane Mwaikambo <zwane@linuxpower.ca>
- */
-
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
-#include <linux/oprofile.h>
-#include <linux/rcupdate.h>
-#include <linux/kdebug.h>
-
-#include <asm/nmi.h>
-#include <asm/apic.h>
-#include <asm/ptrace.h>
-
-static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs)
-{
-	oprofile_add_sample(regs, 0);
-	return NMI_HANDLED;
-}
-
-static int timer_start(void)
-{
-	if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify,
-					0, "oprofile-timer"))
-		return 1;
-	return 0;
-}
-
-
-static void timer_stop(void)
-{
-	unregister_nmi_handler(NMI_LOCAL, "oprofile-timer");
-	synchronize_sched();  /* Allow already-started NMIs to complete. */
-}
-
-
-int __init op_nmi_timer_init(struct oprofile_operations *ops)
-{
-	ops->start = timer_start;
-	ops->stop = timer_stop;
-	ops->cpu_type = "timer";
-	printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
-	return 0;
-}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 37718f0f053..4cf9bd0a165 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -238,7 +238,8 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
 
 	spin_lock_irqsave(&rtc_lock, flags);
 	efi_call_phys_prelog();
-	status = efi_call_phys2(efi_phys.get_time, tm, tc);
+	status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
+				virt_to_phys(tc));
 	efi_call_phys_epilog();
 	spin_unlock_irqrestore(&rtc_lock, flags);
 	return status;
@@ -352,8 +353,7 @@ void __init efi_memblock_x86_reserve_range(void)
 		boot_params.efi_info.efi_memdesc_size;
 	memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
 	memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
-	memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size,
-		      "EFI memmap");
+	memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
 }
 
 #if EFI_DEBUG
@@ -397,16 +397,14 @@ void __init efi_reserve_boot_services(void)
 		if ((start+size >= virt_to_phys(_text)
 				&& start <= virt_to_phys(_end)) ||
 			!e820_all_mapped(start, start+size, E820_RAM) ||
-			memblock_x86_check_reserved_size(&start, &size,
-							1<<EFI_PAGE_SHIFT)) {
+			memblock_is_region_reserved(start, size)) {
 			/* Could not reserve, skip it */
 			md->num_pages = 0;
 			memblock_dbg(PFX "Could not reserve boot range "
 					"[0x%010llx-0x%010llx]\n",
 						start, start+size-1);
 		} else
-			memblock_x86_reserve_range(start, start+size,
-							"EFI Boot");
+			memblock_reserve(start, size);
 	}
 }
 
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index f8208267733..d511aa97533 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -18,14 +18,21 @@ chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk
 quiet_cmd_posttest = TEST    $@
       cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose)
 
-posttest: $(obj)/test_get_len vmlinux
+quiet_cmd_sanitytest = TEST    $@
+      cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000
+
+posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity
 	$(call cmd,posttest)
+	$(call cmd,sanitytest)
 
-hostprogs-y	:= test_get_len
+hostprogs-y	+= test_get_len insn_sanity
 
 # -I needed for generated C source and C source which in the kernel tree.
 HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
 
+HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
+
 # Dependencies are also needed.
 $(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
 
+$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index eaf11f52fc0..5f6a5b6c3a1 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -47,7 +47,7 @@ BEGIN {
 	sep_expr = "^\\|$"
 	group_expr = "^Grp[0-9A-Za-z]+"
 
-	imm_expr = "^[IJAO][a-z]"
+	imm_expr = "^[IJAOL][a-z]"
 	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
 	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
 	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
@@ -59,6 +59,7 @@ BEGIN {
 	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
 	imm_flag["Ob"] = "INAT_MOFFSET"
 	imm_flag["Ov"] = "INAT_MOFFSET"
+	imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
 
 	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
 	force64_expr = "\\([df]64\\)"
@@ -70,8 +71,12 @@ BEGIN {
 	lprefix3_expr = "\\(F2\\)"
 	max_lprefix = 4
 
-	vexok_expr = "\\(VEX\\)"
-	vexonly_expr = "\\(oVEX\\)"
+	# All opcodes starting with lower-case 'v' or with (v1) superscript
+	# accepts VEX prefix
+	vexok_opcode_expr = "^v.*"
+	vexok_expr = "\\(v1\\)"
+	# All opcodes with (v) superscript supports *only* VEX prefix
+	vexonly_expr = "\\(v\\)"
 
 	prefix_expr = "\\(Prefix\\)"
 	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
@@ -85,8 +90,8 @@ BEGIN {
 	prefix_num["SEG=GS"] = "INAT_PFX_GS"
 	prefix_num["SEG=SS"] = "INAT_PFX_SS"
 	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
-	prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2"
-	prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3"
+	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
+	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
 
 	clear_vars()
 }
@@ -310,12 +315,10 @@ function convert_operands(count,opnd,       i,j,imm,mod)
 		if (match(opcode, fpu_expr))
 			flags = add_flags(flags, "INAT_MODRM")
 
-		# check VEX only code
+		# check VEX codes
 		if (match(ext, vexonly_expr))
 			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
-
-		# check VEX only code
-		if (match(ext, vexok_expr))
+		else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
 			flags = add_flags(flags, "INAT_VEXOK")
 
 		# check prefixes
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
new file mode 100644
index 00000000000..cc2f8c13128
--- /dev/null
+++ b/arch/x86/tools/insn_sanity.c
@@ -0,0 +1,275 @@
+/*
+ * x86 decoder sanity test - based on test_get_insn.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ * Copyright (C) Hitachi, Ltd., 2011
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#define unlikely(cond) (cond)
+#define ARRAY_SIZE(a)	(sizeof(a)/sizeof(a[0]))
+
+#include <asm/insn.h>
+#include <inat.c>
+#include <insn.c>
+
+/*
+ * Test of instruction analysis against tampering.
+ * Feed random binary to instruction decoder and ensure not to
+ * access out-of-instruction-buffer.
+ */
+
+#define DEFAULT_MAX_ITER	10000
+#define INSN_NOP 0x90
+
+static const char	*prog;		/* Program name */
+static int		verbose;	/* Verbosity */
+static int		x86_64;		/* x86-64 bit mode flag */
+static unsigned int	seed;		/* Random seed */
+static unsigned long	iter_start;	/* Start of iteration number */
+static unsigned long	iter_end = DEFAULT_MAX_ITER;	/* End of iteration number */
+static FILE		*input_file;	/* Input file name */
+
+static void usage(const char *err)
+{
+	if (err)
+		fprintf(stderr, "Error: %s\n\n", err);
+	fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog);
+	fprintf(stderr, "\t-y	64bit mode\n");
+	fprintf(stderr, "\t-n	32bit mode\n");
+	fprintf(stderr, "\t-v	Verbosity(-vv dumps any decoded result)\n");
+	fprintf(stderr, "\t-s	Give a random seed (and iteration number)\n");
+	fprintf(stderr, "\t-m	Give a maximum iteration number\n");
+	fprintf(stderr, "\t-i	Give an input file with decoded binary\n");
+	exit(1);
+}
+
+static void dump_field(FILE *fp, const char *name, const char *indent,
+		       struct insn_field *field)
+{
+	fprintf(fp, "%s.%s = {\n", indent, name);
+	fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n",
+		indent, field->value, field->bytes[0], field->bytes[1],
+		field->bytes[2], field->bytes[3]);
+	fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent,
+		field->got, field->nbytes);
+}
+
+static void dump_insn(FILE *fp, struct insn *insn)
+{
+	fprintf(fp, "Instruction = {\n");
+	dump_field(fp, "prefixes", "\t",	&insn->prefixes);
+	dump_field(fp, "rex_prefix", "\t",	&insn->rex_prefix);
+	dump_field(fp, "vex_prefix", "\t",	&insn->vex_prefix);
+	dump_field(fp, "opcode", "\t",		&insn->opcode);
+	dump_field(fp, "modrm", "\t",		&insn->modrm);
+	dump_field(fp, "sib", "\t",		&insn->sib);
+	dump_field(fp, "displacement", "\t",	&insn->displacement);
+	dump_field(fp, "immediate1", "\t",	&insn->immediate1);
+	dump_field(fp, "immediate2", "\t",	&insn->immediate2);
+	fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n",
+		insn->attr, insn->opnd_bytes, insn->addr_bytes);
+	fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n",
+		insn->length, insn->x86_64, insn->kaddr);
+}
+
+static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter,
+			unsigned char *insn_buf, struct insn *insn)
+{
+	int i;
+
+	fprintf(fp, "%s:\n", msg);
+
+	dump_insn(fp, insn);
+
+	fprintf(fp, "You can reproduce this with below command(s);\n");
+
+	/* Input a decoded instruction sequence directly */
+	fprintf(fp, " $ echo ");
+	for (i = 0; i < MAX_INSN_SIZE; i++)
+		fprintf(fp, " %02x", insn_buf[i]);
+	fprintf(fp, " | %s -i -\n", prog);
+
+	if (!input_file) {
+		fprintf(fp, "Or \n");
+		/* Give a seed and iteration number */
+		fprintf(fp, " $ %s -s 0x%x,%lu\n", prog, seed, nr_iter);
+	}
+}
+
+static void init_random_seed(void)
+{
+	int fd;
+
+	fd = open("/dev/urandom", O_RDONLY);
+	if (fd < 0)
+		goto fail;
+
+	if (read(fd, &seed, sizeof(seed)) != sizeof(seed))
+		goto fail;
+
+	close(fd);
+	return;
+fail:
+	usage("Failed to open /dev/urandom");
+}
+
+/* Read given instruction sequence from the input file */
+static int read_next_insn(unsigned char *insn_buf)
+{
+	char buf[256]  = "", *tmp;
+	int i;
+
+	tmp = fgets(buf, ARRAY_SIZE(buf), input_file);
+	if (tmp == NULL || feof(input_file))
+		return 0;
+
+	for (i = 0; i < MAX_INSN_SIZE; i++) {
+		insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16);
+		if (*tmp != ' ')
+			break;
+	}
+
+	return i;
+}
+
+static int generate_insn(unsigned char *insn_buf)
+{
+	int i;
+
+	if (input_file)
+		return read_next_insn(insn_buf);
+
+	/* Fills buffer with random binary up to MAX_INSN_SIZE */
+	for (i = 0; i < MAX_INSN_SIZE - 1; i += 2)
+		*(unsigned short *)(&insn_buf[i]) = random() & 0xffff;
+
+	while (i < MAX_INSN_SIZE)
+		insn_buf[i++] = random() & 0xff;
+
+	return i;
+}
+
+static void parse_args(int argc, char **argv)
+{
+	int c;
+	char *tmp = NULL;
+	int set_seed = 0;
+
+	prog = argv[0];
+	while ((c = getopt(argc, argv, "ynvs:m:i:")) != -1) {
+		switch (c) {
+		case 'y':
+			x86_64 = 1;
+			break;
+		case 'n':
+			x86_64 = 0;
+			break;
+		case 'v':
+			verbose++;
+			break;
+		case 'i':
+			if (strcmp("-", optarg) == 0)
+				input_file = stdin;
+			else
+				input_file = fopen(optarg, "r");
+			if (!input_file)
+				usage("Failed to open input file");
+			break;
+		case 's':
+			seed = (unsigned int)strtoul(optarg, &tmp, 0);
+			if (*tmp == ',') {
+				optarg = tmp + 1;
+				iter_start = strtoul(optarg, &tmp, 0);
+			}
+			if (*tmp != '\0' || tmp == optarg)
+				usage("Failed to parse seed");
+			set_seed = 1;
+			break;
+		case 'm':
+			iter_end = strtoul(optarg, &tmp, 0);
+			if (*tmp != '\0' || tmp == optarg)
+				usage("Failed to parse max_iter");
+			break;
+		default:
+			usage(NULL);
+		}
+	}
+
+	/* Check errors */
+	if (iter_end < iter_start)
+		usage("Max iteration number must be bigger than iter-num");
+
+	if (set_seed && input_file)
+		usage("Don't use input file (-i) with random seed (-s)");
+
+	/* Initialize random seed */
+	if (!input_file) {
+		if (!set_seed)	/* No seed is given */
+			init_random_seed();
+		srand(seed);
+	}
+}
+
+int main(int argc, char **argv)
+{
+	struct insn insn;
+	int insns = 0;
+	int errors = 0;
+	unsigned long i;
+	unsigned char insn_buf[MAX_INSN_SIZE * 2];
+
+	parse_args(argc, argv);
+
+	/* Prepare stop bytes with NOPs */
+	memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE);
+
+	for (i = 0; i < iter_end; i++) {
+		if (generate_insn(insn_buf) <= 0)
+			break;
+
+		if (i < iter_start)	/* Skip to given iteration number */
+			continue;
+
+		/* Decode an instruction */
+		insn_init(&insn, insn_buf, x86_64);
+		insn_get_length(&insn);
+
+		if (insn.next_byte <= insn.kaddr ||
+		    insn.kaddr + MAX_INSN_SIZE < insn.next_byte) {
+			/* Access out-of-range memory */
+			dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn);
+			errors++;
+		} else if (verbose && !insn_complete(&insn))
+			dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn);
+		else if (verbose >= 2)
+			dump_insn(stdout, &insn);
+		insns++;
+	}
+
+	fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed);
+
+	return errors ? 1 : 0;
+}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 1f928659c33..12eb07bfb26 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1215,8 +1215,6 @@ asmlinkage void __init xen_start_kernel(void)
 	local_irq_disable();
 	early_boot_irqs_disabled = true;
 
-	memblock_init();
-
 	xen_raw_console_write("mapping kernel into physical memory\n");
 	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
 	xen_ident_map_ISA();
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 87f6673b120..f4bf8aa574f 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1774,10 +1774,8 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 	__xen_write_cr3(true, __pa(pgd));
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
 
-	memblock_x86_reserve_range(__pa(xen_start_info->pt_base),
-		      __pa(xen_start_info->pt_base +
-			   xen_start_info->nr_pt_frames * PAGE_SIZE),
-		      "XEN PAGETABLES");
+	memblock_reserve(__pa(xen_start_info->pt_base),
+			 xen_start_info->nr_pt_frames * PAGE_SIZE);
 
 	return pgd;
 }
@@ -1853,10 +1851,8 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 			  PFN_DOWN(__pa(initial_page_table)));
 	xen_write_cr3(__pa(initial_page_table));
 
-	memblock_x86_reserve_range(__pa(xen_start_info->pt_base),
-		      __pa(xen_start_info->pt_base +
-			   xen_start_info->nr_pt_frames * PAGE_SIZE),
-		      "XEN PAGETABLES");
+	memblock_reserve(__pa(xen_start_info->pt_base),
+			 xen_start_info->nr_pt_frames * PAGE_SIZE));
 
 	return initial_page_table;
 }
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index b2c7179fa26..e03c6369217 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -75,7 +75,7 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
 	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
 		printk(KERN_WARNING "Warning: not enough extra memory regions\n");
 
-	memblock_x86_reserve_range(start, start + size, "XEN EXTRA");
+	memblock_reserve(start, size);
 
 	xen_max_p2m_pfn = PFN_DOWN(start + size);
 
@@ -311,9 +311,8 @@ char * __init xen_memory_setup(void)
 	 *  - xen_start_info
 	 * See comment above "struct start_info" in <xen/interface/xen.h>
 	 */
-	memblock_x86_reserve_range(__pa(xen_start_info->mfn_list),
-		      __pa(xen_start_info->pt_base),
-			"XEN START INFO");
+	memblock_reserve(__pa(xen_start_info->mfn_list),
+			 xen_start_info->pt_base - xen_start_info->mfn_list);
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index f3e5eb43f71..ac62f9cf1e1 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -41,14 +41,6 @@ static struct clocksource ccount_clocksource = {
 	.rating = 200,
 	.read = ccount_read,
 	.mask = CLOCKSOURCE_MASK(32),
-	/*
-	 * With a shift of 22 the lower limit of the cpu clock is
-	 * 1MHz, where NSEC_PER_CCOUNT is 1000 or a bit less than
-	 * 2^10: Since we have 32 bits and the multiplicator can
-	 * already take up as much as 10 bits, this leaves us with
-	 * remaining upper 22 bits.
-	 */
-	.shift = 22,
 };
 
 static irqreturn_t timer_interrupt(int irq, void *dev_id);
@@ -66,10 +58,7 @@ void __init time_init(void)
 	printk("%d.%02d MHz\n", (int)ccount_per_jiffy/(1000000/HZ),
 			(int)(ccount_per_jiffy/(10000/HZ))%100);
 #endif
-	ccount_clocksource.mult =
-		clocksource_hz2mult(CCOUNT_PER_JIFFY * HZ,
-				ccount_clocksource.shift);
-	clocksource_register(&ccount_clocksource);
+	clocksource_register_hz(&ccount_clocksource, CCOUNT_PER_JIFFY * HZ);
 
 	/* Initialize the linux timer interrupt. */