Merge tag 'omap-for-v3.11/fixes-for-merge-window' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes

Omap fixes and minor defconfig updates that would be good to get in
before -rc1.

* tag 'omap-for-v3.11/fixes-for-merge-window' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap:
  ARM: OMAP2+: omap2plus_defconfig: Enable appended DTB support
  ARM: OMAP2+: Enable TI_EDMA in omap2plus_defconfig
  ARM: OMAP2+: omap2plus_defconfig: enable DRA752 thermal support by default
  ARM: OMAP2+: omap2plus_defconfig: enable TI bandgap driver
  ARM: OMAP2+: devices: remove duplicated include from devices.c
  ARM: OMAP3: igep0020: Set DSS pins in correct mux mode.
  ARM: OMAP2+: N900: enable N900-specific drivers even if device tree is enabled
  ARM: OMAP2+: Cocci spatch "ptr_ret.spatch"
  ARM: OMAP2+: Remove obsolete Makefile line
  ARM: OMAP5: Enable Cortex A15 errata 798181
  ARM: scu: provide inline dummy functions when SCU is not present
  ARM: OMAP4: sleep: build OMAP4 specific functions only for OMAP4
  ARM: OMAP2+: timer: initialize before using oh_name

Signed-off-by: Olof Johansson <olof@lixom.net>

Add/move/change conflicts in arch/arm/mach-omap2/Kconfig resolved.
author: Olof Johansson <olof@lixom.net> 2013-07-12 10:59:39 -0700
committer: Olof Johansson <olof@lixom.net> 2013-07-12 10:59:39 -0700
commit: f4b96f5e4ff8d86699c851c10245e102809b0331 (patch)
tree: f766102263bed71738431cabb4d4f6f086005cd8 /arch
parent: 9d8812df35be58a5da0c44182c1e4ba2507cc6a7 (diff)
parent: c24a6ae18abde53b048372b066b93b71b1b91154 (diff)
593 files changed, 13932 insertions, 10349 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index a4429bcd609..8d2ae24b9f4 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -365,6 +365,9 @@ config HAVE_IRQ_TIME_ACCOUNTING
 config HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	bool
 
+config HAVE_ARCH_SOFT_DIRTY
+	bool
+
 config HAVE_MOD_ARCH_SPECIFIC
 	bool
 	help
diff --git a/arch/alpha/include/asm/mmzone.h b/arch/alpha/include/asm/mmzone.h
index c5b5d6bac9e..14ce27bccd2 100644
--- a/arch/alpha/include/asm/mmzone.h
+++ b/arch/alpha/include/asm/mmzone.h
@@ -71,8 +71,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
 
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
 
-#define VALID_PAGE(page)	(((page) - mem_map) < max_mapnr)
-
 #define pmd_page(pmd)		(pfn_to_page(pmd_val(pmd) >> 32))
 #define pgd_page(pgd)		(pfn_to_page(pgd_val(pgd) >> 32))
 #define pte_pfn(pte)		(pte_val(pte) >> 32)
diff --git a/arch/alpha/include/uapi/asm/fcntl.h b/arch/alpha/include/uapi/asm/fcntl.h
index 6d9e805f18a..dfdadb0b4be 100644
--- a/arch/alpha/include/uapi/asm/fcntl.h
+++ b/arch/alpha/include/uapi/asm/fcntl.h
@@ -32,6 +32,7 @@
 #define O_SYNC		(__O_SYNC|O_DSYNC)
 
 #define O_PATH		040000000
+#define O_TMPFILE	0100000000
 
 #define F_GETLK		7
 #define F_SETLK		8
diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c
index 1d4aabfcf9a..837c0fa5831 100644
--- a/arch/alpha/kernel/sys_nautilus.c
+++ b/arch/alpha/kernel/sys_nautilus.c
@@ -238,8 +238,8 @@ nautilus_init_pci(void)
 	if (pci_mem < memtop)
 		memtop = pci_mem;
 	if (memtop > alpha_mv.min_mem_address) {
-		free_reserved_area((unsigned long)__va(alpha_mv.min_mem_address),
-				   (unsigned long)__va(memtop), 0, NULL);
+		free_reserved_area(__va(alpha_mv.min_mem_address),
+				   __va(memtop), -1, NULL);
 		printk("nautilus_init_pci: %ldk freed\n",
 			(memtop - alpha_mv.min_mem_address) >> 10);
 	}
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index 0ba85ee4a46..a1bea91df56 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -276,56 +276,25 @@ srm_paging_stop (void)
 }
 #endif
 
-#ifndef CONFIG_DISCONTIGMEM
-static void __init
-printk_memory_info(void)
-{
-	unsigned long codesize, reservedpages, datasize, initsize, tmp;
-	extern int page_is_ram(unsigned long) __init;
-
-	/* printk all informations */
-	reservedpages = 0;
-	for (tmp = 0; tmp < max_low_pfn; tmp++)
-		/*
-		 * Only count reserved RAM pages
-		 */
-		if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
-			reservedpages++;
-
-	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
-	datasize =  (unsigned long) &_edata - (unsigned long) &_data;
-	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
-
-	printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, %luk data, %luk init)\n",
-	       nr_free_pages() << (PAGE_SHIFT-10),
-	       max_mapnr << (PAGE_SHIFT-10),
-	       codesize >> 10,
-	       reservedpages << (PAGE_SHIFT-10),
-	       datasize >> 10,
-	       initsize >> 10);
-}
-
 void __init
 mem_init(void)
 {
-	max_mapnr = num_physpages = max_low_pfn;
-	totalram_pages += free_all_bootmem();
+	set_max_mapnr(max_low_pfn);
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
-
-	printk_memory_info();
+	free_all_bootmem();
+	mem_init_print_info(NULL);
 }
-#endif /* CONFIG_DISCONTIGMEM */
 
 void
 free_initmem(void)
 {
-	free_initmem_default(0);
+	free_initmem_default(-1);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void
 free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, 0, "initrd");
+	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index 33885048fa3..d543d71c28b 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -129,8 +129,6 @@ setup_memory_node(int nid, void *kernel_end)
 	if (node_max_pfn > max_low_pfn)
 		max_pfn = max_low_pfn = node_max_pfn;
 
-	num_physpages += node_max_pfn - node_min_pfn;
-
 #if 0 /* we'll try this one again in a little while */
 	/* Cute trick to make sure our local node data is on local memory */
 	node_data[nid] = (pg_data_t *)(__va(node_min_pfn << PAGE_SHIFT));
@@ -321,41 +319,3 @@ void __init paging_init(void)
 	/* Initialize the kernel's ZERO_PGE. */
 	memset((void *)ZERO_PGE, 0, PAGE_SIZE);
 }
-
-void __init mem_init(void)
-{
-	unsigned long codesize, reservedpages, datasize, initsize, pfn;
-	extern int page_is_ram(unsigned long) __init;
-	unsigned long nid, i;
-	high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
-
-	reservedpages = 0;
-	for_each_online_node(nid) {
-		/*
-		 * This will free up the bootmem, ie, slot 0 memory
-		 */
-		totalram_pages += free_all_bootmem_node(NODE_DATA(nid));
-
-		pfn = NODE_DATA(nid)->node_start_pfn;
-		for (i = 0; i < node_spanned_pages(nid); i++, pfn++)
-			if (page_is_ram(pfn) &&
-			    PageReserved(nid_page_nr(nid, i)))
-				reservedpages++;
-	}
-
-	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
-	datasize =  (unsigned long) &_edata - (unsigned long) &_data;
-	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
-
-	printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, "
-	       "%luk data, %luk init)\n",
-	       nr_free_pages() << (PAGE_SHIFT-10),
-	       num_physpages << (PAGE_SHIFT-10),
-	       codesize >> 10,
-	       reservedpages << (PAGE_SHIFT-10),
-	       datasize >> 10,
-	       initsize >> 10);
-#if 0
-	mem_stress();
-#endif
-}
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 5917099470e..4a0e54fc01b 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -184,6 +184,7 @@ config ARC_CACHE_PAGES
 
 config ARC_CACHE_VIPT_ALIASING
 	bool "Support VIPT Aliasing D$"
+	depends on ARC_HAS_DCACHE
 	default n
 
 endif	#ARC_CACHE
@@ -361,13 +362,6 @@ config ARC_MISALIGN_ACCESS
 	  Use ONLY-IF-ABS-NECESSARY as it will be very slow and also can hide
 	  potential bugs in code
 
-config ARC_STACK_NONEXEC
-	bool "Make stack non-executable"
-	default n
-	help
-	  To disable the execute permissions of stack/heap of processes
-	  which are enabled by default.
-
 config HZ
 	int "Timer Frequency"
 	default 100
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 183397fd289..8c0b1aa56f7 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -9,25 +9,27 @@
 UTS_MACHINE := arc
 
 ifeq ($(CROSS_COMPILE),)
-CROSS_COMPILE := arc-elf32-
+CROSS_COMPILE := arc-linux-uclibc-
 endif
 
 KBUILD_DEFCONFIG := fpga_defconfig
 
 cflags-y	+= -mA7 -fno-common -pipe -fno-builtin -D__linux__
 
-LINUXINCLUDE	+=  -include ${src}/arch/arc/include/asm/defines.h
-
 ifdef CONFIG_ARC_CURR_IN_REG
 # For a global register defintion, make sure it gets passed to every file
 # We had a customer reported bug where some code built in kernel was NOT using
 # any kernel headers, and missing the r25 global register
-# Can't do unconditionally (like above) because of recursive include issues
+# Can't do unconditionally because of recursive include issues
 # due to <linux/thread_info.h>
 LINUXINCLUDE	+=  -include ${src}/arch/arc/include/asm/current.h
 endif
 
-atleast_gcc44 :=  $(call cc-ifversion, -gt, 0402, y)
+upto_gcc42    :=  $(call cc-ifversion, -le, 0402, y)
+upto_gcc44    :=  $(call cc-ifversion, -le, 0404, y)
+atleast_gcc44 :=  $(call cc-ifversion, -ge, 0404, y)
+atleast_gcc48 :=  $(call cc-ifversion, -ge, 0408, y)
+
 cflags-$(atleast_gcc44)			+= -fsection-anchors
 
 cflags-$(CONFIG_ARC_HAS_LLSC)		+= -mlock
@@ -35,6 +37,11 @@ cflags-$(CONFIG_ARC_HAS_SWAPE)		+= -mswape
 cflags-$(CONFIG_ARC_HAS_RTSC)		+= -mrtsc
 cflags-$(CONFIG_ARC_DW2_UNWIND)		+= -fasynchronous-unwind-tables
 
+# By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok
+ifeq ($(atleast_gcc48),y)
+cflags-$(CONFIG_ARC_DW2_UNWIND)		+= -gdwarf-2
+endif
+
 ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
 # Generic build system uses -O2, we want -O3
 cflags-y  += -O3
@@ -48,11 +55,10 @@ cflags-$(disable_small_data)		+= -mno-sdata -fcall-used-gp
 cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= -mbig-endian
 ldflags-$(CONFIG_CPU_BIG_ENDIAN)	+= -EB
 
-# STAR 9000518362:
+# STAR 9000518362: (fixed with binutils shipping with gcc 4.8)
 # arc-linux-uclibc-ld (buildroot) or arceb-elf32-ld (EZChip) don't accept
-# --build-id w/o "-marclinux".
-# Default arc-elf32-ld is OK
-ldflags-y				+= -marclinux
+# --build-id w/o "-marclinux". Default arc-elf32-ld is OK
+ldflags-$(upto_gcc44)			+= -marclinux
 
 ARC_LIBGCC				:= -mA7
 cflags-$(CONFIG_ARC_HAS_HW_MPY)		+= -multcost=16
@@ -66,8 +72,8 @@ ifndef CONFIG_ARC_HAS_HW_MPY
 # With gcc 4.4.7, -mno-mpy is enough to make any other related adjustments,
 # e.g. increased cost of MPY. With gcc 4.2.1 this had to be explicitly hinted
 
-	ARC_LIBGCC		:= -marc600
-	ifneq ($(atleast_gcc44),y)
+	ifeq ($(upto_gcc42),y)
+		ARC_LIBGCC	:= -marc600
 		cflags-y	+= -multcost=30
 	endif
 endif
diff --git a/arch/arc/configs/fpga_defconfig b/arch/arc/configs/fpga_defconfig
index 95350be6ef6..c109af32027 100644
--- a/arch/arc/configs/fpga_defconfig
+++ b/arch/arc/configs/fpga_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-elf32-"
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index 446c96c24ef..451af30914f 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-elf32-"
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig
index 4fa5cd9f220..6be6492442d 100644
--- a/arch/arc/configs/tb10x_defconfig
+++ b/arch/arc/configs/tb10x_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-elf32-"
+CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="tb10x"
 CONFIG_SYSVIPC=y
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 1b907c46566..355cb470c2a 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -20,7 +20,6 @@
 #define ARC_REG_PERIBASE_BCR	0x69
 #define ARC_REG_FP_BCR		0x6B	/* Single-Precision FPU */
 #define ARC_REG_DPFP_BCR	0x6C	/* Dbl Precision FPU */
-#define ARC_REG_MMU_BCR		0x6f
 #define ARC_REG_DCCM_BCR	0x74	/* DCCM Present + SZ */
 #define ARC_REG_TIMERS_BCR	0x75
 #define ARC_REG_ICCM_BCR	0x78
@@ -34,22 +33,12 @@
 #define ARC_REG_D_UNCACH_BCR	0x6A
 
 /* status32 Bits Positions */
-#define STATUS_H_BIT		0	/* CPU Halted */
-#define STATUS_E1_BIT		1	/* Int 1 enable */
-#define STATUS_E2_BIT		2	/* Int 2 enable */
-#define STATUS_A1_BIT		3	/* Int 1 active */
-#define STATUS_A2_BIT		4	/* Int 2 active */
 #define STATUS_AE_BIT		5	/* Exception active */
 #define STATUS_DE_BIT		6	/* PC is in delay slot */
 #define STATUS_U_BIT		7	/* User/Kernel mode */
 #define STATUS_L_BIT		12	/* Loop inhibit */
 
 /* These masks correspond to the status word(STATUS_32) bits */
-#define STATUS_H_MASK		(1<<STATUS_H_BIT)
-#define STATUS_E1_MASK		(1<<STATUS_E1_BIT)
-#define STATUS_E2_MASK		(1<<STATUS_E2_BIT)
-#define STATUS_A1_MASK		(1<<STATUS_A1_BIT)
-#define STATUS_A2_MASK		(1<<STATUS_A2_BIT)
 #define STATUS_AE_MASK		(1<<STATUS_AE_BIT)
 #define STATUS_DE_MASK		(1<<STATUS_DE_BIT)
 #define STATUS_U_MASK		(1<<STATUS_U_BIT)
@@ -71,6 +60,7 @@
 #define ECR_V_ITLB_MISS			0x21
 #define ECR_V_DTLB_MISS			0x22
 #define ECR_V_PROTV			0x23
+#define ECR_V_TRAP			0x25
 
 /* Protection Violation Exception Cause Code Values */
 #define ECR_C_PROTV_INST_FETCH		0x00
@@ -79,94 +69,23 @@
 #define ECR_C_PROTV_XCHG		0x03
 #define ECR_C_PROTV_MISALIG_DATA	0x04
 
+#define ECR_C_BIT_PROTV_MISALIG_DATA	10
+
+/* Machine Check Cause Code Values */
+#define ECR_C_MCHK_DUP_TLB		0x01
+
 /* DTLB Miss Exception Cause Code Values */
 #define ECR_C_BIT_DTLB_LD_MISS		8
 #define ECR_C_BIT_DTLB_ST_MISS		9
 
+/* Dummy ECR values for Interrupts */
+#define event_IRQ1		0x0031abcd
+#define event_IRQ2		0x0032abcd
 
 /* Auxiliary registers */
 #define AUX_IDENTITY		4
 #define AUX_INTR_VEC_BASE	0x25
-#define AUX_IRQ_LEV		0x200	/* IRQ Priority: L1 or L2 */
-#define AUX_IRQ_HINT		0x201	/* For generating Soft Interrupts */
-#define AUX_IRQ_LV12		0x43	/* interrupt level register */
-
-#define AUX_IENABLE		0x40c
-#define AUX_ITRIGGER		0x40d
-#define AUX_IPULSE		0x415
-
-/* Timer related Aux registers */
-#define ARC_REG_TIMER0_LIMIT	0x23	/* timer 0 limit */
-#define ARC_REG_TIMER0_CTRL	0x22	/* timer 0 control */
-#define ARC_REG_TIMER0_CNT	0x21	/* timer 0 count */
-#define ARC_REG_TIMER1_LIMIT	0x102	/* timer 1 limit */
-#define ARC_REG_TIMER1_CTRL	0x101	/* timer 1 control */
-#define ARC_REG_TIMER1_CNT	0x100	/* timer 1 count */
-
-#define TIMER_CTRL_IE		(1 << 0) /* Interupt when Count reachs limit */
-#define TIMER_CTRL_NH		(1 << 1) /* Count only when CPU NOT halted */
-
-/* MMU Management regs */
-#define ARC_REG_TLBPD0		0x405
-#define ARC_REG_TLBPD1		0x406
-#define ARC_REG_TLBINDEX	0x407
-#define ARC_REG_TLBCOMMAND	0x408
-#define ARC_REG_PID		0x409
-#define ARC_REG_SCRATCH_DATA0	0x418
-
-/* Bits in MMU PID register */
-#define MMU_ENABLE		(1 << 31)	/* Enable MMU for process */
-
-/* Error code if probe fails */
-#define TLB_LKUP_ERR		0x80000000
-
-/* TLB Commands */
-#define TLBWrite    0x1
-#define TLBRead     0x2
-#define TLBGetIndex 0x3
-#define TLBProbe    0x4
-
-#if (CONFIG_ARC_MMU_VER >= 2)
-#define TLBWriteNI  0x5		/* write JTLB without inv uTLBs */
-#define TLBIVUTLB   0x6		/* explicitly inv uTLBs */
-#else
-#undef TLBWriteNI		/* These cmds don't exist on older MMU */
-#undef TLBIVUTLB
-#endif
 
-/* Instruction cache related Auxiliary registers */
-#define ARC_REG_IC_BCR		0x77	/* Build Config reg */
-#define ARC_REG_IC_IVIC		0x10
-#define ARC_REG_IC_CTRL		0x11
-#define ARC_REG_IC_IVIL		0x19
-#if (CONFIG_ARC_MMU_VER > 2)
-#define ARC_REG_IC_PTAG		0x1E
-#endif
-
-/* Bit val in IC_CTRL */
-#define IC_CTRL_CACHE_DISABLE   0x1
-
-/* Data cache related Auxiliary registers */
-#define ARC_REG_DC_BCR		0x72
-#define ARC_REG_DC_IVDC		0x47
-#define ARC_REG_DC_CTRL		0x48
-#define ARC_REG_DC_IVDL		0x4A
-#define ARC_REG_DC_FLSH		0x4B
-#define ARC_REG_DC_FLDL		0x4C
-#if (CONFIG_ARC_MMU_VER > 2)
-#define ARC_REG_DC_PTAG		0x5C
-#endif
-
-/* Bit val in DC_CTRL */
-#define DC_CTRL_INV_MODE_FLUSH  0x40
-#define DC_CTRL_FLUSH_STATUS    0x100
-
-/* MMU Management regs */
-#define ARC_REG_PID		0x409
-#define ARC_REG_SCRATCH_DATA0	0x418
-
-/* Bits in MMU PID register */
-#define MMU_ENABLE		(1 << 31)	/* Enable MMU for process */
 
 /*
  * Floating Pt Registers
@@ -293,24 +212,6 @@ struct bcr_identity {
 #endif
 };
 
-struct bcr_mmu_1_2 {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-	unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8;
-#else
-	unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8;
-#endif
-};
-
-struct bcr_mmu_3 {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-	unsigned int ver:8, ways:4, sets:4, osm:1, reserv:3, pg_sz:4,
-		     u_itlb:4, u_dtlb:4;
-#else
-	unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, reserv:3, osm:1, sets:4,
-		     ways:4, ver:8;
-#endif
-};
-
 #define EXTN_SWAP_VALID     0x1
 #define EXTN_NORM_VALID     0x2
 #define EXTN_MINMAX_VALID   0x2
@@ -343,14 +244,6 @@ struct bcr_extn_xymem {
 #endif
 };
 
-struct bcr_cache {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-	unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
-#else
-	unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
-#endif
-};
-
 struct bcr_perip {
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	unsigned int start:8, pad2:8, sz:8, pad:8;
@@ -403,7 +296,7 @@ struct cpuinfo_arc_mmu {
 };
 
 struct cpuinfo_arc_cache {
-	unsigned int has_aliasing, sz, line_len, assoc, ver;
+	unsigned int sz, line_len, assoc, ver;
 };
 
 struct cpuinfo_arc_ccm {
diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h
index 2ad8f9b1c54..5b18e94c667 100644
--- a/arch/arc/include/asm/bug.h
+++ b/arch/arc/include/asm/bug.h
@@ -18,9 +18,8 @@ struct task_struct;
 void show_regs(struct pt_regs *regs);
 void show_stacktrace(struct task_struct *tsk, struct pt_regs *regs);
 void show_kernel_fault_diag(const char *str, struct pt_regs *regs,
-			    unsigned long address, unsigned long cause_reg);
-void die(const char *str, struct pt_regs *regs, unsigned long address,
-	 unsigned long cause_reg);
+			    unsigned long address);
+void die(const char *str, struct pt_regs *regs, unsigned long address);
 
 #define BUG()	do {				\
 	dump_stack();					\
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index d5555fe4742..5802849a6ca 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -18,21 +18,19 @@
 
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
-#define ARC_ICACHE_WAYS	2
-#define ARC_DCACHE_WAYS	4
-
-/* Helpers */
+/* For a rare case where customers have differently configured I/D */
 #define ARC_ICACHE_LINE_LEN	L1_CACHE_BYTES
 #define ARC_DCACHE_LINE_LEN	L1_CACHE_BYTES
 
 #define ICACHE_LINE_MASK	(~(ARC_ICACHE_LINE_LEN - 1))
 #define DCACHE_LINE_MASK	(~(ARC_DCACHE_LINE_LEN - 1))
 
-#if ARC_ICACHE_LINE_LEN != ARC_DCACHE_LINE_LEN
-#error "Need to fix some code as I/D cache lines not same"
-#else
-#define is_not_cache_aligned(p)	((unsigned long)p & (~DCACHE_LINE_MASK))
-#endif
+/*
+ * ARC700 doesn't cache any access in top 256M.
+ * Ideal for wiring memory mapped peripherals as we don't need to do
+ * explicit uncached accesses (LD.di/ST.di) hence more portable drivers
+ */
+#define ARC_UNCACHED_ADDR_SPACE	0xc0000000
 
 #ifndef __ASSEMBLY__
 
@@ -57,16 +55,10 @@
 
 #define ARCH_DMA_MINALIGN      L1_CACHE_BYTES
 
-/*
- * ARC700 doesn't cache any access in top 256M.
- * Ideal for wiring memory mapped peripherals as we don't need to do
- * explicit uncached accesses (LD.di/ST.di) hence more portable drivers
- */
-#define ARC_UNCACHED_ADDR_SPACE	0xc0000000
-
 extern void arc_cache_init(void);
 extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
 extern void __init read_decode_cache_bcr(void);
-#endif
+
+#endif	/* !__ASSEMBLY__ */
 
 #endif /* _ASM_CACHE_H */
diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index ef62682e8d9..6abc4972bc9 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -81,16 +81,19 @@ void flush_anon_page(struct vm_area_struct *vma,
 #endif	/* CONFIG_ARC_CACHE_VIPT_ALIASING */
 
 /*
+ * A new pagecache page has PG_arch_1 clear - thus dcache dirty by default
+ * This works around some PIO based drivers which don't call flush_dcache_page
+ * to record that they dirtied the dcache
+ */
+#define PG_dc_clean	PG_arch_1
+
+/*
  * Simple wrapper over config option
  * Bootup code ensures that hardware matches kernel configuration
  */
 static inline int cache_is_vipt_aliasing(void)
 {
-#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING
-	return 1;
-#else
-	return 0;
-#endif
+	return IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
 }
 
 #define CACHE_COLOR(addr)	(((unsigned long)(addr) >> (PAGE_SHIFT)) & 1)
diff --git a/arch/arc/include/asm/defines.h b/arch/arc/include/asm/defines.h
deleted file mode 100644
index 6097bb439cc..00000000000
--- a/arch/arc/include/asm/defines.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __ARC_ASM_DEFINES_H__
-#define __ARC_ASM_DEFINES_H__
-
-#if defined(CONFIG_ARC_MMU_V1)
-#define CONFIG_ARC_MMU_VER 1
-#elif defined(CONFIG_ARC_MMU_V2)
-#define CONFIG_ARC_MMU_VER 2
-#elif defined(CONFIG_ARC_MMU_V3)
-#define CONFIG_ARC_MMU_VER 3
-#endif
-
-#ifdef CONFIG_ARC_HAS_LLSC
-#define __CONFIG_ARC_HAS_LLSC_VAL 1
-#else
-#define __CONFIG_ARC_HAS_LLSC_VAL 0
-#endif
-
-#ifdef CONFIG_ARC_HAS_SWAPE
-#define __CONFIG_ARC_HAS_SWAPE_VAL 1
-#else
-#define __CONFIG_ARC_HAS_SWAPE_VAL 0
-#endif
-
-#ifdef CONFIG_ARC_HAS_RTSC
-#define __CONFIG_ARC_HAS_RTSC_VAL 1
-#else
-#define __CONFIG_ARC_HAS_RTSC_VAL 0
-#endif
-
-#ifdef CONFIG_ARC_MMU_SASID
-#define __CONFIG_ARC_MMU_SASID_VAL 1
-#else
-#define __CONFIG_ARC_MMU_SASID_VAL 0
-#endif
-
-#ifdef CONFIG_ARC_HAS_ICACHE
-#define __CONFIG_ARC_HAS_ICACHE	1
-#else
-#define __CONFIG_ARC_HAS_ICACHE	0
-#endif
-
-#ifdef CONFIG_ARC_HAS_DCACHE
-#define __CONFIG_ARC_HAS_DCACHE	1
-#else
-#define __CONFIG_ARC_HAS_DCACHE	0
-#endif
-
-#endif /* __ARC_ASM_DEFINES_H__ */
diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index eb2ae53187d..8943c028d4b 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -50,194 +50,177 @@
  *      Eff Addr for load = [reg2]
  */
 
+.macro PUSH reg
+	st.a	\reg, [sp, -4]
+.endm
+
+.macro PUSHAX aux
+	lr	r9, [\aux]
+	PUSH	r9
+.endm
+
+.macro POP reg
+	ld.ab	\reg, [sp, 4]
+.endm
+
+.macro POPAX aux
+	POP	r9
+	sr	r9, [\aux]
+.endm
+
 /*--------------------------------------------------------------
- * Save caller saved registers (scratch registers) ( r0 - r12 )
- * Registers are pushed / popped in the order defined in struct ptregs
- * in asm/ptrace.h
+ * Helpers to save/restore Scratch Regs:
+ * used by Interrupt/Exception Prologue/Epilogue
  *-------------------------------------------------------------*/
-.macro  SAVE_CALLER_SAVED
-	st.a    r0, [sp, -4]
-	st.a    r1, [sp, -4]
-	st.a    r2, [sp, -4]
-	st.a    r3, [sp, -4]
-	st.a    r4, [sp, -4]
-	st.a    r5, [sp, -4]
-	st.a    r6, [sp, -4]
-	st.a    r7, [sp, -4]
-	st.a    r8, [sp, -4]
-	st.a    r9, [sp, -4]
-	st.a    r10, [sp, -4]
-	st.a    r11, [sp, -4]
-	st.a    r12, [sp, -4]
+.macro  SAVE_R0_TO_R12
+	PUSH	r0
+	PUSH	r1
+	PUSH	r2
+	PUSH	r3
+	PUSH	r4
+	PUSH	r5
+	PUSH	r6
+	PUSH	r7
+	PUSH	r8
+	PUSH	r9
+	PUSH	r10
+	PUSH	r11
+	PUSH	r12
+.endm
+
+.macro RESTORE_R12_TO_R0
+	POP	r12
+	POP	r11
+	POP	r10
+	POP	r9
+	POP	r8
+	POP	r7
+	POP	r6
+	POP	r5
+	POP	r4
+	POP	r3
+	POP	r2
+	POP	r1
+	POP	r0
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+	ld	r25, [sp, 12]
+#endif
 .endm
 
 /*--------------------------------------------------------------
- * Restore caller saved registers (scratch registers)
+ * Helpers to save/restore callee-saved regs:
+ * used by several macros below
  *-------------------------------------------------------------*/
-.macro RESTORE_CALLER_SAVED
-	ld.ab   r12, [sp, 4]
-	ld.ab   r11, [sp, 4]
-	ld.ab   r10, [sp, 4]
-	ld.ab   r9, [sp, 4]
-	ld.ab   r8, [sp, 4]
-	ld.ab   r7, [sp, 4]
-	ld.ab   r6, [sp, 4]
-	ld.ab   r5, [sp, 4]
-	ld.ab   r4, [sp, 4]
-	ld.ab   r3, [sp, 4]
-	ld.ab   r2, [sp, 4]
-	ld.ab   r1, [sp, 4]
-	ld.ab   r0, [sp, 4]
+.macro SAVE_R13_TO_R24
+	PUSH	r13
+	PUSH	r14
+	PUSH	r15
+	PUSH	r16
+	PUSH	r17
+	PUSH	r18
+	PUSH	r19
+	PUSH	r20
+	PUSH	r21
+	PUSH	r22
+	PUSH	r23
+	PUSH	r24
+.endm
+
+.macro RESTORE_R24_TO_R13
+	POP	r24
+	POP	r23
+	POP	r22
+	POP	r21
+	POP	r20
+	POP	r19
+	POP	r18
+	POP	r17
+	POP	r16
+	POP	r15
+	POP	r14
+	POP	r13
 .endm
 
+#define OFF_USER_R25_FROM_R24	(SZ_CALLEE_REGS + SZ_PT_REGS - 8)/4
 
 /*--------------------------------------------------------------
- * Save callee saved registers (non scratch registers) ( r13 - r25 )
- *  on kernel stack.
- * User mode callee regs need to be saved in case of
- *    -fork and friends for replicating from parent to child
- *    -before going into do_signal( ) for ptrace/core-dump
- * Special case handling is required for r25 in case it is used by kernel
- *  for caching task ptr. Low level exception/ISR save user mode r25
- *  into task->thread.user_r25. So it needs to be retrieved from there and
- *  saved into kernel stack with rest of callee reg-file
+ * Collect User Mode callee regs as struct callee_regs - needed by
+ * fork/do_signal/unaligned-access-emulation.
+ * (By default only scratch regs are saved on entry to kernel)
+ *
+ * Special handling for r25 if used for caching Task Pointer.
+ * It would have been saved in task->thread.user_r25 already, but to keep
+ * the interface same it is copied into regular r25 placeholder in
+ * struct callee_regs.
  *-------------------------------------------------------------*/
 .macro SAVE_CALLEE_SAVED_USER
-	st.a    r13, [sp, -4]
-	st.a    r14, [sp, -4]
-	st.a    r15, [sp, -4]
-	st.a    r16, [sp, -4]
-	st.a    r17, [sp, -4]
-	st.a    r18, [sp, -4]
-	st.a    r19, [sp, -4]
-	st.a    r20, [sp, -4]
-	st.a    r21, [sp, -4]
-	st.a    r22, [sp, -4]
-	st.a    r23, [sp, -4]
-	st.a    r24, [sp, -4]
+
+	SAVE_R13_TO_R24
 
 #ifdef CONFIG_ARC_CURR_IN_REG
 	; Retrieve orig r25 and save it on stack
-	ld      r12, [r25, TASK_THREAD + THREAD_USER_R25]
+	ld.as   r12, [sp, OFF_USER_R25_FROM_R24]
 	st.a    r12, [sp, -4]
 #else
-	st.a    r25, [sp, -4]
+	PUSH	r25
 #endif
 
-	/* move up by 1 word to "create" callee_regs->"stack_place_holder" */
-	sub sp, sp, 4
 .endm
 
 /*--------------------------------------------------------------
- * Save callee saved registers (non scratch registers) ( r13 - r25 )
- * kernel mode callee regs needed to be saved in case of context switch
- * If r25 is used for caching task pointer then that need not be saved
- * as it can be re-created from current task global
+ * Save kernel Mode callee regs at the time of Context Switch.
+ *
+ * Special handling for r25 if used for caching Task Pointer.
+ * Kernel simply skips saving it since it will be loaded with
+ * incoming task pointer anyways
  *-------------------------------------------------------------*/
 .macro SAVE_CALLEE_SAVED_KERNEL
-	st.a    r13, [sp, -4]
-	st.a    r14, [sp, -4]
-	st.a    r15, [sp, -4]
-	st.a    r16, [sp, -4]
-	st.a    r17, [sp, -4]
-	st.a    r18, [sp, -4]
-	st.a    r19, [sp, -4]
-	st.a    r20, [sp, -4]
-	st.a    r21, [sp, -4]
-	st.a    r22, [sp, -4]
-	st.a    r23, [sp, -4]
-	st.a    r24, [sp, -4]
+
+	SAVE_R13_TO_R24
+
 #ifdef CONFIG_ARC_CURR_IN_REG
-	sub     sp, sp, 8
-#else
-	st.a    r25, [sp, -4]
 	sub     sp, sp, 4
+#else
+	PUSH	r25
 #endif
 .endm
 
 /*--------------------------------------------------------------
- * RESTORE_CALLEE_SAVED_KERNEL:
- * Loads callee (non scratch) Reg File by popping from Kernel mode stack.
- *  This is reverse of SAVE_CALLEE_SAVED,
- *
- * NOTE:
- * Ideally this shd only be called in switch_to for loading
- *  switched-IN task's CALLEE Reg File.
- *  For all other cases RESTORE_CALLEE_SAVED_FAST must be used
- *  which simply pops the stack w/o touching regs.
+ * Opposite of SAVE_CALLEE_SAVED_KERNEL
  *-------------------------------------------------------------*/
 .macro RESTORE_CALLEE_SAVED_KERNEL
 
-
 #ifdef CONFIG_ARC_CURR_IN_REG
-	add     sp, sp, 8  /* skip callee_reg gutter and user r25 placeholder */
+	add     sp, sp, 4  /* skip usual r25 placeholder */
 #else
-	add     sp, sp, 4   /* skip "callee_regs->stack_place_holder" */
-	ld.ab   r25, [sp, 4]
+	POP	r25
 #endif
-
-	ld.ab   r24, [sp, 4]
-	ld.ab   r23, [sp, 4]
-	ld.ab   r22, [sp, 4]
-	ld.ab   r21, [sp, 4]
-	ld.ab   r20, [sp, 4]
-	ld.ab   r19, [sp, 4]
-	ld.ab   r18, [sp, 4]
-	ld.ab   r17, [sp, 4]
-	ld.ab   r16, [sp, 4]
-	ld.ab   r15, [sp, 4]
-	ld.ab   r14, [sp, 4]
-	ld.ab   r13, [sp, 4]
-
+	RESTORE_R24_TO_R13
 .endm
 
 /*--------------------------------------------------------------
- * RESTORE_CALLEE_SAVED_USER:
- * This is called after do_signal where tracer might have changed callee regs
- * thus we need to restore the reg file.
- * Special case handling is required for r25 in case it is used by kernel
- *  for caching task ptr. Ptrace would have modified on-kernel-stack value of
- *  r25, which needs to be shoved back into task->thread.user_r25 where from
- *  Low level exception/ISR return code will retrieve to populate with rest of
- *  callee reg-file.
+ * Opposite of SAVE_CALLEE_SAVED_USER
+ *
+ * ptrace tracer or unaligned-access fixup might have changed a user mode
+ * callee reg which is saved back to usual r25 storage location
  *-------------------------------------------------------------*/
 .macro RESTORE_CALLEE_SAVED_USER
 
-	add     sp, sp, 4   /* skip "callee_regs->stack_place_holder" */
-
 #ifdef CONFIG_ARC_CURR_IN_REG
 	ld.ab   r12, [sp, 4]
-	st      r12, [r25, TASK_THREAD + THREAD_USER_R25]
+	st.as   r12, [sp, OFF_USER_R25_FROM_R24]
 #else
-	ld.ab   r25, [sp, 4]
+	POP	r25
 #endif
-
-	ld.ab   r24, [sp, 4]
-	ld.ab   r23, [sp, 4]
-	ld.ab   r22, [sp, 4]
-	ld.ab   r21, [sp, 4]
-	ld.ab   r20, [sp, 4]
-	ld.ab   r19, [sp, 4]
-	ld.ab   r18, [sp, 4]
-	ld.ab   r17, [sp, 4]
-	ld.ab   r16, [sp, 4]
-	ld.ab   r15, [sp, 4]
-	ld.ab   r14, [sp, 4]
-	ld.ab   r13, [sp, 4]
+	RESTORE_R24_TO_R13
 .endm
 
 /*--------------------------------------------------------------
  * Super FAST Restore callee saved regs by simply re-adjusting SP
  *-------------------------------------------------------------*/
 .macro DISCARD_CALLEE_SAVED_USER
-	add     sp, sp, 14 * 4
-.endm
-
-/*--------------------------------------------------------------
- * Restore User mode r25 saved in task_struct->thread.user_r25
- *-------------------------------------------------------------*/
-.macro RESTORE_USER_R25
-	ld  r25, [r25, TASK_THREAD + THREAD_USER_R25]
+	add     sp, sp, SZ_CALLEE_REGS
 .endm
 
 /*-------------------------------------------------------------
@@ -252,7 +235,7 @@
 	ld  \out, [\tsk, TASK_THREAD_INFO]
 
 	/* Go to end of page where stack begins (grows upwards) */
-	add2 \out, \out, (THREAD_SIZE - 4)/4   /* one word GUTTER */
+	add2 \out, \out, (THREAD_SIZE)/4
 
 .endm
 
@@ -305,33 +288,28 @@
 	 * safe-keeping not really needed, but it keeps the epilogue code
 	 * (SP restore) simpler/uniform.
 	 */
-	b.d	77f
-
-	st.a	sp, [sp, -12]	; Make room for orig_r0 and orig_r8
+	b.d	66f
+	mov	r9, sp
 
 88: /*------Intr/Ecxp happened in user mode, "switch" stack ------ */
 
 	GET_CURR_TASK_ON_CPU   r9
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-
-	/* If current task pointer cached in r25, time to
-	 *  -safekeep USER r25 in task->thread_struct->user_r25
-	 *  -load r25 with current task ptr
-	 */
-	st.as	r25, [r9, (TASK_THREAD + THREAD_USER_R25)/4]
-	mov	r25, r9
-#endif
-
 	/* With current tsk in r9, get it's kernel mode stack base */
 	GET_TSK_STACK_BASE  r9, r9
 
-#ifdef PT_REGS_CANARY
-	st	0xabcdabcd, [r9, 0]
+66:
+#ifdef CONFIG_ARC_CURR_IN_REG
+	/*
+	 * Treat r25 as scratch reg, save it on stack first
+	 * Load it with current task pointer
+	 */
+	st	r25, [r9, -4]
+	GET_CURR_TASK_ON_CPU   r25
 #endif
 
 	/* Save Pre Intr/Exception User SP on kernel stack */
-	st.a    sp, [r9, -12]	; Make room for orig_r0 and orig_r8
+	st.a    sp, [r9, -16]	; Make room for orig_r0, ECR, user_r25
 
 	/* CAUTION:
 	 * SP should be set at the very end when we are done with everything
@@ -342,7 +320,7 @@
 	/* set SP to point to kernel mode stack */
 	mov sp, r9
 
-77: /* ----- Stack Switched to kernel Mode, Now save REG FILE ----- */
+	/* ----- Stack Switched to kernel Mode, Now save REG FILE ----- */
 
 .endm
 
@@ -369,7 +347,7 @@
  * @reg [OUT] &thread_info of "current"
  */
 .macro GET_CURR_THR_INFO_FROM_SP  reg
-	and \reg, sp, ~(THREAD_SIZE - 1)
+	bic \reg, sp, (THREAD_SIZE - 1)
 .endm
 
 /*
@@ -413,62 +391,25 @@
  * Note that syscalls are implemented via TRAP which is also a exception
  * from CPU's point of view
  *-------------------------------------------------------------*/
-.macro SAVE_ALL_EXCEPTION   marker
+.macro SAVE_ALL_SYS
 
-	st      \marker, [sp, 8]	/* orig_r8 */
+	lr	r9, [ecr]
+	st      r9, [sp, 8]    /* ECR */
 	st      r0, [sp, 4]    /* orig_r0, needed only for sys calls */
 
 	/* Restore r9 used to code the early prologue */
 	EXCPN_PROLOG_RESTORE_REG  r9
 
-	SAVE_CALLER_SAVED
-	st.a    r26, [sp, -4]   /* gp */
-	st.a    fp, [sp, -4]
-	st.a    blink, [sp, -4]
-	lr	r9, [eret]
-	st.a    r9, [sp, -4]
-	lr	r9, [erstatus]
-	st.a    r9, [sp, -4]
-	st.a    lp_count, [sp, -4]
-	lr	r9, [lp_end]
-	st.a    r9, [sp, -4]
-	lr	r9, [lp_start]
-	st.a    r9, [sp, -4]
-	lr	r9, [erbta]
-	st.a    r9, [sp, -4]
-
-#ifdef PT_REGS_CANARY
-	mov   r9, 0xdeadbeef
-	st    r9, [sp, -4]
-#endif
-
-	/* move up by 1 word to "create" pt_regs->"stack_place_holder" */
-	sub sp, sp, 4
-.endm
-
-/*--------------------------------------------------------------
- * Save scratch regs for exceptions
- *-------------------------------------------------------------*/
-.macro SAVE_ALL_SYS
-	SAVE_ALL_EXCEPTION  orig_r8_IS_EXCPN
-.endm
-
-/*--------------------------------------------------------------
- * Save scratch regs for sys calls
- *-------------------------------------------------------------*/
-.macro SAVE_ALL_TRAP
-	/*
-	 * Setup pt_regs->orig_r8.
-	 * Encode syscall number (r8) in upper short word of event type (r9)
-	 * N.B. #1: This is already endian safe (see ptrace.h)
-	 *      #2: Only r9 can be used as scratch as it is already clobbered
-	 *          and it's contents are no longer needed by the latter part
-	 *          of exception prologue
-	 */
-	lsl  r9, r8, 16
-	or   r9, r9, orig_r8_IS_SCALL
-
-	SAVE_ALL_EXCEPTION  r9
+	SAVE_R0_TO_R12
+	PUSH	gp
+	PUSH	fp
+	PUSH	blink
+	PUSHAX	eret
+	PUSHAX	erstatus
+	PUSH	lp_count
+	PUSHAX	lp_end
+	PUSHAX	lp_start
+	PUSHAX	erbta
 .endm
 
 /*--------------------------------------------------------------
@@ -483,28 +424,22 @@
  * by hardware and that is not good.
  *-------------------------------------------------------------*/
 .macro RESTORE_ALL_SYS
+	POPAX	erbta
+	POPAX	lp_start
+	POPAX	lp_end
+
+	POP	r9
+	mov	lp_count, r9	;LD to lp_count is not allowed
 
-	add sp, sp, 4       /* hop over unused "pt_regs->stack_place_holder" */
-
-	ld.ab   r9, [sp, 4]
-	sr	r9, [erbta]
-	ld.ab   r9, [sp, 4]
-	sr	r9, [lp_start]
-	ld.ab   r9, [sp, 4]
-	sr	r9, [lp_end]
-	ld.ab   r9, [sp, 4]
-	mov	lp_count, r9
-	ld.ab   r9, [sp, 4]
-	sr	r9, [erstatus]
-	ld.ab   r9, [sp, 4]
-	sr	r9, [eret]
-	ld.ab   blink, [sp, 4]
-	ld.ab   fp, [sp, 4]
-	ld.ab   r26, [sp, 4]    /* gp */
-	RESTORE_CALLER_SAVED
+	POPAX	erstatus
+	POPAX	eret
+	POP	blink
+	POP	fp
+	POP	gp
+	RESTORE_R12_TO_R0
 
 	ld  sp, [sp] /* restore original sp */
-	/* orig_r0 and orig_r8 skipped automatically */
+	/* orig_r0, ECR, user_r25 skipped automatically */
 .endm
 
 
@@ -513,9 +448,7 @@
  *-------------------------------------------------------------*/
 .macro SAVE_ALL_INT1
 
-	/* restore original r9 , saved in int1_saved_reg
-	* It will be saved on stack in macro: SAVE_CALLER_SAVED
-	*/
+	/* restore original r9 to be saved as part of reg-file */
 #ifdef CONFIG_SMP
 	lr  r9, [ARC_REG_SCRATCH_DATA0]
 #else
@@ -523,29 +456,19 @@
 #endif
 
 	/* now we are ready to save the remaining context :) */
-	st      orig_r8_IS_IRQ1, [sp, 8]    /* Event Type */
+	st      event_IRQ1, [sp, 8]    /* Dummy ECR */
 	st      0, [sp, 4]    /* orig_r0 , N/A for IRQ */
-	SAVE_CALLER_SAVED
-	st.a    r26, [sp, -4]   /* gp */
-	st.a    fp, [sp, -4]
-	st.a    blink, [sp, -4]
-	st.a    ilink1, [sp, -4]
-	lr	r9, [status32_l1]
-	st.a    r9, [sp, -4]
-	st.a    lp_count, [sp, -4]
-	lr	r9, [lp_end]
-	st.a    r9, [sp, -4]
-	lr	r9, [lp_start]
-	st.a    r9, [sp, -4]
-	lr	r9, [bta_l1]
-	st.a    r9, [sp, -4]
-
-#ifdef PT_REGS_CANARY
-	mov   r9, 0xdeadbee1
-	st    r9, [sp, -4]
-#endif
-	/* move up by 1 word to "create" pt_regs->"stack_place_holder" */
-	sub sp, sp, 4
+
+	SAVE_R0_TO_R12
+	PUSH	gp
+	PUSH	fp
+	PUSH	blink
+	PUSH	ilink1
+	PUSHAX	status32_l1
+	PUSH	lp_count
+	PUSHAX	lp_end
+	PUSHAX	lp_start
+	PUSHAX	bta_l1
 .endm
 
 .macro SAVE_ALL_INT2
@@ -558,30 +481,19 @@
 	ld  r9, [@int2_saved_reg]
 
 	/* now we are ready to save the remaining context :) */
-	st      orig_r8_IS_IRQ2, [sp, 8]    /* Event Type */
+	st      event_IRQ2, [sp, 8]    /* Dummy ECR */
 	st      0, [sp, 4]    /* orig_r0 , N/A for IRQ */
-	SAVE_CALLER_SAVED
-	st.a    r26, [sp, -4]   /* gp */
-	st.a    fp, [sp, -4]
-	st.a    blink, [sp, -4]
-	st.a    ilink2, [sp, -4]
-	lr	r9, [status32_l2]
-	st.a    r9, [sp, -4]
-	st.a    lp_count, [sp, -4]
-	lr	r9, [lp_end]
-	st.a    r9, [sp, -4]
-	lr	r9, [lp_start]
-	st.a    r9, [sp, -4]
-	lr	r9, [bta_l2]
-	st.a    r9, [sp, -4]
-
-#ifdef PT_REGS_CANARY
-	mov   r9, 0xdeadbee2
-	st    r9, [sp, -4]
-#endif
 
-	/* move up by 1 word to "create" pt_regs->"stack_place_holder" */
-	sub sp, sp, 4
+	SAVE_R0_TO_R12
+	PUSH	gp
+	PUSH	fp
+	PUSH	blink
+	PUSH	ilink2
+	PUSHAX	status32_l2
+	PUSH	lp_count
+	PUSHAX	lp_end
+	PUSHAX	lp_start
+	PUSHAX	bta_l2
 .endm
 
 /*--------------------------------------------------------------
@@ -595,52 +507,41 @@
  *-------------------------------------------------------------*/
 
 .macro RESTORE_ALL_INT1
-	add sp, sp, 4       /* hop over unused "pt_regs->stack_place_holder" */
-
-	ld.ab   r9, [sp, 4] /* Actual reg file */
-	sr	r9, [bta_l1]
-	ld.ab   r9, [sp, 4]
-	sr	r9, [lp_start]
-	ld.ab   r9, [sp, 4]
-	sr	r9, [lp_end]
-	ld.ab   r9, [sp, 4]
-	mov	lp_count, r9
-	ld.ab   r9, [sp, 4]
-	sr	r9, [status32_l1]
-	ld.ab   r9, [sp, 4]
-	mov	ilink1, r9
-	ld.ab   blink, [sp, 4]
-	ld.ab   fp, [sp, 4]
-	ld.ab   r26, [sp, 4]    /* gp */
-	RESTORE_CALLER_SAVED
+	POPAX	bta_l1
+	POPAX	lp_start
+	POPAX	lp_end
+
+	POP	r9
+	mov	lp_count, r9	;LD to lp_count is not allowed
+
+	POPAX	status32_l1
+	POP	ilink1
+	POP	blink
+	POP	fp
+	POP	gp
+	RESTORE_R12_TO_R0
 
 	ld  sp, [sp] /* restore original sp */
-	/* orig_r0 and orig_r8 skipped automatically */
+	/* orig_r0, ECR, user_r25 skipped automatically */
 .endm
 
 .macro RESTORE_ALL_INT2
-	add sp, sp, 4       /* hop over unused "pt_regs->stack_place_holder" */
-
-	ld.ab   r9, [sp, 4]
-	sr	r9, [bta_l2]
-	ld.ab   r9, [sp, 4]
-	sr	r9, [lp_start]
-	ld.ab   r9, [sp, 4]
-	sr	r9, [lp_end]
-	ld.ab   r9, [sp, 4]
-	mov	lp_count, r9
-	ld.ab   r9, [sp, 4]
-	sr	r9, [status32_l2]
-	ld.ab   r9, [sp, 4]
-	mov	ilink2, r9
-	ld.ab   blink, [sp, 4]
-	ld.ab   fp, [sp, 4]
-	ld.ab   r26, [sp, 4]    /* gp */
-	RESTORE_CALLER_SAVED
+	POPAX	bta_l2
+	POPAX	lp_start
+	POPAX	lp_end
 
-	ld  sp, [sp] /* restore original sp */
-	/* orig_r0 and orig_r8 skipped automatically */
+	POP	r9
+	mov	lp_count, r9	;LD to lp_count is not allowed
 
+	POPAX	status32_l2
+	POP	ilink2
+	POP	blink
+	POP	fp
+	POP	gp
+	RESTORE_R12_TO_R0
+
+	ld  sp, [sp] /* restore original sp */
+	/* orig_r0, ECR, user_r25 skipped automatically */
 .endm
 
 
diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index 57898a17eb8..c0a72105ee0 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h
@@ -21,6 +21,6 @@
 extern void __init arc_init_IRQ(void);
 extern int __init get_hw_config_num_irq(void);
 
-void __cpuinit arc_local_timer_setup(unsigned int cpu);
+void arc_local_timer_setup(unsigned int cpu);
 
 #endif
diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h
index eac07166820..d99f79bcf86 100644
--- a/arch/arc/include/asm/irqflags.h
+++ b/arch/arc/include/asm/irqflags.h
@@ -19,6 +19,26 @@
 
 #include <asm/arcregs.h>
 
+/* status32 Reg bits related to Interrupt Handling */
+#define STATUS_E1_BIT		1	/* Int 1 enable */
+#define STATUS_E2_BIT		2	/* Int 2 enable */
+#define STATUS_A1_BIT		3	/* Int 1 active */
+#define STATUS_A2_BIT		4	/* Int 2 active */
+
+#define STATUS_E1_MASK		(1<<STATUS_E1_BIT)
+#define STATUS_E2_MASK		(1<<STATUS_E2_BIT)
+#define STATUS_A1_MASK		(1<<STATUS_A1_BIT)
+#define STATUS_A2_MASK		(1<<STATUS_A2_BIT)
+
+/* Other Interrupt Handling related Aux regs */
+#define AUX_IRQ_LEV		0x200	/* IRQ Priority: L1 or L2 */
+#define AUX_IRQ_HINT		0x201	/* For generating Soft Interrupts */
+#define AUX_IRQ_LV12		0x43	/* interrupt level register */
+
+#define AUX_IENABLE		0x40c
+#define AUX_ITRIGGER		0x40d
+#define AUX_IPULSE		0x415
+
 #ifndef __ASSEMBLY__
 
 /******************************************************************
diff --git a/arch/arc/include/asm/kgdb.h b/arch/arc/include/asm/kgdb.h
index 4930957ca3d..b65fca7ffeb 100644
--- a/arch/arc/include/asm/kgdb.h
+++ b/arch/arc/include/asm/kgdb.h
@@ -31,7 +31,7 @@ static inline void arch_kgdb_breakpoint(void)
 	__asm__ __volatile__ ("trap_s	0x4\n");
 }
 
-extern void kgdb_trap(struct pt_regs *regs, int param);
+extern void kgdb_trap(struct pt_regs *regs);
 
 enum arc700_linux_regnums {
 	_R0		= 0,
@@ -53,7 +53,7 @@ enum arc700_linux_regnums {
 };
 
 #else
-#define kgdb_trap(regs, param)
+#define kgdb_trap(regs)
 #endif
 
 #endif	/* __ARC_KGDB_H__ */
diff --git a/arch/arc/include/asm/kprobes.h b/arch/arc/include/asm/kprobes.h
index 4d9c211fce7..944dbedb38b 100644
--- a/arch/arc/include/asm/kprobes.h
+++ b/arch/arc/include/asm/kprobes.h
@@ -50,11 +50,9 @@ struct kprobe_ctlblk {
 
 int kprobe_fault_handler(struct pt_regs *regs, unsigned long cause);
 void kretprobe_trampoline(void);
-void trap_is_kprobe(unsigned long cause, unsigned long address,
-			   struct pt_regs *regs);
+void trap_is_kprobe(unsigned long address, struct pt_regs *regs);
 #else
-static void trap_is_kprobe(unsigned long cause, unsigned long address,
-			   struct pt_regs *regs)
+static void trap_is_kprobe(unsigned long address, struct pt_regs *regs)
 {
 }
 #endif
diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index 56b02320f1a..7c03fe61759 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -9,6 +9,40 @@
 #ifndef _ASM_ARC_MMU_H
 #define _ASM_ARC_MMU_H
 
+#if defined(CONFIG_ARC_MMU_V1)
+#define CONFIG_ARC_MMU_VER 1
+#elif defined(CONFIG_ARC_MMU_V2)
+#define CONFIG_ARC_MMU_VER 2
+#elif defined(CONFIG_ARC_MMU_V3)
+#define CONFIG_ARC_MMU_VER 3
+#endif
+
+/* MMU Management regs */
+#define ARC_REG_MMU_BCR		0x06f
+#define ARC_REG_TLBPD0		0x405
+#define ARC_REG_TLBPD1		0x406
+#define ARC_REG_TLBINDEX	0x407
+#define ARC_REG_TLBCOMMAND	0x408
+#define ARC_REG_PID		0x409
+#define ARC_REG_SCRATCH_DATA0	0x418
+
+/* Bits in MMU PID register */
+#define MMU_ENABLE		(1 << 31)	/* Enable MMU for process */
+
+/* Error code if probe fails */
+#define TLB_LKUP_ERR		0x80000000
+
+/* TLB Commands */
+#define TLBWrite    0x1
+#define TLBRead     0x2
+#define TLBGetIndex 0x3
+#define TLBProbe    0x4
+
+#if (CONFIG_ARC_MMU_VER >= 2)
+#define TLBWriteNI  0x5		/* write JTLB without inv uTLBs */
+#define TLBIVUTLB   0x6		/* explicitly inv uTLBs */
+#endif
+
 #ifndef __ASSEMBLY__
 
 typedef struct {
@@ -18,6 +52,16 @@ typedef struct {
 #endif
 } mm_context_t;
 
+#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
+void tlb_paranoid_check(unsigned int pid_sw, unsigned long address);
+#else
+#define tlb_paranoid_check(a, b)
 #endif
 
+void arc_mmu_init(void);
+extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
+void __init read_decode_mmu_bcr(void);
+
+#endif	/* !__ASSEMBLY__ */
+
 #endif
diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index ab84bf131fe..9c8aa41e45c 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -96,13 +96,8 @@ typedef unsigned long pgtable_t;
 
 #define virt_addr_valid(kaddr)  pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 
-/* Default Permissions for page, used in mmap.c */
-#ifdef CONFIG_ARC_STACK_NONEXEC
+/* Default Permissions for stack/heap pages (Non Executable) */
 #define VM_DATA_DEFAULT_FLAGS   (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE)
-#else
-#define VM_DATA_DEFAULT_FLAGS   (VM_READ | VM_WRITE | VM_EXEC | \
-				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-#endif
 
 #define WANT_PAGE_VIRTUAL   1
 
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index c110ac87d22..4749a0eee1c 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -135,6 +135,12 @@
 /* ioremap */
 #define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS)
 
+/* Masks for actual TLB "PD"s */
+#define PTE_BITS_IN_PD0	(_PAGE_GLOBAL | _PAGE_PRESENT)
+#define PTE_BITS_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE | \
+			 _PAGE_U_EXECUTE | _PAGE_U_WRITE | _PAGE_U_READ | \
+			 _PAGE_K_EXECUTE | _PAGE_K_WRITE | _PAGE_K_READ)
+
 /**************************************************************************
  * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
  *
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 5f26b2c1cba..15334ab66b5 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -19,6 +19,7 @@
 #ifndef __ASSEMBLY__
 
 #include <asm/arcregs.h>	/* for STATUS_E1_MASK et all */
+#include <asm/ptrace.h>
 
 /* Arch specific stuff which needs to be saved per task.
  * However these items are not so important so as to earn a place in
@@ -28,10 +29,6 @@ struct thread_struct {
 	unsigned long ksp;	/* kernel mode stack pointer */
 	unsigned long callee_reg;	/* pointer to callee regs */
 	unsigned long fault_address;	/* dbls as brkpt holder as well */
-	unsigned long cause_code;	/* Exception Cause Code (ECR) */
-#ifdef CONFIG_ARC_CURR_IN_REG
-	unsigned long user_r25;
-#endif
 #ifdef CONFIG_ARC_FPU_SAVE_RESTORE
 	struct arc_fpu fpu;
 #endif
@@ -50,7 +47,7 @@ struct task_struct;
 unsigned long thread_saved_pc(struct task_struct *t);
 
 #define task_pt_regs(p) \
-	((struct pt_regs *)(THREAD_SIZE - 4 + (void *)task_stack_page(p)) - 1)
+	((struct pt_regs *)(THREAD_SIZE + (void *)task_stack_page(p)) - 1)
 
 /* Free all resources held by a thread. */
 #define release_thread(thread) do { } while (0)
@@ -75,11 +72,15 @@ unsigned long thread_saved_pc(struct task_struct *t);
 
 /*
  * Where abouts of Task's sp, fp, blink when it was last seen in kernel mode.
- * These can't be derived from pt_regs as that would give correp user-mode val
+ * Look in process.c for details of kernel stack layout
  */
 #define KSTK_ESP(tsk)   (tsk->thread.ksp)
-#define KSTK_BLINK(tsk) (*((unsigned int *)((KSTK_ESP(tsk)) + (13+1+1)*4)))
-#define KSTK_FP(tsk)    (*((unsigned int *)((KSTK_ESP(tsk)) + (13+1)*4)))
+
+#define KSTK_REG(tsk, off)	(*((unsigned int *)(KSTK_ESP(tsk) + \
+					sizeof(struct callee_regs) + off)))
+
+#define KSTK_BLINK(tsk) KSTK_REG(tsk, 4)
+#define KSTK_FP(tsk)    KSTK_REG(tsk, 0)
 
 /*
  * Do necessary setup to start up a newly executed thread.
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index 6179de7e07c..c9938e7a7db 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -17,12 +17,6 @@
 /* THE pt_regs: Defines how regs are saved during entry into kernel */
 
 struct pt_regs {
-	/*
-	 * 1 word gutter after reg-file has been saved
-	 * Technically not needed, Since SP always points to a "full" location
-	 * (vs. "empty"). But pt_regs is shared with tools....
-	 */
-	long res;
 
 	/* Real registers */
 	long bta;	/* bta_l1, bta_l2, erbta */
@@ -50,22 +44,32 @@ struct pt_regs {
 	long sp;	/* user/kernel sp depending on where we came from  */
 	long orig_r0;
 
-	/*to distinguish bet excp, syscall, irq */
+	/*
+	 * To distinguish between exception, syscall and irq
+	 * For traps and exceptions, Exception Cause Register.
+	 * 	ECR: <00> <VV> <CC> <PP>
+	 * 	Top byte used by Linux for extra state mgmt (syscall-restart)
+	 * For interrupts, use artificial ECR values to note current prio-level
+	 */
 	union {
+		struct {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-		/* so that assembly code is same for LE/BE */
-		unsigned long orig_r8:16, event:16;
+			unsigned long state:8, ecr_vec:8,
+				      ecr_cause:8, ecr_param:8;
 #else
-		unsigned long event:16, orig_r8:16;
+			unsigned long ecr_param:8, ecr_cause:8,
+				      ecr_vec:8, state:8;
 #endif
-		long orig_r8_word;
+		};
+		unsigned long event;
 	};
+
+	long user_r25;
 };
 
 /* Callee saved registers - need to be saved only when you are scheduled out */
 
 struct callee_regs {
-	long res;	/* Again this is not needed */
 	long r25;
 	long r24;
 	long r23;
@@ -99,18 +103,20 @@ struct callee_regs {
 /* return 1 if PC in delay slot */
 #define delay_mode(regs) ((regs->status32 & STATUS_DE_MASK) == STATUS_DE_MASK)
 
-#define in_syscall(regs)    (regs->event & orig_r8_IS_SCALL)
-#define in_brkpt_trap(regs) (regs->event & orig_r8_IS_BRKPT)
+#define in_syscall(regs)    ((regs->ecr_vec == ECR_V_TRAP) && !regs->ecr_param)
+#define in_brkpt_trap(regs) ((regs->ecr_vec == ECR_V_TRAP) && regs->ecr_param)
+
+#define STATE_SCALL_RESTARTED	0x01
 
-#define syscall_wont_restart(regs) (regs->event |= orig_r8_IS_SCALL_RESTARTED)
-#define syscall_restartable(regs) !(regs->event &  orig_r8_IS_SCALL_RESTARTED)
+#define syscall_wont_restart(reg) (reg->state |= STATE_SCALL_RESTARTED)
+#define syscall_restartable(reg) !(reg->state &  STATE_SCALL_RESTARTED)
 
 #define current_pt_regs()					\
 ({								\
 	/* open-coded current_thread_info() */			\
 	register unsigned long sp asm ("sp");			\
 	unsigned long pg_start = (sp & ~(THREAD_SIZE - 1));	\
-	(struct pt_regs *)(pg_start + THREAD_SIZE - 4) - 1;	\
+	(struct pt_regs *)(pg_start + THREAD_SIZE) - 1;	\
 })
 
 static inline long regs_return_value(struct pt_regs *regs)
@@ -120,11 +126,4 @@ static inline long regs_return_value(struct pt_regs *regs)
 
 #endif /* !__ASSEMBLY__ */
 
-#define orig_r8_IS_SCALL		0x0001
-#define orig_r8_IS_SCALL_RESTARTED	0x0002
-#define orig_r8_IS_BRKPT		0x0004
-#define orig_r8_IS_EXCPN		0x0008
-#define orig_r8_IS_IRQ1			0x0010
-#define orig_r8_IS_IRQ2			0x0020
-
 #endif /* __ASM_PTRACE_H */
diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h
index 33ab3048e9b..29de0980430 100644
--- a/arch/arc/include/asm/syscall.h
+++ b/arch/arc/include/asm/syscall.h
@@ -18,7 +18,7 @@ static inline long
 syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
 {
 	if (user_mode(regs) && in_syscall(regs))
-		return regs->orig_r8;
+		return regs->r8;
 	else
 		return -1;
 }
@@ -26,8 +26,7 @@ syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
 static inline void
 syscall_rollback(struct task_struct *task, struct pt_regs *regs)
 {
-	/* XXX: I can't fathom how pt_regs->r8 will be clobbered ? */
-	regs->r8 = regs->orig_r8;
+	regs->r0 = regs->orig_r0;
 }
 
 static inline long
diff --git a/arch/arc/include/asm/tlb-mmu1.h b/arch/arc/include/asm/tlb-mmu1.h
index a5ff961b1ef..8a1ec96012a 100644
--- a/arch/arc/include/asm/tlb-mmu1.h
+++ b/arch/arc/include/asm/tlb-mmu1.h
@@ -9,9 +9,9 @@
 #ifndef __ASM_TLB_MMU_V1_H__
 #define __ASM_TLB_MMU_V1_H__
 
-#if defined(__ASSEMBLY__) && defined(CONFIG_ARC_MMU_VER == 1)
+#include <asm/mmu.h>
 
-#include <asm/tlb.h>
+#if defined(__ASSEMBLY__) && (CONFIG_ARC_MMU_VER == 1)
 
 .macro TLB_WRITE_HEURISTICS
 
diff --git a/arch/arc/include/asm/tlb.h b/arch/arc/include/asm/tlb.h
index cb0c708ca66..a9db5f62aaf 100644
--- a/arch/arc/include/asm/tlb.h
+++ b/arch/arc/include/asm/tlb.h
@@ -9,18 +9,6 @@
 #ifndef _ASM_ARC_TLB_H
 #define _ASM_ARC_TLB_H
 
-#ifdef __KERNEL__
-
-#include <asm/pgtable.h>
-
-/* Masks for actual TLB "PD"s */
-#define PTE_BITS_IN_PD0	(_PAGE_GLOBAL | _PAGE_PRESENT)
-#define PTE_BITS_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE | \
-			 _PAGE_U_EXECUTE | _PAGE_U_WRITE | _PAGE_U_READ | \
-			 _PAGE_K_EXECUTE | _PAGE_K_WRITE | _PAGE_K_READ)
-
-#ifndef __ASSEMBLY__
-
 #define tlb_flush(tlb)				\
 do {						\
 	if (tlb->fullmm)			\
@@ -56,18 +44,4 @@ do {									\
 #include <linux/pagemap.h>
 #include <asm-generic/tlb.h>
 
-#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
-void tlb_paranoid_check(unsigned int pid_sw, unsigned long address);
-#else
-#define tlb_paranoid_check(a, b)
-#endif
-
-void arc_mmu_init(void);
-extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
-void __init read_decode_mmu_bcr(void);
-
-#endif	/* __ASSEMBLY__ */
-
-#endif	/* __KERNEL__ */
-
 #endif /* _ASM_ARC_TLB_H */
diff --git a/arch/arc/include/asm/unaligned.h b/arch/arc/include/asm/unaligned.h
index 5dbe63f17b6..60702f3751d 100644
--- a/arch/arc/include/asm/unaligned.h
+++ b/arch/arc/include/asm/unaligned.h
@@ -16,11 +16,11 @@
 
 #ifdef CONFIG_ARC_MISALIGN_ACCESS
 int misaligned_fixup(unsigned long address, struct pt_regs *regs,
-		     unsigned long cause, struct callee_regs *cregs);
+		     struct callee_regs *cregs);
 #else
 static inline int
 misaligned_fixup(unsigned long address, struct pt_regs *regs,
-		 unsigned long cause, struct callee_regs *cregs)
+		 struct callee_regs *cregs)
 {
 	return 0;
 }
diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h
index 30333cec0fe..2618cc13ba7 100644
--- a/arch/arc/include/uapi/asm/ptrace.h
+++ b/arch/arc/include/uapi/asm/ptrace.h
@@ -20,28 +20,31 @@
  *
  * This is to decouple pt_regs from user-space ABI, to be able to change it
  * w/o affecting the ABI.
- * Although the layout (initial padding) is similar to pt_regs to have some
- * optimizations when copying pt_regs to/from user_regs_struct.
+ *
+ * The intermediate pad,pad2 are relics of initial layout based on pt_regs
+ * for optimizations when copying pt_regs to/from user_regs_struct.
+ * We no longer need them, but can't be changed as they are part of ABI now.
  *
  * Also, sigcontext only care about the scratch regs as that is what we really
- * save/restore for signal handling.
+ * save/restore for signal handling. However gdb also uses the same struct
+ * hence callee regs need to be in there too.
 */
 struct user_regs_struct {
 
+	long pad;
 	struct {
-		long pad;
 		long bta, lp_start, lp_end, lp_count;
 		long status32, ret, blink, fp, gp;
 		long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
 		long sp;
 	} scratch;
+	long pad2;
 	struct {
-		long pad;
 		long r25, r24, r23, r22, r21, r20;
 		long r19, r18, r17, r16, r15, r14, r13;
 	} callee;
 	long efa;	/* break pt addr, for break points in delay slots */
-	long stop_pc;	/* give dbg stop_pc directly after checking orig_r8 */
+	long stop_pc;	/* give dbg stop_pc after ensuring brkpt trap */
 };
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
index 7dcda702524..6c3aa0edb9b 100644
--- a/arch/arc/kernel/asm-offsets.c
+++ b/arch/arc/kernel/asm-offsets.c
@@ -24,9 +24,6 @@ int main(void)
 
 	DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp));
 	DEFINE(THREAD_CALLEE_REG, offsetof(struct thread_struct, callee_reg));
-#ifdef CONFIG_ARC_CURR_IN_REG
-	DEFINE(THREAD_USER_R25, offsetof(struct thread_struct, user_r25));
-#endif
 	DEFINE(THREAD_FAULT_ADDR,
 	       offsetof(struct thread_struct, fault_address));
 
@@ -49,7 +46,7 @@ int main(void)
 	BLANK();
 
 	DEFINE(PT_status32, offsetof(struct pt_regs, status32));
-	DEFINE(PT_orig_r8, offsetof(struct pt_regs, orig_r8_word));
+	DEFINE(PT_event, offsetof(struct pt_regs, event));
 	DEFINE(PT_sp, offsetof(struct pt_regs, sp));
 	DEFINE(PT_r0, offsetof(struct pt_regs, r0));
 	DEFINE(PT_r1, offsetof(struct pt_regs, r1));
@@ -60,5 +57,7 @@ int main(void)
 	DEFINE(PT_r6, offsetof(struct pt_regs, r6));
 	DEFINE(PT_r7, offsetof(struct pt_regs, r7));
 
+	DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
+	DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
 	return 0;
 }
diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c
index 60844dac613..34410eb1a30 100644
--- a/arch/arc/kernel/ctx_sw.c
+++ b/arch/arc/kernel/ctx_sw.c
@@ -23,10 +23,6 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
 	unsigned int tmp;
 	unsigned int prev = (unsigned int)prev_task;
 	unsigned int next = (unsigned int)next_task;
-	int num_words_to_skip = 1;
-#ifdef CONFIG_ARC_CURR_IN_REG
-	num_words_to_skip++;
-#endif
 
 	__asm__ __volatile__(
 		/* FP/BLINK save generated by gcc (standard function prologue */
@@ -44,8 +40,9 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
 		"st.a    r24, [sp, -4]   \n\t"
 #ifndef CONFIG_ARC_CURR_IN_REG
 		"st.a    r25, [sp, -4]   \n\t"
+#else
+		"sub     sp, sp, 4      \n\t"	/* usual r25 placeholder */
 #endif
-		"sub     sp, sp, %4      \n\t"	/* create gutter at top */
 
 		/* set ksp of outgoing task in tsk->thread.ksp */
 		"st.as   sp, [%3, %1]    \n\t"
@@ -76,10 +73,10 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
 
 		/* start loading it's CALLEE reg file */
 
-		"add    sp, sp, %4     \n\t"	/* skip gutter at top */
-
 #ifndef CONFIG_ARC_CURR_IN_REG
 		"ld.ab   r25, [sp, 4]   \n\t"
+#else
+		"add    sp, sp, 4       \n\t"
 #endif
 		"ld.ab   r24, [sp, 4]   \n\t"
 		"ld.ab   r23, [sp, 4]   \n\t"
@@ -100,8 +97,7 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
 		/* FP/BLINK restore generated by gcc (standard func epilogue */
 
 		: "=r"(tmp)
-		: "n"((TASK_THREAD + THREAD_KSP) / 4), "r"(next), "r"(prev),
-		  "n"(num_words_to_skip * 4)
+		: "n"((TASK_THREAD + THREAD_KSP) / 4), "r"(next), "r"(prev)
 		: "blink"
 	);
 
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 0c6d664d4a8..1d7165156e1 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -142,7 +142,7 @@ VECTOR   reserved                ; Reserved Exceptions
 .endr
 
 #include <linux/linkage.h>   /* ARC_{EXTRY,EXIT} */
-#include <asm/entry.h>       /* SAVE_ALL_{INT1,INT2,TRAP...} */
+#include <asm/entry.h>       /* SAVE_ALL_{INT1,INT2,SYS...} */
 #include <asm/errno.h>
 #include <asm/arcregs.h>
 #include <asm/irqflags.h>
@@ -274,10 +274,8 @@ ARC_ENTRY instr_service
 	SWITCH_TO_KERNEL_STK
 	SAVE_ALL_SYS
 
-	lr  r0, [ecr]
-	lr  r1, [efa]
-
-	mov r2, sp
+	lr  r0, [efa]
+	mov r1, sp
 
 	FAKE_RET_FROM_EXCPN r9
 
@@ -298,9 +296,8 @@ ARC_ENTRY mem_service
 	SWITCH_TO_KERNEL_STK
 	SAVE_ALL_SYS
 
-	lr  r0, [ecr]
-	lr  r1, [efa]
-	mov r2, sp
+	lr  r0, [efa]
+	mov r1, sp
 	bl  do_memory_error
 	b   ret_from_exception
 ARC_EXIT mem_service
@@ -317,11 +314,14 @@ ARC_ENTRY EV_MachineCheck
 	SWITCH_TO_KERNEL_STK
 	SAVE_ALL_SYS
 
-	lr  r0, [ecr]
-	lr  r1, [efa]
-	mov r2, sp
+	lr  r2, [ecr]
+	lr  r0, [efa]
+	mov r1, sp
+
+	lsr  	r3, r2, 8
+	bmsk 	r3, r3, 7
+	brne    r3, ECR_C_MCHK_DUP_TLB, 1f
 
-	brne    r0, 0x200100, 1f
 	bl      do_tlb_overlap_fault
 	b       ret_from_exception
 
@@ -355,8 +355,8 @@ ARC_ENTRY EV_TLBProtV
 	;  ecr and efa were not saved in case an Intr sneaks in
 	;  after fake rtie
 	;
-	lr  r3, [ecr]
-	lr  r4, [efa]
+	lr  r2, [ecr]
+	lr  r1, [efa]	; Faulting Data address
 
 	; --------(4) Return from CPU Exception Mode ---------
 	;  Fake a rtie, but rtie to next label
@@ -368,31 +368,25 @@ ARC_ENTRY EV_TLBProtV
 	;------ (5) Type of Protection Violation? ----------
 	;
 	; ProtV Hardware Exception is triggered for Access Faults of 2 types
-	;   -Access Violaton (WRITE to READ ONLY Page) - for linux COW
-	;   -Unaligned Access (READ/WRITE on odd boundary)
+	;   -Access Violation	: 00_23_(00|01|02|03)_00
+	;			         x  r  w  r+w
+	;   -Unaligned Access	: 00_23_04_00
 	;
-	cmp r3, 0x230400    ; Misaligned data access ?
-	beq 4f
+	bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
 
 	;========= (6a) Access Violation Processing ========
-	cmp r3, 0x230100
-	mov r1, 0x0              ; if LD exception ? write = 0
-	mov.ne r1, 0x1           ; else write = 1
-
-	mov r2, r4              ; faulting address
 	mov r0, sp              ; pt_regs
 	bl  do_page_fault
 	b   ret_from_exception
 
 	;========== (6b) Non aligned access ============
 4:
-	mov r0, r3              ; cause code
-	mov r1, r4              ; faulting address
-	mov r2, sp              ; pt_regs
+	mov r0, r1
+	mov r1, sp              ; pt_regs
 
 #ifdef  CONFIG_ARC_MISALIGN_ACCESS
 	SAVE_CALLEE_SAVED_USER
-	mov r3, sp              ; callee_regs
+	mov r2, sp              ; callee_regs
 
 	bl  do_misaligned_access
 
@@ -419,9 +413,8 @@ ARC_ENTRY EV_PrivilegeV
 	SWITCH_TO_KERNEL_STK
 	SAVE_ALL_SYS
 
-	lr  r0, [ecr]
-	lr  r1, [efa]
-	mov r2, sp
+	lr  r0, [efa]
+	mov r1, sp
 
 	FAKE_RET_FROM_EXCPN r9
 
@@ -440,9 +433,8 @@ ARC_ENTRY EV_Extension
 	SWITCH_TO_KERNEL_STK
 	SAVE_ALL_SYS
 
-	lr  r0, [ecr]
-	lr  r1, [efa]
-	mov r2, sp
+	lr  r0, [efa]
+	mov r1, sp
 	bl  do_extension_fault
 	b   ret_from_exception
 ARC_EXIT EV_Extension
@@ -498,11 +490,8 @@ tracesys_exit:
 trap_with_param:
 
 	; stop_pc info by gdb needs this info
-	stw orig_r8_IS_BRKPT, [sp, PT_orig_r8]
-
-	mov r0, r12
-	lr  r1, [efa]
-	mov r2, sp
+	lr  r0, [efa]
+	mov r1, sp
 
 	; Now that we have read EFA, its safe to do "fake" rtie
 	;   and get out of CPU exception mode
@@ -544,11 +533,11 @@ ARC_ENTRY EV_Trap
 	lr  r9, [erstatus]
 
 	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_TRAP
+	SAVE_ALL_SYS
 
 	;------- (4) What caused the Trap --------------
 	lr     r12, [ecr]
-	and.f  0, r12, ECR_PARAM_MASK
+	bmsk.f 0, r12, 7
 	bnz    trap_with_param
 
 	; ======= (5a) Trap is due to System Call ========
@@ -589,11 +578,7 @@ ARC_ENTRY ret_from_exception
 	; Pre-{IRQ,Trap,Exception} K/U mode from pt_regs->status32
 	ld  r8, [sp, PT_status32]   ; returning to User/Kernel Mode
 
-#ifdef CONFIG_PREEMPT
 	bbit0  r8, STATUS_U_BIT, resume_kernel_mode
-#else
-	bbit0  r8, STATUS_U_BIT, restore_regs
-#endif
 
 	; Before returning to User mode check-for-and-complete any pending work
 	; such as rescheduling/signal-delivery etc.
@@ -653,10 +638,10 @@ resume_user_mode_begin:
 	b      resume_user_mode_begin	; unconditionally back to U mode ret chks
 					; for single exit point from this block
 
-#ifdef CONFIG_PREEMPT
-
 resume_kernel_mode:
 
+#ifdef CONFIG_PREEMPT
+
 	; Can't preempt if preemption disabled
 	GET_CURR_THR_INFO_FROM_SP   r10
 	ld  r8, [r10, THREAD_INFO_PREEMPT_COUNT]
@@ -687,17 +672,6 @@ restore_regs :
 	; XXX can this be optimised out
 	IRQ_DISABLE_SAVE    r9, r10	;@r10 has prisitine (pre-disable) copy
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-	; Restore User R25
-	; Earlier this used to be only for returning to user mode
-	; However with 2 levels of IRQ this can also happen even if
-	; in kernel mode
-	ld r9, [sp, PT_sp]
-	brhs r9, VMALLOC_START, 8f
-	RESTORE_USER_R25
-8:
-#endif
-
 	; Restore REG File. In case multiple Events outstanding,
 	; use the same priorty as rtie: EXCPN, L2 IRQ, L1 IRQ, None
 	; Note that we use realtime STATUS32 (not pt_regs->status32) to
@@ -714,28 +688,33 @@ not_exception:
 
 #ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
 
+	; Level 2 interrupt return Path - from hardware standpoint
 	bbit0  r10, STATUS_A2_BIT, not_level2_interrupt
 
 	;------------------------------------------------------------------
+	; However the context returning might not have taken L2 intr itself
+	; e.g. Task'A' user-code -> L2 intr -> schedule -> 'B' user-code ret
+	; Special considerations needed for the context which took L2 intr
+
+	ld   r9, [sp, PT_event]        ; Ensure this is L2 intr context
+	brne r9, event_IRQ2, 149f
+
+	;------------------------------------------------------------------
 	; if L2 IRQ interrupted a L1 ISR,  we'd disbaled preemption earlier
 	; so that sched doesnt move to new task, causing L1 to be delayed
 	; undeterministically. Now that we've achieved that, lets reset
 	; things to what they were, before returning from L2 context
 	;----------------------------------------------------------------
 
-	ldw  r9, [sp, PT_orig_r8]      ; get orig_r8 to make sure it is
-	brne r9, orig_r8_IS_IRQ2, 149f ; infact a L2 ISR ret path
-
 	ld r9, [sp, PT_status32]       ; get statu32_l2 (saved in pt_regs)
 	bbit0 r9, STATUS_A1_BIT, 149f  ; L1 not active when L2 IRQ, so normal
 
-	; A1 is set in status32_l2
 	; decrement thread_info->preempt_count (re-enable preemption)
 	GET_CURR_THR_INFO_FROM_SP   r10
 	ld      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
 
 	; paranoid check, given A1 was active when A2 happened, preempt count
-	; must not be 0 beccause we would have incremented it.
+	; must not be 0 because we would have incremented it.
 	; If this does happen we simply HALT as it means a BUG !!!
 	cmp     r9, 0
 	bnz     2f
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index 006dec3fc35..2a913f85a74 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S
@@ -27,6 +27,8 @@ stext:
 	; Don't clobber r0-r4 yet. It might have bootloader provided info
 	;-------------------------------------------------------------------
 
+	sr	@_int_vec_base_lds, [AUX_INTR_VEC_BASE]
+
 #ifdef CONFIG_SMP
 	; Only Boot (Master) proceeds. Others wait in platform dependent way
 	;	IDENTITY Reg [ 3  2  1  0 ]
diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index 8115fa53157..305b3f866aa 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c
@@ -28,25 +28,17 @@
  * -Disable all IRQs (on CPU side)
  * -Optionally, setup the High priority Interrupts as Level 2 IRQs
  */
-void __cpuinit arc_init_IRQ(void)
+void arc_init_IRQ(void)
 {
 	int level_mask = 0;
 
-	write_aux_reg(AUX_INTR_VEC_BASE, _int_vec_base_lds);
-
 	/* Disable all IRQs: enable them as devices request */
 	write_aux_reg(AUX_IENABLE, 0);
 
        /* setup any high priority Interrupts (Level2 in ARCompact jargon) */
-#ifdef CONFIG_ARC_IRQ3_LV2
-	level_mask |= (1 << 3);
-#endif
-#ifdef CONFIG_ARC_IRQ5_LV2
-	level_mask |= (1 << 5);
-#endif
-#ifdef CONFIG_ARC_IRQ6_LV2
-	level_mask |= (1 << 6);
-#endif
+	level_mask |= IS_ENABLED(CONFIG_ARC_IRQ3_LV2) << 3;
+	level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5;
+	level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6;
 
 	if (level_mask) {
 		pr_info("Level-2 interrupts bitset %x\n", level_mask);
diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index 52bdc83c149..a7698fb1481 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -169,7 +169,7 @@ int kgdb_arch_init(void)
 	return 0;
 }
 
-void kgdb_trap(struct pt_regs *regs, int param)
+void kgdb_trap(struct pt_regs *regs)
 {
 	/* trap_s 3 is used for breakpoints that overwrite existing
 	 * instructions, while trap_s 4 is used for compiled breakpoints.
@@ -181,7 +181,7 @@ void kgdb_trap(struct pt_regs *regs, int param)
 	 * with trap_s 4 (compiled) breakpoints, continuation needs to
 	 * start after the breakpoint.
 	 */
-	if (param == 3)
+	if (regs->ecr_param == 3)
 		instruction_pointer(regs) -= BREAK_INSTR_SIZE;
 
 	kgdb_handle_exception(1, SIGTRAP, 0, regs);
diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c
index 5a7b80e2d88..72f97822784 100644
--- a/arch/arc/kernel/kprobes.c
+++ b/arch/arc/kernel/kprobes.c
@@ -517,8 +517,7 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p)
 	return 0;
 }
 
-void trap_is_kprobe(unsigned long cause, unsigned long address,
-		    struct pt_regs *regs)
+void trap_is_kprobe(unsigned long address, struct pt_regs *regs)
 {
-	notify_die(DIE_TRAP, "kprobe_trap", regs, address, cause, SIGTRAP);
+	notify_die(DIE_TRAP, "kprobe_trap", regs, address, 0, SIGTRAP);
 }
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index cad66851e0c..07a3a968fe4 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -55,10 +55,8 @@ asmlinkage void ret_from_fork(void);
  * |     ...        |
  * |    unused      |
  * |                |
- * ------------------  <==== top of Stack (thread.ksp)
- * |   UNUSED 1 word|
  * ------------------
- * |     r25        |
+ * |     r25        |   <==== top of Stack (thread.ksp)
  * ~                ~
  * |    --to--      |   (CALLEE Regs of user mode)
  * |     r13        |
@@ -76,7 +74,10 @@ asmlinkage void ret_from_fork(void);
  * |    --to--      |   (scratch Regs of user mode)
  * |     r0         |
  * ------------------
- * |   UNUSED 1 word|
+ * |      SP        |
+ * |    orig_r0     |
+ * |    event/ECR   |
+ * |    user_r25    |
  * ------------------  <===== END of PAGE
  */
 int copy_thread(unsigned long clone_flags,
diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c
index c6a81c58d0f..333238564b6 100644
--- a/arch/arc/kernel/ptrace.c
+++ b/arch/arc/kernel/ptrace.c
@@ -40,7 +40,15 @@ static int genregs_get(struct task_struct *target,
 			offsetof(struct user_regs_struct, LOC), \
 			offsetof(struct user_regs_struct, LOC) + 4);
 
+#define REG_O_ZERO(LOC)		\
+	if (!ret)		\
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, \
+			offsetof(struct user_regs_struct, LOC), \
+			offsetof(struct user_regs_struct, LOC) + 4);
+
+	REG_O_ZERO(pad);
 	REG_O_CHUNK(scratch, callee, ptregs);
+	REG_O_ZERO(pad2);
 	REG_O_CHUNK(callee, efa, cregs);
 	REG_O_CHUNK(efa, stop_pc, &target->thread.fault_address);
 
@@ -88,8 +96,10 @@ static int genregs_set(struct task_struct *target,
 			offsetof(struct user_regs_struct, LOC), \
 			offsetof(struct user_regs_struct, LOC) + 4);
 
-	/* TBD: disallow updates to STATUS32, orig_r8 etc*/
-	REG_IN_CHUNK(scratch, callee, ptregs);	/* pt_regs[bta..orig_r8] */
+	REG_IGNORE_ONE(pad);
+	/* TBD: disallow updates to STATUS32 etc*/
+	REG_IN_CHUNK(scratch, pad2, ptregs);	/* pt_regs[bta..sp] */
+	REG_IGNORE_ONE(pad2);
 	REG_IN_CHUNK(callee, efa, cregs);	/* callee_regs[r25..r13] */
 	REG_IGNORE_ONE(efa);			/* efa update invalid */
 	REG_IN_ONE(stop_pc, &ptregs->ret);	/* stop_pc: PC update */
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index b2b3731dd1e..6b083454d03 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -31,14 +31,14 @@
 int running_on_hw = 1;	/* vs. on ISS */
 
 char __initdata command_line[COMMAND_LINE_SIZE];
-struct machine_desc *machine_desc __cpuinitdata;
+struct machine_desc *machine_desc;
 
 struct task_struct *_current_task[NR_CPUS];	/* For stack switching */
 
 struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
 
 
-void __cpuinit read_arc_build_cfg_regs(void)
+void read_arc_build_cfg_regs(void)
 {
 	struct bcr_perip uncached_space;
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
@@ -182,7 +182,7 @@ char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
 	FIX_PTR(cpu);
 #define IS_AVAIL1(var, str)	((var) ? str : "")
 #define IS_AVAIL2(var, str)	((var == 0x2) ? str : "")
-#define IS_USED(var)		((var) ? "(in-use)" : "(not used)")
+#define IS_USED(cfg)		(IS_ENABLED(cfg) ? "(in-use)" : "(not used)")
 
 	n += scnprintf(buf + n, len - n,
 		       "Extn [700-Base]\t: %s %s %s %s %s %s\n",
@@ -202,9 +202,9 @@ char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
 	if (cpu->core.family == 0x34) {
 		n += scnprintf(buf + n, len - n,
 		"Extn [700-4.10]\t: LLOCK/SCOND %s, SWAPE %s, RTSC %s\n",
-			       IS_USED(__CONFIG_ARC_HAS_LLSC_VAL),
-			       IS_USED(__CONFIG_ARC_HAS_SWAPE_VAL),
-			       IS_USED(__CONFIG_ARC_HAS_RTSC_VAL));
+			       IS_USED(CONFIG_ARC_HAS_LLSC),
+			       IS_USED(CONFIG_ARC_HAS_SWAPE),
+			       IS_USED(CONFIG_ARC_HAS_RTSC));
 	}
 
 	n += scnprintf(buf + n, len - n, "Extn [CCM]\t: %s",
@@ -237,7 +237,7 @@ char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
 	return buf;
 }
 
-void __cpuinit arc_chk_ccms(void)
+void arc_chk_ccms(void)
 {
 #if defined(CONFIG_ARC_HAS_DCCM) || defined(CONFIG_ARC_HAS_ICCM)
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
@@ -272,7 +272,7 @@ void __cpuinit arc_chk_ccms(void)
  * hardware has dedicated regs which need to be saved/restored on ctx-sw
  * (Single Precision uses core regs), thus kernel is kind of oblivious to it
  */
-void __cpuinit arc_chk_fpu(void)
+void arc_chk_fpu(void)
 {
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
 
@@ -293,7 +293,7 @@ void __cpuinit arc_chk_fpu(void)
  *    such as only for boot CPU etc
  */
 
-void __cpuinit setup_processor(void)
+void setup_processor(void)
 {
 	char str[512];
 	int cpu_id = smp_processor_id();
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 5c7fd603d21..bca3052c956 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -117,7 +117,7 @@ const char *arc_platform_smp_cpuinfo(void)
  * Called from asm stub in head.S
  * "current"/R25 already setup by low level boot code
  */
-void __cpuinit start_kernel_secondary(void)
+void start_kernel_secondary(void)
 {
 	struct mm_struct *mm = &init_mm;
 	unsigned int cpu = smp_processor_id();
@@ -154,7 +154,7 @@ void __cpuinit start_kernel_secondary(void)
  *
  * Essential requirements being where to run from (PC) and stack (SP)
 */
-int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
+int __cpu_up(unsigned int cpu, struct task_struct *idle)
 {
 	unsigned long wait_till;
 
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index ca0207b9d5b..f8b7d880304 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -79,7 +79,7 @@ static void seed_unwind_frame_info(struct task_struct *tsk,
 		 * assembly code
 		 */
 		frame_info->regs.r27 = 0;
-		frame_info->regs.r28 += 64;
+		frame_info->regs.r28 += 60;
 		frame_info->call_frame = 0;
 
 	} else {
diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c
index 09f4309aa2c..0e51e69cf30 100644
--- a/arch/arc/kernel/time.c
+++ b/arch/arc/kernel/time.c
@@ -44,13 +44,24 @@
 #include <asm/clk.h>
 #include <asm/mach_desc.h>
 
+/* Timer related Aux registers */
+#define ARC_REG_TIMER0_LIMIT	0x23	/* timer 0 limit */
+#define ARC_REG_TIMER0_CTRL	0x22	/* timer 0 control */
+#define ARC_REG_TIMER0_CNT	0x21	/* timer 0 count */
+#define ARC_REG_TIMER1_LIMIT	0x102	/* timer 1 limit */
+#define ARC_REG_TIMER1_CTRL	0x101	/* timer 1 control */
+#define ARC_REG_TIMER1_CNT	0x100	/* timer 1 count */
+
+#define TIMER_CTRL_IE		(1 << 0) /* Interrupt when Count reaches limit */
+#define TIMER_CTRL_NH		(1 << 1) /* Count only when CPU NOT halted */
+
 #define ARC_TIMER_MAX	0xFFFFFFFF
 
 /********** Clock Source Device *********/
 
 #ifdef CONFIG_ARC_HAS_RTSC
 
-int __cpuinit arc_counter_setup(void)
+int arc_counter_setup(void)
 {
 	/* RTSC insn taps into cpu clk, needs no setup */
 
@@ -105,7 +116,7 @@ static bool is_usable_as_clocksource(void)
 /*
  * set 32bit TIMER1 to keep counting monotonically and wraparound
  */
-int __cpuinit arc_counter_setup(void)
+int arc_counter_setup(void)
 {
 	write_aux_reg(ARC_REG_TIMER1_LIMIT, ARC_TIMER_MAX);
 	write_aux_reg(ARC_REG_TIMER1_CNT, 0);
@@ -212,7 +223,7 @@ static struct irqaction arc_timer_irq = {
  * Setup the local event timer for @cpu
  * N.B. weak so that some exotic ARC SoCs can completely override it
  */
-void __attribute__((weak)) __cpuinit arc_local_timer_setup(unsigned int cpu)
+void __attribute__((weak)) arc_local_timer_setup(unsigned int cpu)
 {
 	struct clock_event_device *clk = &per_cpu(arc_clockevent_device, cpu);
 
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index 0471d9c9dd5..e21692d2fda 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -28,10 +28,9 @@ void __init trap_init(void)
 	return;
 }
 
-void die(const char *str, struct pt_regs *regs, unsigned long address,
-	 unsigned long cause_reg)
+void die(const char *str, struct pt_regs *regs, unsigned long address)
 {
-	show_kernel_fault_diag(str, regs, address, cause_reg);
+	show_kernel_fault_diag(str, regs, address);
 
 	/* DEAD END */
 	__asm__("flag 1");
@@ -42,14 +41,13 @@ void die(const char *str, struct pt_regs *regs, unsigned long address,
  *  -for user faults enqueues requested signal
  *  -for kernel, chk if due to copy_(to|from)_user, otherwise die()
  */
-static noinline int handle_exception(unsigned long cause, char *str,
-				     struct pt_regs *regs, siginfo_t *info)
+static noinline int
+handle_exception(const char *str, struct pt_regs *regs, siginfo_t *info)
 {
 	if (user_mode(regs)) {
 		struct task_struct *tsk = current;
 
 		tsk->thread.fault_address = (__force unsigned int)info->si_addr;
-		tsk->thread.cause_code = cause;
 
 		force_sig_info(info->si_signo, info, tsk);
 
@@ -58,14 +56,14 @@ static noinline int handle_exception(unsigned long cause, char *str,
 		if (fixup_exception(regs))
 			return 0;
 
-		die(str, regs, (unsigned long)info->si_addr, cause);
+		die(str, regs, (unsigned long)info->si_addr);
 	}
 
 	return 1;
 }
 
 #define DO_ERROR_INFO(signr, str, name, sicode) \
-int name(unsigned long cause, unsigned long address, struct pt_regs *regs) \
+int name(unsigned long address, struct pt_regs *regs) \
 {						\
 	siginfo_t info = {			\
 		.si_signo = signr,		\
@@ -73,7 +71,7 @@ int name(unsigned long cause, unsigned long address, struct pt_regs *regs) \
 		.si_code  = sicode,		\
 		.si_addr = (void __user *)address,	\
 	};					\
-	return handle_exception(cause, str, regs, &info);\
+	return handle_exception(str, regs, &info);\
 }
 
 /*
@@ -90,11 +88,11 @@ DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN)
 /*
  * Entry Point for Misaligned Data access Exception, for emulating in software
  */
-int do_misaligned_access(unsigned long cause, unsigned long address,
-			 struct pt_regs *regs, struct callee_regs *cregs)
+int do_misaligned_access(unsigned long address, struct pt_regs *regs,
+			 struct callee_regs *cregs)
 {
-	if (misaligned_fixup(address, regs, cause, cregs) != 0)
-		return do_misaligned_error(cause, address, regs);
+	if (misaligned_fixup(address, regs, cregs) != 0)
+		return do_misaligned_error(address, regs);
 
 	return 0;
 }
@@ -104,10 +102,9 @@ int do_misaligned_access(unsigned long cause, unsigned long address,
  * Entry point for miscll errors such as Nested Exceptions
  *  -Duplicate TLB entry is handled seperately though
  */
-void do_machine_check_fault(unsigned long cause, unsigned long address,
-			    struct pt_regs *regs)
+void do_machine_check_fault(unsigned long address, struct pt_regs *regs)
 {
-	die("Machine Check Exception", regs, address, cause);
+	die("Machine Check Exception", regs, address);
 }
 
 
@@ -120,23 +117,22 @@ void do_machine_check_fault(unsigned long cause, unsigned long address,
  *  -1 used for software breakpointing (gdb)
  *  -2 used by kprobes
  */
-void do_non_swi_trap(unsigned long cause, unsigned long address,
-			struct pt_regs *regs)
+void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
 {
-	unsigned int param = cause & 0xff;
+	unsigned int param = regs->ecr_param;
 
 	switch (param) {
 	case 1:
-		trap_is_brkpt(cause, address, regs);
+		trap_is_brkpt(address, regs);
 		break;
 
 	case 2:
-		trap_is_kprobe(param, address, regs);
+		trap_is_kprobe(address, regs);
 		break;
 
 	case 3:
 	case 4:
-		kgdb_trap(regs, param);
+		kgdb_trap(regs);
 		break;
 
 	default:
@@ -149,14 +145,14 @@ void do_non_swi_trap(unsigned long cause, unsigned long address,
  *  -For a corner case, ARC kprobes implementation resorts to using
  *   this exception, hence the check
  */
-void do_insterror_or_kprobe(unsigned long cause,
-				       unsigned long address,
-				       struct pt_regs *regs)
+void do_insterror_or_kprobe(unsigned long address, struct pt_regs *regs)
 {
+	int rc;
+
 	/* Check if this exception is caused by kprobes */
-	if (notify_die(DIE_IERR, "kprobe_ierr", regs, address,
-		       cause, SIGILL) == NOTIFY_STOP)
+	rc = notify_die(DIE_IERR, "kprobe_ierr", regs, address, 0, SIGILL);
+	if (rc == NOTIFY_STOP)
 		return;
 
-	insterror_is_error(cause, address, regs);
+	insterror_is_error(address, regs);
 }
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index 11c301b81c9..73a7450ee62 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -101,7 +101,7 @@ static void show_faulting_vma(unsigned long address, char *buf)
 		if (file) {
 			struct path *path = &file->f_path;
 			nm = d_path(path, buf, PAGE_SIZE - 1);
-			inode = vma->vm_file->f_path.dentry->d_inode;
+			inode = file_inode(vma->vm_file);
 			dev = inode->i_sb->s_dev;
 			ino = inode->i_ino;
 		}
@@ -117,23 +117,22 @@ static void show_faulting_vma(unsigned long address, char *buf)
 
 static void show_ecr_verbose(struct pt_regs *regs)
 {
-	unsigned int vec, cause_code, cause_reg;
+	unsigned int vec, cause_code;
 	unsigned long address;
 
-	cause_reg = current->thread.cause_code;
-	pr_info("\n[ECR   ]: 0x%08x => ", cause_reg);
+	pr_info("\n[ECR   ]: 0x%08lx => ", regs->event);
 
 	/* For Data fault, this is data address not instruction addr */
 	address = current->thread.fault_address;
 
-	vec = cause_reg >> 16;
-	cause_code = (cause_reg >> 8) & 0xFF;
+	vec = regs->ecr_vec;
+	cause_code = regs->ecr_cause;
 
 	/* For DTLB Miss or ProtV, display the memory involved too */
 	if (vec == ECR_V_DTLB_MISS) {
-		pr_cont("Invalid %s 0x%08lx by insn @ 0x%08lx\n",
-		       (cause_code == 0x01) ? "Read From" :
-		       ((cause_code == 0x02) ? "Write to" : "EX"),
+		pr_cont("Invalid %s @ 0x%08lx by insn @ 0x%08lx\n",
+		       (cause_code == 0x01) ? "Read" :
+		       ((cause_code == 0x02) ? "Write" : "EX"),
 		       address, regs->ret);
 	} else if (vec == ECR_V_ITLB_MISS) {
 		pr_cont("Insn could not be fetched\n");
@@ -144,14 +143,12 @@ static void show_ecr_verbose(struct pt_regs *regs)
 	} else if (vec == ECR_V_PROTV) {
 		if (cause_code == ECR_C_PROTV_INST_FETCH)
 			pr_cont("Execute from Non-exec Page\n");
-		else if (cause_code == ECR_C_PROTV_LOAD)
-			pr_cont("Read from Non-readable Page\n");
-		else if (cause_code == ECR_C_PROTV_STORE)
-			pr_cont("Write to Non-writable Page\n");
-		else if (cause_code == ECR_C_PROTV_XCHG)
-			pr_cont("Data exchange protection violation\n");
 		else if (cause_code == ECR_C_PROTV_MISALIG_DATA)
 			pr_cont("Misaligned r/w from 0x%08lx\n", address);
+		else
+			pr_cont("%s access not allowed on page\n",
+				(cause_code == 0x01) ? "Read" :
+				((cause_code == 0x02) ? "Write" : "EX"));
 	} else if (vec == ECR_V_INSN_ERR) {
 		pr_cont("Illegal Insn\n");
 	} else {
@@ -176,8 +173,7 @@ void show_regs(struct pt_regs *regs)
 	print_task_path_n_nm(tsk, buf);
 	show_regs_print_info(KERN_INFO);
 
-	if (current->thread.cause_code)
-		show_ecr_verbose(regs);
+	show_ecr_verbose(regs);
 
 	pr_info("[EFA   ]: 0x%08lx\n[BLINK ]: %pS\n[ERET  ]: %pS\n",
 		current->thread.fault_address,
@@ -213,10 +209,9 @@ void show_regs(struct pt_regs *regs)
 }
 
 void show_kernel_fault_diag(const char *str, struct pt_regs *regs,
-			    unsigned long address, unsigned long cause_reg)
+			    unsigned long address)
 {
 	current->thread.fault_address = address;
-	current->thread.cause_code = cause_reg;
 
 	/* Caller and Callee regs */
 	show_regs(regs);
diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c
index 4cd81633feb..c0f832f595d 100644
--- a/arch/arc/kernel/unaligned.c
+++ b/arch/arc/kernel/unaligned.c
@@ -187,7 +187,7 @@ fault:	state->fault = 1;
  * Returns 0 if successfully handled, 1 if some error happened
  */
 int misaligned_fixup(unsigned long address, struct pt_regs *regs,
-		     unsigned long cause, struct callee_regs *cregs)
+		     struct callee_regs *cregs)
 {
 	struct disasm_state state;
 	char buf[TASK_COMM_LEN];
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index a8d02223da4..e550b117ec4 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -289,6 +289,8 @@ static void __init setup_unwind_table(struct unwind_table *table,
 			 * instead of the initial loc addr
 			 * return;
 			 */
+			WARN(1, "unwinder: FDE->initial_location NULL %p\n",
+				(const u8 *)(fde + 1) + *fde);
 		}
 		++n;
 	}
diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S
index d3c92f52d44..2555f5886af 100644
--- a/arch/arc/kernel/vmlinux.lds.S
+++ b/arch/arc/kernel/vmlinux.lds.S
@@ -125,6 +125,11 @@ SECTIONS
 		*(.debug_frame)
 		__end_unwind = .;
 	}
+	/*
+	 * gcc 4.8 generates this for -fasynchronous-unwind-tables,
+	 * while we still use the .debug_frame based unwinder
+	 */
+	/DISCARD/ : {	*(.eh_frame) }
 #else
 	/DISCARD/ : {	*(.debug_frame) }
 #endif
@@ -142,15 +147,18 @@ SECTIONS
 		*(.arcextmap.*)
 	}
 
+#ifndef CONFIG_DEBUG_INFO
 	/* open-coded because we need .debug_frame seperately for unwinding */
-	.debug_aranges 0 : { *(.debug_aranges) }
-	.debug_pubnames 0 : { *(.debug_pubnames) }
-	.debug_info 0 : { *(.debug_info) }
-	.debug_abbrev 0 : { *(.debug_abbrev) }
-	.debug_line 0 : { *(.debug_line) }
-	.debug_str 0 : { *(.debug_str) }
-	.debug_loc 0 : { *(.debug_loc) }
-	.debug_macinfo 0 : { *(.debug_macinfo) }
+	/DISCARD/ : { *(.debug_aranges) }
+	/DISCARD/ : { *(.debug_pubnames) }
+	/DISCARD/ : { *(.debug_info) }
+	/DISCARD/ : { *(.debug_abbrev) }
+	/DISCARD/ : { *(.debug_line) }
+	/DISCARD/ : { *(.debug_str) }
+	/DISCARD/ : { *(.debug_loc) }
+	/DISCARD/ : { *(.debug_macinfo) }
+	/DISCARD/ : { *(.debug_ranges) }
+#endif
 
 #ifdef CONFIG_ARC_HAS_DCCM
 	. = CONFIG_ARC_DCCM_BASE;
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index aedce190544..f415d851b76 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -73,6 +73,33 @@
 #include <asm/cachectl.h>
 #include <asm/setup.h>
 
+/* Instruction cache related Auxiliary registers */
+#define ARC_REG_IC_BCR		0x77	/* Build Config reg */
+#define ARC_REG_IC_IVIC		0x10
+#define ARC_REG_IC_CTRL		0x11
+#define ARC_REG_IC_IVIL		0x19
+#if (CONFIG_ARC_MMU_VER > 2)
+#define ARC_REG_IC_PTAG		0x1E
+#endif
+
+/* Bit val in IC_CTRL */
+#define IC_CTRL_CACHE_DISABLE   0x1
+
+/* Data cache related Auxiliary registers */
+#define ARC_REG_DC_BCR		0x72	/* Build Config reg */
+#define ARC_REG_DC_IVDC		0x47
+#define ARC_REG_DC_CTRL		0x48
+#define ARC_REG_DC_IVDL		0x4A
+#define ARC_REG_DC_FLSH		0x4B
+#define ARC_REG_DC_FLDL		0x4C
+#if (CONFIG_ARC_MMU_VER > 2)
+#define ARC_REG_DC_PTAG		0x5C
+#endif
+
+/* Bit val in DC_CTRL */
+#define DC_CTRL_INV_MODE_FLUSH  0x40
+#define DC_CTRL_FLUSH_STATUS    0x100
+
 char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len)
 {
 	int n = 0;
@@ -89,8 +116,10 @@ char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len)
 			enb ?  "" : "DISABLED (kernel-build)");		\
 }
 
-	PR_CACHE(&cpuinfo_arc700[c].icache, __CONFIG_ARC_HAS_ICACHE, "I-Cache");
-	PR_CACHE(&cpuinfo_arc700[c].dcache, __CONFIG_ARC_HAS_DCACHE, "D-Cache");
+	PR_CACHE(&cpuinfo_arc700[c].icache, IS_ENABLED(CONFIG_ARC_HAS_ICACHE),
+			"I-Cache");
+	PR_CACHE(&cpuinfo_arc700[c].dcache, IS_ENABLED(CONFIG_ARC_HAS_DCACHE),
+			"D-Cache");
 
 	return buf;
 }
@@ -100,17 +129,23 @@ char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len)
  * the cpuinfo structure for later use.
  * No Validation done here, simply read/convert the BCRs
  */
-void __cpuinit read_decode_cache_bcr(void)
+void read_decode_cache_bcr(void)
 {
-	struct bcr_cache ibcr, dbcr;
 	struct cpuinfo_arc_cache *p_ic, *p_dc;
 	unsigned int cpu = smp_processor_id();
+	struct bcr_cache {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+		unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
+#else
+		unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
+#endif
+	} ibcr, dbcr;
 
 	p_ic = &cpuinfo_arc700[cpu].icache;
 	READ_BCR(ARC_REG_IC_BCR, ibcr);
 
-	if (ibcr.config == 0x3)
-		p_ic->assoc = 2;
+	BUG_ON(ibcr.config != 3);
+	p_ic->assoc = 2;		/* Fixed to 2w set assoc */
 	p_ic->line_len = 8 << ibcr.line_len;
 	p_ic->sz = 0x200 << ibcr.sz;
 	p_ic->ver = ibcr.ver;
@@ -118,8 +153,8 @@ void __cpuinit read_decode_cache_bcr(void)
 	p_dc = &cpuinfo_arc700[cpu].dcache;
 	READ_BCR(ARC_REG_DC_BCR, dbcr);
 
-	if (dbcr.config == 0x2)
-		p_dc->assoc = 4;
+	BUG_ON(dbcr.config != 2);
+	p_dc->assoc = 4;		/* Fixed to 4w set assoc */
 	p_dc->line_len = 16 << dbcr.line_len;
 	p_dc->sz = 0x200 << dbcr.sz;
 	p_dc->ver = dbcr.ver;
@@ -132,14 +167,12 @@ void __cpuinit read_decode_cache_bcr(void)
  * 3. Enable the Caches, setup default flush mode for D-Cache
  * 3. Calculate the SHMLBA used by user space
  */
-void __cpuinit arc_cache_init(void)
+void arc_cache_init(void)
 {
-	unsigned int temp;
 	unsigned int cpu = smp_processor_id();
 	struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
 	struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
-	int way_pg_ratio = way_pg_ratio;
-	int dcache_does_alias;
+	unsigned int dcache_does_alias, temp;
 	char str[256];
 
 	printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
@@ -149,20 +182,11 @@ void __cpuinit arc_cache_init(void)
 
 #ifdef CONFIG_ARC_HAS_ICACHE
 	/* 1. Confirm some of I-cache params which Linux assumes */
-	if ((ic->assoc != ARC_ICACHE_WAYS) ||
-	    (ic->line_len != ARC_ICACHE_LINE_LEN)) {
+	if (ic->line_len != ARC_ICACHE_LINE_LEN)
 		panic("Cache H/W doesn't match kernel Config");
-	}
-#if (CONFIG_ARC_MMU_VER > 2)
-	if (ic->ver != 3) {
-		if (running_on_hw)
-			panic("Cache ver doesn't match MMU ver\n");
-
-		/* For ISS - suggest the toggles to use */
-		pr_err("Use -prop=icache_version=3,-prop=dcache_version=3\n");
 
-	}
-#endif
+	if (ic->ver != CONFIG_ARC_MMU_VER)
+		panic("Cache ver doesn't match MMU ver\n");
 #endif
 
 	/* Enable/disable I-Cache */
@@ -181,14 +205,12 @@ chk_dc:
 		return;
 
 #ifdef CONFIG_ARC_HAS_DCACHE
-	if ((dc->assoc != ARC_DCACHE_WAYS) ||
-	    (dc->line_len != ARC_DCACHE_LINE_LEN)) {
+	if (dc->line_len != ARC_DCACHE_LINE_LEN)
 		panic("Cache H/W doesn't match kernel Config");
-	}
-
-	dcache_does_alias = (dc->sz / ARC_DCACHE_WAYS) > PAGE_SIZE;
 
 	/* check for D-Cache aliasing */
+	dcache_does_alias = (dc->sz / dc->assoc) > PAGE_SIZE;
+
 	if (dcache_does_alias && !cache_is_vipt_aliasing())
 		panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
 	else if (!dcache_does_alias && cache_is_vipt_aliasing())
@@ -239,11 +261,9 @@ static inline void wait_for_flush(void)
  */
 static inline void __dc_entire_op(const int cacheop)
 {
-	unsigned long flags, tmp = tmp;
+	unsigned int tmp = tmp;
 	int aux;
 
-	local_irq_save(flags);
-
 	if (cacheop == OP_FLUSH_N_INV) {
 		/* Dcache provides 2 cmd: FLUSH or INV
 		 * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE
@@ -267,8 +287,6 @@ static inline void __dc_entire_op(const int cacheop)
 	/* Switch back the DISCARD ONLY Invalidate mode */
 	if (cacheop == OP_FLUSH_N_INV)
 		write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH);
-
-	local_irq_restore(flags);
 }
 
 /*
@@ -459,8 +477,15 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
 	local_irq_restore(flags);
 }
 
+static inline void __ic_entire_inv(void)
+{
+	write_aux_reg(ARC_REG_IC_IVIC, 1);
+	read_aux_reg(ARC_REG_IC_CTRL);	/* blocks */
+}
+
 #else
 
+#define __ic_entire_inv()
 #define __ic_line_inv_vaddr(pstart, vstart, sz)
 
 #endif /* CONFIG_ARC_HAS_ICACHE */
@@ -487,7 +512,7 @@ void flush_dcache_page(struct page *page)
 	struct address_space *mapping;
 
 	if (!cache_is_vipt_aliasing()) {
-		set_bit(PG_arch_1, &page->flags);
+		clear_bit(PG_dc_clean, &page->flags);
 		return;
 	}
 
@@ -501,7 +526,7 @@ void flush_dcache_page(struct page *page)
 	 * Make a note that K-mapping is dirty
 	 */
 	if (!mapping_mapped(mapping)) {
-		set_bit(PG_arch_1, &page->flags);
+		clear_bit(PG_dc_clean, &page->flags);
 	} else if (page_mapped(page)) {
 
 		/* kernel reading from page with U-mapping */
@@ -629,26 +654,13 @@ void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr)
 	__dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV);
 }
 
-void flush_icache_all(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	write_aux_reg(ARC_REG_IC_IVIC, 1);
-
-	/* lr will not complete till the icache inv operation is not over */
-	read_aux_reg(ARC_REG_IC_CTRL);
-	local_irq_restore(flags);
-}
-
 noinline void flush_cache_all(void)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
 
-	flush_icache_all();
+	__ic_entire_inv();
 	__dc_entire_op(OP_FLUSH_N_INV);
 
 	local_irq_restore(flags);
@@ -667,7 +679,12 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
 {
 	unsigned int paddr = pfn << PAGE_SHIFT;
 
-	__sync_icache_dcache(paddr, u_vaddr, PAGE_SIZE);
+	u_vaddr &= PAGE_MASK;
+
+	___flush_dcache_page(paddr, u_vaddr);
+
+	if (vma->vm_flags & VM_EXEC)
+		__inv_icache_page(paddr, u_vaddr);
 }
 
 void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
@@ -717,7 +734,7 @@ void copy_user_highpage(struct page *to, struct page *from,
 	 * non copied user pages (e.g. read faults which wire in pagecache page
 	 * directly).
 	 */
-	set_bit(PG_arch_1, &to->flags);
+	clear_bit(PG_dc_clean, &to->flags);
 
 	/*
 	 * if SRC was already usermapped and non-congruent to kernel mapping
@@ -725,15 +742,16 @@ void copy_user_highpage(struct page *to, struct page *from,
 	 */
 	if (clean_src_k_mappings) {
 		__flush_dcache_page(kfrom, kfrom);
+		set_bit(PG_dc_clean, &from->flags);
 	} else {
-		set_bit(PG_arch_1, &from->flags);
+		clear_bit(PG_dc_clean, &from->flags);
 	}
 }
 
 void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
 {
 	clear_page(to);
-	set_bit(PG_arch_1, &page->flags);
+	clear_bit(PG_dc_clean, &page->flags);
 }
 
 
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 689ffd86d5e..318164cabdf 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -15,6 +15,7 @@
 #include <linux/uaccess.h>
 #include <linux/kdebug.h>
 #include <asm/pgalloc.h>
+#include <asm/mmu.h>
 
 static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address)
 {
@@ -51,14 +52,14 @@ bad_area:
 	return 1;
 }
 
-void do_page_fault(struct pt_regs *regs, int write, unsigned long address,
-		   unsigned long cause_code)
+void do_page_fault(struct pt_regs *regs, unsigned long address)
 {
 	struct vm_area_struct *vma = NULL;
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
 	siginfo_t info;
 	int fault, ret;
+	int write = regs->ecr_cause & ECR_C_PROTV_STORE;  /* ST/EX */
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
 				(write ? FAULT_FLAG_WRITE : 0);
 
@@ -109,7 +110,8 @@ good_area:
 
 	/* Handle protection violation, execute on heap or stack */
 
-	if (cause_code == ((ECR_V_PROTV << 16) | ECR_C_PROTV_INST_FETCH))
+	if ((regs->ecr_vec == ECR_V_PROTV) &&
+	    (regs->ecr_cause == ECR_C_PROTV_INST_FETCH))
 		goto bad_area;
 
 	if (write) {
@@ -176,7 +178,6 @@ bad_area_nosemaphore:
 	/* User mode accesses just cause a SIGSEGV */
 	if (user_mode(regs)) {
 		tsk->thread.fault_address = address;
-		tsk->thread.cause_code = cause_code;
 		info.si_signo = SIGSEGV;
 		info.si_errno = 0;
 		/* info.si_code has been set above */
@@ -197,7 +198,7 @@ no_context:
 	if (fixup_exception(regs))
 		return;
 
-	die("Oops", regs, address, cause_code);
+	die("Oops", regs, address);
 
 out_of_memory:
 	if (is_global_init(tsk)) {
@@ -218,7 +219,6 @@ do_sigbus:
 		goto no_context;
 
 	tsk->thread.fault_address = address;
-	tsk->thread.cause_code = cause_code;
 	info.si_signo = SIGBUS;
 	info.si_errno = 0;
 	info.si_code = BUS_ADRERR;
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 4a177365b2c..a08ce718542 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -74,7 +74,7 @@ void __init setup_arch_memory(void)
 	/* Last usable page of low mem (no HIGHMEM yet for ARC port) */
 	max_low_pfn = max_pfn = PFN_DOWN(end_mem);
 
-	max_mapnr = num_physpages = max_low_pfn - min_low_pfn;
+	max_mapnr = max_low_pfn - min_low_pfn;
 
 	/*------------- reserve kernel image -----------------------*/
 	memblock_reserve(CONFIG_LINUX_LINK_BASE,
@@ -84,7 +84,7 @@ void __init setup_arch_memory(void)
 
 	/*-------------- node setup --------------------------------*/
 	memset(zones_size, 0, sizeof(zones_size));
-	zones_size[ZONE_NORMAL] = num_physpages;
+	zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn;
 
 	/*
 	 * We can't use the helper free_area_init(zones[]) because it uses
@@ -106,39 +106,9 @@ void __init setup_arch_memory(void)
  */
 void __init mem_init(void)
 {
-	int codesize, datasize, initsize, reserved_pages, free_pages;
-	int tmp;
-
 	high_memory = (void *)(CONFIG_LINUX_LINK_BASE + arc_mem_sz);
-
-	totalram_pages = free_all_bootmem();
-
-	/* count all reserved pages [kernel code/data/mem_map..] */
-	reserved_pages = 0;
-	for (tmp = 0; tmp < max_mapnr; tmp++)
-		if (PageReserved(mem_map + tmp))
-			reserved_pages++;
-
-	/* XXX: nr_free_pages() is equivalent */
-	free_pages = max_mapnr - reserved_pages;
-
-	/*
-	 * For the purpose of display below, split the "reserve mem"
-	 * kernel code/data is already shown explicitly,
-	 * Show any other reservations (mem_map[ ] et al)
-	 */
-	reserved_pages -= (((unsigned int)_end - CONFIG_LINUX_LINK_BASE) >>
-								PAGE_SHIFT);
-
-	codesize = _etext - _text;
-	datasize = _end - _etext;
-	initsize = __init_end - __init_begin;
-
-	pr_info("Memory Available: %dM / %ldM (%dK code, %dK data, %dK init, %dK reserv)\n",
-		PAGES_TO_MB(free_pages),
-		TO_MB(arc_mem_sz),
-		TO_KB(codesize), TO_KB(datasize), TO_KB(initsize),
-		PAGES_TO_KB(reserved_pages));
+	free_all_bootmem();
+	mem_init_print_info(NULL);
 }
 
 /*
@@ -146,13 +116,13 @@ void __init mem_init(void)
  */
 void __init_refok free_initmem(void)
 {
-	free_initmem_default(0);
+	free_initmem_default(-1);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void __init free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, 0, "initrd");
+	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
 
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index fe1c5a073af..7957dc4e4d4 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -55,7 +55,7 @@
 #include <asm/arcregs.h>
 #include <asm/setup.h>
 #include <asm/mmu_context.h>
-#include <asm/tlb.h>
+#include <asm/mmu.h>
 
 /*			Need for ARC MMU v2
  *
@@ -97,6 +97,7 @@
  * J-TLB entry got evicted/replaced.
  */
 
+
 /* A copy of the ASID from the PID reg is kept in asid_cache */
 int asid_cache = FIRST_ASID;
 
@@ -432,9 +433,14 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
 {
 	unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
 	unsigned long paddr = pte_val(*ptep) & PAGE_MASK;
+	struct page *page = pfn_to_page(pte_pfn(*ptep));
 
 	create_tlb(vma, vaddr, ptep);
 
+	if (page == ZERO_PAGE(0)) {
+		return;
+	}
+
 	/*
 	 * Exec page : Independent of aliasing/page-color considerations,
 	 *	       since icache doesn't snoop dcache on ARC, any dirty
@@ -446,9 +452,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
 	 */
 	if ((vma->vm_flags & VM_EXEC) ||
 	     addr_not_cache_congruent(paddr, vaddr)) {
-		struct page *page = pfn_to_page(pte_pfn(*ptep));
 
-		int dirty = test_and_clear_bit(PG_arch_1, &page->flags);
+		int dirty = !test_and_set_bit(PG_dc_clean, &page->flags);
 		if (dirty) {
 			/* wback + inv dcache lines */
 			__flush_dcache_page(paddr, paddr);
@@ -464,12 +469,27 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
  * the cpuinfo structure for later use.
  * No Validation is done here, simply read/convert the BCRs
  */
-void __cpuinit read_decode_mmu_bcr(void)
+void read_decode_mmu_bcr(void)
 {
-	unsigned int tmp;
-	struct bcr_mmu_1_2 *mmu2;	/* encoded MMU2 attr */
-	struct bcr_mmu_3 *mmu3;		/* encoded MMU3 attr */
 	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+	unsigned int tmp;
+	struct bcr_mmu_1_2 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+		unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8;
+#else
+		unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8;
+#endif
+	} *mmu2;
+
+	struct bcr_mmu_3 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int ver:8, ways:4, sets:4, osm:1, reserv:3, pg_sz:4,
+		     u_itlb:4, u_dtlb:4;
+#else
+	unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, reserv:3, osm:1, sets:4,
+		     ways:4, ver:8;
+#endif
+	} *mmu3;
 
 	tmp = read_aux_reg(ARC_REG_MMU_BCR);
 	mmu->ver = (tmp >> 24);
@@ -505,12 +525,12 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 		       "J-TLB %d (%dx%d), uDTLB %d, uITLB %d, %s\n",
 		       p_mmu->num_tlb, p_mmu->sets, p_mmu->ways,
 		       p_mmu->u_dtlb, p_mmu->u_itlb,
-		       __CONFIG_ARC_MMU_SASID_VAL ? "SASID" : "");
+		       IS_ENABLED(CONFIG_ARC_MMU_SASID) ? "SASID" : "");
 
 	return buf;
 }
 
-void __cpuinit arc_mmu_init(void)
+void arc_mmu_init(void)
 {
 	char str[256];
 	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 3357d26ffe5..5c5bb23001b 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -39,7 +39,7 @@
 
 #include <linux/linkage.h>
 #include <asm/entry.h>
-#include <asm/tlb.h>
+#include <asm/mmu.h>
 #include <asm/pgtable.h>
 #include <asm/arcregs.h>
 #include <asm/cache.h>
@@ -147,9 +147,9 @@ ex_saved_reg1:
 #ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
 	and.f 0, r0, _PAGE_PRESENT
 	bz   1f
-	ld   r2, [num_pte_not_present]
-	add  r2, r2, 1
-	st   r2, [num_pte_not_present]
+	ld   r3, [num_pte_not_present]
+	add  r3, r3, 1
+	st   r3, [num_pte_not_present]
 1:
 #endif
 
@@ -271,22 +271,22 @@ ARC_ENTRY EV_TLBMissI
 #endif
 
 	;----------------------------------------------------------------
-	; Get the PTE corresponding to V-addr accessed
+	; Get the PTE corresponding to V-addr accessed, r2 is setup with EFA
 	LOAD_FAULT_PTE
 
 	;----------------------------------------------------------------
 	; VERIFY_PTE: Check if PTE permissions approp for executing code
 	cmp_s   r2, VMALLOC_START
-	mov.lo  r2, (_PAGE_PRESENT | _PAGE_U_READ | _PAGE_U_EXECUTE)
-	mov.hs  r2, (_PAGE_PRESENT | _PAGE_K_READ | _PAGE_K_EXECUTE)
+	mov.lo  r2, (_PAGE_PRESENT | _PAGE_U_EXECUTE)
+	mov.hs  r2, (_PAGE_PRESENT | _PAGE_K_EXECUTE)
 
 	and     r3, r0, r2  ; Mask out NON Flag bits from PTE
 	xor.f   r3, r3, r2  ; check ( ( pte & flags_test ) == flags_test )
 	bnz     do_slow_path_pf
 
 	; Let Linux VM know that the page was accessed
-	or      r0, r0, (_PAGE_PRESENT | _PAGE_ACCESSED)  ; set Accessed Bit
-	st_s    r0, [r1]                                  ; Write back PTE
+	or      r0, r0, _PAGE_ACCESSED  ; set Accessed Bit
+	st_s    r0, [r1]                ; Write back PTE
 
 	CONV_PTE_TO_TLB
 	COMMIT_ENTRY_TO_MMU
@@ -311,7 +311,7 @@ ARC_ENTRY EV_TLBMissD
 
 	;----------------------------------------------------------------
 	; Get the PTE corresponding to V-addr accessed
-	; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE
+	; If PTE exists, this sets up r0 = PTE, r1 = Ptr to PTE, r2 = EFA
 	LOAD_FAULT_PTE
 
 	;----------------------------------------------------------------
@@ -345,7 +345,7 @@ ARC_ENTRY EV_TLBMissD
 	;----------------------------------------------------------------
 	; UPDATE_PTE: Let Linux VM know that page was accessed/dirty
 	lr      r3, [ecr]
-	or      r0, r0, (_PAGE_PRESENT | _PAGE_ACCESSED) ; Accessed bit always
+	or      r0, r0, _PAGE_ACCESSED        ; Accessed bit always
 	btst_s  r3,  ECR_C_BIT_DTLB_ST_MISS   ; See if it was a Write Access ?
 	or.nz   r0, r0, _PAGE_MODIFIED        ; if Write, set Dirty bit as well
 	st_s    r0, [r1]                      ; Write back PTE
@@ -381,18 +381,7 @@ do_slow_path_pf:
 
 	; ------- setup args for Linux Page fault Hanlder ---------
 	mov_s r0, sp
-	lr  r2, [efa]
-	lr  r3, [ecr]
-
-	; Both st and ex imply WRITE access of some sort, hence do_page_fault( )
-	; invoked with write=1 for DTLB-st/ex Miss and write=0 for ITLB miss or
-	; DTLB-ld Miss
-	; DTLB Miss Cause code is ld = 0x01 , st = 0x02, ex = 0x03
-	; Following code uses that fact that st/ex have one bit in common
-
-	btst_s r3,  ECR_C_BIT_DTLB_ST_MISS
-	mov.z  r1, 0
-	mov.nz r1, 1
+	lr  r1, [efa]
 
 	; We don't want exceptions to be disabled while the fault is handled.
 	; Now that we have saved the context we return from exception hence
diff --git a/arch/arc/plat-arcfpga/platform.c b/arch/arc/plat-arcfpga/platform.c
index b3700c064c0..d71f3c3bcf2 100644
--- a/arch/arc/plat-arcfpga/platform.c
+++ b/arch/arc/plat-arcfpga/platform.c
@@ -77,6 +77,7 @@ static void __init setup_bvci_lat_unit(void)
 
 /*----------------------- Platform Devices -----------------------------*/
 
+#if IS_ENABLED(CONFIG_SERIAL_ARC)
 static unsigned long arc_uart_info[] = {
 	0,	/* uart->is_emulated (runtime @running_on_hw) */
 	0,	/* uart->port.uartclk */
@@ -115,7 +116,7 @@ static struct platform_device arc_uart0_dev = {
 static struct platform_device *fpga_early_devs[] __initdata = {
 	&arc_uart0_dev,
 };
-#endif
+#endif	/* CONFIG_SERIAL_ARC_CONSOLE */
 
 static void arc_fpga_serial_init(void)
 {
@@ -152,8 +153,13 @@ static void arc_fpga_serial_init(void)
 	 * otherwise the early console never gets a chance to run.
 	 */
 	add_preferred_console("ttyARC", 0, "115200");
-#endif
+#endif	/* CONFIG_SERIAL_ARC_CONSOLE */
+}
+#else	/* !IS_ENABLED(CONFIG_SERIAL_ARC) */
+static void arc_fpga_serial_init(void)
+{
 }
+#endif
 
 static void __init plat_fpga_early_init(void)
 {
@@ -169,7 +175,7 @@ static void __init plat_fpga_early_init(void)
 }
 
 static struct of_dev_auxdata plat_auxdata_lookup[] __initdata = {
-#if defined(CONFIG_SERIAL_ARC) || defined(CONFIG_SERIAL_ARC_MODULE)
+#if IS_ENABLED(CONFIG_SERIAL_ARC)
 	OF_DEV_AUXDATA("snps,arc-uart", UART0_BASE, "arc-uart", arc_uart_info),
 #endif
 	{}
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index de7049bdea8..531cdda016f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -175,6 +175,9 @@ config ARCH_HAS_CPUFREQ
 	  and that the relevant menu configurations are displayed for
 	  it.
 
+config ARCH_HAS_BANDGAP
+	bool
+
 config GENERIC_HWEIGHT
 	bool
 	default y
@@ -1450,7 +1453,7 @@ config SMP
 	depends on CPU_V6K || CPU_V7
 	depends on GENERIC_CLOCKEVENTS
 	depends on HAVE_SMP
-	depends on MMU
+	depends on MMU || ARM_MPU
 	select USE_GENERIC_SMP_HELPERS
 	help
 	  This enables support for systems with more than one CPU. If you have
@@ -1471,7 +1474,7 @@ config SMP
 
 config SMP_ON_UP
 	bool "Allow booting SMP kernel on uniprocessor systems (EXPERIMENTAL)"
-	depends on SMP && !XIP_KERNEL
+	depends on SMP && !XIP_KERNEL && MMU
 	default y
 	help
 	  SMP kernels contain instructions which fail on non-SMP processors.
@@ -1744,6 +1747,14 @@ config HW_PERF_EVENTS
 	  Enable hardware performance counter support for perf events. If
 	  disabled, perf events will use software events only.
 
+config SYS_SUPPORTS_HUGETLBFS
+       def_bool y
+       depends on ARM_LPAE
+
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE
+       def_bool y
+       depends on ARM_LPAE
+
 source "mm/Kconfig"
 
 config FORCE_MAX_ZONEORDER
diff --git a/arch/arm/Kconfig-nommu b/arch/arm/Kconfig-nommu
index c859495da48..aed66d5df7f 100644
--- a/arch/arm/Kconfig-nommu
+++ b/arch/arm/Kconfig-nommu
@@ -50,3 +50,15 @@ config REMAP_VECTORS_TO_RAM
 	  Otherwise, say 'y' here.  In this case, the kernel will require
 	  external support to redirect the hardware exception vectors to
 	  the writable versions located at DRAM_BASE.
+
+config ARM_MPU
+       bool 'Use the ARM v7 PMSA Compliant MPU'
+       depends on CPU_V7
+       default y
+       help
+         Some ARM systems without an MMU have instead a Memory Protection
+         Unit (MPU) that defines the type and permissions for regions of
+         memory.
+
+         If your CPU has an MPU then you should choose 'y' here unless you
+         know that you do not want to use the MPU.
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index ff4920b1f6c..e401a766c0b 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -560,6 +560,13 @@ choice
 		  of the tiles using the RS1 memory map, including all new A-class
 		  core tiles, FPGA-based SMMs and software models.
 
+	config DEBUG_VEXPRESS_UART0_CRX
+		bool "Use PL011 UART0 at 0xb0090000 (Cortex-R compliant tiles)"
+		depends on ARCH_VEXPRESS && !MMU
+		help
+		  This option selects UART0 at 0xb0090000. This is appropriate for
+		  Cortex-R series tiles and SMMs, such as Cortex-R5 and Cortex-R7.
+
 	config DEBUG_VT8500_UART0
 		bool "Use UART0 on VIA/Wondermedia SoCs"
 		depends on ARCH_VT8500
@@ -789,7 +796,8 @@ config DEBUG_LL_INCLUDE
 	default "debug/u300.S" if DEBUG_U300_UART
 	default "debug/ux500.S" if DEBUG_UX500_UART
 	default "debug/vexpress.S" if DEBUG_VEXPRESS_UART0_DETECT || \
-		DEBUG_VEXPRESS_UART0_CA9 || DEBUG_VEXPRESS_UART0_RS1
+		DEBUG_VEXPRESS_UART0_CA9 || DEBUG_VEXPRESS_UART0_RS1 || \
+		DEBUG_VEXPRESS_UART0_CRX
 	default "debug/vt8500.S" if DEBUG_VT8500_UART0
 	default "debug/zynq.S" if DEBUG_ZYNQ_UART0 || DEBUG_ZYNQ_UART1
 	default "mach/debug-macro.S"
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 72caf82a828..c0ac0f5e5e5 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -59,38 +59,44 @@ comma = ,
 # Note that GCC does not numerically define an architecture version
 # macro, but instead defines a whole series of macros which makes
 # testing for a specific architecture or later rather impossible.
-arch-$(CONFIG_CPU_32v7M)	:=-D__LINUX_ARM_ARCH__=7 -march=armv7-m -Wa,-march=armv7-m
-arch-$(CONFIG_CPU_32v7)		:=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a)
-arch-$(CONFIG_CPU_32v6)		:=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6)
+arch-$(CONFIG_CPU_32v7M)	=-D__LINUX_ARM_ARCH__=7 -march=armv7-m -Wa,-march=armv7-m
+arch-$(CONFIG_CPU_32v7)		=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a)
+arch-$(CONFIG_CPU_32v6)		=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6)
 # Only override the compiler option if ARMv6. The ARMv6K extensions are
 # always available in ARMv7
 ifeq ($(CONFIG_CPU_32v6),y)
-arch-$(CONFIG_CPU_32v6K)	:=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6k,-march=armv5t -Wa$(comma)-march=armv6k)
+arch-$(CONFIG_CPU_32v6K)	=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6k,-march=armv5t -Wa$(comma)-march=armv6k)
 endif
-arch-$(CONFIG_CPU_32v5)		:=-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t)
-arch-$(CONFIG_CPU_32v4T)	:=-D__LINUX_ARM_ARCH__=4 -march=armv4t
-arch-$(CONFIG_CPU_32v4)		:=-D__LINUX_ARM_ARCH__=4 -march=armv4
-arch-$(CONFIG_CPU_32v3)		:=-D__LINUX_ARM_ARCH__=3 -march=armv3
+arch-$(CONFIG_CPU_32v5)		=-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t)
+arch-$(CONFIG_CPU_32v4T)	=-D__LINUX_ARM_ARCH__=4 -march=armv4t
+arch-$(CONFIG_CPU_32v4)		=-D__LINUX_ARM_ARCH__=4 -march=armv4
+arch-$(CONFIG_CPU_32v3)		=-D__LINUX_ARM_ARCH__=3 -march=armv3
+
+# Evaluate arch cc-option calls now
+arch-y := $(arch-y)
 
 # This selects how we optimise for the processor.
-tune-$(CONFIG_CPU_ARM7TDMI)	:=-mtune=arm7tdmi
-tune-$(CONFIG_CPU_ARM720T)	:=-mtune=arm7tdmi
-tune-$(CONFIG_CPU_ARM740T)	:=-mtune=arm7tdmi
-tune-$(CONFIG_CPU_ARM9TDMI)	:=-mtune=arm9tdmi
-tune-$(CONFIG_CPU_ARM940T)	:=-mtune=arm9tdmi
-tune-$(CONFIG_CPU_ARM946E)	:=$(call cc-option,-mtune=arm9e,-mtune=arm9tdmi)
-tune-$(CONFIG_CPU_ARM920T)	:=-mtune=arm9tdmi
-tune-$(CONFIG_CPU_ARM922T)	:=-mtune=arm9tdmi
-tune-$(CONFIG_CPU_ARM925T)	:=-mtune=arm9tdmi
-tune-$(CONFIG_CPU_ARM926T)	:=-mtune=arm9tdmi
-tune-$(CONFIG_CPU_FA526)	:=-mtune=arm9tdmi
-tune-$(CONFIG_CPU_SA110)	:=-mtune=strongarm110
-tune-$(CONFIG_CPU_SA1100)	:=-mtune=strongarm1100
-tune-$(CONFIG_CPU_XSCALE)	:=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
-tune-$(CONFIG_CPU_XSC3)		:=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
-tune-$(CONFIG_CPU_FEROCEON)	:=$(call cc-option,-mtune=marvell-f,-mtune=xscale)
-tune-$(CONFIG_CPU_V6)		:=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
-tune-$(CONFIG_CPU_V6K)		:=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
+tune-$(CONFIG_CPU_ARM7TDMI)	=-mtune=arm7tdmi
+tune-$(CONFIG_CPU_ARM720T)	=-mtune=arm7tdmi
+tune-$(CONFIG_CPU_ARM740T)	=-mtune=arm7tdmi
+tune-$(CONFIG_CPU_ARM9TDMI)	=-mtune=arm9tdmi
+tune-$(CONFIG_CPU_ARM940T)	=-mtune=arm9tdmi
+tune-$(CONFIG_CPU_ARM946E)	=$(call cc-option,-mtune=arm9e,-mtune=arm9tdmi)
+tune-$(CONFIG_CPU_ARM920T)	=-mtune=arm9tdmi
+tune-$(CONFIG_CPU_ARM922T)	=-mtune=arm9tdmi
+tune-$(CONFIG_CPU_ARM925T)	=-mtune=arm9tdmi
+tune-$(CONFIG_CPU_ARM926T)	=-mtune=arm9tdmi
+tune-$(CONFIG_CPU_FA526)	=-mtune=arm9tdmi
+tune-$(CONFIG_CPU_SA110)	=-mtune=strongarm110
+tune-$(CONFIG_CPU_SA1100)	=-mtune=strongarm1100
+tune-$(CONFIG_CPU_XSCALE)	=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
+tune-$(CONFIG_CPU_XSC3)		=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
+tune-$(CONFIG_CPU_FEROCEON)	=$(call cc-option,-mtune=marvell-f,-mtune=xscale)
+tune-$(CONFIG_CPU_V6)		=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
+tune-$(CONFIG_CPU_V6K)		=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
+
+# Evaluate tune cc-option calls now
+tune-y := $(tune-y)
 
 ifeq ($(CONFIG_AEABI),y)
 CFLAGS_ABI	:=-mabi=aapcs-linux -mno-thumb-interwork
@@ -295,9 +301,10 @@ zImage Image xipImage bootpImage uImage: vmlinux
 zinstall uinstall install: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@
 
-%.dtb: scripts
+%.dtb: | scripts
 	$(Q)$(MAKE) $(build)=$(boot)/dts MACHINE=$(MACHINE) $(boot)/dts/$@
 
+PHONY += dtbs
 dtbs: scripts
 	$(Q)$(MAKE) $(build)=$(boot)/dts MACHINE=$(MACHINE) dtbs
 
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 120b83bfde2..48d0a44270b 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -27,7 +27,7 @@ OBJS	+= misc.o decompress.o
 ifeq ($(CONFIG_DEBUG_UNCOMPRESS),y)
 OBJS	+= debug.o
 endif
-FONTC	= $(srctree)/drivers/video/console/font_acorn_8x8.c
+FONTC	= $(srctree)/lib/fonts/font_acorn_8x8.c
 
 # string library code (-Os is enforced to keep it much smaller)
 OBJS		+= string.o
diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c
index aabc02a6848..d1153c8a765 100644
--- a/arch/arm/boot/compressed/atags_to_fdt.c
+++ b/arch/arm/boot/compressed/atags_to_fdt.c
@@ -53,6 +53,17 @@ static const void *getprop(const void *fdt, const char *node_path,
 	return fdt_getprop(fdt, offset, property, len);
 }
 
+/* Return the root node's #size-cells; default to 1 if absent or truncated. */
+static uint32_t get_cell_size(const void *fdt)
+{
+	int len;
+	const uint32_t *size_len = getprop(fdt, "/", "#size-cells", &len);
+
+	if (!size_len || len < (int)sizeof(*size_len))
+		return 1;
+	return fdt32_to_cpu(*size_len);
+}
+
 static void merge_fdt_bootargs(void *fdt, const char *fdt_cmdline)
 {
 	char cmdline[COMMAND_LINE_SIZE];
@@ -95,9 +106,11 @@ static void merge_fdt_bootargs(void *fdt, const char *fdt_cmdline)
 int atags_to_fdt(void *atag_list, void *fdt, int total_space)
 {
 	struct tag *atag = atag_list;
-	uint32_t mem_reg_property[2 * NR_BANKS];
+	/* With 64-bit memory sizes, each bank needs 2 cells for its
+	 * address and 2 cells for its size, hence twice the space. */
+	uint32_t mem_reg_property[2 * 2 * NR_BANKS];
 	int memcount = 0;
-	int ret;
+	int ret, memsize;
 
 	/* make sure we've got an aligned pointer */
 	if ((u32)atag_list & 0x3)
@@ -137,8 +150,25 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space)
 				continue;
 			if (!atag->u.mem.size)
 				continue;
-			mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.start);
-			mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.size);
+			memsize = get_cell_size(fdt);
+
+			if (memsize == 2) {
+				/* A memsize of 2 means each value
+				 * takes two 32-bit cells, i.e. the
+				 * start and size are 64 bits wide. */
+				uint64_t *mem_reg_prop64 =
+					(uint64_t *)mem_reg_property;
+				mem_reg_prop64[memcount++] =
+					cpu_to_fdt64(atag->u.mem.start);
+				mem_reg_prop64[memcount++] =
+					cpu_to_fdt64(atag->u.mem.size);
+			} else {
+				mem_reg_property[memcount++] =
+					cpu_to_fdt32(atag->u.mem.start);
+				mem_reg_property[memcount++] =
+					cpu_to_fdt32(atag->u.mem.size);
+			}
+
 		} else if (atag->hdr.tag == ATAG_INITRD2) {
 			uint32_t initrd_start, initrd_size;
 			initrd_start = atag->u.initrd.start;
@@ -150,8 +180,10 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space)
 		}
 	}
 
-	if (memcount)
-		setprop(fdt, "/memory", "reg", mem_reg_property, 4*memcount);
+	if (memcount) {
+		setprop(fdt, "/memory", "reg", mem_reg_property,
+			4 * memcount * memsize);
+	}
 
 	return fdt_pack(fdt);
 }
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 032a8d98714..75189f13cf5 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -142,7 +142,6 @@ start:
 		mov	r7, r1			@ save architecture ID
 		mov	r8, r2			@ save atags pointer
 
-#ifndef __ARM_ARCH_2__
 		/*
 		 * Booting from Angel - need to enter SVC mode and disable
 		 * FIQs/IRQs (numeric definitions from angel arm.h source).
@@ -158,10 +157,6 @@ not_angel:
 		safe_svcmode_maskall r0
 		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
 						@ SPSR
-#else
-		teqp	pc, #0x0c000003		@ turn off interrupts
-#endif
-
 		/*
 		 * Note that some cache flushing and other stuff may
 		 * be needed here - is there an Angel SWI call for this?
@@ -183,7 +178,19 @@ not_angel:
 		ldr	r4, =zreladdr
 #endif
 
-		bl	cache_on
+		/*
+		 * Set up a page table only if it won't overwrite ourselves.
+		 * That means r4 < pc || r4 - 16k page directory > &_end.
+		 * Given that r4 > &_end is very infrequent, we add a rough
+		 * additional 1MB of room for a possible appended DTB.
+		 */
+		mov	r0, pc
+		cmp	r0, r4
+		ldrcc	r0, LC0+32
+		addcc	r0, r0, pc
+		cmpcc	r4, r0
+		orrcc	r4, r4, #1		@ remember we skipped cache_on
+		blcs	cache_on
 
 restart:	adr	r0, LC0
 		ldmia	r0, {r1, r2, r3, r6, r10, r11, r12}
@@ -229,7 +236,7 @@ restart:	adr	r0, LC0
  *   r0  = delta
  *   r2  = BSS start
  *   r3  = BSS end
- *   r4  = final kernel address
+ *   r4  = final kernel address (possibly with LSB set)
  *   r5  = appended dtb size (still unknown)
  *   r6  = _edata
  *   r7  = architecture ID
@@ -277,6 +284,7 @@ restart:	adr	r0, LC0
 		 */
 		cmp	r0, #1
 		sub	r0, r4, #TEXT_OFFSET
+		bic	r0, r0, #1
 		add	r0, r0, #0x100
 		mov	r1, r6
 		sub	r2, sp, r6
@@ -323,12 +331,13 @@ dtb_check_done:
 
 /*
  * Check to see if we will overwrite ourselves.
- *   r4  = final kernel address
+ *   r4  = final kernel address (possibly with LSB set)
  *   r9  = size of decompressed image
  *   r10 = end of this image, including  bss/stack/malloc space if non XIP
  * We basically want:
  *   r4 - 16k page directory >= r10 -> OK
  *   r4 + image length <= address of wont_overwrite -> OK
+ * Note: the possible LSB in r4 is harmless here.
  */
 		add	r10, r10, #16384
 		cmp	r4, r10
@@ -390,7 +399,8 @@ dtb_check_done:
 		add	sp, sp, r6
 #endif
 
-		bl	cache_clean_flush
+		tst	r4, #1
+		bleq	cache_clean_flush
 
 		adr	r0, BSYM(restart)
 		add	r0, r0, r6
@@ -402,7 +412,7 @@ wont_overwrite:
  *   r0  = delta
  *   r2  = BSS start
  *   r3  = BSS end
- *   r4  = kernel execution address
+ *   r4  = kernel execution address (possibly with LSB set)
  *   r5  = appended dtb size (0 if not present)
  *   r7  = architecture ID
  *   r8  = atags pointer
@@ -465,6 +475,15 @@ not_relocated:	mov	r0, #0
 		cmp	r2, r3
 		blo	1b
 
+		/*
+		 * Did we skip the cache setup earlier?
+		 * That is indicated by the LSB in r4.
+		 * Do it now if so.
+		 */
+		tst	r4, #1
+		bic	r4, r4, #1
+		blne	cache_on
+
 /*
  * The C runtime environment should now be setup sufficiently.
  * Set up some pointers, and start decompressing.
@@ -513,6 +532,7 @@ LC0:		.word	LC0			@ r1
 		.word	_got_start		@ r11
 		.word	_got_end		@ ip
 		.word	.L_user_stack_end	@ sp
+		.word	_end - restart + 16384 + 1024*1024
 		.size	LC0, . - LC0
 
 #ifdef CONFIG_ARCH_RPC
diff --git a/arch/arm/boot/dts/atlas6.dtsi b/arch/arm/boot/dts/atlas6.dtsi
index 7d1a27949c1..9866cd736de 100644
--- a/arch/arm/boot/dts/atlas6.dtsi
+++ b/arch/arm/boot/dts/atlas6.dtsi
@@ -613,7 +613,7 @@
 		};
 
 		rtc-iobg {
-			compatible = "sirf,prima2-rtciobg", "sirf-prima2-rtciobg-bus";
+			compatible = "sirf,prima2-rtciobg", "sirf-prima2-rtciobg-bus", "simple-bus";
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0x80030000 0x10000>;
diff --git a/arch/arm/boot/dts/bcm11351.dtsi b/arch/arm/boot/dts/bcm11351.dtsi
index 17979d5f23b..c0cdf66f896 100644
--- a/arch/arm/boot/dts/bcm11351.dtsi
+++ b/arch/arm/boot/dts/bcm11351.dtsi
@@ -50,10 +50,10 @@
 	};
 
 	L2: l2-cache {
-		    compatible = "arm,pl310-cache";
-		    reg = <0x3ff20000 0x1000>;
-		    cache-unified;
-		    cache-level = <2>;
+		compatible = "bcm,bcm11351-a2-pl310-cache";
+		reg = <0x3ff20000 0x1000>;
+		cache-unified;
+		cache-level = <2>;
 	};
 
 	timer@35006000 {
diff --git a/arch/arm/boot/dts/ecx-common.dtsi b/arch/arm/boot/dts/ecx-common.dtsi
index d61b535f682..e8559b753c9 100644
--- a/arch/arm/boot/dts/ecx-common.dtsi
+++ b/arch/arm/boot/dts/ecx-common.dtsi
@@ -33,6 +33,8 @@
 			calxeda,port-phys = <&combophy5 0 &combophy0 0
 					     &combophy0 1 &combophy0 2
 					     &combophy0 3>;
+			calxeda,sgpio-gpio =<&gpioh 5 1 &gpioh 6 1 &gpioh 7 1>;
+			calxeda,led-order = <4 0 1 2 3>;
 		};
 
 		sdhci@ffe0e000 {
diff --git a/arch/arm/boot/dts/prima2.dtsi b/arch/arm/boot/dts/prima2.dtsi
index 02edd8965f8..05e9489cf95 100644
--- a/arch/arm/boot/dts/prima2.dtsi
+++ b/arch/arm/boot/dts/prima2.dtsi
@@ -610,7 +610,7 @@
 		};
 
 		rtc-iobg {
-			compatible = "sirf,prima2-rtciobg", "sirf-prima2-rtciobg-bus";
+			compatible = "sirf,prima2-rtciobg", "sirf-prima2-rtciobg-bus", "simple-bus";
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0x80030000 0x10000>;
diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
index 8178705c4b2..80f033614a1 100644
--- a/arch/arm/common/mcpm_head.S
+++ b/arch/arm/common/mcpm_head.S
@@ -32,11 +32,11 @@
 1901:	adr	r0, 1902b
 	bl	printascii
 	mov	r0, r9
-	bl	printhex8
+	bl	printhex2
 	adr	r0, 1903b
 	bl	printascii
 	mov	r0, r10
-	bl	printhex8
+	bl	printhex2
 	adr	r0, 1904b
 	bl	printascii
 #endif
diff --git a/arch/arm/common/mcpm_platsmp.c b/arch/arm/common/mcpm_platsmp.c
index 3caed0db698..510e5b13aa2 100644
--- a/arch/arm/common/mcpm_platsmp.c
+++ b/arch/arm/common/mcpm_platsmp.c
@@ -19,10 +19,6 @@
 #include <asm/smp.h>
 #include <asm/smp_plat.h>
 
-static void __init simple_smp_init_cpus(void)
-{
-}
-
 static int __cpuinit mcpm_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
 	unsigned int mpidr, pcpu, pcluster, ret;
@@ -74,7 +70,6 @@ static void mcpm_cpu_die(unsigned int cpu)
 #endif
 
 static struct smp_operations __initdata mcpm_smp_ops = {
-	.smp_init_cpus		= simple_smp_init_cpus,
 	.smp_boot_secondary	= mcpm_boot_secondary,
 	.smp_secondary_init	= mcpm_secondary_init,
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index f9b7fccd795..0870b5cd553 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -38,6 +38,8 @@ CONFIG_NR_CPUS=2
 CONFIG_LEDS=y
 CONFIG_ZBOOT_ROM_TEXT=0x0
 CONFIG_ZBOOT_ROM_BSS=0x0
+CONFIG_ARM_APPENDED_DTB=y
+CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_CMDLINE="root=/dev/mmcblk0p2 rootwait console=ttyO2,115200"
 CONFIG_KEXEC=y
 CONFIG_FPE_NWFPE=y
@@ -156,6 +158,13 @@ CONFIG_W1=y
 CONFIG_POWER_SUPPLY=y
 CONFIG_SENSORS_LM75=m
 CONFIG_WATCHDOG=y
+CONFIG_THERMAL=y
+CONFIG_THERMAL_HWMON=y
+CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y
+CONFIG_THERMAL_GOV_FAIR_SHARE=y
+CONFIG_THERMAL_GOV_STEP_WISE=y
+CONFIG_THERMAL_GOV_USER_SPACE=y
+CONFIG_CPU_THERMAL=y
 CONFIG_OMAP_WATCHDOG=y
 CONFIG_TWL4030_WATCHDOG=y
 CONFIG_MFD_TPS65217=y
@@ -242,7 +251,13 @@ CONFIG_RTC_DRV_TWL92330=y
 CONFIG_RTC_DRV_TWL4030=y
 CONFIG_RTC_DRV_OMAP=y
 CONFIG_DMADEVICES=y
+CONFIG_TI_EDMA=y
 CONFIG_DMA_OMAP=y
+CONFIG_TI_SOC_THERMAL=y
+CONFIG_TI_THERMAL=y
+CONFIG_OMAP4_THERMAL=y
+CONFIG_OMAP5_THERMAL=y
+CONFIG_DRA752_THERMAL=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 # CONFIG_EXT3_FS_XATTR is not set
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index 7c1bfc0aea0..accefe09918 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -80,15 +80,6 @@ static inline u32 arch_timer_get_cntfrq(void)
 	return val;
 }
 
-static inline u64 arch_counter_get_cntpct(void)
-{
-	u64 cval;
-
-	isb();
-	asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval));
-	return cval;
-}
-
 static inline u64 arch_counter_get_cntvct(void)
 {
 	u64 cval;
diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h
index cedd3721318..6493802f880 100644
--- a/arch/arm/include/asm/cp15.h
+++ b/arch/arm/include/asm/cp15.h
@@ -23,6 +23,11 @@
 #define CR_RR	(1 << 14)	/* Round Robin cache replacement	*/
 #define CR_L4	(1 << 15)	/* LDR pc can set T bit			*/
 #define CR_DT	(1 << 16)
+#ifdef CONFIG_MMU
+#define CR_HA	(1 << 17)	/* Hardware management of Access Flag   */
+#else
+#define CR_BR	(1 << 17)	/* MPU Background region enable (PMSA)  */
+#endif
 #define CR_IT	(1 << 18)
 #define CR_ST	(1 << 19)
 #define CR_FI	(1 << 21)	/* Fast interrupt (lower latency mode)	*/
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index d7deb62554c..8c25dc4e985 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -8,6 +8,7 @@
 #define CPUID_CACHETYPE	1
 #define CPUID_TCM	2
 #define CPUID_TLBTYPE	3
+#define CPUID_MPUIR	4
 #define CPUID_MPIDR	5
 
 #ifdef CONFIG_CPU_V7M
diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h
index fe92ccf1d0b..191ada6e4d2 100644
--- a/arch/arm/include/asm/div64.h
+++ b/arch/arm/include/asm/div64.h
@@ -46,7 +46,7 @@
 	__rem;							\
 })
 
-#if __GNUC__ < 4
+#if __GNUC__ < 4 || !defined(CONFIG_AEABI)
 
 /*
  * gcc versions earlier than 4.0 are simply too problematic for the
diff --git a/arch/arm/include/asm/glue-proc.h b/arch/arm/include/asm/glue-proc.h
index e6168c0c18e..74a8b84f3cb 100644
--- a/arch/arm/include/asm/glue-proc.h
+++ b/arch/arm/include/asm/glue-proc.h
@@ -230,21 +230,21 @@
 # endif
 #endif
 
-#ifdef CONFIG_CPU_PJ4B
+#ifdef CONFIG_CPU_V7M
 # ifdef CPU_NAME
 #  undef  MULTI_CPU
 #  define MULTI_CPU
 # else
-#  define CPU_NAME cpu_pj4b
+#  define CPU_NAME cpu_v7m
 # endif
 #endif
 
-#ifdef CONFIG_CPU_V7M
+#ifdef CONFIG_CPU_PJ4B
 # ifdef CPU_NAME
 #  undef  MULTI_CPU
 #  define MULTI_CPU
 # else
-#  define CPU_NAME cpu_v7m
+#  define CPU_NAME cpu_pj4b
 # endif
 #endif
 
diff --git a/arch/arm/include/asm/hugetlb-3level.h b/arch/arm/include/asm/hugetlb-3level.h
new file mode 100644
index 00000000000..d4014fbe5ea
--- /dev/null
+++ b/arch/arm/include/asm/hugetlb-3level.h
@@ -0,0 +1,71 @@
+/*
+ * arch/arm/include/asm/hugetlb-3level.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _ASM_ARM_HUGETLB_3LEVEL_H
+#define _ASM_ARM_HUGETLB_3LEVEL_H
+
+
+/*
+ * If our huge pte is non-zero then mark the valid bit.
+ * This allows pte_present(huge_ptep_get(ptep)) to return true for non-zero
+ * ptes.
+ * (The valid bit is automatically cleared by set_pte_at for PROT_NONE ptes).
+ */
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	pte_t retval = *ptep;
+	if (pte_val(retval))
+		pte_val(retval) |= L_PTE_VALID;
+	return retval;
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pte)
+{
+	set_pte_at(mm, addr, ptep, pte);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep)
+{
+	ptep_clear_flush(vma, addr, ptep);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return ptep_get_and_clear(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
+{
+	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+#endif /* _ASM_ARM_HUGETLB_3LEVEL_H */
diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h
new file mode 100644
index 00000000000..1f1b1cd112f
--- /dev/null
+++ b/arch/arm/include/asm/hugetlb.h
@@ -0,0 +1,84 @@
+/*
+ * arch/arm/include/asm/hugetlb.h
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _ASM_ARM_HUGETLB_H
+#define _ASM_ARM_HUGETLB_H
+
+#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
+
+#include <asm/hugetlb-3level.h>
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+					  unsigned long addr, unsigned long end,
+					  unsigned long floor,
+					  unsigned long ceiling)
+{
+	free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr, unsigned long len)
+{
+	return 0;
+}
+
+static inline int prepare_hugepage_range(struct file *file,
+					 unsigned long addr, unsigned long len)
+{
+	struct hstate *h = hstate_file(file);
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (addr & ~huge_page_mask(h))
+		return -EINVAL;
+	return 0;
+}
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+	return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+	clear_bit(PG_dcache_clean, &page->flags);
+}
+
+#endif /* _ASM_ARM_HUGETLB_H */
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 652b56086de..d070741b2b3 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -130,16 +130,16 @@ static inline u32 __raw_readl(const volatile void __iomem *addr)
  */
 extern void __iomem *__arm_ioremap_pfn_caller(unsigned long, unsigned long,
 	size_t, unsigned int, void *);
-extern void __iomem *__arm_ioremap_caller(unsigned long, size_t, unsigned int,
+extern void __iomem *__arm_ioremap_caller(phys_addr_t, size_t, unsigned int,
 	void *);
 
 extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int);
-extern void __iomem *__arm_ioremap(unsigned long, size_t, unsigned int);
-extern void __iomem *__arm_ioremap_exec(unsigned long, size_t, bool cached);
+extern void __iomem *__arm_ioremap(phys_addr_t, size_t, unsigned int);
+extern void __iomem *__arm_ioremap_exec(phys_addr_t, size_t, bool cached);
 extern void __iounmap(volatile void __iomem *addr);
 extern void __arm_iounmap(volatile void __iomem *addr);
 
-extern void __iomem * (*arch_ioremap_caller)(unsigned long, size_t,
+extern void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t,
 	unsigned int, void *);
 extern void (*arch_iounmap)(volatile void __iomem *);
 
diff --git a/arch/arm/include/asm/kvm_arch_timer.h b/arch/arm/include/asm/kvm_arch_timer.h
deleted file mode 100644
index 68cb9e1dfb8..00000000000
--- a/arch/arm/include/asm/kvm_arch_timer.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __ASM_ARM_KVM_ARCH_TIMER_H
-#define __ASM_ARM_KVM_ARCH_TIMER_H
-
-#include <linux/clocksource.h>
-#include <linux/hrtimer.h>
-#include <linux/workqueue.h>
-
-struct arch_timer_kvm {
-#ifdef CONFIG_KVM_ARM_TIMER
-	/* Is the timer enabled */
-	bool			enabled;
-
-	/* Virtual offset */
-	cycle_t			cntvoff;
-#endif
-};
-
-struct arch_timer_cpu {
-#ifdef CONFIG_KVM_ARM_TIMER
-	/* Registers: control register, timer value */
-	u32				cntv_ctl;	/* Saved/restored */
-	cycle_t				cntv_cval;	/* Saved/restored */
-
-	/*
-	 * Anything that is not used directly from assembly code goes
-	 * here.
-	 */
-
-	/* Background timer used when the guest is not running */
-	struct hrtimer			timer;
-
-	/* Work queued with the above timer expires */
-	struct work_struct		expired;
-
-	/* Background timer active */
-	bool				armed;
-
-	/* Timer IRQ */
-	const struct kvm_irq_level	*irq;
-#endif
-};
-
-#ifdef CONFIG_KVM_ARM_TIMER
-int kvm_timer_hyp_init(void);
-int kvm_timer_init(struct kvm *kvm);
-void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
-void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
-void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
-void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
-#else
-static inline int kvm_timer_hyp_init(void)
-{
-	return 0;
-};
-
-static inline int kvm_timer_init(struct kvm *kvm)
-{
-	return 0;
-}
-
-static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
-static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {}
-#endif
-
-#endif
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 124623e5ef1..64e96960de2 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -135,7 +135,6 @@
 #define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1ULL)
 #define PTRS_PER_S2_PGD	(1ULL << (KVM_PHYS_SHIFT - 30))
 #define S2_PGD_ORDER	get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
-#define S2_PGD_SIZE	(1 << S2_PGD_ORDER)
 
 /* Virtualization Translation Control Register (VTCR) bits */
 #define VTCR_SH0	(3 << 12)
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 18d50322a9e..a2f43ddcc30 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -37,16 +37,18 @@
 #define c5_AIFSR	15	/* Auxilary Instrunction Fault Status R */
 #define c6_DFAR		16	/* Data Fault Address Register */
 #define c6_IFAR		17	/* Instruction Fault Address Register */
-#define c9_L2CTLR	18	/* Cortex A15 L2 Control Register */
-#define c10_PRRR	19	/* Primary Region Remap Register */
-#define c10_NMRR	20	/* Normal Memory Remap Register */
-#define c12_VBAR	21	/* Vector Base Address Register */
-#define c13_CID		22	/* Context ID Register */
-#define c13_TID_URW	23	/* Thread ID, User R/W */
-#define c13_TID_URO	24	/* Thread ID, User R/O */
-#define c13_TID_PRIV	25	/* Thread ID, Privileged */
-#define c14_CNTKCTL	26	/* Timer Control Register (PL1) */
-#define NR_CP15_REGS	27	/* Number of regs (incl. invalid) */
+#define c7_PAR		18	/* Physical Address Register */
+#define c7_PAR_high	19	/* PAR top 32 bits */
+#define c9_L2CTLR	20	/* Cortex A15 L2 Control Register */
+#define c10_PRRR	21	/* Primary Region Remap Register */
+#define c10_NMRR	22	/* Normal Memory Remap Register */
+#define c12_VBAR	23	/* Vector Base Address Register */
+#define c13_CID		24	/* Context ID Register */
+#define c13_TID_URW	25	/* Thread ID, User R/W */
+#define c13_TID_URO	26	/* Thread ID, User R/O */
+#define c13_TID_PRIV	27	/* Thread ID, Privileged */
+#define c14_CNTKCTL	28	/* Timer Control Register (PL1) */
+#define NR_CP15_REGS	29	/* Number of regs (incl. invalid) */
 
 #define ARM_EXCEPTION_RESET	  0
 #define ARM_EXCEPTION_UNDEFINED   1
@@ -72,8 +74,6 @@ extern char __kvm_hyp_vector[];
 extern char __kvm_hyp_code_start[];
 extern char __kvm_hyp_code_end[];
 
-extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
-
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 82b4babead2..a464e8d7b6c 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -65,11 +65,6 @@ static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
 	return cpsr_mode > USR_MODE;;
 }
 
-static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg)
-{
-	return reg == 15;
-}
-
 static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.fault.hsr;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 57cb786a620..7d22517d807 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -23,9 +23,14 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 #include <asm/fpstate.h>
-#include <asm/kvm_arch_timer.h>
+#include <kvm/arm_arch_timer.h>
 
+#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
+#else
+#define KVM_MAX_VCPUS 0
+#endif
+
 #define KVM_USER_MEM_SLOTS 32
 #define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -38,7 +43,7 @@
 #define KVM_NR_PAGE_SIZES	1
 #define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
 
-#include <asm/kvm_vgic.h>
+#include <kvm/arm_vgic.h>
 
 struct kvm_vcpu;
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
@@ -190,8 +195,8 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index);
 
-static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr,
-				       unsigned long long pgd_ptr,
+static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
+				       phys_addr_t pgd_ptr,
 				       unsigned long hyp_stack_ptr,
 				       unsigned long vector_ptr)
 {
diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
deleted file mode 100644
index 343744e4809..00000000000
--- a/arch/arm/include/asm/kvm_vgic.h
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __ASM_ARM_KVM_VGIC_H
-#define __ASM_ARM_KVM_VGIC_H
-
-#include <linux/kernel.h>
-#include <linux/kvm.h>
-#include <linux/irqreturn.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/irqchip/arm-gic.h>
-
-#define VGIC_NR_IRQS		128
-#define VGIC_NR_SGIS		16
-#define VGIC_NR_PPIS		16
-#define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
-#define VGIC_NR_SHARED_IRQS	(VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
-#define VGIC_MAX_CPUS		KVM_MAX_VCPUS
-#define VGIC_MAX_LRS		(1 << 6)
-
-/* Sanity checks... */
-#if (VGIC_MAX_CPUS > 8)
-#error	Invalid number of CPU interfaces
-#endif
-
-#if (VGIC_NR_IRQS & 31)
-#error "VGIC_NR_IRQS must be a multiple of 32"
-#endif
-
-#if (VGIC_NR_IRQS > 1024)
-#error "VGIC_NR_IRQS must be <= 1024"
-#endif
-
-/*
- * The GIC distributor registers describing interrupts have two parts:
- * - 32 per-CPU interrupts (SGI + PPI)
- * - a bunch of shared interrupts (SPI)
- */
-struct vgic_bitmap {
-	union {
-		u32 reg[VGIC_NR_PRIVATE_IRQS / 32];
-		DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS);
-	} percpu[VGIC_MAX_CPUS];
-	union {
-		u32 reg[VGIC_NR_SHARED_IRQS / 32];
-		DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS);
-	} shared;
-};
-
-struct vgic_bytemap {
-	u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4];
-	u32 shared[VGIC_NR_SHARED_IRQS  / 4];
-};
-
-struct vgic_dist {
-#ifdef CONFIG_KVM_ARM_VGIC
-	spinlock_t		lock;
-	bool			ready;
-
-	/* Virtual control interface mapping */
-	void __iomem		*vctrl_base;
-
-	/* Distributor and vcpu interface mapping in the guest */
-	phys_addr_t		vgic_dist_base;
-	phys_addr_t		vgic_cpu_base;
-
-	/* Distributor enabled */
-	u32			enabled;
-
-	/* Interrupt enabled (one bit per IRQ) */
-	struct vgic_bitmap	irq_enabled;
-
-	/* Interrupt 'pin' level */
-	struct vgic_bitmap	irq_state;
-
-	/* Level-triggered interrupt in progress */
-	struct vgic_bitmap	irq_active;
-
-	/* Interrupt priority. Not used yet. */
-	struct vgic_bytemap	irq_priority;
-
-	/* Level/edge triggered */
-	struct vgic_bitmap	irq_cfg;
-
-	/* Source CPU per SGI and target CPU */
-	u8			irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS];
-
-	/* Target CPU for each IRQ */
-	u8			irq_spi_cpu[VGIC_NR_SHARED_IRQS];
-	struct vgic_bitmap	irq_spi_target[VGIC_MAX_CPUS];
-
-	/* Bitmap indicating which CPU has something pending */
-	unsigned long		irq_pending_on_cpu;
-#endif
-};
-
-struct vgic_cpu {
-#ifdef CONFIG_KVM_ARM_VGIC
-	/* per IRQ to LR mapping */
-	u8		vgic_irq_lr_map[VGIC_NR_IRQS];
-
-	/* Pending interrupts on this VCPU */
-	DECLARE_BITMAP(	pending_percpu, VGIC_NR_PRIVATE_IRQS);
-	DECLARE_BITMAP(	pending_shared, VGIC_NR_SHARED_IRQS);
-
-	/* Bitmap of used/free list registers */
-	DECLARE_BITMAP(	lr_used, VGIC_MAX_LRS);
-
-	/* Number of list registers on this CPU */
-	int		nr_lr;
-
-	/* CPU vif control registers for world switch */
-	u32		vgic_hcr;
-	u32		vgic_vmcr;
-	u32		vgic_misr;	/* Saved only */
-	u32		vgic_eisr[2];	/* Saved only */
-	u32		vgic_elrsr[2];	/* Saved only */
-	u32		vgic_apr;
-	u32		vgic_lr[VGIC_MAX_LRS];
-#endif
-};
-
-#define LR_EMPTY	0xff
-
-struct kvm;
-struct kvm_vcpu;
-struct kvm_run;
-struct kvm_exit_mmio;
-
-#ifdef CONFIG_KVM_ARM_VGIC
-int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
-int kvm_vgic_hyp_init(void);
-int kvm_vgic_init(struct kvm *kvm);
-int kvm_vgic_create(struct kvm *kvm);
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
-void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
-void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
-int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
-			bool level);
-int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-		      struct kvm_exit_mmio *mmio);
-
-#define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
-#define vgic_initialized(k)	((k)->arch.vgic.ready)
-
-#else
-static inline int kvm_vgic_hyp_init(void)
-{
-	return 0;
-}
-
-static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
-{
-	return 0;
-}
-
-static inline int kvm_vgic_init(struct kvm *kvm)
-{
-	return 0;
-}
-
-static inline int kvm_vgic_create(struct kvm *kvm)
-{
-	return 0;
-}
-
-static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {}
-
-static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid,
-				      unsigned int irq_num, bool level)
-{
-	return 0;
-}
-
-static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-				    struct kvm_exit_mmio *mmio)
-{
-	return false;
-}
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-	return 0;
-}
-
-static inline bool vgic_initialized(struct kvm *kvm)
-{
-	return true;
-}
-#endif
-
-#endif
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 57870ab313c..e750a938fd3 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -18,6 +18,8 @@
 #include <linux/types.h>
 #include <linux/sizes.h>
 
+#include <asm/cache.h>
+
 #ifdef CONFIG_NEED_MACH_MEMORY_H
 #include <mach/memory.h>
 #endif
@@ -141,6 +143,20 @@
 #define page_to_phys(page)	(__pfn_to_phys(page_to_pfn(page)))
 #define phys_to_page(phys)	(pfn_to_page(__phys_to_pfn(phys)))
 
+/*
+ * Minimum guaranteed alignment in pgd_alloc().  The page table pointers passed
+ * around in head.S and proc-*.S are shifted by this amount, in order to
+ * leave spare high bits for systems with physical address extension.  This
+ * does not fully accommodate the 40-bit addressing capability of ARM LPAE, but
+ * gives us about 38-bits or so.
+ */
+#ifdef CONFIG_ARM_LPAE
+#define ARCH_PGD_SHIFT		L1_CACHE_SHIFT
+#else
+#define ARCH_PGD_SHIFT		0
+#endif
+#define ARCH_PGD_MASK		((1 << ARCH_PGD_SHIFT) - 1)
+
 #ifndef __ASSEMBLY__
 
 /*
@@ -207,7 +223,7 @@ static inline unsigned long __phys_to_virt(unsigned long x)
  * direct-mapped view.  We assume this is the first page
  * of RAM in the mem_map as well.
  */
-#define PHYS_PFN_OFFSET	(PHYS_OFFSET >> PAGE_SHIFT)
+#define PHYS_PFN_OFFSET	((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT))
 
 /*
  * These are *only* valid on the kernel direct mapped RAM memory.
@@ -260,12 +276,6 @@ static inline __deprecated void *bus_to_virt(unsigned long x)
 /*
  * Conversion between a struct page and a physical address.
  *
- * Note: when converting an unknown physical address to a
- * struct page, the resulting pointer must be validated
- * using VALID_PAGE().  It must return an invalid struct page
- * for any physical address not corresponding to a system
- * RAM address.
- *
  *  page_to_pfn(page)	convert a struct page * to a PFN number
  *  pfn_to_page(pfn)	convert a _valid_ PFN number to struct page *
  *
diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index a7b85e0d0cc..b5792b7fd8d 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -18,6 +18,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cachetype.h>
 #include <asm/proc-fns.h>
+#include <asm/smp_plat.h>
 #include <asm-generic/mm_hooks.h>
 
 void __check_vmalloc_seq(struct mm_struct *mm);
@@ -27,7 +28,15 @@ void __check_vmalloc_seq(struct mm_struct *mm);
 void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk);
 #define init_new_context(tsk,mm)	({ atomic64_set(&mm->context.id, 0); 0; })
 
-DECLARE_PER_CPU(atomic64_t, active_asids);
+#ifdef CONFIG_ARM_ERRATA_798181
+void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
+			     cpumask_t *mask);
+#else  /* !CONFIG_ARM_ERRATA_798181 */
+static inline void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
+					   cpumask_t *mask)
+{
+}
+#endif /* CONFIG_ARM_ERRATA_798181 */
 
 #else	/* !CONFIG_CPU_HAS_ASID */
 
@@ -98,12 +107,16 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 #ifdef CONFIG_MMU
 	unsigned int cpu = smp_processor_id();
 
-#ifdef CONFIG_SMP
-	/* check for possible thread migration */
-	if (!cpumask_empty(mm_cpumask(next)) &&
+	/*
+	 * __sync_icache_dcache doesn't broadcast the I-cache invalidation,
+	 * so check for possible thread migration and invalidate the I-cache
+	 * if we're new to this CPU.
+	 */
+	if (cache_ops_need_broadcast() &&
+	    !cpumask_empty(mm_cpumask(next)) &&
 	    !cpumask_test_cpu(cpu, mm_cpumask(next)))
 		__flush_icache_all();
-#endif
+
 	if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) {
 		check_and_switch_context(next, tsk);
 		if (cache_is_vivt())
diff --git a/arch/arm/include/asm/mpu.h b/arch/arm/include/asm/mpu.h
new file mode 100644
index 00000000000..c3247cc2fe0
--- /dev/null
+++ b/arch/arm/include/asm/mpu.h
@@ -0,0 +1,76 @@
+#ifndef __ARM_MPU_H
+#define __ARM_MPU_H
+
+#ifdef CONFIG_ARM_MPU
+
+/* MPUIR layout */
+#define MPUIR_nU		1
+#define MPUIR_DREGION		8
+#define MPUIR_IREGION		16
+#define MPUIR_DREGION_SZMASK	(0xFF << MPUIR_DREGION)
+#define MPUIR_IREGION_SZMASK	(0xFF << MPUIR_IREGION)
+
+/* ID_MMFR0 data relevant to MPU */
+#define MMFR0_PMSA		(0xF << 4)
+#define MMFR0_PMSAv7		(3 << 4)
+
+/* MPU D/I Size Register fields */
+#define MPU_RSR_SZ		1
+#define MPU_RSR_EN		0
+
+/* The D/I RSR value for an enabled region spanning the whole of memory */
+#define MPU_RSR_ALL_MEM		63
+
+/* Individual bits in the DR/IR ACR */
+#define MPU_ACR_XN		(1 << 12)
+#define MPU_ACR_SHARED		(1 << 2)
+
+/* C, B and TEX[2:0] bits only have semantic meanings when grouped */
+#define MPU_RGN_CACHEABLE	0xB
+#define MPU_RGN_SHARED_CACHEABLE (MPU_RGN_CACHEABLE | MPU_ACR_SHARED)
+#define MPU_RGN_STRONGLY_ORDERED 0
+
+/* Main region should only be shared for SMP */
+#ifdef CONFIG_SMP
+#define MPU_RGN_NORMAL		(MPU_RGN_CACHEABLE | MPU_ACR_SHARED)
+#else
+#define MPU_RGN_NORMAL		MPU_RGN_CACHEABLE
+#endif
+
+/* Access permission bits of ACR (only define those that we use) */
+#define MPU_AP_PL1RW_PL0RW	(0x3 << 8)
+#define MPU_AP_PL1RW_PL0R0	(0x2 << 8)
+#define MPU_AP_PL1RW_PL0NA	(0x1 << 8)
+
+/* For minimal static MPU region configurations */
+#define MPU_PROBE_REGION	0
+#define MPU_BG_REGION		1
+#define MPU_RAM_REGION		2
+#define MPU_VECTORS_REGION	3
+
+/* Maximum number of regions Linux is interested in */
+#define MPU_MAX_REGIONS		16
+
+#define MPU_DATA_SIDE		0
+#define MPU_INSTR_SIDE		1
+
+#ifndef __ASSEMBLY__
+
+struct mpu_rgn {
+	/* Assume same attributes for d/i-side  */
+	u32 drbar;
+	u32 drsr;
+	u32 dracr;
+};
+
+struct mpu_rgn_info {
+	u32 mpuir;
+	struct mpu_rgn rgns[MPU_MAX_REGIONS];
+};
+extern struct mpu_rgn_info mpu_rgn_info;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_ARM_MPU */
+
+#endif
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 812a4944e78..6363f3d1d50 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -13,7 +13,7 @@
 /* PAGE_SHIFT determines the page size */
 #define PAGE_SHIFT		12
 #define PAGE_SIZE		(_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK		(~(PAGE_SIZE-1))
+#define PAGE_MASK		(~((1 << PAGE_SHIFT) - 1))
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index 18f5cef82ad..626989fec4d 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -30,6 +30,7 @@
 #define PMD_TYPE_FAULT		(_AT(pmdval_t, 0) << 0)
 #define PMD_TYPE_TABLE		(_AT(pmdval_t, 3) << 0)
 #define PMD_TYPE_SECT		(_AT(pmdval_t, 1) << 0)
+#define PMD_TABLE_BIT		(_AT(pmdval_t, 1) << 1)
 #define PMD_BIT4		(_AT(pmdval_t, 0))
 #define PMD_DOMAIN(x)		(_AT(pmdval_t, 0))
 #define PMD_APTABLE_SHIFT	(61)
@@ -41,6 +42,8 @@
  */
 #define PMD_SECT_BUFFERABLE	(_AT(pmdval_t, 1) << 2)
 #define PMD_SECT_CACHEABLE	(_AT(pmdval_t, 1) << 3)
+#define PMD_SECT_USER		(_AT(pmdval_t, 1) << 6)		/* AP[1] */
+#define PMD_SECT_RDONLY		(_AT(pmdval_t, 1) << 7)		/* AP[2] */
 #define PMD_SECT_S		(_AT(pmdval_t, 3) << 8)
 #define PMD_SECT_AF		(_AT(pmdval_t, 1) << 10)
 #define PMD_SECT_nG		(_AT(pmdval_t, 1) << 11)
@@ -66,6 +69,7 @@
 #define PTE_TYPE_MASK		(_AT(pteval_t, 3) << 0)
 #define PTE_TYPE_FAULT		(_AT(pteval_t, 0) << 0)
 #define PTE_TYPE_PAGE		(_AT(pteval_t, 3) << 0)
+#define PTE_TABLE_BIT		(_AT(pteval_t, 1) << 1)
 #define PTE_BUFFERABLE		(_AT(pteval_t, 1) << 2)		/* AttrIndx[0] */
 #define PTE_CACHEABLE		(_AT(pteval_t, 1) << 3)		/* AttrIndx[1] */
 #define PTE_EXT_SHARED		(_AT(pteval_t, 3) << 8)		/* SH[1:0], inner shareable */
@@ -79,4 +83,24 @@
 #define PHYS_MASK_SHIFT		(40)
 #define PHYS_MASK		((1ULL << PHYS_MASK_SHIFT) - 1)
 
+/*
+ * TTBR0/TTBR1 split (PAGE_OFFSET):
+ *   0x40000000: T0SZ = 2, T1SZ = 0 (not used)
+ *   0x80000000: T0SZ = 0, T1SZ = 1
+ *   0xc0000000: T0SZ = 0, T1SZ = 2
+ *
+ * Only use this feature if PHYS_OFFSET <= PAGE_OFFSET, otherwise
+ * booting secondary CPUs would end up using TTBR1 for the identity
+ * mapping set up in TTBR0.
+ */
+#if defined CONFIG_VMSPLIT_2G
+#define TTBR1_OFFSET	16			/* skip two L1 entries */
+#elif defined CONFIG_VMSPLIT_3G
+#define TTBR1_OFFSET	(4096 * (1 + 3))	/* only L2, skip pgd + 3*pmd */
+#else
+#define TTBR1_OFFSET	0
+#endif
+
+#define TTBR1_SIZE	(((PAGE_OFFSET >> 30) - 1) << 16)
+
 #endif
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 86b8fe398b9..5689c18c85f 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -33,7 +33,7 @@
 #define PTRS_PER_PMD		512
 #define PTRS_PER_PGD		4
 
-#define PTE_HWTABLE_PTRS	(PTRS_PER_PTE)
+#define PTE_HWTABLE_PTRS	(0)
 #define PTE_HWTABLE_OFF		(0)
 #define PTE_HWTABLE_SIZE	(PTRS_PER_PTE * sizeof(u64))
 
@@ -48,20 +48,28 @@
 #define PMD_SHIFT		21
 
 #define PMD_SIZE		(1UL << PMD_SHIFT)
-#define PMD_MASK		(~(PMD_SIZE-1))
+#define PMD_MASK		(~((1 << PMD_SHIFT) - 1))
 #define PGDIR_SIZE		(1UL << PGDIR_SHIFT)
-#define PGDIR_MASK		(~(PGDIR_SIZE-1))
+#define PGDIR_MASK		(~((1 << PGDIR_SHIFT) - 1))
 
 /*
  * section address mask and size definitions.
  */
 #define SECTION_SHIFT		21
 #define SECTION_SIZE		(1UL << SECTION_SHIFT)
-#define SECTION_MASK		(~(SECTION_SIZE-1))
+#define SECTION_MASK		(~((1 << SECTION_SHIFT) - 1))
 
 #define USER_PTRS_PER_PGD	(PAGE_OFFSET / PGDIR_SIZE)
 
 /*
+ * Hugetlb definitions.
+ */
+#define HPAGE_SHIFT		PMD_SHIFT
+#define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
+#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+
+/*
  * "Linux" PTE definitions for LPAE.
  *
  * These bits overlap with the hardware bits but the naming is preserved for
@@ -79,6 +87,11 @@
 #define L_PTE_SPECIAL		(_AT(pteval_t, 1) << 56)	/* unused */
 #define L_PTE_NONE		(_AT(pteval_t, 1) << 57)	/* PROT_NONE */
 
+#define PMD_SECT_VALID		(_AT(pmdval_t, 1) << 0)
+#define PMD_SECT_DIRTY		(_AT(pmdval_t, 1) << 55)
+#define PMD_SECT_SPLITTING	(_AT(pmdval_t, 1) << 56)
+#define PMD_SECT_NONE		(_AT(pmdval_t, 1) << 57)
+
 /*
  * To be used in assembly code with the upper page attributes.
  */
@@ -166,8 +179,83 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 		clean_pmd_entry(pmdp);	\
 	} while (0)
 
+/*
+ * For 3 levels of paging the PTE_EXT_NG bit will be set for user address ptes
+ * that are written to a page table but not for ptes created with mk_pte.
+ *
+ * In hugetlb_no_page, a new huge pte (new_pte) is generated and passed to
+ * hugetlb_cow, where it is compared with an entry in a page table.
+ * This comparison test fails erroneously leading ultimately to a memory leak.
+ *
+ * To correct this behaviour, we mask off PTE_EXT_NG for any pte that is
+ * present before running the comparison.
+ */
+#define __HAVE_ARCH_PTE_SAME
+#define pte_same(pte_a,pte_b)	((pte_present(pte_a) ? pte_val(pte_a) & ~PTE_EXT_NG	\
+					: pte_val(pte_a))				\
+				== (pte_present(pte_b) ? pte_val(pte_b) & ~PTE_EXT_NG	\
+					: pte_val(pte_b)))
+
 #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,__pte(pte_val(pte)|(ext)))
 
+#define pte_huge(pte)		(pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT))
+#define pte_mkhuge(pte)		(__pte(pte_val(pte) & ~PTE_TABLE_BIT))
+
+#define pmd_young(pmd)		(pmd_val(pmd) & PMD_SECT_AF)
+
+#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write(pmd)		(!(pmd_val(pmd) & PMD_SECT_RDONLY))
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
+#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
+#endif
+
+#define PMD_BIT_FUNC(fn,op) \
+static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
+
+PMD_BIT_FUNC(wrprotect,	|= PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkold,	&= ~PMD_SECT_AF);
+PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
+PMD_BIT_FUNC(mkwrite,   &= ~PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkdirty,   |= PMD_SECT_DIRTY);
+PMD_BIT_FUNC(mkyoung,   |= PMD_SECT_AF);
+
+#define pmd_mkhuge(pmd)		(__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
+
+#define pmd_pfn(pmd)		(((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
+#define pfn_pmd(pfn,prot)	(__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define mk_pmd(page,prot)	pfn_pmd(page_to_pfn(page),prot)
+
+/* Represent a not-present pmd by zero; this is used by pmdp_invalidate. */
+#define pmd_mknotpresent(pmd)	(__pmd(0))
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | PMD_SECT_RDONLY |
+				PMD_SECT_VALID | PMD_SECT_NONE;
+	pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask);
+	return pmd;
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+			      pmd_t *pmdp, pmd_t pmd)
+{
+	BUG_ON(addr >= TASK_SIZE);
+
+	/* create a faulting entry if PROT_NONE protected */
+	if (pmd_val(pmd) & PMD_SECT_NONE)
+		pmd_val(pmd) &= ~PMD_SECT_VALID;
+
+	*pmdp = __pmd(pmd_val(pmd) | PMD_SECT_nG);
+	flush_pmd_entry(pmdp);
+}
+
+static inline int has_transparent_hugepage(void)
+{
+	return 1;
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_PGTABLE_3LEVEL_H */
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 229e0dde9c7..04aeb02d2e1 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -24,6 +24,9 @@
 #include <asm/memory.h>
 #include <asm/pgtable-hwdef.h>
 
+
+#include <asm/tlbflush.h>
+
 #ifdef CONFIG_ARM_LPAE
 #include <asm/pgtable-3level.h>
 #else
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index f3628fb3d2b..5324c1112f3 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -60,7 +60,7 @@ extern struct processor {
 	/*
 	 * Set the page table
 	 */
-	void (*switch_mm)(unsigned long pgd_phys, struct mm_struct *mm);
+	void (*switch_mm)(phys_addr_t pgd_phys, struct mm_struct *mm);
 	/*
 	 * Set a possibly extended PTE.  Non-extended PTEs should
 	 * ignore 'ext'.
@@ -82,7 +82,7 @@ extern void cpu_proc_init(void);
 extern void cpu_proc_fin(void);
 extern int cpu_do_idle(void);
 extern void cpu_dcache_clean_area(void *, int);
-extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
+extern void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
 #ifdef CONFIG_ARM_LPAE
 extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte);
 #else
@@ -116,13 +116,25 @@ extern void cpu_resume(void);
 #define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm)
 
 #ifdef CONFIG_ARM_LPAE
+
+#define cpu_get_ttbr(nr)					\
+	({							\
+		u64 ttbr;					\
+		__asm__("mrrc	p15, " #nr ", %Q0, %R0, c2"	\
+			: "=r" (ttbr));				\
+		ttbr;						\
+	})
+
+#define cpu_set_ttbr(nr, val)					\
+	do {							\
+		u64 ttbr = val;					\
+		__asm__("mcrr	p15, " #nr ", %Q0, %R0, c2"	\
+			: : "r" (ttbr));			\
+	} while (0)
+
 #define cpu_get_pgd()	\
 	({						\
-		unsigned long pg, pg2;			\
-		__asm__("mrrc	p15, 0, %0, %1, c2"	\
-			: "=r" (pg), "=r" (pg2)		\
-			:				\
-			: "cc");			\
+		u64 pg = cpu_get_ttbr(0);		\
 		pg &= ~(PTRS_PER_PGD*sizeof(pgd_t)-1);	\
 		(pgd_t *)phys_to_virt(pg);		\
 	})
@@ -137,6 +149,10 @@ extern void cpu_resume(void);
 	})
 #endif
 
+#else	/*!CONFIG_MMU */
+
+#define cpu_switch_mm(pgd,mm)	{ }
+
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index d3a22bebe6c..a8cae71cace 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -65,7 +65,10 @@ asmlinkage void secondary_start_kernel(void);
  * Initial data for bringing up a secondary CPU.
  */
 struct secondary_data {
-	unsigned long pgdir;
+	union {
+		unsigned long mpu_rgn_szr;
+		unsigned long pgdir;
+	};
 	unsigned long swapper_pg_dir;
 	void *stack;
 };
diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h
index e7898320273..6462a721ebd 100644
--- a/arch/arm/include/asm/smp_plat.h
+++ b/arch/arm/include/asm/smp_plat.h
@@ -26,6 +26,9 @@ static inline bool is_smp(void)
 }
 
 /* all SMP configurations have the extended CPUID registers */
+#ifndef CONFIG_MMU
+#define tlb_ops_need_broadcast()	0
+#else
 static inline int tlb_ops_need_broadcast(void)
 {
 	if (!is_smp())
@@ -33,6 +36,7 @@ static inline int tlb_ops_need_broadcast(void)
 
 	return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 2;
 }
+#endif
 
 #if !defined(CONFIG_SMP) || __LINUX_ARM_ARCH__ >= 7
 #define cache_ops_need_broadcast()	0
@@ -66,4 +70,22 @@ static inline int get_logical_index(u32 mpidr)
 	return -EINVAL;
 }
 
+/*
+ * NOTE ! Assembly code relies on the following
+ * structure memory layout in order to carry out load
+ * multiple from its base address. For more
+ * information check arch/arm/kernel/sleep.S
+ */
+struct mpidr_hash {
+	u32	mask; /* used by sleep.S */
+	u32	shift_aff[3]; /* used by sleep.S */
+	u32	bits;
+};
+
+extern struct mpidr_hash mpidr_hash;
+
+static inline u32 mpidr_hash_size(void)
+{
+	return 1 << mpidr_hash.bits;
+}
 #endif
diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h
index 18d16937361..0393fbab8dd 100644
--- a/arch/arm/include/asm/smp_scu.h
+++ b/arch/arm/include/asm/smp_scu.h
@@ -23,10 +23,21 @@ static inline unsigned long scu_a9_get_base(void)
 	return pa;
 }
 
+#ifdef CONFIG_HAVE_ARM_SCU
 unsigned int scu_get_core_count(void __iomem *);
 int scu_power_mode(void __iomem *, unsigned int);
+#else
+static inline unsigned int scu_get_core_count(void __iomem *scu_base)
+{
+	return 0;
+}
+static inline int scu_power_mode(void __iomem *scu_base, unsigned int mode)
+{
+	return -EINVAL;
+}
+#endif
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_HAVE_ARM_SCU)
 void scu_enable(void __iomem *scu_base);
 #else
 static inline void scu_enable(void __iomem *scu_base) {}
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 6220e9fdf4c..f8b8965666e 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -97,19 +97,22 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
-	unsigned long tmp;
+	unsigned long contended, res;
 	u32 slock;
 
-	__asm__ __volatile__(
-"	ldrex	%0, [%2]\n"
-"	subs	%1, %0, %0, ror #16\n"
-"	addeq	%0, %0, %3\n"
-"	strexeq	%1, %0, [%2]"
-	: "=&r" (slock), "=&r" (tmp)
-	: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
-	: "cc");
-
-	if (tmp == 0) {
+	do {
+		__asm__ __volatile__(
+		"	ldrex	%0, [%3]\n"
+		"	mov	%2, #0\n"
+		"	subs	%1, %0, %0, ror #16\n"
+		"	addeq	%0, %0, %4\n"
+		"	strexeq	%2, %0, [%3]"
+		: "=&r" (slock), "=&r" (contended), "=r" (res)
+		: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
+		: "cc");
+	} while (res);
+
+	if (!contended) {
 		smp_mb();
 		return 1;
 	} else {
diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h
index 1c0a551ae37..cd20029bcd9 100644
--- a/arch/arm/include/asm/suspend.h
+++ b/arch/arm/include/asm/suspend.h
@@ -1,6 +1,11 @@
 #ifndef __ASM_ARM_SUSPEND_H
 #define __ASM_ARM_SUSPEND_H
 
+struct sleep_save_sp {
+	u32 *save_ptr_stash;
+	u32 save_ptr_stash_phys;
+};
+
 extern void cpu_resume(void);
 extern int cpu_suspend(unsigned long, int (*)(unsigned long));
 
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 1995d1a8406..214d4158089 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
 	struct cpu_context_save	cpu_context;	/* cpu context */
 	__u32			syscall;	/* syscall number */
 	__u8			used_cp[16];	/* thread used copro */
-	unsigned long		tp_value;
+	unsigned long		tp_value[2];	/* TLS registers */
 #ifdef CONFIG_CRUNCH
 	struct crunch_state	crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index bdf2b8458ec..46e7cfb3e72 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -204,6 +204,12 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 #endif
 }
 
+static inline void
+tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
+{
+	tlb_add_flush(tlb, addr);
+}
+
 #define pte_free_tlb(tlb, ptep, addr)	__pte_free_tlb(tlb, ptep, addr)
 #define pmd_free_tlb(tlb, pmdp, addr)	__pmd_free_tlb(tlb, pmdp, addr)
 #define pud_free_tlb(tlb, pudp, addr)	pud_free((tlb)->mm, pudp)
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index a3625d141c1..fdbb9e36974 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -535,8 +535,33 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 }
 #endif
 
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
+
 #endif
 
-#endif /* CONFIG_MMU */
+#elif defined(CONFIG_SMP)	/* !CONFIG_MMU */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/mm_types.h>
+
+static inline void local_flush_tlb_all(void)									{ }
+static inline void local_flush_tlb_mm(struct mm_struct *mm)							{ }
+static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)			{ }
+static inline void local_flush_tlb_kernel_page(unsigned long kaddr)						{ }
+static inline void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)	{ }
+static inline void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)				{ }
+static inline void local_flush_bp_all(void)									{ }
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_mm(struct mm_struct *mm);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr);
+extern void flush_tlb_kernel_page(unsigned long kaddr);
+extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_bp_all(void);
+#endif	/* __ASSEMBLY__ */
+
+#endif
 
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6c025..83259b87333 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,27 +2,30 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
-	.macro set_tls_none, tp, tmp1, tmp2
+#include <asm/asm-offsets.h>
+	.macro switch_tls_none, base, tp, tpuser, tmp1, tmp2
 	.endm
 
-	.macro set_tls_v6k, tp, tmp1, tmp2
+	.macro switch_tls_v6k, base, tp, tpuser, tmp1, tmp2
+	mrc	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
 	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
-	mov	\tmp1, #0
-	mcr	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
+	mcr	p15, 0, \tpuser, c13, c0, 2	@ and the user r/w register
+	str	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
 	.endm
 
-	.macro set_tls_v6, tp, tmp1, tmp2
+	.macro switch_tls_v6, base, tp, tpuser, tmp1, tmp2
 	ldr	\tmp1, =elf_hwcap
 	ldr	\tmp1, [\tmp1, #0]
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
-	movne	\tmp1, #0
-	mcrne	p15, 0, \tmp1, c13, c0, 2	@ clear user r/w TLS register
 	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
+	mrcne	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
+	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
+	mcrne	p15, 0, \tpuser, c13, c0, 2	@ set user r/w register
+	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
 	.endm
 
-	.macro set_tls_software, tp, tmp1, tmp2
+	.macro switch_tls_software, base, tp, tpuser, tmp1, tmp2
 	mov	\tmp1, #0xffff0fff
 	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
@@ -31,19 +34,30 @@
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu		1
 #define has_tls_reg		1
-#define set_tls		set_tls_none
+#define switch_tls	switch_tls_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
-#define set_tls		set_tls_v6
+#define switch_tls	switch_tls_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
 #define has_tls_reg		1
-#define set_tls		set_tls_v6k
+#define switch_tls	switch_tls_v6k
 #else
 #define tls_emu		0
 #define has_tls_reg		0
-#define set_tls		set_tls_software
+#define switch_tls	switch_tls_software
 #endif
 
+#ifndef __ASSEMBLY__
+static inline unsigned long get_tpuser(void)
+{
+	unsigned long reg = 0;
+
+	if (has_tls_reg && !tls_emu)
+		__asm__("mrc p15, 0, %0, c13, c0, 2" : "=r" (reg));
+
+	return reg;
+}
+#endif
 #endif	/* __ASMARM_TLS_H */
diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 30cdacb675a..359a7b50b15 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -1,7 +1,6 @@
 #ifndef _ASM_ARM_XEN_PAGE_H
 #define _ASM_ARM_XEN_PAGE_H
 
-#include <asm/mach/map.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
@@ -88,6 +87,6 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 	return __set_phys_to_machine(pfn, mfn);
 }
 
-#define xen_remap(cookie, size) __arm_ioremap((cookie), (size), MT_MEMORY);
+#define xen_remap(cookie, size) ioremap_cached((cookie), (size));
 
 #endif /* _ASM_ARM_XEN_PAGE_H */
diff --git a/arch/arm/include/debug/vexpress.S b/arch/arm/include/debug/vexpress.S
index dc8e882a625..acafb229e2b 100644
--- a/arch/arm/include/debug/vexpress.S
+++ b/arch/arm/include/debug/vexpress.S
@@ -16,6 +16,8 @@
 #define DEBUG_LL_PHYS_BASE_RS1		0x1c000000
 #define DEBUG_LL_UART_OFFSET_RS1	0x00090000
 
+#define DEBUG_LL_UART_PHYS_CRX		0xb0090000
+
 #define DEBUG_LL_VIRT_BASE		0xf8000000
 
 #if defined(CONFIG_DEBUG_VEXPRESS_UART0_DETECT)
@@ -67,6 +69,14 @@
 
 #include <asm/hardware/debug-pl01x.S>
 
+#elif defined(CONFIG_DEBUG_VEXPRESS_UART0_CRX)
+
+		.macro	addruart,rp,tmp,tmp2
+		ldr	\rp, =DEBUG_LL_UART_PHYS_CRX
+		.endm
+
+#include <asm/hardware/debug-pl01x.S>
+
 #else /* CONFIG_DEBUG_LL_UART_NONE */
 
 		.macro	addruart, rp, rv, tmp
diff --git a/arch/arm/include/uapi/asm/hwcap.h b/arch/arm/include/uapi/asm/hwcap.h
index 3688fd15a32..6d34d080372 100644
--- a/arch/arm/include/uapi/asm/hwcap.h
+++ b/arch/arm/include/uapi/asm/hwcap.h
@@ -25,6 +25,6 @@
 #define HWCAP_IDIVT	(1 << 18)
 #define HWCAP_VFPD32	(1 << 19)	/* set if VFP has 32 regs (not 16) */
 #define HWCAP_IDIV	(HWCAP_IDIVA | HWCAP_IDIVT)
-
+#define HWCAP_LPAE	(1 << 20)
 
 #endif /* _UAPI__ASMARM_HWCAP_H */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index f4285b5ffb0..fccfbdb03df 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -38,7 +38,10 @@ obj-$(CONFIG_ARTHUR)		+= arthur.o
 obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
 obj-$(CONFIG_ARM_CPU_SUSPEND)	+= sleep.o suspend.o
-obj-$(CONFIG_SMP)		+= smp.o smp_tlb.o
+obj-$(CONFIG_SMP)		+= smp.o
+ifdef CONFIG_MMU
+obj-$(CONFIG_SMP)		+= smp_tlb.o
+endif
 obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
 obj-$(CONFIG_HAVE_ARM_TWD)	+= smp_twd.o
 obj-$(CONFIG_ARM_ARCH_TIMER)	+= arch_timer.o
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index ee68cce6b48..ded041711be 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -23,6 +23,7 @@
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/procinfo.h>
+#include <asm/suspend.h>
 #include <asm/hardware/cache-l2x0.h>
 #include <linux/kbuild.h>
 
@@ -145,6 +146,11 @@ int main(void)
 #ifdef MULTI_CACHE
   DEFINE(CACHE_FLUSH_KERN_ALL,	offsetof(struct cpu_cache_fns, flush_kern_all));
 #endif
+#ifdef CONFIG_ARM_CPU_SUSPEND
+  DEFINE(SLEEP_SAVE_SP_SZ,	sizeof(struct sleep_save_sp));
+  DEFINE(SLEEP_SAVE_SP_PHYS,	offsetof(struct sleep_save_sp, save_ptr_stash_phys));
+  DEFINE(SLEEP_SAVE_SP_VIRT,	offsetof(struct sleep_save_sp, save_ptr_stash));
+#endif
   BLANK();
   DEFINE(DMA_BIDIRECTIONAL,	DMA_BIDIRECTIONAL);
   DEFINE(DMA_TO_DEVICE,		DMA_TO_DEVICE);
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 582b405befc..a39cfc2a1f9 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -685,15 +685,16 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
  THUMB(	str	lr, [ip], #4		   )
+	ldr	r4, [r2, #TI_TP_VALUE]
+	ldr	r5, [r2, #TI_TP_VALUE + 4]
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
-	set_tls	r3, r4, r5
+	switch_tls r1, r4, r5, r3, r7
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
 	ldr	r8, =__stack_chk_guard
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 85a72b0809c..94104bf6971 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -366,6 +366,16 @@ ENTRY(vector_swi)
 #endif
 	zero_fp
 
+#ifdef CONFIG_ALIGNMENT_TRAP
+	ldr	ip, __cr_alignment
+	ldr	ip, [ip]
+	mcr	p15, 0, ip, c1, c0		@ update control register
+#endif
+
+	enable_irq
+	ct_user_exit
+	get_thread_info tsk
+
 	/*
 	 * Get the system call number.
 	 */
@@ -379,9 +389,9 @@ ENTRY(vector_swi)
 #ifdef CONFIG_ARM_THUMB
 	tst	r8, #PSR_T_BIT
 	movne	r10, #0				@ no thumb OABI emulation
-	ldreq	r10, [lr, #-4]			@ get SWI instruction
+ USER(	ldreq	r10, [lr, #-4]		)	@ get SWI instruction
 #else
-	ldr	r10, [lr, #-4]			@ get SWI instruction
+ USER(	ldr	r10, [lr, #-4]		)	@ get SWI instruction
 #endif
 #ifdef CONFIG_CPU_ENDIAN_BE8
 	rev	r10, r10			@ little endian instruction
@@ -396,22 +406,13 @@ ENTRY(vector_swi)
 	/* Legacy ABI only, possibly thumb mode. */
 	tst	r8, #PSR_T_BIT			@ this is SPSR from save_user_regs
 	addne	scno, r7, #__NR_SYSCALL_BASE	@ put OS number in
-	ldreq	scno, [lr, #-4]
+ USER(	ldreq	scno, [lr, #-4]		)
 
 #else
 	/* Legacy ABI only. */
-	ldr	scno, [lr, #-4]			@ get SWI instruction
+ USER(	ldr	scno, [lr, #-4]		)	@ get SWI instruction
 #endif
 
-#ifdef CONFIG_ALIGNMENT_TRAP
-	ldr	ip, __cr_alignment
-	ldr	ip, [ip]
-	mcr	p15, 0, ip, c1, c0		@ update control register
-#endif
-	enable_irq
-	ct_user_exit
-
-	get_thread_info tsk
 	adr	tbl, sys_call_table		@ load syscall table pointer
 
 #if defined(CONFIG_OABI_COMPAT)
@@ -446,6 +447,21 @@ local_restart:
 	eor	r0, scno, #__NR_SYSCALL_BASE	@ put OS number back
 	bcs	arm_syscall	
 	b	sys_ni_syscall			@ not private func
+
+#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI)
+	/*
+	 * We failed to handle a fault trying to access the page
+	 * containing the swi instruction, but we're not really in a
+	 * position to return -EFAULT. Instead, return back to the
+	 * instruction and re-enter the user fault handling path trying
+	 * to page it in. This will likely result in sending SEGV to the
+	 * current task.
+	 */
+9001:
+	sub	lr, lr, #4
+	str	lr, [sp, #S_PC]
+	b	ret_fast_syscall
+#endif
 ENDPROC(vector_swi)
 
 	/*
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 8812ce88f7a..75f14cc3e07 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -17,9 +17,12 @@
 #include <asm/assembler.h>
 #include <asm/ptrace.h>
 #include <asm/asm-offsets.h>
+#include <asm/memory.h>
 #include <asm/cp15.h>
 #include <asm/thread_info.h>
 #include <asm/v7m.h>
+#include <asm/mpu.h>
+#include <asm/page.h>
 
 /*
  * Kernel startup entry point.
@@ -63,12 +66,74 @@ ENTRY(stext)
 	movs	r10, r5				@ invalid processor (r5=0)?
 	beq	__error_p				@ yes, error 'p'
 
-	adr	lr, BSYM(__after_proc_init)	@ return (PIC) address
+#ifdef CONFIG_ARM_MPU
+	/* Calculate the size of a region covering just the kernel */
+	ldr	r5, =PHYS_OFFSET		@ Region start: PHYS_OFFSET
+	ldr     r6, =(_end)			@ Cover whole kernel
+	sub	r6, r6, r5			@ Minimum size of region to map
+	clz	r6, r6				@ Region size must be 2^N...
+	rsb	r6, r6, #31			@ ...so round up region size
+	lsl	r6, r6, #MPU_RSR_SZ		@ Put size in right field
+	orr	r6, r6, #(1 << MPU_RSR_EN)	@ Set region enabled bit
+	bl	__setup_mpu
+#endif
+	ldr	r13, =__mmap_switched		@ address to jump to after
+						@ initialising sctlr
+	adr	lr, BSYM(1f)			@ return (PIC) address
  ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
  THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
  THUMB(	mov	pc, r12				)
+ 1:	b	__after_proc_init
 ENDPROC(stext)
 
+#ifdef CONFIG_SMP
+	__CPUINIT
+ENTRY(secondary_startup)
+	/*
+	 * Common entry point for secondary CPUs.
+	 *
+	 * Ensure that we're in SVC mode, and IRQs are disabled.  Lookup
+	 * the processor type - there is no need to check the machine type
+	 * as it has already been validated by the primary processor.
+	 */
+	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9
+#ifndef CONFIG_CPU_CP15
+	ldr	r9, =CONFIG_PROCESSOR_ID
+#else
+	mrc	p15, 0, r9, c0, c0		@ get processor id
+#endif
+	bl	__lookup_processor_type		@ r5=procinfo r9=cpuid
+	movs	r10, r5				@ invalid processor?
+	beq	__error_p			@ yes, error 'p'
+
+	adr	r4, __secondary_data
+	ldmia	r4, {r7, r12}
+
+#ifdef CONFIG_ARM_MPU
+	/* Use MPU region info supplied by __cpu_up */
+	ldr	r6, [r7]			@ get secondary_data.mpu_rgn_szr
+	bl      __setup_mpu			@ Initialize the MPU
+#endif
+
+	adr	lr, BSYM(__after_proc_init)	@ return address
+	mov	r13, r12			@ __secondary_switched address
+ ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
+ THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
+ THUMB(	mov	pc, r12				)
+ENDPROC(secondary_startup)
+
+ENTRY(__secondary_switched)
+	ldr	sp, [r7, #8]			@ set up the stack pointer
+	mov	fp, #0
+	b	secondary_start_kernel
+ENDPROC(__secondary_switched)
+
+	.type	__secondary_data, %object
+__secondary_data:
+	.long	secondary_data
+	.long	__secondary_switched
+#endif /* CONFIG_SMP */
+
 /*
  * Set the Control Register and Read the process ID.
  */
@@ -99,10 +164,97 @@ __after_proc_init:
 #endif
 	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
 #endif /* CONFIG_CPU_CP15 */
-
-	b	__mmap_switched			@ clear the BSS and jump
-						@ to start_kernel
+	mov	pc, r13
 ENDPROC(__after_proc_init)
 	.ltorg
 
+#ifdef CONFIG_ARM_MPU
+
+
+/* Set which MPU region should be programmed */
+.macro set_region_nr tmp, rgnr
+	mov	\tmp, \rgnr			@ Use static region numbers
+	mcr	p15, 0, \tmp, c6, c2, 0		@ Write RGNR
+.endm
+
+/* Setup a single MPU region, either D or I side (D-side for unified) */
+.macro setup_region bar, acr, sr, side = MPU_DATA_SIDE
+	mcr	p15, 0, \bar, c6, c1, (0 + \side)	@ I/DRBAR
+	mcr	p15, 0, \acr, c6, c1, (4 + \side)	@ I/DRACR
+	mcr	p15, 0, \sr, c6, c1, (2 + \side)		@ I/DRSR
+.endm
+
+/*
+ * Setup the MPU and initial MPU Regions. We create the following regions:
+ * Region 0: Use this for probing the MPU details, so leave disabled.
+ * Region 1: Background region - covers the whole 4GB address space as strongly ordered
+ * Region 2: Normal, Shared, cacheable for RAM. From PHYS_OFFSET, size from r6
+ * Region 3: Normal, shared, inaccessible from PL0 to protect the vectors page
+ *
+ * r6: Value to be written to DRSR (and IRSR if required) for MPU_RAM_REGION
+*/
+
+ENTRY(__setup_mpu)
+
+	/* Probe for v7 PMSA compliance */
+	mrc	p15, 0, r0, c0, c1, 4		@ Read ID_MMFR0
+	and	r0, r0, #(MMFR0_PMSA)		@ PMSA field
+	teq	r0, #(MMFR0_PMSAv7)		@ PMSA v7
+	bne	__error_p			@ Fail: ARM_MPU on NOT v7 PMSA
+
+	/* Determine whether the D/I-side memory map is unified. We set the
+	 * flags here and continue to use them for the rest of this function */
+	mrc	p15, 0, r0, c0, c0, 4		@ MPUIR
+	ands	r5, r0, #MPUIR_DREGION_SZMASK	@ 0 size d region => No MPU
+	beq	__error_p			@ Fail: ARM_MPU and no MPU
+	tst	r0, #MPUIR_nU			@ MPUIR_nU = 0 for unified
+
+	/* Setup second region first to free up r6 */
+	set_region_nr r0, #MPU_RAM_REGION
+	isb
+	/* Full access from PL0, PL1, shared for CONFIG_SMP, cacheable */
+	ldr	r0, =PHYS_OFFSET		@ RAM starts at PHYS_OFFSET
+	ldr	r5,=(MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL)
+
+	setup_region r0, r5, r6, MPU_DATA_SIDE	@ PHYS_OFFSET, shared, enabled
+	beq	1f				@ Unified memory map: skip I-side
+	setup_region r0, r5, r6, MPU_INSTR_SIDE @ PHYS_OFFSET, shared, enabled
+1:	isb
+
+	/* First/background region */
+	set_region_nr r0, #MPU_BG_REGION
+	isb
+	/* Execute Never,  strongly ordered, inaccessible to PL0, rw PL1  */
+	mov	r0, #0				@ BG region starts at 0x0
+	ldr	r5,=(MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA)
+	mov	r6, #MPU_RSR_ALL_MEM		@ 4GB region, enabled
+
+	setup_region r0, r5, r6, MPU_DATA_SIDE	@ 0x0, BG region, enabled
+	beq	2f				@ Unified memory map: skip I-side
+	setup_region r0, r5, r6, MPU_INSTR_SIDE @ 0x0, BG region, enabled
+2:	isb
+
+	/* Vectors region */
+	set_region_nr r0, #MPU_VECTORS_REGION
+	isb
+	/* Shared, inaccessible to PL0, rw PL1 */
+	mov	r0, #CONFIG_VECTORS_BASE	@ Cover from VECTORS_BASE
+	ldr	r5,=(MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL)
+	/* Writing N to bits 5:1 (RSR_SZ) --> region size 2^(N+1) */
+	mov	r6, #(((PAGE_SHIFT - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN)
+
+	setup_region r0, r5, r6, MPU_DATA_SIDE	@ VECTORS_BASE, PL0 NA, enabled
+	beq	3f				@ Unified memory map: skip I-side
+	setup_region r0, r5, r6, MPU_INSTR_SIDE	@ VECTORS_BASE, PL0 NA, enabled
+3:	isb
+
+	/* Enable the MPU */
+	mrc	p15, 0, r0, c1, c0, 0		@ Read SCTLR
+	bic     r0, r0, #CR_BR			@ Disable the 'default mem-map'
+	orr	r0, r0, #CR_M			@ Set SCTLR.M (MPU on)
+	mcr	p15, 0, r0, c1, c0, 0		@ Enable MPU
+	isb
+	mov pc,lr
+ENDPROC(__setup_mpu)
+#endif
 #include "head-common.S"
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 8bac553fe21..45e8935cae4 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -156,7 +156,7 @@ ENDPROC(stext)
  *
  * Returns:
  *  r0, r3, r5-r7 corrupted
- *  r4 = physical page table address
+ *  r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)
  */
 __create_page_tables:
 	pgtbl	r4, r8				@ page table address
@@ -331,6 +331,7 @@ __create_page_tables:
 #endif
 #ifdef CONFIG_ARM_LPAE
 	sub	r4, r4, #0x1000		@ point to the PGD table
+	mov	r4, r4, lsr #ARCH_PGD_SHIFT
 #endif
 	mov	pc, lr
 ENDPROC(__create_page_tables)
@@ -408,7 +409,7 @@ __secondary_data:
  *  r0  = cp#15 control register
  *  r1  = machine ID
  *  r2  = atags or dtb pointer
- *  r4  = page table pointer
+ *  r4  = page table (see ARCH_PGD_SHIFT in asm/memory.h)
  *  r9  = processor ID
  *  r13 = *virtual* address to jump to upon completion
  */
@@ -427,10 +428,7 @@ __enable_mmu:
 #ifdef CONFIG_CPU_ICACHE_DISABLE
 	bic	r0, r0, #CR_I
 #endif
-#ifdef CONFIG_ARM_LPAE
-	mov	r5, #0
-	mcrr	p15, 0, r4, r5, c2		@ load TTBR0
-#else
+#ifndef CONFIG_ARM_LPAE
 	mov	r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
 		      domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
 		      domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index 1315c4ccfa5..4910232c483 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -153,6 +153,13 @@ THUMB(	orr	r7, #(1 << 30)	)	@ HSCTLR.TE
 	mrc	p15, 4, r7, c14, c1, 0	@ CNTHCTL
 	orr	r7, r7, #3		@ PL1PCEN | PL1PCTEN
 	mcr	p15, 4, r7, c14, c1, 0	@ CNTHCTL
+	mov	r7, #0
+	mcrr	p15, 4, r7, r7, c14	@ CNTVOFF
+
+	@ Disable virtual timer in case it was counting
+	mrc	p15, 0, r7, c14, c3, 1	@ CNTV_CTL
+	bic	r7, #1			@ Clear ENABLE
+	mcr	p15, 0, r7, c14, c3, 1	@ CNTV_CTL
 1:
 #endif
 
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 8c3094d0f7b..d9f5cd4e533 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -569,6 +569,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		return;
 	}
 
+	perf_callchain_store(entry, regs->ARM_pc);
 	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
 
 	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 6e8931ccf13..7f1efcd4a6e 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -39,6 +39,7 @@
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/mach/time.h>
+#include <asm/tls.h>
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -374,7 +375,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
+	thread->tp_value[1] = get_tpuser();
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c
index 23a11424c56..219f1d73572 100644
--- a/arch/arm/kernel/psci_smp.c
+++ b/arch/arm/kernel/psci_smp.c
@@ -68,8 +68,6 @@ void __ref psci_cpu_die(unsigned int cpu)
        /* We should never return */
        panic("psci: cpu %d failed to shutdown\n", cpu);
 }
-#else
-#define psci_cpu_die NULL
 #endif
 
 bool __init psci_smp_available(void)
@@ -80,5 +78,7 @@ bool __init psci_smp_available(void)
 
 struct smp_operations __initdata psci_smp_ops = {
 	.smp_boot_secondary	= psci_boot_secondary,
+#ifdef CONFIG_HOTPLUG_CPU
 	.cpu_die		= psci_cpu_die,
+#endif
 };
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeffd9f6..2bc1514d6db 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 1c8278de6c4..9b653278c9e 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -367,7 +367,7 @@ void __init early_print(const char *str, ...)
 
 static void __init cpuid_init_hwcaps(void)
 {
-	unsigned int divide_instrs;
+	unsigned int divide_instrs, vmsa;
 
 	if (cpu_architecture() < CPU_ARCH_ARMv7)
 		return;
@@ -380,6 +380,11 @@ static void __init cpuid_init_hwcaps(void)
 	case 1:
 		elf_hwcap |= HWCAP_IDIVT;
 	}
+
+	/* LPAE implies atomic ldrd/strd instructions */
+	vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0;
+	if (vmsa >= 5)
+		elf_hwcap |= HWCAP_LPAE;
 }
 
 static void __init feat_v6_fixup(void)
@@ -470,9 +475,82 @@ void __init smp_setup_processor_id(void)
 	for (i = 1; i < nr_cpu_ids; ++i)
 		cpu_logical_map(i) = i == cpu ? 0 : i;
 
+	/*
+	 * clear __my_cpu_offset on boot CPU to avoid hang caused by
+	 * using percpu variable early, for example, lockdep will
+	 * access percpu variable inside lock_release
+	 */
+	set_my_cpu_offset(0);
+
 	printk(KERN_INFO "Booting Linux on physical CPU 0x%x\n", mpidr);
 }
 
+struct mpidr_hash mpidr_hash;
+#ifdef CONFIG_SMP
+/**
+ * smp_build_mpidr_hash - Pre-compute shifts required at each affinity
+ *			  level in order to build a linear index from an
+ *			  MPIDR value. Resulting algorithm is a collision
+ *			  free hash carried out through shifting and ORing
+ */
+static void __init smp_build_mpidr_hash(void)
+{
+	u32 i, affinity;
+	u32 fs[3], bits[3], ls, mask = 0;
+	/*
+	 * Pre-scan the list of MPIDRS and filter out bits that do
+	 * not contribute to affinity levels, ie they never toggle.
+	 */
+	for_each_possible_cpu(i)
+		mask |= (cpu_logical_map(i) ^ cpu_logical_map(0));
+	pr_debug("mask of set bits 0x%x\n", mask);
+	/*
+	 * Find and stash the last and first bit set at all affinity levels to
+	 * check how many bits are required to represent them.
+	 */
+	for (i = 0; i < 3; i++) {
+		affinity = MPIDR_AFFINITY_LEVEL(mask, i);
+		/*
+		 * Find the MSB and LSB bit positions
+		 * to determine how many bits are required
+		 * to express the affinity level.
+		 */
+		ls = fls(affinity);
+		fs[i] = affinity ? ffs(affinity) - 1 : 0;
+		bits[i] = ls - fs[i];
+	}
+	/*
+	 * An index can be created from the MPIDR by isolating the
+	 * significant bits at each affinity level and by shifting
+	 * them in order to compress the 24 bits values space to a
+	 * compressed set of values. This is equivalent to hashing
+	 * the MPIDR through shifting and ORing. It is a collision free
+	 * hash though not minimal since some levels might contain a number
+	 * of CPUs that is not an exact power of 2 and their bit
+	 * representation might contain holes, eg MPIDR[7:0] = {0x2, 0x80}.
+	 */
+	mpidr_hash.shift_aff[0] = fs[0];
+	mpidr_hash.shift_aff[1] = MPIDR_LEVEL_BITS + fs[1] - bits[0];
+	mpidr_hash.shift_aff[2] = 2*MPIDR_LEVEL_BITS + fs[2] -
+						(bits[1] + bits[0]);
+	mpidr_hash.mask = mask;
+	mpidr_hash.bits = bits[2] + bits[1] + bits[0];
+	pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] mask[0x%x] bits[%u]\n",
+				mpidr_hash.shift_aff[0],
+				mpidr_hash.shift_aff[1],
+				mpidr_hash.shift_aff[2],
+				mpidr_hash.mask,
+				mpidr_hash.bits);
+	/*
+	 * 4x is an arbitrary value used to warn on a hash table much bigger
+	 * than expected on most systems.
+	 */
+	if (mpidr_hash_size() > 4 * num_possible_cpus())
+		pr_warn("Large number of MPIDR hash buckets detected\n");
+	sync_cache_w(&mpidr_hash);
+}
+#endif
+
 static void __init setup_processor(void)
 {
 	struct proc_info_list *list;
@@ -820,6 +898,7 @@ void __init setup_arch(char **cmdline_p)
 				smp_set_ops(mdesc->smp);
 		}
 		smp_init_cpus();
+		smp_build_mpidr_hash();
 	}
 #endif
 
@@ -892,6 +971,7 @@ static const char *hwcap_str[] = {
 	"vfpv4",
 	"idiva",
 	"idivt",
+	"lpae",
 	NULL
 };
 
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 296786bdbb7..1c16c35c271 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -392,14 +392,19 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig,
 		if (ksig->ka.sa.sa_flags & SA_SIGINFO)
 			idx += 3;
 
+		/*
+		 * Put the sigreturn code on the stack no matter which return
+		 * mechanism we use in order to remain ABI compliant
+		 */
 		if (__put_user(sigreturn_codes[idx],   rc) ||
 		    __put_user(sigreturn_codes[idx+1], rc+1))
 			return 1;
 
-		if (cpsr & MODE32_BIT) {
+		if ((cpsr & MODE32_BIT) && !IS_ENABLED(CONFIG_ARM_MPU)) {
 			/*
 			 * 32-bit code can use the new high-page
-			 * signal return code support.
+			 * signal return code support except when the MPU has
+			 * protected the vectors page from PL0
 			 */
 			retcode = KERN_SIGRETURN_CODE + (idx << 2) + thumb;
 		} else {
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 987dcf33415..db1536b8b30 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -7,6 +7,49 @@
 	.text
 
 /*
+ * Implementation of MPIDR hash algorithm through shifting
+ * and OR'ing.
+ *
+ * @dst: register containing hash result
+ * @rs0: register containing affinity level 0 bit shift
+ * @rs1: register containing affinity level 1 bit shift
+ * @rs2: register containing affinity level 2 bit shift
+ * @mpidr: register containing MPIDR value
+ * @mask: register containing MPIDR mask
+ *
+ * Pseudo C-code:
+ *
+ *u32 dst;
+ *
+ *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 mpidr, u32 mask) {
+ *	u32 aff0, aff1, aff2;
+ *	u32 mpidr_masked = mpidr & mask;
+ *	aff0 = mpidr_masked & 0xff;
+ *	aff1 = mpidr_masked & 0xff00;
+ *	aff2 = mpidr_masked & 0xff0000;
+ *	dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2);
+ *}
+ * Input registers: rs0, rs1, rs2, mpidr, mask
+ * Output register: dst
+ * Note: input and output registers must be disjoint register sets
+ *       (e.g. a macro instance with mpidr = r1 and dst = r1 is invalid)
+ */
+	.macro compute_mpidr_hash dst, rs0, rs1, rs2, mpidr, mask
+	and	\mpidr, \mpidr, \mask			@ mask out MPIDR bits
+	and	\dst, \mpidr, #0xff			@ dst=aff0
+ ARM(	mov	\dst, \dst, lsr \rs0		)	@ dst=aff0>>rs0
+ THUMB(	lsr	\dst, \dst, \rs0		)
+	and	\mask, \mpidr, #0xff00			@ mask = aff1
+ ARM(	orr	\dst, \dst, \mask, lsr \rs1	)	@ dst|=(aff1>>rs1)
+ THUMB(	lsr	\mask, \mask, \rs1		)
+ THUMB(	orr	\dst, \dst, \mask		)
+	and	\mask, \mpidr, #0xff0000		@ mask = aff2
+ ARM(	orr	\dst, \dst, \mask, lsr \rs2	)	@ dst|=(aff2>>rs2)
+ THUMB(	lsr	\mask, \mask, \rs2		)
+ THUMB(	orr	\dst, \dst, \mask		)
+	.endm
+
+/*
  * Save CPU state for a suspend.  This saves the CPU general purpose
  * registers, and allocates space on the kernel stack to save the CPU
  * specific registers and some other data for resume.
@@ -29,12 +72,18 @@ ENTRY(__cpu_suspend)
 	mov	r1, r4			@ size of save block
 	mov	r2, r5			@ virtual SP
 	ldr	r3, =sleep_save_sp
-#ifdef CONFIG_SMP
-	ALT_SMP(mrc p15, 0, lr, c0, c0, 5)
-	ALT_UP(mov lr, #0)
-	and	lr, lr, #15
+	ldr	r3, [r3, #SLEEP_SAVE_SP_VIRT]
+	ALT_SMP(mrc p15, 0, r9, c0, c0, 5)
+        ALT_UP_B(1f)
+	ldr	r8, =mpidr_hash
+	/*
+	 * This ldmia relies on the memory layout of the mpidr_hash
+	 * variable (an instance of struct mpidr_hash).
+	 */
+	ldmia	r8, {r4-r7}	@ r4 = mpidr mask (r5,r6,r7) = l[0,1,2] shifts
+	compute_mpidr_hash	lr, r5, r6, r7, r9, r4
 	add	r3, r3, lr, lsl #2
-#endif
+1:
 	bl	__cpu_suspend_save
 	adr	lr, BSYM(cpu_suspend_abort)
 	ldmfd	sp!, {r0, pc}		@ call suspend fn
@@ -81,15 +130,23 @@ ENDPROC(cpu_resume_after_mmu)
 	.data
 	.align
 ENTRY(cpu_resume)
-#ifdef CONFIG_SMP
-	adr	r0, sleep_save_sp
-	ALT_SMP(mrc p15, 0, r1, c0, c0, 5)
-	ALT_UP(mov r1, #0)
-	and	r1, r1, #15
-	ldr	r0, [r0, r1, lsl #2]	@ stack phys addr
-#else
-	ldr	r0, sleep_save_sp	@ stack phys addr
-#endif
+	mov	r1, #0
+	ALT_SMP(mrc p15, 0, r0, c0, c0, 5)
+	ALT_UP_B(1f)
+	adr	r2, mpidr_hash_ptr
+	ldr	r3, [r2]
+	add	r2, r2, r3		@ r2 = struct mpidr_hash phys address
+	/*
+	 * This ldmia relies on the memory layout of the mpidr_hash
+	 * variable (an instance of struct mpidr_hash).
+	 */
+	ldmia	r2, { r3-r6 }	@ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts
+	compute_mpidr_hash	r1, r4, r5, r6, r0, r3
+1:
+	adr	r0, _sleep_save_sp
+	ldr	r0, [r0, #SLEEP_SAVE_SP_PHYS]
+	ldr	r0, [r0, r1, lsl #2]
+
 	setmode	PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1  @ set SVC, irqs off
 	@ load phys pgd, stack, resume fn
   ARM(	ldmia	r0!, {r1, sp, pc}	)
@@ -98,7 +155,11 @@ THUMB(	mov	sp, r2			)
 THUMB(	bx	r3			)
 ENDPROC(cpu_resume)
 
-sleep_save_sp:
-	.rept	CONFIG_NR_CPUS
-	.long	0				@ preserve stack phys ptr here
-	.endr
+	.align 2
+mpidr_hash_ptr:
+	.long	mpidr_hash - .			@ mpidr_hash struct offset
+
+	.type	sleep_save_sp, #object
+ENTRY(sleep_save_sp)
+_sleep_save_sp:
+	.space	SLEEP_SAVE_SP_SZ		@ struct sleep_save_sp
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 5919eb451bb..c5fb5469054 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -45,6 +45,7 @@
 #include <asm/smp_plat.h>
 #include <asm/virt.h>
 #include <asm/mach/arch.h>
+#include <asm/mpu.h>
 
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
@@ -78,6 +79,13 @@ void __init smp_set_ops(struct smp_operations *ops)
 		smp_ops = *ops;
 };
 
+static unsigned long get_arch_pgd(pgd_t *pgd)
+{
+	phys_addr_t pgdir = virt_to_phys(pgd);
+	BUG_ON(pgdir & ARCH_PGD_MASK);
+	return pgdir >> ARCH_PGD_SHIFT;
+}
+
 int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 {
 	int ret;
@@ -87,8 +95,14 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 	 * its stack and the page tables.
 	 */
 	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
-	secondary_data.pgdir = virt_to_phys(idmap_pgd);
-	secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir);
+#ifdef CONFIG_ARM_MPU
+	secondary_data.mpu_rgn_szr = mpu_rgn_info.rgns[MPU_RAM_REGION].drsr;
+#endif
+
+#ifdef CONFIG_MMU
+	secondary_data.pgdir = get_arch_pgd(idmap_pgd);
+	secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir);
+#endif
 	__cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data));
 	outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1));
 
@@ -112,9 +126,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 		pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
 	}
 
-	secondary_data.stack = NULL;
-	secondary_data.pgdir = 0;
 
+	memset(&secondary_data, 0, sizeof(secondary_data));
 	return ret;
 }
 
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index 9a52a07aa40..a98b62dca2f 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -103,7 +103,7 @@ static void broadcast_tlb_a15_erratum(void)
 
 static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
 {
-	int cpu, this_cpu;
+	int this_cpu;
 	cpumask_t mask = { CPU_BITS_NONE };
 
 	if (!erratum_a15_798181())
@@ -111,21 +111,7 @@ static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
 
 	dummy_flush_tlb_a15_erratum();
 	this_cpu = get_cpu();
-	for_each_online_cpu(cpu) {
-		if (cpu == this_cpu)
-			continue;
-		/*
-		 * We only need to send an IPI if the other CPUs are running
-		 * the same ASID as the one being invalidated. There is no
-		 * need for locking around the active_asids check since the
-		 * switch_mm() function has at least one dmb() (as required by
-		 * this workaround) in case a context switch happens on
-		 * another CPU after the condition below.
-		 */
-		if (atomic64_read(&mm->context.id) ==
-		    atomic64_read(&per_cpu(active_asids, cpu)))
-			cpumask_set_cpu(cpu, &mask);
-	}
+	a15_erratum_get_cpumask(this_cpu, mm, &mask);
 	smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1);
 	put_cpu();
 }
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
index c59c97ea826..41cf3cbf756 100644
--- a/arch/arm/kernel/suspend.c
+++ b/arch/arm/kernel/suspend.c
@@ -1,15 +1,54 @@
 #include <linux/init.h>
+#include <linux/slab.h>
 
+#include <asm/cacheflush.h>
 #include <asm/idmap.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/memory.h>
+#include <asm/smp_plat.h>
 #include <asm/suspend.h>
 #include <asm/tlbflush.h>
 
 extern int __cpu_suspend(unsigned long, int (*)(unsigned long));
 extern void cpu_resume_mmu(void);
 
+#ifdef CONFIG_MMU
+/*
+ * Hide the first two arguments to __cpu_suspend - these are an implementation
+ * detail which platform code shouldn't have to know about.
+ */
+int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+	struct mm_struct *mm = current->active_mm;
+	int ret;
+
+	if (!idmap_pgd)
+		return -EINVAL;
+
+	/*
+	 * Provide a temporary page table with an identity mapping for
+	 * the MMU-enable code, required for resuming.  On successful
+	 * resume (indicated by a zero return code), we need to switch
+	 * back to the correct page tables.
+	 */
+	ret = __cpu_suspend(arg, fn);
+	if (ret == 0) {
+		cpu_switch_mm(mm->pgd, mm);
+		local_flush_bp_all();
+		local_flush_tlb_all();
+	}
+
+	return ret;
+}
+#else
+int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+	return __cpu_suspend(arg, fn);
+}
+#define	idmap_pgd	NULL
+#endif
+
 /*
  * This is called by __cpu_suspend() to save the state, and do whatever
  * flushing is required to ensure that when the CPU goes to sleep we have
@@ -47,30 +86,19 @@ void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr)
 			  virt_to_phys(save_ptr) + sizeof(*save_ptr));
 }
 
-/*
- * Hide the first two arguments to __cpu_suspend - these are an implementation
- * detail which platform code shouldn't have to know about.
- */
-int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
-{
-	struct mm_struct *mm = current->active_mm;
-	int ret;
-
-	if (!idmap_pgd)
-		return -EINVAL;
+extern struct sleep_save_sp sleep_save_sp;
 
-	/*
-	 * Provide a temporary page table with an identity mapping for
-	 * the MMU-enable code, required for resuming.  On successful
-	 * resume (indicated by a zero return code), we need to switch
-	 * back to the correct page tables.
-	 */
-	ret = __cpu_suspend(arg, fn);
-	if (ret == 0) {
-		cpu_switch_mm(mm->pgd, mm);
-		local_flush_bp_all();
-		local_flush_tlb_all();
-	}
+static int cpu_suspend_alloc_sp(void)
+{
+	void *ctx_ptr;
+	/* ctx_ptr is an array of physical addresses */
+	ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(u32), GFP_KERNEL);
 
-	return ret;
+	if (WARN_ON(!ctx_ptr))
+		return -ENOMEM;
+	sleep_save_sp.save_ptr_stash = ctx_ptr;
+	sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr);
+	sync_cache_w(&sleep_save_sp);
+	return 0;
 }
+early_initcall(cpu_suspend_alloc_sp);
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 486e12a0f26..cab094c234e 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -581,7 +581,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -699,7 +699,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 370e1a8af6a..ebf5015508b 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -41,9 +41,9 @@ config KVM_ARM_HOST
 	  Provides host support for ARM processors.
 
 config KVM_ARM_MAX_VCPUS
-	int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST
-	default 4 if KVM_ARM_HOST
-	default 0
+	int "Number maximum supported virtual CPUs per VM"
+	depends on KVM_ARM_HOST
+	default 4
 	help
 	  Static number of max supported virtual CPUs per VM.
 
@@ -67,6 +67,4 @@ config KVM_ARM_TIMER
 	---help---
 	  Adds support for the Architected Timers in virtual machines
 
-source drivers/virtio/Kconfig
-
 endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 53c5ed83d16..d99bee4950e 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -14,10 +14,11 @@ CFLAGS_mmu.o := -I.
 AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
 AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
 
-kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+KVM := ../../../virt/kvm
+kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
-obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
-obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
+obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c
deleted file mode 100644
index c55b6089e92..00000000000
--- a/arch/arm/kvm/arch_timer.c
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <linux/cpu.h>
-#include <linux/of_irq.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/interrupt.h>
-
-#include <clocksource/arm_arch_timer.h>
-#include <asm/arch_timer.h>
-
-#include <asm/kvm_vgic.h>
-#include <asm/kvm_arch_timer.h>
-
-static struct timecounter *timecounter;
-static struct workqueue_struct *wqueue;
-static struct kvm_irq_level timer_irq = {
-	.level	= 1,
-};
-
-static cycle_t kvm_phys_timer_read(void)
-{
-	return timecounter->cc->read(timecounter->cc);
-}
-
-static bool timer_is_armed(struct arch_timer_cpu *timer)
-{
-	return timer->armed;
-}
-
-/* timer_arm: as in "arm the timer", not as in ARM the company */
-static void timer_arm(struct arch_timer_cpu *timer, u64 ns)
-{
-	timer->armed = true;
-	hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns),
-		      HRTIMER_MODE_ABS);
-}
-
-static void timer_disarm(struct arch_timer_cpu *timer)
-{
-	if (timer_is_armed(timer)) {
-		hrtimer_cancel(&timer->timer);
-		cancel_work_sync(&timer->expired);
-		timer->armed = false;
-	}
-}
-
-static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
-	timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
-	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
-			    vcpu->arch.timer_cpu.irq->irq,
-			    vcpu->arch.timer_cpu.irq->level);
-}
-
-static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
-{
-	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
-
-	/*
-	 * We disable the timer in the world switch and let it be
-	 * handled by kvm_timer_sync_hwstate(). Getting a timer
-	 * interrupt at this point is a sure sign of some major
-	 * breakage.
-	 */
-	pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
-	return IRQ_HANDLED;
-}
-
-static void kvm_timer_inject_irq_work(struct work_struct *work)
-{
-	struct kvm_vcpu *vcpu;
-
-	vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
-	vcpu->arch.timer_cpu.armed = false;
-	kvm_timer_inject_irq(vcpu);
-}
-
-static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
-{
-	struct arch_timer_cpu *timer;
-	timer = container_of(hrt, struct arch_timer_cpu, timer);
-	queue_work(wqueue, &timer->expired);
-	return HRTIMER_NORESTART;
-}
-
-/**
- * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
- * @vcpu: The vcpu pointer
- *
- * Disarm any pending soft timers, since the world-switch code will write the
- * virtual timer state back to the physical CPU.
- */
-void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
-	/*
-	 * We're about to run this vcpu again, so there is no need to
-	 * keep the background timer running, as we're about to
-	 * populate the CPU timer again.
-	 */
-	timer_disarm(timer);
-}
-
-/**
- * kvm_timer_sync_hwstate - sync timer state from cpu
- * @vcpu: The vcpu pointer
- *
- * Check if the virtual timer was armed and either schedule a corresponding
- * soft timer or inject directly if already expired.
- */
-void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	cycle_t cval, now;
-	u64 ns;
-
-	if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
-		!(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
-		return;
-
-	cval = timer->cntv_cval;
-	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
-
-	BUG_ON(timer_is_armed(timer));
-
-	if (cval <= now) {
-		/*
-		 * Timer has already expired while we were not
-		 * looking. Inject the interrupt and carry on.
-		 */
-		kvm_timer_inject_irq(vcpu);
-		return;
-	}
-
-	ns = cyclecounter_cyc2ns(timecounter->cc, cval - now);
-	timer_arm(timer, ns);
-}
-
-void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
-	INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
-	hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-	timer->timer.function = kvm_timer_expire;
-	timer->irq = &timer_irq;
-}
-
-static void kvm_timer_init_interrupt(void *info)
-{
-	enable_percpu_irq(timer_irq.irq, 0);
-}
-
-
-static int kvm_timer_cpu_notify(struct notifier_block *self,
-				unsigned long action, void *cpu)
-{
-	switch (action) {
-	case CPU_STARTING:
-	case CPU_STARTING_FROZEN:
-		kvm_timer_init_interrupt(NULL);
-		break;
-	case CPU_DYING:
-	case CPU_DYING_FROZEN:
-		disable_percpu_irq(timer_irq.irq);
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block kvm_timer_cpu_nb = {
-	.notifier_call = kvm_timer_cpu_notify,
-};
-
-static const struct of_device_id arch_timer_of_match[] = {
-	{ .compatible	= "arm,armv7-timer",	},
-	{},
-};
-
-int kvm_timer_hyp_init(void)
-{
-	struct device_node *np;
-	unsigned int ppi;
-	int err;
-
-	timecounter = arch_timer_get_timecounter();
-	if (!timecounter)
-		return -ENODEV;
-
-	np = of_find_matching_node(NULL, arch_timer_of_match);
-	if (!np) {
-		kvm_err("kvm_arch_timer: can't find DT node\n");
-		return -ENODEV;
-	}
-
-	ppi = irq_of_parse_and_map(np, 2);
-	if (!ppi) {
-		kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
-		err = -EINVAL;
-		goto out;
-	}
-
-	err = request_percpu_irq(ppi, kvm_arch_timer_handler,
-				 "kvm guest timer", kvm_get_running_vcpus());
-	if (err) {
-		kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
-			ppi, err);
-		goto out;
-	}
-
-	timer_irq.irq = ppi;
-
-	err = register_cpu_notifier(&kvm_timer_cpu_nb);
-	if (err) {
-		kvm_err("Cannot register timer CPU notifier\n");
-		goto out_free;
-	}
-
-	wqueue = create_singlethread_workqueue("kvm_arch_timer");
-	if (!wqueue) {
-		err = -ENOMEM;
-		goto out_free;
-	}
-
-	kvm_info("%s IRQ%d\n", np->name, ppi);
-	on_each_cpu(kvm_timer_init_interrupt, NULL, 1);
-
-	goto out;
-out_free:
-	free_percpu_irq(ppi, kvm_get_running_vcpus());
-out:
-	of_node_put(np);
-	return err;
-}
-
-void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
-	timer_disarm(timer);
-}
-
-int kvm_timer_init(struct kvm *kvm)
-{
-	if (timecounter && wqueue) {
-		kvm->arch.timer.cntvoff = kvm_phys_timer_read();
-		kvm->arch.timer.enabled = 1;
-	}
-
-	return 0;
-}
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index ef1703b9587..741f66a2edb 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -800,8 +800,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 static void cpu_init_hyp_mode(void *dummy)
 {
-	unsigned long long boot_pgd_ptr;
-	unsigned long long pgd_ptr;
+	phys_addr_t boot_pgd_ptr;
+	phys_addr_t pgd_ptr;
 	unsigned long hyp_stack_ptr;
 	unsigned long stack_page;
 	unsigned long vector_ptr;
@@ -809,8 +809,8 @@ static void cpu_init_hyp_mode(void *dummy)
 	/* Switch from the HYP stub to our own HYP init vector */
 	__hyp_set_vectors(kvm_get_idmap_vector());
 
-	boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr();
-	pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
+	boot_pgd_ptr = kvm_mmu_get_boot_httbr();
+	pgd_ptr = kvm_mmu_get_httbr();
 	stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
 	hyp_stack_ptr = stack_page + PAGE_SIZE;
 	vector_ptr = (unsigned long)__kvm_hyp_vector;
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 8eea97be1ed..4a519907043 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -180,6 +180,10 @@ static const struct coproc_reg cp15_regs[] = {
 			NULL, reset_unknown, c6_DFAR },
 	{ CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32,
 			NULL, reset_unknown, c6_IFAR },
+
+	/* PAR swapped by interrupt.S */
+	{ CRn( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
+
 	/*
 	 * DC{C,I,CI}SW operations:
 	 */
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 3d74a0be47d..df4c82d47ad 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -52,9 +52,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	if (kvm_psci_call(vcpu))
-		return 1;
-
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index f7793df62f5..16cd4ba5d7f 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -49,6 +49,7 @@ __kvm_hyp_code_start:
 ENTRY(__kvm_tlb_flush_vmid_ipa)
 	push	{r2, r3}
 
+	dsb	ishst
 	add	r0, r0, #KVM_VTTBR
 	ldrd	r2, r3, [r0]
 	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
@@ -291,6 +292,7 @@ THUMB(	orr	r2, r2, #PSR_T_BIT	)
 	ldr	r2, =BSYM(panic)
 	msr	ELR_hyp, r2
 	ldr	r0, =\panic_str
+	clrex				@ Clear exclusive monitor
 	eret
 .endm
 
@@ -414,6 +416,10 @@ guest_trap:
 	mrcne	p15, 4, r2, c6, c0, 4	@ HPFAR
 	bne	3f
 
+	/* Preserve PAR */
+	mrrc	p15, 0, r0, r1, c7	@ PAR
+	push	{r0, r1}
+
 	/* Resolve IPA using the xFAR */
 	mcr	p15, 0, r2, c7, c8, 0	@ ATS1CPR
 	isb
@@ -424,13 +430,20 @@ guest_trap:
 	lsl	r2, r2, #4
 	orr	r2, r2, r1, lsl #24
 
+	/* Restore PAR */
+	pop	{r0, r1}
+	mcrr	p15, 0, r0, r1, c7	@ PAR
+
 3:	load_vcpu			@ Load VCPU pointer to r0
 	str	r2, [r0, #VCPU_HPFAR]
 
 1:	mov	r1, #ARM_EXCEPTION_HVC
 	b	__kvm_vcpu_return
 
-4:	pop	{r0, r1, r2}		@ Failed translation, return to guest
+4:	pop	{r0, r1}		@ Failed translation, return to guest
+	mcrr	p15, 0, r0, r1, c7	@ PAR
+	clrex
+	pop	{r0, r1, r2}
 	eret
 
 /*
@@ -456,6 +469,7 @@ switch_to_guest_vfp:
 
 	pop	{r3-r7}
 	pop	{r0-r2}
+	clrex
 	eret
 #endif
 
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 3c8f2f0b4c5..6f18695a09c 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -302,11 +302,14 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	.endif
 
 	mrc	p15, 0, r2, c14, c1, 0	@ CNTKCTL
+	mrrc	p15, 0, r4, r5, c7	@ PAR
 
 	.if \store_to_vcpu == 0
-	push	{r2}
+	push	{r2,r4-r5}
 	.else
 	str	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
+	add	r12, vcpu, #CP15_OFFSET(c7_PAR)
+	strd	r4, r5, [r12]
 	.endif
 .endm
 
@@ -319,12 +322,15 @@ vcpu	.req	r0		@ vcpu pointer always in r0
  */
 .macro write_cp15_state read_from_vcpu
 	.if \read_from_vcpu == 0
-	pop	{r2}
+	pop	{r2,r4-r5}
 	.else
 	ldr	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
+	add	r12, vcpu, #CP15_OFFSET(c7_PAR)
+	ldrd	r4, r5, [r12]
 	.endif
 
 	mcr	p15, 0, r2, c14, c1, 0	@ CNTKCTL
+	mcrr	p15, 0, r4, r5, c7	@ PAR
 
 	.if \read_from_vcpu == 0
 	pop	{r2-r12}
@@ -497,6 +503,10 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	add	r5, vcpu, r4
 	strd	r2, r3, [r5]
 
+	@ Ensure host CNTVCT == CNTPCT
+	mov	r2, #0
+	mcrr	p15, 4, r2, r2, c14	@ CNTVOFF
+
 1:
 #endif
 	@ Allow physical timer/counter access for the host
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 72a12f2171b..b8e06b7a283 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -86,12 +86,6 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	sign_extend = kvm_vcpu_dabt_issext(vcpu);
 	rt = kvm_vcpu_dabt_get_rd(vcpu);
 
-	if (kvm_vcpu_reg_is_pc(vcpu, rt)) {
-		/* IO memory trying to read/write pc */
-		kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
-		return 1;
-	}
-
 	mmio->is_write = is_write;
 	mmio->phys_addr = fault_ipa;
 	mmio->len = len;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 84ba67b982c..ca6bea4859b 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -382,9 +382,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 	if (!pgd)
 		return -ENOMEM;
 
-	/* stage-2 pgd must be aligned to its size */
-	VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
-
 	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 7ee5bb7a366..86a693a02ba 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -75,7 +75,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
  * kvm_psci_call - handle PSCI call if r0 value is in range
  * @vcpu: Pointer to the VCPU struct
  *
- * Handle PSCI calls from guests through traps from HVC or SMC instructions.
+ * Handle PSCI calls from guests through traps from HVC instructions.
  * The calling convention is similar to SMC calls to the secure world where
  * the function number is placed in r0 and this function returns true if the
  * function number specified in r0 is withing the PSCI range, and false
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index b80256b554c..b7840e7aa45 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -27,6 +27,8 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_coproc.h>
 
+#include <kvm/arm_arch_timer.h>
+
 /******************************************************************************
  * Cortex-A15 Reset Values
  */
@@ -37,6 +39,11 @@ static struct kvm_regs a15_regs_reset = {
 	.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
 };
 
+static const struct kvm_irq_level a15_vtimer_irq = {
+	.irq = 27,
+	.level = 1,
+};
+
 
 /*******************************************************************************
  * Exported reset function
@@ -52,6 +59,7 @@ static struct kvm_regs a15_regs_reset = {
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct kvm_regs *cpu_reset;
+	const struct kvm_irq_level *cpu_vtimer_irq;
 
 	switch (vcpu->arch.target) {
 	case KVM_ARM_TARGET_CORTEX_A15:
@@ -59,6 +67,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 			return -EINVAL;
 		cpu_reset = &a15_regs_reset;
 		vcpu->arch.midr = read_cpuid_id();
+		cpu_vtimer_irq = &a15_vtimer_irq;
 		break;
 	default:
 		return -ENODEV;
@@ -70,5 +79,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	/* Reset CP15 registers */
 	kvm_reset_coprocs(vcpu);
 
+	/* Reset arch_timer context */
+	kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
+
 	return 0;
 }
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
deleted file mode 100644
index 17c5ac7d10e..00000000000
--- a/arch/arm/kvm/vgic.c
+++ /dev/null
@@ -1,1499 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <linux/cpu.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-
-#include <linux/irqchip/arm-gic.h>
-
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_mmu.h>
-
-/*
- * How the whole thing works (courtesy of Christoffer Dall):
- *
- * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
- *   something is pending
- * - VGIC pending interrupts are stored on the vgic.irq_state vgic
- *   bitmap (this bitmap is updated by both user land ioctls and guest
- *   mmio ops, and other in-kernel peripherals such as the
- *   arch. timers) and indicate the 'wire' state.
- * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
- *   recalculated
- * - To calculate the oracle, we need info for each cpu from
- *   compute_pending_for_cpu, which considers:
- *   - PPI: dist->irq_state & dist->irq_enable
- *   - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target
- *   - irq_spi_target is a 'formatted' version of the GICD_ICFGR
- *     registers, stored on each vcpu. We only keep one bit of
- *     information per interrupt, making sure that only one vcpu can
- *     accept the interrupt.
- * - The same is true when injecting an interrupt, except that we only
- *   consider a single interrupt at a time. The irq_spi_cpu array
- *   contains the target CPU for each SPI.
- *
- * The handling of level interrupts adds some extra complexity. We
- * need to track when the interrupt has been EOIed, so we can sample
- * the 'line' again. This is achieved as such:
- *
- * - When a level interrupt is moved onto a vcpu, the corresponding
- *   bit in irq_active is set. As long as this bit is set, the line
- *   will be ignored for further interrupts. The interrupt is injected
- *   into the vcpu with the GICH_LR_EOI bit set (generate a
- *   maintenance interrupt on EOI).
- * - When the interrupt is EOIed, the maintenance interrupt fires,
- *   and clears the corresponding bit in irq_active. This allow the
- *   interrupt line to be sampled again.
- */
-
-#define VGIC_ADDR_UNDEF		(-1)
-#define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
-
-/* Physical address of vgic virtual cpu interface */
-static phys_addr_t vgic_vcpu_base;
-
-/* Virtual control interface base address */
-static void __iomem *vgic_vctrl_base;
-
-static struct device_node *vgic_node;
-
-#define ACCESS_READ_VALUE	(1 << 0)
-#define ACCESS_READ_RAZ		(0 << 0)
-#define ACCESS_READ_MASK(x)	((x) & (1 << 0))
-#define ACCESS_WRITE_IGNORED	(0 << 1)
-#define ACCESS_WRITE_SETBIT	(1 << 1)
-#define ACCESS_WRITE_CLEARBIT	(2 << 1)
-#define ACCESS_WRITE_VALUE	(3 << 1)
-#define ACCESS_WRITE_MASK(x)	((x) & (3 << 1))
-
-static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
-static void vgic_update_state(struct kvm *kvm);
-static void vgic_kick_vcpus(struct kvm *kvm);
-static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
-static u32 vgic_nr_lr;
-
-static unsigned int vgic_maint_irq;
-
-static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
-				int cpuid, u32 offset)
-{
-	offset >>= 2;
-	if (!offset)
-		return x->percpu[cpuid].reg;
-	else
-		return x->shared.reg + offset - 1;
-}
-
-static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
-				   int cpuid, int irq)
-{
-	if (irq < VGIC_NR_PRIVATE_IRQS)
-		return test_bit(irq, x->percpu[cpuid].reg_ul);
-
-	return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul);
-}
-
-static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
-				    int irq, int val)
-{
-	unsigned long *reg;
-
-	if (irq < VGIC_NR_PRIVATE_IRQS) {
-		reg = x->percpu[cpuid].reg_ul;
-	} else {
-		reg =  x->shared.reg_ul;
-		irq -= VGIC_NR_PRIVATE_IRQS;
-	}
-
-	if (val)
-		set_bit(irq, reg);
-	else
-		clear_bit(irq, reg);
-}
-
-static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
-{
-	if (unlikely(cpuid >= VGIC_MAX_CPUS))
-		return NULL;
-	return x->percpu[cpuid].reg_ul;
-}
-
-static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
-{
-	return x->shared.reg_ul;
-}
-
-static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
-{
-	offset >>= 2;
-	BUG_ON(offset > (VGIC_NR_IRQS / 4));
-	if (offset < 4)
-		return x->percpu[cpuid] + offset;
-	else
-		return x->shared + offset - 8;
-}
-
-#define VGIC_CFG_LEVEL	0
-#define VGIC_CFG_EDGE	1
-
-static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	int irq_val;
-
-	irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq);
-	return irq_val == VGIC_CFG_EDGE;
-}
-
-static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
-}
-
-static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
-}
-
-static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
-}
-
-static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
-}
-
-static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq);
-}
-
-static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1);
-}
-
-static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0);
-}
-
-static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
-{
-	if (irq < VGIC_NR_PRIVATE_IRQS)
-		set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
-	else
-		set_bit(irq - VGIC_NR_PRIVATE_IRQS,
-			vcpu->arch.vgic_cpu.pending_shared);
-}
-
-static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
-{
-	if (irq < VGIC_NR_PRIVATE_IRQS)
-		clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
-	else
-		clear_bit(irq - VGIC_NR_PRIVATE_IRQS,
-			  vcpu->arch.vgic_cpu.pending_shared);
-}
-
-static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
-{
-	return *((u32 *)mmio->data) & mask;
-}
-
-static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
-{
-	*((u32 *)mmio->data) = value & mask;
-}
-
-/**
- * vgic_reg_access - access vgic register
- * @mmio:   pointer to the data describing the mmio access
- * @reg:    pointer to the virtual backing of vgic distributor data
- * @offset: least significant 2 bits used for word offset
- * @mode:   ACCESS_ mode (see defines above)
- *
- * Helper to make vgic register access easier using one of the access
- * modes defined for vgic register access
- * (read,raz,write-ignored,setbit,clearbit,write)
- */
-static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
-			    phys_addr_t offset, int mode)
-{
-	int word_offset = (offset & 3) * 8;
-	u32 mask = (1UL << (mmio->len * 8)) - 1;
-	u32 regval;
-
-	/*
-	 * Any alignment fault should have been delivered to the guest
-	 * directly (ARM ARM B3.12.7 "Prioritization of aborts").
-	 */
-
-	if (reg) {
-		regval = *reg;
-	} else {
-		BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED));
-		regval = 0;
-	}
-
-	if (mmio->is_write) {
-		u32 data = mmio_data_read(mmio, mask) << word_offset;
-		switch (ACCESS_WRITE_MASK(mode)) {
-		case ACCESS_WRITE_IGNORED:
-			return;
-
-		case ACCESS_WRITE_SETBIT:
-			regval |= data;
-			break;
-
-		case ACCESS_WRITE_CLEARBIT:
-			regval &= ~data;
-			break;
-
-		case ACCESS_WRITE_VALUE:
-			regval = (regval & ~(mask << word_offset)) | data;
-			break;
-		}
-		*reg = regval;
-	} else {
-		switch (ACCESS_READ_MASK(mode)) {
-		case ACCESS_READ_RAZ:
-			regval = 0;
-			/* fall through */
-
-		case ACCESS_READ_VALUE:
-			mmio_data_write(mmio, mask, regval >> word_offset);
-		}
-	}
-}
-
-static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
-			     struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
-	u32 reg;
-	u32 word_offset = offset & 3;
-
-	switch (offset & ~3) {
-	case 0:			/* CTLR */
-		reg = vcpu->kvm->arch.vgic.enabled;
-		vgic_reg_access(mmio, &reg, word_offset,
-				ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
-		if (mmio->is_write) {
-			vcpu->kvm->arch.vgic.enabled = reg & 1;
-			vgic_update_state(vcpu->kvm);
-			return true;
-		}
-		break;
-
-	case 4:			/* TYPER */
-		reg  = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
-		reg |= (VGIC_NR_IRQS >> 5) - 1;
-		vgic_reg_access(mmio, &reg, word_offset,
-				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
-		break;
-
-	case 8:			/* IIDR */
-		reg = 0x4B00043B;
-		vgic_reg_access(mmio, &reg, word_offset,
-				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
-		break;
-	}
-
-	return false;
-}
-
-static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu,
-			       struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
-	vgic_reg_access(mmio, NULL, offset,
-			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
-	return false;
-}
-
-static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
-				       struct kvm_exit_mmio *mmio,
-				       phys_addr_t offset)
-{
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
-				       vcpu->vcpu_id, offset);
-	vgic_reg_access(mmio, reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
-	if (mmio->is_write) {
-		vgic_update_state(vcpu->kvm);
-		return true;
-	}
-
-	return false;
-}
-
-static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
-					 struct kvm_exit_mmio *mmio,
-					 phys_addr_t offset)
-{
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
-				       vcpu->vcpu_id, offset);
-	vgic_reg_access(mmio, reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
-	if (mmio->is_write) {
-		if (offset < 4) /* Force SGI enabled */
-			*reg |= 0xffff;
-		vgic_retire_disabled_irqs(vcpu);
-		vgic_update_state(vcpu->kvm);
-		return true;
-	}
-
-	return false;
-}
-
-static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
-					struct kvm_exit_mmio *mmio,
-					phys_addr_t offset)
-{
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
-				       vcpu->vcpu_id, offset);
-	vgic_reg_access(mmio, reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
-	if (mmio->is_write) {
-		vgic_update_state(vcpu->kvm);
-		return true;
-	}
-
-	return false;
-}
-
-static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
-					  struct kvm_exit_mmio *mmio,
-					  phys_addr_t offset)
-{
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
-				       vcpu->vcpu_id, offset);
-	vgic_reg_access(mmio, reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
-	if (mmio->is_write) {
-		vgic_update_state(vcpu->kvm);
-		return true;
-	}
-
-	return false;
-}
-
-static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
-				     struct kvm_exit_mmio *mmio,
-				     phys_addr_t offset)
-{
-	u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
-					vcpu->vcpu_id, offset);
-	vgic_reg_access(mmio, reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
-	return false;
-}
-
-#define GICD_ITARGETSR_SIZE	32
-#define GICD_CPUTARGETS_BITS	8
-#define GICD_IRQS_PER_ITARGETSR	(GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS)
-static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
-{
-	struct vgic_dist *dist = &kvm->arch.vgic;
-	struct kvm_vcpu *vcpu;
-	int i, c;
-	unsigned long *bmap;
-	u32 val = 0;
-
-	irq -= VGIC_NR_PRIVATE_IRQS;
-
-	kvm_for_each_vcpu(c, vcpu, kvm) {
-		bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
-		for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
-			if (test_bit(irq + i, bmap))
-				val |= 1 << (c + i * 8);
-	}
-
-	return val;
-}
-
-static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq)
-{
-	struct vgic_dist *dist = &kvm->arch.vgic;
-	struct kvm_vcpu *vcpu;
-	int i, c;
-	unsigned long *bmap;
-	u32 target;
-
-	irq -= VGIC_NR_PRIVATE_IRQS;
-
-	/*
-	 * Pick the LSB in each byte. This ensures we target exactly
-	 * one vcpu per IRQ. If the byte is null, assume we target
-	 * CPU0.
-	 */
-	for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) {
-		int shift = i * GICD_CPUTARGETS_BITS;
-		target = ffs((val >> shift) & 0xffU);
-		target = target ? (target - 1) : 0;
-		dist->irq_spi_cpu[irq + i] = target;
-		kvm_for_each_vcpu(c, vcpu, kvm) {
-			bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
-			if (c == target)
-				set_bit(irq + i, bmap);
-			else
-				clear_bit(irq + i, bmap);
-		}
-	}
-}
-
-static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
-				   struct kvm_exit_mmio *mmio,
-				   phys_addr_t offset)
-{
-	u32 reg;
-
-	/* We treat the banked interrupts targets as read-only */
-	if (offset < 32) {
-		u32 roreg = 1 << vcpu->vcpu_id;
-		roreg |= roreg << 8;
-		roreg |= roreg << 16;
-
-		vgic_reg_access(mmio, &roreg, offset,
-				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
-		return false;
-	}
-
-	reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
-	vgic_reg_access(mmio, &reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
-	if (mmio->is_write) {
-		vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U);
-		vgic_update_state(vcpu->kvm);
-		return true;
-	}
-
-	return false;
-}
-
-static u32 vgic_cfg_expand(u16 val)
-{
-	u32 res = 0;
-	int i;
-
-	/*
-	 * Turn a 16bit value like abcd...mnop into a 32bit word
-	 * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is.
-	 */
-	for (i = 0; i < 16; i++)
-		res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1);
-
-	return res;
-}
-
-static u16 vgic_cfg_compress(u32 val)
-{
-	u16 res = 0;
-	int i;
-
-	/*
-	 * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like
-	 * abcd...mnop which is what we really care about.
-	 */
-	for (i = 0; i < 16; i++)
-		res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i;
-
-	return res;
-}
-
-/*
- * The distributor uses 2 bits per IRQ for the CFG register, but the
- * LSB is always 0. As such, we only keep the upper bit, and use the
- * two above functions to compress/expand the bits
- */
-static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
-				struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
-	u32 val;
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
-				       vcpu->vcpu_id, offset >> 1);
-	if (offset & 2)
-		val = *reg >> 16;
-	else
-		val = *reg & 0xffff;
-
-	val = vgic_cfg_expand(val);
-	vgic_reg_access(mmio, &val, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
-	if (mmio->is_write) {
-		if (offset < 4) {
-			*reg = ~0U; /* Force PPIs/SGIs to 1 */
-			return false;
-		}
-
-		val = vgic_cfg_compress(val);
-		if (offset & 2) {
-			*reg &= 0xffff;
-			*reg |= val << 16;
-		} else {
-			*reg &= 0xffff << 16;
-			*reg |= val;
-		}
-	}
-
-	return false;
-}
-
-static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
-				struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
-	u32 reg;
-	vgic_reg_access(mmio, &reg, offset,
-			ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
-	if (mmio->is_write) {
-		vgic_dispatch_sgi(vcpu, reg);
-		vgic_update_state(vcpu->kvm);
-		return true;
-	}
-
-	return false;
-}
-
-/*
- * I would have liked to use the kvm_bus_io_*() API instead, but it
- * cannot cope with banked registers (only the VM pointer is passed
- * around, and we need the vcpu). One of these days, someone please
- * fix it!
- */
-struct mmio_range {
-	phys_addr_t base;
-	unsigned long len;
-	bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
-			    phys_addr_t offset);
-};
-
-static const struct mmio_range vgic_ranges[] = {
-	{
-		.base		= GIC_DIST_CTRL,
-		.len		= 12,
-		.handle_mmio	= handle_mmio_misc,
-	},
-	{
-		.base		= GIC_DIST_IGROUP,
-		.len		= VGIC_NR_IRQS / 8,
-		.handle_mmio	= handle_mmio_raz_wi,
-	},
-	{
-		.base		= GIC_DIST_ENABLE_SET,
-		.len		= VGIC_NR_IRQS / 8,
-		.handle_mmio	= handle_mmio_set_enable_reg,
-	},
-	{
-		.base		= GIC_DIST_ENABLE_CLEAR,
-		.len		= VGIC_NR_IRQS / 8,
-		.handle_mmio	= handle_mmio_clear_enable_reg,
-	},
-	{
-		.base		= GIC_DIST_PENDING_SET,
-		.len		= VGIC_NR_IRQS / 8,
-		.handle_mmio	= handle_mmio_set_pending_reg,
-	},
-	{
-		.base		= GIC_DIST_PENDING_CLEAR,
-		.len		= VGIC_NR_IRQS / 8,
-		.handle_mmio	= handle_mmio_clear_pending_reg,
-	},
-	{
-		.base		= GIC_DIST_ACTIVE_SET,
-		.len		= VGIC_NR_IRQS / 8,
-		.handle_mmio	= handle_mmio_raz_wi,
-	},
-	{
-		.base		= GIC_DIST_ACTIVE_CLEAR,
-		.len		= VGIC_NR_IRQS / 8,
-		.handle_mmio	= handle_mmio_raz_wi,
-	},
-	{
-		.base		= GIC_DIST_PRI,
-		.len		= VGIC_NR_IRQS,
-		.handle_mmio	= handle_mmio_priority_reg,
-	},
-	{
-		.base		= GIC_DIST_TARGET,
-		.len		= VGIC_NR_IRQS,
-		.handle_mmio	= handle_mmio_target_reg,
-	},
-	{
-		.base		= GIC_DIST_CONFIG,
-		.len		= VGIC_NR_IRQS / 4,
-		.handle_mmio	= handle_mmio_cfg_reg,
-	},
-	{
-		.base		= GIC_DIST_SOFTINT,
-		.len		= 4,
-		.handle_mmio	= handle_mmio_sgi_reg,
-	},
-	{}
-};
-
-static const
-struct mmio_range *find_matching_range(const struct mmio_range *ranges,
-				       struct kvm_exit_mmio *mmio,
-				       phys_addr_t base)
-{
-	const struct mmio_range *r = ranges;
-	phys_addr_t addr = mmio->phys_addr - base;
-
-	while (r->len) {
-		if (addr >= r->base &&
-		    (addr + mmio->len) <= (r->base + r->len))
-			return r;
-		r++;
-	}
-
-	return NULL;
-}
-
-/**
- * vgic_handle_mmio - handle an in-kernel MMIO access
- * @vcpu:	pointer to the vcpu performing the access
- * @run:	pointer to the kvm_run structure
- * @mmio:	pointer to the data describing the access
- *
- * returns true if the MMIO access has been performed in kernel space,
- * and false if it needs to be emulated in user space.
- */
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-		      struct kvm_exit_mmio *mmio)
-{
-	const struct mmio_range *range;
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	unsigned long base = dist->vgic_dist_base;
-	bool updated_state;
-	unsigned long offset;
-
-	if (!irqchip_in_kernel(vcpu->kvm) ||
-	    mmio->phys_addr < base ||
-	    (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE))
-		return false;
-
-	/* We don't support ldrd / strd or ldm / stm to the emulated vgic */
-	if (mmio->len > 4) {
-		kvm_inject_dabt(vcpu, mmio->phys_addr);
-		return true;
-	}
-
-	range = find_matching_range(vgic_ranges, mmio, base);
-	if (unlikely(!range || !range->handle_mmio)) {
-		pr_warn("Unhandled access %d %08llx %d\n",
-			mmio->is_write, mmio->phys_addr, mmio->len);
-		return false;
-	}
-
-	spin_lock(&vcpu->kvm->arch.vgic.lock);
-	offset = mmio->phys_addr - range->base - base;
-	updated_state = range->handle_mmio(vcpu, mmio, offset);
-	spin_unlock(&vcpu->kvm->arch.vgic.lock);
-	kvm_prepare_mmio(run, mmio);
-	kvm_handle_mmio_return(vcpu, run);
-
-	if (updated_state)
-		vgic_kick_vcpus(vcpu->kvm);
-
-	return true;
-}
-
-static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
-{
-	struct kvm *kvm = vcpu->kvm;
-	struct vgic_dist *dist = &kvm->arch.vgic;
-	int nrcpus = atomic_read(&kvm->online_vcpus);
-	u8 target_cpus;
-	int sgi, mode, c, vcpu_id;
-
-	vcpu_id = vcpu->vcpu_id;
-
-	sgi = reg & 0xf;
-	target_cpus = (reg >> 16) & 0xff;
-	mode = (reg >> 24) & 3;
-
-	switch (mode) {
-	case 0:
-		if (!target_cpus)
-			return;
-
-	case 1:
-		target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
-		break;
-
-	case 2:
-		target_cpus = 1 << vcpu_id;
-		break;
-	}
-
-	kvm_for_each_vcpu(c, vcpu, kvm) {
-		if (target_cpus & 1) {
-			/* Flag the SGI as pending */
-			vgic_dist_irq_set(vcpu, sgi);
-			dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
-			kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
-		}
-
-		target_cpus >>= 1;
-	}
-}
-
-static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
-	unsigned long pending_private, pending_shared;
-	int vcpu_id;
-
-	vcpu_id = vcpu->vcpu_id;
-	pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
-	pend_shared = vcpu->arch.vgic_cpu.pending_shared;
-
-	pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id);
-	enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
-	bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
-
-	pending = vgic_bitmap_get_shared_map(&dist->irq_state);
-	enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
-	bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
-	bitmap_and(pend_shared, pend_shared,
-		   vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
-		   VGIC_NR_SHARED_IRQS);
-
-	pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
-	pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS);
-	return (pending_private < VGIC_NR_PRIVATE_IRQS ||
-		pending_shared < VGIC_NR_SHARED_IRQS);
-}
-
-/*
- * Update the interrupt state and determine which CPUs have pending
- * interrupts. Must be called with distributor lock held.
- */
-static void vgic_update_state(struct kvm *kvm)
-{
-	struct vgic_dist *dist = &kvm->arch.vgic;
-	struct kvm_vcpu *vcpu;
-	int c;
-
-	if (!dist->enabled) {
-		set_bit(0, &dist->irq_pending_on_cpu);
-		return;
-	}
-
-	kvm_for_each_vcpu(c, vcpu, kvm) {
-		if (compute_pending_for_cpu(vcpu)) {
-			pr_debug("CPU%d has pending interrupts\n", c);
-			set_bit(c, &dist->irq_pending_on_cpu);
-		}
-	}
-}
-
-#define LR_CPUID(lr)	\
-	(((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
-#define MK_LR_PEND(src, irq)	\
-	(GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq))
-
-/*
- * An interrupt may have been disabled after being made pending on the
- * CPU interface (the classic case is a timer running while we're
- * rebooting the guest - the interrupt would kick as soon as the CPU
- * interface gets enabled, with deadly consequences).
- *
- * The solution is to examine already active LRs, and check the
- * interrupt is still enabled. If not, just retire it.
- */
-static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	int lr;
-
-	for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
-		int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
-
-		if (!vgic_irq_is_enabled(vcpu, irq)) {
-			vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
-			clear_bit(lr, vgic_cpu->lr_used);
-			vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE;
-			if (vgic_irq_is_active(vcpu, irq))
-				vgic_irq_clear_active(vcpu, irq);
-		}
-	}
-}
-
-/*
- * Queue an interrupt to a CPU virtual interface. Return true on success,
- * or false if it wasn't possible to queue it.
- */
-static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	int lr;
-
-	/* Sanitize the input... */
-	BUG_ON(sgi_source_id & ~7);
-	BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
-	BUG_ON(irq >= VGIC_NR_IRQS);
-
-	kvm_debug("Queue IRQ%d\n", irq);
-
-	lr = vgic_cpu->vgic_irq_lr_map[irq];
-
-	/* Do we have an active interrupt for the same CPUID? */
-	if (lr != LR_EMPTY &&
-	    (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) {
-		kvm_debug("LR%d piggyback for IRQ%d %x\n",
-			  lr, irq, vgic_cpu->vgic_lr[lr]);
-		BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
-		vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT;
-		return true;
-	}
-
-	/* Try to use another LR for this interrupt */
-	lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
-			       vgic_cpu->nr_lr);
-	if (lr >= vgic_cpu->nr_lr)
-		return false;
-
-	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
-	vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
-	vgic_cpu->vgic_irq_lr_map[irq] = lr;
-	set_bit(lr, vgic_cpu->lr_used);
-
-	if (!vgic_irq_is_edge(vcpu, irq))
-		vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI;
-
-	return true;
-}
-
-static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	unsigned long sources;
-	int vcpu_id = vcpu->vcpu_id;
-	int c;
-
-	sources = dist->irq_sgi_sources[vcpu_id][irq];
-
-	for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
-		if (vgic_queue_irq(vcpu, c, irq))
-			clear_bit(c, &sources);
-	}
-
-	dist->irq_sgi_sources[vcpu_id][irq] = sources;
-
-	/*
-	 * If the sources bitmap has been cleared it means that we
-	 * could queue all the SGIs onto link registers (see the
-	 * clear_bit above), and therefore we are done with them in
-	 * our emulated gic and can get rid of them.
-	 */
-	if (!sources) {
-		vgic_dist_irq_clear(vcpu, irq);
-		vgic_cpu_irq_clear(vcpu, irq);
-		return true;
-	}
-
-	return false;
-}
-
-static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
-{
-	if (vgic_irq_is_active(vcpu, irq))
-		return true; /* level interrupt, already queued */
-
-	if (vgic_queue_irq(vcpu, 0, irq)) {
-		if (vgic_irq_is_edge(vcpu, irq)) {
-			vgic_dist_irq_clear(vcpu, irq);
-			vgic_cpu_irq_clear(vcpu, irq);
-		} else {
-			vgic_irq_set_active(vcpu, irq);
-		}
-
-		return true;
-	}
-
-	return false;
-}
-
-/*
- * Fill the list registers with pending interrupts before running the
- * guest.
- */
-static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	int i, vcpu_id;
-	int overflow = 0;
-
-	vcpu_id = vcpu->vcpu_id;
-
-	/*
-	 * We may not have any pending interrupt, or the interrupts
-	 * may have been serviced from another vcpu. In all cases,
-	 * move along.
-	 */
-	if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
-		pr_debug("CPU%d has no pending interrupt\n", vcpu_id);
-		goto epilog;
-	}
-
-	/* SGIs */
-	for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
-		if (!vgic_queue_sgi(vcpu, i))
-			overflow = 1;
-	}
-
-	/* PPIs */
-	for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) {
-		if (!vgic_queue_hwirq(vcpu, i))
-			overflow = 1;
-	}
-
-	/* SPIs */
-	for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) {
-		if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
-			overflow = 1;
-	}
-
-epilog:
-	if (overflow) {
-		vgic_cpu->vgic_hcr |= GICH_HCR_UIE;
-	} else {
-		vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
-		/*
-		 * We're about to run this VCPU, and we've consumed
-		 * everything the distributor had in store for
-		 * us. Claim we don't have anything pending. We'll
-		 * adjust that if needed while exiting.
-		 */
-		clear_bit(vcpu_id, &dist->irq_pending_on_cpu);
-	}
-}
-
-static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	bool level_pending = false;
-
-	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
-
-	if (vgic_cpu->vgic_misr & GICH_MISR_EOI) {
-		/*
-		 * Some level interrupts have been EOIed. Clear their
-		 * active bit.
-		 */
-		int lr, irq;
-
-		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
-				 vgic_cpu->nr_lr) {
-			irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
-
-			vgic_irq_clear_active(vcpu, irq);
-			vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI;
-
-			/* Any additional pending interrupt? */
-			if (vgic_dist_irq_is_pending(vcpu, irq)) {
-				vgic_cpu_irq_set(vcpu, irq);
-				level_pending = true;
-			} else {
-				vgic_cpu_irq_clear(vcpu, irq);
-			}
-
-			/*
-			 * Despite being EOIed, the LR may not have
-			 * been marked as empty.
-			 */
-			set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr);
-			vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT;
-		}
-	}
-
-	if (vgic_cpu->vgic_misr & GICH_MISR_U)
-		vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
-
-	return level_pending;
-}
-
-/*
- * Sync back the VGIC state after a guest run. The distributor lock is
- * needed so we don't get preempted in the middle of the state processing.
- */
-static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	int lr, pending;
-	bool level_pending;
-
-	level_pending = vgic_process_maintenance(vcpu);
-
-	/* Clear mappings for empty LRs */
-	for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr,
-			 vgic_cpu->nr_lr) {
-		int irq;
-
-		if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
-			continue;
-
-		irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
-
-		BUG_ON(irq >= VGIC_NR_IRQS);
-		vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
-	}
-
-	/* Check if we still have something up our sleeve... */
-	pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr,
-				      vgic_cpu->nr_lr);
-	if (level_pending || pending < vgic_cpu->nr_lr)
-		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
-}
-
-void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	if (!irqchip_in_kernel(vcpu->kvm))
-		return;
-
-	spin_lock(&dist->lock);
-	__kvm_vgic_flush_hwstate(vcpu);
-	spin_unlock(&dist->lock);
-}
-
-void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	if (!irqchip_in_kernel(vcpu->kvm))
-		return;
-
-	spin_lock(&dist->lock);
-	__kvm_vgic_sync_hwstate(vcpu);
-	spin_unlock(&dist->lock);
-}
-
-int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	if (!irqchip_in_kernel(vcpu->kvm))
-		return 0;
-
-	return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
-}
-
-static void vgic_kick_vcpus(struct kvm *kvm)
-{
-	struct kvm_vcpu *vcpu;
-	int c;
-
-	/*
-	 * We've injected an interrupt, time to find out who deserves
-	 * a good kick...
-	 */
-	kvm_for_each_vcpu(c, vcpu, kvm) {
-		if (kvm_vgic_vcpu_pending_irq(vcpu))
-			kvm_vcpu_kick(vcpu);
-	}
-}
-
-static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
-{
-	int is_edge = vgic_irq_is_edge(vcpu, irq);
-	int state = vgic_dist_irq_is_pending(vcpu, irq);
-
-	/*
-	 * Only inject an interrupt if:
-	 * - edge triggered and we have a rising edge
-	 * - level triggered and we change level
-	 */
-	if (is_edge)
-		return level > state;
-	else
-		return level != state;
-}
-
-static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
-				  unsigned int irq_num, bool level)
-{
-	struct vgic_dist *dist = &kvm->arch.vgic;
-	struct kvm_vcpu *vcpu;
-	int is_edge, is_level;
-	int enabled;
-	bool ret = true;
-
-	spin_lock(&dist->lock);
-
-	vcpu = kvm_get_vcpu(kvm, cpuid);
-	is_edge = vgic_irq_is_edge(vcpu, irq_num);
-	is_level = !is_edge;
-
-	if (!vgic_validate_injection(vcpu, irq_num, level)) {
-		ret = false;
-		goto out;
-	}
-
-	if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
-		cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
-		vcpu = kvm_get_vcpu(kvm, cpuid);
-	}
-
-	kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
-
-	if (level)
-		vgic_dist_irq_set(vcpu, irq_num);
-	else
-		vgic_dist_irq_clear(vcpu, irq_num);
-
-	enabled = vgic_irq_is_enabled(vcpu, irq_num);
-
-	if (!enabled) {
-		ret = false;
-		goto out;
-	}
-
-	if (is_level && vgic_irq_is_active(vcpu, irq_num)) {
-		/*
-		 * Level interrupt in progress, will be picked up
-		 * when EOId.
-		 */
-		ret = false;
-		goto out;
-	}
-
-	if (level) {
-		vgic_cpu_irq_set(vcpu, irq_num);
-		set_bit(cpuid, &dist->irq_pending_on_cpu);
-	}
-
-out:
-	spin_unlock(&dist->lock);
-
-	return ret;
-}
-
-/**
- * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
- * @kvm:     The VM structure pointer
- * @cpuid:   The CPU for PPIs
- * @irq_num: The IRQ number that is assigned to the device
- * @level:   Edge-triggered:  true:  to trigger the interrupt
- *			      false: to ignore the call
- *	     Level-sensitive  true:  activates an interrupt
- *			      false: deactivates an interrupt
- *
- * The GIC is not concerned with devices being active-LOW or active-HIGH for
- * level-sensitive interrupts.  You can think of the level parameter as 1
- * being HIGH and 0 being LOW and all devices being active-HIGH.
- */
-int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
-			bool level)
-{
-	if (vgic_update_irq_state(kvm, cpuid, irq_num, level))
-		vgic_kick_vcpus(kvm);
-
-	return 0;
-}
-
-static irqreturn_t vgic_maintenance_handler(int irq, void *data)
-{
-	/*
-	 * We cannot rely on the vgic maintenance interrupt to be
-	 * delivered synchronously. This means we can only use it to
-	 * exit the VM, and we perform the handling of EOIed
-	 * interrupts on the exit path (see vgic_process_maintenance).
-	 */
-	return IRQ_HANDLED;
-}
-
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	int i;
-
-	if (!irqchip_in_kernel(vcpu->kvm))
-		return 0;
-
-	if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
-		return -EBUSY;
-
-	for (i = 0; i < VGIC_NR_IRQS; i++) {
-		if (i < VGIC_NR_PPIS)
-			vgic_bitmap_set_irq_val(&dist->irq_enabled,
-						vcpu->vcpu_id, i, 1);
-		if (i < VGIC_NR_PRIVATE_IRQS)
-			vgic_bitmap_set_irq_val(&dist->irq_cfg,
-						vcpu->vcpu_id, i, VGIC_CFG_EDGE);
-
-		vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
-	}
-
-	/*
-	 * By forcing VMCR to zero, the GIC will restore the binary
-	 * points to their reset values. Anything else resets to zero
-	 * anyway.
-	 */
-	vgic_cpu->vgic_vmcr = 0;
-
-	vgic_cpu->nr_lr = vgic_nr_lr;
-	vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */
-
-	return 0;
-}
-
-static void vgic_init_maintenance_interrupt(void *info)
-{
-	enable_percpu_irq(vgic_maint_irq, 0);
-}
-
-static int vgic_cpu_notify(struct notifier_block *self,
-			   unsigned long action, void *cpu)
-{
-	switch (action) {
-	case CPU_STARTING:
-	case CPU_STARTING_FROZEN:
-		vgic_init_maintenance_interrupt(NULL);
-		break;
-	case CPU_DYING:
-	case CPU_DYING_FROZEN:
-		disable_percpu_irq(vgic_maint_irq);
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block vgic_cpu_nb = {
-	.notifier_call = vgic_cpu_notify,
-};
-
-int kvm_vgic_hyp_init(void)
-{
-	int ret;
-	struct resource vctrl_res;
-	struct resource vcpu_res;
-
-	vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
-	if (!vgic_node) {
-		kvm_err("error: no compatible vgic node in DT\n");
-		return -ENODEV;
-	}
-
-	vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0);
-	if (!vgic_maint_irq) {
-		kvm_err("error getting vgic maintenance irq from DT\n");
-		ret = -ENXIO;
-		goto out;
-	}
-
-	ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler,
-				 "vgic", kvm_get_running_vcpus());
-	if (ret) {
-		kvm_err("Cannot register interrupt %d\n", vgic_maint_irq);
-		goto out;
-	}
-
-	ret = register_cpu_notifier(&vgic_cpu_nb);
-	if (ret) {
-		kvm_err("Cannot register vgic CPU notifier\n");
-		goto out_free_irq;
-	}
-
-	ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
-	if (ret) {
-		kvm_err("Cannot obtain VCTRL resource\n");
-		goto out_free_irq;
-	}
-
-	vgic_vctrl_base = of_iomap(vgic_node, 2);
-	if (!vgic_vctrl_base) {
-		kvm_err("Cannot ioremap VCTRL\n");
-		ret = -ENOMEM;
-		goto out_free_irq;
-	}
-
-	vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR);
-	vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1;
-
-	ret = create_hyp_io_mappings(vgic_vctrl_base,
-				     vgic_vctrl_base + resource_size(&vctrl_res),
-				     vctrl_res.start);
-	if (ret) {
-		kvm_err("Cannot map VCTRL into hyp\n");
-		goto out_unmap;
-	}
-
-	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
-		 vctrl_res.start, vgic_maint_irq);
-	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
-
-	if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
-		kvm_err("Cannot obtain VCPU resource\n");
-		ret = -ENXIO;
-		goto out_unmap;
-	}
-	vgic_vcpu_base = vcpu_res.start;
-
-	goto out;
-
-out_unmap:
-	iounmap(vgic_vctrl_base);
-out_free_irq:
-	free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus());
-out:
-	of_node_put(vgic_node);
-	return ret;
-}
-
-int kvm_vgic_init(struct kvm *kvm)
-{
-	int ret = 0, i;
-
-	mutex_lock(&kvm->lock);
-
-	if (vgic_initialized(kvm))
-		goto out;
-
-	if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
-	    IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
-		kvm_err("Need to set vgic cpu and dist addresses first\n");
-		ret = -ENXIO;
-		goto out;
-	}
-
-	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
-				    vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE);
-	if (ret) {
-		kvm_err("Unable to remap VGIC CPU to VCPU\n");
-		goto out;
-	}
-
-	for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
-		vgic_set_target_reg(kvm, 0, i);
-
-	kvm_timer_init(kvm);
-	kvm->arch.vgic.ready = true;
-out:
-	mutex_unlock(&kvm->lock);
-	return ret;
-}
-
-int kvm_vgic_create(struct kvm *kvm)
-{
-	int ret = 0;
-
-	mutex_lock(&kvm->lock);
-
-	if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
-		ret = -EEXIST;
-		goto out;
-	}
-
-	spin_lock_init(&kvm->arch.vgic.lock);
-	kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
-	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
-	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
-
-out:
-	mutex_unlock(&kvm->lock);
-	return ret;
-}
-
-static bool vgic_ioaddr_overlap(struct kvm *kvm)
-{
-	phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
-	phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
-
-	if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
-		return 0;
-	if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) ||
-	    (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist))
-		return -EBUSY;
-	return 0;
-}
-
-static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
-			      phys_addr_t addr, phys_addr_t size)
-{
-	int ret;
-
-	if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
-		return -EEXIST;
-	if (addr + size < addr)
-		return -EINVAL;
-
-	ret = vgic_ioaddr_overlap(kvm);
-	if (ret)
-		return ret;
-	*ioaddr = addr;
-	return ret;
-}
-
-int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
-{
-	int r = 0;
-	struct vgic_dist *vgic = &kvm->arch.vgic;
-
-	if (addr & ~KVM_PHYS_MASK)
-		return -E2BIG;
-
-	if (addr & (SZ_4K - 1))
-		return -EINVAL;
-
-	mutex_lock(&kvm->lock);
-	switch (type) {
-	case KVM_VGIC_V2_ADDR_TYPE_DIST:
-		r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
-				       addr, KVM_VGIC_V2_DIST_SIZE);
-		break;
-	case KVM_VGIC_V2_ADDR_TYPE_CPU:
-		r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
-				       addr, KVM_VGIC_V2_CPU_SIZE);
-		break;
-	default:
-		r = -ENODEV;
-	}
-
-	mutex_unlock(&kvm->lock);
-	return r;
-}
diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig
index a075b3e0c5c..e026b19b23e 100644
--- a/arch/arm/mach-davinci/Kconfig
+++ b/arch/arm/mach-davinci/Kconfig
@@ -40,6 +40,7 @@ config ARCH_DAVINCI_DA850
 	bool "DA850/OMAP-L138/AM18x based system"
 	select ARCH_DAVINCI_DA8XX
 	select ARCH_HAS_CPUFREQ
+	select CPU_FREQ_TABLE
 	select CP_INTC
 
 config ARCH_DAVINCI_DA8XX
diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c
index 4d6933848ab..a0d4f6038b6 100644
--- a/arch/arm/mach-davinci/da850.c
+++ b/arch/arm/mach-davinci/da850.c
@@ -1004,7 +1004,7 @@ static const struct da850_opp da850_opp_96 = {
 
 #define OPP(freq) 		\
 	{				\
-		.index = (unsigned int) &da850_opp_##freq,	\
+		.driver_data = (unsigned int) &da850_opp_##freq,	\
 		.frequency = freq * 1000, \
 	}
 
@@ -1016,7 +1016,7 @@ static struct cpufreq_frequency_table da850_freq_table[] = {
 	OPP(200),
 	OPP(96),
 	{
-		.index		= 0,
+		.driver_data		= 0,
 		.frequency	= CPUFREQ_TABLE_END,
 	},
 };
@@ -1044,7 +1044,7 @@ static int da850_set_voltage(unsigned int index)
 	if (!cvdd)
 		return -ENODEV;
 
-	opp = (struct da850_opp *) cpufreq_info.freq_table[index].index;
+	opp = (struct da850_opp *) cpufreq_info.freq_table[index].driver_data;
 
 	return regulator_set_voltage(cvdd, opp->cvdd_min, opp->cvdd_max);
 }
@@ -1125,7 +1125,7 @@ static int da850_set_pll0rate(struct clk *clk, unsigned long index)
 	struct pll_data *pll = clk->pll_data;
 	int ret;
 
-	opp = (struct da850_opp *) cpufreq_info.freq_table[index].index;
+	opp = (struct da850_opp *) cpufreq_info.freq_table[index].driver_data;
 	prediv = opp->prediv;
 	mult = opp->mult;
 	postdiv = opp->postdiv;
diff --git a/arch/arm/mach-ebsa110/core.c b/arch/arm/mach-ebsa110/core.c
index b13cc74114d..8a53f346cdb 100644
--- a/arch/arm/mach-ebsa110/core.c
+++ b/arch/arm/mach-ebsa110/core.c
@@ -116,7 +116,7 @@ static void __init ebsa110_map_io(void)
 	iotable_init(ebsa110_io_desc, ARRAY_SIZE(ebsa110_io_desc));
 }
 
-static void __iomem *ebsa110_ioremap_caller(unsigned long cookie, size_t size,
+static void __iomem *ebsa110_ioremap_caller(phys_addr_t cookie, size_t size,
 					    unsigned int flags, void *caller)
 {
 	return (void __iomem *)cookie;
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index ba70a846d1c..855d4a7b462 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -93,7 +93,7 @@ config SOC_EXYNOS5440
 	default y
 	depends on ARCH_EXYNOS5
 	select ARCH_HAS_OPP
-	select ARM_ARCH_TIMER
+	select HAVE_ARM_ARCH_TIMER
 	select AUTO_ZRELADDR
 	select MIGHT_HAVE_PCI
 	select PCI_DOMAINS if PCI
diff --git a/arch/arm/mach-imx/mach-mx31moboard.c b/arch/arm/mach-imx/mach-mx31moboard.c
index dae4cd7be04..6f424eced18 100644
--- a/arch/arm/mach-imx/mach-mx31moboard.c
+++ b/arch/arm/mach-imx/mach-mx31moboard.c
@@ -268,10 +268,11 @@ static struct mc13xxx_led_platform_data moboard_led[] = {
 static struct mc13xxx_leds_platform_data moboard_leds = {
 	.num_leds = ARRAY_SIZE(moboard_led),
 	.led = moboard_led,
-	.flags = MC13783_LED_SLEWLIMTC,
-	.abmode = MC13783_LED_AB_DISABLED,
-	.tc1_period = MC13783_LED_PERIOD_10MS,
-	.tc2_period = MC13783_LED_PERIOD_10MS,
+	.led_control[0]	= MC13783_LED_C0_ENABLE | MC13783_LED_C0_ABMODE(0),
+	.led_control[1]	= MC13783_LED_C1_SLEWLIM,
+	.led_control[2]	= MC13783_LED_C2_SLEWLIM,
+	.led_control[3]	= MC13783_LED_C3_PERIOD(0),
+	.led_control[4]	= MC13783_LED_C3_PERIOD(0),
 };
 
 static struct mc13xxx_buttons_platform_data moboard_buttons = {
diff --git a/arch/arm/mach-imx/mm-imx3.c b/arch/arm/mach-imx/mm-imx3.c
index 8f0f60697f5..0884ca90d15 100644
--- a/arch/arm/mach-imx/mm-imx3.c
+++ b/arch/arm/mach-imx/mm-imx3.c
@@ -65,7 +65,7 @@ static void imx3_idle(void)
 		: "=r" (reg));
 }
 
-static void __iomem *imx3_ioremap_caller(unsigned long phys_addr, size_t size,
+static void __iomem *imx3_ioremap_caller(phys_addr_t phys_addr, size_t size,
 					 unsigned int mtype, void *caller)
 {
 	if (mtype == MT_DEVICE) {
diff --git a/arch/arm/mach-iop13xx/io.c b/arch/arm/mach-iop13xx/io.c
index 183dc8b5511..faaf7d4482c 100644
--- a/arch/arm/mach-iop13xx/io.c
+++ b/arch/arm/mach-iop13xx/io.c
@@ -23,7 +23,7 @@
 
 #include "pci.h"
 
-static void __iomem *__iop13xx_ioremap_caller(unsigned long cookie,
+static void __iomem *__iop13xx_ioremap_caller(phys_addr_t cookie,
 	size_t size, unsigned int mtype, void *caller)
 {
 	void __iomem * retval;
diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c
index 3181f61ea63..1c5bd7637b0 100644
--- a/arch/arm/mach-iop13xx/setup.c
+++ b/arch/arm/mach-iop13xx/setup.c
@@ -469,7 +469,6 @@ void __init iop13xx_platform_init(void)
 			dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
 			dma_cap_set(DMA_XOR, plat_data->cap_mask);
 			dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
-			dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
 			dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
 			break;
 		case IOP13XX_INIT_ADMA_1:
@@ -479,7 +478,6 @@ void __init iop13xx_platform_init(void)
 			dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
 			dma_cap_set(DMA_XOR, plat_data->cap_mask);
 			dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
-			dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
 			dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
 			break;
 		case IOP13XX_INIT_ADMA_2:
@@ -489,7 +487,6 @@ void __init iop13xx_platform_init(void)
 			dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
 			dma_cap_set(DMA_XOR, plat_data->cap_mask);
 			dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
-			dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
 			dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
 			dma_cap_set(DMA_PQ, plat_data->cap_mask);
 			dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask);
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 6600cff6bd9..d7223b3b81f 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -559,7 +559,7 @@ void ixp4xx_restart(char mode, const char *cmd)
  * fallback to the default.
  */
 
-static void __iomem *ixp4xx_ioremap_caller(unsigned long addr, size_t size,
+static void __iomem *ixp4xx_ioremap_caller(phys_addr_t addr, size_t size,
 					   unsigned int mtype, void *caller)
 {
 	if (!is_pci_memory(addr))
diff --git a/arch/arm/mach-msm/common.h b/arch/arm/mach-msm/common.h
index ce8215a269e..421cf7751a8 100644
--- a/arch/arm/mach-msm/common.h
+++ b/arch/arm/mach-msm/common.h
@@ -23,7 +23,7 @@ extern void msm_map_msm8x60_io(void);
 extern void msm_map_msm8960_io(void);
 extern void msm_map_qsd8x50_io(void);
 
-extern void __iomem *__msm_ioremap_caller(unsigned long phys_addr, size_t size,
+extern void __iomem *__msm_ioremap_caller(phys_addr_t phys_addr, size_t size,
 					  unsigned int mtype, void *caller);
 
 extern struct smp_operations msm_smp_ops;
diff --git a/arch/arm/mach-msm/io.c b/arch/arm/mach-msm/io.c
index efa113e4de8..3dc04ccaf59 100644
--- a/arch/arm/mach-msm/io.c
+++ b/arch/arm/mach-msm/io.c
@@ -168,7 +168,7 @@ void __init msm_map_msm7x30_io(void)
 }
 #endif /* CONFIG_ARCH_MSM7X30 */
 
-void __iomem *__msm_ioremap_caller(unsigned long phys_addr, size_t size,
+void __iomem *__msm_ioremap_caller(phys_addr_t phys_addr, size_t size,
 				   unsigned int mtype, void *caller)
 {
 	if (mtype == MT_DEVICE) {
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index 58152b15eca..627fa7e41fb 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -62,6 +62,7 @@ config SOC_OMAP5
 	select HAVE_SMP
 	select COMMON_CLK
 	select HAVE_ARM_ARCH_TIMER
+	select ARM_ERRATA_798181
 
 config SOC_AM33XX
 	bool "AM33XX support"
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index ea5a27ff994..d4f671547c3 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -95,10 +95,6 @@ obj-$(CONFIG_POWER_AVS_OMAP_CLASS3)    += smartreflex-class3.o
 AFLAGS_sleep24xx.o			:=-Wa,-march=armv6
 AFLAGS_sleep34xx.o			:=-Wa,-march=armv7-a$(plus_sec)
 
-ifeq ($(CONFIG_PM_VERBOSE),y)
-CFLAGS_pm_bus.o				+= -DDEBUG
-endif
-
 endif
 
 ifeq ($(CONFIG_CPU_IDLE),y)
diff --git a/arch/arm/mach-omap2/board-igep0020.c b/arch/arm/mach-omap2/board-igep0020.c
index b54562d1235..87e65dde8e1 100644
--- a/arch/arm/mach-omap2/board-igep0020.c
+++ b/arch/arm/mach-omap2/board-igep0020.c
@@ -553,6 +553,37 @@ static struct usbhs_omap_platform_data igep3_usbhs_bdata __initdata = {
 
 #ifdef CONFIG_OMAP_MUX
 static struct omap_board_mux board_mux[] __initdata = {
+	/* Display Sub System */
+	OMAP3_MUX(DSS_PCLK, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_HSYNC, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_VSYNC, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_ACBIAS, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA0, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA1, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA2, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA3, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA4, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA5, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA6, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA7, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA8, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA9, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA10, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA11, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA12, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA13, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA14, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA15, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA16, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA17, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA18, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA19, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA20, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA21, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA22, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	OMAP3_MUX(DSS_DATA23, OMAP_MUX_MODE0 | OMAP_PIN_OUTPUT),
+	/* TFP410 PanelBus DVI Transmitter (GPIO_170) */
+	OMAP3_MUX(HDQ_SIO, OMAP_MUX_MODE4 | OMAP_PIN_OUTPUT),
 	/* SMSC9221 LAN Controller ETH IRQ (GPIO_176) */
 	OMAP3_MUX(MCSPI1_CS2, OMAP_MUX_MODE4 | OMAP_PIN_INPUT),
 	{ .reg_offset = OMAP_MUX_TERMINATOR },
diff --git a/arch/arm/mach-omap2/board-rx51-video.c b/arch/arm/mach-omap2/board-rx51-video.c
index bd74f9f6063..bdd1e3a179e 100644
--- a/arch/arm/mach-omap2/board-rx51-video.c
+++ b/arch/arm/mach-omap2/board-rx51-video.c
@@ -61,7 +61,7 @@ static struct omap_dss_board_info rx51_dss_board_info = {
 
 static int __init rx51_video_init(void)
 {
-	if (!machine_is_nokia_rx51())
+	if (!machine_is_nokia_rx51() && !of_machine_is_compatible("nokia,omap3-n900"))
 		return 0;
 
 	if (omap_mux_init_gpio(RX51_LCD_RESET_GPIO, OMAP_PIN_OUTPUT)) {
diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index aef96e45cb2..3c1279f27d1 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -15,7 +15,6 @@
 #include <linux/io.h>
 #include <linux/clk.h>
 #include <linux/err.h>
-#include <linux/gpio.h>
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/pinctrl/machine.h>
@@ -66,7 +65,7 @@ static int __init omap3_l3_init(void)
 
 	WARN(IS_ERR(pdev), "could not build omap_device for %s\n", oh_name);
 
-	return IS_ERR(pdev) ? PTR_ERR(pdev) : 0;
+	return PTR_RET(pdev);
 }
 omap_postcore_initcall(omap3_l3_init);
 
@@ -100,7 +99,7 @@ static int __init omap4_l3_init(void)
 
 	WARN(IS_ERR(pdev), "could not build omap_device for %s\n", oh_name);
 
-	return IS_ERR(pdev) ? PTR_ERR(pdev) : 0;
+	return PTR_RET(pdev);
 }
 omap_postcore_initcall(omap4_l3_init);
 
diff --git a/arch/arm/mach-omap2/fb.c b/arch/arm/mach-omap2/fb.c
index 190ae493c6e..2ca33cc0c48 100644
--- a/arch/arm/mach-omap2/fb.c
+++ b/arch/arm/mach-omap2/fb.c
@@ -83,10 +83,7 @@ static int __init omap_init_vrfb(void)
 	pdev = platform_device_register_resndata(NULL, "omapvrfb", -1,
 			res, num_res, NULL, 0);
 
-	if (IS_ERR(pdev))
-		return PTR_ERR(pdev);
-	else
-		return 0;
+	return PTR_RET(pdev);
 }
 
 omap_arch_initcall(omap_init_vrfb);
diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c
index 1c7969e965d..f3fdd6afa21 100644
--- a/arch/arm/mach-omap2/gpmc.c
+++ b/arch/arm/mach-omap2/gpmc.c
@@ -1734,7 +1734,7 @@ static int __init omap_gpmc_init(void)
 	pdev = omap_device_build(DEVICE_NAME, -1, oh, NULL, 0);
 	WARN(IS_ERR(pdev), "could not build omap_device for %s\n", oh_name);
 
-	return IS_ERR(pdev) ? PTR_ERR(pdev) : 0;
+	return PTR_RET(pdev);
 }
 omap_postcore_initcall(omap_gpmc_init);
 
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index 68be532f868..5cc92874be7 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -588,11 +588,6 @@ static int _od_runtime_suspend(struct device *dev)
 	return ret;
 }
 
-static int _od_runtime_idle(struct device *dev)
-{
-	return pm_generic_runtime_idle(dev);
-}
-
 static int _od_runtime_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -648,7 +643,7 @@ static int _od_resume_noirq(struct device *dev)
 struct dev_pm_domain omap_device_pm_domain = {
 	.ops = {
 		SET_RUNTIME_PM_OPS(_od_runtime_suspend, _od_runtime_resume,
-				   _od_runtime_idle)
+				   NULL)
 		USE_PLATFORM_PM_SLEEP_OPS
 		.suspend_noirq = _od_suspend_noirq,
 		.resume_noirq = _od_resume_noirq,
diff --git a/arch/arm/mach-omap2/pmu.c b/arch/arm/mach-omap2/pmu.c
index 9ace8eae7ee..33c8846b419 100644
--- a/arch/arm/mach-omap2/pmu.c
+++ b/arch/arm/mach-omap2/pmu.c
@@ -54,10 +54,7 @@ static int __init omap2_init_pmu(unsigned oh_num, char *oh_names[])
 	WARN(IS_ERR(omap_pmu_dev), "Can't build omap_device for %s.\n",
 	     dev_name);
 
-	if (IS_ERR(omap_pmu_dev))
-		return PTR_ERR(omap_pmu_dev);
-
-	return 0;
+	return PTR_RET(omap_pmu_dev);
 }
 
 static int __init omap_init_pmu(void)
diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S
index 88ff83a0942..9086ce03ae1 100644
--- a/arch/arm/mach-omap2/sleep44xx.S
+++ b/arch/arm/mach-omap2/sleep44xx.S
@@ -34,6 +34,8 @@ ppa_zero_params:
 ppa_por_params:
 	.word		1, 0
 
+#ifdef CONFIG_ARCH_OMAP4
+
 /*
  * =============================
  * == CPU suspend finisher ==
@@ -326,7 +328,9 @@ skip_l2en:
 
 	b	cpu_resume			@ Jump to generic resume
 ENDPROC(omap4_cpu_resume)
-#endif
+#endif	/* CONFIG_ARCH_OMAP4 */
+
+#endif	/* defined(CONFIG_SMP) && defined(CONFIG_PM) */
 
 #ifndef CONFIG_OMAP4_ERRATA_I688
 ENTRY(omap_bus_sync)
diff --git a/arch/arm/mach-omap2/smartreflex-class3.c b/arch/arm/mach-omap2/smartreflex-class3.c
index aee3c8940a3..7a42e1960c3 100644
--- a/arch/arm/mach-omap2/smartreflex-class3.c
+++ b/arch/arm/mach-omap2/smartreflex-class3.c
@@ -26,14 +26,14 @@ static int sr_class3_enable(struct omap_sr *sr)
 	}
 
 	omap_vp_enable(sr->voltdm);
-	return sr_enable(sr->voltdm, volt);
+	return sr_enable(sr, volt);
 }
 
 static int sr_class3_disable(struct omap_sr *sr, int is_volt_reset)
 {
-	sr_disable_errgen(sr->voltdm);
+	sr_disable_errgen(sr);
 	omap_vp_disable(sr->voltdm);
-	sr_disable(sr->voltdm);
+	sr_disable(sr);
 	if (is_volt_reset)
 		voltdm_reset(sr->voltdm);
 
@@ -42,7 +42,7 @@ static int sr_class3_disable(struct omap_sr *sr, int is_volt_reset)
 
 static int sr_class3_configure(struct omap_sr *sr)
 {
-	return sr_configure_errgen(sr->voltdm);
+	return sr_configure_errgen(sr);
 }
 
 /* SR class3 structure */
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index 3bdb0fb0202..5f148e72179 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -220,7 +220,7 @@ static int __init omap_dm_timer_init_one(struct omap_dm_timer *timer,
 					 int posted)
 {
 	char name[10]; /* 10 = sizeof("gptXX_Xck0") */
-	const char *oh_name;
+	const char *oh_name = NULL;
 	struct device_node *np;
 	struct omap_hwmod *oh;
 	struct resource irq, mem;
diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig
index 96100dbf5a2..a8427115ee0 100644
--- a/arch/arm/mach-pxa/Kconfig
+++ b/arch/arm/mach-pxa/Kconfig
@@ -615,12 +615,14 @@ endmenu
 config PXA25x
 	bool
 	select CPU_XSCALE
+	select CPU_FREQ_TABLE if CPU_FREQ
 	help
 	  Select code specific to PXA21x/25x/26x variants
 
 config PXA27x
 	bool
 	select CPU_XSCALE
+	select CPU_FREQ_TABLE if CPU_FREQ
 	help
 	  Select code specific to PXA27x variants
 
@@ -633,6 +635,7 @@ config CPU_PXA26x
 config PXA3xx
 	bool
 	select CPU_XSC3
+	select CPU_FREQ_TABLE if CPU_FREQ
 	help
 	  Select code specific to PXA3xx variants
 
diff --git a/arch/arm/mach-s3c24xx/cpufreq-utils.c b/arch/arm/mach-s3c24xx/cpufreq-utils.c
index ddd8280e387..2a0aa5684e7 100644
--- a/arch/arm/mach-s3c24xx/cpufreq-utils.c
+++ b/arch/arm/mach-s3c24xx/cpufreq-utils.c
@@ -60,5 +60,5 @@ void s3c2410_cpufreq_setrefresh(struct s3c_cpufreq_config *cfg)
  */
 void s3c2410_set_fvco(struct s3c_cpufreq_config *cfg)
 {
-	__raw_writel(cfg->pll.index, S3C2410_MPLLCON);
+	__raw_writel(cfg->pll.driver_data, S3C2410_MPLLCON);
 }
diff --git a/arch/arm/mach-s3c24xx/pll-s3c2410.c b/arch/arm/mach-s3c24xx/pll-s3c2410.c
index dcf3420a327..5e37d368594 100644
--- a/arch/arm/mach-s3c24xx/pll-s3c2410.c
+++ b/arch/arm/mach-s3c24xx/pll-s3c2410.c
@@ -33,36 +33,36 @@
 #include <plat/cpu-freq-core.h>
 
 static struct cpufreq_frequency_table pll_vals_12MHz[] = {
-    { .frequency = 34000000,  .index = PLLVAL(82, 2, 3),   },
-    { .frequency = 45000000,  .index = PLLVAL(82, 1, 3),   },
-    { .frequency = 51000000,  .index = PLLVAL(161, 3, 3),  },
-    { .frequency = 48000000,  .index = PLLVAL(120, 2, 3),  },
-    { .frequency = 56000000,  .index = PLLVAL(142, 2, 3),  },
-    { .frequency = 68000000,  .index = PLLVAL(82, 2, 2),   },
-    { .frequency = 79000000,  .index = PLLVAL(71, 1, 2),   },
-    { .frequency = 85000000,  .index = PLLVAL(105, 2, 2),  },
-    { .frequency = 90000000,  .index = PLLVAL(112, 2, 2),  },
-    { .frequency = 101000000, .index = PLLVAL(127, 2, 2),  },
-    { .frequency = 113000000, .index = PLLVAL(105, 1, 2),  },
-    { .frequency = 118000000, .index = PLLVAL(150, 2, 2),  },
-    { .frequency = 124000000, .index = PLLVAL(116, 1, 2),  },
-    { .frequency = 135000000, .index = PLLVAL(82, 2, 1),   },
-    { .frequency = 147000000, .index = PLLVAL(90, 2, 1),   },
-    { .frequency = 152000000, .index = PLLVAL(68, 1, 1),   },
-    { .frequency = 158000000, .index = PLLVAL(71, 1, 1),   },
-    { .frequency = 170000000, .index = PLLVAL(77, 1, 1),   },
-    { .frequency = 180000000, .index = PLLVAL(82, 1, 1),   },
-    { .frequency = 186000000, .index = PLLVAL(85, 1, 1),   },
-    { .frequency = 192000000, .index = PLLVAL(88, 1, 1),   },
-    { .frequency = 203000000, .index = PLLVAL(161, 3, 1),  },
+    { .frequency = 34000000,  .driver_data = PLLVAL(82, 2, 3),   },
+    { .frequency = 45000000,  .driver_data = PLLVAL(82, 1, 3),   },
+    { .frequency = 51000000,  .driver_data = PLLVAL(161, 3, 3),  },
+    { .frequency = 48000000,  .driver_data = PLLVAL(120, 2, 3),  },
+    { .frequency = 56000000,  .driver_data = PLLVAL(142, 2, 3),  },
+    { .frequency = 68000000,  .driver_data = PLLVAL(82, 2, 2),   },
+    { .frequency = 79000000,  .driver_data = PLLVAL(71, 1, 2),   },
+    { .frequency = 85000000,  .driver_data = PLLVAL(105, 2, 2),  },
+    { .frequency = 90000000,  .driver_data = PLLVAL(112, 2, 2),  },
+    { .frequency = 101000000, .driver_data = PLLVAL(127, 2, 2),  },
+    { .frequency = 113000000, .driver_data = PLLVAL(105, 1, 2),  },
+    { .frequency = 118000000, .driver_data = PLLVAL(150, 2, 2),  },
+    { .frequency = 124000000, .driver_data = PLLVAL(116, 1, 2),  },
+    { .frequency = 135000000, .driver_data = PLLVAL(82, 2, 1),   },
+    { .frequency = 147000000, .driver_data = PLLVAL(90, 2, 1),   },
+    { .frequency = 152000000, .driver_data = PLLVAL(68, 1, 1),   },
+    { .frequency = 158000000, .driver_data = PLLVAL(71, 1, 1),   },
+    { .frequency = 170000000, .driver_data = PLLVAL(77, 1, 1),   },
+    { .frequency = 180000000, .driver_data = PLLVAL(82, 1, 1),   },
+    { .frequency = 186000000, .driver_data = PLLVAL(85, 1, 1),   },
+    { .frequency = 192000000, .driver_data = PLLVAL(88, 1, 1),   },
+    { .frequency = 203000000, .driver_data = PLLVAL(161, 3, 1),  },
 
     /* 2410A extras */
 
-    { .frequency = 210000000, .index = PLLVAL(132, 2, 1),  },
-    { .frequency = 226000000, .index = PLLVAL(105, 1, 1),  },
-    { .frequency = 266000000, .index = PLLVAL(125, 1, 1),  },
-    { .frequency = 268000000, .index = PLLVAL(126, 1, 1),  },
-    { .frequency = 270000000, .index = PLLVAL(127, 1, 1),  },
+    { .frequency = 210000000, .driver_data = PLLVAL(132, 2, 1),  },
+    { .frequency = 226000000, .driver_data = PLLVAL(105, 1, 1),  },
+    { .frequency = 266000000, .driver_data = PLLVAL(125, 1, 1),  },
+    { .frequency = 268000000, .driver_data = PLLVAL(126, 1, 1),  },
+    { .frequency = 270000000, .driver_data = PLLVAL(127, 1, 1),  },
 };
 
 static int s3c2410_plls_add(struct device *dev, struct subsys_interface *sif)
diff --git a/arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c b/arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c
index 67378175831..a19460e6e7b 100644
--- a/arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c
+++ b/arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c
@@ -21,33 +21,33 @@
 #include <plat/cpu-freq-core.h>
 
 static struct cpufreq_frequency_table s3c2440_plls_12[] __initdata = {
-	{ .frequency = 75000000,	.index = PLLVAL(0x75, 3, 3),  }, 	/* FVco 600.000000 */
-	{ .frequency = 80000000,	.index = PLLVAL(0x98, 4, 3),  }, 	/* FVco 640.000000 */
-	{ .frequency = 90000000,	.index = PLLVAL(0x70, 2, 3),  }, 	/* FVco 720.000000 */
-	{ .frequency = 100000000,	.index = PLLVAL(0x5c, 1, 3),  }, 	/* FVco 800.000000 */
-	{ .frequency = 110000000,	.index = PLLVAL(0x66, 1, 3),  }, 	/* FVco 880.000000 */
-	{ .frequency = 120000000,	.index = PLLVAL(0x70, 1, 3),  }, 	/* FVco 960.000000 */
-	{ .frequency = 150000000,	.index = PLLVAL(0x75, 3, 2),  }, 	/* FVco 600.000000 */
-	{ .frequency = 160000000,	.index = PLLVAL(0x98, 4, 2),  }, 	/* FVco 640.000000 */
-	{ .frequency = 170000000,	.index = PLLVAL(0x4d, 1, 2),  }, 	/* FVco 680.000000 */
-	{ .frequency = 180000000,	.index = PLLVAL(0x70, 2, 2),  }, 	/* FVco 720.000000 */
-	{ .frequency = 190000000,	.index = PLLVAL(0x57, 1, 2),  }, 	/* FVco 760.000000 */
-	{ .frequency = 200000000,	.index = PLLVAL(0x5c, 1, 2),  }, 	/* FVco 800.000000 */
-	{ .frequency = 210000000,	.index = PLLVAL(0x84, 2, 2),  }, 	/* FVco 840.000000 */
-	{ .frequency = 220000000,	.index = PLLVAL(0x66, 1, 2),  }, 	/* FVco 880.000000 */
-	{ .frequency = 230000000,	.index = PLLVAL(0x6b, 1, 2),  }, 	/* FVco 920.000000 */
-	{ .frequency = 240000000,	.index = PLLVAL(0x70, 1, 2),  }, 	/* FVco 960.000000 */
-	{ .frequency = 300000000,	.index = PLLVAL(0x75, 3, 1),  }, 	/* FVco 600.000000 */
-	{ .frequency = 310000000,	.index = PLLVAL(0x93, 4, 1),  }, 	/* FVco 620.000000 */
-	{ .frequency = 320000000,	.index = PLLVAL(0x98, 4, 1),  }, 	/* FVco 640.000000 */
-	{ .frequency = 330000000,	.index = PLLVAL(0x66, 2, 1),  }, 	/* FVco 660.000000 */
-	{ .frequency = 340000000,	.index = PLLVAL(0x4d, 1, 1),  }, 	/* FVco 680.000000 */
-	{ .frequency = 350000000,	.index = PLLVAL(0xa7, 4, 1),  }, 	/* FVco 700.000000 */
-	{ .frequency = 360000000,	.index = PLLVAL(0x70, 2, 1),  }, 	/* FVco 720.000000 */
-	{ .frequency = 370000000,	.index = PLLVAL(0xb1, 4, 1),  }, 	/* FVco 740.000000 */
-	{ .frequency = 380000000,	.index = PLLVAL(0x57, 1, 1),  }, 	/* FVco 760.000000 */
-	{ .frequency = 390000000,	.index = PLLVAL(0x7a, 2, 1),  }, 	/* FVco 780.000000 */
-	{ .frequency = 400000000,	.index = PLLVAL(0x5c, 1, 1),  }, 	/* FVco 800.000000 */
+	{ .frequency = 75000000,	.driver_data = PLLVAL(0x75, 3, 3),  }, 	/* FVco 600.000000 */
+	{ .frequency = 80000000,	.driver_data = PLLVAL(0x98, 4, 3),  }, 	/* FVco 640.000000 */
+	{ .frequency = 90000000,	.driver_data = PLLVAL(0x70, 2, 3),  }, 	/* FVco 720.000000 */
+	{ .frequency = 100000000,	.driver_data = PLLVAL(0x5c, 1, 3),  }, 	/* FVco 800.000000 */
+	{ .frequency = 110000000,	.driver_data = PLLVAL(0x66, 1, 3),  }, 	/* FVco 880.000000 */
+	{ .frequency = 120000000,	.driver_data = PLLVAL(0x70, 1, 3),  }, 	/* FVco 960.000000 */
+	{ .frequency = 150000000,	.driver_data = PLLVAL(0x75, 3, 2),  }, 	/* FVco 600.000000 */
+	{ .frequency = 160000000,	.driver_data = PLLVAL(0x98, 4, 2),  }, 	/* FVco 640.000000 */
+	{ .frequency = 170000000,	.driver_data = PLLVAL(0x4d, 1, 2),  }, 	/* FVco 680.000000 */
+	{ .frequency = 180000000,	.driver_data = PLLVAL(0x70, 2, 2),  }, 	/* FVco 720.000000 */
+	{ .frequency = 190000000,	.driver_data = PLLVAL(0x57, 1, 2),  }, 	/* FVco 760.000000 */
+	{ .frequency = 200000000,	.driver_data = PLLVAL(0x5c, 1, 2),  }, 	/* FVco 800.000000 */
+	{ .frequency = 210000000,	.driver_data = PLLVAL(0x84, 2, 2),  }, 	/* FVco 840.000000 */
+	{ .frequency = 220000000,	.driver_data = PLLVAL(0x66, 1, 2),  }, 	/* FVco 880.000000 */
+	{ .frequency = 230000000,	.driver_data = PLLVAL(0x6b, 1, 2),  }, 	/* FVco 920.000000 */
+	{ .frequency = 240000000,	.driver_data = PLLVAL(0x70, 1, 2),  }, 	/* FVco 960.000000 */
+	{ .frequency = 300000000,	.driver_data = PLLVAL(0x75, 3, 1),  }, 	/* FVco 600.000000 */
+	{ .frequency = 310000000,	.driver_data = PLLVAL(0x93, 4, 1),  }, 	/* FVco 620.000000 */
+	{ .frequency = 320000000,	.driver_data = PLLVAL(0x98, 4, 1),  }, 	/* FVco 640.000000 */
+	{ .frequency = 330000000,	.driver_data = PLLVAL(0x66, 2, 1),  }, 	/* FVco 660.000000 */
+	{ .frequency = 340000000,	.driver_data = PLLVAL(0x4d, 1, 1),  }, 	/* FVco 680.000000 */
+	{ .frequency = 350000000,	.driver_data = PLLVAL(0xa7, 4, 1),  }, 	/* FVco 700.000000 */
+	{ .frequency = 360000000,	.driver_data = PLLVAL(0x70, 2, 1),  }, 	/* FVco 720.000000 */
+	{ .frequency = 370000000,	.driver_data = PLLVAL(0xb1, 4, 1),  }, 	/* FVco 740.000000 */
+	{ .frequency = 380000000,	.driver_data = PLLVAL(0x57, 1, 1),  }, 	/* FVco 760.000000 */
+	{ .frequency = 390000000,	.driver_data = PLLVAL(0x7a, 2, 1),  }, 	/* FVco 780.000000 */
+	{ .frequency = 400000000,	.driver_data = PLLVAL(0x5c, 1, 1),  }, 	/* FVco 800.000000 */
 };
 
 static int s3c2440_plls12_add(struct device *dev, struct subsys_interface *sif)
diff --git a/arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c b/arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c
index debfa106289..1191b290562 100644
--- a/arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c
+++ b/arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c
@@ -21,61 +21,61 @@
 #include <plat/cpu-freq-core.h>
 
 static struct cpufreq_frequency_table s3c2440_plls_169344[] __initdata = {
-	{ .frequency = 78019200,	.index = PLLVAL(121, 5, 3), 	}, 	/* FVco 624.153600 */
-	{ .frequency = 84067200,	.index = PLLVAL(131, 5, 3), 	}, 	/* FVco 672.537600 */
-	{ .frequency = 90115200,	.index = PLLVAL(141, 5, 3), 	}, 	/* FVco 720.921600 */
-	{ .frequency = 96163200,	.index = PLLVAL(151, 5, 3), 	}, 	/* FVco 769.305600 */
-	{ .frequency = 102135600,	.index = PLLVAL(185, 6, 3), 	}, 	/* FVco 817.084800 */
-	{ .frequency = 108259200,	.index = PLLVAL(171, 5, 3), 	}, 	/* FVco 866.073600 */
-	{ .frequency = 114307200,	.index = PLLVAL(127, 3, 3), 	}, 	/* FVco 914.457600 */
-	{ .frequency = 120234240,	.index = PLLVAL(134, 3, 3), 	}, 	/* FVco 961.873920 */
-	{ .frequency = 126161280,	.index = PLLVAL(141, 3, 3), 	}, 	/* FVco 1009.290240 */
-	{ .frequency = 132088320,	.index = PLLVAL(148, 3, 3), 	}, 	/* FVco 1056.706560 */
-	{ .frequency = 138015360,	.index = PLLVAL(155, 3, 3), 	}, 	/* FVco 1104.122880 */
-	{ .frequency = 144789120,	.index = PLLVAL(163, 3, 3), 	}, 	/* FVco 1158.312960 */
-	{ .frequency = 150100363,	.index = PLLVAL(187, 9, 2), 	}, 	/* FVco 600.401454 */
-	{ .frequency = 156038400,	.index = PLLVAL(121, 5, 2), 	}, 	/* FVco 624.153600 */
-	{ .frequency = 162086400,	.index = PLLVAL(126, 5, 2), 	}, 	/* FVco 648.345600 */
-	{ .frequency = 168134400,	.index = PLLVAL(131, 5, 2), 	}, 	/* FVco 672.537600 */
-	{ .frequency = 174048000,	.index = PLLVAL(177, 7, 2), 	}, 	/* FVco 696.192000 */
-	{ .frequency = 180230400,	.index = PLLVAL(141, 5, 2), 	}, 	/* FVco 720.921600 */
-	{ .frequency = 186278400,	.index = PLLVAL(124, 4, 2), 	}, 	/* FVco 745.113600 */
-	{ .frequency = 192326400,	.index = PLLVAL(151, 5, 2), 	}, 	/* FVco 769.305600 */
-	{ .frequency = 198132480,	.index = PLLVAL(109, 3, 2), 	}, 	/* FVco 792.529920 */
-	{ .frequency = 204271200,	.index = PLLVAL(185, 6, 2), 	}, 	/* FVco 817.084800 */
-	{ .frequency = 210268800,	.index = PLLVAL(141, 4, 2), 	}, 	/* FVco 841.075200 */
-	{ .frequency = 216518400,	.index = PLLVAL(171, 5, 2), 	}, 	/* FVco 866.073600 */
-	{ .frequency = 222264000,	.index = PLLVAL(97, 2, 2), 	}, 	/* FVco 889.056000 */
-	{ .frequency = 228614400,	.index = PLLVAL(127, 3, 2), 	}, 	/* FVco 914.457600 */
-	{ .frequency = 234259200,	.index = PLLVAL(158, 4, 2), 	}, 	/* FVco 937.036800 */
-	{ .frequency = 240468480,	.index = PLLVAL(134, 3, 2), 	}, 	/* FVco 961.873920 */
-	{ .frequency = 246960000,	.index = PLLVAL(167, 4, 2), 	}, 	/* FVco 987.840000 */
-	{ .frequency = 252322560,	.index = PLLVAL(141, 3, 2), 	}, 	/* FVco 1009.290240 */
-	{ .frequency = 258249600,	.index = PLLVAL(114, 2, 2), 	}, 	/* FVco 1032.998400 */
-	{ .frequency = 264176640,	.index = PLLVAL(148, 3, 2), 	}, 	/* FVco 1056.706560 */
-	{ .frequency = 270950400,	.index = PLLVAL(120, 2, 2), 	}, 	/* FVco 1083.801600 */
-	{ .frequency = 276030720,	.index = PLLVAL(155, 3, 2), 	}, 	/* FVco 1104.122880 */
-	{ .frequency = 282240000,	.index = PLLVAL(92, 1, 2), 	}, 	/* FVco 1128.960000 */
-	{ .frequency = 289578240,	.index = PLLVAL(163, 3, 2), 	}, 	/* FVco 1158.312960 */
-	{ .frequency = 294235200,	.index = PLLVAL(131, 2, 2), 	}, 	/* FVco 1176.940800 */
-	{ .frequency = 300200727,	.index = PLLVAL(187, 9, 1), 	}, 	/* FVco 600.401454 */
-	{ .frequency = 306358690,	.index = PLLVAL(191, 9, 1), 	}, 	/* FVco 612.717380 */
-	{ .frequency = 312076800,	.index = PLLVAL(121, 5, 1), 	}, 	/* FVco 624.153600 */
-	{ .frequency = 318366720,	.index = PLLVAL(86, 3, 1), 	}, 	/* FVco 636.733440 */
-	{ .frequency = 324172800,	.index = PLLVAL(126, 5, 1), 	}, 	/* FVco 648.345600 */
-	{ .frequency = 330220800,	.index = PLLVAL(109, 4, 1), 	}, 	/* FVco 660.441600 */
-	{ .frequency = 336268800,	.index = PLLVAL(131, 5, 1), 	}, 	/* FVco 672.537600 */
-	{ .frequency = 342074880,	.index = PLLVAL(93, 3, 1), 	}, 	/* FVco 684.149760 */
-	{ .frequency = 348096000,	.index = PLLVAL(177, 7, 1), 	}, 	/* FVco 696.192000 */
-	{ .frequency = 355622400,	.index = PLLVAL(118, 4, 1), 	}, 	/* FVco 711.244800 */
-	{ .frequency = 360460800,	.index = PLLVAL(141, 5, 1), 	}, 	/* FVco 720.921600 */
-	{ .frequency = 366206400,	.index = PLLVAL(165, 6, 1), 	}, 	/* FVco 732.412800 */
-	{ .frequency = 372556800,	.index = PLLVAL(124, 4, 1), 	}, 	/* FVco 745.113600 */
-	{ .frequency = 378201600,	.index = PLLVAL(126, 4, 1), 	}, 	/* FVco 756.403200 */
-	{ .frequency = 384652800,	.index = PLLVAL(151, 5, 1), 	}, 	/* FVco 769.305600 */
-	{ .frequency = 391608000,	.index = PLLVAL(177, 6, 1), 	}, 	/* FVco 783.216000 */
-	{ .frequency = 396264960,	.index = PLLVAL(109, 3, 1), 	}, 	/* FVco 792.529920 */
-	{ .frequency = 402192000,	.index = PLLVAL(87, 2, 1), 	}, 	/* FVco 804.384000 */
+	{ .frequency = 78019200,	.driver_data = PLLVAL(121, 5, 3), 	}, 	/* FVco 624.153600 */
+	{ .frequency = 84067200,	.driver_data = PLLVAL(131, 5, 3), 	}, 	/* FVco 672.537600 */
+	{ .frequency = 90115200,	.driver_data = PLLVAL(141, 5, 3), 	}, 	/* FVco 720.921600 */
+	{ .frequency = 96163200,	.driver_data = PLLVAL(151, 5, 3), 	}, 	/* FVco 769.305600 */
+	{ .frequency = 102135600,	.driver_data = PLLVAL(185, 6, 3), 	}, 	/* FVco 817.084800 */
+	{ .frequency = 108259200,	.driver_data = PLLVAL(171, 5, 3), 	}, 	/* FVco 866.073600 */
+	{ .frequency = 114307200,	.driver_data = PLLVAL(127, 3, 3), 	}, 	/* FVco 914.457600 */
+	{ .frequency = 120234240,	.driver_data = PLLVAL(134, 3, 3), 	}, 	/* FVco 961.873920 */
+	{ .frequency = 126161280,	.driver_data = PLLVAL(141, 3, 3), 	}, 	/* FVco 1009.290240 */
+	{ .frequency = 132088320,	.driver_data = PLLVAL(148, 3, 3), 	}, 	/* FVco 1056.706560 */
+	{ .frequency = 138015360,	.driver_data = PLLVAL(155, 3, 3), 	}, 	/* FVco 1104.122880 */
+	{ .frequency = 144789120,	.driver_data = PLLVAL(163, 3, 3), 	}, 	/* FVco 1158.312960 */
+	{ .frequency = 150100363,	.driver_data = PLLVAL(187, 9, 2), 	}, 	/* FVco 600.401454 */
+	{ .frequency = 156038400,	.driver_data = PLLVAL(121, 5, 2), 	}, 	/* FVco 624.153600 */
+	{ .frequency = 162086400,	.driver_data = PLLVAL(126, 5, 2), 	}, 	/* FVco 648.345600 */
+	{ .frequency = 168134400,	.driver_data = PLLVAL(131, 5, 2), 	}, 	/* FVco 672.537600 */
+	{ .frequency = 174048000,	.driver_data = PLLVAL(177, 7, 2), 	}, 	/* FVco 696.192000 */
+	{ .frequency = 180230400,	.driver_data = PLLVAL(141, 5, 2), 	}, 	/* FVco 720.921600 */
+	{ .frequency = 186278400,	.driver_data = PLLVAL(124, 4, 2), 	}, 	/* FVco 745.113600 */
+	{ .frequency = 192326400,	.driver_data = PLLVAL(151, 5, 2), 	}, 	/* FVco 769.305600 */
+	{ .frequency = 198132480,	.driver_data = PLLVAL(109, 3, 2), 	}, 	/* FVco 792.529920 */
+	{ .frequency = 204271200,	.driver_data = PLLVAL(185, 6, 2), 	}, 	/* FVco 817.084800 */
+	{ .frequency = 210268800,	.driver_data = PLLVAL(141, 4, 2), 	}, 	/* FVco 841.075200 */
+	{ .frequency = 216518400,	.driver_data = PLLVAL(171, 5, 2), 	}, 	/* FVco 866.073600 */
+	{ .frequency = 222264000,	.driver_data = PLLVAL(97, 2, 2), 	}, 	/* FVco 889.056000 */
+	{ .frequency = 228614400,	.driver_data = PLLVAL(127, 3, 2), 	}, 	/* FVco 914.457600 */
+	{ .frequency = 234259200,	.driver_data = PLLVAL(158, 4, 2), 	}, 	/* FVco 937.036800 */
+	{ .frequency = 240468480,	.driver_data = PLLVAL(134, 3, 2), 	}, 	/* FVco 961.873920 */
+	{ .frequency = 246960000,	.driver_data = PLLVAL(167, 4, 2), 	}, 	/* FVco 987.840000 */
+	{ .frequency = 252322560,	.driver_data = PLLVAL(141, 3, 2), 	}, 	/* FVco 1009.290240 */
+	{ .frequency = 258249600,	.driver_data = PLLVAL(114, 2, 2), 	}, 	/* FVco 1032.998400 */
+	{ .frequency = 264176640,	.driver_data = PLLVAL(148, 3, 2), 	}, 	/* FVco 1056.706560 */
+	{ .frequency = 270950400,	.driver_data = PLLVAL(120, 2, 2), 	}, 	/* FVco 1083.801600 */
+	{ .frequency = 276030720,	.driver_data = PLLVAL(155, 3, 2), 	}, 	/* FVco 1104.122880 */
+	{ .frequency = 282240000,	.driver_data = PLLVAL(92, 1, 2), 	}, 	/* FVco 1128.960000 */
+	{ .frequency = 289578240,	.driver_data = PLLVAL(163, 3, 2), 	}, 	/* FVco 1158.312960 */
+	{ .frequency = 294235200,	.driver_data = PLLVAL(131, 2, 2), 	}, 	/* FVco 1176.940800 */
+	{ .frequency = 300200727,	.driver_data = PLLVAL(187, 9, 1), 	}, 	/* FVco 600.401454 */
+	{ .frequency = 306358690,	.driver_data = PLLVAL(191, 9, 1), 	}, 	/* FVco 612.717380 */
+	{ .frequency = 312076800,	.driver_data = PLLVAL(121, 5, 1), 	}, 	/* FVco 624.153600 */
+	{ .frequency = 318366720,	.driver_data = PLLVAL(86, 3, 1), 	}, 	/* FVco 636.733440 */
+	{ .frequency = 324172800,	.driver_data = PLLVAL(126, 5, 1), 	}, 	/* FVco 648.345600 */
+	{ .frequency = 330220800,	.driver_data = PLLVAL(109, 4, 1), 	}, 	/* FVco 660.441600 */
+	{ .frequency = 336268800,	.driver_data = PLLVAL(131, 5, 1), 	}, 	/* FVco 672.537600 */
+	{ .frequency = 342074880,	.driver_data = PLLVAL(93, 3, 1), 	}, 	/* FVco 684.149760 */
+	{ .frequency = 348096000,	.driver_data = PLLVAL(177, 7, 1), 	}, 	/* FVco 696.192000 */
+	{ .frequency = 355622400,	.driver_data = PLLVAL(118, 4, 1), 	}, 	/* FVco 711.244800 */
+	{ .frequency = 360460800,	.driver_data = PLLVAL(141, 5, 1), 	}, 	/* FVco 720.921600 */
+	{ .frequency = 366206400,	.driver_data = PLLVAL(165, 6, 1), 	}, 	/* FVco 732.412800 */
+	{ .frequency = 372556800,	.driver_data = PLLVAL(124, 4, 1), 	}, 	/* FVco 745.113600 */
+	{ .frequency = 378201600,	.driver_data = PLLVAL(126, 4, 1), 	}, 	/* FVco 756.403200 */
+	{ .frequency = 384652800,	.driver_data = PLLVAL(151, 5, 1), 	}, 	/* FVco 769.305600 */
+	{ .frequency = 391608000,	.driver_data = PLLVAL(177, 6, 1), 	}, 	/* FVco 783.216000 */
+	{ .frequency = 396264960,	.driver_data = PLLVAL(109, 3, 1), 	}, 	/* FVco 792.529920 */
+	{ .frequency = 402192000,	.driver_data = PLLVAL(87, 2, 1), 	}, 	/* FVco 804.384000 */
 };
 
 static int s3c2440_plls169344_add(struct device *dev,
diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig
index db27e8eef19..3912ce91fee 100644
--- a/arch/arm/mach-shmobile/Kconfig
+++ b/arch/arm/mach-shmobile/Kconfig
@@ -23,7 +23,7 @@ config ARCH_R8A73A4
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARM_GIC
 	select CPU_V7
-	select ARM_ARCH_TIMER
+	select HAVE_ARM_ARCH_TIMER
 	select SH_CLK_CPG
 	select RENESAS_IRQC
 
@@ -59,7 +59,7 @@ config ARCH_R8A7790
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARM_GIC
 	select CPU_V7
-	select ARM_ARCH_TIMER
+	select HAVE_ARM_ARCH_TIMER
 	select SH_CLK_CPG
 	select RENESAS_IRQC
 
diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c
index 7e105932c09..5390c6bbbc0 100644
--- a/arch/arm/mach-shmobile/clock-sh7372.c
+++ b/arch/arm/mach-shmobile/clock-sh7372.c
@@ -142,15 +142,15 @@ static void pllc2_table_rebuild(struct clk *clk)
 	/* Initialise PLLC2 frequency table */
 	for (i = 0; i < ARRAY_SIZE(pllc2_freq_table) - 2; i++) {
 		pllc2_freq_table[i].frequency = clk->parent->rate * (i + 20) * 2;
-		pllc2_freq_table[i].index = i;
+		pllc2_freq_table[i].driver_data = i;
 	}
 
 	/* This is a special entry - switching PLL off makes it a repeater */
 	pllc2_freq_table[i].frequency = clk->parent->rate;
-	pllc2_freq_table[i].index = i;
+	pllc2_freq_table[i].driver_data = i;
 
 	pllc2_freq_table[++i].frequency = CPUFREQ_TABLE_END;
-	pllc2_freq_table[i].index = i;
+	pllc2_freq_table[i].driver_data = i;
 }
 
 static unsigned long pllc2_recalc(struct clk *clk)
diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig
index 84d72fc36df..ef3a8da49b2 100644
--- a/arch/arm/mach-tegra/Kconfig
+++ b/arch/arm/mach-tegra/Kconfig
@@ -28,7 +28,6 @@ config ARCH_TEGRA_2x_SOC
 	select ARM_ERRATA_754327 if SMP
 	select ARM_ERRATA_764369 if SMP
 	select ARM_GIC
-	select CPU_FREQ_TABLE if CPU_FREQ
 	select CPU_V7
 	select PINCTRL
 	select PINCTRL_TEGRA20
@@ -46,7 +45,6 @@ config ARCH_TEGRA_3x_SOC
 	select ARM_ERRATA_754322
 	select ARM_ERRATA_764369 if SMP
 	select ARM_GIC
-	select CPU_FREQ_TABLE if CPU_FREQ
 	select CPU_V7
 	select PINCTRL
 	select PINCTRL_TEGRA30
@@ -60,10 +58,9 @@ config ARCH_TEGRA_3x_SOC
 
 config ARCH_TEGRA_114_SOC
 	bool "Enable support for Tegra114 family"
-	select ARM_ARCH_TIMER
+	select HAVE_ARM_ARCH_TIMER
 	select ARM_GIC
 	select ARM_L1_CACHE_SHIFT_6
-	select CPU_FREQ_TABLE if CPU_FREQ
 	select CPU_V7
 	select PINCTRL
 	select PINCTRL_TEGRA114
diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c
index ec5836b1e71..b25153e2eba 100644
--- a/arch/arm/mach-tegra/common.c
+++ b/arch/arm/mach-tegra/common.c
@@ -23,7 +23,7 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/irqchip.h>
-#include <linux/clk/tegra.h>
+#include <linux/clk-provider.h>
 
 #include <asm/hardware/cache-l2x0.h>
 
@@ -60,7 +60,7 @@ u32 tegra_uart_config[4] = {
 #ifdef CONFIG_OF
 void __init tegra_dt_init_irq(void)
 {
-	tegra_clocks_init();
+	of_clk_init(NULL);
 	tegra_pmc_init();
 	tegra_init_irq();
 	irqchip_init();
diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c
index b6145ea5164..e6fb0239151 100644
--- a/arch/arm/mach-ux500/cpu.c
+++ b/arch/arm/mach-ux500/cpu.c
@@ -76,13 +76,15 @@ void __init ux500_init_irq(void)
 	} else if (cpu_is_u9540()) {
 		prcmu_early_init(U8500_PRCMU_BASE, SZ_8K - 1);
 		ux500_pm_init(U8500_PRCMU_BASE, SZ_8K - 1);
-		u8500_clk_init(U8500_CLKRST1_BASE, U8500_CLKRST2_BASE,
+		u9540_clk_init(U8500_CLKRST1_BASE, U8500_CLKRST2_BASE,
 			       U8500_CLKRST3_BASE, U8500_CLKRST5_BASE,
 			       U8500_CLKRST6_BASE);
 	} else if (cpu_is_u8540()) {
 		prcmu_early_init(U8500_PRCMU_BASE, SZ_8K + SZ_4K - 1);
 		ux500_pm_init(U8500_PRCMU_BASE, SZ_8K + SZ_4K - 1);
-		u8540_clk_init();
+		u8540_clk_init(U8500_CLKRST1_BASE, U8500_CLKRST2_BASE,
+			       U8500_CLKRST3_BASE, U8500_CLKRST5_BASE,
+			       U8500_CLKRST6_BASE);
 	}
 }
 
diff --git a/arch/arm/mach-virt/Kconfig b/arch/arm/mach-virt/Kconfig
index 8958f0d896b..081d4692943 100644
--- a/arch/arm/mach-virt/Kconfig
+++ b/arch/arm/mach-virt/Kconfig
@@ -2,7 +2,7 @@ config ARCH_VIRT
 	bool "Dummy Virtual Machine" if ARCH_MULTI_V7
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARM_GIC
-	select ARM_ARCH_TIMER
+	select HAVE_ARM_ARCH_TIMER
 	select ARM_PSCI
 	select HAVE_SMP
 	select CPU_V7
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 9e8101ecd63..6cacdc8dd65 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -392,7 +392,8 @@ config CPU_V7
 	select CPU_CACHE_V7
 	select CPU_CACHE_VIPT
 	select CPU_COPY_V6 if MMU
-	select CPU_CP15_MMU
+	select CPU_CP15_MMU if MMU
+	select CPU_CP15_MPU if !MMU
 	select CPU_HAS_ASID if MMU
 	select CPU_PABRT_V7
 	select CPU_TLB_V7 if MMU
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index ee558a01f39..ecfe6e53f6e 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_MODULES)		+= proc-syms.o
 
 obj-$(CONFIG_ALIGNMENT_TRAP)	+= alignment.o
 obj-$(CONFIG_HIGHMEM)		+= highmem.o
+obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 
 obj-$(CONFIG_CPU_ABRT_NOMMU)	+= abort-nommu.o
 obj-$(CONFIG_CPU_ABRT_EV4)	+= abort-ev4.o
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index c465faca51b..d70e0aba0c9 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -523,6 +523,147 @@ static void aurora_flush_range(unsigned long start, unsigned long end)
 	}
 }
 
+/*
+ * For certain Broadcom SoCs, depending on the address range, different offsets
+ * need to be added to the address before passing it to L2 for
+ * invalidation/clean/flush
+ *
+ * Section Address Range              Offset        EMI
+ *   1     0x00000000 - 0x3FFFFFFF    0x80000000    VC
+ *   2     0x40000000 - 0xBFFFFFFF    0x40000000    SYS
+ *   3     0xC0000000 - 0xFFFFFFFF    0x80000000    VC
+ *
+ * When the start and end addresses have crossed two different sections, we
+ * need to break the L2 operation into two, each within its own section.
+ * For example, if we need to invalidate addresses starts at 0xBFFF0000 and
+ * ends at 0xC0001000, we need do invalidate 1) 0xBFFF0000 - 0xBFFFFFFF and 2)
+ * 0xC0000000 - 0xC0001000
+ *
+ * Note 1:
+ * By breaking a single L2 operation into two, we may potentially suffer some
+ * performance hit, but keep in mind the cross section case is very rare
+ *
+ * Note 2:
+ * We do not need to handle the case when the start address is in
+ * Section 1 and the end address is in Section 3, since it is not a valid use
+ * case
+ *
+ * Note 3:
+ * Section 1 in practical terms can no longer be used on rev A2. Because of
+ * that the code does not need to handle section 1 at all.
+ *
+ */
+#define BCM_SYS_EMI_START_ADDR        0x40000000UL
+#define BCM_VC_EMI_SEC3_START_ADDR    0xC0000000UL
+
+#define BCM_SYS_EMI_OFFSET            0x40000000UL
+#define BCM_VC_EMI_OFFSET             0x80000000UL
+
+static inline int bcm_addr_is_sys_emi(unsigned long addr)
+{
+	return (addr >= BCM_SYS_EMI_START_ADDR) &&
+		(addr < BCM_VC_EMI_SEC3_START_ADDR);
+}
+
+static inline unsigned long bcm_l2_phys_addr(unsigned long addr)
+{
+	if (bcm_addr_is_sys_emi(addr))
+		return addr + BCM_SYS_EMI_OFFSET;
+	else
+		return addr + BCM_VC_EMI_OFFSET;
+}
+
+static void bcm_inv_range(unsigned long start, unsigned long end)
+{
+	unsigned long new_start, new_end;
+
+	BUG_ON(start < BCM_SYS_EMI_START_ADDR);
+
+	if (unlikely(end <= start))
+		return;
+
+	new_start = bcm_l2_phys_addr(start);
+	new_end = bcm_l2_phys_addr(end);
+
+	/* normal case, no cross section between start and end */
+	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
+		l2x0_inv_range(new_start, new_end);
+		return;
+	}
+
+	/* They cross sections, so it can only be a cross from section
+	 * 2 to section 3
+	 */
+	l2x0_inv_range(new_start,
+		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
+	l2x0_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
+		new_end);
+}
+
+static void bcm_clean_range(unsigned long start, unsigned long end)
+{
+	unsigned long new_start, new_end;
+
+	BUG_ON(start < BCM_SYS_EMI_START_ADDR);
+
+	if (unlikely(end <= start))
+		return;
+
+	if ((end - start) >= l2x0_size) {
+		l2x0_clean_all();
+		return;
+	}
+
+	new_start = bcm_l2_phys_addr(start);
+	new_end = bcm_l2_phys_addr(end);
+
+	/* normal case, no cross section between start and end */
+	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
+		l2x0_clean_range(new_start, new_end);
+		return;
+	}
+
+	/* They cross sections, so it can only be a cross from section
+	 * 2 to section 3
+	 */
+	l2x0_clean_range(new_start,
+		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
+	l2x0_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
+		new_end);
+}
+
+static void bcm_flush_range(unsigned long start, unsigned long end)
+{
+	unsigned long new_start, new_end;
+
+	BUG_ON(start < BCM_SYS_EMI_START_ADDR);
+
+	if (unlikely(end <= start))
+		return;
+
+	if ((end - start) >= l2x0_size) {
+		l2x0_flush_all();
+		return;
+	}
+
+	new_start = bcm_l2_phys_addr(start);
+	new_end = bcm_l2_phys_addr(end);
+
+	/* normal case, no cross section between start and end */
+	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
+		l2x0_flush_range(new_start, new_end);
+		return;
+	}
+
+	/* They cross sections, so it can only be a cross from section
+	 * 2 to section 3
+	 */
+	l2x0_flush_range(new_start,
+		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
+	l2x0_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
+		new_end);
+}
+
 static void __init l2x0_of_setup(const struct device_node *np,
 				 u32 *aux_val, u32 *aux_mask)
 {
@@ -765,6 +906,21 @@ static const struct l2x0_of_data aurora_no_outer_data = {
 	},
 };
 
+static const struct l2x0_of_data bcm_l2x0_data = {
+	.setup = pl310_of_setup,
+	.save  = pl310_save,
+	.outer_cache = {
+		.resume      = pl310_resume,
+		.inv_range   = bcm_inv_range,
+		.clean_range = bcm_clean_range,
+		.flush_range = bcm_flush_range,
+		.sync        = l2x0_cache_sync,
+		.flush_all   = l2x0_flush_all,
+		.inv_all     = l2x0_inv_all,
+		.disable     = l2x0_disable,
+	},
+};
+
 static const struct of_device_id l2x0_ids[] __initconst = {
 	{ .compatible = "arm,pl310-cache", .data = (void *)&pl310_data },
 	{ .compatible = "arm,l220-cache", .data = (void *)&l2x0_data },
@@ -773,6 +929,8 @@ static const struct of_device_id l2x0_ids[] __initconst = {
 	  .data = (void *)&aurora_no_outer_data},
 	{ .compatible = "marvell,aurora-outer-cache",
 	  .data = (void *)&aurora_with_outer_data},
+	{ .compatible = "bcm,bcm11351-a2-pl310-cache",
+	  .data = (void *)&bcm_l2x0_data},
 	{}
 };
 
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 2ac37372ef5..b55b1015724 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -20,6 +20,7 @@
 #include <asm/smp_plat.h>
 #include <asm/thread_notify.h>
 #include <asm/tlbflush.h>
+#include <asm/proc-fns.h>
 
 /*
  * On ARMv6, we have the following structure in the Context ID:
@@ -39,33 +40,51 @@
  * non 64-bit operations.
  */
 #define ASID_FIRST_VERSION	(1ULL << ASID_BITS)
-#define NUM_USER_ASIDS		(ASID_FIRST_VERSION - 1)
-
-#define ASID_TO_IDX(asid)	((asid & ~ASID_MASK) - 1)
-#define IDX_TO_ASID(idx)	((idx + 1) & ~ASID_MASK)
+#define NUM_USER_ASIDS		ASID_FIRST_VERSION
 
 static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION);
 static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS);
 
-DEFINE_PER_CPU(atomic64_t, active_asids);
+static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
 static cpumask_t tlb_flush_pending;
 
+#ifdef CONFIG_ARM_ERRATA_798181
+void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
+			     cpumask_t *mask)
+{
+	int cpu;
+	unsigned long flags;
+	u64 context_id, asid;
+
+	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+	context_id = mm->context.id.counter;
+	for_each_online_cpu(cpu) {
+		if (cpu == this_cpu)
+			continue;
+		/*
+		 * We only need to send an IPI if the other CPUs are
+		 * running the same ASID as the one being invalidated.
+		 */
+		asid = per_cpu(active_asids, cpu).counter;
+		if (asid == 0)
+			asid = per_cpu(reserved_asids, cpu);
+		if (context_id == asid)
+			cpumask_set_cpu(cpu, mask);
+	}
+	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+}
+#endif
+
 #ifdef CONFIG_ARM_LPAE
 static void cpu_set_reserved_ttbr0(void)
 {
-	unsigned long ttbl = __pa(swapper_pg_dir);
-	unsigned long ttbh = 0;
-
 	/*
 	 * Set TTBR0 to swapper_pg_dir which contains only global entries. The
 	 * ASID is set to 0.
 	 */
-	asm volatile(
-	"	mcrr	p15, 0, %0, %1, c2		@ set TTBR0\n"
-	:
-	: "r" (ttbl), "r" (ttbh));
+	cpu_set_ttbr(0, __pa(swapper_pg_dir));
 	isb();
 }
 #else
@@ -128,7 +147,16 @@ static void flush_context(unsigned int cpu)
 			asid = 0;
 		} else {
 			asid = atomic64_xchg(&per_cpu(active_asids, i), 0);
-			__set_bit(ASID_TO_IDX(asid), asid_map);
+			/*
+			 * If this CPU has already been through a
+			 * rollover, but hasn't run another task in
+			 * the meantime, we must preserve its reserved
+			 * ASID, as this is the only trace we have of
+			 * the process it is still running.
+			 */
+			if (asid == 0)
+				asid = per_cpu(reserved_asids, i);
+			__set_bit(asid & ~ASID_MASK, asid_map);
 		}
 		per_cpu(reserved_asids, i) = asid;
 	}
@@ -167,17 +195,19 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
 		/*
 		 * Allocate a free ASID. If we can't find one, take a
 		 * note of the currently active ASIDs and mark the TLBs
-		 * as requiring flushes.
+		 * as requiring flushes. We always count from ASID #1,
+		 * as we reserve ASID #0 to switch via TTBR0 and indicate
+		 * rollover events.
 		 */
-		asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS);
+		asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
 		if (asid == NUM_USER_ASIDS) {
 			generation = atomic64_add_return(ASID_FIRST_VERSION,
 							 &asid_generation);
 			flush_context(cpu);
-			asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS);
+			asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
 		}
 		__set_bit(asid, asid_map);
-		asid = generation | IDX_TO_ASID(asid);
+		asid |= generation;
 		cpumask_clear(mm_cpumask(mm));
 	}
 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ef3e0f3aac9..7ec02961dfa 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -250,7 +250,7 @@ static void __dma_free_buffer(struct page *page, size_t size)
 
 #ifdef CONFIG_MMU
 #ifdef CONFIG_HUGETLB_PAGE
-#error ARM Coherent DMA allocator does not (yet) support huge TLB
+#warning ARM Coherent DMA allocator does not (yet) support huge TLB
 #endif
 
 static void *__alloc_from_contiguous(struct device *dev, size_t size,
@@ -880,10 +880,24 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
 	dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
 
 	/*
-	 * Mark the D-cache clean for this page to avoid extra flushing.
+	 * Mark the D-cache clean for these pages to avoid extra flushing.
 	 */
-	if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
-		set_bit(PG_dcache_clean, &page->flags);
+	if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
+		unsigned long pfn;
+		size_t left = size;
+
+		pfn = page_to_pfn(page) + off / PAGE_SIZE;
+		off %= PAGE_SIZE;
+		if (off) {
+			pfn++;
+			left -= PAGE_SIZE - off;
+		}
+		while (left >= PAGE_SIZE) {
+			page = pfn_to_page(pfn++);
+			set_bit(PG_dcache_clean, &page->flags);
+			left -= PAGE_SIZE;
+		}
+	}
 }
 
 /**
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 5dbf13f954f..c97f7940cb9 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -491,12 +491,14 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
  * Some section permission faults need to be handled gracefully.
  * They can happen due to a __{get,put}_user during an oops.
  */
+#ifndef CONFIG_ARM_LPAE
 static int
 do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 {
 	do_bad_area(addr, fsr, regs);
 	return 0;
 }
+#endif /* CONFIG_ARM_LPAE */
 
 /*
  * This abort handler always returns "fault".
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 32aa5861119..6d5ba9afb16 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -17,6 +17,7 @@
 #include <asm/highmem.h>
 #include <asm/smp_plat.h>
 #include <asm/tlbflush.h>
+#include <linux/hugetlb.h>
 
 #include "mm.h"
 
@@ -168,19 +169,23 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
 	 * coherent with the kernels mapping.
 	 */
 	if (!PageHighMem(page)) {
-		__cpuc_flush_dcache_area(page_address(page), PAGE_SIZE);
+		size_t page_size = PAGE_SIZE << compound_order(page);
+		__cpuc_flush_dcache_area(page_address(page), page_size);
 	} else {
-		void *addr;
-
+		unsigned long i;
 		if (cache_is_vipt_nonaliasing()) {
-			addr = kmap_atomic(page);
-			__cpuc_flush_dcache_area(addr, PAGE_SIZE);
-			kunmap_atomic(addr);
-		} else {
-			addr = kmap_high_get(page);
-			if (addr) {
+			for (i = 0; i < (1 << compound_order(page)); i++) {
+				void *addr = kmap_atomic(page);
 				__cpuc_flush_dcache_area(addr, PAGE_SIZE);
-				kunmap_high(page);
+				kunmap_atomic(addr);
+			}
+		} else {
+			for (i = 0; i < (1 << compound_order(page)); i++) {
+				void *addr = kmap_high_get(page);
+				if (addr) {
+					__cpuc_flush_dcache_area(addr, PAGE_SIZE);
+					kunmap_high(page);
+				}
 			}
 		}
 	}
@@ -287,7 +292,7 @@ void flush_dcache_page(struct page *page)
 	mapping = page_mapping(page);
 
 	if (!cache_ops_need_broadcast() &&
-	    mapping && !mapping_mapped(mapping))
+	    mapping && !page_mapped(page))
 		clear_bit(PG_dcache_clean, &page->flags);
 	else {
 		__flush_dcache_page(mapping, page);
diff --git a/arch/arm/mm/fsr-3level.c b/arch/arm/mm/fsr-3level.c
index 05a4e943183..ab4409a2307 100644
--- a/arch/arm/mm/fsr-3level.c
+++ b/arch/arm/mm/fsr-3level.c
@@ -9,11 +9,11 @@ static struct fsr_info fsr_info[] = {
 	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
 	{ do_bad,		SIGBUS,  0,		"reserved access flag fault"	},
 	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},
-	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 access flag fault"	},
 	{ do_bad,		SIGBUS,  0,		"reserved permission fault"	},
 	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 1 permission fault"	},
-	{ do_sect_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 permission fault"	},
 	{ do_bad,		SIGBUS,  0,		"synchronous external abort"	},
 	{ do_bad,		SIGBUS,  0,		"asynchronous external abort"	},
diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c
new file mode 100644
index 00000000000..3d1e4a205b0
--- /dev/null
+++ b/arch/arm/mm/hugetlbpage.c
@@ -0,0 +1,101 @@
+/*
+ * arch/arm/mm/hugetlbpage.c
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h and Bill Carson's patches
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/err.h>
+#include <linux/sysctl.h>
+#include <asm/mman.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+
+/*
+ * On ARM, huge pages are backed by pmd's rather than pte's, so we do a lot
+ * of type casting from pmd_t * to pte_t *.
+ */
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_present(*pgd)) {
+		pud = pud_offset(pgd, addr);
+		if (pud_present(*pud))
+			pmd = pmd_offset(pud, addr);
+	}
+
+	return (pte_t *)pmd;
+}
+
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+			      int write)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (pud)
+		pte = (pte_t *)pmd_alloc(mm, pud, addr);
+
+	return pte;
+}
+
+struct page *
+follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+		pmd_t *pmd, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pmd);
+	if (page)
+		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
+	return page;
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
+}
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 9a5cdc01fcd..6833cbead6c 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -36,12 +36,13 @@
 
 #include "mm.h"
 
-static unsigned long phys_initrd_start __initdata = 0;
+static phys_addr_t phys_initrd_start __initdata = 0;
 static unsigned long phys_initrd_size __initdata = 0;
 
 static int __init early_initrd(char *p)
 {
-	unsigned long start, size;
+	phys_addr_t start;
+	unsigned long size;
 	char *endp;
 
 	start = memparse(p, &endp);
@@ -350,14 +351,14 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (phys_initrd_size &&
 	    !memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) {
-		pr_err("INITRD: 0x%08lx+0x%08lx is not a memory region - disabling initrd\n",
-		       phys_initrd_start, phys_initrd_size);
+		pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n",
+		       (u64)phys_initrd_start, phys_initrd_size);
 		phys_initrd_start = phys_initrd_size = 0;
 	}
 	if (phys_initrd_size &&
 	    memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) {
-		pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region - disabling initrd\n",
-		       phys_initrd_start, phys_initrd_size);
+		pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n",
+		       (u64)phys_initrd_start, phys_initrd_size);
 		phys_initrd_start = phys_initrd_size = 0;
 	}
 	if (phys_initrd_size) {
@@ -442,7 +443,7 @@ static inline void
 free_memmap(unsigned long start_pfn, unsigned long end_pfn)
 {
 	struct page *start_pg, *end_pg;
-	unsigned long pg, pgend;
+	phys_addr_t pg, pgend;
 
 	/*
 	 * Convert start_pfn/end_pfn to a struct page pointer.
@@ -454,8 +455,8 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn)
 	 * Convert to physical addresses, and
 	 * round start upwards and end downwards.
 	 */
-	pg = (unsigned long)PAGE_ALIGN(__pa(start_pg));
-	pgend = (unsigned long)__pa(end_pg) & PAGE_MASK;
+	pg = PAGE_ALIGN(__pa(start_pg));
+	pgend = __pa(end_pg) & PAGE_MASK;
 
 	/*
 	 * If there are free pages between these,
@@ -582,9 +583,6 @@ static void __init free_highpages(void)
  */
 void __init mem_init(void)
 {
-	unsigned long reserved_pages, free_pages;
-	struct memblock_region *reg;
-	int i;
 #ifdef CONFIG_HAVE_TCM
 	/* These pointers are filled in on TCM detection */
 	extern u32 dtcm_end;
@@ -595,57 +593,16 @@ void __init mem_init(void)
 
 	/* this will put all unused low memory onto the freelists */
 	free_unused_memmap(&meminfo);
-
-	totalram_pages += free_all_bootmem();
+	free_all_bootmem();
 
 #ifdef CONFIG_SA1111
 	/* now that our DMA memory is actually so designated, we can free it */
-	free_reserved_area(__va(PHYS_PFN_OFFSET), swapper_pg_dir, 0, NULL);
+	free_reserved_area(__va(PHYS_PFN_OFFSET), swapper_pg_dir, -1, NULL);
 #endif
 
 	free_highpages();
 
-	reserved_pages = free_pages = 0;
-
-	for_each_bank(i, &meminfo) {
-		struct membank *bank = &meminfo.bank[i];
-		unsigned int pfn1, pfn2;
-		struct page *page, *end;
-
-		pfn1 = bank_pfn_start(bank);
-		pfn2 = bank_pfn_end(bank);
-
-		page = pfn_to_page(pfn1);
-		end  = pfn_to_page(pfn2 - 1) + 1;
-
-		do {
-			if (PageReserved(page))
-				reserved_pages++;
-			else if (!page_count(page))
-				free_pages++;
-			page++;
-		} while (page < end);
-	}
-
-	/*
-	 * Since our memory may not be contiguous, calculate the
-	 * real number of pages we have in this system
-	 */
-	printk(KERN_INFO "Memory:");
-	num_physpages = 0;
-	for_each_memblock(memory, reg) {
-		unsigned long pages = memblock_region_memory_end_pfn(reg) -
-			memblock_region_memory_base_pfn(reg);
-		num_physpages += pages;
-		printk(" %ldMB", pages >> (20 - PAGE_SHIFT));
-	}
-	printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT));
-
-	printk(KERN_NOTICE "Memory: %luk/%luk available, %luk reserved, %luK highmem\n",
-		nr_free_pages() << (PAGE_SHIFT-10),
-		free_pages << (PAGE_SHIFT-10),
-		reserved_pages << (PAGE_SHIFT-10),
-		totalhigh_pages << (PAGE_SHIFT-10));
+	mem_init_print_info(NULL);
 
 #define MLK(b, t) b, t, ((t) - (b)) >> 10
 #define MLM(b, t) b, t, ((t) - (b)) >> 20
@@ -711,7 +668,7 @@ void __init mem_init(void)
 	BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE	> PAGE_OFFSET);
 #endif
 
-	if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
+	if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
 		extern int sysctl_overcommit_memory;
 		/*
 		 * On a machine this small we won't get
@@ -728,12 +685,12 @@ void free_initmem(void)
 	extern char __tcm_start, __tcm_end;
 
 	poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start);
-	free_reserved_area(&__tcm_start, &__tcm_end, 0, "TCM link");
+	free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link");
 #endif
 
 	poison_init_mem(__init_begin, __init_end - __init_begin);
 	if (!machine_is_integrator() && !machine_is_cintegrator())
-		free_initmem_default(0);
+		free_initmem_default(-1);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -744,7 +701,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 {
 	if (!keep_initrd) {
 		poison_init_mem((void *)start, PAGE_ALIGN(end) - start);
-		free_reserved_area(start, end, 0, "initrd");
+		free_reserved_area((void *)start, (void *)end, -1, "initrd");
 	}
 }
 
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 04d9006eab1..f123d6eb074 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -331,10 +331,10 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn,
 	return (void __iomem *) (offset + addr);
 }
 
-void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size,
+void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size,
 	unsigned int mtype, void *caller)
 {
-	unsigned long last_addr;
+	phys_addr_t last_addr;
  	unsigned long offset = phys_addr & ~PAGE_MASK;
  	unsigned long pfn = __phys_to_pfn(phys_addr);
 
@@ -367,12 +367,12 @@ __arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size,
 }
 EXPORT_SYMBOL(__arm_ioremap_pfn);
 
-void __iomem * (*arch_ioremap_caller)(unsigned long, size_t,
+void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t,
 				      unsigned int, void *) =
 	__arm_ioremap_caller;
 
 void __iomem *
-__arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype)
+__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype)
 {
 	return arch_ioremap_caller(phys_addr, size, mtype,
 		__builtin_return_address(0));
@@ -387,7 +387,7 @@ EXPORT_SYMBOL(__arm_ioremap);
  * CONFIG_GENERIC_ALLOCATOR for allocating external memory.
  */
 void __iomem *
-__arm_ioremap_exec(unsigned long phys_addr, size_t size, bool cached)
+__arm_ioremap_exec(phys_addr_t phys_addr, size_t size, bool cached)
 {
 	unsigned int mtype;
 
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index d1d1cefa1f9..d7229d28c7f 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -675,7 +675,8 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
 }
 
 static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
-	unsigned long end, unsigned long phys, const struct mem_type *type)
+				  unsigned long end, phys_addr_t phys,
+				  const struct mem_type *type)
 {
 	pud_t *pud = pud_offset(pgd, addr);
 	unsigned long next;
@@ -989,27 +990,28 @@ phys_addr_t arm_lowmem_limit __initdata = 0;
 void __init sanity_check_meminfo(void)
 {
 	int i, j, highmem = 0;
+	phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1;
 
 	for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
 		struct membank *bank = &meminfo.bank[j];
-		*bank = meminfo.bank[i];
+		phys_addr_t size_limit;
 
-		if (bank->start > ULONG_MAX)
-			highmem = 1;
+		*bank = meminfo.bank[i];
+		size_limit = bank->size;
 
-#ifdef CONFIG_HIGHMEM
-		if (__va(bank->start) >= vmalloc_min ||
-		    __va(bank->start) < (void *)PAGE_OFFSET)
+		if (bank->start >= vmalloc_limit)
 			highmem = 1;
+		else
+			size_limit = vmalloc_limit - bank->start;
 
 		bank->highmem = highmem;
 
+#ifdef CONFIG_HIGHMEM
 		/*
 		 * Split those memory banks which are partially overlapping
 		 * the vmalloc area greatly simplifying things later.
 		 */
-		if (!highmem && __va(bank->start) < vmalloc_min &&
-		    bank->size > vmalloc_min - __va(bank->start)) {
+		if (!highmem && bank->size > size_limit) {
 			if (meminfo.nr_banks >= NR_BANKS) {
 				printk(KERN_CRIT "NR_BANKS too low, "
 						 "ignoring high memory\n");
@@ -1018,16 +1020,14 @@ void __init sanity_check_meminfo(void)
 					(meminfo.nr_banks - i) * sizeof(*bank));
 				meminfo.nr_banks++;
 				i++;
-				bank[1].size -= vmalloc_min - __va(bank->start);
-				bank[1].start = __pa(vmalloc_min - 1) + 1;
+				bank[1].size -= size_limit;
+				bank[1].start = vmalloc_limit;
 				bank[1].highmem = highmem = 1;
 				j++;
 			}
-			bank->size = vmalloc_min - __va(bank->start);
+			bank->size = size_limit;
 		}
 #else
-		bank->highmem = highmem;
-
 		/*
 		 * Highmem banks not allowed with !CONFIG_HIGHMEM.
 		 */
@@ -1040,31 +1040,16 @@ void __init sanity_check_meminfo(void)
 		}
 
 		/*
-		 * Check whether this memory bank would entirely overlap
-		 * the vmalloc area.
-		 */
-		if (__va(bank->start) >= vmalloc_min ||
-		    __va(bank->start) < (void *)PAGE_OFFSET) {
-			printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
-			       "(vmalloc region overlap).\n",
-			       (unsigned long long)bank->start,
-			       (unsigned long long)bank->start + bank->size - 1);
-			continue;
-		}
-
-		/*
 		 * Check whether this memory bank would partially overlap
 		 * the vmalloc area.
 		 */
-		if (__va(bank->start + bank->size - 1) >= vmalloc_min ||
-		    __va(bank->start + bank->size - 1) <= __va(bank->start)) {
-			unsigned long newsize = vmalloc_min - __va(bank->start);
+		if (bank->size > size_limit) {
 			printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
 			       "to -%.8llx (vmalloc region overlap).\n",
 			       (unsigned long long)bank->start,
 			       (unsigned long long)bank->start + bank->size - 1,
-			       (unsigned long long)bank->start + newsize - 1);
-			bank->size = newsize;
+			       (unsigned long long)bank->start + size_limit - 1);
+			bank->size = size_limit;
 		}
 #endif
 		if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit)
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 5a3aba614a4..1fa50100ab6 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -8,6 +8,7 @@
 #include <linux/pagemap.h>
 #include <linux/io.h>
 #include <linux/memblock.h>
+#include <linux/kernel.h>
 
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
@@ -15,9 +16,260 @@
 #include <asm/setup.h>
 #include <asm/traps.h>
 #include <asm/mach/arch.h>
+#include <asm/cputype.h>
+#include <asm/mpu.h>
 
 #include "mm.h"
 
+#ifdef CONFIG_ARM_MPU
+struct mpu_rgn_info mpu_rgn_info;
+
+/* Region number */
+static void rgnr_write(u32 v)
+{
+	asm("mcr        p15, 0, %0, c6, c2, 0" : : "r" (v));
+}
+
+/* Data-side / unified region attributes */
+
+/* Region access control register */
+static void dracr_write(u32 v)
+{
+	asm("mcr        p15, 0, %0, c6, c1, 4" : : "r" (v));
+}
+
+/* Region size register */
+static void drsr_write(u32 v)
+{
+	asm("mcr        p15, 0, %0, c6, c1, 2" : : "r" (v));
+}
+
+/* Region base address register */
+static void drbar_write(u32 v)
+{
+	asm("mcr        p15, 0, %0, c6, c1, 0" : : "r" (v));
+}
+
+static u32 drbar_read(void)
+{
+	u32 v;	/* volatile asm: every call must really re-read the register */
+	asm volatile("mrc        p15, 0, %0, c6, c1, 0" : "=r" (v));
+	return v;
+}
+/* Optional instruction-side region attributes */
+
+/* I-side Region access control register */
+static void iracr_write(u32 v)
+{
+	asm("mcr        p15, 0, %0, c6, c1, 5" : : "r" (v));
+}
+
+/* I-side Region size register */
+static void irsr_write(u32 v)
+{
+	asm("mcr        p15, 0, %0, c6, c1, 3" : : "r" (v));
+}
+
+/* I-side Region base address register */
+static void irbar_write(u32 v)
+{
+	asm("mcr        p15, 0, %0, c6, c1, 1" : : "r" (v));
+}
+
+static u32 irbar_read(void)
+{
+	u32 v;	/* volatile asm: every call must really re-read the register */
+	asm volatile("mrc        p15, 0, %0, c6, c1, 1" : "=r" (v));
+	return v;
+}
+
+/* MPU initialisation functions */
+void __init sanity_check_meminfo_mpu(void)
+{
+	int i;
+	struct membank *bank = meminfo.bank;
+	phys_addr_t phys_offset = PHYS_OFFSET;
+	phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size;
+
+	/* Initially only use memory contiguous from PHYS_OFFSET */
+	if (bank_phys_start(&bank[0]) != phys_offset)
+		panic("First memory bank must be contiguous from PHYS_OFFSET");
+
+	/* Banks have already been sorted by start address */
+	for (i = 1; i < meminfo.nr_banks; i++) {
+		if (bank[i].start <= bank_phys_end(&bank[0]) &&
+		    bank_phys_end(&bank[i]) > bank_phys_end(&bank[0])) {
+			bank[0].size = bank_phys_end(&bank[i]) - bank[0].start;
+		} else {
+			pr_notice("Ignoring RAM after 0x%.8lx. "
+			"First non-contiguous (ignored) bank start: 0x%.8lx\n",
+				(unsigned long)bank_phys_end(&bank[0]),
+				(unsigned long)bank_phys_start(&bank[i]));
+			break;
+		}
+	}
+	/* All contiguous banks are now merged in to the first bank */
+	meminfo.nr_banks = 1;
+	specified_mem_size = bank[0].size;
+
+	/*
+	 * MPU has curious alignment requirements: Size must be power of 2, and
+	 * region start must be aligned to the region size
+	 */
+	if (phys_offset != 0)
+		pr_info("PHYS_OFFSET != 0 => MPU Region size constrained by alignment requirements\n");
+
+	/*
+	 * Maximum aligned region might overflow phys_addr_t if phys_offset is
+	 * 0. Hence we keep everything below 4G until we take the smaller of
+	 * the aligned_region_size and rounded_mem_size, one of which is
+	 * guaranteed to be smaller than the maximum physical address.
+	 */
+	aligned_region_size = (phys_offset - 1) ^ (phys_offset);
+	/* Max power-of-two region fitting the bank (1UL: no signed overflow) */
+	rounded_mem_size = (1UL << __fls(bank[0].size)) - 1;
+
+	/* The actual region size is the smaller of the two */
+	aligned_region_size = aligned_region_size < rounded_mem_size
+				? aligned_region_size + 1
+				: rounded_mem_size + 1;
+
+	if (aligned_region_size != specified_mem_size)
+		pr_warn("Truncating memory from 0x%.8lx to 0x%.8lx (MPU region constraints)\n",
+				(unsigned long)specified_mem_size,
+				(unsigned long)aligned_region_size);
+
+	meminfo.bank[0].size = aligned_region_size;
+	pr_debug("MPU Region from 0x%.8lx size 0x%.8lx (end 0x%.8lx)\n",
+		(unsigned long)phys_offset,
+		(unsigned long)aligned_region_size,
+		(unsigned long)bank_phys_end(&bank[0]));
+
+}
+
+static int mpu_present(void)
+{
+	return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7);
+}
+
+static int mpu_max_regions(void)
+{
+	/*
+	 * We don't support a different number of I/D side regions so if we
+	 * have separate instruction and data memory maps then return
+	 * whichever side has a smaller number of supported regions.
+	 */
+	u32 dregions, iregions, mpuir;
+	mpuir = read_cpuid(CPUID_MPUIR);
+
+	dregions = iregions = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION;
+
+	/* Check for separate d-side and i-side memory maps */
+	if (mpuir & MPUIR_nU)
+		iregions = (mpuir & MPUIR_IREGION_SZMASK) >> MPUIR_IREGION;
+
+	/* Use the smallest of the two maxima */
+	return min(dregions, iregions);
+}
+
+static int mpu_iside_independent(void)
+{
+	/* MPUIR.nU specifies whether there is *not* a unified memory map */
+	return read_cpuid(CPUID_MPUIR) & MPUIR_nU;
+}
+
+static int mpu_min_region_order(void)
+{
+	u32 drbar_result, irbar_result;
+	/* Select the region that was kept free for this probing */
+	rgnr_write(MPU_PROBE_REGION);
+	isb();
+	/*
+	 * As per ARM ARM, write 0xFFFFFFFC to DRBAR and read it back to find
+	 * the minimum region order; the register is zeroed again afterwards
+	 */
+	drbar_write(0xFFFFFFFC);
+	drbar_result = irbar_result = drbar_read();
+	drbar_write(0x0);
+	/* If the MPU is non-unified, probe IRBAR too and use the larger value */
+	if (mpu_iside_independent()) {
+		irbar_write(0xFFFFFFFC);
+		irbar_result = irbar_read();
+		irbar_write(0x0);
+	}
+	isb(); /* Ensure that MPU region operations have completed */
+	/* Return __ffs() of whichever read-back value is larger */
+	return __ffs(max(drbar_result, irbar_result));
+}
+
+static int mpu_setup_region(unsigned int number, phys_addr_t start,
+			unsigned int size_order, unsigned int properties)
+{
+	u32 size_data;
+
+	/* Regions are numbered 0..max-1; the probe region is kept free */
+	if (number >= mpu_max_regions() || number == MPU_PROBE_REGION)
+		return -ENOENT;
+
+	if (size_order > 32)
+		return -ENOMEM;
+
+	if (size_order < mpu_min_region_order())
+		return -ENOMEM;
+
+	/* Writing N to bits 5:1 (RSR_SZ) specifies region size 2^(N+1) */
+	size_data = ((size_order - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN;
+
+	dsb(); /* Ensure all previous data accesses occur with old mappings */
+	rgnr_write(number);
+	isb();
+	drbar_write(start);
+	dracr_write(properties);
+	isb(); /* Propagate properties before enabling region */
+	drsr_write(size_data);
+
+	/* Check for independent I-side registers */
+	if (mpu_iside_independent()) {
+		irbar_write(start);
+		iracr_write(properties);
+		isb();
+		irsr_write(size_data);
+	}
+	isb();
+
+	/* Store region info (we treat i/d side the same, so only store d) */
+	mpu_rgn_info.rgns[number].dracr = properties;
+	mpu_rgn_info.rgns[number].drbar = start;
+	mpu_rgn_info.rgns[number].drsr = size_data;
+	return 0;
+}
+
+/*
+* Set up default MPU regions, doing nothing if there is no MPU
+*/
+void __init mpu_setup(void)
+{
+	int region_err;
+	if (!mpu_present())
+		return;
+
+	region_err = mpu_setup_region(MPU_RAM_REGION, PHYS_OFFSET,
+					ilog2(meminfo.bank[0].size),
+					MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL);
+	if (region_err) {
+		panic("MPU region initialization failure! %d", region_err);
+	} else {
+		pr_info("Using ARMv7 PMSA Compliant MPU. "
+			 "Region independence: %s, Max regions: %d\n",
+			mpu_iside_independent() ? "Yes" : "No",
+			mpu_max_regions());
+	}
+}
+#else
+static void __init sanity_check_meminfo_mpu(void) {} /* !CONFIG_ARM_MPU: no-op */
+static void __init mpu_setup(void) {}
+#endif /* CONFIG_ARM_MPU */
+
 void __init arm_mm_memblock_reserve(void)
 {
 #ifndef CONFIG_CPU_V7M
@@ -37,7 +289,9 @@ void __init arm_mm_memblock_reserve(void)
 
 void __init sanity_check_meminfo(void)
 {
-	phys_addr_t end = bank_phys_end(&meminfo.bank[meminfo.nr_banks - 1]);
+	phys_addr_t end;
+	sanity_check_meminfo_mpu();
+	end = bank_phys_end(&meminfo.bank[meminfo.nr_banks - 1]);
 	high_memory = __va(end - 1) + 1;
 }
 
@@ -48,6 +302,7 @@ void __init sanity_check_meminfo(void)
 void __init paging_init(struct machine_desc *mdesc)
 {
 	early_trap_init((void *)CONFIG_VECTORS_BASE);
+	mpu_setup();
 	bootmem_init();
 }
 
@@ -94,16 +349,16 @@ void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset,
 	return __arm_ioremap_pfn(pfn, offset, size, mtype);
 }
 
-void __iomem *__arm_ioremap(unsigned long phys_addr, size_t size,
+void __iomem *__arm_ioremap(phys_addr_t phys_addr, size_t size,
 			    unsigned int mtype)
 {
 	return (void __iomem *)phys_addr;
 }
 EXPORT_SYMBOL(__arm_ioremap);
 
-void __iomem * (*arch_ioremap_caller)(unsigned long, size_t, unsigned int, void *);
+void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *);
 
-void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size,
+void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size,
 				   unsigned int mtype, void *caller)
 {
 	return __arm_ioremap(phys_addr, size, mtype);
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 919405e20b8..2d1ef87328a 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -140,8 +140,10 @@ ENTRY(cpu_v6_set_pte_ext)
 ENTRY(cpu_v6_do_suspend)
 	stmfd	sp!, {r4 - r9, lr}
 	mrc	p15, 0, r4, c13, c0, 0	@ FCSE/PID
+#ifdef CONFIG_MMU
 	mrc	p15, 0, r5, c3, c0, 0	@ Domain ID
 	mrc	p15, 0, r6, c2, c0, 1	@ Translation table base 1
+#endif
 	mrc	p15, 0, r7, c1, c0, 1	@ auxiliary control register
 	mrc	p15, 0, r8, c1, c0, 2	@ co-processor access control
 	mrc	p15, 0, r9, c1, c0, 0	@ control register
@@ -158,14 +160,16 @@ ENTRY(cpu_v6_do_resume)
 	mcr	p15, 0, ip, c13, c0, 1	@ set reserved context ID
 	ldmia	r0, {r4 - r9}
 	mcr	p15, 0, r4, c13, c0, 0	@ FCSE/PID
+#ifdef CONFIG_MMU
 	mcr	p15, 0, r5, c3, c0, 0	@ Domain ID
 	ALT_SMP(orr	r1, r1, #TTB_FLAGS_SMP)
 	ALT_UP(orr	r1, r1, #TTB_FLAGS_UP)
 	mcr	p15, 0, r1, c2, c0, 0	@ Translation table base 0
 	mcr	p15, 0, r6, c2, c0, 1	@ Translation table base 1
+	mcr	p15, 0, ip, c2, c0, 2	@ TTB control register
+#endif
 	mcr	p15, 0, r7, c1, c0, 1	@ auxiliary control register
 	mcr	p15, 0, r8, c1, c0, 2	@ co-processor access control
-	mcr	p15, 0, ip, c2, c0, 2	@ TTB control register
 	mcr	p15, 0, ip, c7, c5, 4	@ ISB
 	mov	r0, r9			@ control register
 	b	cpu_resume_mmu
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index 363027e811d..5ffe1956c6d 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -39,6 +39,14 @@
 #define TTB_FLAGS_SMP	(TTB_IRGN_WBWA|TTB_S|TTB_RGN_OC_WBWA)
 #define PMD_FLAGS_SMP	(PMD_SECT_WBWA|PMD_SECT_S)
 
+#ifndef __ARMEB__
+#  define rpgdl	r0
+#  define rpgdh	r1
+#else
+#  define rpgdl	r1
+#  define rpgdh	r0
+#endif
+
 /*
  * cpu_v7_switch_mm(pgd_phys, tsk)
  *
@@ -47,10 +55,10 @@
  */
 ENTRY(cpu_v7_switch_mm)
 #ifdef CONFIG_MMU
-	mmid	r1, r1				@ get mm->context.id
-	asid	r3, r1
-	mov	r3, r3, lsl #(48 - 32)		@ ASID
-	mcrr	p15, 0, r0, r3, c2		@ set TTB 0
+	mmid	r2, r2
+	asid	r2, r2
+	orr	rpgdh, rpgdh, r2, lsl #(48 - 32)	@ upper 32-bits of pgd
+	mcrr	p15, 0, rpgdl, rpgdh, c2		@ set TTB 0
 	isb
 #endif
 	mov	pc, lr
@@ -106,7 +114,8 @@ ENDPROC(cpu_v7_set_pte_ext)
 	 */
 	.macro	v7_ttb_setup, zero, ttbr0, ttbr1, tmp
 	ldr	\tmp, =swapper_pg_dir		@ swapper_pg_dir virtual address
-	cmp	\ttbr1, \tmp			@ PHYS_OFFSET > PAGE_OFFSET? (branch below)
+	mov	\tmp, \tmp, lsr #ARCH_PGD_SHIFT
+	cmp	\ttbr1, \tmp			@ PHYS_OFFSET > PAGE_OFFSET?
 	mrc	p15, 0, \tmp, c2, c0, 2		@ TTB control register
 	orr	\tmp, \tmp, #TTB_EAE
 	ALT_SMP(orr	\tmp, \tmp, #TTB_FLAGS_SMP)
@@ -114,27 +123,21 @@ ENDPROC(cpu_v7_set_pte_ext)
 	ALT_SMP(orr	\tmp, \tmp, #TTB_FLAGS_SMP << 16)
 	ALT_UP(orr	\tmp, \tmp, #TTB_FLAGS_UP << 16)
 	/*
-	 * TTBR0/TTBR1 split (PAGE_OFFSET):
-	 *   0x40000000: T0SZ = 2, T1SZ = 0 (not used)
-	 *   0x80000000: T0SZ = 0, T1SZ = 1
-	 *   0xc0000000: T0SZ = 0, T1SZ = 2
-	 *
-	 * Only use this feature if PHYS_OFFSET <= PAGE_OFFSET, otherwise
-	 * booting secondary CPUs would end up using TTBR1 for the identity
-	 * mapping set up in TTBR0.
+	 * Only use split TTBRs if PHYS_OFFSET <= PAGE_OFFSET (cmp above),
+	 * otherwise booting secondary CPUs would end up using TTBR1 for the
+	 * identity mapping set up in TTBR0.
 	 */
-	bhi	9001f				@ PHYS_OFFSET > PAGE_OFFSET?
-	orr	\tmp, \tmp, #(((PAGE_OFFSET >> 30) - 1) << 16) @ TTBCR.T1SZ
-#if defined CONFIG_VMSPLIT_2G
-	/* PAGE_OFFSET == 0x80000000, T1SZ == 1 */
-	add	\ttbr1, \ttbr1, #1 << 4		@ skip two L1 entries
-#elif defined CONFIG_VMSPLIT_3G
-	/* PAGE_OFFSET == 0xc0000000, T1SZ == 2 */
-	add	\ttbr1, \ttbr1, #4096 * (1 + 3)	@ only L2 used, skip pgd+3*pmd
-#endif
-	/* CONFIG_VMSPLIT_1G does not need TTBR1 adjustment */
-9001:	mcr	p15, 0, \tmp, c2, c0, 2		@ TTB control register
-	mcrr	p15, 1, \ttbr1, \zero, c2	@ load TTBR1
+	orrls	\tmp, \tmp, #TTBR1_SIZE				@ TTBCR.T1SZ
+	mcr	p15, 0, \tmp, c2, c0, 2				@ TTBCR
+	mov	\tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT)	@ upper bits
+	mov	\ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT		@ lower bits
+	addls	\ttbr1, \ttbr1, #TTBR1_OFFSET
+	mcrr	p15, 1, \ttbr1, \zero, c2			@ load TTBR1
+	mov	\tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT)	@ upper bits
+	mov	\ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT		@ lower bits
+	mcrr	p15, 0, \ttbr0, \zero, c2			@ load TTBR0
+	mcrr	p15, 1, \ttbr1, \zero, c2			@ load TTBR1
+	mcrr	p15, 0, \ttbr0, \zero, c2			@ load TTBR0
 	.endm
 
 	__CPUINIT
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index e35fec34453..7ef3ad05df3 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -98,9 +98,11 @@ ENTRY(cpu_v7_do_suspend)
 	mrc	p15, 0, r4, c13, c0, 0	@ FCSE/PID
 	mrc	p15, 0, r5, c13, c0, 3	@ User r/o thread ID
 	stmia	r0!, {r4 - r5}
+#ifdef CONFIG_MMU
 	mrc	p15, 0, r6, c3, c0, 0	@ Domain ID
 	mrc	p15, 0, r7, c2, c0, 1	@ TTB 1
 	mrc	p15, 0, r11, c2, c0, 2	@ TTB control register
+#endif
 	mrc	p15, 0, r8, c1, c0, 0	@ Control register
 	mrc	p15, 0, r9, c1, c0, 1	@ Auxiliary control register
 	mrc	p15, 0, r10, c1, c0, 2	@ Co-processor access control
@@ -110,13 +112,14 @@ ENDPROC(cpu_v7_do_suspend)
 
 ENTRY(cpu_v7_do_resume)
 	mov	ip, #0
-	mcr	p15, 0, ip, c8, c7, 0	@ invalidate TLBs
 	mcr	p15, 0, ip, c7, c5, 0	@ invalidate I cache
 	mcr	p15, 0, ip, c13, c0, 1	@ set reserved context ID
 	ldmia	r0!, {r4 - r5}
 	mcr	p15, 0, r4, c13, c0, 0	@ FCSE/PID
 	mcr	p15, 0, r5, c13, c0, 3	@ User r/o thread ID
 	ldmia	r0, {r6 - r11}
+#ifdef CONFIG_MMU
+	mcr	p15, 0, ip, c8, c7, 0	@ invalidate TLBs
 	mcr	p15, 0, r6, c3, c0, 0	@ Domain ID
 #ifndef CONFIG_ARM_LPAE
 	ALT_SMP(orr	r1, r1, #TTB_FLAGS_SMP)
@@ -125,14 +128,15 @@ ENTRY(cpu_v7_do_resume)
 	mcr	p15, 0, r1, c2, c0, 0	@ TTB 0
 	mcr	p15, 0, r7, c2, c0, 1	@ TTB 1
 	mcr	p15, 0, r11, c2, c0, 2	@ TTB control register
-	mrc	p15, 0, r4, c1, c0, 1	@ Read Auxiliary control register
-	teq	r4, r9			@ Is it already set?
-	mcrne	p15, 0, r9, c1, c0, 1	@ No, so write it
-	mcr	p15, 0, r10, c1, c0, 2	@ Co-processor access control
 	ldr	r4, =PRRR		@ PRRR
 	ldr	r5, =NMRR		@ NMRR
 	mcr	p15, 0, r4, c10, c2, 0	@ write PRRR
 	mcr	p15, 0, r5, c10, c2, 1	@ write NMRR
+#endif	/* CONFIG_MMU */
+	mrc	p15, 0, r4, c1, c0, 1	@ Read Auxiliary control register
+	teq	r4, r9			@ Is it already set?
+	mcrne	p15, 0, r9, c1, c0, 1	@ No, so write it
+	mcr	p15, 0, r10, c1, c0, 2	@ Co-processor access control
 	isb
 	dsb
 	mov	r0, r8			@ control register
@@ -178,7 +182,8 @@ ENDPROC(cpu_pj4b_do_idle)
  */
 __v7_ca5mp_setup:
 __v7_ca9mp_setup:
-	mov	r10, #(1 << 0)			@ TLB ops broadcasting
+__v7_cr7mp_setup:
+	mov	r10, #(1 << 0)			@ Cache/TLB ops broadcasting
 	b	1f
 __v7_ca7mp_setup:
 __v7_ca15mp_setup:
@@ -443,6 +448,16 @@ __v7_pj4b_proc_info:
 #endif
 
 	/*
+	 * ARM Ltd. Cortex R7 processor.
+	 */
+	.type	__v7_cr7mp_proc_info, #object
+__v7_cr7mp_proc_info:
+	.long	0x410fc170
+	.long	0xff0ffff0
+	__v7_proc __v7_cr7mp_setup
+	.size	__v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info
+
+	/*
 	 * ARM Ltd. Cortex A7 processor.
 	 */
 	.type	__v7_ca7mp_proc_info, #object
diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c
index 1ff6a37e893..a4d1f8de3b5 100644
--- a/arch/arm/plat-iop/adma.c
+++ b/arch/arm/plat-iop/adma.c
@@ -192,12 +192,10 @@ static int __init iop3xx_adma_cap_init(void)
 
 	#ifdef CONFIG_ARCH_IOP32X /* the 32x AAU does not perform zero sum */
 	dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
-	dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
 	dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
 	#else
 	dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
 	dma_cap_set(DMA_XOR_VAL, iop3xx_aau_data.cap_mask);
-	dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
 	dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
 	#endif
 
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index c019b7aaf77..c66d163d7a2 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -666,14 +666,9 @@ void __init orion_xor0_init(unsigned long mapbase_low,
 	orion_xor0_shared_resources[3].start = irq_1;
 	orion_xor0_shared_resources[3].end = irq_1;
 
-	/*
-	 * two engines can't do memset simultaneously, this limitation
-	 * satisfied by removing memset support from one of the engines.
-	 */
 	dma_cap_set(DMA_MEMCPY, orion_xor0_channels_data[0].cap_mask);
 	dma_cap_set(DMA_XOR, orion_xor0_channels_data[0].cap_mask);
 
-	dma_cap_set(DMA_MEMSET, orion_xor0_channels_data[1].cap_mask);
 	dma_cap_set(DMA_MEMCPY, orion_xor0_channels_data[1].cap_mask);
 	dma_cap_set(DMA_XOR, orion_xor0_channels_data[1].cap_mask);
 
@@ -732,14 +727,9 @@ void __init orion_xor1_init(unsigned long mapbase_low,
 	orion_xor1_shared_resources[3].start = irq_1;
 	orion_xor1_shared_resources[3].end = irq_1;
 
-	/*
-	 * two engines can't do memset simultaneously, this limitation
-	 * satisfied by removing memset support from one of the engines.
-	 */
 	dma_cap_set(DMA_MEMCPY, orion_xor1_channels_data[0].cap_mask);
 	dma_cap_set(DMA_XOR, orion_xor1_channels_data[0].cap_mask);
 
-	dma_cap_set(DMA_MEMSET, orion_xor1_channels_data[1].cap_mask);
 	dma_cap_set(DMA_MEMCPY, orion_xor1_channels_data[1].cap_mask);
 	dma_cap_set(DMA_XOR, orion_xor1_channels_data[1].cap_mask);
 
diff --git a/arch/arm/plat-orion/gpio.c b/arch/arm/plat-orion/gpio.c
index 249fe6333e1..6816192a756 100644
--- a/arch/arm/plat-orion/gpio.c
+++ b/arch/arm/plat-orion/gpio.c
@@ -426,7 +426,7 @@ static void gpio_irq_handler(unsigned irq, struct irq_desc *desc)
 		if (!(cause & (1 << i)))
 			continue;
 
-		type = irqd_get_trigger_type(irq_get_irq_data(irq));
+		type = irq_get_trigger_type(irq);
 		if ((type & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_EDGE_BOTH) {
 			/* Swap polarity (race with GPIO line) */
 			u32 polarity;
diff --git a/arch/arm/plat-samsung/include/plat/cpu-freq-core.h b/arch/arm/plat-samsung/include/plat/cpu-freq-core.h
index d7e17150028..7231c8e4975 100644
--- a/arch/arm/plat-samsung/include/plat/cpu-freq-core.h
+++ b/arch/arm/plat-samsung/include/plat/cpu-freq-core.h
@@ -285,7 +285,7 @@ static inline int s3c_cpufreq_addfreq(struct cpufreq_frequency_table *table,
 		s3c_freq_dbg("%s: { %d = %u kHz }\n",
 			     __func__, index, freq);
 
-		table[index].index = index;
+		table[index].driver_data = index;
 		table[index].frequency = freq;
 	}
 
diff --git a/arch/arm/plat-versatile/headsmp.S b/arch/arm/plat-versatile/headsmp.S
index b178d44e9ea..2677bc3762d 100644
--- a/arch/arm/plat-versatile/headsmp.S
+++ b/arch/arm/plat-versatile/headsmp.S
@@ -11,8 +11,6 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 
-	__INIT
-
 /*
  * Realview/Versatile Express specific entry point for secondary CPUs.
  * This provides a "holding pen" into which all secondary cores are held
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 56b3f6d447a..4143d9b0d87 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -7,6 +7,7 @@ config ARM64
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
 	select ARM_GIC
+	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
 	select GENERIC_CLOCKEVENTS
@@ -111,6 +112,11 @@ config ARCH_VEXPRESS
 	  This enables support for the ARMv8 software model (Versatile
 	  Express).
 
+config ARCH_XGENE
+	bool "AppliedMicro X-Gene SOC Family"
+	help
+	  This enables support for AppliedMicro X-Gene SOC Family
+
 endmenu
 
 menu "Bus support"
@@ -148,6 +154,8 @@ config NR_CPUS
 	int "Maximum number of CPUs (2-32)"
 	range 2 32
 	depends on SMP
+	# These have to remain sorted largest to smallest
+	default "8" if ARCH_XGENE
 	default "4"
 
 source kernel/Kconfig.preempt
@@ -180,8 +188,35 @@ config HW_PERF_EVENTS
 	  Enable hardware performance counter support for perf events. If
 	  disabled, perf events will use software events only.
 
+config SYS_SUPPORTS_HUGETLBFS
+	def_bool y
+
+config ARCH_WANT_GENERAL_HUGETLB
+	def_bool y
+
+config ARCH_WANT_HUGE_PMD_SHARE
+	def_bool y if !ARM64_64K_PAGES
+
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	def_bool y
+
 source "mm/Kconfig"
 
+config XEN_DOM0
+	def_bool y
+	depends on XEN
+
+config XEN
+	bool "Xen guest support on ARM64 (EXPERIMENTAL)"
+	depends on ARM64 && OF
+	help
+	  Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.
+
+config FORCE_MAX_ZONEORDER
+	int
+	default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
+	default "11"
+
 endmenu
 
 menu "Boot options"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index c95c5cb212f..d90cf79f233 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -37,6 +37,8 @@ TEXT_OFFSET := 0x00080000
 export	TEXT_OFFSET GZFLAGS
 
 core-y		+= arch/arm64/kernel/ arch/arm64/mm/
+core-$(CONFIG_KVM) += arch/arm64/kvm/
+core-$(CONFIG_XEN) += arch/arm64/xen/
 libs-y		:= arch/arm64/lib/ $(libs-y)
 libs-y		+= $(LIBGCC)
 
@@ -60,6 +62,10 @@ zinstall install: vmlinux
 dtbs: scripts
 	$(Q)$(MAKE) $(build)=$(boot)/dts dtbs
 
+PHONY += vdso_install
+vdso_install:
+	$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@
+
 # We use MRPROPER_FILES and CLEAN_FILES now
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
index 68457e9e097..c52bdb051f6 100644
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@@ -1,4 +1,5 @@
 dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb
+dtb-$(CONFIG_ARCH_XGENE) += apm-mustang.dtb
 
 targets += dtbs
 targets += $(dtb-y)
diff --git a/arch/arm64/boot/dts/apm-mustang.dts b/arch/arm64/boot/dts/apm-mustang.dts
new file mode 100644
index 00000000000..1247ca1200b
--- /dev/null
+++ b/arch/arm64/boot/dts/apm-mustang.dts
@@ -0,0 +1,26 @@
+/*
+ * dts file for AppliedMicro (APM) Mustang Board
+ *
+ * Copyright (C) 2013, Applied Micro Circuits Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ */
+
+/dts-v1/;
+
+/include/ "apm-storm.dtsi"
+
+/ {
+	model = "APM X-Gene Mustang board";
+	compatible = "apm,mustang", "apm,xgene-storm";
+
+	chosen { };
+
+	memory {
+		device_type = "memory";
+		reg = < 0x1 0x00000000 0x0 0x80000000 >; /* Updated by bootloader */
+	};
+};
diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi
new file mode 100644
index 00000000000..bfdc5783492
--- /dev/null
+++ b/arch/arm64/boot/dts/apm-storm.dtsi
@@ -0,0 +1,116 @@
+/*
+ * dts file for AppliedMicro (APM) X-Gene Storm SOC
+ *
+ * Copyright (C) 2013, Applied Micro Circuits Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ */
+
+/ {
+	compatible = "apm,xgene-storm";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu@000 {
+			device_type = "cpu";
+			compatible = "apm,potenza", "arm,armv8";
+			reg = <0x0 0x000>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x1 0x0000fff8>;
+		};
+		cpu@001 {
+			device_type = "cpu";
+			compatible = "apm,potenza", "arm,armv8";
+			reg = <0x0 0x001>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x1 0x0000fff8>;
+		};
+		cpu@100 {
+			device_type = "cpu";
+			compatible = "apm,potenza", "arm,armv8";
+			reg = <0x0 0x100>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x1 0x0000fff8>;
+		};
+		cpu@101 {
+			device_type = "cpu";
+			compatible = "apm,potenza", "arm,armv8";
+			reg = <0x0 0x101>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x1 0x0000fff8>;
+		};
+		cpu@200 {
+			device_type = "cpu";
+			compatible = "apm,potenza", "arm,armv8";
+			reg = <0x0 0x200>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x1 0x0000fff8>;
+		};
+		cpu@201 {
+			device_type = "cpu";
+			compatible = "apm,potenza", "arm,armv8";
+			reg = <0x0 0x201>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x1 0x0000fff8>;
+		};
+		cpu@300 {
+			device_type = "cpu";
+			compatible = "apm,potenza", "arm,armv8";
+			reg = <0x0 0x300>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x1 0x0000fff8>;
+		};
+		cpu@301 {
+			device_type = "cpu";
+			compatible = "apm,potenza", "arm,armv8";
+			reg = <0x0 0x301>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0x1 0x0000fff8>;
+		};
+	};
+
+	gic: interrupt-controller@78010000 {
+		compatible = "arm,cortex-a15-gic";
+		#interrupt-cells = <3>;
+		interrupt-controller;
+		reg = <0x0 0x78010000 0x0 0x1000>,	/* GIC Dist */
+		      <0x0 0x78020000 0x0 0x1000>,	/* GIC CPU */
+		      <0x0 0x78040000 0x0 0x2000>,	/* GIC VCPU Control */
+		      <0x0 0x78060000 0x0 0x2000>;	/* GIC VCPU */
+		interrupts = <1 9 0xf04>;	/* GIC Maintenance IRQ */
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <1 0 0xff01>,	/* Secure Phys IRQ */
+			     <1 13 0xff01>,	/* Non-secure Phys IRQ */
+			     <1 14 0xff01>,	/* Virt IRQ */
+			     <1 15 0xff01>;	/* Hyp IRQ */
+		clock-frequency = <50000000>;
+	};
+
+	soc {
+		compatible = "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		serial0: serial@1c020000 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0 0x1c020000 0x0 0x1000>;
+			reg-shift = <2>;
+			clock-frequency = <10000000>; /* Updated by bootloader */
+			interrupt-parent = <&gic>;
+			interrupts = <0x0 0x4c 0x4>;
+		};
+	};
+};
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 8d9696adb44..5b3e83217b0 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -24,6 +24,7 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 CONFIG_ARCH_VEXPRESS=y
+CONFIG_ARCH_XGENE=y
 CONFIG_SMP=y
 CONFIG_PREEMPT_VOLUNTARY=y
 CONFIG_CMDLINE="console=ttyAMA0"
@@ -54,6 +55,9 @@ CONFIG_INPUT_EVDEV=y
 # CONFIG_SERIO_I8042 is not set
 # CONFIG_SERIO_SERPORT is not set
 CONFIG_LEGACY_PTY_COUNT=16
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index bf6ab242f04..d56ed11ba9a 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -110,16 +110,6 @@ static inline void __cpuinit arch_counter_set_user_access(void)
 	asm volatile("msr	cntkctl_el1, %0" : : "r" (cntkctl));
 }
 
-static inline u64 arch_counter_get_cntpct(void)
-{
-	u64 cval;
-
-	isb();
-	asm volatile("mrs %0, cntpct_el0" : "=r" (cval));
-
-	return cval;
-}
-
 static inline u64 arch_counter_get_cntvct(void)
 {
 	u64 cval;
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 3300cbd18a8..fea9ee32720 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -123,9 +123,6 @@ static inline void __flush_icache_all(void)
 #define flush_dcache_mmap_unlock(mapping) \
 	spin_unlock_irq(&(mapping)->tree_lock)
 
-#define flush_icache_user_range(vma,page,addr,len) \
-	flush_dcache_page(page)
-
 /*
  * We don't appear to need to do anything here.  In fact, if we did, we'd
  * duplicate cache flushing elsewhere performed by flush_dcache_page().
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index cf2749488cd..5fe138e0b82 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -37,11 +37,14 @@
 })
 
 #define ARM_CPU_IMP_ARM		0x41
+#define ARM_CPU_IMP_APM		0x50
 
 #define ARM_CPU_PART_AEM_V8	0xD0F0
 #define ARM_CPU_PART_FOUNDATION	0xD000
 #define ARM_CPU_PART_CORTEX_A57	0xD070
 
+#define APM_CPU_PART_POTENZA	0x0000
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 7eaa0b30249..ef8235c68c0 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -83,6 +83,15 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs)
 }
 #endif
 
+#ifdef CONFIG_COMPAT
+int aarch32_break_handler(struct pt_regs *regs);
+#else
+static inline int aarch32_break_handler(struct pt_regs *regs)
+{
+	return -EFAULT;	/* !CONFIG_COMPAT stub: always fails */
+}
+#endif
+
 #endif	/* __ASSEMBLY */
 #endif	/* __KERNEL__ */
 #endif	/* __ASM_DEBUG_MONITORS_H */
diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h
index 0d8453c755a..cf98b362094 100644
--- a/arch/arm64/include/asm/device.h
+++ b/arch/arm64/include/asm/device.h
@@ -18,6 +18,9 @@
 
 struct dev_archdata {
 	struct dma_map_ops *dma_ops;
+#ifdef CONFIG_IOMMU_API
+	void *iommu;			/* private IOMMU data */
+#endif
 };
 
 struct pdev_archdata {
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index 99477689419..8d1810001ae 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -81,8 +81,12 @@ static inline void dma_mark_clean(void *addr, size_t size)
 {
 }
 
-static inline void *dma_alloc_coherent(struct device *dev, size_t size,
-				       dma_addr_t *dma_handle, gfp_t flags)
+#define dma_alloc_coherent(d, s, h, f)	dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_free_coherent(d, s, h, f)	dma_free_attrs(d, s, h, f, NULL)
+
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+				    dma_addr_t *dma_handle, gfp_t flags,
+				    struct dma_attrs *attrs)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
 	void *vaddr;
@@ -90,13 +94,14 @@ static inline void *dma_alloc_coherent(struct device *dev, size_t size,
 	if (dma_alloc_from_coherent(dev, size, dma_handle, &vaddr))
 		return vaddr;
 
-	vaddr = ops->alloc(dev, size, dma_handle, flags, NULL);
+	vaddr = ops->alloc(dev, size, dma_handle, flags, attrs);
 	debug_dma_alloc_coherent(dev, size, *dma_handle, vaddr);
 	return vaddr;
 }
 
-static inline void dma_free_coherent(struct device *dev, size_t size,
-				     void *vaddr, dma_addr_t dev_addr)
+static inline void dma_free_attrs(struct device *dev, size_t size,
+				  void *vaddr, dma_addr_t dev_addr,
+				  struct dma_attrs *attrs)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
 
@@ -104,7 +109,7 @@ static inline void dma_free_coherent(struct device *dev, size_t size,
 		return;
 
 	debug_dma_free_coherent(dev, size, vaddr, dev_addr);
-	ops->free(dev, size, vaddr, dev_addr, NULL);
+	ops->free(dev, size, vaddr, dev_addr, attrs);
 }
 
 /*
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
new file mode 100644
index 00000000000..5b7ca8ace95
--- /dev/null
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -0,0 +1,117 @@
+/*
+ * arch/arm64/include/asm/hugetlb.h
+ *
+ * Copyright (C) 2013 Linaro Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __ASM_HUGETLB_H
+#define __ASM_HUGETLB_H
+
+#include <asm-generic/hugetlb.h>
+#include <asm/page.h>
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	return *ptep;
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pte)
+{
+	set_pte_at(mm, addr, ptep, pte);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep)
+{
+	ptep_clear_flush(vma, addr, ptep);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return ptep_get_and_clear(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
+{
+	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+					  unsigned long addr, unsigned long end,
+					  unsigned long floor,
+					  unsigned long ceiling)
+{
+	free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr, unsigned long len)
+{
+	return 0;
+}
+
+static inline int prepare_hugepage_range(struct file *file,
+					 unsigned long addr, unsigned long len)
+{
+	struct hstate *h = hstate_file(file);
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (addr & ~huge_page_mask(h))
+		return -EINVAL;
+	return 0;
+}
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+	return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+	clear_bit(PG_dcache_clean, &page->flags);
+}
+
+#endif /* __ASM_HUGETLB_H */
diff --git a/arch/arm64/include/asm/hypervisor.h b/arch/arm64/include/asm/hypervisor.h
new file mode 100644
index 00000000000..d2c79049ff1
--- /dev/null
+++ b/arch/arm64/include/asm/hypervisor.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_ARM64_HYPERVISOR_H
+#define _ASM_ARM64_HYPERVISOR_H
+
+#include <asm/xen/hypervisor.h>
+
+#endif
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 2e12258aa7e..1d12f89140b 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -228,10 +228,12 @@ extern void __iounmap(volatile void __iomem *addr);
 #define PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_DIRTY)
 #define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL		(PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
 
 #define ioremap(addr, size)		__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
 #define ioremap_nocache(addr, size)	__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
 #define ioremap_wc(addr, size)		__ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
+#define ioremap_cached(addr, size)	__ioremap((addr), (size), __pgprot(PROT_NORMAL))
 #define iounmap				__iounmap
 
 #define PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
new file mode 100644
index 00000000000..a5f28e2720c
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -0,0 +1,245 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_ARM_H__
+#define __ARM64_KVM_ARM_H__
+
+#include <asm/types.h>
+
+/* Hyp Configuration Register (HCR) bits */
+#define HCR_ID		(UL(1) << 33)
+#define HCR_CD		(UL(1) << 32)
+#define HCR_RW_SHIFT	31
+#define HCR_RW		(UL(1) << HCR_RW_SHIFT)
+#define HCR_TRVM	(UL(1) << 30)
+#define HCR_HCD		(UL(1) << 29)
+#define HCR_TDZ		(UL(1) << 28)
+#define HCR_TGE		(UL(1) << 27)
+#define HCR_TVM		(UL(1) << 26)
+#define HCR_TTLB	(UL(1) << 25)
+#define HCR_TPU		(UL(1) << 24)
+#define HCR_TPC		(UL(1) << 23)
+#define HCR_TSW		(UL(1) << 22)
+#define HCR_TAC		(UL(1) << 21)
+#define HCR_TIDCP	(UL(1) << 20)
+#define HCR_TSC		(UL(1) << 19)
+#define HCR_TID3	(UL(1) << 18)
+#define HCR_TID2	(UL(1) << 17)
+#define HCR_TID1	(UL(1) << 16)
+#define HCR_TID0	(UL(1) << 15)
+#define HCR_TWE		(UL(1) << 14)
+#define HCR_TWI		(UL(1) << 13)
+#define HCR_DC		(UL(1) << 12)
+#define HCR_BSU		(3 << 10)
+#define HCR_BSU_IS	(UL(1) << 10)
+#define HCR_FB		(UL(1) << 9)
+#define HCR_VA		(UL(1) << 8)
+#define HCR_VI		(UL(1) << 7)
+#define HCR_VF		(UL(1) << 6)
+#define HCR_AMO		(UL(1) << 5)
+#define HCR_IMO		(UL(1) << 4)
+#define HCR_FMO		(UL(1) << 3)
+#define HCR_PTW		(UL(1) << 2)
+#define HCR_SWIO	(UL(1) << 1)
+#define HCR_VM		(UL(1) << 0)
+
+/*
+ * The bits we set in HCR:
+ * RW:		64bit by default, can be overridden for 32bit VMs
+ * TAC:		Trap ACTLR
+ * TSC:		Trap SMC
+ * TSW:		Trap cache operations by set/way
+ * TWI:		Trap WFI
+ * TIDCP:	Trap L2CTLR/L2ECTLR
+ * BSU_IS:	Upgrade barriers to the inner shareable domain
+ * FB:		Force broadcast of all maintenance operations
+ * AMO:		Override CPSR.A and enable signaling with VA
+ * IMO:		Override CPSR.I and enable signaling with VI
+ * FMO:		Override CPSR.F and enable signaling with VF
+ * SWIO:	Turn set/way invalidates into set/way clean+invalidate
+ */
+#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
+			 HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
+			 HCR_SWIO | HCR_TIDCP | HCR_RW)
+#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+
+/* Hyp System Control Register (SCTLR_EL2) bits */
+#define SCTLR_EL2_EE	(1 << 25)
+#define SCTLR_EL2_WXN	(1 << 19)
+#define SCTLR_EL2_I	(1 << 12)
+#define SCTLR_EL2_SA	(1 << 3)
+#define SCTLR_EL2_C	(1 << 2)
+#define SCTLR_EL2_A	(1 << 1)
+#define SCTLR_EL2_M	1
+#define SCTLR_EL2_FLAGS	(SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C |	\
+			 SCTLR_EL2_SA | SCTLR_EL2_I)
+
+/* TCR_EL2 Register bits */
+#define TCR_EL2_TBI	(1 << 20)
+#define TCR_EL2_PS	(7 << 16)
+#define TCR_EL2_PS_40B	(2 << 16)
+#define TCR_EL2_TG0	(1 << 14)
+#define TCR_EL2_SH0	(3 << 12)
+#define TCR_EL2_ORGN0	(3 << 10)
+#define TCR_EL2_IRGN0	(3 << 8)
+#define TCR_EL2_T0SZ	0x3f
+#define TCR_EL2_MASK	(TCR_EL2_TG0 | TCR_EL2_SH0 | \
+			 TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
+
+#define TCR_EL2_FLAGS	(TCR_EL2_PS_40B)
+
+/* VTCR_EL2 Register bits */
+#define VTCR_EL2_PS_MASK	(7 << 16)
+#define VTCR_EL2_PS_40B		(2 << 16)
+#define VTCR_EL2_TG0_MASK	(1 << 14)
+#define VTCR_EL2_TG0_4K		(0 << 14)
+#define VTCR_EL2_TG0_64K	(1 << 14)
+#define VTCR_EL2_SH0_MASK	(3 << 12)
+#define VTCR_EL2_SH0_INNER	(3 << 12)
+#define VTCR_EL2_ORGN0_MASK	(3 << 10)
+#define VTCR_EL2_ORGN0_WBWA	(1 << 10)
+#define VTCR_EL2_IRGN0_MASK	(3 << 8)
+#define VTCR_EL2_IRGN0_WBWA	(1 << 8)
+#define VTCR_EL2_SL0_MASK	(3 << 6)
+#define VTCR_EL2_SL0_LVL1	(1 << 6)
+#define VTCR_EL2_T0SZ_MASK	0x3f
+#define VTCR_EL2_T0SZ_40B	24
+
+#ifdef CONFIG_ARM64_64K_PAGES
+/*
+ * Stage2 translation configuration:
+ * 40bits output (PS = 2)
+ * 40bits input  (T0SZ = 24)
+ * 64kB pages (TG0 = 1)
+ * 2 level page tables (SL = 1)
+ */
+#define VTCR_EL2_FLAGS		(VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \
+				 VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
+				 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
+				 VTCR_EL2_T0SZ_40B)
+#define VTTBR_X		(38 - VTCR_EL2_T0SZ_40B)
+#else
+/*
+ * Stage2 translation configuration:
+ * 40bits output (PS = 2)
+ * 40bits input  (T0SZ = 24)
+ * 4kB pages (TG0 = 0)
+ * 3 level page tables (SL = 1)
+ */
+#define VTCR_EL2_FLAGS		(VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \
+				 VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
+				 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
+				 VTCR_EL2_T0SZ_40B)
+#define VTTBR_X		(37 - VTCR_EL2_T0SZ_40B)
+#endif
+
+#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
+#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_VMID_SHIFT  (48LLU)
+#define VTTBR_VMID_MASK	  (0xffLLU << VTTBR_VMID_SHIFT)
+
+/* Hyp System Trap Register */
+#define HSTR_EL2_TTEE	(1 << 16)
+#define HSTR_EL2_T(x)	(1 << (x))	/* mask with only bit x set */
+
+/* Hyp Coprocessor Trap Register */
+#define CPTR_EL2_TCPAC	(UL(1) << 31)	/* unsigned: avoids shifting into int's sign bit */
+#define CPTR_EL2_TTA	(1 << 20)
+#define CPTR_EL2_TFP	(1 << 10)
+
+/* Hyp Debug Configuration Register bits */
+#define MDCR_EL2_TDRA		(1 << 11)
+#define MDCR_EL2_TDOSA		(1 << 10)
+#define MDCR_EL2_TDA		(1 << 9)
+#define MDCR_EL2_TDE		(1 << 8)
+#define MDCR_EL2_HPME		(1 << 7)
+#define MDCR_EL2_TPM		(1 << 6)
+#define MDCR_EL2_TPMCR		(1 << 5)
+#define MDCR_EL2_HPMN_MASK	(0x1F)
+
+/* Exception Syndrome Register (ESR) bits */
+#define ESR_EL2_EC_SHIFT	(26)
+#define ESR_EL2_EC		(0x3fU << ESR_EL2_EC_SHIFT)
+#define ESR_EL2_IL		(1U << 25)
+#define ESR_EL2_ISS		(ESR_EL2_IL - 1)
+#define ESR_EL2_ISV_SHIFT	(24)
+#define ESR_EL2_ISV		(1U << ESR_EL2_ISV_SHIFT)
+#define ESR_EL2_SAS_SHIFT	(22)
+#define ESR_EL2_SAS		(3U << ESR_EL2_SAS_SHIFT)
+#define ESR_EL2_SSE		(1 << 21)
+#define ESR_EL2_SRT_SHIFT	(16)
+#define ESR_EL2_SRT_MASK	(0x1f << ESR_EL2_SRT_SHIFT)
+#define ESR_EL2_SF 		(1 << 15)
+#define ESR_EL2_AR 		(1 << 14)
+#define ESR_EL2_EA 		(1 << 9)
+#define ESR_EL2_CM 		(1 << 8)
+#define ESR_EL2_S1PTW 		(1 << 7)
+#define ESR_EL2_WNR		(1 << 6)
+#define ESR_EL2_FSC		(0x3f)
+#define ESR_EL2_FSC_TYPE	(0x3c)
+
+#define ESR_EL2_CV_SHIFT	(24)
+#define ESR_EL2_CV		(1U << ESR_EL2_CV_SHIFT)
+#define ESR_EL2_COND_SHIFT	(20)
+#define ESR_EL2_COND		(0xfU << ESR_EL2_COND_SHIFT)
+
+
+#define FSC_FAULT	(0x04)
+#define FSC_PERM	(0x0c)
+
+/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
+#define HPFAR_MASK	(~0xFUL)
+
+#define ESR_EL2_EC_UNKNOWN	(0x00)
+#define ESR_EL2_EC_WFI		(0x01)
+#define ESR_EL2_EC_CP15_32	(0x03)
+#define ESR_EL2_EC_CP15_64	(0x04)
+#define ESR_EL2_EC_CP14_MR	(0x05)
+#define ESR_EL2_EC_CP14_LS	(0x06)
+#define ESR_EL2_EC_FP_ASIMD	(0x07)
+#define ESR_EL2_EC_CP10_ID	(0x08)
+#define ESR_EL2_EC_CP14_64	(0x0C)
+#define ESR_EL2_EC_ILL_ISS	(0x0E)
+#define ESR_EL2_EC_SVC32	(0x11)
+#define ESR_EL2_EC_HVC32	(0x12)
+#define ESR_EL2_EC_SMC32	(0x13)
+#define ESR_EL2_EC_SVC64	(0x15)
+#define ESR_EL2_EC_HVC64	(0x16)
+#define ESR_EL2_EC_SMC64	(0x17)
+#define ESR_EL2_EC_SYS64	(0x18)
+#define ESR_EL2_EC_IABT		(0x20)
+#define ESR_EL2_EC_IABT_HYP	(0x21)
+#define ESR_EL2_EC_PC_ALIGN	(0x22)
+#define ESR_EL2_EC_DABT		(0x24)
+#define ESR_EL2_EC_DABT_HYP	(0x25)
+#define ESR_EL2_EC_SP_ALIGN	(0x26)
+#define ESR_EL2_EC_FP_EXC32	(0x28)
+#define ESR_EL2_EC_FP_EXC64	(0x2C)
+#define ESR_EL2_EC_SERRROR	(0x2F)
+#define ESR_EL2_EC_BREAKPT	(0x30)
+#define ESR_EL2_EC_BREAKPT_HYP	(0x31)
+#define ESR_EL2_EC_SOFTSTP	(0x32)
+#define ESR_EL2_EC_SOFTSTP_HYP	(0x33)
+#define ESR_EL2_EC_WATCHPT	(0x34)
+#define ESR_EL2_EC_WATCHPT_HYP	(0x35)
+#define ESR_EL2_EC_BKPT32	(0x38)
+#define ESR_EL2_EC_VECTOR32	(0x3A)
+#define ESR_EL2_EC_BRK64	(0x3C)
+
+#define ESR_EL2_EC_xABT_xFSR_EXTABT	0x10
+
+#endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
new file mode 100644
index 00000000000..c92de4163eb
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KVM_ASM_H__
+#define __ARM_KVM_ASM_H__
+
+/*
+ * 0 is reserved as an invalid value.
+ * Order *must* be kept in sync with the hyp switch code.
+ */
+#define	MPIDR_EL1	1	/* MultiProcessor Affinity Register */
+#define	CSSELR_EL1	2	/* Cache Size Selection Register */
+#define	SCTLR_EL1	3	/* System Control Register */
+#define	ACTLR_EL1	4	/* Auxiliary Control Register */
+#define	CPACR_EL1	5	/* Coprocessor Access Control */
+#define	TTBR0_EL1	6	/* Translation Table Base Register 0 */
+#define	TTBR1_EL1	7	/* Translation Table Base Register 1 */
+#define	TCR_EL1		8	/* Translation Control Register */
+#define	ESR_EL1		9	/* Exception Syndrome Register */
+#define	AFSR0_EL1	10	/* Auxiliary Fault Status Register 0 */
+#define	AFSR1_EL1	11	/* Auxiliary Fault Status Register 1 */
+#define	FAR_EL1		12	/* Fault Address Register */
+#define	MAIR_EL1	13	/* Memory Attribute Indirection Register */
+#define	VBAR_EL1	14	/* Vector Base Address Register */
+#define	CONTEXTIDR_EL1	15	/* Context ID Register */
+#define	TPIDR_EL0	16	/* Thread ID, User R/W */
+#define	TPIDRRO_EL0	17	/* Thread ID, User R/O */
+#define	TPIDR_EL1	18	/* Thread ID, Privileged */
+#define	AMAIR_EL1	19	/* Aux Memory Attribute Indirection Register */
+#define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
+/* 32bit specific registers. Keep them at the end of the range */
+#define	DACR32_EL2	21	/* Domain Access Control Register */
+#define	IFSR32_EL2	22	/* Instruction Fault Status Register */
+#define	FPEXC32_EL2	23	/* Floating-Point Exception Control Register */
+#define	DBGVCR32_EL2	24	/* Debug Vector Catch Register */
+#define	TEECR32_EL1	25	/* ThumbEE Configuration Register */
+#define	TEEHBR32_EL1	26	/* ThumbEE Handler Base Register */
+#define	NR_SYS_REGS	27
+
+/* 32bit mapping */
+#define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
+#define c0_CSSELR	(CSSELR_EL1 * 2)/* Cache Size Selection Register */
+#define c1_SCTLR	(SCTLR_EL1 * 2)	/* System Control Register */
+#define c1_ACTLR	(ACTLR_EL1 * 2)	/* Auxiliary Control Register */
+#define c1_CPACR	(CPACR_EL1 * 2)	/* Coprocessor Access Control */
+#define c2_TTBR0	(TTBR0_EL1 * 2)	/* Translation Table Base Register 0 */
+#define c2_TTBR0_high	(c2_TTBR0 + 1)	/* TTBR0 top 32 bits */
+#define c2_TTBR1	(TTBR1_EL1 * 2)	/* Translation Table Base Register 1 */
+#define c2_TTBR1_high	(c2_TTBR1 + 1)	/* TTBR1 top 32 bits */
+#define c2_TTBCR	(TCR_EL1 * 2)	/* Translation Table Base Control R. */
+#define c3_DACR		(DACR32_EL2 * 2)/* Domain Access Control Register */
+#define c5_DFSR		(ESR_EL1 * 2)	/* Data Fault Status Register */
+#define c5_IFSR		(IFSR32_EL2 * 2)/* Instruction Fault Status Register */
+#define c5_ADFSR	(AFSR0_EL1 * 2)	/* Auxiliary Data Fault Status R */
+#define c5_AIFSR	(AFSR1_EL1 * 2)	/* Auxiliary Instr Fault Status R */
+#define c6_DFAR		(FAR_EL1 * 2)	/* Data Fault Address Register */
+#define c6_IFAR		(c6_DFAR + 1)	/* Instruction Fault Address Register */
+#define c10_PRRR	(MAIR_EL1 * 2)	/* Primary Region Remap Register */
+#define c10_NMRR	(c10_PRRR + 1)	/* Normal Memory Remap Register */
+#define c12_VBAR	(VBAR_EL1 * 2)	/* Vector Base Address Register */
+#define c13_CID		(CONTEXTIDR_EL1 * 2)	/* Context ID Register */
+#define c13_TID_URW	(TPIDR_EL0 * 2)	/* Thread ID, User R/W */
+#define c13_TID_URO	(TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
+#define c13_TID_PRIV	(TPIDR_EL1 * 2)	/* Thread ID, Privileged */
+#define c10_AMAIR	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
+#define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
+#define NR_CP15_REGS	(NR_SYS_REGS * 2)
+
+#define ARM_EXCEPTION_IRQ	  0
+#define ARM_EXCEPTION_TRAP	  1
+
+#ifndef __ASSEMBLY__
+struct kvm;
+struct kvm_vcpu;
+
+extern char __kvm_hyp_init[];
+extern char __kvm_hyp_init_end[];
+
+extern char __kvm_hyp_vector[];
+
+extern char __kvm_hyp_code_start[];
+extern char __kvm_hyp_code_end[];
+
+extern void __kvm_flush_vm_context(void);
+extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+
+extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+#endif
+
+#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h
new file mode 100644
index 00000000000..9a59301cd01
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_coproc.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/asm/kvm_coproc.h
+ * Copyright (C) 2012 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_COPROC_H__
+#define __ARM64_KVM_COPROC_H__
+
+#include <linux/kvm_host.h>
+
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
+
+struct kvm_sys_reg_table {
+	const struct sys_reg_desc *table;
+	size_t num;
+};
+
+struct kvm_sys_reg_target_table {
+	struct kvm_sys_reg_table table64;
+	struct kvm_sys_reg_table table32;
+};
+
+void kvm_register_target_sys_reg_table(unsigned int target,
+				       struct kvm_sys_reg_target_table *table);
+
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+#define kvm_coproc_table_init kvm_sys_reg_table_init
+void kvm_sys_reg_table_init(void);
+
+struct kvm_one_reg;
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
+
+#endif /* __ARM64_KVM_COPROC_H__ */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
new file mode 100644
index 00000000000..eec07387521
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/kvm_emulate.h
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_EMULATE_H__
+#define __ARM64_KVM_EMULATE_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmio.h>
+#include <asm/ptrace.h>
+
+unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
+unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
+
+bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
+void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
+
+void kvm_inject_undefined(struct kvm_vcpu *vcpu);
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+
+static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
+{
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
+}
+
+static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu)
+{
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1;
+}
+
+static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
+{
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate;
+}
+
+static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
+{
+	return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT);
+}
+
+static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
+{
+	if (vcpu_mode_is_32bit(vcpu))
+		return kvm_condition_valid32(vcpu);
+
+	return true;
+}
+
+static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
+{
+	if (vcpu_mode_is_32bit(vcpu))
+		kvm_skip_instr32(vcpu, is_wide_instr);
+	else
+		*vcpu_pc(vcpu) += 4;
+}
+
+static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
+{
+	*vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT;
+}
+
+static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num)
+{
+	if (vcpu_mode_is_32bit(vcpu))
+		return vcpu_reg32(vcpu, reg_num);
+
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num];
+}
+
+/* Get vcpu SPSR for current mode */
+static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
+{
+	if (vcpu_mode_is_32bit(vcpu))
+		return vcpu_spsr32(vcpu);
+
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+}
+
+static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
+{
+	u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
+
+	if (vcpu_mode_is_32bit(vcpu))
+		return mode > COMPAT_PSR_MODE_USR;
+
+	return mode != PSR_MODE_EL0t;
+}
+
+static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.fault.esr_el2;
+}
+
+static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.fault.far_el2;
+}
+
+static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
+{
+	return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
+}
+
+static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_ISV);
+}
+
+static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_WNR);
+}
+
+static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SSE);
+}
+
+static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
+{
+	return (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SRT_MASK) >> ESR_EL2_SRT_SHIFT;
+}
+
+static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EA);
+}
+
+static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_S1PTW);
+}
+
+static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
+{
+	return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SAS) >> ESR_EL2_SAS_SHIFT);
+}
+
+/* This one is not specific to Data Abort */
+static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_IL);
+}
+
+static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) >> ESR_EL2_EC_SHIFT;
+}
+
+static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_trap_get_class(vcpu) == ESR_EL2_EC_IABT;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE;
+}
+
+#endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
new file mode 100644
index 00000000000..644d7395686
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/asm/kvm_host.h:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_HOST_H__
+#define __ARM64_KVM_HOST_H__
+
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmio.h>
+
+#define KVM_MAX_VCPUS 4
+#define KVM_USER_MEM_SLOTS 32
+#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+#include <kvm/arm_vgic.h>
+#include <kvm/arm_arch_timer.h>
+
+#define KVM_VCPU_MAX_FEATURES 2
+
+/* We don't currently support large pages. */
+#define KVM_HPAGE_GFN_SHIFT(x)	0
+#define KVM_NR_PAGE_SIZES	1
+#define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
+
+struct kvm_vcpu;
+int kvm_target_cpu(void);
+int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+int kvm_arch_dev_ioctl_check_extension(long ext);
+
+struct kvm_arch {
+	/* The VMID generation used for the virt. memory system */
+	u64    vmid_gen;
+	u32    vmid;
+
+	/* 1-level 2nd stage table and lock */
+	spinlock_t pgd_lock;
+	pgd_t *pgd;
+
+	/* VTTBR value associated with above pgd and vmid */
+	u64    vttbr;
+
+	/* Interrupt controller */
+	struct vgic_dist	vgic;
+
+	/* Timer */
+	struct arch_timer_kvm	timer;
+};
+
+#define KVM_NR_MEM_OBJS     40
+
+/*
+ * We don't want allocation failures within the mmu code, so we preallocate
+ * enough memory for a single page fault in a cache.
+ */
+struct kvm_mmu_memory_cache {
+	int nobjs;
+	void *objects[KVM_NR_MEM_OBJS];
+};
+
+struct kvm_vcpu_fault_info {
+	u32 esr_el2;		/* Hyp Syndrome Register */
+	u64 far_el2;		/* Hyp Fault Address Register */
+	u64 hpfar_el2;		/* Hyp IPA Fault Address Register */
+};
+
+struct kvm_cpu_context {
+	struct kvm_regs	gp_regs;
+	union {
+		u64 sys_regs[NR_SYS_REGS];
+		u32 cp15[NR_CP15_REGS];
+	};
+};
+
+typedef struct kvm_cpu_context kvm_cpu_context_t;
+
+struct kvm_vcpu_arch {
+	struct kvm_cpu_context ctxt;
+
+	/* HYP configuration */
+	u64 hcr_el2;
+
+	/* Exception Information */
+	struct kvm_vcpu_fault_info fault;
+
+	/* Pointer to host CPU context */
+	kvm_cpu_context_t *host_cpu_context;
+
+	/* VGIC state */
+	struct vgic_cpu vgic_cpu;
+	struct arch_timer_cpu timer_cpu;
+
+	/*
+	 * Anything that is not used directly from assembly code goes
+	 * here.
+	 */
+	/* dcache set/way operation pending */
+	int last_pcpu;
+	cpumask_t require_dcache_flush;
+
+	/* Don't run the guest */
+	bool pause;
+
+	/* IO related fields */
+	struct kvm_decode mmio_decode;
+
+	/* Interrupt related fields */
+	u64 irq_lines;		/* IRQ and FIQ levels */
+
+	/* Cache some mmu pages needed inside spinlock regions */
+	struct kvm_mmu_memory_cache mmu_page_cache;
+
+	/* Target CPU and feature flags */
+	u32 target;
+	DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
+
+	/* Detect first run of a vcpu */
+	bool has_run_once;
+};
+
+#define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
+#define vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
+#define vcpu_cp15(v,r)		((v)->arch.ctxt.cp15[(r)])
+
+struct kvm_vm_stat {
+	u32 remote_tlb_flush;
+};
+
+struct kvm_vcpu_stat {
+	u32 halt_wakeup;
+};
+
+struct kvm_vcpu_init;
+int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+			const struct kvm_vcpu_init *init);
+unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
+int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
+struct kvm_one_reg;
+int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+struct kvm;
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm,
+			unsigned long start, unsigned long end);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+
+/* We do not have shadow page tables, hence the empty hooks */
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return 0;
+}
+
+static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return 0;
+}
+
+struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
+struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
+
+u64 kvm_call_hyp(void *hypfn, ...);
+
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		int exception_index);
+
+int kvm_perf_init(void);
+int kvm_perf_teardown(void);
+
+static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
+				       phys_addr_t pgd_ptr,
+				       unsigned long hyp_stack_ptr,
+				       unsigned long vector_ptr)
+{
+	/*
+	 * Call initialization code, and switch to the full blown
+	 * HYP code.
+	 */
+	kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
+		     hyp_stack_ptr, vector_ptr);
+}
+
+#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
new file mode 100644
index 00000000000..fc2f689c069
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_MMIO_H__
+#define __ARM64_KVM_MMIO_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+/*
+ * This is annoying. The mmio code requires this, even if we don't
+ * need any decoding. To be fixed.
+ *
+ * NOTE(review): the field meanings are not visible in this header; rt
+ * is presumably the index of the register used by the access and
+ * sign_extend whether a loaded value must be sign-extended -- confirm
+ * against the MMIO emulation code that consumes this struct.
+ */
+struct kvm_decode {
+	unsigned long rt;
+	bool sign_extend;
+};
+
+/*
+ * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
+ * which is an anonymous type. Use our own type instead.
+ *
+ * kvm_prepare_mmio() copies each field into the run->mmio member of
+ * the same name; only the first len bytes of data are copied.
+ */
+struct kvm_exit_mmio {
+	phys_addr_t	phys_addr;	/* copied to run->mmio.phys_addr */
+	u8		data[8];	/* payload buffer, 8 bytes */
+	u32		len;		/* number of bytes of data[] to copy */
+	bool		is_write;	/* copied to run->mmio.is_write */
+};
+
+/*
+ * Publish a pending MMIO access to userspace: mirror our private
+ * kvm_exit_mmio copy into run->mmio and mark the exit as KVM_EXIT_MMIO.
+ * Only mmio->len bytes of payload are copied.
+ */
+static inline void kvm_prepare_mmio(struct kvm_run *run,
+				    struct kvm_exit_mmio *mmio)
+{
+	run->exit_reason = KVM_EXIT_MMIO;
+
+	run->mmio.is_write = mmio->is_write;
+	run->mmio.phys_addr = mmio->phys_addr;
+	run->mmio.len = mmio->len;
+	memcpy(run->mmio.data, mmio->data, mmio->len);
+}
+
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		 phys_addr_t fault_ipa);
+
+#endif	/* __ARM64_KVM_MMIO_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
new file mode 100644
index 00000000000..efe609c6a3c
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_MMU_H__
+#define __ARM64_KVM_MMU_H__
+
+#include <asm/page.h>
+#include <asm/memory.h>
+
+/*
+ * As we only have the TTBR0_EL2 register, we cannot express
+ * "negative" addresses. This makes it impossible to directly share
+ * mappings with the kernel.
+ *
+ * Instead, give the HYP mode its own VA region at a fixed offset from
+ * the kernel by just masking the top bits (which are all ones for a
+ * kernel address).
+ */
+#define HYP_PAGE_OFFSET_SHIFT	VA_BITS
+#define HYP_PAGE_OFFSET_MASK	((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1)
+#define HYP_PAGE_OFFSET		(PAGE_OFFSET & HYP_PAGE_OFFSET_MASK)
+
+/*
+ * Our virtual mapping for the idmap-ed MMU-enable code. Must be
+ * shared across all the page-tables. Conveniently, we use the last
+ * possible page, where no kernel mapping will ever exist.
+ */
+#define TRAMPOLINE_VA		(HYP_PAGE_OFFSET_MASK & PAGE_MASK)
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Convert a kernel VA into a HYP VA.
+ * reg: VA to be converted.
+ */
+.macro kern_hyp_va	reg
+	and	\reg, \reg, #HYP_PAGE_OFFSET_MASK
+.endm
+
+#else
+
+#include <asm/cachetype.h>
+#include <asm/cacheflush.h>
+
+/*
+ * Rebase a kernel VA onto the HYP VA region: subtract PAGE_OFFSET and
+ * add HYP_PAGE_OFFSET. The argument is parenthesised so that a compound
+ * expression (e.g. "base + off") is cast to unsigned long as a whole.
+ */
+#define KERN_TO_HYP(kva)	((unsigned long)(kva) - PAGE_OFFSET + HYP_PAGE_OFFSET)
+
+/*
+ * Align KVM with the kernel's view of physical memory. Should be
+ * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration.
+ */
+#define KVM_PHYS_SHIFT	PHYS_MASK_SHIFT
+#define KVM_PHYS_SIZE	(1UL << KVM_PHYS_SHIFT)
+#define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1UL)
+
+/* Make sure we get the right size, and thus the right alignment */
+#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - PGDIR_SHIFT))
+#define S2_PGD_ORDER	get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
+
+int create_hyp_mappings(void *from, void *to);
+int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
+void free_boot_hyp_pgd(void);
+void free_hyp_pgds(void);
+
+int kvm_alloc_stage2_pgd(struct kvm *kvm);
+void kvm_free_stage2_pgd(struct kvm *kvm);
+int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
+			  phys_addr_t pa, unsigned long size);
+
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
+
+phys_addr_t kvm_mmu_get_httbr(void);
+phys_addr_t kvm_mmu_get_boot_httbr(void);
+phys_addr_t kvm_get_idmap_vector(void);
+int kvm_mmu_init(void);
+void kvm_clear_hyp_idmap(void);
+
+#define	kvm_set_pte(ptep, pte)		set_pte(ptep, pte)
+
+/*
+ * Classify a guest abort from its ESR value. An instruction abort is
+ * never a write; otherwise a syndrome with ISV set and WNR clear is a
+ * read. Everything else is reported as a write.
+ */
+static inline bool kvm_is_write_fault(unsigned long esr)
+{
+	bool is_iabt = (esr >> ESR_EL2_EC_SHIFT) == ESR_EL2_EC_IABT;
+	bool known_read = (esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR);
+
+	return !is_iabt && !known_read;
+}
+
+static inline void kvm_clean_dcache_area(void *addr, size_t size) {}	/* empty body: nothing is cleaned */
+static inline void kvm_clean_pgd(pgd_t *pgd) {}			/* empty body */
+static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}			/* empty body */
+static inline void kvm_clean_pte(pte_t *pte) {}			/* empty body */
+static inline void kvm_clean_pte_entry(pte_t *pte) {}			/* empty body */
+
+static inline void kvm_set_s2pte_writable(pte_t *pte)
+{
+	pte_val(*pte) |= PTE_S2_RDWR;	/* ORs in both HAP[2:1] bits, in place */
+}
+
+struct kvm;
+
+/*
+ * Make the I-cache coherent for one guest page, choosing the
+ * maintenance operation from what the icache_is_*() helpers report.
+ */
+static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+{
+	unsigned long start;
+
+	if (icache_is_aliasing()) {
+		/* any kind of VIPT cache: flush everything (skipped for AIVIVT) */
+		if (!icache_is_aivivt())
+			__flush_icache_all();
+		return;
+	}
+
+	/* PIPT: flushing the page's host VA range is enough */
+	start = gfn_to_hva(kvm, gfn);
+	flush_icache_range(start, start + PAGE_SIZE);
+}
+
+#define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
new file mode 100644
index 00000000000..e301a481635
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_psci.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_PSCI_H__
+#define __ARM64_KVM_PSCI_H__
+
+bool kvm_psci_call(struct kvm_vcpu *vcpu);
+
+#endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 381f556b664..20925bcf4e2 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -90,6 +90,12 @@
 #define MT_NORMAL_NC		3
 #define MT_NORMAL		4
 
+/*
+ * Memory types for Stage-2 translation
+ */
+#define MT_S2_NORMAL		0xf
+#define MT_S2_DEVICE_nGnRE	0x1
+
 #ifndef __ASSEMBLY__
 
 extern phys_addr_t		memstart_addr;
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index e2bc385adb6..a9eee33dfa6 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -151,12 +151,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 {
 	unsigned int cpu = smp_processor_id();
 
-#ifdef CONFIG_SMP
-	/* check for possible thread migration */
-	if (!cpumask_empty(mm_cpumask(next)) &&
-	    !cpumask_test_cpu(cpu, mm_cpumask(next)))
-		__flush_icache_all();
-#endif
 	if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)
 		check_and_switch_context(next, tsk);
 }
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 75fd13d289b..e182a356c97 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -25,16 +25,27 @@
 /*
  * Hardware page table definitions.
  *
+ * Level 1 descriptor (PUD).
+ */
+
+#define PUD_TABLE_BIT		(_AT(pgdval_t, 1) << 1)
+
+/*
  * Level 2 descriptor (PMD).
  */
 #define PMD_TYPE_MASK		(_AT(pmdval_t, 3) << 0)
 #define PMD_TYPE_FAULT		(_AT(pmdval_t, 0) << 0)
 #define PMD_TYPE_TABLE		(_AT(pmdval_t, 3) << 0)
 #define PMD_TYPE_SECT		(_AT(pmdval_t, 1) << 0)
+#define PMD_TABLE_BIT		(_AT(pmdval_t, 1) << 1)
 
 /*
  * Section
  */
+#define PMD_SECT_VALID		(_AT(pmdval_t, 1) << 0)
+#define PMD_SECT_PROT_NONE	(_AT(pmdval_t, 1) << 2)
+#define PMD_SECT_USER		(_AT(pmdval_t, 1) << 6)		/* AP[1] */
+#define PMD_SECT_RDONLY		(_AT(pmdval_t, 1) << 7)		/* AP[2] */
 #define PMD_SECT_S		(_AT(pmdval_t, 3) << 8)
 #define PMD_SECT_AF		(_AT(pmdval_t, 1) << 10)
 #define PMD_SECT_NG		(_AT(pmdval_t, 1) << 11)
@@ -53,6 +64,7 @@
 #define PTE_TYPE_MASK		(_AT(pteval_t, 3) << 0)
 #define PTE_TYPE_FAULT		(_AT(pteval_t, 0) << 0)
 #define PTE_TYPE_PAGE		(_AT(pteval_t, 3) << 0)
+#define PTE_TABLE_BIT		(_AT(pteval_t, 1) << 1)
 #define PTE_USER		(_AT(pteval_t, 1) << 6)		/* AP[1] */
 #define PTE_RDONLY		(_AT(pteval_t, 1) << 7)		/* AP[2] */
 #define PTE_SHARED		(_AT(pteval_t, 3) << 8)		/* SH[1:0], inner shareable */
@@ -68,6 +80,24 @@
 #define PTE_ATTRINDX_MASK	(_AT(pteval_t, 7) << 2)
 
 /*
+ * 2nd stage PTE definitions
+ */
+#define PTE_S2_RDONLY		(_AT(pteval_t, 1) << 6)   /* HAP[2:1] */
+#define PTE_S2_RDWR		(_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
+
+/*
+ * Memory Attribute override for Stage-2 (MemAttr[3:0])
+ */
+#define PTE_S2_MEMATTR(t)	(_AT(pteval_t, (t)) << 2)
+#define PTE_S2_MEMATTR_MASK	(_AT(pteval_t, 0xf) << 2)
+
+/*
+ * EL2/HYP PTE/PMD definitions
+ */
+#define PMD_HYP			PMD_SECT_USER
+#define PTE_HYP			PTE_USER
+
+/*
  * 40-bit physical address supported.
  */
 #define PHYS_MASK_SHIFT		(40)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 3a768e96cf0..f0bebc5e22c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -25,8 +25,8 @@
  * Software defined PTE bits definition.
  */
 #define PTE_VALID		(_AT(pteval_t, 1) << 0)
-#define PTE_PROT_NONE		(_AT(pteval_t, 1) << 1)	/* only when !PTE_VALID */
-#define PTE_FILE		(_AT(pteval_t, 1) << 2)	/* only when !pte_present() */
+#define PTE_PROT_NONE		(_AT(pteval_t, 1) << 2)	/* only when !PTE_VALID */
+#define PTE_FILE		(_AT(pteval_t, 1) << 3)	/* only when !pte_present() */
 #define PTE_DIRTY		(_AT(pteval_t, 1) << 55)
 #define PTE_SPECIAL		(_AT(pteval_t, 1) << 56)
 
@@ -66,7 +66,7 @@ extern pgprot_t pgprot_default;
 
 #define _MOD_PROT(p, b)		__pgprot_modify(p, 0, b)
 
-#define PAGE_NONE		__pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE)
+#define PAGE_NONE		__pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
 #define PAGE_SHARED		_MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
 #define PAGE_SHARED_EXEC	_MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN)
 #define PAGE_COPY		_MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
@@ -76,7 +76,13 @@ extern pgprot_t pgprot_default;
 #define PAGE_KERNEL		_MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY)
 #define PAGE_KERNEL_EXEC	_MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY)
 
-#define __PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE)
+#define PAGE_HYP		_MOD_PROT(pgprot_default, PTE_HYP)
+#define PAGE_HYP_DEVICE		__pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
+
+#define PAGE_S2			__pgprot_modify(pgprot_default, PTE_S2_MEMATTR_MASK, PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
+#define PAGE_S2_DEVICE		__pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN)
+
+#define __PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
 #define __PAGE_SHARED		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
 #define __PAGE_SHARED_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
 #define __PAGE_COPY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
@@ -119,7 +125,7 @@ extern struct page *empty_zero_page;
 #define pte_none(pte)		(!pte_val(pte))
 #define pte_clear(mm,addr,ptep)	set_pte(ptep, __pte(0))
 #define pte_page(pte)		(pfn_to_page(pte_pfn(pte)))
-#define pte_offset_kernel(dir,addr)	(pmd_page_vaddr(*(dir)) + __pte_index(addr))
+#define pte_offset_kernel(dir,addr)	(pmd_page_vaddr(*(dir)) + pte_index(addr))
 
 #define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
 #define pte_offset_map_nested(dir,addr)	pte_offset_kernel((dir), (addr))
@@ -173,12 +179,76 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 /*
  * Huge pte definitions.
  */
-#define pte_huge(pte)		((pte_val(pte) & PTE_TYPE_MASK) == PTE_TYPE_HUGEPAGE)
-#define pte_mkhuge(pte)		(__pte((pte_val(pte) & ~PTE_TYPE_MASK) | PTE_TYPE_HUGEPAGE))
+#define pte_huge(pte)		(!(pte_val(pte) & PTE_TABLE_BIT))
+#define pte_mkhuge(pte)		(__pte(pte_val(pte) & ~PTE_TABLE_BIT))
+
+/*
+ * Hugetlb definitions.
+ */
+#define HUGE_MAX_HSTATE		2
+#define HPAGE_SHIFT		PMD_SHIFT
+#define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
+#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
 
 #define __HAVE_ARCH_PTE_SPECIAL
 
 /*
+ * Software PMD bits for THP
+ */
+
+#define PMD_SECT_DIRTY		(_AT(pmdval_t, 1) << 55)
+#define PMD_SECT_SPLITTING	(_AT(pmdval_t, 1) << 57)
+
+/*
+ * THP definitions.
+ */
+#define pmd_young(pmd)		(pmd_val(pmd) & PMD_SECT_AF)
+
+#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write(pmd)		(!(pmd_val(pmd) & PMD_SECT_RDONLY))
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
+#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
+#endif
+
+#define PMD_BIT_FUNC(fn,op) \
+static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
+
+PMD_BIT_FUNC(wrprotect,	|= PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkold,	&= ~PMD_SECT_AF);
+PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
+PMD_BIT_FUNC(mkwrite,   &= ~PMD_SECT_RDONLY);
+PMD_BIT_FUNC(mkdirty,   |= PMD_SECT_DIRTY);
+PMD_BIT_FUNC(mkyoung,   |= PMD_SECT_AF);
+PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK);
+
+#define pmd_mkhuge(pmd)		(__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
+
+#define pmd_pfn(pmd)		(((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
+#define pfn_pmd(pfn,prot)	(__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define mk_pmd(page,prot)	pfn_pmd(page_to_pfn(page),prot)
+
+#define pmd_page(pmd)           pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
+
+/*
+ * Swap the protection-related bits of a PMD for the ones in newprot,
+ * leaving every other bit of the entry as it was.
+ */
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	const pmdval_t prot_bits = PMD_SECT_VALID | PMD_SECT_PROT_NONE |
+				   PMD_SECT_USER | PMD_SECT_RDONLY |
+				   PMD_SECT_PXN | PMD_SECT_UXN;
+	pmdval_t kept = pmd_val(pmd) & ~prot_bits;
+	pmdval_t taken = pgprot_val(newprot) & prot_bits;
+
+	pmd_val(pmd) = kept | taken;
+	return pmd;
+}
+
+#define set_pmd_at(mm, addr, pmdp, pmd)	set_pmd(pmdp, pmd)
+
+static inline int has_transparent_hugepage(void)
+{
+	return 1;	/* unconditionally reports support */
+}
+
+/*
  * Mark the prot value as uncacheable and unbufferable.
  */
 #define pgprot_noncached(prot) \
@@ -197,6 +267,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 
 #define pmd_bad(pmd)		(!(pmd_val(pmd) & 2))
 
+#define pmd_table(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
+				 PMD_TYPE_TABLE)
+#define pmd_sect(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
+				 PMD_TYPE_SECT)
+
+
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
 	*pmdp = pmd;
@@ -263,7 +339,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 #endif
 
 /* Find an entry in the third-level page table.. */
-#define __pte_index(addr)	(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
@@ -281,12 +357,12 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
 
 /*
  * Encode and decode a swap entry:
- *	bits 0-1:	present (must be zero)
- *	bit  2:		PTE_FILE
- *	bits 3-8:	swap type
+ *	bits 0, 2:	present (must both be zero)
+ *	bit  3:		PTE_FILE
+ *	bits 4-9:	swap type
 *	bits 10-63:	swap offset
  */
-#define __SWP_TYPE_SHIFT	3
+#define __SWP_TYPE_SHIFT	4
 #define __SWP_TYPE_BITS		6
 #define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
 #define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
@@ -306,15 +382,15 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
 
 /*
  * Encode and decode a file entry:
- *	bits 0-1:	present (must be zero)
- *	bit  2:		PTE_FILE
- *	bits 3-63:	file offset / PAGE_SIZE
+ *	bits 0, 2:	present (must both be zero)
+ *	bit  3:		PTE_FILE
+ *	bits 4-63:	file offset / PAGE_SIZE
  */
 #define pte_file(pte)		(pte_val(pte) & PTE_FILE)
-#define pte_to_pgoff(x)		(pte_val(x) >> 3)
-#define pgoff_to_pte(x)		__pte(((x) << 3) | PTE_FILE)
+#define pte_to_pgoff(x)		(pte_val(x) >> 4)
+#define pgoff_to_pte(x)		__pte(((x) << 4) | PTE_FILE)
 
-#define PTE_FILE_MAX_BITS	61
+#define PTE_FILE_MAX_BITS	60
 
 extern int kern_addr_valid(unsigned long addr);
 
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 41a71ee4c3d..0dacbbf9458 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -171,7 +171,5 @@ extern unsigned long profile_pc(struct pt_regs *regs);
 #define profile_pc(regs) instruction_pointer(regs)
 #endif
 
-extern int aarch32_break_trap(struct pt_regs *regs);
-
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index 7065e920149..0defa0728a9 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -59,9 +59,10 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 	unsigned int tmp;
 
 	asm volatile(
-	"	ldaxr	%w0, %1\n"
+	"2:	ldaxr	%w0, %1\n"
 	"	cbnz	%w0, 1f\n"
 	"	stxr	%w0, %w2, %1\n"
+	"	cbnz	%w0, 2b\n"
 	"1:\n"
 	: "=&r" (tmp), "+Q" (lock->lock)
 	: "r" (1)
diff --git a/arch/arm64/include/asm/sync_bitops.h b/arch/arm64/include/asm/sync_bitops.h
new file mode 100644
index 00000000000..8da0bf4f765
--- /dev/null
+++ b/arch/arm64/include/asm/sync_bitops.h
@@ -0,0 +1,26 @@
+#ifndef __ASM_SYNC_BITOPS_H__
+#define __ASM_SYNC_BITOPS_H__
+
+#include <asm/bitops.h>
+#include <asm/cmpxchg.h>
+
+/* sync_bitops functions are equivalent to the SMP implementation of the
+ * original functions, independently from CONFIG_SMP being defined.
+ *
+ * We need them because _set_bit etc are not SMP safe if !CONFIG_SMP. But
+ * under Xen you might be communicating with a completely external entity
+ * who might be on another CPU (e.g. two uniprocessor guests communicating
+ * via event channels and grant tables). So we need a variant of the bit
+ * ops which are SMP safe even on a UP kernel.
+ */
+
+#define sync_set_bit(nr, p)            set_bit(nr, p)
+#define sync_clear_bit(nr, p)          clear_bit(nr, p)
+#define sync_change_bit(nr, p)         change_bit(nr, p)
+#define sync_test_and_set_bit(nr, p)   test_and_set_bit(nr, p)
+#define sync_test_and_clear_bit(nr, p) test_and_clear_bit(nr, p)
+#define sync_test_and_change_bit(nr, p)        test_and_change_bit(nr, p)
+#define sync_test_bit(nr, addr)                test_bit(nr, addr)
+#define sync_cmpxchg                   cmpxchg
+
+#endif
diff --git a/arch/arm64/include/asm/timex.h b/arch/arm64/include/asm/timex.h
index b24a31a7e2c..81a076eb37f 100644
--- a/arch/arm64/include/asm/timex.h
+++ b/arch/arm64/include/asm/timex.h
@@ -16,14 +16,14 @@
 #ifndef __ASM_TIMEX_H
 #define __ASM_TIMEX_H
 
+#include <asm/arch_timer.h>
+
 /*
  * Use the current timer as a cycle counter since this is what we use for
  * the delay loop.
  */
-#define get_cycles()	({ cycles_t c; read_current_timer(&c); c; })
+#define get_cycles()	arch_counter_get_cntvct()
 
 #include <asm-generic/timex.h>
 
-#define ARCH_HAS_READ_CURRENT_TIMER
-
 #endif
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 654f0968030..46b3beb4b77 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -187,4 +187,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 
 #define tlb_migrate_finish(mm)		do { } while (0)
 
+static inline void
+tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
+{
+	tlb_add_flush(tlb, addr);	/* pmdp is unused; only tlb and addr are passed on */
+}
+
 #endif
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 122d6320f74..8b482035cfc 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -117,6 +117,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 	dsb();
 }
 
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
+
 #endif
 
 #endif
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 008f8481da6..edb3d5c73a3 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -166,7 +166,7 @@ do {									\
 
 #define get_user(x, ptr)						\
 ({									\
-	might_sleep();							\
+	might_fault();							\
 	access_ok(VERIFY_READ, (ptr), sizeof(*(ptr))) ?			\
 		__get_user((x), (ptr)) :				\
 		((x) = 0, -EFAULT);					\
@@ -227,7 +227,7 @@ do {									\
 
 #define put_user(x, ptr)						\
 ({									\
-	might_sleep();							\
+	might_fault();							\
 	access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr))) ?		\
 		__put_user((x), (ptr)) :				\
 		-EFAULT;						\
diff --git a/arch/arm64/include/asm/xen/events.h b/arch/arm64/include/asm/xen/events.h
new file mode 100644
index 00000000000..86553213c13
--- /dev/null
+++ b/arch/arm64/include/asm/xen/events.h
@@ -0,0 +1,21 @@
+#ifndef _ASM_ARM64_XEN_EVENTS_H
+#define _ASM_ARM64_XEN_EVENTS_H
+
+#include <asm/ptrace.h>
+#include <asm/atomic.h>
+
+enum ipi_vector {
+	XEN_PLACEHOLDER_VECTOR,	/* only entry so far, value 0 */
+
+	/* Xen IPIs go here */
+	XEN_NR_IPIS,		/* count of the enumerators above (currently 1) */
+};
+
+static inline int xen_irqs_disabled(struct pt_regs *regs)
+{
+	return raw_irqs_disabled_flags((unsigned long) regs->pstate);	/* tests the saved pstate, not the live CPU state */
+}
+
+#define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
+
+#endif /* _ASM_ARM64_XEN_EVENTS_H */
diff --git a/arch/arm64/include/asm/xen/hypercall.h b/arch/arm64/include/asm/xen/hypercall.h
new file mode 100644
index 00000000000..74b0c423ff5
--- /dev/null
+++ b/arch/arm64/include/asm/xen/hypercall.h
@@ -0,0 +1 @@
+#include <../../arm/include/asm/xen/hypercall.h>
diff --git a/arch/arm64/include/asm/xen/hypervisor.h b/arch/arm64/include/asm/xen/hypervisor.h
new file mode 100644
index 00000000000..f263da8e876
--- /dev/null
+++ b/arch/arm64/include/asm/xen/hypervisor.h
@@ -0,0 +1 @@
+#include <../../arm/include/asm/xen/hypervisor.h>
diff --git a/arch/arm64/include/asm/xen/interface.h b/arch/arm64/include/asm/xen/interface.h
new file mode 100644
index 00000000000..44457aebeed
--- /dev/null
+++ b/arch/arm64/include/asm/xen/interface.h
@@ -0,0 +1 @@
+#include <../../arm/include/asm/xen/interface.h>
diff --git a/arch/arm64/include/asm/xen/page.h b/arch/arm64/include/asm/xen/page.h
new file mode 100644
index 00000000000..bed87ec3678
--- /dev/null
+++ b/arch/arm64/include/asm/xen/page.h
@@ -0,0 +1 @@
+#include <../../arm/include/asm/xen/page.h>
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
new file mode 100644
index 00000000000..5031f426393
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/uapi/asm/kvm.h:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KVM_H__
+#define __ARM_KVM_H__
+
+#define KVM_SPSR_EL1	0
+#define KVM_SPSR_SVC	KVM_SPSR_EL1
+#define KVM_SPSR_ABT	1
+#define KVM_SPSR_UND	2
+#define KVM_SPSR_IRQ	3
+#define KVM_SPSR_FIQ	4
+#define KVM_NR_SPSR	5
+
+#ifndef __ASSEMBLY__
+#include <asm/types.h>
+#include <asm/ptrace.h>
+
+#define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_IRQ_LINE
+
+#define KVM_REG_SIZE(id)						\
+	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+struct kvm_regs {
+	struct user_pt_regs regs;	/* sp = sp_el0 */
+
+	__u64	sp_el1;
+	__u64	elr_el1;
+
+	__u64	spsr[KVM_NR_SPSR];	/* KVM_NR_SPSR (5) slots; presumably indexed by KVM_SPSR_* -- confirm */
+
+	struct user_fpsimd_state fp_regs;
+};
+
+/* Supported Processor Types */
+#define KVM_ARM_TARGET_AEM_V8		0
+#define KVM_ARM_TARGET_FOUNDATION_V8	1
+#define KVM_ARM_TARGET_CORTEX_A57	2
+
+#define KVM_ARM_NUM_TARGETS		3
+
+/*
+ * KVM_ARM_SET_DEVICE_ADDR ioctl id encoding
+ *
+ * The masks use unsigned constants: shifting a signed 0xffff left by 16
+ * reaches the sign bit, and the resulting negative int would be
+ * sign-extended when ANDed with a 64-bit id, covering bits 16-63
+ * instead of the 16-bit field.
+ */
+#define KVM_ARM_DEVICE_TYPE_SHIFT	0
+#define KVM_ARM_DEVICE_TYPE_MASK	(0xffffU << KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_ID_SHIFT		16
+#define KVM_ARM_DEVICE_ID_MASK		(0xffffU << KVM_ARM_DEVICE_ID_SHIFT)
+
+/* Supported device IDs */
+#define KVM_ARM_DEVICE_VGIC_V2		0
+
+/* Supported VGIC address types  */
+#define KVM_VGIC_V2_ADDR_TYPE_DIST	0
+#define KVM_VGIC_V2_ADDR_TYPE_CPU	1
+
+#define KVM_VGIC_V2_DIST_SIZE		0x1000
+#define KVM_VGIC_V2_CPU_SIZE		0x2000
+
+#define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
+
+struct kvm_vcpu_init {
+	__u32 target;		/* presumably one of KVM_ARM_TARGET_* -- confirm */
+	__u32 features[7];	/* presumably a bitmap of KVM_ARM_VCPU_* feature numbers -- confirm */
+};
+
+struct kvm_sregs {
+};
+
+struct kvm_fpu {
+};
+
+struct kvm_guest_debug_arch {
+};
+
+struct kvm_debug_exit_arch {
+};
+
+struct kvm_sync_regs {
+};
+
+struct kvm_arch_memory_slot {
+};
+
+/* If you need to interpret the index values, here is the key: */
+#define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
+#define KVM_REG_ARM_COPROC_SHIFT	16
+
+/* Normal registers are mapped as coprocessor 16. */
+#define KVM_REG_ARM_CORE		(0x0010 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_CORE_REG(name)	(offsetof(struct kvm_regs, name) / sizeof(__u32))
+
+/* Some registers need more space to represent values. */
+#define KVM_REG_ARM_DEMUX		(0x0011 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_DEMUX_ID_MASK	0x000000000000FF00
+#define KVM_REG_ARM_DEMUX_ID_SHIFT	8
+#define KVM_REG_ARM_DEMUX_ID_CCSIDR	(0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT)
+#define KVM_REG_ARM_DEMUX_VAL_MASK	0x00000000000000FF
+#define KVM_REG_ARM_DEMUX_VAL_SHIFT	0
+
+/* AArch64 system registers */
+#define KVM_REG_ARM64_SYSREG		(0x0013 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM64_SYSREG_OP0_MASK	0x000000000000c000
+#define KVM_REG_ARM64_SYSREG_OP0_SHIFT	14
+#define KVM_REG_ARM64_SYSREG_OP1_MASK	0x0000000000003800
+#define KVM_REG_ARM64_SYSREG_OP1_SHIFT	11
+#define KVM_REG_ARM64_SYSREG_CRN_MASK	0x0000000000000780
+#define KVM_REG_ARM64_SYSREG_CRN_SHIFT	7
+#define KVM_REG_ARM64_SYSREG_CRM_MASK	0x0000000000000078
+#define KVM_REG_ARM64_SYSREG_CRM_SHIFT	3
+#define KVM_REG_ARM64_SYSREG_OP2_MASK	0x0000000000000007
+#define KVM_REG_ARM64_SYSREG_OP2_SHIFT	0
+
+/* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_TYPE_SHIFT		24
+#define KVM_ARM_IRQ_TYPE_MASK		0xff
+#define KVM_ARM_IRQ_VCPU_SHIFT		16
+#define KVM_ARM_IRQ_VCPU_MASK		0xff
+#define KVM_ARM_IRQ_NUM_SHIFT		0
+#define KVM_ARM_IRQ_NUM_MASK		0xffff
+
+/* irq_type field */
+#define KVM_ARM_IRQ_TYPE_CPU		0
+#define KVM_ARM_IRQ_TYPE_SPI		1
+#define KVM_ARM_IRQ_TYPE_PPI		2
+
+/* out-of-kernel GIC cpu interrupt injection irq_number field */
+#define KVM_ARM_IRQ_CPU_IRQ		0
+#define KVM_ARM_IRQ_CPU_FIQ		1
+
+/* Highest supported SPI, from VGIC_NR_IRQS */
+#define KVM_ARM_IRQ_GIC_MAX		127
+
+/* PSCI interface */
+#define KVM_PSCI_FN_BASE		0x95c1ba5e
+#define KVM_PSCI_FN(n)			(KVM_PSCI_FN_BASE + (n))
+
+#define KVM_PSCI_FN_CPU_SUSPEND		KVM_PSCI_FN(0)
+#define KVM_PSCI_FN_CPU_OFF		KVM_PSCI_FN(1)
+#define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
+#define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
+
+#define KVM_PSCI_RET_SUCCESS		0
+#define KVM_PSCI_RET_NI			((unsigned long)-1)
+#define KVM_PSCI_RET_INVAL		((unsigned long)-2)
+#define KVM_PSCI_RET_DENIED		((unsigned long)-3)
+
+#endif
+
+#endif /* __ARM_KVM_H__ */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index a2a4d810bea..49c162c03b6 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -104,5 +104,38 @@ int main(void)
   BLANK();
   DEFINE(TZ_MINWEST,		offsetof(struct timezone, tz_minuteswest));
   DEFINE(TZ_DSTTIME,		offsetof(struct timezone, tz_dsttime));
+  BLANK();
+#ifdef CONFIG_KVM_ARM_HOST
+  DEFINE(VCPU_CONTEXT,		offsetof(struct kvm_vcpu, arch.ctxt));
+  DEFINE(CPU_GP_REGS,		offsetof(struct kvm_cpu_context, gp_regs));
+  DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_regs, regs));
+  DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
+  DEFINE(CPU_SP_EL1,		offsetof(struct kvm_regs, sp_el1));
+  DEFINE(CPU_ELR_EL1,		offsetof(struct kvm_regs, elr_el1));
+  DEFINE(CPU_SPSR,		offsetof(struct kvm_regs, spsr));
+  DEFINE(CPU_SYSREGS,		offsetof(struct kvm_cpu_context, sys_regs));
+  DEFINE(VCPU_ESR_EL2,		offsetof(struct kvm_vcpu, arch.fault.esr_el2));
+  DEFINE(VCPU_FAR_EL2,		offsetof(struct kvm_vcpu, arch.fault.far_el2));
+  DEFINE(VCPU_HPFAR_EL2,	offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
+  DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
+  DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
+  DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
+  DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
+  DEFINE(VCPU_TIMER_CNTV_CVAL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
+  DEFINE(KVM_TIMER_CNTVOFF,	offsetof(struct kvm, arch.timer.cntvoff));
+  DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
+  DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
+  DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
+  DEFINE(VGIC_CPU_HCR,		offsetof(struct vgic_cpu, vgic_hcr));
+  DEFINE(VGIC_CPU_VMCR,		offsetof(struct vgic_cpu, vgic_vmcr));
+  DEFINE(VGIC_CPU_MISR,		offsetof(struct vgic_cpu, vgic_misr));
+  DEFINE(VGIC_CPU_EISR,		offsetof(struct vgic_cpu, vgic_eisr));
+  DEFINE(VGIC_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_elrsr));
+  DEFINE(VGIC_CPU_APR,		offsetof(struct vgic_cpu, vgic_apr));
+  DEFINE(VGIC_CPU_LR,		offsetof(struct vgic_cpu, vgic_lr));
+  DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
+  DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
+  DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
+#endif
   return 0;
 }
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index f4726dc054b..08018e3df58 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -24,6 +24,7 @@
 #include <linux/init.h>
 #include <linux/ptrace.h>
 #include <linux/stat.h>
+#include <linux/uaccess.h>
 
 #include <asm/debug-monitors.h>
 #include <asm/local.h>
@@ -226,13 +227,74 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
 	return 0;
 }
 
-static int __init single_step_init(void)
+static int brk_handler(unsigned long addr, unsigned int esr,
+		       struct pt_regs *regs)
+{
+	siginfo_t info;
+
+	if (!user_mode(regs))
+		return -EFAULT;
+
+	info = (siginfo_t) {
+		.si_signo = SIGTRAP,
+		.si_errno = 0,
+		.si_code  = TRAP_BRKPT,
+		.si_addr  = (void __user *)instruction_pointer(regs),
+	};
+
+	force_sig_info(SIGTRAP, &info, current);
+	return 0;
+}
+
+int aarch32_break_handler(struct pt_regs *regs)
+{
+	siginfo_t info;
+	unsigned int instr;
+	bool bp = false;
+	void __user *pc = (void __user *)instruction_pointer(regs);
+
+	if (!compat_user_mode(regs))
+		return -EFAULT;
+
+	if (compat_thumb_mode(regs)) {
+		/* get 16-bit Thumb instruction */
+		get_user(instr, (u16 __user *)pc);
+		if (instr == AARCH32_BREAK_THUMB2_LO) {
+			/* get second half of 32-bit Thumb-2 instruction */
+			get_user(instr, (u16 __user *)(pc + 2));
+			bp = instr == AARCH32_BREAK_THUMB2_HI;
+		} else {
+			bp = instr == AARCH32_BREAK_THUMB;
+		}
+	} else {
+		/* 32-bit ARM instruction */
+		get_user(instr, (u32 __user *)pc);
+		bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM;
+	}
+
+	if (!bp)
+		return -EFAULT;
+
+	info = (siginfo_t) {
+		.si_signo = SIGTRAP,
+		.si_errno = 0,
+		.si_code  = TRAP_BRKPT,
+		.si_addr  = pc,
+	};
+
+	force_sig_info(SIGTRAP, &info, current);
+	return 0;
+}
+
+static int __init debug_traps_init(void)
 {
 	hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
 			      TRAP_HWBKPT, "single-step handler");
+	hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
+			      TRAP_BRKPT, "ptrace BRK handler");
 	return 0;
 }
-arch_initcall(single_step_init);
+arch_initcall(debug_traps_init);
 
 /* Re-enable single step for syscall restarting. */
 void user_rewind_single_step(struct task_struct *task)
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 6e1e77f1831..fecdbf7de82 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -53,28 +53,6 @@ void ptrace_disable(struct task_struct *child)
 {
 }
 
-/*
- * Handle hitting a breakpoint.
- */
-static int ptrace_break(struct pt_regs *regs)
-{
-	siginfo_t info = {
-		.si_signo = SIGTRAP,
-		.si_errno = 0,
-		.si_code  = TRAP_BRKPT,
-		.si_addr  = (void __user *)instruction_pointer(regs),
-	};
-
-	force_sig_info(SIGTRAP, &info, current);
-	return 0;
-}
-
-static int arm64_break_trap(unsigned long addr, unsigned int esr,
-			    struct pt_regs *regs)
-{
-	return ptrace_break(regs);
-}
-
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 /*
  * Handle hitting a HW-breakpoint.
@@ -817,33 +795,6 @@ static const struct user_regset_view user_aarch32_view = {
 	.regsets = aarch32_regsets, .n = ARRAY_SIZE(aarch32_regsets)
 };
 
-int aarch32_break_trap(struct pt_regs *regs)
-{
-	unsigned int instr;
-	bool bp = false;
-	void __user *pc = (void __user *)instruction_pointer(regs);
-
-	if (compat_thumb_mode(regs)) {
-		/* get 16-bit Thumb instruction */
-		get_user(instr, (u16 __user *)pc);
-		if (instr == AARCH32_BREAK_THUMB2_LO) {
-			/* get second half of 32-bit Thumb-2 instruction */
-			get_user(instr, (u16 __user *)(pc + 2));
-			bp = instr == AARCH32_BREAK_THUMB2_HI;
-		} else {
-			bp = instr == AARCH32_BREAK_THUMB;
-		}
-	} else {
-		/* 32-bit ARM instruction */
-		get_user(instr, (u32 __user *)pc);
-		bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM;
-	}
-
-	if (bp)
-		return ptrace_break(regs);
-	return 1;
-}
-
 static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off,
 				   compat_ulong_t __user *ret)
 {
@@ -1111,16 +1062,6 @@ long arch_ptrace(struct task_struct *child, long request,
 	return ptrace_request(child, request, addr, data);
 }
 
-
-static int __init ptrace_break_init(void)
-{
-	hook_debug_fault_code(DBG_ESR_EVT_BRK, arm64_break_trap, SIGTRAP,
-			      TRAP_BRKPT, "ptrace BRK handler");
-	return 0;
-}
-core_initcall(ptrace_break_init);
-
-
 asmlinkage int syscall_trace(int dir, struct pt_regs *regs)
 {
 	unsigned long saved_reg;
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index a551f88ae2c..03dc3718eb1 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -68,12 +68,6 @@ unsigned long long notrace sched_clock(void)
 	return arch_timer_read_counter() * sched_clock_mult;
 }
 
-int read_current_timer(unsigned long *timer_value)
-{
-	*timer_value = arch_timer_read_counter();
-	return 0;
-}
-
 void __init time_init(void)
 {
 	u32 arch_timer_rate;
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index f30852d2859..7ffadddb645 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -32,6 +32,7 @@
 #include <linux/syscalls.h>
 
 #include <asm/atomic.h>
+#include <asm/debug-monitors.h>
 #include <asm/traps.h>
 #include <asm/stacktrace.h>
 #include <asm/exception.h>
@@ -261,11 +262,9 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
 	siginfo_t info;
 	void __user *pc = (void __user *)instruction_pointer(regs);
 
-#ifdef CONFIG_COMPAT
 	/* check for AArch32 breakpoint instructions */
-	if (compat_user_mode(regs) && aarch32_break_trap(regs) == 0)
+	if (!aarch32_break_handler(regs))
 		return;
-#endif
 
 	if (show_unhandled_signals && unhandled_signal(current, SIGILL) &&
 	    printk_ratelimit()) {
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 3fae2be8b01..f5e55747242 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -17,6 +17,19 @@ ENTRY(stext)
 
 jiffies = jiffies_64;
 
+#define HYPERVISOR_TEXT					\
+	/*						\
+	 * Force the alignment to be compatible with	\
+	 * the vectors requirements			\
+	 */						\
+	. = ALIGN(2048);				\
+	VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;	\
+	*(.hyp.idmap.text)				\
+	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;	\
+	VMLINUX_SYMBOL(__hyp_text_start) = .;		\
+	*(.hyp.text)					\
+	VMLINUX_SYMBOL(__hyp_text_end) = .;
+
 SECTIONS
 {
 	/*
@@ -49,6 +62,7 @@ SECTIONS
 			TEXT_TEXT
 			SCHED_TEXT
 			LOCK_TEXT
+			HYPERVISOR_TEXT
 			*(.fixup)
 			*(.gnu.warning)
 		. = ALIGN(16);
@@ -56,7 +70,7 @@ SECTIONS
 	}
 
 	RO_DATA(PAGE_SIZE)
-
+	EXCEPTION_TABLE(8)
 	_etext = .;			/* End of text and rodata section */
 
 	. = ALIGN(PAGE_SIZE);
@@ -99,14 +113,6 @@ SECTIONS
 		READ_MOSTLY_DATA(64)
 
 		/*
-		 * The exception fixup table (might need resorting at runtime)
-		 */
-		. = ALIGN(32);
-		__start___ex_table = .;
-		*(__ex_table)
-		__stop___ex_table = .;
-
-		/*
 		 * and the usual data section
 		 */
 		DATA_DATA
@@ -124,3 +130,9 @@ SECTIONS
 	STABS_DEBUG
 	.comment 0 : { *(.comment) }
 }
+
+/*
+ * The HYP init code can't be more than a page long.
+ */
+ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end),
+       "HYP init code too big")
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
new file mode 100644
index 00000000000..72a9fd583ad
--- /dev/null
+++ b/arch/arm64/kvm/Makefile
@@ -0,0 +1,23 @@
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm
+CFLAGS_arm.o := -I.
+CFLAGS_mmu.o := -I.
+
+KVM=../../../virt/kvm
+ARM=../../../arch/arm/kvm
+
+obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
+
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
+
+kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
+kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
+kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
+
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/kvm/emulate.c b/arch/arm64/kvm/emulate.c
new file mode 100644
index 00000000000..124418d1704
--- /dev/null
+++ b/arch/arm64/kvm/emulate.c
@@ -0,0 +1,158 @@
+/*
+ * (not much of an) Emulation layer for 32bit guests.
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+
+/*
+ * stolen from arch/arm/kernel/opcodes.c
+ *
+ * condition code lookup table
+ * index into the table is test code: EQ, NE, ... LT, GT, AL, NV
+ *
+ * bit position in short is condition code: NZCV
+ */
+static const unsigned short cc_map[16] = {
+	0xF0F0,			/* EQ == Z set            */
+	0x0F0F,			/* NE                     */
+	0xCCCC,			/* CS == C set            */
+	0x3333,			/* CC                     */
+	0xFF00,			/* MI == N set            */
+	0x00FF,			/* PL                     */
+	0xAAAA,			/* VS == V set            */
+	0x5555,			/* VC                     */
+	0x0C0C,			/* HI == C set && Z clear */
+	0xF3F3,			/* LS == C clear || Z set */
+	0xAA55,			/* GE == (N==V)           */
+	0x55AA,			/* LT == (N!=V)           */
+	0x0A05,			/* GT == (!Z && (N==V))   */
+	0xF5FA,			/* LE == (Z || (N!=V))    */
+	0xFFFF,			/* AL always              */
+	0			/* NV                     */
+};
+
+static int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
+{
+	u32 esr = kvm_vcpu_get_hsr(vcpu);
+
+	if (esr & ESR_EL2_CV)
+		return (esr & ESR_EL2_COND) >> ESR_EL2_COND_SHIFT;
+
+	return -1;
+}
+
+/*
+ * Check if a trapped instruction should have been executed or not.
+ */
+bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
+{
+	unsigned long cpsr;
+	u32 cpsr_cond;
+	int cond;
+
+	/* Top two bits non-zero?  Unconditional. */
+	if (kvm_vcpu_get_hsr(vcpu) >> 30)
+		return true;
+
+	/* Is condition field valid? */
+	cond = kvm_vcpu_get_condition(vcpu);
+	if (cond == 0xE)
+		return true;
+
+	cpsr = *vcpu_cpsr(vcpu);
+
+	if (cond < 0) {
+		/* This can happen in Thumb mode: examine IT state. */
+		unsigned long it;
+
+		it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3);
+
+		/* it == 0 => unconditional. */
+		if (it == 0)
+			return true;
+
+		/* The cond for this insn works out as the top 4 bits. */
+		cond = (it >> 4);
+	}
+
+	cpsr_cond = cpsr >> 28;
+
+	if (!((cc_map[cond] >> cpsr_cond) & 1))
+		return false;
+
+	return true;
+}
+
+/**
+ * kvm_adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
+ * @vcpu:	The VCPU pointer
+ *
+ * When exceptions occur while instructions are executed in Thumb IF-THEN
+ * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have
+ * to do this little bit of work manually. The fields map like this:
+ *
+ * IT[7:0] -> CPSR[26:25],CPSR[15:10]
+ */
+static void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
+{
+	unsigned long itbits, cond;
+	unsigned long cpsr = *vcpu_cpsr(vcpu);
+	bool is_arm = !(cpsr & COMPAT_PSR_T_BIT);
+
+	BUG_ON(is_arm && (cpsr & COMPAT_PSR_IT_MASK));
+
+	if (!(cpsr & COMPAT_PSR_IT_MASK))
+		return;
+
+	cond = (cpsr & 0xe000) >> 13;
+	itbits = (cpsr & 0x1c00) >> (10 - 2);
+	itbits |= (cpsr & (0x3 << 25)) >> 25;
+
+	/* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */
+	if ((itbits & 0x7) == 0)
+		itbits = cond = 0;
+	else
+		itbits = (itbits << 1) & 0x1f;
+
+	cpsr &= ~COMPAT_PSR_IT_MASK;
+	cpsr |= cond << 13;
+	cpsr |= (itbits & 0x1c) << (10 - 2);
+	cpsr |= (itbits & 0x3) << 25;
+	*vcpu_cpsr(vcpu) = cpsr;
+}
+
+/**
+ * kvm_skip_instr32 - skip a trapped instruction and proceed to the next
+ * @vcpu: The vcpu pointer
+ */
+void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
+{
+	bool is_thumb;
+
+	is_thumb = !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_T_BIT);
+	if (is_thumb && !is_wide_instr)
+		*vcpu_pc(vcpu) += 2;	/* T bit set, not wide: 2 bytes */
+	else
+		*vcpu_pc(vcpu) += 4;	/* T bit clear, or wide: 4 bytes */
+	kvm_adjust_itstate(vcpu);
+}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
new file mode 100644
index 00000000000..2c3ff67a8ec
--- /dev/null
+++ b/arch/arm64/kvm/guest.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/guest.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <asm/cputype.h>
+#include <asm/uaccess.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	{ NULL }
+};
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+	return 0;
+}
+
+static u64 core_reg_offset_from_id(u64 id)
+{
+	return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
+}
+
+static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	/*
+	 * Copy one core register to the user buffer at reg->addr. kvm_regs
+	 * mixes 32, 64 and 128bit fields, so we index it as a 32bit array:
+	 * nr_regs is the number of entries, off the index taken from reg->id.
+	 * Returns 0, -ENOENT for a bad index/size or -EFAULT on a bad address.
+	 */
+	__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
+	struct kvm_regs *regs = vcpu_gp_regs(vcpu);
+	int nr_regs = sizeof(*regs) / sizeof(__u32);
+	u32 off;
+
+	/* [off, off + len) must fit in the array; it may end at nr_regs. */
+	off = core_reg_offset_from_id(reg->id);
+	if (off >= nr_regs ||
+	    (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) > nr_regs)
+		return -ENOENT;
+
+	if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
+	struct kvm_regs *regs = vcpu_gp_regs(vcpu);
+	int nr_regs = sizeof(*regs) / sizeof(__u32);
+	__uint128_t tmp;
+	void *valp = &tmp;
+	u64 off;
+	int err = 0;
+
+	/* off indexes kvm_regs as u32 words; access may end at nr_regs. */
+	off = core_reg_offset_from_id(reg->id);
+	if (off >= nr_regs ||
+	    (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) > nr_regs)
+		return -ENOENT;
+
+	if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
+		return -EINVAL;
+
+	if (copy_from_user(valp, uaddr, KVM_REG_SIZE(reg->id))) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) {
+		u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK;
+		switch (mode) {
+		case COMPAT_PSR_MODE_USR:
+		case COMPAT_PSR_MODE_FIQ:
+		case COMPAT_PSR_MODE_IRQ:
+		case COMPAT_PSR_MODE_SVC:
+		case COMPAT_PSR_MODE_ABT:
+		case COMPAT_PSR_MODE_UND:
+		case PSR_MODE_EL0t:
+		case PSR_MODE_EL1t:
+		case PSR_MODE_EL1h:
+			break;
+		default:
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id));
+out:
+	return err;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	return -EINVAL;
+}
+
+static unsigned long num_core_regs(void)
+{
+	return sizeof(struct kvm_regs) / sizeof(__u32);
+}
+
+/**
+ * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
+ *
+ * This is for all registers.
+ */
+unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
+{
+	return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu);
+}
+
+/**
+ * kvm_arm_copy_reg_indices - get indices of all registers.
+ *
+ * We do core registers right here, then we append system regs.
+ */
+int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	unsigned int i;
+	const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE;
+
+	for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
+		if (put_user(core_reg | i, uindices))
+			return -EFAULT;
+		uindices++;
+	}
+
+	return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
+}
+
+int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	/* We currently use nothing arch-specific in upper 32 bits */
+	if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
+		return -EINVAL;
+
+	/* Register group 16 means we want a core register. */
+	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
+		return get_core_reg(vcpu, reg);
+
+	return kvm_arm_sys_reg_get_reg(vcpu, reg);
+}
+
+int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	/* We currently use nothing arch-specific in upper 32 bits */
+	if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
+		return -EINVAL;
+
+	/* Register group 16 means we set a core register. */
+	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
+		return set_core_reg(vcpu, reg);
+
+	return kvm_arm_sys_reg_set_reg(vcpu, reg);
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	return -EINVAL;
+}
+
+int __attribute_const__ kvm_target_cpu(void)
+{
+	unsigned long implementor = read_cpuid_implementor();
+	unsigned long part_number = read_cpuid_part_number();
+
+	if (implementor != ARM_CPU_IMP_ARM)
+		return -EINVAL;
+
+	switch (part_number) {
+	case ARM_CPU_PART_AEM_V8:
+		return KVM_ARM_TARGET_AEM_V8;
+	case ARM_CPU_PART_FOUNDATION:
+		return KVM_ARM_TARGET_FOUNDATION_V8;
+	case ARM_CPU_PART_CORTEX_A57:
+		/* Currently handled by the generic backend */
+		return KVM_ARM_TARGET_CORTEX_A57;
+	default:
+		return -EINVAL;
+	}
+}
+
+int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+			const struct kvm_vcpu_init *init)
+{
+	unsigned int i;
+	int phys_target = kvm_target_cpu();
+
+	if (init->target != phys_target)
+		return -EINVAL;
+
+	vcpu->arch.target = phys_target;
+	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
+
+	/* -ENOENT for unknown features; 1U keeps the bit-31 shift defined. */
+	for (i = 0; i < sizeof(init->features) * 8; i++) {
+		if (init->features[i / 32] & (1U << (i % 32))) {
+			if (i >= KVM_VCPU_MAX_FEATURES)
+				return -ENOENT;
+			set_bit(i, vcpu->arch.features);
+		}
+	}
+
+	/* Now we know what it is, we can reset it. */
+	return kvm_reset_vcpu(vcpu);
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+				  struct kvm_translation *tr)
+{
+	return -EINVAL;
+}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
new file mode 100644
index 00000000000..9beaca03343
--- /dev/null
+++ b/arch/arm64/kvm/handle_exit.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/handle_exit.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_psci.h>
+
+typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
+
+static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	if (kvm_psci_call(vcpu))
+		return 1;
+
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	if (kvm_psci_call(vcpu))
+		return 1;
+
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+/**
+ * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * @vcpu:	the vcpu pointer
+ *
+ * Simply call kvm_vcpu_block(), which will halt execution of
+ * world-switches and schedule other host processes until there is an
+ * incoming IRQ or FIQ to the VM.
+ */
+static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	kvm_vcpu_block(vcpu);
+	return 1;
+}
+
+static exit_handle_fn arm_exit_handlers[] = {
+	[ESR_EL2_EC_WFI]	= kvm_handle_wfi,
+	[ESR_EL2_EC_CP15_32]	= kvm_handle_cp15_32,
+	[ESR_EL2_EC_CP15_64]	= kvm_handle_cp15_64,
+	[ESR_EL2_EC_CP14_MR]	= kvm_handle_cp14_access,
+	[ESR_EL2_EC_CP14_LS]	= kvm_handle_cp14_load_store,
+	[ESR_EL2_EC_CP14_64]	= kvm_handle_cp14_access,
+	[ESR_EL2_EC_HVC32]	= handle_hvc,
+	[ESR_EL2_EC_SMC32]	= handle_smc,
+	[ESR_EL2_EC_HVC64]	= handle_hvc,
+	[ESR_EL2_EC_SMC64]	= handle_smc,
+	[ESR_EL2_EC_SYS64]	= kvm_handle_sys_reg,
+	[ESR_EL2_EC_IABT]	= kvm_handle_guest_abort,
+	[ESR_EL2_EC_DABT]	= kvm_handle_guest_abort,
+};
+
+static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
+{
+	u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); /* arm_exit_handlers[] index */
+
+	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
+	    !arm_exit_handlers[hsr_ec]) {
+		kvm_err("Unknown exception class: hsr: %#08x\n",
+			(unsigned int)kvm_vcpu_get_hsr(vcpu));
+		BUG();	/* index past the table, or no handler in that slot */
+	}
+
+	return arm_exit_handlers[hsr_ec];
+}
+
+/*
+ * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
+ * proper exit to userspace.
+ */
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		       int exception_index)
+{
+	exit_handle_fn exit_handler;
+
+	switch (exception_index) {
+	case ARM_EXCEPTION_IRQ:
+		return 1;
+	case ARM_EXCEPTION_TRAP:
+		/*
+		 * See ARM ARM B1.14.1: "Hyp traps on instructions
+		 * that fail their condition code check"
+		 */
+		if (!kvm_condition_valid(vcpu)) {
+			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+			return 1;
+		}
+
+		exit_handler = kvm_get_exit_handler(vcpu);
+
+		return exit_handler(vcpu, run);
+	default:
+		kvm_pr_unimpl("Unsupported exception type: %d",
+			      exception_index);
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		return 0;
+	}
+}
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
new file mode 100644
index 00000000000..ba84e6705e2
--- /dev/null
+++ b/arch/arm64/kvm/hyp-init.S
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+	.text
+	.pushsection	.hyp.idmap.text, "ax"
+
+	.align	11
+
+ENTRY(__kvm_hyp_init)
+	ventry	__invalid		// Synchronous EL2t
+	ventry	__invalid		// IRQ EL2t
+	ventry	__invalid		// FIQ EL2t
+	ventry	__invalid		// Error EL2t
+
+	ventry	__invalid		// Synchronous EL2h
+	ventry	__invalid		// IRQ EL2h
+	ventry	__invalid		// FIQ EL2h
+	ventry	__invalid		// Error EL2h
+
+	ventry	__do_hyp_init		// Synchronous 64-bit EL1
+	ventry	__invalid		// IRQ 64-bit EL1
+	ventry	__invalid		// FIQ 64-bit EL1
+	ventry	__invalid		// Error 64-bit EL1
+
+	ventry	__invalid		// Synchronous 32-bit EL1
+	ventry	__invalid		// IRQ 32-bit EL1
+	ventry	__invalid		// FIQ 32-bit EL1
+	ventry	__invalid		// Error 32-bit EL1
+
+__invalid:
+	b	.
+
+	/*
+	 * x0: HYP boot pgd
+	 * x1: HYP pgd
+	 * x2: HYP stack
+	 * x3: HYP vectors
+	 */
+__do_hyp_init:
+
+	msr	ttbr0_el2, x0
+
+	mrs	x4, tcr_el1
+	ldr	x5, =TCR_EL2_MASK
+	and	x4, x4, x5
+	ldr	x5, =TCR_EL2_FLAGS
+	orr	x4, x4, x5
+	msr	tcr_el2, x4
+
+	ldr	x4, =VTCR_EL2_FLAGS
+	msr	vtcr_el2, x4
+
+	mrs	x4, mair_el1
+	msr	mair_el2, x4
+	isb
+
+	mov	x4, #SCTLR_EL2_FLAGS
+	msr	sctlr_el2, x4
+	isb
+
+	/* MMU is now enabled. Get ready for the trampoline dance */
+	ldr	x4, =TRAMPOLINE_VA
+	adr	x5, target
+	bfi	x4, x5, #0, #PAGE_SHIFT
+	br	x4
+
+target: /* We're now in the trampoline code, switch page tables */
+	msr	ttbr0_el2, x1
+	isb
+
+	/* Invalidate the old TLBs */
+	tlbi	alle2
+	dsb	sy
+
+	/* Set the stack and new vectors */
+	kern_hyp_va	x2
+	mov	sp, x2
+	kern_hyp_va	x3
+	msr	vbar_el2, x3
+
+	/* Hello, World! */
+	eret
+ENDPROC(__kvm_hyp_init)
+
+	.ltorg
+
+	.popsection
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
new file mode 100644
index 00000000000..ff985e3d8b7
--- /dev/null
+++ b/arch/arm64/kvm/hyp.S
@@ -0,0 +1,831 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/fpsimdmacros.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+#define CPU_GP_REG_OFFSET(x)	(CPU_GP_REGS + x)
+#define CPU_XREG_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
+#define CPU_SPSR_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_SPSR + 8*x)
+#define CPU_SYSREG_OFFSET(x)	(CPU_SYSREGS + 8*x)
+
+	.text
+	.pushsection	.hyp.text, "ax"
+	.align	PAGE_SHIFT
+
+__kvm_hyp_code_start:
+	.globl __kvm_hyp_code_start
+
+.macro save_common_regs
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	add	x3, x2, #CPU_XREG_OFFSET(19)
+	stp	x19, x20, [x3]
+	stp	x21, x22, [x3, #16]
+	stp	x23, x24, [x3, #32]
+	stp	x25, x26, [x3, #48]
+	stp	x27, x28, [x3, #64]
+	stp	x29, lr, [x3, #80]
+
+	mrs	x19, sp_el0
+	mrs	x20, elr_el2		// EL1 PC
+	mrs	x21, spsr_el2		// EL1 pstate
+
+	stp	x19, x20, [x3, #96]
+	str	x21, [x3, #112]
+
+	mrs	x22, sp_el1
+	mrs	x23, elr_el1
+	mrs	x24, spsr_el1
+
+	str	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
+	str	x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
+	str	x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
+.endm
+
+.macro restore_common_regs
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	ldr	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
+	ldr	x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
+	ldr	x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
+
+	msr	sp_el1, x22
+	msr	elr_el1, x23
+	msr	spsr_el1, x24
+
+	add	x3, x2, #CPU_XREG_OFFSET(31)    // SP_EL0
+	ldp	x19, x20, [x3]
+	ldr	x21, [x3, #16]
+
+	msr	sp_el0, x19
+	msr	elr_el2, x20 				// EL1 PC
+	msr	spsr_el2, x21 				// EL1 pstate
+
+	add	x3, x2, #CPU_XREG_OFFSET(19)
+	ldp	x19, x20, [x3]
+	ldp	x21, x22, [x3, #16]
+	ldp	x23, x24, [x3, #32]
+	ldp	x25, x26, [x3, #48]
+	ldp	x27, x28, [x3, #64]
+	ldp	x29, lr, [x3, #80]
+.endm
+
+.macro save_host_regs
+	save_common_regs
+.endm
+
+.macro restore_host_regs
+	restore_common_regs
+.endm
+
+.macro save_fpsimd
+	// x2: cpu context address
+	// x3, x4: tmp regs
+	add	x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
+	fpsimd_save x3, 4
+.endm
+
+.macro restore_fpsimd
+	// x2: cpu context address
+	// x3, x4: tmp regs
+	add	x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
+	fpsimd_restore x3, 4
+.endm
+
+.macro save_guest_regs
+	// x0 is the vcpu address
+	// x1 is the return code, do not corrupt!
+	// x2 is the cpu context
+	// x3 is a tmp register
+	// Guest's x0-x3 are on the stack
+
+	// Compute base to save registers
+	add	x3, x2, #CPU_XREG_OFFSET(4)
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+	stp	x8, x9, [x3, #32]
+	stp	x10, x11, [x3, #48]
+	stp	x12, x13, [x3, #64]
+	stp	x14, x15, [x3, #80]
+	stp	x16, x17, [x3, #96]
+	str	x18, [x3, #112]
+
+	pop	x6, x7			// x2, x3
+	pop	x4, x5			// x0, x1
+
+	add	x3, x2, #CPU_XREG_OFFSET(0)
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+
+	save_common_regs
+.endm
+
+.macro restore_guest_regs
+	// x0 is the vcpu address.
+	// x2 is the cpu context
+	// x3 is a tmp register
+
+	// Prepare x0-x3 for later restore
+	add	x3, x2, #CPU_XREG_OFFSET(0)
+	ldp	x4, x5, [x3]
+	ldp	x6, x7, [x3, #16]
+	push	x4, x5		// Push x0-x3 on the stack
+	push	x6, x7
+
+	// x4-x18
+	ldp	x4, x5, [x3, #32]
+	ldp	x6, x7, [x3, #48]
+	ldp	x8, x9, [x3, #64]
+	ldp	x10, x11, [x3, #80]
+	ldp	x12, x13, [x3, #96]
+	ldp	x14, x15, [x3, #112]
+	ldp	x16, x17, [x3, #128]
+	ldr	x18, [x3, #144]
+
+	// x19-x29, lr, sp*, elr*, spsr*
+	restore_common_regs
+
+	// Last bits of the 64bit state
+	pop	x2, x3
+	pop	x0, x1
+
+	// Do not touch any register after this!
+.endm
+
+/*
+ * Macros to perform system register save/restore.
+ *
+ * Ordering here is absolutely critical, and must be kept consistent
+ * in {save,restore}_sysregs, {save,restore}_guest_32bit_state,
+ * and in kvm_asm.h.
+ *
+ * In other words, don't touch any of these unless you know what
+ * you are doing.
+ */
+.macro save_sysregs
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1)
+
+	mrs	x4,	vmpidr_el2
+	mrs	x5,	csselr_el1
+	mrs	x6,	sctlr_el1
+	mrs	x7,	actlr_el1
+	mrs	x8,	cpacr_el1
+	mrs	x9,	ttbr0_el1
+	mrs	x10,	ttbr1_el1
+	mrs	x11,	tcr_el1
+	mrs	x12,	esr_el1
+	mrs	x13, 	afsr0_el1
+	mrs	x14,	afsr1_el1
+	mrs	x15,	far_el1
+	mrs	x16,	mair_el1
+	mrs	x17,	vbar_el1
+	mrs	x18,	contextidr_el1
+	mrs	x19,	tpidr_el0
+	mrs	x20,	tpidrro_el0
+	mrs	x21,	tpidr_el1
+	mrs	x22, 	amair_el1
+	mrs	x23, 	cntkctl_el1
+
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+	stp	x8, x9, [x3, #32]
+	stp	x10, x11, [x3, #48]
+	stp	x12, x13, [x3, #64]
+	stp	x14, x15, [x3, #80]
+	stp	x16, x17, [x3, #96]
+	stp	x18, x19, [x3, #112]
+	stp	x20, x21, [x3, #128]
+	stp	x22, x23, [x3, #144]
+.endm
+
+.macro restore_sysregs
+	// x2: base address for cpu context
+	// x3: tmp register
+	//
+	// Mirror image of save_sysregs: loads ten 16-byte pairs starting at
+	// CPU_SYSREG_OFFSET(MPIDR_EL1) into x4-x23, then writes them back to
+	// the system registers in the order save_sysregs read them.
+	// Clobbers x3-x23.
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1)
+
+	ldp	x4, x5, [x3]
+	ldp	x6, x7, [x3, #16]
+	ldp	x8, x9, [x3, #32]
+	ldp	x10, x11, [x3, #48]
+	ldp	x12, x13, [x3, #64]
+	ldp	x14, x15, [x3, #80]
+	ldp	x16, x17, [x3, #96]
+	ldp	x18, x19, [x3, #112]
+	ldp	x20, x21, [x3, #128]
+	ldp	x22, x23, [x3, #144]
+
+	msr	vmpidr_el2,	x4	// value comes from the MPIDR_EL1 slot
+	msr	csselr_el1,	x5
+	msr	sctlr_el1,	x6
+	msr	actlr_el1,	x7
+	msr	cpacr_el1,	x8
+	msr	ttbr0_el1,	x9
+	msr	ttbr1_el1,	x10
+	msr	tcr_el1,	x11
+	msr	esr_el1,	x12
+	msr	afsr0_el1,	x13
+	msr	afsr1_el1,	x14
+	msr	far_el1,	x15
+	msr	mair_el1,	x16
+	msr	vbar_el1,	x17
+	msr	contextidr_el1,	x18
+	msr	tpidr_el0,	x19
+	msr	tpidrro_el0,	x20
+	msr	tpidr_el1,	x21
+	msr	amair_el1,	x22
+	msr	cntkctl_el1,	x23
+.endm
+
+.macro skip_32bit_state tmp, target
+	// Skip 32bit state if not needed
+	// \tmp is overwritten with HCR_EL2; branches to \target when the
+	// bit at HCR_RW_SHIFT is set, falls through when it is clear.
+	mrs	\tmp, hcr_el2
+	tbnz	\tmp, #HCR_RW_SHIFT, \target
+.endm
+
+.macro skip_tee_state tmp, target
+	// Skip ThumbEE state if not needed
+	// \tmp is overwritten with ID_PFR0_EL1; branches to \target when
+	// bit 12 of that register is clear, falls through when it is set.
+	mrs	\tmp, id_pfr0_el1
+	tbz	\tmp, #12, \target
+.endm
+
+.macro save_guest_32bit_state
+	// x2: base address for cpu context
+	// Saves the state that only exists for a 32bit guest; the whole
+	// macro is skipped (branch to 1:) when skip_32bit_state says so.
+	// Clobbers x3-x8.
+	skip_32bit_state x3, 1f
+
+	// Banked SPSRs, stored as four consecutive 64-bit slots starting
+	// at the KVM_SPSR_ABT entry
+	add	x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT)
+	mrs	x4, spsr_abt
+	mrs	x5, spsr_und
+	mrs	x6, spsr_irq
+	mrs	x7, spsr_fiq
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+
+	// Four consecutive sysreg slots starting at DACR32_EL2
+	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
+	mrs	x4, dacr32_el2
+	mrs	x5, ifsr32_el2
+	mrs	x6, fpexc32_el2
+	mrs	x7, dbgvcr32_el2
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+
+	// ThumbEE registers are only touched when skip_tee_state falls through
+	skip_tee_state x8, 1f
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
+	mrs	x4, teecr32_el1
+	mrs	x5, teehbr32_el1
+	stp	x4, x5, [x3]
+1:
+.endm
+
+.macro restore_guest_32bit_state
+	// x2: base address for cpu context
+	// Mirror image of save_guest_32bit_state: same slots, same order,
+	// same skip conditions. Clobbers x3-x8.
+	skip_32bit_state x3, 1f
+
+	// Banked SPSRs
+	add	x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT)
+	ldp	x4, x5, [x3]
+	ldp	x6, x7, [x3, #16]
+	msr	spsr_abt, x4
+	msr	spsr_und, x5
+	msr	spsr_irq, x6
+	msr	spsr_fiq, x7
+
+	// Four consecutive sysreg slots starting at DACR32_EL2
+	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
+	ldp	x4, x5, [x3]
+	ldp	x6, x7, [x3, #16]
+	msr	dacr32_el2, x4
+	msr	ifsr32_el2, x5
+	msr	fpexc32_el2, x6
+	msr	dbgvcr32_el2, x7
+
+	// ThumbEE registers are only touched when skip_tee_state falls through
+	skip_tee_state x8, 1f
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
+	ldp	x4, x5, [x3]
+	msr	teecr32_el1, x4
+	msr	teehbr32_el1, x5
+1:
+.endm
+
+.macro activate_traps
+	// x0: vcpu pointer
+	// Programs the EL2 trap controls before entering the guest.
+	// Clobbers x1 and x2.
+
+	// HCR_EL2 = vcpu's HCR_EL2 value with the pending irq_lines bits ORed in
+	ldr	x2, [x0, #VCPU_IRQ_LINES]
+	ldr	x1, [x0, #VCPU_HCR_EL2]
+	orr	x2, x2, x1
+	msr	hcr_el2, x2
+
+	ldr	x2, =(CPTR_EL2_TTA)
+	msr	cptr_el2, x2
+
+	ldr	x2, =(1 << 15)	// Trap CP15 Cr=15
+	msr	hstr_el2, x2
+
+	// Keep only the HPMN field of MDCR_EL2, then set TPM and TPMCR
+	mrs	x2, mdcr_el2
+	and	x2, x2, #MDCR_EL2_HPMN_MASK
+	orr	x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR)
+	msr	mdcr_el2, x2
+.endm
+
+.macro deactivate_traps
+	// Undo activate_traps for the host: HCR_EL2 is left with only HCR_RW
+	// set, CPTR_EL2 and HSTR_EL2 are zeroed, and MDCR_EL2 keeps nothing
+	// but its HPMN field. Clobbers x2.
+	mov	x2, #HCR_RW
+	msr	hcr_el2, x2
+	msr	cptr_el2, xzr
+	msr	hstr_el2, xzr
+
+	mrs	x2, mdcr_el2
+	and	x2, x2, #MDCR_EL2_HPMN_MASK
+	msr	mdcr_el2, x2
+.endm
+
+.macro activate_vm
+	// x0: vcpu pointer
+	// Loads the VM's VTTBR value (found through the vcpu's kvm pointer,
+	// which is converted to a HYP VA first) into VTTBR_EL2.
+	// Clobbers x1 and x2.
+	ldr	x1, [x0, #VCPU_KVM]
+	kern_hyp_va	x1
+	ldr	x2, [x1, #KVM_VTTBR]
+	msr	vttbr_el2, x2
+.endm
+
+.macro deactivate_vm
+	// Zero VTTBR_EL2. el1_sync and __kvm_hyp_panic both treat a zero
+	// VTTBR_EL2 as "we are running the host".
+	msr	vttbr_el2, xzr
+.endm
+
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ *
+ * Clobbers x2-x11. Does nothing when the VM's VGIC VCTRL base is zero.
+ */
+.macro save_vgic_state
+	/* Get VGIC VCTRL base into x2 */
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va	x2
+	ldr	x2, [x2, #KVM_VGIC_VCTRL]
+	kern_hyp_va	x2
+	cbz	x2, 2f		// disabled
+
+	/* Compute the address of struct vgic_cpu */
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	/* Save all interesting registers */
+	ldr	w4, [x2, #GICH_HCR]
+	ldr	w5, [x2, #GICH_VMCR]
+	ldr	w6, [x2, #GICH_MISR]
+	ldr	w7, [x2, #GICH_EISR0]
+	ldr	w8, [x2, #GICH_EISR1]
+	ldr	w9, [x2, #GICH_ELRSR0]
+	ldr	w10, [x2, #GICH_ELRSR1]
+	ldr	w11, [x2, #GICH_APR]
+
+	/* EISR and ELRSR are each stored as two consecutive 32-bit words */
+	str	w4, [x3, #VGIC_CPU_HCR]
+	str	w5, [x3, #VGIC_CPU_VMCR]
+	str	w6, [x3, #VGIC_CPU_MISR]
+	str	w7, [x3, #VGIC_CPU_EISR]
+	str	w8, [x3, #(VGIC_CPU_EISR + 4)]
+	str	w9, [x3, #VGIC_CPU_ELRSR]
+	str	w10, [x3, #(VGIC_CPU_ELRSR + 4)]
+	str	w11, [x3, #VGIC_CPU_APR]
+
+	/* Clear GICH_HCR */
+	str	wzr, [x2, #GICH_HCR]
+
+	/*
+	 * Save list registers: copy VGIC_CPU_NR_LR 32-bit words from
+	 * GICH_LR0 onwards into the VGIC_CPU_LR array.
+	 * NOTE(review): the count is only tested after the first copy, so
+	 * this relies on the stored NR_LR value being non-zero.
+	 */
+	add	x2, x2, #GICH_LR0
+	ldr	w4, [x3, #VGIC_CPU_NR_LR]
+	add	x3, x3, #VGIC_CPU_LR
+1:	ldr	w5, [x2], #4
+	str	w5, [x3], #4
+	sub	w4, w4, #1
+	cbnz	w4, 1b
+2:
+.endm
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ *
+ * Clobbers x2-x6. Does nothing when the VM's VGIC VCTRL base is zero.
+ */
+.macro restore_vgic_state
+	/* Get VGIC VCTRL base into x2 */
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va	x2
+	ldr	x2, [x2, #KVM_VGIC_VCTRL]
+	kern_hyp_va	x2
+	cbz	x2, 2f		// disabled
+
+	/* Compute the address of struct vgic_cpu */
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	/* We only restore a minimal set of registers */
+	ldr	w4, [x3, #VGIC_CPU_HCR]
+	ldr	w5, [x3, #VGIC_CPU_VMCR]
+	ldr	w6, [x3, #VGIC_CPU_APR]
+
+	str	w4, [x2, #GICH_HCR]
+	str	w5, [x2, #GICH_VMCR]
+	str	w6, [x2, #GICH_APR]
+
+	/*
+	 * Restore list registers: copy VGIC_CPU_NR_LR 32-bit words from
+	 * the VGIC_CPU_LR array into GICH_LR0 onwards.
+	 * NOTE(review): the count is only tested after the first copy, so
+	 * this relies on the stored NR_LR value being non-zero.
+	 */
+	add	x2, x2, #GICH_LR0
+	ldr	w4, [x3, #VGIC_CPU_NR_LR]
+	add	x3, x3, #VGIC_CPU_LR
+1:	ldr	w5, [x3], #4
+	str	w5, [x2], #4
+	sub	w4, w4, #1
+	cbnz	w4, 1b
+2:
+.endm
+
+.macro save_timer_state
+	// x0: vcpu pointer
+	// Clobbers x2 and x3. The virtual timer registers are only saved
+	// when the VM's KVM_TIMER_ENABLED word is non-zero; the host
+	// settings after label 1 are applied unconditionally.
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va x2
+	ldr	w3, [x2, #KVM_TIMER_ENABLED]
+	cbz	w3, 1f
+
+	// Save the low two bits of CNTV_CTL, then write the register back
+	// with bit 0 cleared so the guest timer stops firing
+	mrs	x3, cntv_ctl_el0
+	and	x3, x3, #3
+	str	w3, [x0, #VCPU_TIMER_CNTV_CTL]
+	bic	x3, x3, #1		// Clear Enable
+	msr	cntv_ctl_el0, x3
+
+	isb
+
+	mrs	x3, cntv_cval_el0
+	str	x3, [x0, #VCPU_TIMER_CNTV_CVAL]
+
+1:
+	// Allow physical timer/counter access for the host
+	mrs	x2, cnthctl_el2
+	orr	x2, x2, #3
+	msr	cnthctl_el2, x2
+
+	// Clear cntvoff for the host
+	msr	cntvoff_el2, xzr
+.endm
+
+.macro restore_timer_state
+	// x0: vcpu pointer
+	// Clobbers x2 and x3.
+	// Disallow physical timer access for the guest
+	// Physical counter access is allowed
+	// (CNTHCTL_EL2: bit 0 set, bit 1 cleared)
+	mrs	x2, cnthctl_el2
+	orr	x2, x2, #1
+	bic	x2, x2, #2
+	msr	cnthctl_el2, x2
+
+	// Nothing else to do when the VM's KVM_TIMER_ENABLED word is zero
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va x2
+	ldr	w3, [x2, #KVM_TIMER_ENABLED]
+	cbz	w3, 1f
+
+	// The offset is per-VM (read from the kvm struct); the compare
+	// value and control word are per-vcpu
+	ldr	x3, [x2, #KVM_TIMER_CNTVOFF]
+	msr	cntvoff_el2, x3
+	ldr	x2, [x0, #VCPU_TIMER_CNTV_CVAL]
+	msr	cntv_cval_el0, x2
+	isb
+
+	// Control is written last, after offset and compare value
+	ldr	w2, [x0, #VCPU_TIMER_CNTV_CTL]
+	and	x2, x2, #3
+	msr	cntv_ctl_el0, x2
+1:
+.endm
+
+// Out-of-line copies of the save/restore macros, so that each body is
+// emitted once and reached with "bl" from the world switch code.
+// x2 must hold the CPU context base expected by the wrapped macro;
+// lr holds the return address.
+__save_sysregs:
+	save_sysregs
+	ret
+
+__restore_sysregs:
+	restore_sysregs
+	ret
+
+__save_fpsimd:
+	save_fpsimd
+	ret
+
+__restore_fpsimd:
+	restore_fpsimd
+	ret
+
+/*
+ * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+ *
+ * This is the world switch. The first half of the function
+ * deals with entering the guest, and anything from __kvm_vcpu_return
+ * to the end of the function deals with reentering the host.
+ * On the enter path, only x0 (vcpu pointer) must be preserved until
+ * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception
+ * code) must both be preserved until the epilogue.
+ * In both cases, x2 points to the CPU context we're saving/restoring from/to.
+ *
+ * The value returned to the caller is the exception code that the exit
+ * path received in x1 (see the final "mov x0, x1").
+ */
+ENTRY(__kvm_vcpu_run)
+	kern_hyp_va	x0
+	msr	tpidr_el2, x0	// Save the vcpu register
+
+	// Host context
+	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
+	kern_hyp_va x2
+
+	// Save everything the host needs back, into the host context at x2
+	save_host_regs
+	bl __save_fpsimd
+	bl __save_sysregs
+
+	activate_traps
+	activate_vm
+
+	restore_vgic_state
+	restore_timer_state
+
+	// Guest context
+	add	x2, x0, #VCPU_CONTEXT
+
+	// Load the guest state from the guest context at x2
+	bl __restore_sysregs
+	bl __restore_fpsimd
+	restore_guest_32bit_state
+	restore_guest_regs
+
+	// That's it, no more messing around.
+	eret
+
+__kvm_vcpu_return:
+	// Assume x0 is the vcpu pointer, x1 the return code
+	// Guest's x0-x3 are on the stack
+
+	// Guest context
+	add	x2, x0, #VCPU_CONTEXT
+
+	save_guest_regs
+	bl __save_fpsimd
+	bl __save_sysregs
+	save_guest_32bit_state
+
+	save_timer_state
+	save_vgic_state
+
+	deactivate_traps
+	deactivate_vm
+
+	// Host context
+	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
+	kern_hyp_va x2
+
+	bl __restore_sysregs
+	bl __restore_fpsimd
+	restore_host_regs
+
+	// Hand the exception code back to the caller of __kvm_vcpu_run
+	mov	x0, x1
+	ret
+END(__kvm_vcpu_run)
+
+// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+//
+// x0: kvm pointer (converted to a HYP VA here)
+// x1: intermediate physical address to invalidate, as a byte address
+// Clobbers x1 and x2. VTTBR_EL2 is loaded with the VM's value for the
+// duration of the invalidation and zeroed again before returning.
+ENTRY(__kvm_tlb_flush_vmid_ipa)
+	kern_hyp_va	x0
+	ldr	x2, [x0, #KVM_VTTBR]
+	msr	vttbr_el2, x2
+	isb
+
+	/*
+	 * We could do so much better if we had the VA as well.
+	 * Instead, we invalidate Stage-2 for this IPA, and the
+	 * whole of Stage-1. Weep...
+	 *
+	 * TLBI IPAS2E1IS expects the page number (IPA[47:12]) in the low
+	 * bits of its operand, not a byte address, so shift the IPA right
+	 * by 12 before issuing it. Passing the raw address would target
+	 * an unrelated page and leave the stale entry in place.
+	 */
+	lsr	x1, x1, #12
+	tlbi	ipas2e1is, x1
+	dsb	sy
+	tlbi	vmalle1is
+	dsb	sy
+	isb
+
+	msr	vttbr_el2, xzr
+	ret
+ENDPROC(__kvm_tlb_flush_vmid_ipa)
+
+// Takes no arguments and touches no general-purpose register.
+// Issues "tlbi alle1is" followed by "ic ialluis", then waits for both
+// to complete with a full-system DSB.
+ENTRY(__kvm_flush_vm_context)
+	tlbi	alle1is
+	ic	ialluis
+	dsb	sy
+	ret
+ENDPROC(__kvm_flush_vm_context)
+
+// Target of every invalid_vector stub. Never returns: it erets into the
+// kernel's panic() with __hyp_panic_str as the format string and
+// x1-x7 as its arguments.
+__kvm_hyp_panic:
+	// Guess the context by looking at VTTBR:
+	// If zero, then we're already a host.
+	// Otherwise restore a minimal host context before panicking.
+	mrs	x0, vttbr_el2
+	cbz	x0, 1f
+
+	// tpidr_el2 holds the vcpu pointer stored by __kvm_vcpu_run
+	mrs	x0, tpidr_el2
+
+	deactivate_traps
+	deactivate_vm
+
+	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
+	kern_hyp_va x2
+
+	bl __restore_sysregs
+
+	// Rebase the string address: subtract HYP_PAGE_OFFSET and add
+	// PAGE_OFFSET (the two quads at label 2 below)
+1:	adr	x0, __hyp_panic_str
+	adr	x1, 2f
+	ldp	x2, x3, [x1]
+	sub	x0, x0, x2
+	add	x0, x0, x3
+	// Arguments, in the order the format string consumes them
+	mrs	x1, spsr_el2
+	mrs	x2, elr_el2
+	mrs	x3, esr_el2
+	mrs	x4, far_el2
+	mrs	x5, hpfar_el2
+	mrs	x6, par_el1
+	mrs	x7, tpidr_el2
+
+	// Return to EL1h with D, A, I and F masked, landing in panic()
+	mov	lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
+		      PSR_MODE_EL1h)
+	msr	spsr_el2, lr
+	ldr	lr, =panic
+	msr	elr_el2, lr
+	eret
+
+	.align	3
+2:	.quad	HYP_PAGE_OFFSET
+	.quad	PAGE_OFFSET
+ENDPROC(__kvm_hyp_panic)
+
+// Format string handed to panic(); explicitly NUL-terminated because
+// .ascii does not add a terminator.
+__hyp_panic_str:
+	.ascii	"HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0"
+
+	.align	2
+
+// Issue "hvc #0" with the caller's registers untouched, then return.
+// The el1_sync handler in this file treats x0 as the function to call
+// and x1-x3 as its first three arguments.
+ENTRY(kvm_call_hyp)
+	hvc	#0
+	ret
+ENDPROC(kvm_call_hyp)
+
+// Emit a 4-byte aligned stub named \label that just branches to \target.
+.macro invalid_vector	label, target
+	.align	2
+\label:
+	b \target
+ENDPROC(\label)
+.endm
+
+	/* None of these should ever happen */
+	/* Each stub below is referenced by name from __kvm_hyp_vector */
+	invalid_vector	el2t_sync_invalid, __kvm_hyp_panic
+	invalid_vector	el2t_irq_invalid, __kvm_hyp_panic
+	invalid_vector	el2t_fiq_invalid, __kvm_hyp_panic
+	invalid_vector	el2t_error_invalid, __kvm_hyp_panic
+	invalid_vector	el2h_sync_invalid, __kvm_hyp_panic
+	invalid_vector	el2h_irq_invalid, __kvm_hyp_panic
+	invalid_vector	el2h_fiq_invalid, __kvm_hyp_panic
+	invalid_vector	el2h_error_invalid, __kvm_hyp_panic
+	invalid_vector	el1_sync_invalid, __kvm_hyp_panic
+	invalid_vector	el1_irq_invalid, __kvm_hyp_panic
+	invalid_vector	el1_fiq_invalid, __kvm_hyp_panic
+	invalid_vector	el1_error_invalid, __kvm_hyp_panic
+
+// Synchronous exception from EL1 (both the 64-bit and 32-bit vectors in
+// __kvm_hyp_vector point here). x0-x3 are pushed first; every path that
+// leaves through el1_trap keeps them on the stack for __kvm_vcpu_return.
+el1_sync:					// Guest trapped into EL2
+	push	x0, x1
+	push	x2, x3
+
+	// x1 = full syndrome, x2 = exception class
+	mrs	x1, esr_el2
+	lsr	x2, x1, #ESR_EL2_EC_SHIFT
+
+	cmp	x2, #ESR_EL2_EC_HVC64
+	b.ne	el1_trap
+
+	mrs	x3, vttbr_el2			// If vttbr is valid, the 64bit guest
+	cbnz	x3, el1_trap			// called HVC
+
+	/* Here, we're pretty sure the host called HVC. */
+	pop	x2, x3
+	pop	x0, x1
+
+	// lr is about to be used for the call below; keep the caller's copy
+	push	lr, xzr
+
+	/*
+	 * Compute the function address in EL2, and shuffle the parameters.
+	 * On entry x0 is the function and x1-x3 its arguments; the callee
+	 * sees them in x0-x2.
+	 */
+	kern_hyp_va	x0
+	mov	lr, x0
+	mov	x0, x1
+	mov	x1, x2
+	mov	x2, x3
+	blr	lr
+
+	pop	lr, xzr
+	eret
+
+// Guest trap path. Records ESR/FAR/HPFAR in the vcpu and leaves through
+// __kvm_vcpu_return with ARM_EXCEPTION_TRAP, or (label 3) returns
+// straight to the guest when the manual address translation fails.
+el1_trap:
+	/*
+	 * x1: ESR
+	 * x2: ESR_EC
+	 */
+	// "ne" after the ccmp means: neither a data abort nor an
+	// instruction abort
+	cmp	x2, #ESR_EL2_EC_DABT
+	mov	x0, #ESR_EL2_EC_IABT
+	ccmp	x2, x0, #4, ne
+	b.ne	1f		// Not an abort we care about
+
+	/* This is an abort. Check for permission fault */
+	and	x2, x1, #ESR_EL2_FSC_TYPE
+	cmp	x2, #FSC_PERM
+	b.ne	1f		// Not a permission fault
+
+	/*
+	 * Check for Stage-1 page table walk, which is guaranteed
+	 * to give a valid HPFAR_EL2.
+	 */
+	tbnz	x1, #7, 1f	// S1PTW is set
+
+	/*
+	 * Permission fault, HPFAR_EL2 is invalid.
+	 * Resolve the IPA the hard way using the guest VA.
+	 * Stage-1 translation already validated the memory access rights.
+	 * As such, we can use the EL1 translation regime, and don't have
+	 * to distinguish between EL0 and EL1 access.
+	 */
+	mrs	x2, far_el2
+	at	s1e1r, x2
+	isb
+
+	/* Read result */
+	mrs	x3, par_el1
+	tbnz	x3, #0, 3f		// Bail out if we failed the translation
+	ubfx	x3, x3, #12, #36	// Extract IPA
+	lsl	x3, x3, #4		// and present it like HPFAR
+	b	2f
+
+	// Common case: take both fault addresses from the hardware registers
+1:	mrs	x3, hpfar_el2
+	mrs	x2, far_el2
+
+	// x1 = ESR, x2 = FAR, x3 = HPFAR (or its reconstructed equivalent)
+2:	mrs	x0, tpidr_el2
+	str	x1, [x0, #VCPU_ESR_EL2]
+	str	x2, [x0, #VCPU_FAR_EL2]
+	str	x3, [x0, #VCPU_HPFAR_EL2]
+
+	mov	x1, #ARM_EXCEPTION_TRAP
+	b	__kvm_vcpu_return
+
+	/*
+	 * Translation failed. Just return to the guest and
+	 * let it fault again. Another CPU is probably playing
+	 * behind our back.
+	 */
+3:	pop	x2, x3
+	pop	x0, x1
+
+	eret
+
+// IRQ taken while the guest was running: push the guest's x0-x3 as
+// __kvm_vcpu_return expects, fetch the vcpu pointer from tpidr_el2 and
+// exit with ARM_EXCEPTION_IRQ as the return code.
+el1_irq:
+	push	x0, x1
+	push	x2, x3
+	mrs	x0, tpidr_el2
+	mov	x1, #ARM_EXCEPTION_IRQ
+	b	__kvm_vcpu_return
+
+	// Literal pool for the "ldr xN, =..." loads above
+	.ltorg
+
+	// Vector table, aligned to 2^11 = 2048 bytes
+	.align 11
+
+ENTRY(__kvm_hyp_vector)
+	// Exceptions taken from EL2 itself are never expected
+	ventry	el2t_sync_invalid		// Synchronous EL2t
+	ventry	el2t_irq_invalid		// IRQ EL2t
+	ventry	el2t_fiq_invalid		// FIQ EL2t
+	ventry	el2t_error_invalid		// Error EL2t
+
+	ventry	el2h_sync_invalid		// Synchronous EL2h
+	ventry	el2h_irq_invalid		// IRQ EL2h
+	ventry	el2h_fiq_invalid		// FIQ EL2h
+	ventry	el2h_error_invalid		// Error EL2h
+
+	// 64-bit and 32-bit EL1 share the same sync and IRQ handlers
+	ventry	el1_sync			// Synchronous 64-bit EL1
+	ventry	el1_irq				// IRQ 64-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 64-bit EL1
+	ventry	el1_error_invalid		// Error 64-bit EL1
+
+	ventry	el1_sync			// Synchronous 32-bit EL1
+	ventry	el1_irq				// IRQ 32-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 32-bit EL1
+	ventry	el1_error_invalid		// Error 32-bit EL1
+ENDPROC(__kvm_hyp_vector)
+
+// Global marker placed after the last byte of HYP code in this section
+__kvm_hyp_code_end:
+	.globl	__kvm_hyp_code_end
+
+	.popsection
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
new file mode 100644
index 00000000000..81a02a8762b
--- /dev/null
+++ b/arch/arm64/kvm/inject_fault.c
@@ -0,0 +1,203 @@
+/*
+ * Fault injection for both 32 and 64bit guests.
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/esr.h>
+
+/* PSTATE given to a 64bit guest on exception entry: EL1h, D/A/I/F all set */
+#define PSTATE_FAULT_BITS_64 	(PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \
+				 PSR_I_BIT | PSR_D_BIT)
+/*
+ * Offset added to VBAR_EL1 to form the guest PC for an injected exception.
+ * NOTE(review): the same offset is used whatever mode the fault came
+ * from - confirm that is intended for faults taken from EL0.
+ */
+#define EL1_EXCEPT_SYNC_OFFSET	0x200
+
+/*
+ * prepare_fault32 - make a 32bit guest take an exception
+ * @vcpu:        the vcpu taking the exception
+ * @mode:        COMPAT_PSR_MODE_* of the target mode, optionally ORed with
+ *               extra CPSR bits the caller wants set
+ * @vect_offset: byte offset of the exception vector from the vector base
+ *               (4 = undefined, 12 = prefetch abort, 16 = data abort)
+ *
+ * Switches the guest CPSR to the target mode, stores the old CPSR in the
+ * target mode's SPSR, sets the target mode's LR to the architectural
+ * return address and points the PC at the vector.
+ */
+static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
+{
+	/*
+	 * Offset the architecture adds to the address of the faulting
+	 * instruction when it fills the banked LR, indexed by
+	 * [vector number][Thumb state]. Only the vectors injected from
+	 * this file are populated; the others are left at zero.
+	 */
+	static const u8 return_offsets[8][2] = {
+		[1] = { 4, 2 },		/* Undefined instruction */
+		[3] = { 4, 4 },		/* Prefetch abort */
+		[4] = { 8, 8 },		/* Data abort */
+	};
+	unsigned long cpsr;
+	unsigned long new_spsr_value = *vcpu_cpsr(vcpu);
+	bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT);
+	u32 return_offset = return_offsets[vect_offset >> 2][is_thumb];
+	u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
+
+	cpsr = mode | COMPAT_PSR_I_BIT;
+
+	/* SCTLR bit 30 selects Thumb, bit 25 big-endian, on exception entry */
+	if (sctlr & (1 << 30))
+		cpsr |= COMPAT_PSR_T_BIT;
+	if (sctlr & (1 << 25))
+		cpsr |= COMPAT_PSR_E_BIT;
+
+	*vcpu_cpsr(vcpu) = cpsr;
+
+	/* Note: These now point to the banked copies */
+	*vcpu_spsr(vcpu) = new_spsr_value;
+	*vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
+
+	/* Branch to exception vector */
+	if (sctlr & (1 << 13))
+		vect_offset += 0xffff0000;
+	else /* always have security exceptions */
+		vect_offset += vcpu_cp15(vcpu, c12_VBAR);
+
+	*vcpu_pc(vcpu) = vect_offset;
+}
+
+static void inject_undef32(struct kvm_vcpu *vcpu)
+{
+	prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4);
+}
+
+/*
+ * Modelled after TakeDataAbortException() and TakePrefetchAbortException
+ * pseudocode.
+ *
+ * @is_pabt selects a prefetch abort (vector 12, IFAR/IFSR) over a data
+ * abort (vector 16, DFAR/DFSR); @addr is written to the matching FAR.
+ */
+static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
+			 unsigned long addr)
+{
+	u32 vector = is_pabt ? 12 : 16;
+	u32 *fault_addr_reg;
+	u32 *fault_status_reg;
+
+	if (is_pabt) {
+		fault_addr_reg = &vcpu_cp15(vcpu, c6_IFAR);
+		fault_status_reg = &vcpu_cp15(vcpu, c5_IFSR);
+	} else {
+		fault_addr_reg = &vcpu_cp15(vcpu, c6_DFAR);
+		fault_status_reg = &vcpu_cp15(vcpu, c5_DFSR);
+	}
+
+	prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vector);
+
+	*fault_addr_reg = addr;
+
+	/*
+	 * Give the guest an IMPLEMENTATION DEFINED exception. The status
+	 * encoding depends on TTBCR bit 31 (long-descriptor format in use).
+	 */
+	if (vcpu_cp15(vcpu, c2_TTBCR) >> 31)
+		*fault_status_reg = 1 << 9 | 0x34;
+	else
+		*fault_status_reg = 0x14;
+}
+
+/*
+ * inject_abt64 - make a 64bit guest take an instruction or data abort
+ * @vcpu:    the vcpu taking the exception
+ * @is_iabt: true for an instruction abort, false for a data abort
+ * @addr:    address reported in FAR_EL1
+ *
+ * Saves the current PSTATE/PC in SPSR/ELR_EL1, redirects the guest to its
+ * synchronous exception vector and builds a matching ESR_EL1.
+ */
+static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
+{
+	unsigned long cpsr = *vcpu_cpsr(vcpu);
+	bool is_aarch32;
+	u32 esr = 0;
+
+	is_aarch32 = vcpu_mode_is_32bit(vcpu);
+
+	*vcpu_spsr(vcpu) = cpsr;
+	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+
+	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+	*vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
+
+	vcpu_sys_reg(vcpu, FAR_EL1) = addr;
+
+	/*
+	 * Build an {i,d}abort, depending on the level and the
+	 * instruction set. Report an external synchronous abort.
+	 */
+	if (kvm_vcpu_trap_il_is32bit(vcpu))
+		esr |= ESR_EL1_IL;
+
+	/*
+	 * Here, the guest runs in AArch64 mode when in EL1. If we get
+	 * an AArch32 fault, it means we managed to trap an EL0 fault.
+	 */
+	if (is_aarch32 || (cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t)
+		esr |= (ESR_EL1_EC_IABT_EL0 << ESR_EL1_EC_SHIFT);
+	else
+		esr |= (ESR_EL1_EC_IABT_EL1 << ESR_EL1_EC_SHIFT);
+
+	/*
+	 * Turn the instruction abort class into the data abort one. The
+	 * value must be shifted into the EC field like the ones above;
+	 * ORing it in unshifted would corrupt the low syndrome bits and
+	 * leave the class saying "instruction abort". This relies on the
+	 * data abort classes being the instruction abort ones with extra
+	 * bits set (0x24/0x25 versus 0x20/0x21 in the architecture).
+	 */
+	if (!is_iabt)
+		esr |= ESR_EL1_EC_DABT_EL0 << ESR_EL1_EC_SHIFT;
+
+	vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_EL2_EC_xABT_xFSR_EXTABT;
+}
+
+/*
+ * Make a 64bit guest take an "unknown reason" synchronous exception:
+ * save PSTATE/PC in SPSR/ELR_EL1, jump to the synchronous vector and
+ * describe the exception in ESR_EL1.
+ */
+static void inject_undef64(struct kvm_vcpu *vcpu)
+{
+	unsigned long old_pstate = *vcpu_cpsr(vcpu);
+	u32 syndrome;
+
+	/* Return state first, while the old mode is still in place */
+	*vcpu_spsr(vcpu) = old_pstate;
+	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+
+	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+	*vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
+
+	/*
+	 * Unknown exception class; IL is set when the trapped instruction
+	 * was a 32bit one.
+	 */
+	syndrome = (ESR_EL1_EC_UNKNOWN << ESR_EL1_EC_SHIFT);
+	if (kvm_vcpu_trap_il_is32bit(vcpu))
+		syndrome |= ESR_EL1_IL;
+
+	vcpu_sys_reg(vcpu, ESR_EL1) = syndrome;
+}
+
+/**
+ * kvm_inject_dabt - inject a data abort into the guest
+ * @vcpu: The VCPU to receive the data abort
+ * @addr: The address to report in the DFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ *
+ * A guest with HCR_RW clear (32bit EL1) gets the 32bit injection, any
+ * other guest the 64bit one; exactly one of the two is performed.
+ */
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+		inject_abt32(vcpu, false, addr);
+	else
+		inject_abt64(vcpu, false, addr);
+}
+
+/**
+ * kvm_inject_pabt - inject a prefetch abort into the guest
+ * @vcpu: The VCPU to receive the prefetch abort
+ * @addr: The address to report in the IFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ *
+ * A guest with HCR_RW clear (32bit EL1) gets the 32bit injection, any
+ * other guest the 64bit one; exactly one of the two is performed.
+ */
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+		inject_abt32(vcpu, true, addr);
+	else
+		inject_abt64(vcpu, true, addr);
+}
+
+/**
+ * kvm_inject_undefined - inject an undefined instruction into the guest
+ * @vcpu: The VCPU to receive the undefined exception
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ *
+ * A guest with HCR_RW clear (32bit EL1) gets the 32bit injection, any
+ * other guest the 64bit one; exactly one of the two is performed.
+ */
+void kvm_inject_undefined(struct kvm_vcpu *vcpu)
+{
+	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+		inject_undef32(vcpu);
+	else
+		inject_undef64(vcpu);
+}
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
new file mode 100644
index 00000000000..bbc6ae32e4a
--- /dev/null
+++ b/arch/arm64/kvm/regmap.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/emulate.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/mm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/ptrace.h>
+
+/* Number of rows in vcpu_reg_offsets: USR, FIQ, IRQ, SVC, ABT, UND */
+#define VCPU_NR_MODES 6
+/* Index of field _reg in struct user_pt_regs, counted in unsigned longs */
+#define REG_OFFSET(_reg) \
+	(offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long))
+
+/* Same, for the user-mode view of 32bit register R */
+#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R))
+
+/*
+ * vcpu_reg_offsets[mode][n] is the slot (see REG_OFFSET) holding 32bit
+ * register rN as seen from that mode. Registers a mode does not bank map
+ * to the USR slot; r15 always maps to pc. Row order must match the mode
+ * index computed in vcpu_reg32().
+ */
+static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = {
+	/* USR Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12), USR_REG_OFFSET(13),	USR_REG_OFFSET(14),
+		REG_OFFSET(pc)
+	},
+
+	/* FIQ Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7),
+		REG_OFFSET(compat_r8_fiq),  /* r8 */
+		REG_OFFSET(compat_r9_fiq),  /* r9 */
+		REG_OFFSET(compat_r10_fiq), /* r10 */
+		REG_OFFSET(compat_r11_fiq), /* r11 */
+		REG_OFFSET(compat_r12_fiq), /* r12 */
+		REG_OFFSET(compat_sp_fiq),  /* r13 */
+		REG_OFFSET(compat_lr_fiq),  /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* IRQ Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_irq), /* r13 */
+		REG_OFFSET(compat_lr_irq), /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* SVC Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_svc), /* r13 */
+		REG_OFFSET(compat_lr_svc), /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* ABT Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_abt), /* r13 */
+		REG_OFFSET(compat_lr_abt), /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* UND Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_und), /* r13 */
+		REG_OFFSET(compat_lr_und), /* r14 */
+		REG_OFFSET(pc)
+	},
+};
+
+/*
+ * Look up 32bit register @reg_num as banked for the vcpu's current mode
+ * and return a pointer to its storage in the guest register file.
+ *
+ * The CPSR mode field is first turned into a row of vcpu_reg_offsets;
+ * an unrecognised mode is a BUG().
+ */
+unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
+{
+	unsigned long bank = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
+	unsigned long *regs = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs;
+
+	switch (bank) {
+	case COMPAT_PSR_MODE_SYS:
+		/* SYS shares the USR bank */
+		bank = 0;
+		break;
+
+	case COMPAT_PSR_MODE_USR ... COMPAT_PSR_MODE_SVC:
+		/* Dropping the 32bit marker leaves row 0 ... 3 */
+		bank &= ~PSR_MODE32_BIT;
+		break;
+
+	case COMPAT_PSR_MODE_ABT:
+		bank = 4;
+		break;
+
+	case COMPAT_PSR_MODE_UND:
+		bank = 5;
+		break;
+
+	default:
+		BUG();
+	}
+
+	return &regs[vcpu_reg_offsets[bank][reg_num]];
+}
+
+/*
+ * Return a pointer to the SPSR banked for the vcpu's current 32bit mode.
+ * Modes without an entry below hit BUG().
+ */
+unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu)
+{
+	unsigned long slot = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
+
+	/* Translate the CPSR mode field into an index into spsr[] */
+	switch (slot) {
+	case COMPAT_PSR_MODE_FIQ:
+		slot = KVM_SPSR_FIQ;
+		break;
+	case COMPAT_PSR_MODE_IRQ:
+		slot = KVM_SPSR_IRQ;
+		break;
+	case COMPAT_PSR_MODE_SVC:
+		slot = KVM_SPSR_SVC;
+		break;
+	case COMPAT_PSR_MODE_ABT:
+		slot = KVM_SPSR_ABT;
+		break;
+	case COMPAT_PSR_MODE_UND:
+		slot = KVM_SPSR_UND;
+		break;
+	default:
+		BUG();
+	}
+
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[slot];
+}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
new file mode 100644
index 00000000000..70a7816535c
--- /dev/null
+++ b/arch/arm64/kvm/reset.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/reset.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+
+#include <kvm/arm_arch_timer.h>
+
+#include <asm/cputype.h>
+#include <asm/ptrace.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_coproc.h>
+
+/*
+ * ARMv8 Reset Values
+ */
+/* 64bit guest: every register zero except PSTATE = EL1h with D/A/I/F set */
+static const struct kvm_regs default_regs_reset = {
+	.regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT |
+			PSR_F_BIT | PSR_D_BIT),
+};
+
+/* 32bit guest: every register zero except PSTATE = SVC mode with A/I/F set */
+static const struct kvm_regs default_regs_reset32 = {
+	.regs.pstate = (COMPAT_PSR_MODE_SVC | COMPAT_PSR_A_BIT |
+			COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT),
+};
+
+/* Interrupt description handed to kvm_timer_vcpu_reset() by kvm_reset_vcpu() */
+static const struct kvm_irq_level default_vtimer_irq = {
+	.irq	= 27,
+	.level	= 1,
+};
+
+/*
+ * Report whether bit 5 of ID_AA64PFR0_EL1 is set on this CPU.
+ * NOTE(review): presumably this is bit 1 of the EL1 field, i.e. "EL1 can
+ * run AArch32" - confirm against the architecture manual.
+ */
+static bool cpu_has_32bit_el1(void)
+{
+	return (read_cpuid(ID_AA64PFR0_EL1) & 0x20) != 0;
+}
+
+/*
+ * Answer a capability query for arm64-specific extensions.
+ * Returns 1 for KVM_CAP_ARM_EL1_32BIT when the CPU supports a 32bit EL1,
+ * and 0 for that capability otherwise or for any other @ext.
+ */
+int kvm_arch_dev_ioctl_check_extension(long ext)
+{
+	if (ext == KVM_CAP_ARM_EL1_32BIT)
+		return cpu_has_32bit_el1();
+
+	return 0;
+}
+
+/**
+ * kvm_reset_vcpu - sets core registers and sys_regs to reset value
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on
+ * the virtual CPU struct to their architecturally defined reset
+ * values.
+ *
+ * Returns 0 on success, or -EINVAL when a 32bit EL1 was requested on a
+ * CPU that cannot provide one.
+ */
+int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
+{
+	const struct kvm_regs *reset_regs;
+	const struct kvm_irq_level *vtimer_irq;
+
+	switch (vcpu->arch.target) {
+	default:
+		/* 64bit reset state unless the 32bit EL1 feature was requested */
+		reset_regs = &default_regs_reset;
+		if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
+			if (!cpu_has_32bit_el1())
+				return -EINVAL;
+			reset_regs = &default_regs_reset32;
+			/* A 32bit guest runs with HCR_RW clear */
+			vcpu->arch.hcr_el2 &= ~HCR_RW;
+		}
+
+		vtimer_irq = &default_vtimer_irq;
+		break;
+	}
+
+	/* Core registers, then system registers, then the timer */
+	memcpy(vcpu_gp_regs(vcpu), reset_regs, sizeof(*reset_regs));
+	kvm_reset_sys_regs(vcpu);
+	kvm_timer_vcpu_reset(vcpu, vtimer_irq);
+
+	return 0;
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
new file mode 100644
index 00000000000..94923609753
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs.c
@@ -0,0 +1,1050 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/coproc.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Rusty Russell <rusty@rustcorp.com.au>
+ *          Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/mm.h>
+#include <linux/kvm_host.h>
+#include <linux/uaccess.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <trace/events/kvm.h>
+
+#include "sys_regs.h"
+
+/*
+ * All of this file is extremely similar to the ARM coproc.c, but the
+ * types are different. My gut feeling is that it should be pretty
+ * easy to merge, but that would be an ABI breakage -- again. VFP
+ * would also need to be abstracted.
+ *
+ * For AArch32, we only take care of what is being trapped. Anything
+ * that has to do with init and userspace access has to go via the
+ * 64bit interface.
+ */
+
+/*
+ * 3 bits per cache level, as per CLIDR, but non-existent caches always 0.
+ * Filled in by kvm_sys_reg_table_init() and consulted by is_valid_cache().
+ */
+static u32 cache_levels;
+
+/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
+#define CSSELR_MAX 12
+
+/*
+ * Which cache CCSIDR represents depends on CSSELR value.
+ *
+ * Writes @csselr to CSSELR_EL1 and returns what is then read back from
+ * CCSIDR_EL1.  The sequence runs between local_irq_disable() and
+ * local_irq_enable(), so interrupts are enabled when this returns.
+ */
+static u32 get_ccsidr(u32 csselr)
+{
+	u32 ccsidr;
+
+	/* Make sure no one else changes CSSELR during this! */
+	local_irq_disable();
+	/* Put value into CSSELR */
+	asm volatile("msr csselr_el1, %x0" : : "r" (csselr));
+	isb();
+	/* Read result out of CCSIDR */
+	asm volatile("mrs %0, ccsidr_el1" : "=r" (ccsidr));
+	local_irq_enable();
+
+	return ccsidr;
+}
+
+/* Issue "dc cisw" with @val as its operand, followed by dsb(). */
+static void do_dc_cisw(u32 val)
+{
+	asm volatile("dc cisw, %x0" : : "r" (val));
+	dsb();
+}
+
+/* Issue "dc csw" with @val as its operand, followed by dsb(). */
+static void do_dc_csw(u32 val)
+{
+	asm volatile("dc csw, %x0" : : "r" (val));
+	dsb();
+}
+
+/*
+ * Trap handler for the DC ISW / DC CSW / DC CISW operations.
+ * See note at ARM ARM B1.14.4.
+ *
+ * A read is handed to read_from_write_only().  For a write, every CPU
+ * but the current one is flagged in vcpu->arch.require_dcache_flush;
+ * then either the requested operation is replayed on this CPU (when it
+ * matches vcpu->arch.last_pcpu) or the whole cache is flushed.
+ */
+static bool access_dcsw(struct kvm_vcpu *vcpu,
+			const struct sys_reg_params *p,
+			const struct sys_reg_desc *r)
+{
+	int this_cpu;
+
+	if (!p->is_write)
+		return read_from_write_only(vcpu, p);
+
+	/* Paired with the put_cpu() at the end of the function. */
+	this_cpu = get_cpu();
+
+	cpumask_setall(&vcpu->arch.require_dcache_flush);
+	cpumask_clear_cpu(this_cpu, &vcpu->arch.require_dcache_flush);
+
+	if (this_cpu != vcpu->arch.last_pcpu) {
+		/* If we were already preempted, take the long way around */
+		flush_cache_all();
+	} else {
+		unsigned long setway = *vcpu_reg(vcpu, p->Rt);
+
+		switch (p->CRm) {
+		case 6:		/* Upgrade DCISW to DCCISW, as per HCR.SWIO */
+		case 14:	/* DCCISW */
+			do_dc_cisw(setway);
+			break;
+
+		case 10:	/* DCCSW */
+			do_dc_csw(setway);
+			break;
+		}
+	}
+
+	put_cpu();
+
+	return true;
+}
+
+/*
+ * Stand-in handler for the performance monitor registers.
+ *
+ * Trapping ID_DFR0 to advertise "no performance monitoring" is not an
+ * option: the patch making the kernel check ID_DFR0 was NAKed, so the
+ * guest reads the PMCR regardless.  We therefore claim 0 counters, and
+ * since PMCCNTR (the cycle counter) must always be supported, all PM
+ * registers are simply RAZ/WI, which at least doesn't crash the guest
+ * kernel.
+ */
+static bool pm_fake(struct kvm_vcpu *vcpu,
+		    const struct sys_reg_params *p,
+		    const struct sys_reg_desc *r)
+{
+	return p->is_write ? ignore_write(vcpu, p) : read_zero(vcpu, p);
+}
+
+/* Reset hook: seed the VCPU's AMAIR_EL1 with the value read from the host register. */
+static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	u64 amair;
+
+	asm volatile("mrs %0, amair_el1\n" : "=r" (amair));
+	vcpu_sys_reg(vcpu, AMAIR_EL1) = amair;
+}
+
+/*
+ * Reset hook for MPIDR_EL1: bit 31 is set and the low 8 bits of the
+ * vcpu_id are mapped into the Aff0 field.
+ */
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	unsigned long mpidr = 1UL << 31;
+
+	mpidr |= (vcpu->vcpu_id & 0xff);
+	vcpu_sys_reg(vcpu, MPIDR_EL1) = mpidr;
+}
+
+/*
+ * Architected system registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ *
+ * Each entry follows the member order of struct sys_reg_desc: the
+ * encoding, the trap handler, the reset hook, the index into the VCPU's
+ * sys_regs array and the value used by the reset hook.  Entries that
+ * only name a trap handler (access_dcsw, pm_fake) have no saved state.
+ * kvm_sys_reg_table_init() BUG()s if the ordering rule is violated.
+ */
+static const struct sys_reg_desc sys_reg_descs[] = {
+	/* DC ISW */
+	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b0110), Op2(0b010),
+	  access_dcsw },
+	/* DC CSW */
+	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1010), Op2(0b010),
+	  access_dcsw },
+	/* DC CISW */
+	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
+	  access_dcsw },
+
+	/* TEECR32_EL1 */
+	{ Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
+	  NULL, reset_val, TEECR32_EL1, 0 },
+	/* TEEHBR32_EL1 */
+	{ Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000),
+	  NULL, reset_val, TEEHBR32_EL1, 0 },
+	/* DBGVCR32_EL2 */
+	{ Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000),
+	  NULL, reset_val, DBGVCR32_EL2, 0 },
+
+	/* MPIDR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101),
+	  NULL, reset_mpidr, MPIDR_EL1 },
+	/* SCTLR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
+	  NULL, reset_val, SCTLR_EL1, 0x00C50078 },
+	/* CPACR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
+	  NULL, reset_val, CPACR_EL1, 0 },
+	/* TTBR0_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000),
+	  NULL, reset_unknown, TTBR0_EL1 },
+	/* TTBR1_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001),
+	  NULL, reset_unknown, TTBR1_EL1 },
+	/* TCR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010),
+	  NULL, reset_val, TCR_EL1, 0 },
+
+	/* AFSR0_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000),
+	  NULL, reset_unknown, AFSR0_EL1 },
+	/* AFSR1_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001),
+	  NULL, reset_unknown, AFSR1_EL1 },
+	/* ESR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000),
+	  NULL, reset_unknown, ESR_EL1 },
+	/* FAR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
+	  NULL, reset_unknown, FAR_EL1 },
+
+	/* PMINTENSET_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
+	  pm_fake },
+	/* PMINTENCLR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
+	  pm_fake },
+
+	/* MAIR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
+	  NULL, reset_unknown, MAIR_EL1 },
+	/* AMAIR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000),
+	  NULL, reset_amair_el1, AMAIR_EL1 },
+
+	/* VBAR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000),
+	  NULL, reset_val, VBAR_EL1, 0 },
+	/* CONTEXTIDR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001),
+	  NULL, reset_val, CONTEXTIDR_EL1, 0 },
+	/* TPIDR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100),
+	  NULL, reset_unknown, TPIDR_EL1 },
+
+	/* CNTKCTL_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1110), CRm(0b0001), Op2(0b000),
+	  NULL, reset_val, CNTKCTL_EL1, 0},
+
+	/* CSSELR_EL1 */
+	{ Op0(0b11), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
+	  NULL, reset_unknown, CSSELR_EL1 },
+
+	/* PMCR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
+	  pm_fake },
+	/* PMCNTENSET_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
+	  pm_fake },
+	/* PMCNTENCLR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
+	  pm_fake },
+	/* PMOVSCLR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
+	  pm_fake },
+	/* PMSWINC_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
+	  pm_fake },
+	/* PMSELR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
+	  pm_fake },
+	/* PMCEID0_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
+	  pm_fake },
+	/* PMCEID1_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
+	  pm_fake },
+	/* PMCCNTR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
+	  pm_fake },
+	/* PMXEVTYPER_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
+	  pm_fake },
+	/* PMXEVCNTR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
+	  pm_fake },
+	/* PMUSERENR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
+	  pm_fake },
+	/* PMOVSSET_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
+	  pm_fake },
+
+	/* TPIDR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010),
+	  NULL, reset_unknown, TPIDR_EL0 },
+	/* TPIDRRO_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011),
+	  NULL, reset_unknown, TPIDRRO_EL0 },
+
+	/* DACR32_EL2 */
+	{ Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000),
+	  NULL, reset_unknown, DACR32_EL2 },
+	/* IFSR32_EL2 */
+	{ Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0000), Op2(0b001),
+	  NULL, reset_unknown, IFSR32_EL2 },
+	/* FPEXC32_EL2 */
+	{ Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0011), Op2(0b000),
+	  NULL, reset_val, FPEXC32_EL2, 0x70 },
+};
+
+/*
+ * Trapped cp15 registers.
+ *
+ * Generic table searched by emulate_cp15() after the target-specific
+ * one.  Entries carry no Op0: the cp15 trap handlers always look them up
+ * with Op0 set to 0.
+ */
+static const struct sys_reg_desc cp15_regs[] = {
+	/*
+	 * DC{C,I,CI}SW operations:
+	 */
+	{ Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw },
+	{ Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
+	{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 3), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 5), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 6), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 7), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 0), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 1), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 2), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake },
+};
+
+/* Target specific emulation tables */
+static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS];
+
+/*
+ * Record @table as the emulation table for @target.  @target is used
+ * directly as an index into target_tables[] without a range check, so
+ * callers must pass a value below KVM_ARM_NUM_TARGETS.
+ */
+void kvm_register_target_sys_reg_table(unsigned int target,
+				       struct kvm_sys_reg_target_table *table)
+{
+	target_tables[target] = table;
+}
+
+/*
+ * Get specific register table for this target.
+ *
+ * Returns the 64bit table when @mode_is_64 is set and the 32bit one
+ * otherwise, storing the number of entries in *@num.  The slot in
+ * target_tables[] is dereferenced unconditionally.
+ */
+static const struct sys_reg_desc *get_target_table(unsigned target,
+						   bool mode_is_64,
+						   size_t *num)
+{
+	struct kvm_sys_reg_target_table *tgt = target_tables[target];
+
+	if (!mode_is_64) {
+		*num = tgt->table32.num;
+		return tgt->table32.table;
+	}
+
+	*num = tgt->table64.num;
+	return tgt->table64.table;
+}
+
+/*
+ * Linear search of @table (@num entries) for the descriptor whose
+ * Op0/Op1/CRn/CRm/Op2 encoding equals the one in @params.
+ *
+ * Returns the first matching entry, or NULL when there is none.
+ */
+static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
+					 const struct sys_reg_desc table[],
+					 unsigned int num)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < num; idx++) {
+		const struct sys_reg_desc *desc = &table[idx];
+
+		if (params->Op0 == desc->Op0 &&
+		    params->Op1 == desc->Op1 &&
+		    params->CRn == desc->CRn &&
+		    params->CRm == desc->CRm &&
+		    params->Op2 == desc->Op2)
+			return desc;
+	}
+
+	return NULL;
+}
+
+/* Nothing is emulated here: inject an undefined exception into the guest and return 1. */
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+/* Nothing is emulated here: inject an undefined exception into the guest and return 1. */
+int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+/*
+ * Emulate one trapped cp15 access described by @params.
+ *
+ * The target-specific 32bit table is searched before the generic
+ * cp15_regs table.  When the matching handler succeeds the trapped
+ * instruction is skipped; otherwise the access is logged and an
+ * undefined exception is injected into the guest.
+ */
+static void emulate_cp15(struct kvm_vcpu *vcpu,
+			 const struct sys_reg_params *params)
+{
+	const struct sys_reg_desc *target_regs, *desc;
+	size_t nr_target;
+	bool emulated = false;
+
+	target_regs = get_target_table(vcpu->arch.target, false, &nr_target);
+
+	/* Search target-specific then generic table. */
+	desc = find_reg(params, target_regs, nr_target);
+	if (!desc)
+		desc = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs));
+
+	if (likely(desc)) {
+		/*
+		 * Not having an accessor means that we have
+		 * configured a trap that we don't know how to
+		 * handle. This certainly qualifies as a gross bug
+		 * that should be fixed right away.
+		 */
+		BUG_ON(!desc->access);
+
+		/* If access function fails, it should complain. */
+		emulated = desc->access(vcpu, params, desc);
+	}
+
+	if (likely(emulated)) {
+		/* Skip instruction, since it was emulated */
+		kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+		return;
+	}
+
+	kvm_err("Unsupported guest CP15 access at: %08lx\n", *vcpu_pc(vcpu));
+	print_sys_reg_instr(params);
+	kvm_inject_undefined(vcpu);
+}
+
+/**
+ * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ *
+ * Decodes the access from the HSR, emulates it through emulate_cp15()
+ * and always returns 1.
+ */
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+	int Rt2 = (hsr >> 10) & 0xf;
+	struct sys_reg_params params = {
+		.Op0		= 0,
+		.Op1		= (hsr >> 16) & 0xf,
+		.CRn		= 0,
+		.CRm		= (hsr >> 1) & 0xf,
+		.Op2		= 0,
+		.Rt		= (hsr >> 5) & 0xf,
+		.is_write	= ((hsr & 1) == 0),
+	};
+
+	/*
+	 * Massive hack here. Store Rt2 in the top 32bits so we only
+	 * have one register to deal with. As we use the same trap
+	 * backends between AArch32 and AArch64, we get away with it.
+	 */
+	if (params.is_write) {
+		u64 merged = *vcpu_reg(vcpu, params.Rt);
+
+		merged &= 0xffffffff;
+		merged |= *vcpu_reg(vcpu, Rt2) << 32;
+		*vcpu_reg(vcpu, params.Rt) = merged;
+	}
+
+	emulate_cp15(vcpu, &params);
+
+	/* Do the opposite hack for the read side */
+	if (!params.is_write) {
+		u64 upper = *vcpu_reg(vcpu, params.Rt);
+
+		upper >>= 32;
+		*vcpu_reg(vcpu, Rt2) = upper;
+	}
+
+	return 1;
+}
+
+/**
+ * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ *
+ * Decodes the access from the HSR, emulates it through emulate_cp15()
+ * and always returns 1.
+ */
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+	struct sys_reg_params params = {
+		.Op0		= 0,
+		.Op1		= (hsr >> 14) & 0x7,
+		.CRn		= (hsr >> 10) & 0xf,
+		.CRm		= (hsr >> 1) & 0xf,
+		.Op2		= (hsr >> 17) & 0x7,
+		.Rt		= (hsr >> 5) & 0xf,
+		.is_write	= ((hsr & 1) == 0),
+	};
+
+	emulate_cp15(vcpu, &params);
+	return 1;
+}
+
+/*
+ * Emulate one trapped AArch64 system register access.
+ *
+ * The target-specific 64bit table is searched before the generic
+ * sys_reg_descs table.  A successful handler makes the trapped
+ * instruction be skipped; an unknown register is logged; every
+ * non-emulated access gets an undefined exception.  Always returns 1.
+ */
+static int emulate_sys_reg(struct kvm_vcpu *vcpu,
+			   const struct sys_reg_params *params)
+{
+	const struct sys_reg_desc *target_regs, *desc;
+	size_t nr_target;
+
+	target_regs = get_target_table(vcpu->arch.target, true, &nr_target);
+
+	/* Search target-specific then generic table. */
+	desc = find_reg(params, target_regs, nr_target);
+	if (!desc)
+		desc = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+	if (unlikely(!desc)) {
+		kvm_err("Unsupported guest sys_reg access at: %lx\n",
+			*vcpu_pc(vcpu));
+		print_sys_reg_instr(params);
+		kvm_inject_undefined(vcpu);
+		return 1;
+	}
+
+	/*
+	 * Not having an accessor means that we have
+	 * configured a trap that we don't know how to
+	 * handle. This certainly qualifies as a gross bug
+	 * that should be fixed right away.
+	 */
+	BUG_ON(!desc->access);
+
+	if (likely(desc->access(vcpu, params, desc))) {
+		/* Skip instruction, since it was emulated */
+		kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+	} else {
+		/* If access function fails, it should complain. */
+		kvm_inject_undefined(vcpu);
+	}
+
+	return 1;
+}
+
+/* Invoke the reset hook of each of the @num entries of @table that has one. */
+static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
+			      const struct sys_reg_desc *table, size_t num)
+{
+	size_t idx;
+
+	for (idx = 0; idx < num; idx++) {
+		const struct sys_reg_desc *desc = &table[idx];
+
+		if (!desc->reset)
+			continue;
+		desc->reset(vcpu, desc);
+	}
+}
+
+/**
+ * kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ *
+ * Decodes the access from the syndrome value and returns the result of
+ * emulate_sys_reg().
+ */
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	unsigned long esr = kvm_vcpu_get_hsr(vcpu);
+	struct sys_reg_params params = {
+		.Op0		= (esr >> 20) & 3,
+		.Op1		= (esr >> 14) & 0x7,
+		.CRn		= (esr >> 10) & 0xf,
+		.CRm		= (esr >> 1) & 0xf,
+		.Op2		= (esr >> 17) & 0x7,
+		.Rt		= (esr >> 5) & 0x1f,
+		.is_write	= !(esr & 1),
+	};
+
+	return emulate_sys_reg(vcpu, &params);
+}
+
+/******************************************************************************
+ * Userspace API
+ *****************************************************************************/
+
+/*
+ * Decode a userspace register index into its Op0/Op1/CRn/CRm/Op2 parts.
+ *
+ * Only 64bit-sized indices with no bits set outside the known fields are
+ * accepted.  Returns true and fills @params on success; returns false
+ * (leaving @params untouched) otherwise.
+ */
+static bool index_to_params(u64 id, struct sys_reg_params *params)
+{
+	if ((id & KVM_REG_SIZE_MASK) != KVM_REG_SIZE_U64)
+		return false;
+
+	/* Any unused index bits means it's not valid. */
+	if (id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK
+		      | KVM_REG_ARM_COPROC_MASK
+		      | KVM_REG_ARM64_SYSREG_OP0_MASK
+		      | KVM_REG_ARM64_SYSREG_OP1_MASK
+		      | KVM_REG_ARM64_SYSREG_CRN_MASK
+		      | KVM_REG_ARM64_SYSREG_CRM_MASK
+		      | KVM_REG_ARM64_SYSREG_OP2_MASK))
+		return false;
+
+	params->Op0 = ((id & KVM_REG_ARM64_SYSREG_OP0_MASK)
+		       >> KVM_REG_ARM64_SYSREG_OP0_SHIFT);
+	params->Op1 = ((id & KVM_REG_ARM64_SYSREG_OP1_MASK)
+		       >> KVM_REG_ARM64_SYSREG_OP1_SHIFT);
+	params->CRn = ((id & KVM_REG_ARM64_SYSREG_CRN_MASK)
+		       >> KVM_REG_ARM64_SYSREG_CRN_SHIFT);
+	params->CRm = ((id & KVM_REG_ARM64_SYSREG_CRM_MASK)
+		       >> KVM_REG_ARM64_SYSREG_CRM_SHIFT);
+	params->Op2 = ((id & KVM_REG_ARM64_SYSREG_OP2_MASK)
+		       >> KVM_REG_ARM64_SYSREG_OP2_SHIFT);
+
+	return true;
+}
+
+/*
+ * Decode an index value, and find the sys_reg_desc entry.
+ *
+ * Returns NULL when the index is not a sys_reg index, cannot be decoded,
+ * matches no table entry, or matches an entry that is not saved in the
+ * sys_reg array (reg == 0).
+ */
+static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
+						    u64 id)
+{
+	const struct sys_reg_desc *target_regs, *desc;
+	struct sys_reg_params params;
+	size_t nr_target;
+
+	/* We only do sys_reg for now. */
+	if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
+		return NULL;
+
+	if (!index_to_params(id, &params))
+		return NULL;
+
+	/* Target-specific table first, generic table as fallback. */
+	target_regs = get_target_table(vcpu->arch.target, true, &nr_target);
+	desc = find_reg(&params, target_regs, nr_target);
+	if (!desc)
+		desc = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+	/* Not saved in the sys_reg array? */
+	return (desc && desc->reg) ? desc : NULL;
+}
+
+/*
+ * These are the invariant sys_reg registers: we let the guest see the
+ * host versions of these, so they're part of the guest state.
+ *
+ * A future CPU may provide a mechanism to present different values to
+ * the guest, or a future kvm may trap them.
+ *
+ * FUNCTION_INVARIANT(reg) defines get_<reg>(), a function with the
+ * signature of a reset hook that reads the host register with "mrs" and
+ * stores the result in the descriptor's ->val.  The descriptor arrives
+ * as a const pointer and is written through a cast, so it must refer to
+ * a writable object.
+ */
+
+#define FUNCTION_INVARIANT(reg)						\
+	static void get_##reg(struct kvm_vcpu *v,			\
+			      const struct sys_reg_desc *r)		\
+	{								\
+		u64 val;						\
+									\
+		asm volatile("mrs %0, " __stringify(reg) "\n"		\
+			     : "=r" (val));				\
+		((struct sys_reg_desc *)r)->val = val;			\
+	}
+
+FUNCTION_INVARIANT(midr_el1)
+FUNCTION_INVARIANT(ctr_el0)
+FUNCTION_INVARIANT(revidr_el1)
+FUNCTION_INVARIANT(id_pfr0_el1)
+FUNCTION_INVARIANT(id_pfr1_el1)
+FUNCTION_INVARIANT(id_dfr0_el1)
+FUNCTION_INVARIANT(id_afr0_el1)
+FUNCTION_INVARIANT(id_mmfr0_el1)
+FUNCTION_INVARIANT(id_mmfr1_el1)
+FUNCTION_INVARIANT(id_mmfr2_el1)
+FUNCTION_INVARIANT(id_mmfr3_el1)
+FUNCTION_INVARIANT(id_isar0_el1)
+FUNCTION_INVARIANT(id_isar1_el1)
+FUNCTION_INVARIANT(id_isar2_el1)
+FUNCTION_INVARIANT(id_isar3_el1)
+FUNCTION_INVARIANT(id_isar4_el1)
+FUNCTION_INVARIANT(id_isar5_el1)
+FUNCTION_INVARIANT(clidr_el1)
+FUNCTION_INVARIANT(aidr_el1)
+
+/*
+ * ->val is filled in by kvm_sys_reg_table_init(), which calls each
+ * entry's reset hook once.  Entries have no trap handler and no sys_regs
+ * index; userspace reads them through get_invariant_sys_reg() and may
+ * only "write" back the same value (set_invariant_sys_reg()).
+ */
+static struct sys_reg_desc invariant_sys_regs[] = {
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b000),
+	  NULL, get_midr_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b110),
+	  NULL, get_revidr_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b000),
+	  NULL, get_id_pfr0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b001),
+	  NULL, get_id_pfr1_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b010),
+	  NULL, get_id_dfr0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b011),
+	  NULL, get_id_afr0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b100),
+	  NULL, get_id_mmfr0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b101),
+	  NULL, get_id_mmfr1_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b110),
+	  NULL, get_id_mmfr2_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b111),
+	  NULL, get_id_mmfr3_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
+	  NULL, get_id_isar0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b001),
+	  NULL, get_id_isar1_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
+	  NULL, get_id_isar2_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b011),
+	  NULL, get_id_isar3_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b100),
+	  NULL, get_id_isar4_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b101),
+	  NULL, get_id_isar5_el1 },
+	{ Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b001),
+	  NULL, get_clidr_el1 },
+	{ Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b111),
+	  NULL, get_aidr_el1 },
+	{ Op0(0b11), Op1(0b011), CRn(0b0000), CRm(0b0000), Op2(0b001),
+	  NULL, get_ctr_el0 },
+};
+
+/*
+ * Copy KVM_REG_SIZE(@id) bytes from userspace address @uaddr into @val.
+ * Returns 0 on success or -EFAULT if the copy was incomplete.
+ */
+static int reg_from_user(void *val, const void __user *uaddr, u64 id)
+{
+	/* This Just Works because we are little endian. */
+	return copy_from_user(val, uaddr, KVM_REG_SIZE(id)) ? -EFAULT : 0;
+}
+
+/*
+ * Copy KVM_REG_SIZE(@id) bytes from @val to userspace address @uaddr.
+ * Returns 0 on success or -EFAULT if the copy was incomplete.
+ */
+static int reg_to_user(void __user *uaddr, const void *val, u64 id)
+{
+	/* This Just Works because we are little endian. */
+	return copy_to_user(uaddr, val, KVM_REG_SIZE(id)) ? -EFAULT : 0;
+}
+
+/*
+ * Copy the cached value of the invariant register named by @id to
+ * userspace.  Returns -ENOENT when @id cannot be decoded or names no
+ * invariant register, otherwise the result of reg_to_user().
+ */
+static int get_invariant_sys_reg(u64 id, void __user *uaddr)
+{
+	const struct sys_reg_desc *desc = NULL;
+	struct sys_reg_params params;
+
+	if (index_to_params(id, &params))
+		desc = find_reg(&params, invariant_sys_regs,
+				ARRAY_SIZE(invariant_sys_regs));
+
+	if (!desc)
+		return -ENOENT;
+
+	return reg_to_user(uaddr, &desc->val, id);
+}
+
+/*
+ * "Write" an invariant register from userspace: the write is accepted
+ * only if it would not change the value.
+ *
+ * Returns 0 when the supplied value equals the cached one, -EINVAL when
+ * it differs, -ENOENT for an unknown index, or the reg_from_user() error.
+ */
+static int set_invariant_sys_reg(u64 id, void __user *uaddr)
+{
+	const struct sys_reg_desc *desc;
+	struct sys_reg_params params;
+	u64 wanted = 0; /* Make sure high bits are 0 for 32-bit regs */
+	int rc;
+
+	if (!index_to_params(id, &params))
+		return -ENOENT;
+
+	desc = find_reg(&params, invariant_sys_regs,
+			ARRAY_SIZE(invariant_sys_regs));
+	if (!desc)
+		return -ENOENT;
+
+	rc = reg_from_user(&wanted, uaddr, id);
+
+	/* This is what we mean by invariant: you can't change it. */
+	if (!rc && desc->val != wanted)
+		rc = -EINVAL;
+
+	return rc;
+}
+
+/*
+ * Check whether a CSSELR-style selector names a cache that exists on
+ * this host, according to the cleaned-up CLIDR copy in cache_levels.
+ *
+ * @val: selector; bit 0 is the Instruction-or-Data bit, the bits above
+ *       it select the cache level.
+ *
+ * Returns true when the selected cache exists, false otherwise
+ * (including when @val is not below CSSELR_MAX).
+ */
+static bool is_valid_cache(u32 val)
+{
+	u32 level, ctype;
+
+	/*
+	 * An out-of-range selector is not a valid cache.  This has to be
+	 * "false": a negative errno converted to bool would read as true
+	 * and let the bogus selector through.
+	 */
+	if (val >= CSSELR_MAX)
+		return false;
+
+	/* Bottom bit is Instruction or Data bit.  Next 3 bits are level. */
+	level = (val >> 1);
+	ctype = (cache_levels >> (level * 3)) & 7;
+
+	switch (ctype) {
+	case 0: /* No cache */
+		return false;
+	case 1: /* Instruction cache only */
+		return (val & 1);
+	case 2: /* Data cache only */
+	case 4: /* Unified cache */
+		return !(val & 1);
+	case 3: /* Separate instruction and data caches */
+		return true;
+	default: /* Reserved: we can't know instruction or data. */
+		return false;
+	}
+}
+
+/*
+ * Read a demultiplexed register for userspace.  Only CCSIDR is handled:
+ * the selector encoded in @id is validated and the matching CCSIDR value
+ * is stored at @uaddr.
+ *
+ * Returns -ENOENT for any malformed or unknown index, otherwise the
+ * result of put_user().
+ */
+static int demux_c15_get(u64 id, void __user *uaddr)
+{
+	u32 __user *dst = uaddr;
+	u32 csselr;
+
+	/* Fail if we have unknown bits set. */
+	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
+		   | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
+		return -ENOENT;
+
+	if ((id & KVM_REG_ARM_DEMUX_ID_MASK) != KVM_REG_ARM_DEMUX_ID_CCSIDR)
+		return -ENOENT;
+
+	if (KVM_REG_SIZE(id) != 4)
+		return -ENOENT;
+
+	csselr = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
+		>> KVM_REG_ARM_DEMUX_VAL_SHIFT;
+	if (!is_valid_cache(csselr))
+		return -ENOENT;
+
+	return put_user(get_ccsidr(csselr), dst);
+}
+
+/*
+ * "Write" a demultiplexed register from userspace.  Only CCSIDR is
+ * handled, and since it is invariant the write succeeds only when the
+ * supplied value equals the host's.
+ *
+ * Returns 0 on a matching value, -EINVAL on a different one, -EFAULT if
+ * the value cannot be fetched, and -ENOENT for a malformed or unknown
+ * index.
+ */
+static int demux_c15_set(u64 id, void __user *uaddr)
+{
+	u32 __user *src = uaddr;
+	u32 csselr, wanted;
+
+	/* Fail if we have unknown bits set. */
+	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
+		   | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
+		return -ENOENT;
+
+	if ((id & KVM_REG_ARM_DEMUX_ID_MASK) != KVM_REG_ARM_DEMUX_ID_CCSIDR)
+		return -ENOENT;
+
+	if (KVM_REG_SIZE(id) != 4)
+		return -ENOENT;
+
+	csselr = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
+		>> KVM_REG_ARM_DEMUX_VAL_SHIFT;
+	if (!is_valid_cache(csselr))
+		return -ENOENT;
+
+	if (get_user(wanted, src))
+		return -EFAULT;
+
+	/* This is also invariant: you can't change it. */
+	return (wanted != get_ccsidr(csselr)) ? -EINVAL : 0;
+}
+
+/*
+ * Copy one system register of @vcpu to the userspace address in @reg.
+ *
+ * Demultiplexed indices go to demux_c15_get().  Everything else must be
+ * 64bit wide (-ENOENT otherwise) and is served from the VCPU's saved
+ * state, falling back to the invariant registers when no saved
+ * descriptor matches.
+ */
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+	const struct sys_reg_desc *desc;
+
+	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+		return demux_c15_get(reg->id, uaddr);
+
+	if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
+		return -ENOENT;
+
+	desc = index_to_sys_reg_desc(vcpu, reg->id);
+	if (desc)
+		return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, desc->reg),
+				   reg->id);
+
+	return get_invariant_sys_reg(reg->id, uaddr);
+}
+
+/*
+ * Load one system register of @vcpu from the userspace address in @reg.
+ *
+ * Demultiplexed indices go to demux_c15_set().  Everything else must be
+ * 64bit wide (-ENOENT otherwise) and is written into the VCPU's saved
+ * state, falling back to the invariant-register check when no saved
+ * descriptor matches.
+ */
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+	const struct sys_reg_desc *desc;
+
+	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+		return demux_c15_set(reg->id, uaddr);
+
+	if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
+		return -ENOENT;
+
+	desc = index_to_sys_reg_desc(vcpu, reg->id);
+	if (desc)
+		return reg_from_user(&vcpu_sys_reg(vcpu, desc->reg), uaddr,
+				     reg->id);
+
+	return set_invariant_sys_reg(reg->id, uaddr);
+}
+
+/* Count the selectors below CSSELR_MAX that is_valid_cache() accepts. */
+static unsigned int num_demux_regs(void)
+{
+	unsigned int csselr;
+	unsigned int found = 0;
+
+	for (csselr = 0; csselr < CSSELR_MAX; csselr++) {
+		if (!is_valid_cache(csselr))
+			continue;
+		found++;
+	}
+
+	return found;
+}
+
+/*
+ * Store, at consecutive slots starting at @uindices, the CCSIDR register
+ * index of every selector accepted by is_valid_cache().
+ * Returns 0 on success or -EFAULT when a store to userspace fails.
+ */
+static int write_demux_regids(u64 __user *uindices)
+{
+	u64 base = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
+	unsigned int csselr;
+
+	base |= KVM_REG_ARM_DEMUX_ID_CCSIDR;
+
+	for (csselr = 0; csselr < CSSELR_MAX; csselr++) {
+		if (is_valid_cache(csselr)) {
+			if (put_user(base | csselr, uindices))
+				return -EFAULT;
+			uindices++;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Build the userspace register index for @reg: a 64bit-sized ARM64
+ * sys_reg id with the descriptor's encoding fields shifted into place.
+ */
+static u64 sys_reg_to_index(const struct sys_reg_desc *reg)
+{
+	u64 index = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG;
+
+	index |= (reg->Op0 << KVM_REG_ARM64_SYSREG_OP0_SHIFT);
+	index |= (reg->Op1 << KVM_REG_ARM64_SYSREG_OP1_SHIFT);
+	index |= (reg->CRn << KVM_REG_ARM64_SYSREG_CRN_SHIFT);
+	index |= (reg->CRm << KVM_REG_ARM64_SYSREG_CRM_SHIFT);
+	index |= (reg->Op2 << KVM_REG_ARM64_SYSREG_OP2_SHIFT);
+
+	return index;
+}
+
+/*
+ * Store the index of @reg at **@uind and advance *@uind by one slot.
+ * A NULL *@uind means "count only": nothing is written and the call
+ * succeeds.  Returns false only when the store to userspace fails.
+ */
+static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind)
+{
+	if (*uind) {
+		if (put_user(sys_reg_to_index(reg), *uind))
+			return false;
+
+		(*uind)++;
+	}
+
+	return true;
+}
+
+/*
+ * Assumed ordered tables, see kvm_sys_reg_table_init.
+ *
+ * Merge-walks the target-specific 64bit table and the generic
+ * sys_reg_descs table, visiting each register once (the target entry
+ * wins when both tables describe the same register).  Every visited
+ * entry with a non-zero ->reg is counted and, when @uind is non-NULL,
+ * its index is stored to userspace.
+ *
+ * Returns the number of counted entries, or -EFAULT when a store fails.
+ * Both tables must be non-empty (BUG otherwise).
+ */
+static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
+{
+	const struct sys_reg_desc *i1, *i2, *end1, *end2;
+	unsigned int total = 0;
+	size_t num;
+
+	/* We check for duplicates here, to allow arch-specific overrides. */
+	i1 = get_target_table(vcpu->arch.target, true, &num);
+	end1 = i1 + num;
+	i2 = sys_reg_descs;
+	end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs);
+
+	BUG_ON(i1 == end1 || i2 == end2);
+
+	/* Walk carefully, as both tables may refer to the same register. */
+	while (i1 || i2) {
+		/*
+		 * NOTE(review): cmp_sys_reg() is defined outside this view;
+		 * it is presumably expected to cope with an exhausted
+		 * (NULL) side -- confirm against its definition.
+		 */
+		int cmp = cmp_sys_reg(i1, i2);
+		/* target-specific overrides generic entry. */
+		if (cmp <= 0) {
+			/* Ignore registers we trap but don't save. */
+			if (i1->reg) {
+				if (!copy_reg_to_user(i1, &uind))
+					return -EFAULT;
+				total++;
+			}
+		} else {
+			/* Ignore registers we trap but don't save. */
+			if (i2->reg) {
+				if (!copy_reg_to_user(i2, &uind))
+					return -EFAULT;
+				total++;
+			}
+		}
+
+		/* Advance whichever side(s) were consumed; NULL marks exhaustion. */
+		if (cmp <= 0 && ++i1 == end1)
+			i1 = NULL;
+		if (cmp >= 0 && ++i2 == end2)
+			i2 = NULL;
+	}
+	return total;
+}
+
+/*
+ * Number of register indices kvm_arm_copy_sys_reg_indices() produces:
+ * the invariant registers, the valid demux (CCSIDR) registers and the
+ * saved registers found by a count-only walk of the tables.
+ */
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu)
+{
+	unsigned long count = ARRAY_SIZE(invariant_sys_regs);
+
+	count += num_demux_regs();
+	count += walk_sys_regs(vcpu, (u64 __user *)NULL);
+
+	return count;
+}
+
+/*
+ * Write the index of every system register to the userspace array at
+ * @uindices: first the invariant registers, then the saved registers
+ * found by walk_sys_regs(), then the demux (CCSIDR) registers.
+ *
+ * Returns 0 on success or a negative error code (-EFAULT) on failure.
+ */
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	unsigned int idx;
+	int walked;
+
+	/* Give them all the invariant registers' indices. */
+	for (idx = 0; idx < ARRAY_SIZE(invariant_sys_regs); idx++) {
+		const struct sys_reg_desc *inv = &invariant_sys_regs[idx];
+
+		if (put_user(sys_reg_to_index(inv), uindices))
+			return -EFAULT;
+		uindices++;
+	}
+
+	/* A non-negative result is the number of slots that were filled. */
+	walked = walk_sys_regs(vcpu, uindices);
+	if (walked < 0)
+		return walked;
+	uindices += walked;
+
+	return write_demux_regids(uindices);
+}
+
+/*
+ * One-time table setup: verify that sys_reg_descs is strictly ordered,
+ * capture the host values of the invariant registers, and derive
+ * cache_levels from the host's CLIDR.
+ */
+void kvm_sys_reg_table_init(void)
+{
+	unsigned int i;
+	struct sys_reg_desc clidr;
+
+	/* Make sure tables are unique and in order. */
+	for (i = 1; i < ARRAY_SIZE(sys_reg_descs); i++)
+		BUG_ON(cmp_sys_reg(&sys_reg_descs[i-1], &sys_reg_descs[i]) >= 0);
+
+	/* We abuse the reset function to overwrite the table itself. */
+	for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
+		invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]);
+
+	/*
+	 * CLIDR format is awkward, so clean it up.  See ARM B4.1.20:
+	 *
+	 *   If software reads the Cache Type fields from Ctype1
+	 *   upwards, once it has seen a value of 0b000, no caches
+	 *   exist at further-out levels of the hierarchy. So, for
+	 *   example, if Ctype3 is the first Cache Type field with a
+	 *   value of 0b000, the values of Ctype4 to Ctype7 must be
+	 *   ignored.
+	 */
+	/* Only ->val of the scratch descriptor is written and then read. */
+	get_clidr_el1(NULL, &clidr); /* Ugly... */
+	cache_levels = clidr.val;
+	/* Find the first 3-bit field that is zero (or stop after 7 fields). */
+	for (i = 0; i < 7; i++)
+		if (((cache_levels >> (i*3)) & 7) == 0)
+			break;
+	/* Clear all higher bits. */
+	cache_levels &= (1 << (i*3))-1;
+}
+
+/**
+ * kvm_reset_sys_regs - sets system registers to reset value
+ * @vcpu: The VCPU pointer
+ *
+ * Runs the reset hooks of the generic table and then of the
+ * target-specific table, so that the virtual CPU's system registers hold
+ * their architecturally defined reset values.  Panics if any register
+ * from index 1 upwards still holds the poison pattern afterwards.
+ */
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
+{
+	const struct sys_reg_desc *target_regs;
+	size_t nr_target;
+	size_t reg;
+
+	/* Catch someone adding a register without putting in reset entry. */
+	memset(&vcpu->arch.ctxt.sys_regs, 0x42, sizeof(vcpu->arch.ctxt.sys_regs));
+
+	/* Generic chip reset first (so target could override). */
+	reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+	target_regs = get_target_table(vcpu->arch.target, true, &nr_target);
+	reset_sys_reg_descs(vcpu, target_regs, nr_target);
+
+	for (reg = 1; reg < NR_SYS_REGS; reg++) {
+		if (vcpu_sys_reg(vcpu, reg) == 0x4242424242424242)
+			panic("Didn't reset vcpu_sys_reg(%zi)", reg);
+	}
+}
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
new file mode 100644
index 00000000000..d50d3722998
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/coproc.h
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_SYS_REGS_LOCAL_H__
+#define __ARM64_KVM_SYS_REGS_LOCAL_H__
+
+struct sys_reg_params {
+	u8	Op0;
+	u8	Op1;
+	u8	CRn;
+	u8	CRm;
+	u8	Op2;
+	u8	Rt;
+	bool	is_write;
+};
+
+struct sys_reg_desc {
+	/* MRS/MSR instruction which accesses it. */
+	u8	Op0;
+	u8	Op1;
+	u8	CRn;
+	u8	CRm;
+	u8	Op2;
+
+	/* Trapped access from guest, if non-NULL. */
+	bool (*access)(struct kvm_vcpu *,
+		       const struct sys_reg_params *,
+		       const struct sys_reg_desc *);
+
+	/* Initialization for vcpu. */
+	void (*reset)(struct kvm_vcpu *, const struct sys_reg_desc *);
+
+	/* Index into sys_reg[], or 0 if we don't need to save it. */
+	int reg;
+
+	/* Value (usually reset value) */
+	u64 val;
+};
+
+static inline void print_sys_reg_instr(const struct sys_reg_params *p)
+{
+	/* Look, we even formatted it for you to paste into the table! */
+	kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
+		      p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read");
+}
+
+static inline bool ignore_write(struct kvm_vcpu *vcpu,
+				const struct sys_reg_params *p)
+{
+	return true;
+}
+
+static inline bool read_zero(struct kvm_vcpu *vcpu,
+			     const struct sys_reg_params *p)
+{
+	*vcpu_reg(vcpu, p->Rt) = 0;
+	return true;
+}
+
+static inline bool write_to_read_only(struct kvm_vcpu *vcpu,
+				      const struct sys_reg_params *params)
+{
+	kvm_debug("sys_reg write to read-only register at: %lx\n",
+		  *vcpu_pc(vcpu));
+	print_sys_reg_instr(params);
+	return false;
+}
+
+static inline bool read_from_write_only(struct kvm_vcpu *vcpu,
+					const struct sys_reg_params *params)
+{
+	kvm_debug("sys_reg read to write-only register at: %lx\n",
+		  *vcpu_pc(vcpu));
+	print_sys_reg_instr(params);
+	return false;
+}
+
+/* Reset functions */
+static inline void reset_unknown(struct kvm_vcpu *vcpu,
+				 const struct sys_reg_desc *r)
+{
+	BUG_ON(!r->reg);
+	BUG_ON(r->reg >= NR_SYS_REGS);
+	vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
+}
+
+static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	BUG_ON(!r->reg);
+	BUG_ON(r->reg >= NR_SYS_REGS);
+	vcpu_sys_reg(vcpu, r->reg) = r->val;
+}
+
+static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
+			      const struct sys_reg_desc *i2)
+{
+	/* Never compare an entry with itself; a NULL entry sorts last. */
+	BUG_ON(i1 == i2);
+	if (!i1 || !i2)
+		return i1 ? -1 : 1;
+	/* Otherwise order by Op0, Op1, CRn, CRm, then Op2. */
+	if (i1->Op0 != i2->Op0)
+		return i1->Op0 - i2->Op0;
+	if (i1->Op1 != i2->Op1)
+		return i1->Op1 - i2->Op1;
+	if (i1->CRn != i2->CRn)
+		return i1->CRn - i2->CRn;
+	if (i1->CRm != i2->CRm)
+		return i1->CRm - i2->CRm;
+	return i1->Op2 - i2->Op2;
+}
+
+
+#define Op0(_x) 	.Op0 = _x
+#define Op1(_x) 	.Op1 = _x
+#define CRn(_x)		.CRn = _x
+#define CRm(_x) 	.CRm = _x
+#define Op2(_x) 	.Op2 = _x
+
+#endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
new file mode 100644
index 00000000000..4268ab9356b
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Based on arch/arm/kvm/coproc_a15.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Rusty Russell <rusty@rustcorp.au>
+ *          Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kvm_host.h>
+#include <asm/cputype.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <linux/init.h>
+
+#include "sys_regs.h"
+
+static bool access_actlr(struct kvm_vcpu *vcpu,
+			 const struct sys_reg_params *p,
+			 const struct sys_reg_desc *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+
+	*vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, ACTLR_EL1);
+	return true;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	u64 actlr;
+
+	asm volatile("mrs %0, actlr_el1\n" : "=r" (actlr));
+	vcpu_sys_reg(vcpu, ACTLR_EL1) = actlr;
+}
+
+/*
+ * Implementation specific sys-reg registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ */
+static const struct sys_reg_desc genericv8_sys_regs[] = {
+	/* ACTLR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
+	  access_actlr, reset_actlr, ACTLR_EL1 },
+};
+
+static const struct sys_reg_desc genericv8_cp15_regs[] = {
+	/* ACTLR */
+	{ Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
+	  access_actlr },
+};
+
+static struct kvm_sys_reg_target_table genericv8_target_table = {
+	.table64 = {
+		.table = genericv8_sys_regs,
+		.num = ARRAY_SIZE(genericv8_sys_regs),
+	},
+	.table32 = {
+		.table = genericv8_cp15_regs,
+		.num = ARRAY_SIZE(genericv8_cp15_regs),
+	},
+};
+
+static int __init sys_reg_genericv8_init(void)
+{
+	unsigned int i;
+
+	for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++)
+		BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1],
+			       &genericv8_sys_regs[i]) >= 0);
+
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8,
+					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8,
+					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
+					  &genericv8_target_table);
+	return 0;
+}
+late_initcall(sys_reg_genericv8_init);
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 3140a2abcdc..b51d36401d8 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -2,3 +2,4 @@ obj-y				:= dma-mapping.o extable.o fault.o init.o \
 				   cache.o copypage.o flush.o \
 				   ioremap.o mmap.o pgd.o mmu.o \
 				   context.o tlb.o proc.o
+obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 1426468b77f..0ecac8980aa 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -365,17 +365,6 @@ static int __kprobes do_translation_fault(unsigned long addr,
 }
 
 /*
- * Some section permission faults need to be handled gracefully.  They can
- * happen due to a __{get,put}_user during an oops.
- */
-static int do_sect_fault(unsigned long addr, unsigned int esr,
-			 struct pt_regs *regs)
-{
-	do_bad_area(addr, esr, regs);
-	return 0;
-}
-
-/*
  * This abort handler always returns "fault".
  */
 static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
@@ -398,12 +387,12 @@ static struct fault_info {
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 2 translation fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
 	{ do_bad,		SIGBUS,  0,		"reserved access flag fault"	},
-	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},
-	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 access flag fault"	},
 	{ do_bad,		SIGBUS,  0,		"reserved permission fault"	},
-	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 1 permission fault"	},
-	{ do_sect_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 permission fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 permission fault"	},
 	{ do_bad,		SIGBUS,  0,		"synchronous external abort"	},
 	{ do_bad,		SIGBUS,  0,		"asynchronous external abort"	},
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 88611c3a421..e4193e3adc7 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -70,23 +70,16 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 #endif
 }
 
-void __flush_dcache_page(struct page *page)
-{
-	__flush_dcache_area(page_address(page), PAGE_SIZE);
-}
-
 void __sync_icache_dcache(pte_t pte, unsigned long addr)
 {
-	unsigned long pfn;
-	struct page *page;
+	struct page *page = pte_page(pte);
 
-	pfn = pte_pfn(pte);
-	if (!pfn_valid(pfn))
+	/* no flushing needed for anonymous pages */
+	if (!page_mapping(page))
 		return;
 
-	page = pfn_to_page(pfn);
 	if (!test_and_set_bit(PG_dcache_clean, &page->flags)) {
-		__flush_dcache_page(page);
+		__flush_dcache_area(page_address(page), PAGE_SIZE);
 		__flush_icache_all();
 	} else if (icache_is_aivivt()) {
 		__flush_icache_all();
@@ -94,28 +87,14 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr)
 }
 
 /*
- * Ensure cache coherency between kernel mapping and userspace mapping of this
- * page.
+ * This function is called when a page has been modified by the kernel. Mark
+ * it as dirty for later flushing when mapped in user space (if executable,
+ * see __sync_icache_dcache).
  */
 void flush_dcache_page(struct page *page)
 {
-	struct address_space *mapping;
-
-	/*
-	 * The zero page is never written to, so never has any dirty cache
-	 * lines, and therefore never needs to be flushed.
-	 */
-	if (page == ZERO_PAGE(0))
-		return;
-
-	mapping = page_mapping(page);
-	if (mapping && mapping_mapped(mapping)) {
-		__flush_dcache_page(page);
-		__flush_icache_all();
-		set_bit(PG_dcache_clean, &page->flags);
-	} else {
+	if (test_bit(PG_dcache_clean, &page->flags))
 		clear_bit(PG_dcache_clean, &page->flags);
-	}
 }
 EXPORT_SYMBOL(flush_dcache_page);
 
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
new file mode 100644
index 00000000000..2fc8258bab2
--- /dev/null
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -0,0 +1,70 @@
+/*
+ * arch/arm64/mm/hugetlbpage.c
+ *
+ * Copyright (C) 2013 Linaro Ltd.
+ *
+ * Based on arch/x86/mm/hugetlbpage.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/err.h>
+#include <linux/sysctl.h>
+#include <asm/mman.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+
+#ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+#endif
+
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+			      int write)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	return !(pmd_val(pmd) & PMD_TABLE_BIT);
+}
+
+int pud_huge(pud_t pud)
+{
+	return !(pud_val(pud) & PUD_TABLE_BIT);
+}
+
+static __init int setup_hugepagesz(char *opt)
+{
+	unsigned long size = memparse(opt, &opt);
+
+	/* Only PMD- and PUD-sized huge pages can be registered here. */
+	if (size != PMD_SIZE && size != PUD_SIZE) {
+		pr_err("hugepagesz: Unsupported page size %lu M\n", size >> 20);
+		return 0;
+	}
+	hugetlb_add_hstate((size == PMD_SIZE ? PMD_SHIFT : PUD_SHIFT) -
+			   PAGE_SHIFT);
+	return 1;
+}
+__setup("hugepagesz=", setup_hugepagesz);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index f497ca77925..67e8d7ce3fe 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -197,14 +197,6 @@ void __init bootmem_init(void)
 	max_pfn = max_low_pfn = max;
 }
 
-/*
- * Poison init memory with an undefined instruction (0x0).
- */
-static inline void poison_init_mem(void *s, size_t count)
-{
-	memset(s, 0, count);
-}
-
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
 static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn)
 {
@@ -280,59 +272,17 @@ static void __init free_unused_memmap(void)
  */
 void __init mem_init(void)
 {
-	unsigned long reserved_pages, free_pages;
-	struct memblock_region *reg;
-
 	arm64_swiotlb_init();
 
 	max_mapnr   = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
 
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
-	/* this will put all unused low memory onto the freelists */
 	free_unused_memmap();
 #endif
+	/* this will put all unused low memory onto the freelists */
+	free_all_bootmem();
 
-	totalram_pages += free_all_bootmem();
-
-	reserved_pages = free_pages = 0;
-
-	for_each_memblock(memory, reg) {
-		unsigned int pfn1, pfn2;
-		struct page *page, *end;
-
-		pfn1 = __phys_to_pfn(reg->base);
-		pfn2 = pfn1 + __phys_to_pfn(reg->size);
-
-		page = pfn_to_page(pfn1);
-		end  = pfn_to_page(pfn2 - 1) + 1;
-
-		do {
-			if (PageReserved(page))
-				reserved_pages++;
-			else if (!page_count(page))
-				free_pages++;
-			page++;
-		} while (page < end);
-	}
-
-	/*
-	 * Since our memory may not be contiguous, calculate the real number
-	 * of pages we have in this system.
-	 */
-	pr_info("Memory:");
-	num_physpages = 0;
-	for_each_memblock(memory, reg) {
-		unsigned long pages = memblock_region_memory_end_pfn(reg) -
-			memblock_region_memory_base_pfn(reg);
-		num_physpages += pages;
-		printk(" %ldMB", pages >> (20 - PAGE_SHIFT));
-	}
-	printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT));
-
-	pr_notice("Memory: %luk/%luk available, %luk reserved\n",
-		  nr_free_pages() << (PAGE_SHIFT-10),
-		  free_pages << (PAGE_SHIFT-10),
-		  reserved_pages << (PAGE_SHIFT-10));
+	mem_init_print_info(NULL);
 
 #define MLK(b, t) b, t, ((t) - (b)) >> 10
 #define MLM(b, t) b, t, ((t) - (b)) >> 20
@@ -374,7 +324,7 @@ void __init mem_init(void)
 	BUILD_BUG_ON(TASK_SIZE_64			> MODULES_VADDR);
 	BUG_ON(TASK_SIZE_64				> MODULES_VADDR);
 
-	if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
+	if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
 		extern int sysctl_overcommit_memory;
 		/*
 		 * On a machine this small we won't get anywhere without
@@ -386,7 +336,6 @@ void __init mem_init(void)
 
 void free_initmem(void)
 {
-	poison_init_mem(__init_begin, __init_end - __init_begin);
 	free_initmem_default(0);
 }
 
@@ -396,10 +345,8 @@ static int keep_initrd;
 
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	if (!keep_initrd) {
-		poison_init_mem((void *)start, PAGE_ALIGN(end) - start);
-		free_reserved_area(start, end, 0, "initrd");
-	}
+	if (!keep_initrd)
+		free_reserved_area((void *)start, (void *)end, 0, "initrd");
 }
 
 static int __init keepinitrd_setup(char *__unused)
diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h
index 916701e6d04..d519f4f50c8 100644
--- a/arch/arm64/mm/mm.h
+++ b/arch/arm64/mm/mm.h
@@ -1,3 +1,2 @@
-extern void __flush_dcache_page(struct page *page);
 extern void __init bootmem_init(void);
 extern void __init arm64_swiotlb_init(void);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index eeecc9c8ed6..a8d1059b91b 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -297,6 +297,16 @@ static void __init map_mem(void)
 {
 	struct memblock_region *reg;
 
+	/*
+	 * Temporarily limit the memblock range. We need to do this as
+	 * create_mapping requires puds, pmds and ptes to be allocated from
+	 * memory addressable from the initial direct kernel mapping.
+	 *
+	 * The initial direct kernel mapping, located at swapper_pg_dir,
+	 * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (aligned).
+	 */
+	memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE);
+
 	/* map all the memory banks */
 	for_each_memblock(memory, reg) {
 		phys_addr_t start = reg->base;
@@ -307,6 +317,9 @@ static void __init map_mem(void)
 
 		create_mapping(start, __phys_to_virt(start), end - start);
 	}
+
+	/* Limit no longer required. */
+	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
 }
 
 /*
@@ -317,12 +330,6 @@ void __init paging_init(void)
 {
 	void *zero_page;
 
-	/*
-	 * Maximum PGDIR_SIZE addressable via the initial direct kernel
-	 * mapping in swapper_pg_dir.
-	 */
-	memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE);
-
 	init_mem_pgprot();
 	map_mem();
 
@@ -339,7 +346,6 @@ void __init paging_init(void)
 	bootmem_init();
 
 	empty_zero_page = virt_to_page(zero_page);
-	__flush_dcache_page(empty_zero_page);
 
 	/*
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
diff --git a/arch/arm64/xen/Makefile b/arch/arm64/xen/Makefile
new file mode 100644
index 00000000000..be240404ba9
--- /dev/null
+++ b/arch/arm64/xen/Makefile
@@ -0,0 +1,2 @@
+xen-arm-y	+= $(addprefix ../../arm/xen/, enlighten.o grant-table.o)
+obj-y		:= xen-arm.o hypercall.o
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
new file mode 100644
index 00000000000..2816c479cd4
--- /dev/null
+++ b/arch/arm64/xen/hypercall.S
@@ -0,0 +1,92 @@
+/******************************************************************************
+ * hypercall.S
+ *
+ * Xen hypercall wrappers
+ *
+ * Stefano Stabellini <stefano.stabellini@eu.citrix.com>, Citrix, 2012
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * The Xen hypercall calling convention is very similar to the procedure
+ * call standard for the ARM 64-bit architecture: the first parameter is
+ * passed in x0, the second in x1, the third in x2, the fourth in x3 and
+ * the fifth in x4.
+ *
+ * The hypercall number is passed in x16.
+ *
+ * The return value is in x0.
+ *
+ * The hvc ISS is required to be 0xEA1, that is the Xen specific ARM
+ * hypercall tag.
+ *
+ * Parameter structs passed to hypercalls are laid out according to
+ * the ARM 64-bit EABI standard.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <xen/interface/xen.h>
+
+
+#define XEN_IMM 0xEA1
+
+#define HYPERCALL_SIMPLE(hypercall)		\
+ENTRY(HYPERVISOR_##hypercall)			\
+	mov x16, #__HYPERVISOR_##hypercall;	\
+	hvc XEN_IMM;				\
+	ret;					\
+ENDPROC(HYPERVISOR_##hypercall)
+
+#define HYPERCALL0 HYPERCALL_SIMPLE
+#define HYPERCALL1 HYPERCALL_SIMPLE
+#define HYPERCALL2 HYPERCALL_SIMPLE
+#define HYPERCALL3 HYPERCALL_SIMPLE
+#define HYPERCALL4 HYPERCALL_SIMPLE
+#define HYPERCALL5 HYPERCALL_SIMPLE
+
+                .text
+
+HYPERCALL2(xen_version);
+HYPERCALL3(console_io);
+HYPERCALL3(grant_table_op);
+HYPERCALL2(sched_op);
+HYPERCALL2(event_channel_op);
+HYPERCALL2(hvm_op);
+HYPERCALL2(memory_op);
+HYPERCALL2(physdev_op);
+HYPERCALL3(vcpu_op);
+
+ENTRY(privcmd_call)
+	mov x16, x0
+	mov x0, x1
+	mov x1, x2
+	mov x2, x3
+	mov x3, x4
+	mov x4, x5
+	hvc XEN_IMM
+	ret
+ENDPROC(privcmd_call);
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index e7b61494c31..c2731003ede 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -341,7 +341,7 @@ unsigned long get_wchan(struct task_struct *p)
 		 * is actually quite ugly. It might be possible to
 		 * determine the frame size automatically at build
 		 * time by doing this:
-		 *   - compile sched.c
+		 *   - compile sched/core.c
 		 *   - disassemble the resulting sched.o
 		 *   - look for 'sub sp,??' shortly after '<schedule>:'
 		 */
diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c
index b4247f47806..209ae5ad349 100644
--- a/arch/avr32/kernel/setup.c
+++ b/arch/avr32/kernel/setup.c
@@ -555,7 +555,7 @@ void __init setup_arch (char **cmdline_p)
 {
 	struct clk *cpu_clk;
 
-	init_mm.start_code = (unsigned long)_text;
+	init_mm.start_code = (unsigned long)_stext;
 	init_mm.end_code = (unsigned long)_etext;
 	init_mm.end_data = (unsigned long)_edata;
 	init_mm.brk = (unsigned long)_end;
diff --git a/arch/avr32/kernel/vmlinux.lds.S b/arch/avr32/kernel/vmlinux.lds.S
index 9cd2bd91d64..a4589176bed 100644
--- a/arch/avr32/kernel/vmlinux.lds.S
+++ b/arch/avr32/kernel/vmlinux.lds.S
@@ -23,7 +23,7 @@ SECTIONS
 {
 	. = CONFIG_ENTRY_ADDRESS;
 	.init		: AT(ADDR(.init) - LOAD_OFFSET) {
-		_stext = .;
+		_text = .;
 		__init_begin = .;
 			_sinittext = .;
 			*(.text.reset)
@@ -46,7 +46,7 @@ SECTIONS
 
 	.text		: AT(ADDR(.text) - LOAD_OFFSET) {
 		_evba = .;
-		_text = .;
+		_stext = .;
 		*(.ex.text)
 		*(.irq.text)
 		KPROBES_TEXT
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index 7c2f6685bf4..7f8759a8a92 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -1060,7 +1060,9 @@ struct platform_device *__init at32_add_device_usart(unsigned int id)
 
 void __init at32_setup_serial_console(unsigned int usart_id)
 {
+#ifdef CONFIG_SERIAL_ATMEL
 	atmel_default_console_device = at32_usarts[usart_id];
+#endif
 }
 
 /* --------------------------------------------------------------------
diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c
index e66e8406f99..def5391d927 100644
--- a/arch/avr32/mm/init.c
+++ b/arch/avr32/mm/init.c
@@ -100,60 +100,26 @@ void __init paging_init(void)
 
 void __init mem_init(void)
 {
-	int codesize, reservedpages, datasize, initsize;
-	int nid, i;
+	pg_data_t *pgdat;
 
-	reservedpages = 0;
 	high_memory = NULL;
+	for_each_online_pgdat(pgdat)
+		high_memory = max_t(void *, high_memory,
+				    __va(pgdat_end_pfn(pgdat) << PAGE_SHIFT));
 
-	/* this will put all low memory onto the freelists */
-	for_each_online_node(nid) {
-		pg_data_t *pgdat = NODE_DATA(nid);
-		unsigned long node_pages = 0;
-		void *node_high_memory;
-
-		num_physpages += pgdat->node_present_pages;
-
-		if (pgdat->node_spanned_pages != 0)
-			node_pages = free_all_bootmem_node(pgdat);
-
-		totalram_pages += node_pages;
-
-		for (i = 0; i < node_pages; i++)
-			if (PageReserved(pgdat->node_mem_map + i))
-				reservedpages++;
-
-		node_high_memory = (void *)((pgdat->node_start_pfn
-					     + pgdat->node_spanned_pages)
-					    << PAGE_SHIFT);
-		if (node_high_memory > high_memory)
-			high_memory = node_high_memory;
-	}
-
-	max_mapnr = MAP_NR(high_memory);
-
-	codesize = (unsigned long)_etext - (unsigned long)_text;
-	datasize = (unsigned long)_edata - (unsigned long)_data;
-	initsize = (unsigned long)__init_end - (unsigned long)__init_begin;
-
-	printk ("Memory: %luk/%luk available (%dk kernel code, "
-		"%dk reserved, %dk data, %dk init)\n",
-		nr_free_pages() << (PAGE_SHIFT - 10),
-		totalram_pages << (PAGE_SHIFT - 10),
-		codesize >> 10,
-		reservedpages << (PAGE_SHIFT - 10),
-		datasize >> 10,
-		initsize >> 10);
+	set_max_mapnr(MAP_NR(high_memory));
+	free_all_bootmem();
+	mem_init_print_info(NULL);
 }
 
 void free_initmem(void)
 {
-	free_initmem_default(0);
+	free_initmem_default(-1);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, 0, "initrd");
+	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
diff --git a/arch/blackfin/mach-bf527/boards/ad7160eval.c b/arch/blackfin/mach-bf527/boards/ad7160eval.c
index d58f50e5aa4..1e7be62fccb 100644
--- a/arch/blackfin/mach-bf527/boards/ad7160eval.c
+++ b/arch/blackfin/mach-bf527/boards/ad7160eval.c
@@ -283,14 +283,6 @@ static struct platform_device bfin_i2s = {
 };
 #endif
 
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-static struct platform_device bfin_tdm = {
-	.name = "bfin-tdm",
-	.id = CONFIG_SND_BF5XX_SPORT_NUM,
-	/* TODO: add platform data here */
-};
-#endif
-
 static struct spi_board_info bfin_spi_board_info[] __initdata = {
 #if defined(CONFIG_MTD_M25P80) \
 	|| defined(CONFIG_MTD_M25P80_MODULE)
@@ -800,10 +792,6 @@ static struct platform_device *stamp_devices[] __initdata = {
 #if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE)
 	&bfin_i2s,
 #endif
-
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-	&bfin_tdm,
-#endif
 };
 
 static int __init ad7160eval_init(void)
diff --git a/arch/blackfin/mach-bf527/boards/ezkit.c b/arch/blackfin/mach-bf527/boards/ezkit.c
index 29f16e5c37b..d0a0c5e527c 100644
--- a/arch/blackfin/mach-bf527/boards/ezkit.c
+++ b/arch/blackfin/mach-bf527/boards/ezkit.c
@@ -493,8 +493,7 @@ static const struct ad7879_platform_data bfin_ad7879_ts_info = {
 };
 #endif
 
-#if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE) || \
-	defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
+#if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE)
 
 static const u16 bfin_snd_pin[][7] = {
 	{P_SPORT0_DTPRI, P_SPORT0_TSCLK, P_SPORT0_RFS,
@@ -549,13 +548,6 @@ static struct platform_device bfin_i2s_pcm = {
 };
 #endif
 
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-static struct platform_device bfin_tdm_pcm = {
-	.name = "bfin-tdm-pcm-audio",
-	.id = -1,
-};
-#endif
-
 #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
 static struct platform_device bfin_ac97_pcm = {
 	.name = "bfin-ac97-pcm-audio",
@@ -575,22 +567,10 @@ static struct platform_device bfin_i2s = {
 };
 #endif
 
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-static struct platform_device bfin_tdm = {
-	.name = "bfin-tdm",
-	.id = CONFIG_SND_BF5XX_SPORT_NUM,
-	.num_resources = ARRAY_SIZE(bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM]),
-	.resource = bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM],
-	.dev = {
-		.platform_data = &bfin_snd_data[CONFIG_SND_BF5XX_SPORT_NUM],
-	},
-};
-#endif
-
 #if defined(CONFIG_SND_BF5XX_SOC_AD1836) \
 	        || defined(CONFIG_SND_BF5XX_SOC_AD1836_MODULE)
 static const char * const ad1836_link[] = {
-	"bfin-tdm.0",
+	"bfin-i2s.0",
 	"spi0.4",
 };
 static struct platform_device bfin_ad1836_machine = {
@@ -1269,10 +1249,6 @@ static struct platform_device *stamp_devices[] __initdata = {
 	&bfin_i2s_pcm,
 #endif
 
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-	&bfin_tdm_pcm,
-#endif
-
 #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
 	&bfin_ac97_pcm,
 #endif
@@ -1281,10 +1257,6 @@ static struct platform_device *stamp_devices[] __initdata = {
 	&bfin_i2s,
 #endif
 
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-	&bfin_tdm,
-#endif
-
 #if defined(CONFIG_SND_BF5XX_SOC_AD1836) || \
 	defined(CONFIG_SND_BF5XX_SOC_AD1836_MODULE)
 	&bfin_ad1836_machine,
diff --git a/arch/blackfin/mach-bf533/boards/ezkit.c b/arch/blackfin/mach-bf533/boards/ezkit.c
index 07811c209b9..90fb0d14b14 100644
--- a/arch/blackfin/mach-bf533/boards/ezkit.c
+++ b/arch/blackfin/mach-bf533/boards/ezkit.c
@@ -450,14 +450,6 @@ static struct platform_device bfin_i2s = {
 };
 #endif
 
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-static struct platform_device bfin_tdm = {
-	.name = "bfin-tdm",
-	.id = CONFIG_SND_BF5XX_SPORT_NUM,
-	/* TODO: add platform data here */
-};
-#endif
-
 #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
 static struct platform_device bfin_ac97 = {
 	.name = "bfin-ac97",
@@ -516,10 +508,6 @@ static struct platform_device *ezkit_devices[] __initdata = {
 	&bfin_i2s,
 #endif
 
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-	&bfin_tdm,
-#endif
-
 #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
 	&bfin_ac97,
 #endif
diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c
index 6fca8698bf3..4a8c2e3fd7e 100644
--- a/arch/blackfin/mach-bf533/boards/stamp.c
+++ b/arch/blackfin/mach-bf533/boards/stamp.c
@@ -542,8 +542,7 @@ static struct platform_device bfin_dpmc = {
 };
 
 #if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE) || \
-	defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) \
-	|| defined(CONFIG_SND_BF5XX_AC97) || \
+	defined(CONFIG_SND_BF5XX_AC97) || \
 	defined(CONFIG_SND_BF5XX_AC97_MODULE)
 
 #include <asm/bfin_sport.h>
@@ -603,13 +602,6 @@ static struct platform_device bfin_i2s_pcm = {
 };
 #endif
 
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-static struct platform_device bfin_tdm_pcm = {
-	.name = "bfin-tdm-pcm-audio",
-	.id = -1,
-};
-#endif
-
 #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
 static struct platform_device bfin_ac97_pcm = {
 	.name = "bfin-ac97-pcm-audio",
@@ -620,7 +612,7 @@ static struct platform_device bfin_ac97_pcm = {
 #if defined(CONFIG_SND_BF5XX_SOC_AD1836) \
 	        || defined(CONFIG_SND_BF5XX_SOC_AD1836_MODULE)
 static const char * const ad1836_link[] = {
-	"bfin-tdm.0",
+	"bfin-i2s.0",
 	"spi0.4",
 };
 static struct platform_device bfin_ad1836_machine = {
@@ -675,20 +667,6 @@ static struct platform_device bfin_i2s = {
 };
 #endif
 
-#if defined(CONFIG_SND_BF5XX_SOC_TDM) || \
-	defined(CONFIG_SND_BF5XX_SOC_TDM_MODULE)
-static struct platform_device bfin_tdm = {
-	.name = "bfin-tdm",
-	.id = CONFIG_SND_BF5XX_SPORT_NUM,
-	.num_resources =
-		ARRAY_SIZE(bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM]),
-	.resource = bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM],
-	.dev = {
-		.platform_data = &bfin_snd_data[CONFIG_SND_BF5XX_SPORT_NUM],
-	},
-};
-#endif
-
 #if defined(CONFIG_SND_BF5XX_SOC_AC97) || \
 	defined(CONFIG_SND_BF5XX_SOC_AC97_MODULE)
 static struct platform_device bfin_ac97 = {
@@ -761,10 +739,6 @@ static struct platform_device *stamp_devices[] __initdata = {
 	&bfin_i2s_pcm,
 #endif
 
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-	&bfin_tdm_pcm,
-#endif
-
 #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
 	&bfin_ac97_pcm,
 #endif
@@ -792,11 +766,6 @@ static struct platform_device *stamp_devices[] __initdata = {
 	&bfin_i2s,
 #endif
 
-#if defined(CONFIG_SND_BF5XX_SOC_TDM) || \
-	defined(CONFIG_SND_BF5XX_SOC_TDM_MODULE)
-	&bfin_tdm,
-#endif
-
 #if defined(CONFIG_SND_BF5XX_SOC_AC97) || \
 	defined(CONFIG_SND_BF5XX_SOC_AC97_MODULE)
 	&bfin_ac97,
diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c
index 6a3a14bcd3a..44fd1d4682a 100644
--- a/arch/blackfin/mach-bf537/boards/stamp.c
+++ b/arch/blackfin/mach-bf537/boards/stamp.c
@@ -2570,7 +2570,6 @@ static struct platform_device bfin_dpmc = {
 };
 
 #if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE) || \
-	defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) || \
 	defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
 
 #define SPORT_REQ(x) \
@@ -2628,13 +2627,6 @@ static struct platform_device bfin_i2s_pcm = {
 };
 #endif
 
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-static struct platform_device bfin_tdm_pcm = {
-	.name = "bfin-tdm-pcm-audio",
-	.id = -1,
-};
-#endif
-
 #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
 static struct platform_device bfin_ac97_pcm = {
 	.name = "bfin-ac97-pcm-audio",
@@ -2645,7 +2637,7 @@ static struct platform_device bfin_ac97_pcm = {
 #if defined(CONFIG_SND_BF5XX_SOC_AD1836) \
 	        || defined(CONFIG_SND_BF5XX_SOC_AD1836_MODULE)
 static const char * const ad1836_link[] = {
-	"bfin-tdm.0",
+	"bfin-i2s.0",
 	"spi0.4",
 };
 static struct platform_device bfin_ad1836_machine = {
@@ -2699,18 +2691,6 @@ static struct platform_device bfin_i2s = {
 };
 #endif
 
-#if defined(CONFIG_SND_BF5XX_SOC_TDM) || defined(CONFIG_SND_BF5XX_SOC_TDM_MODULE)
-static struct platform_device bfin_tdm = {
-	.name = "bfin-tdm",
-	.id = CONFIG_SND_BF5XX_SPORT_NUM,
-	.num_resources = ARRAY_SIZE(bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM]),
-	.resource = bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM],
-	.dev = {
-		.platform_data = &bfin_snd_data[CONFIG_SND_BF5XX_SPORT_NUM],
-	},
-};
-#endif
-
 #if defined(CONFIG_SND_BF5XX_SOC_AC97) || defined(CONFIG_SND_BF5XX_SOC_AC97_MODULE)
 static struct platform_device bfin_ac97 = {
 	.name = "bfin-ac97",
@@ -2935,10 +2915,6 @@ static struct platform_device *stamp_devices[] __initdata = {
 	&bfin_i2s_pcm,
 #endif
 
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-	&bfin_tdm_pcm,
-#endif
-
 #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
 	&bfin_ac97_pcm,
 #endif
@@ -2961,10 +2937,6 @@ static struct platform_device *stamp_devices[] __initdata = {
 	&bfin_i2s,
 #endif
 
-#if defined(CONFIG_SND_BF5XX_SOC_TDM) || defined(CONFIG_SND_BF5XX_SOC_TDM_MODULE)
-	&bfin_tdm,
-#endif
-
 #if defined(CONFIG_SND_BF5XX_SOC_AC97) || defined(CONFIG_SND_BF5XX_SOC_AC97_MODULE)
 	&bfin_ac97,
 #endif
diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c b/arch/blackfin/mach-bf548/boards/ezkit.c
index c4d07f04094..372eb54944e 100644
--- a/arch/blackfin/mach-bf548/boards/ezkit.c
+++ b/arch/blackfin/mach-bf548/boards/ezkit.c
@@ -1393,7 +1393,6 @@ static struct platform_device bfin_dpmc = {
 };
 
 #if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE) || \
-	defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE) || \
 	defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
 
 #define SPORT_REQ(x) \
@@ -1461,13 +1460,6 @@ static struct platform_device bfin_i2s_pcm = {
 };
 #endif
 
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-static struct platform_device bfin_tdm_pcm = {
-	.name = "bfin-tdm-pcm-audio",
-	.id = -1,
-};
-#endif
-
 #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
 static struct platform_device bfin_ac97_pcm = {
 	.name = "bfin-ac97-pcm-audio",
@@ -1501,18 +1493,6 @@ static struct platform_device bfin_i2s = {
 };
 #endif
 
-#if defined(CONFIG_SND_BF5XX_SOC_TDM) || defined(CONFIG_SND_BF5XX_SOC_TDM_MODULE)
-static struct platform_device bfin_tdm = {
-	.name = "bfin-tdm",
-	.id = CONFIG_SND_BF5XX_SPORT_NUM,
-	.num_resources = ARRAY_SIZE(bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM]),
-	.resource = bfin_snd_resources[CONFIG_SND_BF5XX_SPORT_NUM],
-	.dev = {
-		.platform_data = &bfin_snd_data[CONFIG_SND_BF5XX_SPORT_NUM],
-	},
-};
-#endif
-
 #if defined(CONFIG_SND_BF5XX_SOC_AC97) || defined(CONFIG_SND_BF5XX_SOC_AC97_MODULE)
 static struct platform_device bfin_ac97 = {
 	.name = "bfin-ac97",
@@ -1646,9 +1626,7 @@ static struct platform_device *ezkit_devices[] __initdata = {
 #if defined(CONFIG_SND_BF5XX_I2S) || defined(CONFIG_SND_BF5XX_I2S_MODULE)
 	&bfin_i2s_pcm,
 #endif
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-	&bfin_tdm_pcm,
-#endif
+
 #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
 	&bfin_ac97_pcm,
 #endif
@@ -1661,10 +1639,6 @@ static struct platform_device *ezkit_devices[] __initdata = {
 	&bfin_i2s,
 #endif
 
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-	&bfin_tdm,
-#endif
-
 #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
 	&bfin_ac97,
 #endif
diff --git a/arch/blackfin/mach-bf561/boards/ezkit.c b/arch/blackfin/mach-bf561/boards/ezkit.c
index 551f866172c..92938e79b9e 100644
--- a/arch/blackfin/mach-bf561/boards/ezkit.c
+++ b/arch/blackfin/mach-bf561/boards/ezkit.c
@@ -523,14 +523,6 @@ static struct platform_device bfin_i2s = {
 };
 #endif
 
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-static struct platform_device bfin_tdm = {
-	.name = "bfin-tdm",
-	.id = CONFIG_SND_BF5XX_SPORT_NUM,
-	/* TODO: add platform data here */
-};
-#endif
-
 #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
 static struct platform_device bfin_ac97 = {
 	.name = "bfin-ac97",
@@ -542,7 +534,7 @@ static struct platform_device bfin_ac97 = {
 #if defined(CONFIG_SND_BF5XX_SOC_AD1836) \
 	        || defined(CONFIG_SND_BF5XX_SOC_AD1836_MODULE)
 static const char * const ad1836_link[] = {
-	"bfin-tdm.0",
+	"bfin-i2s.0",
 	"spi0.4",
 };
 static struct platform_device bfin_ad1836_machine = {
@@ -611,10 +603,6 @@ static struct platform_device *ezkit_devices[] __initdata = {
 	&bfin_i2s,
 #endif
 
-#if defined(CONFIG_SND_BF5XX_TDM) || defined(CONFIG_SND_BF5XX_TDM_MODULE)
-	&bfin_tdm,
-#endif
-
 #if defined(CONFIG_SND_BF5XX_AC97) || defined(CONFIG_SND_BF5XX_AC97_MODULE)
 	&bfin_ac97,
 #endif
diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c
index 97d70163958..bba40aed427 100644
--- a/arch/blackfin/mach-bf609/boards/ezkit.c
+++ b/arch/blackfin/mach-bf609/boards/ezkit.c
@@ -821,7 +821,7 @@ static struct platform_device bfin_i2s = {
 #if defined(CONFIG_SND_BF5XX_SOC_AD1836) \
 	        || defined(CONFIG_SND_BF5XX_SOC_AD1836_MODULE)
 static const char * const ad1836_link[] = {
-	"bfin-tdm.0",
+	"bfin-i2s.0",
 	"spi0.76",
 };
 static struct platform_device bfin_ad1836_machine = {
diff --git a/arch/blackfin/mm/init.c b/arch/blackfin/mm/init.c
index 82d01a71207..166842de3dc 100644
--- a/arch/blackfin/mm/init.c
+++ b/arch/blackfin/mm/init.c
@@ -90,50 +90,24 @@ asmlinkage void __init init_pda(void)
 
 void __init mem_init(void)
 {
-	unsigned int codek = 0, datak = 0, initk = 0;
-	unsigned int reservedpages = 0, freepages = 0;
-	unsigned long tmp;
-	unsigned long start_mem = memory_start;
-	unsigned long end_mem = memory_end;
+	char buf[64];
 
-	end_mem &= PAGE_MASK;
-	high_memory = (void *)end_mem;
-
-	start_mem = PAGE_ALIGN(start_mem);
-	max_mapnr = num_physpages = MAP_NR(high_memory);
-	printk(KERN_DEBUG "Kernel managed physical pages: %lu\n", num_physpages);
+	high_memory = (void *)(memory_end & PAGE_MASK);
+	max_mapnr = MAP_NR(high_memory);
+	printk(KERN_DEBUG "Kernel managed physical pages: %lu\n", max_mapnr);
 
 	/* This will put all low memory onto the freelists. */
-	totalram_pages = free_all_bootmem();
-
-	reservedpages = 0;
-	for (tmp = ARCH_PFN_OFFSET; tmp < max_mapnr; tmp++)
-		if (PageReserved(pfn_to_page(tmp)))
-			reservedpages++;
-	freepages =  max_mapnr - ARCH_PFN_OFFSET - reservedpages;
-
-	/* do not count in kernel image between _rambase and _ramstart */
-	reservedpages -= (_ramstart - _rambase) >> PAGE_SHIFT;
-#if (defined(CONFIG_BFIN_EXTMEM_ICACHEABLE) && ANOMALY_05000263)
-	reservedpages += (_ramend - memory_end - DMA_UNCACHED_REGION) >> PAGE_SHIFT;
-#endif
-
-	codek = (_etext - _stext) >> 10;
-	initk = (__init_end - __init_begin) >> 10;
-	datak = ((_ramstart - _rambase) >> 10) - codek - initk;
+	free_all_bootmem();
 
-	printk(KERN_INFO
-	     "Memory available: %luk/%luk RAM, "
-		"(%uk init code, %uk kernel code, %uk data, %uk dma, %uk reserved)\n",
-		(unsigned long) freepages << (PAGE_SHIFT-10), (_ramend - CONFIG_PHY_RAM_BASE_ADDRESS) >> 10,
-		initk, codek, datak, DMA_UNCACHED_REGION >> 10, (reservedpages << (PAGE_SHIFT-10)));
+	snprintf(buf, sizeof(buf) - 1, "%uK DMA", DMA_UNCACHED_REGION >> 10);
+	mem_init_print_info(buf);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void __init free_initrd_mem(unsigned long start, unsigned long end)
 {
 #ifndef CONFIG_MPU
-	free_reserved_area(start, end, 0, "initrd");
+	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 #endif
 }
 #endif
@@ -141,7 +115,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 void __init_refok free_initmem(void)
 {
 #if defined CONFIG_RAMKERNEL && !defined CONFIG_MPU
-	free_initmem_default(0);
+	free_initmem_default(-1);
 	if (memory_start == (unsigned long)(&__init_end))
 		memory_start = (unsigned long)(&__init_begin);
 #endif
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 4258b088aa9..e49f918531a 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -55,3 +55,4 @@ generic-y += types.h
 generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
+generic-y += xor.h
diff --git a/arch/c6x/kernel/vmlinux.lds.S b/arch/c6x/kernel/vmlinux.lds.S
index 1d81c4c129e..279d8072512 100644
--- a/arch/c6x/kernel/vmlinux.lds.S
+++ b/arch/c6x/kernel/vmlinux.lds.S
@@ -54,16 +54,15 @@ SECTIONS
 	}
 
 	. = ALIGN(PAGE_SIZE);
+	__init_begin = .;
 	.init :
 	{
-		_stext = .;
 		_sinittext = .;
 		HEAD_TEXT
 		INIT_TEXT
 		_einittext = .;
 	}
 
-	__init_begin = _stext;
 	INIT_DATA_SECTION(16)
 
 	PERCPU_SECTION(128)
@@ -74,6 +73,7 @@ SECTIONS
 	.text :
 	{
 		_text = .;
+		_stext = .;
 		TEXT_TEXT
 		SCHED_TEXT
 		LOCK_TEXT
diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c
index a9fcd89b251..63f5560d6eb 100644
--- a/arch/c6x/mm/init.c
+++ b/arch/c6x/mm/init.c
@@ -18,6 +18,7 @@
 #include <linux/initrd.h>
 
 #include <asm/sections.h>
+#include <asm/uaccess.h>
 
 /*
  * ZERO_PAGE is a special page that is used for zero-initialized
@@ -57,31 +58,22 @@ void __init paging_init(void)
 
 void __init mem_init(void)
 {
-	int codek, datak;
-	unsigned long tmp;
-	unsigned long len = memory_end - memory_start;
-
 	high_memory = (void *)(memory_end & PAGE_MASK);
 
 	/* this will put all memory onto the freelists */
-	totalram_pages = free_all_bootmem();
-
-	codek = (_etext - _stext) >> 10;
-	datak = (_end - _sdata) >> 10;
+	free_all_bootmem();
 
-	tmp = nr_free_pages() << PAGE_SHIFT;
-	printk(KERN_INFO "Memory: %luk/%luk RAM (%dk kernel code, %dk data)\n",
-	       tmp >> 10, len >> 10, codek, datak);
+	mem_init_print_info(NULL);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void __init free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, 0, "initrd");
+	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
 
 void __init free_initmem(void)
 {
-	free_initmem_default(0);
+	free_initmem_default(-1);
 }
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 8769a9045a5..3201ddb8da6 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -134,11 +134,13 @@ config SVINTO_SIM
 
 config ETRAXFS
 	bool "ETRAX-FS-V32"
+	select CPU_FREQ_TABLE if CPU_FREQ
 	help
 	  Support CRIS V32.
 
 config CRIS_MACH_ARTPEC3
         bool "ARTPEC-3"
+	select CPU_FREQ_TABLE if CPU_FREQ
         help
           Support Axis ARTPEC-3.
 
@@ -637,40 +639,10 @@ endchoice
 
 endmenu
 
-source "drivers/base/Kconfig"
-
-# standard linux drivers
-source "drivers/mtd/Kconfig"
-
-source "drivers/parport/Kconfig"
-
-source "drivers/pnp/Kconfig"
-
-source "drivers/block/Kconfig"
-
-source "drivers/ide/Kconfig"
-
-source "drivers/net/Kconfig"
-
-source "drivers/i2c/Kconfig"
-
-source "drivers/rtc/Kconfig"
-
-#
-# input before char - char/joystick depends on it. As does USB.
-#
-source "drivers/input/Kconfig"
-
-source "drivers/char/Kconfig"
+source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-source "drivers/usb/Kconfig"
-
-source "drivers/uwb/Kconfig"
-
-source "drivers/staging/Kconfig"
-
 source "arch/cris/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/cris/arch-v10/kernel/kgdb.c b/arch/cris/arch-v10/kernel/kgdb.c
index 37e6d2c50b7..22d846bfc57 100644
--- a/arch/cris/arch-v10/kernel/kgdb.c
+++ b/arch/cris/arch-v10/kernel/kgdb.c
@@ -230,46 +230,6 @@ struct register_image
 	unsigned int    usp;   /* 0x66 User mode stack pointer */
 } registers;
 
-/************** Prototypes for local library functions ***********************/
-
-/* Copy of strcpy from libc. */
-static char *gdb_cris_strcpy (char *s1, const char *s2);
-
-/* Copy of strlen from libc. */
-static int gdb_cris_strlen (const char *s);
-
-/* Copy of memchr from libc. */
-static void *gdb_cris_memchr (const void *s, int c, int n);
-
-/* Copy of strtol from libc. Does only support base 16. */
-static int gdb_cris_strtol (const char *s, char **endptr, int base);
-
-/********************** Prototypes for local functions. **********************/
-/* Copy the content of a register image into another. The size n is
-   the size of the register image. Due to struct assignment generation of
-   memcpy in libc. */
-static void copy_registers (registers *dptr, registers *sptr, int n);
-
-/* Copy the stored registers from the stack. Put the register contents
-   of thread thread_id in the struct reg. */
-static void copy_registers_from_stack (int thread_id, registers *reg);
-
-/* Copy the registers to the stack. Put the register contents of thread
-   thread_id from struct reg to the stack. */
-static void copy_registers_to_stack (int thread_id, registers *reg);
-
-/* Write a value to a specified register regno in the register image
-   of the current thread. */
-static int write_register (int regno, char *val);
-
-/* Write a value to a specified register in the stack of a thread other
-   than the current thread. */
-static int write_stack_register(int thread_id, int regno, char *valptr);
-
-/* Read a value from a specified register in the register image. Returns the
-   status of the read operation. The register value is returned in valptr. */
-static int read_register (char regno, unsigned int *valptr);
-
 /* Serial port, reads one character. ETRAX 100 specific. from debugport.c */
 int getDebugChar (void);
 
@@ -278,42 +238,6 @@ void putDebugChar (int val);
 
 void enableDebugIRQ (void);
 
-/* Returns the integer equivalent of a hexadecimal character. */
-static int hex (char ch);
-
-/* Convert the memory, pointed to by mem into hexadecimal representation.
-   Put the result in buf, and return a pointer to the last character
-   in buf (null). */
-static char *mem2hex (char *buf, unsigned char *mem, int count);
-
-/* Convert the array, in hexadecimal representation, pointed to by buf into
-   binary representation. Put the result in mem, and return a pointer to
-   the character after the last byte written. */
-static unsigned char *hex2mem (unsigned char *mem, char *buf, int count);
-
-/* Put the content of the array, in binary representation, pointed to by buf
-   into memory pointed to by mem, and return a pointer to
-   the character after the last byte written. */
-static unsigned char *bin2mem (unsigned char *mem, unsigned char *buf, int count);
-
-/* Await the sequence $<data>#<checksum> and store <data> in the array buffer
-   returned. */
-static void getpacket (char *buffer);
-
-/* Send $<data>#<checksum> from the <data> in the array buffer. */
-static void putpacket (char *buffer);
-
-/* Build and send a response packet in order to inform the host the
-   stub is stopped. */
-static void stub_is_stopped (int sigval);
-
-/* All expected commands are sent from remote.c. Send a response according
-   to the description in remote.c. */
-static void handle_exception (int sigval);
-
-/* Performs a complete re-start from scratch. ETRAX specific. */
-static void kill_restart (void);
-
 /******************** Prototypes for global functions. ***********************/
 
 /* The string str is prepended with the GDB printout token and sent. */
@@ -336,10 +260,6 @@ extern unsigned char executing_task;
 /* The number of characters used for a 64 bit thread identifier. */
 #define HEXCHARS_IN_THREAD_ID 16
 
-/* Avoid warning as the internal_stack is not used in the C-code. */
-#define USEDVAR(name)    { if (name) { ; } }
-#define USEDFUN(name) { void (*pf)(void) = (void *)name; USEDVAR(pf) }
-
 /********************************** Packet I/O ******************************/
 /* BUFMAX defines the maximum number of characters in
    inbound/outbound buffers */
@@ -405,7 +325,7 @@ static int register_size[] =
 
 /* Contains the register image of the executing thread in the assembler
    part of the code in order to avoid horrible addressing modes. */
-static registers reg;
+registers cris_reg;
 
 /* FIXME: Should this be used? Delete otherwise. */
 /* Contains the assumed consistency state of the register image. Uses the
@@ -413,7 +333,7 @@ static registers reg;
 static int consistency_status = SUCCESS;
 
 /********************************** Handle exceptions ************************/
-/* The variable reg contains the register image associated with the
+/* The variable cris_reg contains the register image associated with the
    current_thread_c variable. It is a complete register image created at
    entry. The reg_g contains a register image of a task where the general
    registers are taken from the stack and all special registers are taken
@@ -421,18 +341,10 @@ static int consistency_status = SUCCESS;
    in order to provide access mainly for 'g', 'G' and 'P'.
 */
 
-/* Need two task id pointers in order to handle Hct and Hgt commands. */
-static int current_thread_c = 0;
-static int current_thread_g = 0;
-
-/* Need two register images in order to handle Hct and Hgt commands. The
-   variable reg_g is in addition to reg above. */
-static registers reg_g;
-
 /********************************** Breakpoint *******************************/
 /* Use an internal stack in the breakpoint and interrupt response routines */
 #define INTERNAL_STACK_SIZE 1024
-static char internal_stack[INTERNAL_STACK_SIZE];
+char internal_stack[INTERNAL_STACK_SIZE];
 
 /* Due to the breakpoint return pointer, a state variable is needed to keep
    track of whether it is a static (compiled) or dynamic (gdb-invoked)
@@ -500,164 +412,6 @@ gdb_cris_strtol (const char *s, char **endptr, int base)
 	return x;
 }
 
-/********************************* Register image ****************************/
-/* Copy the content of a register image into another. The size n is
-   the size of the register image. Due to struct assignment generation of
-   memcpy in libc. */
-static void
-copy_registers (registers *dptr, registers *sptr, int n)
-{
-	unsigned char *dreg;
-	unsigned char *sreg;
-	
-	for (dreg = (unsigned char*)dptr, sreg = (unsigned char*)sptr; n > 0; n--)
-		*dreg++ = *sreg++;
-}
-
-#ifdef PROCESS_SUPPORT
-/* Copy the stored registers from the stack. Put the register contents
-   of thread thread_id in the struct reg. */
-static void
-copy_registers_from_stack (int thread_id, registers *regptr)
-{
-	int j;
-	stack_registers *s = (stack_registers *)stack_list[thread_id];
-	unsigned int *d = (unsigned int *)regptr;
-	
-	for (j = 13; j >= 0; j--)
-		*d++ = s->r[j];
-	regptr->sp = (unsigned int)stack_list[thread_id];
-	regptr->pc = s->pc;
-	regptr->dccr = s->dccr;
-	regptr->srp = s->srp;
-}
-
-/* Copy the registers to the stack. Put the register contents of thread
-   thread_id from struct reg to the stack. */
-static void
-copy_registers_to_stack (int thread_id, registers *regptr)
-{
-	int i;
-	stack_registers *d = (stack_registers *)stack_list[thread_id];
-	unsigned int *s = (unsigned int *)regptr;
-	
-	for (i = 0; i < 14; i++) {
-		d->r[i] = *s++;
-	}
-	d->pc = regptr->pc;
-	d->dccr = regptr->dccr;
-	d->srp = regptr->srp;
-}
-#endif
-
-/* Write a value to a specified register in the register image of the current
-   thread. Returns status code SUCCESS, E02 or E05. */
-static int
-write_register (int regno, char *val)
-{
-	int status = SUCCESS;
-	registers *current_reg = &reg;
-
-        if (regno >= R0 && regno <= PC) {
-		/* 32-bit register with simple offset. */
-		hex2mem ((unsigned char *)current_reg + regno * sizeof(unsigned int),
-			 val, sizeof(unsigned int));
-	}
-        else if (regno == P0 || regno == VR || regno == P4 || regno == P8) {
-		/* Do not support read-only registers. */
-		status = E02;
-	}
-        else if (regno == CCR) {
-		/* 16 bit register with complex offset. (P4 is read-only, P6 is not implemented, 
-                   and P7 (MOF) is 32 bits in ETRAX 100LX. */
-		hex2mem ((unsigned char *)&(current_reg->ccr) + (regno-CCR) * sizeof(unsigned short),
-			 val, sizeof(unsigned short));
-	}
-	else if (regno >= MOF && regno <= USP) {
-		/* 32 bit register with complex offset.  (P8 has been taken care of.) */
-		hex2mem ((unsigned char *)&(current_reg->ibr) + (regno-IBR) * sizeof(unsigned int),
-			 val, sizeof(unsigned int));
-	} 
-        else {
-		/* Do not support nonexisting or unimplemented registers (P2, P3, and P6). */
-		status = E05;
-	}
-	return status;
-}
-
-#ifdef PROCESS_SUPPORT
-/* Write a value to a specified register in the stack of a thread other
-   than the current thread. Returns status code SUCCESS or E07. */
-static int
-write_stack_register (int thread_id, int regno, char *valptr)
-{
-	int status = SUCCESS;
-	stack_registers *d = (stack_registers *)stack_list[thread_id];
-	unsigned int val;
-	
-	hex2mem ((unsigned char *)&val, valptr, sizeof(unsigned int));
-	if (regno >= R0 && regno < SP) {
-		d->r[regno] = val;
-	}
-	else if (regno == SP) {
-		stack_list[thread_id] = val;
-	}
-	else if (regno == PC) {
-		d->pc = val;
-	}
-	else if (regno == SRP) {
-		d->srp = val;
-	}
-	else if (regno == DCCR) {
-		d->dccr = val;
-	}
-	else {
-		/* Do not support registers in the current thread. */
-		status = E07;
-	}
-	return status;
-}
-#endif
-
-/* Read a value from a specified register in the register image. Returns the
-   value in the register or -1 for non-implemented registers.
-   Should check consistency_status after a call which may be E05 after changes
-   in the implementation. */
-static int
-read_register (char regno, unsigned int *valptr)
-{
-	registers *current_reg = &reg;
-
-	if (regno >= R0 && regno <= PC) {
-		/* 32-bit register with simple offset. */
-		*valptr = *(unsigned int *)((char *)current_reg + regno * sizeof(unsigned int));
-                return SUCCESS;
-	}
-	else if (regno == P0 || regno == VR) {
-		/* 8 bit register with complex offset. */
-		*valptr = (unsigned int)(*(unsigned char *)
-                                         ((char *)&(current_reg->p0) + (regno-P0) * sizeof(char)));
-                return SUCCESS;
-	}
-	else if (regno == P4 || regno == CCR) {
-		/* 16 bit register with complex offset. */
-		*valptr = (unsigned int)(*(unsigned short *)
-                                         ((char *)&(current_reg->p4) + (regno-P4) * sizeof(unsigned short)));
-                return SUCCESS;
-	}
-	else if (regno >= MOF && regno <= USP) {
-		/* 32 bit register with complex offset. */
-		*valptr = *(unsigned int *)((char *)&(current_reg->p8)
-                                            + (regno-P8) * sizeof(unsigned int));
-                return SUCCESS;
-	}
-	else {
-		/* Do not support nonexisting or unimplemented registers (P2, P3, and P6). */
-		consistency_status = E05;
-		return E05;
-	}
-}
-
 /********************************** Packet I/O ******************************/
 /* Returns the integer equivalent of a hexadecimal character. */
 static int
@@ -676,8 +430,6 @@ hex (char ch)
    Put the result in buf, and return a pointer to the last character
    in buf (null). */
 
-static int do_printk = 0;
-
 static char *
 mem2hex(char *buf, unsigned char *mem, int count)
 {
@@ -761,7 +513,7 @@ getpacket (char *buffer)
 		xmitcsum = -1;
 		count = 0;
 		/* Read until a # or the end of the buffer is reached */
-		while (count < BUFMAX) {
+		while (count < BUFMAX - 1) {
 			ch = getDebugChar ();
 			if (ch == '#')
 				break;
@@ -845,6 +597,81 @@ putDebugString (const unsigned char *str, int length)
         putpacket(remcomOutBuffer);
 }
 
+/********************************* Register image ****************************/
+/* Write a value to a specified register in the register image of the current
+   thread. Returns status code SUCCESS, E02 or E05. */
+static int
+write_register (int regno, char *val)
+{
+	int status = SUCCESS;
+	registers *current_reg = &cris_reg;
+
+        if (regno >= R0 && regno <= PC) {
+		/* 32-bit register with simple offset. */
+		hex2mem ((unsigned char *)current_reg + regno * sizeof(unsigned int),
+			 val, sizeof(unsigned int));
+	}
+        else if (regno == P0 || regno == VR || regno == P4 || regno == P8) {
+		/* Do not support read-only registers. */
+		status = E02;
+	}
+        else if (regno == CCR) {
+		/* 16 bit register with complex offset. (P4 is read-only, P6 is not implemented, 
+                   and P7 (MOF) is 32 bits in ETRAX 100LX. */
+		hex2mem ((unsigned char *)&(current_reg->ccr) + (regno-CCR) * sizeof(unsigned short),
+			 val, sizeof(unsigned short));
+	}
+	else if (regno >= MOF && regno <= USP) {
+		/* 32 bit register with complex offset.  (P8 has been taken care of.) */
+		hex2mem ((unsigned char *)&(current_reg->ibr) + (regno-IBR) * sizeof(unsigned int),
+			 val, sizeof(unsigned int));
+	} 
+        else {
+		/* Do not support nonexisting or unimplemented registers (P2, P3, and P6). */
+		status = E05;
+	}
+	return status;
+}
+
+/* Read a value from a specified register in the register image. Returns the
+   value in the register or -1 for non-implemented registers.
+   Should check consistency_status after a call which may be E05 after changes
+   in the implementation. */
+static int
+read_register (char regno, unsigned int *valptr)
+{
+	registers *current_reg = &cris_reg;
+
+	if (regno >= R0 && regno <= PC) {
+		/* 32-bit register with simple offset. */
+		*valptr = *(unsigned int *)((char *)current_reg + regno * sizeof(unsigned int));
+                return SUCCESS;
+	}
+	else if (regno == P0 || regno == VR) {
+		/* 8 bit register with complex offset. */
+		*valptr = (unsigned int)(*(unsigned char *)
+                                         ((char *)&(current_reg->p0) + (regno-P0) * sizeof(char)));
+                return SUCCESS;
+	}
+	else if (regno == P4 || regno == CCR) {
+		/* 16 bit register with complex offset. */
+		*valptr = (unsigned int)(*(unsigned short *)
+                                         ((char *)&(current_reg->p4) + (regno-P4) * sizeof(unsigned short)));
+                return SUCCESS;
+	}
+	else if (regno >= MOF && regno <= USP) {
+		/* 32 bit register with complex offset. */
+		*valptr = *(unsigned int *)((char *)&(current_reg->p8)
+                                            + (regno-P8) * sizeof(unsigned int));
+                return SUCCESS;
+	}
+	else {
+		/* Do not support nonexisting or unimplemented registers (P2, P3, and P6). */
+		consistency_status = E05;
+		return E05;
+	}
+}
+
 /********************************** Handle exceptions ************************/
 /* Build and send a response packet in order to inform the host the
    stub is stopped. TAAn...:r...;n...:r...;n...:r...;
@@ -891,26 +718,6 @@ stub_is_stopped(int sigval)
                 
 	}
 
-#ifdef PROCESS_SUPPORT
-	/* Store the registers of the executing thread. Assume that both step,
-	   continue, and register content requests are with respect to this
-	   thread. The executing task is from the operating system scheduler. */
-
-	current_thread_c = executing_task;
-	current_thread_g = executing_task;
-
-	/* A struct assignment translates into a libc memcpy call. Avoid
-	   all libc functions in order to prevent recursive break points. */
-	copy_registers (&reg_g, &reg, sizeof(registers));
-
-	/* Store thread:r...; with the executing task TID. */
-	gdb_cris_strcpy (&remcomOutBuffer[pos], "thread:");
-	pos += gdb_cris_strlen ("thread:");
-	remcomOutBuffer[pos++] = hex_asc_hi(executing_task);
-	remcomOutBuffer[pos++] = hex_asc_lo(executing_task);
-	gdb_cris_strcpy (&remcomOutBuffer[pos], ";");
-#endif
-
 	/* null-terminate and send it off */
 
 	*ptr = 0;
@@ -918,16 +725,18 @@ stub_is_stopped(int sigval)
 	putpacket (remcomOutBuffer);
 }
 
+/* Performs a complete re-start from scratch. */
+static void
+kill_restart (void)
+{
+	machine_restart("");
+}
+
 /* All expected commands are sent from remote.c. Send a response according
    to the description in remote.c. */
-static void
+void
 handle_exception (int sigval)
 {
-	/* Avoid warning of not used. */
-
-	USEDFUN(handle_exception);
-	USEDVAR(internal_stack[0]);
-
 	/* Send response. */
 
 	stub_is_stopped (sigval);
@@ -943,19 +752,7 @@ handle_exception (int sigval)
 				   in a register  are in the same order the machine uses.
 				   Failure: void. */
 				
-				{
-#ifdef PROCESS_SUPPORT
-					/* Use the special register content in the executing thread. */
-					copy_registers (&reg_g, &reg, sizeof(registers));
-					/* Replace the content available on the stack. */
-					if (current_thread_g != executing_task) {
-						copy_registers_from_stack (current_thread_g, &reg_g);
-					}
-					mem2hex ((unsigned char *)remcomOutBuffer, (unsigned char *)&reg_g, sizeof(registers));
-#else
-					mem2hex(remcomOutBuffer, (char *)&reg, sizeof(registers));
-#endif
-				}
+				mem2hex(remcomOutBuffer, (char *)&cris_reg, sizeof(registers));
 				break;
 				
 			case 'G':
@@ -963,17 +760,7 @@ handle_exception (int sigval)
 				   Each byte of register data  is described by two hex digits.
 				   Success: OK
 				   Failure: void. */
-#ifdef PROCESS_SUPPORT
-				hex2mem ((unsigned char *)&reg_g, &remcomInBuffer[1], sizeof(registers));
-				if (current_thread_g == executing_task) {
-					copy_registers (&reg, &reg_g, sizeof(registers));
-				}
-				else {
-					copy_registers_to_stack(current_thread_g, &reg_g);
-				}
-#else
-				hex2mem((char *)&reg, &remcomInBuffer[1], sizeof(registers));
-#endif
+				hex2mem((char *)&cris_reg, &remcomInBuffer[1], sizeof(registers));
 				gdb_cris_strcpy (remcomOutBuffer, "OK");
 				break;
 				
@@ -989,12 +776,7 @@ handle_exception (int sigval)
 					char *suffix;
 					int regno = gdb_cris_strtol (&remcomInBuffer[1], &suffix, 16);
 					int status;
-#ifdef PROCESS_SUPPORT
-					if (current_thread_g != executing_task)
-						status = write_stack_register (current_thread_g, regno, suffix+1);
-					else
-#endif
-						status = write_register (regno, suffix+1);
+					status = write_register (regno, suffix+1);
 
 					switch (status) {
 						case E02:
@@ -1073,7 +855,7 @@ handle_exception (int sigval)
 				   Success: return to the executing thread.
 				   Failure: will never know. */
 				if (remcomInBuffer[1] != '\0') {
-					reg.pc = gdb_cris_strtol (&remcomInBuffer[1], 0, 16);
+					cris_reg.pc = gdb_cris_strtol (&remcomInBuffer[1], 0, 16);
 				}
 				enableDebugIRQ();
 				return;
@@ -1129,119 +911,6 @@ handle_exception (int sigval)
 				   Not supported: E04 */
 				gdb_cris_strcpy (remcomOutBuffer, error_message[E04]);
 				break;
-#ifdef PROCESS_SUPPORT
-
-			case 'T':
-				/* Thread alive. TXX
-				   Is thread XX alive?
-				   Success: OK, thread XX is alive.
-				   Failure: E03, thread XX is dead. */
-				{
-					int thread_id = (int)gdb_cris_strtol (&remcomInBuffer[1], 0, 16);
-					/* Cannot tell whether it is alive or not. */
-					if (thread_id >= 0 && thread_id < number_of_tasks)
-						gdb_cris_strcpy (remcomOutBuffer, "OK");
-				}
-				break;
-								
-			case 'H':
-				/* Set thread for subsequent operations: Hct
-				   c = 'c' for thread used in step and continue;
-				   t can be -1 for all threads.
-				   c = 'g' for thread used in other  operations.
-				   t = 0 means pick any thread.
-				   Success: OK
-				   Failure: E01 */
-				{
-					int thread_id = gdb_cris_strtol (&remcomInBuffer[2], 0, 16);
-					if (remcomInBuffer[1] == 'c') {
-						/* c = 'c' for thread used in step and continue */
-						/* Do not change current_thread_c here. It would create a mess in
-						   the scheduler. */
-						gdb_cris_strcpy (remcomOutBuffer, "OK");
-					}
-					else if (remcomInBuffer[1] == 'g') {
-						/* c = 'g' for thread used in other  operations.
-						   t = 0 means pick any thread. Impossible since the scheduler does
-						   not allow that. */
-						if (thread_id >= 0 && thread_id < number_of_tasks) {
-							current_thread_g = thread_id;
-							gdb_cris_strcpy (remcomOutBuffer, "OK");
-						}
-						else {
-							/* Not expected - send an error message. */
-							gdb_cris_strcpy (remcomOutBuffer, error_message[E01]);
-						}
-					}
-					else {
-						/* Not expected - send an error message. */
-						gdb_cris_strcpy (remcomOutBuffer, error_message[E01]);
-					}
-				}
-				break;
-				
-			case 'q':
-			case 'Q':
-				/* Query of general interest. qXXXX
-				   Set general value XXXX. QXXXX=yyyy */
-				{
-					int pos;
-					int nextpos;
-					int thread_id;
-					
-					switch (remcomInBuffer[1]) {
-						case 'C':
-							/* Identify the remote current thread. */
-							gdb_cris_strcpy (&remcomOutBuffer[0], "QC");
-							remcomOutBuffer[2] = hex_asc_hi(current_thread_c);
-							remcomOutBuffer[3] = hex_asc_lo(current_thread_c);
-							remcomOutBuffer[4] = '\0';
-							break;
-						case 'L':
-							gdb_cris_strcpy (&remcomOutBuffer[0], "QM");
-							/* Reply with number of threads. */
-							if (os_is_started()) {
-								remcomOutBuffer[2] = hex_asc_hi(number_of_tasks);
-								remcomOutBuffer[3] = hex_asc_lo(number_of_tasks);
-							}
-							else {
-								remcomOutBuffer[2] = hex_asc_hi(0);
-								remcomOutBuffer[3] = hex_asc_lo(1);
-							}
-							/* Done with the reply. */
-							remcomOutBuffer[4] = hex_asc_lo(1);
-							pos = 5;
-							/* Expects the argument thread id. */
-							for (; pos < (5 + HEXCHARS_IN_THREAD_ID); pos++)
-								remcomOutBuffer[pos] = remcomInBuffer[pos];
-							/* Reply with the thread identifiers. */
-							if (os_is_started()) {
-								/* Store the thread identifiers of all tasks. */
-								for (thread_id = 0; thread_id < number_of_tasks; thread_id++) {
-									nextpos = pos + HEXCHARS_IN_THREAD_ID - 1;
-									for (; pos < nextpos; pos ++)
-										remcomOutBuffer[pos] = hex_asc_lo(0);
-									remcomOutBuffer[pos++] = hex_asc_lo(thread_id);
-								}
-							}
-							else {
-								/* Store the thread identifier of the boot task. */
-								nextpos = pos + HEXCHARS_IN_THREAD_ID - 1;
-								for (; pos < nextpos; pos ++)
-									remcomOutBuffer[pos] = hex_asc_lo(0);
-								remcomOutBuffer[pos++] = hex_asc_lo(current_thread_c);
-							}
-							remcomOutBuffer[pos] = '\0';
-							break;
-						default:
-							/* Not supported: "" */
-							/* Request information about section offsets: qOffsets. */
-							remcomOutBuffer[0] = 0;
-							break;
-					}
-				}
-				break;
-#endif /* PROCESS_SUPPORT */
 				
 			default:
 				/* The stub should ignore other request and send an empty
@@ -1254,13 +923,6 @@ handle_exception (int sigval)
 	}
 }
 
-/* Performs a complete re-start from scratch. */
-static void
-kill_restart ()
-{
-	machine_restart("");
-}
-
 /********************************** Breakpoint *******************************/
 /* The hook for both a static (compiled) and a dynamic breakpoint set by GDB.
    An internal stack is used by the stub. The register image of the caller is
@@ -1270,93 +932,93 @@ kill_restart ()
 
 void kgdb_handle_breakpoint(void);
 
-asm ("
-  .global kgdb_handle_breakpoint
-kgdb_handle_breakpoint:
-;;
-;; Response to the break-instruction
-;;
-;; Create a register image of the caller
-;;
-  move     $dccr,[reg+0x5E] ; Save the flags in DCCR before disable interrupts
-  di                        ; Disable interrupts
-  move.d   $r0,[reg]        ; Save R0
-  move.d   $r1,[reg+0x04]   ; Save R1
-  move.d   $r2,[reg+0x08]   ; Save R2
-  move.d   $r3,[reg+0x0C]   ; Save R3
-  move.d   $r4,[reg+0x10]   ; Save R4
-  move.d   $r5,[reg+0x14]   ; Save R5
-  move.d   $r6,[reg+0x18]   ; Save R6
-  move.d   $r7,[reg+0x1C]   ; Save R7
-  move.d   $r8,[reg+0x20]   ; Save R8
-  move.d   $r9,[reg+0x24]   ; Save R9
-  move.d   $r10,[reg+0x28]  ; Save R10
-  move.d   $r11,[reg+0x2C]  ; Save R11
-  move.d   $r12,[reg+0x30]  ; Save R12
-  move.d   $r13,[reg+0x34]  ; Save R13
-  move.d   $sp,[reg+0x38]   ; Save SP (R14)
-;; Due to the old assembler-versions BRP might not be recognized
-  .word 0xE670              ; move brp,$r0
-  subq     2,$r0             ; Set to address of previous instruction.
-  move.d   $r0,[reg+0x3c]   ; Save the address in PC (R15)
-  clear.b  [reg+0x40]      ; Clear P0
-  move     $vr,[reg+0x41]   ; Save special register P1
-  clear.w  [reg+0x42]      ; Clear P4
-  move     $ccr,[reg+0x44]  ; Save special register CCR
-  move     $mof,[reg+0x46]  ; P7
-  clear.d  [reg+0x4A]      ; Clear P8
-  move     $ibr,[reg+0x4E]  ; P9,
-  move     $irp,[reg+0x52]  ; P10,
-  move     $srp,[reg+0x56]  ; P11,
-  move     $dtp0,[reg+0x5A] ; P12, register BAR, assembler might not know BAR
-                            ; P13, register DCCR already saved
-;; Due to the old assembler-versions BRP might not be recognized
-  .word 0xE670              ; move brp,r0
-;; Static (compiled) breakpoints must return to the next instruction in order
-;; to avoid infinite loops. Dynamic (gdb-invoked) must restore the instruction
-;; in order to execute it when execution is continued.
-  test.b   [is_dyn_brkp]    ; Is this a dynamic breakpoint?
-  beq      is_static         ; No, a static breakpoint
-  nop
-  subq     2,$r0              ; rerun the instruction the break replaced
-is_static:
-  moveq    1,$r1
-  move.b   $r1,[is_dyn_brkp] ; Set the state variable to dynamic breakpoint
-  move.d   $r0,[reg+0x62]    ; Save the return address in BRP
-  move     $usp,[reg+0x66]   ; USP
-;;
-;; Handle the communication
-;;
-  move.d   internal_stack+1020,$sp ; Use the internal stack which grows upward
-  moveq    5,$r10                   ; SIGTRAP
-  jsr      handle_exception       ; Interactive routine
-;;
-;; Return to the caller
-;;
-   move.d  [reg],$r0         ; Restore R0
-   move.d  [reg+0x04],$r1    ; Restore R1
-   move.d  [reg+0x08],$r2    ; Restore R2
-   move.d  [reg+0x0C],$r3    ; Restore R3
-   move.d  [reg+0x10],$r4    ; Restore R4
-   move.d  [reg+0x14],$r5    ; Restore R5
-   move.d  [reg+0x18],$r6    ; Restore R6
-   move.d  [reg+0x1C],$r7    ; Restore R7
-   move.d  [reg+0x20],$r8    ; Restore R8
-   move.d  [reg+0x24],$r9    ; Restore R9
-   move.d  [reg+0x28],$r10   ; Restore R10
-   move.d  [reg+0x2C],$r11   ; Restore R11
-   move.d  [reg+0x30],$r12   ; Restore R12
-   move.d  [reg+0x34],$r13   ; Restore R13
-;;
-;; FIXME: Which registers should be restored?
-;;
-   move.d  [reg+0x38],$sp    ; Restore SP (R14)
-   move    [reg+0x56],$srp   ; Restore the subroutine return pointer.
-   move    [reg+0x5E],$dccr  ; Restore DCCR
-   move    [reg+0x66],$usp   ; Restore USP
-   jump    [reg+0x62]       ; A jump to the content in register BRP works.
-   nop                       ;
-");
+asm ("\n"
+"  .global kgdb_handle_breakpoint\n"
+"kgdb_handle_breakpoint:\n"
+";;\n"
+";; Response to the break-instruction\n"
+";;\n"
+";; Create a register image of the caller\n"
+";;\n"
+"  move     $dccr,[cris_reg+0x5E] ; Save the flags in DCCR before disable interrupts\n"
+"  di                        ; Disable interrupts\n"
+"  move.d   $r0,[cris_reg]        ; Save R0\n"
+"  move.d   $r1,[cris_reg+0x04]   ; Save R1\n"
+"  move.d   $r2,[cris_reg+0x08]   ; Save R2\n"
+"  move.d   $r3,[cris_reg+0x0C]   ; Save R3\n"
+"  move.d   $r4,[cris_reg+0x10]   ; Save R4\n"
+"  move.d   $r5,[cris_reg+0x14]   ; Save R5\n"
+"  move.d   $r6,[cris_reg+0x18]   ; Save R6\n"
+"  move.d   $r7,[cris_reg+0x1C]   ; Save R7\n"
+"  move.d   $r8,[cris_reg+0x20]   ; Save R8\n"
+"  move.d   $r9,[cris_reg+0x24]   ; Save R9\n"
+"  move.d   $r10,[cris_reg+0x28]  ; Save R10\n"
+"  move.d   $r11,[cris_reg+0x2C]  ; Save R11\n"
+"  move.d   $r12,[cris_reg+0x30]  ; Save R12\n"
+"  move.d   $r13,[cris_reg+0x34]  ; Save R13\n"
+"  move.d   $sp,[cris_reg+0x38]   ; Save SP (R14)\n"
+";; Due to the old assembler-versions BRP might not be recognized\n"
+"  .word 0xE670              ; move brp,$r0\n"
+"  subq     2,$r0             ; Set to address of previous instruction.\n"
+"  move.d   $r0,[cris_reg+0x3c]   ; Save the address in PC (R15)\n"
+"  clear.b  [cris_reg+0x40]      ; Clear P0\n"
+"  move     $vr,[cris_reg+0x41]   ; Save special register P1\n"
+"  clear.w  [cris_reg+0x42]      ; Clear P4\n"
+"  move     $ccr,[cris_reg+0x44]  ; Save special register CCR\n"
+"  move     $mof,[cris_reg+0x46]  ; P7\n"
+"  clear.d  [cris_reg+0x4A]      ; Clear P8\n"
+"  move     $ibr,[cris_reg+0x4E]  ; P9,\n"
+"  move     $irp,[cris_reg+0x52]  ; P10,\n"
+"  move     $srp,[cris_reg+0x56]  ; P11,\n"
+"  move     $dtp0,[cris_reg+0x5A] ; P12, register BAR, assembler might not know BAR\n"
+"                            ; P13, register DCCR already saved\n"
+";; Due to the old assembler-versions BRP might not be recognized\n"
+"  .word 0xE670              ; move brp,r0\n"
+";; Static (compiled) breakpoints must return to the next instruction in order\n"
+";; to avoid infinite loops. Dynamic (gdb-invoked) must restore the instruction\n"
+";; in order to execute it when execution is continued.\n"
+"  test.b   [is_dyn_brkp]    ; Is this a dynamic breakpoint?\n"
+"  beq      is_static         ; No, a static breakpoint\n"
+"  nop\n"
+"  subq     2,$r0              ; rerun the instruction the break replaced\n"
+"is_static:\n"
+"  moveq    1,$r1\n"
+"  move.b   $r1,[is_dyn_brkp] ; Set the state variable to dynamic breakpoint\n"
+"  move.d   $r0,[cris_reg+0x62]    ; Save the return address in BRP\n"
+"  move     $usp,[cris_reg+0x66]   ; USP\n"
+";;\n"
+";; Handle the communication\n"
+";;\n"
+"  move.d   internal_stack+1020,$sp ; Use the internal stack which grows upward\n"
+"  moveq    5,$r10                   ; SIGTRAP\n"
+"  jsr      handle_exception       ; Interactive routine\n"
+";;\n"
+";; Return to the caller\n"
+";;\n"
+"   move.d  [cris_reg],$r0         ; Restore R0\n"
+"   move.d  [cris_reg+0x04],$r1    ; Restore R1\n"
+"   move.d  [cris_reg+0x08],$r2    ; Restore R2\n"
+"   move.d  [cris_reg+0x0C],$r3    ; Restore R3\n"
+"   move.d  [cris_reg+0x10],$r4    ; Restore R4\n"
+"   move.d  [cris_reg+0x14],$r5    ; Restore R5\n"
+"   move.d  [cris_reg+0x18],$r6    ; Restore R6\n"
+"   move.d  [cris_reg+0x1C],$r7    ; Restore R7\n"
+"   move.d  [cris_reg+0x20],$r8    ; Restore R8\n"
+"   move.d  [cris_reg+0x24],$r9    ; Restore R9\n"
+"   move.d  [cris_reg+0x28],$r10   ; Restore R10\n"
+"   move.d  [cris_reg+0x2C],$r11   ; Restore R11\n"
+"   move.d  [cris_reg+0x30],$r12   ; Restore R12\n"
+"   move.d  [cris_reg+0x34],$r13   ; Restore R13\n"
+";;\n"
+";; FIXME: Which registers should be restored?\n"
+";;\n"
+"   move.d  [cris_reg+0x38],$sp    ; Restore SP (R14)\n"
+"   move    [cris_reg+0x56],$srp   ; Restore the subroutine return pointer.\n"
+"   move    [cris_reg+0x5E],$dccr  ; Restore DCCR\n"
+"   move    [cris_reg+0x66],$usp   ; Restore USP\n"
+"   jump    [cris_reg+0x62]       ; A jump to the content in register BRP works.\n"
+"   nop                       ;\n"
+"\n");
 
 /* The hook for an interrupt generated by GDB. An internal stack is used
    by the stub. The register image of the caller is stored in the structure
@@ -1367,94 +1029,94 @@ is_static:
 
 void kgdb_handle_serial(void);
 
-asm ("
-  .global kgdb_handle_serial
-kgdb_handle_serial:
-;;
-;; Response to a serial interrupt
-;;
-
-  move     $dccr,[reg+0x5E] ; Save the flags in DCCR
-  di                        ; Disable interrupts
-  move.d   $r0,[reg]        ; Save R0
-  move.d   $r1,[reg+0x04]   ; Save R1
-  move.d   $r2,[reg+0x08]   ; Save R2
-  move.d   $r3,[reg+0x0C]   ; Save R3
-  move.d   $r4,[reg+0x10]   ; Save R4
-  move.d   $r5,[reg+0x14]   ; Save R5
-  move.d   $r6,[reg+0x18]   ; Save R6
-  move.d   $r7,[reg+0x1C]   ; Save R7
-  move.d   $r8,[reg+0x20]   ; Save R8
-  move.d   $r9,[reg+0x24]   ; Save R9
-  move.d   $r10,[reg+0x28]  ; Save R10
-  move.d   $r11,[reg+0x2C]  ; Save R11
-  move.d   $r12,[reg+0x30]  ; Save R12
-  move.d   $r13,[reg+0x34]  ; Save R13
-  move.d   $sp,[reg+0x38]   ; Save SP (R14)
-  move     $irp,[reg+0x3c]  ; Save the address in PC (R15)
-  clear.b  [reg+0x40]      ; Clear P0
-  move     $vr,[reg+0x41]   ; Save special register P1,
-  clear.w  [reg+0x42]      ; Clear P4
-  move     $ccr,[reg+0x44]  ; Save special register CCR
-  move     $mof,[reg+0x46]  ; P7
-  clear.d  [reg+0x4A]      ; Clear P8
-  move     $ibr,[reg+0x4E]  ; P9,
-  move     $irp,[reg+0x52]  ; P10,
-  move     $srp,[reg+0x56]  ; P11,
-  move     $dtp0,[reg+0x5A] ; P12, register BAR, assembler might not know BAR
-                            ; P13, register DCCR already saved
-;; Due to the old assembler-versions BRP might not be recognized
-  .word 0xE670              ; move brp,r0
-  move.d   $r0,[reg+0x62]   ; Save the return address in BRP
-  move     $usp,[reg+0x66]  ; USP
-
-;; get the serial character (from debugport.c) and check if it is a ctrl-c
-
-  jsr getDebugChar
-  cmp.b 3, $r10
-  bne goback
-  nop
-
-  move.d  [reg+0x5E], $r10		; Get DCCR
-  btstq	   8, $r10			; Test the U-flag.
-  bmi	   goback
-  nop
-
-;;
-;; Handle the communication
-;;
-  move.d   internal_stack+1020,$sp ; Use the internal stack
-  moveq    2,$r10                   ; SIGINT
-  jsr      handle_exception       ; Interactive routine
-
-goback:
-;;
-;; Return to the caller
-;;
-   move.d  [reg],$r0         ; Restore R0
-   move.d  [reg+0x04],$r1    ; Restore R1
-   move.d  [reg+0x08],$r2    ; Restore R2
-   move.d  [reg+0x0C],$r3    ; Restore R3
-   move.d  [reg+0x10],$r4    ; Restore R4
-   move.d  [reg+0x14],$r5    ; Restore R5
-   move.d  [reg+0x18],$r6    ; Restore R6
-   move.d  [reg+0x1C],$r7    ; Restore R7
-   move.d  [reg+0x20],$r8    ; Restore R8
-   move.d  [reg+0x24],$r9    ; Restore R9
-   move.d  [reg+0x28],$r10   ; Restore R10
-   move.d  [reg+0x2C],$r11   ; Restore R11
-   move.d  [reg+0x30],$r12   ; Restore R12
-   move.d  [reg+0x34],$r13   ; Restore R13
-;;
-;; FIXME: Which registers should be restored?
-;;
-   move.d  [reg+0x38],$sp    ; Restore SP (R14)
-   move    [reg+0x56],$srp   ; Restore the subroutine return pointer.
-   move    [reg+0x5E],$dccr  ; Restore DCCR
-   move    [reg+0x66],$usp   ; Restore USP
-   reti                      ; Return from the interrupt routine
-   nop
-");
+asm ("\n"
+"  .global kgdb_handle_serial\n"
+"kgdb_handle_serial:\n"
+";;\n"
+";; Response to a serial interrupt\n"
+";;\n"
+"\n"
+"  move     $dccr,[cris_reg+0x5E] ; Save the flags in DCCR\n"
+"  di                        ; Disable interrupts\n"
+"  move.d   $r0,[cris_reg]        ; Save R0\n"
+"  move.d   $r1,[cris_reg+0x04]   ; Save R1\n"
+"  move.d   $r2,[cris_reg+0x08]   ; Save R2\n"
+"  move.d   $r3,[cris_reg+0x0C]   ; Save R3\n"
+"  move.d   $r4,[cris_reg+0x10]   ; Save R4\n"
+"  move.d   $r5,[cris_reg+0x14]   ; Save R5\n"
+"  move.d   $r6,[cris_reg+0x18]   ; Save R6\n"
+"  move.d   $r7,[cris_reg+0x1C]   ; Save R7\n"
+"  move.d   $r8,[cris_reg+0x20]   ; Save R8\n"
+"  move.d   $r9,[cris_reg+0x24]   ; Save R9\n"
+"  move.d   $r10,[cris_reg+0x28]  ; Save R10\n"
+"  move.d   $r11,[cris_reg+0x2C]  ; Save R11\n"
+"  move.d   $r12,[cris_reg+0x30]  ; Save R12\n"
+"  move.d   $r13,[cris_reg+0x34]  ; Save R13\n"
+"  move.d   $sp,[cris_reg+0x38]   ; Save SP (R14)\n"
+"  move     $irp,[cris_reg+0x3c]  ; Save the address in PC (R15)\n"
+"  clear.b  [cris_reg+0x40]      ; Clear P0\n"
+"  move     $vr,[cris_reg+0x41]   ; Save special register P1,\n"
+"  clear.w  [cris_reg+0x42]      ; Clear P4\n"
+"  move     $ccr,[cris_reg+0x44]  ; Save special register CCR\n"
+"  move     $mof,[cris_reg+0x46]  ; P7\n"
+"  clear.d  [cris_reg+0x4A]      ; Clear P8\n"
+"  move     $ibr,[cris_reg+0x4E]  ; P9,\n"
+"  move     $irp,[cris_reg+0x52]  ; P10,\n"
+"  move     $srp,[cris_reg+0x56]  ; P11,\n"
+"  move     $dtp0,[cris_reg+0x5A] ; P12, register BAR, assembler might not know BAR\n"
+"                            ; P13, register DCCR already saved\n"
+";; Due to the old assembler-versions BRP might not be recognized\n"
+"  .word 0xE670              ; move brp,r0\n"
+"  move.d   $r0,[cris_reg+0x62]   ; Save the return address in BRP\n"
+"  move     $usp,[cris_reg+0x66]  ; USP\n"
+"\n"
+";; get the serial character (from debugport.c) and check if it is a ctrl-c\n"
+"\n"
+"  jsr getDebugChar\n"
+"  cmp.b 3, $r10\n"
+"  bne goback\n"
+"  nop\n"
+"\n"
+"  move.d  [cris_reg+0x5E], $r10		; Get DCCR\n"
+"  btstq	   8, $r10			; Test the U-flag.\n"
+"  bmi	   goback\n"
+"  nop\n"
+"\n"
+";;\n"
+";; Handle the communication\n"
+";;\n"
+"  move.d   internal_stack+1020,$sp ; Use the internal stack\n"
+"  moveq    2,$r10                   ; SIGINT\n"
+"  jsr      handle_exception       ; Interactive routine\n"
+"\n"
+"goback:\n"
+";;\n"
+";; Return to the caller\n"
+";;\n"
+"   move.d  [cris_reg],$r0         ; Restore R0\n"
+"   move.d  [cris_reg+0x04],$r1    ; Restore R1\n"
+"   move.d  [cris_reg+0x08],$r2    ; Restore R2\n"
+"   move.d  [cris_reg+0x0C],$r3    ; Restore R3\n"
+"   move.d  [cris_reg+0x10],$r4    ; Restore R4\n"
+"   move.d  [cris_reg+0x14],$r5    ; Restore R5\n"
+"   move.d  [cris_reg+0x18],$r6    ; Restore R6\n"
+"   move.d  [cris_reg+0x1C],$r7    ; Restore R7\n"
+"   move.d  [cris_reg+0x20],$r8    ; Restore R8\n"
+"   move.d  [cris_reg+0x24],$r9    ; Restore R9\n"
+"   move.d  [cris_reg+0x28],$r10   ; Restore R10\n"
+"   move.d  [cris_reg+0x2C],$r11   ; Restore R11\n"
+"   move.d  [cris_reg+0x30],$r12   ; Restore R12\n"
+"   move.d  [cris_reg+0x34],$r13   ; Restore R13\n"
+";;\n"
+";; FIXME: Which registers should be restored?\n"
+";;\n"
+"   move.d  [cris_reg+0x38],$sp    ; Restore SP (R14)\n"
+"   move    [cris_reg+0x56],$srp   ; Restore the subroutine return pointer.\n"
+"   move    [cris_reg+0x5E],$dccr  ; Restore DCCR\n"
+"   move    [cris_reg+0x66],$usp   ; Restore USP\n"
+"   reti                      ; Return from the interrupt routine\n"
+"   nop\n"
+"\n");
 
 /* Use this static breakpoint in the start-up only. */
 
diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig
index ab725edbc68..acff3df8c43 100644
--- a/arch/cris/arch-v32/drivers/Kconfig
+++ b/arch/cris/arch-v32/drivers/Kconfig
@@ -640,8 +640,6 @@ config ETRAX_STREAMCOPROC
 	  This option enables a driver for the stream co-processor
 	  for cryptographic operations.
 
-source drivers/mmc/Kconfig
-
 config ETRAX_MMC_IOP
 	tristate "MMC/SD host driver using IO-processor"
 	depends on ETRAX_ARCH_V32 && MMC
@@ -833,9 +831,4 @@ config ETRAX_SPI_MMC_WP_GPIO_PIN
 	  The pin to use for the SD/MMC write-protect signal for a memory
 	  card.  If defined as " " (space), the card is considered writable.
 
-# Avoid choices causing non-working configs by conditionalizing the inclusion.
-if ETRAX_SPI_MMC
-source drivers/spi/Kconfig
-endif
-
 endif
diff --git a/arch/cris/include/arch-v10/arch/bitops.h b/arch/cris/include/arch-v10/arch/bitops.h
index be85f6de25d..03d9cfd92c8 100644
--- a/arch/cris/include/arch-v10/arch/bitops.h
+++ b/arch/cris/include/arch-v10/arch/bitops.h
@@ -17,7 +17,7 @@ static inline unsigned long cris_swapnwbrlz(unsigned long w)
 	   in another register:
 	   !  __asm__ ("swapnwbr %2\n\tlz %2,%0"
 	   !	      : "=r,r" (res), "=r,X" (dummy) : "1,0" (w));
-	   confuses gcc (sched.c, gcc from cris-dist-1.14).  */
+	   confuses gcc (core.c, gcc from cris-dist-1.14).  */
 
 	unsigned long res;
 	__asm__ ("swapnwbr %0 \n\t"
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index f1e79edc9dd..c8325455520 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -5,5 +5,9 @@ header-y += arch-v32/
 
 generic-y += clkdev.h
 generic-y += exec.h
+generic-y += kvm_para.h
+generic-y += linkage.h
 generic-y += module.h
 generic-y += trace_clock.h
+generic-y += vga.h
+generic-y += xor.h
diff --git a/arch/cris/include/asm/io.h b/arch/cris/include/asm/io.h
index ac12ae2b928..5d3047e5563 100644
--- a/arch/cris/include/asm/io.h
+++ b/arch/cris/include/asm/io.h
@@ -167,6 +167,9 @@ static inline void outsl(unsigned int port, const void *addr,
 		cris_iops->write_io(port, (void *)addr, 4, count);
 }
 
+#define inb_p(port)             inb(port)
+#define outb_p(val, port)       outb((val), (port))
+
 /*
  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
  * access
diff --git a/arch/cris/include/asm/linkage.h b/arch/cris/include/asm/linkage.h
deleted file mode 100644
index 291c2d01c44..00000000000
--- a/arch/cris/include/asm/linkage.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_LINKAGE_H
-#define __ASM_LINKAGE_H
-
-/* Nothing to see here... */
-
-#endif
diff --git a/arch/cris/include/asm/page.h b/arch/cris/include/asm/page.h
index be45ee366be..dfc53f9b88e 100644
--- a/arch/cris/include/asm/page.h
+++ b/arch/cris/include/asm/page.h
@@ -51,7 +51,6 @@ typedef struct page *pgtable_t;
  */ 
 
 #define virt_to_page(kaddr)    (mem_map + (((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT))
-#define VALID_PAGE(page)       (((page) - mem_map) < max_mapnr)
 #define virt_addr_valid(kaddr)	pfn_valid((unsigned)(kaddr) >> PAGE_SHIFT)
 
 /* convert a page (based on mem_map and forward) to a physical address
diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c
index 9ac80946dad..c81af5bd916 100644
--- a/arch/cris/mm/init.c
+++ b/arch/cris/mm/init.c
@@ -19,9 +19,6 @@ unsigned long empty_zero_page;
 void __init
 mem_init(void)
 {
-	int codesize, reservedpages, datasize, initsize;
-	unsigned long tmp;
-
 	BUG_ON(!mem_map);
 
 	/* max/min_low_pfn was set by setup.c
@@ -29,35 +26,9 @@ mem_init(void)
 	 *
 	 * high_memory was also set in setup.c
 	 */
-
-	max_mapnr = num_physpages = max_low_pfn - min_low_pfn;
- 
-	/* this will put all memory onto the freelists */
-        totalram_pages = free_all_bootmem();
-
-	reservedpages = 0;
-	for (tmp = 0; tmp < max_mapnr; tmp++) {
-		/*
-                 * Only count reserved RAM pages
-                 */
-		if (PageReserved(mem_map + tmp))
-			reservedpages++;
-	}
-
-	codesize =  (unsigned long) &_etext - (unsigned long) &_stext;
-        datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
-        initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
-	
-        printk(KERN_INFO
-               "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, "
-	       "%dk init)\n" ,
-	       nr_free_pages() << (PAGE_SHIFT-10),
-	       max_mapnr << (PAGE_SHIFT-10),
-	       codesize >> 10,
-	       reservedpages << (PAGE_SHIFT-10),
-	       datasize >> 10,
-	       initsize >> 10
-               );
+	max_mapnr = max_low_pfn - min_low_pfn;
+        free_all_bootmem();
+	mem_init_print_info(NULL);
 }
 
 /* free the pages occupied by initialization code */
@@ -65,5 +36,5 @@ mem_init(void)
 void 
 free_initmem(void)
 {
-	free_initmem_default(0);
+	free_initmem_default(-1);
 }
diff --git a/arch/frv/include/asm/uaccess.h b/arch/frv/include/asm/uaccess.h
index 0b67ec5b441..3ac9a59d65d 100644
--- a/arch/frv/include/asm/uaccess.h
+++ b/arch/frv/include/asm/uaccess.h
@@ -280,14 +280,14 @@ extern long __memcpy_user(void *dst, const void *src, unsigned long count);
 static inline unsigned long __must_check
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-       might_sleep();
+       might_fault();
        return __copy_to_user_inatomic(to, from, n);
 }
 
 static inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-       might_sleep();
+       might_fault();
        return __copy_from_user_inatomic(to, from, n);
 }
 
diff --git a/arch/frv/kernel/head.S b/arch/frv/kernel/head.S
index e9a8cc63ac9..a7d0bea9c03 100644
--- a/arch/frv/kernel/head.S
+++ b/arch/frv/kernel/head.S
@@ -479,11 +479,6 @@ __head_mmu_enabled:
 
 	LEDS		0x000c
 
-	# initialise the processor and the peripherals
-	#call		SYMBOL_NAME(processor_init)
-	#call		SYMBOL_NAME(unit_init)
-	#LEDS		0x0aff
-
 	sethi.p		#0xe5e5,gr3
 	setlo		#0xe5e5,gr3
 	or.p		gr3,gr0,gr4
diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c
index a5136474c6f..ae3a6706419 100644
--- a/arch/frv/kernel/setup.c
+++ b/arch/frv/kernel/setup.c
@@ -735,7 +735,7 @@ static void __init parse_cmdline_early(char *cmdline)
 		/* "mem=XXX[kKmM]" sets SDRAM size to <mem>, overriding the value we worked
 		 * out from the SDRAM controller mask register
 		 */
-		if (!memcmp(cmdline, "mem=", 4)) {
+		if (!strncmp(cmdline, "mem=", 4)) {
 			unsigned long long mem_size;
 
 			mem_size = memparse(cmdline + 4, &cmdline);
@@ -876,6 +876,7 @@ late_initcall(setup_arch_serial);
 static void __init setup_linux_memory(void)
 {
 	unsigned long bootmap_size, low_top_pfn, kstart, kend, high_mem;
+	unsigned long physpages;
 
 	kstart	= (unsigned long) &__kernel_image_start - PAGE_OFFSET;
 	kend	= (unsigned long) &__kernel_image_end - PAGE_OFFSET;
@@ -893,19 +894,19 @@ static void __init setup_linux_memory(void)
 					 );
 
 	/* pass the memory that the kernel can immediately use over to the bootmem allocator */
-	max_mapnr = num_physpages = (memory_end - memory_start) >> PAGE_SHIFT;
+	max_mapnr = physpages = (memory_end - memory_start) >> PAGE_SHIFT;
 	low_top_pfn = (KERNEL_LOWMEM_END - KERNEL_LOWMEM_START) >> PAGE_SHIFT;
 	high_mem = 0;
 
-	if (num_physpages > low_top_pfn) {
+	if (physpages > low_top_pfn) {
 #ifdef CONFIG_HIGHMEM
-		high_mem = num_physpages - low_top_pfn;
+		high_mem = physpages - low_top_pfn;
 #else
-		max_mapnr = num_physpages = low_top_pfn;
+		max_mapnr = physpages = low_top_pfn;
 #endif
 	}
 	else {
-		low_top_pfn = num_physpages;
+		low_top_pfn = physpages;
 	}
 
 	min_low_pfn = memory_start >> PAGE_SHIFT;
@@ -979,7 +980,7 @@ static void __init setup_uclinux_memory(void)
 	free_bootmem(memory_start, memory_end - memory_start);
 
 	high_memory = (void *) (memory_end & PAGE_MASK);
-	max_mapnr = num_physpages = ((unsigned long) high_memory - PAGE_OFFSET) >> PAGE_SHIFT;
+	max_mapnr = ((unsigned long) high_memory - PAGE_OFFSET) >> PAGE_SHIFT;
 
 	min_low_pfn = memory_start >> PAGE_SHIFT;
 	max_low_pfn = memory_end >> PAGE_SHIFT;
diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c
index 4bff48c19d2..a6d105d61b2 100644
--- a/arch/frv/kernel/traps.c
+++ b/arch/frv/kernel/traps.c
@@ -523,7 +523,7 @@ void die_if_kernel(const char *str, ...)
 		return;
 
 	va_start(va, str);
-	vsprintf(buffer, str, va);
+	vsnprintf(buffer, sizeof(buffer), str, va);
 	va_end(va);
 
 	console_verbose();
diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c
index dee354fa6b6..88a15974352 100644
--- a/arch/frv/mm/init.c
+++ b/arch/frv/mm/init.c
@@ -78,7 +78,7 @@ void __init paging_init(void)
 	memset((void *) empty_zero_page, 0, PAGE_SIZE);
 
 #ifdef CONFIG_HIGHMEM
-	if (num_physpages - num_mappedpages) {
+	if (get_num_physpages() - num_mappedpages) {
 		pgd_t *pge;
 		pud_t *pue;
 		pmd_t *pme;
@@ -96,7 +96,7 @@ void __init paging_init(void)
 	 */
 	zones_size[ZONE_NORMAL]  = max_low_pfn - min_low_pfn;
 #ifdef CONFIG_HIGHMEM
-	zones_size[ZONE_HIGHMEM] = num_physpages - num_mappedpages;
+	zones_size[ZONE_HIGHMEM] = get_num_physpages() - num_mappedpages;
 #endif
 
 	free_area_init(zones_size);
@@ -114,45 +114,24 @@ void __init paging_init(void)
  */
 void __init mem_init(void)
 {
-	unsigned long npages = (memory_end - memory_start) >> PAGE_SHIFT;
-	unsigned long tmp;
-#ifdef CONFIG_MMU
-	unsigned long loop, pfn;
-	int datapages = 0;
-#endif
-	int codek = 0, datak = 0;
+	unsigned long code_size = _etext - _stext;
 
 	/* this will put all low memory onto the freelists */
-	totalram_pages = free_all_bootmem();
-
-#ifdef CONFIG_MMU
-	for (loop = 0 ; loop < npages ; loop++)
-		if (PageReserved(&mem_map[loop]))
-			datapages++;
-
-#ifdef CONFIG_HIGHMEM
-	for (pfn = num_physpages - 1; pfn >= num_mappedpages; pfn--)
-		free_highmem_page(&mem_map[pfn]);
-#endif
-
-	codek = ((unsigned long) &_etext - (unsigned long) &_stext) >> 10;
-	datak = datapages << (PAGE_SHIFT - 10);
-
-#else
-	codek = (_etext - _stext) >> 10;
-	datak = 0; //(__bss_stop - _sdata) >> 10;
+	free_all_bootmem();
+#if defined(CONFIG_MMU) && defined(CONFIG_HIGHMEM)
+	{
+		unsigned long pfn;
+
+		for (pfn = get_num_physpages() - 1;
+		     pfn >= num_mappedpages; pfn--)
+			free_highmem_page(&mem_map[pfn]);
+	}
 #endif
 
-	tmp = nr_free_pages() << PAGE_SHIFT;
-	printk("Memory available: %luKiB/%luKiB RAM, %luKiB/%luKiB ROM (%dKiB kernel code, %dKiB data)\n",
-	       tmp >> 10,
-	       npages << (PAGE_SHIFT - 10),
-	       (rom_length > 0) ? ((rom_length >> 10) - codek) : 0,
-	       rom_length >> 10,
-	       codek,
-	       datak
-	       );
-
+	mem_init_print_info(NULL);
+	if (rom_length > 0 && rom_length >= code_size)
+		printk("Memory available:  %luKiB/%luKiB ROM\n",
+			(rom_length - code_size) >> 10, rom_length >> 10);
 } /* end mem_init() */
 
 /*****************************************************************************/
@@ -162,7 +141,7 @@ void __init mem_init(void)
 void free_initmem(void)
 {
 #if defined(CONFIG_RAMKERNEL) && !defined(CONFIG_PROTECT_KERNEL)
-	free_initmem_default(0);
+	free_initmem_default(-1);
 #endif
 } /* end free_initmem() */
 
@@ -173,6 +152,6 @@ void free_initmem(void)
 #ifdef CONFIG_BLK_DEV_INITRD
 void __init free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, 0, "initrd");
+	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 } /* end free_initrd_mem() */
 #endif
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 303e4f9a79d..3d6759ee382 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -94,126 +94,10 @@ endmenu
 
 source "net/Kconfig"
 
-source "drivers/base/Kconfig"
-
-source "drivers/mtd/Kconfig"
-
-source "drivers/block/Kconfig"
-
-source "drivers/ide/Kconfig"
+source "drivers/Kconfig"
 
 source "arch/h8300/Kconfig.ide"
 
-source "drivers/net/Kconfig"
-
-#
-# input - input/joystick depends on it. As does USB.
-#
-source "drivers/input/Kconfig"
-
-menu "Character devices"
-
-config VT
-	bool "Virtual terminal"
-	---help---
-	  If you say Y here, you will get support for terminal devices with
-	  display and keyboard devices. These are called "virtual" because you
-	  can run several virtual terminals (also called virtual consoles) on
-	  one physical terminal. This is rather useful, for example one
-	  virtual terminal can collect system messages and warnings, another
-	  one can be used for a text-mode user session, and a third could run
-	  an X session, all in parallel. Switching between virtual terminals
-	  is done with certain key combinations, usually Alt-<function key>.
-
-	  The setterm command ("man setterm") can be used to change the
-	  properties (such as colors or beeping) of a virtual terminal. The
-	  man page console_codes(4) ("man console_codes") contains the special
-	  character sequences that can be used to change those properties
-	  directly. The fonts used on virtual terminals can be changed with
-	  the setfont ("man setfont") command and the key bindings are defined
-	  with the loadkeys ("man loadkeys") command.
-
-	  You need at least one virtual terminal device in order to make use
-	  of your keyboard and monitor. Therefore, only people configuring an
-	  embedded system would want to say N here in order to save some
-	  memory; the only way to log into such a system is then via a serial
-	  or network connection.
-
-	  If unsure, say Y, or else you won't be able to do much with your new
-	  shiny Linux system :-)
-
-config VT_CONSOLE
-	bool "Support for console on virtual terminal"
-	depends on VT
-	---help---
-	  The system console is the device which receives all kernel messages
-	  and warnings and which allows logins in single user mode. If you
-	  answer Y here, a virtual terminal (the device used to interact with
-	  a physical terminal) can be used as system console. This is the most
-	  common mode of operations, so you should say Y here unless you want
-	  the kernel messages be output only to a serial port (in which case
-	  you should say Y to "Console on serial port", below).
-
-	  If you do say Y here, by default the currently visible virtual
-	  terminal (/dev/tty0) will be used as system console. You can change
-	  that with a kernel command line option such as "console=tty3" which
-	  would use the third virtual terminal as system console. (Try "man
-	  bootparam" or see the documentation of your boot loader (lilo or
-	  loadlin) about how to pass options to the kernel at boot time.)
-
-	  If unsure, say Y.
-
-config HW_CONSOLE
-	bool
-	depends on VT
-	default y
-
-comment "Unix98 PTY support"
-
-config UNIX98_PTYS
-	bool "Unix98 PTY support"
-	---help---
-	  A pseudo terminal (PTY) is a software device consisting of two
-	  halves: a master and a slave. The slave device behaves identical to
-	  a physical terminal; the master device is used by a process to
-	  read data from and write data to the slave, thereby emulating a
-	  terminal. Typical programs for the master side are telnet servers
-	  and xterms.
-
-	  Linux has traditionally used the BSD-like names /dev/ptyxx for
-	  masters and /dev/ttyxx for slaves of pseudo terminals. This scheme
-	  has a number of problems. The GNU C library glibc 2.1 and later,
-	  however, supports the Unix98 naming standard: in order to acquire a
-	  pseudo terminal, a process opens /dev/ptmx; the number of the pseudo
-	  terminal is then made available to the process and the pseudo
-	  terminal slave can be accessed as /dev/pts/<number>. What was
-	  traditionally /dev/ttyp2 will then be /dev/pts/2, for example.
-
-	  The entries in /dev/pts/ are created on the fly by a virtual
-	  file system; therefore, if you say Y here you should say Y to
-	  "/dev/pts file system for Unix98 PTYs" as well.
-
-	  If you want to say Y here, you need to have the C library glibc 2.1
-	  or later (equal to libc-6.1, check with "ls -l /lib/libc.so.*").
-	  Read the instructions in <file:Documentation/Changes> pertaining to
-	  pseudo terminals. It's safe to say N.
-
-source "drivers/char/pcmcia/Kconfig"
-
-source "drivers/tty/serial/Kconfig"
-
-source "drivers/i2c/Kconfig"
-
-source "drivers/hwmon/Kconfig"
-
-source "drivers/usb/Kconfig"
-
-source "drivers/uwb/Kconfig"
-
-endmenu
-
-source "drivers/staging/Kconfig"
-
 source "fs/Kconfig"
 
 source "arch/h8300/Kconfig.debug"
diff --git a/arch/h8300/Kconfig.cpu b/arch/h8300/Kconfig.cpu
index 321f3922728..cdee771460e 100644
--- a/arch/h8300/Kconfig.cpu
+++ b/arch/h8300/Kconfig.cpu
@@ -64,6 +64,7 @@ choice
 
 config H83002
 	bool "H8/3001,3002,3003"
+	depends on BROKEN
 	select CPU_H8300H
 
 config H83007
@@ -72,6 +73,7 @@ config H83007
 
 config H83048
 	bool "H8/3044,3045,3046,3047,3048,3052"
+	depends on BROKEN
 	select CPU_H8300H
 
 config H83068
@@ -155,10 +157,12 @@ config H8300_TIMER16_CH
 config H8300_ITU_CH
 	int "ITU channel"
 	depends on H8300_ITU
+	range 0 4
 
 config H8300_TPU_CH
 	int "TPU channel"
 	depends on H8300_TPU
+	range 0 4
 
 source "kernel/Kconfig.preempt"
 
diff --git a/arch/h8300/boot/compressed/Makefile b/arch/h8300/boot/compressed/Makefile
index 6745cb1ffb4..a6c98fe3bbc 100644
--- a/arch/h8300/boot/compressed/Makefile
+++ b/arch/h8300/boot/compressed/Makefile
@@ -16,7 +16,7 @@ OBJECTS = $(obj)/head.o $(obj)/misc.o
 #
 CONFIG_MEMORY_START     ?= 0x00400000
 CONFIG_BOOT_LINK_OFFSET ?= 0x00140000
-IMAGE_OFFSET := $(shell printf "0x%08x" $$[$(CONFIG_MEMORY_START)+$(CONFIG_BOOT_LINK_OFFSET)])
+IMAGE_OFFSET := $(shell printf "0x%08x" $$(($(CONFIG_MEMORY_START)+$(CONFIG_BOOT_LINK_OFFSET))))
 
 LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -estartup $(obj)/vmlinux.lds
 
diff --git a/arch/h8300/boot/compressed/misc.c b/arch/h8300/boot/compressed/misc.c
index 51ab6cbd030..4a1e3dd4394 100644
--- a/arch/h8300/boot/compressed/misc.c
+++ b/arch/h8300/boot/compressed/misc.c
@@ -79,7 +79,6 @@ static void error(char *m);
 
 int puts(const char *);
 
-extern int _text;		/* Defined in vmlinux.lds.S */
 extern int _end;
 static unsigned long free_mem_ptr;
 static unsigned long free_mem_end_ptr;
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 995eb47e01b..8ada3cf0c98 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -1,6 +1,8 @@
 
 generic-y += clkdev.h
 generic-y += exec.h
+generic-y += linkage.h
 generic-y += mmu.h
 generic-y += module.h
 generic-y += trace_clock.h
+generic-y += xor.h
diff --git a/arch/h8300/include/asm/barrier.h b/arch/h8300/include/asm/barrier.h
index c7283c343c5..9e0aa9fc195 100644
--- a/arch/h8300/include/asm/barrier.h
+++ b/arch/h8300/include/asm/barrier.h
@@ -12,6 +12,8 @@
 #define wmb()  asm volatile (""   : : :"memory")
 #define set_mb(var, value) do { xchg(&var, value); } while (0)
 
+#define read_barrier_depends()	do { } while (0)
+
 #ifdef CONFIG_SMP
 #define smp_mb()	mb()
 #define smp_rmb()	rmb()
diff --git a/arch/h8300/include/asm/linkage.h b/arch/h8300/include/asm/linkage.h
deleted file mode 100644
index 1d81604fb0a..00000000000
--- a/arch/h8300/include/asm/linkage.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _H8300_LINKAGE_H
-#define _H8300_LINKAGE_H
-
-#undef SYMBOL_NAME_LABEL
-#define SYMBOL_NAME_LABEL(_name_) _##_name_##:
-#endif
diff --git a/arch/h8300/include/asm/tlb.h b/arch/h8300/include/asm/tlb.h
index 3dea80ad9e6..7f0743051ad 100644
--- a/arch/h8300/include/asm/tlb.h
+++ b/arch/h8300/include/asm/tlb.h
@@ -1,16 +1,3 @@
-/* 
-  include/asm-h8300/tlb.h 
-*/
-
-#ifndef __H8300_TLB_H__
-#define __H8300_TLB_H__
-
-#define tlb_flush(tlb)	do { } while(0)
-
-/* 
-  include/asm-h8300/tlb.h 
-*/
-
 #ifndef __H8300_TLB_H__
 #define __H8300_TLB_H__
 
@@ -19,5 +6,3 @@
 #include <asm-generic/tlb.h>
 
 #endif
-
-#endif
diff --git a/arch/h8300/kernel/entry.S b/arch/h8300/kernel/entry.S
index 617a6878787..94bd30f11df 100644
--- a/arch/h8300/kernel/entry.S
+++ b/arch/h8300/kernel/entry.S
@@ -87,13 +87,13 @@ INTERRUPTS = 128
 	bne	5f
 
 	/* user mode */
-	mov.l	sp,@SYMBOL_NAME(sw_usp)
+	mov.l	sp,@_sw_usp
 	mov.l	@sp,er0				/* restore saved er0 */
 	orc	#0x10,ccr			/* switch kernel stack */
-	mov.l	@SYMBOL_NAME(sw_ksp),sp
+	mov.l	@_sw_ksp,sp
 	sub.l	#(LRET-LORIG),sp		/* allocate LORIG - LRET */
 	SAVEREGS
-	mov.l   @SYMBOL_NAME(sw_usp),er0
+	mov.l   @_sw_usp,er0
 	mov.l   @(USERRET:16,er0),er1           /* copy the RET addr */
 	mov.l   er1,@(LRET-LER3:16,sp)
 	SAVEEXR
@@ -128,7 +128,7 @@ INTERRUPTS = 128
 	bne	7f
 
 	orc	#0x80,ccr
-	mov.l	@SYMBOL_NAME(sw_usp),er0
+	mov.l	@_sw_usp,er0
 	mov.l	@(LER0-LER1:16,sp),er1		/* restore ER0 */
 	mov.l	er1,@er0
 	RESTOREEXR
@@ -141,7 +141,7 @@ INTERRUPTS = 128
 
 	mov.l	@sp+,er1
 	add.l	#(LRET-LER1),sp			/* remove LORIG - LRET */
-	mov.l	sp,@SYMBOL_NAME(sw_ksp)
+	mov.l	sp,@_sw_ksp
 	andc	#0xef,ccr			/* switch to user mode */
 	mov.l	er0,sp
 	bra	8f
@@ -155,20 +155,20 @@ INTERRUPTS = 128
 	rte
 	.endm
 
-.globl SYMBOL_NAME(system_call)
-.globl SYMBOL_NAME(ret_from_exception)
-.globl SYMBOL_NAME(ret_from_fork)
-.globl SYMBOL_NAME(ret_from_kernel_thread)
-.globl SYMBOL_NAME(ret_from_interrupt)
-.globl SYMBOL_NAME(interrupt_redirect_table)
-.globl SYMBOL_NAME(sw_ksp),SYMBOL_NAME(sw_usp)
-.globl SYMBOL_NAME(resume)
-.globl SYMBOL_NAME(interrupt_entry)
-.globl SYMBOL_NAME(trace_break)
+.globl _system_call
+.globl _ret_from_exception
+.globl _ret_from_fork
+.globl _ret_from_kernel_thread
+.globl _ret_from_interrupt
+.globl _interrupt_redirect_table
+.globl _sw_ksp,_sw_usp
+.globl _resume
+.globl _interrupt_entry
+.globl _trace_break
 
 #if defined(CONFIG_ROMKERNEL)
 	.section .int_redirect,"ax"
-SYMBOL_NAME_LABEL(interrupt_redirect_table)
+_interrupt_redirect_table:
 #if defined(CONFIG_CPU_H8300H)
 	.rept	7
 	.long	0
@@ -178,54 +178,54 @@ SYMBOL_NAME_LABEL(interrupt_redirect_table)
 	.rept	5
 	.long	0
 	.endr
-	jmp	@SYMBOL_NAME(trace_break)
+	jmp	@_trace_break
 	.long	0
 #endif
 
-	jsr	@SYMBOL_NAME(interrupt_entry)	/* NMI */
-	jmp	@SYMBOL_NAME(system_call)	/* TRAPA #0 (System call) */
+	jsr	@_interrupt_entry		/* NMI */
+	jmp	@_system_call			/* TRAPA #0 (System call) */
 	.long	0
 	.long	0
-	jmp	@SYMBOL_NAME(trace_break)	/* TRAPA #3 (breakpoint) */
+	jmp	@_trace_break			/* TRAPA #3 (breakpoint) */
 	.rept	INTERRUPTS-12
-	jsr	@SYMBOL_NAME(interrupt_entry)
+	jsr	@_interrupt_entry
 	.endr
 #endif
 #if defined(CONFIG_RAMKERNEL)
-.globl SYMBOL_NAME(interrupt_redirect_table)
+.globl _interrupt_redirect_table
 	.section .bss
-SYMBOL_NAME_LABEL(interrupt_redirect_table)
+_interrupt_redirect_table:
 	.space	4
 #endif
 
 	.section .text
 	.align	2
-SYMBOL_NAME_LABEL(interrupt_entry)
+_interrupt_entry:
 	SAVE_ALL
 	mov.l	sp,er0
 	add.l	#LVEC,er0
 	btst	#4,r1l
 	bne	1f
 	/* user LVEC */
-	mov.l	@SYMBOL_NAME(sw_usp),er0
+	mov.l	@_sw_usp,er0
 	adds	#4,er0
 1:
 	mov.l	@er0,er0			/* LVEC address */
 #if defined(CONFIG_ROMKERNEL)
-	sub.l	#SYMBOL_NAME(interrupt_redirect_table),er0
+	sub.l	#_interrupt_redirect_table,er0
 #endif
 #if defined(CONFIG_RAMKERNEL)
-	mov.l	@SYMBOL_NAME(interrupt_redirect_table),er1
+	mov.l	@_interrupt_redirect_table,er1
 	sub.l	er1,er0
 #endif
 	SHLR2	er0
 	dec.l	#1,er0
 	mov.l	sp,er1
 	subs	#4,er1				/* adjust ret_pc */
-	jsr	@SYMBOL_NAME(do_IRQ)
-	jmp	@SYMBOL_NAME(ret_from_interrupt)
+	jsr	@_do_IRQ
+	jmp	@_ret_from_interrupt
 
-SYMBOL_NAME_LABEL(system_call)
+_system_call:
 	subs	#4,sp				/* dummy LVEC */
 	SAVE_ALL
 	andc	#0x7f,ccr
@@ -233,21 +233,21 @@ SYMBOL_NAME_LABEL(system_call)
 
 	/* save top of frame */
 	mov.l	sp,er0
-	jsr	@SYMBOL_NAME(set_esp0)
+	jsr	@_set_esp0
 	mov.l	sp,er2
 	and.w	#0xe000,r2
 	mov.b	@((TI_FLAGS+3-(TIF_SYSCALL_TRACE >> 3)):16,er2),r2l
 	btst	#(TIF_SYSCALL_TRACE & 7),r2l
 	beq	1f
-	jsr	@SYMBOL_NAME(do_syscall_trace)
+	jsr	@_do_syscall_trace
 1:
 	cmp.l	#NR_syscalls,er4
 	bcc	badsys
 	SHLL2	er4
-	mov.l	#SYMBOL_NAME(sys_call_table),er0
+	mov.l	#_sys_call_table,er0
 	add.l	er4,er0
 	mov.l	@er0,er4
-	beq	SYMBOL_NAME(ret_from_exception):16
+	beq	_ret_from_exception:16
 	mov.l	@(LER1:16,sp),er0
 	mov.l	@(LER2:16,sp),er1
 	mov.l	@(LER3:16,sp),er2
@@ -258,10 +258,10 @@ SYMBOL_NAME_LABEL(system_call)
 	mov.b	@((TI_FLAGS+3-(TIF_SYSCALL_TRACE >> 3)):16,er2),r2l
 	btst	#(TIF_SYSCALL_TRACE & 7),r2l
 	beq	2f
-	jsr	@SYMBOL_NAME(do_syscall_trace)
+	jsr	@_do_syscall_trace
 2:
 #if defined(CONFIG_SYSCALL_PRINT)
-	jsr	@SYMBOL_NAME(syscall_print)
+	jsr	@_syscall_print
 #endif
 	orc	#0x80,ccr
 	bra	resume_userspace
@@ -275,11 +275,11 @@ badsys:
 #define resume_kernel restore_all
 #endif
 
-SYMBOL_NAME_LABEL(ret_from_exception)
+_ret_from_exception:
 #if defined(CONFIG_PREEMPT)
 	orc	#0x80,ccr
 #endif
-SYMBOL_NAME_LABEL(ret_from_interrupt)
+_ret_from_interrupt:
 	mov.b	@(LCCR+1:16,sp),r0l
 	btst	#4,r0l
 	bne	resume_kernel:8		/* return from kernel */
@@ -296,12 +296,12 @@ work_pending:
 	/* work notifysig */
 	mov.l	sp,er0
 	subs	#4,er0			/* er0: pt_regs */
-	jsr	@SYMBOL_NAME(do_notify_resume)
+	jsr	@_do_notify_resume
 	bra	restore_all:8
 work_resched:
 	mov.l	sp,er0
-	jsr	@SYMBOL_NAME(set_esp0)
-	jsr	@SYMBOL_NAME(schedule)
+	jsr	@_set_esp0
+	jsr	@_schedule
 	bra	resume_userspace:8
 restore_all:
 	RESTORE_ALL			/* Does RTE */
@@ -320,26 +320,26 @@ need_resched:
 	mov.l	er0,@(TI_PRE_COUNT:16,er4)
 	andc	#0x7f,ccr
 	mov.l	sp,er0
-	jsr	@SYMBOL_NAME(set_esp0)
-	jsr	@SYMBOL_NAME(schedule)
+	jsr	@_set_esp0
+	jsr	@_schedule
 	orc	#0x80,ccr
 	bra	need_resched:8
 #endif
 
-SYMBOL_NAME_LABEL(ret_from_fork)
+_ret_from_fork:
 	mov.l	er2,er0
-	jsr	@SYMBOL_NAME(schedule_tail)
-	jmp	@SYMBOL_NAME(ret_from_exception)
+	jsr	@_schedule_tail
+	jmp	@_ret_from_exception
 
-SYMBOL_NAME_LABEL(ret_from_kernel_thread)
+_ret_from_kernel_thread:
 	mov.l	er2,er0
-	jsr	@SYMBOL_NAME(schedule_tail)
+	jsr	@_schedule_tail
 	mov.l	@(LER4:16,sp),er0
 	mov.l	@(LER5:16,sp),er1
 	jsr	@er1
-	jmp	@SYMBOL_NAME(ret_from_exception)
+	jmp	@_ret_from_exception
 
-SYMBOL_NAME_LABEL(resume)
+_resume:
 	/*
 	 * Beware - when entering resume, offset of tss is in d1,
 	 * prev (the current task) is in a0, next (the new task)
@@ -355,7 +355,7 @@ SYMBOL_NAME_LABEL(resume)
 
 	/* disable interrupts */
 	orc	#0x80,ccr
-	mov.l	@SYMBOL_NAME(sw_usp),er3
+	mov.l	@_sw_usp,er3
 	mov.l	er3,@(THREAD_USP:16,er0)
 	mov.l	sp,@(THREAD_KSP:16,er0)
 
@@ -363,7 +363,7 @@ SYMBOL_NAME_LABEL(resume)
 	/* FIXME: what did we hack out of here, this does nothing! */
 
 	mov.l	@(THREAD_USP:16,er1),er0
-	mov.l	er0,@SYMBOL_NAME(sw_usp)
+	mov.l	er0,@_sw_usp
 	mov.l	@(THREAD_KSP:16,er1),sp
 
 	/* restore status register */
@@ -372,15 +372,15 @@ SYMBOL_NAME_LABEL(resume)
 	ldc	r3l,ccr
 	rts
 
-SYMBOL_NAME_LABEL(trace_break)
+_trace_break:
 	subs	#4,sp
 	SAVE_ALL
 	sub.l	er1,er1
 	dec.l	#1,er1
 	mov.l	er1,@(LORIG,sp)
 	mov.l	sp,er0
-	jsr	@SYMBOL_NAME(set_esp0)
-	mov.l	@SYMBOL_NAME(sw_usp),er0
+	jsr	@_set_esp0
+	mov.l	@_sw_usp,er0
 	mov.l	@er0,er1
 	mov.w	@(-2:16,er1),r2
 	cmp.w	#0x5730,r2
@@ -390,13 +390,13 @@ SYMBOL_NAME_LABEL(trace_break)
 1:
 	and.w	#0xff,e1
 	mov.l	er1,er0
-	jsr	@SYMBOL_NAME(trace_trap)
-	jmp	@SYMBOL_NAME(ret_from_exception)
+	jsr	@_trace_trap
+	jmp	@_ret_from_exception
 
 	.section	.bss
-SYMBOL_NAME_LABEL(sw_ksp)
+_sw_ksp:
 	.space	4
-SYMBOL_NAME_LABEL(sw_usp)
+_sw_usp:
 	.space	4
 
 	.end
diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S
index 5c2168fb9b9..c55e0ed270d 100644
--- a/arch/h8300/kernel/syscalls.S
+++ b/arch/h8300/kernel/syscalls.S
@@ -2,8 +2,10 @@
 #include <linux/sys.h>
 #include <asm/linkage.h>
 #include <asm/unistd.h>
-	
-.globl SYMBOL_NAME(sys_call_table)
+
+#define CALL(x)	.long _ ## x
+
+.globl _sys_call_table
 
 #if defined(CONFIG_CPU_H8300H)
 	.h8300h
@@ -13,324 +15,324 @@
 #endif
 	.section .text
 	.align	2
-SYMBOL_NAME_LABEL(sys_call_table)	
-	.long SYMBOL_NAME(sys_ni_syscall)	/* 0  -  old "setup()" system call*/
-	.long SYMBOL_NAME(sys_exit)
-	.long SYMBOL_NAME(sys_fork)
-	.long SYMBOL_NAME(sys_read)
-	.long SYMBOL_NAME(sys_write)
-	.long SYMBOL_NAME(sys_open)		/* 5 */
-	.long SYMBOL_NAME(sys_close)
-	.long SYMBOL_NAME(sys_waitpid)
-	.long SYMBOL_NAME(sys_creat)
-	.long SYMBOL_NAME(sys_link)
-	.long SYMBOL_NAME(sys_unlink)		/* 10 */
-	.long SYMBOL_NAME(sys_execve)
-	.long SYMBOL_NAME(sys_chdir)
-	.long SYMBOL_NAME(sys_time)
-	.long SYMBOL_NAME(sys_mknod)
-	.long SYMBOL_NAME(sys_chmod)		/* 15 */
-	.long SYMBOL_NAME(sys_chown16)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* old break syscall holder */
-	.long SYMBOL_NAME(sys_stat)
-	.long SYMBOL_NAME(sys_lseek)
-	.long SYMBOL_NAME(sys_getpid)		/* 20 */
-	.long SYMBOL_NAME(sys_mount)
-	.long SYMBOL_NAME(sys_oldumount)
-	.long SYMBOL_NAME(sys_setuid16)
-	.long SYMBOL_NAME(sys_getuid16)
-	.long SYMBOL_NAME(sys_stime)		/* 25 */
-	.long SYMBOL_NAME(sys_ptrace)
-	.long SYMBOL_NAME(sys_alarm)
-	.long SYMBOL_NAME(sys_fstat)
-	.long SYMBOL_NAME(sys_pause)
-	.long SYMBOL_NAME(sys_utime)		/* 30 */
-	.long SYMBOL_NAME(sys_ni_syscall)	/* old stty syscall holder */
-	.long SYMBOL_NAME(sys_ni_syscall)	/* old gtty syscall holder */
-	.long SYMBOL_NAME(sys_access)
-	.long SYMBOL_NAME(sys_nice)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* 35 old ftime syscall holder */
-	.long SYMBOL_NAME(sys_sync)
-	.long SYMBOL_NAME(sys_kill)
-	.long SYMBOL_NAME(sys_rename)
-	.long SYMBOL_NAME(sys_mkdir)
-	.long SYMBOL_NAME(sys_rmdir)		/* 40 */
-	.long SYMBOL_NAME(sys_dup)
-	.long SYMBOL_NAME(sys_pipe)
-	.long SYMBOL_NAME(sys_times)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* old prof syscall holder */
-	.long SYMBOL_NAME(sys_brk)		/* 45 */
-	.long SYMBOL_NAME(sys_setgid16)
-	.long SYMBOL_NAME(sys_getgid16)
-	.long SYMBOL_NAME(sys_signal)
-	.long SYMBOL_NAME(sys_geteuid16)
-	.long SYMBOL_NAME(sys_getegid16)	/* 50 */
-	.long SYMBOL_NAME(sys_acct)
-	.long SYMBOL_NAME(sys_umount)		/* recycled never used phys() */
-	.long SYMBOL_NAME(sys_ni_syscall)	/* old lock syscall holder */
-	.long SYMBOL_NAME(sys_ioctl)
-	.long SYMBOL_NAME(sys_fcntl)		/* 55 */
-	.long SYMBOL_NAME(sys_ni_syscall)	/* old mpx syscall holder */
-	.long SYMBOL_NAME(sys_setpgid)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* old ulimit syscall holder */
-	.long SYMBOL_NAME(sys_ni_syscall)
-	.long SYMBOL_NAME(sys_umask)		/* 60 */
-	.long SYMBOL_NAME(sys_chroot)
-	.long SYMBOL_NAME(sys_ustat)
-	.long SYMBOL_NAME(sys_dup2)
-	.long SYMBOL_NAME(sys_getppid)
-	.long SYMBOL_NAME(sys_getpgrp)		/* 65 */
-	.long SYMBOL_NAME(sys_setsid)
-	.long SYMBOL_NAME(sys_sigaction)
-	.long SYMBOL_NAME(sys_sgetmask)
-	.long SYMBOL_NAME(sys_ssetmask)
-	.long SYMBOL_NAME(sys_setreuid16)	/* 70 */
-	.long SYMBOL_NAME(sys_setregid16)
-	.long SYMBOL_NAME(sys_sigsuspend)
-	.long SYMBOL_NAME(sys_sigpending)
-	.long SYMBOL_NAME(sys_sethostname)
-	.long SYMBOL_NAME(sys_setrlimit)	/* 75 */
-	.long SYMBOL_NAME(sys_old_getrlimit)
-	.long SYMBOL_NAME(sys_getrusage)
-	.long SYMBOL_NAME(sys_gettimeofday)
-	.long SYMBOL_NAME(sys_settimeofday)
-	.long SYMBOL_NAME(sys_getgroups16)	/* 80 */
-	.long SYMBOL_NAME(sys_setgroups16)
-	.long SYMBOL_NAME(sys_old_select)
-	.long SYMBOL_NAME(sys_symlink)
-	.long SYMBOL_NAME(sys_lstat)
-	.long SYMBOL_NAME(sys_readlink)		/* 85 */
-	.long SYMBOL_NAME(sys_uselib)
-	.long SYMBOL_NAME(sys_swapon)
-	.long SYMBOL_NAME(sys_reboot)
-	.long SYMBOL_NAME(sys_old_readdir)
-	.long SYMBOL_NAME(sys_old_mmap)		/* 90 */
-	.long SYMBOL_NAME(sys_munmap)
-	.long SYMBOL_NAME(sys_truncate)
-	.long SYMBOL_NAME(sys_ftruncate)
-	.long SYMBOL_NAME(sys_fchmod)
-	.long SYMBOL_NAME(sys_fchown16)		/* 95 */
-	.long SYMBOL_NAME(sys_getpriority)
-	.long SYMBOL_NAME(sys_setpriority)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* old profil syscall holder */
-	.long SYMBOL_NAME(sys_statfs)
-	.long SYMBOL_NAME(sys_fstatfs)		/* 100 */
-	.long SYMBOL_NAME(sys_ni_syscall)	/* ioperm for i386 */
-	.long SYMBOL_NAME(sys_socketcall)
-	.long SYMBOL_NAME(sys_syslog)
-	.long SYMBOL_NAME(sys_setitimer)
-	.long SYMBOL_NAME(sys_getitimer)	/* 105 */
-	.long SYMBOL_NAME(sys_newstat)
-	.long SYMBOL_NAME(sys_newlstat)
-	.long SYMBOL_NAME(sys_newfstat)
-	.long SYMBOL_NAME(sys_ni_syscall)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* iopl for i386 */ /* 110 */
-	.long SYMBOL_NAME(sys_vhangup)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* obsolete idle() syscall */
-	.long SYMBOL_NAME(sys_ni_syscall)	/* vm86old for i386 */
-	.long SYMBOL_NAME(sys_wait4)
-	.long SYMBOL_NAME(sys_swapoff)		/* 115 */
-	.long SYMBOL_NAME(sys_sysinfo)
-	.long SYMBOL_NAME(sys_ipc)
-	.long SYMBOL_NAME(sys_fsync)
-	.long SYMBOL_NAME(sys_sigreturn)
-	.long SYMBOL_NAME(sys_clone)		/* 120 */
-	.long SYMBOL_NAME(sys_setdomainname)
-	.long SYMBOL_NAME(sys_newuname)
-	.long SYMBOL_NAME(sys_cacheflush)	/* modify_ldt for i386 */
-	.long SYMBOL_NAME(sys_adjtimex)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* 125 sys_mprotect */
-	.long SYMBOL_NAME(sys_sigprocmask)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_create_module */
-	.long SYMBOL_NAME(sys_init_module)
-	.long SYMBOL_NAME(sys_delete_module)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* 130 sys_get_kernel_syms */
-	.long SYMBOL_NAME(sys_quotactl)
-	.long SYMBOL_NAME(sys_getpgid)
-	.long SYMBOL_NAME(sys_fchdir)
-	.long SYMBOL_NAME(sys_bdflush)
-	.long SYMBOL_NAME(sys_sysfs)		/* 135 */
-	.long SYMBOL_NAME(sys_personality)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* for afs_syscall */
-	.long SYMBOL_NAME(sys_setfsuid16)
-	.long SYMBOL_NAME(sys_setfsgid16)
-	.long SYMBOL_NAME(sys_llseek)		/* 140 */
-	.long SYMBOL_NAME(sys_getdents)
-	.long SYMBOL_NAME(sys_select)
-	.long SYMBOL_NAME(sys_flock)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_msync */
-	.long SYMBOL_NAME(sys_readv)		/* 145 */
-	.long SYMBOL_NAME(sys_writev)
-	.long SYMBOL_NAME(sys_getsid)
-	.long SYMBOL_NAME(sys_fdatasync)
-	.long SYMBOL_NAME(sys_sysctl)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* 150 sys_mlock */
-	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_munlock */
-	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_mlockall */
-	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_munlockall */
-	.long SYMBOL_NAME(sys_sched_setparam)
-	.long SYMBOL_NAME(sys_sched_getparam)   /* 155 */
-	.long SYMBOL_NAME(sys_sched_setscheduler)
-	.long SYMBOL_NAME(sys_sched_getscheduler)
-	.long SYMBOL_NAME(sys_sched_yield)
-	.long SYMBOL_NAME(sys_sched_get_priority_max)
-	.long SYMBOL_NAME(sys_sched_get_priority_min)  /* 160 */
-	.long SYMBOL_NAME(sys_sched_rr_get_interval)
-	.long SYMBOL_NAME(sys_nanosleep)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_mremap */
-	.long SYMBOL_NAME(sys_setresuid16)
-	.long SYMBOL_NAME(sys_getresuid16)	/* 165 */
-	.long SYMBOL_NAME(sys_ni_syscall)	/* for vm86 */
-	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_query_module */
-	.long SYMBOL_NAME(sys_poll)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* old nfsservctl */
-	.long SYMBOL_NAME(sys_setresgid16)	/* 170 */
-	.long SYMBOL_NAME(sys_getresgid16)
-	.long SYMBOL_NAME(sys_prctl)
-	.long SYMBOL_NAME(sys_rt_sigreturn)
-	.long SYMBOL_NAME(sys_rt_sigaction)
-	.long SYMBOL_NAME(sys_rt_sigprocmask)	/* 175 */
-	.long SYMBOL_NAME(sys_rt_sigpending)
-	.long SYMBOL_NAME(sys_rt_sigtimedwait)
-	.long SYMBOL_NAME(sys_rt_sigqueueinfo)
-	.long SYMBOL_NAME(sys_rt_sigsuspend)
-	.long SYMBOL_NAME(sys_pread64)		/* 180 */
-	.long SYMBOL_NAME(sys_pwrite64)
-	.long SYMBOL_NAME(sys_lchown16);
-	.long SYMBOL_NAME(sys_getcwd)
-	.long SYMBOL_NAME(sys_capget)
-	.long SYMBOL_NAME(sys_capset)           /* 185 */
-	.long SYMBOL_NAME(sys_sigaltstack)
-	.long SYMBOL_NAME(sys_sendfile)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* streams1 */
-	.long SYMBOL_NAME(sys_ni_syscall)	/* streams2 */
-	.long SYMBOL_NAME(sys_vfork)            /* 190 */
-	.long SYMBOL_NAME(sys_getrlimit)
-	.long SYMBOL_NAME(sys_mmap_pgoff)
-	.long SYMBOL_NAME(sys_truncate64)
-	.long SYMBOL_NAME(sys_ftruncate64)
-	.long SYMBOL_NAME(sys_stat64)		/* 195 */
-	.long SYMBOL_NAME(sys_lstat64)
-	.long SYMBOL_NAME(sys_fstat64)
-	.long SYMBOL_NAME(sys_chown)
-	.long SYMBOL_NAME(sys_getuid)
-	.long SYMBOL_NAME(sys_getgid)		/* 200 */
-	.long SYMBOL_NAME(sys_geteuid)
-	.long SYMBOL_NAME(sys_getegid)
-	.long SYMBOL_NAME(sys_setreuid)
-	.long SYMBOL_NAME(sys_setregid)
-	.long SYMBOL_NAME(sys_getgroups)	/* 205 */
-	.long SYMBOL_NAME(sys_setgroups)
-	.long SYMBOL_NAME(sys_fchown)
-	.long SYMBOL_NAME(sys_setresuid)
-	.long SYMBOL_NAME(sys_getresuid)
-	.long SYMBOL_NAME(sys_setresgid)	/* 210 */
-	.long SYMBOL_NAME(sys_getresgid)
-	.long SYMBOL_NAME(sys_lchown)
-	.long SYMBOL_NAME(sys_setuid)
-	.long SYMBOL_NAME(sys_setgid)
-	.long SYMBOL_NAME(sys_setfsuid)		/* 215 */
-	.long SYMBOL_NAME(sys_setfsgid)
-	.long SYMBOL_NAME(sys_pivot_root)
-	.long SYMBOL_NAME(sys_ni_syscall)
-	.long SYMBOL_NAME(sys_ni_syscall)
-	.long SYMBOL_NAME(sys_getdents64)	/* 220 */
-	.long SYMBOL_NAME(sys_fcntl64)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* reserved TUX */
-	.long SYMBOL_NAME(sys_ni_syscall)	/* reserved Security */
-	.long SYMBOL_NAME(sys_gettid)
-	.long SYMBOL_NAME(sys_readahead)	/* 225 */
-	.long SYMBOL_NAME(sys_setxattr)
-	.long SYMBOL_NAME(sys_lsetxattr)
-	.long SYMBOL_NAME(sys_fsetxattr)
-	.long SYMBOL_NAME(sys_getxattr)
-	.long SYMBOL_NAME(sys_lgetxattr)	/* 230 */
-	.long SYMBOL_NAME(sys_fgetxattr)
-	.long SYMBOL_NAME(sys_listxattr)
-	.long SYMBOL_NAME(sys_llistxattr)
-	.long SYMBOL_NAME(sys_flistxattr)
-	.long SYMBOL_NAME(sys_removexattr)	/* 235 */
-	.long SYMBOL_NAME(sys_lremovexattr)
-	.long SYMBOL_NAME(sys_fremovexattr)
-	.long SYMBOL_NAME(sys_tkill)
-	.long SYMBOL_NAME(sys_sendfile64)
-	.long SYMBOL_NAME(sys_futex)		/* 240 */
-	.long SYMBOL_NAME(sys_sched_setaffinity)
-	.long SYMBOL_NAME(sys_sched_getaffinity)
-	.long SYMBOL_NAME(sys_ni_syscall)
-	.long SYMBOL_NAME(sys_ni_syscall)
-	.long SYMBOL_NAME(sys_io_setup)		/* 245 */
-	.long SYMBOL_NAME(sys_io_destroy)
-	.long SYMBOL_NAME(sys_io_getevents)
-	.long SYMBOL_NAME(sys_io_submit)
-	.long SYMBOL_NAME(sys_io_cancel)
-	.long SYMBOL_NAME(sys_fadvise64)	/* 250 */
-	.long SYMBOL_NAME(sys_ni_syscall)
-	.long SYMBOL_NAME(sys_exit_group)
-	.long SYMBOL_NAME(sys_lookup_dcookie)
-	.long SYMBOL_NAME(sys_epoll_create)
-	.long SYMBOL_NAME(sys_epoll_ctl)	/* 255 */
-	.long SYMBOL_NAME(sys_epoll_wait)
- 	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_remap_file_pages */
- 	.long SYMBOL_NAME(sys_set_tid_address)
- 	.long SYMBOL_NAME(sys_timer_create)
- 	.long SYMBOL_NAME(sys_timer_settime)	/* 260 */
- 	.long SYMBOL_NAME(sys_timer_gettime)
- 	.long SYMBOL_NAME(sys_timer_getoverrun)
- 	.long SYMBOL_NAME(sys_timer_delete)
- 	.long SYMBOL_NAME(sys_clock_settime)
- 	.long SYMBOL_NAME(sys_clock_gettime)	/* 265 */
- 	.long SYMBOL_NAME(sys_clock_getres)
- 	.long SYMBOL_NAME(sys_clock_nanosleep)
-	.long SYMBOL_NAME(sys_statfs64)
-	.long SYMBOL_NAME(sys_fstatfs64)	
-	.long SYMBOL_NAME(sys_tgkill)		/* 270 */
-	.long SYMBOL_NAME(sys_utimes)
- 	.long SYMBOL_NAME(sys_fadvise64_64)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_vserver */
-	.long SYMBOL_NAME(sys_ni_syscall)
-	.long SYMBOL_NAME(sys_get_mempolicy)	/* 275 */
-	.long SYMBOL_NAME(sys_set_mempolicy)
-	.long SYMBOL_NAME(sys_mq_open)
-	.long SYMBOL_NAME(sys_mq_unlink)
-	.long SYMBOL_NAME(sys_mq_timedsend)
-	.long SYMBOL_NAME(sys_mq_timedreceive)	/* 280 */
-	.long SYMBOL_NAME(sys_mq_notify)
-	.long SYMBOL_NAME(sys_mq_getsetattr)
-	.long SYMBOL_NAME(sys_waitid)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_kexec_load */
-	.long SYMBOL_NAME(sys_add_key) 		/* 285 */
-	.long SYMBOL_NAME(sys_request_key)
-	.long SYMBOL_NAME(sys_keyctl)
-	.long SYMBOL_NAME(sys_ioprio_set)
-	.long SYMBOL_NAME(sys_ioprio_get)	/* 290 */
-	.long SYMBOL_NAME(sys_inotify_init)
-	.long SYMBOL_NAME(sys_inotify_add_watch)
-	.long SYMBOL_NAME(sys_inotify_rm_watch)
-	.long SYMBOL_NAME(sys_migrate_pages)
-	.long SYMBOL_NAME(sys_openat)		/* 295 */
-	.long SYMBOL_NAME(sys_mkdirat)
-	.long SYMBOL_NAME(sys_mknodat)
-	.long SYMBOL_NAME(sys_fchownat)
-	.long SYMBOL_NAME(sys_futimesat)
-	.long SYMBOL_NAME(sys_fstatat64)	/* 300 */
-	.long SYMBOL_NAME(sys_unlinkat)
-	.long SYMBOL_NAME(sys_renameat)
-	.long SYMBOL_NAME(sys_linkat)
-	.long SYMBOL_NAME(sys_symlinkat)
-	.long SYMBOL_NAME(sys_readlinkat)	/* 305 */
-	.long SYMBOL_NAME(sys_fchmodat)
-	.long SYMBOL_NAME(sys_faccessat)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_pselect6 */
-	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_ppoll */
-	.long SYMBOL_NAME(sys_unshare)		/* 310 */
-	.long SYMBOL_NAME(sys_set_robust_list)
-	.long SYMBOL_NAME(sys_get_robust_list)
-	.long SYMBOL_NAME(sys_splice)
-	.long SYMBOL_NAME(sys_sync_file_range)
-	.long SYMBOL_NAME(sys_tee)		/* 315 */
-	.long SYMBOL_NAME(sys_vmsplice)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_move_pages */
-	.long SYMBOL_NAME(sys_getcpu)
-	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_epoll_pwait */
-	.long SYMBOL_NAME(sys_setns)		/* 320 */
+_sys_call_table:
+	CALL(sys_ni_syscall)		/* 0  -  old "setup()" system call */
+	CALL(sys_exit)
+	CALL(sys_fork)
+	CALL(sys_read)
+	CALL(sys_write)
+	CALL(sys_open)			/* 5 */
+	CALL(sys_close)
+	CALL(sys_waitpid)
+	CALL(sys_creat)
+	CALL(sys_link)
+	CALL(sys_unlink)		/* 10 */
+	CALL(sys_execve)
+	CALL(sys_chdir)
+	CALL(sys_time)
+	CALL(sys_mknod)
+	CALL(sys_chmod)			/* 15 */
+	CALL(sys_chown16)
+	CALL(sys_ni_syscall)		/* old break syscall holder */
+	CALL(sys_stat)
+	CALL(sys_lseek)
+	CALL(sys_getpid)		/* 20 */
+	CALL(sys_mount)
+	CALL(sys_oldumount)
+	CALL(sys_setuid16)
+	CALL(sys_getuid16)
+	CALL(sys_stime)			/* 25 */
+	CALL(sys_ptrace)
+	CALL(sys_alarm)
+	CALL(sys_fstat)
+	CALL(sys_pause)
+	CALL(sys_utime)			/* 30 */
+	CALL(sys_ni_syscall)		/* old stty syscall holder */
+	CALL(sys_ni_syscall)		/* old gtty syscall holder */
+	CALL(sys_access)
+	CALL(sys_nice)
+	CALL(sys_ni_syscall)		/* 35 old ftime syscall holder */
+	CALL(sys_sync)
+	CALL(sys_kill)
+	CALL(sys_rename)
+	CALL(sys_mkdir)
+	CALL(sys_rmdir)			/* 40 */
+	CALL(sys_dup)
+	CALL(sys_pipe)
+	CALL(sys_times)
+	CALL(sys_ni_syscall)		/* old prof syscall holder */
+	CALL(sys_brk)			/* 45 */
+	CALL(sys_setgid16)
+	CALL(sys_getgid16)
+	CALL(sys_signal)
+	CALL(sys_geteuid16)
+	CALL(sys_getegid16)		/* 50 */
+	CALL(sys_acct)
+	CALL(sys_umount)		/* recycled never used phys() */
+	CALL(sys_ni_syscall)		/* old lock syscall holder */
+	CALL(sys_ioctl)
+	CALL(sys_fcntl)			/* 55 */
+	CALL(sys_ni_syscall)		/* old mpx syscall holder */
+	CALL(sys_setpgid)
+	CALL(sys_ni_syscall)		/* old ulimit syscall holder */
+	CALL(sys_ni_syscall)
+	CALL(sys_umask)			/* 60 */
+	CALL(sys_chroot)
+	CALL(sys_ustat)
+	CALL(sys_dup2)
+	CALL(sys_getppid)
+	CALL(sys_getpgrp)		/* 65 */
+	CALL(sys_setsid)
+	CALL(sys_sigaction)
+	CALL(sys_sgetmask)
+	CALL(sys_ssetmask)
+	CALL(sys_setreuid16)		/* 70 */
+	CALL(sys_setregid16)
+	CALL(sys_sigsuspend)
+	CALL(sys_sigpending)
+	CALL(sys_sethostname)
+	CALL(sys_setrlimit)		/* 75 */
+	CALL(sys_old_getrlimit)
+	CALL(sys_getrusage)
+	CALL(sys_gettimeofday)
+	CALL(sys_settimeofday)
+	CALL(sys_getgroups16)		/* 80 */
+	CALL(sys_setgroups16)
+	CALL(sys_old_select)
+	CALL(sys_symlink)
+	CALL(sys_lstat)
+	CALL(sys_readlink)		/* 85 */
+	CALL(sys_uselib)
+	CALL(sys_swapon)
+	CALL(sys_reboot)
+	CALL(sys_old_readdir)
+	CALL(sys_old_mmap)		/* 90 */
+	CALL(sys_munmap)
+	CALL(sys_truncate)
+	CALL(sys_ftruncate)
+	CALL(sys_fchmod)
+	CALL(sys_fchown16)		/* 95 */
+	CALL(sys_getpriority)
+	CALL(sys_setpriority)
+	CALL(sys_ni_syscall)		/* old profil syscall holder */
+	CALL(sys_statfs)
+	CALL(sys_fstatfs)		/* 100 */
+	CALL(sys_ni_syscall)		/* ioperm for i386 */
+	CALL(sys_socketcall)
+	CALL(sys_syslog)
+	CALL(sys_setitimer)
+	CALL(sys_getitimer)		/* 105 */
+	CALL(sys_newstat)
+	CALL(sys_newlstat)
+	CALL(sys_newfstat)
+	CALL(sys_ni_syscall)
+	CALL(sys_ni_syscall)		/* iopl for i386 */ /* 110 */
+	CALL(sys_vhangup)
+	CALL(sys_ni_syscall)		/* obsolete idle() syscall */
+	CALL(sys_ni_syscall)		/* vm86old for i386 */
+	CALL(sys_wait4)
+	CALL(sys_swapoff)		/* 115 */
+	CALL(sys_sysinfo)
+	CALL(sys_ipc)
+	CALL(sys_fsync)
+	CALL(sys_sigreturn)
+	CALL(sys_clone)			/* 120 */
+	CALL(sys_setdomainname)
+	CALL(sys_newuname)
+	CALL(sys_cacheflush)		/* modify_ldt for i386 */
+	CALL(sys_adjtimex)
+	CALL(sys_ni_syscall)		/* 125 sys_mprotect */
+	CALL(sys_sigprocmask)
+	CALL(sys_ni_syscall)		/* sys_create_module */
+	CALL(sys_init_module)
+	CALL(sys_delete_module)
+	CALL(sys_ni_syscall)		/* 130 sys_get_kernel_syms */
+	CALL(sys_quotactl)
+	CALL(sys_getpgid)
+	CALL(sys_fchdir)
+	CALL(sys_bdflush)
+	CALL(sys_sysfs)			/* 135 */
+	CALL(sys_personality)
+	CALL(sys_ni_syscall)		/* for afs_syscall */
+	CALL(sys_setfsuid16)
+	CALL(sys_setfsgid16)
+	CALL(sys_llseek)		/* 140 */
+	CALL(sys_getdents)
+	CALL(sys_select)
+	CALL(sys_flock)
+	CALL(sys_ni_syscall)		/* sys_msync */
+	CALL(sys_readv)			/* 145 */
+	CALL(sys_writev)
+	CALL(sys_getsid)
+	CALL(sys_fdatasync)
+	CALL(sys_sysctl)
+	CALL(sys_ni_syscall)		/* 150 sys_mlock */
+	CALL(sys_ni_syscall)		/* sys_munlock */
+	CALL(sys_ni_syscall)		/* sys_mlockall */
+	CALL(sys_ni_syscall)		/* sys_munlockall */
+	CALL(sys_sched_setparam)
+	CALL(sys_sched_getparam)	/* 155 */
+	CALL(sys_sched_setscheduler)
+	CALL(sys_sched_getscheduler)
+	CALL(sys_sched_yield)
+	CALL(sys_sched_get_priority_max)
+	CALL(sys_sched_get_priority_min)  /* 160 */
+	CALL(sys_sched_rr_get_interval)
+	CALL(sys_nanosleep)
+	CALL(sys_ni_syscall)		/* sys_mremap */
+	CALL(sys_setresuid16)
+	CALL(sys_getresuid16)		/* 165 */
+	CALL(sys_ni_syscall)		/* for vm86 */
+	CALL(sys_ni_syscall)		/* sys_query_module */
+	CALL(sys_poll)
+	CALL(sys_ni_syscall)		/* old nfsservctl */
+	CALL(sys_setresgid16)		/* 170 */
+	CALL(sys_getresgid16)
+	CALL(sys_prctl)
+	CALL(sys_rt_sigreturn)
+	CALL(sys_rt_sigaction)
+	CALL(sys_rt_sigprocmask)	/* 175 */
+	CALL(sys_rt_sigpending)
+	CALL(sys_rt_sigtimedwait)
+	CALL(sys_rt_sigqueueinfo)
+	CALL(sys_rt_sigsuspend)
+	CALL(sys_pread64)		/* 180 */
+	CALL(sys_pwrite64)
+	CALL(sys_lchown16);
+	CALL(sys_getcwd)
+	CALL(sys_capget)
+	CALL(sys_capset)		/* 185 */
+	CALL(sys_sigaltstack)
+	CALL(sys_sendfile)
+	CALL(sys_ni_syscall)		/* streams1 */
+	CALL(sys_ni_syscall)		/* streams2 */
+	CALL(sys_vfork)			/* 190 */
+	CALL(sys_getrlimit)
+	CALL(sys_mmap_pgoff)
+	CALL(sys_truncate64)
+	CALL(sys_ftruncate64)
+	CALL(sys_stat64)		/* 195 */
+	CALL(sys_lstat64)
+	CALL(sys_fstat64)
+	CALL(sys_chown)
+	CALL(sys_getuid)
+	CALL(sys_getgid)		/* 200 */
+	CALL(sys_geteuid)
+	CALL(sys_getegid)
+	CALL(sys_setreuid)
+	CALL(sys_setregid)
+	CALL(sys_getgroups)		/* 205 */
+	CALL(sys_setgroups)
+	CALL(sys_fchown)
+	CALL(sys_setresuid)
+	CALL(sys_getresuid)
+	CALL(sys_setresgid)		/* 210 */
+	CALL(sys_getresgid)
+	CALL(sys_lchown)
+	CALL(sys_setuid)
+	CALL(sys_setgid)
+	CALL(sys_setfsuid)		/* 215 */
+	CALL(sys_setfsgid)
+	CALL(sys_pivot_root)
+	CALL(sys_ni_syscall)
+	CALL(sys_ni_syscall)
+	CALL(sys_getdents64)		/* 220 */
+	CALL(sys_fcntl64)
+	CALL(sys_ni_syscall)		/* reserved TUX */
+	CALL(sys_ni_syscall)		/* reserved Security */
+	CALL(sys_gettid)
+	CALL(sys_readahead)		/* 225 */
+	CALL(sys_setxattr)
+	CALL(sys_lsetxattr)
+	CALL(sys_fsetxattr)
+	CALL(sys_getxattr)
+	CALL(sys_lgetxattr)		/* 230 */
+	CALL(sys_fgetxattr)
+	CALL(sys_listxattr)
+	CALL(sys_llistxattr)
+	CALL(sys_flistxattr)
+	CALL(sys_removexattr)		/* 235 */
+	CALL(sys_lremovexattr)
+	CALL(sys_fremovexattr)
+	CALL(sys_tkill)
+	CALL(sys_sendfile64)
+	CALL(sys_futex)			/* 240 */
+	CALL(sys_sched_setaffinity)
+	CALL(sys_sched_getaffinity)
+	CALL(sys_ni_syscall)
+	CALL(sys_ni_syscall)
+	CALL(sys_io_setup)		/* 245 */
+	CALL(sys_io_destroy)
+	CALL(sys_io_getevents)
+	CALL(sys_io_submit)
+	CALL(sys_io_cancel)
+	CALL(sys_fadvise64)		/* 250 */
+	CALL(sys_ni_syscall)
+	CALL(sys_exit_group)
+	CALL(sys_lookup_dcookie)
+	CALL(sys_epoll_create)
+	CALL(sys_epoll_ctl)		/* 255 */
+	CALL(sys_epoll_wait)
+	CALL(sys_ni_syscall)		/* sys_remap_file_pages */
+	CALL(sys_set_tid_address)
+	CALL(sys_timer_create)
+	CALL(sys_timer_settime)		/* 260 */
+	CALL(sys_timer_gettime)
+	CALL(sys_timer_getoverrun)
+	CALL(sys_timer_delete)
+	CALL(sys_clock_settime)
+	CALL(sys_clock_gettime)		/* 265 */
+	CALL(sys_clock_getres)
+	CALL(sys_clock_nanosleep)
+	CALL(sys_statfs64)
+	CALL(sys_fstatfs64)
+	CALL(sys_tgkill)		/* 270 */
+	CALL(sys_utimes)
+	CALL(sys_fadvise64_64)
+	CALL(sys_ni_syscall)		/* sys_vserver */
+	CALL(sys_ni_syscall)
+	CALL(sys_get_mempolicy)		/* 275 */
+	CALL(sys_set_mempolicy)
+	CALL(sys_mq_open)
+	CALL(sys_mq_unlink)
+	CALL(sys_mq_timedsend)
+	CALL(sys_mq_timedreceive)	/* 280 */
+	CALL(sys_mq_notify)
+	CALL(sys_mq_getsetattr)
+	CALL(sys_waitid)
+	CALL(sys_ni_syscall)		/* sys_kexec_load */
+	CALL(sys_add_key) 		/* 285 */
+	CALL(sys_request_key)
+	CALL(sys_keyctl)
+	CALL(sys_ioprio_set)
+	CALL(sys_ioprio_get)		/* 290 */
+	CALL(sys_inotify_init)
+	CALL(sys_inotify_add_watch)
+	CALL(sys_inotify_rm_watch)
+	CALL(sys_migrate_pages)
+	CALL(sys_openat)		/* 295 */
+	CALL(sys_mkdirat)
+	CALL(sys_mknodat)
+	CALL(sys_fchownat)
+	CALL(sys_futimesat)
+	CALL(sys_fstatat64)		/* 300 */
+	CALL(sys_unlinkat)
+	CALL(sys_renameat)
+	CALL(sys_linkat)
+	CALL(sys_symlinkat)
+	CALL(sys_readlinkat)		/* 305 */
+	CALL(sys_fchmodat)
+	CALL(sys_faccessat)
+	CALL(sys_ni_syscall)		/* sys_pselect6 */
+	CALL(sys_ni_syscall)		/* sys_ppoll */
+	CALL(sys_unshare)		/* 310 */
+	CALL(sys_set_robust_list)
+	CALL(sys_get_robust_list)
+	CALL(sys_splice)
+	CALL(sys_sync_file_range)
+	CALL(sys_tee)			/* 315 */
+	CALL(sys_vmsplice)
+	CALL(sys_ni_syscall)		/* sys_move_pages */
+	CALL(sys_getcpu)
+	CALL(sys_ni_syscall)		/* sys_epoll_pwait */
+	CALL(sys_setns)			/* 320 */
diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S
index 03d356d96e5..3253fed42ac 100644
--- a/arch/h8300/kernel/vmlinux.lds.S
+++ b/arch/h8300/kernel/vmlinux.lds.S
@@ -132,10 +132,12 @@ SECTIONS
         {
 	. = ALIGN(0x4) ;
 	__sbss = . ;
+	___bss_start = . ;
 		*(.bss*)
 	. = ALIGN(0x4) ;
 		*(COMMON)
 	. = ALIGN(0x4) ;
+	___bss_stop = . ;
 	__ebss = . ;
 	__end = . ;
 	__ramstart = .;
diff --git a/arch/h8300/lib/abs.S b/arch/h8300/lib/abs.S
index cabdd46b41d..ddd1fb3d01a 100644
--- a/arch/h8300/lib/abs.S
+++ b/arch/h8300/lib/abs.S
@@ -9,10 +9,10 @@
 	.h8300s
 #endif
 	.text
-.global SYMBOL_NAME(abs)
+.global _abs
 
 ;;; int abs(int n)
-SYMBOL_NAME_LABEL(abs)
+_abs:
 	mov.l	er0,er0
 	bpl	1f
 	neg.l	er0
diff --git a/arch/h8300/lib/memcpy.S b/arch/h8300/lib/memcpy.S
index fdcbc1ee673..cad325e2c0e 100644
--- a/arch/h8300/lib/memcpy.S
+++ b/arch/h8300/lib/memcpy.S
@@ -10,10 +10,10 @@
 #endif
 
 	.text
-.global SYMBOL_NAME(memcpy)
+.global _memcpy
 
 ;;; void *memcpy(void *to, void *from, size_t n)
-SYMBOL_NAME_LABEL(memcpy)
+_memcpy:
 	mov.l	er2,er2
 	bne	1f
 	rts	
diff --git a/arch/h8300/lib/memset.S b/arch/h8300/lib/memset.S
index 59abdf9485a..4549a64c5b7 100644
--- a/arch/h8300/lib/memset.S
+++ b/arch/h8300/lib/memset.S
@@ -10,13 +10,13 @@
 #endif
 	.text
 
-.global	SYMBOL_NAME(memset)
+.global	_memset
 
 ;;void *memset(*ptr, int c, size_t count)
 ;; ptr = er0
 ;; c   = er1(r1l)
 ;; count = er2
-SYMBOL_NAME_LABEL(memset)
+_memset:
 	btst	#0,r0l
 	beq	2f
 
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c
index ff349d70a29..6c1251e491a 100644
--- a/arch/h8300/mm/init.c
+++ b/arch/h8300/mm/init.c
@@ -121,47 +121,27 @@ void __init paging_init(void)
 
 void __init mem_init(void)
 {
-	int codek = 0, datak = 0, initk = 0;
-	/* DAVIDM look at setup memory map generically with reserved area */
-	unsigned long tmp;
-	extern unsigned long  _ramend, _ramstart;
-	unsigned long len = &_ramend - &_ramstart;
-	unsigned long start_mem = memory_start; /* DAVIDM - these must start at end of kernel */
-	unsigned long end_mem   = memory_end; /* DAVIDM - this must not include kernel stack at top */
+	unsigned long codesize = _etext - _stext;
 
-#ifdef DEBUG
-	printk(KERN_DEBUG "Mem_init: start=%lx, end=%lx\n", start_mem, end_mem);
-#endif
+	pr_devel("Mem_init: start=%lx, end=%lx\n", memory_start, memory_end);
 
-	end_mem &= PAGE_MASK;
-	high_memory = (void *) end_mem;
-
-	start_mem = PAGE_ALIGN(start_mem);
-	max_mapnr = num_physpages = MAP_NR(high_memory);
+	high_memory = (void *) (memory_end & PAGE_MASK);
+	max_mapnr = MAP_NR(high_memory);
 
 	/* this will put all low memory onto the freelists */
-	totalram_pages = free_all_bootmem();
-
-	codek = (_etext - _stext) >> 10;
-	datak = (__bss_stop - _sdata) >> 10;
-	initk = (__init_begin - __init_end) >> 10;
-
-	tmp = nr_free_pages() << PAGE_SHIFT;
-	printk(KERN_INFO "Memory available: %luk/%luk RAM, %luk/%luk ROM (%dk kernel code, %dk data)\n",
-	       tmp >> 10,
-	       len >> 10,
-	       (rom_length > 0) ? ((rom_length >> 10) - codek) : 0,
-	       rom_length >> 10,
-	       codek,
-	       datak
-	       );
+	free_all_bootmem();
+
+	mem_init_print_info(NULL);
+	if (rom_length > 0 && rom_length > codesize)
+		pr_info("Memory available: %luK/%luK ROM\n",
+			(rom_length - codesize) >> 10, rom_length >> 10);
 }
 
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, 0, "initrd");
+	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
 
@@ -169,7 +149,7 @@ void
 free_initmem(void)
 {
 #ifdef CONFIG_RAMKERNEL
-	free_initmem_default(0);
+	free_initmem_default(-1);
 #endif
 }
 
diff --git a/arch/h8300/platform/h8300h/aki3068net/crt0_ram.S b/arch/h8300/platform/h8300h/aki3068net/crt0_ram.S
index ecaeb31ae9a..b2ad0f2d041 100644
--- a/arch/h8300/platform/h8300h/aki3068net/crt0_ram.S
+++ b/arch/h8300/platform/h8300h/aki3068net/crt0_ram.S
@@ -22,10 +22,10 @@
 #define RAMEND CONFIG_BLKDEV_RESERVE_ADDRESS
 #endif
 	
-	.global SYMBOL_NAME(_start)
-	.global SYMBOL_NAME(command_line)
-	.global SYMBOL_NAME(_platform_gpio_table)
-	.global SYMBOL_NAME(_target_name)
+	.global __start
+	.global _command_line
+	.global __platform_gpio_table
+	.global __target_name
 	
 	.h8300h
 
@@ -33,7 +33,7 @@
 	.file	"crt0_ram.S"
 
 	/* CPU Reset entry */
-SYMBOL_NAME_LABEL(_start)
+__start:
 	mov.l	#RAMEND,sp
 	ldc	#0x80,ccr
 
@@ -59,13 +59,13 @@ SYMBOL_NAME_LABEL(_start)
 
 	/* copy kernel commandline */
 	mov.l	#COMMAND_START,er5
-	mov.l	#SYMBOL_NAME(command_line),er6
+	mov.l	#_command_line,er6
 	mov.w	#512,r4
 	eepmov.w
 
 	/* uClinux kernel start */
 	ldc	#0x90,ccr	/* running kernel */
-	mov.l	#SYMBOL_NAME(init_thread_union),sp
+	mov.l	#_init_thread_union,sp
 	add.l	#0x2000,sp
 	jsr	@_start_kernel
 _exit:
@@ -107,4 +107,4 @@ __target_name:
 	.asciz	"AE-3068"
 	
 	.section .bootvec,"ax"
-	jmp	@SYMBOL_NAME(_start)
+	jmp	@__start
diff --git a/arch/h8300/platform/h8300h/generic/crt0_ram.S b/arch/h8300/platform/h8300h/generic/crt0_ram.S
index 80d0e16a449..5ab7d9c1291 100644
--- a/arch/h8300/platform/h8300h/generic/crt0_ram.S
+++ b/arch/h8300/platform/h8300h/generic/crt0_ram.S
@@ -22,10 +22,10 @@
 #define RAMEND CONFIG_BLKDEV_RESERVE_ADDRESS
 #endif
 	
-	.global SYMBOL_NAME(_start)
-	.global SYMBOL_NAME(command_line)
-	.global SYMBOL_NAME(_platform_gpio_table)
-	.global SYMBOL_NAME(_target_name)
+	.global __start
+	.global _command_line
+	.global __platform_gpio_table
+	.global __target_name
 	
 	.h8300h
 
@@ -33,7 +33,7 @@
 	.file	"crt0_ram.S"
 
 	/* CPU Reset entry */
-SYMBOL_NAME_LABEL(_start)
+__start:
 	mov.l	#RAMEND,sp
 	ldc	#0x80,ccr
 
@@ -59,13 +59,13 @@ SYMBOL_NAME_LABEL(_start)
 
 	/* copy kernel commandline */
 	mov.l	#COMMAND_START,er5
-	mov.l	#SYMBOL_NAME(command_line),er6
+	mov.l	#_command_line,er6
 	mov.w	#512,r4
 	eepmov.w
 
 	/* uClinux kernel start */
 	ldc	#0x90,ccr	/* running kernel */
-	mov.l	#SYMBOL_NAME(init_thread_union),sp
+	mov.l	#_init_thread_union,sp
 	add.l	#0x2000,sp
 	jsr	@_start_kernel
 _exit:
diff --git a/arch/h8300/platform/h8300h/generic/crt0_rom.S b/arch/h8300/platform/h8300h/generic/crt0_rom.S
index 120add7ca83..dda1dfa15a5 100644
--- a/arch/h8300/platform/h8300h/generic/crt0_rom.S
+++ b/arch/h8300/platform/h8300h/generic/crt0_rom.S
@@ -12,17 +12,17 @@
 
 #include <asm/linkage.h>
 	
-	.global SYMBOL_NAME(_start)
-	.global SYMBOL_NAME(_command_line)
-	.global SYMBOL_NAME(_platform_gpio_table)
-	.global SYMBOL_NAME(_target_name)
+	.global __start
+	.global __command_line
+	.global __platform_gpio_table
+	.global __target_name
 	
 	.h8300h
 	.section .text
 	.file	"crt0_rom.S"
 
 	/* CPU Reset entry */
-SYMBOL_NAME_LABEL(_start)
+__start:
 	mov.l	#__ramend,sp
 	ldc	#0x80,ccr
 
@@ -60,13 +60,13 @@ SYMBOL_NAME_LABEL(_start)
 
 	/* copy kernel commandline */
 	mov.l	#COMMAND_START,er5
-	mov.l	#SYMBOL_NAME(_command_line),er6
+	mov.l	#__command_line,er6
 	mov.w	#512,r4
 	eepmov.w
 
 	/* linux kernel start */
 	ldc	#0x90,ccr	/* running kernel */
-	mov.l	#SYMBOL_NAME(init_thread_union),sp
+	mov.l	#_init_thread_union,sp
 	add.l	#0x2000,sp
 	jsr	@_start_kernel
 _exit:
diff --git a/arch/h8300/platform/h8300h/h8max/crt0_ram.S b/arch/h8300/platform/h8300h/h8max/crt0_ram.S
index efcbefb91b6..6a0d4e2d9ec 100644
--- a/arch/h8300/platform/h8300h/h8max/crt0_ram.S
+++ b/arch/h8300/platform/h8300h/h8max/crt0_ram.S
@@ -22,10 +22,10 @@
 #define RAMEND CONFIG_BLKDEV_RESERVE_ADDRESS
 #endif
 	
-	.global SYMBOL_NAME(_start)
-	.global SYMBOL_NAME(command_line)
-	.global SYMBOL_NAME(_platform_gpio_table)
-	.global SYMBOL_NAME(_target_name)
+	.global __start
+	.global _command_line
+	.global __platform_gpio_table
+	.global __target_name
 	
 	.h8300h
 
@@ -33,7 +33,7 @@
 	.file	"crt0_ram.S"
 
 	/* CPU Reset entry */
-SYMBOL_NAME_LABEL(_start)
+__start:
 	mov.l	#RAMEND,sp
 	ldc	#0x80,ccr
 
@@ -59,13 +59,13 @@ SYMBOL_NAME_LABEL(_start)
 
 	/* copy kernel commandline */
 	mov.l	#COMMAND_START,er5
-	mov.l	#SYMBOL_NAME(command_line),er6
+	mov.l	#_command_line,er6
 	mov.w	#512,r4
 	eepmov.w
 
 	/* uClinux kernel start */
 	ldc	#0x90,ccr	/* running kernel */
-	mov.l	#SYMBOL_NAME(init_thread_union),sp
+	mov.l	#_init_thread_union,sp
 	add.l	#0x2000,sp
 	jsr	@_start_kernel
 _exit:
@@ -107,4 +107,4 @@ __target_name:
 	.asciz	"H8MAX"
 	
 	.section .bootvec,"ax"
-	jmp	@SYMBOL_NAME(_start)
+	jmp	@__start
diff --git a/arch/h8300/platform/h8s/edosk2674/crt0_ram.S b/arch/h8300/platform/h8s/edosk2674/crt0_ram.S
index d12b0debe47..5ed191b37cd 100644
--- a/arch/h8300/platform/h8s/edosk2674/crt0_ram.S
+++ b/arch/h8300/platform/h8s/edosk2674/crt0_ram.S
@@ -23,10 +23,10 @@
 #define RAMEND CONFIG_BLKDEV_RESERVE_ADDRESS
 #endif
 	
-	.global SYMBOL_NAME(_start)
-	.global SYMBOL_NAME(_command_line)
-	.global SYMBOL_NAME(_platform_gpio_table)
-	.global SYMBOL_NAME(_target_name)
+	.global __start
+	.global __command_line
+	.global __platform_gpio_table
+	.global __target_name
 	
 	.h8300s
 
@@ -34,7 +34,7 @@
 	.file	"crt0_ram.S"
 
 	/* CPU Reset entry */
-SYMBOL_NAME_LABEL(_start)
+__start:
 	mov.l	#RAMEND,sp
 	ldc	#0x80,ccr
 	ldc	#0x00,exr
@@ -66,13 +66,13 @@ SYMBOL_NAME_LABEL(_start)
 
 	/* copy kernel commandline */
 	mov.l	#COMMAND_START,er5
-	mov.l	#SYMBOL_NAME(command_line),er6
+	mov.l	#_command_line,er6
 	mov.w	#512,r4
 	eepmov.w
 
 	/* uClinux kernel start */
 	ldc	#0x90,ccr	/* running kernel */
-	mov.l	#SYMBOL_NAME(init_thread_union),sp
+	mov.l	#_init_thread_union,sp
 	add.l	#0x2000,sp
 	jsr	@_start_kernel
 _exit:
@@ -127,4 +127,4 @@ __target_name:
 	.asciz	"EDOSK-2674"
 	
 	.section .bootvec,"ax"
-	jmp	@SYMBOL_NAME(_start)
+	jmp	@__start
diff --git a/arch/h8300/platform/h8s/edosk2674/crt0_rom.S b/arch/h8300/platform/h8s/edosk2674/crt0_rom.S
index c03d23c6fe1..06d1d7f324c 100644
--- a/arch/h8300/platform/h8s/edosk2674/crt0_rom.S
+++ b/arch/h8300/platform/h8s/edosk2674/crt0_rom.S
@@ -13,17 +13,17 @@
 #include <asm/linkage.h>
 #include <asm/regs267x.h>
 		
-	.global SYMBOL_NAME(_start)
-	.global SYMBOL_NAME(_command_line)
-	.global SYMBOL_NAME(_platform_gpio_table)
-	.global SYMBOL_NAME(_target_name)
+	.global __start
+	.global __command_line
+	.global __platform_gpio_table
+	.global __target_name
 	
 	.h8300s
 	.section .text
 	.file	"crt0_rom.S"
 
 	/* CPU Reset entry */
-SYMBOL_NAME_LABEL(_start)
+__start:
 	mov.l	#__ramend,sp
 	ldc	#0x80,ccr
 	ldc	#0,exr
@@ -82,13 +82,13 @@ SYMBOL_NAME_LABEL(_start)
 
 	/* copy kernel commandline */
 	mov.l	#COMMAND_START,er5
-	mov.l	#SYMBOL_NAME(_command_line),er6
+	mov.l	#__command_line,er6
 	mov.w	#512,r4
 	eepmov.w
 
 	/* linux kernel start */
 	ldc	#0x90,ccr	/* running kernel */
-	mov.l	#SYMBOL_NAME(init_thread_union),sp
+	mov.l	#_init_thread_union,sp
 	add.l	#0x2000,sp
 	jsr	@_start_kernel
 _exit:
diff --git a/arch/h8300/platform/h8s/generic/crt0_ram.S b/arch/h8300/platform/h8s/generic/crt0_ram.S
index b0454106997..7018915de74 100644
--- a/arch/h8300/platform/h8s/generic/crt0_ram.S
+++ b/arch/h8300/platform/h8s/generic/crt0_ram.S
@@ -23,10 +23,10 @@
 #define RAMEND CONFIG_BLKDEV_RESERVE_ADDRESS
 #endif
 	
-	.global SYMBOL_NAME(_start)
-	.global SYMBOL_NAME(_command_line)
-	.global SYMBOL_NAME(_platform_gpio_table)
-	.global SYMBOL_NAME(_target_name)
+	.global __start
+	.global __command_line
+	.global __platform_gpio_table
+	.global __target_name
 	
 	.h8300s
 
@@ -34,7 +34,7 @@
 	.file	"crt0_ram.S"
 
 	/* CPU Reset entry */
-SYMBOL_NAME_LABEL(_start)
+__start:
 	mov.l	#RAMEND,sp
 	ldc	#0x80,ccr
 	ldc	#0x00,exr
@@ -63,13 +63,13 @@ SYMBOL_NAME_LABEL(_start)
 
 	/* copy kernel commandline */
 	mov.l	#COMMAND_START,er5
-	mov.l	#SYMBOL_NAME(command_line),er6
+	mov.l	#_command_line,er6
 	mov.w	#512,r4
 	eepmov.w
 
 	/* uClinux kernel start */
 	ldc	#0x90,ccr	/* running kernel */
-	mov.l	#SYMBOL_NAME(init_thread_union),sp
+	mov.l	#_init_thread_union,sp
 	add.l	#0x2000,sp
 	jsr	@_start_kernel
 _exit:
@@ -124,4 +124,4 @@ __target_name:
 	.asciz	"generic"
 	
 	.section .bootvec,"ax"
-	jmp	@SYMBOL_NAME(_start)
+	jmp	@__start
diff --git a/arch/h8300/platform/h8s/generic/crt0_rom.S b/arch/h8300/platform/h8s/generic/crt0_rom.S
index 95b6f2898f5..623ba782819 100644
--- a/arch/h8300/platform/h8s/generic/crt0_rom.S
+++ b/arch/h8300/platform/h8s/generic/crt0_rom.S
@@ -13,17 +13,17 @@
 #include <asm/linkage.h>
 #include <asm/regs267x.h>
 	
-	.global SYMBOL_NAME(_start)
-	.global SYMBOL_NAME(_command_line)
-	.global SYMBOL_NAME(_platform_gpio_table)
-	.global SYMBOL_NAME(_target_name)
+	.global __start
+	.global __command_line
+	.global __platform_gpio_table
+	.global __target_name
 	
 	.h8300s
 	.section .text
 	.file	"crt0_rom.S"
 
 	/* CPU Reset entry */
-SYMBOL_NAME_LABEL(_start)
+__start:
 	mov.l	#__ramend,sp
 	ldc	#0x80,ccr
 	ldc	#0,exr
@@ -61,7 +61,7 @@ SYMBOL_NAME_LABEL(_start)
 
 	/* linux kernel start */
 	ldc	#0x90,ccr	/* running kernel */
-	mov.l	#SYMBOL_NAME(init_thread_union),sp
+	mov.l	#_init_thread_union,sp
 	add.l	#0x2000,sp
 	jsr	@_start_kernel
 _exit:
diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c
index 2561d259a29..88977e42af0 100644
--- a/arch/hexagon/mm/init.c
+++ b/arch/hexagon/mm/init.c
@@ -70,10 +70,8 @@ unsigned long long kmap_generation;
 void __init mem_init(void)
 {
 	/*  No idea where this is actually declared.  Seems to evade LXR.  */
-	totalram_pages += free_all_bootmem();
-	num_physpages = bootmem_lastpg-ARCH_PFN_OFFSET;
-
-	printk(KERN_INFO "totalram_pages = %ld\n", totalram_pages);
+	free_all_bootmem();
+	mem_init_print_info(NULL);
 
 	/*
 	 *  To-Do:  someone somewhere should wipe out the bootmem map
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index bcda5b2d121..d43daf192b2 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -2042,7 +2042,8 @@ sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle)
 #endif
 
 static int __init
-acpi_sba_ioc_add(struct acpi_device *device)
+acpi_sba_ioc_add(struct acpi_device *device,
+		 const struct acpi_device_id *not_used)
 {
 	struct ioc *ioc;
 	acpi_status status;
@@ -2090,14 +2091,18 @@ static const struct acpi_device_id hp_ioc_iommu_device_ids[] = {
 	{"HWP0004", 0},
 	{"", 0},
 };
-static struct acpi_driver acpi_sba_ioc_driver = {
-	.name		= "IOC IOMMU Driver",
-	.ids		= hp_ioc_iommu_device_ids,
-	.ops		= {
-		.add	= acpi_sba_ioc_add,
-	},
+static struct acpi_scan_handler acpi_sba_ioc_handler = {
+	.ids	= hp_ioc_iommu_device_ids,
+	.attach	= acpi_sba_ioc_add,
 };
 
+static int __init acpi_sba_ioc_init_acpi(void)
+{
+	return acpi_scan_add_handler(&acpi_sba_ioc_handler);
+}
+/* This has to run before acpi_scan_init(). */
+arch_initcall(acpi_sba_ioc_init_acpi);
+
 extern struct dma_map_ops swiotlb_dma_ops;
 
 static int __init
@@ -2122,7 +2127,10 @@ sba_init(void)
 	}
 #endif
 
-	acpi_bus_register_driver(&acpi_sba_ioc_driver);
+	/*
+	 * ioc_list should be populated by the acpi_sba_ioc_handler's .attach()
+	 * routine, but that only happens if acpi_scan_init() has already run.
+	 */
 	if (!ioc_list) {
 #ifdef CONFIG_IA64_GENERIC
 		/*
diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c
index 331de723c67..3a428f19a00 100644
--- a/arch/ia64/hp/sim/simscsi.c
+++ b/arch/ia64/hp/sim/simscsi.c
@@ -88,8 +88,8 @@ simscsi_setup (char *s)
 	if (strlen(s) > MAX_ROOT_LEN) {
 		printk(KERN_ERR "simscsi_setup: prefix too long---using default %s\n",
 		       simscsi_root);
-	}
-	simscsi_root = s;
+	} else
+		simscsi_root = s;
 	return 1;
 }
 
diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h
index bed73a643a5..f41e66d65e3 100644
--- a/arch/ia64/include/asm/mutex.h
+++ b/arch/ia64/include/asm/mutex.h
@@ -29,17 +29,15 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
  *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
  *                                 from 1 to a 0 value
  *  @count: pointer of type atomic_t
- *  @fail_fn: function to call if the original value was not 1
  *
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function returns 0 if the fastpath succeeds,
- * or anything the slow path function returns.
+ * Change the count from 1 to a value lower than 1. This function returns 0
+ * if the fastpath succeeds, or -1 otherwise.
  */
 static inline int
-__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
+__mutex_fastpath_lock_retval(atomic_t *count)
 {
 	if (unlikely(ia64_fetchadd4_acq(count, -1) != 1))
-		return fail_fn(count);
+		return -1;
 	return 0;
 }
 
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 5e04b591e42..80775f55f03 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -89,9 +89,9 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
 #define pci_legacy_read platform_pci_legacy_read
 #define pci_legacy_write platform_pci_legacy_write
 
-struct pci_window {
-	struct resource resource;
-	u64 offset;
+struct iospace_resource {
+	struct list_head list;
+	struct resource res;
 };
 
 struct pci_controller {
@@ -100,12 +100,10 @@ struct pci_controller {
 	int segment;
 	int node;		/* nearest node with memory or -1 for global allocation */
 
-	unsigned int windows;
-	struct pci_window *window;
-
 	void *platform_data;
 };
 
+
 #define PCI_CONTROLLER(busdev) ((struct pci_controller *) busdev->sysdata)
 #define pci_domain_nr(busdev)    (PCI_CONTROLLER(busdev)->segment)
 
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 335eb07480f..5eb71d22c3d 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -807,7 +807,7 @@ int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
  *  ACPI based hotplug CPU support
  */
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
-static __cpuinit
+static
 int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 {
 #ifdef CONFIG_ACPI_NUMA
@@ -882,7 +882,7 @@ __init void prefill_possible_map(void)
 		set_cpu_possible(i, true);
 }
 
-static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
+static int _acpi_map_lsapic(acpi_handle handle, int *pcpu)
 {
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *obj;
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index f034563aeae..51bce594eb8 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -1116,11 +1116,6 @@ efi_memmap_init(u64 *s, u64 *e)
 		if (!is_memory_available(md))
 			continue;
 
-#ifdef CONFIG_CRASH_DUMP
-		/* saved_max_pfn should ignore max_addr= command line arg */
-		if (saved_max_pfn < (efi_md_end(md) >> PAGE_SHIFT))
-			saved_max_pfn = (efi_md_end(md) >> PAGE_SHIFT);
-#endif
 		/*
 		 * Round ends inward to granule boundaries
 		 * Give trimmings to uncached allocator
diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c
index 2d67317a1ec..f59c0b844e8 100644
--- a/arch/ia64/kernel/err_inject.c
+++ b/arch/ia64/kernel/err_inject.c
@@ -225,17 +225,17 @@ static struct attribute_group err_inject_attr_group = {
 	.name = "err_inject"
 };
 /* Add/Remove err_inject interface for CPU device */
-static int __cpuinit err_inject_add_dev(struct device * sys_dev)
+static int err_inject_add_dev(struct device *sys_dev)
 {
 	return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group);
 }
 
-static int __cpuinit err_inject_remove_dev(struct device * sys_dev)
+static int err_inject_remove_dev(struct device *sys_dev)
 {
 	sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group);
 	return 0;
 }
-static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb,
+static int err_inject_cpu_callback(struct notifier_block *nfb,
 		unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
@@ -256,7 +256,7 @@ static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata err_inject_cpu_notifier =
+static struct notifier_block err_inject_cpu_notifier =
 {
 	.notifier_call = err_inject_cpu_callback,
 };
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 9be4e497f3d..991ca336b8a 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1035,7 +1035,7 @@ END(ia64_delay_loop)
  * Return a CPU-local timestamp in nano-seconds.  This timestamp is
  * NOT synchronized across CPUs its return value must never be
  * compared against the values returned on another CPU.  The usage in
- * kernel/sched.c ensures that.
+ * kernel/sched/core.c ensures that.
  *
  * The return-value of sched_clock() is NOT supposed to wrap-around.
  * If it did, it would cause some scheduling hiccups (at the worst).
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index d7396dbb07b..b8edfa75a83 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -631,7 +631,7 @@ ia64_mca_register_cpev (int cpev)
  * Outputs
  *	None
  */
-void __cpuinit
+void
 ia64_mca_cmc_vector_setup (void)
 {
 	cmcv_reg_t	cmcv;
@@ -1814,7 +1814,7 @@ static struct irqaction mca_cpep_irqaction = {
  * format most of the fields.
  */
 
-static void __cpuinit
+static void
 format_mca_init_stack(void *mca_data, unsigned long offset,
 		const char *type, int cpu)
 {
@@ -1844,7 +1844,7 @@ static void * __init_refok mca_bootmem(void)
 }
 
 /* Do per-CPU MCA-related initialization.  */
-void __cpuinit
+void
 ia64_mca_cpu_init(void *cpu_data)
 {
 	void *pal_vaddr;
@@ -1896,7 +1896,7 @@ ia64_mca_cpu_init(void *cpu_data)
 							      PAGE_KERNEL));
 }
 
-static void __cpuinit ia64_mca_cmc_vector_adjust(void *dummy)
+static void ia64_mca_cmc_vector_adjust(void *dummy)
 {
 	unsigned long flags;
 
@@ -1906,7 +1906,7 @@ static void __cpuinit ia64_mca_cmc_vector_adjust(void *dummy)
 	local_irq_restore(flags);
 }
 
-static int __cpuinit mca_cpu_callback(struct notifier_block *nfb,
+static int mca_cpu_callback(struct notifier_block *nfb,
 				      unsigned long action,
 				      void *hcpu)
 {
@@ -1922,7 +1922,7 @@ static int __cpuinit mca_cpu_callback(struct notifier_block *nfb,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block mca_cpu_notifier __cpuinitdata = {
+static struct notifier_block mca_cpu_notifier = {
 	.notifier_call = mca_cpu_callback
 };
 
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index c93420c9740..d288cde9360 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -30,7 +30,7 @@ EXPORT_SYMBOL(cpu_to_node_map);
 cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
 EXPORT_SYMBOL(node_to_cpu_mask);
 
-void __cpuinit map_cpu_to_node(int cpu, int nid)
+void map_cpu_to_node(int cpu, int nid)
 {
 	int oldnid;
 	if (nid < 0) { /* just initialize by zero */
@@ -51,7 +51,7 @@ void __cpuinit map_cpu_to_node(int cpu, int nid)
 	return;
 }
 
-void __cpuinit unmap_cpu_from_node(int cpu, int nid)
+void unmap_cpu_from_node(int cpu, int nid)
 {
 	WARN_ON(!cpu_isset(cpu, node_to_cpu_mask[nid]));
 	WARN_ON(cpu_to_node_map[cpu] != nid);
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index 2b3c2d79256..ab333284f4b 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -932,7 +932,7 @@ static const struct file_operations proc_palinfo_fops = {
 	.release	= single_release,
 };
 
-static void __cpuinit
+static void
 create_palinfo_proc_entries(unsigned int cpu)
 {
 	pal_func_cpu_u_t f;
@@ -962,7 +962,7 @@ remove_palinfo_proc_entries(unsigned int hcpu)
 	remove_proc_subtree(cpustr, palinfo_dir);
 }
 
-static int __cpuinit palinfo_cpu_callback(struct notifier_block *nfb,
+static int palinfo_cpu_callback(struct notifier_block *nfb,
 					unsigned long action, void *hcpu)
 {
 	unsigned int hotcpu = (unsigned long)hcpu;
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
index 1ddcfe5ef35..992c1098c52 100644
--- a/arch/ia64/kernel/pci-dma.c
+++ b/arch/ia64/kernel/pci-dma.c
@@ -33,15 +33,6 @@ int force_iommu __read_mostly;
 
 int iommu_pass_through;
 
-/* Dummy device used for NULL arguments (normally ISA). Better would
-   be probably a smaller DMA mask, but this is bug-to-bug compatible
-   to i386. */
-struct device fallback_dev = {
-	.init_name = "fallback device",
-	.coherent_dma_mask = DMA_BIT_MASK(32),
-	.dma_mask = &fallback_dev.coherent_dma_mask,
-};
-
 extern struct dma_map_ops intel_dma_ops;
 
 static int __init pci_iommu_init(void)
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 9ea25fce06d..5a9ff1c3c3e 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -5647,24 +5647,8 @@ pfm_proc_show_header(struct seq_file *m)
 
 	list_for_each(pos, &pfm_buffer_fmt_list) {
 		entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
-		seq_printf(m, "format                    : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n",
-			entry->fmt_uuid[0],
-			entry->fmt_uuid[1],
-			entry->fmt_uuid[2],
-			entry->fmt_uuid[3],
-			entry->fmt_uuid[4],
-			entry->fmt_uuid[5],
-			entry->fmt_uuid[6],
-			entry->fmt_uuid[7],
-			entry->fmt_uuid[8],
-			entry->fmt_uuid[9],
-			entry->fmt_uuid[10],
-			entry->fmt_uuid[11],
-			entry->fmt_uuid[12],
-			entry->fmt_uuid[13],
-			entry->fmt_uuid[14],
-			entry->fmt_uuid[15],
-			entry->fmt_name);
+		seq_printf(m, "format                    : %16phD %s\n",
+			   entry->fmt_uuid, entry->fmt_name);
 	}
 	spin_unlock(&pfm_buffer_fmt_lock);
 
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index 4bc580af67b..960a396f592 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -568,7 +568,7 @@ static const struct file_operations salinfo_data_fops = {
 	.llseek  = default_llseek,
 };
 
-static int __cpuinit
+static int
 salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 {
 	unsigned int i, cpu = (unsigned long)hcpu;
@@ -609,7 +609,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu
 	return NOTIFY_OK;
 }
 
-static struct notifier_block salinfo_cpu_notifier __cpuinitdata =
+static struct notifier_block salinfo_cpu_notifier =
 {
 	.notifier_call = salinfo_cpu_callback,
 	.priority = 0,
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 13bfdd22afc..4fc2e9569bb 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -748,7 +748,7 @@ const struct seq_operations cpuinfo_op = {
 #define MAX_BRANDS	8
 static char brandname[MAX_BRANDS][128];
 
-static char * __cpuinit
+static char *
 get_model_name(__u8 family, __u8 model)
 {
 	static int overflow;
@@ -778,7 +778,7 @@ get_model_name(__u8 family, __u8 model)
 	return "Unknown";
 }
 
-static void __cpuinit
+static void
 identify_cpu (struct cpuinfo_ia64 *c)
 {
 	union {
@@ -850,7 +850,7 @@ identify_cpu (struct cpuinfo_ia64 *c)
  * 2. the minimum of the i-cache stride sizes for "flush_icache_range()".
  * 3. the minimum of the cache stride sizes for "clflush_cache_range()".
  */
-static void __cpuinit
+static void
 get_cache_info(void)
 {
 	unsigned long line_size, max = 1;
@@ -915,10 +915,10 @@ get_cache_info(void)
  * cpu_init() initializes state that is per-CPU.  This function acts
  * as a 'CPU state barrier', nothing should get across.
  */
-void __cpuinit
+void
 cpu_init (void)
 {
-	extern void __cpuinit ia64_mmu_init (void *);
+	extern void ia64_mmu_init(void *);
 	static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG;
 	unsigned long num_phys_stacked;
 	pal_vm_info_2_u_t vmi;
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 8d87168d218..547a48d78bd 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -351,7 +351,7 @@ static inline void smp_setup_percpu_timer(void)
 {
 }
 
-static void __cpuinit
+static void
 smp_callin (void)
 {
 	int cpuid, phys_id, itc_master;
@@ -442,7 +442,7 @@ smp_callin (void)
 /*
  * Activate a secondary processor.  head.S calls this.
  */
-int __cpuinit
+int
 start_secondary (void *unused)
 {
 	/* Early console may use I/O ports */
@@ -459,7 +459,7 @@ start_secondary (void *unused)
 	return 0;
 }
 
-static int __cpuinit
+static int
 do_boot_cpu (int sapicid, int cpu, struct task_struct *idle)
 {
 	int timeout;
@@ -728,7 +728,7 @@ static inline void set_cpu_sibling_map(int cpu)
 	}
 }
 
-int __cpuinit
+int
 __cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int ret;
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index dc00b2c1b42..ca69a5a96dc 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -135,11 +135,11 @@ struct cpu_cache_info {
 	struct kobject kobj;
 };
 
-static struct cpu_cache_info	all_cpu_cache_info[NR_CPUS] __cpuinitdata;
+static struct cpu_cache_info	all_cpu_cache_info[NR_CPUS];
 #define LEAF_KOBJECT_PTR(x,y)    (&all_cpu_cache_info[x].cache_leaves[y])
 
 #ifdef CONFIG_SMP
-static void __cpuinit cache_shared_cpu_map_setup( unsigned int cpu,
+static void cache_shared_cpu_map_setup(unsigned int cpu,
 		struct cache_info * this_leaf)
 {
 	pal_cache_shared_info_t	csi;
@@ -174,7 +174,7 @@ static void __cpuinit cache_shared_cpu_map_setup( unsigned int cpu,
 				&csi) == PAL_STATUS_SUCCESS);
 }
 #else
-static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu,
+static void cache_shared_cpu_map_setup(unsigned int cpu,
 		struct cache_info * this_leaf)
 {
 	cpu_set(cpu, this_leaf->shared_cpu_map);
@@ -298,7 +298,7 @@ static struct kobj_type cache_ktype_percpu_entry = {
 	.sysfs_ops	= &cache_sysfs_ops,
 };
 
-static void __cpuinit cpu_cache_sysfs_exit(unsigned int cpu)
+static void cpu_cache_sysfs_exit(unsigned int cpu)
 {
 	kfree(all_cpu_cache_info[cpu].cache_leaves);
 	all_cpu_cache_info[cpu].cache_leaves = NULL;
@@ -307,7 +307,7 @@ static void __cpuinit cpu_cache_sysfs_exit(unsigned int cpu)
 	return;
 }
 
-static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu)
+static int cpu_cache_sysfs_init(unsigned int cpu)
 {
 	unsigned long i, levels, unique_caches;
 	pal_cache_config_info_t cci;
@@ -351,7 +351,7 @@ static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu)
 }
 
 /* Add cache interface for CPU device */
-static int __cpuinit cache_add_dev(struct device * sys_dev)
+static int cache_add_dev(struct device *sys_dev)
 {
 	unsigned int cpu = sys_dev->id;
 	unsigned long i, j;
@@ -401,7 +401,7 @@ static int __cpuinit cache_add_dev(struct device * sys_dev)
 }
 
 /* Remove cache interface for CPU device */
-static int __cpuinit cache_remove_dev(struct device * sys_dev)
+static int cache_remove_dev(struct device *sys_dev)
 {
 	unsigned int cpu = sys_dev->id;
 	unsigned long i;
@@ -425,7 +425,7 @@ static int __cpuinit cache_remove_dev(struct device * sys_dev)
  * When a cpu is hot-plugged, do a check and initiate
  * cache kobject if necessary
  */
-static int __cpuinit cache_cpu_callback(struct notifier_block *nfb,
+static int cache_cpu_callback(struct notifier_block *nfb,
 		unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
@@ -445,7 +445,7 @@ static int __cpuinit cache_cpu_callback(struct notifier_block *nfb,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata cache_cpu_notifier =
+static struct notifier_block cache_cpu_notifier =
 {
 	.notifier_call = cache_cpu_callback
 };
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index f7f9f9c6caf..d3636e67a98 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -630,7 +630,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
 		printk(KERN_ERR "  iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
 		       iip, ifa, isr);
 		force_sig(SIGSEGV, current);
-		break;
+		return;
 
 	      case 46:
 		printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 1a4053789d0..18e45ec49bb 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -47,12 +47,13 @@ FORCE : $(obj)/$(offsets-file)
 
 ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
 asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
+KVM := ../../../virt/kvm
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-		coalesced_mmio.o irq_comm.o)
+common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \
+		$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o
 
 ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
-common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o)
+common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o
 endif
 
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 67c59ebec89..da5237d636d 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -156,8 +156,7 @@ static void *cpu_data;
  *
  * Allocate and setup per-cpu data areas.
  */
-void * __cpuinit
-per_cpu_init (void)
+void *per_cpu_init(void)
 {
 	static bool first_time = true;
 	void *cpu0_data = __cpu0_per_cpu;
@@ -295,14 +294,6 @@ find_memory (void)
 	alloc_per_cpu_data();
 }
 
-static int count_pages(u64 start, u64 end, void *arg)
-{
-	unsigned long *count = arg;
-
-	*count += (end - start) >> PAGE_SHIFT;
-	return 0;
-}
-
 /*
  * Set up the page tables.
  */
@@ -313,9 +304,6 @@ paging_init (void)
 	unsigned long max_dma;
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 
-	num_physpages = 0;
-	efi_memmap_walk(count_pages, &num_physpages);
-
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 #ifdef CONFIG_ZONE_DMA
 	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index ae4db4bd6d9..2de08f4d993 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -37,7 +37,6 @@ struct early_node_data {
 	struct ia64_node_data *node_data;
 	unsigned long pernode_addr;
 	unsigned long pernode_size;
-	unsigned long num_physpages;
 #ifdef CONFIG_ZONE_DMA
 	unsigned long num_dma_physpages;
 #endif
@@ -593,7 +592,7 @@ void __init find_memory(void)
  * find_pernode_space() does most of this already, we just need to set
  * local_per_cpu_offset
  */
-void __cpuinit *per_cpu_init(void)
+void *per_cpu_init(void)
 {
 	int cpu;
 	static int first_time = 1;
@@ -732,7 +731,6 @@ static __init int count_node_pages(unsigned long start, unsigned long len, int n
 {
 	unsigned long end = start + len;
 
-	mem_data[node].num_physpages += len >> PAGE_SHIFT;
 #ifdef CONFIG_ZONE_DMA
 	if (start <= __pa(MAX_DMA_ADDRESS))
 		mem_data[node].num_dma_physpages +=
@@ -778,7 +776,6 @@ void __init paging_init(void)
 #endif
 
 	for_each_online_node(node) {
-		num_physpages += mem_data[node].num_physpages;
 		pfn_offset = mem_data[node].min_pfn;
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index d1fe4b40260..b6f7f43424e 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -154,9 +154,8 @@ ia64_init_addr_space (void)
 void
 free_initmem (void)
 {
-	free_reserved_area((unsigned long)ia64_imva(__init_begin),
-			   (unsigned long)ia64_imva(__init_end),
-			   0, "unused kernel");
+	free_reserved_area(ia64_imva(__init_begin), ia64_imva(__init_end),
+			   -1, "unused kernel");
 }
 
 void __init
@@ -546,19 +545,6 @@ int __init register_active_ranges(u64 start, u64 len, int nid)
 	return 0;
 }
 
-static int __init
-count_reserved_pages(u64 start, u64 end, void *arg)
-{
-	unsigned long num_reserved = 0;
-	unsigned long *count = arg;
-
-	for (; start < end; start += PAGE_SIZE)
-		if (PageReserved(virt_to_page(start)))
-			++num_reserved;
-	*count += num_reserved;
-	return 0;
-}
-
 int
 find_max_min_low_pfn (u64 start, u64 end, void *arg)
 {
@@ -597,8 +583,6 @@ __setup("nolwsys", nolwsys_setup);
 void __init
 mem_init (void)
 {
-	long reserved_pages, codesize, datasize, initsize;
-	pg_data_t *pgdat;
 	int i;
 
 	BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE);
@@ -616,27 +600,12 @@ mem_init (void)
 
 #ifdef CONFIG_FLATMEM
 	BUG_ON(!mem_map);
-	max_mapnr = max_low_pfn;
 #endif
 
+	set_max_mapnr(max_low_pfn);
 	high_memory = __va(max_low_pfn * PAGE_SIZE);
-
-	for_each_online_pgdat(pgdat)
-		if (pgdat->bdata->node_bootmem_map)
-			totalram_pages += free_all_bootmem_node(pgdat);
-
-	reserved_pages = 0;
-	efi_memmap_walk(count_reserved_pages, &reserved_pages);
-
-	codesize =  (unsigned long) _etext - (unsigned long) _stext;
-	datasize =  (unsigned long) _edata - (unsigned long) _etext;
-	initsize =  (unsigned long) __init_end - (unsigned long) __init_begin;
-
-	printk(KERN_INFO "Memory: %luk/%luk available (%luk code, %luk reserved, "
-	       "%luk data, %luk init)\n", nr_free_pages() << (PAGE_SHIFT - 10),
-	       num_physpages << (PAGE_SHIFT - 10), codesize >> 10,
-	       reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);
-
+	free_all_bootmem();
+	mem_init_print_info(NULL);
 
 	/*
 	 * For fsyscall entrpoints with no light-weight handler, use the ordinary
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index 4248492b932..ea21d4cad54 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -86,7 +86,7 @@ int __meminit __early_pfn_to_nid(unsigned long pfn)
 	return -1;
 }
 
-void __cpuinit numa_clear_node(int cpu)
+void numa_clear_node(int cpu)
 {
 	unmap_cpu_from_node(cpu, NUMA_NO_NODE);
 }
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index de1474ff0bc..2326790b7d8 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -134,6 +134,10 @@ struct pci_root_info {
 	struct acpi_device *bridge;
 	struct pci_controller *controller;
 	struct list_head resources;
+	struct resource *res;
+	resource_size_t *res_offset;
+	unsigned int res_num;
+	struct list_head io_resources;
 	char *name;
 };
 
@@ -153,7 +157,7 @@ new_space (u64 phys_base, int sparse)
 			return i;
 
 	if (num_io_spaces == MAX_IO_SPACES) {
-		printk(KERN_ERR "PCI: Too many IO port spaces "
+		pr_err("PCI: Too many IO port spaces "
 			"(MAX_IO_SPACES=%lu)\n", MAX_IO_SPACES);
 		return ~0;
 	}
@@ -168,25 +172,22 @@ new_space (u64 phys_base, int sparse)
 static u64 add_io_space(struct pci_root_info *info,
 			struct acpi_resource_address64 *addr)
 {
+	struct iospace_resource *iospace;
 	struct resource *resource;
 	char *name;
 	unsigned long base, min, max, base_port;
 	unsigned int sparse = 0, space_nr, len;
 
-	resource = kzalloc(sizeof(*resource), GFP_KERNEL);
-	if (!resource) {
-		printk(KERN_ERR "PCI: No memory for %s I/O port space\n",
-			info->name);
+	len = strlen(info->name) + 32;
+	iospace = kzalloc(sizeof(*iospace) + len, GFP_KERNEL);
+	if (!iospace) {
+		dev_err(&info->bridge->dev,
+				"PCI: No memory for %s I/O port space\n",
+				info->name);
 		goto out;
 	}
 
-	len = strlen(info->name) + 32;
-	name = kzalloc(len, GFP_KERNEL);
-	if (!name) {
-		printk(KERN_ERR "PCI: No memory for %s I/O port space name\n",
-			info->name);
-		goto free_resource;
-	}
+	name = (char *)(iospace + 1);
 
 	min = addr->minimum;
 	max = min + addr->address_length - 1;
@@ -195,7 +196,7 @@ static u64 add_io_space(struct pci_root_info *info,
 
 	space_nr = new_space(addr->translation_offset, sparse);
 	if (space_nr == ~0)
-		goto free_name;
+		goto free_resource;
 
 	base = __pa(io_space[space_nr].mmio_base);
 	base_port = IO_SPACE_BASE(space_nr);
@@ -210,18 +211,23 @@ static u64 add_io_space(struct pci_root_info *info,
 	if (space_nr == 0)
 		sparse = 1;
 
+	resource = &iospace->res;
 	resource->name  = name;
 	resource->flags = IORESOURCE_MEM;
 	resource->start = base + (sparse ? IO_SPACE_SPARSE_ENCODING(min) : min);
 	resource->end   = base + (sparse ? IO_SPACE_SPARSE_ENCODING(max) : max);
-	insert_resource(&iomem_resource, resource);
+	if (insert_resource(&iomem_resource, resource)) {
+		dev_err(&info->bridge->dev,
+				"can't allocate host bridge io space resource  %pR\n",
+				resource);
+		goto free_resource;
+	}
 
+	list_add_tail(&iospace->list, &info->io_resources);
 	return base_port;
 
-free_name:
-	kfree(name);
 free_resource:
-	kfree(resource);
+	kfree(iospace);
 out:
 	return ~0;
 }
@@ -265,7 +271,7 @@ static acpi_status count_window(struct acpi_resource *resource, void *data)
 static acpi_status add_window(struct acpi_resource *res, void *data)
 {
 	struct pci_root_info *info = data;
-	struct pci_window *window;
+	struct resource *resource;
 	struct acpi_resource_address64 addr;
 	acpi_status status;
 	unsigned long flags, offset = 0;
@@ -289,55 +295,146 @@ static acpi_status add_window(struct acpi_resource *res, void *data)
 	} else
 		return AE_OK;
 
-	window = &info->controller->window[info->controller->windows++];
-	window->resource.name = info->name;
-	window->resource.flags = flags;
-	window->resource.start = addr.minimum + offset;
-	window->resource.end = window->resource.start + addr.address_length - 1;
-	window->offset = offset;
+	resource = &info->res[info->res_num];
+	resource->name = info->name;
+	resource->flags = flags;
+	resource->start = addr.minimum + offset;
+	resource->end = resource->start + addr.address_length - 1;
+	info->res_offset[info->res_num] = offset;
 
-	if (insert_resource(root, &window->resource)) {
+	if (insert_resource(root, resource)) {
 		dev_err(&info->bridge->dev,
 			"can't allocate host bridge window %pR\n",
-			&window->resource);
+			resource);
 	} else {
 		if (offset)
 			dev_info(&info->bridge->dev, "host bridge window %pR "
 				 "(PCI address [%#llx-%#llx])\n",
-				 &window->resource,
-				 window->resource.start - offset,
-				 window->resource.end - offset);
+				 resource,
+				 resource->start - offset,
+				 resource->end - offset);
 		else
 			dev_info(&info->bridge->dev,
-				 "host bridge window %pR\n",
-				 &window->resource);
+				 "host bridge window %pR\n", resource);
 	}
-
 	/* HP's firmware has a hack to work around a Windows bug.
 	 * Ignore these tiny memory ranges */
-	if (!((window->resource.flags & IORESOURCE_MEM) &&
-	      (window->resource.end - window->resource.start < 16)))
-		pci_add_resource_offset(&info->resources, &window->resource,
-					window->offset);
+	if (!((resource->flags & IORESOURCE_MEM) &&
+	      (resource->end - resource->start < 16)))
+		pci_add_resource_offset(&info->resources, resource,
+					info->res_offset[info->res_num]);
 
+	info->res_num++;
 	return AE_OK;
 }
 
+static void free_pci_root_info_res(struct pci_root_info *info)
+{
+	struct iospace_resource *iospace, *tmp;
+
+	list_for_each_entry_safe(iospace, tmp, &info->io_resources, list)
+		kfree(iospace);
+
+	kfree(info->name);
+	kfree(info->res);
+	info->res = NULL;
+	kfree(info->res_offset);
+	info->res_offset = NULL;
+	info->res_num = 0;
+	kfree(info->controller);
+	info->controller = NULL;
+}
+
+static void __release_pci_root_info(struct pci_root_info *info)
+{
+	int i;
+	struct resource *res;
+	struct iospace_resource *iospace;
+
+	list_for_each_entry(iospace, &info->io_resources, list)
+		release_resource(&iospace->res);
+
+	for (i = 0; i < info->res_num; i++) {
+		res = &info->res[i];
+
+		if (!res->parent)
+			continue;
+
+		if (!(res->flags & (IORESOURCE_MEM | IORESOURCE_IO)))
+			continue;
+
+		release_resource(res);
+	}
+
+	free_pci_root_info_res(info);
+	kfree(info);
+}
+
+static void release_pci_root_info(struct pci_host_bridge *bridge)
+{
+	struct pci_root_info *info = bridge->release_data;
+
+	__release_pci_root_info(info);
+}
+
+static int
+probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device,
+		int busnum, int domain)
+{
+	char *name;
+
+	name = kmalloc(16, GFP_KERNEL);
+	if (!name)
+		return -ENOMEM;
+
+	sprintf(name, "PCI Bus %04x:%02x", domain, busnum);
+	info->bridge = device;
+	info->name = name;
+
+	acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
+			&info->res_num);
+	if (info->res_num) {
+		info->res =
+			kzalloc_node(sizeof(*info->res) * info->res_num,
+				     GFP_KERNEL, info->controller->node);
+		if (!info->res) {
+			kfree(name);
+			return -ENOMEM;
+		}
+
+		info->res_offset =
+			kzalloc_node(sizeof(*info->res_offset) * info->res_num,
+					GFP_KERNEL, info->controller->node);
+		if (!info->res_offset) {
+			kfree(name);
+			kfree(info->res);
+			info->res = NULL;
+			return -ENOMEM;
+		}
+
+		info->res_num = 0;
+		acpi_walk_resources(device->handle, METHOD_NAME__CRS,
+			add_window, info);
+	} else
+		kfree(name);
+
+	return 0;
+}
+
 struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 {
 	struct acpi_device *device = root->device;
 	int domain = root->segment;
 	int bus = root->secondary.start;
 	struct pci_controller *controller;
-	unsigned int windows = 0;
-	struct pci_root_info info;
+	struct pci_root_info *info = NULL;
+	int busnum = root->secondary.start;
 	struct pci_bus *pbus;
-	char *name;
-	int pxm;
+	int pxm, ret;
 
 	controller = alloc_pci_controller(domain);
 	if (!controller)
-		goto out1;
+		return NULL;
 
 	controller->acpi_handle = device->handle;
 
@@ -347,29 +444,27 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 		controller->node = pxm_to_node(pxm);
 #endif
 
-	INIT_LIST_HEAD(&info.resources);
-	/* insert busn resource at first */
-	pci_add_resource(&info.resources, &root->secondary);
-	acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
-			&windows);
-	if (windows) {
-		controller->window =
-			kzalloc_node(sizeof(*controller->window) * windows,
-				     GFP_KERNEL, controller->node);
-		if (!controller->window)
-			goto out2;
-
-		name = kmalloc(16, GFP_KERNEL);
-		if (!name)
-			goto out3;
-
-		sprintf(name, "PCI Bus %04x:%02x", domain, bus);
-		info.bridge = device;
-		info.controller = controller;
-		info.name = name;
-		acpi_walk_resources(device->handle, METHOD_NAME__CRS,
-			add_window, &info);
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		dev_err(&device->dev,
+				"pci_bus %04x:%02x: ignored (out of memory)\n",
+				domain, busnum);
+		kfree(controller);
+		return NULL;
 	}
+
+	info->controller = controller;
+	INIT_LIST_HEAD(&info->io_resources);
+	INIT_LIST_HEAD(&info->resources);
+
+	ret = probe_pci_root_info(info, device, busnum, domain);
+	if (ret) {
+		kfree(info->controller);
+		kfree(info);
+		return NULL;
+	}
+	/* insert busn resource at first */
+	pci_add_resource(&info->resources, &root->secondary);
 	/*
 	 * See arch/x86/pci/acpi.c.
 	 * The desired pci bus might already be scanned in a quirk. We
@@ -377,21 +472,17 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 	 * such quirk. So we just ignore the case now.
 	 */
 	pbus = pci_create_root_bus(NULL, bus, &pci_root_ops, controller,
-				   &info.resources);
+				   &info->resources);
 	if (!pbus) {
-		pci_free_resource_list(&info.resources);
+		pci_free_resource_list(&info->resources);
+		__release_pci_root_info(info);
 		return NULL;
 	}
 
+	pci_set_host_bridge_release(to_pci_host_bridge(pbus->bridge),
+			release_pci_root_info, info);
 	pci_scan_child_bus(pbus);
 	return pbus;
-
-out3:
-	kfree(controller->window);
-out2:
-	kfree(controller);
-out1:
-	return NULL;
 }
 
 int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
@@ -691,7 +782,7 @@ static void __init set_pci_dfl_cacheline_size(void)
 
 	status = ia64_pal_cache_summary(&levels, &unique_caches);
 	if (status != 0) {
-		printk(KERN_ERR "%s: ia64_pal_cache_summary() failed "
+		pr_err("%s: ia64_pal_cache_summary() failed "
 			"(status=%ld)\n", __func__, status);
 		return;
 	}
@@ -699,7 +790,7 @@ static void __init set_pci_dfl_cacheline_size(void)
 	status = ia64_pal_cache_config_info(levels - 1,
 				/* cache_type (data_or_unified)= */ 2, &cci);
 	if (status != 0) {
-		printk(KERN_ERR "%s: ia64_pal_cache_config_info() failed "
+		pr_err("%s: ia64_pal_cache_config_info() failed "
 			"(status=%ld)\n", __func__, status);
 		return;
 	}
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 238e2c511d9..0b5ce82d203 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -118,76 +118,26 @@ static void __init sn_fixup_ionodes(void)
 }
 
 /*
- * sn_pci_legacy_window_fixup - Create PCI controller windows for
+ * sn_pci_legacy_window_fixup - Setup PCI resources for
  *				legacy IO and MEM space. This needs to
  *				be done here, as the PROM does not have
  *				ACPI support defining the root buses
  *				and their resources (_CRS),
  */
 static void
-sn_legacy_pci_window_fixup(struct pci_controller *controller,
-			   u64 legacy_io, u64 legacy_mem)
+sn_legacy_pci_window_fixup(struct resource *res,
+		u64 legacy_io, u64 legacy_mem)
 {
-		controller->window = kcalloc(2, sizeof(struct pci_window),
-					     GFP_KERNEL);
-		BUG_ON(controller->window == NULL);
-		controller->window[0].offset = legacy_io;
-		controller->window[0].resource.name = "legacy_io";
-		controller->window[0].resource.flags = IORESOURCE_IO;
-		controller->window[0].resource.start = legacy_io;
-		controller->window[0].resource.end =
-	    			controller->window[0].resource.start + 0xffff;
-		controller->window[0].resource.parent = &ioport_resource;
-		controller->window[1].offset = legacy_mem;
-		controller->window[1].resource.name = "legacy_mem";
-		controller->window[1].resource.flags = IORESOURCE_MEM;
-		controller->window[1].resource.start = legacy_mem;
-		controller->window[1].resource.end =
-	    	       controller->window[1].resource.start + (1024 * 1024) - 1;
-		controller->window[1].resource.parent = &iomem_resource;
-		controller->windows = 2;
-}
-
-/*
- * sn_pci_window_fixup() - Create a pci_window for each device resource.
- *			   It will setup pci_windows for use by
- *			   pcibios_bus_to_resource(), pcibios_resource_to_bus(),
- *			   etc.
- */
-static void
-sn_pci_window_fixup(struct pci_dev *dev, unsigned int count,
-		    s64 * pci_addrs)
-{
-	struct pci_controller *controller = PCI_CONTROLLER(dev->bus);
-	unsigned int i;
-	unsigned int idx;
-	unsigned int new_count;
-	struct pci_window *new_window;
-
-	if (count == 0)
-		return;
-	idx = controller->windows;
-	new_count = controller->windows + count;
-	new_window = kcalloc(new_count, sizeof(struct pci_window), GFP_KERNEL);
-	BUG_ON(new_window == NULL);
-	if (controller->window) {
-		memcpy(new_window, controller->window,
-		       sizeof(struct pci_window) * controller->windows);
-		kfree(controller->window);
-	}
-
-	/* Setup a pci_window for each device resource. */
-	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
-		if (pci_addrs[i] == -1)
-			continue;
-
-		new_window[idx].offset = dev->resource[i].start - pci_addrs[i];
-		new_window[idx].resource = dev->resource[i];
-		idx++;
-	}
-
-	controller->windows = new_count;
-	controller->window = new_window;
+		res[0].name = "legacy_io";
+		res[0].flags = IORESOURCE_IO;
+		res[0].start = legacy_io;
+		res[0].end = res[0].start + 0xffff;
+		res[0].parent = &ioport_resource;
+		res[1].name = "legacy_mem";
+		res[1].flags = IORESOURCE_MEM;
+		res[1].start = legacy_mem;
+		res[1].end = res[1].start + (1024 * 1024) - 1;
+		res[1].parent = &iomem_resource;
 }
 
 /*
@@ -199,9 +149,7 @@ sn_pci_window_fixup(struct pci_dev *dev, unsigned int count,
 void
 sn_io_slot_fixup(struct pci_dev *dev)
 {
-	unsigned int count = 0;
 	int idx;
-	s64 pci_addrs[PCI_ROM_RESOURCE + 1];
 	unsigned long addr, end, size, start;
 	struct pcidev_info *pcidev_info;
 	struct sn_irq_info *sn_irq_info;
@@ -229,7 +177,6 @@ sn_io_slot_fixup(struct pci_dev *dev)
 	for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) {
 
 		if (!pcidev_info->pdi_pio_mapped_addr[idx]) {
-			pci_addrs[idx] = -1;
 			continue;
 		}
 
@@ -237,11 +184,8 @@ sn_io_slot_fixup(struct pci_dev *dev)
 		end = dev->resource[idx].end;
 		size = end - start;
 		if (size == 0) {
-			pci_addrs[idx] = -1;
 			continue;
 		}
-		pci_addrs[idx] = start;
-		count++;
 		addr = pcidev_info->pdi_pio_mapped_addr[idx];
 		addr = ((addr << 4) >> 4) | __IA64_UNCACHED_OFFSET;
 		dev->resource[idx].start = addr;
@@ -276,11 +220,6 @@ sn_io_slot_fixup(struct pci_dev *dev)
 						 IORESOURCE_ROM_BIOS_COPY;
 		}
 	}
-	/* Create a pci_window in the pci_controller struct for
-	 * each device resource.
-	 */
-	if (count > 0)
-		sn_pci_window_fixup(dev, count, pci_addrs);
 
 	sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info);
 }
@@ -297,8 +236,8 @@ sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
 	s64 status = 0;
 	struct pci_controller *controller;
 	struct pcibus_bussoft *prom_bussoft_ptr;
+	struct resource *res;
 	LIST_HEAD(resources);
-	int i;
 
  	status = sal_get_pcibus_info((u64) segment, (u64) busnum,
  				     (u64) ia64_tpa(&prom_bussoft_ptr));
@@ -310,32 +249,29 @@ sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
 	BUG_ON(!controller);
 	controller->segment = segment;
 
+	res = kcalloc(2, sizeof(struct resource), GFP_KERNEL);
+	BUG_ON(!res);
+
 	/*
 	 * Temporarily save the prom_bussoft_ptr for use by sn_bus_fixup().
 	 * (platform_data will be overwritten later in sn_common_bus_fixup())
 	 */
 	controller->platform_data = prom_bussoft_ptr;
 
-	sn_legacy_pci_window_fixup(controller,
-				   prom_bussoft_ptr->bs_legacy_io,
-				   prom_bussoft_ptr->bs_legacy_mem);
-	for (i = 0; i < controller->windows; i++)
-		pci_add_resource_offset(&resources,
-					&controller->window[i].resource,
-					controller->window[i].offset);
+	sn_legacy_pci_window_fixup(res,
+			prom_bussoft_ptr->bs_legacy_io,
+			prom_bussoft_ptr->bs_legacy_mem);
+	pci_add_resource_offset(&resources,	&res[0],
+			prom_bussoft_ptr->bs_legacy_io);
+	pci_add_resource_offset(&resources,	&res[1],
+			prom_bussoft_ptr->bs_legacy_mem);
+
 	bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, controller,
 				&resources);
- 	if (bus == NULL)
- 		goto error_return; /* error, or bus already scanned */
-
-	bus->sysdata = controller;
-
-	return;
-
-error_return:
-
-	kfree(controller);
-	return;
+ 	if (bus == NULL) {
+		kfree(res);
+		kfree(controller);
+	}
 }
 
 /*
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index f82e7b462b7..53b01b8e2f1 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -192,7 +192,7 @@ void __init early_sn_setup(void)
 }
 
 extern int platform_intr_list[];
-static int __cpuinitdata shub_1_1_found;
+static int shub_1_1_found;
 
 /*
  * sn_check_for_wars
@@ -200,7 +200,7 @@ static int __cpuinitdata shub_1_1_found;
  * Set flag for enabling shub specific wars
  */
 
-static inline int __cpuinit is_shub_1_1(int nasid)
+static inline int is_shub_1_1(int nasid)
 {
 	unsigned long id;
 	int rev;
@@ -212,7 +212,7 @@ static inline int __cpuinit is_shub_1_1(int nasid)
 	return rev <= 2;
 }
 
-static void __cpuinit sn_check_for_wars(void)
+static void sn_check_for_wars(void)
 {
 	int cnode;
 
@@ -558,7 +558,7 @@ static void __init sn_init_pdas(char **cmdline_p)
  * Also sets up a few fields in the nodepda.  Also known as
  * platform_cpu_init() by the ia64 machvec code.
  */
-void __cpuinit sn_cpu_init(void)
+void sn_cpu_init(void)
 {
 	int cpuid;
 	int cpuphyid;
diff --git a/arch/ia64/xen/hypervisor.c b/arch/ia64/xen/hypervisor.c
index 52172eee859..fab62528a80 100644
--- a/arch/ia64/xen/hypervisor.c
+++ b/arch/ia64/xen/hypervisor.c
@@ -74,7 +74,7 @@ void __init xen_setup_vcpu_info_placement(void)
 		xen_vcpu_setup(cpu);
 }
 
-void __cpuinit
+void
 xen_cpu_init(void)
 {
 	xen_smp_intr_init();
diff --git a/arch/m32r/include/asm/uaccess.h b/arch/m32r/include/asm/uaccess.h
index 1c7047bea20..84fe7ba5303 100644
--- a/arch/m32r/include/asm/uaccess.h
+++ b/arch/m32r/include/asm/uaccess.h
@@ -216,7 +216,7 @@ extern int fixup_exception(struct pt_regs *regs);
 ({									\
 	long __gu_err = 0;						\
 	unsigned long __gu_val;						\
-	might_sleep();							\
+	might_fault();							\
 	__get_user_size(__gu_val,(ptr),(size),__gu_err);		\
 	(x) = (__typeof__(*(ptr)))__gu_val;				\
 	__gu_err;							\
@@ -227,7 +227,7 @@ extern int fixup_exception(struct pt_regs *regs);
 	long __gu_err = -EFAULT;					\
 	unsigned long __gu_val = 0;					\
 	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
-	might_sleep();							\
+	might_fault();							\
 	if (access_ok(VERIFY_READ,__gu_addr,size))			\
 		__get_user_size(__gu_val,__gu_addr,(size),__gu_err);	\
 	(x) = (__typeof__(*(ptr)))__gu_val;				\
@@ -295,7 +295,7 @@ do {									\
 #define __put_user_nocheck(x,ptr,size)					\
 ({									\
 	long __pu_err;							\
-	might_sleep();							\
+	might_fault();							\
 	__put_user_size((x),(ptr),(size),__pu_err);			\
 	__pu_err;							\
 })
@@ -305,7 +305,7 @@ do {									\
 ({									\
 	long __pu_err = -EFAULT;					\
 	__typeof__(*(ptr)) __user *__pu_addr = (ptr);			\
-	might_sleep();							\
+	might_fault();							\
 	if (access_ok(VERIFY_WRITE,__pu_addr,size))			\
 		__put_user_size((x),__pu_addr,(size),__pu_err);		\
 	__pu_err;							\
@@ -597,7 +597,7 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon
  */
 #define copy_to_user(to,from,n)				\
 ({							\
-	might_sleep();					\
+	might_fault();					\
 	__generic_copy_to_user((to),(from),(n));	\
 })
 
@@ -638,7 +638,7 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon
  */
 #define copy_from_user(to,from,n)			\
 ({							\
-	might_sleep();					\
+	might_fault();					\
 	__generic_copy_from_user((to),(from),(n));	\
 })
 
diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c
index 2c468e8b585..27196303ce3 100644
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -129,11 +129,10 @@ unsigned long __init setup_memory(void)
 #define START_PFN(nid)		(NODE_DATA(nid)->bdata->node_min_pfn)
 #define MAX_LOW_PFN(nid)	(NODE_DATA(nid)->bdata->node_low_pfn)
 
-unsigned long __init zone_sizes_init(void)
+void __init zone_sizes_init(void)
 {
 	unsigned long zones_size[MAX_NR_ZONES], zholes_size[MAX_NR_ZONES];
 	unsigned long low, start_pfn;
-	unsigned long holes = 0;
 	int nid, i;
 	mem_prof_t *mp;
 
@@ -147,7 +146,6 @@ unsigned long __init zone_sizes_init(void)
 		low = MAX_LOW_PFN(nid);
 		zones_size[ZONE_DMA] = low - start_pfn;
 		zholes_size[ZONE_DMA] = mp->holes;
-		holes += zholes_size[ZONE_DMA];
 
 		node_set_state(nid, N_NORMAL_MEMORY);
 		free_area_init_node(nid, zones_size, start_pfn, zholes_size);
@@ -161,6 +159,4 @@ unsigned long __init zone_sizes_init(void)
 	NODE_DATA(1)->node_zones->watermark[WMARK_MIN] = 0;
 	NODE_DATA(1)->node_zones->watermark[WMARK_LOW] = 0;
 	NODE_DATA(1)->node_zones->watermark[WMARK_HIGH] = 0;
-
-	return holes;
 }
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index ab4cbce91a9..0d4146f644d 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -40,7 +40,6 @@ unsigned long mmu_context_cache_dat;
 #else
 unsigned long mmu_context_cache_dat[NR_CPUS];
 #endif
-static unsigned long hole_pages;
 
 /*
  * function prototype
@@ -57,7 +56,7 @@ void free_initrd_mem(unsigned long, unsigned long);
 #define MAX_LOW_PFN(nid)	(NODE_DATA(nid)->bdata->node_low_pfn)
 
 #ifndef CONFIG_DISCONTIGMEM
-unsigned long __init zone_sizes_init(void)
+void __init zone_sizes_init(void)
 {
 	unsigned long  zones_size[MAX_NR_ZONES] = {0, };
 	unsigned long  max_dma;
@@ -83,11 +82,9 @@ unsigned long __init zone_sizes_init(void)
 #endif /* CONFIG_MMU */
 
 	free_area_init_node(0, zones_size, start_pfn, 0);
-
-	return 0;
 }
 #else	/* CONFIG_DISCONTIGMEM */
-extern unsigned long zone_sizes_init(void);
+extern void zone_sizes_init(void);
 #endif	/* CONFIG_DISCONTIGMEM */
 
 /*======================================================================*
@@ -105,24 +102,7 @@ void __init paging_init(void)
 	for (i = 0 ; i < USER_PTRS_PER_PGD * 2 ; i++)
 		pgd_val(pg_dir[i]) = 0;
 #endif /* CONFIG_MMU */
-	hole_pages = zone_sizes_init();
-}
-
-int __init reservedpages_count(void)
-{
-	int reservedpages, nid, i;
-
-	reservedpages = 0;
-	for_each_online_node(nid) {
-		unsigned long flags;
-		pgdat_resize_lock(NODE_DATA(nid), &flags);
-		for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++)
-			if (PageReserved(nid_page_nr(nid, i)))
-				reservedpages++;
-		pgdat_resize_unlock(NODE_DATA(nid), &flags);
-	}
-
-	return reservedpages;
+	zone_sizes_init();
 }
 
 /*======================================================================*
@@ -131,48 +111,20 @@ int __init reservedpages_count(void)
  *======================================================================*/
 void __init mem_init(void)
 {
-	int codesize, reservedpages, datasize, initsize;
-	int nid;
 #ifndef CONFIG_MMU
 	extern unsigned long memory_end;
-#endif
-
-	num_physpages = 0;
-	for_each_online_node(nid)
-		num_physpages += MAX_LOW_PFN(nid) - START_PFN(nid) + 1;
-
-	num_physpages -= hole_pages;
 
-#ifndef CONFIG_DISCONTIGMEM
-	max_mapnr = num_physpages;
-#endif	/* CONFIG_DISCONTIGMEM */
-
-#ifdef CONFIG_MMU
-	high_memory = (void *)__va(PFN_PHYS(MAX_LOW_PFN(0)));
-#else
 	high_memory = (void *)(memory_end & PAGE_MASK);
+#else
+	high_memory = (void *)__va(PFN_PHYS(MAX_LOW_PFN(0)));
 #endif /* CONFIG_MMU */
 
 	/* clear the zero-page */
 	memset(empty_zero_page, 0, PAGE_SIZE);
 
-	/* this will put all low memory onto the freelists */
-	for_each_online_node(nid)
-		totalram_pages += free_all_bootmem_node(NODE_DATA(nid));
-
-	reservedpages = reservedpages_count() - hole_pages;
-	codesize = (unsigned long) &_etext - (unsigned long)&_text;
-	datasize = (unsigned long) &_edata - (unsigned long)&_etext;
-	initsize = (unsigned long) &__init_end - (unsigned long)&__init_begin;
-
-	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
-		"%dk reserved, %dk data, %dk init)\n",
-		nr_free_pages() << (PAGE_SHIFT-10),
-		num_physpages << (PAGE_SHIFT-10),
-		codesize >> 10,
-		reservedpages << (PAGE_SHIFT-10),
-		datasize >> 10,
-		initsize >> 10);
+	set_max_mapnr(get_num_physpages());
+	free_all_bootmem();
+	mem_init_print_info(NULL);
 }
 
 /*======================================================================*
@@ -181,7 +133,7 @@ void __init mem_init(void)
  *======================================================================*/
 void free_initmem(void)
 {
-	free_initmem_default(0);
+	free_initmem_default(-1);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -191,6 +143,6 @@ void free_initmem(void)
  *======================================================================*/
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, 0, "initrd");
+	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
diff --git a/arch/m68k/Kconfig.debug b/arch/m68k/Kconfig.debug
index fa12283d58f..22968272124 100644
--- a/arch/m68k/Kconfig.debug
+++ b/arch/m68k/Kconfig.debug
@@ -11,9 +11,8 @@ config BOOTPARAM_STRING
 	depends on BOOTPARAM
 
 config EARLY_PRINTK
-	bool "Early printk" if EMBEDDED
+	bool "Early printk"
 	depends on MVME16x || MAC
-	default y
 	help
           Write kernel log output directly to a serial port.
 
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 0f795d8e65f..b17a8837f0e 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -214,6 +214,7 @@ CONFIG_DEVTMPFS=y
 # CONFIG_FW_LOADER_USER_HELPER is not set
 CONFIG_CONNECTOR=m
 CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
 CONFIG_PARPORT_AMIGA=m
 CONFIG_PARPORT_MFC3=m
 CONFIG_PARPORT_ATARI=m
@@ -325,6 +326,7 @@ CONFIG_ZORRO8390=y
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_PLIP=m
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 8982370e8b4..be1496ed9b6 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -199,6 +199,9 @@ CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_FW_LOADER_USER_HELPER is not set
 CONFIG_CONNECTOR=m
+CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
+CONFIG_PARPORT_1284=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
@@ -267,6 +270,7 @@ CONFIG_NE2000=m
 # CONFIG_NET_VENDOR_SMSC is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_PLIP=m
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -292,9 +296,11 @@ CONFIG_SERIO_Q40KBD=y
 CONFIG_VT_HW_CONSOLE_BINDING=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_DEVKMEM is not set
+CONFIG_PRINTER=m
 # CONFIG_HW_RANDOM is not set
 CONFIG_NTP_PPS=y
 CONFIG_PPS_CLIENT_LDISC=m
+CONFIG_PPS_CLIENT_PARPORT=m
 CONFIG_PTP_1588_CLOCK=m
 # CONFIG_HWMON is not set
 CONFIG_FB=y
diff --git a/arch/m68k/include/asm/parport.h b/arch/m68k/include/asm/parport.h
index 5ea75e6a739..c85cece778e 100644
--- a/arch/m68k/include/asm/parport.h
+++ b/arch/m68k/include/asm/parport.h
@@ -11,6 +11,8 @@
 #ifndef _ASM_M68K_PARPORT_H
 #define _ASM_M68K_PARPORT_H 1
 
+#undef insl
+#undef outsl
 #define insl(port,buf,len)   isa_insb(port,buf,(len)<<2)
 #define outsl(port,buf,len)  isa_outsb(port,buf,(len)<<2)
 
diff --git a/arch/m68k/include/asm/string.h b/arch/m68k/include/asm/string.h
index 9aea9f11fa2..c30c03d9858 100644
--- a/arch/m68k/include/asm/string.h
+++ b/arch/m68k/include/asm/string.h
@@ -4,20 +4,6 @@
 #include <linux/types.h>
 #include <linux/compiler.h>
 
-static inline char *__kernel_strcpy(char *dest, const char *src)
-{
-	char *xdest = dest;
-
-	asm volatile ("\n"
-		"1:	move.b	(%1)+,(%0)+\n"
-		"	jne	1b"
-		: "+a" (dest), "+a" (src)
-		: : "memory");
-	return xdest;
-}
-
-#ifndef __IN_STRING_C
-
 #define __HAVE_ARCH_STRNLEN
 static inline size_t strnlen(const char *s, size_t count)
 {
@@ -34,16 +20,6 @@ static inline size_t strnlen(const char *s, size_t count)
 	return sc - s;
 }
 
-#define __HAVE_ARCH_STRCPY
-#if __GNUC__ >= 4
-#define strcpy(d, s)	(__builtin_constant_p(s) &&	\
-			 __builtin_strlen(s) <= 32 ?	\
-			 __builtin_strcpy(d, s) :	\
-			 __kernel_strcpy(d, s))
-#else
-#define strcpy(d, s)	__kernel_strcpy(d, s)
-#endif
-
 #define __HAVE_ARCH_STRNCPY
 static inline char *strncpy(char *dest, const char *src, size_t n)
 {
@@ -61,12 +37,6 @@ static inline char *strncpy(char *dest, const char *src, size_t n)
 	return xdest;
 }
 
-#define __HAVE_ARCH_STRCAT
-#define strcat(d, s)	({			\
-	char *__d = (d);			\
-	strcpy(__d + strlen(__d), (s));		\
-})
-
 #ifndef CONFIG_COLDFIRE
 #define __HAVE_ARCH_STRCMP
 static inline int strcmp(const char *cs, const char *ct)
@@ -100,6 +70,4 @@ extern void *memset(void *, int, __kernel_size_t);
 extern void *memcpy(void *, const void *, __kernel_size_t);
 #define memcpy(d, s, n) __builtin_memcpy(d, s, n)
 
-#endif
-
 #endif /* _M68K_STRING_H_ */
diff --git a/arch/m68k/include/asm/uaccess_mm.h b/arch/m68k/include/asm/uaccess_mm.h
index 472c891a4ae..15901db435b 100644
--- a/arch/m68k/include/asm/uaccess_mm.h
+++ b/arch/m68k/include/asm/uaccess_mm.h
@@ -90,7 +90,7 @@ asm volatile ("\n"					\
 		__put_user_asm(__pu_err, __pu_val, ptr, b, d, -EFAULT);	\
 		break;							\
 	case 2:								\
-		__put_user_asm(__pu_err, __pu_val, ptr, w, d, -EFAULT);	\
+		__put_user_asm(__pu_err, __pu_val, ptr, w, r, -EFAULT);	\
 		break;							\
 	case 4:								\
 		__put_user_asm(__pu_err, __pu_val, ptr, l, r, -EFAULT);	\
@@ -158,7 +158,7 @@ asm volatile ("\n"					\
 		__get_user_asm(__gu_err, x, ptr, u8, b, d, -EFAULT);	\
 		break;							\
 	case 2:								\
-		__get_user_asm(__gu_err, x, ptr, u16, w, d, -EFAULT);	\
+		__get_user_asm(__gu_err, x, ptr, u16, w, r, -EFAULT);	\
 		break;							\
 	case 4:								\
 		__get_user_asm(__gu_err, x, ptr, u32, l, r, -EFAULT);	\
@@ -245,7 +245,7 @@ __constant_copy_from_user(void *to, const void __user *from, unsigned long n)
 		__get_user_asm(res, *(u8 *)to, (u8 __user *)from, u8, b, d, 1);
 		break;
 	case 2:
-		__get_user_asm(res, *(u16 *)to, (u16 __user *)from, u16, w, d, 2);
+		__get_user_asm(res, *(u16 *)to, (u16 __user *)from, u16, w, r, 2);
 		break;
 	case 3:
 		__constant_copy_from_user_asm(res, to, from, tmp, 3, w, b,);
@@ -326,7 +326,7 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n)
 		__put_user_asm(res, *(u8 *)from, (u8 __user *)to, b, d, 1);
 		break;
 	case 2:
-		__put_user_asm(res, *(u16 *)from, (u16 __user *)to, w, d, 2);
+		__put_user_asm(res, *(u16 *)from, (u16 __user *)to, w, r, 2);
 		break;
 	case 3:
 		__constant_copy_to_user_asm(res, to, from, tmp, 3, w, b,);
diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c
index a972b00cd77..8b7b2284636 100644
--- a/arch/m68k/kernel/asm-offsets.c
+++ b/arch/m68k/kernel/asm-offsets.c
@@ -77,7 +77,7 @@ int main(void)
 	DEFINE(BIR_SIZE, offsetof(struct bi_record, size));
 	DEFINE(BIR_DATA, offsetof(struct bi_record, data));
 
-	/* offsets into font_desc (drivers/video/console/font.h) */
+	/* offsets into the font_desc struct */
 	DEFINE(FONT_DESC_IDX, offsetof(struct font_desc, idx));
 	DEFINE(FONT_DESC_NAME, offsetof(struct font_desc, name));
 	DEFINE(FONT_DESC_WIDTH, offsetof(struct font_desc, width));
diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c
index 6b32b64bac3..4d7da384eea 100644
--- a/arch/m68k/kernel/ints.c
+++ b/arch/m68k/kernel/ints.c
@@ -101,7 +101,7 @@ void __init m68k_setup_user_interrupt(unsigned int vec, unsigned int cnt)
 	BUG_ON(IRQ_USER + cnt > NR_IRQS);
 	m68k_first_user_vec = vec;
 	for (i = 0; i < cnt; i++)
-		irq_set_chip(IRQ_USER + i, &user_irq_chip);
+		irq_set_chip_and_handler(i, &user_irq_chip, handle_simple_irq);
 	*user_irqvec_fixup = vec - IRQ_USER;
 	flush_icache();
 }
diff --git a/arch/m68k/lib/Makefile b/arch/m68k/lib/Makefile
index a9d782d3427..fcd8eb1d7c7 100644
--- a/arch/m68k/lib/Makefile
+++ b/arch/m68k/lib/Makefile
@@ -6,7 +6,7 @@
 lib-y	:= ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
 	   memcpy.o memset.o memmove.o
 
-lib-$(CONFIG_MMU) += string.o uaccess.o
+lib-$(CONFIG_MMU) += uaccess.o
 lib-$(CONFIG_CPU_HAS_NO_MULDIV64) += mulsi3.o divsi3.o udivsi3.o
 lib-$(CONFIG_CPU_HAS_NO_MULDIV64) += modsi3.o umodsi3.o
 
diff --git a/arch/m68k/lib/string.c b/arch/m68k/lib/string.c
deleted file mode 100644
index 4d61fa8a112..00000000000
--- a/arch/m68k/lib/string.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file COPYING in the main directory of this archive
- * for more details.
- */
-
-#define __IN_STRING_C
-
-#include <linux/module.h>
-#include <linux/string.h>
-
-char *strcpy(char *dest, const char *src)
-{
-	return __kernel_strcpy(dest, src);
-}
-EXPORT_SYMBOL(strcpy);
-
-char *strcat(char *dest, const char *src)
-{
-	return __kernel_strcpy(dest + strlen(dest), src);
-}
-EXPORT_SYMBOL(strcat);
diff --git a/arch/m68k/lib/uaccess.c b/arch/m68k/lib/uaccess.c
index 5e97f2ee7c1..35d1442dee8 100644
--- a/arch/m68k/lib/uaccess.c
+++ b/arch/m68k/lib/uaccess.c
@@ -52,7 +52,7 @@ unsigned long __generic_copy_from_user(void *to, const void __user *from,
 		"	.long	3b,30b\n"
 		"	.long	5b,50b\n"
 		"	.previous"
-		: "=d" (res), "+a" (from), "+a" (to), "=&r" (tmp)
+		: "=d" (res), "+a" (from), "+a" (to), "=&d" (tmp)
 		: "0" (n / 4), "d" (n & 3));
 
 	return res;
@@ -96,7 +96,7 @@ unsigned long __generic_copy_to_user(void __user *to, const void *from,
 		"	.long	7b,50b\n"
 		"	.long	8b,50b\n"
 		"	.previous"
-		: "=d" (res), "+a" (from), "+a" (to), "=&r" (tmp)
+		: "=d" (res), "+a" (from), "+a" (to), "=&d" (tmp)
 		: "0" (n / 4), "d" (n & 3));
 
 	return res;
@@ -141,7 +141,7 @@ unsigned long __clear_user(void __user *to, unsigned long n)
 		"	.long	7b,40b\n"
 		"	.previous"
 		: "=d" (res), "+a" (to)
-		: "r" (0), "0" (n / 4), "d" (n & 3));
+		: "d" (0), "0" (n / 4), "d" (n & 3));
 
     return res;
 }
diff --git a/arch/m68k/math-emu/fp_arith.c b/arch/m68k/math-emu/fp_arith.c
index 08f286db3c5..239eb199018 100644
--- a/arch/m68k/math-emu/fp_arith.c
+++ b/arch/m68k/math-emu/fp_arith.c
@@ -519,7 +519,7 @@ static void fp_roundint(struct fp_ext *dest, int mode)
 				return;
 			break;
 		case 0x401e:
-			if (!(oldmant.m32[1] >= 0))
+			if (oldmant.m32[1] & 0x80000000)
 				return;
 			if (oldmant.m32[0] & 1)
 				break;
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index 1af2ca3411f..6b4baa6e4d3 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -110,7 +110,7 @@ void __init paging_init(void)
 void free_initmem(void)
 {
 #ifndef CONFIG_MMU_SUN3
-	free_initmem_default(0);
+	free_initmem_default(-1);
 #endif /* CONFIG_MMU_SUN3 */
 }
 
@@ -146,38 +146,11 @@ void __init print_memmap(void)
 		MLK_ROUNDUP(__bss_start, __bss_stop));
 }
 
-void __init mem_init(void)
+static inline void init_pointer_tables(void)
 {
-	pg_data_t *pgdat;
-	int codepages = 0;
-	int datapages = 0;
-	int initpages = 0;
+#if defined(CONFIG_MMU) && !defined(CONFIG_SUN3) && !defined(CONFIG_COLDFIRE)
 	int i;
 
-	/* this will put all memory onto the freelists */
-	totalram_pages = num_physpages = 0;
-	for_each_online_pgdat(pgdat) {
-		num_physpages += pgdat->node_present_pages;
-
-		totalram_pages += free_all_bootmem_node(pgdat);
-		for (i = 0; i < pgdat->node_spanned_pages; i++) {
-			struct page *page = pgdat->node_mem_map + i;
-			char *addr = page_to_virt(page);
-
-			if (!PageReserved(page))
-				continue;
-			if (addr >= _text &&
-			    addr < _etext)
-				codepages++;
-			else if (addr >= __init_begin &&
-				 addr < __init_end)
-				initpages++;
-			else
-				datapages++;
-		}
-	}
-
-#if defined(CONFIG_MMU) && !defined(CONFIG_SUN3) && !defined(CONFIG_COLDFIRE)
 	/* insert pointer tables allocated so far into the tablelist */
 	init_pointer_table((unsigned long)kernel_pg_dir);
 	for (i = 0; i < PTRS_PER_PGD; i++) {
@@ -189,19 +162,20 @@ void __init mem_init(void)
 	if (zero_pgtable)
 		init_pointer_table((unsigned long)zero_pgtable);
 #endif
+}
 
-	pr_info("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init)\n",
-	       nr_free_pages() << (PAGE_SHIFT-10),
-	       totalram_pages << (PAGE_SHIFT-10),
-	       codepages << (PAGE_SHIFT-10),
-	       datapages << (PAGE_SHIFT-10),
-	       initpages << (PAGE_SHIFT-10));
+void __init mem_init(void)
+{
+	/* this will put all memory onto the freelists */
+	free_all_bootmem();
+	init_pointer_tables();
+	mem_init_print_info(NULL);
 	print_memmap();
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, 0, "initrd");
+	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
diff --git a/arch/m68k/platform/coldfire/pci.c b/arch/m68k/platform/coldfire/pci.c
index 8572246db84..b33f97a13e6 100644
--- a/arch/m68k/platform/coldfire/pci.c
+++ b/arch/m68k/platform/coldfire/pci.c
@@ -320,7 +320,6 @@ static int __init mcf_pci_init(void)
 	pci_bus_size_bridges(rootbus);
 	pci_bus_assign_resources(rootbus);
 	pci_enable_bridges(rootbus);
-	pci_bus_add_devices(rootbus);
 	return 0;
 }
 
diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c
index ca0966cac72..cab54482ca3 100644
--- a/arch/m68k/sun3/sun3dvma.c
+++ b/arch/m68k/sun3/sun3dvma.c
@@ -275,7 +275,7 @@ void dvma_init(void)
 
 }
 
-inline unsigned long dvma_map_align(unsigned long kaddr, int len, int align)
+unsigned long dvma_map_align(unsigned long kaddr, int len, int align)
 {
 
 	unsigned long baddr;
diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c
index 366569425c5..5b18888ee36 100644
--- a/arch/metag/kernel/perf/perf_event.c
+++ b/arch/metag/kernel/perf/perf_event.c
@@ -882,7 +882,7 @@ static int __init init_hw_perf_events(void)
 	}
 
 	register_cpu_notifier(&metag_pmu_notifier);
-	ret = perf_pmu_register(&pmu, (char *)metag_pmu->name, PERF_TYPE_RAW);
+	ret = perf_pmu_register(&pmu, metag_pmu->name, PERF_TYPE_RAW);
 out:
 	return ret;
 }
diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c
index d05b8455c44..28813f16473 100644
--- a/arch/metag/mm/init.c
+++ b/arch/metag/mm/init.c
@@ -376,34 +376,21 @@ void __init paging_init(unsigned long mem_end)
 
 void __init mem_init(void)
 {
-	int nid;
-
 #ifdef CONFIG_HIGHMEM
 	unsigned long tmp;
+
+	/*
+	 * Explicitly reset zone->managed_pages because highmem pages are
+	 * freed before calling free_all_bootmem();
+	 */
+	reset_all_zones_managed_pages();
 	for (tmp = highstart_pfn; tmp < highend_pfn; tmp++)
 		free_highmem_page(pfn_to_page(tmp));
-	num_physpages += totalhigh_pages;
 #endif /* CONFIG_HIGHMEM */
 
-	for_each_online_node(nid) {
-		pg_data_t *pgdat = NODE_DATA(nid);
-		unsigned long node_pages = 0;
-
-		num_physpages += pgdat->node_present_pages;
-
-		if (pgdat->node_spanned_pages)
-			node_pages = free_all_bootmem_node(pgdat);
-
-		totalram_pages += node_pages;
-	}
-
-	pr_info("Memory: %luk/%luk available\n",
-		(unsigned long)nr_free_pages() << (PAGE_SHIFT - 10),
-		num_physpages << (PAGE_SHIFT - 10));
-
+	free_all_bootmem();
+	mem_init_print_info(NULL);
 	show_mem(0);
-
-	return;
 }
 
 void free_initmem(void)
@@ -414,7 +401,8 @@ void free_initmem(void)
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd");
+	free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
+			   "initrd");
 }
 #endif
 
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index 85a5ae8e9bd..fd850879854 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -168,7 +168,6 @@ extern int page_is_ram(unsigned long pfn);
 #  else /* CONFIG_MMU */
 #  define ARCH_PFN_OFFSET	(memory_start >> PAGE_SHIFT)
 #  define pfn_valid(pfn)	((pfn) < (max_mapnr + ARCH_PFN_OFFSET))
-#  define VALID_PAGE(page) 	((page - mem_map) < max_mapnr)
 #  endif /* CONFIG_MMU */
 
 # endif /* __ASSEMBLY__ */
diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h
index 04e49553bdf..0aa005703a0 100644
--- a/arch/microblaze/include/asm/uaccess.h
+++ b/arch/microblaze/include/asm/uaccess.h
@@ -145,7 +145,7 @@ static inline unsigned long __must_check __clear_user(void __user *to,
 static inline unsigned long __must_check clear_user(void __user *to,
 							unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	if (unlikely(!access_ok(VERIFY_WRITE, to, n)))
 		return n;
 
@@ -371,7 +371,7 @@ extern long __user_bad(void);
 static inline long copy_from_user(void *to,
 		const void __user *from, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	if (access_ok(VERIFY_READ, from, n))
 		return __copy_from_user(to, from, n);
 	return n;
@@ -385,7 +385,7 @@ static inline long copy_from_user(void *to,
 static inline long copy_to_user(void __user *to,
 		const void *from, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	if (access_ok(VERIFY_WRITE, to, n))
 		return __copy_to_user(to, from, n);
 	return n;
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index b38ae3acfeb..74c7bcc1e82 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -71,24 +71,17 @@ static void __init highmem_init(void)
 	kmap_prot = PAGE_KERNEL;
 }
 
-static unsigned long highmem_setup(void)
+static void highmem_setup(void)
 {
 	unsigned long pfn;
-	unsigned long reservedpages = 0;
 
 	for (pfn = max_low_pfn; pfn < max_pfn; ++pfn) {
 		struct page *page = pfn_to_page(pfn);
 
 		/* FIXME not sure about */
-		if (memblock_is_reserved(pfn << PAGE_SHIFT))
-			continue;
-		free_highmem_page(page);
-		reservedpages++;
+		if (!memblock_is_reserved(pfn << PAGE_SHIFT))
+			free_highmem_page(page);
 	}
-	pr_info("High memory: %luk\n",
-					totalhigh_pages << (PAGE_SHIFT-10));
-
-	return reservedpages;
 }
 #endif /* CONFIG_HIGHMEM */
 
@@ -167,13 +160,12 @@ void __init setup_memory(void)
 	 * min_low_pfn - the first page (mm/bootmem.c - node_boot_start)
 	 * max_low_pfn
 	 * max_mapnr - the first unused page (mm/bootmem.c - node_low_pfn)
-	 * num_physpages - number of all pages
 	 */
 
 	/* memory start is from the kernel end (aligned) to higher addr */
 	min_low_pfn = memory_start >> PAGE_SHIFT; /* minimum for allocation */
 	/* RAM is assumed contiguous */
-	num_physpages = max_mapnr = memory_size >> PAGE_SHIFT;
+	max_mapnr = memory_size >> PAGE_SHIFT;
 	max_low_pfn = ((u64)memory_start + (u64)lowmem_size) >> PAGE_SHIFT;
 	max_pfn = ((u64)memory_start + (u64)memory_size) >> PAGE_SHIFT;
 
@@ -235,57 +227,26 @@ void __init setup_memory(void)
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, 0, "initrd");
+	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
 
 void free_initmem(void)
 {
-	free_initmem_default(0);
+	free_initmem_default(-1);
 }
 
 void __init mem_init(void)
 {
-	pg_data_t *pgdat;
-	unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize;
-
 	high_memory = (void *)__va(memory_start + lowmem_size - 1);
 
 	/* this will put all memory onto the freelists */
-	totalram_pages += free_all_bootmem();
-
-	for_each_online_pgdat(pgdat) {
-		unsigned long i;
-		struct page *page;
-
-		for (i = 0; i < pgdat->node_spanned_pages; i++) {
-			if (!pfn_valid(pgdat->node_start_pfn + i))
-				continue;
-			page = pgdat_page_nr(pgdat, i);
-			if (PageReserved(page))
-				reservedpages++;
-		}
-	}
-
+	free_all_bootmem();
 #ifdef CONFIG_HIGHMEM
-	reservedpages -= highmem_setup();
+	highmem_setup();
 #endif
 
-	codesize = (unsigned long)&_sdata - (unsigned long)&_stext;
-	datasize = (unsigned long)&_edata - (unsigned long)&_sdata;
-	initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin;
-	bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start;
-
-	pr_info("Memory: %luk/%luk available (%luk kernel code, ",
-		nr_free_pages() << (PAGE_SHIFT-10),
-		num_physpages << (PAGE_SHIFT-10),
-		codesize >> 10);
-	pr_cont("%luk reserved, %luk data, %luk bss, %luk init)\n",
-		reservedpages << (PAGE_SHIFT-10),
-		datasize >> 10,
-		bsssize >> 10,
-		initsize >> 10);
-
+	mem_init_print_info(NULL);
 #ifdef CONFIG_MMU
 	pr_info("Kernel virtual memory layout:\n");
 	pr_info("  * 0x%08lx..0x%08lx  : fixmap\n", FIXADDR_START, FIXADDR_TOP);
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index a22f06a6f7c..7181def6037 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -607,7 +607,7 @@ static void octeon_irq_ciu_gpio_ack(struct irq_data *data)
 
 static void octeon_irq_handle_gpio(unsigned int irq, struct irq_desc *desc)
 {
-	if (irqd_get_trigger_type(irq_desc_get_irq_data(desc)) & IRQ_TYPE_EDGE_BOTH)
+	if (irq_get_trigger_type(irq) & IRQ_TYPE_EDGE_BOTH)
 		handle_edge_irq(irq, desc);
 	else
 		handle_level_irq(irq, desc);
diff --git a/arch/mips/kernel/crash_dump.c b/arch/mips/kernel/crash_dump.c
index 3be9e7bb30f..f291cf99b03 100644
--- a/arch/mips/kernel/crash_dump.c
+++ b/arch/mips/kernel/crash_dump.c
@@ -4,16 +4,6 @@
 #include <asm/uaccess.h>
 #include <linux/slab.h>
 
-static int __init parse_savemaxmem(char *p)
-{
-	if (p)
-		saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1;
-
-	return 1;
-}
-__setup("savemaxmem=", parse_savemaxmem);
-
-
 static void *kdump_buf_page;
 
 /**
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index fd814e08c94..cb098628aee 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c
@@ -27,12 +27,12 @@ unsigned long mt_fpemul_threshold;
  * FPU affinity with the user's requested processor affinity.
  * This code is 98% identical with the sys_sched_setaffinity()
  * and sys_sched_getaffinity() system calls, and should be
- * updated when kernel/sched.c changes.
+ * updated when kernel/sched/core.c changes.
  */
 
 /*
  * find_process_by_pid - find a process with a matching PID value.
- * used in sys_sched_set/getaffinity() in kernel/sched.c, so
+ * used in sys_sched_set/getaffinity() in kernel/sched/core.c, so
  * cloned here.
  */
 static inline struct task_struct *find_process_by_pid(pid_t pid)
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 9b36424b03c..e9127ec612e 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -476,8 +476,9 @@ einval: li	v0, -ENOSYS
 	/*
 	 * For FPU affinity scheduling on MIPS MT processors, we need to
 	 * intercept sys_sched_xxxaffinity() calls until we get a proper hook
-	 * in kernel/sched.c.  Considered only temporary we only support these
-	 * hooks for the 32-bit kernel - there is no MIPS64 MT processor atm.
+	 * in kernel/sched/core.c.  Considered only temporary we only support
+	 * these hooks for the 32-bit kernel - there is no MIPS64 MT processor
+	 * atm.
 	 */
 	sys	mipsmt_sys_sched_setaffinity	3
 	sys	mipsmt_sys_sched_getaffinity	3
diff --git a/arch/mips/loongson/lemote-2f/clock.c b/arch/mips/loongson/lemote-2f/clock.c
index bc739d4bab2..4dc2f5fa3f6 100644
--- a/arch/mips/loongson/lemote-2f/clock.c
+++ b/arch/mips/loongson/lemote-2f/clock.c
@@ -121,7 +121,8 @@ int clk_set_rate(struct clk *clk, unsigned long rate)
 	clk->rate = rate;
 
 	regval = LOONGSON_CHIPCFG0;
-	regval = (regval & ~0x7) | (loongson2_clockmod_table[i].index - 1);
+	regval = (regval & ~0x7) |
+		(loongson2_clockmod_table[i].driver_data - 1);
 	LOONGSON_CHIPCFG0 = regval;
 
 	return ret;
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 9b973e0af9c..4e73f10a751 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -359,11 +359,24 @@ void __init paging_init(void)
 static struct kcore_list kcore_kseg0;
 #endif
 
-void __init mem_init(void)
+static inline void mem_init_free_highmem(void)
 {
-	unsigned long codesize, reservedpages, datasize, initsize;
-	unsigned long tmp, ram;
+#ifdef CONFIG_HIGHMEM
+	unsigned long tmp;
 
+	for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
+		struct page *page = pfn_to_page(tmp);
+
+		if (!page_is_ram(tmp))
+			SetPageReserved(page);
+		else
+			free_highmem_page(page);
+	}
+#endif
+}
+
+void __init mem_init(void)
+{
 #ifdef CONFIG_HIGHMEM
 #ifdef CONFIG_DISCONTIGMEM
 #error "CONFIG_HIGHMEM and CONFIG_DISCONTIGMEM dont work together yet"
@@ -374,34 +387,10 @@ void __init mem_init(void)
 #endif
 	high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
 
-	totalram_pages += free_all_bootmem();
+	free_all_bootmem();
 	setup_zero_pages();	/* Setup zeroed pages.  */
-
-	reservedpages = ram = 0;
-	for (tmp = 0; tmp < max_low_pfn; tmp++)
-		if (page_is_ram(tmp) && pfn_valid(tmp)) {
-			ram++;
-			if (PageReserved(pfn_to_page(tmp)))
-				reservedpages++;
-		}
-	num_physpages = ram;
-
-#ifdef CONFIG_HIGHMEM
-	for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
-		struct page *page = pfn_to_page(tmp);
-
-		if (!page_is_ram(tmp)) {
-			SetPageReserved(page);
-			continue;
-		}
-		free_highmem_page(page);
-	}
-	num_physpages += totalhigh_pages;
-#endif
-
-	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
-	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
-	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
+	mem_init_free_highmem();
+	mem_init_print_info(NULL);
 
 #ifdef CONFIG_64BIT
 	if ((unsigned long) &_text > (unsigned long) CKSEG0)
@@ -410,16 +399,6 @@ void __init mem_init(void)
 		kclist_add(&kcore_kseg0, (void *) CKSEG0,
 				0x80000000 - 4, KCORE_TEXT);
 #endif
-
-	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
-	       "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n",
-	       nr_free_pages() << (PAGE_SHIFT-10),
-	       ram << (PAGE_SHIFT-10),
-	       codesize >> 10,
-	       reservedpages << (PAGE_SHIFT-10),
-	       datasize >> 10,
-	       initsize >> 10,
-	       totalhigh_pages << (PAGE_SHIFT-10));
 }
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
@@ -440,7 +419,8 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end)
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd");
+	free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
+			   "initrd");
 }
 #endif
 
diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c
index 879077b0115..cb1ef998406 100644
--- a/arch/mips/pci/pci-lantiq.c
+++ b/arch/mips/pci/pci-lantiq.c
@@ -89,7 +89,7 @@ static inline u32 ltq_calc_bar11mask(void)
 	u32 mem, bar11mask;
 
 	/* BAR11MASK value depends on available memory on system. */
-	mem = num_physpages * PAGE_SIZE;
+	mem = get_num_physpages() * PAGE_SIZE;
 	bar11mask = (0x0ffffff0 & ~((1 << (fls(mem) - 1)) - 1)) | 8;
 
 	return bar11mask;
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index 1230f56429d..a95c00f5fb9 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -357,8 +357,6 @@ static void __init szmem(void)
 	int slot;
 	cnodeid_t node;
 
-	num_physpages = 0;
-
 	for_each_online_node(node) {
 		nodebytes = 0;
 		for (slot = 0; slot < MAX_MEM_SLOTS; slot++) {
@@ -381,7 +379,6 @@ static void __init szmem(void)
 				slot = MAX_MEM_SLOTS;
 				continue;
 			}
-			num_physpages += slot_psize;
 			memblock_add_node(PFN_PHYS(slot_getbasepfn(node, slot)),
 					  PFN_PHYS(slot_psize), node);
 		}
@@ -480,32 +477,8 @@ void __init paging_init(void)
 
 void __init mem_init(void)
 {
-	unsigned long codesize, datasize, initsize, tmp;
-	unsigned node;
-
-	high_memory = (void *) __va(num_physpages << PAGE_SHIFT);
-
-	for_each_online_node(node) {
-		/*
-		 * This will free up the bootmem, ie, slot 0 memory.
-		 */
-		totalram_pages += free_all_bootmem_node(NODE_DATA(node));
-	}
-
+	high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
+	free_all_bootmem();
 	setup_zero_pages();	/* This comes from node 0 */
-
-	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
-	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
-	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
-
-	tmp = nr_free_pages();
-	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
-	       "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n",
-	       tmp << (PAGE_SHIFT-10),
-	       num_physpages << (PAGE_SHIFT-10),
-	       codesize >> 10,
-	       (num_physpages - tmp) << (PAGE_SHIFT-10),
-	       datasize >> 10,
-	       initsize >> 10,
-	       totalhigh_pages << (PAGE_SHIFT-10));
+	mem_init_print_info(NULL);
 }
diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h
index d7966e0f769..537278746a1 100644
--- a/arch/mn10300/include/asm/uaccess.h
+++ b/arch/mn10300/include/asm/uaccess.h
@@ -471,13 +471,13 @@ extern unsigned long __generic_copy_from_user(void *, const void __user *,
 
 #define __copy_to_user(to, from, n)			\
 ({							\
-	might_sleep();					\
+	might_fault();					\
 	__copy_to_user_inatomic((to), (from), (n));	\
 })
 
 #define __copy_from_user(to, from, n)			\
 ({							\
-	might_sleep();					\
+	might_fault();					\
 	__copy_from_user_inatomic((to), (from), (n));	\
 })
 
diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c
index 5a8ace63a6b..97a1ec0beee 100644
--- a/arch/mn10300/mm/init.c
+++ b/arch/mn10300/mm/init.c
@@ -99,43 +99,21 @@ void __init paging_init(void)
  */
 void __init mem_init(void)
 {
-	int codesize, reservedpages, datasize, initsize;
-	int tmp;
-
 	BUG_ON(!mem_map);
 
 #define START_PFN	(contig_page_data.bdata->node_min_pfn)
 #define MAX_LOW_PFN	(contig_page_data.bdata->node_low_pfn)
 
-	max_mapnr = num_physpages = MAX_LOW_PFN - START_PFN;
+	max_mapnr = MAX_LOW_PFN - START_PFN;
 	high_memory = (void *) __va(MAX_LOW_PFN * PAGE_SIZE);
 
 	/* clear the zero-page */
 	memset(empty_zero_page, 0, PAGE_SIZE);
 
 	/* this will put all low memory onto the freelists */
-	totalram_pages += free_all_bootmem();
-
-	reservedpages = 0;
-	for (tmp = 0; tmp < num_physpages; tmp++)
-		if (PageReserved(&mem_map[tmp]))
-			reservedpages++;
-
-	codesize =  (unsigned long) &_etext - (unsigned long) &_stext;
-	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
-	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
-
-	printk(KERN_INFO
-	       "Memory: %luk/%luk available"
-	       " (%dk kernel code, %dk reserved, %dk data, %dk init,"
-	       " %ldk highmem)\n",
-	       nr_free_pages() << (PAGE_SHIFT - 10),
-	       max_mapnr << (PAGE_SHIFT - 10),
-	       codesize >> 10,
-	       reservedpages << (PAGE_SHIFT - 10),
-	       datasize >> 10,
-	       initsize >> 10,
-	       totalhigh_pages << (PAGE_SHIFT - 10));
+	free_all_bootmem();
+
+	mem_init_print_info(NULL);
 }
 
 /*
@@ -152,6 +130,7 @@ void free_initmem(void)
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd");
+	free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
+			   "initrd");
 }
 #endif
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index f20d01d9aaf..195653e851d 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -66,3 +66,4 @@ generic-y += types.h
 generic-y += ucontext.h
 generic-y += user.h
 generic-y += word-at-a-time.h
+generic-y += xor.h
diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index b3cbc670383..7f94652311d 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -202,56 +202,20 @@ void __init paging_init(void)
 
 /* References to section boundaries */
 
-static int __init free_pages_init(void)
-{
-	int reservedpages, pfn;
-
-	/* this will put all low memory onto the freelists */
-	totalram_pages = free_all_bootmem();
-
-	reservedpages = 0;
-	for (pfn = 0; pfn < max_low_pfn; pfn++) {
-		/*
-		 * Only count reserved RAM pages
-		 */
-		if (PageReserved(mem_map + pfn))
-			reservedpages++;
-	}
-
-	return reservedpages;
-}
-
-static void __init set_max_mapnr_init(void)
-{
-	max_mapnr = num_physpages = max_low_pfn;
-}
-
 void __init mem_init(void)
 {
-	int codesize, reservedpages, datasize, initsize;
-
 	BUG_ON(!mem_map);
 
-	set_max_mapnr_init();
-
+	max_mapnr = max_low_pfn;
 	high_memory = (void *)__va(max_low_pfn * PAGE_SIZE);
 
 	/* clear the zero-page */
 	memset((void *)empty_zero_page, 0, PAGE_SIZE);
 
-	reservedpages = free_pages_init();
-
-	codesize = (unsigned long)&_etext - (unsigned long)&_stext;
-	datasize = (unsigned long)&_edata - (unsigned long)&_etext;
-	initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin;
+	/* this will put all low memory onto the freelists */
+	free_all_bootmem();
 
-	printk(KERN_INFO
-	       "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
-	       (unsigned long)nr_free_pages() << (PAGE_SHIFT - 10),
-	       max_mapnr << (PAGE_SHIFT - 10), codesize >> 10,
-	       reservedpages << (PAGE_SHIFT - 10), datasize >> 10,
-	       initsize >> 10, (unsigned long)(0 << (PAGE_SHIFT - 10))
-	    );
+	mem_init_print_info(NULL);
 
 	printk("mem_init_done ...........................................\n");
 	mem_init_done = 1;
@@ -261,11 +225,11 @@ void __init mem_init(void)
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, 0, "initrd");
+	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
 
 void free_initmem(void)
 {
-	free_initmem_default(0);
+	free_initmem_default(-1);
 }
diff --git a/arch/parisc/include/uapi/asm/fcntl.h b/arch/parisc/include/uapi/asm/fcntl.h
index 0304b92ccfe..cc61c475f27 100644
--- a/arch/parisc/include/uapi/asm/fcntl.h
+++ b/arch/parisc/include/uapi/asm/fcntl.h
@@ -20,6 +20,7 @@
 #define O_INVISIBLE	004000000 /* invisible I/O, for DMAPI/XDSM */
 
 #define O_PATH		020000000
+#define O_TMPFILE	040000000
 
 #define F_GETLK64	8
 #define F_SETLK64	9
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 505b56c6b9b..b0f96c0e631 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -214,7 +214,6 @@ static void __init setup_bootmem(void)
 	mem_limit_func();       /* check for "mem=" argument */
 
 	mem_max = 0;
-	num_physpages = 0;
 	for (i = 0; i < npmem_ranges; i++) {
 		unsigned long rsize;
 
@@ -229,10 +228,8 @@ static void __init setup_bootmem(void)
 				npmem_ranges = i + 1;
 				mem_max = mem_limit;
 			}
-	        num_physpages += pmem_ranges[i].pages;
 			break;
 		}
-	    num_physpages += pmem_ranges[i].pages;
 		mem_max += rsize;
 	}
 
@@ -532,7 +529,7 @@ void free_initmem(void)
 	 * pages are no-longer executable */
 	flush_icache_range(init_begin, init_end);
 	
-	num_physpages += free_initmem_default(0);
+	free_initmem_default(-1);
 
 	/* set up a new led state on systems shipped LED State panel */
 	pdc_chassis_send_status(PDC_CHASSIS_DIRECT_BCOMPLETE);
@@ -580,8 +577,6 @@ unsigned long pcxl_dma_start __read_mostly;
 
 void __init mem_init(void)
 {
-	int codesize, reservedpages, datasize, initsize;
-
 	/* Do sanity checks on page table constants */
 	BUILD_BUG_ON(PTE_ENTRY_SIZE != sizeof(pte_t));
 	BUILD_BUG_ON(PMD_ENTRY_SIZE != sizeof(pmd_t));
@@ -590,45 +585,8 @@ void __init mem_init(void)
 			> BITS_PER_LONG);
 
 	high_memory = __va((max_pfn << PAGE_SHIFT));
-
-#ifndef CONFIG_DISCONTIGMEM
-	max_mapnr = page_to_pfn(virt_to_page(high_memory - 1)) + 1;
-	totalram_pages += free_all_bootmem();
-#else
-	{
-		int i;
-
-		for (i = 0; i < npmem_ranges; i++)
-			totalram_pages += free_all_bootmem_node(NODE_DATA(i));
-	}
-#endif
-
-	codesize = (unsigned long)_etext - (unsigned long)_text;
-	datasize = (unsigned long)_edata - (unsigned long)_etext;
-	initsize = (unsigned long)__init_end - (unsigned long)__init_begin;
-
-	reservedpages = 0;
-{
-	unsigned long pfn;
-#ifdef CONFIG_DISCONTIGMEM
-	int i;
-
-	for (i = 0; i < npmem_ranges; i++) {
-		for (pfn = node_start_pfn(i); pfn < node_end_pfn(i); pfn++) {
-			if (PageReserved(pfn_to_page(pfn)))
-				reservedpages++;
-		}
-	}
-#else /* !CONFIG_DISCONTIGMEM */
-	for (pfn = 0; pfn < max_pfn; pfn++) {
-		/*
-		 * Only count reserved RAM pages
-		 */
-		if (PageReserved(pfn_to_page(pfn)))
-			reservedpages++;
-	}
-#endif
-}
+	set_max_mapnr(page_to_pfn(virt_to_page(high_memory - 1)) + 1);
+	free_all_bootmem();
 
 #ifdef CONFIG_PA11
 	if (hppa_dma_ops == &pcxl_dma_ops) {
@@ -643,15 +601,7 @@ void __init mem_init(void)
 	parisc_vmalloc_start = SET_MAP_OFFSET(MAP_START);
 #endif
 
-	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
-		nr_free_pages() << (PAGE_SHIFT-10),
-		num_physpages << (PAGE_SHIFT-10),
-		codesize >> 10,
-		reservedpages << (PAGE_SHIFT-10),
-		datasize >> 10,
-		initsize >> 10
-	);
-
+	mem_init_print_info(NULL);
 #ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */
 	printk("virtual kernel memory layout:\n"
 	       "    vmalloc : 0x%p - 0x%p   (%4ld MB)\n"
@@ -1101,6 +1051,6 @@ void flush_tlb_all(void)
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	num_physpages += free_reserved_area(start, end, 0, "initrd");
+	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 349ed85c7d6..08891d07aeb 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -107,8 +107,9 @@ struct kvmppc_vcpu_book3s {
 #define CONTEXT_GUEST		1
 #define CONTEXT_GUEST_END	2
 
-#define VSID_REAL	0x1fffffffffc00000ULL
-#define VSID_BAT	0x1fffffffffb00000ULL
+#define VSID_REAL	0x0fffffffffc00000ULL
+#define VSID_BAT	0x0fffffffffb00000ULL
+#define VSID_1T		0x1000000000000000ULL
 #define VSID_REAL_DR	0x2000000000000000ULL
 #define VSID_REAL_IR	0x4000000000000000ULL
 #define VSID_PR		0x8000000000000000ULL
@@ -123,6 +124,7 @@ extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu);
 extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
 extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
+extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size);
 extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
 extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run,
 			struct kvm_vcpu *vcpu, unsigned long addr,
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index a73668a5f30..b467530e248 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -38,7 +38,7 @@ extern void drop_cop(unsigned long acop, struct mm_struct *mm);
 
 /*
  * switch_mm is the entry point called from the architecture independent
- * code in kernel/sched.c
+ * code in kernel/sched/core.c
  */
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			     struct task_struct *tsk)
diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h
index 5399f7e1810..127ab23e1f6 100644
--- a/arch/powerpc/include/asm/mutex.h
+++ b/arch/powerpc/include/asm/mutex.h
@@ -82,17 +82,15 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
  *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
  *                                 from 1 to a 0 value
  *  @count: pointer of type atomic_t
- *  @fail_fn: function to call if the original value was not 1
  *
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function returns 0 if the fastpath succeeds,
- * or anything the slow path function returns.
+ * Change the count from 1 to a value lower than 1. This function returns 0
+ * if the fastpath succeeds, or -1 otherwise.
  */
 static inline int
-__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
+__mutex_fastpath_lock_retval(atomic_t *count)
 {
 	if (unlikely(__mutex_dec_return_lock(count) < 0))
-		return fail_fn(count);
+		return -1;
 	return 0;
 }
 
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 4db49590acf..9485b43a7c0 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -178,7 +178,7 @@ do {								\
 	long __pu_err;						\
 	__typeof__(*(ptr)) __user *__pu_addr = (ptr);		\
 	if (!is_kernel_addr((unsigned long)__pu_addr))		\
-		might_sleep();					\
+		might_fault();					\
 	__chk_user_ptr(ptr);					\
 	__put_user_size((x), __pu_addr, (size), __pu_err);	\
 	__pu_err;						\
@@ -188,7 +188,7 @@ do {								\
 ({									\
 	long __pu_err = -EFAULT;					\
 	__typeof__(*(ptr)) __user *__pu_addr = (ptr);			\
-	might_sleep();							\
+	might_fault();							\
 	if (access_ok(VERIFY_WRITE, __pu_addr, size))			\
 		__put_user_size((x), __pu_addr, (size), __pu_err);	\
 	__pu_err;							\
@@ -268,7 +268,7 @@ do {								\
 	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
 	__chk_user_ptr(ptr);					\
 	if (!is_kernel_addr((unsigned long)__gu_addr))		\
-		might_sleep();					\
+		might_fault();					\
 	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
 	(x) = (__typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
@@ -282,7 +282,7 @@ do {								\
 	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
 	__chk_user_ptr(ptr);					\
 	if (!is_kernel_addr((unsigned long)__gu_addr))		\
-		might_sleep();					\
+		might_fault();					\
 	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
 	(x) = (__typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
@@ -294,7 +294,7 @@ do {								\
 	long __gu_err = -EFAULT;					\
 	unsigned long  __gu_val = 0;					\
 	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
-	might_sleep();							\
+	might_fault();							\
 	if (access_ok(VERIFY_READ, __gu_addr, (size)))			\
 		__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
 	(x) = (__typeof__(*(ptr)))__gu_val;				\
@@ -419,14 +419,14 @@ static inline unsigned long __copy_to_user_inatomic(void __user *to,
 static inline unsigned long __copy_from_user(void *to,
 		const void __user *from, unsigned long size)
 {
-	might_sleep();
+	might_fault();
 	return __copy_from_user_inatomic(to, from, size);
 }
 
 static inline unsigned long __copy_to_user(void __user *to,
 		const void *from, unsigned long size)
 {
-	might_sleep();
+	might_fault();
 	return __copy_to_user_inatomic(to, from, size);
 }
 
@@ -434,7 +434,7 @@ extern unsigned long __clear_user(void __user *addr, unsigned long size);
 
 static inline unsigned long clear_user(void __user *addr, unsigned long size)
 {
-	might_sleep();
+	might_fault();
 	if (likely(access_ok(VERIFY_WRITE, addr, size)))
 		return __clear_user(addr, size);
 	if ((unsigned long)addr < TASK_SIZE) {
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 9ec3fe174cb..779a78c2643 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -69,16 +69,6 @@ void __init setup_kdump_trampoline(void)
 }
 #endif /* CONFIG_NONSTATIC_KERNEL */
 
-static int __init parse_savemaxmem(char *p)
-{
-	if (p)
-		saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1;
-
-	return 1;
-}
-__setup("savemaxmem=", parse_savemaxmem);
-
-
 static size_t copy_oldmem_vaddr(void *vaddr, char *buf, size_t csize,
                                unsigned long offset, int userbuf)
 {
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 6782221d49b..db28032e320 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -750,13 +750,8 @@ EXPORT_SYMBOL_GPL(kvm_hypercall);
 
 static __init void kvm_free_tmp(void)
 {
-	unsigned long start, end;
-
-	start = (ulong)&kvm_tmp[kvm_tmp_index + (PAGE_SIZE - 1)] & PAGE_MASK;
-	end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK;
-
-	/* Free the tmp space we don't need */
-	free_reserved_area(start, end, 0, NULL);
+	free_reserved_area(&kvm_tmp[kvm_tmp_index],
+			   &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
 }
 
 static int __init kvm_guest_init(void)
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 2a67e9baa59..6b0ba5854d9 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -128,7 +128,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 	const char *type;
 	struct pci_slot *slot;
 
-	dev = alloc_pci_dev();
+	dev = pci_alloc_dev(bus);
 	if (!dev)
 		return NULL;
 	type = of_get_property(node, "device_type", NULL);
@@ -137,7 +137,6 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 
 	pr_debug("    create device, devfn: %x, type: %s\n", devfn, type);
 
-	dev->bus = bus;
 	dev->dev.of_node = of_node_get(node);
 	dev->dev.parent = bus->bridge;
 	dev->dev.bus = &pci_bus_type;
@@ -165,7 +164,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 	pr_debug("    class: 0x%x\n", dev->class);
 	pr_debug("    revision: 0x%x\n", dev->revision);
 
-	dev->current_state = 4;		/* unknown power state */
+	dev->current_state = PCI_UNKNOWN;	/* unknown power state */
 	dev->error_state = pci_channel_io_normal;
 	dev->dma_mask = 0xffffffff;
 
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index feb8580fdc8..c30612aad68 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -29,25 +29,9 @@
 
 #ifdef CONFIG_PPC64
 
-static loff_t page_map_seek( struct file *file, loff_t off, int whence)
+static loff_t page_map_seek(struct file *file, loff_t off, int whence)
 {
-	loff_t new;
-	switch(whence) {
-	case 0:
-		new = off;
-		break;
-	case 1:
-		new = file->f_pos + off;
-		break;
-	case 2:
-		new = PAGE_SIZE + off;
-		break;
-	default:
-		return -EINVAL;
-	}
-	if ( new < 0 || new > PAGE_SIZE )
-		return -EINVAL;
-	return (file->f_pos = new);
+	return fixed_size_llseek(file, off, whence, PAGE_SIZE);
 }
 
 static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 422de3f4d46..008cd856c5b 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -5,9 +5,10 @@
 subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
 ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
+KVM := ../../../virt/kvm
 
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
-						eventfd.o)
+common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
+		$(KVM)/eventfd.o
 
 CFLAGS_44x_tlb.o  := -I.
 CFLAGS_e500_mmu.o := -I.
@@ -53,7 +54,7 @@ kvm-e500mc-objs := \
 kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
 
 kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
-	../../../virt/kvm/coalesced_mmio.o \
+	$(KVM)/coalesced_mmio.o \
 	fpu.o \
 	book3s_paired_singles.o \
 	book3s_pr.o \
@@ -86,8 +87,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
 	book3s_xics.o
 
 kvm-book3s_64-module-objs := \
-	../../../virt/kvm/kvm_main.o \
-	../../../virt/kvm/eventfd.o \
+	$(KVM)/kvm_main.o \
+	$(KVM)/eventfd.o \
 	powerpc.o \
 	emulate.o \
 	book3s.o \
@@ -111,7 +112,7 @@ kvm-book3s_32-objs := \
 kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
 
 kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
-kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o)
+kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
 
 kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index b871721c005..739bfbadb85 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -26,6 +26,7 @@
 #include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
 
 /* #define DEBUG_MMU */
 
@@ -76,6 +77,24 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
 	return NULL;
 }
 
+static int kvmppc_slb_sid_shift(struct kvmppc_slb *slbe)
+{
+	return slbe->tb ? SID_SHIFT_1T : SID_SHIFT;
+}
+
+static u64 kvmppc_slb_offset_mask(struct kvmppc_slb *slbe)
+{
+	return (1ul << kvmppc_slb_sid_shift(slbe)) - 1;
+}
+
+static u64 kvmppc_slb_calc_vpn(struct kvmppc_slb *slb, gva_t eaddr)
+{
+	eaddr &= kvmppc_slb_offset_mask(slb);
+
+	return (eaddr >> VPN_SHIFT) |
+		((slb->vsid) << (kvmppc_slb_sid_shift(slb) - VPN_SHIFT));
+}
+
 static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
 					 bool data)
 {
@@ -85,11 +104,7 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
 	if (!slb)
 		return 0;
 
-	if (slb->tb)
-		return (((u64)eaddr >> 12) & 0xfffffff) |
-		       (((u64)slb->vsid) << 28);
-
-	return (((u64)eaddr >> 12) & 0xffff) | (((u64)slb->vsid) << 16);
+	return kvmppc_slb_calc_vpn(slb, eaddr);
 }
 
 static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
@@ -100,7 +115,8 @@ static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
 static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
 {
 	int p = kvmppc_mmu_book3s_64_get_pagesize(slbe);
-	return ((eaddr & 0xfffffff) >> p);
+
+	return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p);
 }
 
 static hva_t kvmppc_mmu_book3s_64_get_pteg(
@@ -109,13 +125,15 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg(
 				bool second)
 {
 	u64 hash, pteg, htabsize;
-	u32 page;
+	u32 ssize;
 	hva_t r;
+	u64 vpn;
 
-	page = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
 	htabsize = ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1);
 
-	hash = slbe->vsid ^ page;
+	vpn = kvmppc_slb_calc_vpn(slbe, eaddr);
+	ssize = slbe->tb ? MMU_SEGSIZE_1T : MMU_SEGSIZE_256M;
+	hash = hpt_hash(vpn, kvmppc_mmu_book3s_64_get_pagesize(slbe), ssize);
 	if (second)
 		hash = ~hash;
 	hash &= ((1ULL << 39ULL) - 1ULL);
@@ -146,7 +164,7 @@ static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr)
 	u64 avpn;
 
 	avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
-	avpn |= slbe->vsid << (28 - p);
+	avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p);
 
 	if (p < 24)
 		avpn >>= ((80 - p) - 56) - 8;
@@ -167,7 +185,6 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	int i;
 	u8 key = 0;
 	bool found = false;
-	bool perm_err = false;
 	int second = 0;
 	ulong mp_ea = vcpu->arch.magic_page_ea;
 
@@ -190,13 +207,15 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	if (!slbe)
 		goto no_seg_found;
 
+	avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
+	if (slbe->tb)
+		avpn |= SLB_VSID_B_1T;
+
 do_second:
 	ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
 	if (kvm_is_error_hva(ptegp))
 		goto no_page_found;
 
-	avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
-
 	if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
 		printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp);
 		goto no_page_found;
@@ -219,7 +238,7 @@ do_second:
 			continue;
 
 		/* AVPN compare */
-		if (HPTE_V_AVPN_VAL(avpn) == HPTE_V_AVPN_VAL(v)) {
+		if (HPTE_V_COMPARE(avpn, v)) {
 			u8 pp = (r & HPTE_R_PP) | key;
 			int eaddr_mask = 0xFFF;
 
@@ -248,11 +267,6 @@ do_second:
 				break;
 			}
 
-			if (!gpte->may_read) {
-				perm_err = true;
-				continue;
-			}
-
 			dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
 				"-> 0x%lx\n",
 				eaddr, avpn, gpte->vpage, gpte->raddr);
@@ -281,6 +295,8 @@ do_second:
 		if (pteg[i+1] != oldr)
 			copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
 
+		if (!gpte->may_read)
+			return -EPERM;
 		return 0;
 	} else {
 		dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx "
@@ -296,13 +312,7 @@ do_second:
 		}
 	}
 
-
 no_page_found:
-
-
-	if (perm_err)
-		return -EPERM;
-
 	return -ENOENT;
 
 no_seg_found:
@@ -334,7 +344,7 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
 	slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
 	slbe->tb    = (rs & SLB_VSID_B_1T) ? 1 : 0;
 	slbe->esid  = slbe->tb ? esid_1t : esid;
-	slbe->vsid  = rs >> 12;
+	slbe->vsid  = (rs & ~SLB_VSID_B) >> (kvmppc_slb_sid_shift(slbe) - 16);
 	slbe->valid = (rb & SLB_ESID_V) ? 1 : 0;
 	slbe->Ks    = (rs & SLB_VSID_KS) ? 1 : 0;
 	slbe->Kp    = (rs & SLB_VSID_KP) ? 1 : 0;
@@ -375,6 +385,7 @@ static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr)
 static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
 {
 	struct kvmppc_slb *slbe;
+	u64 seg_size;
 
 	dprintk("KVM MMU: slbie(0x%llx)\n", ea);
 
@@ -386,8 +397,11 @@ static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
 	dprintk("KVM MMU: slbie(0x%llx, 0x%llx)\n", ea, slbe->esid);
 
 	slbe->valid = false;
+	slbe->orige = 0;
+	slbe->origv = 0;
 
-	kvmppc_mmu_map_segment(vcpu, ea);
+	seg_size = 1ull << kvmppc_slb_sid_shift(slbe);
+	kvmppc_mmu_flush_segment(vcpu, ea & ~(seg_size - 1), seg_size);
 }
 
 static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
@@ -396,8 +410,11 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
 
 	dprintk("KVM MMU: slbia()\n");
 
-	for (i = 1; i < vcpu->arch.slb_nr; i++)
+	for (i = 1; i < vcpu->arch.slb_nr; i++) {
 		vcpu->arch.slb[i].valid = false;
+		vcpu->arch.slb[i].orige = 0;
+		vcpu->arch.slb[i].origv = 0;
+	}
 
 	if (vcpu->arch.shared->msr & MSR_IR) {
 		kvmppc_mmu_flush_segments(vcpu);
@@ -467,8 +484,14 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
 
 	if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
 		slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
-		if (slb)
+		if (slb) {
 			gvsid = slb->vsid;
+			if (slb->tb) {
+				gvsid <<= SID_SHIFT_1T - SID_SHIFT;
+				gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1);
+				gvsid |= VSID_1T;
+			}
+		}
 	}
 
 	switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 3a9a1aceb14..b350d9494b2 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -301,6 +301,23 @@ out:
 	return r;
 }
 
+void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size)
+{
+	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+	ulong seg_mask = -seg_size;
+	int i;
+
+	for (i = 1; i < svcpu->slb_max; i++) {
+		if ((svcpu->slb[i].esid & SLB_ESID_V) &&
+		    (svcpu->slb[i].esid & seg_mask) == ea) {
+			/* Invalidate this entry */
+			svcpu->slb[i].esid = 0;
+		}
+	}
+
+	svcpu_put(svcpu);
+}
+
 void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
@@ -325,9 +342,9 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
 		return -1;
 	vcpu3s->context_id[0] = err;
 
-	vcpu3s->proto_vsid_max = ((vcpu3s->context_id[0] + 1)
+	vcpu3s->proto_vsid_max = ((u64)(vcpu3s->context_id[0] + 1)
 				  << ESID_BITS) - 1;
-	vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << ESID_BITS;
+	vcpu3s->proto_vsid_first = (u64)vcpu3s->context_id[0] << ESID_BITS;
 	vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first;
 
 	kvmppc_mmu_hpte_init(vcpu);
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index 56b983e7b73..4f0caecc0f9 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -66,10 +66,6 @@ slb_exit_skip_ ## num:
 
 	ld	r12, PACA_SLBSHADOWPTR(r13)
 
-	/* Save off the first entry so we can slbie it later */
-	ld	r10, SHADOW_SLB_ESID(0)(r12)
-	ld	r11, SHADOW_SLB_VSID(0)(r12)
-
 	/* Remove bolted entries */
 	UNBOLT_SLB_ENTRY(0)
 	UNBOLT_SLB_ENTRY(1)
@@ -81,15 +77,10 @@ slb_exit_skip_ ## num:
 
 	/* Flush SLB */
 
+	li	r10, 0
+	slbmte	r10, r10
 	slbia
 
-	/* r0 = esid & ESID_MASK */
-	rldicr  r10, r10, 0, 35
-	/* r0 |= CLASS_BIT(VSID) */
-	rldic   r12, r11, 56 - 36, 36
-	or      r10, r10, r12
-	slbie	r10
-
 	/* Fill SLB with our shadow */
 
 	lbz	r12, SVCPU_SLB_MAX(r3)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 550f5928b39..2efa9dde741 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1864,7 +1864,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 
  up_out:
 	up_read(&current->mm->mmap_sem);
-	goto out;
+	goto out_srcu;
 }
 
 int kvmppc_core_init_vm(struct kvm *kvm)
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index bdc40b8e77d..19498a567a8 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1239,8 +1239,7 @@ out:
 #ifdef CONFIG_PPC64
 int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
 {
-	/* No flags */
-	info->flags = 0;
+	info->flags = KVM_PPC_1T_SEGMENTS;
 
 	/* SLB is always 64 entries */
 	info->slb_size = 64;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1a1b5118977..dcc94f01600 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -796,7 +796,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
 		kvmppc_fill_pt_regs(&regs);
 		timer_interrupt(&regs);
 		break;
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64)
+#if defined(CONFIG_PPC_DOORBELL)
 	case BOOKE_INTERRUPT_DOORBELL:
 		kvmppc_fill_pt_regs(&regs);
 		doorbell_exception(&regs);
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 631a2650e4e..2c52ada3077 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -169,6 +169,9 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 		vcpu->arch.shared->sprg3 = spr_val;
 		break;
 
+	/* PIR can legally be written, but we ignore it */
+	case SPRN_PIR: break;
+
 	default:
 		emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
 						     spr_val);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 77fdd2cef33..4210549ac95 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -357,7 +357,7 @@ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
 int alloc_bootmem_huge_page(struct hstate *hstate)
 {
 	struct huge_bootmem_page *m;
-	int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT);
+	int idx = shift_to_mmu_psize(huge_page_shift(hstate));
 	int nr_gpages = gpage_freearray[idx].nr_gpages;
 
 	if (nr_gpages == 0)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 0988a26e041..1cb1ea133a2 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -299,47 +299,13 @@ void __init paging_init(void)
 
 void __init mem_init(void)
 {
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-	int nid;
-#endif
-	pg_data_t *pgdat;
-	unsigned long i;
-	struct page *page;
-	unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize;
-
 #ifdef CONFIG_SWIOTLB
 	swiotlb_init(0);
 #endif
 
-	num_physpages = memblock_phys_mem_size() >> PAGE_SHIFT;
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
-
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-        for_each_online_node(nid) {
-		if (NODE_DATA(nid)->node_spanned_pages != 0) {
-			printk("freeing bootmem node %d\n", nid);
-			totalram_pages +=
-				free_all_bootmem_node(NODE_DATA(nid));
-		}
-	}
-#else
-	max_mapnr = max_pfn;
-	totalram_pages += free_all_bootmem();
-#endif
-	for_each_online_pgdat(pgdat) {
-		for (i = 0; i < pgdat->node_spanned_pages; i++) {
-			if (!pfn_valid(pgdat->node_start_pfn + i))
-				continue;
-			page = pgdat_page_nr(pgdat, i);
-			if (PageReserved(page))
-				reservedpages++;
-		}
-	}
-
-	codesize = (unsigned long)&_sdata - (unsigned long)&_stext;
-	datasize = (unsigned long)&_edata - (unsigned long)&_sdata;
-	initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin;
-	bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start;
+	set_max_mapnr(max_pfn);
+	free_all_bootmem();
 
 #ifdef CONFIG_HIGHMEM
 	{
@@ -349,13 +315,9 @@ void __init mem_init(void)
 		for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) {
 			phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT;
 			struct page *page = pfn_to_page(pfn);
-			if (memblock_is_reserved(paddr))
-				continue;
-			free_highmem_page(page);
-			reservedpages--;
+			if (!memblock_is_reserved(paddr))
+				free_highmem_page(page);
 		}
-		printk(KERN_DEBUG "High memory: %luk\n",
-		       totalhigh_pages << (PAGE_SHIFT-10));
 	}
 #endif /* CONFIG_HIGHMEM */
 
@@ -368,16 +330,7 @@ void __init mem_init(void)
 		(mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
 #endif
 
-	printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, "
-	       "%luk reserved, %luk data, %luk bss, %luk init)\n",
-		nr_free_pages() << (PAGE_SHIFT-10),
-		num_physpages << (PAGE_SHIFT-10),
-		codesize >> 10,
-		reservedpages << (PAGE_SHIFT-10),
-		datasize >> 10,
-		bsssize >> 10,
-		initsize >> 10);
-
+	mem_init_print_info(NULL);
 #ifdef CONFIG_PPC32
 	pr_info("Kernel virtual memory layout:\n");
 	pr_info("  * 0x%08lx..0x%08lx  : fixmap\n", FIXADDR_START, FIXADDR_TOP);
@@ -407,7 +360,7 @@ void free_initmem(void)
 #ifdef CONFIG_BLK_DEV_INITRD
 void __init free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, 0, "initrd");
+	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
 
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 3c475d6267c..13c3f0e547a 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -62,6 +62,29 @@
 #define	PME_PM_BRU_FIN			0x10068
 #define	PME_PM_BRU_MPRED		0x400f6
 
+#define PME_PM_CMPLU_STALL_FXU			0x20014
+#define PME_PM_CMPLU_STALL_DIV			0x40014
+#define PME_PM_CMPLU_STALL_SCALAR		0x40012
+#define PME_PM_CMPLU_STALL_SCALAR_LONG		0x20018
+#define PME_PM_CMPLU_STALL_VECTOR		0x2001c
+#define PME_PM_CMPLU_STALL_VECTOR_LONG		0x4004a
+#define PME_PM_CMPLU_STALL_LSU			0x20012
+#define PME_PM_CMPLU_STALL_REJECT		0x40016
+#define PME_PM_CMPLU_STALL_ERAT_MISS		0x40018
+#define PME_PM_CMPLU_STALL_DCACHE_MISS		0x20016
+#define PME_PM_CMPLU_STALL_STORE		0x2004a
+#define PME_PM_CMPLU_STALL_THRD			0x1001c
+#define PME_PM_CMPLU_STALL_IFU			0x4004c
+#define PME_PM_CMPLU_STALL_BRU			0x4004e
+#define PME_PM_GCT_NOSLOT_IC_MISS		0x2001a
+#define PME_PM_GCT_NOSLOT_BR_MPRED		0x4001a
+#define PME_PM_GCT_NOSLOT_BR_MPRED_IC_MISS	0x4001c
+#define PME_PM_GRP_CMPL				0x30004
+#define PME_PM_1PLUS_PPC_CMPL			0x100f2
+#define PME_PM_CMPLU_STALL_DFU			0x2003c
+#define PME_PM_RUN_CYC				0x200f4
+#define PME_PM_RUN_INST_CMPL			0x400fa
+
 /*
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
@@ -393,6 +416,31 @@ POWER_EVENT_ATTR(LD_MISS_L1,			LD_MISS_L1);
 POWER_EVENT_ATTR(BRU_FIN,			BRU_FIN)
 POWER_EVENT_ATTR(BRU_MPRED,			BRU_MPRED);
 
+POWER_EVENT_ATTR(CMPLU_STALL_FXU,		CMPLU_STALL_FXU);
+POWER_EVENT_ATTR(CMPLU_STALL_DIV,		CMPLU_STALL_DIV);
+POWER_EVENT_ATTR(CMPLU_STALL_SCALAR,		CMPLU_STALL_SCALAR);
+POWER_EVENT_ATTR(CMPLU_STALL_SCALAR_LONG,	CMPLU_STALL_SCALAR_LONG);
+POWER_EVENT_ATTR(CMPLU_STALL_VECTOR,		CMPLU_STALL_VECTOR);
+POWER_EVENT_ATTR(CMPLU_STALL_VECTOR_LONG,	CMPLU_STALL_VECTOR_LONG);
+POWER_EVENT_ATTR(CMPLU_STALL_LSU,		CMPLU_STALL_LSU);
+POWER_EVENT_ATTR(CMPLU_STALL_REJECT,		CMPLU_STALL_REJECT);
+
+POWER_EVENT_ATTR(CMPLU_STALL_ERAT_MISS,		CMPLU_STALL_ERAT_MISS);
+POWER_EVENT_ATTR(CMPLU_STALL_DCACHE_MISS,	CMPLU_STALL_DCACHE_MISS);
+POWER_EVENT_ATTR(CMPLU_STALL_STORE,		CMPLU_STALL_STORE);
+POWER_EVENT_ATTR(CMPLU_STALL_THRD,		CMPLU_STALL_THRD);
+POWER_EVENT_ATTR(CMPLU_STALL_IFU,		CMPLU_STALL_IFU);
+POWER_EVENT_ATTR(CMPLU_STALL_BRU,		CMPLU_STALL_BRU);
+POWER_EVENT_ATTR(GCT_NOSLOT_IC_MISS,		GCT_NOSLOT_IC_MISS);
+
+POWER_EVENT_ATTR(GCT_NOSLOT_BR_MPRED,		GCT_NOSLOT_BR_MPRED);
+POWER_EVENT_ATTR(GCT_NOSLOT_BR_MPRED_IC_MISS,	GCT_NOSLOT_BR_MPRED_IC_MISS);
+POWER_EVENT_ATTR(GRP_CMPL,			GRP_CMPL);
+POWER_EVENT_ATTR(1PLUS_PPC_CMPL,		1PLUS_PPC_CMPL);
+POWER_EVENT_ATTR(CMPLU_STALL_DFU,		CMPLU_STALL_DFU);
+POWER_EVENT_ATTR(RUN_CYC,			RUN_CYC);
+POWER_EVENT_ATTR(RUN_INST_CMPL,			RUN_INST_CMPL);
+
 static struct attribute *power7_events_attr[] = {
 	GENERIC_EVENT_PTR(CYC),
 	GENERIC_EVENT_PTR(GCT_NOSLOT_CYC),
@@ -411,6 +459,31 @@ static struct attribute *power7_events_attr[] = {
 	POWER_EVENT_PTR(LD_MISS_L1),
 	POWER_EVENT_PTR(BRU_FIN),
 	POWER_EVENT_PTR(BRU_MPRED),
+
+	POWER_EVENT_PTR(CMPLU_STALL_FXU),
+	POWER_EVENT_PTR(CMPLU_STALL_DIV),
+	POWER_EVENT_PTR(CMPLU_STALL_SCALAR),
+	POWER_EVENT_PTR(CMPLU_STALL_SCALAR_LONG),
+	POWER_EVENT_PTR(CMPLU_STALL_VECTOR),
+	POWER_EVENT_PTR(CMPLU_STALL_VECTOR_LONG),
+	POWER_EVENT_PTR(CMPLU_STALL_LSU),
+	POWER_EVENT_PTR(CMPLU_STALL_REJECT),
+
+	POWER_EVENT_PTR(CMPLU_STALL_ERAT_MISS),
+	POWER_EVENT_PTR(CMPLU_STALL_DCACHE_MISS),
+	POWER_EVENT_PTR(CMPLU_STALL_STORE),
+	POWER_EVENT_PTR(CMPLU_STALL_THRD),
+	POWER_EVENT_PTR(CMPLU_STALL_IFU),
+	POWER_EVENT_PTR(CMPLU_STALL_BRU),
+	POWER_EVENT_PTR(GCT_NOSLOT_IC_MISS),
+	POWER_EVENT_PTR(GCT_NOSLOT_BR_MPRED),
+
+	POWER_EVENT_PTR(GCT_NOSLOT_BR_MPRED_IC_MISS),
+	POWER_EVENT_PTR(GRP_CMPL),
+	POWER_EVENT_PTR(1PLUS_PPC_CMPL),
+	POWER_EVENT_PTR(CMPLU_STALL_DFU),
+	POWER_EVENT_PTR(RUN_CYC),
+	POWER_EVENT_PTR(RUN_INST_CMPL),
 	NULL
 };
 
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index b62aab3e22e..e17cdfc5ba4 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -193,37 +193,6 @@ config PPC_IO_WORKAROUNDS
 
 source "drivers/cpufreq/Kconfig"
 
-menu "CPU Frequency drivers"
-	depends on CPU_FREQ
-
-config CPU_FREQ_PMAC
-	bool "Support for Apple PowerBooks"
-	depends on ADB_PMU && PPC32
-	select CPU_FREQ_TABLE
-	help
-	  This adds support for frequency switching on Apple PowerBooks,
-	  this currently includes some models of iBook & Titanium
-	  PowerBook.
-
-config CPU_FREQ_PMAC64
-	bool "Support for some Apple G5s"
-	depends on PPC_PMAC && PPC64
-	select CPU_FREQ_TABLE
-	help
-	  This adds support for frequency switching on Apple iMac G5,
-	  and some of the more recent desktop G5 machines as well.
-
-config PPC_PASEMI_CPUFREQ
-	bool "Support for PA Semi PWRficient"
-	depends on PPC_PASEMI
-	default y
-	select CPU_FREQ_TABLE
-	help
-	  This adds the support for frequency switching on PA Semi
-	  PWRficient processors.
-
-endmenu
-
 menu "CPUIdle driver"
 
 source "drivers/cpuidle/Kconfig"
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 54f3936001a..7819c40a6bc 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -158,6 +158,7 @@ config E500
 config PPC_E500MC
 	bool "e500mc Support"
 	select PPC_FPU
+	select COMMON_CLK
 	depends on E500
 	help
 	  This must be enabled for running on e500mc (and derivatives
diff --git a/arch/powerpc/platforms/pasemi/Makefile b/arch/powerpc/platforms/pasemi/Makefile
index ce6d789e074..8e8d4cae5eb 100644
--- a/arch/powerpc/platforms/pasemi/Makefile
+++ b/arch/powerpc/platforms/pasemi/Makefile
@@ -1,3 +1,2 @@
 obj-y	+= setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o
 obj-$(CONFIG_PPC_PASEMI_MDIO)	+= gpio_mdio.o
-obj-$(CONFIG_PPC_PASEMI_CPUFREQ) += cpufreq.o
diff --git a/arch/powerpc/platforms/pasemi/cpufreq.c b/arch/powerpc/platforms/pasemi/cpufreq.c
deleted file mode 100644
index be1e7958909..00000000000
--- a/arch/powerpc/platforms/pasemi/cpufreq.c
+++ /dev/null
@@ -1,330 +0,0 @@
-/*
- * Copyright (C) 2007 PA Semi, Inc
- *
- * Authors: Egor Martovetsky <egor@pasemi.com>
- *	    Olof Johansson <olof@lixom.net>
- *
- * Maintained by: Olof Johansson <olof@lixom.net>
- *
- * Based on arch/powerpc/platforms/cell/cbe_cpufreq.c:
- * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include <linux/cpufreq.h>
-#include <linux/timer.h>
-#include <linux/module.h>
-
-#include <asm/hw_irq.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/time.h>
-#include <asm/smp.h>
-
-#define SDCASR_REG		0x0100
-#define SDCASR_REG_STRIDE	0x1000
-#define SDCPWR_CFGA0_REG	0x0100
-#define SDCPWR_PWST0_REG	0x0000
-#define SDCPWR_GIZTIME_REG	0x0440
-
-/* SDCPWR_GIZTIME_REG fields */
-#define SDCPWR_GIZTIME_GR	0x80000000
-#define SDCPWR_GIZTIME_LONGLOCK	0x000000ff
-
-/* Offset of ASR registers from SDC base */
-#define SDCASR_OFFSET		0x120000
-
-static void __iomem *sdcpwr_mapbase;
-static void __iomem *sdcasr_mapbase;
-
-static DEFINE_MUTEX(pas_switch_mutex);
-
-/* Current astate, is used when waking up from power savings on
- * one core, in case the other core has switched states during
- * the idle time.
- */
-static int current_astate;
-
-/* We support 5(A0-A4) power states excluding turbo(A5-A6) modes */
-static struct cpufreq_frequency_table pas_freqs[] = {
-	{0,	0},
-	{1,	0},
-	{2,	0},
-	{3,	0},
-	{4,	0},
-	{0,	CPUFREQ_TABLE_END},
-};
-
-static struct freq_attr *pas_cpu_freqs_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-/*
- * hardware specific functions
- */
-
-static int get_astate_freq(int astate)
-{
-	u32 ret;
-	ret = in_le32(sdcpwr_mapbase + SDCPWR_CFGA0_REG + (astate * 0x10));
-
-	return ret & 0x3f;
-}
-
-static int get_cur_astate(int cpu)
-{
-	u32 ret;
-
-	ret = in_le32(sdcpwr_mapbase + SDCPWR_PWST0_REG);
-	ret = (ret >> (cpu * 4)) & 0x7;
-
-	return ret;
-}
-
-static int get_gizmo_latency(void)
-{
-	u32 giztime, ret;
-
-	giztime = in_le32(sdcpwr_mapbase + SDCPWR_GIZTIME_REG);
-
-	/* just provide the upper bound */
-	if (giztime & SDCPWR_GIZTIME_GR)
-		ret = (giztime & SDCPWR_GIZTIME_LONGLOCK) * 128000;
-	else
-		ret = (giztime & SDCPWR_GIZTIME_LONGLOCK) * 1000;
-
-	return ret;
-}
-
-static void set_astate(int cpu, unsigned int astate)
-{
-	unsigned long flags;
-
-	/* Return if called before init has run */
-	if (unlikely(!sdcasr_mapbase))
-		return;
-
-	local_irq_save(flags);
-
-	out_le32(sdcasr_mapbase + SDCASR_REG + SDCASR_REG_STRIDE*cpu, astate);
-
-	local_irq_restore(flags);
-}
-
-int check_astate(void)
-{
-	return get_cur_astate(hard_smp_processor_id());
-}
-
-void restore_astate(int cpu)
-{
-	set_astate(cpu, current_astate);
-}
-
-/*
- * cpufreq functions
- */
-
-static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy)
-{
-	const u32 *max_freqp;
-	u32 max_freq;
-	int i, cur_astate;
-	struct resource res;
-	struct device_node *cpu, *dn;
-	int err = -ENODEV;
-
-	cpu = of_get_cpu_node(policy->cpu, NULL);
-
-	if (!cpu)
-		goto out;
-
-	dn = of_find_compatible_node(NULL, NULL, "1682m-sdc");
-	if (!dn)
-		dn = of_find_compatible_node(NULL, NULL,
-					     "pasemi,pwrficient-sdc");
-	if (!dn)
-		goto out;
-	err = of_address_to_resource(dn, 0, &res);
-	of_node_put(dn);
-	if (err)
-		goto out;
-	sdcasr_mapbase = ioremap(res.start + SDCASR_OFFSET, 0x2000);
-	if (!sdcasr_mapbase) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	dn = of_find_compatible_node(NULL, NULL, "1682m-gizmo");
-	if (!dn)
-		dn = of_find_compatible_node(NULL, NULL,
-					     "pasemi,pwrficient-gizmo");
-	if (!dn) {
-		err = -ENODEV;
-		goto out_unmap_sdcasr;
-	}
-	err = of_address_to_resource(dn, 0, &res);
-	of_node_put(dn);
-	if (err)
-		goto out_unmap_sdcasr;
-	sdcpwr_mapbase = ioremap(res.start, 0x1000);
-	if (!sdcpwr_mapbase) {
-		err = -EINVAL;
-		goto out_unmap_sdcasr;
-	}
-
-	pr_debug("init cpufreq on CPU %d\n", policy->cpu);
-
-	max_freqp = of_get_property(cpu, "clock-frequency", NULL);
-	if (!max_freqp) {
-		err = -EINVAL;
-		goto out_unmap_sdcpwr;
-	}
-
-	/* we need the freq in kHz */
-	max_freq = *max_freqp / 1000;
-
-	pr_debug("max clock-frequency is at %u kHz\n", max_freq);
-	pr_debug("initializing frequency table\n");
-
-	/* initialize frequency table */
-	for (i=0; pas_freqs[i].frequency!=CPUFREQ_TABLE_END; i++) {
-		pas_freqs[i].frequency = get_astate_freq(pas_freqs[i].index) * 100000;
-		pr_debug("%d: %d\n", i, pas_freqs[i].frequency);
-	}
-
-	policy->cpuinfo.transition_latency = get_gizmo_latency();
-
-	cur_astate = get_cur_astate(policy->cpu);
-	pr_debug("current astate is at %d\n",cur_astate);
-
-	policy->cur = pas_freqs[cur_astate].frequency;
-	cpumask_copy(policy->cpus, cpu_online_mask);
-
-	ppc_proc_freq = policy->cur * 1000ul;
-
-	cpufreq_frequency_table_get_attr(pas_freqs, policy->cpu);
-
-	/* this ensures that policy->cpuinfo_min and policy->cpuinfo_max
-	 * are set correctly
-	 */
-	return cpufreq_frequency_table_cpuinfo(policy, pas_freqs);
-
-out_unmap_sdcpwr:
-	iounmap(sdcpwr_mapbase);
-
-out_unmap_sdcasr:
-	iounmap(sdcasr_mapbase);
-out:
-	return err;
-}
-
-static int pas_cpufreq_cpu_exit(struct cpufreq_policy *policy)
-{
-	/*
-	 * We don't support CPU hotplug. Don't unmap after the system
-	 * has already made it to a running state.
-	 */
-	if (system_state != SYSTEM_BOOTING)
-		return 0;
-
-	if (sdcasr_mapbase)
-		iounmap(sdcasr_mapbase);
-	if (sdcpwr_mapbase)
-		iounmap(sdcpwr_mapbase);
-
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	return 0;
-}
-
-static int pas_cpufreq_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, pas_freqs);
-}
-
-static int pas_cpufreq_target(struct cpufreq_policy *policy,
-			      unsigned int target_freq,
-			      unsigned int relation)
-{
-	struct cpufreq_freqs freqs;
-	int pas_astate_new;
-	int i;
-
-	cpufreq_frequency_table_target(policy,
-				       pas_freqs,
-				       target_freq,
-				       relation,
-				       &pas_astate_new);
-
-	freqs.old = policy->cur;
-	freqs.new = pas_freqs[pas_astate_new].frequency;
-
-	mutex_lock(&pas_switch_mutex);
-	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
-
-	pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n",
-		 policy->cpu,
-		 pas_freqs[pas_astate_new].frequency,
-		 pas_freqs[pas_astate_new].index);
-
-	current_astate = pas_astate_new;
-
-	for_each_online_cpu(i)
-		set_astate(i, pas_astate_new);
-
-	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
-	mutex_unlock(&pas_switch_mutex);
-
-	ppc_proc_freq = freqs.new * 1000ul;
-	return 0;
-}
-
-static struct cpufreq_driver pas_cpufreq_driver = {
-	.name		= "pas-cpufreq",
-	.owner		= THIS_MODULE,
-	.flags		= CPUFREQ_CONST_LOOPS,
-	.init		= pas_cpufreq_cpu_init,
-	.exit		= pas_cpufreq_cpu_exit,
-	.verify		= pas_cpufreq_verify,
-	.target		= pas_cpufreq_target,
-	.attr		= pas_cpu_freqs_attr,
-};
-
-/*
- * module init and destoy
- */
-
-static int __init pas_cpufreq_init(void)
-{
-	if (!of_machine_is_compatible("PA6T-1682M") &&
-	    !of_machine_is_compatible("pasemi,pwrficient"))
-		return -ENODEV;
-
-	return cpufreq_register_driver(&pas_cpufreq_driver);
-}
-
-static void __exit pas_cpufreq_exit(void)
-{
-	cpufreq_unregister_driver(&pas_cpufreq_driver);
-}
-
-module_init(pas_cpufreq_init);
-module_exit(pas_cpufreq_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Egor Martovetsky <egor@pasemi.com>, Olof Johansson <olof@lixom.net>");
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index ea47df66fee..52c6ce1cc98 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -9,8 +9,6 @@ obj-y				+= pic.o setup.o time.o feature.o pci.o \
 				   sleep.o low_i2c.o cache.o pfunc_core.o \
 				   pfunc_base.o udbg_scc.o udbg_adb.o
 obj-$(CONFIG_PMAC_BACKLIGHT)	+= backlight.o
-obj-$(CONFIG_CPU_FREQ_PMAC)	+= cpufreq_32.o
-obj-$(CONFIG_CPU_FREQ_PMAC64)	+= cpufreq_64.o
 # CONFIG_NVRAM is an arch. independent tristate symbol, for pmac32 we really
 # need this to be a bool.  Cheat here and pretend CONFIG_NVRAM=m is really
 # CONFIG_NVRAM=y
diff --git a/arch/powerpc/platforms/powermac/cpufreq_32.c b/arch/powerpc/platforms/powermac/cpufreq_32.c
deleted file mode 100644
index 3104fad8248..00000000000
--- a/arch/powerpc/platforms/powermac/cpufreq_32.c
+++ /dev/null
@@ -1,721 +0,0 @@
-/*
- *  Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
- *  Copyright (C) 2004        John Steele Scott <toojays@toojays.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * TODO: Need a big cleanup here. Basically, we need to have different
- * cpufreq_driver structures for the different type of HW instead of the
- * current mess. We also need to better deal with the detection of the
- * type of machine.
- *
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/adb.h>
-#include <linux/pmu.h>
-#include <linux/cpufreq.h>
-#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/hardirq.h>
-#include <asm/prom.h>
-#include <asm/machdep.h>
-#include <asm/irq.h>
-#include <asm/pmac_feature.h>
-#include <asm/mmu_context.h>
-#include <asm/sections.h>
-#include <asm/cputable.h>
-#include <asm/time.h>
-#include <asm/mpic.h>
-#include <asm/keylargo.h>
-#include <asm/switch_to.h>
-
-/* WARNING !!! This will cause calibrate_delay() to be called,
- * but this is an __init function ! So you MUST go edit
- * init/main.c to make it non-init before enabling DEBUG_FREQ
- */
-#undef DEBUG_FREQ
-
-extern void low_choose_7447a_dfs(int dfs);
-extern void low_choose_750fx_pll(int pll);
-extern void low_sleep_handler(void);
-
-/*
- * Currently, PowerMac cpufreq supports only high & low frequencies
- * that are set by the firmware
- */
-static unsigned int low_freq;
-static unsigned int hi_freq;
-static unsigned int cur_freq;
-static unsigned int sleep_freq;
-static unsigned long transition_latency;
-
-/*
- * Different models uses different mechanisms to switch the frequency
- */
-static int (*set_speed_proc)(int low_speed);
-static unsigned int (*get_speed_proc)(void);
-
-/*
- * Some definitions used by the various speedprocs
- */
-static u32 voltage_gpio;
-static u32 frequency_gpio;
-static u32 slew_done_gpio;
-static int no_schedule;
-static int has_cpu_l2lve;
-static int is_pmu_based;
-
-/* There are only two frequency states for each processor. Values
- * are in kHz for the time being.
- */
-#define CPUFREQ_HIGH                  0
-#define CPUFREQ_LOW                   1
-
-static struct cpufreq_frequency_table pmac_cpu_freqs[] = {
-	{CPUFREQ_HIGH, 		0},
-	{CPUFREQ_LOW,		0},
-	{0,			CPUFREQ_TABLE_END},
-};
-
-static struct freq_attr* pmac_cpu_freqs_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static inline void local_delay(unsigned long ms)
-{
-	if (no_schedule)
-		mdelay(ms);
-	else
-		msleep(ms);
-}
-
-#ifdef DEBUG_FREQ
-static inline void debug_calc_bogomips(void)
-{
-	/* This will cause a recalc of bogomips and display the
-	 * result. We backup/restore the value to avoid affecting the
-	 * core cpufreq framework's own calculation.
-	 */
-	unsigned long save_lpj = loops_per_jiffy;
-	calibrate_delay();
-	loops_per_jiffy = save_lpj;
-}
-#endif /* DEBUG_FREQ */
-
-/* Switch CPU speed under 750FX CPU control
- */
-static int cpu_750fx_cpu_speed(int low_speed)
-{
-	u32 hid2;
-
-	if (low_speed == 0) {
-		/* ramping up, set voltage first */
-		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05);
-		/* Make sure we sleep for at least 1ms */
-		local_delay(10);
-
-		/* tweak L2 for high voltage */
-		if (has_cpu_l2lve) {
-			hid2 = mfspr(SPRN_HID2);
-			hid2 &= ~0x2000;
-			mtspr(SPRN_HID2, hid2);
-		}
-	}
-#ifdef CONFIG_6xx
-	low_choose_750fx_pll(low_speed);
-#endif
-	if (low_speed == 1) {
-		/* tweak L2 for low voltage */
-		if (has_cpu_l2lve) {
-			hid2 = mfspr(SPRN_HID2);
-			hid2 |= 0x2000;
-			mtspr(SPRN_HID2, hid2);
-		}
-
-		/* ramping down, set voltage last */
-		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04);
-		local_delay(10);
-	}
-
-	return 0;
-}
-
-static unsigned int cpu_750fx_get_cpu_speed(void)
-{
-	if (mfspr(SPRN_HID1) & HID1_PS)
-		return low_freq;
-	else
-		return hi_freq;
-}
-
-/* Switch CPU speed using DFS */
-static int dfs_set_cpu_speed(int low_speed)
-{
-	if (low_speed == 0) {
-		/* ramping up, set voltage first */
-		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05);
-		/* Make sure we sleep for at least 1ms */
-		local_delay(1);
-	}
-
-	/* set frequency */
-#ifdef CONFIG_6xx
-	low_choose_7447a_dfs(low_speed);
-#endif
-	udelay(100);
-
-	if (low_speed == 1) {
-		/* ramping down, set voltage last */
-		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04);
-		local_delay(1);
-	}
-
-	return 0;
-}
-
-static unsigned int dfs_get_cpu_speed(void)
-{
-	if (mfspr(SPRN_HID1) & HID1_DFS)
-		return low_freq;
-	else
-		return hi_freq;
-}
-
-
-/* Switch CPU speed using slewing GPIOs
- */
-static int gpios_set_cpu_speed(int low_speed)
-{
-	int gpio, timeout = 0;
-
-	/* If ramping up, set voltage first */
-	if (low_speed == 0) {
-		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05);
-		/* Delay is way too big but it's ok, we schedule */
-		local_delay(10);
-	}
-
-	/* Set frequency */
-	gpio = 	pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0);
-	if (low_speed == ((gpio & 0x01) == 0))
-		goto skip;
-
-	pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, frequency_gpio,
-			  low_speed ? 0x04 : 0x05);
-	udelay(200);
-	do {
-		if (++timeout > 100)
-			break;
-		local_delay(1);
-		gpio = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, slew_done_gpio, 0);
-	} while((gpio & 0x02) == 0);
- skip:
-	/* If ramping down, set voltage last */
-	if (low_speed == 1) {
-		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04);
-		/* Delay is way too big but it's ok, we schedule */
-		local_delay(10);
-	}
-
-#ifdef DEBUG_FREQ
-	debug_calc_bogomips();
-#endif
-
-	return 0;
-}
-
-/* Switch CPU speed under PMU control
- */
-static int pmu_set_cpu_speed(int low_speed)
-{
-	struct adb_request req;
-	unsigned long save_l2cr;
-	unsigned long save_l3cr;
-	unsigned int pic_prio;
-	unsigned long flags;
-
-	preempt_disable();
-
-#ifdef DEBUG_FREQ
-	printk(KERN_DEBUG "HID1, before: %x\n", mfspr(SPRN_HID1));
-#endif
-	pmu_suspend();
-
-	/* Disable all interrupt sources on openpic */
- 	pic_prio = mpic_cpu_get_priority();
-	mpic_cpu_set_priority(0xf);
-
-	/* Make sure the decrementer won't interrupt us */
-	asm volatile("mtdec %0" : : "r" (0x7fffffff));
-	/* Make sure any pending DEC interrupt occurring while we did
-	 * the above didn't re-enable the DEC */
-	mb();
-	asm volatile("mtdec %0" : : "r" (0x7fffffff));
-
-	/* We can now disable MSR_EE */
-	local_irq_save(flags);
-
-	/* Giveup the FPU & vec */
-	enable_kernel_fp();
-
-#ifdef CONFIG_ALTIVEC
-	if (cpu_has_feature(CPU_FTR_ALTIVEC))
-		enable_kernel_altivec();
-#endif /* CONFIG_ALTIVEC */
-
-	/* Save & disable L2 and L3 caches */
-	save_l3cr = _get_L3CR();	/* (returns -1 if not available) */
-	save_l2cr = _get_L2CR();	/* (returns -1 if not available) */
-
-	/* Send the new speed command. My assumption is that this command
-	 * will cause PLL_CFG[0..3] to be changed next time CPU goes to sleep
-	 */
-	pmu_request(&req, NULL, 6, PMU_CPU_SPEED, 'W', 'O', 'O', 'F', low_speed);
-	while (!req.complete)
-		pmu_poll();
-
-	/* Prepare the northbridge for the speed transition */
-	pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,1);
-
-	/* Call low level code to backup CPU state and recover from
-	 * hardware reset
-	 */
-	low_sleep_handler();
-
-	/* Restore the northbridge */
-	pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,0);
-
-	/* Restore L2 cache */
-	if (save_l2cr != 0xffffffff && (save_l2cr & L2CR_L2E) != 0)
- 		_set_L2CR(save_l2cr);
-	/* Restore L3 cache */
-	if (save_l3cr != 0xffffffff && (save_l3cr & L3CR_L3E) != 0)
- 		_set_L3CR(save_l3cr);
-
-	/* Restore userland MMU context */
-	switch_mmu_context(NULL, current->active_mm);
-
-#ifdef DEBUG_FREQ
-	printk(KERN_DEBUG "HID1, after: %x\n", mfspr(SPRN_HID1));
-#endif
-
-	/* Restore low level PMU operations */
-	pmu_unlock();
-
-	/*
-	 * Restore decrementer; we'll take a decrementer interrupt
-	 * as soon as interrupts are re-enabled and the generic
-	 * clockevents code will reprogram it with the right value.
-	 */
-	set_dec(1);
-
-	/* Restore interrupts */
- 	mpic_cpu_set_priority(pic_prio);
-
-	/* Let interrupts flow again ... */
-	local_irq_restore(flags);
-
-#ifdef DEBUG_FREQ
-	debug_calc_bogomips();
-#endif
-
-	pmu_resume();
-
-	preempt_enable();
-
-	return 0;
-}
-
-static int do_set_cpu_speed(struct cpufreq_policy *policy, int speed_mode,
-		int notify)
-{
-	struct cpufreq_freqs freqs;
-	unsigned long l3cr;
-	static unsigned long prev_l3cr;
-
-	freqs.old = cur_freq;
-	freqs.new = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq;
-
-	if (freqs.old == freqs.new)
-		return 0;
-
-	if (notify)
-		cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
-	if (speed_mode == CPUFREQ_LOW &&
-	    cpu_has_feature(CPU_FTR_L3CR)) {
-		l3cr = _get_L3CR();
-		if (l3cr & L3CR_L3E) {
-			prev_l3cr = l3cr;
-			_set_L3CR(0);
-		}
-	}
-	set_speed_proc(speed_mode == CPUFREQ_LOW);
-	if (speed_mode == CPUFREQ_HIGH &&
-	    cpu_has_feature(CPU_FTR_L3CR)) {
-		l3cr = _get_L3CR();
-		if ((prev_l3cr & L3CR_L3E) && l3cr != prev_l3cr)
-			_set_L3CR(prev_l3cr);
-	}
-	if (notify)
-		cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
-	cur_freq = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq;
-
-	return 0;
-}
-
-static unsigned int pmac_cpufreq_get_speed(unsigned int cpu)
-{
-	return cur_freq;
-}
-
-static int pmac_cpufreq_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, pmac_cpu_freqs);
-}
-
-static int pmac_cpufreq_target(	struct cpufreq_policy *policy,
-					unsigned int target_freq,
-					unsigned int relation)
-{
-	unsigned int    newstate = 0;
-	int		rc;
-
-	if (cpufreq_frequency_table_target(policy, pmac_cpu_freqs,
-			target_freq, relation, &newstate))
-		return -EINVAL;
-
-	rc = do_set_cpu_speed(policy, newstate, 1);
-
-	ppc_proc_freq = cur_freq * 1000ul;
-	return rc;
-}
-
-static int pmac_cpufreq_cpu_init(struct cpufreq_policy *policy)
-{
-	if (policy->cpu != 0)
-		return -ENODEV;
-
-	policy->cpuinfo.transition_latency	= transition_latency;
-	policy->cur = cur_freq;
-
-	cpufreq_frequency_table_get_attr(pmac_cpu_freqs, policy->cpu);
-	return cpufreq_frequency_table_cpuinfo(policy, pmac_cpu_freqs);
-}
-
-static u32 read_gpio(struct device_node *np)
-{
-	const u32 *reg = of_get_property(np, "reg", NULL);
-	u32 offset;
-
-	if (reg == NULL)
-		return 0;
-	/* That works for all keylargos but shall be fixed properly
-	 * some day... The problem is that it seems we can't rely
-	 * on the "reg" property of the GPIO nodes, they are either
-	 * relative to the base of KeyLargo or to the base of the
-	 * GPIO space, and the device-tree doesn't help.
-	 */
-	offset = *reg;
-	if (offset < KEYLARGO_GPIO_LEVELS0)
-		offset += KEYLARGO_GPIO_LEVELS0;
-	return offset;
-}
-
-static int pmac_cpufreq_suspend(struct cpufreq_policy *policy)
-{
-	/* Ok, this could be made a bit smarter, but let's be robust for now. We
-	 * always force a speed change to high speed before sleep, to make sure
-	 * we have appropriate voltage and/or bus speed for the wakeup process,
-	 * and to make sure our loops_per_jiffies are "good enough", that is will
-	 * not cause too short delays if we sleep in low speed and wake in high
-	 * speed..
-	 */
-	no_schedule = 1;
-	sleep_freq = cur_freq;
-	if (cur_freq == low_freq && !is_pmu_based)
-		do_set_cpu_speed(policy, CPUFREQ_HIGH, 0);
-	return 0;
-}
-
-static int pmac_cpufreq_resume(struct cpufreq_policy *policy)
-{
-	/* If we resume, first check if we have a get() function */
-	if (get_speed_proc)
-		cur_freq = get_speed_proc();
-	else
-		cur_freq = 0;
-
-	/* We don't, hrm... we don't really know our speed here, best
-	 * is that we force a switch to whatever it was, which is
-	 * probably high speed due to our suspend() routine
-	 */
-	do_set_cpu_speed(policy, sleep_freq == low_freq ?
-			 CPUFREQ_LOW : CPUFREQ_HIGH, 0);
-
-	ppc_proc_freq = cur_freq * 1000ul;
-
-	no_schedule = 0;
-	return 0;
-}
-
-static struct cpufreq_driver pmac_cpufreq_driver = {
-	.verify 	= pmac_cpufreq_verify,
-	.target 	= pmac_cpufreq_target,
-	.get		= pmac_cpufreq_get_speed,
-	.init		= pmac_cpufreq_cpu_init,
-	.suspend	= pmac_cpufreq_suspend,
-	.resume		= pmac_cpufreq_resume,
-	.flags		= CPUFREQ_PM_NO_WARN,
-	.attr		= pmac_cpu_freqs_attr,
-	.name		= "powermac",
-	.owner		= THIS_MODULE,
-};
-
-
-static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode)
-{
-	struct device_node *volt_gpio_np = of_find_node_by_name(NULL,
-								"voltage-gpio");
-	struct device_node *freq_gpio_np = of_find_node_by_name(NULL,
-								"frequency-gpio");
-	struct device_node *slew_done_gpio_np = of_find_node_by_name(NULL,
-								     "slewing-done");
-	const u32 *value;
-
-	/*
-	 * Check to see if it's GPIO driven or PMU only
-	 *
-	 * The way we extract the GPIO address is slightly hackish, but it
-	 * works well enough for now. We need to abstract the whole GPIO
-	 * stuff sooner or later anyway
-	 */
-
-	if (volt_gpio_np)
-		voltage_gpio = read_gpio(volt_gpio_np);
-	if (freq_gpio_np)
-		frequency_gpio = read_gpio(freq_gpio_np);
-	if (slew_done_gpio_np)
-		slew_done_gpio = read_gpio(slew_done_gpio_np);
-
-	/* If we use the frequency GPIOs, calculate the min/max speeds based
-	 * on the bus frequencies
-	 */
-	if (frequency_gpio && slew_done_gpio) {
-		int lenp, rc;
-		const u32 *freqs, *ratio;
-
-		freqs = of_get_property(cpunode, "bus-frequencies", &lenp);
-		lenp /= sizeof(u32);
-		if (freqs == NULL || lenp != 2) {
-			printk(KERN_ERR "cpufreq: bus-frequencies incorrect or missing\n");
-			return 1;
-		}
-		ratio = of_get_property(cpunode, "processor-to-bus-ratio*2",
-						NULL);
-		if (ratio == NULL) {
-			printk(KERN_ERR "cpufreq: processor-to-bus-ratio*2 missing\n");
-			return 1;
-		}
-
-		/* Get the min/max bus frequencies */
-		low_freq = min(freqs[0], freqs[1]);
-		hi_freq = max(freqs[0], freqs[1]);
-
-		/* Grrrr.. It _seems_ that the device-tree is lying on the low bus
-		 * frequency, it claims it to be around 84Mhz on some models while
-		 * it appears to be approx. 101Mhz on all. Let's hack around here...
-		 * fortunately, we don't need to be too precise
-		 */
-		if (low_freq < 98000000)
-			low_freq = 101000000;
-
-		/* Convert those to CPU core clocks */
-		low_freq = (low_freq * (*ratio)) / 2000;
-		hi_freq = (hi_freq * (*ratio)) / 2000;
-
-		/* Now we get the frequencies, we read the GPIO to see what is out current
-		 * speed
-		 */
-		rc = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0);
-		cur_freq = (rc & 0x01) ? hi_freq : low_freq;
-
-		set_speed_proc = gpios_set_cpu_speed;
-		return 1;
-	}
-
-	/* If we use the PMU, look for the min & max frequencies in the
-	 * device-tree
-	 */
-	value = of_get_property(cpunode, "min-clock-frequency", NULL);
-	if (!value)
-		return 1;
-	low_freq = (*value) / 1000;
-	/* The PowerBook G4 12" (PowerBook6,1) has an error in the device-tree
-	 * here */
-	if (low_freq < 100000)
-		low_freq *= 10;
-
-	value = of_get_property(cpunode, "max-clock-frequency", NULL);
-	if (!value)
-		return 1;
-	hi_freq = (*value) / 1000;
-	set_speed_proc = pmu_set_cpu_speed;
-	is_pmu_based = 1;
-
-	return 0;
-}
-
-static int pmac_cpufreq_init_7447A(struct device_node *cpunode)
-{
-	struct device_node *volt_gpio_np;
-
-	if (of_get_property(cpunode, "dynamic-power-step", NULL) == NULL)
-		return 1;
-
-	volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select");
-	if (volt_gpio_np)
-		voltage_gpio = read_gpio(volt_gpio_np);
-	if (!voltage_gpio){
-		printk(KERN_ERR "cpufreq: missing cpu-vcore-select gpio\n");
-		return 1;
-	}
-
-	/* OF only reports the high frequency */
-	hi_freq = cur_freq;
-	low_freq = cur_freq/2;
-
-	/* Read actual frequency from CPU */
-	cur_freq = dfs_get_cpu_speed();
-	set_speed_proc = dfs_set_cpu_speed;
-	get_speed_proc = dfs_get_cpu_speed;
-
-	return 0;
-}
-
-static int pmac_cpufreq_init_750FX(struct device_node *cpunode)
-{
-	struct device_node *volt_gpio_np;
-	u32 pvr;
-	const u32 *value;
-
-	if (of_get_property(cpunode, "dynamic-power-step", NULL) == NULL)
-		return 1;
-
-	hi_freq = cur_freq;
-	value = of_get_property(cpunode, "reduced-clock-frequency", NULL);
-	if (!value)
-		return 1;
-	low_freq = (*value) / 1000;
-
-	volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select");
-	if (volt_gpio_np)
-		voltage_gpio = read_gpio(volt_gpio_np);
-
-	pvr = mfspr(SPRN_PVR);
-	has_cpu_l2lve = !((pvr & 0xf00) == 0x100);
-
-	set_speed_proc = cpu_750fx_cpu_speed;
-	get_speed_proc = cpu_750fx_get_cpu_speed;
-	cur_freq = cpu_750fx_get_cpu_speed();
-
-	return 0;
-}
-
-/* Currently, we support the following machines:
- *
- *  - Titanium PowerBook 1Ghz (PMU based, 667Mhz & 1Ghz)
- *  - Titanium PowerBook 800 (PMU based, 667Mhz & 800Mhz)
- *  - Titanium PowerBook 400 (PMU based, 300Mhz & 400Mhz)
- *  - Titanium PowerBook 500 (PMU based, 300Mhz & 500Mhz)
- *  - iBook2 500/600 (PMU based, 400Mhz & 500/600Mhz)
- *  - iBook2 700 (CPU based, 400Mhz & 700Mhz, support low voltage)
- *  - Recent MacRISC3 laptops
- *  - All new machines with 7447A CPUs
- */
-static int __init pmac_cpufreq_setup(void)
-{
-	struct device_node	*cpunode;
-	const u32		*value;
-
-	if (strstr(cmd_line, "nocpufreq"))
-		return 0;
-
-	/* Assume only one CPU */
-	cpunode = of_find_node_by_type(NULL, "cpu");
-	if (!cpunode)
-		goto out;
-
-	/* Get current cpu clock freq */
-	value = of_get_property(cpunode, "clock-frequency", NULL);
-	if (!value)
-		goto out;
-	cur_freq = (*value) / 1000;
-	transition_latency = CPUFREQ_ETERNAL;
-
-	/*  Check for 7447A based MacRISC3 */
-	if (of_machine_is_compatible("MacRISC3") &&
-	    of_get_property(cpunode, "dynamic-power-step", NULL) &&
-	    PVR_VER(mfspr(SPRN_PVR)) == 0x8003) {
-		pmac_cpufreq_init_7447A(cpunode);
-		transition_latency = 8000000;
-	/* Check for other MacRISC3 machines */
-	} else if (of_machine_is_compatible("PowerBook3,4") ||
-		   of_machine_is_compatible("PowerBook3,5") ||
-		   of_machine_is_compatible("MacRISC3")) {
-		pmac_cpufreq_init_MacRISC3(cpunode);
-	/* Else check for iBook2 500/600 */
-	} else if (of_machine_is_compatible("PowerBook4,1")) {
-		hi_freq = cur_freq;
-		low_freq = 400000;
-		set_speed_proc = pmu_set_cpu_speed;
-		is_pmu_based = 1;
-	}
-	/* Else check for TiPb 550 */
-	else if (of_machine_is_compatible("PowerBook3,3") && cur_freq == 550000) {
-		hi_freq = cur_freq;
-		low_freq = 500000;
-		set_speed_proc = pmu_set_cpu_speed;
-		is_pmu_based = 1;
-	}
-	/* Else check for TiPb 400 & 500 */
-	else if (of_machine_is_compatible("PowerBook3,2")) {
-		/* We only know about the 400 MHz and the 500Mhz model
-		 * they both have 300 MHz as low frequency
-		 */
-		if (cur_freq < 350000 || cur_freq > 550000)
-			goto out;
-		hi_freq = cur_freq;
-		low_freq = 300000;
-		set_speed_proc = pmu_set_cpu_speed;
-		is_pmu_based = 1;
-	}
-	/* Else check for 750FX */
-	else if (PVR_VER(mfspr(SPRN_PVR)) == 0x7000)
-		pmac_cpufreq_init_750FX(cpunode);
-out:
-	of_node_put(cpunode);
-	if (set_speed_proc == NULL)
-		return -ENODEV;
-
-	pmac_cpu_freqs[CPUFREQ_LOW].frequency = low_freq;
-	pmac_cpu_freqs[CPUFREQ_HIGH].frequency = hi_freq;
-	ppc_proc_freq = cur_freq * 1000ul;
-
-	printk(KERN_INFO "Registering PowerMac CPU frequency driver\n");
-	printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n",
-	       low_freq/1000, hi_freq/1000, cur_freq/1000);
-
-	return cpufreq_register_driver(&pmac_cpufreq_driver);
-}
-
-module_init(pmac_cpufreq_setup);
-
diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c
deleted file mode 100644
index 7ba423431cf..00000000000
--- a/arch/powerpc/platforms/powermac/cpufreq_64.c
+++ /dev/null
@@ -1,746 +0,0 @@
-/*
- *  Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
- *  and                       Markus Demleitner <msdemlei@cl.uni-heidelberg.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This driver adds basic cpufreq support for SMU & 970FX based G5 Macs,
- * that is iMac G5 and latest single CPU desktop.
- */
-
-#undef DEBUG
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/cpufreq.h>
-#include <linux/init.h>
-#include <linux/completion.h>
-#include <linux/mutex.h>
-#include <asm/prom.h>
-#include <asm/machdep.h>
-#include <asm/irq.h>
-#include <asm/sections.h>
-#include <asm/cputable.h>
-#include <asm/time.h>
-#include <asm/smu.h>
-#include <asm/pmac_pfunc.h>
-
-#define DBG(fmt...) pr_debug(fmt)
-
-/* see 970FX user manual */
-
-#define SCOM_PCR 0x0aa001			/* PCR scom addr */
-
-#define PCR_HILO_SELECT		0x80000000U	/* 1 = PCR, 0 = PCRH */
-#define PCR_SPEED_FULL		0x00000000U	/* 1:1 speed value */
-#define PCR_SPEED_HALF		0x00020000U	/* 1:2 speed value */
-#define PCR_SPEED_QUARTER	0x00040000U	/* 1:4 speed value */
-#define PCR_SPEED_MASK		0x000e0000U	/* speed mask */
-#define PCR_SPEED_SHIFT		17
-#define PCR_FREQ_REQ_VALID	0x00010000U	/* freq request valid */
-#define PCR_VOLT_REQ_VALID	0x00008000U	/* volt request valid */
-#define PCR_TARGET_TIME_MASK	0x00006000U	/* target time */
-#define PCR_STATLAT_MASK	0x00001f00U	/* STATLAT value */
-#define PCR_SNOOPLAT_MASK	0x000000f0U	/* SNOOPLAT value */
-#define PCR_SNOOPACC_MASK	0x0000000fU	/* SNOOPACC value */
-
-#define SCOM_PSR 0x408001			/* PSR scom addr */
-/* warning: PSR is a 64 bits register */
-#define PSR_CMD_RECEIVED	0x2000000000000000U   /* command received */
-#define PSR_CMD_COMPLETED	0x1000000000000000U   /* command completed */
-#define PSR_CUR_SPEED_MASK	0x0300000000000000U   /* current speed */
-#define PSR_CUR_SPEED_SHIFT	(56)
-
-/*
- * The G5 only supports two frequencies (Quarter speed is not supported)
- */
-#define CPUFREQ_HIGH                  0
-#define CPUFREQ_LOW                   1
-
-static struct cpufreq_frequency_table g5_cpu_freqs[] = {
-	{CPUFREQ_HIGH, 		0},
-	{CPUFREQ_LOW,		0},
-	{0,			CPUFREQ_TABLE_END},
-};
-
-static struct freq_attr* g5_cpu_freqs_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-/* Power mode data is an array of the 32 bits PCR values to use for
- * the various frequencies, retrieved from the device-tree
- */
-static int g5_pmode_cur;
-
-static void (*g5_switch_volt)(int speed_mode);
-static int (*g5_switch_freq)(int speed_mode);
-static int (*g5_query_freq)(void);
-
-static DEFINE_MUTEX(g5_switch_mutex);
-
-static unsigned long transition_latency;
-
-#ifdef CONFIG_PMAC_SMU
-
-static const u32 *g5_pmode_data;
-static int g5_pmode_max;
-
-static struct smu_sdbp_fvt *g5_fvt_table;	/* table of op. points */
-static int g5_fvt_count;			/* number of op. points */
-static int g5_fvt_cur;				/* current op. point */
-
-/*
- * SMU based voltage switching for Neo2 platforms
- */
-
-static void g5_smu_switch_volt(int speed_mode)
-{
-	struct smu_simple_cmd	cmd;
-
-	DECLARE_COMPLETION_ONSTACK(comp);
-	smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 8, smu_done_complete,
-			 &comp, 'V', 'S', 'L', 'E', 'W',
-			 0xff, g5_fvt_cur+1, speed_mode);
-	wait_for_completion(&comp);
-}
-
-/*
- * Platform function based voltage/vdnap switching for Neo2
- */
-
-static struct pmf_function *pfunc_set_vdnap0;
-static struct pmf_function *pfunc_vdnap0_complete;
-
-static void g5_vdnap_switch_volt(int speed_mode)
-{
-	struct pmf_args args;
-	u32 slew, done = 0;
-	unsigned long timeout;
-
-	slew = (speed_mode == CPUFREQ_LOW) ? 1 : 0;
-	args.count = 1;
-	args.u[0].p = &slew;
-
-	pmf_call_one(pfunc_set_vdnap0, &args);
-
-	/* It's an irq GPIO so we should be able to just block here,
-	 * I'll do that later after I've properly tested the IRQ code for
-	 * platform functions
-	 */
-	timeout = jiffies + HZ/10;
-	while(!time_after(jiffies, timeout)) {
-		args.count = 1;
-		args.u[0].p = &done;
-		pmf_call_one(pfunc_vdnap0_complete, &args);
-		if (done)
-			break;
-		msleep(1);
-	}
-	if (done == 0)
-		printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n");
-}
-
-
-/*
- * SCOM based frequency switching for 970FX rev3
- */
-static int g5_scom_switch_freq(int speed_mode)
-{
-	unsigned long flags;
-	int to;
-
-	/* If frequency is going up, first ramp up the voltage */
-	if (speed_mode < g5_pmode_cur)
-		g5_switch_volt(speed_mode);
-
-	local_irq_save(flags);
-
-	/* Clear PCR high */
-	scom970_write(SCOM_PCR, 0);
-	/* Clear PCR low */
-       	scom970_write(SCOM_PCR, PCR_HILO_SELECT | 0);
-	/* Set PCR low */
-	scom970_write(SCOM_PCR, PCR_HILO_SELECT |
-		      g5_pmode_data[speed_mode]);
-
-	/* Wait for completion */
-	for (to = 0; to < 10; to++) {
-		unsigned long psr = scom970_read(SCOM_PSR);
-
-		if ((psr & PSR_CMD_RECEIVED) == 0 &&
-		    (((psr >> PSR_CUR_SPEED_SHIFT) ^
-		      (g5_pmode_data[speed_mode] >> PCR_SPEED_SHIFT)) & 0x3)
-		    == 0)
-			break;
-		if (psr & PSR_CMD_COMPLETED)
-			break;
-		udelay(100);
-	}
-
-	local_irq_restore(flags);
-
-	/* If frequency is going down, last ramp the voltage */
-	if (speed_mode > g5_pmode_cur)
-		g5_switch_volt(speed_mode);
-
-	g5_pmode_cur = speed_mode;
-	ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul;
-
-	return 0;
-}
-
-static int g5_scom_query_freq(void)
-{
-	unsigned long psr = scom970_read(SCOM_PSR);
-	int i;
-
-	for (i = 0; i <= g5_pmode_max; i++)
-		if ((((psr >> PSR_CUR_SPEED_SHIFT) ^
-		      (g5_pmode_data[i] >> PCR_SPEED_SHIFT)) & 0x3) == 0)
-			break;
-	return i;
-}
-
-/*
- * Fake voltage switching for platforms with missing support
- */
-
-static void g5_dummy_switch_volt(int speed_mode)
-{
-}
-
-#endif /* CONFIG_PMAC_SMU */
-
-/*
- * Platform function based voltage switching for PowerMac7,2 & 7,3
- */
-
-static struct pmf_function *pfunc_cpu0_volt_high;
-static struct pmf_function *pfunc_cpu0_volt_low;
-static struct pmf_function *pfunc_cpu1_volt_high;
-static struct pmf_function *pfunc_cpu1_volt_low;
-
-static void g5_pfunc_switch_volt(int speed_mode)
-{
-	if (speed_mode == CPUFREQ_HIGH) {
-		if (pfunc_cpu0_volt_high)
-			pmf_call_one(pfunc_cpu0_volt_high, NULL);
-		if (pfunc_cpu1_volt_high)
-			pmf_call_one(pfunc_cpu1_volt_high, NULL);
-	} else {
-		if (pfunc_cpu0_volt_low)
-			pmf_call_one(pfunc_cpu0_volt_low, NULL);
-		if (pfunc_cpu1_volt_low)
-			pmf_call_one(pfunc_cpu1_volt_low, NULL);
-	}
-	msleep(10); /* should be faster , to fix */
-}
-
-/*
- * Platform function based frequency switching for PowerMac7,2 & 7,3
- */
-
-static struct pmf_function *pfunc_cpu_setfreq_high;
-static struct pmf_function *pfunc_cpu_setfreq_low;
-static struct pmf_function *pfunc_cpu_getfreq;
-static struct pmf_function *pfunc_slewing_done;
-
-static int g5_pfunc_switch_freq(int speed_mode)
-{
-	struct pmf_args args;
-	u32 done = 0;
-	unsigned long timeout;
-	int rc;
-
-	DBG("g5_pfunc_switch_freq(%d)\n", speed_mode);
-
-	/* If frequency is going up, first ramp up the voltage */
-	if (speed_mode < g5_pmode_cur)
-		g5_switch_volt(speed_mode);
-
-	/* Do it */
-	if (speed_mode == CPUFREQ_HIGH)
-		rc = pmf_call_one(pfunc_cpu_setfreq_high, NULL);
-	else
-		rc = pmf_call_one(pfunc_cpu_setfreq_low, NULL);
-
-	if (rc)
-		printk(KERN_WARNING "cpufreq: pfunc switch error %d\n", rc);
-
-	/* It's an irq GPIO so we should be able to just block here,
-	 * I'll do that later after I've properly tested the IRQ code for
-	 * platform functions
-	 */
-	timeout = jiffies + HZ/10;
-	while(!time_after(jiffies, timeout)) {
-		args.count = 1;
-		args.u[0].p = &done;
-		pmf_call_one(pfunc_slewing_done, &args);
-		if (done)
-			break;
-		msleep(1);
-	}
-	if (done == 0)
-		printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n");
-
-	/* If frequency is going down, last ramp the voltage */
-	if (speed_mode > g5_pmode_cur)
-		g5_switch_volt(speed_mode);
-
-	g5_pmode_cur = speed_mode;
-	ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul;
-
-	return 0;
-}
-
-static int g5_pfunc_query_freq(void)
-{
-	struct pmf_args args;
-	u32 val = 0;
-
-	args.count = 1;
-	args.u[0].p = &val;
-	pmf_call_one(pfunc_cpu_getfreq, &args);
-	return val ? CPUFREQ_HIGH : CPUFREQ_LOW;
-}
-
-
-/*
- * Common interface to the cpufreq core
- */
-
-static int g5_cpufreq_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, g5_cpu_freqs);
-}
-
-static int g5_cpufreq_target(struct cpufreq_policy *policy,
-	unsigned int target_freq, unsigned int relation)
-{
-	unsigned int newstate = 0;
-	struct cpufreq_freqs freqs;
-	int rc;
-
-	if (cpufreq_frequency_table_target(policy, g5_cpu_freqs,
-			target_freq, relation, &newstate))
-		return -EINVAL;
-
-	if (g5_pmode_cur == newstate)
-		return 0;
-
-	mutex_lock(&g5_switch_mutex);
-
-	freqs.old = g5_cpu_freqs[g5_pmode_cur].frequency;
-	freqs.new = g5_cpu_freqs[newstate].frequency;
-
-	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
-	rc = g5_switch_freq(newstate);
-	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
-
-	mutex_unlock(&g5_switch_mutex);
-
-	return rc;
-}
-
-static unsigned int g5_cpufreq_get_speed(unsigned int cpu)
-{
-	return g5_cpu_freqs[g5_pmode_cur].frequency;
-}
-
-static int g5_cpufreq_cpu_init(struct cpufreq_policy *policy)
-{
-	policy->cpuinfo.transition_latency = transition_latency;
-	policy->cur = g5_cpu_freqs[g5_query_freq()].frequency;
-	/* secondary CPUs are tied to the primary one by the
-	 * cpufreq core if in the secondary policy we tell it that
-	 * it actually must be one policy together with all others. */
-	cpumask_copy(policy->cpus, cpu_online_mask);
-	cpufreq_frequency_table_get_attr(g5_cpu_freqs, policy->cpu);
-
-	return cpufreq_frequency_table_cpuinfo(policy,
-		g5_cpu_freqs);
-}
-
-
-static struct cpufreq_driver g5_cpufreq_driver = {
-	.name		= "powermac",
-	.owner		= THIS_MODULE,
-	.flags		= CPUFREQ_CONST_LOOPS,
-	.init		= g5_cpufreq_cpu_init,
-	.verify		= g5_cpufreq_verify,
-	.target		= g5_cpufreq_target,
-	.get		= g5_cpufreq_get_speed,
-	.attr 		= g5_cpu_freqs_attr,
-};
-
-
-#ifdef CONFIG_PMAC_SMU
-
-static int __init g5_neo2_cpufreq_init(struct device_node *cpus)
-{
-	struct device_node *cpunode;
-	unsigned int psize, ssize;
-	unsigned long max_freq;
-	char *freq_method, *volt_method;
-	const u32 *valp;
-	u32 pvr_hi;
-	int use_volts_vdnap = 0;
-	int use_volts_smu = 0;
-	int rc = -ENODEV;
-
-	/* Check supported platforms */
-	if (of_machine_is_compatible("PowerMac8,1") ||
-	    of_machine_is_compatible("PowerMac8,2") ||
-	    of_machine_is_compatible("PowerMac9,1"))
-		use_volts_smu = 1;
-	else if (of_machine_is_compatible("PowerMac11,2"))
-		use_volts_vdnap = 1;
-	else
-		return -ENODEV;
-
-	/* Get first CPU node */
-	for (cpunode = NULL;
-	     (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) {
-		const u32 *reg = of_get_property(cpunode, "reg", NULL);
-		if (reg == NULL || (*reg) != 0)
-			continue;
-		if (!strcmp(cpunode->type, "cpu"))
-			break;
-	}
-	if (cpunode == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find any CPU 0 node\n");
-		return -ENODEV;
-	}
-
-	/* Check 970FX for now */
-	valp = of_get_property(cpunode, "cpu-version", NULL);
-	if (!valp) {
-		DBG("No cpu-version property !\n");
-		goto bail_noprops;
-	}
-	pvr_hi = (*valp) >> 16;
-	if (pvr_hi != 0x3c && pvr_hi != 0x44) {
-		printk(KERN_ERR "cpufreq: Unsupported CPU version\n");
-		goto bail_noprops;
-	}
-
-	/* Look for the powertune data in the device-tree */
-	g5_pmode_data = of_get_property(cpunode, "power-mode-data",&psize);
-	if (!g5_pmode_data) {
-		DBG("No power-mode-data !\n");
-		goto bail_noprops;
-	}
-	g5_pmode_max = psize / sizeof(u32) - 1;
-
-	if (use_volts_smu) {
-		const struct smu_sdbp_header *shdr;
-
-		/* Look for the FVT table */
-		shdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL);
-		if (!shdr)
-			goto bail_noprops;
-		g5_fvt_table = (struct smu_sdbp_fvt *)&shdr[1];
-		ssize = (shdr->len * sizeof(u32)) -
-			sizeof(struct smu_sdbp_header);
-		g5_fvt_count = ssize / sizeof(struct smu_sdbp_fvt);
-		g5_fvt_cur = 0;
-
-		/* Sanity checking */
-		if (g5_fvt_count < 1 || g5_pmode_max < 1)
-			goto bail_noprops;
-
-		g5_switch_volt = g5_smu_switch_volt;
-		volt_method = "SMU";
-	} else if (use_volts_vdnap) {
-		struct device_node *root;
-
-		root = of_find_node_by_path("/");
-		if (root == NULL) {
-			printk(KERN_ERR "cpufreq: Can't find root of "
-			       "device tree\n");
-			goto bail_noprops;
-		}
-		pfunc_set_vdnap0 = pmf_find_function(root, "set-vdnap0");
-		pfunc_vdnap0_complete =
-			pmf_find_function(root, "slewing-done");
-		if (pfunc_set_vdnap0 == NULL ||
-		    pfunc_vdnap0_complete == NULL) {
-			printk(KERN_ERR "cpufreq: Can't find required "
-			       "platform function\n");
-			goto bail_noprops;
-		}
-
-		g5_switch_volt = g5_vdnap_switch_volt;
-		volt_method = "GPIO";
-	} else {
-		g5_switch_volt = g5_dummy_switch_volt;
-		volt_method = "none";
-	}
-
-	/*
-	 * From what I see, clock-frequency is always the maximal frequency.
-	 * The current driver can not slew sysclk yet, so we really only deal
-	 * with powertune steps for now. We also only implement full freq and
-	 * half freq in this version. So far, I haven't yet seen a machine
-	 * supporting anything else.
-	 */
-	valp = of_get_property(cpunode, "clock-frequency", NULL);
-	if (!valp)
-		return -ENODEV;
-	max_freq = (*valp)/1000;
-	g5_cpu_freqs[0].frequency = max_freq;
-	g5_cpu_freqs[1].frequency = max_freq/2;
-
-	/* Set callbacks */
-	transition_latency = 12000;
-	g5_switch_freq = g5_scom_switch_freq;
-	g5_query_freq = g5_scom_query_freq;
-	freq_method = "SCOM";
-
-	/* Force apply current frequency to make sure everything is in
-	 * sync (voltage is right for example). Firmware may leave us with
-	 * a strange setting ...
-	 */
-	g5_switch_volt(CPUFREQ_HIGH);
-	msleep(10);
-	g5_pmode_cur = -1;
-	g5_switch_freq(g5_query_freq());
-
-	printk(KERN_INFO "Registering G5 CPU frequency driver\n");
-	printk(KERN_INFO "Frequency method: %s, Voltage method: %s\n",
-	       freq_method, volt_method);
-	printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
-		g5_cpu_freqs[1].frequency/1000,
-		g5_cpu_freqs[0].frequency/1000,
-		g5_cpu_freqs[g5_pmode_cur].frequency/1000);
-
-	rc = cpufreq_register_driver(&g5_cpufreq_driver);
-
-	/* We keep the CPU node on hold... hopefully, Apple G5 don't have
-	 * hotplug CPU with a dynamic device-tree ...
-	 */
-	return rc;
-
- bail_noprops:
-	of_node_put(cpunode);
-
-	return rc;
-}
-
-#endif /* CONFIG_PMAC_SMU */
-
-
-static int __init g5_pm72_cpufreq_init(struct device_node *cpus)
-{
-	struct device_node *cpuid = NULL, *hwclock = NULL, *cpunode = NULL;
-	const u8 *eeprom = NULL;
-	const u32 *valp;
-	u64 max_freq, min_freq, ih, il;
-	int has_volt = 1, rc = 0;
-
-	DBG("cpufreq: Initializing for PowerMac7,2, PowerMac7,3 and"
-	    " RackMac3,1...\n");
-
-	/* Get first CPU node */
-	for (cpunode = NULL;
-	     (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) {
-		if (!strcmp(cpunode->type, "cpu"))
-			break;
-	}
-	if (cpunode == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find any CPU node\n");
-		return -ENODEV;
-	}
-
-	/* Lookup the cpuid eeprom node */
-        cpuid = of_find_node_by_path("/u3@0,f8000000/i2c@f8001000/cpuid@a0");
-	if (cpuid != NULL)
-		eeprom = of_get_property(cpuid, "cpuid", NULL);
-	if (eeprom == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find cpuid EEPROM !\n");
-		rc = -ENODEV;
-		goto bail;
-	}
-
-	/* Lookup the i2c hwclock */
-	for (hwclock = NULL;
-	     (hwclock = of_find_node_by_name(hwclock, "i2c-hwclock")) != NULL;){
-		const char *loc = of_get_property(hwclock,
-				"hwctrl-location", NULL);
-		if (loc == NULL)
-			continue;
-		if (strcmp(loc, "CPU CLOCK"))
-			continue;
-		if (!of_get_property(hwclock, "platform-get-frequency", NULL))
-			continue;
-		break;
-	}
-	if (hwclock == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find i2c clock chip !\n");
-		rc = -ENODEV;
-		goto bail;
-	}
-
-	DBG("cpufreq: i2c clock chip found: %s\n", hwclock->full_name);
-
-	/* Now get all the platform functions */
-	pfunc_cpu_getfreq =
-		pmf_find_function(hwclock, "get-frequency");
-	pfunc_cpu_setfreq_high =
-		pmf_find_function(hwclock, "set-frequency-high");
-	pfunc_cpu_setfreq_low =
-		pmf_find_function(hwclock, "set-frequency-low");
-	pfunc_slewing_done =
-		pmf_find_function(hwclock, "slewing-done");
-	pfunc_cpu0_volt_high =
-		pmf_find_function(hwclock, "set-voltage-high-0");
-	pfunc_cpu0_volt_low =
-		pmf_find_function(hwclock, "set-voltage-low-0");
-	pfunc_cpu1_volt_high =
-		pmf_find_function(hwclock, "set-voltage-high-1");
-	pfunc_cpu1_volt_low =
-		pmf_find_function(hwclock, "set-voltage-low-1");
-
-	/* Check we have minimum requirements */
-	if (pfunc_cpu_getfreq == NULL || pfunc_cpu_setfreq_high == NULL ||
-	    pfunc_cpu_setfreq_low == NULL || pfunc_slewing_done == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find platform functions !\n");
-		rc = -ENODEV;
-		goto bail;
-	}
-
-	/* Check that we have complete sets */
-	if (pfunc_cpu0_volt_high == NULL || pfunc_cpu0_volt_low == NULL) {
-		pmf_put_function(pfunc_cpu0_volt_high);
-		pmf_put_function(pfunc_cpu0_volt_low);
-		pfunc_cpu0_volt_high = pfunc_cpu0_volt_low = NULL;
-		has_volt = 0;
-	}
-	if (!has_volt ||
-	    pfunc_cpu1_volt_high == NULL || pfunc_cpu1_volt_low == NULL) {
-		pmf_put_function(pfunc_cpu1_volt_high);
-		pmf_put_function(pfunc_cpu1_volt_low);
-		pfunc_cpu1_volt_high = pfunc_cpu1_volt_low = NULL;
-	}
-
-	/* Note: The device tree also contains a "platform-set-values"
-	 * function for which I haven't quite figured out the usage. It
-	 * might have to be called on init and/or wakeup, I'm not too sure
-	 * but things seem to work fine without it so far ...
-	 */
-
-	/* Get max frequency from device-tree */
-	valp = of_get_property(cpunode, "clock-frequency", NULL);
-	if (!valp) {
-		printk(KERN_ERR "cpufreq: Can't find CPU frequency !\n");
-		rc = -ENODEV;
-		goto bail;
-	}
-
-	max_freq = (*valp)/1000;
-
-	/* Now calculate reduced frequency by using the cpuid input freq
-	 * ratio. This requires 64 bits math unless we are willing to lose
-	 * some precision
-	 */
-	ih = *((u32 *)(eeprom + 0x10));
-	il = *((u32 *)(eeprom + 0x20));
-
-	/* Check for machines with no useful settings */
-	if (il == ih) {
-		printk(KERN_WARNING "cpufreq: No low frequency mode available"
-		       " on this model !\n");
-		rc = -ENODEV;
-		goto bail;
-	}
-
-	min_freq = 0;
-	if (ih != 0 && il != 0)
-		min_freq = (max_freq * il) / ih;
-
-	/* Sanity check */
-	if (min_freq >= max_freq || min_freq < 1000) {
-		printk(KERN_ERR "cpufreq: Can't calculate low frequency !\n");
-		rc = -ENXIO;
-		goto bail;
-	}
-	g5_cpu_freqs[0].frequency = max_freq;
-	g5_cpu_freqs[1].frequency = min_freq;
-
-	/* Set callbacks */
-	transition_latency = CPUFREQ_ETERNAL;
-	g5_switch_volt = g5_pfunc_switch_volt;
-	g5_switch_freq = g5_pfunc_switch_freq;
-	g5_query_freq = g5_pfunc_query_freq;
-
-	/* Force apply current frequency to make sure everything is in
-	 * sync (voltage is right for example). Firmware may leave us with
-	 * a strange setting ...
-	 */
-	g5_switch_volt(CPUFREQ_HIGH);
-	msleep(10);
-	g5_pmode_cur = -1;
-	g5_switch_freq(g5_query_freq());
-
-	printk(KERN_INFO "Registering G5 CPU frequency driver\n");
-	printk(KERN_INFO "Frequency method: i2c/pfunc, "
-	       "Voltage method: %s\n", has_volt ? "i2c/pfunc" : "none");
-	printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
-		g5_cpu_freqs[1].frequency/1000,
-		g5_cpu_freqs[0].frequency/1000,
-		g5_cpu_freqs[g5_pmode_cur].frequency/1000);
-
-	rc = cpufreq_register_driver(&g5_cpufreq_driver);
- bail:
-	if (rc != 0) {
-		pmf_put_function(pfunc_cpu_getfreq);
-		pmf_put_function(pfunc_cpu_setfreq_high);
-		pmf_put_function(pfunc_cpu_setfreq_low);
-		pmf_put_function(pfunc_slewing_done);
-		pmf_put_function(pfunc_cpu0_volt_high);
-		pmf_put_function(pfunc_cpu0_volt_low);
-		pmf_put_function(pfunc_cpu1_volt_high);
-		pmf_put_function(pfunc_cpu1_volt_low);
-	}
-	of_node_put(hwclock);
-	of_node_put(cpuid);
-	of_node_put(cpunode);
-
-	return rc;
-}
-
-static int __init g5_cpufreq_init(void)
-{
-	struct device_node *cpus;
-	int rc = 0;
-
-	cpus = of_find_node_by_path("/cpus");
-	if (cpus == NULL) {
-		DBG("No /cpus node !\n");
-		return -ENODEV;
-	}
-
-	if (of_machine_is_compatible("PowerMac7,2") ||
-	    of_machine_is_compatible("PowerMac7,3") ||
-	    of_machine_is_compatible("RackMac3,1"))
-		rc = g5_pm72_cpufreq_init(cpus);
-#ifdef CONFIG_PMAC_SMU
-	else
-		rc = g5_neo2_cpufreq_init(cpus);
-#endif /* CONFIG_PMAC_SMU */
-
-	of_node_put(cpus);
-	return rc;
-}
-
-module_init(g5_cpufreq_init);
-
-
-MODULE_LICENSE("GPL");
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index 7ef60b52d6e..42be5374313 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -32,7 +32,7 @@
  * book:
  * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
  */
-static struct appldata_mem_data {
+struct appldata_mem_data {
 	u64 timestamp;
 	u32 sync_count_1;       /* after VM collected the record data, */
 	u32 sync_count_2;	/* sync_count_1 and sync_count_2 should be the
@@ -63,7 +63,7 @@ static struct appldata_mem_data {
 	u64 pgmajfault;		/* page faults (major only) */
 // <-- New in 2.6
 
-} __attribute__((packed)) appldata_mem_data;
+} __packed;
 
 
 /*
@@ -118,7 +118,6 @@ static struct appldata_ops ops = {
 	.record_nr = APPLDATA_RECORD_MEM_ID,
 	.size	   = sizeof(struct appldata_mem_data),
 	.callback  = &appldata_get_mem_data,
-	.data      = &appldata_mem_data,
 	.owner     = THIS_MODULE,
 	.mod_lvl   = {0xF0, 0xF0},		/* EBCDIC "00" */
 };
@@ -131,7 +130,17 @@ static struct appldata_ops ops = {
  */
 static int __init appldata_mem_init(void)
 {
-	return appldata_register_ops(&ops);
+	int ret;
+
+	ops.data = kzalloc(sizeof(struct appldata_mem_data), GFP_KERNEL);
+	if (!ops.data)
+		return -ENOMEM;
+
+	ret = appldata_register_ops(&ops);
+	if (ret)
+		kfree(ops.data);
+
+	return ret;
 }
 
 /*
@@ -142,6 +151,7 @@ static int __init appldata_mem_init(void)
 static void __exit appldata_mem_exit(void)
 {
 	appldata_unregister_ops(&ops);
+	kfree(ops.data);
 }
 
 
diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
index 2d224b94535..66037d2622b 100644
--- a/arch/s390/appldata/appldata_net_sum.c
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -29,7 +29,7 @@
  * book:
  * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
  */
-static struct appldata_net_sum_data {
+struct appldata_net_sum_data {
 	u64 timestamp;
 	u32 sync_count_1;	/* after VM collected the record data, */
 	u32 sync_count_2;	/* sync_count_1 and sync_count_2 should be the
@@ -51,7 +51,7 @@ static struct appldata_net_sum_data {
 	u64 rx_dropped;		/* no space in linux buffers     */
 	u64 tx_dropped;		/* no space available in linux   */
 	u64 collisions;		/* collisions while transmitting */
-} __attribute__((packed)) appldata_net_sum_data;
+} __packed;
 
 
 /*
@@ -121,7 +121,6 @@ static struct appldata_ops ops = {
 	.record_nr = APPLDATA_RECORD_NET_SUM_ID,
 	.size	   = sizeof(struct appldata_net_sum_data),
 	.callback  = &appldata_get_net_sum_data,
-	.data      = &appldata_net_sum_data,
 	.owner     = THIS_MODULE,
 	.mod_lvl   = {0xF0, 0xF0},		/* EBCDIC "00" */
 };
@@ -134,7 +133,17 @@ static struct appldata_ops ops = {
  */
 static int __init appldata_net_init(void)
 {
-	return appldata_register_ops(&ops);
+	int ret;
+
+	ops.data = kzalloc(sizeof(struct appldata_net_sum_data), GFP_KERNEL);
+	if (!ops.data)
+		return -ENOMEM;
+
+	ret = appldata_register_ops(&ops);
+	if (ret)
+		kfree(ops.data);
+
+	return ret;
 }
 
 /*
@@ -145,6 +154,7 @@ static int __init appldata_net_init(void)
 static void __exit appldata_net_exit(void)
 {
 	appldata_unregister_ops(&ops);
+	kfree(ops.data);
 }
 
 
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 7fd3690b676..138893e5f73 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -651,9 +651,7 @@ static int hypfs_create_cpu_files(struct super_block *sb,
 	}
 	diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer);
 	rc = hypfs_create_str(sb, cpu_dir, "type", buffer);
-	if (IS_ERR(rc))
-		return PTR_ERR(rc);
-	return 0;
+	return PTR_RET(rc);
 }
 
 static void *hypfs_create_lpar_files(struct super_block *sb,
@@ -702,9 +700,7 @@ static int hypfs_create_phys_cpu_files(struct super_block *sb,
 		return PTR_ERR(rc);
 	diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer);
 	rc = hypfs_create_str(sb, cpu_dir, "type", buffer);
-	if (IS_ERR(rc))
-		return PTR_ERR(rc);
-	return 0;
+	return PTR_RET(rc);
 }
 
 static void *hypfs_create_phys_files(struct super_block *sb,
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
index 9819891ed7a..4066cee0c2d 100644
--- a/arch/s390/include/asm/airq.h
+++ b/arch/s390/include/asm/airq.h
@@ -9,9 +9,18 @@
 #ifndef _ASM_S390_AIRQ_H
 #define _ASM_S390_AIRQ_H
 
-typedef void (*adapter_int_handler_t)(void *, void *);
+struct airq_struct {
+	struct hlist_node list;		/* Handler queueing. */
+	void (*handler)(struct airq_struct *);	/* Thin-interrupt handler */
+	u8 *lsi_ptr;			/* Local-Summary-Indicator pointer */
+	u8 lsi_mask;			/* Local-Summary-Indicator mask */
+	u8 isc;				/* Interrupt-subclass */
+	u8 flags;
+};
 
-void *s390_register_adapter_interrupt(adapter_int_handler_t, void *, u8);
-void s390_unregister_adapter_interrupt(void *, u8);
+#define AIRQ_PTR_ALLOCATED	0x01
+
+int register_adapter_interrupt(struct airq_struct *airq);
+void unregister_adapter_interrupt(struct airq_struct *airq);
 
 #endif /* _ASM_S390_AIRQ_H */
diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h
index 2f8c1abeb08..3fbc67d9e19 100644
--- a/arch/s390/include/asm/dma-mapping.h
+++ b/arch/s390/include/asm/dma-mapping.h
@@ -53,7 +53,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 	debug_dma_mapping_error(dev, dma_addr);
 	if (dma_ops->mapping_error)
 		return dma_ops->mapping_error(dev, dma_addr);
-	return (dma_addr == DMA_ERROR_CODE);
+	return dma_addr == DMA_ERROR_CODE;
 }
 
 static inline void *dma_alloc_coherent(struct device *dev, size_t size,
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index 2ee66a65f2d..0aa6a7ed95a 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -13,6 +13,16 @@
 
 #define MAX_FACILITY_BIT (256*8)	/* stfle_fac_list has 256 bytes */
 
+static inline int __test_facility(unsigned long nr, void *facilities)
+{
+	unsigned char *ptr;
+
+	if (nr >= MAX_FACILITY_BIT)
+		return 0;
+	ptr = (unsigned char *) facilities + (nr >> 3);
+	return (*ptr & (0x80 >> (nr & 7))) != 0;
+}
+
 /*
  * The test_facility function uses the bit odering where the MSB is bit 0.
  * That makes it easier to query facility bits with the bit number as
@@ -20,12 +30,7 @@
  */
 static inline int test_facility(unsigned long nr)
 {
-	unsigned char *ptr;
-
-	if (nr >= MAX_FACILITY_BIT)
-		return 0;
-	ptr = (unsigned char *) &S390_lowcore.stfle_fac_list + (nr >> 3);
-	return (*ptr & (0x80 >> (nr & 7))) != 0;
+	return __test_facility(nr, &S390_lowcore.stfle_fac_list);
 }
 
 /**
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index fd9be010f9b..cd6b9ee7b69 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -13,28 +13,6 @@
 #include <asm/page.h>
 #include <asm/pci_io.h>
 
-/*
- * Change virtual addresses to physical addresses and vv.
- * These are pretty trivial
- */
-static inline unsigned long virt_to_phys(volatile void * address)
-{
-	unsigned long real_address;
-	asm volatile(
-		 "	lra	%0,0(%1)\n"
-		 "	jz	0f\n"
-		 "	la	%0,0\n"
-		 "0:"
-		 : "=a" (real_address) : "a" (address) : "cc");
-	return real_address;
-}
-#define virt_to_phys virt_to_phys
-
-static inline void * phys_to_virt(unsigned long address)
-{
-	return (void *) address;
-}
-
 void *xlate_dev_mem_ptr(unsigned long phys);
 #define xlate_dev_mem_ptr xlate_dev_mem_ptr
 void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 16bd5d169cd..3238d4004e8 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -62,13 +62,20 @@ struct sca_block {
 #define CPUSTAT_MCDS       0x00000100
 #define CPUSTAT_SM         0x00000080
 #define CPUSTAT_G          0x00000008
+#define CPUSTAT_GED        0x00000004
 #define CPUSTAT_J          0x00000002
 #define CPUSTAT_P          0x00000001
 
 struct kvm_s390_sie_block {
 	atomic_t cpuflags;		/* 0x0000 */
 	__u32	prefix;			/* 0x0004 */
-	__u8	reserved8[32];		/* 0x0008 */
+	__u8	reserved08[4];		/* 0x0008 */
+#define PROG_IN_SIE (1<<0)
+	__u32	prog0c;			/* 0x000c */
+	__u8	reserved10[16];		/* 0x0010 */
+#define PROG_BLOCK_SIE 0x00000001
+	atomic_t prog20;		/* 0x0020 */
+	__u8	reserved24[4];		/* 0x0024 */
 	__u64	cputm;			/* 0x0028 */
 	__u64	ckc;			/* 0x0030 */
 	__u64	epoch;			/* 0x0038 */
@@ -90,7 +97,8 @@ struct kvm_s390_sie_block {
 	__u32	scaoh;			/* 0x005c */
 	__u8	reserved60;		/* 0x0060 */
 	__u8	ecb;			/* 0x0061 */
-	__u8	reserved62[2];		/* 0x0062 */
+	__u8    ecb2;                   /* 0x0062 */
+	__u8    reserved63[1];          /* 0x0063 */
 	__u32	scaol;			/* 0x0064 */
 	__u8	reserved68[4];		/* 0x0068 */
 	__u32	todpr;			/* 0x006c */
@@ -130,6 +138,7 @@ struct kvm_vcpu_stat {
 	u32 deliver_program_int;
 	u32 deliver_io_int;
 	u32 exit_wait_state;
+	u32 instruction_pfmf;
 	u32 instruction_stidp;
 	u32 instruction_spx;
 	u32 instruction_stpx;
@@ -166,7 +175,7 @@ struct kvm_s390_ext_info {
 };
 
 #define PGM_OPERATION            0x01
-#define PGM_PRIVILEGED_OPERATION 0x02
+#define PGM_PRIVILEGED_OP	 0x02
 #define PGM_EXECUTE              0x03
 #define PGM_PROTECTION           0x04
 #define PGM_ADDRESSING           0x05
@@ -219,7 +228,7 @@ struct kvm_s390_local_interrupt {
 	atomic_t active;
 	struct kvm_s390_float_interrupt *float_int;
 	int timer_due; /* event indicator for waitqueue below */
-	wait_queue_head_t wq;
+	wait_queue_head_t *wq;
 	atomic_t *cpuflags;
 	unsigned int action_bits;
 };
@@ -266,4 +275,5 @@ struct kvm_arch{
 };
 
 extern int sie64a(struct kvm_s390_sie_block *, u64 *);
+extern char sie_exit;
 #endif
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 6c1801235db..6e577ba0e5d 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -120,7 +120,6 @@ struct zpci_dev {
 
 	struct dentry	*debugfs_dev;
 	struct dentry	*debugfs_perf;
-	struct dentry	*debugfs_debug;
 };
 
 struct pci_hp_callback_ops {
@@ -143,7 +142,6 @@ int zpci_enable_device(struct zpci_dev *);
 int zpci_disable_device(struct zpci_dev *);
 void zpci_stop_device(struct zpci_dev *);
 void zpci_free_device(struct zpci_dev *);
-int zpci_scan_device(struct zpci_dev *);
 int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
 int zpci_unregister_ioat(struct zpci_dev *, u8);
 
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 5f0173a3169..1141fb3e7b2 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -14,3 +14,13 @@
 /* Per-CPU flags for PMU states */
 #define PMU_F_RESERVED			0x1000
 #define PMU_F_ENABLED			0x2000
+
+#ifdef CONFIG_64BIT
+
+/* Perf callbacks */
+struct pt_regs;
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs) perf_misc_flags(regs)
+
+#endif /* CONFIG_64BIT */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 590c3219c63..e1408ddb94f 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -22,6 +22,9 @@ unsigned long *page_table_alloc(struct mm_struct *, unsigned long);
 void page_table_free(struct mm_struct *, unsigned long *);
 void page_table_free_rcu(struct mmu_gather *, unsigned long *);
 
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+			  unsigned long key, bool nq);
+
 static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
 {
 	typedef struct { char _[n]; } addrtype;
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 9aefa3c64eb..0ea4e591fa7 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -296,18 +296,16 @@ extern unsigned long MODULES_END;
 #define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INV)
 
 /* Page status table bits for virtualization */
-#define RCP_ACC_BITS	0xf0000000UL
-#define RCP_FP_BIT	0x08000000UL
-#define RCP_PCL_BIT	0x00800000UL
-#define RCP_HR_BIT	0x00400000UL
-#define RCP_HC_BIT	0x00200000UL
-#define RCP_GR_BIT	0x00040000UL
-#define RCP_GC_BIT	0x00020000UL
-#define RCP_IN_BIT	0x00002000UL	/* IPTE notify bit */
-
-/* User dirty / referenced bit for KVM's migration feature */
-#define KVM_UR_BIT	0x00008000UL
-#define KVM_UC_BIT	0x00004000UL
+#define PGSTE_ACC_BITS	0xf0000000UL
+#define PGSTE_FP_BIT	0x08000000UL
+#define PGSTE_PCL_BIT	0x00800000UL
+#define PGSTE_HR_BIT	0x00400000UL
+#define PGSTE_HC_BIT	0x00200000UL
+#define PGSTE_GR_BIT	0x00040000UL
+#define PGSTE_GC_BIT	0x00020000UL
+#define PGSTE_UR_BIT	0x00008000UL
+#define PGSTE_UC_BIT	0x00004000UL	/* user dirty (migration) */
+#define PGSTE_IN_BIT	0x00002000UL	/* IPTE notify bit */
 
 #else /* CONFIG_64BIT */
 
@@ -364,18 +362,16 @@ extern unsigned long MODULES_END;
 				 | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO)
 
 /* Page status table bits for virtualization */
-#define RCP_ACC_BITS	0xf000000000000000UL
-#define RCP_FP_BIT	0x0800000000000000UL
-#define RCP_PCL_BIT	0x0080000000000000UL
-#define RCP_HR_BIT	0x0040000000000000UL
-#define RCP_HC_BIT	0x0020000000000000UL
-#define RCP_GR_BIT	0x0004000000000000UL
-#define RCP_GC_BIT	0x0002000000000000UL
-#define RCP_IN_BIT	0x0000200000000000UL	/* IPTE notify bit */
-
-/* User dirty / referenced bit for KVM's migration feature */
-#define KVM_UR_BIT	0x0000800000000000UL
-#define KVM_UC_BIT	0x0000400000000000UL
+#define PGSTE_ACC_BITS	0xf000000000000000UL
+#define PGSTE_FP_BIT	0x0800000000000000UL
+#define PGSTE_PCL_BIT	0x0080000000000000UL
+#define PGSTE_HR_BIT	0x0040000000000000UL
+#define PGSTE_HC_BIT	0x0020000000000000UL
+#define PGSTE_GR_BIT	0x0004000000000000UL
+#define PGSTE_GC_BIT	0x0002000000000000UL
+#define PGSTE_UR_BIT	0x0000800000000000UL
+#define PGSTE_UC_BIT	0x0000400000000000UL	/* user dirty (migration) */
+#define PGSTE_IN_BIT	0x0000200000000000UL	/* IPTE notify bit */
 
 #endif /* CONFIG_64BIT */
 
@@ -615,8 +611,8 @@ static inline pgste_t pgste_get_lock(pte_t *ptep)
 	asm(
 		"	lg	%0,%2\n"
 		"0:	lgr	%1,%0\n"
-		"	nihh	%0,0xff7f\n"	/* clear RCP_PCL_BIT in old */
-		"	oihh	%1,0x0080\n"	/* set RCP_PCL_BIT in new */
+		"	nihh	%0,0xff7f\n"	/* clear PCL bit in old */
+		"	oihh	%1,0x0080\n"	/* set PCL bit in new */
 		"	csg	%0,%1,%2\n"
 		"	jl	0b\n"
 		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
@@ -629,7 +625,7 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
 {
 #ifdef CONFIG_PGSTE
 	asm(
-		"	nihh	%1,0xff7f\n"	/* clear RCP_PCL_BIT */
+		"	nihh	%1,0xff7f\n"	/* clear PCL bit */
 		"	stg	%1,%0\n"
 		: "=Q" (ptep[PTRS_PER_PTE])
 		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
@@ -662,14 +658,14 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
 	else if (bits)
 		page_reset_referenced(address);
 	/* Transfer page changed & referenced bit to guest bits in pgste */
-	pgste_val(pgste) |= bits << 48;		/* RCP_GR_BIT & RCP_GC_BIT */
+	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
 	/* Get host changed & referenced bits from pgste */
-	bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52;
+	bits |= (pgste_val(pgste) & (PGSTE_HR_BIT | PGSTE_HC_BIT)) >> 52;
 	/* Transfer page changed & referenced bit to kvm user bits */
-	pgste_val(pgste) |= bits << 45;		/* KVM_UR_BIT & KVM_UC_BIT */
+	pgste_val(pgste) |= bits << 45;		/* PGSTE_UR_BIT & PGSTE_UC_BIT */
 	/* Clear relevant host bits in pgste. */
-	pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT);
-	pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT);
+	pgste_val(pgste) &= ~(PGSTE_HR_BIT | PGSTE_HC_BIT);
+	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
 	/* Copy page access key and fetch protection bit to pgste */
 	pgste_val(pgste) |=
 		(unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
@@ -690,15 +686,15 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
 	/* Get referenced bit from storage key */
 	young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
 	if (young)
-		pgste_val(pgste) |= RCP_GR_BIT;
+		pgste_val(pgste) |= PGSTE_GR_BIT;
 	/* Get host referenced bit from pgste */
-	if (pgste_val(pgste) & RCP_HR_BIT) {
-		pgste_val(pgste) &= ~RCP_HR_BIT;
+	if (pgste_val(pgste) & PGSTE_HR_BIT) {
+		pgste_val(pgste) &= ~PGSTE_HR_BIT;
 		young = 1;
 	}
 	/* Transfer referenced bit to kvm user bits and pte */
 	if (young) {
-		pgste_val(pgste) |= KVM_UR_BIT;
+		pgste_val(pgste) |= PGSTE_UR_BIT;
 		pte_val(*ptep) |= _PAGE_SWR;
 	}
 #endif
@@ -720,7 +716,7 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
 	 * The guest C/R information is still in the PGSTE, set real
 	 * key C/R to 0.
 	 */
-	nkey = (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56;
+	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
 	page_set_storage_key(address, nkey, 0);
 #endif
 }
@@ -750,6 +746,7 @@ struct gmap {
 	struct mm_struct *mm;
 	unsigned long *table;
 	unsigned long asce;
+	void *private;
 	struct list_head crst_list;
 };
 
@@ -808,8 +805,8 @@ static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
 					pte_t *ptep, pgste_t pgste)
 {
 #ifdef CONFIG_PGSTE
-	if (pgste_val(pgste) & RCP_IN_BIT) {
-		pgste_val(pgste) &= ~RCP_IN_BIT;
+	if (pgste_val(pgste) & PGSTE_IN_BIT) {
+		pgste_val(pgste) &= ~PGSTE_IN_BIT;
 		gmap_do_ipte_notify(mm, addr, ptep);
 	}
 #endif
@@ -977,8 +974,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
 		pgste = pgste_update_all(ptep, pgste);
-		dirty = !!(pgste_val(pgste) & KVM_UC_BIT);
-		pgste_val(pgste) &= ~KVM_UC_BIT;
+		dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
+		pgste_val(pgste) &= ~PGSTE_UC_BIT;
 		pgste_set_unlock(ptep, pgste);
 		return dirty;
 	}
@@ -997,8 +994,8 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
 		pgste = pgste_update_young(ptep, pgste);
-		young = !!(pgste_val(pgste) & KVM_UR_BIT);
-		pgste_val(pgste) &= ~KVM_UR_BIT;
+		young = !!(pgste_val(pgste) & PGSTE_UR_BIT);
+		pgste_val(pgste) &= ~PGSTE_UR_BIT;
 		pgste_set_unlock(ptep, pgste);
 	}
 	return young;
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 559512a455d..52b56533c57 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -24,6 +24,7 @@ struct pt_regs
 	unsigned long gprs[NUM_GPRS];
 	unsigned long orig_gpr2;
 	unsigned int int_code;
+	unsigned int int_parm;
 	unsigned long int_parm_long;
 };
 
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 9ccd1905bda..6a9a9eb645f 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -35,6 +35,7 @@ header-y += siginfo.h
 header-y += signal.h
 header-y += socket.h
 header-y += sockios.h
+header-y += sclp_ctl.h
 header-y += stat.h
 header-y += statfs.h
 header-y += swab.h
diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h
index 1c6a7f85a58..65dc694725a 100644
--- a/arch/s390/include/uapi/asm/chsc.h
+++ b/arch/s390/include/uapi/asm/chsc.h
@@ -29,6 +29,16 @@ struct chsc_async_area {
 	__u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)];
 } __attribute__ ((packed));
 
+struct chsc_header {
+	__u16 length;
+	__u16 code;
+} __attribute__ ((packed));
+
+struct chsc_sync_area {
+	struct chsc_header header;
+	__u8 data[CHSC_SIZE - sizeof(struct chsc_header)];
+} __attribute__ ((packed));
+
 struct chsc_response_struct {
 	__u16 length;
 	__u16 code;
@@ -126,5 +136,8 @@ struct chsc_cpd_info {
 #define CHSC_INFO_CCL _IOWR(CHSC_IOCTL_MAGIC, 0x86, struct chsc_comp_list)
 #define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info)
 #define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal)
+#define CHSC_START_SYNC _IOWR(CHSC_IOCTL_MAGIC, 0x89, struct chsc_sync_area)
+#define CHSC_ON_CLOSE_SET _IOWR(CHSC_IOCTL_MAGIC, 0x8a, struct chsc_async_area)
+#define CHSC_ON_CLOSE_REMOVE _IO(CHSC_IOCTL_MAGIC, 0x8b)
 
 #endif
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
index 38eca3ba40e..5812a3b2df9 100644
--- a/arch/s390/include/uapi/asm/dasd.h
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -261,6 +261,10 @@ struct dasd_snid_ioctl_data {
 #define BIODASDQUIESCE _IO(DASD_IOCTL_LETTER,6) 
 /* Resume IO on device */
 #define BIODASDRESUME  _IO(DASD_IOCTL_LETTER,7) 
+/* Abort all I/O on a device */
+#define BIODASDABORTIO _IO(DASD_IOCTL_LETTER, 240)
+/* Allow I/O on a device */
+#define BIODASDALLOWIO _IO(DASD_IOCTL_LETTER, 241)
 
 
 /* retrieve API version number */
diff --git a/arch/s390/include/uapi/asm/sclp_ctl.h b/arch/s390/include/uapi/asm/sclp_ctl.h
new file mode 100644
index 00000000000..f2818613ee4
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sclp_ctl.h
@@ -0,0 +1,24 @@
+/*
+ * IOCTL interface for SCLP
+ *
+ * Copyright IBM Corp. 2012
+ *
+ * Author: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_SCLP_CTL_H
+#define _ASM_SCLP_CTL_H
+
+#include <linux/types.h>
+
+struct sclp_ctl_sccb {
+	__u32	cmdw;
+	__u64	sccb;
+} __attribute__((packed));
+
+#define SCLP_CTL_IOCTL_MAGIC 0x10
+
+#define SCLP_CTL_SCCB \
+	_IOWR(SCLP_CTL_IOCTL_MAGIC, 0x10, struct sclp_ctl_sccb)
+
+#endif
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 7a82f9f7010..2416138ebd3 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -7,6 +7,7 @@
 #define ASM_OFFSETS_C
 
 #include <linux/kbuild.h>
+#include <linux/kvm_host.h>
 #include <linux/sched.h>
 #include <asm/cputime.h>
 #include <asm/vdso.h>
@@ -47,6 +48,7 @@ int main(void)
 	DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs));
 	DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2));
 	DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code));
+	DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm));
 	DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long));
 	DEFINE(__PT_SIZE, sizeof(struct pt_regs));
 	BLANK();
@@ -161,6 +163,8 @@ int main(void)
 	DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb));
 	DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb));
 	DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce));
+	DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c));
+	DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20));
 #endif /* CONFIG_32BIT */
 	return 0;
 }
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 4d5e6f8a797..be7a408be7a 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -429,11 +429,19 @@ io_skip:
 	stm	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
 	stm	%r8,%r9,__PT_PSW(%r11)
+	mvc	__PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
 	TRACE_IRQS_OFF
 	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
+io_loop:
 	l	%r1,BASED(.Ldo_IRQ)
 	lr	%r2,%r11		# pass pointer to pt_regs
 	basr	%r14,%r1		# call do_IRQ
+	tm	__LC_MACHINE_FLAGS+2,0x10	# MACHINE_FLAG_LPAR
+	jz	io_return
+	tpi	0
+	jz	io_return
+	mvc	__PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+	j	io_loop
 io_return:
 	LOCKDEP_SYS_EXIT
 	TRACE_IRQS_ON
@@ -573,10 +581,10 @@ ext_skip:
 	stm	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
 	stm	%r8,%r9,__PT_PSW(%r11)
+	mvc	__PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
+	mvc	__PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
 	TRACE_IRQS_OFF
 	lr	%r2,%r11		# pass pointer to pt_regs
-	l	%r3,__LC_EXT_CPU_ADDR	# get cpu address + interruption code
-	l	%r4,__LC_EXT_PARAMS	# get external parameters
 	l	%r1,BASED(.Ldo_extint)
 	basr	%r14,%r1		# call do_extint
 	j	io_return
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index aa0ab02e959..3ddbc26d246 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -54,7 +54,7 @@ void handle_signal32(unsigned long sig, struct k_sigaction *ka,
 void do_notify_resume(struct pt_regs *regs);
 
 struct ext_code;
-void do_extint(struct pt_regs *regs, struct ext_code, unsigned int, unsigned long);
+void do_extint(struct pt_regs *regs);
 void do_restart(void);
 void __init startup_init(void);
 void die(struct pt_regs *regs, const char *str);
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 4c17eece707..1c039d0c24c 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -47,7 +47,6 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING)
 _TIF_TRACE    = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 		 _TIF_SYSCALL_TRACEPOINT)
-_TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING)
 
 #define BASED(name) name-system_call(%r13)
 
@@ -81,23 +80,27 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING)
 #endif
 	.endm
 
-	.macro	HANDLE_SIE_INTERCEPT scratch,pgmcheck
+	.macro	HANDLE_SIE_INTERCEPT scratch,reason
 #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
 	tmhh	%r8,0x0001		# interrupting from user ?
-	jnz	.+42
+	jnz	.+62
 	lgr	\scratch,%r9
-	slg	\scratch,BASED(.Lsie_loop)
-	clg	\scratch,BASED(.Lsie_length)
-	.if	\pgmcheck
+	slg	\scratch,BASED(.Lsie_critical)
+	clg	\scratch,BASED(.Lsie_critical_length)
+	.if	\reason==1
 	# Some program interrupts are suppressing (e.g. protection).
 	# We must also check the instruction after SIE in that case.
 	# do_protection_exception will rewind to rewind_pad
-	jh	.+22
+	jh	.+42
 	.else
-	jhe	.+22
+	jhe	.+42
 	.endif
-	lg	%r9,BASED(.Lsie_loop)
-	LPP	BASED(.Lhost_id)	# set host id
+	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
+	LPP	__SF_EMPTY+16(%r15)		# set host id
+	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+	larl	%r9,sie_exit			# skip forward to sie_exit
+	mvi	__SF_EMPTY+31(%r15),\reason	# set exit reason
 #endif
 	.endm
 
@@ -450,7 +453,7 @@ ENTRY(io_int_handler)
 	lg	%r12,__LC_THREAD_INFO
 	larl	%r13,system_call
 	lmg	%r8,%r9,__LC_IO_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,0
+	HANDLE_SIE_INTERCEPT %r14,2
 	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
 	tmhh	%r8,0x0001		# interrupting from user?
 	jz	io_skip
@@ -460,10 +463,18 @@ io_skip:
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
+	mvc	__PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
 	TRACE_IRQS_OFF
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+io_loop:
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,do_IRQ
+	tm	__LC_MACHINE_FLAGS+6,0x10	# MACHINE_FLAG_LPAR
+	jz	io_return
+	tpi	0
+	jz	io_return
+	mvc	__PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+	j	io_loop
 io_return:
 	LOCKDEP_SYS_EXIT
 	TRACE_IRQS_ON
@@ -595,7 +606,7 @@ ENTRY(ext_int_handler)
 	lg	%r12,__LC_THREAD_INFO
 	larl	%r13,system_call
 	lmg	%r8,%r9,__LC_EXT_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,0
+	HANDLE_SIE_INTERCEPT %r14,3
 	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
 	tmhh	%r8,0x0001		# interrupting from user ?
 	jz	ext_skip
@@ -605,13 +616,13 @@ ext_skip:
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
+	lghi	%r1,__LC_EXT_PARAMS2
+	mvc	__PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
+	mvc	__PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
+	mvc	__PT_INT_PARM_LONG(8,%r11),0(%r1)
 	TRACE_IRQS_OFF
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-	lghi	%r1,4096
 	lgr	%r2,%r11		# pass pointer to pt_regs
-	llgf	%r3,__LC_EXT_CPU_ADDR	# get cpu address + interruption code
-	llgf	%r4,__LC_EXT_PARAMS	# get external parameter
-	lg	%r5,__LC_EXT_PARAMS2-4096(%r1)	# get 64 bit external parameter
 	brasl	%r14,do_extint
 	j	io_return
 
@@ -643,7 +654,7 @@ ENTRY(mcck_int_handler)
 	lg	%r12,__LC_THREAD_INFO
 	larl	%r13,system_call
 	lmg	%r8,%r9,__LC_MCK_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,0
+	HANDLE_SIE_INTERCEPT %r14,4
 	tm	__LC_MCCK_CODE,0x80	# system damage?
 	jo	mcck_panic		# yes -> rest of mcck code invalid
 	lghi	%r14,__LC_CPU_TIMER_SAVE_AREA
@@ -937,56 +948,50 @@ ENTRY(sie64a)
 	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
 	stg	%r2,__SF_EMPTY(%r15)		# save control block pointer
 	stg	%r3,__SF_EMPTY+8(%r15)		# save guest register save area
-	xc	__SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # host id == 0
+	xc	__SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
 	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
-# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions in the sie_loop should not cause program interrupts. So
-# lets use a nop (47 00 00 00) as a landing pad.
-# See also HANDLE_SIE_INTERCEPT
-rewind_pad:
-	nop	0
-sie_loop:
-	lg	%r14,__LC_THREAD_INFO		# pointer thread_info struct
-	tm	__TI_flags+7(%r14),_TIF_EXIT_SIE
-	jnz	sie_exit
 	lg	%r14,__LC_GMAP			# get gmap pointer
 	ltgr	%r14,%r14
 	jz	sie_gmap
 	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
 sie_gmap:
 	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
+	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
+	tm	__SIE_PROG20+3(%r14),1		# last exit...
+	jnz	sie_done
 	LPP	__SF_EMPTY(%r15)		# set guest id
 	sie	0(%r14)
 sie_done:
 	LPP	__SF_EMPTY+16(%r15)		# set host id
-	lg	%r14,__LC_THREAD_INFO		# pointer thread_info struct
-sie_exit:
+	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+# some program checks are suppressing. C code (e.g. do_protection_exception)
+# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
+# instructions between sie64a and sie_done should not cause program
+# interrupts. So let's use a nop (47 00 00 00) as a landing pad.
+# See also HANDLE_SIE_INTERCEPT
+rewind_pad:
+	nop	0
+	.globl sie_exit
+sie_exit:
 	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
 	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
 	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
-	lghi	%r2,0
+	lg	%r2,__SF_EMPTY+24(%r15)		# return exit reason code
 	br	%r14
 sie_fault:
-	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
-	lg	%r14,__LC_THREAD_INFO		# pointer thread_info struct
-	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
-	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
-	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
-	lghi	%r2,-EFAULT
-	br	%r14
+	lghi	%r14,-EFAULT
+	stg	%r14,__SF_EMPTY+24(%r15)	# set exit reason code
+	j	sie_exit
 
 	.align	8
-.Lsie_loop:
-	.quad	sie_loop
-.Lsie_length:
-	.quad	sie_done - sie_loop
-.Lhost_id:
-	.quad	0
+.Lsie_critical:
+	.quad	sie_gmap
+.Lsie_critical_length:
+	.quad	sie_done - sie_gmap
 
 	EX_TABLE(rewind_pad,sie_fault)
-	EX_TABLE(sie_loop,sie_fault)
+	EX_TABLE(sie_exit,sie_fault)
 #endif
 
 		.section .rodata, "a"
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index dd3c1994b8b..54b0995514e 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -234,9 +234,9 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler)
 }
 EXPORT_SYMBOL(unregister_external_interrupt);
 
-void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code,
-			   unsigned int param32, unsigned long param64)
+void __irq_entry do_extint(struct pt_regs *regs)
 {
+	struct ext_code ext_code;
 	struct pt_regs *old_regs;
 	struct ext_int_info *p;
 	int index;
@@ -248,6 +248,7 @@ void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code,
 		clock_comparator_work();
 	}
 	kstat_incr_irqs_this_cpu(EXTERNAL_INTERRUPT, NULL);
+	ext_code = *(struct ext_code *) &regs->int_code;
 	if (ext_code.code != 0x1004)
 		__get_cpu_var(s390_idle).nohz_delay = 1;
 
@@ -255,7 +256,8 @@ void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code,
 	rcu_read_lock();
 	list_for_each_entry_rcu(p, &ext_int_hash[index], entry)
 		if (likely(p->code == ext_code.code))
-			p->handler(ext_code, param32, param64);
+			p->handler(ext_code, regs->int_parm,
+				   regs->int_parm_long);
 	rcu_read_unlock();
 	irq_exit();
 	set_irq_regs(old_regs);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index f58f37f6682..a6fc037671b 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -13,6 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/perf_event.h>
+#include <linux/kvm_host.h>
 #include <linux/percpu.h>
 #include <linux/export.h>
 #include <asm/irq.h>
@@ -39,6 +40,57 @@ int perf_num_counters(void)
 }
 EXPORT_SYMBOL(perf_num_counters);
 
+static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs)
+{
+	struct stack_frame *stack = (struct stack_frame *) regs->gprs[15];
+
+	if (!stack)
+		return NULL;
+
+	return (struct kvm_s390_sie_block *) stack->empty1[0];
+}
+
+static bool is_in_guest(struct pt_regs *regs)
+{
+	unsigned long ip = instruction_pointer(regs);
+
+	if (user_mode(regs))
+		return false;
+
+	return ip == (unsigned long) &sie_exit;
+}
+
+static unsigned long guest_is_user_mode(struct pt_regs *regs)
+{
+	return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE;
+}
+
+static unsigned long instruction_pointer_guest(struct pt_regs *regs)
+{
+	return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN;
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	return is_in_guest(regs) ? instruction_pointer_guest(regs)
+				 : instruction_pointer(regs);
+}
+
+static unsigned long perf_misc_guest_flags(struct pt_regs *regs)
+{
+	return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+					: PERF_RECORD_MISC_GUEST_KERNEL;
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	if (is_in_guest(regs))
+		return perf_misc_guest_flags(regs);
+
+	return user_mode(regs) ? PERF_RECORD_MISC_USER
+			       : PERF_RECORD_MISC_KERNEL;
+}
+
 void perf_event_print_debug(void)
 {
 	struct cpumf_ctr_info cf_info;
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 9bdbcef1da9..3bac589844a 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -7,6 +7,7 @@ EXPORT_SYMBOL(_mcount);
 #endif
 #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
 EXPORT_SYMBOL(sie64a);
+EXPORT_SYMBOL(sie_exit);
 #endif
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 0a49095104c..497451ec5e2 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -719,10 +719,6 @@ static void reserve_oldmem(void)
 	}
 	create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE);
 	create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE);
-	if (OLDMEM_BASE + OLDMEM_SIZE == real_size)
-		saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1;
-	else
-		saved_max_pfn = PFN_DOWN(real_size) - 1;
 #endif
 }
 
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 4f977d0d25c..15a016c1056 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -49,7 +49,6 @@
 
 enum {
 	ec_schedule = 0,
-	ec_call_function,
 	ec_call_function_single,
 	ec_stop_cpu,
 };
@@ -438,8 +437,6 @@ static void smp_handle_ext_call(void)
 		smp_stop_cpu();
 	if (test_bit(ec_schedule, &bits))
 		scheduler_ipi();
-	if (test_bit(ec_call_function, &bits))
-		generic_smp_call_function_interrupt();
 	if (test_bit(ec_call_function_single, &bits))
 		generic_smp_call_function_single_interrupt();
 }
@@ -456,7 +453,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 	int cpu;
 
 	for_each_cpu(cpu, mask)
-		pcpu_ec_call(pcpu_devices + cpu, ec_call_function);
+		pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
 }
 
 void arch_send_call_function_single_ipi(int cpu)
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 8fe9d65a458..40b4c6470f8 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -6,7 +6,8 @@
 # it under the terms of the GNU General Public License (version 2 only)
 # as published by the Free Software Foundation.
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o)
+KVM := ../../../virt/kvm
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 1c01a991298..3074475c8ae 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -132,6 +132,9 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 {
 	int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
 
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
 	trace_kvm_s390_handle_diag(vcpu, code);
 	switch (code) {
 	case 0x10:
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index b7d1b2edeeb..5ee56e5acc2 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -22,87 +22,6 @@
 #include "trace.h"
 #include "trace-s390.h"
 
-static int handle_lctlg(struct kvm_vcpu *vcpu)
-{
-	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
-	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u64 useraddr;
-	int reg, rc;
-
-	vcpu->stat.instruction_lctlg++;
-
-	useraddr = kvm_s390_get_base_disp_rsy(vcpu);
-
-	if (useraddr & 7)
-		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-
-	reg = reg1;
-
-	VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3,
-		   useraddr);
-	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
-
-	do {
-		rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
-			       (u64 __user *) useraddr);
-		if (rc)
-			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		useraddr += 8;
-		if (reg == reg3)
-			break;
-		reg = (reg + 1) % 16;
-	} while (1);
-	return 0;
-}
-
-static int handle_lctl(struct kvm_vcpu *vcpu)
-{
-	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
-	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u64 useraddr;
-	u32 val = 0;
-	int reg, rc;
-
-	vcpu->stat.instruction_lctl++;
-
-	useraddr = kvm_s390_get_base_disp_rs(vcpu);
-
-	if (useraddr & 3)
-		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-
-	VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3,
-		   useraddr);
-	trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr);
-
-	reg = reg1;
-	do {
-		rc = get_guest(vcpu, val, (u32 __user *) useraddr);
-		if (rc)
-			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
-		vcpu->arch.sie_block->gcr[reg] |= val;
-		useraddr += 4;
-		if (reg == reg3)
-			break;
-		reg = (reg + 1) % 16;
-	} while (1);
-	return 0;
-}
-
-static const intercept_handler_t eb_handlers[256] = {
-	[0x2f] = handle_lctlg,
-	[0x8a] = kvm_s390_handle_priv_eb,
-};
-
-static int handle_eb(struct kvm_vcpu *vcpu)
-{
-	intercept_handler_t handler;
-
-	handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
-	if (handler)
-		return handler(vcpu);
-	return -EOPNOTSUPP;
-}
 
 static const intercept_handler_t instruction_handlers[256] = {
 	[0x01] = kvm_s390_handle_01,
@@ -110,10 +29,10 @@ static const intercept_handler_t instruction_handlers[256] = {
 	[0x83] = kvm_s390_handle_diag,
 	[0xae] = kvm_s390_handle_sigp,
 	[0xb2] = kvm_s390_handle_b2,
-	[0xb7] = handle_lctl,
+	[0xb7] = kvm_s390_handle_lctl,
 	[0xb9] = kvm_s390_handle_b9,
 	[0xe5] = kvm_s390_handle_e5,
-	[0xeb] = handle_eb,
+	[0xeb] = kvm_s390_handle_eb,
 };
 
 static int handle_noop(struct kvm_vcpu *vcpu)
@@ -174,47 +93,12 @@ static int handle_stop(struct kvm_vcpu *vcpu)
 
 static int handle_validity(struct kvm_vcpu *vcpu)
 {
-	unsigned long vmaddr;
 	int viwhy = vcpu->arch.sie_block->ipb >> 16;
-	int rc;
 
 	vcpu->stat.exit_validity++;
 	trace_kvm_s390_intercept_validity(vcpu, viwhy);
-	if (viwhy == 0x37) {
-		vmaddr = gmap_fault(vcpu->arch.sie_block->prefix,
-				    vcpu->arch.gmap);
-		if (IS_ERR_VALUE(vmaddr)) {
-			rc = -EOPNOTSUPP;
-			goto out;
-		}
-		rc = fault_in_pages_writeable((char __user *) vmaddr,
-			 PAGE_SIZE);
-		if (rc) {
-			/* user will receive sigsegv, exit to user */
-			rc = -EOPNOTSUPP;
-			goto out;
-		}
-		vmaddr = gmap_fault(vcpu->arch.sie_block->prefix + PAGE_SIZE,
-				    vcpu->arch.gmap);
-		if (IS_ERR_VALUE(vmaddr)) {
-			rc = -EOPNOTSUPP;
-			goto out;
-		}
-		rc = fault_in_pages_writeable((char __user *) vmaddr,
-			 PAGE_SIZE);
-		if (rc) {
-			/* user will receive sigsegv, exit to user */
-			rc = -EOPNOTSUPP;
-			goto out;
-		}
-	} else
-		rc = -EOPNOTSUPP;
-
-out:
-	if (rc)
-		VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
-			   viwhy);
-	return rc;
+	WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy);
+	return -EOPNOTSUPP;
 }
 
 static int handle_instruction(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 5c948177529..7f35cb33e51 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -438,7 +438,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 no_timer:
 	spin_lock(&vcpu->arch.local_int.float_int->lock);
 	spin_lock_bh(&vcpu->arch.local_int.lock);
-	add_wait_queue(&vcpu->arch.local_int.wq, &wait);
+	add_wait_queue(&vcpu->wq, &wait);
 	while (list_empty(&vcpu->arch.local_int.list) &&
 		list_empty(&vcpu->arch.local_int.float_int->list) &&
 		(!vcpu->arch.local_int.timer_due) &&
@@ -452,7 +452,7 @@ no_timer:
 	}
 	__unset_cpu_idle(vcpu);
 	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&vcpu->arch.local_int.wq, &wait);
+	remove_wait_queue(&vcpu->wq, &wait);
 	spin_unlock_bh(&vcpu->arch.local_int.lock);
 	spin_unlock(&vcpu->arch.local_int.float_int->lock);
 	hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
@@ -465,8 +465,8 @@ void kvm_s390_tasklet(unsigned long parm)
 
 	spin_lock(&vcpu->arch.local_int.lock);
 	vcpu->arch.local_int.timer_due = 1;
-	if (waitqueue_active(&vcpu->arch.local_int.wq))
-		wake_up_interruptible(&vcpu->arch.local_int.wq);
+	if (waitqueue_active(&vcpu->wq))
+		wake_up_interruptible(&vcpu->wq);
 	spin_unlock(&vcpu->arch.local_int.lock);
 }
 
@@ -613,7 +613,7 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
 	spin_lock_bh(&li->lock);
 	list_add(&inti->list, &li->list);
 	atomic_set(&li->active, 1);
-	BUG_ON(waitqueue_active(&li->wq));
+	BUG_ON(waitqueue_active(li->wq));
 	spin_unlock_bh(&li->lock);
 	return 0;
 }
@@ -746,8 +746,8 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 	li = fi->local_int[sigcpu];
 	spin_lock_bh(&li->lock);
 	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
-	if (waitqueue_active(&li->wq))
-		wake_up_interruptible(&li->wq);
+	if (waitqueue_active(li->wq))
+		wake_up_interruptible(li->wq);
 	spin_unlock_bh(&li->lock);
 	spin_unlock(&fi->lock);
 	mutex_unlock(&kvm->lock);
@@ -832,8 +832,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 	if (inti->type == KVM_S390_SIGP_STOP)
 		li->action_bits |= ACTION_STOP_ON_STOP;
 	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
-	if (waitqueue_active(&li->wq))
-		wake_up_interruptible(&vcpu->arch.local_int.wq);
+	if (waitqueue_active(&vcpu->wq))
+		wake_up_interruptible(&vcpu->wq);
 	spin_unlock_bh(&li->lock);
 	mutex_unlock(&vcpu->kvm->lock);
 	return 0;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c1c7c683fa2..ba694d2ba51 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -59,6 +59,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
+	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
@@ -84,6 +85,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 };
 
 static unsigned long long *facilities;
+static struct gmap_notifier gmap_notifier;
 
 /* Section: not file related */
 int kvm_arch_hardware_enable(void *garbage)
@@ -96,13 +98,18 @@ void kvm_arch_hardware_disable(void *garbage)
 {
 }
 
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
+
 int kvm_arch_hardware_setup(void)
 {
+	gmap_notifier.notifier_call = kvm_gmap_notifier;
+	gmap_register_ipte_notifier(&gmap_notifier);
 	return 0;
 }
 
 void kvm_arch_hardware_unsetup(void)
 {
+	gmap_unregister_ipte_notifier(&gmap_notifier);
 }
 
 void kvm_arch_check_processor_compat(void *rtn)
@@ -239,6 +246,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 		kvm->arch.gmap = gmap_alloc(current->mm);
 		if (!kvm->arch.gmap)
 			goto out_nogmap;
+		kvm->arch.gmap->private = kvm;
 	}
 
 	kvm->arch.css_support = 0;
@@ -270,7 +278,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 	free_page((unsigned long)(vcpu->arch.sie_block));
 	kvm_vcpu_uninit(vcpu);
-	kfree(vcpu);
+	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
 static void kvm_free_vcpus(struct kvm *kvm)
@@ -309,6 +317,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 		vcpu->arch.gmap = gmap_alloc(current->mm);
 		if (!vcpu->arch.gmap)
 			return -ENOMEM;
+		vcpu->arch.gmap->private = vcpu->kvm;
 		return 0;
 	}
 
@@ -373,8 +382,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
 						    CPUSTAT_SM |
-						    CPUSTAT_STOPPED);
+						    CPUSTAT_STOPPED |
+						    CPUSTAT_GED);
 	vcpu->arch.sie_block->ecb   = 6;
+	vcpu->arch.sie_block->ecb2  = 8;
 	vcpu->arch.sie_block->eca   = 0xC1002001U;
 	vcpu->arch.sie_block->fac   = (int) (long) facilities;
 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
@@ -397,7 +408,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
 	rc = -ENOMEM;
 
-	vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
+	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
 	if (!vcpu)
 		goto out;
 
@@ -427,7 +438,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
 	spin_lock(&kvm->arch.float_int.lock);
 	kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
-	init_waitqueue_head(&vcpu->arch.local_int.wq);
+	vcpu->arch.local_int.wq = &vcpu->wq;
 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
 	spin_unlock(&kvm->arch.float_int.lock);
 
@@ -442,7 +453,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 out_free_sie_block:
 	free_page((unsigned long)(vcpu->arch.sie_block));
 out_free_cpu:
-	kfree(vcpu);
+	kmem_cache_free(kvm_vcpu_cache, vcpu);
 out:
 	return ERR_PTR(rc);
 }
@@ -454,6 +465,50 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+void s390_vcpu_block(struct kvm_vcpu *vcpu)
+{
+	atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+}
+
+void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
+{
+	atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+}
+
+/*
+ * Kick a guest cpu out of SIE and wait until SIE is not running.
+ * If the CPU is not running (e.g. waiting as idle) the function will
+ * return immediately. */
+void exit_sie(struct kvm_vcpu *vcpu)
+{
+	atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
+	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
+		cpu_relax();
+}
+
+/* Kick a guest cpu out of SIE and prevent SIE-reentry */
+void exit_sie_sync(struct kvm_vcpu *vcpu)
+{
+	s390_vcpu_block(vcpu);
+	exit_sie(vcpu);
+}
+
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
+{
+	int i;
+	struct kvm *kvm = gmap->private;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		/* match against both prefix pages */
+		if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
+			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
+			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+			exit_sie_sync(vcpu);
+		}
+	}
+}
+
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
 	/* kvm common code refers to this, but never calls it */
@@ -606,6 +661,27 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 	return -EINVAL; /* not implemented yet */
 }
 
+static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
+	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
+	 * This ensures that the ipte instruction for this request has
+	 * already finished. We might race against a second unmapper that
+	 * wants to set the blocking bit. Lets just retry the request loop.
+	 */
+	while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
+		int rc;
+		rc = gmap_ipte_notify(vcpu->arch.gmap,
+				      vcpu->arch.sie_block->prefix,
+				      PAGE_SIZE * 2);
+		if (rc)
+			return rc;
+		s390_vcpu_unblock(vcpu);
+	}
+	return 0;
+}
+
 static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
 	int rc;
@@ -621,6 +697,10 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	if (!kvm_is_ucontrol(vcpu->kvm))
 		kvm_s390_deliver_pending_interrupts(vcpu);
 
+	rc = kvm_s390_handle_requests(vcpu);
+	if (rc)
+		return rc;
+
 	vcpu->arch.sie_block->icptcode = 0;
 	preempt_disable();
 	kvm_guest_enter();
@@ -630,7 +710,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	trace_kvm_s390_sie_enter(vcpu,
 				 atomic_read(&vcpu->arch.sie_block->cpuflags));
 	rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
-	if (rc) {
+	if (rc > 0)
+		rc = 0;
+	if (rc < 0) {
 		if (kvm_is_ucontrol(vcpu->kvm)) {
 			rc = SIE_INTERCEPT_UCONTROL;
 		} else {
@@ -1046,7 +1128,7 @@ static int __init kvm_s390_init(void)
 		return -ENOMEM;
 	}
 	memcpy(facilities, S390_lowcore.stfle_fac_list, 16);
-	facilities[0] &= 0xff00fff3f47c0000ULL;
+	facilities[0] &= 0xff82fff3f47c0000ULL;
 	facilities[1] &= 0x001c000000000000ULL;
 	return 0;
 }
@@ -1059,3 +1141,12 @@ static void __exit kvm_s390_exit(void)
 
 module_init(kvm_s390_init);
 module_exit(kvm_s390_exit);
+
+/*
+ * Enable autoloading of the kvm module.
+ * Note that we add the module alias here instead of virt/kvm/kvm_main.c
+ * since x86 takes a different approach.
+ */
+#include <linux/miscdevice.h>
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index efc14f68726..028ca9fd215 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -63,6 +63,7 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
 {
 	vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u;
 	vcpu->arch.sie_block->ihcpu  = 0xffff;
+	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
 }
 
 static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)
@@ -85,6 +86,12 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
 	*address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
 }
 
+static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
+{
+	*r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20;
+	*r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
+}
+
 static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
 {
 	u32 base2 = vcpu->arch.sie_block->ipb >> 28;
@@ -125,7 +132,8 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
-int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
 
 /* implemented in sigp.c */
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
@@ -133,6 +141,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
 /* implemented in kvm-s390.c */
 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
 				 unsigned long addr);
+void s390_vcpu_block(struct kvm_vcpu *vcpu);
+void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
+void exit_sie(struct kvm_vcpu *vcpu);
+void exit_sie_sync(struct kvm_vcpu *vcpu);
 /* implemented in diag.c */
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
 
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 6bbd7b5a0bb..0da3e6eb6be 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1,7 +1,7 @@
 /*
  * handling privileged instructions
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2013
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -20,6 +20,9 @@
 #include <asm/debug.h>
 #include <asm/ebcdic.h>
 #include <asm/sysinfo.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/io.h>
 #include <asm/ptrace.h>
 #include <asm/compat.h>
 #include "gaccess.h"
@@ -34,6 +37,9 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
 
 	vcpu->stat.instruction_spx++;
 
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
 	operand2 = kvm_s390_get_base_disp_s(vcpu);
 
 	/* must be word boundary */
@@ -65,6 +71,9 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
 
 	vcpu->stat.instruction_stpx++;
 
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
 	operand2 = kvm_s390_get_base_disp_s(vcpu);
 
 	/* must be word boundary */
@@ -89,6 +98,9 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
 
 	vcpu->stat.instruction_stap++;
 
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
 	useraddr = kvm_s390_get_base_disp_s(vcpu);
 
 	if (useraddr & 1)
@@ -105,7 +117,12 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
 static int handle_skey(struct kvm_vcpu *vcpu)
 {
 	vcpu->stat.instruction_storage_key++;
-	vcpu->arch.sie_block->gpsw.addr -= 4;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	vcpu->arch.sie_block->gpsw.addr =
+		__rewind_psw(vcpu->arch.sie_block->gpsw, 4);
 	VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
 	return 0;
 }
@@ -129,9 +146,10 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
 		 * Store the two-word I/O interruption code into the
 		 * provided area.
 		 */
-		put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr);
-		put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2));
-		put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4));
+		if (put_guest(vcpu, inti->io.subchannel_id, (u16 __user *)addr)
+		    || put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *)(addr + 2))
+		    || put_guest(vcpu, inti->io.io_int_parm, (u32 __user *)(addr + 4)))
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 	} else {
 		/*
 		 * Store the three-word I/O interruption code into
@@ -182,6 +200,9 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
 {
 	VCPU_EVENT(vcpu, 4, "%s", "I/O instruction");
 
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
 	if (vcpu->kvm->arch.css_support) {
 		/*
 		 * Most I/O instructions will be handled by userspace.
@@ -210,8 +231,12 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
 	int rc;
 
 	vcpu->stat.instruction_stfl++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
 	/* only pass the facility bits, which we can handle */
-	facility_list = S390_lowcore.stfl_fac_list & 0xff00fff3;
+	facility_list = S390_lowcore.stfl_fac_list & 0xff82fff3;
 
 	rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
 			   &facility_list, sizeof(facility_list));
@@ -255,8 +280,8 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
 	u64 addr;
 
 	if (gpsw->mask & PSW_MASK_PSTATE)
-		return kvm_s390_inject_program_int(vcpu,
-						   PGM_PRIVILEGED_OPERATION);
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
 	addr = kvm_s390_get_base_disp_s(vcpu);
 	if (addr & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -278,6 +303,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
 	psw_t new_psw;
 	u64 addr;
 
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
 	addr = kvm_s390_get_base_disp_s(vcpu);
 	if (addr & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -296,6 +324,9 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
 
 	vcpu->stat.instruction_stidp++;
 
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
 	operand2 = kvm_s390_get_base_disp_s(vcpu);
 
 	if (operand2 & 7)
@@ -351,16 +382,30 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 	vcpu->stat.instruction_stsi++;
 	VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
 
-	operand2 = kvm_s390_get_base_disp_s(vcpu);
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	if (fc > 3) {
+		vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;	  /* cc 3 */
+		return 0;
+	}
 
-	if (operand2 & 0xfff && fc > 0)
+	if (vcpu->run->s.regs.gprs[0] & 0x0fffff00
+	    || vcpu->run->s.regs.gprs[1] & 0xffff0000)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	switch (fc) {
-	case 0:
+	if (fc == 0) {
 		vcpu->run->s.regs.gprs[0] = 3 << 28;
-		vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+		vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);  /* cc 0 */
 		return 0;
+	}
+
+	operand2 = kvm_s390_get_base_disp_s(vcpu);
+
+	if (operand2 & 0xfff)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	switch (fc) {
 	case 1: /* same handling for 1 and 2 */
 	case 2:
 		mem = get_zeroed_page(GFP_KERNEL);
@@ -377,8 +422,6 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 			goto out_no_data;
 		handle_stsi_3_2_2(vcpu, (void *) mem);
 		break;
-	default:
-		goto out_no_data;
 	}
 
 	if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
@@ -432,20 +475,14 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
 	intercept_handler_t handler;
 
 	/*
-	 * a lot of B2 instructions are priviledged. We first check for
-	 * the privileged ones, that we can handle in the kernel. If the
-	 * kernel can handle this instruction, we check for the problem
-	 * state bit and (a) handle the instruction or (b) send a code 2
-	 * program check.
-	 * Anything else goes to userspace.*/
+	 * A lot of B2 instructions are priviledged. Here we check for
+	 * the privileged ones, that we can handle in the kernel.
+	 * Anything else goes to userspace.
+	 */
 	handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
-	if (handler) {
-		if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
-			return kvm_s390_inject_program_int(vcpu,
-						   PGM_PRIVILEGED_OPERATION);
-		else
-			return handler(vcpu);
-	}
+	if (handler)
+		return handler(vcpu);
+
 	return -EOPNOTSUPP;
 }
 
@@ -453,8 +490,7 @@ static int handle_epsw(struct kvm_vcpu *vcpu)
 {
 	int reg1, reg2;
 
-	reg1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 24;
-	reg2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
 
 	/* This basically extracts the mask half of the psw. */
 	vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000;
@@ -467,9 +503,88 @@ static int handle_epsw(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+#define PFMF_RESERVED   0xfffc0101UL
+#define PFMF_SK         0x00020000UL
+#define PFMF_CF         0x00010000UL
+#define PFMF_UI         0x00008000UL
+#define PFMF_FSC        0x00007000UL
+#define PFMF_NQ         0x00000800UL
+#define PFMF_MR         0x00000400UL
+#define PFMF_MC         0x00000200UL
+#define PFMF_KEY        0x000000feUL
+
+static int handle_pfmf(struct kvm_vcpu *vcpu)
+{
+	int reg1, reg2;
+	unsigned long start, end;
+
+	vcpu->stat.instruction_pfmf++;
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+	if (!MACHINE_HAS_PFMF)
+		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	/* Only provide non-quiescing support if the host supports it */
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ &&
+	    S390_lowcore.stfl_fac_list & 0x00020000)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	/* No support for conditional-SSKE */
+	if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+	switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
+	case 0x00000000:
+		end = (start + (1UL << 12)) & ~((1UL << 12) - 1);
+		break;
+	case 0x00001000:
+		end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
+		break;
+	/* We dont support EDAT2
+	case 0x00002000:
+		end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
+		break;*/
+	default:
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	}
+	while (start < end) {
+		unsigned long useraddr;
+
+		useraddr = gmap_translate(start, vcpu->arch.gmap);
+		if (IS_ERR((void *)useraddr))
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+		if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+			if (clear_user((void __user *)useraddr, PAGE_SIZE))
+				return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		}
+
+		if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
+			if (set_guest_storage_key(current->mm, useraddr,
+					vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
+					vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
+				return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		}
+
+		start += PAGE_SIZE;
+	}
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC)
+		vcpu->run->s.regs.gprs[reg2] = end;
+	return 0;
+}
+
 static const intercept_handler_t b9_handlers[256] = {
 	[0x8d] = handle_epsw,
 	[0x9c] = handle_io_inst,
+	[0xaf] = handle_pfmf,
 };
 
 int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
@@ -478,29 +593,96 @@ int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
 
 	/* This is handled just as for the B2 instructions. */
 	handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
-	if (handler) {
-		if ((handler != handle_epsw) &&
-		    (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE))
-			return kvm_s390_inject_program_int(vcpu,
-						   PGM_PRIVILEGED_OPERATION);
-		else
-			return handler(vcpu);
-	}
+	if (handler)
+		return handler(vcpu);
+
 	return -EOPNOTSUPP;
 }
 
+int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	u64 useraddr;
+	u32 val = 0;
+	int reg, rc;
+
+	vcpu->stat.instruction_lctl++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	useraddr = kvm_s390_get_base_disp_rs(vcpu);
+
+	if (useraddr & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3,
+		   useraddr);
+	trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr);
+
+	reg = reg1;
+	do {
+		rc = get_guest(vcpu, val, (u32 __user *) useraddr);
+		if (rc)
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
+		vcpu->arch.sie_block->gcr[reg] |= val;
+		useraddr += 4;
+		if (reg == reg3)
+			break;
+		reg = (reg + 1) % 16;
+	} while (1);
+
+	return 0;
+}
+
+static int handle_lctlg(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	u64 useraddr;
+	int reg, rc;
+
+	vcpu->stat.instruction_lctlg++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	useraddr = kvm_s390_get_base_disp_rsy(vcpu);
+
+	if (useraddr & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	reg = reg1;
+
+	VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3,
+		   useraddr);
+	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
+
+	do {
+		rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
+			       (u64 __user *) useraddr);
+		if (rc)
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		useraddr += 8;
+		if (reg == reg3)
+			break;
+		reg = (reg + 1) % 16;
+	} while (1);
+
+	return 0;
+}
+
 static const intercept_handler_t eb_handlers[256] = {
+	[0x2f] = handle_lctlg,
 	[0x8a] = handle_io_inst,
 };
 
-int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu)
+int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
 {
 	intercept_handler_t handler;
 
-	/* All eb instructions that end up here are privileged. */
-	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
-		return kvm_s390_inject_program_int(vcpu,
-						   PGM_PRIVILEGED_OPERATION);
 	handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
 	if (handler)
 		return handler(vcpu);
@@ -515,6 +697,9 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
 
 	vcpu->stat.instruction_tprot++;
 
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
 	kvm_s390_get_base_disp_sse(vcpu, &address1, &address2);
 
 	/* we only handle the Linux memory detection case:
@@ -560,8 +745,7 @@ static int handle_sckpf(struct kvm_vcpu *vcpu)
 	u32 value;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
-		return kvm_s390_inject_program_int(vcpu,
-						   PGM_PRIVILEGED_OPERATION);
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
 	if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000)
 		return kvm_s390_inject_program_int(vcpu,
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 1c48ab2845e..bec398c57ac 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -79,8 +79,8 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
 	list_add_tail(&inti->list, &li->list);
 	atomic_set(&li->active, 1);
 	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
-	if (waitqueue_active(&li->wq))
-		wake_up_interruptible(&li->wq);
+	if (waitqueue_active(li->wq))
+		wake_up_interruptible(li->wq);
 	spin_unlock_bh(&li->lock);
 	rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 	VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
@@ -117,8 +117,8 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
 	list_add_tail(&inti->list, &li->list);
 	atomic_set(&li->active, 1);
 	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
-	if (waitqueue_active(&li->wq))
-		wake_up_interruptible(&li->wq);
+	if (waitqueue_active(li->wq))
+		wake_up_interruptible(li->wq);
 	spin_unlock_bh(&li->lock);
 	rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 	VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
@@ -145,8 +145,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
 	atomic_set(&li->active, 1);
 	atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
 	li->action_bits |= action;
-	if (waitqueue_active(&li->wq))
-		wake_up_interruptible(&li->wq);
+	if (waitqueue_active(li->wq))
+		wake_up_interruptible(li->wq);
 out:
 	spin_unlock_bh(&li->lock);
 
@@ -250,8 +250,8 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
 
 	list_add_tail(&inti->list, &li->list);
 	atomic_set(&li->active, 1);
-	if (waitqueue_active(&li->wq))
-		wake_up_interruptible(&li->wq);
+	if (waitqueue_active(li->wq))
+		wake_up_interruptible(li->wq);
 	rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 
 	VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
@@ -333,8 +333,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 
 	/* sigp in userspace can exit */
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
-		return kvm_s390_inject_program_int(vcpu,
-						   PGM_PRIVILEGED_OPERATION);
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
 	order_code = kvm_s390_get_base_disp_rs(vcpu);
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 89ebae4008f..ce36ea80e4f 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -135,30 +135,17 @@ void __init paging_init(void)
 
 void __init mem_init(void)
 {
-	unsigned long codesize, reservedpages, datasize, initsize;
-
-        max_mapnr = num_physpages = max_low_pfn;
+        max_mapnr = max_low_pfn;
         high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
 
 	/* Setup guest page hinting */
 	cmma_init();
 
 	/* this will put all low memory onto the freelists */
-	totalram_pages += free_all_bootmem();
+	free_all_bootmem();
 	setup_zero_pages();	/* Setup zeroed pages. */
 
-	reservedpages = 0;
-
-	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
-	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
-	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
-        printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
-		nr_free_pages() << (PAGE_SHIFT-10),
-                max_mapnr << (PAGE_SHIFT-10),
-                codesize >> 10,
-                reservedpages << (PAGE_SHIFT-10),
-                datasize >>10,
-                initsize >> 10);
+	mem_init_print_info(NULL);
 	printk("Write protected kernel read-only data: %#lx - %#lx\n",
 	       (unsigned long)&_stext,
 	       PFN_ALIGN((unsigned long)&_eshared) - 1);
@@ -166,13 +153,14 @@ void __init mem_init(void)
 
 void free_initmem(void)
 {
-	free_initmem_default(0);
+	free_initmem_default(POISON_FREE_INITMEM);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void __init free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd");
+	free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
+			   "initrd");
 }
 #endif
 
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index a938b548f07..17bf4d3d303 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -689,7 +689,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
 		entry = *ptep;
 		if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) {
 			pgste = pgste_get_lock(ptep);
-			pgste_val(pgste) |= RCP_IN_BIT;
+			pgste_val(pgste) |= PGSTE_IN_BIT;
 			pgste_set_unlock(ptep, pgste);
 			start += PAGE_SIZE;
 			len -= PAGE_SIZE;
@@ -771,6 +771,54 @@ static inline void page_table_free_pgste(unsigned long *table)
 	__free_page(page);
 }
 
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+			  unsigned long key, bool nq)
+{
+	spinlock_t *ptl;
+	pgste_t old, new;
+	pte_t *ptep;
+
+	down_read(&mm->mmap_sem);
+	ptep = get_locked_pte(current->mm, addr, &ptl);
+	if (unlikely(!ptep)) {
+		up_read(&mm->mmap_sem);
+		return -EFAULT;
+	}
+
+	new = old = pgste_get_lock(ptep);
+	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
+			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
+	pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
+	pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+		unsigned long address, bits;
+		unsigned char skey;
+
+		address = pte_val(*ptep) & PAGE_MASK;
+		skey = page_get_storage_key(address);
+		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+		/* Set storage key ACC and FP */
+		page_set_storage_key(address,
+				(key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)),
+				!nq);
+
+		/* Merge host changed & referenced into pgste  */
+		pgste_val(new) |= bits << 52;
+		/* Transfer skey changed & referenced bit to kvm user bits */
+		pgste_val(new) |= bits << 45;	/* PGSTE_UR_BIT & PGSTE_UC_BIT */
+	}
+	/* changing the guest storage key is considered a change of the page */
+	if ((pgste_val(new) ^ pgste_val(old)) &
+	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
+		pgste_val(new) |= PGSTE_UC_BIT;
+
+	pgste_set_unlock(ptep, new);
+	pte_unmap_unlock(*ptep, ptl);
+	up_read(&mm->mmap_sem);
+	return 0;
+}
+EXPORT_SYMBOL(set_guest_storage_key);
+
 #else /* CONFIG_PGSTE */
 
 static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h
index 1912f3bb190..0022e1ebfbd 100644
--- a/arch/s390/oprofile/hwsampler.h
+++ b/arch/s390/oprofile/hwsampler.h
@@ -81,8 +81,8 @@ struct hws_data_entry {
 	unsigned int:16;
 	unsigned int prim_asn:16;   /* primary ASN                       */
 	unsigned long long ia;      /* Instruction Address               */
-	unsigned long long lpp;     /* Logical-Partition Program Param.  */
-	unsigned long long vpp;     /* Virtual-Machine Program Param.    */
+	unsigned long long gpp;     /* Guest Program Parameter		 */
+	unsigned long long hpp;     /* Host Program Parameter		 */
 };
 
 struct hws_trailer_entry {
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index f1e5be85d59..e2956ad39a4 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -82,10 +82,13 @@ struct intr_bucket {
 
 static struct intr_bucket *bucket;
 
-/* Adapter local summary indicator */
-static u8 *zpci_irq_si;
+/* Adapter interrupt definitions */
+static void zpci_irq_handler(struct airq_struct *airq);
 
-static atomic_t irq_retries = ATOMIC_INIT(0);
+static struct airq_struct zpci_airq = {
+	.handler = zpci_irq_handler,
+	.isc = PCI_ISC,
+};
 
 /* I/O Map */
 static DEFINE_SPINLOCK(zpci_iomap_lock);
@@ -404,7 +407,7 @@ static struct pci_ops pci_root_ops = {
 /* store the last handled bit to implement fair scheduling of devices */
 static DEFINE_PER_CPU(unsigned long, next_sbit);
 
-static void zpci_irq_handler(void *dont, void *need)
+static void zpci_irq_handler(struct airq_struct *airq)
 {
 	unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit);
 	int rescan = 0, max = aisb_max;
@@ -452,7 +455,6 @@ scan:
 	max = aisb_max;
 	sbit = find_first_bit_left(bucket->aisb, max);
 	if (sbit != max) {
-		atomic_inc(&irq_retries);
 		rescan++;
 		goto scan;
 	}
@@ -565,7 +567,21 @@ static void zpci_map_resources(struct zpci_dev *zdev)
 		pr_debug("BAR%i: -> start: %Lx  end: %Lx\n",
 			i, pdev->resource[i].start, pdev->resource[i].end);
 	}
-};
+}
+
+static void zpci_unmap_resources(struct zpci_dev *zdev)
+{
+	struct pci_dev *pdev = zdev->pdev;
+	resource_size_t len;
+	int i;
+
+	for (i = 0; i < PCI_BAR_COUNT; i++) {
+		len = pci_resource_len(pdev, i);
+		if (!len)
+			continue;
+		pci_iounmap(pdev, (void *) pdev->resource[i].start);
+	}
+}
 
 struct zpci_dev *zpci_alloc_device(void)
 {
@@ -701,25 +717,20 @@ static int __init zpci_irq_init(void)
 		goto out_alloc;
 	}
 
-	isc_register(PCI_ISC);
-	zpci_irq_si = s390_register_adapter_interrupt(&zpci_irq_handler, NULL, PCI_ISC);
-	if (IS_ERR(zpci_irq_si)) {
-		rc = PTR_ERR(zpci_irq_si);
-		zpci_irq_si = NULL;
+	rc = register_adapter_interrupt(&zpci_airq);
+	if (rc)
 		goto out_ai;
-	}
+	/* Set summary to 1 to be called every time for the ISC. */
+	*zpci_airq.lsi_ptr = 1;
 
 	for_each_online_cpu(cpu)
 		per_cpu(next_sbit, cpu) = 0;
 
 	spin_lock_init(&bucket->lock);
-	/* set summary to 1 to be called every time for the ISC */
-	*zpci_irq_si = 1;
 	set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
 	return 0;
 
 out_ai:
-	isc_unregister(PCI_ISC);
 	free_page((unsigned long) bucket->alloc);
 out_alloc:
 	free_page((unsigned long) bucket->aisb);
@@ -732,21 +743,10 @@ static void zpci_irq_exit(void)
 {
 	free_page((unsigned long) bucket->alloc);
 	free_page((unsigned long) bucket->aisb);
-	s390_unregister_adapter_interrupt(zpci_irq_si, PCI_ISC);
-	isc_unregister(PCI_ISC);
+	unregister_adapter_interrupt(&zpci_airq);
 	kfree(bucket);
 }
 
-void zpci_debug_info(struct zpci_dev *zdev, struct seq_file *m)
-{
-	if (!zdev)
-		return;
-
-	seq_printf(m, "global irq retries: %u\n", atomic_read(&irq_retries));
-	seq_printf(m, "aibv[0]:%016lx  aibv[1]:%016lx  aisb:%016lx\n",
-		   get_imap(0)->aibv, get_imap(1)->aibv, *bucket->aisb);
-}
-
 static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size,
 						unsigned long flags, int domain)
 {
@@ -810,6 +810,16 @@ int pcibios_add_device(struct pci_dev *pdev)
 	return 0;
 }
 
+void pcibios_release_device(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = get_zdev(pdev);
+
+	zpci_unmap_resources(zdev);
+	zpci_fmb_disable_device(zdev);
+	zpci_debug_exit_device(zdev);
+	zdev->pdev = NULL;
+}
+
 static int zpci_scan_bus(struct zpci_dev *zdev)
 {
 	struct resource *res;
@@ -950,25 +960,6 @@ void zpci_stop_device(struct zpci_dev *zdev)
 }
 EXPORT_SYMBOL_GPL(zpci_stop_device);
 
-int zpci_scan_device(struct zpci_dev *zdev)
-{
-	zdev->pdev = pci_scan_single_device(zdev->bus, ZPCI_DEVFN);
-	if (!zdev->pdev) {
-		pr_err("pci_scan_single_device failed for fid: 0x%x\n",
-			zdev->fid);
-		goto out;
-	}
-
-	pci_bus_add_devices(zdev->bus);
-
-	return 0;
-out:
-	zpci_dma_exit_device(zdev);
-	clp_disable_fh(zdev);
-	return -EIO;
-}
-EXPORT_SYMBOL_GPL(zpci_scan_device);
-
 static inline int barsize(u8 size)
 {
 	return (size) ? (1 << size) >> 10 : 0;
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index bd34359d154..2e9539625d9 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -236,7 +236,6 @@ int clp_disable_fh(struct zpci_dev *zdev)
 	if (!zdev_enabled(zdev))
 		return 0;
 
-	dev_info(&zdev->pdev->dev, "disabling fn handle: 0x%x\n", fh);
 	rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN);
 	if (!rc)
 		/* Success -> store disabled handle in zdev */
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 771b82359af..75c69b402e0 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -115,27 +115,6 @@ static const struct file_operations debugfs_pci_perf_fops = {
 	.release = single_release,
 };
 
-static int pci_debug_show(struct seq_file *m, void *v)
-{
-	struct zpci_dev *zdev = m->private;
-
-	zpci_debug_info(zdev, m);
-	return 0;
-}
-
-static int pci_debug_seq_open(struct inode *inode, struct file *filp)
-{
-	return single_open(filp, pci_debug_show,
-			   file_inode(filp)->i_private);
-}
-
-static const struct file_operations debugfs_pci_debug_fops = {
-	.open	 = pci_debug_seq_open,
-	.read	 = seq_read,
-	.llseek  = seq_lseek,
-	.release = single_release,
-};
-
 void zpci_debug_init_device(struct zpci_dev *zdev)
 {
 	zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev),
@@ -149,19 +128,11 @@ void zpci_debug_init_device(struct zpci_dev *zdev)
 				&debugfs_pci_perf_fops);
 	if (IS_ERR(zdev->debugfs_perf))
 		zdev->debugfs_perf = NULL;
-
-	zdev->debugfs_debug = debugfs_create_file("debug",
-				S_IFREG | S_IRUGO | S_IWUSR,
-				zdev->debugfs_dev, zdev,
-				&debugfs_pci_debug_fops);
-	if (IS_ERR(zdev->debugfs_debug))
-		zdev->debugfs_debug = NULL;
 }
 
 void zpci_debug_exit_device(struct zpci_dev *zdev)
 {
 	debugfs_remove(zdev->debugfs_perf);
-	debugfs_remove(zdev->debugfs_debug);
 	debugfs_remove(zdev->debugfs_dev);
 }
 
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index f8e69d5bc0a..a2343c1f6e0 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -263,7 +263,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 				     enum dma_data_direction direction,
 				     struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
 	unsigned long nr_pages, iommu_page_index;
 	unsigned long pa = page_to_phys(page) + offset;
 	int flags = ZPCI_PTE_VALID;
@@ -304,7 +304,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
 				 size_t size, enum dma_data_direction direction,
 				 struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
 	unsigned long iommu_page_index;
 	int npages;
 
@@ -323,7 +323,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
 			    dma_addr_t *dma_handle, gfp_t flag,
 			    struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
 	struct page *page;
 	unsigned long pa;
 	dma_addr_t map;
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index a42cce69d0a..e99a2557f18 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -15,40 +15,36 @@
 static ssize_t show_fid(struct device *dev, struct device_attribute *attr,
 			char *buf)
 {
-	struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
 
-	sprintf(buf, "0x%08x\n", zdev->fid);
-	return strlen(buf);
+	return sprintf(buf, "0x%08x\n", zdev->fid);
 }
 static DEVICE_ATTR(function_id, S_IRUGO, show_fid, NULL);
 
 static ssize_t show_fh(struct device *dev, struct device_attribute *attr,
 		       char *buf)
 {
-	struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
 
-	sprintf(buf, "0x%08x\n", zdev->fh);
-	return strlen(buf);
+	return sprintf(buf, "0x%08x\n", zdev->fh);
 }
 static DEVICE_ATTR(function_handle, S_IRUGO, show_fh, NULL);
 
 static ssize_t show_pchid(struct device *dev, struct device_attribute *attr,
 			  char *buf)
 {
-	struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
 
-	sprintf(buf, "0x%04x\n", zdev->pchid);
-	return strlen(buf);
+	return sprintf(buf, "0x%04x\n", zdev->pchid);
 }
 static DEVICE_ATTR(pchid, S_IRUGO, show_pchid, NULL);
 
 static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr,
 			  char *buf)
 {
-	struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev));
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
 
-	sprintf(buf, "0x%02x\n", zdev->pfgid);
-	return strlen(buf);
+	return sprintf(buf, "0x%02x\n", zdev->pfgid);
 }
 static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL);
 
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index cebaff8069a..e1c7bb999b0 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -3,3 +3,4 @@ header-y +=
 
 generic-y += clkdev.h
 generic-y += trace_clock.h
+generic-y += xor.h
diff --git a/arch/score/include/asm/dma-mapping.h b/arch/score/include/asm/dma-mapping.h
deleted file mode 100644
index f9c0193c7a5..00000000000
--- a/arch/score/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SCORE_DMA_MAPPING_H
-#define _ASM_SCORE_DMA_MAPPING_H
-
-#include <asm-generic/dma-mapping-broken.h>
-
-#endif /* _ASM_SCORE_DMA_MAPPING_H */
diff --git a/arch/score/kernel/vmlinux.lds.S b/arch/score/kernel/vmlinux.lds.S
index eebcbaa4e97..7274b5c4287 100644
--- a/arch/score/kernel/vmlinux.lds.S
+++ b/arch/score/kernel/vmlinux.lds.S
@@ -49,6 +49,7 @@ SECTIONS
 	}
 
 	. = ALIGN(16);
+	_sdata =  .;			/* Start of data section */
 	RODATA
 
 	EXCEPTION_TABLE(16)
diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c
index 0940682ab38..9fbce49ad3b 100644
--- a/arch/score/mm/init.c
+++ b/arch/score/mm/init.c
@@ -75,40 +75,19 @@ void __init paging_init(void)
 
 void __init mem_init(void)
 {
-	unsigned long codesize, reservedpages, datasize, initsize;
-	unsigned long tmp, ram = 0;
-
 	high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
-	totalram_pages += free_all_bootmem();
+	free_all_bootmem();
 	setup_zero_page();	/* Setup zeroed pages. */
-	reservedpages = 0;
-
-	for (tmp = 0; tmp < max_low_pfn; tmp++)
-		if (page_is_ram(tmp)) {
-			ram++;
-			if (PageReserved(pfn_to_page(tmp)))
-				reservedpages++;
-		}
-
-	num_physpages = ram;
-	codesize = (unsigned long) &_etext - (unsigned long) &_text;
-	datasize = (unsigned long) &_edata - (unsigned long) &_etext;
-	initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
-
-	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
-			"%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n",
-			(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
-			ram << (PAGE_SHIFT-10), codesize >> 10,
-			reservedpages << (PAGE_SHIFT-10), datasize >> 10,
-			initsize >> 10,
-			totalhigh_pages << (PAGE_SHIFT-10));
+
+	mem_init_print_info(NULL);
 }
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd");
+	free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
+			   "initrd");
 }
 #endif
 
diff --git a/arch/sh/include/asm/mutex-llsc.h b/arch/sh/include/asm/mutex-llsc.h
index 090358a7e1b..dad29b687bd 100644
--- a/arch/sh/include/asm/mutex-llsc.h
+++ b/arch/sh/include/asm/mutex-llsc.h
@@ -37,7 +37,7 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 }
 
 static inline int
-__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
+__mutex_fastpath_lock_retval(atomic_t *count)
 {
 	int __done, __res;
 
@@ -51,7 +51,7 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 		: "t");
 
 	if (unlikely(!__done || __res != 0))
-		__res = fail_fn(count);
+		__res = -1;
 
 	return __res;
 }
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 20f9ead650d..33890fd267c 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -407,30 +407,16 @@ unsigned int mem_init_done = 0;
 
 void __init mem_init(void)
 {
-	int codesize, datasize, initsize;
-	int nid;
+	pg_data_t *pgdat;
 
 	iommu_init();
 
-	num_physpages = 0;
 	high_memory = NULL;
+	for_each_online_pgdat(pgdat)
+		high_memory = max_t(void *, high_memory,
+				    __va(pgdat_end_pfn(pgdat) << PAGE_SHIFT));
 
-	for_each_online_node(nid) {
-		pg_data_t *pgdat = NODE_DATA(nid);
-		void *node_high_memory;
-
-		num_physpages += pgdat->node_present_pages;
-
-		if (pgdat->node_spanned_pages)
-			totalram_pages += free_all_bootmem_node(pgdat);
-
-
-		node_high_memory = (void *)__va((pgdat->node_start_pfn +
-						 pgdat->node_spanned_pages) <<
-						 PAGE_SHIFT);
-		if (node_high_memory > high_memory)
-			high_memory = node_high_memory;
-	}
+	free_all_bootmem();
 
 	/* Set this up early, so we can take care of the zero page */
 	cpu_cache_init();
@@ -441,19 +427,8 @@ void __init mem_init(void)
 
 	vsyscall_init();
 
-	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
-	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
-	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
-
-	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
-	       "%dk data, %dk init)\n",
-		nr_free_pages() << (PAGE_SHIFT-10),
-		num_physpages << (PAGE_SHIFT-10),
-		codesize >> 10,
-		datasize >> 10,
-		initsize >> 10);
-
-	printk(KERN_INFO "virtual kernel memory layout:\n"
+	mem_init_print_info(NULL);
+	pr_info("virtual kernel memory layout:\n"
 		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #ifdef CONFIG_HIGHMEM
 		"    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
@@ -499,13 +474,13 @@ void __init mem_init(void)
 
 void free_initmem(void)
 {
-	free_initmem_default(0);
+	free_initmem_default(-1);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, 0, "initrd");
+	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
 
diff --git a/arch/sparc/include/uapi/asm/fcntl.h b/arch/sparc/include/uapi/asm/fcntl.h
index d0b83f66f35..d73e5e008b0 100644
--- a/arch/sparc/include/uapi/asm/fcntl.h
+++ b/arch/sparc/include/uapi/asm/fcntl.h
@@ -35,6 +35,7 @@
 #define O_SYNC		(__O_SYNC|O_DSYNC)
 
 #define O_PATH		0x1000000
+#define O_TMPFILE	0x2000000
 
 #define F_GETOWN	5	/*  for sockets. */
 #define F_SETOWN	6	/*  for sockets. */
diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c
index 6cfc1b09ec2..d7aa524b728 100644
--- a/arch/sparc/kernel/leon_smp.c
+++ b/arch/sparc/kernel/leon_smp.c
@@ -254,15 +254,12 @@ void __init leon_smp_done(void)
 	/* Free unneeded trap tables */
 	if (!cpu_present(1)) {
 		free_reserved_page(virt_to_page(&trapbase_cpu1));
-		num_physpages++;
 	}
 	if (!cpu_present(2)) {
 		free_reserved_page(virt_to_page(&trapbase_cpu2));
-		num_physpages++;
 	}
 	if (!cpu_present(3)) {
 		free_reserved_page(virt_to_page(&trapbase_cpu3));
-		num_physpages++;
 	}
 	/* Ok, they are spinning and ready to go. */
 	smp_processors_ready = 1;
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 2031c65fd4e..bc4d3f5d2e5 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -254,7 +254,7 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
 	const char *type;
 	u32 class;
 
-	dev = alloc_pci_dev();
+	dev = pci_alloc_dev(bus);
 	if (!dev)
 		return NULL;
 
@@ -281,7 +281,6 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
 		printk("    create device, devfn: %x, type: %s\n",
 		       devfn, type);
 
-	dev->bus = bus;
 	dev->sysdata = node;
 	dev->dev.parent = bus->bridge;
 	dev->dev.bus = &pci_bus_type;
@@ -327,7 +326,7 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
 	if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE)
 		pci_set_master(dev);
 
-	dev->current_state = 4;		/* unknown power state */
+	dev->current_state = PCI_UNKNOWN;	/* unknown power state */
 	dev->error_state = pci_channel_io_normal;
 	dev->dma_mask = 0xffffffff;
 
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index af472cf7c69..db698708280 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -288,10 +288,6 @@ static void map_high_region(unsigned long start_pfn, unsigned long end_pfn)
 
 void __init mem_init(void)
 {
-	int codepages = 0;
-	int datapages = 0;
-	int initpages = 0; 
-	int reservedpages = 0;
 	int i;
 
 	if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
@@ -323,15 +319,12 @@ void __init mem_init(void)
 
 	max_mapnr = last_valid_pfn - pfn_base;
 	high_memory = __va(max_low_pfn << PAGE_SHIFT);
-
-	totalram_pages = free_all_bootmem();
+	free_all_bootmem();
 
 	for (i = 0; sp_banks[i].num_bytes != 0; i++) {
 		unsigned long start_pfn = sp_banks[i].base_addr >> PAGE_SHIFT;
 		unsigned long end_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT;
 
-		num_physpages += sp_banks[i].num_bytes >> PAGE_SHIFT;
-
 		if (end_pfn <= highstart_pfn)
 			continue;
 
@@ -341,39 +334,19 @@ void __init mem_init(void)
 		map_high_region(start_pfn, end_pfn);
 	}
 	
-	codepages = (((unsigned long) &_etext) - ((unsigned long)&_start));
-	codepages = PAGE_ALIGN(codepages) >> PAGE_SHIFT;
-	datapages = (((unsigned long) &_edata) - ((unsigned long)&_etext));
-	datapages = PAGE_ALIGN(datapages) >> PAGE_SHIFT;
-	initpages = (((unsigned long) &__init_end) - ((unsigned long) &__init_begin));
-	initpages = PAGE_ALIGN(initpages) >> PAGE_SHIFT;
-
-	/* Ignore memory holes for the purpose of counting reserved pages */
-	for (i=0; i < max_low_pfn; i++)
-		if (test_bit(i >> (20 - PAGE_SHIFT), sparc_valid_addr_bitmap)
-		    && PageReserved(pfn_to_page(i)))
-			reservedpages++;
-
-	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
-	       nr_free_pages() << (PAGE_SHIFT-10),
-	       num_physpages << (PAGE_SHIFT - 10),
-	       codepages << (PAGE_SHIFT-10),
-	       reservedpages << (PAGE_SHIFT - 10),
-	       datapages << (PAGE_SHIFT-10), 
-	       initpages << (PAGE_SHIFT-10),
-	       totalhigh_pages << (PAGE_SHIFT-10));
+	mem_init_print_info(NULL);
 }
 
 void free_initmem (void)
 {
-	num_physpages += free_initmem_default(POISON_FREE_INITMEM);
+	free_initmem_default(POISON_FREE_INITMEM);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	num_physpages += free_reserved_area(start, end, POISON_FREE_INITMEM,
-					    "initrd");
+	free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
+			   "initrd");
 }
 #endif
 
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 04fd55a6e46..a9c42a7ffb6 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2045,7 +2045,6 @@ static void __init register_page_bootmem_info(void)
 }
 void __init mem_init(void)
 {
-	unsigned long codepages, datapages, initpages;
 	unsigned long addr, last;
 
 	addr = PAGE_OFFSET + kern_base;
@@ -2061,12 +2060,7 @@ void __init mem_init(void)
 	high_memory = __va(last_valid_pfn << PAGE_SHIFT);
 
 	register_page_bootmem_info();
-	totalram_pages = free_all_bootmem();
-
-	/* We subtract one to account for the mem_map_zero page
-	 * allocated below.
-	 */
-	num_physpages = totalram_pages - 1;
+	free_all_bootmem();
 
 	/*
 	 * Set up the zero page, mark it reserved, so that page count
@@ -2079,19 +2073,7 @@ void __init mem_init(void)
 	}
 	mark_page_reserved(mem_map_zero);
 
-	codepages = (((unsigned long) _etext) - ((unsigned long) _start));
-	codepages = PAGE_ALIGN(codepages) >> PAGE_SHIFT;
-	datapages = (((unsigned long) _edata) - ((unsigned long) _etext));
-	datapages = PAGE_ALIGN(datapages) >> PAGE_SHIFT;
-	initpages = (((unsigned long) __init_end) - ((unsigned long) __init_begin));
-	initpages = PAGE_ALIGN(initpages) >> PAGE_SHIFT;
-
-	printk("Memory: %luk available (%ldk kernel code, %ldk data, %ldk init) [%016lx,%016lx]\n",
-	       nr_free_pages() << (PAGE_SHIFT-10),
-	       codepages << (PAGE_SHIFT-10),
-	       datapages << (PAGE_SHIFT-10), 
-	       initpages << (PAGE_SHIFT-10), 
-	       PAGE_OFFSET, (last_valid_pfn << PAGE_SHIFT));
+	mem_init_print_info(NULL);
 
 	if (tlb_type == cheetah || tlb_type == cheetah_plus)
 		cheetah_ecache_flush_init();
@@ -2131,8 +2113,8 @@ void free_initmem(void)
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	num_physpages += free_reserved_area(start, end, POISON_FREE_INITMEM,
-					    "initrd");
+	free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
+			   "initrd");
 }
 #endif
 
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 2b70dfb1442..b3f104953da 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -225,7 +225,7 @@ extern int do_work_pending(struct pt_regs *regs, u32 flags);
 
 /*
  * Return saved (kernel) PC of a blocked thread.
- * Only used in a printk() in kernel/sched.c, so don't work too hard.
+ * Only used in a printk() in kernel/sched/core.c, so don't work too hard.
  */
 #define thread_saved_pc(t)   ((t)->thread.pc)
 
diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h
index d062d463fca..7d8a935a923 100644
--- a/arch/tile/include/asm/sections.h
+++ b/arch/tile/include/asm/sections.h
@@ -34,7 +34,7 @@ extern char __sys_cmpxchg_grab_lock[];
 extern char __start_atomic_asm_code[], __end_atomic_asm_code[];
 #endif
 
-/* Handle the discontiguity between _sdata and _stext. */
+/* Handle the discontiguity between _sdata and _text. */
 static inline int arch_is_kernel_data(unsigned long addr)
 {
 	return addr >= (unsigned long)_sdata &&
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
index 8a082bc6bca..e4d44bd7df2 100644
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -442,7 +442,7 @@ extern unsigned long __copy_in_user_inatomic(
 static inline unsigned long __must_check
 __copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	return __copy_in_user_inatomic(to, from, n);
 }
 
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 7a5aa1a7864..68b542677f6 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -307,8 +307,8 @@ static void __cpuinit store_permanent_mappings(void)
 		hv_store_mapping(addr, pages << PAGE_SHIFT, pa);
 	}
 
-	hv_store_mapping((HV_VirtAddr)_stext,
-			 (uint32_t)(_einittext - _stext), 0);
+	hv_store_mapping((HV_VirtAddr)_text,
+			 (uint32_t)(_einittext - _text), 0);
 }
 
 /*
@@ -329,6 +329,7 @@ static void __init setup_memory(void)
 #if defined(CONFIG_HIGHMEM) || defined(__tilegx__)
 	long lowmem_pages;
 #endif
+	unsigned long physpages = 0;
 
 	/* We are using a char to hold the cpu_2_node[] mapping */
 	BUILD_BUG_ON(MAX_NUMNODES > 127);
@@ -388,8 +389,8 @@ static void __init setup_memory(void)
 				continue;
 			}
 		}
-		if (num_physpages + PFN_DOWN(range.size) > maxmem_pfn) {
-			int max_size = maxmem_pfn - num_physpages;
+		if (physpages + PFN_DOWN(range.size) > maxmem_pfn) {
+			int max_size = maxmem_pfn - physpages;
 			if (max_size > 0) {
 				pr_err("Maxmem reduced node %d to %d pages\n",
 				       i, max_size);
@@ -446,7 +447,7 @@ static void __init setup_memory(void)
 		node_start_pfn[i] = start;
 		node_end_pfn[i] = end;
 		node_controller[i] = range.controller;
-		num_physpages += size;
+		physpages += size;
 		max_pfn = end;
 
 		/* Mark node as online */
@@ -465,7 +466,7 @@ static void __init setup_memory(void)
 	 * we're willing to use at 8 million pages (32GB of 4KB pages).
 	 */
 	cap = 8 * 1024 * 1024;  /* 8 million pages */
-	if (num_physpages > cap) {
+	if (physpages > cap) {
 		int num_nodes = num_online_nodes();
 		int cap_each = cap / num_nodes;
 		unsigned long dropped_pages = 0;
@@ -476,10 +477,10 @@ static void __init setup_memory(void)
 				node_end_pfn[i] = node_start_pfn[i] + cap_each;
 			}
 		}
-		num_physpages -= dropped_pages;
+		physpages -= dropped_pages;
 		pr_warning("Only using %ldMB memory;"
 		       " ignoring %ldMB.\n",
-		       num_physpages >> (20 - PAGE_SHIFT),
+		       physpages >> (20 - PAGE_SHIFT),
 		       dropped_pages >> (20 - PAGE_SHIFT));
 		pr_warning("Consider using a larger page size.\n");
 	}
@@ -497,7 +498,7 @@ static void __init setup_memory(void)
 
 	lowmem_pages = (mappable_physpages > MAXMEM_PFN) ?
 		MAXMEM_PFN : mappable_physpages;
-	highmem_pages = (long) (num_physpages - lowmem_pages);
+	highmem_pages = (long) (physpages - lowmem_pages);
 
 	pr_notice("%ldMB HIGHMEM available.\n",
 	       pages_to_mb(highmem_pages > 0 ? highmem_pages : 0));
@@ -514,7 +515,6 @@ static void __init setup_memory(void)
 		pr_warning("Use a HIGHMEM enabled kernel.\n");
 		max_low_pfn = MAXMEM_PFN;
 		max_pfn = MAXMEM_PFN;
-		num_physpages = MAXMEM_PFN;
 		node_end_pfn[0] = MAXMEM_PFN;
 	} else {
 		pr_notice("%ldMB memory available.\n",
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index ed258b8ae32..af8dfc9665f 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -442,7 +442,7 @@ void _KBacktraceIterator_init_current(struct KBacktraceIterator *kbt, ulong pc,
 				regs_to_pt_regs(&regs, pc, lr, sp, r52));
 }
 
-/* This is called only from kernel/sched.c, with esp == NULL */
+/* This is called only from kernel/sched/core.c, with esp == NULL */
 void show_stack(struct task_struct *task, unsigned long *esp)
 {
 	struct KBacktraceIterator kbt;
diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S
index 631f10de12f..a13ed902afb 100644
--- a/arch/tile/kernel/vmlinux.lds.S
+++ b/arch/tile/kernel/vmlinux.lds.S
@@ -27,7 +27,6 @@ SECTIONS
   .intrpt1 (LOAD_OFFSET) : AT ( 0 )   /* put at the start of physical memory */
   {
     _text = .;
-    _stext = .;
     *(.intrpt1)
   } :intrpt1 =0
 
@@ -36,6 +35,7 @@ SECTIONS
 
   /* Now the real code */
   . = ALIGN(0x20000);
+  _stext = .;
   .text : AT (ADDR(.text) - LOAD_OFFSET) {
     HEAD_TEXT
     SCHED_TEXT
@@ -58,11 +58,13 @@ SECTIONS
   #define LOAD_OFFSET PAGE_OFFSET
 
   . = ALIGN(PAGE_SIZE);
+  __init_begin = .;
   VMLINUX_SYMBOL(_sinitdata) = .;
   INIT_DATA_SECTION(16) :data =0
   PERCPU_SECTION(L2_CACHE_BYTES)
   . = ALIGN(PAGE_SIZE);
   VMLINUX_SYMBOL(_einitdata) = .;
+  __init_end = .;
 
   _sdata = .;                   /* Start of data section */
 
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 2749515a054..e182958c707 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -562,7 +562,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 			prot = ktext_set_nocache(prot);
 		}
 
-		BUG_ON(address != (unsigned long)_stext);
+		BUG_ON(address != (unsigned long)_text);
 		pte = NULL;
 		for (; address < (unsigned long)_einittext;
 		     pfn++, address += PAGE_SIZE) {
@@ -720,7 +720,7 @@ static void __init init_free_pfn_range(unsigned long start, unsigned long end)
 		}
 		init_page_count(page);
 		__free_pages(page, order);
-		totalram_pages += count;
+		adjust_managed_page_count(page, count);
 
 		page += count;
 		pfn += count;
@@ -821,7 +821,6 @@ static void __init set_max_mapnr_init(void)
 
 void __init mem_init(void)
 {
-	int codesize, datasize, initsize;
 	int i;
 #ifndef __tilegx__
 	void *last;
@@ -846,26 +845,14 @@ void __init mem_init(void)
 	set_max_mapnr_init();
 
 	/* this will put all bootmem onto the freelists */
-	totalram_pages += free_all_bootmem();
+	free_all_bootmem();
 
 #ifndef CONFIG_64BIT
 	/* count all remaining LOWMEM and give all HIGHMEM to page allocator */
 	set_non_bootmem_pages_init();
 #endif
 
-	codesize =  (unsigned long)&_etext - (unsigned long)&_text;
-	datasize =  (unsigned long)&_end - (unsigned long)&_sdata;
-	initsize =  (unsigned long)&_einittext - (unsigned long)&_sinittext;
-	initsize += (unsigned long)&_einitdata - (unsigned long)&_sinitdata;
-
-	pr_info("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n",
-		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
-		num_physpages << (PAGE_SHIFT-10),
-		codesize >> 10,
-		datasize >> 10,
-		initsize >> 10,
-		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
-	       );
+	mem_init_print_info(NULL);
 
 	/*
 	 * In debug mode, dump some interesting memory mappings.
@@ -1024,16 +1011,13 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end)
 			pte_clear(&init_mm, addr, ptep);
 			continue;
 		}
-		__ClearPageReserved(page);
-		init_page_count(page);
 		if (pte_huge(*ptep))
 			BUG_ON(!kdata_huge);
 		else
 			set_pte_at(&init_mm, addr, ptep,
 				   pfn_pte(pfn, PAGE_KERNEL));
 		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
-		free_page(addr);
-		totalram_pages++;
+		free_reserved_page(page);
 	}
 	pr_info("Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
 }
diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S
index 4938de5512d..1dd5bd8a8c5 100644
--- a/arch/um/include/asm/common.lds.S
+++ b/arch/um/include/asm/common.lds.S
@@ -57,7 +57,6 @@
 	*(.uml.initcall.init)
 	__uml_initcall_end = .;
   }
-  __init_end = .;
 
   SECURITY_INIT
 
diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S
index fb8fd6fb656..adde088aeef 100644
--- a/arch/um/kernel/dyn.lds.S
+++ b/arch/um/kernel/dyn.lds.S
@@ -14,8 +14,6 @@ SECTIONS
   __binary_start = .;
   . = ALIGN(4096);		/* Init code and data */
   _text = .;
-  _stext = .;
-  __init_begin = .;
   INIT_TEXT_SECTION(PAGE_SIZE)
 
   . = ALIGN(PAGE_SIZE);
@@ -67,6 +65,7 @@ SECTIONS
   } =0x90909090
   .plt            : { *(.plt) }
   .text           : {
+    _stext = .;
     TEXT_TEXT
     SCHED_TEXT
     LOCK_TEXT
@@ -91,7 +90,9 @@ SECTIONS
 
   #include <asm/common.lds.S>
 
+  __init_begin = .;
   init.data : { INIT_DATA }
+  __init_end = .;
 
   /* Ensure the __preinit_array_start label is properly aligned.  We
      could instead move the label definition inside the section, but
@@ -155,6 +156,7 @@ SECTIONS
    . = ALIGN(32 / 8);
   . = ALIGN(32 / 8);
   }
+   __bss_stop = .;
   _end = .;
   PROVIDE (end = .);
 
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 9df292b270a..7ddb64baf32 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -65,15 +65,13 @@ void __init mem_init(void)
 	uml_reserved = brk_end;
 
 	/* this will put all low memory onto the freelists */
-	totalram_pages = free_all_bootmem();
+	free_all_bootmem();
 	max_low_pfn = totalram_pages;
 #ifdef CONFIG_HIGHMEM
 	setup_highmem(end_iomem, highmem);
 #endif
-	num_physpages = totalram_pages;
 	max_pfn = totalram_pages;
-	printk(KERN_INFO "Memory: %luk available\n",
-	       nr_free_pages() << (PAGE_SHIFT-10));
+	mem_init_print_info(NULL);
 	kmalloc_ok = 1;
 }
 
@@ -244,7 +242,7 @@ void free_initmem(void)
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_reserved_area(start, end, 0, "initrd");
+	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
 
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index 7d101a2a154..0dc4d1c6f98 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -39,7 +39,7 @@ void show_trace(struct task_struct *task, unsigned long * stack)
 static const int kstack_depth_to_print = 24;
 
 /* This recently started being used in arch-independent code too, as in
- * kernel/sched.c.*/
+ * kernel/sched/core.c.*/
 void show_stack(struct task_struct *task, unsigned long *esp)
 {
 	unsigned long *stack;
diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S
index ff65fb4f1a9..6899195602b 100644
--- a/arch/um/kernel/uml.lds.S
+++ b/arch/um/kernel/uml.lds.S
@@ -20,13 +20,12 @@ SECTIONS
   . = START + SIZEOF_HEADERS;
 
   _text = .;
-  _stext = .;
-  __init_begin = .;
   INIT_TEXT_SECTION(0)
   . = ALIGN(PAGE_SIZE);
 
   .text      :
   {
+    _stext = .;
     TEXT_TEXT
     SCHED_TEXT
     LOCK_TEXT
@@ -62,7 +61,10 @@ SECTIONS
 
   #include <asm/common.lds.S>
 
+  __init_begin = .;
   init.data : { INIT_DATA }
+  __init_end = .;
+
   .data    :
   {
     INIT_TASK_DATA(KERNEL_STACK_SIZE)
@@ -97,6 +99,7 @@ SECTIONS
   PROVIDE(_bss_start = .);
   SBSS(0)
   BSS(0)
+   __bss_stop = .;
   _end = .;
   PROVIDE (end = .);
 
diff --git a/arch/unicore32/boot/compressed/Makefile b/arch/unicore32/boot/compressed/Makefile
index 950a9afa38f..96494fb646f 100644
--- a/arch/unicore32/boot/compressed/Makefile
+++ b/arch/unicore32/boot/compressed/Makefile
@@ -17,7 +17,7 @@ OBJS		:= misc.o
 
 # font.c and font.o
 CFLAGS_font.o	:= -Dstatic=
-$(obj)/font.c: $(srctree)/drivers/video/console/font_8x8.c
+$(obj)/font.c: $(srctree)/lib/fonts/font_8x8.c
 	$(call cmd,shipped)
 
 # piggy.S and piggy.o
diff --git a/arch/unicore32/include/asm/memory.h b/arch/unicore32/include/asm/memory.h
index 5eddb997def..debafc40200 100644
--- a/arch/unicore32/include/asm/memory.h
+++ b/arch/unicore32/include/asm/memory.h
@@ -98,12 +98,6 @@
 /*
  * Conversion between a struct page and a physical address.
  *
- * Note: when converting an unknown physical address to a
- * struct page, the resulting pointer must be validated
- * using VALID_PAGE().  It must return an invalid struct page
- * for any physical address not corresponding to a system
- * RAM address.
- *
  *  page_to_pfn(page)	convert a struct page * to a PFN number
  *  pfn_to_page(pfn)	convert a _valid_ PFN number to struct page *
  *
diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c
index ef69c0c8282..374a055a8e6 100644
--- a/arch/unicore32/kernel/pci.c
+++ b/arch/unicore32/kernel/pci.c
@@ -277,11 +277,6 @@ static int __init pci_common_init(void)
 		pci_bus_assign_resources(puv3_bus);
 	}
 
-	/*
-	 * Tell drivers about devices found.
-	 */
-	pci_bus_add_devices(puv3_bus);
-
 	return 0;
 }
 subsys_initcall(pci_common_init);
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index 63df12d71ce..ae6bc036db9 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -383,59 +383,14 @@ static void __init free_unused_memmap(struct meminfo *mi)
  */
 void __init mem_init(void)
 {
-	unsigned long reserved_pages, free_pages;
-	struct memblock_region *reg;
-	int i;
-
 	max_mapnr   = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
 
 	free_unused_memmap(&meminfo);
 
 	/* this will put all unused low memory onto the freelists */
-	totalram_pages += free_all_bootmem();
-
-	reserved_pages = free_pages = 0;
-
-	for_each_bank(i, &meminfo) {
-		struct membank *bank = &meminfo.bank[i];
-		unsigned int pfn1, pfn2;
-		struct page *page, *end;
-
-		pfn1 = bank_pfn_start(bank);
-		pfn2 = bank_pfn_end(bank);
-
-		page = pfn_to_page(pfn1);
-		end  = pfn_to_page(pfn2 - 1) + 1;
-
-		do {
-			if (PageReserved(page))
-				reserved_pages++;
-			else if (!page_count(page))
-				free_pages++;
-			page++;
-		} while (page < end);
-	}
-
-	/*
-	 * Since our memory may not be contiguous, calculate the
-	 * real number of pages we have in this system
-	 */
-	printk(KERN_INFO "Memory:");
-	num_physpages = 0;
-	for_each_memblock(memory, reg) {
-		unsigned long pages = memblock_region_memory_end_pfn(reg) -
-			memblock_region_memory_base_pfn(reg);
-		num_physpages += pages;
-		printk(" %ldMB", pages >> (20 - PAGE_SHIFT));
-	}
-	printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT));
-
-	printk(KERN_NOTICE "Memory: %luk/%luk available, %luk reserved, %luK highmem\n",
-		nr_free_pages() << (PAGE_SHIFT-10),
-		free_pages << (PAGE_SHIFT-10),
-		reserved_pages << (PAGE_SHIFT-10),
-		totalhigh_pages << (PAGE_SHIFT-10));
+	free_all_bootmem();
 
+	mem_init_print_info(NULL);
 	printk(KERN_NOTICE "Virtual kernel memory layout:\n"
 		"    vector  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 		"    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
@@ -464,7 +419,7 @@ void __init mem_init(void)
 	BUILD_BUG_ON(TASK_SIZE				> MODULES_VADDR);
 	BUG_ON(TASK_SIZE				> MODULES_VADDR);
 
-	if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
+	if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
 		/*
 		 * On a machine this small we won't get
 		 * anywhere without overcommit, so turn
@@ -476,7 +431,7 @@ void __init mem_init(void)
 
 void free_initmem(void)
 {
-	free_initmem_default(0);
+	free_initmem_default(-1);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -486,7 +441,7 @@ static int keep_initrd;
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
 	if (!keep_initrd)
-		free_reserved_area(start, end, 0, "initrd");
+		free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 
 static int __init keepinitrd_setup(char *__unused)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c49b4dc8ffe..2775023a074 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -102,6 +102,7 @@ config X86
 	select HAVE_ARCH_SECCOMP_FILTER
 	select BUILDTIME_EXTABLE_SORT
 	select GENERIC_CMOS_UPDATE
+	select HAVE_ARCH_SOFT_DIRTY
 	select CLOCKSOURCE_WATCHDOG
 	select GENERIC_CLOCKEVENTS
 	select ARCH_CLOCKSOURCE_DATA if X86_64
@@ -207,6 +208,12 @@ config ARCH_HIBERNATION_POSSIBLE
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y
 
+config ARCH_WANT_HUGE_PMD_SHARE
+	def_bool y
+
+config ARCH_WANT_GENERAL_HUGETLB
+	def_bool y
+
 config ZONE_DMA32
 	bool
 	default X86_64
@@ -336,6 +343,7 @@ config X86_EXTENDED_PLATFORM
 
 	  If you enable this option then you'll be able to select support
 	  for the following (non-PC) 32 bit x86 platforms:
+		Goldfish (Android emulator)
 		AMD Elan
 		NUMAQ (IBM/Sequent)
 		RDC R-321x SoC
@@ -410,6 +418,7 @@ config X86_UV
 config X86_GOLDFISH
        bool "Goldfish (Virtual Platform)"
        depends on X86_32
+       depends on X86_EXTENDED_PLATFORM
        ---help---
 	 Enable support for the Goldfish virtual platform used primarily
 	 for Android development. Unless you are building for the Android
@@ -1058,8 +1067,16 @@ config MICROCODE_INTEL_LIB
 	depends on MICROCODE_INTEL
 
 config MICROCODE_INTEL_EARLY
+	def_bool n
+
+config MICROCODE_AMD_EARLY
+	def_bool n
+
+config MICROCODE_EARLY
 	bool "Early load microcode"
-	depends on MICROCODE_INTEL && BLK_DEV_INITRD
+	depends on MICROCODE=y && BLK_DEV_INITRD
+	select MICROCODE_INTEL_EARLY if MICROCODE_INTEL
+	select MICROCODE_AMD_EARLY if MICROCODE_AMD
 	default y
 	help
 	  This option provides functionality to read additional microcode data
@@ -1067,10 +1084,6 @@ config MICROCODE_INTEL_EARLY
 	  microcode to CPU's as early as possible. No functional change if no
 	  microcode data is glued to the initrd, therefore it's safe to say Y.
 
-config MICROCODE_EARLY
-	def_bool y
-	depends on MICROCODE_INTEL_EARLY
-
 config X86_MSR
 	tristate "/dev/cpu/*/msr - Model-specific register support"
 	---help---
@@ -2246,11 +2259,11 @@ source "drivers/pcmcia/Kconfig"
 source "drivers/pci/hotplug/Kconfig"
 
 config RAPIDIO
-	bool "RapidIO support"
+	tristate "RapidIO support"
 	depends on PCI
 	default n
 	help
-	  If you say Y here, the kernel will include drivers and
+	  If enabled this option will include drivers and the core
 	  infrastructure code to support RapidIO interconnect devices.
 
 source "drivers/rapidio/Kconfig"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index c198b7e13e7..c963881de0d 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -122,7 +122,6 @@ config DEBUG_NX_TEST
 config DOUBLEFAULT
 	default y
 	bool "Enable doublefault exception handler" if EXPERT
-	depends on X86_32
 	---help---
 	  This option allows trapping of rare doublefault exceptions that
 	  would otherwise cause a system to silently reboot. Disabling this
@@ -304,4 +303,14 @@ config DEBUG_NMI_SELFTEST
 
 	  If unsure, say N.
 
+config X86_DEBUG_STATIC_CPU_HAS
+	bool "Debug alternatives"
+	depends on DEBUG_KERNEL
+	---help---
+	  This option causes additional code to be generated which
+	  fails if static_cpu_has() is used before alternatives have
+	  run.
+
+	  If unsure, say N.
+
 endmenu
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5c477260294..07639c656fc 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -220,6 +220,12 @@ archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
 	$(Q)$(MAKE) $(clean)=arch/x86/tools
 
+PHONY += kvmconfig
+kvmconfig:
+	$(if $(wildcard $(objtree)/.config),, $(error You need an existing .config for this target))
+	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m -O $(objtree) $(objtree)/.config arch/x86/configs/kvm_guest.config
+	$(Q)yes "" | $(MAKE) oldconfig
+
 define archhelp
   echo  '* bzImage      - Compressed kernel image (arch/x86/boot/bzImage)'
   echo  '  install      - Install kernel using'
@@ -233,4 +239,5 @@ define archhelp
   echo  '                  bzdisk/fdimage*/isoimage also accept:'
   echo  '                  FDARGS="..."  arguments for the booted kernel'
   echo  '                  FDINITRD=file initrd for the booted kernel'
+  echo  '  kvmconfig	- Enable additional options for guest kernel support'
 endef
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index c205035a6b9..d606463aa6d 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -992,18 +992,20 @@ static efi_status_t exit_boot(struct boot_params *boot_params,
 	efi_memory_desc_t *mem_map;
 	efi_status_t status;
 	__u32 desc_version;
+	bool called_exit = false;
 	u8 nr_entries;
 	int i;
 
 	size = sizeof(*mem_map) * 32;
 
 again:
-	size += sizeof(*mem_map);
+	size += sizeof(*mem_map) * 2;
 	_size = size;
 	status = low_alloc(size, 1, (unsigned long *)&mem_map);
 	if (status != EFI_SUCCESS)
 		return status;
 
+get_map:
 	status = efi_call_phys5(sys_table->boottime->get_memory_map, &size,
 				mem_map, &key, &desc_size, &desc_version);
 	if (status == EFI_BUFFER_TOO_SMALL) {
@@ -1029,8 +1031,20 @@ again:
 	/* Might as well exit boot services now */
 	status = efi_call_phys2(sys_table->boottime->exit_boot_services,
 				handle, key);
-	if (status != EFI_SUCCESS)
-		goto free_mem_map;
+	if (status != EFI_SUCCESS) {
+		/*
+		 * ExitBootServices() will fail if any of the event
+		 * handlers change the memory map. In that case we
+		 * must be prepared to retry, but only once, so that
+		 * we are guaranteed to exit on repeated failures
+		 * instead of spinning forever.
+		 */
+		if (called_exit)
+			goto free_mem_map;
+
+		called_exit = true;
+		goto get_map;
+	}
 
 	/* Historic? */
 	boot_params->alt_mem_k = 32 * 1024;
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 16f24e6dad7..06e71c2c16b 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -27,8 +27,6 @@
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <asm/segment.h>
-#include <asm/pgtable_types.h>
-#include <asm/page_types.h>
 #include <asm/boot.h>
 #include <asm/msr.h>
 #include <asm/processor-flags.h>
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 94c54465002..c941d6a8887 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -243,6 +243,7 @@ static void parse_zoffset(char *fname)
 	c = fread(buf, 1, sizeof(buf) - 1, file);
 	if (ferror(file))
 		die("read-error on `zoffset.h'");
+	fclose(file);
 	buf[c] = 0;
 
 	p = (char *)buf;
diff --git a/arch/x86/configs/kvm_guest.config b/arch/x86/configs/kvm_guest.config
new file mode 100644
index 00000000000..f9affcc3b9f
--- /dev/null
+++ b/arch/x86/configs/kvm_guest.config
@@ -0,0 +1,28 @@
+CONFIG_NET=y
+CONFIG_NET_CORE=y
+CONFIG_NETDEVICES=y
+CONFIG_BLOCK=y
+CONFIG_BLK_DEV=y
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_INET=y
+CONFIG_TTY=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_BINFMT_ELF=y
+CONFIG_PCI=y
+CONFIG_PCI_MSI=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PARAVIRT=y
+CONFIG_KVM_GUEST=y
+CONFIG_VIRTIO=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_NET=y
+CONFIG_9P_FS=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index cf1a471a18a..bccfca68430 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -34,8 +34,6 @@
 #include <asm/sys_ia32.h>
 #include <asm/smap.h>
 
-#define FIX_EFLAGS	__FIX_EFLAGS
-
 int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
 {
 	int err = 0;
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index b31bf97775f..2dfac58f3b1 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -111,7 +111,7 @@ static inline void acpi_disable_pci(void)
 }
 
 /* Low-level suspend routine. */
-extern int acpi_suspend_lowlevel(void);
+extern int (*acpi_suspend_lowlevel)(void);
 
 /* Physical address to resume after wakeup */
 #define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start))
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 33880342223..f8119b582c3 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -12,6 +12,7 @@
 #include <asm/fixmap.h>
 #include <asm/mpspec.h>
 #include <asm/msr.h>
+#include <asm/idle.h>
 
 #define ARCH_APICTIMER_STOPS_ON_C3	1
 
@@ -687,5 +688,31 @@ extern int default_check_phys_apicid_present(int phys_apicid);
 #endif
 
 #endif /* CONFIG_X86_LOCAL_APIC */
+extern void irq_enter(void);
+extern void irq_exit(void);
+
+static inline void entering_irq(void)
+{
+	irq_enter();	/* extern, declared just above in this header */
+	exit_idle();	/* presumably declared in <asm/idle.h> -- confirm */
+}
+
+static inline void entering_ack_irq(void)
+{
+	ack_APIC_irq();	/* ack up front; exiting_ack_irq() acks last */
+	entering_irq();
+}
+
+static inline void exiting_irq(void)
+{
+	irq_exit();	/* no APIC ack here; exiting_ack_irq() adds one */
+}
+
+static inline void exiting_ack_irq(void)
+{
+	irq_exit();
+	/* Ack only at the end, after irq_exit(), to avoid potential reentry */
+	ack_APIC_irq();
+}
 
 #endif /* _ASM_X86_APIC_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index e99ac27f95b..47538a61c91 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -92,7 +92,7 @@
 #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */
 #define X86_FEATURE_11AP	(3*32+19) /* "" Bad local APIC aka 11AP */
 #define X86_FEATURE_NOPL	(3*32+20) /* The NOPL (0F 1F) instructions */
-					  /* 21 available, was AMD_C1E */
+#define X86_FEATURE_ALWAYS	(3*32+21) /* "" Always-present feature */
 #define X86_FEATURE_XTOPOLOGY	(3*32+22) /* cpu topology enum extensions */
 #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
 #define X86_FEATURE_NONSTOP_TSC	(3*32+24) /* TSC does not stop in C states */
@@ -356,15 +356,36 @@ extern const char * const x86_power_flags[32];
 #endif /* CONFIG_X86_64 */
 
 #if __GNUC__ >= 4
+extern void warn_pre_alternatives(void);
+extern bool __static_cpu_has_safe(u16 bit);
+
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
  * These are only valid after alternatives have run, but will statically
  * patch the target code for additional performance.
- *
  */
 static __always_inline __pure bool __static_cpu_has(u16 bit)
 {
 #if __GNUC__ > 4 || __GNUC_MINOR__ >= 5
+
+#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
+		/*
+		 * Catch too early usage of this before alternatives
+		 * have run.
+		 */
+		asm goto("1: jmp %l[t_warn]\n"
+			 "2:\n"
+			 ".section .altinstructions,\"a\"\n"
+			 " .long 1b - .\n"
+			 " .long 0\n"		/* no replacement */
+			 " .word %P0\n"		/* 1: do replace */
+			 " .byte 2b - 1b\n"	/* source len */
+			 " .byte 0\n"		/* replacement len */
+			 ".previous\n"
+			 /* skipping size check since replacement size = 0 */
+			 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
+#endif
+
 		asm goto("1: jmp %l[t_no]\n"
 			 "2:\n"
 			 ".section .altinstructions,\"a\"\n"
@@ -379,7 +400,13 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 		return true;
 	t_no:
 		return false;
-#else
+
+#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
+	t_warn:
+		warn_pre_alternatives();
+		return false;
+#endif
+#else /* GCC_VERSION >= 40500 */
 		u8 flag;
 		/* Open-coded due to __stringify() in ALTERNATIVE() */
 		asm volatile("1: movb $0,%0\n"
@@ -411,11 +438,94 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 		__static_cpu_has(bit) :				\
 		boot_cpu_has(bit)				\
 )
+
+static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
+{
+#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5
+/*
+ * Both jumps are emitted by hand as opcode 0xe9 plus a 4-byte signed relative
+ * offset. If the compiler/assembler chose the encoding, the replacement jump
+ * could, depending on the offset, end up bigger than the original one, which
+ * we cannot have. The price: the offset would often fit in fewer bytes.
+ */
+		asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
+			 "2:\n"
+			 ".section .altinstructions,\"a\"\n"
+			 " .long 1b - .\n"		/* src offset */
+			 " .long 3f - .\n"		/* repl offset */
+			 " .word %P1\n"			/* always replace */
+			 " .byte 2b - 1b\n"		/* src len */
+			 " .byte 4f - 3f\n"		/* repl len */
+			 ".previous\n"
+			 ".section .altinstr_replacement,\"ax\"\n"
+			 "3: .byte 0xe9\n .long %l[t_no] - 2b\n"
+			 "4:\n"
+			 ".previous\n"
+			 ".section .altinstructions,\"a\"\n"
+			 " .long 1b - .\n"		/* src offset */
+			 " .long 0\n"			/* no replacement */
+			 " .word %P0\n"			/* feature bit */
+			 " .byte 2b - 1b\n"		/* src len */
+			 " .byte 0\n"			/* repl len */
+			 ".previous\n"
+			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
+			 : : t_dynamic, t_no);
+		return true;	/* presumably: jmp at 1: patched out, bit set */
+	t_no:
+		return false;	/* target of the always-applied replacement jmp */
+	t_dynamic:
+		return __static_cpu_has_safe(bit); /* target of the unpatched jmp */
+#else /* GCC_VERSION >= 40500 */
+		u8 flag;	/* 2: as built, 0: ALWAYS repl., 1: feature-bit repl. */
+		/* Open-coded due to __stringify() in ALTERNATIVE() */
+		asm volatile("1: movb $2,%0\n"
+			     "2:\n"
+			     ".section .altinstructions,\"a\"\n"
+			     " .long 1b - .\n"		/* src offset */
+			     " .long 3f - .\n"		/* repl offset */
+			     " .word %P2\n"		/* always replace */
+			     " .byte 2b - 1b\n"		/* source len */
+			     " .byte 4f - 3f\n"		/* replacement len */
+			     ".previous\n"
+			     ".section .discard,\"aw\",@progbits\n"
+			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
+			     ".previous\n"
+			     ".section .altinstr_replacement,\"ax\"\n"
+			     "3: movb $0,%0\n"
+			     "4:\n"
+			     ".previous\n"
+			     ".section .altinstructions,\"a\"\n"
+			     " .long 1b - .\n"		/* src offset */
+			     " .long 5f - .\n"		/* repl offset */
+			     " .word %P1\n"		/* feature bit */
+			     " .byte 4b - 3b\n"		/* src len */
+			     " .byte 6f - 5f\n"		/* repl len */
+			     ".previous\n"
+			     ".section .discard,\"aw\",@progbits\n"
+			     " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
+			     ".previous\n"
+			     ".section .altinstr_replacement,\"ax\"\n"
+			     "5: movb $1,%0\n"
+			     "6:\n"
+			     ".previous\n"
+			     : "=qm" (flag)
+			     : "i" (bit), "i" (X86_FEATURE_ALWAYS));
+		return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
+#endif
+}
+
+#define static_cpu_has_safe(bit)				\
+(								\
+	__builtin_constant_p(boot_cpu_has(bit)) ?		\
+		boot_cpu_has(bit) :				\
+		_static_cpu_has_safe(bit)			\
+)
 #else
 /*
  * gcc 3.x is too stupid to do the static test; fall back to dynamic.
  */
-#define static_cpu_has(bit) boot_cpu_has(bit)
+#define static_cpu_has(bit)		boot_cpu_has(bit)
+#define static_cpu_has_safe(bit)	boot_cpu_has(bit)
 #endif
 
 #define cpu_has_bug(c, bit)	cpu_has(c, (bit))
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 8bf1c06070d..b90e5dfeee4 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -36,8 +36,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
 
 extern struct desc_ptr idt_descr;
 extern gate_desc idt_table[];
-extern struct desc_ptr nmi_idt_descr;
-extern gate_desc nmi_idt_table[];
+extern struct desc_ptr debug_idt_descr;
+extern gate_desc debug_idt_table[];
 
 struct gdt_page {
 	struct desc_struct gdt[GDT_ENTRIES];
@@ -316,7 +316,20 @@ static inline void set_nmi_gate(int gate, void *addr)
 	gate_desc s;
 
 	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
-	write_idt_entry(nmi_idt_table, gate, &s);
+	write_idt_entry(debug_idt_table, gate, &s);
+}
+#endif
+
+#ifdef CONFIG_TRACING
+extern struct desc_ptr trace_idt_descr;
+extern gate_desc trace_idt_table[];
+static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
+{
+	write_idt_entry(trace_idt_table, entry, gate);
+}
+#else /* !CONFIG_TRACING: no trace_idt_table, so an empty stub */
+static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
+{
 }
 #endif
 
@@ -331,6 +344,7 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
 	 * setup time
 	 */
 	write_idt_entry(idt_table, gate, &s);
+	write_trace_idt_entry(gate, &s);
 }
 
 /*
@@ -360,12 +374,39 @@ static inline void alloc_system_vector(int vector)
 	}
 }
 
-static inline void alloc_intr_gate(unsigned int n, void *addr)
+#ifdef CONFIG_TRACING
+static inline void trace_set_intr_gate(unsigned int gate, void *addr)
+{
+	gate_desc s;
+
+	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
+	write_idt_entry(trace_idt_table, gate, &s);	/* trace IDT only */
+}
+
+static inline void __trace_alloc_intr_gate(unsigned int n, void *addr)
+{
+	trace_set_intr_gate(n, addr);
+}
+#else /* !CONFIG_TRACING */
+static inline void trace_set_intr_gate(unsigned int gate, void *addr)
+{
+}
+
+#define __trace_alloc_intr_gate(n, addr)	/* empty: args are discarded */
+#endif /* CONFIG_TRACING */
+
+static inline void __alloc_intr_gate(unsigned int n, void *addr)
 {
-	alloc_system_vector(n);
 	set_intr_gate(n, addr);
 }
 
+#define alloc_intr_gate(n, addr)	/* n evaluated up to 3 times */	\
+	do {	/* addr must be a bare identifier: see trace_##addr */	\
+		alloc_system_vector(n);				\
+		__alloc_intr_gate(n, addr);			\
+		__trace_alloc_intr_gate(n, trace_##addr);	\
+	} while (0)
+
 /*
  * This routine sets up an interrupt gate at directory privilege level 3.
  */
@@ -405,4 +446,70 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
 	_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
 }
 
+#ifdef CONFIG_X86_64
+DECLARE_PER_CPU(u32, debug_idt_ctr);
+static inline bool is_debug_idt_enabled(void)
+{
+	if (this_cpu_read(debug_idt_ctr))	/* nonzero per-CPU counter */
+		return true;
+
+	return false;
+}
+
+static inline void load_debug_idt(void)
+{
+	load_idt((const struct desc_ptr *)&debug_idt_descr);
+}
+#else /* !CONFIG_X86_64 */
+static inline bool is_debug_idt_enabled(void)
+{
+	return false;	/* stub: debug IDT is never reported as enabled */
+}
+
+static inline void load_debug_idt(void)
+{
+}
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_TRACING
+extern atomic_t trace_idt_ctr;
+static inline bool is_trace_idt_enabled(void)
+{
+	if (atomic_read(&trace_idt_ctr))	/* nonzero atomic counter */
+		return true;
+
+	return false;
+}
+
+static inline void load_trace_idt(void)
+{
+	load_idt((const struct desc_ptr *)&trace_idt_descr);
+}
+#else /* !CONFIG_TRACING */
+static inline bool is_trace_idt_enabled(void)
+{
+	return false;	/* stub: trace IDT is never reported as enabled */
+}
+
+static inline void load_trace_idt(void)
+{
+}
+#endif /* CONFIG_TRACING */
+
+/*
+ * Load the IDT currently in effect: debug IDT if enabled, else trace IDT if
+ * enabled, else the default idt_descr. Must be called with interrupts
+ * disabled to avoid races, so that the IDT is always set back to the
+ * expected descriptor. CPU initialization also calls it without disabling
+ * interrupts, as nothing should be bothering the CPU then.
+ */
+static inline void load_current_idt(void)
+{
+	if (is_debug_idt_enabled())
+		load_debug_idt();
+	else if (is_trace_idt_enabled())
+		load_trace_idt();
+	else
+		load_idt((const struct desc_ptr *)&idt_descr);
+}
 #endif /* _ASM_X86_DESC_H */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 60c89f30c72..0062a012504 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -52,40 +52,40 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
 		     u64 arg4, u64 arg5, u64 arg6);
 
 #define efi_call_phys0(f)			\
-	efi_call0((void *)(f))
+	efi_call0((f))
 #define efi_call_phys1(f, a1)			\
-	efi_call1((void *)(f), (u64)(a1))
+	efi_call1((f), (u64)(a1))
 #define efi_call_phys2(f, a1, a2)			\
-	efi_call2((void *)(f), (u64)(a1), (u64)(a2))
+	efi_call2((f), (u64)(a1), (u64)(a2))
 #define efi_call_phys3(f, a1, a2, a3)				\
-	efi_call3((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3))
+	efi_call3((f), (u64)(a1), (u64)(a2), (u64)(a3))
 #define efi_call_phys4(f, a1, a2, a3, a4)				\
-	efi_call4((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3),		\
+	efi_call4((f), (u64)(a1), (u64)(a2), (u64)(a3),		\
 		  (u64)(a4))
 #define efi_call_phys5(f, a1, a2, a3, a4, a5)				\
-	efi_call5((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3),		\
+	efi_call5((f), (u64)(a1), (u64)(a2), (u64)(a3),		\
 		  (u64)(a4), (u64)(a5))
 #define efi_call_phys6(f, a1, a2, a3, a4, a5, a6)			\
-	efi_call6((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3),		\
+	efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3),		\
 		  (u64)(a4), (u64)(a5), (u64)(a6))
 
 #define efi_call_virt0(f)				\
-	efi_call0((void *)(efi.systab->runtime->f))
+	efi_call0((efi.systab->runtime->f))
 #define efi_call_virt1(f, a1)					\
-	efi_call1((void *)(efi.systab->runtime->f), (u64)(a1))
+	efi_call1((efi.systab->runtime->f), (u64)(a1))
 #define efi_call_virt2(f, a1, a2)					\
-	efi_call2((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2))
+	efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2))
 #define efi_call_virt3(f, a1, a2, a3)					\
-	efi_call3((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3))
 #define efi_call_virt4(f, a1, a2, a3, a4)				\
-	efi_call4((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4))
 #define efi_call_virt5(f, a1, a2, a3, a4, a5)				\
-	efi_call5((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4), (u64)(a5))
 #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)			\
-	efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
 
 extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 9bd4ecac72b..dc5fa661465 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -13,14 +13,16 @@
 BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
-BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
-BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
+BUILD_INTERRUPT3(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR,
+		 smp_irq_move_cleanup_interrupt)
+BUILD_INTERRUPT3(reboot_interrupt, REBOOT_VECTOR, smp_reboot_interrupt)
 #endif
 
 BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
 
 #ifdef CONFIG_HAVE_KVM
-BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
+BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR,
+		 smp_kvm_posted_intr_ipi)
 #endif
 
 /*
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 0dc7d9e21c3..e846225265e 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -81,11 +81,11 @@ enum fixed_addresses {
 			    + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
 	VVAR_PAGE,
 	VSYSCALL_HPET,
-#endif
 #ifdef CONFIG_PARAVIRT_CLOCK
 	PVCLOCK_FIXMAP_BEGIN,
 	PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
 #endif
+#endif
 	FIX_DBGP_BASE,
 	FIX_EARLYCON_MEM_BASE,
 #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index e25cc33ec54..4d0bda7b11e 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -62,10 +62,8 @@ extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
 #define xstateregs_active	fpregs_active
 
 #ifdef CONFIG_MATH_EMULATION
-# define HAVE_HWFP		(boot_cpu_data.hard_math)
 extern void finit_soft_fpu(struct i387_soft_struct *soft);
 #else
-# define HAVE_HWFP		1
 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
 #endif
 
@@ -345,7 +343,7 @@ static inline void __thread_fpu_end(struct task_struct *tsk)
 
 static inline void __thread_fpu_begin(struct task_struct *tsk)
 {
-	if (!use_eager_fpu())
+	if (!static_cpu_has_safe(X86_FEATURE_EAGER_FPU))
 		clts();
 	__thread_set_has_fpu(tsk);
 }
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 1da97efad08..e4ac559c4a2 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -77,6 +77,23 @@ extern void threshold_interrupt(void);
 extern void call_function_interrupt(void);
 extern void call_function_single_interrupt(void);
 
+#ifdef CONFIG_TRACING
+/* Interrupt handlers registered during init_IRQ */
+extern void trace_apic_timer_interrupt(void);
+extern void trace_x86_platform_ipi(void);
+extern void trace_error_interrupt(void);
+extern void trace_irq_work_interrupt(void);
+extern void trace_spurious_interrupt(void);
+extern void trace_thermal_interrupt(void);
+extern void trace_reschedule_interrupt(void);
+extern void trace_threshold_interrupt(void);
+extern void trace_call_function_interrupt(void);
+extern void trace_call_function_single_interrupt(void);
+#define trace_irq_move_cleanup_interrupt  irq_move_cleanup_interrupt
+#define trace_reboot_interrupt  reboot_interrupt
+#define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
+#endif /* CONFIG_TRACING */
+
 /* IOAPIC */
 #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
 extern unsigned long io_apic_irqs;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3741c653767..f87f7fcefa0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -59,7 +59,7 @@
 	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
 			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
-			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \
+			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
 			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -222,14 +222,22 @@ struct kvm_mmu_page {
 	int root_count;          /* Currently serving as active root */
 	unsigned int unsync_children;
 	unsigned long parent_ptes;	/* Reverse mapping for parent_pte */
+
+	/* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen.  */
+	unsigned long mmu_valid_gen;
+
 	DECLARE_BITMAP(unsync_child_bitmap, 512);
 
 #ifdef CONFIG_X86_32
+	/*
+	 * Used out of the mmu-lock to avoid reading spte values while an
+	 * update is in progress; see the comments in __get_spte_lockless().
+	 */
 	int clear_spte_count;
 #endif
 
+	/* Number of writes since the last time traversal visited this page.  */
 	int write_flooding_count;
-	bool mmio_cached;
 };
 
 struct kvm_pio_request {
@@ -529,11 +537,14 @@ struct kvm_arch {
 	unsigned int n_requested_mmu_pages;
 	unsigned int n_max_mmu_pages;
 	unsigned int indirect_shadow_pages;
+	unsigned long mmu_valid_gen;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	/*
 	 * Hash table of struct kvm_mmu_page.
 	 */
 	struct list_head active_mmu_pages;
+	struct list_head zapped_obsolete_pages;
+
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
 	int iommu_flags;
@@ -769,7 +780,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 				     struct kvm_memory_slot *slot,
 				     gfn_t gfn_offset, unsigned long mask);
 void kvm_mmu_zap_all(struct kvm *kvm);
-void kvm_mmu_zap_mmio_sptes(struct kvm *kvm);
+void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
 
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index fa5f71e021d..6b52980c29c 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -61,7 +61,7 @@
 #define MCJ_CTX_IRQ		0x2  /* inject context: IRQ */
 #define MCJ_NMI_BROADCAST	0x4  /* do NMI broadcasting */
 #define MCJ_EXCEPTION		0x8  /* raise as exception */
-#define MCJ_IRQ_BRAODCAST	0x10 /* do IRQ broadcasting */
+#define MCJ_IRQ_BROADCAST	0x10 /* do IRQ broadcasting */
 
 #define MCE_OVERFLOW 0		/* bit 0 in flags means overflow */
 
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
new file mode 100644
index 00000000000..c6b043f4027
--- /dev/null
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -0,0 +1,78 @@
+#ifndef _ASM_X86_MICROCODE_AMD_H
+#define _ASM_X86_MICROCODE_AMD_H
+
+#include <asm/microcode.h>
+
+#define UCODE_MAGIC			0x00414d44
+#define UCODE_EQUIV_CPU_TABLE_TYPE	0x00000000
+#define UCODE_UCODE_TYPE		0x00000001
+
+#define SECTION_HDR_SIZE		8
+#define CONTAINER_HDR_SZ		12
+
+struct equiv_cpu_entry {
+	u32	installed_cpu;
+	u32	fixed_errata_mask;
+	u32	fixed_errata_compare;
+	u16	equiv_cpu;
+	u16	res;
+} __attribute__((packed));
+
+struct microcode_header_amd {
+	u32	data_code;
+	u32	patch_id;
+	u16	mc_patch_data_id;
+	u8	mc_patch_data_len;
+	u8	init_flag;
+	u32	mc_patch_data_checksum;
+	u32	nb_dev_id;
+	u32	sb_dev_id;
+	u16	processor_rev_id;
+	u8	nb_rev_id;
+	u8	sb_rev_id;
+	u8	bios_api_rev;
+	u8	reserved1[3];
+	u32	match_reg[8];
+} __attribute__((packed));
+
+struct microcode_amd {
+	struct microcode_header_amd	hdr;
+	unsigned int			mpb[0];
+};
+
+static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table,
+				unsigned int sig)
+{
+	int i = 0;
+
+	if (!equiv_cpu_table)
+		return 0;	/* no table: same result as "not found" */
+
+	while (equiv_cpu_table[i].installed_cpu != 0) {
+		if (sig == equiv_cpu_table[i].installed_cpu)
+			return equiv_cpu_table[i].equiv_cpu;
+
+		i++;
+	}
+	return 0;	/* hit the installed_cpu == 0 terminator: not found */
+}
+
+extern int __apply_microcode_amd(struct microcode_amd *mc_amd);
+extern int apply_microcode_amd(int cpu);
+extern enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size);
+
+#ifdef CONFIG_MICROCODE_AMD_EARLY
+#ifdef CONFIG_X86_32
+#define MPB_MAX_SIZE PAGE_SIZE
+extern u8 amd_bsp_mpb[MPB_MAX_SIZE];
+#endif /* CONFIG_X86_32 */
+extern void __init load_ucode_amd_bsp(void);
+extern void __cpuinit load_ucode_amd_ap(void);
+extern int __init save_microcode_in_initrd_amd(void);
+#else /* !CONFIG_MICROCODE_AMD_EARLY: inline stubs */
+static inline void __init load_ucode_amd_bsp(void) {}
+static inline void __cpuinit load_ucode_amd_ap(void) {}
+static inline int __init save_microcode_in_initrd_amd(void) { return -EINVAL; }
+#endif /* CONFIG_MICROCODE_AMD_EARLY */
+
+#endif /* _ASM_X86_MICROCODE_AMD_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 5356f927d41..87a085333cb 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -67,10 +67,12 @@ update_match_revision(struct microcode_header_intel *mc_header, int rev);
 extern void __init load_ucode_intel_bsp(void);
 extern void __cpuinit load_ucode_intel_ap(void);
 extern void show_ucode_info_early(void);
+extern int __init save_microcode_in_initrd_intel(void);
 #else
 static inline __init void load_ucode_intel_bsp(void) {}
 static inline __cpuinit void load_ucode_intel_ap(void) {}
 static inline void show_ucode_info_early(void) {}
+static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; }
 #endif
 
 #if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index c2934be2446..cd9c41938b8 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -12,6 +12,9 @@ struct ms_hyperv_info {
 extern struct ms_hyperv_info ms_hyperv;
 
 void hyperv_callback_vector(void);
+#ifdef CONFIG_TRACING
+#define trace_hyperv_callback_vector hyperv_callback_vector
+#endif
 void hyperv_vector_handler(struct pt_regs *regs);
 void hv_register_vmbus_handler(int irq, irq_handler_t handler);
 
diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h
index 03f90c8a5a7..0208c3c2cbc 100644
--- a/arch/x86/include/asm/mutex_32.h
+++ b/arch/x86/include/asm/mutex_32.h
@@ -42,17 +42,14 @@ do {								\
  *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
  *                                 from 1 to a 0 value
  *  @count: pointer of type atomic_t
- *  @fail_fn: function to call if the original value was not 1
  *
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if it
- * wasn't 1 originally. This function returns 0 if the fastpath succeeds,
- * or anything the slow path function returns
+ * Change the count from 1 to a value lower than 1. This function returns 0
+ * if the fastpath succeeds, or -1 otherwise.
  */
-static inline int __mutex_fastpath_lock_retval(atomic_t *count,
-					       int (*fail_fn)(atomic_t *))
+static inline int __mutex_fastpath_lock_retval(atomic_t *count)
 {
 	if (unlikely(atomic_dec_return(count) < 0))
-		return fail_fn(count);
+		return -1;
 	else
 		return 0;
 }
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index 68a87b0f8e2..2c543fff241 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -37,17 +37,14 @@ do {								\
  *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
  *                                 from 1 to a 0 value
  *  @count: pointer of type atomic_t
- *  @fail_fn: function to call if the original value was not 1
  *
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function returns 0 if the fastpath succeeds,
- * or anything the slow path function returns
+ * Change the count from 1 to a value lower than 1. This function returns 0
+ * if the fastpath succeeds, or -1 otherwise.
  */
-static inline int __mutex_fastpath_lock_retval(atomic_t *count,
-					       int (*fail_fn)(atomic_t *))
+static inline int __mutex_fastpath_lock_retval(atomic_t *count)
 {
 	if (unlikely(atomic_dec_return(count) < 0))
-		return fail_fn(count);
+		return -1;
 	else
 		return 0;
 }
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 57cb6340221..8249df45d2f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,6 +29,9 @@
 #define ARCH_PERFMON_EVENTSEL_INV			(1ULL << 23)
 #define ARCH_PERFMON_EVENTSEL_CMASK			0xFF000000ULL
 
+#define HSW_IN_TX					(1ULL << 32)
+#define HSW_IN_TX_CHECKPOINTED				(1ULL << 33)
+
 #define AMD64_EVENTSEL_INT_CORE_ENABLE			(1ULL << 36)
 #define AMD64_EVENTSEL_GUESTONLY			(1ULL << 40)
 #define AMD64_EVENTSEL_HOSTONLY				(1ULL << 41)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5b0818bc896..7dc305a4605 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -207,7 +207,7 @@ static inline pte_t pte_mkexec(pte_t pte)
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-	return pte_set_flags(pte, _PAGE_DIRTY);
+	return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
 }
 
 static inline pte_t pte_mkyoung(pte_t pte)
@@ -271,7 +271,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
 
 static inline pmd_t pmd_mkdirty(pmd_t pmd)
 {
-	return pmd_set_flags(pmd, _PAGE_DIRTY);
+	return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
 }
 
 static inline pmd_t pmd_mkhuge(pmd_t pmd)
@@ -294,6 +294,26 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd)
 	return pmd_clear_flags(pmd, _PAGE_PRESENT);
 }
 
+static inline int pte_soft_dirty(pte_t pte)
+{
+	return pte_flags(pte) & _PAGE_SOFT_DIRTY;
+}
+
+static inline int pmd_soft_dirty(pmd_t pmd)
+{
+	return pmd_flags(pmd) & _PAGE_SOFT_DIRTY;
+}
+
+static inline pte_t pte_mksoft_dirty(pte_t pte)
+{
+	return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
+}
+
+static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
+{
+	return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
+}
+
 /*
  * Mask out unsupported bits in a present pgprot.  Non-present pgprots
  * can use those bits for other purposes, so leave them be.
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index e6423002c10..c98ac63aae4 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -55,6 +55,18 @@
 #define _PAGE_HIDDEN	(_AT(pteval_t, 0))
 #endif
 
+/*
+ * The same hidden bit is used by kmemcheck, but since kmemcheck
+ * works on kernel pages while the soft-dirty engine works on user
+ * space pages, they do not conflict with each other.
+ */
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SOFT_DIRTY	(_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
+#else
+#define _PAGE_SOFT_DIRTY	(_AT(pteval_t, 0))
+#endif
+
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 #define _PAGE_NX	(_AT(pteval_t, 1) << _PAGE_BIT_NX)
 #else
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 22224b3b43b..29937c4f6ff 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -89,9 +89,9 @@ struct cpuinfo_x86 {
 	char			wp_works_ok;	/* It doesn't on 386's */
 
 	/* Problems on some 486Dx4's and old 386's: */
-	char			hard_math;
 	char			rfu;
 	char			pad0;
+	char			pad1;
 #else
 	/* Number of 4K pages in DTLB/ITLB combined(in pages): */
 	int			x86_tlbsize;
@@ -164,6 +164,7 @@ extern const struct seq_operations cpuinfo_op;
 #define cache_line_size()	(boot_cpu_data.x86_cache_alignment)
 
 extern void cpu_detect(struct cpuinfo_x86 *c);
+extern void __cpuinit fpu_detect(struct cpuinfo_x86 *c);
 
 extern void early_cpu_init(void);
 extern void identify_boot_cpu(void);
@@ -981,5 +982,5 @@ bool xen_set_default_idle(void);
 #endif
 
 void stop_this_cpu(void *dummy);
-
+void df_debug(struct pt_regs *regs, long error_code);
 #endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index beff97f7df3..7a958164088 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -7,10 +7,10 @@
 
 #include <asm/processor-flags.h>
 
-#define __FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_OF | \
+#define FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_OF | \
 			 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
 			 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
-			 X86_EFLAGS_CF)
+			 X86_EFLAGS_CF | X86_EFLAGS_RF)
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
 
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 41fc93a2e22..2f4d924fe6c 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -16,7 +16,7 @@ static inline void native_clts(void)
  * all loads stores around it, which can hurt performance. Solution is to
  * use a variable and mimic reads and writes to it to enforce serialization
  */
-static unsigned long __force_order;
+extern unsigned long __force_order;
 
 static inline unsigned long native_read_cr0(void)
 {
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a1df6e84691..27811190cbd 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -89,7 +89,6 @@ struct thread_info {
 #define TIF_FORK		18	/* ret_from_fork */
 #define TIF_NOHZ		19	/* in adaptive nohz mode */
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
-#define TIF_DEBUG		21	/* uses debug registers */
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
 #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
 #define TIF_BLOCKSTEP		25	/* set when we want DEBUGCTLMSR_BTF */
@@ -113,7 +112,6 @@ struct thread_info {
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_FORK		(1 << TIF_FORK)
 #define _TIF_NOHZ		(1 << TIF_NOHZ)
-#define _TIF_DEBUG		(1 << TIF_DEBUG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
 #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
 #define _TIF_BLOCKSTEP		(1 << TIF_BLOCKSTEP)
@@ -154,7 +152,7 @@ struct thread_info {
 	(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
-#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
+#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
 
 #define PREEMPT_ACTIVE		0x10000000
 
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 50a7fc0f824..cf512003e66 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -62,7 +62,7 @@ static inline void __flush_tlb_all(void)
 
 static inline void __flush_tlb_one(unsigned long addr)
 {
-		__flush_tlb_single(addr);
+	__flush_tlb_single(addr);
 }
 
 #define TLB_FLUSH_ALL	-1UL
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
new file mode 100644
index 00000000000..2874df24e7a
--- /dev/null
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -0,0 +1,104 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM irq_vectors
+
+#if !defined(_TRACE_IRQ_VECTORS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IRQ_VECTORS_H
+
+#include <linux/tracepoint.h>
+
+extern void trace_irq_vector_regfunc(void);
+extern void trace_irq_vector_unregfunc(void);
+
+DECLARE_EVENT_CLASS(x86_irq_vector,
+
+	TP_PROTO(int vector),
+
+	TP_ARGS(vector),
+
+	TP_STRUCT__entry(
+		__field(		int,	vector	)
+	),
+
+	TP_fast_assign(
+		__entry->vector = vector;
+	),
+
+	TP_printk("vector=%d", __entry->vector) );
+
+#define DEFINE_IRQ_VECTOR_EVENT(name)		\
+DEFINE_EVENT_FN(x86_irq_vector, name##_entry,	\
+	TP_PROTO(int vector),			\
+	TP_ARGS(vector),			\
+	trace_irq_vector_regfunc,		\
+	trace_irq_vector_unregfunc);		\
+DEFINE_EVENT_FN(x86_irq_vector, name##_exit,	\
+	TP_PROTO(int vector),			\
+	TP_ARGS(vector),			\
+	trace_irq_vector_regfunc,		\
+	trace_irq_vector_unregfunc);
+
+
+/*
+ * local_timer - called when entering/exiting a local timer interrupt
+ * vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(local_timer);
+
+/*
+ * reschedule - called when entering/exiting a reschedule vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(reschedule);
+
+/*
+ * spurious_apic - called when entering/exiting a spurious apic vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(spurious_apic);
+
+/*
+ * error_apic - called when entering/exiting an error apic vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(error_apic);
+
+/*
+ * x86_platform_ipi - called when entering/exiting an x86 platform ipi interrupt
+ * vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi);
+
+/*
+ * irq_work - called when entering/exiting an irq work interrupt
+ * vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(irq_work);
+
+/*
+ * call_function - called when entering/exiting a call function interrupt
+ * vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(call_function);
+
+/*
+ * call_function_single - called when entering/exiting a call function
+ * single interrupt vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(call_function_single);
+
+/*
+ * threshold_apic - called when entering/exiting a threshold apic interrupt
+ * vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(threshold_apic);
+
+/*
+ * thermal_apic - called when entering/exiting a thermal apic interrupt
+ * vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(thermal_apic);
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE irq_vectors
+#endif /*  _TRACE_IRQ_VECTORS_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 142810c457d..4f7923dd000 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -235,7 +235,7 @@ extern long __copy_user_nocache(void *dst, const void __user *src,
 static inline int
 __copy_from_user_nocache(void *dst, const void __user *src, unsigned size)
 {
-	might_sleep();
+	might_fault();
 	return __copy_user_nocache(dst, src, size, 1);
 }
 
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index a06983cdc12..0b46ef261c7 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -731,6 +731,9 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
 }
 
 extern void uv_bau_message_intr1(void);
+#ifdef CONFIG_TRACING
+#define trace_uv_bau_message_intr1 uv_bau_message_intr1
+#endif
 extern void uv_bau_timeout_intr1(void);
 
 struct atomic_short {
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 2af848dfa75..bb0465090ae 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -170,6 +170,9 @@
 #define MSR_KNC_EVNTSEL0               0x00000028
 #define MSR_KNC_EVNTSEL1               0x00000029
 
+/* Alternative perfctr range with full access. */
+#define MSR_IA32_PMC0			0x000004c1
+
 /* AMD64 MSRs. Not complete. See the architecture manual for a more
    complete list. */
 
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 54991a74604..180a0c3c224 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -2,75 +2,129 @@
 #define _UAPI_ASM_X86_PROCESSOR_FLAGS_H
 /* Various flags defined: can be included from assembler. */
 
+#include <linux/const.h>
+
 /*
  * EFLAGS bits
  */
-#define X86_EFLAGS_CF	0x00000001 /* Carry Flag */
-#define X86_EFLAGS_BIT1	0x00000002 /* Bit 1 - always on */
-#define X86_EFLAGS_PF	0x00000004 /* Parity Flag */
-#define X86_EFLAGS_AF	0x00000010 /* Auxiliary carry Flag */
-#define X86_EFLAGS_ZF	0x00000040 /* Zero Flag */
-#define X86_EFLAGS_SF	0x00000080 /* Sign Flag */
-#define X86_EFLAGS_TF	0x00000100 /* Trap Flag */
-#define X86_EFLAGS_IF	0x00000200 /* Interrupt Flag */
-#define X86_EFLAGS_DF	0x00000400 /* Direction Flag */
-#define X86_EFLAGS_OF	0x00000800 /* Overflow Flag */
-#define X86_EFLAGS_IOPL	0x00003000 /* IOPL mask */
-#define X86_EFLAGS_NT	0x00004000 /* Nested Task */
-#define X86_EFLAGS_RF	0x00010000 /* Resume Flag */
-#define X86_EFLAGS_VM	0x00020000 /* Virtual Mode */
-#define X86_EFLAGS_AC	0x00040000 /* Alignment Check */
-#define X86_EFLAGS_VIF	0x00080000 /* Virtual Interrupt Flag */
-#define X86_EFLAGS_VIP	0x00100000 /* Virtual Interrupt Pending */
-#define X86_EFLAGS_ID	0x00200000 /* CPUID detection flag */
+#define X86_EFLAGS_CF_BIT	0 /* Carry Flag */
+#define X86_EFLAGS_CF		_BITUL(X86_EFLAGS_CF_BIT)
+#define X86_EFLAGS_FIXED_BIT	1 /* Bit 1 - always on */
+#define X86_EFLAGS_FIXED	_BITUL(X86_EFLAGS_FIXED_BIT)
+#define X86_EFLAGS_PF_BIT	2 /* Parity Flag */
+#define X86_EFLAGS_PF		_BITUL(X86_EFLAGS_PF_BIT)
+#define X86_EFLAGS_AF_BIT	4 /* Auxiliary carry Flag */
+#define X86_EFLAGS_AF		_BITUL(X86_EFLAGS_AF_BIT)
+#define X86_EFLAGS_ZF_BIT	6 /* Zero Flag */
+#define X86_EFLAGS_ZF		_BITUL(X86_EFLAGS_ZF_BIT)
+#define X86_EFLAGS_SF_BIT	7 /* Sign Flag */
+#define X86_EFLAGS_SF		_BITUL(X86_EFLAGS_SF_BIT)
+#define X86_EFLAGS_TF_BIT	8 /* Trap Flag */
+#define X86_EFLAGS_TF		_BITUL(X86_EFLAGS_TF_BIT)
+#define X86_EFLAGS_IF_BIT	9 /* Interrupt Flag */
+#define X86_EFLAGS_IF		_BITUL(X86_EFLAGS_IF_BIT)
+#define X86_EFLAGS_DF_BIT	10 /* Direction Flag */
+#define X86_EFLAGS_DF		_BITUL(X86_EFLAGS_DF_BIT)
+#define X86_EFLAGS_OF_BIT	11 /* Overflow Flag */
+#define X86_EFLAGS_OF		_BITUL(X86_EFLAGS_OF_BIT)
+#define X86_EFLAGS_IOPL_BIT	12 /* I/O Privilege Level (2 bits) */
+#define X86_EFLAGS_IOPL		(_AC(3,UL) << X86_EFLAGS_IOPL_BIT)
+#define X86_EFLAGS_NT_BIT	14 /* Nested Task */
+#define X86_EFLAGS_NT		_BITUL(X86_EFLAGS_NT_BIT)
+#define X86_EFLAGS_RF_BIT	16 /* Resume Flag */
+#define X86_EFLAGS_RF		_BITUL(X86_EFLAGS_RF_BIT)
+#define X86_EFLAGS_VM_BIT	17 /* Virtual Mode */
+#define X86_EFLAGS_VM		_BITUL(X86_EFLAGS_VM_BIT)
+#define X86_EFLAGS_AC_BIT	18 /* Alignment Check/Access Control */
+#define X86_EFLAGS_AC		_BITUL(X86_EFLAGS_AC_BIT)
+#define X86_EFLAGS_AC_BIT	18 /* Alignment Check/Access Control */
+#define X86_EFLAGS_AC		_BITUL(X86_EFLAGS_AC_BIT)
+#define X86_EFLAGS_VIF_BIT	19 /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIF		_BITUL(X86_EFLAGS_VIF_BIT)
+#define X86_EFLAGS_VIP_BIT	20 /* Virtual Interrupt Pending */
+#define X86_EFLAGS_VIP		_BITUL(X86_EFLAGS_VIP_BIT)
+#define X86_EFLAGS_ID_BIT	21 /* CPUID detection */
+#define X86_EFLAGS_ID		_BITUL(X86_EFLAGS_ID_BIT)
 
 /*
  * Basic CPU control in CR0
  */
-#define X86_CR0_PE	0x00000001 /* Protection Enable */
-#define X86_CR0_MP	0x00000002 /* Monitor Coprocessor */
-#define X86_CR0_EM	0x00000004 /* Emulation */
-#define X86_CR0_TS	0x00000008 /* Task Switched */
-#define X86_CR0_ET	0x00000010 /* Extension Type */
-#define X86_CR0_NE	0x00000020 /* Numeric Error */
-#define X86_CR0_WP	0x00010000 /* Write Protect */
-#define X86_CR0_AM	0x00040000 /* Alignment Mask */
-#define X86_CR0_NW	0x20000000 /* Not Write-through */
-#define X86_CR0_CD	0x40000000 /* Cache Disable */
-#define X86_CR0_PG	0x80000000 /* Paging */
+#define X86_CR0_PE_BIT		0 /* Protection Enable */
+#define X86_CR0_PE		_BITUL(X86_CR0_PE_BIT)
+#define X86_CR0_MP_BIT		1 /* Monitor Coprocessor */
+#define X86_CR0_MP		_BITUL(X86_CR0_MP_BIT)
+#define X86_CR0_EM_BIT		2 /* Emulation */
+#define X86_CR0_EM		_BITUL(X86_CR0_EM_BIT)
+#define X86_CR0_TS_BIT		3 /* Task Switched */
+#define X86_CR0_TS		_BITUL(X86_CR0_TS_BIT)
+#define X86_CR0_ET_BIT		4 /* Extension Type */
+#define X86_CR0_ET		_BITUL(X86_CR0_ET_BIT)
+#define X86_CR0_NE_BIT		5 /* Numeric Error */
+#define X86_CR0_NE		_BITUL(X86_CR0_NE_BIT)
+#define X86_CR0_WP_BIT		16 /* Write Protect */
+#define X86_CR0_WP		_BITUL(X86_CR0_WP_BIT)
+#define X86_CR0_AM_BIT		18 /* Alignment Mask */
+#define X86_CR0_AM		_BITUL(X86_CR0_AM_BIT)
+#define X86_CR0_NW_BIT		29 /* Not Write-through */
+#define X86_CR0_NW		_BITUL(X86_CR0_NW_BIT)
+#define X86_CR0_CD_BIT		30 /* Cache Disable */
+#define X86_CR0_CD		_BITUL(X86_CR0_CD_BIT)
+#define X86_CR0_PG_BIT		31 /* Paging */
+#define X86_CR0_PG		_BITUL(X86_CR0_PG_BIT)
 
 /*
  * Paging options in CR3
  */
-#define X86_CR3_PWT	0x00000008 /* Page Write Through */
-#define X86_CR3_PCD	0x00000010 /* Page Cache Disable */
-#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */
+#define X86_CR3_PWT_BIT		3 /* Page Write Through */
+#define X86_CR3_PWT		_BITUL(X86_CR3_PWT_BIT)
+#define X86_CR3_PCD_BIT		4 /* Page Cache Disable */
+#define X86_CR3_PCD		_BITUL(X86_CR3_PCD_BIT)
+#define X86_CR3_PCID_MASK	_AC(0x00000fff,UL) /* PCID Mask */
 
 /*
  * Intel CPU features in CR4
  */
-#define X86_CR4_VME	0x00000001 /* enable vm86 extensions */
-#define X86_CR4_PVI	0x00000002 /* virtual interrupts flag enable */
-#define X86_CR4_TSD	0x00000004 /* disable time stamp at ipl 3 */
-#define X86_CR4_DE	0x00000008 /* enable debugging extensions */
-#define X86_CR4_PSE	0x00000010 /* enable page size extensions */
-#define X86_CR4_PAE	0x00000020 /* enable physical address extensions */
-#define X86_CR4_MCE	0x00000040 /* Machine check enable */
-#define X86_CR4_PGE	0x00000080 /* enable global pages */
-#define X86_CR4_PCE	0x00000100 /* enable performance counters at ipl 3 */
-#define X86_CR4_OSFXSR	0x00000200 /* enable fast FPU save and restore */
-#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
-#define X86_CR4_VMXE	0x00002000 /* enable VMX virtualization */
-#define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */
-#define X86_CR4_PCIDE	0x00020000 /* enable PCID support */
-#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
-#define X86_CR4_SMEP	0x00100000 /* enable SMEP support */
-#define X86_CR4_SMAP	0x00200000 /* enable SMAP support */
+#define X86_CR4_VME_BIT		0 /* enable vm86 extensions */
+#define X86_CR4_VME		_BITUL(X86_CR4_VME_BIT)
+#define X86_CR4_PVI_BIT		1 /* virtual interrupts flag enable */
+#define X86_CR4_PVI		_BITUL(X86_CR4_PVI_BIT)
+#define X86_CR4_TSD_BIT		2 /* disable time stamp at ipl 3 */
+#define X86_CR4_TSD		_BITUL(X86_CR4_TSD_BIT)
+#define X86_CR4_DE_BIT		3 /* enable debugging extensions */
+#define X86_CR4_DE		_BITUL(X86_CR4_DE_BIT)
+#define X86_CR4_PSE_BIT		4 /* enable page size extensions */
+#define X86_CR4_PSE		_BITUL(X86_CR4_PSE_BIT)
+#define X86_CR4_PAE_BIT		5 /* enable physical address extensions */
+#define X86_CR4_PAE		_BITUL(X86_CR4_PAE_BIT)
+#define X86_CR4_MCE_BIT		6 /* Machine check enable */
+#define X86_CR4_MCE		_BITUL(X86_CR4_MCE_BIT)
+#define X86_CR4_PGE_BIT		7 /* enable global pages */
+#define X86_CR4_PGE		_BITUL(X86_CR4_PGE_BIT)
+#define X86_CR4_PCE_BIT		8 /* enable performance counters at ipl 3 */
+#define X86_CR4_PCE		_BITUL(X86_CR4_PCE_BIT)
+#define X86_CR4_OSFXSR_BIT	9 /* enable fast FPU save and restore */
+#define X86_CR4_OSFXSR		_BITUL(X86_CR4_OSFXSR_BIT)
+#define X86_CR4_OSXMMEXCPT_BIT	10 /* enable unmasked SSE exceptions */
+#define X86_CR4_OSXMMEXCPT	_BITUL(X86_CR4_OSXMMEXCPT_BIT)
+#define X86_CR4_VMXE_BIT	13 /* enable VMX virtualization */
+#define X86_CR4_VMXE		_BITUL(X86_CR4_VMXE_BIT)
+#define X86_CR4_SMXE_BIT	14 /* enable safer mode (TXT) */
+#define X86_CR4_SMXE		_BITUL(X86_CR4_SMXE_BIT)
+#define X86_CR4_FSGSBASE_BIT	16 /* enable RDWRFSGS support */
+#define X86_CR4_FSGSBASE	_BITUL(X86_CR4_FSGSBASE_BIT)
+#define X86_CR4_PCIDE_BIT	17 /* enable PCID support */
+#define X86_CR4_PCIDE		_BITUL(X86_CR4_PCIDE_BIT)
+#define X86_CR4_OSXSAVE_BIT	18 /* enable xsave and xrestore */
+#define X86_CR4_OSXSAVE		_BITUL(X86_CR4_OSXSAVE_BIT)
+#define X86_CR4_SMEP_BIT	20 /* enable SMEP support */
+#define X86_CR4_SMEP		_BITUL(X86_CR4_SMEP_BIT)
+#define X86_CR4_SMAP_BIT	21 /* enable SMAP support */
+#define X86_CR4_SMAP		_BITUL(X86_CR4_SMAP_BIT)
 
 /*
  * x86-64 Task Priority Register, CR8
  */
-#define X86_CR8_TPR	0x0000000F /* task priority register */
+#define X86_CR8_TPR		_AC(0x0000000f,UL) /* task priority register */
 
 /*
  * AMD and Transmeta use MSRs for configuration; see <asm/msr-index.h>
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 7bd3bd31010..88d99ea7772 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -16,6 +16,8 @@ CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_early_printk.o = -pg
 endif
 
+CFLAGS_irq.o := -I$(src)/../include/asm/trace
+
 obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time.o ioport.o ldt.o dumpstack.o nmi.o
@@ -67,7 +69,7 @@ obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
 obj-y				+= kprobes/
 obj-$(CONFIG_MODULES)		+= module.o
-obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
+obj-$(CONFIG_DOUBLEFAULT)	+= doublefault.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
 obj-$(CONFIG_VM86)		+= vm86_32.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
@@ -93,6 +95,7 @@ obj-$(CONFIG_MICROCODE_INTEL_LIB)	+= microcode_intel_lib.o
 microcode-y				:= microcode_core.o
 microcode-$(CONFIG_MICROCODE_INTEL)	+= microcode_intel.o
 microcode-$(CONFIG_MICROCODE_AMD)	+= microcode_amd.o
+obj-$(CONFIG_MICROCODE_AMD_EARLY)	+= microcode_amd_early.o
 obj-$(CONFIG_MICROCODE)			+= microcode.o
 
 obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
@@ -102,6 +105,7 @@ obj-$(CONFIG_OF)			+= devicetree.o
 obj-$(CONFIG_UPROBES)			+= uprobes.o
 
 obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o
+obj-$(CONFIG_TRACING)			+= tracepoint.o
 
 ###
 # 64 bit specific files
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 230c8ea878e..d81a972dd50 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -44,6 +44,7 @@
 #include <asm/mpspec.h>
 #include <asm/smp.h>
 
+#include "sleep.h" /* To include x86_acpi_suspend_lowlevel */
 static int __initdata acpi_force = 0;
 u32 acpi_rsdt_forced;
 int acpi_disabled;
@@ -559,6 +560,12 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
 int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
 			   int trigger, int polarity) = acpi_register_gsi_pic;
 
+#ifdef CONFIG_ACPI_SLEEP
+int (*acpi_suspend_lowlevel)(void) = x86_acpi_suspend_lowlevel;
+#else
+int (*acpi_suspend_lowlevel)(void);
+#endif
+
 /*
  * success: return IRQ number (>=0)
  * failure: return < 0
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index b44577bc974..2a34aaf3c8f 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -26,12 +26,12 @@ static char temp_stack[4096];
 #endif
 
 /**
- * acpi_suspend_lowlevel - save kernel state
+ * x86_acpi_suspend_lowlevel - save kernel state
  *
  * Create an identity mapped page table and copy the wakeup routine to
  * low memory.
  */
-int acpi_suspend_lowlevel(void)
+int x86_acpi_suspend_lowlevel(void)
 {
 	struct wakeup_header *header =
 		(struct wakeup_header *) __va(real_mode_header->wakeup_header);
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 67f59f8c695..c9c2c982d5e 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -15,3 +15,5 @@ extern unsigned long acpi_copy_wakeup_routine(unsigned long);
 extern void wakeup_long64(void);
 
 extern void do_suspend_lowlevel(void);
+
+extern int x86_acpi_suspend_lowlevel(void);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 904611bf0e5..99663b59123 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -35,6 +35,7 @@
 #include <linux/smp.h>
 #include <linux/mm.h>
 
+#include <asm/trace/irq_vectors.h>
 #include <asm/irq_remapping.h>
 #include <asm/perf_event.h>
 #include <asm/x86_init.h>
@@ -919,17 +920,35 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
 	/*
 	 * NOTE! We'd better ACK the irq immediately,
 	 * because timer handling can be slow.
+	 *
+	 * update_process_times() expects us to have done irq_enter().
+	 * Besides, if we don't, timer interrupts ignore the global
+	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
-	ack_APIC_irq();
+	entering_ack_irq();
+	local_apic_timer_interrupt();
+	exiting_irq();
+
+	set_irq_regs(old_regs);
+}
+
+void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
 	/*
+	 * NOTE! We'd better ACK the irq immediately,
+	 * because timer handling can be slow.
+	 *
 	 * update_process_times() expects us to have done irq_enter().
 	 * Besides, if we don't timer interrupts ignore the global
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
-	irq_enter();
-	exit_idle();
+	entering_ack_irq();
+	trace_local_timer_entry(LOCAL_TIMER_VECTOR);
 	local_apic_timer_interrupt();
-	irq_exit();
+	trace_local_timer_exit(LOCAL_TIMER_VECTOR);
+	exiting_irq();
 
 	set_irq_regs(old_regs);
 }
@@ -1907,12 +1926,10 @@ int __init APIC_init_uniprocessor(void)
 /*
  * This interrupt should _never_ happen with our APIC/SMP architecture
  */
-void smp_spurious_interrupt(struct pt_regs *regs)
+static inline void __smp_spurious_interrupt(void)
 {
 	u32 v;
 
-	irq_enter();
-	exit_idle();
 	/*
 	 * Check if this really is a spurious interrupt and ACK it
 	 * if it is a vectored one.  Just in case...
@@ -1927,13 +1944,28 @@ void smp_spurious_interrupt(struct pt_regs *regs)
 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
 	pr_info("spurious APIC interrupt on CPU#%d, "
 		"should never happen.\n", smp_processor_id());
-	irq_exit();
+}
+
+void smp_spurious_interrupt(struct pt_regs *regs)
+{
+	entering_irq();
+	__smp_spurious_interrupt();
+	exiting_irq();
+}
+
+void smp_trace_spurious_interrupt(struct pt_regs *regs)
+{
+	entering_irq();
+	trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR);
+	__smp_spurious_interrupt();
+	trace_spurious_apic_exit(SPURIOUS_APIC_VECTOR);
+	exiting_irq();
 }
 
 /*
  * This interrupt should never happen with our APIC/SMP architecture
  */
-void smp_error_interrupt(struct pt_regs *regs)
+static inline void __smp_error_interrupt(struct pt_regs *regs)
 {
 	u32 v0, v1;
 	u32 i = 0;
@@ -1948,8 +1980,6 @@ void smp_error_interrupt(struct pt_regs *regs)
 		"Illegal register address",	/* APIC Error Bit 7 */
 	};
 
-	irq_enter();
-	exit_idle();
 	/* First tickle the hardware, only then report what went on. -- REW */
 	v0 = apic_read(APIC_ESR);
 	apic_write(APIC_ESR, 0);
@@ -1970,7 +2000,22 @@ void smp_error_interrupt(struct pt_regs *regs)
 
 	apic_printk(APIC_DEBUG, KERN_CONT "\n");
 
-	irq_exit();
+}
+
+void smp_error_interrupt(struct pt_regs *regs)
+{
+	entering_irq();
+	__smp_error_interrupt(regs);
+	exiting_irq();
+}
+
+void smp_trace_error_interrupt(struct pt_regs *regs)
+{
+	entering_irq();
+	trace_error_apic_entry(ERROR_APIC_VECTOR);
+	__smp_error_interrupt(regs);
+	trace_error_apic_exit(ERROR_APIC_VECTOR);
+	exiting_irq();
 }
 
 /**
@@ -2302,7 +2347,7 @@ static void lapic_resume(void)
 	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
 	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
 	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+#if defined(CONFIG_X86_MCE_INTEL)
 	if (maxlvt >= 5)
 		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
 #endif
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 794f6eb54cd..39cc7f7acab 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -51,6 +51,8 @@ DEFINE_PER_CPU(int, x2apic_extra_bits);
 
 static enum uv_system_type uv_system_type;
 static u64 gru_start_paddr, gru_end_paddr;
+static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
+static u64 gru_dist_lmask, gru_dist_umask;
 static union uvh_apicid uvh_apicid;
 int uv_min_hub_revision_id;
 EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
@@ -72,7 +74,20 @@ static unsigned long __init uv_early_read_mmr(unsigned long addr)
 
 static inline bool is_GRU_range(u64 start, u64 end)
 {
-	return start >= gru_start_paddr && end <= gru_end_paddr;
+	if (gru_dist_base) {
+		u64 su = start & gru_dist_umask; /* upper (incl pnode) bits */
+		u64 sl = start & gru_dist_lmask; /* base offset bits */
+		u64 eu = end & gru_dist_umask;
+		u64 el = end & gru_dist_lmask;
+
+		/* Must reside completely within a single GRU range */
+		return (sl == gru_dist_base && el == gru_dist_base &&
+			su >= gru_first_node_paddr &&
+			su <= gru_last_node_paddr &&
+			eu == su);
+	} else {
+		return start >= gru_start_paddr && end <= gru_end_paddr;
+	}
 }
 
 static bool uv_is_untracked_pat_range(u64 start, u64 end)
@@ -463,26 +478,63 @@ static __init void map_high(char *id, unsigned long base, int pshift,
 		pr_info("UV: Map %s_HI base address NULL\n", id);
 		return;
 	}
-	pr_info("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes);
+	pr_debug("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes);
 	if (map_type == map_uc)
 		init_extra_mapping_uc(paddr, bytes);
 	else
 		init_extra_mapping_wb(paddr, bytes);
 }
 
+static __init void map_gru_distributed(unsigned long c)
+{
+	union uvh_rh_gam_gru_overlay_config_mmr_u gru;
+	u64 paddr;
+	unsigned long bytes;
+	int nid;
+
+	gru.v = c;
+	/* only base bits 42:28 relevant in dist mode */
+	gru_dist_base = gru.v & 0x000007fff0000000UL;
+	if (!gru_dist_base) {
+		pr_info("UV: Map GRU_DIST base address NULL\n");
+		return;
+	}
+	bytes = 1UL << UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
+	gru_dist_lmask = ((1UL << uv_hub_info->m_val) - 1) & ~(bytes - 1);
+	gru_dist_umask = ~((1UL << uv_hub_info->m_val) - 1);
+	gru_dist_base &= gru_dist_lmask; /* Clear bits above M */
+	for_each_online_node(nid) {
+		paddr = ((u64)uv_node_to_pnode(nid) << uv_hub_info->m_val) |
+				gru_dist_base;
+		init_extra_mapping_wb(paddr, bytes);
+		gru_first_node_paddr = min(paddr, gru_first_node_paddr);
+		gru_last_node_paddr = max(paddr, gru_last_node_paddr);
+	}
+	/* Save upper (63:M) bits of address only for is_GRU_range */
+	gru_first_node_paddr &= gru_dist_umask;
+	gru_last_node_paddr &= gru_dist_umask;
+	pr_debug("UV: Map GRU_DIST base 0x%016llx  0x%016llx - 0x%016llx\n",
+		gru_dist_base, gru_first_node_paddr, gru_last_node_paddr);
+}
+
 static __init void map_gru_high(int max_pnode)
 {
 	union uvh_rh_gam_gru_overlay_config_mmr_u gru;
 	int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
 
 	gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
-	if (gru.s.enable) {
-		map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
-		gru_start_paddr = ((u64)gru.s.base << shift);
-		gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
-	} else {
+	if (!gru.s.enable) {
 		pr_info("UV: GRU disabled\n");
+		return;
+	}
+
+	if (is_uv3_hub() && gru.s3.mode) {
+		map_gru_distributed(gru.v);
+		return;
 	}
+	map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
+	gru_start_paddr = ((u64)gru.s.base << shift);
+	gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
 }
 
 static __init void map_mmr_high(int max_pnode)
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 0ef4bba2acb..d67c4be3e8b 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -28,7 +28,6 @@ void foo(void)
 	OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
 	OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
 	OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
-	OFFSET(CPUINFO_hard_math, cpuinfo_x86, hard_math);
 	OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
 	OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
 	OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index b0684e4a73a..47b56a7e99c 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -31,11 +31,15 @@ obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
 
 ifdef CONFIG_PERF_EVENTS
 obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd.o perf_event_amd_uncore.o
+ifdef CONFIG_AMD_IOMMU
+obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd_iommu.o
+endif
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore.o
 endif
 
+
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 5013a48d1af..c587a875722 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -90,7 +90,7 @@ static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c)
 static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
-	int mbytes = num_physpages >> (20-PAGE_SHIFT);
+	int mbytes = get_num_physpages() >> (20-PAGE_SHIFT);
 
 	if (c->x86_model < 6) {
 		/* Based on AMD doc 20734R - June 2000 */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 4112be9a465..03445346ee0 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -17,15 +17,6 @@
 #include <asm/paravirt.h>
 #include <asm/alternative.h>
 
-static int __init no_387(char *s)
-{
-	boot_cpu_data.hard_math = 0;
-	write_cr0(X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | read_cr0());
-	return 1;
-}
-
-__setup("no387", no_387);
-
 static double __initdata x = 4195835.0;
 static double __initdata y = 3145727.0;
 
@@ -44,15 +35,6 @@ static void __init check_fpu(void)
 {
 	s32 fdiv_bug;
 
-	if (!boot_cpu_data.hard_math) {
-#ifndef CONFIG_MATH_EMULATION
-		pr_emerg("No coprocessor found and no math emulation present\n");
-		pr_emerg("Giving up\n");
-		for (;;) ;
-#endif
-		return;
-	}
-
 	kernel_fpu_begin();
 
 	/*
@@ -107,5 +89,6 @@ void __init check_bugs(void)
 	 * kernel_fpu_begin/end() in check_fpu() relies on the patched
 	 * alternative instructions.
 	 */
-	check_fpu();
+	if (cpu_has_fpu)
+		check_fpu();
 }
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 22018f70a67..548bd039784 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -711,10 +711,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 		return;
 
 	cpu_detect(c);
-
 	get_cpu_vendor(c);
-
 	get_cpu_cap(c);
+	fpu_detect(c);
 
 	if (this_cpu->c_early_init)
 		this_cpu->c_early_init(c);
@@ -724,6 +723,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 
 	if (this_cpu->c_bsp_init)
 		this_cpu->c_bsp_init(c);
+
+	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
 }
 
 void __init early_cpu_init(void)
@@ -1071,8 +1072,8 @@ __setup("clearcpuid=", setup_disablecpuid);
 
 #ifdef CONFIG_X86_64
 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
-struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
-				    (unsigned long) nmi_idt_table };
+struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
+				    (unsigned long) debug_idt_table };
 
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE);
@@ -1148,20 +1149,20 @@ int is_debug_stack(unsigned long addr)
 		 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
 }
 
-static DEFINE_PER_CPU(u32, debug_stack_use_ctr);
+DEFINE_PER_CPU(u32, debug_idt_ctr);
 
 void debug_stack_set_zero(void)
 {
-	this_cpu_inc(debug_stack_use_ctr);
-	load_idt((const struct desc_ptr *)&nmi_idt_descr);
+	this_cpu_inc(debug_idt_ctr);
+	load_current_idt();
 }
 
 void debug_stack_reset(void)
 {
-	if (WARN_ON(!this_cpu_read(debug_stack_use_ctr)))
+	if (WARN_ON(!this_cpu_read(debug_idt_ctr)))
 		return;
-	if (this_cpu_dec_return(debug_stack_use_ctr) == 0)
-		load_idt((const struct desc_ptr *)&idt_descr);
+	if (this_cpu_dec_return(debug_idt_ctr) == 0)
+		load_current_idt();
 }
 
 #else	/* CONFIG_X86_64 */
@@ -1257,7 +1258,7 @@ void __cpuinit cpu_init(void)
 	switch_to_new_gdt(cpu);
 	loadsegment(fs, 0);
 
-	load_idt((const struct desc_ptr *)&idt_descr);
+	load_current_idt();
 
 	memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
 	syscall_init();
@@ -1334,7 +1335,7 @@ void __cpuinit cpu_init(void)
 	if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
 		clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
-	load_idt(&idt_descr);
+	load_current_idt();
 	switch_to_new_gdt(cpu);
 
 	/*
@@ -1363,3 +1364,17 @@ void __cpuinit cpu_init(void)
 	fpu_init();
 }
 #endif
+
+#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
+void warn_pre_alternatives(void)
+{
+	WARN(1, "You're using static_cpu_has before alternatives have run!\n");
+}
+EXPORT_SYMBOL_GPL(warn_pre_alternatives);
+#endif
+
+inline bool __static_cpu_has_safe(u16 bit)
+{
+	return boot_cpu_has(bit);
+}
+EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index d048d5ca43c..7582f475b16 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -333,7 +333,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
 		switch (dir0_lsn) {
 		case 0xd:  /* either a 486SLC or DLC w/o DEVID */
 			dir0_msn = 0;
-			p = Cx486_name[(c->hard_math) ? 1 : 0];
+			p = Cx486_name[(cpu_has_fpu ? 1 : 0)];
 			break;
 
 		case 0xe:  /* a 486S A step */
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 7c6f7d548c0..8dc72dda66f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -618,36 +618,34 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 		 * parameters cpuid leaf to find the cache details
 		 */
 		for (i = 0; i < num_cache_leaves; i++) {
-			struct _cpuid4_info_regs this_leaf;
+			struct _cpuid4_info_regs this_leaf = {};
 			int retval;
 
 			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
-			if (retval >= 0) {
-				switch (this_leaf.eax.split.level) {
-				case 1:
-					if (this_leaf.eax.split.type ==
-							CACHE_TYPE_DATA)
-						new_l1d = this_leaf.size/1024;
-					else if (this_leaf.eax.split.type ==
-							CACHE_TYPE_INST)
-						new_l1i = this_leaf.size/1024;
-					break;
-				case 2:
-					new_l2 = this_leaf.size/1024;
-					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
-					index_msb = get_count_order(num_threads_sharing);
-					l2_id = c->apicid & ~((1 << index_msb) - 1);
-					break;
-				case 3:
-					new_l3 = this_leaf.size/1024;
-					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
-					index_msb = get_count_order(
-							num_threads_sharing);
-					l3_id = c->apicid & ~((1 << index_msb) - 1);
-					break;
-				default:
-					break;
-				}
+			if (retval < 0)
+				continue;
+
+			switch (this_leaf.eax.split.level) {
+			case 1:
+				if (this_leaf.eax.split.type == CACHE_TYPE_DATA)
+					new_l1d = this_leaf.size/1024;
+				else if (this_leaf.eax.split.type == CACHE_TYPE_INST)
+					new_l1i = this_leaf.size/1024;
+				break;
+			case 2:
+				new_l2 = this_leaf.size/1024;
+				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+				index_msb = get_count_order(num_threads_sharing);
+				l2_id = c->apicid & ~((1 << index_msb) - 1);
+				break;
+			case 3:
+				new_l3 = this_leaf.size/1024;
+				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+				index_msb = get_count_order(num_threads_sharing);
+				l3_id = c->apicid & ~((1 << index_msb) - 1);
+				break;
+			default:
+				break;
 			}
 		}
 	}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index ddc72f83933..5ac2d1fb28b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -153,7 +153,7 @@ static void raise_mce(struct mce *m)
 		return;
 
 #ifdef CONFIG_X86_LOCAL_APIC
-	if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) {
+	if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
 		unsigned long start;
 		int cpu;
 
@@ -167,7 +167,7 @@ static void raise_mce(struct mce *m)
 				cpumask_clear_cpu(cpu, mce_inject_cpumask);
 		}
 		if (!cpumask_empty(mce_inject_cpumask)) {
-			if (m->inject_flags & MCJ_IRQ_BRAODCAST) {
+			if (m->inject_flags & MCJ_IRQ_BROADCAST) {
 				/*
 				 * don't wait because mce_irq_ipi is necessary
 				 * to be sync with following raise_local
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index beb1f1689e5..e2703520d12 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -110,22 +110,17 @@ static struct severity {
 	/* known AR MCACODs: */
 #ifdef	CONFIG_MEMORY_FAILURE
 	MCESEV(
-		KEEP, "HT thread notices Action required: data load error",
-		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
-		MCGMASK(MCG_STATUS_EIPV, 0)
+		KEEP, "Action required but unaffected thread is continuable",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR),
+		MCGMASK(MCG_STATUS_RIPV, MCG_STATUS_RIPV)
 		),
 	MCESEV(
-		AR, "Action required: data load error",
+		AR, "Action required: data load error in a user process",
 		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
 		USER
 		),
 	MCESEV(
-		KEEP, "HT thread notices Action required: instruction fetch error",
-		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
-		MCGMASK(MCG_STATUS_EIPV, 0)
-		),
-	MCESEV(
-		AR, "Action required: instruction fetch error",
+		AR, "Action required: instruction fetch error in a user process",
 		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
 		USER
 		),
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9239504b41c..bf49cdbb010 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -89,7 +89,10 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int			cpu_missing;
 
-/* MCA banks polled by the period polling timer for corrected events */
+/*
+ * MCA banks polled by the periodic polling timer for corrected events.
+ * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
+ */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
 	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
 };
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index ae1697c2afe..d56405309dc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -24,6 +24,18 @@
  * Also supports reliable discovery of shared banks.
  */
 
+/*
+ * CMCI can be delivered to multiple cpus that share a machine check bank
+ * so we need to designate a single cpu to process errors logged in each bank
+ * in the interrupt handler (otherwise we would have many races and potential
+ * double reporting of the same error).
+ * Note that this can change when a cpu is offlined or brought online since
+ * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
+ * disables CMCI on all banks owned by the cpu and clears this bitfield. At
+ * this point, cmci_rediscover() kicks in and a different cpu may end up
+ * taking ownership of some of the shared MCA banks that were previously
+ * owned by the offlined cpu.
+ */
 static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
 
 /*
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 47a1870279a..98f2083832e 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -29,6 +29,7 @@
 #include <asm/idle.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
+#include <asm/trace/irq_vectors.h>
 
 /* How long to wait between reporting thermal events */
 #define CHECK_INTERVAL		(300 * HZ)
@@ -181,11 +182,6 @@ static int therm_throt_process(bool new_event, int event, int level)
 				this_cpu,
 				level == CORE_LEVEL ? "Core" : "Package",
 				state->count);
-		else
-			printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n",
-				this_cpu,
-				level == CORE_LEVEL ? "Core" : "Package",
-				state->count);
 		return 1;
 	}
 	if (old_event) {
@@ -193,10 +189,6 @@ static int therm_throt_process(bool new_event, int event, int level)
 			printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
 				this_cpu,
 				level == CORE_LEVEL ? "Core" : "Package");
-		else
-			printk(KERN_INFO "CPU%d: %s power limit normal\n",
-				this_cpu,
-				level == CORE_LEVEL ? "Core" : "Package");
 		return 1;
 	}
 
@@ -219,6 +211,15 @@ static int thresh_event_valid(int event)
 	return 1;
 }
 
+static bool int_pln_enable;
+static int __init int_pln_enable_setup(char *s)
+{
+	int_pln_enable = true;
+
+	return 1;
+}
+__setup("int_pln_enable", int_pln_enable_setup);
+
 #ifdef CONFIG_SYSFS
 /* Add/Remove thermal_throttle interface for CPU device: */
 static __cpuinit int thermal_throttle_add_dev(struct device *dev,
@@ -231,7 +232,7 @@ static __cpuinit int thermal_throttle_add_dev(struct device *dev,
 	if (err)
 		return err;
 
-	if (cpu_has(c, X86_FEATURE_PLN))
+	if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
 		err = sysfs_add_file_to_group(&dev->kobj,
 					      &dev_attr_core_power_limit_count.attr,
 					      thermal_attr_group.name);
@@ -239,7 +240,7 @@ static __cpuinit int thermal_throttle_add_dev(struct device *dev,
 		err = sysfs_add_file_to_group(&dev->kobj,
 					      &dev_attr_package_throttle_count.attr,
 					      thermal_attr_group.name);
-		if (cpu_has(c, X86_FEATURE_PLN))
+		if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
 			err = sysfs_add_file_to_group(&dev->kobj,
 					&dev_attr_package_power_limit_count.attr,
 					thermal_attr_group.name);
@@ -352,7 +353,7 @@ static void intel_thermal_interrupt(void)
 				CORE_LEVEL) != 0)
 		mce_log_therm_throt_event(msr_val);
 
-	if (this_cpu_has(X86_FEATURE_PLN))
+	if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
 		therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
 					CORE_LEVEL);
@@ -362,7 +363,7 @@ static void intel_thermal_interrupt(void)
 		therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
 					THERMAL_THROTTLING_EVENT,
 					PACKAGE_LEVEL);
-		if (this_cpu_has(X86_FEATURE_PLN))
+		if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
 			therm_throt_process(msr_val &
 					PACKAGE_THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
@@ -378,15 +379,26 @@ static void unexpected_thermal_interrupt(void)
 
 static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
 
-asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
+static inline void __smp_thermal_interrupt(void)
 {
-	irq_enter();
-	exit_idle();
 	inc_irq_stat(irq_thermal_count);
 	smp_thermal_vector();
-	irq_exit();
-	/* Ack only at the end to avoid potential reentry */
-	ack_APIC_irq();
+}
+
+asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
+{
+	entering_irq();
+	__smp_thermal_interrupt();
+	exiting_ack_irq();
+}
+
+asmlinkage void smp_trace_thermal_interrupt(struct pt_regs *regs)
+{
+	entering_irq();
+	trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
+	__smp_thermal_interrupt();
+	trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
+	exiting_ack_irq();
 }
 
 /* Thermal monitoring depends on APIC, ACPI and clock modulation */
@@ -470,9 +482,13 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	apic_write(APIC_LVTTHMR, h);
 
 	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
-	if (cpu_has(c, X86_FEATURE_PLN))
+	if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
+		wrmsr(MSR_IA32_THERM_INTERRUPT,
+			(l | (THERM_INT_LOW_ENABLE
+			| THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
+	else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
 		wrmsr(MSR_IA32_THERM_INTERRUPT,
-		      l | (THERM_INT_LOW_ENABLE
+			l | (THERM_INT_LOW_ENABLE
 			| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
 	else
 		wrmsr(MSR_IA32_THERM_INTERRUPT,
@@ -480,9 +496,14 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 
 	if (cpu_has(c, X86_FEATURE_PTS)) {
 		rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
-		if (cpu_has(c, X86_FEATURE_PLN))
+		if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
 			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
-			      l | (PACKAGE_THERM_INT_LOW_ENABLE
+				(l | (PACKAGE_THERM_INT_LOW_ENABLE
+				| PACKAGE_THERM_INT_HIGH_ENABLE))
+				& ~PACKAGE_THERM_INT_PLN_ENABLE, h);
+		else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
+			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+				l | (PACKAGE_THERM_INT_LOW_ENABLE
 				| PACKAGE_THERM_INT_HIGH_ENABLE
 				| PACKAGE_THERM_INT_PLN_ENABLE), h);
 		else
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index aa578cadb94..fe6b1c86645 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -8,6 +8,7 @@
 #include <asm/apic.h>
 #include <asm/idle.h>
 #include <asm/mce.h>
+#include <asm/trace/irq_vectors.h>
 
 static void default_threshold_interrupt(void)
 {
@@ -17,13 +18,24 @@ static void default_threshold_interrupt(void)
 
 void (*mce_threshold_vector)(void) = default_threshold_interrupt;
 
-asmlinkage void smp_threshold_interrupt(void)
+static inline void __smp_threshold_interrupt(void)
 {
-	irq_enter();
-	exit_idle();
 	inc_irq_stat(irq_threshold_count);
 	mce_threshold_vector();
-	irq_exit();
-	/* Ack only at the end to avoid potential reentry */
-	ack_APIC_irq();
+}
+
+asmlinkage void smp_threshold_interrupt(void)
+{
+	entering_irq();
+	__smp_threshold_interrupt();
+	exiting_ack_irq();
+}
+
+asmlinkage void smp_trace_threshold_interrupt(void)
+{
+	entering_irq();
+	trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
+	__smp_threshold_interrupt();
+	trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
+	exiting_ack_irq();
 }
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 68a3343e579..9e451b0876b 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -167,7 +167,7 @@ static void post_set(void)
 	setCx86(CX86_CCR3, ccr3);
 
 	/* Enable caches */
-	write_cr0(read_cr0() & 0xbfffffff);
+	write_cr0(read_cr0() & ~X86_CR0_CD);
 
 	/* Restore value of CR4 */
 	if (cpu_has_pge)
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index fa72a39e5d4..d4cdfa67509 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -510,8 +510,9 @@ generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
 static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 			     unsigned long *size, mtrr_type *type)
 {
-	unsigned int mask_lo, mask_hi, base_lo, base_hi;
-	unsigned int tmp, hi;
+	u32 mask_lo, mask_hi, base_lo, base_hi;
+	unsigned int hi;
+	u64 tmp, mask;
 
 	/*
 	 * get_mtrr doesn't need to update mtrr_state, also it could be called
@@ -532,18 +533,18 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 	rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
 
 	/* Work out the shifted address mask: */
-	tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
-	mask_lo = size_or_mask | tmp;
+	tmp = (u64)mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
+	mask = size_or_mask | tmp;
 
 	/* Expand tmp with high bits to all 1s: */
-	hi = fls(tmp);
+	hi = fls64(tmp);
 	if (hi > 0) {
-		tmp |= ~((1<<(hi - 1)) - 1);
+		tmp |= ~((1ULL<<(hi - 1)) - 1);
 
-		if (tmp != mask_lo) {
+		if (tmp != mask) {
 			printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
 			add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
-			mask_lo = tmp;
+			mask = tmp;
 		}
 	}
 
@@ -551,8 +552,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 	 * This works correctly if size is a power of two, i.e. a
 	 * contiguous range:
 	 */
-	*size = -mask_lo;
-	*base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
+	*size = -mask;
+	*base = (u64)base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
 	*type = base_lo & 0xff;
 
 out_put_cpu:
@@ -701,7 +702,7 @@ static void post_set(void) __releases(set_atomicity_lock)
 	mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
 
 	/* Enable caches */
-	write_cr0(read_cr0() & 0xbfffffff);
+	write_cr0(read_cr0() & ~X86_CR0_CD);
 
 	/* Restore value of CR4 */
 	if (cpu_has_pge)
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 726bf963c22..ca22b73aaa2 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -305,7 +305,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
 		return -EINVAL;
 	}
 
-	if (base & size_or_mask || size & size_or_mask) {
+	if ((base | (base + size - 1)) >>
+	    (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
 		pr_warning("mtrr: base or size exceeds the MTRR width\n");
 		return -EINVAL;
 	}
@@ -583,6 +584,7 @@ static struct syscore_ops mtrr_syscore_ops = {
 
 int __initdata changed_by_mtrr_cleanup;
 
+#define SIZE_OR_MASK_BITS(n)  (~((1ULL << ((n) - PAGE_SHIFT)) - 1))
 /**
  * mtrr_bp_init - initialize mtrrs on the boot CPU
  *
@@ -600,7 +602,7 @@ void __init mtrr_bp_init(void)
 
 	if (cpu_has_mtrr) {
 		mtrr_if = &generic_mtrr_ops;
-		size_or_mask = 0xff000000;			/* 36 bits */
+		size_or_mask = SIZE_OR_MASK_BITS(36);
 		size_and_mask = 0x00f00000;
 		phys_addr = 36;
 
@@ -619,7 +621,7 @@ void __init mtrr_bp_init(void)
 			     boot_cpu_data.x86_mask == 0x4))
 				phys_addr = 36;
 
-			size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1);
+			size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
 			size_and_mask = ~size_or_mask & 0xfffff00000ULL;
 		} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
 			   boot_cpu_data.x86 == 6) {
@@ -627,7 +629,7 @@ void __init mtrr_bp_init(void)
 			 * VIA C* family have Intel style MTRRs,
 			 * but don't support PAE
 			 */
-			size_or_mask = 0xfff00000;		/* 32 bits */
+			size_or_mask = SIZE_OR_MASK_BITS(32);
 			size_and_mask = 0;
 			phys_addr = 32;
 		}
@@ -637,21 +639,21 @@ void __init mtrr_bp_init(void)
 			if (cpu_has_k6_mtrr) {
 				/* Pre-Athlon (K6) AMD CPU MTRRs */
 				mtrr_if = mtrr_ops[X86_VENDOR_AMD];
-				size_or_mask = 0xfff00000;	/* 32 bits */
+				size_or_mask = SIZE_OR_MASK_BITS(32);
 				size_and_mask = 0;
 			}
 			break;
 		case X86_VENDOR_CENTAUR:
 			if (cpu_has_centaur_mcr) {
 				mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
-				size_or_mask = 0xfff00000;	/* 32 bits */
+				size_or_mask = SIZE_OR_MASK_BITS(32);
 				size_and_mask = 0;
 			}
 			break;
 		case X86_VENDOR_CYRIX:
 			if (cpu_has_cyrix_arr) {
 				mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
-				size_or_mask = 0xfff00000;	/* 32 bits */
+				size_or_mask = SIZE_OR_MASK_BITS(32);
 				size_and_mask = 0;
 			}
 			break;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1025f3c99d2..9e581c5cf6d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -403,7 +403,8 @@ int x86_pmu_hw_config(struct perf_event *event)
 		 * check that PEBS LBR correction does not conflict with
 		 * whatever the user is asking with attr->branch_sample_type
 		 */
-		if (event->attr.precise_ip > 1) {
+		if (event->attr.precise_ip > 1 &&
+		    x86_pmu.intel_cap.pebs_format < 2) {
 			u64 *br_type = &event->attr.branch_sample_type;
 
 			if (has_branch_stack(event)) {
@@ -568,7 +569,7 @@ struct sched_state {
 struct perf_sched {
 	int			max_weight;
 	int			max_events;
-	struct event_constraint	**constraints;
+	struct perf_event	**events;
 	struct sched_state	state;
 	int			saved_states;
 	struct sched_state	saved[SCHED_STATES_MAX];
@@ -577,7 +578,7 @@ struct perf_sched {
 /*
  * Initialize interator that runs through all events and counters.
  */
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
 			    int num, int wmin, int wmax)
 {
 	int idx;
@@ -585,10 +586,10 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
 	memset(sched, 0, sizeof(*sched));
 	sched->max_events	= num;
 	sched->max_weight	= wmax;
-	sched->constraints	= c;
+	sched->events		= events;
 
 	for (idx = 0; idx < num; idx++) {
-		if (c[idx]->weight == wmin)
+		if (events[idx]->hw.constraint->weight == wmin)
 			break;
 	}
 
@@ -635,8 +636,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
 	if (sched->state.event >= sched->max_events)
 		return false;
 
-	c = sched->constraints[sched->state.event];
-
+	c = sched->events[sched->state.event]->hw.constraint;
 	/* Prefer fixed purpose counters */
 	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
 		idx = INTEL_PMC_IDX_FIXED;
@@ -694,7 +694,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
 			if (sched->state.weight > sched->max_weight)
 				return false;
 		}
-		c = sched->constraints[sched->state.event];
+		c = sched->events[sched->state.event]->hw.constraint;
 	} while (c->weight != sched->state.weight);
 
 	sched->state.counter = 0;	/* start with first counter */
@@ -705,12 +705,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
 /*
  * Assign a counter for each event.
  */
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
 			int wmin, int wmax, int *assign)
 {
 	struct perf_sched sched;
 
-	perf_sched_init(&sched, constraints, n, wmin, wmax);
+	perf_sched_init(&sched, events, n, wmin, wmax);
 
 	do {
 		if (!perf_sched_find_counter(&sched))
@@ -724,16 +724,19 @@ int perf_assign_events(struct event_constraint **constraints, int n,
 
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
-	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
+	struct event_constraint *c;
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	struct perf_event *e;
 	int i, wmin, wmax, num = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
 	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+		hwc = &cpuc->event_list[i]->hw;
 		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
-		constraints[i] = c;
+		hwc->constraint = c;
+
 		wmin = min(wmin, c->weight);
 		wmax = max(wmax, c->weight);
 	}
@@ -743,7 +746,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	 */
 	for (i = 0; i < n; i++) {
 		hwc = &cpuc->event_list[i]->hw;
-		c = constraints[i];
+		c = hwc->constraint;
 
 		/* never assigned */
 		if (hwc->idx == -1)
@@ -764,16 +767,35 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 
 	/* slow path */
 	if (i != n)
-		num = perf_assign_events(constraints, n, wmin, wmax, assign);
+		num = perf_assign_events(cpuc->event_list, n, wmin,
+					 wmax, assign);
 
 	/*
+	 * Mark the event as committed, so we do not put_constraint()
+	 * in case new events are added and fail scheduling.
+	 */
+	if (!num && assign) {
+		for (i = 0; i < n; i++) {
+			e = cpuc->event_list[i];
+			e->hw.flags |= PERF_X86_EVENT_COMMITTED;
+		}
+	}
+	/*
 	 * scheduling failed or is just a simulation,
 	 * free resources if necessary
 	 */
 	if (!assign || num) {
 		for (i = 0; i < n; i++) {
+			e = cpuc->event_list[i];
+			/*
+			 * do not put_constraint() on comitted events,
+			 * because they are good to go
+			 */
+			if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
+				continue;
+
 			if (x86_pmu.put_event_constraints)
-				x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
+				x86_pmu.put_event_constraints(cpuc, e);
 		}
 	}
 	return num ? -EINVAL : 0;
@@ -1153,6 +1175,11 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	int i;
 
 	/*
+	 * event is descheduled
+	 */
+	event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
+
+	/*
 	 * If we're called during a txn, we don't need to do anything.
 	 * The events never got scheduled and ->cancel_txn will truncate
 	 * the event_list.
@@ -1249,10 +1276,20 @@ void perf_events_lapic_init(void)
 static int __kprobes
 perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
+	int ret;
+	u64 start_clock;
+	u64 finish_clock;
+
 	if (!atomic_read(&active_events))
 		return NMI_DONE;
 
-	return x86_pmu.handle_irq(regs);
+	start_clock = local_clock();
+	ret = x86_pmu.handle_irq(regs);
+	finish_clock = local_clock();
+
+	perf_sample_event_took(finish_clock - start_clock);
+
+	return ret;
 }
 
 struct event_constraint emptyconstraint;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index ba9aadfa683..97e557bc4c9 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -63,10 +63,12 @@ struct event_constraint {
 	int	flags;
 };
 /*
- * struct event_constraint flags
+ * struct hw_perf_event.flags flags
  */
 #define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
 #define PERF_X86_EVENT_PEBS_ST		0x2 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW	0x4 /* haswell style st data sampling */
+#define PERF_X86_EVENT_COMMITTED	0x8 /* event passed commit_txn */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -227,11 +229,14 @@ struct cpu_hw_events {
  *  - inv
  *  - edge
  *  - cnt-mask
+ *  - in_tx
+ *  - in_tx_checkpointed
  *  The other filters are supported by fixed counters.
  *  The any-thread option is supported starting with v3.
  */
+#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
+	EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
 
 /*
  * Constraint on the Event code + UMask
@@ -247,6 +252,11 @@ struct cpu_hw_events {
 	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
 			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
 
+/* DataLA version of store sampling without extra enable bit. */
+#define INTEL_PST_HSW_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
@@ -301,6 +311,11 @@ union perf_capabilities {
 		u64	pebs_arch_reg:1;
 		u64	pebs_format:4;
 		u64	smm_freeze:1;
+		/*
+		 * PMU supports separate counter range for writing
+		 * values > 32bit.
+		 */
+		u64	full_width_write:1;
 	};
 	u64	capabilities;
 };
@@ -375,6 +390,7 @@ struct x86_pmu {
 	struct event_constraint *event_constraints;
 	struct x86_pmu_quirk *quirks;
 	int		perfctr_second_write;
+	bool		late_ack;
 
 	/*
 	 * sysfs attrs
@@ -528,7 +544,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 
 void x86_pmu_enable_all(int added);
 
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
 			int wmin, int wmax, int *assign);
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
 
@@ -633,6 +649,8 @@ extern struct event_constraint intel_snb_pebs_event_constraints[];
 
 extern struct event_constraint intel_ivb_pebs_event_constraints[];
 
+extern struct event_constraint intel_hsw_pebs_event_constraints[];
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
 void intel_pmu_pebs_enable(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 7e28d9467bb..4cbe03287b0 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -648,48 +648,48 @@ static __initconst const struct x86_pmu amd_pmu = {
 	.cpu_dead		= amd_pmu_cpu_dead,
 };
 
-static int setup_event_constraints(void)
+static int __init amd_core_pmu_init(void)
 {
-	if (boot_cpu_data.x86 == 0x15)
+	if (!cpu_has_perfctr_core)
+		return 0;
+
+	switch (boot_cpu_data.x86) {
+	case 0x15:
+		pr_cont("Fam15h ");
 		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
-	return 0;
-}
+		break;
 
-static int setup_perfctr_core(void)
-{
-	if (!cpu_has_perfctr_core) {
-		WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h,
-		     KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!");
+	default:
+		pr_err("core perfctr but no constraints; unknown hardware!\n");
 		return -ENODEV;
 	}
 
-	WARN(x86_pmu.get_event_constraints == amd_get_event_constraints,
-	     KERN_ERR "hw perf events core counters need constraints handler!");
-
 	/*
 	 * If core performance counter extensions exists, we must use
 	 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
-	 * x86_pmu_addr_offset().
+	 * amd_pmu_addr_offset().
 	 */
 	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
 	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
 	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;
 
-	printk(KERN_INFO "perf: AMD core performance counters detected\n");
-
+	pr_cont("core perfctr, ");
 	return 0;
 }
 
 __init int amd_pmu_init(void)
 {
+	int ret;
+
 	/* Performance-monitoring supported from K7 and later: */
 	if (boot_cpu_data.x86 < 6)
 		return -ENODEV;
 
 	x86_pmu = amd_pmu;
 
-	setup_event_constraints();
-	setup_perfctr_core();
+	ret = amd_core_pmu_init();
+	if (ret)
+		return ret;
 
 	/* Events are common for all AMDs */
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
new file mode 100644
index 00000000000..0db655ef391
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
@@ -0,0 +1,504 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
+
+#include "perf_event.h"
+#include "perf_event_amd_iommu.h"
+
+#define COUNTER_SHIFT		16
+
+#define _GET_BANK(ev)       ((u8)(ev->hw.extra_reg.reg >> 8))
+#define _GET_CNTR(ev)       ((u8)(ev->hw.extra_reg.reg))
+
+/* iommu pmu config masks */
+#define _GET_CSOURCE(ev)    ((ev->hw.config & 0xFFULL))
+#define _GET_DEVID(ev)      ((ev->hw.config >> 8)  & 0xFFFFULL)
+#define _GET_PASID(ev)      ((ev->hw.config >> 24) & 0xFFFFULL)
+#define _GET_DOMID(ev)      ((ev->hw.config >> 40) & 0xFFFFULL)
+#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config)  & 0xFFFFULL)
+#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
+#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
+
+struct perf_amd_iommu {
+	struct pmu pmu;
+	u8 max_banks;		/* filled in by perf_iommu_event_init() */
+	u8 max_counters;	/* filled in by perf_iommu_event_init() */
+	u64 cntr_assign_mask;	/* one bit per claimed bank/counter */
+	raw_spinlock_t lock;	/* taken around cntr_assign_mask updates */
+	const struct attribute_group *attr_groups[4];
+};
+
+static struct perf_amd_iommu __perf_iommu;	/* type must be complete here */
+
+#define format_group	attr_groups[0]
+#define cpumask_group	attr_groups[1]
+#define events_group	attr_groups[2]
+#define null_group	attr_groups[3]
+
+/*---------------------------------------------
+ * sysfs format attributes
+ *---------------------------------------------*/
+PMU_FORMAT_ATTR(csource,    "config:0-7");
+PMU_FORMAT_ATTR(devid,      "config:8-23");
+PMU_FORMAT_ATTR(pasid,      "config:24-39");
+PMU_FORMAT_ATTR(domid,      "config:40-55");
+PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
+PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
+PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
+
+static struct attribute *iommu_format_attrs[] = {
+	&format_attr_csource.attr,
+	&format_attr_devid.attr,
+	&format_attr_pasid.attr,
+	&format_attr_domid.attr,
+	&format_attr_devid_mask.attr,
+	&format_attr_pasid_mask.attr,
+	&format_attr_domid_mask.attr,
+	NULL,
+};
+
+static struct attribute_group amd_iommu_format_group = {
+	.name = "format",
+	.attrs = iommu_format_attrs,
+};
+
+/*---------------------------------------------
+ * sysfs events attributes
+ *---------------------------------------------*/
+struct amd_iommu_event_desc {
+	struct kobj_attribute attr;
+	const char *event;
+};
+
+static ssize_t _iommu_event_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	struct amd_iommu_event_desc *event =
+		container_of(attr, struct amd_iommu_event_desc, attr);
+	return sprintf(buf, "%s\n", event->event);
+}
+
+#define AMD_IOMMU_EVENT_DESC(_name, _event)			\
+{								\
+	.attr  = __ATTR(_name, 0444, _iommu_event_show, NULL),	\
+	.event = _event,					\
+}
+
+static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
+	AMD_IOMMU_EVENT_DESC(mem_pass_untrans,        "csource=0x01"),
+	AMD_IOMMU_EVENT_DESC(mem_pass_pretrans,       "csource=0x02"),
+	AMD_IOMMU_EVENT_DESC(mem_pass_excl,           "csource=0x03"),
+	AMD_IOMMU_EVENT_DESC(mem_target_abort,        "csource=0x04"),
+	AMD_IOMMU_EVENT_DESC(mem_trans_total,         "csource=0x05"),
+	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit,   "csource=0x06"),
+	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis,   "csource=0x07"),
+	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit,   "csource=0x08"),
+	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis,   "csource=0x09"),
+	AMD_IOMMU_EVENT_DESC(mem_dte_hit,             "csource=0x0a"),
+	AMD_IOMMU_EVENT_DESC(mem_dte_mis,             "csource=0x0b"),
+	AMD_IOMMU_EVENT_DESC(page_tbl_read_tot,       "csource=0x0c"),
+	AMD_IOMMU_EVENT_DESC(page_tbl_read_nst,       "csource=0x0d"),
+	AMD_IOMMU_EVENT_DESC(page_tbl_read_gst,       "csource=0x0e"),
+	AMD_IOMMU_EVENT_DESC(int_dte_hit,             "csource=0x0f"),
+	AMD_IOMMU_EVENT_DESC(int_dte_mis,             "csource=0x10"),
+	AMD_IOMMU_EVENT_DESC(cmd_processed,           "csource=0x11"),
+	AMD_IOMMU_EVENT_DESC(cmd_processed_inv,       "csource=0x12"),
+	AMD_IOMMU_EVENT_DESC(tlb_inv,                 "csource=0x13"),
+	{ /* end: all zeroes */ },
+};
+
+/*---------------------------------------------
+ * sysfs cpumask attributes
+ *---------------------------------------------*/
+static cpumask_t iommu_cpumask;
+
+static ssize_t _iommu_cpumask_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &iommu_cpumask);
+	buf[n++] = '\n';
+	buf[n] = '\0';
+	return n;
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);
+
+static struct attribute *iommu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group amd_iommu_cpumask_group = {
+	.attrs = iommu_cpumask_attrs,
+};
+
+/*---------------------------------------------*/
+
+/* Mask bit for (bank, cntr); -1 if out of range or beyond 64 bits. */
+static int iommu_bnk_cntr_shift(struct perf_amd_iommu *perf_iommu,
+				int bank, int cntr)
+{
+	int shift = bank * perf_iommu->max_counters + cntr;
+
+	if (bank >= perf_iommu->max_banks || cntr >= perf_iommu->max_counters)
+		return -1;
+	return shift < 64 ? shift : -1;
+}
+
+/* Claim the first free pair: returns (bank << 8) | cntr, or -ENOSPC. */
+static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
+{
+	unsigned long flags;
+	int shift, bank, cntr, retval = -ENOSPC;
+
+	raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+	for (bank = 0; bank < perf_iommu->max_banks; bank++) {
+		for (cntr = 0; cntr < perf_iommu->max_counters; cntr++) {
+			shift = iommu_bnk_cntr_shift(perf_iommu, bank, cntr);
+			if (shift < 0 ||
+			    (perf_iommu->cntr_assign_mask & (1ULL << shift)))
+				continue;
+			perf_iommu->cntr_assign_mask |= (1ULL << shift);
+			retval = (bank << 8) | cntr;
+			goto out;
+		}
+	}
+out:
+	raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+	return retval;
+}
+
+/* Release a pair claimed above; -EINVAL if it is not a trackable pair. */
+static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
+					u8 bank, u8 cntr)
+{
+	unsigned long flags;
+	int shift = iommu_bnk_cntr_shift(perf_iommu, bank, cntr);
+
+	if (shift < 0)
+		return -EINVAL;
+	raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+	perf_iommu->cntr_assign_mask &= ~(1ULL << shift);
+	raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+	return 0;
+}
+
+/*
+ * Validate a new event for the amd_iommu PMU and copy attr.config and
+ * attr.config1 into the hw_perf_event.
+ *
+ * Returns 0 on success, -ENOENT when the event does not belong to this
+ * PMU, and -EINVAL for an unsupported mode, an event without a CPU, or
+ * when no banks/counters are reported.
+ */
+static int perf_iommu_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_amd_iommu *perf_iommu = &__perf_iommu;
+
+	/* test the event attr type check for PMU enumeration */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/*
+	 * IOMMU counters are shared across all cores.
+	 * Therefore, it does not support per-process mode.
+	 * Also, it does not support event sampling mode.
+	 */
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EINVAL;
+
+	/* IOMMU counters do not have usr/os/guest/host bits */
+	if (event->attr.exclude_user || event->attr.exclude_kernel ||
+	    event->attr.exclude_host || event->attr.exclude_guest)
+		return -EINVAL;
+
+	/* the event must be bound to a CPU */
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	/* only the one statically allocated PMU instance is handled here */
+	if (event->pmu != &perf_iommu->pmu)
+		return -ENOENT;
+
+	/* integrate with iommu base devid (0000), assume one iommu */
+	perf_iommu->max_banks =
+		amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
+	perf_iommu->max_counters =
+		amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
+	if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
+		return -EINVAL;
+
+	/* update the hw_perf_event struct with the iommu config data */
+	hwc->config = event->attr.config;
+	hwc->extra_reg.config = event->attr.config1;
+
+	return 0;
+}
+
+static void perf_iommu_enable_event(struct perf_event *ev)
+{
+	u8 csource = _GET_CSOURCE(ev);
+	u16 devid = _GET_DEVID(ev);
+	u64 reg = 0ULL;
+
+	reg = csource;
+	amd_iommu_pc_get_set_reg_val(devid,
+			_GET_BANK(ev), _GET_CNTR(ev) ,
+			 IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+
+	reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
+	if (reg)
+		reg |= (1UL << 31);
+	amd_iommu_pc_get_set_reg_val(devid,
+			_GET_BANK(ev), _GET_CNTR(ev) ,
+			 IOMMU_PC_DEVID_MATCH_REG, &reg, true);
+
+	reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
+	if (reg)
+		reg |= (1UL << 31);
+	amd_iommu_pc_get_set_reg_val(devid,
+			_GET_BANK(ev), _GET_CNTR(ev) ,
+			 IOMMU_PC_PASID_MATCH_REG, &reg, true);
+
+	reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
+	if (reg)
+		reg |= (1UL << 31);
+	amd_iommu_pc_get_set_reg_val(devid,
+			_GET_BANK(ev), _GET_CNTR(ev) ,
+			 IOMMU_PC_DOMID_MATCH_REG, &reg, true);
+}
+
+static void perf_iommu_disable_event(struct perf_event *event)
+{
+	u64 reg = 0ULL;
+
+	amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+			_GET_BANK(event), _GET_CNTR(event),
+			IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+}
+
+static void perf_iommu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	pr_debug("perf: amd_iommu:perf_iommu_start\n");
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+	hwc->state = 0;
+
+	if (flags & PERF_EF_RELOAD) {
+		u64 prev_raw_count =  local64_read(&hwc->prev_count);
+		amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+				_GET_BANK(event), _GET_CNTR(event),
+				IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
+	}
+
+	perf_iommu_enable_event(event);
+	perf_event_update_userpage(event);
+
+}
+
+static void perf_iommu_read(struct perf_event *event)
+{
+	u64 count = 0ULL;
+	u64 prev_raw_count = 0ULL;
+	u64 delta = 0ULL;
+	struct hw_perf_event *hwc = &event->hw;
+	pr_debug("perf: amd_iommu:perf_iommu_read\n");
+
+	amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+				_GET_BANK(event), _GET_CNTR(event),
+				IOMMU_PC_COUNTER_REG, &count, false);
+
+	/* IOMMU pc counter register is only 48 bits: keep the low 48 */
+	count &= 0xFFFFFFFFFFFFULL;
+
+	prev_raw_count =  local64_read(&hwc->prev_count);
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+					count) != prev_raw_count)
+		return;	/* prev_count changed since it was read above */
+
+	/* shift both values up by 16 so the difference wraps at 48 bits */
+	delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
+	delta >>= COUNTER_SHIFT;
+	local64_add(delta, &event->count);
+
+}
+
+/* Stop counting: clear the counter source register, then update the count. */
+static void perf_iommu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	pr_debug("perf: amd_iommu:perf_iommu_stop\n");
+
+	/* state is already marked up to date; nothing left to do */
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	perf_iommu_disable_event(event);
+	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+	hwc->state |= PERF_HES_STOPPED;
+
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	perf_iommu_read(event);
+	hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int perf_iommu_add(struct perf_event *event, int flags)
+{
+	int retval;
+	struct perf_amd_iommu *perf_iommu =
+			container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+	pr_debug("perf: amd_iommu:perf_iommu_add\n");
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	/* request an iommu bank/counter */
+	retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
+	if (retval != -ENOSPC)
+		event->hw.extra_reg.reg = (u16)retval;
+	else
+		return retval;
+
+	if (flags & PERF_EF_START)
+		perf_iommu_start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+
+static void perf_iommu_del(struct perf_event *event, int flags)
+{
+	struct perf_amd_iommu *perf_iommu =
+			container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+	pr_debug("perf: amd_iommu:perf_iommu_del\n");
+	perf_iommu_stop(event, PERF_EF_UPDATE);
+
+	/* clear the assigned iommu bank/counter */
+	clear_avail_iommu_bnk_cntr(perf_iommu,
+				     _GET_BANK(event),
+				     _GET_CNTR(event));
+
+	perf_event_update_userpage(event);
+}
+
+static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
+{
+	struct attribute **attrs;
+	struct attribute_group *attr_group;
+	int i = 0, j;
+
+	while (amd_iommu_v2_event_descs[i].attr.attr.name)
+		i++;
+
+	attr_group = kzalloc(sizeof(struct attribute *)
+		* (i + 1) + sizeof(*attr_group), GFP_KERNEL);
+	if (!attr_group)
+		return -ENOMEM;
+
+	attrs = (struct attribute **)(attr_group + 1);
+	for (j = 0; j < i; j++)
+		attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
+
+	attr_group->name = "events";
+	attr_group->attrs = attrs;
+	perf_iommu->events_group = attr_group;
+
+	return 0;
+}
+
+/* Free the kzalloc'ed "events" attribute group, if one was built. */
+static __init void amd_iommu_pc_exit(void)
+{
+	/* kfree(NULL) is a no-op, so no guard is needed */
+	kfree(__perf_iommu.events_group);
+	__perf_iommu.events_group = NULL;
+}
+
+static __init int _init_perf_amd_iommu(
+	struct perf_amd_iommu *perf_iommu, char *name)
+{
+	int ret;
+
+	raw_spin_lock_init(&perf_iommu->lock);
+
+	/* Init format attributes */
+	perf_iommu->format_group = &amd_iommu_format_group;
+
+	/* Init cpumask attributes to only core 0 */
+	cpumask_set_cpu(0, &iommu_cpumask);
+	perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
+
+	/* Init events attributes */
+	if (_init_events_attrs(perf_iommu) != 0)
+		pr_err("perf: amd_iommu: Only support raw events.\n");
+
+	/* Init null attributes */
+	perf_iommu->null_group = NULL;
+	perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
+
+	ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
+	if (ret) {
+		pr_err("perf: amd_iommu: Failed to initialized.\n");
+		amd_iommu_pc_exit();
+	} else {
+		pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
+			amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
+			amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
+	}
+
+	return ret;
+}
+
+static struct perf_amd_iommu __perf_iommu = {
+	.pmu = {
+		.event_init	= perf_iommu_event_init,
+		.add		= perf_iommu_add,
+		.del		= perf_iommu_del,
+		.start		= perf_iommu_start,
+		.stop		= perf_iommu_stop,
+		.read		= perf_iommu_read,
+	},
+	.max_banks		= 0x00,
+	.max_counters		= 0x00,
+	.cntr_assign_mask	= 0ULL,
+	.format_group		= NULL,
+	.cpumask_group		= NULL,
+	.events_group		= NULL,
+	.null_group		= NULL,
+};
+
+/* Register the amd_iommu PMU when IOMMU performance counters exist. */
+static __init int amd_iommu_pc_init(void)
+{
+	/* Make sure the IOMMU PC resource is available */
+	if (!amd_iommu_pc_supported()) {
+		pr_err("perf: amd_iommu PMU not installed. No support!\n");
+		return -ENODEV;
+	}
+
+	/* propagate a registration failure instead of reporting success */
+	return _init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
+}
+
+device_initcall(amd_iommu_pc_init);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
new file mode 100644
index 00000000000..845d173278e
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _PERF_EVENT_AMD_IOMMU_H_
+#define _PERF_EVENT_AMD_IOMMU_H_
+
+/* iommu pc mmio region register indexes */
+#define IOMMU_PC_COUNTER_REG			0x00
+#define IOMMU_PC_COUNTER_SRC_REG		0x08
+#define IOMMU_PC_PASID_MATCH_REG		0x10
+#define IOMMU_PC_DOMID_MATCH_REG		0x18
+#define IOMMU_PC_DEVID_MATCH_REG		0x20
+#define IOMMU_PC_COUNTER_REPORT_REG		0x28
+
+/* maximum specified bank/counters */
+#define PC_MAX_SPEC_BNKS			64
+#define PC_MAX_SPEC_CNTRS			16
+
+/* iommu base devid, used when querying the bank/counter limits */
+#define IOMMU_BASE_DEVID			0x0000
+
+/* amd_iommu_init.c external support functions */
+extern bool amd_iommu_pc_supported(void);
+
+extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+
+extern u8 amd_iommu_pc_get_max_counters(u16 devid);
+
+extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
+			u8 fxn, u64 *value, bool is_write);
+
+#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a9e22073bd5..fbc9210b45b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 
+#include <asm/cpufeature.h>
 #include <asm/hardirq.h>
 #include <asm/apic.h>
 
@@ -190,6 +191,22 @@ struct attribute *snb_events_attrs[] = {
 	NULL,
 };
 
+static struct event_constraint intel_hsw_event_constraints[] = {
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */
+	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+	/* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+	INTEL_EVENT_CONSTRAINT(0x08a3, 0x4),
+	/* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+	INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4),
+	/* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
+	INTEL_EVENT_CONSTRAINT(0x04a3, 0xf),
+	EVENT_CONSTRAINT_END
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
 	return intel_perfmon_event_map[hw_event];
@@ -872,7 +889,8 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
 		return true;
 
 	/* implicit branch sampling to correct PEBS skid */
-	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
+	    x86_pmu.intel_cap.pebs_format < 2)
 		return true;
 
 	return false;
@@ -1167,15 +1185,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
 	/*
-	 * Some chipsets need to unmask the LVTPC in a particular spot
-	 * inside the nmi handler.  As a result, the unmasking was pushed
-	 * into all the nmi handlers.
-	 *
-	 * This handler doesn't seem to have any issues with the unmasking
-	 * so it was left at the top.
+	 * No known reason to not always do late ACK,
+	 * but just in case do it opt-in.
 	 */
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-
+	if (!x86_pmu.late_ack)
+		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
 	status = intel_pmu_get_status();
@@ -1188,8 +1202,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 again:
 	intel_pmu_ack_status(status);
 	if (++loops > 100) {
-		WARN_ONCE(1, "perfevents: irq loop stuck!\n");
-		perf_event_print_debug();
+		static bool warned = false;
+		if (!warned) {
+			WARN(1, "perfevents: irq loop stuck!\n");
+			perf_event_print_debug();
+			warned = true;
+		}
 		intel_pmu_reset();
 		goto done;
 	}
@@ -1235,6 +1253,13 @@ again:
 
 done:
 	intel_pmu_enable_all(0);
+	/*
+	 * Only unmask the NMI after the overflow counters
+	 * have been reset. This avoids spurious NMIs on
+	 * Haswell CPUs.
+	 */
+	if (x86_pmu.late_ack)
+		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	return handled;
 }
 
@@ -1425,7 +1450,6 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
 			if ((event->hw.config & c->cmask) == c->code) {
-				/* hw.flags zeroed at initialization */
 				event->hw.flags |= c->flags;
 				return c;
 			}
@@ -1473,7 +1497,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 					struct perf_event *event)
 {
-	event->hw.flags = 0;
 	intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
@@ -1646,6 +1669,47 @@ static void core_pmu_enable_all(int added)
 	}
 }
 
+static int hsw_hw_config(struct perf_event *event)
+{
+	int ret = intel_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+	if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
+		return 0;
+	event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
+
+	/*
+	 * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
+	 * PEBS or in ANY thread mode. Since the results are non-sensical forbid
+	 * this combination.
+	 */
+	if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
+	     ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
+	      event->attr.precise_ip > 0))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static struct event_constraint counter2_constraint =
+			EVENT_CONSTRAINT(0, 0x4, 0);
+
+static struct event_constraint *
+hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	struct event_constraint *c = intel_get_event_constraints(cpuc, event);
+
+	/* Handle special quirk on in_tx_checkpointed only in counter 2 */
+	if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
+		if (c->idxmsk64 & (1U << 2))
+			return &counter2_constraint;
+		return &emptyconstraint;
+	}
+
+	return c;
+}
+
 PMU_FORMAT_ATTR(event,	"config:0-7"	);
 PMU_FORMAT_ATTR(umask,	"config:8-15"	);
 PMU_FORMAT_ATTR(edge,	"config:18"	);
@@ -1653,6 +1717,8 @@ PMU_FORMAT_ATTR(pc,	"config:19"	);
 PMU_FORMAT_ATTR(any,	"config:21"	); /* v3 + */
 PMU_FORMAT_ATTR(inv,	"config:23"	);
 PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
+PMU_FORMAT_ATTR(in_tx,  "config:32");
+PMU_FORMAT_ATTR(in_tx_cp, "config:33");
 
 static struct attribute *intel_arch_formats_attr[] = {
 	&format_attr_event.attr,
@@ -1807,6 +1873,8 @@ static struct attribute *intel_arch3_formats_attr[] = {
 	&format_attr_any.attr,
 	&format_attr_inv.attr,
 	&format_attr_cmask.attr,
+	&format_attr_in_tx.attr,
+	&format_attr_in_tx_cp.attr,
 
 	&format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
 	&format_attr_ldlat.attr, /* PEBS load latency */
@@ -1966,6 +2034,15 @@ static __init void intel_nehalem_quirk(void)
 	}
 }
 
+EVENT_ATTR_STR(mem-loads,      mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,     mem_st_hsw,     "event=0xd0,umask=0x82")
+
+static struct attribute *hsw_events_attrs[] = {
+	EVENT_PTR(mem_ld_hsw),
+	EVENT_PTR(mem_st_hsw),
+	NULL
+};
+
 __init int intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
@@ -2189,6 +2266,30 @@ __init int intel_pmu_init(void)
 		break;
 
 
+	case 60: /* Haswell Client */
+	case 70:
+	case 71:
+	case 63:
+		x86_pmu.late_ack = true;
+		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+		intel_pmu_lbr_init_snb();
+
+		x86_pmu.event_constraints = intel_hsw_event_constraints;
+		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_snb_extra_regs;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+		/* all extra regs are per-cpu when HT is on */
+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
+		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+		x86_pmu.hw_config = hsw_hw_config;
+		x86_pmu.get_event_constraints = hsw_get_event_constraints;
+		x86_pmu.cpu_events = hsw_events_attrs;
+		pr_cont("Haswell events, ");
+		break;
+
 	default:
 		switch (x86_pmu.version) {
 		case 1:
@@ -2227,7 +2328,7 @@ __init int intel_pmu_init(void)
 		 * counter, so do not extend mask to generic counters
 		 */
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if (c->cmask != X86_RAW_EVENT_MASK
+			if (c->cmask != FIXED_EVENT_FLAGS
 			    || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
 				continue;
 			}
@@ -2237,5 +2338,12 @@ __init int intel_pmu_init(void)
 		}
 	}
 
+	/* Support full width counters using alternative MSR range */
+	if (x86_pmu.intel_cap.full_width_write) {
+		x86_pmu.max_period = x86_pmu.cntval_mask;
+		x86_pmu.perfctr = MSR_IA32_PMC0;
+		pr_cont("full-width counters, ");
+	}
+
 	return 0;
 }
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 60250f68705..3065c57a63c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -107,6 +107,19 @@ static u64 precise_store_data(u64 status)
 	return val;
 }
 
+static u64 precise_store_data_hsw(u64 status)
+{
+	union perf_mem_data_src dse;
+
+	dse.val = 0;
+	dse.mem_op = PERF_MEM_OP_STORE;
+	dse.mem_lvl = PERF_MEM_LVL_NA;
+	if (status & 1)
+		dse.mem_lvl = PERF_MEM_LVL_L1;
+	/* Nothing else supported. Sorry. */
+	return dse.val;
+}
+
 static u64 load_latency_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;
@@ -165,6 +178,22 @@ struct pebs_record_nhm {
 	u64 status, dla, dse, lat;
 };
 
+/*
+ * Same as pebs_record_nhm, with two additional fields.
+ */
+struct pebs_record_hsw {
+	struct pebs_record_nhm nhm;
+	/*
+	 * Real IP of the event. In the Intel documentation this
+	 * is called eventingrip.
+	 */
+	u64 real_ip;
+	/*
+	 * TSX tuning information field: abort cycles and abort flags.
+	 */
+	u64 tsx_tuning;
+};
+
 void init_debug_store_on_cpu(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -548,6 +577,42 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
         EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_hsw_pebs_event_constraints[] = {
+	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+	INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
+	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
+	INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
+	INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
+	INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.* */
+	/* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+	INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
+	/* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+	INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),
+	INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+	INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+	/* MEM_UOPS_RETIRED.SPLIT_STORES */
+	INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
+	INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+	INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+	INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
+	INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
+	INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
+	/* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
+	INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf),
+	/* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
+	INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf),
+	/* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
+	INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf),
+	/* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
+	INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf),
+	INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
+	INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */
+
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 {
 	struct event_constraint *c;
@@ -588,6 +653,12 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 
 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+
+	if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
+		cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
+	else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
+		cpuc->pebs_enabled &= ~(1ULL << 63);
+
 	if (cpuc->enabled)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
@@ -697,6 +768,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	 */
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct pebs_record_nhm *pebs = __pebs;
+	struct pebs_record_hsw *pebs_hsw = __pebs;
 	struct perf_sample_data data;
 	struct pt_regs regs;
 	u64 sample_type;
@@ -706,7 +778,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		return;
 
 	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
-	fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
+	fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST |
+				 PERF_X86_EVENT_PEBS_ST_HSW);
 
 	perf_sample_data_init(&data, 0, event->hw.last_period);
 
@@ -717,9 +790,6 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	 * if PEBS-LL or PreciseStore
 	 */
 	if (fll || fst) {
-		if (sample_type & PERF_SAMPLE_ADDR)
-			data.addr = pebs->dla;
-
 		/*
 		 * Use latency for weight (only avail with PEBS-LL)
 		 */
@@ -732,6 +802,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		if (sample_type & PERF_SAMPLE_DATA_SRC) {
 			if (fll)
 				data.data_src.val = load_latency_data(pebs->dse);
+			else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+				data.data_src.val =
+					precise_store_data_hsw(pebs->dse);
 			else
 				data.data_src.val = precise_store_data(pebs->dse);
 		}
@@ -753,11 +826,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	regs.bp = pebs->bp;
 	regs.sp = pebs->sp;
 
-	if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
+	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
+		regs.ip = pebs_hsw->real_ip;
+		regs.flags |= PERF_EFLAGS_EXACT;
+	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
 		regs.flags |= PERF_EFLAGS_EXACT;
 	else
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 
+	if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
+		x86_pmu.intel_cap.pebs_format >= 1)
+		data.addr = pebs->dla;
+
 	if (has_branch_stack(event))
 		data.br_stack = &cpuc->lbr_stack;
 
@@ -806,35 +886,22 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	__intel_pmu_pebs_event(event, iregs, at);
 }
 
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
+					void *top)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
-	struct pebs_record_nhm *at, *top;
 	struct perf_event *event = NULL;
 	u64 status = 0;
-	int bit, n;
-
-	if (!x86_pmu.pebs_active)
-		return;
-
-	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
-	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+	int bit;
 
 	ds->pebs_index = ds->pebs_buffer_base;
 
-	n = top - at;
-	if (n <= 0)
-		return;
-
-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
+	for (; at < top; at += x86_pmu.pebs_record_size) {
+		struct pebs_record_nhm *p = at;
 
-	for ( ; at < top; at++) {
-		for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
+		for_each_set_bit(bit, (unsigned long *)&p->status,
+				 x86_pmu.max_pebs_events) {
 			event = cpuc->events[bit];
 			if (!test_bit(bit, cpuc->active_mask))
 				continue;
@@ -857,6 +924,61 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	}
 }
 
+/*
+ * Drain the PEBS buffer of this CPU, interpreting it as an array of
+ * struct pebs_record_nhm.
+ *
+ * Does nothing when PEBS is not active. Otherwise the [base, index) range
+ * of the debug store is captured, the index is rewound to the buffer base,
+ * and the captured records are handed to __intel_pmu_drain_pebs_nhm().
+ *
+ * @iregs: interrupt register state, passed through to the common helper.
+ */
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct debug_store *ds = cpuc->ds;
+	struct pebs_record_nhm *at, *top;
+	int n;
+
+	if (!x86_pmu.pebs_active)
+		return;
+
+	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
+	/* rewind the buffer before the empty check, so it is always reset */
+	ds->pebs_index = ds->pebs_buffer_base;
+
+	/* number of whole records between base and index */
+	n = top - at;
+	if (n <= 0)
+		return;
+
+	/*
+	 * Should not happen, we program the threshold at 1 and do not
+	 * set a reset value.
+	 */
+	WARN_ONCE(n > x86_pmu.max_pebs_events,
+		  "Unexpected number of pebs records %d\n", n);
+
+	/*
+	 * Plain call: a void function must not "return" an expression
+	 * (ISO C constraint; only tolerated as a GNU extension).
+	 */
+	__intel_pmu_drain_pebs_nhm(iregs, at, top);
+}
+
+/*
+ * Drain the PEBS buffer of this CPU, interpreting it as an array of
+ * struct pebs_record_hsw.
+ *
+ * Does nothing when PEBS is not active or when the buffer holds no
+ * records. Otherwise the [base, index) range of the debug store is handed
+ * to __intel_pmu_drain_pebs_nhm(), which rewinds ds->pebs_index itself and
+ * steps through the records using x86_pmu.pebs_record_size.
+ *
+ * @iregs: interrupt register state, passed through to the common helper.
+ */
+static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct debug_store *ds = cpuc->ds;
+	struct pebs_record_hsw *at, *top;
+	int n;
+
+	if (!x86_pmu.pebs_active)
+		return;
+
+	at  = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
+	top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
+
+	/* number of whole records between base and index */
+	n = top - at;
+	if (n <= 0)
+		return;
+	/*
+	 * Should not happen, we program the threshold at 1 and do not
+	 * set a reset value.
+	 */
+	WARN_ONCE(n > x86_pmu.max_pebs_events,
+		  "Unexpected number of pebs records %d\n", n);
+
+	/*
+	 * Plain call: a void function must not "return" an expression
+	 * (ISO C constraint; only tolerated as a GNU extension).
+	 */
+	__intel_pmu_drain_pebs_nhm(iregs, at, top);
+}
+
 /*
  * BTS, PEBS probe and setup
  */
@@ -888,6 +1010,12 @@ void intel_ds_init(void)
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
 			break;
 
+		case 2:
+			pr_cont("PEBS fmt2%c, ", pebs_type);
+			x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
+			x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
+			break;
+
 		default:
 			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
 			x86_pmu.pebs = 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d978353c939..d5be06a5005 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -12,6 +12,16 @@ enum {
 	LBR_FORMAT_LIP		= 0x01,
 	LBR_FORMAT_EIP		= 0x02,
 	LBR_FORMAT_EIP_FLAGS	= 0x03,
+	LBR_FORMAT_EIP_FLAGS2	= 0x04,
+	LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_EIP_FLAGS2,
+};
+
+static enum {
+	LBR_EIP_FLAGS		= 1,
+	LBR_TSX			= 2,
+} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
+	[LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
+	[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
 };
 
 /*
@@ -56,6 +66,8 @@ enum {
 	 LBR_FAR)
 
 #define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
+#define LBR_FROM_FLAG_IN_TX    (1ULL << 62)
+#define LBR_FROM_FLAG_ABORT    (1ULL << 61)
 
 #define for_each_branch_sample_type(x) \
 	for ((x) = PERF_SAMPLE_BRANCH_USER; \
@@ -81,9 +93,13 @@ enum {
 	X86_BR_JMP      = 1 << 9, /* jump */
 	X86_BR_IRQ      = 1 << 10,/* hw interrupt or trap or fault */
 	X86_BR_IND_CALL = 1 << 11,/* indirect calls */
+	X86_BR_ABORT    = 1 << 12,/* transaction abort */
+	X86_BR_IN_TX    = 1 << 13,/* in transaction */
+	X86_BR_NO_TX    = 1 << 14,/* not in transaction */
 };
 
 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
 
 #define X86_BR_ANY       \
 	(X86_BR_CALL    |\
@@ -95,6 +111,7 @@ enum {
 	 X86_BR_JCC     |\
 	 X86_BR_JMP	 |\
 	 X86_BR_IRQ	 |\
+	 X86_BR_ABORT	 |\
 	 X86_BR_IND_CALL)
 
 #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
@@ -270,21 +287,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 
 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		unsigned long lbr_idx = (tos - i) & mask;
-		u64 from, to, mis = 0, pred = 0;
+		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
+		int skip = 0;
+		int lbr_flags = lbr_desc[lbr_format];
 
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
 		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
 
-		if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
+		if (lbr_flags & LBR_EIP_FLAGS) {
 			mis = !!(from & LBR_FROM_FLAG_MISPRED);
 			pred = !mis;
-			from = (u64)((((s64)from) << 1) >> 1);
+			skip = 1;
+		}
+		if (lbr_flags & LBR_TSX) {
+			in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+			abort = !!(from & LBR_FROM_FLAG_ABORT);
+			skip = 3;
 		}
+		from = (u64)((((s64)from) << skip) >> skip);
 
 		cpuc->lbr_entries[i].from	= from;
 		cpuc->lbr_entries[i].to		= to;
 		cpuc->lbr_entries[i].mispred	= mis;
 		cpuc->lbr_entries[i].predicted	= pred;
+		cpuc->lbr_entries[i].in_tx	= in_tx;
+		cpuc->lbr_entries[i].abort	= abort;
 		cpuc->lbr_entries[i].reserved	= 0;
 	}
 	cpuc->lbr_stack.nr = i;
@@ -310,7 +337,7 @@ void intel_pmu_lbr_read(void)
  * - in case there is no HW filter
  * - in case the HW filter has errata or limitations
  */
-static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 {
 	u64 br_type = event->attr.branch_sample_type;
 	int mask = 0;
@@ -318,11 +345,8 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 	if (br_type & PERF_SAMPLE_BRANCH_USER)
 		mask |= X86_BR_USER;
 
-	if (br_type & PERF_SAMPLE_BRANCH_KERNEL) {
-		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
+	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
 		mask |= X86_BR_KERNEL;
-	}
 
 	/* we ignore BRANCH_HV here */
 
@@ -337,13 +361,21 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 
 	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
 		mask |= X86_BR_IND_CALL;
+
+	if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
+		mask |= X86_BR_ABORT;
+
+	if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
+		mask |= X86_BR_IN_TX;
+
+	if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
+		mask |= X86_BR_NO_TX;
+
 	/*
 	 * stash actual user request into reg, it may
 	 * be used by fixup code for some CPU
 	 */
 	event->hw.branch_reg.reg = mask;
-
-	return 0;
 }
 
 /*
@@ -391,9 +423,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
 	/*
 	 * setup SW LBR filter
 	 */
-	ret = intel_pmu_setup_sw_lbr_filter(event);
-	if (ret)
-		return ret;
+	intel_pmu_setup_sw_lbr_filter(event);
 
 	/*
 	 * setup HW LBR filter, if any
@@ -415,7 +445,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
  * decoded (e.g., text page not present), then X86_BR_NONE is
  * returned.
  */
-static int branch_type(unsigned long from, unsigned long to)
+static int branch_type(unsigned long from, unsigned long to, int abort)
 {
 	struct insn insn;
 	void *addr;
@@ -435,6 +465,9 @@ static int branch_type(unsigned long from, unsigned long to)
 	if (from == 0 || to == 0)
 		return X86_BR_NONE;
 
+	if (abort)
+		return X86_BR_ABORT | to_plm;
+
 	if (from_plm == X86_BR_USER) {
 		/*
 		 * can happen if measuring at the user level only
@@ -581,7 +614,13 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 		from = cpuc->lbr_entries[i].from;
 		to = cpuc->lbr_entries[i].to;
 
-		type = branch_type(from, to);
+		type = branch_type(from, to, cpuc->lbr_entries[i].abort);
+		if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
+			if (cpuc->lbr_entries[i].in_tx)
+				type |= X86_BR_IN_TX;
+			else
+				type |= X86_BR_NO_TX;
+		}
 
 		/* if type does not correspond, then discard */
 		if (type == X86_BR_NONE || (br_sel & type) != type) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 52441a2af53..9dd99751ccf 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -536,7 +536,7 @@ __snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *eve
 	if (!uncore_box_is_fake(box))
 		reg1->alloc |= alloc;
 
-	return 0;
+	return NULL;
 fail:
 	for (; i >= 0; i--) {
 		if (alloc & (0x1 << i))
@@ -644,7 +644,7 @@ snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
 	    (!uncore_box_is_fake(box) && reg1->alloc))
 		return NULL;
 again:
-	mask = 0xff << (idx * 8);
+	mask = 0xffULL << (idx * 8);
 	raw_spin_lock_irqsave(&er->lock, flags);
 	if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) ||
 	    !((config1 ^ er->config) & mask)) {
@@ -1923,7 +1923,7 @@ static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modif
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-	int idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
+	u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
 	u64 config = reg1->config;
 
 	/* get the non-shared control bits and shift them */
@@ -2723,15 +2723,16 @@ static void uncore_put_event_constraint(struct intel_uncore_box *box, struct per
 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
 {
 	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
-	struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
+	struct event_constraint *c;
 	int i, wmin, wmax, ret = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
 
 	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+		hwc = &box->event_list[i]->hw;
 		c = uncore_get_event_constraint(box, box->event_list[i]);
-		constraints[i] = c;
+		hwc->constraint = c;
 		wmin = min(wmin, c->weight);
 		wmax = max(wmax, c->weight);
 	}
@@ -2739,7 +2740,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
 	/* fastpath, try to reuse previous register */
 	for (i = 0; i < n; i++) {
 		hwc = &box->event_list[i]->hw;
-		c = constraints[i];
+		c = hwc->constraint;
 
 		/* never assigned */
 		if (hwc->idx == -1)
@@ -2759,7 +2760,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
 	}
 	/* slow path */
 	if (i != n)
-		ret = perf_assign_events(constraints, n, wmin, wmax, assign);
+		ret = perf_assign_events(box->event_list, n,
+					 wmin, wmax, assign);
 
 	if (!assign || ret) {
 		for (i = 0; i < n; i++)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index f9528917f6e..47b3d00c9d8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -337,10 +337,10 @@
 		 NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
 
 #define NHMEX_M_PMON_ZDP_CTL_FVC_MASK		(((1 << 11) - 1) | (1 << 23))
-#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n)	(0x7 << (11 + 3 * (n)))
+#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n)	(0x7ULL << (11 + 3 * (n)))
 
 #define WSMEX_M_PMON_ZDP_CTL_FVC_MASK		(((1 << 12) - 1) | (1 << 24))
-#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n)	(0x7 << (12 + 3 * (n)))
+#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n)	(0x7ULL << (12 + 3 * (n)))
 
 /*
  * use the 9~13 bits to select event If the 7th bit is not set,
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 37a198bd48c..aee6317b902 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -37,8 +37,8 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 		   static_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no",
 		   static_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no",
 		   static_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no",
-		   c->hard_math ? "yes" : "no",
-		   c->hard_math ? "yes" : "no",
+		   static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
+		   static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
 		   c->cpuid_level,
 		   c->wp_works_ok ? "yes" : "no");
 }
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault.c
index 155a13f33ed..5d3fe8d36e4 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault.c
@@ -9,6 +9,8 @@
 #include <asm/processor.h>
 #include <asm/desc.h>
 
+#ifdef CONFIG_X86_32
+
 #define DOUBLEFAULT_STACKSIZE (1024)
 static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
 #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
@@ -67,3 +69,16 @@ struct tss_struct doublefault_tss __cacheline_aligned = {
 		.__cr3		= __pa_nodebug(swapper_pg_dir),
 	}
 };
+
+/* dummy for do_double_fault() call */
+void df_debug(struct pt_regs *regs, long error_code) {}
+
+#else /* !CONFIG_X86_32 */
+
+/*
+ * Variant built when CONFIG_X86_32 is not set: report the double fault
+ * error code at emergency log level, dump the register state and panic.
+ *
+ * @regs:       register state to dump via show_regs()
+ * @error_code: error code printed in the emergency message
+ */
+void df_debug(struct pt_regs *regs, long error_code)
+{
+	pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code);
+	show_regs(regs);
+	panic("Machine halted.");
+}
+#endif
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 8f3e2dec1df..2cfbc3a3a2d 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -801,7 +801,17 @@ ENTRY(name)				\
 	CFI_ENDPROC;			\
 ENDPROC(name)
 
-#define BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(name, nr, smp_##name)
+
+#ifdef CONFIG_TRACING
+#define TRACE_BUILD_INTERRUPT(name, nr)		\
+	BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
+#else
+#define TRACE_BUILD_INTERRUPT(name, nr)
+#endif
+
+#define BUILD_INTERRUPT(name, nr) \
+	BUILD_INTERRUPT3(name, nr, smp_##name); \
+	TRACE_BUILD_INTERRUPT(name, nr)
 
 /* The include is where all of the SMP etc. interrupts come from */
 #include <asm/entry_arch.h>
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 72720894103..1b69951a81e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -365,7 +365,7 @@ ENDPROC(native_usergs_sysret64)
 	/*CFI_REL_OFFSET	ss,0*/
 	pushq_cfi %rax /* rsp */
 	CFI_REL_OFFSET	rsp,0
-	pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
+	pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) /* eflags - interrupts on */
 	/*CFI_REL_OFFSET	rflags,0*/
 	pushq_cfi $__KERNEL_CS /* cs */
 	/*CFI_REL_OFFSET	cs,0*/
@@ -1138,7 +1138,7 @@ END(common_interrupt)
 /*
  * APIC interrupts.
  */
-.macro apicinterrupt num sym do_sym
+.macro apicinterrupt3 num sym do_sym
 ENTRY(\sym)
 	INTR_FRAME
 	ASM_CLAC
@@ -1150,15 +1150,32 @@ ENTRY(\sym)
 END(\sym)
 .endm
 
+#ifdef CONFIG_TRACING
+#define trace(sym) trace_##sym
+#define smp_trace(sym) smp_trace_##sym
+
+.macro trace_apicinterrupt num sym
+apicinterrupt3 \num trace(\sym) smp_trace(\sym)
+.endm
+#else
+.macro trace_apicinterrupt num sym do_sym
+.endm
+#endif
+
+.macro apicinterrupt num sym do_sym
+apicinterrupt3 \num \sym \do_sym
+trace_apicinterrupt \num \sym
+.endm
+
 #ifdef CONFIG_SMP
-apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
+apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR \
 	irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
-apicinterrupt REBOOT_VECTOR \
+apicinterrupt3 REBOOT_VECTOR \
 	reboot_interrupt smp_reboot_interrupt
 #endif
 
 #ifdef CONFIG_X86_UV
-apicinterrupt UV_BAU_MESSAGE \
+apicinterrupt3 UV_BAU_MESSAGE \
 	uv_bau_message_intr1 uv_bau_message_interrupt
 #endif
 apicinterrupt LOCAL_TIMER_VECTOR \
@@ -1167,14 +1184,19 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
 	x86_platform_ipi smp_x86_platform_ipi
 
 #ifdef CONFIG_HAVE_KVM
-apicinterrupt POSTED_INTR_VECTOR \
+apicinterrupt3 POSTED_INTR_VECTOR \
 	kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
 #endif
 
+#ifdef CONFIG_X86_MCE_THRESHOLD
 apicinterrupt THRESHOLD_APIC_VECTOR \
 	threshold_interrupt smp_threshold_interrupt
+#endif
+
+#ifdef CONFIG_X86_THERMAL_VECTOR
 apicinterrupt THERMAL_APIC_VECTOR \
 	thermal_interrupt smp_thermal_interrupt
+#endif
 
 #ifdef CONFIG_SMP
 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
@@ -1451,13 +1473,13 @@ ENTRY(xen_failsafe_callback)
 	CFI_ENDPROC
 END(xen_failsafe_callback)
 
-apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
+apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
 	xen_hvm_callback_vector xen_evtchn_do_upcall
 
 #endif /* CONFIG_XEN */
 
 #if IS_ENABLED(CONFIG_HYPERV)
-apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
+apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
 	hyperv_callback_vector hyperv_vector_handler
 #endif /* CONFIG_HYPERV */
 
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 73afd11799c..e65ddc62e11 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -444,7 +444,6 @@ is486:
 	orl %ecx,%eax
 	movl %eax,%cr0
 
-	call check_x87
 	lgdt early_gdt_descr
 	lidt idt_descr
 	ljmp $(__KERNEL_CS),$1f
@@ -467,26 +466,6 @@ is486:
 	pushl $0		# fake return address for unwinder
 	jmp *(initial_code)
 
-/*
- * We depend on ET to be correct. This checks for 287/387.
- */
-check_x87:
-	movb $0,X86_HARD_MATH
-	clts
-	fninit
-	fstsw %ax
-	cmpb $0,%al
-	je 1f
-	movl %cr0,%eax		/* no coprocessor: have to set bits */
-	xorl $4,%eax		/* set EM */
-	movl %eax,%cr0
-	ret
-	ALIGN
-1:	movb $1,X86_HARD_MATH
-	.byte 0xDB,0xE4		/* fsetpm for 287, ignored by 387 */
-	ret
-
-	
 #include "verify_cpu.S"
 
 /*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 321d65ebaff..5e4d8a8a5c4 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -518,9 +518,15 @@ ENTRY(idt_table)
 	.skip IDT_ENTRIES * 16
 
 	.align L1_CACHE_BYTES
-ENTRY(nmi_idt_table)
+ENTRY(debug_idt_table)
 	.skip IDT_ENTRIES * 16
 
+#ifdef CONFIG_TRACING
+	.align L1_CACHE_BYTES
+ENTRY(trace_idt_table)
+	.skip IDT_ENTRIES * 16
+#endif
+
 	__PAGE_ALIGNED_BSS
 NEXT_PAGE(empty_zero_page)
 	.skip PAGE_SIZE
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index cb339097b9e..b627746f6b1 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -131,7 +131,7 @@ static void __cpuinit init_thread_xstate(void)
 	 * xsave_init().
 	 */
 
-	if (!HAVE_HWFP) {
+	if (!cpu_has_fpu) {
 		/*
 		 * Disable xsave as we do not support it if i387
 		 * emulation is enabled.
@@ -158,6 +158,14 @@ void __cpuinit fpu_init(void)
 	unsigned long cr0;
 	unsigned long cr4_mask = 0;
 
+#ifndef CONFIG_MATH_EMULATION
+	if (!cpu_has_fpu) {
+		pr_emerg("No FPU found and no math emulation present\n");
+		pr_emerg("Giving up\n");
+		for (;;)
+			asm volatile("hlt");
+	}
+#endif
 	if (cpu_has_fxsr)
 		cr4_mask |= X86_CR4_OSFXSR;
 	if (cpu_has_xmm)
@@ -167,7 +175,7 @@ void __cpuinit fpu_init(void)
 
 	cr0 = read_cr0();
 	cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
-	if (!HAVE_HWFP)
+	if (!cpu_has_fpu)
 		cr0 |= X86_CR0_EM;
 	write_cr0(cr0);
 
@@ -185,7 +193,7 @@ void __cpuinit fpu_init(void)
 
 void fpu_finit(struct fpu *fpu)
 {
-	if (!HAVE_HWFP) {
+	if (!cpu_has_fpu) {
 		finit_soft_fpu(&fpu->state->soft);
 		return;
 	}
@@ -214,7 +222,7 @@ int init_fpu(struct task_struct *tsk)
 	int ret;
 
 	if (tsk_used_math(tsk)) {
-		if (HAVE_HWFP && tsk == current)
+		if (cpu_has_fpu && tsk == current)
 			unlazy_fpu(tsk);
 		tsk->thread.fpu.last_cpu = ~0;
 		return 0;
@@ -511,14 +519,13 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 	if (ret)
 		return ret;
 
-	if (!HAVE_HWFP)
+	if (!static_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
 
-	if (!cpu_has_fxsr) {
+	if (!cpu_has_fxsr)
 		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
 					   &target->thread.fpu.state->fsave, 0,
 					   -1);
-	}
 
 	sanitize_i387_state(target);
 
@@ -545,13 +552,13 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 
 	sanitize_i387_state(target);
 
-	if (!HAVE_HWFP)
+	if (!static_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
 
-	if (!cpu_has_fxsr) {
+	if (!cpu_has_fxsr)
 		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-					  &target->thread.fpu.state->fsave, 0, -1);
-	}
+					  &target->thread.fpu.state->fsave, 0,
+					  -1);
 
 	if (pos > 0 || count < sizeof(env))
 		convert_from_fxsr(&env, target);
@@ -592,3 +599,33 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
 EXPORT_SYMBOL(dump_fpu);
 
 #endif	/* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
+
+/*
+ * Handler for the "no387" boot parameter (registered through __setup()
+ * right below): clears the X86_FEATURE_FPU capability. The parameter
+ * string @s is not looked at.
+ *
+ * Always returns 1 -- NOTE(review): presumably this tells the __setup()
+ * machinery that the option was consumed; confirm.
+ */
+static int __init no_387(char *s)
+{
+	setup_clear_cpu_cap(X86_FEATURE_FPU);
+	return 1;
+}
+
+__setup("no387", no_387);
+
+/*
+ * Probe for an x87 FPU and record the outcome in the capability bits of @c.
+ *
+ * CR0.TS and CR0.EM are cleared, then "fninit" is executed and the FPU
+ * status and control words are read back. X86_FEATURE_FPU is set when the
+ * status word reads 0 and the control word, masked with 0x103f, equals
+ * 0x003f; otherwise the capability is cleared.
+ */
+void __cpuinit fpu_detect(struct cpuinfo_x86 *c)
+{
+	unsigned long cr0;
+	u16 fsw, fcw;
+
+	/*
+	 * Start from all-ones -- presumably so the check below fails if the
+	 * store instructions leave the variables untouched (no FPU).
+	 */
+	fsw = fcw = 0xffff;
+
+	cr0 = read_cr0();
+	cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
+	write_cr0(cr0);
+
+	asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
+		     : "+m" (fsw), "+m" (fcw));
+
+	if (fsw == 0 && (fcw & 0x103f) == 0x003f)
+		set_cpu_cap(c, X86_FEATURE_FPU);
+	else
+		clear_cpu_cap(c, X86_FEATURE_FPU);
+
+	/* The final cr0 value is set in fpu_init() */
+}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index ac0631d8996..3a8185c042a 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -18,6 +18,9 @@
 #include <asm/mce.h>
 #include <asm/hw_irq.h>
 
+#define CREATE_TRACE_POINTS
+#include <asm/trace/irq_vectors.h>
+
 atomic_t irq_err_count;
 
 /* Function pointer for generic interrupt vector handling */
@@ -204,23 +207,21 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
 /*
  * Handler for X86_PLATFORM_IPI_VECTOR.
  */
-void smp_x86_platform_ipi(struct pt_regs *regs)
+void __smp_x86_platform_ipi(void)
 {
-	struct pt_regs *old_regs = set_irq_regs(regs);
-
-	ack_APIC_irq();
-
-	irq_enter();
-
-	exit_idle();
-
 	inc_irq_stat(x86_platform_ipis);
 
 	if (x86_platform_ipi_callback)
 		x86_platform_ipi_callback();
+}
 
-	irq_exit();
+void smp_x86_platform_ipi(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
 
+	entering_ack_irq();
+	__smp_x86_platform_ipi();
+	exiting_irq();
 	set_irq_regs(old_regs);
 }
 
@@ -246,6 +247,18 @@ void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
 }
 #endif
 
+/*
+ * Tracing variant of smp_x86_platform_ipi(): performs the same
+ * enter/handle/exit sequence, with the x86_platform_ipi entry and exit
+ * trace events emitted around the call to __smp_x86_platform_ipi().
+ *
+ * @regs: interrupt register state; installed with set_irq_regs() for the
+ *        duration of the handler and restored before returning.
+ */
+void smp_trace_x86_platform_ipi(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	entering_ack_irq();
+	trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
+	__smp_x86_platform_ipi();
+	trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
+	exiting_irq();
+	set_irq_regs(old_regs);
+}
+
 EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index ca8f703a1e7..636a55e4a13 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -8,14 +8,34 @@
 #include <linux/irq_work.h>
 #include <linux/hardirq.h>
 #include <asm/apic.h>
+#include <asm/trace/irq_vectors.h>
 
-void smp_irq_work_interrupt(struct pt_regs *regs)
+static inline void irq_work_entering_irq(void)
 {
 	irq_enter();
 	ack_APIC_irq();
+}
+
+static inline void __smp_irq_work_interrupt(void)
+{
 	inc_irq_stat(apic_irq_work_irqs);
 	irq_work_run();
-	irq_exit();
+}
+
+void smp_irq_work_interrupt(struct pt_regs *regs)
+{
+	irq_work_entering_irq();
+	__smp_irq_work_interrupt();
+	exiting_irq();
+}
+
+void smp_trace_irq_work_interrupt(struct pt_regs *regs)
+{
+	irq_work_entering_irq();
+	trace_irq_work_entry(IRQ_WORK_VECTOR);
+	__smp_irq_work_interrupt();
+	trace_irq_work_exit(IRQ_WORK_VECTOR);
+	exiting_irq();
 }
 
 void arch_irq_work_raise(void)
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index efdec7cd8e0..47ebb1dbfbc 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -31,48 +31,12 @@
 #include <asm/microcode.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
+#include <asm/microcode_amd.h>
 
 MODULE_DESCRIPTION("AMD Microcode Update Driver");
 MODULE_AUTHOR("Peter Oruba");
 MODULE_LICENSE("GPL v2");
 
-#define UCODE_MAGIC                0x00414d44
-#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
-#define UCODE_UCODE_TYPE           0x00000001
-
-struct equiv_cpu_entry {
-	u32	installed_cpu;
-	u32	fixed_errata_mask;
-	u32	fixed_errata_compare;
-	u16	equiv_cpu;
-	u16	res;
-} __attribute__((packed));
-
-struct microcode_header_amd {
-	u32	data_code;
-	u32	patch_id;
-	u16	mc_patch_data_id;
-	u8	mc_patch_data_len;
-	u8	init_flag;
-	u32	mc_patch_data_checksum;
-	u32	nb_dev_id;
-	u32	sb_dev_id;
-	u16	processor_rev_id;
-	u8	nb_rev_id;
-	u8	sb_rev_id;
-	u8	bios_api_rev;
-	u8	reserved1[3];
-	u32	match_reg[8];
-} __attribute__((packed));
-
-struct microcode_amd {
-	struct microcode_header_amd	hdr;
-	unsigned int			mpb[0];
-};
-
-#define SECTION_HDR_SIZE	8
-#define CONTAINER_HDR_SZ	12
-
 static struct equiv_cpu_entry *equiv_cpu_table;
 
 struct ucode_patch {
@@ -84,21 +48,10 @@ struct ucode_patch {
 
 static LIST_HEAD(pcache);
 
-static u16 find_equiv_id(unsigned int cpu)
+static u16 __find_equiv_id(unsigned int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	int i = 0;
-
-	if (!equiv_cpu_table)
-		return 0;
-
-	while (equiv_cpu_table[i].installed_cpu != 0) {
-		if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
-			return equiv_cpu_table[i].equiv_cpu;
-
-		i++;
-	}
-	return 0;
+	return find_equiv_id(equiv_cpu_table, uci->cpu_sig.sig);
 }
 
 static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
@@ -163,7 +116,7 @@ static struct ucode_patch *find_patch(unsigned int cpu)
 {
 	u16 equiv_id;
 
-	equiv_id = find_equiv_id(cpu);
+	equiv_id = __find_equiv_id(cpu);
 	if (!equiv_id)
 		return NULL;
 
@@ -173,9 +126,20 @@ static struct ucode_patch *find_patch(unsigned int cpu)
 static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	struct ucode_patch *p;
 
 	csig->sig = cpuid_eax(0x00000001);
 	csig->rev = c->microcode;
+
+	/*
+	 * a patch could have been loaded early, set uci->mc so that
+	 * mc_bp_resume() can call apply_microcode()
+	 */
+	p = find_patch(cpu);
+	if (p && (p->patch_id == csig->rev))
+		uci->mc = p->data;
+
 	pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
 
 	return 0;
@@ -215,7 +179,21 @@ static unsigned int verify_patch_size(int cpu, u32 patch_size,
 	return patch_size;
 }
 
-static int apply_microcode_amd(int cpu)
+/*
+ * Hand one microcode patch to the CPU and check that it took effect.
+ *
+ * The address of the patch header's data_code field is written to
+ * MSR_AMD64_PATCH_LOADER; MSR_AMD64_PATCH_LEVEL is then read back and
+ * compared with the patch_id from the header.
+ *
+ * @mc_amd: the patch to apply
+ *
+ * Returns 0 when the reported patch level equals the patch_id of @mc_amd,
+ * -1 otherwise.
+ */
+int __apply_microcode_amd(struct microcode_amd *mc_amd)
+{
+	u32 rev, dummy;
+
+	wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
+
+	/* verify patch application was successful */
+	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+	if (rev != mc_amd->hdr.patch_id)
+		return -1;
+
+	return 0;
+}
+
+int apply_microcode_amd(int cpu)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	struct microcode_amd *mc_amd;
@@ -242,19 +220,15 @@ static int apply_microcode_amd(int cpu)
 		return 0;
 	}
 
-	wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
-
-	/* verify patch application was successful */
-	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-	if (rev != mc_amd->hdr.patch_id) {
+	if (__apply_microcode_amd(mc_amd))
 		pr_err("CPU%d: update failed for patch_level=0x%08x\n",
-		       cpu, mc_amd->hdr.patch_id);
-		return -1;
-	}
+			cpu, mc_amd->hdr.patch_id);
+	else
+		pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
+			mc_amd->hdr.patch_id);
 
-	pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
-	uci->cpu_sig.rev = rev;
-	c->microcode = rev;
+	uci->cpu_sig.rev = mc_amd->hdr.patch_id;
+	c->microcode = mc_amd->hdr.patch_id;
 
 	return 0;
 }
@@ -364,7 +338,7 @@ static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover)
 	return crnt_size;
 }
 
-static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
+static enum ucode_state __load_microcode_amd(int cpu, const u8 *data, size_t size)
 {
 	enum ucode_state ret = UCODE_ERROR;
 	unsigned int leftover;
@@ -398,6 +372,32 @@ static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
 	return UCODE_OK;
 }
 
+/*
+ * Load a microcode container for @cpu.
+ *
+ * The previous equivalence table is freed first, then the container in
+ * @data (@size bytes) is processed by __load_microcode_amd(). If that does
+ * not return UCODE_OK, cleanup() is called.
+ *
+ * With both CONFIG_MICROCODE_AMD_EARLY and CONFIG_X86_32 enabled, and when
+ * @cpu has the same cpu_index as boot_cpu_data, the patch found for @cpu
+ * (if any) is additionally copied into amd_bsp_mpb.
+ *
+ * Returns the state reported by __load_microcode_amd().
+ */
+enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
+{
+	enum ucode_state ret;
+
+	/* free old equiv table */
+	free_equiv_cpu_table();
+
+	ret = __load_microcode_amd(cpu, data, size);
+
+	if (ret != UCODE_OK)
+		cleanup();
+
+#if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32)
+	/* save BSP's matching patch for early load */
+	if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) {
+		struct ucode_patch *p = find_patch(cpu);
+		if (p) {
+			/* zero the whole buffer, then copy at most MPB_MAX_SIZE */
+			memset(amd_bsp_mpb, 0, MPB_MAX_SIZE);
+			memcpy(amd_bsp_mpb, p->data, min_t(u32, ksize(p->data),
+							   MPB_MAX_SIZE));
+		}
+	}
+#endif
+	return ret;
+}
+
 /*
  * AMD microcode firmware naming convention, up to family 15h they are in
  * the legacy file:
@@ -440,12 +440,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
 		goto fw_release;
 	}
 
-	/* free old equiv table */
-	free_equiv_cpu_table();
-
 	ret = load_microcode_amd(cpu, fw->data, fw->size);
-	if (ret != UCODE_OK)
-		cleanup();
 
  fw_release:
 	release_firmware(fw);
diff --git a/arch/x86/kernel/microcode_amd_early.c b/arch/x86/kernel/microcode_amd_early.c
new file mode 100644
index 00000000000..1ac6e9aee76
--- /dev/null
+++ b/arch/x86/kernel/microcode_amd_early.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Jacob Shin <jacob.shin@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/earlycpio.h>
+#include <linux/initrd.h>
+
+#include <asm/cpu.h>
+#include <asm/setup.h>
+#include <asm/microcode_amd.h>
+
+static bool ucode_loaded;
+static u32 ucode_new_rev;
+static unsigned long ucode_offset;
+static size_t ucode_size;
+
+/*
+ * Microcode patch container file is prepended to the initrd in cpio format.
+ * See Documentation/x86/early-microcode.txt
+ */
+static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin";
+
+static struct cpio_data __init find_ucode_in_initrd(void)
+{
+	long offset = 0;
+	char *path;
+	void *start;
+	size_t size;
+	unsigned long *uoffset;
+	size_t *usize;
+	struct cpio_data cd;
+
+#ifdef CONFIG_X86_32
+	struct boot_params *p;
+
+	/*
+	 * On 32-bit, early load occurs before paging is turned on so we need
+	 * to use physical addresses.
+	 */
+	p       = (struct boot_params *)__pa_nodebug(&boot_params);
+	path    = (char *)__pa_nodebug(ucode_path);
+	start   = (void *)p->hdr.ramdisk_image;
+	size    = p->hdr.ramdisk_size;
+	uoffset = (unsigned long *)__pa_nodebug(&ucode_offset);
+	usize   = (size_t *)__pa_nodebug(&ucode_size);
+#else
+	path    = ucode_path;
+	start   = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET);
+	size    = boot_params.hdr.ramdisk_size;
+	uoffset = &ucode_offset;
+	usize   = &ucode_size;
+#endif
+
+	cd = find_cpio_data(path, start, size, &offset);
+	if (!cd.data)
+		return cd;
+
+	if (*(u32 *)cd.data != UCODE_MAGIC) {
+		cd.data = NULL;
+		cd.size = 0;
+		return cd;
+	}
+
+	*uoffset = (u8 *)cd.data - (u8 *)start;
+	*usize   = cd.size;
+
+	return cd;
+}
+
+/*
+ * Early load occurs before we can vmalloc(). So we look for the microcode
+ * patch container file in initrd, traverse equivalent cpu table, look for a
+ * matching microcode patch, and update, all in initrd memory in place.
+ * When vmalloc() is available for use later -- on 64-bit during first AP load,
+ * and on 32-bit during save_microcode_in_initrd_amd() -- we can call
+ * load_microcode_amd() to save equivalent cpu table and microcode patches in
+ * kernel heap memory.
+ */
+static void __cpuinit apply_ucode_in_initrd(void *ucode, size_t size)
+{
+	struct equiv_cpu_entry *eq;
+	u32 *header;
+	u8  *data;
+	u16 eq_id = 0;
+	int offset, left;
+	u32 rev, eax;
+	u32 *new_rev;
+	unsigned long *uoffset;
+	size_t *usize;
+
+#ifdef CONFIG_X86_32
+	new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
+	uoffset = (unsigned long *)__pa_nodebug(&ucode_offset);
+	usize   = (size_t *)__pa_nodebug(&ucode_size);
+#else
+	new_rev = &ucode_new_rev;
+	uoffset = &ucode_offset;
+	usize   = &ucode_size;
+#endif
+
+	data   = ucode;
+	left   = size;
+	header = (u32 *)data;
+
+	/* find equiv cpu table */
+
+	if (header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
+	    header[2] == 0)                            /* size */
+		return;
+
+	eax = cpuid_eax(0x00000001);
+
+	while (left > 0) {
+		eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ);
+
+		offset = header[2] + CONTAINER_HDR_SZ;
+		data  += offset;
+		left  -= offset;
+
+		eq_id = find_equiv_id(eq, eax);
+		if (eq_id)
+			break;
+
+		/*
+		 * Support multiple container files appended together. If this
+		 * one does not have a matching equivalent CPU entry, we fast
+		 * forward to the next container file.
+		 */
+		while (left > 0) {
+			header = (u32 *)data;
+			if (header[0] == UCODE_MAGIC &&
+			    header[1] == UCODE_EQUIV_CPU_TABLE_TYPE)
+				break;
+
+			offset = header[1] + SECTION_HDR_SIZE;
+			data  += offset;
+			left  -= offset;
+		}
+
+		/* mark where the next microcode container file starts */
+		offset    = data - (u8 *)ucode;
+		*uoffset += offset;
+		*usize   -= offset;
+		ucode     = data;
+	}
+
+	if (!eq_id) {
+		*usize = 0;
+		return;
+	}
+
+	/* find ucode and update if needed */
+
+	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
+
+	while (left > 0) {
+		struct microcode_amd *mc;
+
+		header = (u32 *)data;
+		if (header[0] != UCODE_UCODE_TYPE || /* type */
+		    header[1] == 0)                  /* size */
+			break;
+
+		mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE);
+		if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id)
+			if (__apply_microcode_amd(mc) == 0) {
+				rev = mc->hdr.patch_id;
+				*new_rev = rev;
+			}
+
+		offset  = header[1] + SECTION_HDR_SIZE;
+		data   += offset;
+		left   -= offset;
+	}
+
+	/* mark where this microcode container file ends */
+	offset  = *usize - (data - (u8 *)ucode);
+	*usize -= offset;
+
+	if (!(*new_rev))
+		*usize = 0;
+}
+
+void __init load_ucode_amd_bsp(void)
+{
+	struct cpio_data cd = find_ucode_in_initrd();
+	if (!cd.data)
+		return;
+
+	apply_ucode_in_initrd(cd.data, cd.size);
+}
+
+#ifdef CONFIG_X86_32
+u8 amd_bsp_mpb[MPB_MAX_SIZE];
+
+/*
+ * On 32-bit, since AP's early load occurs before paging is turned on, we
+ * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during
+ * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During
+ * save_microcode_in_initrd_amd() BSP's patch is copied to amd_bsp_mpb, which
+ * is used upon resume from suspend.
+ */
+void __cpuinit load_ucode_amd_ap(void)
+{
+	struct microcode_amd *mc;
+	unsigned long *initrd;
+	unsigned long *uoffset;
+	size_t *usize;
+	void *ucode;
+
+	mc = (struct microcode_amd *)__pa(amd_bsp_mpb);
+	if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
+		__apply_microcode_amd(mc);
+		return;
+	}
+
+	initrd  = (unsigned long *)__pa(&initrd_start);
+	uoffset = (unsigned long *)__pa(&ucode_offset);
+	usize   = (size_t *)__pa(&ucode_size);
+
+	if (!*usize || !*initrd)
+		return;
+
+	ucode = (void *)((unsigned long)__pa(*initrd) + *uoffset);
+	apply_ucode_in_initrd(ucode, *usize);
+}
+
+static void __init collect_cpu_sig_on_bsp(void *arg)
+{
+	unsigned int cpu = smp_processor_id();
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	uci->cpu_sig.sig = cpuid_eax(0x00000001);
+}
+#else
+static void __cpuinit collect_cpu_info_amd_early(struct cpuinfo_x86 *c,
+						 struct ucode_cpu_info *uci)
+{
+	u32 rev, eax;
+
+	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
+	eax = cpuid_eax(0x00000001);
+
+	uci->cpu_sig.sig = eax;
+	uci->cpu_sig.rev = rev;
+	c->microcode = rev;
+	c->x86 = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+}
+
+void __cpuinit load_ucode_amd_ap(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	collect_cpu_info_amd_early(&cpu_data(cpu), ucode_cpu_info + cpu);
+
+	if (cpu && !ucode_loaded) {
+		void *ucode;
+
+		if (!ucode_size || !initrd_start)
+			return;
+
+		ucode = (void *)(initrd_start + ucode_offset);
+		if (load_microcode_amd(0, ucode, ucode_size) != UCODE_OK)
+			return;
+		ucode_loaded = true;
+	}
+
+	apply_microcode_amd(cpu);
+}
+#endif
+
+int __init save_microcode_in_initrd_amd(void)
+{
+	enum ucode_state ret;
+	void *ucode;
+#ifdef CONFIG_X86_32
+	unsigned int bsp = boot_cpu_data.cpu_index;
+	struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
+
+	if (!uci->cpu_sig.sig)
+		smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
+#endif
+	if (ucode_new_rev)
+		pr_info("microcode: updated early to new patch_level=0x%08x\n",
+			ucode_new_rev);
+
+	if (ucode_loaded || !ucode_size || !initrd_start)
+		return 0;
+
+	ucode = (void *)(initrd_start + ucode_offset);
+	ret = load_microcode_amd(0, ucode, ucode_size);
+	if (ret != UCODE_OK)
+		return -EINVAL;
+
+	ucode_loaded = true;
+	return 0;
+}
diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c
index 833d51d6ee0..86119f63db0 100644
--- a/arch/x86/kernel/microcode_core_early.c
+++ b/arch/x86/kernel/microcode_core_early.c
@@ -18,6 +18,7 @@
  */
 #include <linux/module.h>
 #include <asm/microcode_intel.h>
+#include <asm/microcode_amd.h>
 #include <asm/processor.h>
 
 #define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
@@ -81,8 +82,18 @@ void __init load_ucode_bsp(void)
 	vendor = x86_vendor();
 	x86 = x86_family();
 
-	if (vendor == X86_VENDOR_INTEL && x86 >= 6)
-		load_ucode_intel_bsp();
+	switch (vendor) {
+	case X86_VENDOR_INTEL:
+		if (x86 >= 6)
+			load_ucode_intel_bsp();
+		break;
+	case X86_VENDOR_AMD:
+		if (x86 >= 0x10)
+			load_ucode_amd_bsp();
+		break;
+	default:
+		break;
+	}
 }
 
 void __cpuinit load_ucode_ap(void)
@@ -95,6 +106,43 @@ void __cpuinit load_ucode_ap(void)
 	vendor = x86_vendor();
 	x86 = x86_family();
 
-	if (vendor == X86_VENDOR_INTEL && x86 >= 6)
-		load_ucode_intel_ap();
+	switch (vendor) {
+	case X86_VENDOR_INTEL:
+		if (x86 >= 6)
+			load_ucode_intel_ap();
+		break;
+	case X86_VENDOR_AMD:
+		if (x86 >= 0x10)
+			load_ucode_amd_ap();
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Call the vendor-specific save_microcode_in_initrd_*() routine for the
+ * boot CPU (Intel family >= 6, AMD family >= 0x10).
+ *
+ * Returns the vendor routine's result, or 0 when no routine applies.
+ */
+int __init save_microcode_in_initrd(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+	int ret = 0;
+
+	switch (c->x86_vendor) {
+	case X86_VENDOR_INTEL:
+		if (c->x86 >= 6)
+			ret = save_microcode_in_initrd_intel();
+		break;
+	case X86_VENDOR_AMD:
+		if (c->x86 >= 0x10)
+			ret = save_microcode_in_initrd_amd();
+		break;
+	default:
+		break;
+	}
+
+	return ret;
 }
diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c
index 2e9e12871c2..dabef95506f 100644
--- a/arch/x86/kernel/microcode_intel_early.c
+++ b/arch/x86/kernel/microcode_intel_early.c
@@ -529,7 +529,7 @@ int save_mc_for_early(u8 *mc)
 	 */
 	ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
 	if (ret) {
-		pr_err("Can not save microcode patch.\n");
+		pr_err("Cannot save microcode patch.\n");
 		goto out;
 	}
 
@@ -699,7 +699,7 @@ static int __cpuinit apply_microcode_early(struct mc_saved_data *mc_saved_data,
  * This function converts microcode patch offsets previously stored in
  * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
  */
-int __init save_microcode_in_initrd(void)
+int __init save_microcode_in_initrd_intel(void)
 {
 	unsigned int count = mc_saved_data.mc_saved_count;
 	struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
@@ -711,7 +711,7 @@ int __init save_microcode_in_initrd(void)
 	microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count);
 	ret = save_microcode(&mc_saved_data, mc_saved, count);
 	if (ret)
-		pr_err("Can not save microcod patches from initrd");
+		pr_err("Cannot save microcode patches from initrd.\n");
 
 	show_saved_mc();
 
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 60308053fdb..0920212e615 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -14,6 +14,7 @@
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
 #include <linux/nmi.h>
+#include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/hardirq.h>
 #include <linux/slab.h>
@@ -29,6 +30,9 @@
 #include <asm/nmi.h>
 #include <asm/x86_init.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/nmi.h>
+
 struct nmi_desc {
 	spinlock_t lock;
 	struct list_head head;
@@ -82,6 +86,15 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
 
 #define nmi_to_desc(type) (&nmi_desc[type])
 
+static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;
+static int __init nmi_warning_debugfs(void)
+{
+	debugfs_create_u64("nmi_longest_ns", 0644,
+			arch_debugfs_dir, &nmi_longest_ns);
+	return 0;
+}
+fs_initcall(nmi_warning_debugfs);
+
 static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
 {
 	struct nmi_desc *desc = nmi_to_desc(type);
@@ -96,8 +109,35 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
 	 * can be latched at any given time.  Walk the whole list
 	 * to handle those situations.
 	 */
-	list_for_each_entry_rcu(a, &desc->head, list)
-		handled += a->handler(type, regs);
+	list_for_each_entry_rcu(a, &desc->head, list) {
+		u64 before, delta, whole_msecs;
+		int remainder_ns, decimal_msecs, thishandled;
+
+		/* Time each handler so unusually slow ones can be reported. */
+		before = local_clock();
+		thishandled = a->handler(type, regs);
+		handled += thishandled;
+		delta = local_clock() - before;
+		trace_nmi_handler(a->handler, (int)delta, thishandled);
+
+		if (delta < nmi_longest_ns)
+			continue;
+
+		/*
+		 * do_div() divides its first argument in place and returns
+		 * the remainder, so work on a copy of delta: the quotient is
+		 * the whole milliseconds and the remainder (in ns) supplies
+		 * the three fractional digits.
+		 */
+		nmi_longest_ns = delta;
+		whole_msecs = delta;
+		remainder_ns = do_div(whole_msecs, (1000 * 1000));
+		decimal_msecs = remainder_ns / 1000;
+		printk_ratelimited(KERN_INFO
+			"INFO: NMI handler (%ps) took too long to run: "
+			"%lld.%03d msecs\n", a->handler, whole_msecs,
+			decimal_msecs);
+	}
 
 	rcu_read_unlock();
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7305f7dfc7a..f8adefca71d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -110,11 +110,16 @@ void __show_regs(struct pt_regs *regs, int all)
 	get_debugreg(d1, 1);
 	get_debugreg(d2, 2);
 	get_debugreg(d3, 3);
-	printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
-			d0, d1, d2, d3);
-
 	get_debugreg(d6, 6);
 	get_debugreg(d7, 7);
+
+	/* Only print out debug registers if they are in their non-default state. */
+	if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
+	    (d6 == DR6_RESERVED) && (d7 == 0x400))
+		return;
+
+	printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
+			d0, d1, d2, d3);
 	printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n",
 			d6, d7);
 }
@@ -147,7 +152,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		childregs->bp = arg;
 		childregs->orig_ax = -1;
 		childregs->cs = __KERNEL_CS | get_kernel_rpl();
-		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
 		p->fpu_counter = 0;
 		p->thread.io_bitmap_ptr = NULL;
 		memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 355ae06dbf9..05646bab4ca 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -105,11 +105,18 @@ void __show_regs(struct pt_regs *regs, int all)
 	get_debugreg(d0, 0);
 	get_debugreg(d1, 1);
 	get_debugreg(d2, 2);
-	printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
 	get_debugreg(d3, 3);
 	get_debugreg(d6, 6);
 	get_debugreg(d7, 7);
+
+	/* Only print out debug registers if they are in their non-default state. */
+	if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
+	    (d6 == DR6_RESERVED) && (d7 == 0x400))
+		return;
+
+	printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
 	printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
+
 }
 
 void release_thread(struct task_struct *dead_task)
@@ -176,7 +183,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		childregs->bp = arg;
 		childregs->orig_ax = -1;
 		childregs->cs = __KERNEL_CS | get_kernel_rpl();
-		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
 		return 0;
 	}
 	*childregs = *current_pt_regs();
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 36818f8ec2b..e13f8e7c22a 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -186,7 +186,7 @@ identity_mapped:
 	movl	CP_PA_PGD(%ebx), %eax
 	movl	%eax, %cr3
 	movl	%cr0, %eax
-	orl	$(1<<31), %eax
+	orl	$X86_CR0_PG, %eax
 	movl	%eax, %cr0
 	lea	PAGE_SIZE(%edi), %esp
 	movl	%edi, %eax
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index f2bb9c96720..3fd2c693e47 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -151,21 +151,21 @@ identity_mapped:
 
 	testq	%r11, %r11
 	jnz 1f
-	xorq	%rax, %rax
-	xorq	%rbx, %rbx
-	xorq    %rcx, %rcx
-	xorq    %rdx, %rdx
-	xorq    %rsi, %rsi
-	xorq    %rdi, %rdi
-	xorq    %rbp, %rbp
-	xorq	%r8,  %r8
-	xorq	%r9,  %r9
-	xorq	%r10, %r10
-	xorq	%r11, %r11
-	xorq	%r12, %r12
-	xorq	%r13, %r13
-	xorq	%r14, %r14
-	xorq	%r15, %r15
+	xorl	%eax, %eax
+	xorl	%ebx, %ebx
+	xorl    %ecx, %ecx
+	xorl    %edx, %edx
+	xorl    %esi, %esi
+	xorl    %edi, %edi
+	xorl    %ebp, %ebp
+	xorl	%r8d, %r8d
+	xorl	%r9d, %r9d
+	xorl	%r10d, %r10d
+	xorl	%r11d, %r11d
+	xorl	%r12d, %r12d
+	xorl	%r13d, %r13d
+	xorl	%r14d, %r14d
+	xorl	%r15d, %r15d
 
 	ret
 
@@ -212,8 +212,8 @@ virtual_mapped:
 	/* Do the copies */
 swap_pages:
 	movq	%rdi, %rcx 	/* Put the page_list in %rcx */
-	xorq	%rdi, %rdi
-	xorq	%rsi, %rsi
+	xorl	%edi, %edi
+	xorl	%esi, %esi
 	jmp	1f
 
 0:	/* top, read another word for the indirection page */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 56f7fcfe7fa..e68709da825 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1040,8 +1040,6 @@ void __init setup_arch(char **cmdline_p)
 	/* max_low_pfn get updated here */
 	find_low_pfn_range();
 #else
-	num_physpages = max_pfn;
-
 	check_x2apic();
 
 	/* How many end-of-memory variables you have, grandma! */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 69562992e45..cf913587d4d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -43,12 +43,6 @@
 
 #include <asm/sigframe.h>
 
-#ifdef CONFIG_X86_32
-# define FIX_EFLAGS	(__FIX_EFLAGS | X86_EFLAGS_RF)
-#else
-# define FIX_EFLAGS	__FIX_EFLAGS
-#endif
-
 #define COPY(x)			do {			\
 	get_user_ex(regs->x, &sc->x);			\
 } while (0)
@@ -668,15 +662,17 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 	if (!failed) {
 		/*
 		 * Clear the direction flag as per the ABI for function entry.
-		 */
-		regs->flags &= ~X86_EFLAGS_DF;
-		/*
+		 *
+		 * Clear RF when entering the signal handler, because
+		 * it might otherwise suppress a debug exception raised
+		 * in the signal handler.
+		 *
 		 * Clear TF when entering the signal handler, but
 		 * notify any tracer that was single-stepping it.
 		 * The tracer may want to single-step inside the
 		 * handler too.
 		 */
-		regs->flags &= ~X86_EFLAGS_TF;
+		regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF);
 	}
 	signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
 }
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 48d2b7ded42..f4fe0b8879e 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -30,6 +30,7 @@
 #include <asm/proto.h>
 #include <asm/apic.h>
 #include <asm/nmi.h>
+#include <asm/trace/irq_vectors.h>
 /*
  *	Some notes on x86 processor bugs affecting SMP operation:
  *
@@ -249,32 +250,80 @@ finish:
 /*
  * Reschedule call back.
  */
-void smp_reschedule_interrupt(struct pt_regs *regs)
+static inline void __smp_reschedule_interrupt(void)
 {
-	ack_APIC_irq();
 	inc_irq_stat(irq_resched_count);
 	scheduler_ipi();
+}
+
+void smp_reschedule_interrupt(struct pt_regs *regs)
+{
+	ack_APIC_irq();
+	__smp_reschedule_interrupt();
 	/*
 	 * KVM uses this interrupt to force a cpu out of guest mode
 	 */
 }
 
-void smp_call_function_interrupt(struct pt_regs *regs)
+void smp_trace_reschedule_interrupt(struct pt_regs *regs)
+{
+	ack_APIC_irq();
+	trace_reschedule_entry(RESCHEDULE_VECTOR);
+	__smp_reschedule_interrupt();
+	trace_reschedule_exit(RESCHEDULE_VECTOR);
+	/*
+	 * KVM uses this interrupt to force a cpu out of guest mode
+	 */
+}
+
+static inline void call_function_entering_irq(void)
 {
 	ack_APIC_irq();
 	irq_enter();
+}
+
+static inline void __smp_call_function_interrupt(void)
+{
 	generic_smp_call_function_interrupt();
 	inc_irq_stat(irq_call_count);
-	irq_exit();
 }
 
-void smp_call_function_single_interrupt(struct pt_regs *regs)
+void smp_call_function_interrupt(struct pt_regs *regs)
+{
+	call_function_entering_irq();
+	__smp_call_function_interrupt();
+	exiting_irq();
+}
+
+void smp_trace_call_function_interrupt(struct pt_regs *regs)
+{
+	call_function_entering_irq();
+	trace_call_function_entry(CALL_FUNCTION_VECTOR);
+	__smp_call_function_interrupt();
+	trace_call_function_exit(CALL_FUNCTION_VECTOR);
+	exiting_irq();
+}
+
+static inline void __smp_call_function_single_interrupt(void)
 {
-	ack_APIC_irq();
-	irq_enter();
 	generic_smp_call_function_single_interrupt();
 	inc_irq_stat(irq_call_count);
-	irq_exit();
+}
+
+void smp_call_function_single_interrupt(struct pt_regs *regs)
+{
+	call_function_entering_irq();
+	__smp_call_function_single_interrupt();
+	exiting_irq();
+}
+
+void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
+{
+	call_function_entering_irq();
+	trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
+	__smp_call_function_single_interrupt();
+	trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
+	exiting_irq();
 }
 
 static int __init nonmi_ipi_setup(char *str)
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index f84fe00fad4..3ff42d2f046 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -31,6 +31,7 @@
 #include <linux/pfn.h>
 #include <linux/mm.h>
 #include <linux/tboot.h>
+#include <linux/debugfs.h>
 
 #include <asm/realmode.h>
 #include <asm/processor.h>
@@ -338,6 +339,73 @@ static struct notifier_block tboot_cpu_notifier __cpuinitdata =
 	.notifier_call = tboot_cpu_callback,
 };
 
+#ifdef CONFIG_DEBUG_FS
+
+#define TBOOT_LOG_UUID	{ 0x26, 0x25, 0x19, 0xc0, 0x30, 0x6b, 0xb4, 0x4d, \
+			  0x4c, 0x84, 0xa3, 0xe9, 0x53, 0xb8, 0x81, 0x74 }
+
+#define TBOOT_SERIAL_LOG_ADDR	0x60000
+#define TBOOT_SERIAL_LOG_SIZE	0x08000
+#define LOG_MAX_SIZE_OFF	16
+#define LOG_BUF_OFF		24
+
+static uint8_t tboot_log_uuid[16] = TBOOT_LOG_UUID;
+
+static ssize_t tboot_log_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos)
+{
+	void __iomem *log_base;
+	u8 log_uuid[16];
+	u32 max_size;
+	void *kbuf;
+	int ret = -EFAULT;
+
+	log_base = ioremap_nocache(TBOOT_SERIAL_LOG_ADDR, TBOOT_SERIAL_LOG_SIZE);
+	if (!log_base)
+		return ret;
+
+	memcpy_fromio(log_uuid, log_base, sizeof(log_uuid));
+	if (memcmp(&tboot_log_uuid, log_uuid, sizeof(log_uuid)))
+		goto err_iounmap;
+
+	max_size = readl(log_base + LOG_MAX_SIZE_OFF);
+	if (*ppos >= max_size) {
+		ret = 0;
+		goto err_iounmap;
+	}
+
+	if (*ppos + count > max_size)
+		count = max_size - *ppos;
+
+	kbuf = kmalloc(count, GFP_KERNEL);
+	if (!kbuf) {
+		ret = -ENOMEM;
+		goto err_iounmap;
+	}
+
+	memcpy_fromio(kbuf, log_base + LOG_BUF_OFF + *ppos, count);
+	if (copy_to_user(user_buf, kbuf, count))
+		goto err_kfree;
+
+	*ppos += count;
+
+	ret = count;
+
+err_kfree:
+	kfree(kbuf);
+
+err_iounmap:
+	iounmap(log_base);
+
+	return ret;
+}
+
+static const struct file_operations tboot_log_fops = {
+	.read	= tboot_log_read,
+	.llseek	= default_llseek,
+};
+
+#endif /* CONFIG_DEBUG_FS */
+
 static __init int tboot_late_init(void)
 {
 	if (!tboot_enabled())
@@ -348,6 +416,11 @@ static __init int tboot_late_init(void)
 	atomic_set(&ap_wfs_count, 0);
 	register_hotcpu_notifier(&tboot_cpu_notifier);
 
+#ifdef CONFIG_DEBUG_FS
+	debugfs_create_file("tboot_log", S_IRUSR,
+			arch_debugfs_dir, NULL, &tboot_log_fops);
+#endif
+
 	acpi_os_set_prepare_sleep(&tboot_sleep);
 	return 0;
 }
diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
new file mode 100644
index 00000000000..4e584a8d6ed
--- /dev/null
+++ b/arch/x86/kernel/tracepoint.c
@@ -0,0 +1,61 @@
+/*
+ * Code for supporting irq vector tracepoints.
+ *
+ * Copyright (C) 2013 Seiji Aguchi <seiji.aguchi@hds.com>
+ *
+ */
+#include <asm/hw_irq.h>
+#include <asm/desc.h>
+#include <linux/atomic.h>
+
+atomic_t trace_idt_ctr = ATOMIC_INIT(0);
+struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
+				(unsigned long) trace_idt_table };
+
+#ifndef CONFIG_X86_64
+gate_desc trace_idt_table[NR_VECTORS] __page_aligned_data
+					= { { { { 0, 0 } } }, };
+#endif
+
+static int trace_irq_vector_refcount;
+static DEFINE_MUTEX(irq_vector_mutex);
+
+static void set_trace_idt_ctr(int val)
+{
+	atomic_set(&trace_idt_ctr, val);
+	/* Ensure the trace_idt_ctr is set before sending IPI */
+	wmb();
+}
+
+static void switch_idt(void *arg)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	load_current_idt();
+	local_irq_restore(flags);
+}
+
+void trace_irq_vector_regfunc(void)
+{
+	mutex_lock(&irq_vector_mutex);
+	if (!trace_irq_vector_refcount) {
+		set_trace_idt_ctr(1);
+		smp_call_function(switch_idt, NULL, 0);
+		switch_idt(NULL);
+	}
+	trace_irq_vector_refcount++;
+	mutex_unlock(&irq_vector_mutex);
+}
+
+void trace_irq_vector_unregfunc(void)
+{
+	mutex_lock(&irq_vector_mutex);
+	trace_irq_vector_refcount--;
+	if (!trace_irq_vector_refcount) {
+		set_trace_idt_ctr(0);
+		smp_call_function(switch_idt, NULL, 0);
+		switch_idt(NULL);
+	}
+	mutex_unlock(&irq_vector_mutex);
+}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 772e2a846de..b0865e88d3c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -254,6 +254,9 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_nr = X86_TRAP_DF;
 
+#ifdef CONFIG_DOUBLEFAULT
+	df_debug(regs, error_code);
+#endif
 	/*
 	 * This is always a kernel trap and never fixable (and thus must
 	 * never return).
@@ -437,7 +440,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 	/* Store the virtualized DR6 value */
 	tsk->thread.debugreg6 = dr6;
 
-	if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
+	if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code,
 							SIGTRAP) == NOTIFY_STOP)
 		goto exit;
 
@@ -785,7 +788,7 @@ void __init trap_init(void)
 	x86_init.irqs.trap_init();
 
 #ifdef CONFIG_X86_64
-	memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16);
+	memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
 	set_nmi_gate(X86_TRAP_DB, &debug);
 	set_nmi_gate(X86_TRAP_BP, &int3);
 #endif
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index ada87a329ed..d6c28acdf99 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -243,7 +243,7 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 	if (!access_ok(VERIFY_WRITE, buf, size))
 		return -EACCES;
 
-	if (!HAVE_HWFP)
+	if (!static_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_get(current, NULL, 0,
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_ia32 __user *) buf) ? -1 : 1;
@@ -350,11 +350,10 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 	if (!used_math() && init_fpu(tsk))
 		return -1;
 
-	if (!HAVE_HWFP) {
+	if (!static_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_set(current, NULL,
 				       0, sizeof(struct user_i387_ia32_struct),
 				       NULL, buf) != 0;
-	}
 
 	if (use_xsave()) {
 		struct _fpx_sw_bytes fx_sw_user;
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d609e1d8404..bf4fb04d011 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -5,12 +5,13 @@ CFLAGS_x86.o := -I.
 CFLAGS_svm.o := -I.
 CFLAGS_vmx.o := -I.
 
-kvm-y			+= $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-				coalesced_mmio.o irq_comm.o eventfd.o \
-				irqchip.o)
-kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(addprefix ../../../virt/kvm/, \
-				assigned-dev.o iommu.o)
-kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(addprefix ../../../virt/kvm/, async_pf.o)
+KVM := ../../../virt/kvm
+
+kvm-y			+= $(KVM)/kvm_main.o $(KVM)/ioapic.o \
+				$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
+				$(KVM)/eventfd.o $(KVM)/irqchip.o
+kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(KVM)/assigned-dev.o $(KVM)/iommu.o
+kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
 			   i8254.o cpuid.o pmu.o
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5953dcea752..2bc1e81045b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -61,6 +61,8 @@
 #define OpMem8            26ull  /* 8-bit zero extended memory operand */
 #define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */
 #define OpXLat            28ull  /* memory at BX/EBX/RBX + zero-extended AL */
+#define OpAccLo           29ull  /* Low part of extended acc (AX/AX/EAX/RAX) */
+#define OpAccHi           30ull  /* High part of extended acc (-/DX/EDX/RDX) */
 
 #define OpBits             5  /* Width of operand field */
 #define OpMask             ((1ull << OpBits) - 1)
@@ -86,6 +88,7 @@
 #define DstMem64    (OpMem64 << DstShift)
 #define DstImmUByte (OpImmUByte << DstShift)
 #define DstDX       (OpDX << DstShift)
+#define DstAccLo    (OpAccLo << DstShift)
 #define DstMask     (OpMask << DstShift)
 /* Source operand type. */
 #define SrcShift    6
@@ -108,6 +111,7 @@
 #define SrcImm64    (OpImm64 << SrcShift)
 #define SrcDX       (OpDX << SrcShift)
 #define SrcMem8     (OpMem8 << SrcShift)
+#define SrcAccHi    (OpAccHi << SrcShift)
 #define SrcMask     (OpMask << SrcShift)
 #define BitOp       (1<<11)
 #define MemAbs      (1<<12)      /* Memory operand is absolute displacement */
@@ -138,6 +142,7 @@
 /* Source 2 operand type */
 #define Src2Shift   (31)
 #define Src2None    (OpNone << Src2Shift)
+#define Src2Mem     (OpMem << Src2Shift)
 #define Src2CL      (OpCL << Src2Shift)
 #define Src2ImmByte (OpImmByte << Src2Shift)
 #define Src2One     (OpOne << Src2Shift)
@@ -155,6 +160,9 @@
 #define Avx         ((u64)1 << 43)  /* Advanced Vector Extensions */
 #define Fastop      ((u64)1 << 44)  /* Use opcode::u.fastop */
 #define NoWrite     ((u64)1 << 45)  /* No writeback */
+#define SrcWrite    ((u64)1 << 46)  /* Write back src operand */
+
+#define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
 
 #define X2(x...) x, x
 #define X3(x...) X2(x), x
@@ -171,10 +179,11 @@
 /*
  * fastop functions have a special calling convention:
  *
- * dst:    [rdx]:rax  (in/out)
- * src:    rbx        (in/out)
+ * dst:    rax        (in/out)
+ * src:    rdx        (in/out)
  * src2:   rcx        (in)
  * flags:  rflags     (in/out)
+ * ex:     rsi        (in:fastop pointer, out:zero if exception)
  *
  * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
  * different operand sizes can be reached by calculation, rather than a jump
@@ -276,174 +285,17 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
 }
 
 /*
- * Instruction emulation:
- * Most instructions are emulated directly via a fragment of inline assembly
- * code. This allows us to save/restore EFLAGS and thus very easily pick up
- * any modified flags.
- */
-
-#if defined(CONFIG_X86_64)
-#define _LO32 "k"		/* force 32-bit operand */
-#define _STK  "%%rsp"		/* stack pointer */
-#elif defined(__i386__)
-#define _LO32 ""		/* force 32-bit operand */
-#define _STK  "%%esp"		/* stack pointer */
-#endif
-
-/*
  * These EFLAGS bits are restored from saved value during emulation, and
  * any changes are written back to the saved value after emulation.
  */
 #define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
 
-/* Before executing instruction: restore necessary bits in EFLAGS. */
-#define _PRE_EFLAGS(_sav, _msk, _tmp)					\
-	/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
-	"movl %"_sav",%"_LO32 _tmp"; "                                  \
-	"push %"_tmp"; "                                                \
-	"push %"_tmp"; "                                                \
-	"movl %"_msk",%"_LO32 _tmp"; "                                  \
-	"andl %"_LO32 _tmp",("_STK"); "                                 \
-	"pushf; "                                                       \
-	"notl %"_LO32 _tmp"; "                                          \
-	"andl %"_LO32 _tmp",("_STK"); "                                 \
-	"andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); "	\
-	"pop  %"_tmp"; "                                                \
-	"orl  %"_LO32 _tmp",("_STK"); "                                 \
-	"popf; "                                                        \
-	"pop  %"_sav"; "
-
-/* After executing instruction: write-back necessary bits in EFLAGS. */
-#define _POST_EFLAGS(_sav, _msk, _tmp) \
-	/* _sav |= EFLAGS & _msk; */		\
-	"pushf; "				\
-	"pop  %"_tmp"; "			\
-	"andl %"_msk",%"_LO32 _tmp"; "		\
-	"orl  %"_LO32 _tmp",%"_sav"; "
-
 #ifdef CONFIG_X86_64
 #define ON64(x) x
 #else
 #define ON64(x)
 #endif
 
-#define ____emulate_2op(ctxt, _op, _x, _y, _suffix, _dsttype)	\
-	do {								\
-		__asm__ __volatile__ (					\
-			_PRE_EFLAGS("0", "4", "2")			\
-			_op _suffix " %"_x"3,%1; "			\
-			_POST_EFLAGS("0", "4", "2")			\
-			: "=m" ((ctxt)->eflags),			\
-			  "+q" (*(_dsttype*)&(ctxt)->dst.val),		\
-			  "=&r" (_tmp)					\
-			: _y ((ctxt)->src.val), "i" (EFLAGS_MASK));	\
-	} while (0)
-
-
-/* Raw emulation: instruction has two explicit operands. */
-#define __emulate_2op_nobyte(ctxt,_op,_wx,_wy,_lx,_ly,_qx,_qy)		\
-	do {								\
-		unsigned long _tmp;					\
-									\
-		switch ((ctxt)->dst.bytes) {				\
-		case 2:							\
-			____emulate_2op(ctxt,_op,_wx,_wy,"w",u16);	\
-			break;						\
-		case 4:							\
-			____emulate_2op(ctxt,_op,_lx,_ly,"l",u32);	\
-			break;						\
-		case 8:							\
-			ON64(____emulate_2op(ctxt,_op,_qx,_qy,"q",u64)); \
-			break;						\
-		}							\
-	} while (0)
-
-#define __emulate_2op(ctxt,_op,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy)		     \
-	do {								     \
-		unsigned long _tmp;					     \
-		switch ((ctxt)->dst.bytes) {				     \
-		case 1:							     \
-			____emulate_2op(ctxt,_op,_bx,_by,"b",u8);	     \
-			break;						     \
-		default:						     \
-			__emulate_2op_nobyte(ctxt, _op,			     \
-					     _wx, _wy, _lx, _ly, _qx, _qy);  \
-			break;						     \
-		}							     \
-	} while (0)
-
-/* Source operand is byte-sized and may be restricted to just %cl. */
-#define emulate_2op_SrcB(ctxt, _op)					\
-	__emulate_2op(ctxt, _op, "b", "c", "b", "c", "b", "c", "b", "c")
-
-/* Source operand is byte, word, long or quad sized. */
-#define emulate_2op_SrcV(ctxt, _op)					\
-	__emulate_2op(ctxt, _op, "b", "q", "w", "r", _LO32, "r", "", "r")
-
-/* Source operand is word, long or quad sized. */
-#define emulate_2op_SrcV_nobyte(ctxt, _op)				\
-	__emulate_2op_nobyte(ctxt, _op, "w", "r", _LO32, "r", "", "r")
-
-/* Instruction has three operands and one operand is stored in ECX register */
-#define __emulate_2op_cl(ctxt, _op, _suffix, _type)		\
-	do {								\
-		unsigned long _tmp;					\
-		_type _clv  = (ctxt)->src2.val;				\
-		_type _srcv = (ctxt)->src.val;				\
-		_type _dstv = (ctxt)->dst.val;				\
-									\
-		__asm__ __volatile__ (					\
-			_PRE_EFLAGS("0", "5", "2")			\
-			_op _suffix " %4,%1 \n"				\
-			_POST_EFLAGS("0", "5", "2")			\
-			: "=m" ((ctxt)->eflags), "+r" (_dstv), "=&r" (_tmp) \
-			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)	\
-			);						\
-									\
-		(ctxt)->src2.val  = (unsigned long) _clv;		\
-		(ctxt)->src2.val = (unsigned long) _srcv;		\
-		(ctxt)->dst.val = (unsigned long) _dstv;		\
-	} while (0)
-
-#define emulate_2op_cl(ctxt, _op)					\
-	do {								\
-		switch ((ctxt)->dst.bytes) {				\
-		case 2:							\
-			__emulate_2op_cl(ctxt, _op, "w", u16);		\
-			break;						\
-		case 4:							\
-			__emulate_2op_cl(ctxt, _op, "l", u32);		\
-			break;						\
-		case 8:							\
-			ON64(__emulate_2op_cl(ctxt, _op, "q", ulong));	\
-			break;						\
-		}							\
-	} while (0)
-
-#define __emulate_1op(ctxt, _op, _suffix)				\
-	do {								\
-		unsigned long _tmp;					\
-									\
-		__asm__ __volatile__ (					\
-			_PRE_EFLAGS("0", "3", "2")			\
-			_op _suffix " %1; "				\
-			_POST_EFLAGS("0", "3", "2")			\
-			: "=m" ((ctxt)->eflags), "+m" ((ctxt)->dst.val), \
-			  "=&r" (_tmp)					\
-			: "i" (EFLAGS_MASK));				\
-	} while (0)
-
-/* Instruction has only one explicit operand (no source operand). */
-#define emulate_1op(ctxt, _op)						\
-	do {								\
-		switch ((ctxt)->dst.bytes) {				\
-		case 1:	__emulate_1op(ctxt, _op, "b"); break;		\
-		case 2:	__emulate_1op(ctxt, _op, "w"); break;		\
-		case 4:	__emulate_1op(ctxt, _op, "l"); break;		\
-		case 8:	ON64(__emulate_1op(ctxt, _op, "q")); break;	\
-		}							\
-	} while (0)
-
 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
 
 #define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t"
@@ -462,7 +314,10 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
 #define FOPNOP() FOP_ALIGN FOP_RET
 
 #define FOP1E(op,  dst) \
-	FOP_ALIGN #op " %" #dst " \n\t" FOP_RET
+	FOP_ALIGN "10: " #op " %" #dst " \n\t" FOP_RET
+
+#define FOP1EEX(op,  dst) \
+	FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)
 
 #define FASTOP1(op) \
 	FOP_START(op) \
@@ -472,24 +327,42 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
 	ON64(FOP1E(op##q, rax))	\
 	FOP_END
 
+/* 1-operand, using src2 (for MUL/DIV r/m) */
+#define FASTOP1SRC2(op, name) \
+	FOP_START(name) \
+	FOP1E(op, cl) \
+	FOP1E(op, cx) \
+	FOP1E(op, ecx) \
+	ON64(FOP1E(op, rcx)) \
+	FOP_END
+
+/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
+#define FASTOP1SRC2EX(op, name) \
+	FOP_START(name) \
+	FOP1EEX(op, cl) \
+	FOP1EEX(op, cx) \
+	FOP1EEX(op, ecx) \
+	ON64(FOP1EEX(op, rcx)) \
+	FOP_END
+
 #define FOP2E(op,  dst, src)	   \
 	FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET
 
 #define FASTOP2(op) \
 	FOP_START(op) \
-	FOP2E(op##b, al, bl) \
-	FOP2E(op##w, ax, bx) \
-	FOP2E(op##l, eax, ebx) \
-	ON64(FOP2E(op##q, rax, rbx)) \
+	FOP2E(op##b, al, dl) \
+	FOP2E(op##w, ax, dx) \
+	FOP2E(op##l, eax, edx) \
+	ON64(FOP2E(op##q, rax, rdx)) \
 	FOP_END
 
 /* 2 operand, word only */
 #define FASTOP2W(op) \
 	FOP_START(op) \
 	FOPNOP() \
-	FOP2E(op##w, ax, bx) \
-	FOP2E(op##l, eax, ebx) \
-	ON64(FOP2E(op##q, rax, rbx)) \
+	FOP2E(op##w, ax, dx) \
+	FOP2E(op##l, eax, edx) \
+	ON64(FOP2E(op##q, rax, rdx)) \
 	FOP_END
 
 /* 2 operand, src is CL */
@@ -508,14 +381,17 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
 #define FASTOP3WCL(op) \
 	FOP_START(op) \
 	FOPNOP() \
-	FOP3E(op##w, ax, bx, cl) \
-	FOP3E(op##l, eax, ebx, cl) \
-	ON64(FOP3E(op##q, rax, rbx, cl)) \
+	FOP3E(op##w, ax, dx, cl) \
+	FOP3E(op##l, eax, edx, cl) \
+	ON64(FOP3E(op##q, rax, rdx, cl)) \
 	FOP_END
 
 /* Special case for SETcc - 1 instruction per cc */
 #define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t"
 
+asm(".global kvm_fastop_exception \n"
+    "kvm_fastop_exception: xor %esi, %esi; ret");
+
 FOP_START(setcc)
 FOP_SETCC(seto)
 FOP_SETCC(setno)
@@ -538,47 +414,6 @@ FOP_END;
 FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
 FOP_END;
 
-#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex)			\
-	do {								\
-		unsigned long _tmp;					\
-		ulong *rax = reg_rmw((ctxt), VCPU_REGS_RAX);		\
-		ulong *rdx = reg_rmw((ctxt), VCPU_REGS_RDX);		\
-									\
-		__asm__ __volatile__ (					\
-			_PRE_EFLAGS("0", "5", "1")			\
-			"1: \n\t"					\
-			_op _suffix " %6; "				\
-			"2: \n\t"					\
-			_POST_EFLAGS("0", "5", "1")			\
-			".pushsection .fixup,\"ax\" \n\t"		\
-			"3: movb $1, %4 \n\t"				\
-			"jmp 2b \n\t"					\
-			".popsection \n\t"				\
-			_ASM_EXTABLE(1b, 3b)				\
-			: "=m" ((ctxt)->eflags), "=&r" (_tmp),		\
-			  "+a" (*rax), "+d" (*rdx), "+qm"(_ex)		\
-			: "i" (EFLAGS_MASK), "m" ((ctxt)->src.val));	\
-	} while (0)
-
-/* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */
-#define emulate_1op_rax_rdx(ctxt, _op, _ex)	\
-	do {								\
-		switch((ctxt)->src.bytes) {				\
-		case 1:							\
-			__emulate_1op_rax_rdx(ctxt, _op, "b", _ex);	\
-			break;						\
-		case 2:							\
-			__emulate_1op_rax_rdx(ctxt, _op, "w", _ex);	\
-			break;						\
-		case 4:							\
-			__emulate_1op_rax_rdx(ctxt, _op, "l", _ex);	\
-			break;						\
-		case 8: ON64(						\
-			__emulate_1op_rax_rdx(ctxt, _op, "q", _ex));	\
-			break;						\
-		}							\
-	} while (0)
-
 static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
 				    enum x86_intercept intercept,
 				    enum x86_intercept_stage stage)
@@ -988,6 +823,11 @@ FASTOP2(xor);
 FASTOP2(cmp);
 FASTOP2(test);
 
+FASTOP1SRC2(mul, mul_ex);
+FASTOP1SRC2(imul, imul_ex);
+FASTOP1SRC2EX(div, div_ex);
+FASTOP1SRC2EX(idiv, idiv_ex);
+
 FASTOP3WCL(shld);
 FASTOP3WCL(shrd);
 
@@ -1013,6 +853,8 @@ FASTOP2W(bts);
 FASTOP2W(btr);
 FASTOP2W(btc);
 
+FASTOP2(xadd);
+
 static u8 test_cc(unsigned int condition, unsigned long flags)
 {
 	u8 rc;
@@ -1726,45 +1568,42 @@ static void write_register_operand(struct operand *op)
 	}
 }
 
-static int writeback(struct x86_emulate_ctxt *ctxt)
+static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
 {
 	int rc;
 
-	if (ctxt->d & NoWrite)
-		return X86EMUL_CONTINUE;
-
-	switch (ctxt->dst.type) {
+	switch (op->type) {
 	case OP_REG:
-		write_register_operand(&ctxt->dst);
+		write_register_operand(op);
 		break;
 	case OP_MEM:
 		if (ctxt->lock_prefix)
 			rc = segmented_cmpxchg(ctxt,
-					       ctxt->dst.addr.mem,
-					       &ctxt->dst.orig_val,
-					       &ctxt->dst.val,
-					       ctxt->dst.bytes);
+					       op->addr.mem,
+					       &op->orig_val,
+					       &op->val,
+					       op->bytes);
 		else
 			rc = segmented_write(ctxt,
-					     ctxt->dst.addr.mem,
-					     &ctxt->dst.val,
-					     ctxt->dst.bytes);
+					     op->addr.mem,
+					     &op->val,
+					     op->bytes);
 		if (rc != X86EMUL_CONTINUE)
 			return rc;
 		break;
 	case OP_MEM_STR:
 		rc = segmented_write(ctxt,
-				ctxt->dst.addr.mem,
-				ctxt->dst.data,
-				ctxt->dst.bytes * ctxt->dst.count);
+				op->addr.mem,
+				op->data,
+				op->bytes * op->count);
 		if (rc != X86EMUL_CONTINUE)
 			return rc;
 		break;
 	case OP_XMM:
-		write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm);
+		write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
 		break;
 	case OP_MM:
-		write_mmx_reg(ctxt, &ctxt->dst.mm_val, ctxt->dst.addr.mm);
+		write_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
 		break;
 	case OP_NONE:
 		/* no writeback */
@@ -2117,42 +1956,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
-static int em_mul_ex(struct x86_emulate_ctxt *ctxt)
-{
-	u8 ex = 0;
-
-	emulate_1op_rax_rdx(ctxt, "mul", ex);
-	return X86EMUL_CONTINUE;
-}
-
-static int em_imul_ex(struct x86_emulate_ctxt *ctxt)
-{
-	u8 ex = 0;
-
-	emulate_1op_rax_rdx(ctxt, "imul", ex);
-	return X86EMUL_CONTINUE;
-}
-
-static int em_div_ex(struct x86_emulate_ctxt *ctxt)
-{
-	u8 de = 0;
-
-	emulate_1op_rax_rdx(ctxt, "div", de);
-	if (de)
-		return emulate_de(ctxt);
-	return X86EMUL_CONTINUE;
-}
-
-static int em_idiv_ex(struct x86_emulate_ctxt *ctxt)
-{
-	u8 de = 0;
-
-	emulate_1op_rax_rdx(ctxt, "idiv", de);
-	if (de)
-		return emulate_de(ctxt);
-	return X86EMUL_CONTINUE;
-}
-
 static int em_grp45(struct x86_emulate_ctxt *ctxt)
 {
 	int rc = X86EMUL_CONTINUE;
@@ -3734,10 +3537,10 @@ static const struct opcode group3[] = {
 	F(DstMem | SrcImm | NoWrite, em_test),
 	F(DstMem | SrcNone | Lock, em_not),
 	F(DstMem | SrcNone | Lock, em_neg),
-	I(SrcMem, em_mul_ex),
-	I(SrcMem, em_imul_ex),
-	I(SrcMem, em_div_ex),
-	I(SrcMem, em_idiv_ex),
+	F(DstXacc | Src2Mem, em_mul_ex),
+	F(DstXacc | Src2Mem, em_imul_ex),
+	F(DstXacc | Src2Mem, em_div_ex),
+	F(DstXacc | Src2Mem, em_idiv_ex),
 };
 
 static const struct opcode group4[] = {
@@ -4064,7 +3867,7 @@ static const struct opcode twobyte_table[256] = {
 	F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),
 	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
 	/* 0xC0 - 0xC7 */
-	D2bv(DstMem | SrcReg | ModRM | Lock),
+	F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
 	N, D(DstMem | SrcReg | ModRM | Mov),
 	N, N, N, GD(0, &group9),
 	/* 0xC8 - 0xCF */
@@ -4172,6 +3975,24 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		fetch_register_operand(op);
 		op->orig_val = op->val;
 		break;
+	case OpAccLo:
+		op->type = OP_REG;
+		op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
+		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
+		fetch_register_operand(op);
+		op->orig_val = op->val;
+		break;
+	case OpAccHi:
+		if (ctxt->d & ByteOp) {
+			op->type = OP_NONE;
+			break;
+		}
+		op->type = OP_REG;
+		op->bytes = ctxt->op_bytes;
+		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
+		fetch_register_operand(op);
+		op->orig_val = op->val;
+		break;
 	case OpDI:
 		op->type = OP_MEM;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
@@ -4553,11 +4374,15 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
 {
 	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
-	fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
+	if (!(ctxt->d & ByteOp))
+		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
 	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
-	    : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags)
-	: "c"(ctxt->src2.val), [fastop]"S"(fop));
+	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
+	      [fastop]"+S"(fop)
+	    : "c"(ctxt->src2.val));
 	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
+	if (!fop) /* exception is returned in fop variable */
+		return emulate_de(ctxt);
 	return X86EMUL_CONTINUE;
 }
 
@@ -4773,9 +4598,17 @@ special_insn:
 		goto done;
 
 writeback:
-	rc = writeback(ctxt);
-	if (rc != X86EMUL_CONTINUE)
-		goto done;
+	if (!(ctxt->d & NoWrite)) {
+		rc = writeback(ctxt, &ctxt->dst);
+		if (rc != X86EMUL_CONTINUE)
+			goto done;
+	}
+	if (ctxt->d & SrcWrite) {
+		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
+		rc = writeback(ctxt, &ctxt->src);
+		if (rc != X86EMUL_CONTINUE)
+			goto done;
+	}
 
 	/*
 	 * restore dst type in case the decoding will be reused
@@ -4872,12 +4705,6 @@ twobyte_insn:
 		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
 							(s16) ctxt->src.val;
 		break;
-	case 0xc0 ... 0xc1:	/* xadd */
-		fastop(ctxt, em_add);
-		/* Write back the register source. */
-		ctxt->src.val = ctxt->dst.orig_val;
-		write_register_operand(&ctxt->src);
-		break;
 	case 0xc3:		/* movnti */
 		ctxt->dst.bytes = ctxt->op_bytes;
 		ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val :
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0eee2c8b64d..afc11245827 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1608,8 +1608,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 		return;
 
 	if (atomic_read(&apic->lapic_timer.pending) > 0) {
-		if (kvm_apic_local_deliver(apic, APIC_LVTT))
-			atomic_dec(&apic->lapic_timer.pending);
+		kvm_apic_local_deliver(apic, APIC_LVTT);
+		atomic_set(&apic->lapic_timer.pending, 0);
 	}
 }
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 004cc87b781..0d094da4954 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -197,15 +197,63 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
 
-static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access)
+/*
+ * Bits 3 ~ 11 of the spte are used as the low 9 bits of the generation
+ * number, and bits 52 ~ 61 are used as the high 10 bits of the generation
+ * number.
+ */
+#define MMIO_SPTE_GEN_LOW_SHIFT		3
+#define MMIO_SPTE_GEN_HIGH_SHIFT	52
+
+#define MMIO_GEN_SHIFT			19
+#define MMIO_GEN_LOW_SHIFT		9
+#define MMIO_GEN_LOW_MASK		((1 << MMIO_GEN_LOW_SHIFT) - 1)
+#define MMIO_GEN_MASK			((1 << MMIO_GEN_SHIFT) - 1)
+#define MMIO_MAX_GEN			((1 << MMIO_GEN_SHIFT) - 1)
+
+static u64 generation_mmio_spte_mask(unsigned int gen)
 {
-	struct kvm_mmu_page *sp =  page_header(__pa(sptep));
+	u64 mask;
+
+	WARN_ON(gen > MMIO_MAX_GEN);
+
+	mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
+	mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
+	return mask;
+}
+
+static unsigned int get_mmio_spte_generation(u64 spte)
+{
+	unsigned int gen;
+
+	spte &= ~shadow_mmio_mask;
+
+	gen = (spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK;
+	gen |= (spte >> MMIO_SPTE_GEN_HIGH_SHIFT) << MMIO_GEN_LOW_SHIFT;
+	return gen;
+}
+
+static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
+{
+	/*
+	 * Init kvm generation close to MMIO_MAX_GEN to easily test the
+	 * code of handling generation number wrap-around.
+	 */
+	return (kvm_memslots(kvm)->generation +
+		      MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
+}
+
+static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
+			   unsigned access)
+{
+	unsigned int gen = kvm_current_mmio_generation(kvm);
+	u64 mask = generation_mmio_spte_mask(gen);
 
 	access &= ACC_WRITE_MASK | ACC_USER_MASK;
+	mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
 
-	sp->mmio_cached = true;
-	trace_mark_mmio_spte(sptep, gfn, access);
-	mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT);
+	trace_mark_mmio_spte(sptep, gfn, access, gen);
+	mmu_spte_set(sptep, mask);
 }
 
 static bool is_mmio_spte(u64 spte)
@@ -215,24 +263,38 @@ static bool is_mmio_spte(u64 spte)
 
 static gfn_t get_mmio_spte_gfn(u64 spte)
 {
-	return (spte & ~shadow_mmio_mask) >> PAGE_SHIFT;
+	u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
+	return (spte & ~mask) >> PAGE_SHIFT;
 }
 
 static unsigned get_mmio_spte_access(u64 spte)
 {
-	return (spte & ~shadow_mmio_mask) & ~PAGE_MASK;
+	u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
+	return (spte & ~mask) & ~PAGE_MASK;
 }
 
-static bool set_mmio_spte(u64 *sptep, gfn_t gfn, pfn_t pfn, unsigned access)
+static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
+			  pfn_t pfn, unsigned access)
 {
 	if (unlikely(is_noslot_pfn(pfn))) {
-		mark_mmio_spte(sptep, gfn, access);
+		mark_mmio_spte(kvm, sptep, gfn, access);
 		return true;
 	}
 
 	return false;
 }
 
+static bool check_mmio_spte(struct kvm *kvm, u64 spte)
+{
+	unsigned int kvm_gen, spte_gen;
+
+	kvm_gen = kvm_current_mmio_generation(kvm);
+	spte_gen = get_mmio_spte_generation(spte);
+
+	trace_check_mmio_spte(spte, kvm_gen, spte_gen);
+	return likely(kvm_gen == spte_gen);
+}
+
 static inline u64 rsvd_bits(int s, int e)
 {
 	return ((1ULL << (e - s + 1)) - 1) << s;
@@ -404,9 +466,20 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
 /*
  * The idea using the light way get the spte on x86_32 guest is from
  * gup_get_pte(arch/x86/mm/gup.c).
- * The difference is we can not catch the spte tlb flush if we leave
- * guest mode, so we emulate it by increase clear_spte_count when spte
- * is cleared.
+ *
+ * An spte tlb flush may be pending, because kvm_set_pte_rmapp
+ * coalesces them and we are running outside the MMU lock.  Therefore
+ * we need to protect against in-progress updates of the spte.
+ *
+ * Reading the spte while an update is in progress may get the old value
+ * for the high part of the spte.  The race is fine for a present->non-present
+ * change (because the high part of the spte is ignored for non-present spte),
+ * but for a present->present change we must reread the spte.
+ *
+ * All such changes are done in two steps (present->non-present and
+ * non-present->present), hence it is enough to count the number of
+ * present->non-present updates: if it changed while reading the spte,
+ * we might have hit the race.  This is done using clear_spte_count.
  */
 static u64 __get_spte_lockless(u64 *sptep)
 {
@@ -1511,6 +1584,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	if (!direct)
 		sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
+
+	/*
+	 * The active_mmu_pages list is the FIFO list, do not move the
+	 * page until it is zapped. kvm_zap_obsolete_pages depends on
+	 * this feature. See the comments in kvm_zap_obsolete_pages().
+	 */
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
 	sp->parent_ptes = 0;
 	mmu_page_add_parent_pte(vcpu, sp, parent_pte);
@@ -1648,6 +1727,16 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 				    struct list_head *invalid_list);
 
+/*
+ * NOTE: pay special attention to zapped-obsolete pages
+ * (is_obsolete_sp(sp) && sp->role.invalid) when walking the hash list:
+ * such a page has been deleted from active_mmu_pages but can still be
+ * found in the hash list.
+ *
+ * for_each_gfn_indirect_valid_sp has skipped that kind of page and
+ * kvm_mmu_get_page(), the only user of for_each_gfn_sp(), has skipped
+ * all the obsolete pages.
+ */
 #define for_each_gfn_sp(_kvm, _sp, _gfn)				\
 	hlist_for_each_entry(_sp,					\
 	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
@@ -1838,6 +1927,11 @@ static void clear_sp_write_flooding_count(u64 *spte)
 	__clear_sp_write_flooding_count(sp);
 }
 
+static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+}
+
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gfn_t gfn,
 					     gva_t gaddr,
@@ -1864,6 +1958,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		role.quadrant = quadrant;
 	}
 	for_each_gfn_sp(vcpu->kvm, sp, gfn) {
+		if (is_obsolete_sp(vcpu->kvm, sp))
+			continue;
+
 		if (!need_sync && sp->unsync)
 			need_sync = true;
 
@@ -1900,6 +1997,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 
 		account_shadowed(vcpu->kvm, gfn);
 	}
+	sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
 	init_shadow_page_table(sp);
 	trace_kvm_mmu_get_page(sp, true);
 	return sp;
@@ -2070,8 +2168,10 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
 	kvm_mmu_page_unlink_children(kvm, sp);
 	kvm_mmu_unlink_parents(kvm, sp);
+
 	if (!sp->role.invalid && !sp->role.direct)
 		unaccount_shadowed(kvm, sp->gfn);
+
 	if (sp->unsync)
 		kvm_unlink_unsync_page(kvm, sp);
 	if (!sp->root_count) {
@@ -2081,7 +2181,13 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 		kvm_mod_used_mmu_pages(kvm, -1);
 	} else {
 		list_move(&sp->link, &kvm->arch.active_mmu_pages);
-		kvm_reload_remote_mmus(kvm);
+
+		/*
+		 * The obsolete pages can not be used on any vcpus.
+		 * See the comments in kvm_mmu_invalidate_zap_all_pages().
+		 */
+		if (!sp->role.invalid && !is_obsolete_sp(kvm, sp))
+			kvm_reload_remote_mmus(kvm);
 	}
 
 	sp->role.invalid = 1;
@@ -2331,7 +2437,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	u64 spte;
 	int ret = 0;
 
-	if (set_mmio_spte(sptep, gfn, pfn, pte_access))
+	if (set_mmio_spte(vcpu->kvm, sptep, gfn, pfn, pte_access))
 		return 0;
 
 	spte = PT_PRESENT_MASK;
@@ -2869,22 +2975,25 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 		return;
-	spin_lock(&vcpu->kvm->mmu_lock);
+
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL &&
 	    (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL ||
 	     vcpu->arch.mmu.direct_map)) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 
+		spin_lock(&vcpu->kvm->mmu_lock);
 		sp = page_header(root);
 		--sp->root_count;
 		if (!sp->root_count && sp->role.invalid) {
 			kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
 			kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
 		}
-		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 		spin_unlock(&vcpu->kvm->mmu_lock);
+		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 		return;
 	}
+
+	spin_lock(&vcpu->kvm->mmu_lock);
 	for (i = 0; i < 4; ++i) {
 		hpa_t root = vcpu->arch.mmu.pae_root[i];
 
@@ -3148,17 +3257,12 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
 	return spte;
 }
 
-/*
- * If it is a real mmio page fault, return 1 and emulat the instruction
- * directly, return 0 to let CPU fault again on the address, -1 is
- * returned if bug is detected.
- */
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 {
 	u64 spte;
 
 	if (quickly_check_mmio_pf(vcpu, addr, direct))
-		return 1;
+		return RET_MMIO_PF_EMULATE;
 
 	spte = walk_shadow_page_get_mmio_spte(vcpu, addr);
 
@@ -3166,12 +3270,15 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 		gfn_t gfn = get_mmio_spte_gfn(spte);
 		unsigned access = get_mmio_spte_access(spte);
 
+		if (!check_mmio_spte(vcpu->kvm, spte))
+			return RET_MMIO_PF_INVALID;
+
 		if (direct)
 			addr = 0;
 
 		trace_handle_mmio_page_fault(addr, gfn, access);
 		vcpu_cache_mmio_info(vcpu, addr, gfn, access);
-		return 1;
+		return RET_MMIO_PF_EMULATE;
 	}
 
 	/*
@@ -3179,13 +3286,13 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 	 * it's a BUG if the gfn is not a mmio page.
 	 */
 	if (direct && !check_direct_spte_mmio_pf(spte))
-		return -1;
+		return RET_MMIO_PF_BUG;
 
 	/*
 	 * If the page table is zapped by other cpus, let CPU fault again on
 	 * the address.
 	 */
-	return 0;
+	return RET_MMIO_PF_RETRY;
 }
 EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common);
 
@@ -3195,7 +3302,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr,
 	int ret;
 
 	ret = handle_mmio_page_fault_common(vcpu, addr, direct);
-	WARN_ON(ret < 0);
+	WARN_ON(ret == RET_MMIO_PF_BUG);
 	return ret;
 }
 
@@ -3207,8 +3314,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 
 	pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
 
-	if (unlikely(error_code & PFERR_RSVD_MASK))
-		return handle_mmio_page_fault(vcpu, gva, error_code, true);
+	if (unlikely(error_code & PFERR_RSVD_MASK)) {
+		r = handle_mmio_page_fault(vcpu, gva, error_code, true);
+
+		if (likely(r != RET_MMIO_PF_INVALID))
+			return r;
+	}
 
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
@@ -3284,8 +3395,12 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	ASSERT(vcpu);
 	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
-	if (unlikely(error_code & PFERR_RSVD_MASK))
-		return handle_mmio_page_fault(vcpu, gpa, error_code, true);
+	if (unlikely(error_code & PFERR_RSVD_MASK)) {
+		r = handle_mmio_page_fault(vcpu, gpa, error_code, true);
+
+		if (likely(r != RET_MMIO_PF_INVALID))
+			return r;
+	}
 
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
@@ -3391,8 +3506,8 @@ static inline void protect_clean_gpte(unsigned *access, unsigned gpte)
 	*access &= mask;
 }
 
-static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
-			   int *nr_present)
+static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
+			   unsigned access, int *nr_present)
 {
 	if (unlikely(is_mmio_spte(*sptep))) {
 		if (gfn != get_mmio_spte_gfn(*sptep)) {
@@ -3401,7 +3516,7 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
 		}
 
 		(*nr_present)++;
-		mark_mmio_spte(sptep, gfn, access);
+		mark_mmio_spte(kvm, sptep, gfn, access);
 		return true;
 	}
 
@@ -3764,9 +3879,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 	if (r)
 		goto out;
 	r = mmu_alloc_roots(vcpu);
-	spin_lock(&vcpu->kvm->mmu_lock);
-	mmu_sync_roots(vcpu);
-	spin_unlock(&vcpu->kvm->mmu_lock);
+	kvm_mmu_sync_roots(vcpu);
 	if (r)
 		goto out;
 	/* set_cr3() should ensure TLB has been flushed */
@@ -4179,39 +4292,107 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 	spin_unlock(&kvm->mmu_lock);
 }
 
-void kvm_mmu_zap_all(struct kvm *kvm)
+#define BATCH_ZAP_PAGES	10
+static void kvm_zap_obsolete_pages(struct kvm *kvm)
 {
 	struct kvm_mmu_page *sp, *node;
-	LIST_HEAD(invalid_list);
+	int batch = 0;
 
-	spin_lock(&kvm->mmu_lock);
 restart:
-	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
-		if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
+	list_for_each_entry_safe_reverse(sp, node,
+	      &kvm->arch.active_mmu_pages, link) {
+		int ret;
+
+		/*
+		 * No obsolete page exists before a newly created page since
+		 * active_mmu_pages is a FIFO list.
+		 */
+		if (!is_obsolete_sp(kvm, sp))
+			break;
+
+		/*
+		 * Since we are walking the list in reverse and invalid
+		 * pages are moved to the head, skipping invalid pages
+		 * keeps us from walking the list forever.
+		 */
+		if (sp->role.invalid)
+			continue;
+
+		/*
+		 * Need not flush tlb since we only zap the sp with invalid
+		 * generation number.
+		 */
+		if (batch >= BATCH_ZAP_PAGES &&
+		      cond_resched_lock(&kvm->mmu_lock)) {
+			batch = 0;
+			goto restart;
+		}
+
+		ret = kvm_mmu_prepare_zap_page(kvm, sp,
+				&kvm->arch.zapped_obsolete_pages);
+		batch += ret;
+
+		if (ret)
 			goto restart;
+	}
 
-	kvm_mmu_commit_zap_page(kvm, &invalid_list);
-	spin_unlock(&kvm->mmu_lock);
+	/*
+	 * Should flush tlb before free page tables since lockless-walking
+	 * may use the pages.
+	 */
+	kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
 }
 
-void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
+/*
+ * Fast invalidate all shadow pages and use lock-break technique
+ * to zap obsolete pages.
+ *
+ * It's required when memslot is being deleted or VM is being
+ * destroyed, in these cases, we should ensure that KVM MMU does
+ * not use any resource of the being-deleted slot or all slots
+ * after calling the function.
+ */
+void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm)
 {
-	struct kvm_mmu_page *sp, *node;
-	LIST_HEAD(invalid_list);
-
 	spin_lock(&kvm->mmu_lock);
-restart:
-	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
-		if (!sp->mmio_cached)
-			continue;
-		if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
-			goto restart;
-	}
+	trace_kvm_mmu_invalidate_zap_all_pages(kvm);
+	kvm->arch.mmu_valid_gen++;
 
-	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+	/*
+	 * Notify all vcpus to reload their shadow page tables
+	 * and flush the TLB. Then all vcpus will switch to new
+	 * shadow page tables with the new mmu_valid_gen.
+	 *
+	 * Note: we should do this under the protection of
+	 * mmu-lock, otherwise, vcpu would purge shadow page
+	 * but miss tlb flush.
+	 */
+	kvm_reload_remote_mmus(kvm);
+
+	kvm_zap_obsolete_pages(kvm);
 	spin_unlock(&kvm->mmu_lock);
 }
 
+static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
+{
+	return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
+}
+
+void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
+{
+	/*
+	 * The very rare case: if the generation number wraps around,
+	 * zap all shadow pages.
+	 *
+	 * The max value is MMIO_MAX_GEN - 1 since it is not called
+	 * when marking a memslot invalid.
+	 */
+	if (unlikely(kvm_current_mmio_generation(kvm) >= (MMIO_MAX_GEN - 1))) {
+		printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n");
+		kvm_mmu_invalidate_zap_all_pages(kvm);
+	}
+}
+
 static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
 {
 	struct kvm *kvm;
@@ -4240,15 +4421,23 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
 		 * want to shrink a VM that only started to populate its MMU
 		 * anyway.
 		 */
-		if (!kvm->arch.n_used_mmu_pages)
+		if (!kvm->arch.n_used_mmu_pages &&
+		      !kvm_has_zapped_obsolete_pages(kvm))
 			continue;
 
 		idx = srcu_read_lock(&kvm->srcu);
 		spin_lock(&kvm->mmu_lock);
 
+		if (kvm_has_zapped_obsolete_pages(kvm)) {
+			kvm_mmu_commit_zap_page(kvm,
+			      &kvm->arch.zapped_obsolete_pages);
+			goto unlock;
+		}
+
 		prepare_zap_oldest_mmu_page(kvm, &invalid_list);
 		kvm_mmu_commit_zap_page(kvm, &invalid_list);
 
+unlock:
 		spin_unlock(&kvm->mmu_lock);
 		srcu_read_unlock(&kvm->srcu, idx);
 
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2adcbc2cac6..5b59c573aba 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -52,6 +52,23 @@
 
 int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
+
+/*
+ * Return values of handle_mmio_page_fault_common:
+ * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
+ *			directly.
+ * RET_MMIO_PF_INVALID: an invalid spte is detected; let the real page
+ *			fault path update the mmio spte.
+ * RET_MMIO_PF_RETRY: let the CPU fault again on the address.
+ * RET_MMIO_PF_BUG: a bug is detected.
+ */
+enum {
+	RET_MMIO_PF_EMULATE = 1,
+	RET_MMIO_PF_INVALID = 2,
+	RET_MMIO_PF_RETRY = 0,
+	RET_MMIO_PF_BUG = -1
+};
+
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
 int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
 
@@ -97,4 +114,5 @@ static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
 	return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
 }
 
+void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
 #endif
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index b8f6172f417..9d2e0ffcb19 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -7,16 +7,18 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvmmmu
 
-#define KVM_MMU_PAGE_FIELDS \
-	__field(__u64, gfn) \
-	__field(__u32, role) \
-	__field(__u32, root_count) \
+#define KVM_MMU_PAGE_FIELDS			\
+	__field(unsigned long, mmu_valid_gen)	\
+	__field(__u64, gfn)			\
+	__field(__u32, role)			\
+	__field(__u32, root_count)		\
 	__field(bool, unsync)
 
-#define KVM_MMU_PAGE_ASSIGN(sp)			     \
-	__entry->gfn = sp->gfn;			     \
-	__entry->role = sp->role.word;		     \
-	__entry->root_count = sp->root_count;        \
+#define KVM_MMU_PAGE_ASSIGN(sp)				\
+	__entry->mmu_valid_gen = sp->mmu_valid_gen;	\
+	__entry->gfn = sp->gfn;				\
+	__entry->role = sp->role.word;			\
+	__entry->root_count = sp->root_count;		\
 	__entry->unsync = sp->unsync;
 
 #define KVM_MMU_PAGE_PRINTK() ({				        \
@@ -28,8 +30,8 @@
 								        \
 	role.word = __entry->role;					\
 									\
-	trace_seq_printf(p, "sp gfn %llx %u%s q%u%s %s%s"		\
-			 " %snxe root %u %s%c",				\
+	trace_seq_printf(p, "sp gen %lx gfn %llx %u%s q%u%s %s%s"	\
+			 " %snxe root %u %s%c",	__entry->mmu_valid_gen,	\
 			 __entry->gfn, role.level,			\
 			 role.cr4_pae ? " pae" : "",			\
 			 role.quadrant,					\
@@ -197,23 +199,25 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page,
 
 TRACE_EVENT(
 	mark_mmio_spte,
-	TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access),
-	TP_ARGS(sptep, gfn, access),
+	TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access, unsigned int gen),
+	TP_ARGS(sptep, gfn, access, gen),
 
 	TP_STRUCT__entry(
 		__field(void *, sptep)
 		__field(gfn_t, gfn)
 		__field(unsigned, access)
+		__field(unsigned int, gen)
 	),
 
 	TP_fast_assign(
 		__entry->sptep = sptep;
 		__entry->gfn = gfn;
 		__entry->access = access;
+		__entry->gen = gen;
 	),
 
-	TP_printk("sptep:%p gfn %llx access %x", __entry->sptep, __entry->gfn,
-		  __entry->access)
+	TP_printk("sptep:%p gfn %llx access %x gen %x", __entry->sptep,
+		  __entry->gfn, __entry->access, __entry->gen)
 );
 
 TRACE_EVENT(
@@ -274,6 +278,50 @@ TRACE_EVENT(
 		  __spte_satisfied(old_spte), __spte_satisfied(new_spte)
 	)
 );
+
+TRACE_EVENT(
+	kvm_mmu_invalidate_zap_all_pages,
+	TP_PROTO(struct kvm *kvm),
+	TP_ARGS(kvm),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, mmu_valid_gen)
+		__field(unsigned int, mmu_used_pages)
+	),
+
+	TP_fast_assign(
+		__entry->mmu_valid_gen = kvm->arch.mmu_valid_gen;
+		__entry->mmu_used_pages = kvm->arch.n_used_mmu_pages;
+	),
+
+	TP_printk("kvm-mmu-valid-gen %lx used_pages %x",
+		  __entry->mmu_valid_gen, __entry->mmu_used_pages
+	)
+);
+
+
+TRACE_EVENT(
+	check_mmio_spte,
+	TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen),
+	TP_ARGS(spte, kvm_gen, spte_gen),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, kvm_gen)
+		__field(unsigned int, spte_gen)
+		__field(u64, spte)
+	),
+
+	TP_fast_assign(
+		__entry->kvm_gen = kvm_gen;
+		__entry->spte_gen = spte_gen;
+		__entry->spte = spte;
+	),
+
+	TP_printk("spte %llx kvm_gen %x spte-gen %x valid %d", __entry->spte,
+		  __entry->kvm_gen, __entry->spte_gen,
+		  __entry->kvm_gen == __entry->spte_gen
+	)
+);
 #endif /* _TRACE_KVMMMU_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index da20860b457..7769699d48a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -552,9 +552,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 
 	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
 
-	if (unlikely(error_code & PFERR_RSVD_MASK))
-		return handle_mmio_page_fault(vcpu, addr, error_code,
+	if (unlikely(error_code & PFERR_RSVD_MASK)) {
+		r = handle_mmio_page_fault(vcpu, addr, error_code,
 					      mmu_is_nested(vcpu));
+		if (likely(r != RET_MMIO_PF_INVALID))
+			return r;
+	};
 
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
@@ -792,7 +795,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		pte_access &= gpte_access(vcpu, gpte);
 		protect_clean_gpte(&pte_access, gpte);
 
-		if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present))
+		if (sync_mmio_spte(vcpu->kvm, &sp->spt[i], gfn, pte_access,
+		      &nr_present))
 			continue;
 
 		if (gfn != sp->gfns[i]) {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a14a6eaf871..c0bc80391e4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1026,7 +1026,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 		g_tsc_offset = svm->vmcb->control.tsc_offset -
 			       svm->nested.hsave->control.tsc_offset;
 		svm->nested.hsave->control.tsc_offset = offset;
-	}
+	} else
+		trace_kvm_write_tsc_offset(vcpu->vcpu_id,
+					   svm->vmcb->control.tsc_offset,
+					   offset);
 
 	svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
 
@@ -1044,6 +1047,11 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho
 	svm->vmcb->control.tsc_offset += adjustment;
 	if (is_guest_mode(vcpu))
 		svm->nested.hsave->control.tsc_offset += adjustment;
+	else
+		trace_kvm_write_tsc_offset(vcpu->vcpu_id,
+				     svm->vmcb->control.tsc_offset - adjustment,
+				     svm->vmcb->control.tsc_offset);
+
 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
 
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index fe5e00ed703..545245d7cc6 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -756,6 +756,27 @@ TRACE_EVENT(
 		  __entry->gpa_match ? "GPA" : "GVA")
 );
 
+TRACE_EVENT(kvm_write_tsc_offset,
+	TP_PROTO(unsigned int vcpu_id, __u64 previous_tsc_offset,
+		 __u64 next_tsc_offset),
+	TP_ARGS(vcpu_id, previous_tsc_offset, next_tsc_offset),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	vcpu_id				)
+		__field(	__u64,	previous_tsc_offset		)
+		__field(	__u64,	next_tsc_offset			)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id		= vcpu_id;
+		__entry->previous_tsc_offset	= previous_tsc_offset;
+		__entry->next_tsc_offset	= next_tsc_offset;
+	),
+
+	TP_printk("vcpu=%u prev=%llu next=%llu", __entry->vcpu_id,
+		  __entry->previous_tsc_offset, __entry->next_tsc_offset)
+);
+
 #ifdef CONFIG_X86_64
 
 #define host_clocks					\
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 260a9193955..a7e18551c96 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2096,6 +2096,8 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 			(nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
 			 vmcs12->tsc_offset : 0));
 	} else {
+		trace_kvm_write_tsc_offset(vcpu->vcpu_id,
+					   vmcs_read64(TSC_OFFSET), offset);
 		vmcs_write64(TSC_OFFSET, offset);
 	}
 }
@@ -2103,11 +2105,14 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
 {
 	u64 offset = vmcs_read64(TSC_OFFSET);
+
 	vmcs_write64(TSC_OFFSET, offset + adjustment);
 	if (is_guest_mode(vcpu)) {
 		/* Even when running L2, the adjustment needs to apply to L1 */
 		to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment;
-	}
+	} else
+		trace_kvm_write_tsc_offset(vcpu->vcpu_id, offset,
+					   offset + adjustment);
 }
 
 static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
@@ -4176,10 +4181,10 @@ static void ept_set_mmio_spte_mask(void)
 	/*
 	 * EPT Misconfigurations can be generated if the value of bits 2:0
 	 * of an EPT paging-structure entry is 110b (write/execute).
-	 * Also, magic bits (0xffull << 49) is set to quickly identify mmio
+	 * Also, magic bits (0x3ull << 62) are set to quickly identify mmio
 	 * spte.
 	 */
-	kvm_mmu_set_mmio_spte_mask(0xffull << 49 | 0x6ull);
+	kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
 }
 
 /*
@@ -5366,10 +5371,14 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
 
 	ret = handle_mmio_page_fault_common(vcpu, gpa, true);
-	if (likely(ret == 1))
+	if (likely(ret == RET_MMIO_PF_EMULATE))
 		return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) ==
 					      EMULATE_DONE;
-	if (unlikely(!ret))
+
+	if (unlikely(ret == RET_MMIO_PF_INVALID))
+		return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0);
+
+	if (unlikely(ret == RET_MMIO_PF_RETRY))
 		return 1;
 
 	/* It is the real ept misconfig */
@@ -7942,7 +7951,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 
 	kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
 	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
-	vmx_set_rflags(vcpu, X86_EFLAGS_BIT1);
+	vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
 	/*
 	 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
 	 * actually changed, because it depends on the current state of
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e8ba99c3418..d21bce50531 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -618,7 +618,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
 		return 1;
 
-	if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_RDWRGSFS))
+	if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
 		return 1;
 
 	if (is_long_mode(vcpu)) {
@@ -1193,20 +1193,37 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	elapsed = ns - kvm->arch.last_tsc_nsec;
 
 	if (vcpu->arch.virtual_tsc_khz) {
+		int faulted = 0;
+
 		/* n.b - signed multiplication and division required */
 		usdiff = data - kvm->arch.last_tsc_write;
 #ifdef CONFIG_X86_64
 		usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
 #else
 		/* do_div() only does unsigned */
-		asm("idivl %2; xor %%edx, %%edx"
-		: "=A"(usdiff)
-		: "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz));
+		asm("1: idivl %[divisor]\n"
+		    "2: xor %%edx, %%edx\n"
+		    "   movl $0, %[faulted]\n"
+		    "3:\n"
+		    ".section .fixup,\"ax\"\n"
+		    "4: movl $1, %[faulted]\n"
+		    "   jmp  3b\n"
+		    ".previous\n"
+
+		_ASM_EXTABLE(1b, 4b)
+
+		: "=A"(usdiff), [faulted] "=r" (faulted)
+		: "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));
+
 #endif
 		do_div(elapsed, 1000);
 		usdiff -= elapsed;
 		if (usdiff < 0)
 			usdiff = -usdiff;
+
+		/* idivl overflow => difference is larger than USEC_PER_SEC */
+		if (faulted)
+			usdiff = USEC_PER_SEC;
 	} else
 		usdiff = USEC_PER_SEC; /* disable TSC match window below */
 
@@ -1587,6 +1604,30 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	return 0;
 }
 
+/*
+ * kvmclock updates which are isolated to a given vcpu, such as
+ * vcpu->cpu migration, should not allow system_timestamp from
+ * the rest of the vcpus to remain static. Otherwise ntp frequency
+ * correction applies to one vcpu's system_timestamp but not
+ * the others.
+ *
+ * So in those cases, request a kvmclock update for all vcpus.
+ * The worst case for a remote vcpu to update its kvmclock
+ * is then bounded by maximum nohz sleep latency.
+ */
+
+static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
+{
+	int i;
+	struct kvm *kvm = v->kvm;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+		kvm_vcpu_kick(vcpu);
+	}
+}
+
 static bool msr_mtrr_valid(unsigned msr)
 {
 	switch (msr) {
@@ -1984,7 +2025,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		kvmclock_reset(vcpu);
 
 		vcpu->arch.time = data;
-		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+		kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
 
 		/* we verify if the enable bit is set... */
 		if (!(data & 1))
@@ -2701,7 +2742,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		 * kvmclock on vcpu->cpu migration
 		 */
 		if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
-			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+			kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
 		if (vcpu->cpu != cpu)
 			kvm_migrate_timers(vcpu);
 		vcpu->cpu = cpu;
@@ -5238,7 +5279,13 @@ static void kvm_set_mmio_spte_mask(void)
 	 * Set the reserved bits and the present bit of an paging-structure
 	 * entry to generate page fault with PFER.RSV = 1.
 	 */
-	mask = ((1ull << (62 - maxphyaddr + 1)) - 1) << maxphyaddr;
+	 /* Mask the reserved physical address bits. */
+	mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr;
+
+	/* Bit 62 is always reserved for 32bit host. */
+	mask |= 0x3ull << 62;
+
+	/* Set the present bit. */
 	mask |= 1ull;
 
 #ifdef CONFIG_X86_64
@@ -5498,13 +5545,6 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
 	char instruction[3];
 	unsigned long rip = kvm_rip_read(vcpu);
 
-	/*
-	 * Blow out the MMU to ensure that no other VCPU has an active mapping
-	 * to ensure that the updated hypercall appears atomically across all
-	 * VCPUs.
-	 */
-	kvm_mmu_zap_all(vcpu->kvm);
-
 	kvm_x86_ops->patch_hypercall(vcpu, instruction);
 
 	return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
@@ -5702,6 +5742,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			__kvm_migrate_timers(vcpu);
 		if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
 			kvm_gen_update_masterclock(vcpu->kvm);
+		if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
+			kvm_gen_kvmclock_update(vcpu);
 		if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
 			r = kvm_guest_time_update(vcpu);
 			if (unlikely(r))
@@ -6812,6 +6854,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 		return -EINVAL;
 
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
 	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
@@ -7040,22 +7083,18 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	 * If memory slot is created, or moved, we need to clear all
 	 * mmio sptes.
 	 */
-	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
-		kvm_mmu_zap_mmio_sptes(kvm);
-		kvm_reload_remote_mmus(kvm);
-	}
+	kvm_mmu_invalidate_mmio_sptes(kvm);
 }
 
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
-	kvm_mmu_zap_all(kvm);
-	kvm_reload_remote_mmus(kvm);
+	kvm_mmu_invalidate_zap_all_pages(kvm);
 }
 
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 				   struct kvm_memory_slot *slot)
 {
-	kvm_arch_flush_shadow_all(kvm);
+	kvm_mmu_invalidate_zap_all_pages(kvm);
 }
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
@@ -7263,3 +7302,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
diff --git a/arch/x86/lguest/Makefile b/arch/x86/lguest/Makefile
index 94e0e54056a..8f38d577a2f 100644
--- a/arch/x86/lguest/Makefile
+++ b/arch/x86/lguest/Makefile
@@ -1,2 +1,2 @@
-obj-y		:= i386_head.o boot.o
+obj-y		:= head_32.o boot.o
 CFLAGS_boot.o	:= $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 7114c63f047..d482bcaf61c 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1410,7 +1410,7 @@ __init void lguest_init(void)
 	new_cpu_data.x86_capability[0] = cpuid_edx(1);
 
 	/* Math is always hard! */
-	new_cpu_data.hard_math = 1;
+	set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
 
 	/* We don't have features.  We have puppies!  Puppies! */
 #ifdef CONFIG_X86_MCE
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/head_32.S
index 6ddfe4fc23c..6ddfe4fc23c 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/head_32.S
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 252b8f5489b..4500142bc4a 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -1,6 +1,7 @@
 #include <linux/highmem.h>
 #include <linux/module.h>
 #include <linux/swap.h> /* for totalram_pages */
+#include <linux/bootmem.h>
 
 void *kmap(struct page *page)
 {
@@ -121,6 +122,11 @@ void __init set_highmem_pages_init(void)
 	struct zone *zone;
 	int nid;
 
+	/*
+	 * Explicitly reset zone->managed_pages because set_highmem_pages_init()
+	 * is invoked before free_all_bootmem()
+	 */
+	reset_all_zones_managed_pages();
 	for_each_zone(zone) {
 		unsigned long zone_start_pfn, zone_end_pfn;
 
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index ae1aa71d011..7e73e8c6909 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -16,169 +16,6 @@
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
 
-static unsigned long page_table_shareable(struct vm_area_struct *svma,
-				struct vm_area_struct *vma,
-				unsigned long addr, pgoff_t idx)
-{
-	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
-				svma->vm_start;
-	unsigned long sbase = saddr & PUD_MASK;
-	unsigned long s_end = sbase + PUD_SIZE;
-
-	/* Allow segments to share if only one is marked locked */
-	unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
-	unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
-
-	/*
-	 * match the virtual addresses, permission and the alignment of the
-	 * page table page.
-	 */
-	if (pmd_index(addr) != pmd_index(saddr) ||
-	    vm_flags != svm_flags ||
-	    sbase < svma->vm_start || svma->vm_end < s_end)
-		return 0;
-
-	return saddr;
-}
-
-static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
-{
-	unsigned long base = addr & PUD_MASK;
-	unsigned long end = base + PUD_SIZE;
-
-	/*
-	 * check on proper vm_flags and page table alignment
-	 */
-	if (vma->vm_flags & VM_MAYSHARE &&
-	    vma->vm_start <= base && end <= vma->vm_end)
-		return 1;
-	return 0;
-}
-
-/*
- * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
- * and returns the corresponding pte. While this is not necessary for the
- * !shared pmd case because we can allocate the pmd later as well, it makes the
- * code much cleaner. pmd allocation is essential for the shared case because
- * pud has to be populated inside the same i_mmap_mutex section - otherwise
- * racing tasks could either miss the sharing (see huge_pte_offset) or select a
- * bad pmd for sharing.
- */
-static pte_t *
-huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
-{
-	struct vm_area_struct *vma = find_vma(mm, addr);
-	struct address_space *mapping = vma->vm_file->f_mapping;
-	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
-			vma->vm_pgoff;
-	struct vm_area_struct *svma;
-	unsigned long saddr;
-	pte_t *spte = NULL;
-	pte_t *pte;
-
-	if (!vma_shareable(vma, addr))
-		return (pte_t *)pmd_alloc(mm, pud, addr);
-
-	mutex_lock(&mapping->i_mmap_mutex);
-	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
-		if (svma == vma)
-			continue;
-
-		saddr = page_table_shareable(svma, vma, addr, idx);
-		if (saddr) {
-			spte = huge_pte_offset(svma->vm_mm, saddr);
-			if (spte) {
-				get_page(virt_to_page(spte));
-				break;
-			}
-		}
-	}
-
-	if (!spte)
-		goto out;
-
-	spin_lock(&mm->page_table_lock);
-	if (pud_none(*pud))
-		pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
-	else
-		put_page(virt_to_page(spte));
-	spin_unlock(&mm->page_table_lock);
-out:
-	pte = (pte_t *)pmd_alloc(mm, pud, addr);
-	mutex_unlock(&mapping->i_mmap_mutex);
-	return pte;
-}
-
-/*
- * unmap huge page backed by shared pte.
- *
- * Hugetlb pte page is ref counted at the time of mapping.  If pte is shared
- * indicated by page_count > 1, unmap is achieved by clearing pud and
- * decrementing the ref count. If count == 1, the pte page is not shared.
- *
- * called with vma->vm_mm->page_table_lock held.
- *
- * returns: 1 successfully unmapped a shared pte page
- *	    0 the underlying pte page is not shared, or it is the last user
- */
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
-{
-	pgd_t *pgd = pgd_offset(mm, *addr);
-	pud_t *pud = pud_offset(pgd, *addr);
-
-	BUG_ON(page_count(virt_to_page(ptep)) == 0);
-	if (page_count(virt_to_page(ptep)) == 1)
-		return 0;
-
-	pud_clear(pud);
-	put_page(virt_to_page(ptep));
-	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
-	return 1;
-}
-
-pte_t *huge_pte_alloc(struct mm_struct *mm,
-			unsigned long addr, unsigned long sz)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pte_t *pte = NULL;
-
-	pgd = pgd_offset(mm, addr);
-	pud = pud_alloc(mm, pgd, addr);
-	if (pud) {
-		if (sz == PUD_SIZE) {
-			pte = (pte_t *)pud;
-		} else {
-			BUG_ON(sz != PMD_SIZE);
-			if (pud_none(*pud))
-				pte = huge_pmd_share(mm, addr, pud);
-			else
-				pte = (pte_t *)pmd_alloc(mm, pud, addr);
-		}
-	}
-	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
-
-	return pte;
-}
-
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd = NULL;
-
-	pgd = pgd_offset(mm, addr);
-	if (pgd_present(*pgd)) {
-		pud = pud_offset(pgd, addr);
-		if (pud_present(*pud)) {
-			if (pud_large(*pud))
-				return (pte_t *)pud;
-			pmd = pmd_offset(pud, addr);
-		}
-	}
-	return (pte_t *) pmd;
-}
-
 #if 0	/* This is just for testing */
 struct page *
 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
@@ -240,30 +77,6 @@ int pud_huge(pud_t pud)
 	return !!(pud_val(pud) & _PAGE_PSE);
 }
 
-struct page *
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-		pmd_t *pmd, int write)
-{
-	struct page *page;
-
-	page = pte_page(*(pte_t *)pmd);
-	if (page)
-		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
-	return page;
-}
-
-struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
-		pud_t *pud, int write)
-{
-	struct page *page;
-
-	page = pte_page(*(pte_t *)pud);
-	if (page)
-		page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
-	return page;
-}
-
 #endif
 
 /* x86_64 also uses this file */
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 1f34e921977..2ec29ac78ae 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -494,7 +494,6 @@ int devmem_is_allowed(unsigned long pagenr)
 
 void free_init_pages(char *what, unsigned long begin, unsigned long end)
 {
-	unsigned long addr;
 	unsigned long begin_aligned, end_aligned;
 
 	/* Make sure boundaries are page aligned */
@@ -509,8 +508,6 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 	if (begin >= end)
 		return;
 
-	addr = begin;
-
 	/*
 	 * If debugging page accesses then do not free this memory but
 	 * mark them not present - any buggy init-section access will
@@ -529,18 +526,13 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 	set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
 	set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
 
-	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
-
-	for (; addr < end; addr += PAGE_SIZE) {
-		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
-		free_reserved_page(virt_to_page(addr));
-	}
+	free_reserved_area((void *)begin, (void *)end, POISON_FREE_INITMEM, what);
 #endif
 }
 
 void free_initmem(void)
 {
-	free_init_pages("unused kernel memory",
+	free_init_pages("unused kernel",
 			(unsigned long)(&__init_begin),
 			(unsigned long)(&__init_end));
 }
@@ -566,7 +558,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 	 *   - relocate_initrd()
 	 * So here We can do PAGE_ALIGN() safely to get partial page to be freed
 	 */
-	free_init_pages("initrd memory", start, PAGE_ALIGN(end));
+	free_init_pages("initrd", start, PAGE_ALIGN(end));
 }
 #endif
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3ac7e319918..4287f1ffba7 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -660,10 +660,8 @@ void __init initmem_init(void)
 		highstart_pfn = max_low_pfn;
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 		pages_to_mb(highend_pfn - highstart_pfn));
-	num_physpages = highend_pfn;
 	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
-	num_physpages = max_low_pfn;
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
 
@@ -671,7 +669,7 @@ void __init initmem_init(void)
 	sparse_memory_present_with_active_regions(0);
 
 #ifdef CONFIG_FLATMEM
-	max_mapnr = num_physpages;
+	max_mapnr = IS_ENABLED(CONFIG_HIGHMEM) ? highend_pfn : max_low_pfn;
 #endif
 	__vmalloc_start_set = true;
 
@@ -739,9 +737,6 @@ static void __init test_wp_bit(void)
 
 void __init mem_init(void)
 {
-	int codesize, reservedpages, datasize, initsize;
-	int tmp;
-
 	pci_iommu_alloc();
 
 #ifdef CONFIG_FLATMEM
@@ -759,32 +754,11 @@ void __init mem_init(void)
 	set_highmem_pages_init();
 
 	/* this will put all low memory onto the freelists */
-	totalram_pages += free_all_bootmem();
-
-	reservedpages = 0;
-	for (tmp = 0; tmp < max_low_pfn; tmp++)
-		/*
-		 * Only count reserved RAM pages:
-		 */
-		if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
-			reservedpages++;
+	free_all_bootmem();
 
 	after_bootmem = 1;
 
-	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
-	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
-	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
-
-	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
-			"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
-		nr_free_pages() << (PAGE_SHIFT-10),
-		num_physpages << (PAGE_SHIFT-10),
-		codesize >> 10,
-		reservedpages << (PAGE_SHIFT-10),
-		datasize >> 10,
-		initsize >> 10,
-		totalhigh_pages << (PAGE_SHIFT-10));
-
+	mem_init_print_info(NULL);
 	printk(KERN_INFO "virtual kernel memory layout:\n"
 		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #ifdef CONFIG_HIGHMEM
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bb00c4672ad..104d56a9245 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -368,7 +368,7 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
  *
  *   from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
  *
- * phys_addr holds the negative offset to the kernel, which is added
+ * phys_base holds the negative offset to the kernel, which is added
  * to the compile time generated pmds. This results in invalid pmds up
  * to the point where we hit the physaddr 0 mapping.
  *
@@ -712,36 +712,22 @@ EXPORT_SYMBOL_GPL(arch_add_memory);
 
 static void __meminit free_pagetable(struct page *page, int order)
 {
-	struct zone *zone;
-	bool bootmem = false;
 	unsigned long magic;
 	unsigned int nr_pages = 1 << order;
 
 	/* bootmem page has reserved flag */
 	if (PageReserved(page)) {
 		__ClearPageReserved(page);
-		bootmem = true;
 
 		magic = (unsigned long)page->lru.next;
 		if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
 			while (nr_pages--)
 				put_page_bootmem(page++);
 		} else
-			__free_pages_bootmem(page, order);
+			while (nr_pages--)
+				free_reserved_page(page++);
 	} else
 		free_pages((unsigned long)page_address(page), order);
-
-	/*
-	 * SECTION_INFO pages and MIX_SECTION_INFO pages
-	 * are all allocated by bootmem.
-	 */
-	if (bootmem) {
-		zone = page_zone(page);
-		zone_span_writelock(zone);
-		zone->present_pages += nr_pages;
-		zone_span_writeunlock(zone);
-		totalram_pages += nr_pages;
-	}
 }
 
 static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
@@ -1058,9 +1044,6 @@ static void __init register_page_bootmem_info(void)
 
 void __init mem_init(void)
 {
-	long codesize, reservedpages, datasize, initsize;
-	unsigned long absent_pages;
-
 	pci_iommu_alloc();
 
 	/* clear_bss() already clear the empty_zero_page */
@@ -1068,29 +1051,14 @@ void __init mem_init(void)
 	register_page_bootmem_info();
 
 	/* this will put all memory onto the freelists */
-	totalram_pages = free_all_bootmem();
-
-	absent_pages = absent_pages_in_range(0, max_pfn);
-	reservedpages = max_pfn - totalram_pages - absent_pages;
+	free_all_bootmem();
 	after_bootmem = 1;
 
-	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
-	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
-	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
-
 	/* Register memory areas for /proc/kcore */
 	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
 			 VSYSCALL_END - VSYSCALL_START, KCORE_OTHER);
 
-	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
-			 "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
-		nr_free_pages() << (PAGE_SHIFT-10),
-		max_pfn << (PAGE_SHIFT-10),
-		codesize >> 10,
-		absent_pages << (PAGE_SHIFT-10),
-		reservedpages << (PAGE_SHIFT-10),
-		datasize >> 10,
-		initsize >> 10);
+	mem_init_print_info(NULL);
 }
 
 #ifdef CONFIG_DEBUG_RODATA
@@ -1166,11 +1134,10 @@ void mark_rodata_ro(void)
 	set_memory_ro(start, (end-start) >> PAGE_SHIFT);
 #endif
 
-	free_init_pages("unused kernel memory",
+	free_init_pages("unused kernel",
 			(unsigned long) __va(__pa_symbol(text_end)),
 			(unsigned long) __va(__pa_symbol(rodata_start)));
-
-	free_init_pages("unused kernel memory",
+	free_init_pages("unused kernel",
 			(unsigned long) __va(__pa_symbol(rodata_end)),
 			(unsigned long) __va(__pa_symbol(_sdata)));
 }
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 9a1e6583910..0215e2c563e 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -501,15 +501,15 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
 	}
 
 	if (slot < 0) {
-		printk(KERN_INFO "early_iomap(%08llx, %08lx) not found slot\n",
-			 (u64)phys_addr, size);
+		printk(KERN_INFO "%s(%08llx, %08lx) not found slot\n",
+		       __func__, (u64)phys_addr, size);
 		WARN_ON(1);
 		return NULL;
 	}
 
 	if (early_ioremap_debug) {
-		printk(KERN_INFO "early_ioremap(%08llx, %08lx) [%d] => ",
-		       (u64)phys_addr, size, slot);
+		printk(KERN_INFO "%s(%08llx, %08lx) [%d] => ",
+		       __func__, (u64)phys_addr, size, slot);
 		dump_stack();
 	}
 
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 73a6d7395bd..0342d27ca79 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -83,10 +83,8 @@ void __init initmem_init(void)
 		highstart_pfn = max_low_pfn;
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 	       pages_to_mb(highend_pfn - highstart_pfn));
-	num_physpages = highend_pfn;
 	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
-	num_physpages = max_low_pfn;
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
 	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 3e724256dbe..d641897a1f4 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -324,14 +324,11 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
 	res->start = start;
 	res->end = end;
 	info->res_offset[info->res_num] = addr.translation_offset;
+	info->res_num++;
 
-	if (!pci_use_crs) {
+	if (!pci_use_crs)
 		dev_printk(KERN_DEBUG, &info->bridge->dev,
 			   "host bridge window %pR (ignored)\n", res);
-		return AE_OK;
-	}
-
-	info->res_num++;
 
 	return AE_OK;
 }
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index d2fbcedcf6e..b410b71bdcf 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -929,6 +929,13 @@ void __init efi_enter_virtual_mode(void)
 			va = efi_ioremap(md->phys_addr, size,
 					 md->type, md->attribute);
 
+		if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
+			if (!va)
+				pr_err("ioremap of 0x%llX failed!\n",
+				       (unsigned long long)md->phys_addr);
+			continue;
+		}
+
 		md->virt_addr = (u64) (unsigned long) va;
 
 		if (!va) {
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 0faad646f5f..d6bfb876cfb 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -372,7 +372,7 @@ subsys_initcall(sysenter_setup);
 /* Register vsyscall32 into the ABI table */
 #include <linux/sysctl.h>
 
-static ctl_table abi_table2[] = {
+static struct ctl_table abi_table2[] = {
 	{
 		.procname	= "vsyscall32",
 		.data		= &sysctl_vsyscall32,
@@ -383,7 +383,7 @@ static ctl_table abi_table2[] = {
 	{}
 };
 
-static ctl_table abi_root_table2[] = {
+static struct ctl_table abi_root_table2[] = {
 	{
 		.procname = "abi",
 		.mode = 0555,
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a492be2635a..2fa02bc5003 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1557,7 +1557,7 @@ asmlinkage void __init xen_start_kernel(void)
 #ifdef CONFIG_X86_32
 	/* set up basic CPUID stuff */
 	cpu_detect(&new_cpu_data);
-	new_cpu_data.hard_math = 1;
+	set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
 	new_cpu_data.wp_works_ok = 1;
 	new_cpu_data.x86_capability[0] = cpuid_edx(1);
 #endif
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d99cae8147d..c1367b29c3b 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -40,11 +40,15 @@
 
 cpumask_var_t xen_cpu_initialized_map;
 
-static DEFINE_PER_CPU(int, xen_resched_irq);
-static DEFINE_PER_CPU(int, xen_callfunc_irq);
-static DEFINE_PER_CPU(int, xen_callfuncsingle_irq);
-static DEFINE_PER_CPU(int, xen_irq_work);
-static DEFINE_PER_CPU(int, xen_debug_irq) = -1;
+struct xen_common_irq {
+	int irq;	/* bound IRQ number, or -1 when unbound */
+	char *name;	/* kasprintf()ed name; freed in xen_smp_intr_free() */
+};
+static DEFINE_PER_CPU(struct xen_common_irq, xen_resched_irq) = { .irq = -1 };
+static DEFINE_PER_CPU(struct xen_common_irq, xen_callfunc_irq) = { .irq = -1 };
+static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 };
+static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
+static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 };
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
@@ -99,10 +103,47 @@ static void __cpuinit cpu_bringup_and_idle(void)
 	cpu_startup_entry(CPUHP_ONLINE);
 }
 
+/*
+ * Unbind @cpu's bound IRQs and free their names; xen_irq_work only if !HVM.
+ */
+static void xen_smp_intr_free(unsigned int cpu)
+{
+	if (per_cpu(xen_resched_irq, cpu).irq >= 0) {
+		unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu).irq, NULL);
+		per_cpu(xen_resched_irq, cpu).irq = -1;
+		kfree(per_cpu(xen_resched_irq, cpu).name);
+		per_cpu(xen_resched_irq, cpu).name = NULL;
+	}
+	if (per_cpu(xen_callfunc_irq, cpu).irq >= 0) {
+		unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu).irq, NULL);
+		per_cpu(xen_callfunc_irq, cpu).irq = -1;
+		kfree(per_cpu(xen_callfunc_irq, cpu).name);
+		per_cpu(xen_callfunc_irq, cpu).name = NULL;
+	}
+	if (per_cpu(xen_debug_irq, cpu).irq >= 0) {
+		unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu).irq, NULL);
+		per_cpu(xen_debug_irq, cpu).irq = -1;
+		kfree(per_cpu(xen_debug_irq, cpu).name);
+		per_cpu(xen_debug_irq, cpu).name = NULL;
+	}
+	if (per_cpu(xen_callfuncsingle_irq, cpu).irq >= 0) {
+		unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu).irq,
+				       NULL);
+		per_cpu(xen_callfuncsingle_irq, cpu).irq = -1;
+		kfree(per_cpu(xen_callfuncsingle_irq, cpu).name);
+		per_cpu(xen_callfuncsingle_irq, cpu).name = NULL;
+	}
+	if (!xen_hvm_domain() && per_cpu(xen_irq_work, cpu).irq >= 0) {
+		unbind_from_irqhandler(per_cpu(xen_irq_work, cpu).irq, NULL);
+		per_cpu(xen_irq_work, cpu).irq = -1;
+		kfree(per_cpu(xen_irq_work, cpu).name);
+		per_cpu(xen_irq_work, cpu).name = NULL;
+	}
+}
 static int xen_smp_intr_init(unsigned int cpu)
 {
 	int rc;
-	const char *resched_name, *callfunc_name, *debug_name;
+	char *resched_name, *callfunc_name, *debug_name;
 
 	resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
 	rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
@@ -113,7 +154,8 @@ static int xen_smp_intr_init(unsigned int cpu)
 				    NULL);
 	if (rc < 0)
 		goto fail;
-	per_cpu(xen_resched_irq, cpu) = rc;
+	per_cpu(xen_resched_irq, cpu).irq = rc;
+	per_cpu(xen_resched_irq, cpu).name = resched_name;
 
 	callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
 	rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
@@ -124,7 +166,8 @@ static int xen_smp_intr_init(unsigned int cpu)
 				    NULL);
 	if (rc < 0)
 		goto fail;
-	per_cpu(xen_callfunc_irq, cpu) = rc;
+	per_cpu(xen_callfunc_irq, cpu).irq = rc;
+	per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;
 
 	debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
 	rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
@@ -132,7 +175,8 @@ static int xen_smp_intr_init(unsigned int cpu)
 				     debug_name, NULL);
 	if (rc < 0)
 		goto fail;
-	per_cpu(xen_debug_irq, cpu) = rc;
+	per_cpu(xen_debug_irq, cpu).irq = rc;
+	per_cpu(xen_debug_irq, cpu).name = debug_name;
 
 	callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
 	rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
@@ -143,7 +187,8 @@ static int xen_smp_intr_init(unsigned int cpu)
 				    NULL);
 	if (rc < 0)
 		goto fail;
-	per_cpu(xen_callfuncsingle_irq, cpu) = rc;
+	per_cpu(xen_callfuncsingle_irq, cpu).irq = rc;
+	per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;
 
 	/*
 	 * The IRQ worker on PVHVM goes through the native path and uses the
@@ -161,26 +206,13 @@ static int xen_smp_intr_init(unsigned int cpu)
 				    NULL);
 	if (rc < 0)
 		goto fail;
-	per_cpu(xen_irq_work, cpu) = rc;
+	per_cpu(xen_irq_work, cpu).irq = rc;
+	per_cpu(xen_irq_work, cpu).name = callfunc_name;
 
 	return 0;
 
  fail:
-	if (per_cpu(xen_resched_irq, cpu) >= 0)
-		unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
-	if (per_cpu(xen_callfunc_irq, cpu) >= 0)
-		unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
-	if (per_cpu(xen_debug_irq, cpu) >= 0)
-		unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
-	if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0)
-		unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu),
-				       NULL);
-	if (xen_hvm_domain())
-		return rc;
-
-	if (per_cpu(xen_irq_work, cpu) >= 0)
-		unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
-
+	xen_smp_intr_free(cpu);
 	return rc;
 }
 
@@ -433,12 +465,7 @@ static void xen_cpu_die(unsigned int cpu)
 		current->state = TASK_UNINTERRUPTIBLE;
 		schedule_timeout(HZ/10);
 	}
-	unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
-	unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
-	unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
-	unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
-	if (!xen_hvm_domain())
-		unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
+	xen_smp_intr_free(cpu);
 	xen_uninit_lock_cpu(cpu);
 	xen_teardown_timer(cpu);
 }
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 3002ec1bb71..a40f8508e76 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -7,6 +7,7 @@
 #include <linux/debugfs.h>
 #include <linux/log2.h>
 #include <linux/gfp.h>
+#include <linux/slab.h>
 
 #include <asm/paravirt.h>
 
@@ -165,6 +166,7 @@ static int xen_spin_trylock(struct arch_spinlock *lock)
 	return old == 0;
 }
 
+static DEFINE_PER_CPU(char *, irq_name);	/* freed in xen_uninit_lock_cpu() */
 static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
 static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
 
@@ -362,7 +364,7 @@ static irqreturn_t dummy_handler(int irq, void *dev_id)
 void __cpuinit xen_init_lock_cpu(int cpu)
 {
 	int irq;
-	const char *name;
+	char *name;
 
 	WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n",
 	     cpu, per_cpu(lock_kicker_irq, cpu));
@@ -385,6 +387,7 @@ void __cpuinit xen_init_lock_cpu(int cpu)
 	if (irq >= 0) {
 		disable_irq(irq); /* make sure it's never delivered */
 		per_cpu(lock_kicker_irq, cpu) = irq;
+		per_cpu(irq_name, cpu) = name;
 	}
 
 	printk("cpu %d spinlock event irq %d\n", cpu, irq);
@@ -401,6 +404,8 @@ void xen_uninit_lock_cpu(int cpu)
 
 	unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
 	per_cpu(lock_kicker_irq, cpu) = -1;
+	kfree(per_cpu(irq_name, cpu));
+	per_cpu(irq_name, cpu) = NULL;
 }
 
 void __init xen_init_spinlocks(void)
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 3d88bfdf9e1..a690868be83 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -14,6 +14,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/math64.h>
 #include <linux/gfp.h>
+#include <linux/slab.h>
 
 #include <asm/pvclock.h>
 #include <asm/xen/hypervisor.h>
@@ -36,9 +37,8 @@ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
 /* snapshots of runstate info */
 static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
 
-/* unused ns of stolen and blocked time */
+/* unused ns of stolen time */
 static DEFINE_PER_CPU(u64, xen_residual_stolen);
-static DEFINE_PER_CPU(u64, xen_residual_blocked);
 
 /* return an consistent snapshot of 64-bit time/counter value */
 static u64 get64(const u64 *p)
@@ -115,7 +115,7 @@ static void do_stolen_accounting(void)
 {
 	struct vcpu_runstate_info state;
 	struct vcpu_runstate_info *snap;
-	s64 blocked, runnable, offline, stolen;
+	s64 runnable, offline, stolen;
 	cputime_t ticks;
 
 	get_runstate_snapshot(&state);
@@ -125,7 +125,6 @@ static void do_stolen_accounting(void)
 	snap = &__get_cpu_var(xen_runstate_snapshot);
 
 	/* work out how much time the VCPU has not been runn*ing*  */
-	blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
 	runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
 	offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
 
@@ -141,17 +140,6 @@ static void do_stolen_accounting(void)
 	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
 	__this_cpu_write(xen_residual_stolen, stolen);
 	account_steal_ticks(ticks);
-
-	/* Add the appropriate number of ticks of blocked time,
-	   including any left-overs from last time. */
-	blocked += __this_cpu_read(xen_residual_blocked);
-
-	if (blocked < 0)
-		blocked = 0;
-
-	ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
-	__this_cpu_write(xen_residual_blocked, blocked);
-	account_idle_ticks(ticks);
 }
 
 /* Get the TSC speed from Xen */
@@ -377,11 +365,16 @@ static const struct clock_event_device xen_vcpuop_clockevent = {
 
 static const struct clock_event_device *xen_clockevent =
 	&xen_timerop_clockevent;
-static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events) = { .irq = -1 };
+
+struct xen_clock_event_device {
+	struct clock_event_device evt;	/* evt.irq == -1: no IRQ bound */
+	char *name;	/* IRQ name; kfree()d in xen_teardown_timer() */
+};
+static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 };
 
 static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
 {
-	struct clock_event_device *evt = &__get_cpu_var(xen_clock_events);
+	struct clock_event_device *evt = &__get_cpu_var(xen_clock_events).evt;
 	irqreturn_t ret;
 
 	ret = IRQ_NONE;
@@ -395,14 +388,30 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
 	return ret;
 }
 
+/* Unbind @cpu's timer IRQ if one is bound and free its name; never CPU 0. */
+void xen_teardown_timer(int cpu)
+{
+	struct xen_clock_event_device *xevt;
+	BUG_ON(cpu == 0);
+	xevt = &per_cpu(xen_clock_events, cpu);
+	if (xevt->evt.irq < 0)
+		return;
+	unbind_from_irqhandler(xevt->evt.irq, NULL);
+	xevt->evt.irq = -1;
+	kfree(xevt->name);
+	xevt->name = NULL;
+}
+
 void xen_setup_timer(int cpu)
 {
-	const char *name;
+	char *name;
 	struct clock_event_device *evt;
 	int irq;
 
-	evt = &per_cpu(xen_clock_events, cpu);
+	evt = &per_cpu(xen_clock_events, cpu).evt;
 	WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
+	if (evt->irq >= 0)
+		xen_teardown_timer(cpu);
 
 	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
 
@@ -420,22 +429,15 @@ void xen_setup_timer(int cpu)
 
 	evt->cpumask = cpumask_of(cpu);
 	evt->irq = irq;
+	per_cpu(xen_clock_events, cpu).name = name;
 }
 
-void xen_teardown_timer(int cpu)
-{
-	struct clock_event_device *evt;
-	BUG_ON(cpu == 0);
-	evt = &per_cpu(xen_clock_events, cpu);
-	unbind_from_irqhandler(evt->irq, NULL);
-	evt->irq = -1;
-}
 
 void xen_setup_cpu_clockevents(void)
 {
 	BUG_ON(preemptible());
 
-	clockevents_register_device(&__get_cpu_var(xen_clock_events));
+	clockevents_register_device(&__get_cpu_var(xen_clock_events).evt);
 }
 
 void xen_timer_resume(void)
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index bba125b4bb0..479d7537a32 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -173,39 +173,16 @@ void __init zones_init(void)
 
 void __init mem_init(void)
 {
-	unsigned long codesize, reservedpages, datasize, initsize;
-	unsigned long highmemsize, tmp, ram;
-
-	max_mapnr = num_physpages = max_low_pfn - ARCH_PFN_OFFSET;
+	max_mapnr = max_low_pfn - ARCH_PFN_OFFSET;
 	high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
-	highmemsize = 0;
 
 #ifdef CONFIG_HIGHMEM
 #error HIGHGMEM not implemented in init.c
 #endif
 
-	totalram_pages += free_all_bootmem();
-
-	reservedpages = ram = 0;
-	for (tmp = 0; tmp < max_mapnr; tmp++) {
-		ram++;
-		if (PageReserved(mem_map+tmp))
-			reservedpages++;
-	}
+	free_all_bootmem();
 
-	codesize =  (unsigned long) _etext - (unsigned long) _stext;
-	datasize =  (unsigned long) _edata - (unsigned long) _sdata;
-	initsize =  (unsigned long) __init_end - (unsigned long) __init_begin;
-
-	printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, "
-	       "%ldk data, %ldk init %ldk highmem)\n",
-	       nr_free_pages() << (PAGE_SHIFT-10),
-	       ram << (PAGE_SHIFT-10),
-	       codesize >> 10,
-	       reservedpages << (PAGE_SHIFT-10),
-	       datasize >> 10,
-	       initsize >> 10,
-	       highmemsize >> 10);
+	mem_init_print_info(NULL);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -214,11 +191,11 @@ extern int initrd_is_mapped;
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
 	if (initrd_is_mapped)
-		free_reserved_area(start, end, 0, "initrd");
+		free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
 
 void free_initmem(void)
 {
-	free_initmem_default(0);
+	free_initmem_default(-1);
 }
author	Olof Johansson <olof@lixom.net>	2013-07-12 10:59:39 -0700
committer	Olof Johansson <olof@lixom.net>	2013-07-12 10:59:39 -0700
commit	f4b96f5e4ff8d86699c851c10245e102809b0331 (patch)
tree	f766102263bed71738431cabb4d4f6f086005cd8 /arch
parent	9d8812df35be58a5da0c44182c1e4ba2507cc6a7 (diff)
parent	c24a6ae18abde53b048372b066b93b71b1b91154 (diff)