136 files changed, 5474 insertions, 3685 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bb0c0d0f6db..96e0c2ebc38 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,6 +23,8 @@ config X86
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_DYNAMIC_FTRACE
+	select HAVE_FTRACE
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER
 
@@ -168,6 +170,7 @@ config GENERIC_PENDING_IRQ
 config X86_SMP
 	bool
 	depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
+	select USE_GENERIC_SMP_HELPERS
 	default y
 
 config X86_32_SMP
@@ -181,12 +184,12 @@ config X86_64_SMP
 config X86_HT
 	bool
 	depends on SMP
-	depends on (X86_32 && !(X86_VISWS || X86_VOYAGER)) || X86_64
+	depends on (X86_32 && !X86_VOYAGER) || X86_64
 	default y
 
 config X86_BIOS_REBOOT
 	bool
-	depends on !X86_VISWS && !X86_VOYAGER
+	depends on !X86_VOYAGER
 	default y
 
 config X86_TRAMPOLINE
@@ -232,13 +235,13 @@ config SMP
 
 config X86_FIND_SMP_CONFIG
 	def_bool y
-	depends on X86_MPPARSE || X86_VOYAGER || X86_VISWS
+	depends on X86_MPPARSE || X86_VOYAGER
 
 if ACPI
 config X86_MPPARSE
 	def_bool y
 	bool "Enable MPS table"
-	depends on X86_LOCAL_APIC && !X86_VISWS
+	depends on X86_LOCAL_APIC
 	help
 	  For old smp systems that do not have proper acpi support. Newer systems
 	  (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
@@ -247,7 +250,7 @@ endif
 if !ACPI
 config X86_MPPARSE
 	def_bool y
-	depends on X86_LOCAL_APIC && !X86_VISWS
+	depends on X86_LOCAL_APIC
 endif
 
 choice
@@ -281,18 +284,6 @@ config X86_VOYAGER
 	  If you do not specifically know you have a Voyager based machine,
 	  say N here, otherwise the kernel you build will not be bootable.
 
-config X86_VISWS
-	bool "SGI 320/540 (Visual Workstation)"
-	depends on X86_32 && !PCI
-	help
-	  The SGI Visual Workstation series is an IA32-based workstation
-	  based on SGI systems chips with some legacy PC hardware attached.
-
-	  Say Y here to create a kernel to run on the SGI 320 or 540.
-
-	  A kernel compiled for the Visual Workstation will not run on PCs
-	  and vice versa. See <file:Documentation/sgi-visws.txt> for details.
-
 config X86_GENERICARCH
        bool "Generic architecture"
 	depends on X86_32
@@ -355,7 +346,7 @@ config X86_RDC321X
 config X86_VSMP
 	bool "Support for ScaleMP vSMP"
 	select PARAVIRT
-	depends on X86_64 && !PCI
+	depends on X86_64 && PCI
 	help
 	  Support for ScaleMP vSMP systems.  Say 'Y' here if this kernel is
 	  supposed to run on these EM64T-based machines.  Only choose this option
@@ -363,6 +354,18 @@ config X86_VSMP
 
 endchoice
 
+config X86_VISWS
+	bool "SGI 320/540 (Visual Workstation)"
+	depends on X86_32 && PCI && !X86_VOYAGER && X86_MPPARSE && PCI_GODIRECT
+	help
+	  The SGI Visual Workstation series is an IA32-based workstation
+	  based on SGI systems chips with some legacy PC hardware attached.
+
+	  Say Y here to create a kernel to run on the SGI 320 or 540.
+
+	  A kernel compiled for the Visual Workstation will run on general
+	  PCs as well. See <file:Documentation/sgi-visws.txt> for details.
+
 config SCHED_NO_NO_OMIT_FRAME_POINTER
 	def_bool y
 	prompt "Single-depth WCHAN output"
@@ -391,7 +394,7 @@ config VMI
 	bool "VMI Guest support"
 	select PARAVIRT
 	depends on X86_32
-	depends on !(X86_VISWS || X86_VOYAGER)
+	depends on !X86_VOYAGER
 	help
 	  VMI provides a paravirtualized interface to the VMware ESX server
 	  (it could be used by other hypervisors in theory too, but is not
@@ -402,7 +405,7 @@ config KVM_CLOCK
 	bool "KVM paravirtualized clock"
 	select PARAVIRT
 	select PARAVIRT_CLOCK
-	depends on !(X86_VISWS || X86_VOYAGER)
+	depends on !X86_VOYAGER
 	help
 	  Turning on this option will allow you to run a paravirtualized clock
 	  when running over the KVM hypervisor. Instead of relying on a PIT
@@ -413,7 +416,7 @@ config KVM_CLOCK
 config KVM_GUEST
 	bool "KVM Guest support"
 	select PARAVIRT
-	depends on !(X86_VISWS || X86_VOYAGER)
+	depends on !X86_VOYAGER
 	help
 	 This option enables various optimizations for running under the KVM
 	 hypervisor.
@@ -422,7 +425,7 @@ source "arch/x86/lguest/Kconfig"
 
 config PARAVIRT
 	bool "Enable paravirtualization code"
-	depends on !(X86_VISWS || X86_VOYAGER)
+	depends on !X86_VOYAGER
 	help
 	  This changes the kernel so it can modify itself when it is run
 	  under a hypervisor, potentially improving performance significantly
@@ -445,7 +448,6 @@ config PARAVIRT_DEBUG
 config MEMTEST
 	bool "Memtest"
 	depends on X86_64
-	default y
 	help
 	  This option adds a kernel parameter 'memtest', which allows memtest
 	  to be set.
@@ -453,7 +455,7 @@ config MEMTEST
 		memtest=1, mean do 1 test pattern;
 		...
 		memtest=4, mean do 4 test patterns.
-	  If you are unsure how to answer this question, answer Y.
+	  If you are unsure how to answer this question, answer N.
 
 config X86_SUMMIT_NUMA
 	def_bool y
@@ -575,7 +577,7 @@ config SWIOTLB
 	  3 GB of memory. If unsure, say Y.
 
 config IOMMU_HELPER
-	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB)
+	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
 config MAXSMP
 	bool "Configure Maximum number of SMP Processors and NUMA Nodes"
 	depends on X86_64 && SMP
@@ -628,7 +630,7 @@ source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
-	depends on X86_32 && !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH)
+	depends on X86_32 && !SMP && !(X86_VOYAGER || X86_GENERICARCH)
 	help
 	  A local APIC (Advanced Programmable Interrupt Controller) is an
 	  integrated interrupt controller in the CPU. If you have a single-CPU
@@ -653,11 +655,11 @@ config X86_UP_IOAPIC
 
 config X86_LOCAL_APIC
 	def_bool y
-	depends on X86_64 || (X86_32 && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH))
+	depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
 
 config X86_IO_APIC
 	def_bool y
-	depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH))
+	depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
 
 config X86_VISWS_APIC
 	def_bool y
@@ -711,7 +713,7 @@ config X86_MCE_NONFATAL
 
 config X86_MCE_P4THERMAL
 	bool "check for P4 thermal throttling interrupt."
-	depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP) && !X86_VISWS
+	depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP)
 	help
 	  Enabling this feature will cause a message to be printed when the P4
 	  enters thermal throttling.
@@ -1133,21 +1135,18 @@ config MTRR
 	  See <file:Documentation/mtrr.txt> for more information.
 
 config MTRR_SANITIZER
-	def_bool y
+	bool
 	prompt "MTRR cleanup support"
 	depends on MTRR
 	help
-	  Convert MTRR layout from continuous to discrete, so some X driver
-	  could add WB entries.
-
-	  Say N here if you see bootup problems (boot crash, boot hang,
-	  spontaneous reboots).
+	  Convert MTRR layout from continuous to discrete, so X drivers can
+	  add writeback entries.
 
-	  Could be disabled with disable_mtrr_cleanup. Also mtrr_chunk_size
-	  could be used to send largest mtrr entry size for continuous block
-	  to hold holes (aka. UC entries)
+	  Can be disabled with disable_mtrr_cleanup on the kernel command line.
+	  The largest mtrr entry size for a continous block can be set with
+	  mtrr_chunk_size.
 
-	  If unsure, say Y.
+	  If unsure, say N.
 
 config MTRR_SANITIZER_ENABLE_DEFAULT
 	int "MTRR cleanup enable value (0-1)"
@@ -1164,7 +1163,7 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
 	depends on MTRR_SANITIZER
 	help
 	  mtrr cleanup spare entries default, it can be changed via
-	  mtrr_spare_reg_nr=
+	  mtrr_spare_reg_nr=N on the kernel command line.
 
 config X86_PAT
 	bool
@@ -1414,7 +1413,7 @@ config X86_APM_BOOT
 
 menuconfig APM
 	tristate "APM (Advanced Power Management) BIOS support"
-	depends on X86_32 && PM_SLEEP && !X86_VISWS
+	depends on X86_32 && PM_SLEEP
 	---help---
 	  APM is a BIOS specification for saving power using several different
 	  techniques. This is mostly useful for battery powered laptops with
@@ -1561,7 +1560,7 @@ config PCI
 
 choice
 	prompt "PCI access mode"
-	depends on X86_32 && PCI && !X86_VISWS
+	depends on X86_32 && PCI
 	default PCI_GOANY
 	---help---
 	  On PCI systems, the BIOS can be used to detect the PCI devices and
@@ -1598,12 +1597,12 @@ endchoice
 
 config PCI_BIOS
 	def_bool y
-	depends on X86_32 && !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
+	depends on X86_32 && PCI && (PCI_GOBIOS || PCI_GOANY)
 
 # x86-64 doesn't support PCI BIOS access from long mode so always go direct.
 config PCI_DIRECT
 	def_bool y
-	depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC) || X86_VISWS)
+	depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC))
 
 config PCI_MMCONFIG
 	def_bool y
@@ -1663,7 +1662,7 @@ if X86_32
 
 config ISA
 	bool "ISA support"
-	depends on !(X86_VOYAGER || X86_VISWS)
+	depends on !X86_VOYAGER
 	help
 	  Find out whether you have ISA slots on your motherboard.  ISA is the
 	  name of a bus system, i.e. the way the CPU talks to the other stuff
@@ -1690,7 +1689,7 @@ config EISA
 source "drivers/eisa/Kconfig"
 
 config MCA
-	bool "MCA support" if !(X86_VISWS || X86_VOYAGER)
+	bool "MCA support" if !X86_VOYAGER
 	default y if X86_VOYAGER
 	help
 	  MicroChannel Architecture is found in some IBM PS/2 machines and
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 3d22bb8175b..abff1b84ed5 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -344,7 +344,7 @@ config X86_F00F_BUG
 
 config X86_WP_WORKS_OK
 	def_bool y
-	depends on X86_32 && !M386
+	depends on !M386
 
 config X86_INVLPG
 	def_bool y
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index acc0271920f..ae36bfa814e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -171,6 +171,33 @@ config IOMMU_LEAK
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.
 
+config MMIOTRACE_HOOKS
+	bool
+
+config MMIOTRACE
+	bool "Memory mapped IO tracing"
+	depends on DEBUG_KERNEL && PCI
+	select TRACING
+	select MMIOTRACE_HOOKS
+	help
+	  Mmiotrace traces Memory Mapped I/O access and is meant for
+	  debugging and reverse engineering. It is called from the ioremap
+	  implementation and works via page faults. Tracing is disabled by
+	  default and can be enabled at run-time.
+
+	  See Documentation/tracers/mmiotrace.txt.
+	  If you are not helping to develop drivers, say N.
+
+config MMIOTRACE_TEST
+	tristate "Test module for mmiotrace"
+	depends on MMIOTRACE && m
+	help
+	  This is a dumb module for testing mmiotrace. It is very dangerous
+	  as it will write garbage to IO memory starting at a given address.
+	  However, it should be safe to use on e.g. unused portion of VRAM.
+
+	  Say N, unless you absolutely know what you are doing.
+
 #
 # IO delay types:
 #
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b03d24b44bf..919ce21ea65 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -113,10 +113,6 @@ mcore-y  := arch/x86/mach-default/
 mflags-$(CONFIG_X86_VOYAGER)	:= -Iinclude/asm-x86/mach-voyager
 mcore-$(CONFIG_X86_VOYAGER)	:= arch/x86/mach-voyager/
 
-# VISWS subarch support
-mflags-$(CONFIG_X86_VISWS)	:= -Iinclude/asm-x86/mach-visws
-mcore-$(CONFIG_X86_VISWS)	:= arch/x86/mach-visws/
-
 # generic subarchitecture
 mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic
 fcore-$(CONFIG_X86_GENERICARCH)	+= arch/x86/mach-generic/
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 24e4d4928d6..20371d0635e 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -116,7 +116,7 @@ ENTRY(ia32_sysenter_target)
 	pushfq
 	CFI_ADJUST_CFA_OFFSET 8
 	/*CFI_REL_OFFSET rflags,0*/
-	movl	8*3-THREAD_SIZE+threadinfo_sysenter_return(%rsp), %r10d
+	movl	8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d
 	CFI_REGISTER rip,r10
 	pushq	$__USER32_CS
 	CFI_ADJUST_CFA_OFFSET 8
@@ -136,8 +136,9 @@ ENTRY(ia32_sysenter_target)
  	.quad 1b,ia32_badarg
  	.previous	
 	GET_THREAD_INFO(%r10)
-	orl    $TS_COMPAT,threadinfo_status(%r10)
-	testl  $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
+	orl    $TS_COMPAT,TI_status(%r10)
+	testl  $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
+		 TI_flags(%r10)
 	CFI_REMEMBER_STATE
 	jnz  sysenter_tracesys
 sysenter_do_call:	
@@ -149,9 +150,9 @@ sysenter_do_call:
 	GET_THREAD_INFO(%r10)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
+	testl	$_TIF_ALLWORK_MASK,TI_flags(%r10)
 	jnz	int_ret_from_sys_call
-	andl    $~TS_COMPAT,threadinfo_status(%r10)
+	andl    $~TS_COMPAT,TI_status(%r10)
 	/* clear IF, that popfq doesn't enable interrupts early */
 	andl  $~0x200,EFLAGS-R11(%rsp) 
 	movl	RIP-R11(%rsp),%edx		/* User %eip */
@@ -240,8 +241,9 @@ ENTRY(ia32_cstar_target)
 	.quad 1b,ia32_badarg
 	.previous	
 	GET_THREAD_INFO(%r10)
-	orl   $TS_COMPAT,threadinfo_status(%r10)
-	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
+	orl   $TS_COMPAT,TI_status(%r10)
+	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
+		TI_flags(%r10)
 	CFI_REMEMBER_STATE
 	jnz   cstar_tracesys
 cstar_do_call:	
@@ -253,9 +255,9 @@ cstar_do_call:
 	GET_THREAD_INFO(%r10)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
+	testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
 	jnz  int_ret_from_sys_call
-	andl $~TS_COMPAT,threadinfo_status(%r10)
+	andl $~TS_COMPAT,TI_status(%r10)
 	RESTORE_ARGS 1,-ARG_SKIP,1,1,1
 	movl RIP-ARGOFFSET(%rsp),%ecx
 	CFI_REGISTER rip,rcx
@@ -333,8 +335,9 @@ ENTRY(ia32_syscall)
 	   this could be a problem. */
 	SAVE_ARGS 0,0,1
 	GET_THREAD_INFO(%r10)
-	orl   $TS_COMPAT,threadinfo_status(%r10)
-	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
+	orl   $TS_COMPAT,TI_status(%r10)
+	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
+		TI_flags(%r10)
 	jnz ia32_tracesys
 ia32_do_syscall:	
 	cmpl $(IA32_NR_syscalls-1),%eax
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 54829e2b516..da140611bb5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,6 +6,12 @@ extra-y                := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu
 
 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
+ifdef CONFIG_FTRACE
+# Do not profile debug utilities
+CFLAGS_REMOVE_tsc.o = -pg
+CFLAGS_REMOVE_rtc.o = -pg
+endif
+
 #
 # vsyscalls (which work on the user stack) should have
 # no stack-protector checks:
@@ -13,12 +19,13 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_vsyscall_64.o	:= $(PROFILING) -g0 $(nostackp)
 CFLAGS_hpet.o		:= $(nostackp)
-CFLAGS_tsc_64.o		:= $(nostackp)
+CFLAGS_tsc.o		:= $(nostackp)
 
 obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
 obj-y			+= traps_$(BITS).o irq_$(BITS).o
 obj-y			+= time_$(BITS).o ioport.o ldt.o
 obj-y			+= setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
+obj-$(CONFIG_X86_VISWS)	+= visws_quirks.o
 obj-$(CONFIG_X86_32)	+= probe_roms_32.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
@@ -26,7 +33,7 @@ obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o pci-nommu.o
-obj-y			+= tsc_$(BITS).o io_delay.o rtc.o
+obj-y			+= tsc.o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
 obj-y				+= process.o
@@ -56,6 +63,7 @@ obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic_$(BITS).o nmi.o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic_$(BITS).o
 obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 5c0107602b6..f489d7a9be9 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -37,6 +37,7 @@
 #include <asm/pgtable.h>
 #include <asm/io_apic.h>
 #include <asm/apic.h>
+#include <asm/genapic.h>
 #include <asm/io.h>
 #include <asm/mpspec.h>
 #include <asm/smp.h>
@@ -83,8 +84,6 @@ int acpi_lapic;
 int acpi_ioapic;
 int acpi_strict;
 
-static int disable_irq0_through_ioapic __initdata;
-
 u8 acpi_sci_flags __initdata;
 int acpi_sci_override_gsi __initdata;
 int acpi_skip_timer_override __initdata;
@@ -108,21 +107,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
  */
 enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
 
-#ifdef	CONFIG_X86_64
-
-/* rely on all ACPI tables being in the direct mapping */
-char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
-{
-	if (!phys_addr || !size)
-		return NULL;
-
-	if (phys_addr+size <= (max_pfn_mapped << PAGE_SHIFT) + PAGE_SIZE)
-		return __va(phys_addr);
-
-	return NULL;
-}
-
-#else
 
 /*
  * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
@@ -141,11 +125,15 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
 	unsigned long base, offset, mapped_size;
 	int idx;
 
-	if (phys + size < 8 * 1024 * 1024)
+	if (!phys || !size)
+		return NULL;
+
+	if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT))
 		return __va(phys);
 
 	offset = phys & (PAGE_SIZE - 1);
 	mapped_size = PAGE_SIZE - offset;
+	clear_fixmap(FIX_ACPI_END);
 	set_fixmap(FIX_ACPI_END, phys);
 	base = fix_to_virt(FIX_ACPI_END);
 
@@ -157,13 +145,13 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
 		if (--idx < FIX_ACPI_BEGIN)
 			return NULL;	/* cannot handle this */
 		phys += PAGE_SIZE;
+		clear_fixmap(idx);
 		set_fixmap(idx, phys);
 		mapped_size += PAGE_SIZE;
 	}
 
 	return ((unsigned char *)base + offset);
 }
-#endif
 
 #ifdef CONFIG_PCI_MMCONFIG
 /* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
@@ -992,10 +980,6 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 	int pin;
 	struct mp_config_intsrc mp_irq;
 
-	/* Skip the 8254 timer interrupt (IRQ 0) if requested.  */
-	if (bus_irq == 0 && disable_irq0_through_ioapic)
-		return;
-
 	/*
 	 * Convert 'gsi' to 'ioapic.pin'.
 	 */
@@ -1062,10 +1046,6 @@ void __init mp_config_acpi_legacy_irqs(void)
 	for (i = 0; i < 16; i++) {
 		int idx;
 
-		/* Skip the 8254 timer interrupt (IRQ 0) if requested.  */
-		if (i == 0 && disable_irq0_through_ioapic)
-			continue;
-
 		for (idx = 0; idx < mp_irq_entries; idx++) {
 			struct mp_config_intsrc *irq = mp_irqs + idx;
 
@@ -1373,8 +1353,6 @@ static void __init acpi_process_madt(void)
 	return;
 }
 
-#ifdef __i386__
-
 static int __init disable_acpi_irq(const struct dmi_system_id *d)
 {
 	if (!acpi_force) {
@@ -1425,13 +1403,12 @@ static int __init force_acpi_ht(const struct dmi_system_id *d)
 }
 
 /*
- * Don't register any I/O APIC entries for the 8254 timer IRQ.
+ * Force ignoring BIOS IRQ0 pin2 override
  */
-static int __init
-dmi_disable_irq0_through_ioapic(const struct dmi_system_id *d)
+static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
 {
-	pr_notice("%s detected: disabling IRQ 0 through I/O APIC\n", d->ident);
-	disable_irq0_through_ioapic = 1;
+	pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident);
+	acpi_skip_timer_override = 1;
 	return 0;
 }
 
@@ -1609,11 +1586,11 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
 	 * is enabled.  This input is incorrectly designated the
 	 * ISA IRQ 0 via an interrupt source override even though
 	 * it is wired to the output of the master 8259A and INTIN0
-	 * is not connected at all.  Abandon any attempts to route
-	 * IRQ 0 through the I/O APIC therefore.
+	 * is not connected at all.  Force ignoring BIOS IRQ0 pin2
+	 * override in that cases.
 	 */
 	{
-	 .callback = dmi_disable_irq0_through_ioapic,
+	 .callback = dmi_ignore_irq0_timer_override,
 	 .ident = "HP NX6125 laptop",
 	 .matches = {
 		     DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
@@ -1621,7 +1598,7 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
 		     },
 	 },
 	{
-	 .callback = dmi_disable_irq0_through_ioapic,
+	 .callback = dmi_ignore_irq0_timer_override,
 	 .ident = "HP NX6325 laptop",
 	 .matches = {
 		     DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
@@ -1631,8 +1608,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
 	{}
 };
 
-#endif				/* __i386__ */
-
 /*
  * acpi_boot_table_init() and acpi_boot_init()
  *  called from setup_arch(), always.
@@ -1660,9 +1635,7 @@ int __init acpi_boot_table_init(void)
 {
 	int error;
 
-#ifdef __i386__
 	dmi_check_system(acpi_dmi_table);
-#endif
 
 	/*
 	 * If acpi_disabled, bail out
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index de2d2e4ebad..7c074eec39f 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -56,6 +56,12 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
 	if (cpu_has(c, X86_FEATURE_ACPI))
 		buf[2] |= ACPI_PDC_T_FFH;
 
+	/*
+	 * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
+	 */
+	if (!cpu_has(c, X86_FEATURE_MWAIT))
+		buf[2] &= ~(ACPI_PDC_C_C2C3_FFH);
+
 	obj->type = ACPI_TYPE_BUFFER;
 	obj->buffer.length = 12;
 	obj->buffer.pointer = (u8 *) buf;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index e6a4b564cca..868de3d5c39 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -23,6 +23,15 @@ static unsigned long acpi_realmode;
 static char temp_stack[10240];
 #endif
 
+/* XXX: this macro should move to asm-x86/segment.h and be shared with the
+   boot code... */
+#define GDT_ENTRY(flags, base, limit)		\
+	(((u64)(base & 0xff000000) << 32) |	\
+	 ((u64)flags << 40) |			\
+	 ((u64)(limit & 0x00ff0000) << 32) |	\
+	 ((u64)(base & 0x00ffffff) << 16) |	\
+	 ((u64)(limit & 0x0000ffff)))
+
 /**
  * acpi_save_state_mem - save kernel state
  *
@@ -51,18 +60,27 @@ int acpi_save_state_mem(void)
 	header->video_mode = saved_video_mode;
 
 	header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
+
+	/*
+	 * Set up the wakeup GDT.  We set these up as Big Real Mode,
+	 * that is, with limits set to 4 GB.  At least the Lenovo
+	 * Thinkpad X61 is known to need this for the video BIOS
+	 * initialization quirk to work; this is likely to also
+	 * be the case for other laptops or integrated video devices.
+	 */
+
 	/* GDT[0]: GDT self-pointer */
 	header->wakeup_gdt[0] =
 		(u64)(sizeof(header->wakeup_gdt) - 1) +
 		((u64)(acpi_wakeup_address +
 			((char *)&header->wakeup_gdt - (char *)acpi_realmode))
 				<< 16);
-	/* GDT[1]: real-mode-like code segment */
-	header->wakeup_gdt[1] = (0x009bULL << 40) +
-		((u64)acpi_wakeup_address << 16) + 0xffff;
-	/* GDT[2]: real-mode-like data segment */
-	header->wakeup_gdt[2] = (0x0093ULL << 40) +
-		((u64)acpi_wakeup_address << 16) + 0xffff;
+	/* GDT[1]: big real mode-like code segment */
+	header->wakeup_gdt[1] =
+		GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
+	/* GDT[2]: big real mode-like data segment */
+	header->wakeup_gdt[2] =
+		GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff);
 
 #ifndef CONFIG_64BIT
 	store_gdt((struct desc_ptr *)&header->pmode_gdt);
@@ -140,6 +158,8 @@ static int __init acpi_sleep_setup(char *str)
 			acpi_realmode_flags |= 2;
 		if (strncmp(str, "s3_beep", 7) == 0)
 			acpi_realmode_flags |= 4;
+		if (strncmp(str, "old_ordering", 12) == 0)
+			acpi_old_suspend_ordering();
 		str = strchr(str, ',');
 		if (str != NULL)
 			str += strspn(str, ", \t");
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 65c7857a90d..2763cb37b55 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,6 +1,6 @@
 #include <linux/module.h>
 #include <linux/sched.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/kprobes.h>
 #include <linux/mm.h>
@@ -143,7 +143,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
 #ifdef CONFIG_X86_64
 
 extern char __vsyscall_0;
-static inline const unsigned char*const * find_nop_table(void)
+const unsigned char *const *find_nop_table(void)
 {
 	return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
 	       boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
@@ -162,7 +162,7 @@ static const struct nop {
 	{ -1, NULL }
 };
 
-static const unsigned char*const * find_nop_table(void)
+const unsigned char *const *find_nop_table(void)
 {
 	const unsigned char *const *noptable = intel_nops;
 	int i;
@@ -279,7 +279,7 @@ struct smp_alt_module {
 	struct list_head next;
 };
 static LIST_HEAD(smp_alt_modules);
-static DEFINE_SPINLOCK(smp_alt);
+static DEFINE_MUTEX(smp_alt);
 static int smp_mode = 1;	/* protected by smp_alt */
 
 void alternatives_smp_module_add(struct module *mod, char *name,
@@ -312,12 +312,12 @@ void alternatives_smp_module_add(struct module *mod, char *name,
 		__func__, smp->locks, smp->locks_end,
 		smp->text, smp->text_end, smp->name);
 
-	spin_lock(&smp_alt);
+	mutex_lock(&smp_alt);
 	list_add_tail(&smp->next, &smp_alt_modules);
 	if (boot_cpu_has(X86_FEATURE_UP))
 		alternatives_smp_unlock(smp->locks, smp->locks_end,
 					smp->text, smp->text_end);
-	spin_unlock(&smp_alt);
+	mutex_unlock(&smp_alt);
 }
 
 void alternatives_smp_module_del(struct module *mod)
@@ -327,17 +327,17 @@ void alternatives_smp_module_del(struct module *mod)
 	if (smp_alt_once || noreplace_smp)
 		return;
 
-	spin_lock(&smp_alt);
+	mutex_lock(&smp_alt);
 	list_for_each_entry(item, &smp_alt_modules, next) {
 		if (mod != item->mod)
 			continue;
 		list_del(&item->next);
-		spin_unlock(&smp_alt);
+		mutex_unlock(&smp_alt);
 		DPRINTK("%s: %s\n", __func__, item->name);
 		kfree(item);
 		return;
 	}
-	spin_unlock(&smp_alt);
+	mutex_unlock(&smp_alt);
 }
 
 void alternatives_smp_switch(int smp)
@@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp)
 		return;
 	BUG_ON(!smp && (num_online_cpus() > 1));
 
-	spin_lock(&smp_alt);
+	mutex_lock(&smp_alt);
 
 	/*
 	 * Avoid unnecessary switches because it forces JIT based VMs to
@@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp)
 						mod->text, mod->text_end);
 	}
 	smp_mode = smp;
-	spin_unlock(&smp_alt);
+	mutex_unlock(&smp_alt);
 }
 
 #endif
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 6dea8306d8c..a437d027f20 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -82,6 +82,11 @@ int pic_mode;
 /* Have we found an MP table */
 int smp_found_config;
 
+static struct resource lapic_resource = {
+	.name = "Local APIC",
+	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
+};
+
 static unsigned int calibration_result;
 
 static int lapic_next_event(unsigned long delta,
@@ -969,7 +974,7 @@ void __cpuinit setup_local_APIC(void)
 	 * Double-check whether this APIC is really registered.
 	 */
 	if (!apic_id_registered())
-		BUG();
+		WARN_ON_ONCE(1);
 
 	/*
 	 * Intel recommends to set DFR, LDR and TPR before enabling
@@ -1335,6 +1340,10 @@ void __init smp_intr_init(void)
 
 	/* IPI for generic function call */
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+
+	/* IPI for single call function */
+	set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+				call_function_single_interrupt);
 }
 #endif
 
@@ -1720,3 +1729,21 @@ static int __init apic_set_verbosity(char *str)
 }
 __setup("apic=", apic_set_verbosity);
 
+static int __init lapic_insert_resource(void)
+{
+	if (!apic_phys)
+		return -1;
+
+	/* Put local APIC into the resource map. */
+	lapic_resource.start = apic_phys;
+	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+	insert_resource(&iomem_resource, &lapic_resource);
+
+	return 0;
+}
+
+/*
+ * need call insert after e820_reserve_resources()
+ * that is using request_resource
+ */
+late_initcall(lapic_insert_resource);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 00e6d137095..bf9b441331e 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -204,6 +204,7 @@
 #include <linux/module.h>
 
 #include <linux/poll.h>
+#include <linux/smp_lock.h>
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/timer.h>
@@ -1212,9 +1213,9 @@ static int suspend(int vetoable)
 	if (err != APM_SUCCESS)
 		apm_error("suspend", err);
 	err = (err == APM_SUCCESS) ? 0 : -EIO;
-	device_power_up();
+	device_power_up(PMSG_RESUME);
 	local_irq_enable();
-	device_resume();
+	device_resume(PMSG_RESUME);
 	queue_event(APM_NORMAL_RESUME, NULL);
 	spin_lock(&user_list_lock);
 	for (as = user_list; as != NULL; as = as->next) {
@@ -1239,7 +1240,7 @@ static void standby(void)
 		apm_error("standby", err);
 
 	local_irq_disable();
-	device_power_up();
+	device_power_up(PMSG_RESUME);
 	local_irq_enable();
 }
 
@@ -1325,7 +1326,7 @@ static void check_events(void)
 			ignore_bounce = 1;
 			if ((event != APM_NORMAL_RESUME)
 			    || (ignore_normal_resume == 0)) {
-				device_resume();
+				device_resume(PMSG_RESUME);
 				queue_event(event, NULL);
 			}
 			ignore_normal_resume = 0;
@@ -1549,10 +1550,12 @@ static int do_open(struct inode *inode, struct file *filp)
 {
 	struct apm_user *as;
 
+	lock_kernel();
 	as = kmalloc(sizeof(*as), GFP_KERNEL);
 	if (as == NULL) {
 		printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
 		       sizeof(*as));
+		       unlock_kernel();
 		return -ENOMEM;
 	}
 	as->magic = APM_BIOS_MAGIC;
@@ -1574,6 +1577,7 @@ static int do_open(struct inode *inode, struct file *filp)
 	user_list = as;
 	spin_unlock(&user_list_lock);
 	filp->private_data = as;
+	unlock_kernel();
 	return 0;
 }
 
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 3295e7c08fe..bacf5deeec2 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -34,7 +34,7 @@ int main(void)
 	ENTRY(pid);
 	BLANK();
 #undef ENTRY
-#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct thread_info, entry))
+#define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry))
 	ENTRY(flags);
 	ENTRY(addr_limit);
 	ENTRY(preempt_count);
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
index 958526d6a74..7c36fb8a28d 100644
--- a/arch/x86/kernel/cpu/amd_64.c
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -199,10 +199,15 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 		 * Don't do it for gbpages because there seems very little
 		 * benefit in doing so.
 		 */
-		if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
-		    (tseg >> PMD_SHIFT) <
-			(max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
+		if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
+		    printk(KERN_DEBUG "tseg: %010llx\n", tseg);
+		    if ((tseg>>PMD_SHIFT) <
+				(max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
+			((tseg>>PMD_SHIFT) <
+				(max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
+			 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
 			set_memory_4k((unsigned long)__va(tseg), 1);
+		}
 	}
 }
 
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
index 13526fd5cce..1d181c40e2e 100644
--- a/arch/x86/kernel/cpu/centaur_64.c
+++ b/arch/x86/kernel/cpu/centaur_64.c
@@ -10,20 +10,12 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
 {
 	if (c->x86 == 0x6 && c->x86_model >= 0xf)
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
+	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
 }
 
 static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
 {
-	/* Cache sizes */
-	unsigned n;
-
-	n = c->extended_cpuid_level;
-	if (n >= 0x80000008) {
-		unsigned eax = cpuid_eax(0x80000008);
-		c->x86_virt_bits = (eax >> 8) & 0xff;
-		c->x86_phys_bits = eax & 0xff;
-	}
-
 	if (c->x86 == 0x6 && c->x86_model >= 0xf) {
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index 75185023529..7b8cc72feb4 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -98,7 +98,7 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c)
 
 void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 {
-	unsigned int n, dummy, eax, ebx, ecx, edx;
+	unsigned int n, dummy, ebx, ecx, edx;
 
 	n = c->extended_cpuid_level;
 
@@ -121,11 +121,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 		printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
 		c->x86_cache_size, ecx & 0xFF);
 	}
-	if (n >= 0x80000008) {
-		cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
-		c->x86_virt_bits = (eax >> 8) & 0xff;
-		c->x86_phys_bits = eax & 0xff;
-	}
 }
 
 void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -314,6 +309,16 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 	if (c->extended_cpuid_level >= 0x80000007)
 		c->x86_power = cpuid_edx(0x80000007);
 
+	if (c->extended_cpuid_level >= 0x80000008) {
+		u32 eax = cpuid_eax(0x80000008);
+
+		c->x86_virt_bits = (eax >> 8) & 0xff;
+		c->x86_phys_bits = eax & 0xff;
+	}
+
+	/* Assume all 64-bit CPUs support 32-bit syscall */
+	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
+
 	if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
 	    cpu_devs[c->x86_vendor]->c_early_init)
 		cpu_devs[c->x86_vendor]->c_early_init(c);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fe9224c51d3..70609efdf1d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -226,6 +226,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 
 	if (cpu_has_bts)
 		ds_init_intel(c);
+
+#ifdef CONFIG_X86_NUMAQ
+	numaq_tsc_disable();
+#endif
 }
 
 static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c
index fcb1cc9d75c..1019c58d39f 100644
--- a/arch/x86/kernel/cpu/intel_64.c
+++ b/arch/x86/kernel/cpu/intel_64.c
@@ -12,6 +12,8 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
 	    (c->x86 == 0x6 && c->x86_model >= 0x0e))
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
+	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
 }
 
 /*
@@ -52,9 +54,6 @@ static void __cpuinit srat_detect_node(void)
 
 static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 {
-	/* Cache sizes */
-	unsigned n;
-
 	init_intel_cacheinfo(c);
 	if (c->cpuid_level > 9) {
 		unsigned eax = cpuid_eax(10);
@@ -76,13 +75,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 	if (cpu_has_bts)
 		ds_init_intel(c);
 
-	n = c->extended_cpuid_level;
-	if (n >= 0x80000008) {
-		unsigned eax = cpuid_eax(0x80000008);
-		c->x86_virt_bits = (eax >> 8) & 0xff;
-		c->x86_phys_bits = eax & 0xff;
-	}
-
 	if (c->x86 == 15)
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
 	if (c->x86 == 6)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 501ca1cea27..c4a7ec31394 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -9,6 +9,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/smp_lock.h>
 #include <linux/string.h>
 #include <linux/rcupdate.h>
 #include <linux/kallsyms.h>
@@ -363,7 +364,7 @@ static void mcheck_check_cpu(void *info)
 
 static void mcheck_timer(struct work_struct *work)
 {
-	on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
+	on_each_cpu(mcheck_check_cpu, NULL, 1);
 
 	/*
 	 * Alert userspace if needed.  If we logged an MCE, reduce the
@@ -532,10 +533,12 @@ static int open_exclu;	/* already open exclusive? */
 
 static int mce_open(struct inode *inode, struct file *file)
 {
+	lock_kernel();
 	spin_lock(&mce_state_lock);
 
 	if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
 		spin_unlock(&mce_state_lock);
+		unlock_kernel();
 		return -EBUSY;
 	}
 
@@ -544,6 +547,7 @@ static int mce_open(struct inode *inode, struct file *file)
 	open_count++;
 
 	spin_unlock(&mce_state_lock);
+	unlock_kernel();
 
 	return nonseekable_open(inode, file);
 }
@@ -617,7 +621,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 	 * Collect entries that were still getting written before the
 	 * synchronize.
 	 */
-	on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
+	on_each_cpu(collect_tscs, cpu_tsc, 1);
 	for (i = next; i < MCE_LOG_LEN; i++) {
 		if (mcelog.entry[i].finished &&
 		    mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
@@ -742,7 +746,7 @@ static void mce_restart(void)
 	if (next_interval)
 		cancel_delayed_work(&mcheck_work);
 	/* Timer race is harmless here */
-	on_each_cpu(mce_init, NULL, 1, 1);
+	on_each_cpu(mce_init, NULL, 1);
 	next_interval = check_interval * HZ;
 	if (next_interval)
 		schedule_delayed_work(&mcheck_work,
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index 00ccb6c14ec..cc1fccdd31e 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -59,7 +59,7 @@ static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
 
 static void mce_work_fn(struct work_struct *work)
 {
-	on_each_cpu(mce_checkregs, NULL, 1, 1);
+	on_each_cpu(mce_checkregs, NULL, 1);
 	schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
 }
 
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 105afe12beb..6f23969c8fa 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -223,7 +223,7 @@ static void set_mtrr(unsigned int reg, unsigned long base,
 	atomic_set(&data.gate,0);
 
 	/*  Start the ball rolling on other CPUs  */
-	if (smp_call_function(ipi_handler, &data, 1, 0) != 0)
+	if (smp_call_function(ipi_handler, &data, 0) != 0)
 		panic("mtrr: timed out waiting for other CPUs\n");
 
 	local_irq_save(flags);
@@ -1682,7 +1682,7 @@ void mtrr_ap_init(void)
  */
 void mtrr_save_state(void)
 {
-	smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1);
+	smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
 }
 
 static int __init mtrr_init_finialize(void)
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 2e9bef6e3aa..6d4bdc02388 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -189,7 +189,7 @@ void disable_lapic_nmi_watchdog(void)
 	if (atomic_read(&nmi_active) <= 0)
 		return;
 
-	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
+	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
 
 	if (wd_ops)
 		wd_ops->unreserve();
@@ -213,7 +213,7 @@ void enable_lapic_nmi_watchdog(void)
 		return;
 	}
 
-	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+	on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
 	touch_nmi_watchdog();
 }
 
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index daff52a6224..2de5fa2bbf7 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -33,6 +33,7 @@
 #include <linux/init.h>
 #include <linux/poll.h>
 #include <linux/smp.h>
+#include <linux/smp_lock.h>
 #include <linux/major.h>
 #include <linux/fs.h>
 #include <linux/smp_lock.h>
@@ -95,7 +96,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
 	for (; count; count -= 16) {
 		cmd.eax = pos;
 		cmd.ecx = pos >> 32;
-		smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
+		smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1);
 		if (copy_to_user(tmp, &cmd, 16))
 			return -EFAULT;
 		tmp += 16;
@@ -107,15 +108,23 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
 
 static int cpuid_open(struct inode *inode, struct file *file)
 {
-	unsigned int cpu = iminor(file->f_path.dentry->d_inode);
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-
-	if (cpu >= NR_CPUS || !cpu_online(cpu))
-		return -ENXIO;	/* No such CPU */
+	unsigned int cpu;
+	struct cpuinfo_x86 *c;
+	int ret = 0;
+	
+	lock_kernel();
+
+	cpu = iminor(file->f_path.dentry->d_inode);
+	if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+		ret = -ENXIO;	/* No such CPU */
+		goto out;
+	}
+	c = &cpu_data(cpu);
 	if (c->cpuid_level < 0)
-		return -EIO;	/* CPUID not supported */
-
-	return 0;
+		ret = -EIO;	/* CPUID not supported */
+out:
+	unlock_kernel();
+	return ret;
 }
 
 /*
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d0335853ff5..28c29180b38 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -19,6 +19,7 @@
 #include <linux/mm.h>
 #include <linux/pfn.h>
 #include <linux/suspend.h>
+#include <linux/firmware-map.h>
 
 #include <asm/pgtable.h>
 #include <asm/page.h>
@@ -27,7 +28,22 @@
 #include <asm/setup.h>
 #include <asm/trampoline.h>
 
+/*
+ * The e820 map is the map that gets modified e.g. with command line parameters
+ * and that is also registered with modifications in the kernel resource tree
+ * with the iomem_resource as parent.
+ *
+ * The e820_saved is directly saved after the BIOS-provided memory map is
+ * copied. It doesn't get modified afterwards. It's registered for the
+ * /sys/firmware/memmap interface.
+ *
+ * That memory map is not modified and is used as base for kexec. The kexec'd
+ * kernel should get the same memory map as the firmware provides. Then the
+ * user can e.g. boot the original kernel with mem=1G while still booting the
+ * next kernel with full memory.
+ */
 struct e820map e820;
+struct e820map e820_saved;
 
 /* For PCI or other memory-mapped resources */
 unsigned long pci_mem_start = 0xaeedbabe;
@@ -398,8 +414,9 @@ static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
 	return __append_e820_map(biosmap, nr_map);
 }
 
-u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
-				unsigned new_type)
+static u64 __init e820_update_range_map(struct e820map *e820x, u64 start,
+					u64 size, unsigned old_type,
+					unsigned new_type)
 {
 	int i;
 	u64 real_updated_size = 0;
@@ -410,7 +427,7 @@ u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
 		size = ULLONG_MAX - start;
 
 	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
+		struct e820entry *ei = &e820x->map[i];
 		u64 final_start, final_end;
 		if (ei->type != old_type)
 			continue;
@@ -438,6 +455,19 @@ u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
 	return real_updated_size;
 }
 
+u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
+			     unsigned new_type)
+{
+	return e820_update_range_map(&e820, start, size, old_type, new_type);
+}
+
+static u64 __init e820_update_range_saved(u64 start, u64 size,
+					  unsigned old_type, unsigned new_type)
+{
+	return e820_update_range_map(&e820_saved, start, size, old_type,
+				     new_type);
+}
+
 /* make e820 not cover the range */
 u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
 			     int checktype)
@@ -487,6 +517,15 @@ void __init update_e820(void)
 	printk(KERN_INFO "modified physical RAM map:\n");
 	e820_print_map("modified");
 }
+static void __init update_e820_saved(void)
+{
+	int nr_map;
+
+	nr_map = e820_saved.nr_map;
+	if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
+		return;
+	e820_saved.nr_map = nr_map;
+}
 #define MAX_GAP_END 0x100000000ull
 /*
  * Search for a gap in the e820 memory space from start_addr to end_addr.
@@ -991,8 +1030,10 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
 
 	addr = round_down(start + size - sizet, align);
 	e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
+	e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
 	printk(KERN_INFO "update e820 for early_reserve_e820\n");
 	update_e820();
+	update_e820_saved();
 
 	return addr;
 }
@@ -1008,30 +1049,51 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
 #endif
 
 /*
- * Last pfn which the user wants to use.
- */
-unsigned long __initdata end_user_pfn = MAX_ARCH_PFN;
-
-/*
  * Find the highest page frame number we have available
  */
-unsigned long __init e820_end_of_ram(void)
+static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
 {
-	unsigned long last_pfn;
+	int i;
+	unsigned long last_pfn = 0;
 	unsigned long max_arch_pfn = MAX_ARCH_PFN;
 
-	last_pfn = find_max_pfn_with_active_regions();
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		unsigned long start_pfn;
+		unsigned long end_pfn;
+
+		if (ei->type != type)
+			continue;
+
+		start_pfn = ei->addr >> PAGE_SHIFT;
+		end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
+
+		if (start_pfn >= limit_pfn)
+			continue;
+		if (end_pfn > limit_pfn) {
+			last_pfn = limit_pfn;
+			break;
+		}
+		if (end_pfn > last_pfn)
+			last_pfn = end_pfn;
+	}
 
 	if (last_pfn > max_arch_pfn)
 		last_pfn = max_arch_pfn;
-	if (last_pfn > end_user_pfn)
-		last_pfn = end_user_pfn;
 
 	printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
 			 last_pfn, max_arch_pfn);
 	return last_pfn;
 }
+unsigned long __init e820_end_of_ram_pfn(void)
+{
+	return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
+}
 
+unsigned long __init e820_end_of_low_ram_pfn(void)
+{
+	return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
+}
 /*
  * Finds an active region in the address range from start_pfn to last_pfn and
  * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
@@ -1062,12 +1124,6 @@ int __init e820_find_active_region(const struct e820entry *ei,
 	if (*ei_endpfn > last_pfn)
 		*ei_endpfn = last_pfn;
 
-	/* Obey end_user_pfn to save on memmap */
-	if (*ei_startpfn >= end_user_pfn)
-		return 0;
-	if (*ei_endpfn > end_user_pfn)
-		*ei_endpfn = end_user_pfn;
-
 	return 1;
 }
 
@@ -1113,6 +1169,8 @@ static void early_panic(char *msg)
 	panic(msg);
 }
 
+static int userdef __initdata;
+
 /* "mem=nopentium" disables the 4MB page tables. */
 static int __init parse_memopt(char *p)
 {
@@ -1128,22 +1186,22 @@ static int __init parse_memopt(char *p)
 	}
 #endif
 
+	userdef = 1;
 	mem_size = memparse(p, &p);
-	end_user_pfn = mem_size>>PAGE_SHIFT;
-	e820_update_range(mem_size, ULLONG_MAX - mem_size,
-		E820_RAM, E820_RESERVED);
+	e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
 
 	return 0;
 }
 early_param("mem", parse_memopt);
 
-static int userdef __initdata;
-
 static int __init parse_memmap_opt(char *p)
 {
 	char *oldp;
 	u64 start_at, mem_size;
 
+	if (!p)
+		return -EINVAL;
+
 	if (!strcmp(p, "exactmap")) {
 #ifdef CONFIG_CRASH_DUMP
 		/*
@@ -1151,9 +1209,7 @@ static int __init parse_memmap_opt(char *p)
 		 * the real mem size before original memory map is
 		 * reset.
 		 */
-		e820_register_active_regions(0, 0, -1UL);
-		saved_max_pfn = e820_end_of_ram();
-		remove_all_active_ranges();
+		saved_max_pfn = e820_end_of_ram_pfn();
 #endif
 		e820.nr_map = 0;
 		userdef = 1;
@@ -1175,11 +1231,9 @@ static int __init parse_memmap_opt(char *p)
 	} else if (*p == '$') {
 		start_at = memparse(p+1, &p);
 		e820_add_region(start_at, mem_size, E820_RESERVED);
-	} else {
-		end_user_pfn = (mem_size >> PAGE_SHIFT);
-		e820_update_range(mem_size, ULLONG_MAX - mem_size,
-			E820_RAM, E820_RESERVED);
-	}
+	} else
+		e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
+
 	return *p == '\0' ? 0 : -EINVAL;
 }
 early_param("memmap", parse_memmap_opt);
@@ -1198,6 +1252,17 @@ void __init finish_e820_parsing(void)
 	}
 }
 
+static inline const char *e820_type_to_string(int e820_type)
+{
+	switch (e820_type) {
+	case E820_RESERVED_KERN:
+	case E820_RAM:	return "System RAM";
+	case E820_ACPI:	return "ACPI Tables";
+	case E820_NVS:	return "ACPI Non-volatile Storage";
+	default:	return "reserved";
+	}
+}
+
 /*
  * Mark e820 reserved areas as busy for the resource manager.
  */
@@ -1209,13 +1274,6 @@ void __init e820_reserve_resources(void)
 
 	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
 	for (i = 0; i < e820.nr_map; i++) {
-		switch (e820.map[i].type) {
-		case E820_RESERVED_KERN:
-		case E820_RAM:	res->name = "System RAM"; break;
-		case E820_ACPI:	res->name = "ACPI Tables"; break;
-		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
-		default:	res->name = "reserved";
-		}
 		end = e820.map[i].addr + e820.map[i].size - 1;
 #ifndef CONFIG_RESOURCES_64BIT
 		if (end > 0x100000000ULL) {
@@ -1223,6 +1281,7 @@ void __init e820_reserve_resources(void)
 			continue;
 		}
 #endif
+		res->name = e820_type_to_string(e820.map[i].type);
 		res->start = e820.map[i].addr;
 		res->end = end;
 
@@ -1230,8 +1289,20 @@ void __init e820_reserve_resources(void)
 		insert_resource(&iomem_resource, res);
 		res++;
 	}
+
+	for (i = 0; i < e820_saved.nr_map; i++) {
+		struct e820entry *entry = &e820_saved.map[i];
+		firmware_map_add_early(entry->addr,
+			entry->addr + entry->size - 1,
+			e820_type_to_string(entry->type));
+	}
 }
 
+/*
+ * Non-standard memory setup can be specified via this quirk:
+ */
+char * (*arch_memory_setup_quirk)(void);
+
 char *__init default_machine_specific_memory_setup(void)
 {
 	char *who = "BIOS-e820";
@@ -1272,6 +1343,12 @@ char *__init default_machine_specific_memory_setup(void)
 
 char *__init __attribute__((weak)) machine_specific_memory_setup(void)
 {
+	if (arch_memory_setup_quirk) {
+		char *who = arch_memory_setup_quirk();
+
+		if (who)
+			return who;
+	}
 	return default_machine_specific_memory_setup();
 }
 
@@ -1283,8 +1360,12 @@ char * __init __attribute__((weak)) memory_setup(void)
 
 void __init setup_memory_map(void)
 {
+	char *who;
+
+	who = memory_setup();
+	memcpy(&e820_saved, &e820, sizeof(struct e820map));
 	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
-	e820_print_map(memory_setup());
+	e820_print_map(who);
 }
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index a4665f37cfc..a0e11c0cc87 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -120,7 +120,18 @@ static struct chipset early_qrk[] __initdata = {
 	{}
 };
 
-static void __init check_dev_quirk(int num, int slot, int func)
+/**
+ * check_dev_quirk - apply early quirks to a given PCI device
+ * @num: bus number
+ * @slot: slot number
+ * @func: PCI function
+ *
+ * Check the vendor & device ID against the early quirks table.
+ *
+ * If the device is single function, let early_quirks() know so we don't
+ * poke at this device again.
+ */
+static int __init check_dev_quirk(int num, int slot, int func)
 {
 	u16 class;
 	u16 vendor;
@@ -131,7 +142,7 @@ static void __init check_dev_quirk(int num, int slot, int func)
 	class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE);
 
 	if (class == 0xffff)
-		return;
+		return -1; /* no class, treat as single function */
 
 	vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID);
 
@@ -154,7 +165,9 @@ static void __init check_dev_quirk(int num, int slot, int func)
 	type = read_pci_config_byte(num, slot, func,
 				    PCI_HEADER_TYPE);
 	if (!(type & 0x80))
-		return;
+		return -1;
+
+	return 0;
 }
 
 void __init early_quirks(void)
@@ -167,6 +180,9 @@ void __init early_quirks(void)
 	/* Poor man's PCI discovery */
 	for (num = 0; num < 32; num++)
 		for (slot = 0; slot < 32; slot++)
-			for (func = 0; func < 8; func++)
-				check_dev_quirk(num, slot, func);
+			for (func = 0; func < 8; func++) {
+				/* Only probe function 0 on single fn devices */
+				if (check_dev_quirk(num, slot, func))
+					break;
+			}
 }
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 643fd861b72..ff9e7350da5 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -196,7 +196,7 @@ static struct console simnow_console = {
 static struct console *early_console = &early_vga_console;
 static int early_console_initialized;
 
-void early_printk(const char *fmt, ...)
+asmlinkage void early_printk(const char *fmt, ...)
 {
 	char buf[512];
 	int n;
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 94382faeadb..06cc8d4254b 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -473,7 +473,7 @@ void __init efi_enter_virtual_mode(void)
 		size = md->num_pages << EFI_PAGE_SHIFT;
 		end = md->phys_addr + size;
 
-		if (PFN_UP(end) <= max_pfn_mapped)
+		if (PFN_UP(end) <= max_low_pfn_mapped)
 			va = __va(md->phys_addr);
 		else
 			va = efi_ioremap(md->phys_addr, size);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 53393c306e1..6bc07f0f120 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -51,6 +51,7 @@
 #include <asm/percpu.h>
 #include <asm/dwarf2.h>
 #include <asm/processor-flags.h>
+#include <asm/ftrace.h>
 #include <asm/irq_vectors.h>
 
 /*
@@ -1024,6 +1025,7 @@ ENTRY(xen_sysenter_target)
 	RING0_INT_FRAME
 	addl $5*4, %esp		/* remove xen-provided frame */
 	jmp sysenter_past_esp
+	CFI_ENDPROC
 
 ENTRY(xen_hypervisor_callback)
 	CFI_STARTPROC
@@ -1110,6 +1112,77 @@ ENDPROC(xen_failsafe_callback)
 
 #endif	/* CONFIG_XEN */
 
+#ifdef CONFIG_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+ENTRY(mcount)
+	pushl %eax
+	pushl %ecx
+	pushl %edx
+	movl 0xc(%esp), %eax
+	subl $MCOUNT_INSN_SIZE, %eax
+
+.globl mcount_call
+mcount_call:
+	call ftrace_stub
+
+	popl %edx
+	popl %ecx
+	popl %eax
+
+	ret
+END(mcount)
+
+ENTRY(ftrace_caller)
+	pushl %eax
+	pushl %ecx
+	pushl %edx
+	movl 0xc(%esp), %eax
+	movl 0x4(%ebp), %edx
+	subl $MCOUNT_INSN_SIZE, %eax
+
+.globl ftrace_call
+ftrace_call:
+	call ftrace_stub
+
+	popl %edx
+	popl %ecx
+	popl %eax
+
+.globl ftrace_stub
+ftrace_stub:
+	ret
+END(ftrace_caller)
+
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+
+ENTRY(mcount)
+	cmpl $ftrace_stub, ftrace_trace_function
+	jnz trace
+.globl ftrace_stub
+ftrace_stub:
+	ret
+
+	/* taken from glibc */
+trace:
+	pushl %eax
+	pushl %ecx
+	pushl %edx
+	movl 0xc(%esp), %eax
+	movl 0x4(%ebp), %edx
+	subl $MCOUNT_INSN_SIZE, %eax
+
+	call *ftrace_trace_function
+
+	popl %edx
+	popl %ecx
+	popl %eax
+
+	jmp ftrace_stub
+END(mcount)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FTRACE */
+
 .section .rodata,"a"
 #include "syscall_table_32.S"
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 07d69f26233..ae63e584c34 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -51,9 +51,115 @@
 #include <asm/page.h>
 #include <asm/irqflags.h>
 #include <asm/paravirt.h>
+#include <asm/ftrace.h>
 
 	.code64
 
+#ifdef CONFIG_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+ENTRY(mcount)
+
+	subq $0x38, %rsp
+	movq %rax, (%rsp)
+	movq %rcx, 8(%rsp)
+	movq %rdx, 16(%rsp)
+	movq %rsi, 24(%rsp)
+	movq %rdi, 32(%rsp)
+	movq %r8, 40(%rsp)
+	movq %r9, 48(%rsp)
+
+	movq 0x38(%rsp), %rdi
+	subq $MCOUNT_INSN_SIZE, %rdi
+
+.globl mcount_call
+mcount_call:
+	call ftrace_stub
+
+	movq 48(%rsp), %r9
+	movq 40(%rsp), %r8
+	movq 32(%rsp), %rdi
+	movq 24(%rsp), %rsi
+	movq 16(%rsp), %rdx
+	movq 8(%rsp), %rcx
+	movq (%rsp), %rax
+	addq $0x38, %rsp
+
+	retq
+END(mcount)
+
+ENTRY(ftrace_caller)
+
+	/* taken from glibc */
+	subq $0x38, %rsp
+	movq %rax, (%rsp)
+	movq %rcx, 8(%rsp)
+	movq %rdx, 16(%rsp)
+	movq %rsi, 24(%rsp)
+	movq %rdi, 32(%rsp)
+	movq %r8, 40(%rsp)
+	movq %r9, 48(%rsp)
+
+	movq 0x38(%rsp), %rdi
+	movq 8(%rbp), %rsi
+	subq $MCOUNT_INSN_SIZE, %rdi
+
+.globl ftrace_call
+ftrace_call:
+	call ftrace_stub
+
+	movq 48(%rsp), %r9
+	movq 40(%rsp), %r8
+	movq 32(%rsp), %rdi
+	movq 24(%rsp), %rsi
+	movq 16(%rsp), %rdx
+	movq 8(%rsp), %rcx
+	movq (%rsp), %rax
+	addq $0x38, %rsp
+
+.globl ftrace_stub
+ftrace_stub:
+	retq
+END(ftrace_caller)
+
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+ENTRY(mcount)
+	cmpq $ftrace_stub, ftrace_trace_function
+	jnz trace
+.globl ftrace_stub
+ftrace_stub:
+	retq
+
+trace:
+	/* taken from glibc */
+	subq $0x38, %rsp
+	movq %rax, (%rsp)
+	movq %rcx, 8(%rsp)
+	movq %rdx, 16(%rsp)
+	movq %rsi, 24(%rsp)
+	movq %rdi, 32(%rsp)
+	movq %r8, 40(%rsp)
+	movq %r9, 48(%rsp)
+
+	movq 0x38(%rsp), %rdi
+	movq 8(%rbp), %rsi
+	subq $MCOUNT_INSN_SIZE, %rdi
+
+	call   *ftrace_trace_function
+
+	movq 48(%rsp), %r9
+	movq 40(%rsp), %r8
+	movq 32(%rsp), %rdi
+	movq 24(%rsp), %rsi
+	movq 16(%rsp), %rdx
+	movq 8(%rsp), %rcx
+	movq (%rsp), %rax
+	addq $0x38, %rsp
+
+	jmp ftrace_stub
+END(mcount)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FTRACE */
+
 #ifndef CONFIG_PREEMPT
 #define retint_kernel retint_restore_args
 #endif	
@@ -168,13 +274,13 @@ ENTRY(ret_from_fork)
 	CFI_ADJUST_CFA_OFFSET -4
 	call schedule_tail
 	GET_THREAD_INFO(%rcx)
-	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
+	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
 	jnz rff_trace
 rff_action:	
 	RESTORE_REST
 	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
 	je   int_ret_from_sys_call
-	testl $_TIF_IA32,threadinfo_flags(%rcx)
+	testl $_TIF_IA32,TI_flags(%rcx)
 	jnz  int_ret_from_sys_call
 	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
 	jmp ret_from_sys_call
@@ -243,7 +349,8 @@ ENTRY(system_call_after_swapgs)
 	movq  %rcx,RIP-ARGOFFSET(%rsp)
 	CFI_REL_OFFSET rip,RIP-ARGOFFSET
 	GET_THREAD_INFO(%rcx)
-	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
+	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
+		TI_flags(%rcx)
 	jnz tracesys
 	cmpq $__NR_syscall_max,%rax
 	ja badsys
@@ -262,7 +369,7 @@ sysret_check:
 	GET_THREAD_INFO(%rcx)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	movl threadinfo_flags(%rcx),%edx
+	movl TI_flags(%rcx),%edx
 	andl %edi,%edx
 	jnz  sysret_careful 
 	CFI_REMEMBER_STATE
@@ -305,7 +412,7 @@ sysret_signal:
 	leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
 	xorl %esi,%esi # oldset -> arg2
 	call ptregscall_common
-1:	movl $_TIF_NEED_RESCHED,%edi
+1:	movl $_TIF_WORK_MASK,%edi
 	/* Use IRET because user could have changed frame. This
 	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
 	DISABLE_INTERRUPTS(CLBR_NONE)
@@ -347,10 +454,10 @@ int_ret_from_sys_call:
 int_with_check:
 	LOCKDEP_SYS_EXIT_IRQ
 	GET_THREAD_INFO(%rcx)
-	movl threadinfo_flags(%rcx),%edx
+	movl TI_flags(%rcx),%edx
 	andl %edi,%edx
 	jnz   int_careful
-	andl    $~TS_COMPAT,threadinfo_status(%rcx)
+	andl    $~TS_COMPAT,TI_status(%rcx)
 	jmp   retint_swapgs
 
 	/* Either reschedule or signal or syscall exit tracking needed. */
@@ -393,7 +500,7 @@ int_signal:
 	movq %rsp,%rdi		# &ptregs -> arg1
 	xorl %esi,%esi		# oldset -> arg2
 	call do_notify_resume
-1:	movl $_TIF_NEED_RESCHED,%edi	
+1:	movl $_TIF_WORK_MASK,%edi
 int_restore_rest:
 	RESTORE_REST
 	DISABLE_INTERRUPTS(CLBR_NONE)
@@ -558,7 +665,7 @@ retint_with_reschedule:
 	movl $_TIF_WORK_MASK,%edi
 retint_check:
 	LOCKDEP_SYS_EXIT_IRQ
-	movl threadinfo_flags(%rcx),%edx
+	movl TI_flags(%rcx),%edx
 	andl %edi,%edx
 	CFI_REMEMBER_STATE
 	jnz  retint_careful
@@ -646,17 +753,16 @@ retint_signal:
 	RESTORE_REST
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	movl $_TIF_NEED_RESCHED,%edi
 	GET_THREAD_INFO(%rcx)
-	jmp retint_check
+	jmp retint_with_reschedule
 
 #ifdef CONFIG_PREEMPT
 	/* Returning to kernel space. Check if we need preemption */
 	/* rcx:	 threadinfo. interrupts off. */
 ENTRY(retint_kernel)
-	cmpl $0,threadinfo_preempt_count(%rcx)
+	cmpl $0,TI_preempt_count(%rcx)
 	jnz  retint_restore_args
-	bt  $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
+	bt  $TIF_NEED_RESCHED,TI_flags(%rcx)
 	jnc  retint_restore_args
 	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
 	jnc  retint_restore_args
@@ -710,6 +816,9 @@ END(invalidate_interrupt\num)
 ENTRY(call_function_interrupt)
 	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
 END(call_function_interrupt)
+ENTRY(call_function_single_interrupt)
+	apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
+END(call_function_single_interrupt)
 ENTRY(irq_move_cleanup_interrupt)
 	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
 END(irq_move_cleanup_interrupt)
@@ -819,7 +928,7 @@ paranoid_restore\trace:
 	jmp irq_return
 paranoid_userspace\trace:
 	GET_THREAD_INFO(%rcx)
-	movl threadinfo_flags(%rcx),%ebx
+	movl TI_flags(%rcx),%ebx
 	andl $_TIF_WORK_MASK,%ebx
 	jz paranoid_swapgs\trace
 	movq %rsp,%rdi			/* &pt_regs */
@@ -917,7 +1026,7 @@ error_exit:
 	testl %eax,%eax
 	jne  retint_kernel
 	LOCKDEP_SYS_EXIT_IRQ
-	movl  threadinfo_flags(%rcx),%edx
+	movl  TI_flags(%rcx),%edx
 	movl  $_TIF_WORK_MASK,%edi
 	andl  %edi,%edx
 	jnz  retint_careful
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
new file mode 100644
index 00000000000..ab115cd15fd
--- /dev/null
+++ b/arch/x86/kernel/ftrace.c
@@ -0,0 +1,141 @@
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes to Ingo Molnar, for suggesting the idea.
+ * Mathieu Desnoyers, for suggesting postponing the modifications.
+ * Arjan van de Ven, for keeping me straight, and explaining to me
+ * the dangers of modifying code on the run.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/alternative.h>
+#include <asm/ftrace.h>
+
+
+/* Long is fine, even if it is only 4 bytes ;-) */
+static long *ftrace_nop;
+
+union ftrace_code_union {
+	char code[MCOUNT_INSN_SIZE];
+	struct {
+		char e8;
+		int offset;
+	} __attribute__((packed));
+};
+
+
+static int notrace ftrace_calc_offset(long ip, long addr)
+{
+	return (int)(addr - ip);
+}
+
+notrace unsigned char *ftrace_nop_replace(void)
+{
+	return (char *)ftrace_nop;
+}
+
+notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+	static union ftrace_code_union calc;
+
+	calc.e8		= 0xe8;
+	calc.offset	= ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
+
+	/*
+	 * No locking needed, this must be called via kstop_machine
+	 * which in essence is like running on a uniprocessor machine.
+	 */
+	return calc.code;
+}
+
+notrace int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+		   unsigned char *new_code)
+{
+	unsigned replaced;
+	unsigned old = *(unsigned *)old_code; /* 4 bytes */
+	unsigned new = *(unsigned *)new_code; /* 4 bytes */
+	unsigned char newch = new_code[4];
+	int faulted = 0;
+
+	/*
+	 * Note: Due to modules and __init, code can
+	 *  disappear and change, we need to protect against faulting
+	 *  as well as code changing.
+	 *
+	 * No real locking needed, this code is run through
+	 * kstop_machine.
+	 */
+	asm volatile (
+		"1: lock\n"
+		"   cmpxchg %3, (%2)\n"
+		"   jnz 2f\n"
+		"   movb %b4, 4(%2)\n"
+		"2:\n"
+		".section .fixup, \"ax\"\n"
+		"3:	movl $1, %0\n"
+		"	jmp 2b\n"
+		".previous\n"
+		_ASM_EXTABLE(1b, 3b)
+		: "=r"(faulted), "=a"(replaced)
+		: "r"(ip), "r"(new), "c"(newch),
+		  "0"(faulted), "a"(old)
+		: "memory");
+	sync_core();
+
+	if (replaced != old && replaced != new)
+		faulted = 2;
+
+	return faulted;
+}
+
+notrace int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long ip = (unsigned long)(&ftrace_call);
+	unsigned char old[MCOUNT_INSN_SIZE], *new;
+	int ret;
+
+	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
+	new = ftrace_call_replace(ip, (unsigned long)func);
+	ret = ftrace_modify_code(ip, old, new);
+
+	return ret;
+}
+
+notrace int ftrace_mcount_set(unsigned long *data)
+{
+	unsigned long ip = (long)(&mcount_call);
+	unsigned long *addr = data;
+	unsigned char old[MCOUNT_INSN_SIZE], *new;
+
+	/*
+	 * Replace the mcount stub with a pointer to the
+	 * ip recorder function.
+	 */
+	memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
+	new = ftrace_call_replace(ip, *addr);
+	*addr = ftrace_modify_code(ip, old, new);
+
+	return 0;
+}
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+	const unsigned char *const *noptable = find_nop_table();
+
+	/* This is running in kstop_machine */
+
+	ftrace_mcount_set(data);
+
+	ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE];
+
+	return 0;
+}
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 45e84acca8a..711f11c30b0 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -8,6 +8,7 @@
  * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
  */
 
+#include <linux/kernel.h>
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/string.h>
@@ -20,6 +21,7 @@
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
+#include <asm/pgtable.h>
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
 
@@ -208,14 +210,79 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
 	BUG();
 }
 
+static __init void map_low_mmrs(void)
+{
+	init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
+	init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
+}
+
+enum map_type {map_wb, map_uc};
+
+static void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
+{
+	unsigned long bytes, paddr;
+
+	paddr = base << shift;
+	bytes = (1UL << shift);
+	printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
+	       					paddr + bytes);
+	if (map_type == map_uc)
+		init_extra_mapping_uc(paddr, bytes);
+	else
+		init_extra_mapping_wb(paddr, bytes);
+
+}
+static __init void map_gru_high(int max_pnode)
+{
+	union uvh_rh_gam_gru_overlay_config_mmr_u gru;
+	int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+	gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
+	if (gru.s.enable)
+		map_high("GRU", gru.s.base, shift, map_wb);
+}
+
+static __init void map_config_high(int max_pnode)
+{
+	union uvh_rh_gam_cfg_overlay_config_mmr_u cfg;
+	int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+	cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR);
+	if (cfg.s.enable)
+		map_high("CONFIG", cfg.s.base, shift, map_uc);
+}
+
+static __init void map_mmr_high(int max_pnode)
+{
+	union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
+	int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+	mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
+	if (mmr.s.enable)
+		map_high("MMR", mmr.s.base, shift, map_uc);
+}
+
+static __init void map_mmioh_high(int max_pnode)
+{
+	union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
+	int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+	mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
+	if (mmioh.s.enable)
+		map_high("MMIOH", mmioh.s.base, shift, map_uc);
+}
+
 static __init void uv_system_init(void)
 {
 	union uvh_si_addr_map_config_u m_n_config;
 	union uvh_node_id_u node_id;
 	unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
 	int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
+	int max_pnode = 0;
 	unsigned long mmr_base, present;
 
+	map_low_mmrs();
+
 	m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
 	m_val = m_n_config.s.m_skt;
 	n_val = m_n_config.s.n_skt;
@@ -281,12 +348,18 @@ static __init void uv_system_init(void)
 		uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
 		uv_node_to_blade[nid] = blade;
 		uv_cpu_to_blade[cpu] = blade;
+		max_pnode = max(pnode, max_pnode);
 
-		printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, pnode %d, nid %d, "
+		printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, "
 			"lcpu %d, blade %d\n",
 			cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid,
 			lcpu, blade);
 	}
+
+	map_gru_high(max_pnode);
+	map_mmr_high(max_pnode);
+	map_config_high(max_pnode);
+	map_mmioh_high(max_pnode);
 }
 
 /*
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ea230ec6905..0ea6a19bfdf 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -36,26 +36,15 @@ static inline void hpet_writel(unsigned long d, unsigned long a)
 }
 
 #ifdef CONFIG_X86_64
-
 #include <asm/pgtable.h>
-
-static inline void hpet_set_mapping(void)
-{
-	set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
-	__set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
-	hpet_virt_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
-}
-
-static inline void hpet_clear_mapping(void)
-{
-	hpet_virt_address = NULL;
-}
-
-#else
+#endif
 
 static inline void hpet_set_mapping(void)
 {
 	hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+#ifdef CONFIG_X86_64
+	__set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
+#endif
 }
 
 static inline void hpet_clear_mapping(void)
@@ -63,7 +52,6 @@ static inline void hpet_clear_mapping(void)
 	iounmap(hpet_virt_address);
 	hpet_virt_address = NULL;
 }
-#endif
 
 /*
  * HPET command line enable / disable
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index deb43785e92..dd7ebee446a 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -1,7 +1,14 @@
 #include <linux/module.h>
+
 #include <asm/checksum.h>
-#include <asm/desc.h>
 #include <asm/pgtable.h>
+#include <asm/desc.h>
+#include <asm/ftrace.h>
+
+#ifdef CONFIG_FTRACE
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(mcount);
+#endif
 
 /* Networking helper routines. */
 EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 337ec3438a8..558abf4c796 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1569,7 +1569,7 @@ void /*__init*/ print_local_APIC(void *dummy)
 
 void print_all_local_APICs(void)
 {
-	on_each_cpu(print_local_APIC, NULL, 1, 1);
+	on_each_cpu(print_local_APIC, NULL, 1);
 }
 
 void /*__init*/ print_PIC(void)
@@ -2020,7 +2020,7 @@ static inline void init_IO_APIC_traps(void)
  * The local APIC irq-chip implementation:
  */
 
-static void ack_apic(unsigned int irq)
+static void ack_lapic_irq(unsigned int irq)
 {
 	ack_APIC_irq();
 }
@@ -2045,9 +2045,17 @@ static struct irq_chip lapic_chip __read_mostly = {
 	.name		= "local-APIC",
 	.mask		= mask_lapic_irq,
 	.unmask		= unmask_lapic_irq,
-	.eoi		= ack_apic,
+	.ack		= ack_lapic_irq,
 };
 
+static void lapic_register_intr(int irq, int vector)
+{
+	irq_desc[irq].status &= ~IRQ_LEVEL;
+	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
+				      "edge");
+	set_intr_gate(vector, interrupt[irq]);
+}
+
 static void __init setup_nmi(void)
 {
 	/*
@@ -2247,8 +2255,7 @@ static inline void __init check_timer(void)
 
 	printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
 
-	set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
-				      "fasteoi");
+	lapic_register_intr(0, vector);
 	apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);	/* Fixed mode */
 	enable_8259A_irq(0);
 
@@ -2280,11 +2287,21 @@ out:
 }
 
 /*
- *
- * IRQ's that are handled by the PIC in the MPS IOAPIC case.
- * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
- *   Linux doesn't really care, as it's not actually used
- *   for any interrupt handling anyway.
+ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
+ * to devices.  However there may be an I/O APIC pin available for
+ * this interrupt regardless.  The pin may be left unconnected, but
+ * typically it will be reused as an ExtINT cascade interrupt for
+ * the master 8259A.  In the MPS case such a pin will normally be
+ * reported as an ExtINT interrupt in the MP table.  With ACPI
+ * there is no provision for ExtINT interrupts, and in the absence
+ * of an override it would be treated as an ordinary ISA I/O APIC
+ * interrupt, that is edge-triggered and unmasked by default.  We
+ * used to do this, but it caused problems on some systems because
+ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
+ * the same ExtINT cascade interrupt to drive the local APIC of the
+ * bootstrap processor.  Therefore we refrain from routing IRQ2 to
+ * the I/O APIC in all cases now.  No actual device should request
+ * it anyway.  --macro
  */
 #define PIC_IRQS	(1 << PIC_CASCADE_IR)
 
@@ -2298,10 +2315,7 @@ void __init setup_IO_APIC(void)
 
 	enable_IO_APIC();
 
-	if (acpi_ioapic)
-		io_apic_irqs = ~0;	/* all IRQs go through IOAPIC */
-	else
-		io_apic_irqs = ~PIC_IRQS;
+	io_apic_irqs = ~PIC_IRQS;
 
 	printk("ENABLING IO-APIC IRQs\n");
 
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 2b4c40bc12c..6510cde36b3 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1160,7 +1160,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
 
 void print_all_local_APICs (void)
 {
-	on_each_cpu(print_local_APIC, NULL, 1, 1);
+	on_each_cpu(print_local_APIC, NULL, 1);
 }
 
 void __apicdebuginit print_PIC(void)
@@ -1554,7 +1554,7 @@ static inline void init_IO_APIC_traps(void)
 	}
 }
 
-static void enable_lapic_irq (unsigned int irq)
+static void unmask_lapic_irq(unsigned int irq)
 {
 	unsigned long v;
 
@@ -1562,7 +1562,7 @@ static void enable_lapic_irq (unsigned int irq)
 	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
-static void disable_lapic_irq (unsigned int irq)
+static void mask_lapic_irq(unsigned int irq)
 {
 	unsigned long v;
 
@@ -1575,19 +1575,20 @@ static void ack_lapic_irq (unsigned int irq)
 	ack_APIC_irq();
 }
 
-static void end_lapic_irq (unsigned int i) { /* nothing */ }
-
-static struct hw_interrupt_type lapic_irq_type __read_mostly = {
-	.name = "local-APIC",
-	.typename = "local-APIC-edge",
-	.startup = NULL, /* startup_irq() not used for IRQ0 */
-	.shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
-	.enable = enable_lapic_irq,
-	.disable = disable_lapic_irq,
-	.ack = ack_lapic_irq,
-	.end = end_lapic_irq,
+static struct irq_chip lapic_chip __read_mostly = {
+	.name		= "local-APIC",
+	.mask		= mask_lapic_irq,
+	.unmask		= unmask_lapic_irq,
+	.ack		= ack_lapic_irq,
 };
 
+static void lapic_register_intr(int irq)
+{
+	irq_desc[irq].status &= ~IRQ_LEVEL;
+	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
+				      "edge");
+}
+
 static void __init setup_nmi(void)
 {
 	/*
@@ -1714,11 +1715,6 @@ static inline void __init check_timer(void)
 		apic2 = apic1;
 	}
 
-	replace_pin_at_irq(0, 0, 0, apic1, pin1);
-	apic1 = 0;
-	pin1 = 0;
-	setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
-
 	if (pin1 != -1) {
 		/*
 		 * Ok, does IRQ0 through the IOAPIC work?
@@ -1779,7 +1775,7 @@ static inline void __init check_timer(void)
 
 	apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
 
-	irq_desc[0].chip = &lapic_irq_type;
+	lapic_register_intr(0);
 	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
 	enable_8259A_irq(0);
 
@@ -1817,11 +1813,21 @@ static int __init notimercheck(char *s)
 __setup("no_timer_check", notimercheck);
 
 /*
- *
- * IRQs that are handled by the PIC in the MPS IOAPIC case.
- * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
- *   Linux doesn't really care, as it's not actually used
- *   for any interrupt handling anyway.
+ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
+ * to devices.  However there may be an I/O APIC pin available for
+ * this interrupt regardless.  The pin may be left unconnected, but
+ * typically it will be reused as an ExtINT cascade interrupt for
+ * the master 8259A.  In the MPS case such a pin will normally be
+ * reported as an ExtINT interrupt in the MP table.  With ACPI
+ * there is no provision for ExtINT interrupts, and in the absence
+ * of an override it would be treated as an ordinary ISA I/O APIC
+ * interrupt, that is edge-triggered and unmasked by default.  We
+ * used to do this, but it caused problems on some systems because
+ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
+ * the same ExtINT cascade interrupt to drive the local APIC of the
+ * bootstrap processor.  Therefore we refrain from routing IRQ2 to
+ * the I/O APIC in all cases now.  No actual device should request
+ * it anyway.  --macro
  */
 #define PIC_IRQS	(1<<2)
 
@@ -1832,10 +1838,7 @@ void __init setup_IO_APIC(void)
 	 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
 	 */
 
-	if (acpi_ioapic)
-		io_apic_irqs = ~0;	/* all IRQs go through IOAPIC */
-	else
-		io_apic_irqs = ~PIC_IRQS;
+	io_apic_irqs = ~PIC_IRQS;
 
 	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
 
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 31f49e8f46a..0373e88de95 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -199,6 +199,10 @@ void __init native_init_IRQ(void)
 	/* IPI for generic function call */
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 
+	/* IPI for generic single function call */
+	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+			call_function_single_interrupt);
+
 	/* Low priority IPI to cleanup after moving an irq */
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 #endif
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 21f2bae98c1..a8449571858 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -68,7 +68,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 		load_LDT(pc);
 		mask = cpumask_of_cpu(smp_processor_id());
 		if (!cpus_equal(current->mm->cpu_vm_mask, mask))
-			smp_call_function(flush_ldt, current->mm, 1, 1);
+			smp_call_function(flush_ldt, current->mm, 1);
 		preempt_enable();
 #else
 		load_LDT(pc);
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index f4960171bc6..8864230d55a 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -11,6 +11,8 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/numa.h>
+#include <linux/ftrace.h>
+
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
@@ -107,6 +109,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 	unsigned long page_list[PAGES_NR];
 	void *control_page;
 
+	tracer_disable();
+
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
 
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 7830dc4a838..9dd9262693a 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -11,6 +11,8 @@
 #include <linux/string.h>
 #include <linux/reboot.h>
 #include <linux/numa.h>
+#include <linux/ftrace.h>
+
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -184,6 +186,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 	unsigned long page_list[PAGES_NR];
 	void *control_page;
 
+	tracer_disable();
+
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
 
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 9758fea87c5..56b933119a0 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -76,6 +76,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/sched.h>
+#include <linux/smp_lock.h>
 #include <linux/cpumask.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -423,6 +424,7 @@ out:
 
 static int microcode_open (struct inode *unused1, struct file *unused2)
 {
+	cycle_kernel_lock();
 	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
 }
 
@@ -489,7 +491,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
 #define microcode_dev_exit() do { } while(0)
 #endif
 
-static long get_next_ucode_from_buffer(void **mc, void *buf,
+static long get_next_ucode_from_buffer(void **mc, const u8 *buf,
 	unsigned long size, long offset)
 {
 	microcode_header_t *mc_header;
@@ -523,7 +525,7 @@ static int cpu_request_microcode(int cpu)
 	char name[30];
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	const struct firmware *firmware;
-	void *buf;
+	const u8 *buf;
 	unsigned long size;
 	long offset = 0;
 	int error;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 8b6b1e05c30..3b25e49380c 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -726,12 +726,22 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
 static struct intel_mp_floating *mpf_found;
 
 /*
+ * Machine specific quirk for finding the SMP config before other setup
+ * activities destroy the table:
+ */
+int (*mach_get_smp_config_quirk)(unsigned int early);
+
+/*
  * Scan the memory blocks for an SMP configuration block.
  */
-static void __init __get_smp_config(unsigned early)
+static void __init __get_smp_config(unsigned int early)
 {
 	struct intel_mp_floating *mpf = mpf_found;
 
+	if (mach_get_smp_config_quirk) {
+		if (mach_get_smp_config_quirk(early))
+			return;
+	}
 	if (acpi_lapic && early)
 		return;
 	/*
@@ -889,10 +899,16 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
 	return 0;
 }
 
-static void __init __find_smp_config(unsigned reserve)
+int (*mach_find_smp_config_quirk)(unsigned int reserve);
+
+static void __init __find_smp_config(unsigned int reserve)
 {
 	unsigned int address;
 
+	if (mach_find_smp_config_quirk) {
+		if (mach_find_smp_config_quirk(reserve))
+			return;
+	}
 	/*
 	 * FIXME: Linux assumes you have 640K of base ram..
 	 * this continues the error...
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 1f3abe048e9..a153b3905f6 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -117,12 +117,20 @@ static int msr_open(struct inode *inode, struct file *file)
 {
 	unsigned int cpu = iminor(file->f_path.dentry->d_inode);
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	int ret = 0;
 
-	if (cpu >= NR_CPUS || !cpu_online(cpu))
-		return -ENXIO;	/* No such CPU */
-	if (!cpu_has(c, X86_FEATURE_MSR))
-		return -EIO;	/* MSR not supported */
+	lock_kernel();
+	cpu = iminor(file->f_path.dentry->d_inode);
 
+	if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+		ret = -ENXIO;	/* No such CPU */
+		goto out;
+	}
+	c = &cpu_data(cpu);
+	if (!cpu_has(c, X86_FEATURE_MSR))
+		ret = -EIO;	/* MSR not supported */
+out:
+	unlock_kernel();
 	return 0;
 }
 
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 8dfe9db87a9..ec024b3baad 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -130,7 +130,7 @@ int __init check_nmi_watchdog(void)
 
 #ifdef CONFIG_SMP
 	if (nmi_watchdog == NMI_LOCAL_APIC)
-		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
+		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
 #endif
 
 	for_each_possible_cpu(cpu)
@@ -171,6 +171,9 @@ int __init check_nmi_watchdog(void)
 error:
 	if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
 		disable_8259A_irq(0);
+#ifdef CONFIG_X86_32
+	timer_ack = 0;
+#endif
 	return -1;
 }
 
@@ -269,7 +272,7 @@ static void __acpi_nmi_enable(void *__unused)
 void acpi_nmi_enable(void)
 {
 	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-		on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
+		on_each_cpu(__acpi_nmi_enable, NULL, 1);
 }
 
 static void __acpi_nmi_disable(void *__unused)
@@ -283,7 +286,7 @@ static void __acpi_nmi_disable(void *__unused)
 void acpi_nmi_disable(void)
 {
 	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-		on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
+		on_each_cpu(__acpi_nmi_disable, NULL, 1);
 }
 
 void setup_apic_nmi_watchdog(void *unused)
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index f0f1de1c4a1..a23e8233b9a 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -93,12 +93,13 @@ int __init get_memcfg_numaq(void)
 	return 1;
 }
 
-static int __init numaq_tsc_disable(void)
+void __init numaq_tsc_disable(void)
 {
+	if (!found_numaq)
+		return;
+
 	if (num_online_nodes() > 1) {
 		printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
 		setup_clear_cpu_cap(X86_FEATURE_TSC);
 	}
-	return 0;
 }
-arch_initcall(numaq_tsc_disable);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e7e5652f65b..e0f571d58c1 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -285,7 +285,7 @@ struct pv_time_ops pv_time_ops = {
 	.get_wallclock = native_get_wallclock,
 	.set_wallclock = native_set_wallclock,
 	.sched_clock = native_sched_clock,
-	.get_cpu_khz = native_calculate_cpu_khz,
+	.get_tsc_khz = native_calibrate_tsc,
 };
 
 struct pv_irq_ops pv_irq_ops = {
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index d0d18db5d2a..c3fe78406d1 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -630,6 +630,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 	struct pci_dev *dev;
 	void *gatt;
 	int i, error;
+	unsigned long start_pfn, end_pfn;
 
 	printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
 	aper_size = aper_base = info->aper_size = 0;
@@ -674,6 +675,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 
 	printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
 	       aper_base, aper_size>>10);
+
+	/* need to map that range */
+	end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
+	if (end_pfn > max_low_pfn_mapped) {
+		start_pfn = (aper_base>>PAGE_SHIFT);
+		init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
+	}
 	return 0;
 
  nommu:
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4061d63aabe..4d629c62f4f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,6 +7,12 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 #include <linux/clockchips.h>
+#include <asm/system.h>
+
+unsigned long idle_halt;
+EXPORT_SYMBOL(idle_halt);
+unsigned long idle_nomwait;
+EXPORT_SYMBOL(idle_nomwait);
 
 struct kmem_cache *task_xstate_cachep;
 
@@ -132,7 +138,7 @@ void cpu_idle_wait(void)
 {
 	smp_mb();
 	/* kick all the CPUs so that they exit out of pm_idle */
-	smp_call_function(do_nothing, NULL, 0, 1);
+	smp_call_function(do_nothing, NULL, 1);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
@@ -325,7 +331,27 @@ static int __init idle_setup(char *str)
 		pm_idle = poll_idle;
 	} else if (!strcmp(str, "mwait"))
 		force_mwait = 1;
-	else
+	else if (!strcmp(str, "halt")) {
+		/*
+		 * When the boot option of idle=halt is added, halt is
+		 * forced to be used for CPU idle. In such case CPU C2/C3
+		 * won't be used again.
+		 * To continue to load the CPU idle driver, don't touch
+		 * the boot_option_idle_override.
+		 */
+		pm_idle = default_idle;
+		idle_halt = 1;
+		return 0;
+	} else if (!strcmp(str, "nomwait")) {
+		/*
+		 * If the boot option of "idle=nomwait" is added,
+		 * it means that mwait will be disabled for CPU C2/C3
+		 * states. In such case it won't touch the variable
+		 * of boot_option_idle_override.
+		 */
+		idle_nomwait = 1;
+		return 0;
+	} else
 		return -1;
 
 	boot_option_idle_override = 1;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 9a139f6c9df..0c3927accb0 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -142,7 +142,10 @@ void cpu_idle(void)
 
 			local_irq_disable();
 			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
+			/* Don't trace irqs off for idle */
+			stop_critical_timings();
 			pm_idle();
+			start_critical_timings();
 		}
 		tick_nohz_restart_sched_tick();
 		preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index db5eb963e4d..a8e53626ac9 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -134,7 +134,10 @@ void cpu_idle(void)
 			 */
 			local_irq_disable();
 			enter_idle();
+			/* Don't trace irqs off for idle */
+			stop_critical_timings();
 			pm_idle();
+			start_critical_timings();
 			/* In many cases the interrupt that ended idle
 			   has already called exit_idle. But some idle
 			   loops can be woken up without interrupt. */
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 79bdcd11c66..d1385881810 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -266,6 +266,8 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
 		hpet_print_force_info();
 }
 
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1,
+			 old_ich_force_enable_hpet_user);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
 			 old_ich_force_enable_hpet_user);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cfcfbefee0b..531b55b8e81 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -394,11 +394,10 @@ static void __init parse_setup_data(void)
 	}
 }
 
-static void __init reserve_setup_data(void)
+static void __init e820_reserve_setup_data(void)
 {
 	struct setup_data *data;
 	u64 pa_data;
-	char buf[32];
 	int found = 0;
 
 	if (boot_params.hdr.version < 0x0209)
@@ -406,8 +405,6 @@ static void __init reserve_setup_data(void)
 	pa_data = boot_params.hdr.setup_data;
 	while (pa_data) {
 		data = early_ioremap(pa_data, sizeof(*data));
-		sprintf(buf, "setup data %x", data->type);
-		reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
 		e820_update_range(pa_data, sizeof(*data)+data->len,
 			 E820_RAM, E820_RESERVED_KERN);
 		found = 1;
@@ -418,10 +415,29 @@ static void __init reserve_setup_data(void)
 		return;
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+	memcpy(&e820_saved, &e820, sizeof(struct e820map));
 	printk(KERN_INFO "extended physical RAM map:\n");
 	e820_print_map("reserve setup_data");
 }
 
+static void __init reserve_early_setup_data(void)
+{
+	struct setup_data *data;
+	u64 pa_data;
+	char buf[32];
+
+	if (boot_params.hdr.version < 0x0209)
+		return;
+	pa_data = boot_params.hdr.setup_data;
+	while (pa_data) {
+		data = early_ioremap(pa_data, sizeof(*data));
+		sprintf(buf, "setup data %x", data->type);
+		reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
+		pa_data = data->next;
+		early_iounmap(data, sizeof(*data));
+	}
+}
+
 /*
  * --------- Crashkernel reservation ------------------------------
  */
@@ -580,6 +596,7 @@ void __init setup_arch(char **cmdline_p)
 {
 #ifdef CONFIG_X86_32
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
+	visws_early_detect();
 	pre_setup_arch_hook();
 	early_cpu_init();
 #else
@@ -626,6 +643,8 @@ void __init setup_arch(char **cmdline_p)
 
 	setup_memory_map();
 	parse_setup_data();
+	/* update the e820_saved too */
+	e820_reserve_setup_data();
 
 	copy_edd();
 
@@ -656,7 +675,7 @@ void __init setup_arch(char **cmdline_p)
 	parse_early_param();
 
 	/* after early param, so could get panic from serial */
-	reserve_setup_data();
+	reserve_early_setup_data();
 
 	if (acpi_mps_check()) {
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -665,6 +684,11 @@ void __init setup_arch(char **cmdline_p)
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
 	}
 
+#ifdef CONFIG_PCI
+	if (pci_early_dump_regs)
+		early_dump_pci_devices();
+#endif
+
 	finish_e820_parsing();
 
 #ifdef CONFIG_X86_32
@@ -691,22 +715,18 @@ void __init setup_arch(char **cmdline_p)
 	early_gart_iommu_check();
 #endif
 
-	e820_register_active_regions(0, 0, -1UL);
 	/*
 	 * partially used pages are not usable - thus
 	 * we are rounding upwards:
 	 */
-	max_pfn = e820_end_of_ram();
+	max_pfn = e820_end_of_ram_pfn();
 
 	/* preallocate 4k for mptable mpc */
 	early_reserve_e820_mpc_new();
 	/* update e820 for memory not covered by WB MTRRs */
 	mtrr_bp_init();
-	if (mtrr_trim_uncached_memory(max_pfn)) {
-		remove_all_active_ranges();
-		e820_register_active_regions(0, 0, -1UL);
-		max_pfn = e820_end_of_ram();
-	}
+	if (mtrr_trim_uncached_memory(max_pfn))
+		max_pfn = e820_end_of_ram_pfn();
 
 #ifdef CONFIG_X86_32
 	/* max_low_pfn get updated here */
@@ -718,12 +738,26 @@ void __init setup_arch(char **cmdline_p)
 
 	/* How many end-of-memory variables you have, grandma! */
 	/* need this before calling reserve_initrd */
-	max_low_pfn = max_pfn;
+	if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
+		max_low_pfn = e820_end_of_low_ram_pfn();
+	else
+		max_low_pfn = max_pfn;
+
 	high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
 #endif
 
 	/* max_pfn_mapped is updated here */
-	max_pfn_mapped = init_memory_mapping(0, (max_low_pfn << PAGE_SHIFT));
+	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
+	max_pfn_mapped = max_low_pfn_mapped;
+
+#ifdef CONFIG_X86_64
+	if (max_pfn > max_low_pfn) {
+		max_pfn_mapped = init_memory_mapping(1UL<<32,
+						     max_pfn<<PAGE_SHIFT);
+		/* can we preseve max_low_pfn ?*/
+		max_low_pfn = max_pfn;
+	}
+#endif
 
 	/*
 	 * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
@@ -749,9 +783,6 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	acpi_boot_table_init();
 
-	/* Remove active ranges so rediscovery with NUMA-awareness happens */
-	remove_all_active_ranges();
-
 #ifdef CONFIG_ACPI_NUMA
 	/*
 	 * Parse SRAT to discover nodes.
@@ -823,6 +854,14 @@ void __init setup_arch(char **cmdline_p)
 	init_cpu_to_node();
 #endif
 
+#ifdef CONFIG_X86_NUMAQ
+	/*
+	 * need to check online nodes num, call it
+	 * here before time_init/tsc_init
+	 */
+	numaq_tsc_disable();
+#endif
+
 	init_apic_mappings();
 	ioapic_init_mappings();
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 5fc310f746f..cac68430d31 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -343,23 +343,23 @@ static const cpumask_t cpu_mask_none;
 /*
  * Returns a pointer to the bitmask of CPUs on Node 'node'.
  */
-cpumask_t *_node_to_cpumask_ptr(int node)
+const cpumask_t *_node_to_cpumask_ptr(int node)
 {
 	if (node_to_cpumask_map == NULL) {
 		printk(KERN_WARNING
 			"_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
 			node);
 		dump_stack();
-		return &cpu_online_map;
+		return (const cpumask_t *)&cpu_online_map;
 	}
 	if (node >= nr_node_ids) {
 		printk(KERN_WARNING
 			"_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n",
 			node, nr_node_ids);
 		dump_stack();
-		return (cpumask_t *)&cpu_mask_none;
+		return &cpu_mask_none;
 	}
-	return (cpumask_t *)&node_to_cpumask_map[node];
+	return &node_to_cpumask_map[node];
 }
 EXPORT_SYMBOL(_node_to_cpumask_ptr);
 
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 0cb7aadc87c..361b7a4c640 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -121,132 +121,23 @@ static void native_smp_send_reschedule(int cpu)
 	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
 }
 
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
-	void (*func) (void *info);
-	void *info;
-	atomic_t started;
-	atomic_t finished;
-	int wait;
-};
-
-void lock_ipi_call_lock(void)
+void native_send_call_func_single_ipi(int cpu)
 {
-	spin_lock_irq(&call_lock);
-}
-
-void unlock_ipi_call_lock(void)
-{
-	spin_unlock_irq(&call_lock);
-}
-
-static struct call_data_struct *call_data;
-
-static void __smp_call_function(void (*func) (void *info), void *info,
-				int nonatomic, int wait)
-{
-	struct call_data_struct data;
-	int cpus = num_online_cpus() - 1;
-
-	if (!cpus)
-		return;
-
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.started, 0);
-	data.wait = wait;
-	if (wait)
-		atomic_set(&data.finished, 0);
-
-	call_data = &data;
-	mb();
-
-	/* Send a message to all other CPUs and wait for them to respond */
-	send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
-	/* Wait for response */
-	while (atomic_read(&data.started) != cpus)
-		cpu_relax();
-
-	if (wait)
-		while (atomic_read(&data.finished) != cpus)
-			cpu_relax();
+	send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
 }
 
-
-/**
- * smp_call_function_mask(): Run a function on a set of other CPUs.
- * @mask: The set of cpus to run on.  Must not include the current cpu.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
-  * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-static int
-native_smp_call_function_mask(cpumask_t mask,
-			      void (*func)(void *), void *info,
-			      int wait)
+void native_send_call_func_ipi(cpumask_t mask)
 {
-	struct call_data_struct data;
 	cpumask_t allbutself;
-	int cpus;
-
-	/* Can deadlock when called with interrupts disabled */
-	WARN_ON(irqs_disabled());
-
-	/* Holding any lock stops cpus from going down. */
-	spin_lock(&call_lock);
 
 	allbutself = cpu_online_map;
 	cpu_clear(smp_processor_id(), allbutself);
 
-	cpus_and(mask, mask, allbutself);
-	cpus = cpus_weight(mask);
-
-	if (!cpus) {
-		spin_unlock(&call_lock);
-		return 0;
-	}
-
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.started, 0);
-	data.wait = wait;
-	if (wait)
-		atomic_set(&data.finished, 0);
-
-	call_data = &data;
-	wmb();
-
-	/* Send a message to other CPUs */
 	if (cpus_equal(mask, allbutself) &&
 	    cpus_equal(cpu_online_map, cpu_callout_map))
 		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
 	else
 		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
-
-	/* Wait for response */
-	while (atomic_read(&data.started) != cpus)
-		cpu_relax();
-
-	if (wait)
-		while (atomic_read(&data.finished) != cpus)
-			cpu_relax();
-	spin_unlock(&call_lock);
-
-	return 0;
 }
 
 static void stop_this_cpu(void *dummy)
@@ -268,18 +159,13 @@ static void stop_this_cpu(void *dummy)
 
 static void native_smp_send_stop(void)
 {
-	int nolock;
 	unsigned long flags;
 
 	if (reboot_force)
 		return;
 
-	/* Don't deadlock on the call lock in panic */
-	nolock = !spin_trylock(&call_lock);
+	smp_call_function(stop_this_cpu, NULL, 0);
 	local_irq_save(flags);
-	__smp_call_function(stop_this_cpu, NULL, 0, 0);
-	if (!nolock)
-		spin_unlock(&call_lock);
 	disable_local_APIC();
 	local_irq_restore(flags);
 }
@@ -301,33 +187,28 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
 
 void smp_call_function_interrupt(struct pt_regs *regs)
 {
-	void (*func) (void *info) = call_data->func;
-	void *info = call_data->info;
-	int wait = call_data->wait;
-
 	ack_APIC_irq();
-	/*
-	 * Notify initiating CPU that I've grabbed the data and am
-	 * about to execute the function
-	 */
-	mb();
-	atomic_inc(&call_data->started);
-	/*
-	 * At this point the info structure may be out of scope unless wait==1
-	 */
 	irq_enter();
-	(*func)(info);
+	generic_smp_call_function_interrupt();
 #ifdef CONFIG_X86_32
 	__get_cpu_var(irq_stat).irq_call_count++;
 #else
 	add_pda(irq_call_count, 1);
 #endif
 	irq_exit();
+}
 
-	if (wait) {
-		mb();
-		atomic_inc(&call_data->finished);
-	}
+void smp_call_function_single_interrupt(struct pt_regs *regs)
+{
+	ack_APIC_irq();
+	irq_enter();
+	generic_smp_call_function_single_interrupt();
+#ifdef CONFIG_X86_32
+	__get_cpu_var(irq_stat).irq_call_count++;
+#else
+	add_pda(irq_call_count, 1);
+#endif
+	irq_exit();
 }
 
 struct smp_ops smp_ops = {
@@ -338,7 +219,8 @@ struct smp_ops smp_ops = {
 
 	.smp_send_stop = native_smp_send_stop,
 	.smp_send_reschedule = native_smp_send_reschedule,
-	.smp_call_function_mask = native_smp_call_function_mask,
+
+	.send_call_func_ipi = native_send_call_func_ipi,
+	.send_call_func_single_ipi = native_send_call_func_single_ipi,
 };
 EXPORT_SYMBOL_GPL(smp_ops);
-
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e1200b202ed..687376ab07e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -327,12 +327,12 @@ static void __cpuinit start_secondary(void *unused)
 	 * lock helps us to not include this cpu in a currently in progress
 	 * smp_call_function().
 	 */
-	lock_ipi_call_lock();
+	ipi_call_lock_irq();
 #ifdef CONFIG_X86_IO_APIC
 	setup_vector_irq(smp_processor_id());
 #endif
 	cpu_set(smp_processor_id(), cpu_online_map);
-	unlock_ipi_call_lock();
+	ipi_call_unlock_irq();
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 
 	setup_secondary_clock();
@@ -939,9 +939,9 @@ do_rest:
 				inquire_remote_apic(apicid);
 		}
 	}
-
+#ifdef CONFIG_X86_64
 restore_state:
-
+#endif
 	if (boot_error) {
 		/* Try to put things back the way they were before ... */
 		numa_remove_cpu(cpu); /* was set by numa_add_cpu */
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 3449064d141..99941b37eca 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -25,59 +25,3 @@ __cpuinit void init_gdt(int cpu)
 	per_cpu(cpu_number, cpu) = cpu;
 }
 #endif
-
-/**
- * smp_call_function(): Run a function on all other CPUs.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Unused.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
-		      int wait)
-{
-	return smp_call_function_mask(cpu_online_map, func, info, wait);
-}
-EXPORT_SYMBOL(smp_call_function);
-
-/**
- * smp_call_function_single - Run a function on a specific CPU
- * @cpu: The target CPU.  Cannot be the calling CPU.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Unused.
- * @wait: If true, wait until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- */
-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-			     int nonatomic, int wait)
-{
-	/* prevent preemption and reschedule on another processor */
-	int ret;
-	int me = get_cpu();
-	if (cpu == me) {
-		local_irq_disable();
-		func(info);
-		local_irq_enable();
-		put_cpu();
-		return 0;
-	}
-
-	ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
-
-	put_cpu();
-	return ret;
-}
-EXPORT_SYMBOL(smp_call_function_single);
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index c28c342c162..a03e7f6d90c 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -74,6 +74,7 @@ void save_stack_trace(struct stack_trace *trace)
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
+EXPORT_SYMBOL_GPL(save_stack_trace);
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
@@ -81,3 +82,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 5f29f12da50..059ca6ee59b 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -39,9 +39,6 @@
 
 #include "do_timer.h"
 
-unsigned int cpu_khz;	/* Detected as we calibrate the TSC */
-EXPORT_SYMBOL(cpu_khz);
-
 int timer_ack;
 
 unsigned long profile_pc(struct pt_regs *regs)
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 39ae8511a13..e3d49c553af 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -56,7 +56,7 @@ static irqreturn_t timer_event_interrupt(int irq, void *dev_id)
 /* calibrate_cpu is used on systems with fixed rate TSCs to determine
  * processor frequency */
 #define TICK_COUNT 100000000
-unsigned long __init native_calculate_cpu_khz(void)
+unsigned long __init calibrate_cpu(void)
 {
 	int tsc_start, tsc_now;
 	int i, no_ctr_free;
@@ -116,25 +116,11 @@ void __init hpet_time_init(void)
 
 void __init time_init(void)
 {
-	tsc_calibrate();
-
-	cpu_khz = tsc_khz;
-	if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
-		(boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
-		cpu_khz = calculate_cpu_khz();
-
-	lpj_fine = ((unsigned long)tsc_khz * 1000)/HZ;
-
-	if (unsynchronized_tsc())
-		mark_tsc_unstable("TSCs unsynchronized");
-
+	tsc_init();
 	if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
 		vgetcpu_mode = VGETCPU_RDTSCP;
 	else
 		vgetcpu_mode = VGETCPU_LSL;
 
-	printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
-		cpu_khz / 1000, cpu_khz % 1000);
-	init_tsc_clocksource();
 	late_time_init = choose_time_init();
 }
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index 9bb2363851a..fec1ecedc9b 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -238,6 +238,6 @@ static void do_flush_tlb_all(void *info)
 
 void flush_tlb_all(void)
 {
-	on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+	on_each_cpu(do_flush_tlb_all, NULL, 1);
 }
 
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 5039d0f097a..dcbf7a1159e 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -275,5 +275,5 @@ static void do_flush_tlb_all(void *info)
 
 void flush_tlb_all(void)
 {
-	on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+	on_each_cpu(do_flush_tlb_all, NULL, 1);
 }
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index d7cc292691f..8a768973c4f 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1,5 +1,6 @@
 /*
  *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
  *
  *  Pentium III FXSR, SSE support
  *	Gareth Hughes <gareth@valinux.com>, May 2000
@@ -60,8 +61,6 @@
 
 #include "mach_traps.h"
 
-int panic_on_unrecovered_nmi;
-
 DECLARE_BITMAP(used_vectors, NR_VECTORS);
 EXPORT_SYMBOL_GPL(used_vectors);
 
@@ -98,19 +97,22 @@ asmlinkage void alignment_check(void);
 asmlinkage void spurious_interrupt_bug(void);
 asmlinkage void machine_check(void);
 
+int panic_on_unrecovered_nmi;
 int kstack_depth_to_print = 24;
 static unsigned int code_bytes = 64;
+static int ignore_nmis;
+static int die_counter;
 
 void printk_address(unsigned long address, int reliable)
 {
 #ifdef CONFIG_KALLSYMS
-	char namebuf[KSYM_NAME_LEN];
 	unsigned long offset = 0;
 	unsigned long symsize;
 	const char *symname;
-	char reliab[4] = "";
-	char *delim = ":";
 	char *modname;
+	char *delim = ":";
+	char namebuf[KSYM_NAME_LEN];
+	char reliab[4] = "";
 
 	symname = kallsyms_lookup(address, &symsize, &offset,
 					&modname, namebuf);
@@ -130,22 +132,23 @@ void printk_address(unsigned long address, int reliable)
 #endif
 }
 
-static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size)
+static inline int valid_stack_ptr(struct thread_info *tinfo,
+			void *p, unsigned int size)
 {
-	return	p > (void *)tinfo &&
-		p <= (void *)tinfo + THREAD_SIZE - size;
+	void *t = tinfo;
+	return	p > t && p <= t + THREAD_SIZE - size;
 }
 
 /* The form of the top of the frame on the stack */
 struct stack_frame {
-	struct stack_frame	*next_frame;
-	unsigned long		return_address;
+	struct stack_frame *next_frame;
+	unsigned long return_address;
 };
 
 static inline unsigned long
 print_context_stack(struct thread_info *tinfo,
-		    unsigned long *stack, unsigned long bp,
-		    const struct stacktrace_ops *ops, void *data)
+		unsigned long *stack, unsigned long bp,
+		const struct stacktrace_ops *ops, void *data)
 {
 	struct stack_frame *frame = (struct stack_frame *)bp;
 
@@ -167,8 +170,6 @@ print_context_stack(struct thread_info *tinfo,
 	return bp;
 }
 
-#define MSG(msg)		ops->warning(data, msg)
-
 void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		unsigned long *stack, unsigned long bp,
 		const struct stacktrace_ops *ops, void *data)
@@ -178,7 +179,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 
 	if (!stack) {
 		unsigned long dummy;
-
 		stack = &dummy;
 		if (task != current)
 			stack = (unsigned long *)task->thread.sp;
@@ -196,7 +196,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 	}
 #endif
 
-	while (1) {
+	for (;;) {
 		struct thread_info *context;
 
 		context = (struct thread_info *)
@@ -248,10 +248,10 @@ static void print_trace_address(void *data, unsigned long addr, int reliable)
 }
 
 static const struct stacktrace_ops print_trace_ops = {
-	.warning		= print_trace_warning,
-	.warning_symbol		= print_trace_warning_symbol,
-	.stack			= print_trace_stack,
-	.address		= print_trace_address,
+	.warning = print_trace_warning,
+	.warning_symbol = print_trace_warning_symbol,
+	.stack = print_trace_stack,
+	.address = print_trace_address,
 };
 
 static void
@@ -351,15 +351,14 @@ void show_registers(struct pt_regs *regs)
 		printk(KERN_EMERG "Code: ");
 
 		ip = (u8 *)regs->ip - code_prologue;
-		if (ip < (u8 *)PAGE_OFFSET ||
-			probe_kernel_address(ip, c)) {
+		if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
 			/* try starting at EIP */
 			ip = (u8 *)regs->ip;
 			code_len = code_len - code_prologue + 1;
 		}
 		for (i = 0; i < code_len; i++, ip++) {
 			if (ip < (u8 *)PAGE_OFFSET ||
-				probe_kernel_address(ip, c)) {
+					probe_kernel_address(ip, c)) {
 				printk(" Bad EIP value.");
 				break;
 			}
@@ -384,8 +383,6 @@ int is_valid_bugaddr(unsigned long ip)
 	return ud2 == 0x0b0f;
 }
 
-static int die_counter;
-
 int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 {
 	unsigned short ss;
@@ -402,26 +399,22 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 	printk("DEBUG_PAGEALLOC");
 #endif
 	printk("\n");
-
 	if (notify_die(DIE_OOPS, str, regs, err,
-			current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) {
-
-		show_registers(regs);
-		/* Executive summary in case the oops scrolled away */
-		sp = (unsigned long) (&regs->sp);
-		savesegment(ss, ss);
-		if (user_mode(regs)) {
-			sp = regs->sp;
-			ss = regs->ss & 0xffff;
-		}
-		printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
-		print_symbol("%s", regs->ip);
-		printk(" SS:ESP %04x:%08lx\n", ss, sp);
+			current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
+		return 1;
 
-		return 0;
+	show_registers(regs);
+	/* Executive summary in case the oops scrolled away */
+	sp = (unsigned long) (&regs->sp);
+	savesegment(ss, ss);
+	if (user_mode(regs)) {
+		sp = regs->sp;
+		ss = regs->ss & 0xffff;
 	}
-
-	return 1;
+	printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
+	print_symbol("%s", regs->ip);
+	printk(" SS:ESP %04x:%08lx\n", ss, sp);
+	return 0;
 }
 
 /*
@@ -546,7 +539,7 @@ void do_##name(struct pt_regs *regs, long error_code)			\
 {									\
 	trace_hardirqs_fixup();						\
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
-						== NOTIFY_STOP)		\
+							== NOTIFY_STOP)	\
 		return;							\
 	do_trap(trapnr, signr, str, 0, regs, error_code, NULL);		\
 }
@@ -562,7 +555,7 @@ void do_##name(struct pt_regs *regs, long error_code)			\
 	info.si_code = sicode;						\
 	info.si_addr = (void __user *)siaddr;				\
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
-						== NOTIFY_STOP)		\
+							== NOTIFY_STOP)	\
 		return;							\
 	do_trap(trapnr, signr, str, 0, regs, error_code, &info);	\
 }
@@ -571,7 +564,7 @@ void do_##name(struct pt_regs *regs, long error_code)			\
 void do_##name(struct pt_regs *regs, long error_code)			\
 {									\
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
-						== NOTIFY_STOP)		\
+							== NOTIFY_STOP)	\
 		return;							\
 	do_trap(trapnr, signr, str, 1, regs, error_code, NULL);		\
 }
@@ -586,27 +579,29 @@ void do_##name(struct pt_regs *regs, long error_code)			\
 	info.si_addr = (void __user *)siaddr;				\
 	trace_hardirqs_fixup();						\
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
-						== NOTIFY_STOP)		\
+							== NOTIFY_STOP)	\
 		return;							\
 	do_trap(trapnr, signr, str, 1, regs, error_code, &info);	\
 }
 
-DO_VM86_ERROR_INFO(0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->ip)
+DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
 #ifndef CONFIG_KPROBES
 DO_VM86_ERROR(3, SIGTRAP, "int3", int3)
 #endif
 DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow)
 DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO(6, SIGILL,  "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
-DO_ERROR(9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
+DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
-DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
-DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
+DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
+DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
 DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
 
-void __kprobes do_general_protection(struct pt_regs *regs, long error_code)
+void __kprobes
+do_general_protection(struct pt_regs *regs, long error_code)
 {
+	struct task_struct *tsk;
 	struct thread_struct *thread;
 	struct tss_struct *tss;
 	int cpu;
@@ -647,23 +642,24 @@ void __kprobes do_general_protection(struct pt_regs *regs, long error_code)
 	if (regs->flags & X86_VM_MASK)
 		goto gp_in_vm86;
 
+	tsk = current;
 	if (!user_mode(regs))
 		goto gp_in_kernel;
 
-	current->thread.error_code = error_code;
-	current->thread.trap_no = 13;
+	tsk->thread.error_code = error_code;
+	tsk->thread.trap_no = 13;
 
-	if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
-	    printk_ratelimit()) {
+	if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+			printk_ratelimit()) {
 		printk(KERN_INFO
-		    "%s[%d] general protection ip:%lx sp:%lx error:%lx",
-		    current->comm, task_pid_nr(current),
-		    regs->ip, regs->sp, error_code);
+			"%s[%d] general protection ip:%lx sp:%lx error:%lx",
+			tsk->comm, task_pid_nr(tsk),
+			regs->ip, regs->sp, error_code);
 		print_vma_addr(" in ", regs->ip);
 		printk("\n");
 	}
 
-	force_sig(SIGSEGV, current);
+	force_sig(SIGSEGV, tsk);
 	return;
 
 gp_in_vm86:
@@ -672,14 +668,15 @@ gp_in_vm86:
 	return;
 
 gp_in_kernel:
-	if (!fixup_exception(regs)) {
-		current->thread.error_code = error_code;
-		current->thread.trap_no = 13;
-		if (notify_die(DIE_GPF, "general protection fault", regs,
+	if (fixup_exception(regs))
+		return;
+
+	tsk->thread.error_code = error_code;
+	tsk->thread.trap_no = 13;
+	if (notify_die(DIE_GPF, "general protection fault", regs,
 				error_code, 13, SIGSEGV) == NOTIFY_STOP)
-			return;
-		die("general protection fault", regs, error_code);
-	}
+		return;
+	die("general protection fault", regs, error_code);
 }
 
 static notrace __kprobes void
@@ -792,14 +789,17 @@ void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
+	int cpu;
+
+	cpu = smp_processor_id();
 
-	/* Only the BSP gets external NMIs from the system: */
-	if (!smp_processor_id())
+	/* Only the BSP gets external NMIs from the system. */
+	if (!cpu)
 		reason = get_nmi_reason();
 
 	if (!(reason & 0xc0)) {
 		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
-							== NOTIFY_STOP)
+								== NOTIFY_STOP)
 			return;
 #ifdef CONFIG_X86_LOCAL_APIC
 		/*
@@ -808,7 +808,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 		 */
 		if (nmi_watchdog_tick(regs, reason))
 			return;
-		if (!do_nmi_callback(regs, smp_processor_id()))
+		if (!do_nmi_callback(regs, cpu))
 			unknown_nmi_error(reason, regs);
 #else
 		unknown_nmi_error(reason, regs);
@@ -818,6 +818,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 	}
 	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
 		return;
+
+	/* AK: following checks seem to be broken on modern chipsets. FIXME */
 	if (reason & 0x80)
 		mem_parity_error(reason, regs);
 	if (reason & 0x40)
@@ -829,8 +831,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 	reassert_nmi();
 }
 
-static int ignore_nmis;
-
 notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code)
 {
 	int cpu;
@@ -915,7 +915,7 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
 	tsk->thread.debugctlmsr = 0;
 
 	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
-					SIGTRAP) == NOTIFY_STOP)
+						SIGTRAP) == NOTIFY_STOP)
 		return;
 	/* It's safe to allow irq's after DR6 has been saved */
 	if (regs->flags & X86_EFLAGS_IF)
@@ -976,9 +976,8 @@ clear_TF_reenable:
 void math_error(void __user *ip)
 {
 	struct task_struct *task;
-	unsigned short cwd;
-	unsigned short swd;
 	siginfo_t info;
+	unsigned short cwd, swd;
 
 	/*
 	 * Save the info for the exception handler and clear the error.
@@ -997,7 +996,7 @@ void math_error(void __user *ip)
 	 * C1 reg you need in case of a stack fault, 0x040 is the stack
 	 * fault bit.  We should only be taking one exception at a time,
 	 * so if this combination doesn't produce any single exception,
-	 * then we have a bad program that isn't syncronizing its FPU usage
+	 * then we have a bad program that isn't synchronizing its FPU usage
 	 * and it will suffer the consequences since we won't be able to
 	 * fully reproduce the context of the exception
 	 */
@@ -1006,7 +1005,7 @@ void math_error(void __user *ip)
 	switch (swd & ~cwd & 0x3f) {
 	case 0x000: /* No unmasked exception */
 		return;
-	default:    /* Multiple exceptions */
+	default: /* Multiple exceptions */
 		break;
 	case 0x001: /* Invalid Op */
 		/*
@@ -1042,8 +1041,8 @@ void do_coprocessor_error(struct pt_regs *regs, long error_code)
 static void simd_math_error(void __user *ip)
 {
 	struct task_struct *task;
-	unsigned short mxcsr;
 	siginfo_t info;
+	unsigned short mxcsr;
 
 	/*
 	 * Save the info for the exception handler and clear the error.
@@ -1198,16 +1197,16 @@ void __init trap_init(void)
 	early_iounmap(p, 4);
 #endif
 
-	set_trap_gate(0,  &divide_error);
-	set_intr_gate(1,  &debug);
-	set_intr_gate(2,  &nmi);
-	set_system_intr_gate(3, &int3); /* int3/4 can be called from all */
-	set_system_gate(4, &overflow);
-	set_trap_gate(5,  &bounds);
-	set_trap_gate(6,  &invalid_op);
-	set_trap_gate(7,  &device_not_available);
-	set_task_gate(8,  GDT_ENTRY_DOUBLEFAULT_TSS);
-	set_trap_gate(9,  &coprocessor_segment_overrun);
+	set_trap_gate(0, &divide_error);
+	set_intr_gate(1, &debug);
+	set_intr_gate(2, &nmi);
+	set_system_intr_gate(3, &int3); /* int3 can be called from all */
+	set_system_gate(4, &overflow); /* int4 can be called from all */
+	set_trap_gate(5, &bounds);
+	set_trap_gate(6, &invalid_op);
+	set_trap_gate(7, &device_not_available);
+	set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
+	set_trap_gate(9, &coprocessor_segment_overrun);
 	set_trap_gate(10, &invalid_TSS);
 	set_trap_gate(11, &segment_not_present);
 	set_trap_gate(12, &stack_segment);
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 80ba6d37bfe..2696a683778 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -10,49 +10,49 @@
  * 'Traps.c' handles hardware traps and faults after we have saved some
  * state in 'entry.S'.
  */
-#include <linux/sched.h>
+#include <linux/moduleparam.h>
+#include <linux/interrupt.h>
+#include <linux/kallsyms.h>
+#include <linux/spinlock.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/utsname.h>
+#include <linux/kdebug.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
 #include <linux/string.h>
+#include <linux/unwind.h>
+#include <linux/delay.h>
 #include <linux/errno.h>
-#include <linux/ptrace.h>
+#include <linux/kexec.h>
+#include <linux/sched.h>
 #include <linux/timer.h>
-#include <linux/mm.h>
 #include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/kallsyms.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/nmi.h>
-#include <linux/kprobes.h>
-#include <linux/kexec.h>
-#include <linux/unwind.h>
-#include <linux/uaccess.h>
 #include <linux/bug.h>
-#include <linux/kdebug.h>
-#include <linux/utsname.h>
-
-#include <mach_traps.h>
+#include <linux/nmi.h>
+#include <linux/mm.h>
 
 #if defined(CONFIG_EDAC)
 #include <linux/edac.h>
 #endif
 
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/atomic.h>
+#include <asm/stacktrace.h>
+#include <asm/processor.h>
 #include <asm/debugreg.h>
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/unwind.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
-#include <asm/processor.h>
-#include <asm/unwind.h>
+#include <asm/nmi.h>
 #include <asm/smp.h>
+#include <asm/io.h>
 #include <asm/pgalloc.h>
-#include <asm/pda.h>
 #include <asm/proto.h>
-#include <asm/nmi.h>
-#include <asm/stacktrace.h>
+#include <asm/pda.h>
+
+#include <mach_traps.h>
 
 asmlinkage void divide_error(void);
 asmlinkage void debug(void);
@@ -72,12 +72,14 @@ asmlinkage void page_fault(void);
 asmlinkage void coprocessor_error(void);
 asmlinkage void simd_coprocessor_error(void);
 asmlinkage void alignment_check(void);
-asmlinkage void machine_check(void);
 asmlinkage void spurious_interrupt_bug(void);
+asmlinkage void machine_check(void);
 
 int panic_on_unrecovered_nmi;
+int kstack_depth_to_print = 12;
 static unsigned int code_bytes = 64;
-static unsigned ignore_nmis;
+static int ignore_nmis;
+static int die_counter;
 
 static inline void conditional_sti(struct pt_regs *regs)
 {
@@ -101,34 +103,9 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
 	dec_preempt_count();
 }
 
-int kstack_depth_to_print = 12;
-
 void printk_address(unsigned long address, int reliable)
 {
-#ifdef CONFIG_KALLSYMS
-	unsigned long offset = 0, symsize;
-	const char *symname;
-	char *modname;
-	char *delim = ":";
-	char namebuf[KSYM_NAME_LEN];
-	char reliab[4] = "";
-
-	symname = kallsyms_lookup(address, &symsize, &offset,
-					&modname, namebuf);
-	if (!symname) {
-		printk(" [<%016lx>]\n", address);
-		return;
-	}
-	if (!reliable)
-		strcpy(reliab, "? ");
-
-	if (!modname)
-		modname = delim = "";
-	printk(" [<%016lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
-		address, reliab, delim, modname, delim, symname, offset, symsize);
-#else
-	printk(" [<%016lx>]\n", address);
-#endif
+	printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address);
 }
 
 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
@@ -205,8 +182,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 	return NULL;
 }
 
-#define MSG(txt) ops->warning(data, txt)
-
 /*
  * x86-64 can have up to three kernel stacks: 
  * process stack
@@ -233,11 +208,11 @@ struct stack_frame {
 	unsigned long return_address;
 };
 
-
-static inline unsigned long print_context_stack(struct thread_info *tinfo,
-				unsigned long *stack, unsigned long bp,
-				const struct stacktrace_ops *ops, void *data,
-				unsigned long *end)
+static inline unsigned long
+print_context_stack(struct thread_info *tinfo,
+		unsigned long *stack, unsigned long bp,
+		const struct stacktrace_ops *ops, void *data,
+		unsigned long *end)
 {
 	struct stack_frame *frame = (struct stack_frame *)bp;
 
@@ -259,7 +234,7 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 	return bp;
 }
 
-void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
+void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		unsigned long *stack, unsigned long bp,
 		const struct stacktrace_ops *ops, void *data)
 {
@@ -268,36 +243,34 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 	unsigned used = 0;
 	struct thread_info *tinfo;
 
-	if (!tsk)
-		tsk = current;
-	tinfo = task_thread_info(tsk);
+	if (!task)
+		task = current;
 
 	if (!stack) {
 		unsigned long dummy;
 		stack = &dummy;
-		if (tsk && tsk != current)
-			stack = (unsigned long *)tsk->thread.sp;
+		if (task && task != current)
+			stack = (unsigned long *)task->thread.sp;
 	}
 
 #ifdef CONFIG_FRAME_POINTER
 	if (!bp) {
-		if (tsk == current) {
+		if (task == current) {
 			/* Grab bp right from our regs */
-			asm("movq %%rbp, %0" : "=r" (bp):);
+			asm("movq %%rbp, %0" : "=r" (bp) :);
 		} else {
 			/* bp is the last reg pushed by switch_to */
-			bp = *(unsigned long *) tsk->thread.sp;
+			bp = *(unsigned long *) task->thread.sp;
 		}
 	}
 #endif
 
-
-
 	/*
 	 * Print function call entries in all stacks, starting at the
 	 * current stack address. If the stacks consist of nested
 	 * exceptions
 	 */
+	tinfo = task_thread_info(task);
 	for (;;) {
 		char *id;
 		unsigned long *estack_end;
@@ -382,18 +355,17 @@ static const struct stacktrace_ops print_trace_ops = {
 	.address = print_trace_address,
 };
 
-void
-show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack,
-		unsigned long bp)
+void show_trace(struct task_struct *task, struct pt_regs *regs,
+		unsigned long *stack, unsigned long bp)
 {
 	printk("\nCall Trace:\n");
-	dump_trace(tsk, regs, stack, bp, &print_trace_ops, NULL);
+	dump_trace(task, regs, stack, bp, &print_trace_ops, NULL);
 	printk("\n");
 }
 
 static void
-_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp,
-							unsigned long bp)
+_show_stack(struct task_struct *task, struct pt_regs *regs,
+		unsigned long *sp, unsigned long bp)
 {
 	unsigned long *stack;
 	int i;
@@ -405,14 +377,14 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp,
 	// back trace for this cpu.
 
 	if (sp == NULL) {
-		if (tsk)
-			sp = (unsigned long *)tsk->thread.sp;
+		if (task)
+			sp = (unsigned long *)task->thread.sp;
 		else
 			sp = (unsigned long *)&sp;
 	}
 
 	stack = sp;
-	for(i=0; i < kstack_depth_to_print; i++) {
+	for (i = 0; i < kstack_depth_to_print; i++) {
 		if (stack >= irqstack && stack <= irqstack_end) {
 			if (stack == irqstack_end) {
 				stack = (unsigned long *) (irqstack_end[-1]);
@@ -427,12 +399,12 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp,
 		printk(" %016lx", *stack++);
 		touch_nmi_watchdog();
 	}
-	show_trace(tsk, regs, sp, bp);
+	show_trace(task, regs, sp, bp);
 }
 
-void show_stack(struct task_struct *tsk, unsigned long * sp)
+void show_stack(struct task_struct *task, unsigned long *sp)
 {
-	_show_stack(tsk, NULL, sp, 0);
+	_show_stack(task, NULL, sp, 0);
 }
 
 /*
@@ -440,8 +412,8 @@ void show_stack(struct task_struct *tsk, unsigned long * sp)
  */
 void dump_stack(void)
 {
-	unsigned long dummy;
 	unsigned long bp = 0;
+	unsigned long stack;
 
 #ifdef CONFIG_FRAME_POINTER
 	if (!bp)
@@ -453,7 +425,7 @@ void dump_stack(void)
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
-	show_trace(NULL, NULL, &dummy, bp);
+	show_trace(NULL, NULL, &stack, bp);
 }
 
 EXPORT_SYMBOL(dump_stack);
@@ -464,12 +436,8 @@ void show_registers(struct pt_regs *regs)
 	unsigned long sp;
 	const int cpu = smp_processor_id();
 	struct task_struct *cur = cpu_pda(cpu)->pcurrent;
-	u8 *ip;
-	unsigned int code_prologue = code_bytes * 43 / 64;
-	unsigned int code_len = code_bytes;
 
 	sp = regs->sp;
-	ip = (u8 *) regs->ip - code_prologue;
 	printk("CPU %d ", cpu);
 	__show_regs(regs);
 	printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
@@ -480,15 +448,21 @@ void show_registers(struct pt_regs *regs)
 	 * time of the fault..
 	 */
 	if (!user_mode(regs)) {
+		unsigned int code_prologue = code_bytes * 43 / 64;
+		unsigned int code_len = code_bytes;
 		unsigned char c;
+		u8 *ip;
+
 		printk("Stack: ");
 		_show_stack(NULL, regs, (unsigned long *)sp, regs->bp);
 		printk("\n");
 
 		printk(KERN_EMERG "Code: ");
+
+		ip = (u8 *)regs->ip - code_prologue;
 		if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
 			/* try starting at RIP */
-			ip = (u8 *) regs->ip;
+			ip = (u8 *)regs->ip;
 			code_len = code_len - code_prologue + 1;
 		}
 		for (i = 0; i < code_len; i++, ip++) {
@@ -504,7 +478,7 @@ void show_registers(struct pt_regs *regs)
 		}
 	}
 	printk("\n");
-}	
+}
 
 int is_valid_bugaddr(unsigned long ip)
 {
@@ -562,10 +536,9 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 	do_exit(signr);
 }
 
-int __kprobes __die(const char * str, struct pt_regs * regs, long err)
+int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 {
-	static int die_counter;
-	printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter);
+	printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter);
 #ifdef CONFIG_PREEMPT
 	printk("PREEMPT ");
 #endif
@@ -576,8 +549,10 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err)
 	printk("DEBUG_PAGEALLOC");
 #endif
 	printk("\n");
-	if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
+	if (notify_die(DIE_OOPS, str, regs, err,
+			current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
 		return 1;
+
 	show_registers(regs);
 	add_taint(TAINT_DIE);
 	/* Executive summary in case the oops scrolled away */
@@ -589,7 +564,7 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err)
 	return 0;
 }
 
-void die(const char * str, struct pt_regs * regs, long err)
+void die(const char *str, struct pt_regs *regs, long err)
 {
 	unsigned long flags = oops_begin();
 
@@ -606,8 +581,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
 {
 	unsigned long flags;
 
-	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) ==
-	    NOTIFY_STOP)
+	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
 		return;
 
 	flags = oops_begin();
@@ -629,44 +603,44 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
 	do_exit(SIGBUS);
 }
 
-static void __kprobes do_trap(int trapnr, int signr, char *str,
-			      struct pt_regs * regs, long error_code,
-			      siginfo_t *info)
+static void __kprobes
+do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
+	long error_code, siginfo_t *info)
 {
 	struct task_struct *tsk = current;
 
-	if (user_mode(regs)) {
-		/*
-		 * We want error_code and trap_no set for userspace
-		 * faults and kernelspace faults which result in
-		 * die(), but not kernelspace faults which are fixed
-		 * up.  die() gives the process no chance to handle
-		 * the signal and notice the kernel fault information,
-		 * so that won't result in polluting the information
-		 * about previously queued, but not yet delivered,
-		 * faults.  See also do_general_protection below.
-		 */
-		tsk->thread.error_code = error_code;
-		tsk->thread.trap_no = trapnr;
-
-		if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
-		    printk_ratelimit()) {
-			printk(KERN_INFO
-			       "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
-			       tsk->comm, tsk->pid, str,
-			       regs->ip, regs->sp, error_code);
-			print_vma_addr(" in ", regs->ip);
-			printk("\n");
-		}
+	if (!user_mode(regs))
+		goto kernel_trap;
 
-		if (info)
-			force_sig_info(signr, info, tsk);
-		else
-			force_sig(signr, tsk);
-		return;
+	/*
+	 * We want error_code and trap_no set for userspace faults and
+	 * kernelspace faults which result in die(), but not
+	 * kernelspace faults which are fixed up.  die() gives the
+	 * process no chance to handle the signal and notice the
+	 * kernel fault information, so that won't result in polluting
+	 * the information about previously queued, but not yet
+	 * delivered, faults.  See also do_general_protection below.
+	 */
+	tsk->thread.error_code = error_code;
+	tsk->thread.trap_no = trapnr;
+
+	if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
+	    printk_ratelimit()) {
+		printk(KERN_INFO
+		       "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
+		       tsk->comm, tsk->pid, str,
+		       regs->ip, regs->sp, error_code);
+		print_vma_addr(" in ", regs->ip);
+		printk("\n");
 	}
 
+	if (info)
+		force_sig_info(signr, info, tsk);
+	else
+		force_sig(signr, tsk);
+	return;
 
+kernel_trap:
 	if (!fixup_exception(regs)) {
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_no = trapnr;
@@ -676,38 +650,38 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
 }
 
 #define DO_ERROR(trapnr, signr, str, name) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
-{ \
-	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
-							== NOTIFY_STOP) \
-		return; \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code)	\
+{									\
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
+							== NOTIFY_STOP)	\
+		return;							\
 	conditional_sti(regs);						\
-	do_trap(trapnr, signr, str, regs, error_code, NULL); \
+	do_trap(trapnr, signr, str, regs, error_code, NULL);		\
 }
 
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
-{ \
-	siginfo_t info; \
-	info.si_signo = signr; \
-	info.si_errno = 0; \
-	info.si_code = sicode; \
-	info.si_addr = (void __user *)siaddr; \
-	trace_hardirqs_fixup(); \
-	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
-							== NOTIFY_STOP) \
-		return; \
+#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)		\
+asmlinkage void do_##name(struct pt_regs * regs, long error_code)	\
+{									\
+	siginfo_t info;							\
+	info.si_signo = signr;						\
+	info.si_errno = 0;						\
+	info.si_code = sicode;						\
+	info.si_addr = (void __user *)siaddr;				\
+	trace_hardirqs_fixup();						\
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
+							== NOTIFY_STOP)	\
+		return;							\
 	conditional_sti(regs);						\
-	do_trap(trapnr, signr, str, regs, error_code, &info); \
+	do_trap(trapnr, signr, str, regs, error_code, &info);		\
 }
 
-DO_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->ip)
-DO_ERROR( 4, SIGSEGV, "overflow", overflow)
-DO_ERROR( 5, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO( 6, SIGILL,  "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
-DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
+DO_ERROR(4, SIGSEGV, "overflow", overflow)
+DO_ERROR(5, SIGSEGV, "bounds", bounds)
+DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
+DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
-DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
+DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
 
 /* Runs on IST stack */
@@ -738,31 +712,34 @@ asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
 		die(str, regs, error_code);
 }
 
-asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
-						long error_code)
+asmlinkage void __kprobes
+do_general_protection(struct pt_regs *regs, long error_code)
 {
-	struct task_struct *tsk = current;
+	struct task_struct *tsk;
 
 	conditional_sti(regs);
 
-	if (user_mode(regs)) {
-		tsk->thread.error_code = error_code;
-		tsk->thread.trap_no = 13;
-
-		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
-		    printk_ratelimit()) {
-			printk(KERN_INFO
-		       "%s[%d] general protection ip:%lx sp:%lx error:%lx",
-			       tsk->comm, tsk->pid,
-			       regs->ip, regs->sp, error_code);
-			print_vma_addr(" in ", regs->ip);
-			printk("\n");
-		}
+	tsk = current;
+	if (!user_mode(regs))
+		goto gp_in_kernel;
 
-		force_sig(SIGSEGV, tsk);
-		return;
-	} 
+	tsk->thread.error_code = error_code;
+	tsk->thread.trap_no = 13;
 
+	if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+			printk_ratelimit()) {
+		printk(KERN_INFO
+			"%s[%d] general protection ip:%lx sp:%lx error:%lx",
+			tsk->comm, tsk->pid,
+			regs->ip, regs->sp, error_code);
+		print_vma_addr(" in ", regs->ip);
+		printk("\n");
+	}
+
+	force_sig(SIGSEGV, tsk);
+	return;
+
+gp_in_kernel:
 	if (fixup_exception(regs))
 		return;
 
@@ -775,14 +752,14 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
 }
 
 static notrace __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs * regs)
+mem_parity_error(unsigned char reason, struct pt_regs *regs)
 {
 	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
 		reason);
 	printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
 
 #if defined(CONFIG_EDAC)
-	if(edac_handler_set()) {
+	if (edac_handler_set()) {
 		edac_atomic_assert_error();
 		return;
 	}
@@ -799,7 +776,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
 }
 
 static notrace __kprobes void
-io_check_error(unsigned char reason, struct pt_regs * regs)
+io_check_error(unsigned char reason, struct pt_regs *regs)
 {
 	printk("NMI: IOCK error (debug interrupt?)\n");
 	show_registers(regs);
@@ -829,14 +806,14 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
 
 /* Runs on IST stack. This code must keep interrupts off all the time.
    Nested NMIs are prevented by the CPU. */
-asmlinkage notrace  __kprobes void default_do_nmi(struct pt_regs *regs)
+asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
 	int cpu;
 
 	cpu = smp_processor_id();
 
-	/* Only the BSP gets external NMIs from the system.  */
+	/* Only the BSP gets external NMIs from the system. */
 	if (!cpu)
 		reason = get_nmi_reason();
 
@@ -848,18 +825,17 @@ asmlinkage notrace  __kprobes void default_do_nmi(struct pt_regs *regs)
 		 * Ok, so this is none of the documented NMI sources,
 		 * so it must be the NMI watchdog.
 		 */
-		if (nmi_watchdog_tick(regs,reason))
+		if (nmi_watchdog_tick(regs, reason))
 			return;
-		if (!do_nmi_callback(regs,cpu))
+		if (!do_nmi_callback(regs, cpu))
 			unknown_nmi_error(reason, regs);
 
 		return;
 	}
 	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
-		return; 
+		return;
 
 	/* AK: following checks seem to be broken on modern chipsets. FIXME */
-
 	if (reason & 0x80)
 		mem_parity_error(reason, regs);
 	if (reason & 0x40)
@@ -870,9 +846,12 @@ asmlinkage notrace __kprobes void
 do_nmi(struct pt_regs *regs, long error_code)
 {
 	nmi_enter();
+
 	add_pda(__nmi_count, 1);
+
 	if (!ignore_nmis)
 		default_do_nmi(regs);
+
 	nmi_exit();
 }
 
@@ -889,13 +868,14 @@ void restart_nmi(void)
 }
 
 /* runs on IST stack. */
-asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
+asmlinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 {
 	trace_hardirqs_fixup();
 
-	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
+	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
+			== NOTIFY_STOP)
 		return;
-	}
+
 	preempt_conditional_sti(regs);
 	do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
 	preempt_conditional_cli(regs);
@@ -926,8 +906,8 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 asmlinkage void __kprobes do_debug(struct pt_regs * regs,
 				   unsigned long error_code)
 {
-	unsigned long condition;
 	struct task_struct *tsk = current;
+	unsigned long condition;
 	siginfo_t info;
 
 	trace_hardirqs_fixup();
@@ -948,21 +928,19 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
 
 	/* Mask out spurious debug traps due to lazy DR7 setting */
 	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
-		if (!tsk->thread.debugreg7) { 
+		if (!tsk->thread.debugreg7)
 			goto clear_dr7;
-		}
 	}
 
 	tsk->thread.debugreg6 = condition;
 
-
 	/*
 	 * Single-stepping through TF: make sure we ignore any events in
 	 * kernel space (but re-enable TF when returning to user mode).
 	 */
 	if (condition & DR_STEP) {
-                if (!user_mode(regs))
-                       goto clear_TF_reenable;
+		if (!user_mode(regs))
+			goto clear_TF_reenable;
 	}
 
 	/* Ok, finally something we can handle */
@@ -975,7 +953,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
 	force_sig_info(SIGTRAP, &info, tsk);
 
 clear_dr7:
-	set_debugreg(0UL, 7);
+	set_debugreg(0, 7);
 	preempt_conditional_cli(regs);
 	return;
 
@@ -983,6 +961,7 @@ clear_TF_reenable:
 	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
 	regs->flags &= ~X86_EFLAGS_TF;
 	preempt_conditional_cli(regs);
+	return;
 }
 
 static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
@@ -1005,7 +984,7 @@ static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
 asmlinkage void do_coprocessor_error(struct pt_regs *regs)
 {
 	void __user *ip = (void __user *)(regs->ip);
-	struct task_struct * task;
+	struct task_struct *task;
 	siginfo_t info;
 	unsigned short cwd, swd;
 
@@ -1038,30 +1017,30 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs)
 	cwd = get_fpu_cwd(task);
 	swd = get_fpu_swd(task);
 	switch (swd & ~cwd & 0x3f) {
-		case 0x000:
-		default:
-			break;
-		case 0x001: /* Invalid Op */
-			/*
-			 * swd & 0x240 == 0x040: Stack Underflow
-			 * swd & 0x240 == 0x240: Stack Overflow
-			 * User must clear the SF bit (0x40) if set
-			 */
-			info.si_code = FPE_FLTINV;
-			break;
-		case 0x002: /* Denormalize */
-		case 0x010: /* Underflow */
-			info.si_code = FPE_FLTUND;
-			break;
-		case 0x004: /* Zero Divide */
-			info.si_code = FPE_FLTDIV;
-			break;
-		case 0x008: /* Overflow */
-			info.si_code = FPE_FLTOVF;
-			break;
-		case 0x020: /* Precision */
-			info.si_code = FPE_FLTRES;
-			break;
+	case 0x000: /* No unmasked exception */
+	default: /* Multiple exceptions */
+		break;
+	case 0x001: /* Invalid Op */
+		/*
+		 * swd & 0x240 == 0x040: Stack Underflow
+		 * swd & 0x240 == 0x240: Stack Overflow
+		 * User must clear the SF bit (0x40) if set
+		 */
+		info.si_code = FPE_FLTINV;
+		break;
+	case 0x002: /* Denormalize */
+	case 0x010: /* Underflow */
+		info.si_code = FPE_FLTUND;
+		break;
+	case 0x004: /* Zero Divide */
+		info.si_code = FPE_FLTDIV;
+		break;
+	case 0x008: /* Overflow */
+		info.si_code = FPE_FLTOVF;
+		break;
+	case 0x020: /* Precision */
+		info.si_code = FPE_FLTRES;
+		break;
 	}
 	force_sig_info(SIGFPE, &info, task);
 }
@@ -1074,7 +1053,7 @@ asmlinkage void bad_intr(void)
 asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
 {
 	void __user *ip = (void __user *)(regs->ip);
-	struct task_struct * task;
+	struct task_struct *task;
 	siginfo_t info;
 	unsigned short mxcsr;
 
@@ -1102,25 +1081,25 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
 	 */
 	mxcsr = get_fpu_mxcsr(task);
 	switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
-		case 0x000:
-		default:
-			break;
-		case 0x001: /* Invalid Op */
-			info.si_code = FPE_FLTINV;
-			break;
-		case 0x002: /* Denormalize */
-		case 0x010: /* Underflow */
-			info.si_code = FPE_FLTUND;
-			break;
-		case 0x004: /* Zero Divide */
-			info.si_code = FPE_FLTDIV;
-			break;
-		case 0x008: /* Overflow */
-			info.si_code = FPE_FLTOVF;
-			break;
-		case 0x020: /* Precision */
-			info.si_code = FPE_FLTRES;
-			break;
+	case 0x000:
+	default:
+		break;
+	case 0x001: /* Invalid Op */
+		info.si_code = FPE_FLTINV;
+		break;
+	case 0x002: /* Denormalize */
+	case 0x010: /* Underflow */
+		info.si_code = FPE_FLTUND;
+		break;
+	case 0x004: /* Zero Divide */
+		info.si_code = FPE_FLTDIV;
+		break;
+	case 0x008: /* Overflow */
+		info.si_code = FPE_FLTOVF;
+		break;
+	case 0x020: /* Precision */
+		info.si_code = FPE_FLTRES;
+		break;
 	}
 	force_sig_info(SIGFPE, &info, task);
 }
@@ -1138,7 +1117,7 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
 }
 
 /*
- *  'math_state_restore()' saves the current math information in the
+ * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
  *
  * Careful.. There are problems with IBM-designed IRQ13 behaviour.
@@ -1163,7 +1142,7 @@ asmlinkage void math_state_restore(void)
 		local_irq_disable();
 	}
 
-	clts();			/* Allow maths ops (or we recurse) */
+	clts();				/* Allow maths ops (or we recurse) */
 	restore_fpu_checking(&me->thread.xstate->fxsave);
 	task_thread_info(me)->status |= TS_USEDFPU;
 	me->fpu_counter++;
@@ -1172,64 +1151,61 @@ EXPORT_SYMBOL_GPL(math_state_restore);
 
 void __init trap_init(void)
 {
-	set_intr_gate(0,&divide_error);
-	set_intr_gate_ist(1,&debug,DEBUG_STACK);
-	set_intr_gate_ist(2,&nmi,NMI_STACK);
- 	set_system_gate_ist(3,&int3,DEBUG_STACK); /* int3 can be called from all */
-	set_system_gate(4,&overflow);	/* int4 can be called from all */
-	set_intr_gate(5,&bounds);
-	set_intr_gate(6,&invalid_op);
-	set_intr_gate(7,&device_not_available);
-	set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK);
-	set_intr_gate(9,&coprocessor_segment_overrun);
-	set_intr_gate(10,&invalid_TSS);
-	set_intr_gate(11,&segment_not_present);
-	set_intr_gate_ist(12,&stack_segment,STACKFAULT_STACK);
-	set_intr_gate(13,&general_protection);
-	set_intr_gate(14,&page_fault);
-	set_intr_gate(15,&spurious_interrupt_bug);
-	set_intr_gate(16,&coprocessor_error);
-	set_intr_gate(17,&alignment_check);
+	set_intr_gate(0, &divide_error);
+	set_intr_gate_ist(1, &debug, DEBUG_STACK);
+	set_intr_gate_ist(2, &nmi, NMI_STACK);
+ 	set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */
+	set_system_gate(4, &overflow); /* int4 can be called from all */
+	set_intr_gate(5, &bounds);
+	set_intr_gate(6, &invalid_op);
+	set_intr_gate(7, &device_not_available);
+	set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
+	set_intr_gate(9, &coprocessor_segment_overrun);
+	set_intr_gate(10, &invalid_TSS);
+	set_intr_gate(11, &segment_not_present);
+	set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
+	set_intr_gate(13, &general_protection);
+	set_intr_gate(14, &page_fault);
+	set_intr_gate(15, &spurious_interrupt_bug);
+	set_intr_gate(16, &coprocessor_error);
+	set_intr_gate(17, &alignment_check);
 #ifdef CONFIG_X86_MCE
-	set_intr_gate_ist(18,&machine_check, MCE_STACK); 
+	set_intr_gate_ist(18, &machine_check, MCE_STACK);
 #endif
-	set_intr_gate(19,&simd_coprocessor_error);
+	set_intr_gate(19, &simd_coprocessor_error);
 
 #ifdef CONFIG_IA32_EMULATION
 	set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
 #endif
-       
 	/*
 	 * initialize the per thread extended state:
 	 */
-        init_thread_xstate();
+	init_thread_xstate();
 	/*
-	 * Should be a barrier for any external CPU state.
+	 * Should be a barrier for any external CPU state:
 	 */
 	cpu_init();
 }
 
-
 static int __init oops_setup(char *s)
-{ 
+{
 	if (!s)
 		return -EINVAL;
 	if (!strcmp(s, "panic"))
 		panic_on_oops = 1;
 	return 0;
-} 
+}
 early_param("oops", oops_setup);
 
 static int __init kstack_setup(char *s)
 {
 	if (!s)
 		return -EINVAL;
-	kstack_depth_to_print = simple_strtoul(s,NULL,0);
+	kstack_depth_to_print = simple_strtoul(s, NULL, 0);
 	return 0;
 }
 early_param("kstack", kstack_setup);
 
-
 static int __init code_bytes_setup(char *s)
 {
 	code_bytes = simple_strtoul(s, NULL, 0);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
new file mode 100644
index 00000000000..7603c055390
--- /dev/null
+++ b/arch/x86/kernel/tsc.c
@@ -0,0 +1,535 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/acpi_pmtmr.h>
+#include <linux/cpufreq.h>
+#include <linux/dmi.h>
+#include <linux/delay.h>
+#include <linux/clocksource.h>
+#include <linux/percpu.h>
+
+#include <asm/hpet.h>
+#include <asm/timer.h>
+#include <asm/vgtod.h>
+#include <asm/time.h>
+#include <asm/delay.h>
+
+unsigned int cpu_khz;           /* TSC clocks / usec, not used here */
+EXPORT_SYMBOL(cpu_khz);
+unsigned int tsc_khz;
+EXPORT_SYMBOL(tsc_khz);
+
+/*
+ * TSC can be unstable due to cpufreq or due to unsynced TSCs
+ */
+static int tsc_unstable;
+
+/* native_sched_clock() is called before tsc_init(), so
+   we must start with the TSC soft disabled to prevent
+   erroneous rdtsc usage on !cpu_has_tsc processors */
+static int tsc_disabled = -1;
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ */
+u64 native_sched_clock(void)
+{
+	u64 this_offset;
+
+	/*
+	 * Fall back to jiffies if there's no TSC available:
+	 * ( But note that we still use it if the TSC is marked
+	 *   unstable. We do this because unlike Time Of Day,
+	 *   the scheduler clock tolerates small errors and it's
+	 *   very important for it to be as fast as the platform
+	 *   can achive it. )
+	 */
+	if (unlikely(tsc_disabled)) {
+		/* No locking but a rare wrong value is not a big deal: */
+		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
+	}
+
+	/* read the Time Stamp Counter: */
+	rdtscll(this_offset);
+
+	/* return the value in ns */
+	return cycles_2_ns(this_offset);
+}
+
+/* We need to define a real function for sched_clock, to override the
+   weak default version */
+#ifdef CONFIG_PARAVIRT
+unsigned long long sched_clock(void)
+{
+	return paravirt_sched_clock();
+}
+#else
+unsigned long long
+sched_clock(void) __attribute__((alias("native_sched_clock")));
+#endif
+
+int check_tsc_unstable(void)
+{
+	return tsc_unstable;
+}
+EXPORT_SYMBOL_GPL(check_tsc_unstable);
+
+#ifdef CONFIG_X86_TSC
+int __init notsc_setup(char *str)
+{
+	printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
+			"cannot disable TSC completely.\n");
+	tsc_disabled = 1;
+	return 1;
+}
+#else
+/*
+ * disable flag for tsc. Takes effect by clearing the TSC cpu flag
+ * in cpu/common.c
+ */
+int __init notsc_setup(char *str)
+{
+	setup_clear_cpu_cap(X86_FEATURE_TSC);
+	return 1;
+}
+#endif
+
+__setup("notsc", notsc_setup);
+
+#define MAX_RETRIES     5
+#define SMI_TRESHOLD    50000
+
+/*
+ * Read TSC and the reference counters. Take care of SMI disturbance
+ */
+static u64 __init tsc_read_refs(u64 *pm, u64 *hpet)
+{
+	u64 t1, t2;
+	int i;
+
+	for (i = 0; i < MAX_RETRIES; i++) {
+		t1 = get_cycles();
+		if (hpet)
+			*hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
+		else
+			*pm = acpi_pm_read_early();
+		t2 = get_cycles();
+		if ((t2 - t1) < SMI_TRESHOLD)
+			return t2;
+	}
+	return ULLONG_MAX;
+}
+
+/**
+ * native_calibrate_tsc - calibrate the tsc on boot
+ */
+unsigned long native_calibrate_tsc(void)
+{
+	unsigned long flags;
+	u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2;
+	int hpet = is_hpet_enabled();
+	unsigned int tsc_khz_val = 0;
+
+	local_irq_save(flags);
+
+	tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
+
+	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+	outb(0xb0, 0x43);
+	outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
+	outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
+	tr1 = get_cycles();
+	while ((inb(0x61) & 0x20) == 0);
+	tr2 = get_cycles();
+
+	tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
+
+	local_irq_restore(flags);
+
+	/*
+	 * Preset the result with the raw and inaccurate PIT
+	 * calibration value
+	 */
+	delta = (tr2 - tr1);
+	do_div(delta, 50);
+	tsc_khz_val = delta;
+
+	/* hpet or pmtimer available ? */
+	if (!hpet && !pm1 && !pm2) {
+		printk(KERN_INFO "TSC calibrated against PIT\n");
+		goto out;
+	}
+
+	/* Check, whether the sampling was disturbed by an SMI */
+	if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) {
+		printk(KERN_WARNING "TSC calibration disturbed by SMI, "
+				"using PIT calibration result\n");
+		goto out;
+	}
+
+	tsc2 = (tsc2 - tsc1) * 1000000LL;
+
+	if (hpet) {
+		printk(KERN_INFO "TSC calibrated against HPET\n");
+		if (hpet2 < hpet1)
+			hpet2 += 0x100000000ULL;
+		hpet2 -= hpet1;
+		tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
+		do_div(tsc1, 1000000);
+	} else {
+		printk(KERN_INFO "TSC calibrated against PM_TIMER\n");
+		if (pm2 < pm1)
+			pm2 += (u64)ACPI_PM_OVRRUN;
+		pm2 -= pm1;
+		tsc1 = pm2 * 1000000000LL;
+		do_div(tsc1, PMTMR_TICKS_PER_SEC);
+	}
+
+	do_div(tsc2, tsc1);
+	tsc_khz_val = tsc2;
+
+out:
+	return tsc_khz_val;
+}
+
+
+#ifdef CONFIG_X86_32
+/* Only called from the Powernow K7 cpu freq driver */
+int recalibrate_cpu_khz(void)
+{
+#ifndef CONFIG_SMP
+	unsigned long cpu_khz_old = cpu_khz;
+
+	if (cpu_has_tsc) {
+		tsc_khz = calibrate_tsc();
+		cpu_khz = tsc_khz;
+		cpu_data(0).loops_per_jiffy =
+			cpufreq_scale(cpu_data(0).loops_per_jiffy,
+					cpu_khz_old, cpu_khz);
+		return 0;
+	} else
+		return -ENODEV;
+#else
+	return -ENODEV;
+#endif
+}
+
+EXPORT_SYMBOL(recalibrate_cpu_khz);
+
+#endif /* CONFIG_X86_32 */
+
+/* Accelerators for sched_clock()
+ * convert from cycles(64bits) => nanoseconds (64bits)
+ *  basic equation:
+ *              ns = cycles / (freq / ns_per_sec)
+ *              ns = cycles * (ns_per_sec / freq)
+ *              ns = cycles * (10^9 / (cpu_khz * 10^3))
+ *              ns = cycles * (10^6 / cpu_khz)
+ *
+ *      Then we use scaling math (suggested by george@mvista.com) to get:
+ *              ns = cycles * (10^6 * SC / cpu_khz) / SC
+ *              ns = cycles * cyc2ns_scale / SC
+ *
+ *      And since SC is a constant power of two, we can convert the div
+ *  into a shift.
+ *
+ *  We can use khz divisor instead of mhz to keep a better precision, since
+ *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ *  (mathieu.desnoyers@polymtl.ca)
+ *
+ *                      -johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */
+
+DEFINE_PER_CPU(unsigned long, cyc2ns);
+
+static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
+{
+	unsigned long long tsc_now, ns_now;
+	unsigned long flags, *scale;
+
+	local_irq_save(flags);
+	sched_clock_idle_sleep_event();
+
+	scale = &per_cpu(cyc2ns, cpu);
+
+	rdtscll(tsc_now);
+	ns_now = __cycles_2_ns(tsc_now);
+
+	if (cpu_khz)
+		*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
+
+	sched_clock_idle_wakeup_event(0);
+	local_irq_restore(flags);
+}
+
+#ifdef CONFIG_CPU_FREQ
+
+/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
+ * changes.
+ *
+ * RED-PEN: On SMP we assume all CPUs run with the same frequency.  It's
+ * not that important because current Opteron setups do not support
+ * scaling on SMP anyroads.
+ *
+ * Should fix up last_tsc too. Currently gettimeofday in the
+ * first tick after the change will be slightly wrong.
+ */
+
+static unsigned int  ref_freq;
+static unsigned long loops_per_jiffy_ref;
+static unsigned long tsc_khz_ref;
+
+static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+				void *data)
+{
+	struct cpufreq_freqs *freq = data;
+	unsigned long *lpj, dummy;
+
+	if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
+		return 0;
+
+	lpj = &dummy;
+	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+#ifdef CONFIG_SMP
+		lpj = &cpu_data(freq->cpu).loops_per_jiffy;
+#else
+	lpj = &boot_cpu_data.loops_per_jiffy;
+#endif
+
+	if (!ref_freq) {
+		ref_freq = freq->old;
+		loops_per_jiffy_ref = *lpj;
+		tsc_khz_ref = tsc_khz;
+	}
+	if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
+			(val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
+			(val == CPUFREQ_RESUMECHANGE)) {
+		*lpj = 	cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
+
+		tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+			mark_tsc_unstable("cpufreq changes");
+	}
+
+	set_cyc2ns_scale(tsc_khz_ref, freq->cpu);
+
+	return 0;
+}
+
+static struct notifier_block time_cpufreq_notifier_block = {
+	.notifier_call  = time_cpufreq_notifier
+};
+
+static int __init cpufreq_tsc(void)
+{
+	cpufreq_register_notifier(&time_cpufreq_notifier_block,
+				CPUFREQ_TRANSITION_NOTIFIER);
+	return 0;
+}
+
+core_initcall(cpufreq_tsc);
+
+#endif /* CONFIG_CPU_FREQ */
+
+/* clocksource code */
+
+static struct clocksource clocksource_tsc;
+
+/*
+ * We compare the TSC to the cycle_last value in the clocksource
+ * structure to avoid a nasty time-warp. This can be observed in a
+ * very small window right after one CPU updated cycle_last under
+ * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
+ * is smaller than the cycle_last reference value due to a TSC which
+ * is slighty behind. This delta is nowhere else observable, but in
+ * that case it results in a forward time jump in the range of hours
+ * due to the unsigned delta calculation of the time keeping core
+ * code, which is necessary to support wrapping clocksources like pm
+ * timer.
+ */
+static cycle_t read_tsc(void)
+{
+	cycle_t ret = (cycle_t)get_cycles();
+
+	return ret >= clocksource_tsc.cycle_last ?
+		ret : clocksource_tsc.cycle_last;
+}
+
+#ifdef CONFIG_X86_64
+static cycle_t __vsyscall_fn vread_tsc(void)
+{
+	cycle_t ret = (cycle_t)vget_cycles();
+
+	return ret >= __vsyscall_gtod_data.clock.cycle_last ?
+		ret : __vsyscall_gtod_data.clock.cycle_last;
+}
+#endif
+
+static struct clocksource clocksource_tsc = {
+	.name                   = "tsc",
+	.rating                 = 300,
+	.read                   = read_tsc,
+	.mask                   = CLOCKSOURCE_MASK(64),
+	.shift                  = 22,
+	.flags                  = CLOCK_SOURCE_IS_CONTINUOUS |
+				  CLOCK_SOURCE_MUST_VERIFY,
+#ifdef CONFIG_X86_64
+	.vread                  = vread_tsc,
+#endif
+};
+
+void mark_tsc_unstable(char *reason)
+{
+	if (!tsc_unstable) {
+		tsc_unstable = 1;
+		printk("Marking TSC unstable due to %s\n", reason);
+		/* Change only the rating, when not registered */
+		if (clocksource_tsc.mult)
+			clocksource_change_rating(&clocksource_tsc, 0);
+		else
+			clocksource_tsc.rating = 0;
+	}
+}
+
+EXPORT_SYMBOL_GPL(mark_tsc_unstable);
+
+static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d)
+{
+	printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
+			d->ident);
+	tsc_unstable = 1;
+	return 0;
+}
+
+/* List of systems that have known TSC problems */
+static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
+	{
+		.callback = dmi_mark_tsc_unstable,
+		.ident = "IBM Thinkpad 380XD",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+			DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
+		},
+	},
+	{}
+};
+
+/*
+ * Geode_LX - the OLPC CPU has a possibly a very reliable TSC
+ */
+#ifdef CONFIG_MGEODE_LX
+/* RTSC counts during suspend */
+#define RTSC_SUSP 0x100
+
+static void __init check_geode_tsc_reliable(void)
+{
+	unsigned long res_low, res_high;
+
+	rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
+	if (res_low & RTSC_SUSP)
+		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
+}
+#else
+static inline void check_geode_tsc_reliable(void) { }
+#endif
+
+/*
+ * Make an educated guess if the TSC is trustworthy and synchronized
+ * over all CPUs.
+ */
+__cpuinit int unsynchronized_tsc(void)
+{
+	if (!cpu_has_tsc || tsc_unstable)
+		return 1;
+
+#ifdef CONFIG_SMP
+	if (apic_is_clustered_box())
+		return 1;
+#endif
+
+	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+		return 0;
+	/*
+	 * Intel systems are normally all synchronized.
+	 * Exceptions must mark TSC as unstable:
+	 */
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
+		/* assume multi socket systems are not synchronized: */
+		if (num_possible_cpus() > 1)
+			tsc_unstable = 1;
+	}
+
+	return tsc_unstable;
+}
+
+static void __init init_tsc_clocksource(void)
+{
+	clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
+			clocksource_tsc.shift);
+	/* lower the rating if we already know its unstable: */
+	if (check_tsc_unstable()) {
+		clocksource_tsc.rating = 0;
+		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
+	}
+	clocksource_register(&clocksource_tsc);
+}
+
+void __init tsc_init(void)
+{
+	u64 lpj;
+	int cpu;
+
+	if (!cpu_has_tsc)
+		return;
+
+	tsc_khz = calibrate_tsc();
+	cpu_khz = tsc_khz;
+
+	if (!tsc_khz) {
+		mark_tsc_unstable("could not calculate TSC khz");
+		return;
+	}
+
+#ifdef CONFIG_X86_64
+	if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
+			(boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
+		cpu_khz = calibrate_cpu();
+#endif
+
+	lpj = ((u64)tsc_khz * 1000);
+	do_div(lpj, HZ);
+	lpj_fine = lpj;
+
+	printk("Detected %lu.%03lu MHz processor.\n",
+			(unsigned long)cpu_khz / 1000,
+			(unsigned long)cpu_khz % 1000);
+
+	/*
+	 * Secondary CPUs do not run through tsc_init(), so set up
+	 * all the scale factors for all CPUs, assuming the same
+	 * speed as the bootup CPU. (cpufreq notifiers will fix this
+	 * up if their speed diverges)
+	 */
+	for_each_possible_cpu(cpu)
+		set_cyc2ns_scale(cpu_khz, cpu);
+
+	if (tsc_disabled > 0)
+		return;
+
+	/* now allow native_sched_clock() to use rdtsc */
+	tsc_disabled = 0;
+
+	use_tsc_delay();
+	/* Check and install the TSC clocksource */
+	dmi_check_system(bad_tsc_dmi_table);
+
+	if (unsynchronized_tsc())
+		mark_tsc_unstable("TSCs unsynchronized");
+
+	check_geode_tsc_reliable();
+	init_tsc_clocksource();
+}
+
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
deleted file mode 100644
index 6240922e497..00000000000
--- a/arch/x86/kernel/tsc_32.c
+++ /dev/null
@@ -1,455 +0,0 @@
-#include <linux/sched.h>
-#include <linux/clocksource.h>
-#include <linux/workqueue.h>
-#include <linux/delay.h>
-#include <linux/cpufreq.h>
-#include <linux/jiffies.h>
-#include <linux/init.h>
-#include <linux/dmi.h>
-#include <linux/percpu.h>
-
-#include <asm/delay.h>
-#include <asm/tsc.h>
-#include <asm/io.h>
-#include <asm/timer.h>
-
-#include "mach_timer.h"
-
-/* native_sched_clock() is called before tsc_init(), so
-   we must start with the TSC soft disabled to prevent
-   erroneous rdtsc usage on !cpu_has_tsc processors */
-static int tsc_disabled = -1;
-
-/*
- * On some systems the TSC frequency does not
- * change with the cpu frequency. So we need
- * an extra value to store the TSC freq
- */
-unsigned int tsc_khz;
-EXPORT_SYMBOL_GPL(tsc_khz);
-
-#ifdef CONFIG_X86_TSC
-static int __init tsc_setup(char *str)
-{
-	printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
-	       "cannot disable TSC completely.\n");
-	tsc_disabled = 1;
-	return 1;
-}
-#else
-/*
- * disable flag for tsc. Takes effect by clearing the TSC cpu flag
- * in cpu/common.c
- */
-static int __init tsc_setup(char *str)
-{
-	setup_clear_cpu_cap(X86_FEATURE_TSC);
-	return 1;
-}
-#endif
-
-__setup("notsc", tsc_setup);
-
-/*
- * code to mark and check if the TSC is unstable
- * due to cpufreq or due to unsynced TSCs
- */
-static int tsc_unstable;
-
-int check_tsc_unstable(void)
-{
-	return tsc_unstable;
-}
-EXPORT_SYMBOL_GPL(check_tsc_unstable);
-
-/* Accelerators for sched_clock()
- * convert from cycles(64bits) => nanoseconds (64bits)
- *  basic equation:
- *		ns = cycles / (freq / ns_per_sec)
- *		ns = cycles * (ns_per_sec / freq)
- *		ns = cycles * (10^9 / (cpu_khz * 10^3))
- *		ns = cycles * (10^6 / cpu_khz)
- *
- *	Then we use scaling math (suggested by george@mvista.com) to get:
- *		ns = cycles * (10^6 * SC / cpu_khz) / SC
- *		ns = cycles * cyc2ns_scale / SC
- *
- *	And since SC is a constant power of two, we can convert the div
- *  into a shift.
- *
- *  We can use khz divisor instead of mhz to keep a better precision, since
- *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- *  (mathieu.desnoyers@polymtl.ca)
- *
- *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-
-DEFINE_PER_CPU(unsigned long, cyc2ns);
-
-static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
-{
-	unsigned long long tsc_now, ns_now;
-	unsigned long flags, *scale;
-
-	local_irq_save(flags);
-	sched_clock_idle_sleep_event();
-
-	scale = &per_cpu(cyc2ns, cpu);
-
-	rdtscll(tsc_now);
-	ns_now = __cycles_2_ns(tsc_now);
-
-	if (cpu_khz)
-		*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
-
-	/*
-	 * Start smoothly with the new frequency:
-	 */
-	sched_clock_idle_wakeup_event(0);
-	local_irq_restore(flags);
-}
-
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long native_sched_clock(void)
-{
-	unsigned long long this_offset;
-
-	/*
-	 * Fall back to jiffies if there's no TSC available:
-	 * ( But note that we still use it if the TSC is marked
-	 *   unstable. We do this because unlike Time Of Day,
-	 *   the scheduler clock tolerates small errors and it's
-	 *   very important for it to be as fast as the platform
-	 *   can achive it. )
-	 */
-	if (unlikely(tsc_disabled))
-		/* No locking but a rare wrong value is not a big deal: */
-		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
-
-	/* read the Time Stamp Counter: */
-	rdtscll(this_offset);
-
-	/* return the value in ns */
-	return cycles_2_ns(this_offset);
-}
-
-/* We need to define a real function for sched_clock, to override the
-   weak default version */
-#ifdef CONFIG_PARAVIRT
-unsigned long long sched_clock(void)
-{
-	return paravirt_sched_clock();
-}
-#else
-unsigned long long sched_clock(void)
-	__attribute__((alias("native_sched_clock")));
-#endif
-
-unsigned long native_calculate_cpu_khz(void)
-{
-	unsigned long long start, end;
-	unsigned long count;
-	u64 delta64 = (u64)ULLONG_MAX;
-	int i;
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	/* run 3 times to ensure the cache is warm and to get an accurate reading */
-	for (i = 0; i < 3; i++) {
-		mach_prepare_counter();
-		rdtscll(start);
-		mach_countup(&count);
-		rdtscll(end);
-
-		/*
-		 * Error: ECTCNEVERSET
-		 * The CTC wasn't reliable: we got a hit on the very first read,
-		 * or the CPU was so fast/slow that the quotient wouldn't fit in
-		 * 32 bits..
-		 */
-		if (count <= 1)
-			continue;
-
-		/* cpu freq too slow: */
-		if ((end - start) <= CALIBRATE_TIME_MSEC)
-			continue;
-
-		/*
-		 * We want the minimum time of all runs in case one of them
-		 * is inaccurate due to SMI or other delay
-		 */
-		delta64 = min(delta64, (end - start));
-	}
-
-	/* cpu freq too fast (or every run was bad): */
-	if (delta64 > (1ULL<<32))
-		goto err;
-
-	delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */
-	do_div(delta64,CALIBRATE_TIME_MSEC);
-
-	local_irq_restore(flags);
-	return (unsigned long)delta64;
-err:
-	local_irq_restore(flags);
-	return 0;
-}
-
-int recalibrate_cpu_khz(void)
-{
-#ifndef CONFIG_SMP
-	unsigned long cpu_khz_old = cpu_khz;
-
-	if (cpu_has_tsc) {
-		cpu_khz = calculate_cpu_khz();
-		tsc_khz = cpu_khz;
-		cpu_data(0).loops_per_jiffy =
-			cpufreq_scale(cpu_data(0).loops_per_jiffy,
-					cpu_khz_old, cpu_khz);
-		return 0;
-	} else
-		return -ENODEV;
-#else
-	return -ENODEV;
-#endif
-}
-
-EXPORT_SYMBOL(recalibrate_cpu_khz);
-
-#ifdef CONFIG_CPU_FREQ
-
-/*
- * if the CPU frequency is scaled, TSC-based delays will need a different
- * loops_per_jiffy value to function properly.
- */
-static unsigned int ref_freq;
-static unsigned long loops_per_jiffy_ref;
-static unsigned long cpu_khz_ref;
-
-static int
-time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
-{
-	struct cpufreq_freqs *freq = data;
-
-	if (!ref_freq) {
-		if (!freq->old){
-			ref_freq = freq->new;
-			return 0;
-		}
-		ref_freq = freq->old;
-		loops_per_jiffy_ref = cpu_data(freq->cpu).loops_per_jiffy;
-		cpu_khz_ref = cpu_khz;
-	}
-
-	if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
-	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
-	    (val == CPUFREQ_RESUMECHANGE)) {
-		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-			cpu_data(freq->cpu).loops_per_jiffy =
-				cpufreq_scale(loops_per_jiffy_ref,
-						ref_freq, freq->new);
-
-		if (cpu_khz) {
-
-			if (num_online_cpus() == 1)
-				cpu_khz = cpufreq_scale(cpu_khz_ref,
-						ref_freq, freq->new);
-			if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
-				tsc_khz = cpu_khz;
-				set_cyc2ns_scale(cpu_khz, freq->cpu);
-				/*
-				 * TSC based sched_clock turns
-				 * to junk w/ cpufreq
-				 */
-				mark_tsc_unstable("cpufreq changes");
-			}
-		}
-	}
-
-	return 0;
-}
-
-static struct notifier_block time_cpufreq_notifier_block = {
-	.notifier_call	= time_cpufreq_notifier
-};
-
-static int __init cpufreq_tsc(void)
-{
-	return cpufreq_register_notifier(&time_cpufreq_notifier_block,
-					 CPUFREQ_TRANSITION_NOTIFIER);
-}
-core_initcall(cpufreq_tsc);
-
-#endif
-
-/* clock source code */
-
-static struct clocksource clocksource_tsc;
-
-/*
- * We compare the TSC to the cycle_last value in the clocksource
- * structure to avoid a nasty time-warp issue. This can be observed in
- * a very small window right after one CPU updated cycle_last under
- * xtime lock and the other CPU reads a TSC value which is smaller
- * than the cycle_last reference value due to a TSC which is slighty
- * behind. This delta is nowhere else observable, but in that case it
- * results in a forward time jump in the range of hours due to the
- * unsigned delta calculation of the time keeping core code, which is
- * necessary to support wrapping clocksources like pm timer.
- */
-static cycle_t read_tsc(void)
-{
-	cycle_t ret;
-
-	rdtscll(ret);
-
-	return ret >= clocksource_tsc.cycle_last ?
-		ret : clocksource_tsc.cycle_last;
-}
-
-static struct clocksource clocksource_tsc = {
-	.name			= "tsc",
-	.rating			= 300,
-	.read			= read_tsc,
-	.mask			= CLOCKSOURCE_MASK(64),
-	.mult			= 0, /* to be set */
-	.shift			= 22,
-	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
-				  CLOCK_SOURCE_MUST_VERIFY,
-};
-
-void mark_tsc_unstable(char *reason)
-{
-	if (!tsc_unstable) {
-		tsc_unstable = 1;
-		printk("Marking TSC unstable due to: %s.\n", reason);
-		/* Can be called before registration */
-		if (clocksource_tsc.mult)
-			clocksource_change_rating(&clocksource_tsc, 0);
-		else
-			clocksource_tsc.rating = 0;
-	}
-}
-EXPORT_SYMBOL_GPL(mark_tsc_unstable);
-
-static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d)
-{
-	printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
-	       d->ident);
-	tsc_unstable = 1;
-	return 0;
-}
-
-/* List of systems that have known TSC problems */
-static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
-	{
-	 .callback = dmi_mark_tsc_unstable,
-	 .ident = "IBM Thinkpad 380XD",
-	 .matches = {
-		     DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
-		     DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
-		     },
-	 },
-	 {}
-};
-
-/*
- * Make an educated guess if the TSC is trustworthy and synchronized
- * over all CPUs.
- */
-__cpuinit int unsynchronized_tsc(void)
-{
-	if (!cpu_has_tsc || tsc_unstable)
-		return 1;
-
-	/* Anything with constant TSC should be synchronized */
-	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
-		return 0;
-
-	/*
-	 * Intel systems are normally all synchronized.
-	 * Exceptions must mark TSC as unstable:
-	 */
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
-		/* assume multi socket systems are not synchronized: */
-		if (num_possible_cpus() > 1)
-			tsc_unstable = 1;
-	}
-	return tsc_unstable;
-}
-
-/*
- * Geode_LX - the OLPC CPU has a possibly a very reliable TSC
- */
-#ifdef CONFIG_MGEODE_LX
-/* RTSC counts during suspend */
-#define RTSC_SUSP 0x100
-
-static void __init check_geode_tsc_reliable(void)
-{
-	unsigned long res_low, res_high;
-
-	rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
-	if (res_low & RTSC_SUSP)
-		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
-}
-#else
-static inline void check_geode_tsc_reliable(void) { }
-#endif
-
-
-void __init tsc_init(void)
-{
-	int cpu;
-	u64 lpj;
-
-	if (!cpu_has_tsc || tsc_disabled > 0)
-		return;
-
-	cpu_khz = calculate_cpu_khz();
-	tsc_khz = cpu_khz;
-
-	if (!cpu_khz) {
-		mark_tsc_unstable("could not calculate TSC khz");
-		return;
-	}
-
-	lpj = ((u64)tsc_khz * 1000);
-	do_div(lpj, HZ);
-	lpj_fine = lpj;
-
-	/* now allow native_sched_clock() to use rdtsc */
-	tsc_disabled = 0;
-
-	printk("Detected %lu.%03lu MHz processor.\n",
-				(unsigned long)cpu_khz / 1000,
-				(unsigned long)cpu_khz % 1000);
-
-	/*
-	 * Secondary CPUs do not run through tsc_init(), so set up
-	 * all the scale factors for all CPUs, assuming the same
-	 * speed as the bootup CPU. (cpufreq notifiers will fix this
-	 * up if their speed diverges)
-	 */
-	for_each_possible_cpu(cpu)
-		set_cyc2ns_scale(cpu_khz, cpu);
-
-	use_tsc_delay();
-
-	/* Check and install the TSC clocksource */
-	dmi_check_system(bad_tsc_dmi_table);
-
-	unsynchronized_tsc();
-	check_geode_tsc_reliable();
-	clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
-						    clocksource_tsc.shift);
-	/* lower the rating if we already know its unstable: */
-	if (check_tsc_unstable()) {
-		clocksource_tsc.rating = 0;
-		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
-	}
-	clocksource_register(&clocksource_tsc);
-}
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
deleted file mode 100644
index 9898fb01edf..00000000000
--- a/arch/x86/kernel/tsc_64.c
+++ /dev/null
@@ -1,357 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/clocksource.h>
-#include <linux/time.h>
-#include <linux/acpi.h>
-#include <linux/cpufreq.h>
-#include <linux/acpi_pmtmr.h>
-
-#include <asm/hpet.h>
-#include <asm/timex.h>
-#include <asm/timer.h>
-#include <asm/vgtod.h>
-
-static int notsc __initdata = 0;
-
-unsigned int cpu_khz;		/* TSC clocks / usec, not used here */
-EXPORT_SYMBOL(cpu_khz);
-unsigned int tsc_khz;
-EXPORT_SYMBOL(tsc_khz);
-
-/* Accelerators for sched_clock()
- * convert from cycles(64bits) => nanoseconds (64bits)
- *  basic equation:
- *		ns = cycles / (freq / ns_per_sec)
- *		ns = cycles * (ns_per_sec / freq)
- *		ns = cycles * (10^9 / (cpu_khz * 10^3))
- *		ns = cycles * (10^6 / cpu_khz)
- *
- *	Then we use scaling math (suggested by george@mvista.com) to get:
- *		ns = cycles * (10^6 * SC / cpu_khz) / SC
- *		ns = cycles * cyc2ns_scale / SC
- *
- *	And since SC is a constant power of two, we can convert the div
- *  into a shift.
- *
- *  We can use khz divisor instead of mhz to keep a better precision, since
- *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- *  (mathieu.desnoyers@polymtl.ca)
- *
- *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-DEFINE_PER_CPU(unsigned long, cyc2ns);
-
-static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
-{
-	unsigned long long tsc_now, ns_now;
-	unsigned long flags, *scale;
-
-	local_irq_save(flags);
-	sched_clock_idle_sleep_event();
-
-	scale = &per_cpu(cyc2ns, cpu);
-
-	rdtscll(tsc_now);
-	ns_now = __cycles_2_ns(tsc_now);
-
-	if (cpu_khz)
-		*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
-
-	sched_clock_idle_wakeup_event(0);
-	local_irq_restore(flags);
-}
-
-unsigned long long native_sched_clock(void)
-{
-	unsigned long a = 0;
-
-	/* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
-	 * which means it is not completely exact and may not be monotonous
-	 * between CPUs. But the errors should be too small to matter for
-	 * scheduling purposes.
-	 */
-
-	rdtscll(a);
-	return cycles_2_ns(a);
-}
-
-/* We need to define a real function for sched_clock, to override the
-   weak default version */
-#ifdef CONFIG_PARAVIRT
-unsigned long long sched_clock(void)
-{
-	return paravirt_sched_clock();
-}
-#else
-unsigned long long
-sched_clock(void) __attribute__((alias("native_sched_clock")));
-#endif
-
-
-static int tsc_unstable;
-
-int check_tsc_unstable(void)
-{
-	return tsc_unstable;
-}
-EXPORT_SYMBOL_GPL(check_tsc_unstable);
-
-#ifdef CONFIG_CPU_FREQ
-
-/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
- * changes.
- *
- * RED-PEN: On SMP we assume all CPUs run with the same frequency.  It's
- * not that important because current Opteron setups do not support
- * scaling on SMP anyroads.
- *
- * Should fix up last_tsc too. Currently gettimeofday in the
- * first tick after the change will be slightly wrong.
- */
-
-static unsigned int  ref_freq;
-static unsigned long loops_per_jiffy_ref;
-static unsigned long tsc_khz_ref;
-
-static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
-				 void *data)
-{
-	struct cpufreq_freqs *freq = data;
-	unsigned long *lpj, dummy;
-
-	if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
-		return 0;
-
-	lpj = &dummy;
-	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-#ifdef CONFIG_SMP
-		lpj = &cpu_data(freq->cpu).loops_per_jiffy;
-#else
-		lpj = &boot_cpu_data.loops_per_jiffy;
-#endif
-
-	if (!ref_freq) {
-		ref_freq = freq->old;
-		loops_per_jiffy_ref = *lpj;
-		tsc_khz_ref = tsc_khz;
-	}
-	if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
-		(val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
-		(val == CPUFREQ_RESUMECHANGE)) {
-		*lpj =
-		cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
-
-		tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
-		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-			mark_tsc_unstable("cpufreq changes");
-	}
-
-	set_cyc2ns_scale(tsc_khz_ref, freq->cpu);
-
-	return 0;
-}
-
-static struct notifier_block time_cpufreq_notifier_block = {
-	.notifier_call  = time_cpufreq_notifier
-};
-
-static int __init cpufreq_tsc(void)
-{
-	cpufreq_register_notifier(&time_cpufreq_notifier_block,
-				  CPUFREQ_TRANSITION_NOTIFIER);
-	return 0;
-}
-
-core_initcall(cpufreq_tsc);
-
-#endif
-
-#define MAX_RETRIES	5
-#define SMI_TRESHOLD	50000
-
-/*
- * Read TSC and the reference counters. Take care of SMI disturbance
- */
-static unsigned long __init tsc_read_refs(unsigned long *pm,
-					  unsigned long *hpet)
-{
-	unsigned long t1, t2;
-	int i;
-
-	for (i = 0; i < MAX_RETRIES; i++) {
-		t1 = get_cycles();
-		if (hpet)
-			*hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
-		else
-			*pm = acpi_pm_read_early();
-		t2 = get_cycles();
-		if ((t2 - t1) < SMI_TRESHOLD)
-			return t2;
-	}
-	return ULONG_MAX;
-}
-
-/**
- * tsc_calibrate - calibrate the tsc on boot
- */
-void __init tsc_calibrate(void)
-{
-	unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2;
-	int hpet = is_hpet_enabled(), cpu;
-
-	local_irq_save(flags);
-
-	tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
-
-	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
-
-	outb(0xb0, 0x43);
-	outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
-	outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
-	tr1 = get_cycles();
-	while ((inb(0x61) & 0x20) == 0);
-	tr2 = get_cycles();
-
-	tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
-
-	local_irq_restore(flags);
-
-	/*
-	 * Preset the result with the raw and inaccurate PIT
-	 * calibration value
-	 */
-	tsc_khz = (tr2 - tr1) / 50;
-
-	/* hpet or pmtimer available ? */
-	if (!hpet && !pm1 && !pm2) {
-		printk(KERN_INFO "TSC calibrated against PIT\n");
-		goto out;
-	}
-
-	/* Check, whether the sampling was disturbed by an SMI */
-	if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) {
-		printk(KERN_WARNING "TSC calibration disturbed by SMI, "
-		       "using PIT calibration result\n");
-		goto out;
-	}
-
-	tsc2 = (tsc2 - tsc1) * 1000000L;
-
-	if (hpet) {
-		printk(KERN_INFO "TSC calibrated against HPET\n");
-		if (hpet2 < hpet1)
-			hpet2 += 0x100000000UL;
-		hpet2 -= hpet1;
-		tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000;
-	} else {
-		printk(KERN_INFO "TSC calibrated against PM_TIMER\n");
-		if (pm2 < pm1)
-			pm2 += ACPI_PM_OVRRUN;
-		pm2 -= pm1;
-		tsc1 = (pm2 * 1000000000) / PMTMR_TICKS_PER_SEC;
-	}
-
-	tsc_khz = tsc2 / tsc1;
-
-out:
-	for_each_possible_cpu(cpu)
-		set_cyc2ns_scale(tsc_khz, cpu);
-}
-
-/*
- * Make an educated guess if the TSC is trustworthy and synchronized
- * over all CPUs.
- */
-__cpuinit int unsynchronized_tsc(void)
-{
-	if (tsc_unstable)
-		return 1;
-
-#ifdef CONFIG_SMP
-	if (apic_is_clustered_box())
-		return 1;
-#endif
-
-	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
-		return 0;
-
-	/* Assume multi socket systems are not synchronized */
-	return num_present_cpus() > 1;
-}
-
-int __init notsc_setup(char *s)
-{
-	notsc = 1;
-	return 1;
-}
-
-__setup("notsc", notsc_setup);
-
-static struct clocksource clocksource_tsc;
-
-/*
- * We compare the TSC to the cycle_last value in the clocksource
- * structure to avoid a nasty time-warp. This can be observed in a
- * very small window right after one CPU updated cycle_last under
- * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
- * is smaller than the cycle_last reference value due to a TSC which
- * is slighty behind. This delta is nowhere else observable, but in
- * that case it results in a forward time jump in the range of hours
- * due to the unsigned delta calculation of the time keeping core
- * code, which is necessary to support wrapping clocksources like pm
- * timer.
- */
-static cycle_t read_tsc(void)
-{
-	cycle_t ret = (cycle_t)get_cycles();
-
-	return ret >= clocksource_tsc.cycle_last ?
-		ret : clocksource_tsc.cycle_last;
-}
-
-static cycle_t __vsyscall_fn vread_tsc(void)
-{
-	cycle_t ret = (cycle_t)vget_cycles();
-
-	return ret >= __vsyscall_gtod_data.clock.cycle_last ?
-		ret : __vsyscall_gtod_data.clock.cycle_last;
-}
-
-static struct clocksource clocksource_tsc = {
-	.name			= "tsc",
-	.rating			= 300,
-	.read			= read_tsc,
-	.mask			= CLOCKSOURCE_MASK(64),
-	.shift			= 22,
-	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
-				  CLOCK_SOURCE_MUST_VERIFY,
-	.vread			= vread_tsc,
-};
-
-void mark_tsc_unstable(char *reason)
-{
-	if (!tsc_unstable) {
-		tsc_unstable = 1;
-		printk("Marking TSC unstable due to %s\n", reason);
-		/* Change only the rating, when not registered */
-		if (clocksource_tsc.mult)
-			clocksource_change_rating(&clocksource_tsc, 0);
-		else
-			clocksource_tsc.rating = 0;
-	}
-}
-EXPORT_SYMBOL_GPL(mark_tsc_unstable);
-
-void __init init_tsc_clocksource(void)
-{
-	if (!notsc) {
-		clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
-							clocksource_tsc.shift);
-		if (check_tsc_unstable())
-			clocksource_tsc.rating = 0;
-
-		clocksource_register(&clocksource_tsc);
-	}
-}
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
new file mode 100644
index 00000000000..e94bdb6add1
--- /dev/null
+++ b/arch/x86/kernel/visws_quirks.c
@@ -0,0 +1,709 @@
+/*
+ *  SGI Visual Workstation support and quirks, unmaintained.
+ *
+ *  Split out from setup.c by davej@suse.de
+ *
+ *	Copyright (C) 1999 Bent Hagemark, Ingo Molnar
+ *
+ *  SGI Visual Workstation interrupt controller
+ *
+ *  The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC
+ *  which serves as the main interrupt controller in the system.  Non-legacy
+ *  hardware in the system uses this controller directly.  Legacy devices
+ *  are connected to the PIIX4 which in turn has its 8259(s) connected to
+ *  a of the Cobalt APIC entry.
+ *
+ *  09/02/2000 - Updated for 2.4 by jbarnes@sgi.com
+ *
+ *  25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru>
+ */
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/visws/cobalt.h>
+#include <asm/visws/piix4.h>
+#include <asm/arch_hooks.h>
+#include <asm/fixmap.h>
+#include <asm/reboot.h>
+#include <asm/setup.h>
+#include <asm/e820.h>
+#include <asm/smp.h>
+#include <asm/io.h>
+
+#include <mach_ipi.h>
+
+#include "mach_apic.h"
+
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <linux/kernel_stat.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/i8259.h>
+#include <asm/irq_vectors.h>
+#include <asm/visws/cobalt.h>
+#include <asm/visws/lithium.h>
+#include <asm/visws/piix4.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+extern int no_broadcast;
+
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/arch_hooks.h>
+#include <asm/visws/cobalt.h>
+#include <asm/visws/lithium.h>
+
+char visws_board_type	= -1;
+char visws_board_rev	= -1;
+
+int is_visws_box(void)
+{
+	return visws_board_type >= 0;
+}
+
+static int __init visws_time_init_quirk(void)
+{
+	printk(KERN_INFO "Starting Cobalt Timer system clock\n");
+
+	/* Set the countdown value */
+	co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
+
+	/* Start the timer */
+	co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
+
+	/* Enable (unmask) the timer interrupt */
+	co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
+
+	/*
+	 * Zero return means the generic timer setup code will set up
+	 * the standard vector:
+	 */
+	return 0;
+}
+
+static int __init visws_pre_intr_init_quirk(void)
+{
+	init_VISWS_APIC_irqs();
+
+	/*
+	 * We dont want ISA irqs to be set up by the generic code:
+	 */
+	return 1;
+}
+
+/* Quirk for machine specific memory setup. */
+
+#define MB (1024 * 1024)
+
+unsigned long sgivwfb_mem_phys;
+unsigned long sgivwfb_mem_size;
+EXPORT_SYMBOL(sgivwfb_mem_phys);
+EXPORT_SYMBOL(sgivwfb_mem_size);
+
+long long mem_size __initdata = 0;
+
+static char * __init visws_memory_setup_quirk(void)
+{
+	long long gfx_mem_size = 8 * MB;
+
+	mem_size = boot_params.alt_mem_k;
+
+	if (!mem_size) {
+		printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n");
+		mem_size = 128 * MB;
+	}
+
+	/*
+	 * this hardcodes the graphics memory to 8 MB
+	 * it really should be sized dynamically (or at least
+	 * set as a boot param)
+	 */
+	if (!sgivwfb_mem_size) {
+		printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n");
+		sgivwfb_mem_size = 8 * MB;
+	}
+
+	/*
+	 * Trim to nearest MB
+	 */
+	sgivwfb_mem_size &= ~((1 << 20) - 1);
+	sgivwfb_mem_phys = mem_size - gfx_mem_size;
+
+	e820_add_region(0, LOWMEMSIZE(), E820_RAM);
+	e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
+	e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
+
+	return "PROM";
+}
+
+static void visws_machine_emergency_restart(void)
+{
+	/*
+	 * Visual Workstations restart after this
+	 * register is poked on the PIIX4
+	 */
+	outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
+}
+
+static void visws_machine_power_off(void)
+{
+	unsigned short pm_status;
+/*	extern unsigned int pci_bus0; */
+
+	while ((pm_status = inw(PMSTS_PORT)) & 0x100)
+		outw(pm_status, PMSTS_PORT);
+
+	outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT);
+
+	mdelay(10);
+
+#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
+	(0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
+
+/*	outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */
+	outl(PIIX_SPECIAL_STOP, 0xCFC);
+}
+
+static int __init visws_get_smp_config_quirk(unsigned int early)
+{
+	/*
+	 * Prevent MP-table parsing by the generic code:
+	 */
+	return 1;
+}
+
+extern unsigned int __cpuinitdata maxcpus;
+
+/*
+ * The Visual Workstation is Intel MP compliant in the hardware
+ * sense, but it doesn't have a BIOS(-configuration table).
+ * No problem for Linux.
+ */
+
+static void __init MP_processor_info (struct mpc_config_processor *m)
+{
+	int ver, logical_apicid;
+	physid_mask_t apic_cpus;
+
+	if (!(m->mpc_cpuflag & CPU_ENABLED))
+		return;
+
+	logical_apicid = m->mpc_apicid;
+	printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
+	       m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
+	       m->mpc_apicid,
+	       (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+	       (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+	       m->mpc_apicver);
+
+	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR)
+		boot_cpu_physical_apicid = m->mpc_apicid;
+
+	ver = m->mpc_apicver;
+	if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) {
+		printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
+			m->mpc_apicid, MAX_APICS);
+		return;
+	}
+
+	apic_cpus = apicid_to_cpu_present(m->mpc_apicid);
+	physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
+	/*
+	 * Validate version
+	 */
+	if (ver == 0x0) {
+		printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
+			"fixing up to 0x10. (tell your hw vendor)\n",
+			m->mpc_apicid);
+		ver = 0x10;
+	}
+	apic_version[m->mpc_apicid] = ver;
+}
+
+int __init visws_find_smp_config_quirk(unsigned int reserve)
+{
+	struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS);
+	unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
+
+	if (ncpus > CO_CPU_MAX) {
+		printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n",
+			ncpus, mp);
+
+		ncpus = CO_CPU_MAX;
+	}
+
+	if (ncpus > maxcpus)
+		ncpus = maxcpus;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	smp_found_config = 1;
+#endif
+	while (ncpus--)
+		MP_processor_info(mp++);
+
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+	return 1;
+}
+
+extern int visws_trap_init_quirk(void);
+
+void __init visws_early_detect(void)
+{
+	int raw;
+
+	visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG)
+							 >> PIIX_GPI_BD_SHIFT;
+
+	if (visws_board_type < 0)
+		return;
+
+	/*
+	 * Install special quirks for timer, interrupt and memory setup:
+	 */
+	arch_time_init_quirk		= visws_time_init_quirk;
+	arch_pre_intr_init_quirk	= visws_pre_intr_init_quirk;
+	arch_memory_setup_quirk		= visws_memory_setup_quirk;
+
+	/*
+	 * Fall back to generic behavior for traps:
+	 */
+	arch_intr_init_quirk		= NULL;
+	arch_trap_init_quirk		= visws_trap_init_quirk;
+
+	/*
+	 * Install reboot quirks:
+	 */
+	pm_power_off			= visws_machine_power_off;
+	machine_ops.emergency_restart	= visws_machine_emergency_restart;
+
+	/*
+	 * Do not use broadcast IPIs:
+	 */
+	no_broadcast = 0;
+
+	/*
+	 * Override generic MP-table parsing:
+	 */
+	mach_get_smp_config_quirk	= visws_get_smp_config_quirk;
+	mach_find_smp_config_quirk	= visws_find_smp_config_quirk;
+
+#ifdef CONFIG_X86_IO_APIC
+	/*
+	 * Turn off IO-APIC detection and initialization:
+	 */
+	skip_ioapic_setup		= 1;
+#endif
+
+	/*
+	 * Get Board rev.
+	 * First, we have to initialize the 307 part to allow us access
+	 * to the GPIO registers.  Let's map them at 0x0fc0 which is right
+	 * after the PIIX4 PM section.
+	 */
+	outb_p(SIO_DEV_SEL, SIO_INDEX);
+	outb_p(SIO_GP_DEV, SIO_DATA);	/* Talk to GPIO regs. */
+
+	outb_p(SIO_DEV_MSB, SIO_INDEX);
+	outb_p(SIO_GP_MSB, SIO_DATA);	/* MSB of GPIO base address */
+
+	outb_p(SIO_DEV_LSB, SIO_INDEX);
+	outb_p(SIO_GP_LSB, SIO_DATA);	/* LSB of GPIO base address */
+
+	outb_p(SIO_DEV_ENB, SIO_INDEX);
+	outb_p(1, SIO_DATA);		/* Enable GPIO registers. */
+
+	/*
+	 * Now, we have to map the power management section to write
+	 * a bit which enables access to the GPIO registers.
+	 * What lunatic came up with this shit?
+	 */
+	outb_p(SIO_DEV_SEL, SIO_INDEX);
+	outb_p(SIO_PM_DEV, SIO_DATA);	/* Talk to GPIO regs. */
+
+	outb_p(SIO_DEV_MSB, SIO_INDEX);
+	outb_p(SIO_PM_MSB, SIO_DATA);	/* MSB of PM base address */
+
+	outb_p(SIO_DEV_LSB, SIO_INDEX);
+	outb_p(SIO_PM_LSB, SIO_DATA);	/* LSB of PM base address */
+
+	outb_p(SIO_DEV_ENB, SIO_INDEX);
+	outb_p(1, SIO_DATA);		/* Enable PM registers. */
+
+	/*
+	 * Now, write the PM register which enables the GPIO registers.
+	 */
+	outb_p(SIO_PM_FER2, SIO_PM_INDEX);
+	outb_p(SIO_PM_GP_EN, SIO_PM_DATA);
+
+	/*
+	 * Now, initialize the GPIO registers.
+	 * We want them all to be inputs which is the
+	 * power on default, so let's leave them alone.
+	 * So, let's just read the board rev!
+	 */
+	raw = inb_p(SIO_GP_DATA1);
+	raw &= 0x7f;	/* 7 bits of valid board revision ID. */
+
+	if (visws_board_type == VISWS_320) {
+		if (raw < 0x6) {
+			visws_board_rev = 4;
+		} else if (raw < 0xc) {
+			visws_board_rev = 5;
+		} else {
+			visws_board_rev = 6;
+		}
+	} else if (visws_board_type == VISWS_540) {
+			visws_board_rev = 2;
+		} else {
+			visws_board_rev = raw;
+		}
+
+	printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n",
+	       (visws_board_type == VISWS_320 ? "320" :
+	       (visws_board_type == VISWS_540 ? "540" :
+		"unknown")), visws_board_rev);
+}
+
+#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4)
+#define BCD (LI_INTB | LI_INTC | LI_INTD)
+#define ALLDEVS (A01234 | BCD)
+
+static __init void lithium_init(void)
+{
+	set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS);
+	set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
+
+	if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
+	    (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
+		printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A');
+/*		panic("This machine is not SGI Visual Workstation 320/540"); */
+	}
+
+	if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
+	    (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
+		printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B');
+/*		panic("This machine is not SGI Visual Workstation 320/540"); */
+	}
+
+	li_pcia_write16(LI_PCI_INTEN, ALLDEVS);
+	li_pcib_write16(LI_PCI_INTEN, ALLDEVS);
+}
+
+static __init void cobalt_init(void)
+{
+	/*
+	 * On normal SMP PC this is used only with SMP, but we have to
+	 * use it and set it up here to start the Cobalt clock
+	 */
+	set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
+	setup_local_APIC();
+	printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
+		(unsigned int)apic_read(APIC_LVR),
+		(unsigned int)apic_read(APIC_ID));
+
+	set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
+	set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
+	printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n",
+		co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID));
+
+	/* Enable Cobalt APIC being careful to NOT change the ID! */
+	co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE);
+
+	printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n",
+		co_apic_read(CO_APIC_ID));
+}
+
+int __init visws_trap_init_quirk(void)
+{
+	lithium_init();
+	cobalt_init();
+
+	return 1;
+}
+
+/*
+ * IRQ controller / APIC support:
+ */
+
+static DEFINE_SPINLOCK(cobalt_lock);
+
+/*
+ * Set the given Cobalt APIC Redirection Table entry to point
+ * to the given IDT vector/index.
+ */
+static inline void co_apic_set(int entry, int irq)
+{
+	co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR));
+	co_apic_write(CO_APIC_HI(entry), 0);
+}
+
+/*
+ * Cobalt (IO)-APIC functions to handle PCI devices.
+ */
+static inline int co_apic_ide0_hack(void)
+{
+	extern char visws_board_type;
+	extern char visws_board_rev;
+
+	if (visws_board_type == VISWS_320 && visws_board_rev == 5)
+		return 5;
+	return CO_APIC_IDE0;
+}
+
+static int is_co_apic(unsigned int irq)
+{
+	if (IS_CO_APIC(irq))
+		return CO_APIC(irq);
+
+	switch (irq) {
+		case 0: return CO_APIC_CPU;
+		case CO_IRQ_IDE0: return co_apic_ide0_hack();
+		case CO_IRQ_IDE1: return CO_APIC_IDE1;
+		default: return -1;
+	}
+}
+
+
+/*
+ * This is the SGI Cobalt (IO-)APIC:
+ */
+
+static void enable_cobalt_irq(unsigned int irq)
+{
+	co_apic_set(is_co_apic(irq), irq);
+}
+
+static void disable_cobalt_irq(unsigned int irq)
+{
+	int entry = is_co_apic(irq);
+
+	co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
+	co_apic_read(CO_APIC_LO(entry));
+}
+
+/*
+ * "irq" really just serves to identify the device.  Here is where we
+ * map this to the Cobalt APIC entry where it's physically wired.
+ * This is called via request_irq -> setup_irq -> irq_desc->startup()
+ */
+static unsigned int startup_cobalt_irq(unsigned int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cobalt_lock, flags);
+	if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
+		irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
+	enable_cobalt_irq(irq);
+	spin_unlock_irqrestore(&cobalt_lock, flags);
+	return 0;
+}
+
+static void ack_cobalt_irq(unsigned int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cobalt_lock, flags);
+	disable_cobalt_irq(irq);
+	apic_write(APIC_EOI, APIC_EIO_ACK);
+	spin_unlock_irqrestore(&cobalt_lock, flags);
+}
+
+static void end_cobalt_irq(unsigned int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cobalt_lock, flags);
+	if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
+		enable_cobalt_irq(irq);
+	spin_unlock_irqrestore(&cobalt_lock, flags);
+}
+
+static struct irq_chip cobalt_irq_type = {
+	.typename =	"Cobalt-APIC",
+	.startup =	startup_cobalt_irq,
+	.shutdown =	disable_cobalt_irq,
+	.enable =	enable_cobalt_irq,
+	.disable =	disable_cobalt_irq,
+	.ack =		ack_cobalt_irq,
+	.end =		end_cobalt_irq,
+};
+
+
+/*
+ * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt
+ * -- not the manner expected by the code in i8259.c.
+ *
+ * there is a 'master' physical interrupt source that gets sent to
+ * the CPU. But in the chipset there are various 'virtual' interrupts
+ * waiting to be handled. We represent this to Linux through a 'master'
+ * interrupt controller type, and through a special virtual interrupt-
+ * controller. Device drivers only see the virtual interrupt sources.
+ */
+static unsigned int startup_piix4_master_irq(unsigned int irq)
+{
+	init_8259A(0);
+
+	return startup_cobalt_irq(irq);
+}
+
+static void end_piix4_master_irq(unsigned int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cobalt_lock, flags);
+	enable_cobalt_irq(irq);
+	spin_unlock_irqrestore(&cobalt_lock, flags);
+}
+
+static struct irq_chip piix4_master_irq_type = {
+	.typename =	"PIIX4-master",
+	.startup =	startup_piix4_master_irq,
+	.ack =		ack_cobalt_irq,
+	.end =		end_piix4_master_irq,
+};
+
+
+static struct irq_chip piix4_virtual_irq_type = {
+	.typename =	"PIIX4-virtual",
+	.shutdown =	disable_8259A_irq,
+	.enable =	enable_8259A_irq,
+	.disable =	disable_8259A_irq,
+};
+
+
+/*
+ * PIIX4-8259 master/virtual functions to handle interrupt requests
+ * from legacy devices: floppy, parallel, serial, rtc.
+ *
+ * None of these get Cobalt APIC entries, neither do they have IDT
+ * entries. These interrupts are purely virtual and distributed from
+ * the 'master' interrupt source: CO_IRQ_8259.
+ *
+ * When the 8259 interrupts its handler figures out which of these
+ * devices is interrupting and dispatches to its handler.
+ *
+ * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/
+ * enable_irq gets the right irq. This 'master' irq is never directly
+ * manipulated by any driver.
+ */
+static irqreturn_t piix4_master_intr(int irq, void *dev_id)
+{
+	int realirq;
+	irq_desc_t *desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+
+	/* Find out what's interrupting in the PIIX4 master 8259 */
+	outb(0x0c, 0x20);		/* OCW3 Poll command */
+	realirq = inb(0x20);
+
+	/*
+	 * Bit 7 == 0 means invalid/spurious
+	 */
+	if (unlikely(!(realirq & 0x80)))
+		goto out_unlock;
+
+	realirq &= 7;
+
+	if (unlikely(realirq == 2)) {
+		outb(0x0c, 0xa0);
+		realirq = inb(0xa0);
+
+		if (unlikely(!(realirq & 0x80)))
+			goto out_unlock;
+
+		realirq = (realirq & 7) + 8;
+	}
+
+	/* mask and ack interrupt */
+	cached_irq_mask |= 1 << realirq;
+	if (unlikely(realirq > 7)) {
+		inb(0xa1);
+		outb(cached_slave_mask, 0xa1);
+		outb(0x60 + (realirq & 7), 0xa0);
+		outb(0x60 + 2, 0x20);
+	} else {
+		inb(0x21);
+		outb(cached_master_mask, 0x21);
+		outb(0x60 + realirq, 0x20);
+	}
+
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+
+	desc = irq_desc + realirq;
+
+	/*
+	 * handle this 'virtual interrupt' as a Cobalt one now.
+	 */
+	kstat_cpu(smp_processor_id()).irqs[realirq]++;
+
+	if (likely(desc->action != NULL))
+		handle_IRQ_event(realirq, desc->action);
+
+	if (!(desc->status & IRQ_DISABLED))
+		enable_8259A_irq(realirq);
+
+	return IRQ_HANDLED;
+
+out_unlock:
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+	return IRQ_NONE;
+}
+
+static struct irqaction master_action = {
+	.handler =	piix4_master_intr,
+	.name =		"PIIX4-8259",
+};
+
+static struct irqaction cascade_action = {
+	.handler = 	no_action,
+	.name =		"cascade",
+};
+
+
+void init_VISWS_APIC_irqs(void)
+{
+	int i;
+
+	for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
+		irq_desc[i].status = IRQ_DISABLED;
+		irq_desc[i].action = 0;
+		irq_desc[i].depth = 1;
+
+		if (i == 0) {
+			irq_desc[i].chip = &cobalt_irq_type;
+		}
+		else if (i == CO_IRQ_IDE0) {
+			irq_desc[i].chip = &cobalt_irq_type;
+		}
+		else if (i == CO_IRQ_IDE1) {
+			irq_desc[i].chip = &cobalt_irq_type;
+		}
+		else if (i == CO_IRQ_8259) {
+			irq_desc[i].chip = &piix4_master_irq_type;
+		}
+		else if (i < CO_IRQ_APIC0) {
+			irq_desc[i].chip = &piix4_virtual_irq_type;
+		}
+		else if (IS_CO_APIC(i)) {
+			irq_desc[i].chip = &cobalt_irq_type;
+		}
+	}
+
+	setup_irq(CO_IRQ_8259, &master_action);
+	setup_irq(2, &cascade_action);
+}
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 946bf13b44a..b15346092b7 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -932,7 +932,7 @@ static inline int __init activate_vmi(void)
 		pv_apic_ops.setup_secondary_clock = vmi_time_ap_init;
 #endif
 		pv_time_ops.sched_clock = vmi_sched_clock;
- 		pv_time_ops.get_cpu_khz = vmi_cpu_khz;
+		pv_time_ops.get_tsc_khz = vmi_tsc_khz;
 
 		/* We have true wallclock functions; disable CMOS clock sync */
 		no_sync_cmos_clock = 1;
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index ba7d19e102b..6953859fe28 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -69,8 +69,8 @@ unsigned long long vmi_sched_clock(void)
 	return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE));
 }
 
-/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */
-unsigned long vmi_cpu_khz(void)
+/* paravirt_ops.get_tsc_khz = vmi_tsc_khz */
+unsigned long vmi_tsc_khz(void)
 {
 	unsigned long long khz;
 	khz = vmi_timer_ops.get_cycle_frequency();
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 2674f579627..cdb2363697d 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -49,16 +49,14 @@ SECTIONS
   	_etext = .;			/* End of text section */
   } :text = 0x9090
 
+  NOTES :text :note
+
   . = ALIGN(16);		/* Exception table */
   __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
   	__start___ex_table = .;
 	 *(__ex_table)
   	__stop___ex_table = .;
-  }
-
-  NOTES :text :note
-
-  BUG_TABLE :text
+  } :text = 0x9090
 
   RODATA
 
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fd246e22fe6..63e5c1a22e8 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -19,7 +19,7 @@ PHDRS {
 	data PT_LOAD FLAGS(7);	/* RWE */
 	user PT_LOAD FLAGS(7);	/* RWE */
 	data.init PT_LOAD FLAGS(7);	/* RWE */
-	note PT_NOTE FLAGS(4);	/* R__ */
+	note PT_NOTE FLAGS(0);	/* ___ */
 }
 SECTIONS
 {
@@ -40,16 +40,14 @@ SECTIONS
 	_etext = .;		/* End of text section */
   } :text = 0x9090
 
+  NOTES :text :note
+
   . = ALIGN(16);		/* Exception table */
   __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
   	__start___ex_table = .;
 	 *(__ex_table)
   	__stop___ex_table = .;
-  }
-
-  NOTES :text :note
-
-  BUG_TABLE :text
+  } :text = 0x9090
 
   RODATA
 
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index c87cbd84c3e..0b8b6690a86 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -42,7 +42,8 @@
 #include <asm/topology.h>
 #include <asm/vgtod.h>
 
-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+#define __vsyscall(nr) \
+		__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
 #define __syscall_clobber "r11","cx","memory"
 
 /*
@@ -278,7 +279,7 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
 {
 	long cpu = (long)arg;
 	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
-		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
+		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
 	return NOTIFY_DONE;
 }
 
@@ -301,7 +302,7 @@ static int __init vsyscall_init(void)
 #ifdef CONFIG_SYSCTL
 	register_sysctl_table(kernel_root_table2);
 #endif
-	on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
+	on_each_cpu(cpu_vsyscall_init, NULL, 1);
 	hotcpu_notifier(cpu_vsyscall_notifier, 0);
 	return 0;
 }
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 2f306a82689..b545f371b5f 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -2,13 +2,20 @@
    All C exports should go in the respective C files. */
 
 #include <linux/module.h>
-#include <net/checksum.h>
 #include <linux/smp.h>
 
+#include <net/checksum.h>
+
 #include <asm/processor.h>
-#include <asm/uaccess.h>
 #include <asm/pgtable.h>
+#include <asm/uaccess.h>
 #include <asm/desc.h>
+#include <asm/ftrace.h>
+
+#ifdef CONFIG_FTRACE
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(mcount);
+#endif
 
 EXPORT_SYMBOL(kernel_thread);
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 540e9517907..10ce6ee4c49 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -335,7 +335,7 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
 {
 	if (vmx->vcpu.cpu == -1)
 		return;
-	smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 0, 1);
+	smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1);
 	vmx->launched = 0;
 }
 
@@ -2968,7 +2968,7 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	if (vmx->vmcs) {
-		on_each_cpu(__vcpu_clear, vmx, 0, 1);
+		on_each_cpu(__vcpu_clear, vmx, 1);
 		free_vmcs(vmx->vmcs);
 		vmx->vmcs = NULL;
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 63a77caa59f..0faa2546b1c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4044,6 +4044,6 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 	 * So need not to call smp_call_function_single() in that case.
 	 */
 	if (vcpu->guest_mode && vcpu->cpu != cpu)
-		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
+		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
 	put_cpu();
 }
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index 964dfa36d36..c70e12b1a63 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -3,7 +3,7 @@ config LGUEST_GUEST
 	select PARAVIRT
 	depends on X86_32
 	depends on !X86_PAE
-	depends on !(X86_VISWS || X86_VOYAGER)
+	depends on !X86_VOYAGER
 	select VIRTIO
 	select VIRTIO_RING
 	select VIRTIO_CONSOLE
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index e72cf0793fb..50dad44fb54 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -607,7 +607,7 @@ static unsigned long lguest_get_wallclock(void)
  * what speed it runs at, or 0 if it's unusable as a reliable clock source.
  * This matches what we want here: if we return 0 from this function, the x86
  * TSC clock will give up and not register itself. */
-static unsigned long lguest_cpu_khz(void)
+static unsigned long lguest_tsc_khz(void)
 {
 	return lguest_data.tsc_khz;
 }
@@ -998,7 +998,7 @@ __init void lguest_init(void)
 	/* time operations */
 	pv_time_ops.get_wallclock = lguest_get_wallclock;
 	pv_time_ops.time_init = lguest_time_init;
-	pv_time_ops.get_cpu_khz = lguest_cpu_khz;
+	pv_time_ops.get_tsc_khz = lguest_tsc_khz;
 
 	/* Now is a good time to look at the implementations of these functions
 	 * before returning to the rest of lguest_init(). */
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 76f60f52a88..aa3fa411942 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -4,8 +4,9 @@
 
 obj-$(CONFIG_SMP) := msr-on-cpu.o
 
-lib-y := delay_$(BITS).o
-lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o
+lib-y := delay.o
+lib-y += thunk_$(BITS).o
+lib-y += usercopy_$(BITS).o getuser.o putuser.o
 lib-y += memcpy_$(BITS).o
 
 ifeq ($(CONFIG_X86_32),y)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index ee1c3f63515..dfdf428975c 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -1,8 +1,10 @@
-/* Copyright 2002 Andi Kleen, SuSE Labs.
+/*
+ * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
+ * Copyright 2002 Andi Kleen, SuSE Labs.
  * Subject to the GNU Public License v2.
- * 
- * Functions to copy from and to user space.		
- */		 
+ *
+ * Functions to copy from and to user space.
+ */
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
@@ -20,60 +22,88 @@
 	.long \orig-1f	/* by default jump to orig */
 1:
 	.section .altinstr_replacement,"ax"
-2:	.byte 0xe9	             /* near jump with 32bit immediate */
+2:	.byte 0xe9			/* near jump with 32bit immediate */
 	.long \alt-1b /* offset */   /* or alternatively to alt */
 	.previous
 	.section .altinstructions,"a"
 	.align 8
 	.quad  0b
 	.quad  2b
-	.byte  \feature		     /* when feature is set */
+	.byte  \feature			/* when feature is set */
 	.byte  5
 	.byte  5
 	.previous
 	.endm
 
-/* Standard copy_to_user with segment limit checking */		
+	.macro ALIGN_DESTINATION
+#ifdef FIX_ALIGNMENT
+	/* check for bad alignment of destination */
+	movl %edi,%ecx
+	andl $7,%ecx
+	jz 102f				/* already aligned */
+	subl $8,%ecx
+	negl %ecx
+	subl %ecx,%edx
+100:	movb (%rsi),%al
+101:	movb %al,(%rdi)
+	incq %rsi
+	incq %rdi
+	decl %ecx
+	jnz 100b
+102:
+	.section .fixup,"ax"
+103:	addl %r8d,%edx			/* ecx is zerorest also */
+	jmp copy_user_handle_tail
+	.previous
+
+	.section __ex_table,"a"
+	.align 8
+	.quad 100b,103b
+	.quad 101b,103b
+	.previous
+#endif
+	.endm
+
+/* Standard copy_to_user with segment limit checking */
 ENTRY(copy_to_user)
 	CFI_STARTPROC
 	GET_THREAD_INFO(%rax)
 	movq %rdi,%rcx
 	addq %rdx,%rcx
-	jc  bad_to_user
-	cmpq threadinfo_addr_limit(%rax),%rcx
+	jc bad_to_user
+	cmpq TI_addr_limit(%rax),%rcx
 	jae bad_to_user
-	xorl %eax,%eax	/* clear zero flag */
 	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
 	CFI_ENDPROC
 
-ENTRY(copy_user_generic)
+/* Standard copy_from_user with segment limit checking */
+ENTRY(copy_from_user)
 	CFI_STARTPROC
-	movl $1,%ecx	/* set zero flag */
+	GET_THREAD_INFO(%rax)
+	movq %rsi,%rcx
+	addq %rdx,%rcx
+	jc bad_from_user
+	cmpq TI_addr_limit(%rax),%rcx
+	jae bad_from_user
 	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
 	CFI_ENDPROC
+ENDPROC(copy_from_user)
 
-ENTRY(__copy_from_user_inatomic)
+ENTRY(copy_user_generic)
 	CFI_STARTPROC
-	xorl %ecx,%ecx	/* clear zero flag */
 	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
 	CFI_ENDPROC
+ENDPROC(copy_user_generic)
 
-/* Standard copy_from_user with segment limit checking */	
-ENTRY(copy_from_user)
+ENTRY(__copy_from_user_inatomic)
 	CFI_STARTPROC
-	GET_THREAD_INFO(%rax)
-	movq %rsi,%rcx
-	addq %rdx,%rcx
-	jc  bad_from_user
-	cmpq threadinfo_addr_limit(%rax),%rcx
-	jae  bad_from_user
-	movl $1,%ecx	/* set zero flag */
 	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
 	CFI_ENDPROC
-ENDPROC(copy_from_user)
-	
+ENDPROC(__copy_from_user_inatomic)
+
 	.section .fixup,"ax"
 	/* must zero dest */
+ENTRY(bad_from_user)
 bad_from_user:
 	CFI_STARTPROC
 	movl %edx,%ecx
@@ -81,271 +111,158 @@ bad_from_user:
 	rep
 	stosb
 bad_to_user:
-	movl	%edx,%eax
+	movl %edx,%eax
 	ret
 	CFI_ENDPROC
-END(bad_from_user)
+ENDPROC(bad_from_user)
 	.previous
-	
-		
+
 /*
  * copy_user_generic_unrolled - memory copy with exception handling.
- * This version is for CPUs like P4 that don't have efficient micro code for rep movsq
- * 	
- * Input:	
+ * This version is for CPUs like P4 that don't have efficient micro
+ * code for rep movsq
+ *
+ * Input:
  * rdi destination
  * rsi source
  * rdx count
- * ecx zero flag -- if true zero destination on error
  *
- * Output:		
- * eax uncopied bytes or 0 if successful.
+ * Output:
+ * eax uncopied bytes or 0 if successfull.
  */
 ENTRY(copy_user_generic_unrolled)
 	CFI_STARTPROC
-	pushq %rbx
-	CFI_ADJUST_CFA_OFFSET 8
-	CFI_REL_OFFSET rbx, 0
-	pushq %rcx
-	CFI_ADJUST_CFA_OFFSET 8
-	CFI_REL_OFFSET rcx, 0
-	xorl %eax,%eax		/*zero for the exception handler */
-
-#ifdef FIX_ALIGNMENT
-	/* check for bad alignment of destination */
-	movl %edi,%ecx
-	andl $7,%ecx
-	jnz  .Lbad_alignment
-.Lafter_bad_alignment:
-#endif
-
-	movq %rdx,%rcx
-
-	movl $64,%ebx
-	shrq $6,%rdx
-	decq %rdx
-	js   .Lhandle_tail
-
-	.p2align 4
-.Lloop:
-.Ls1:	movq (%rsi),%r11
-.Ls2:	movq 1*8(%rsi),%r8
-.Ls3:	movq 2*8(%rsi),%r9
-.Ls4:	movq 3*8(%rsi),%r10
-.Ld1:	movq %r11,(%rdi)
-.Ld2:	movq %r8,1*8(%rdi)
-.Ld3:	movq %r9,2*8(%rdi)
-.Ld4:	movq %r10,3*8(%rdi)
-
-.Ls5:	movq 4*8(%rsi),%r11
-.Ls6:	movq 5*8(%rsi),%r8
-.Ls7:	movq 6*8(%rsi),%r9
-.Ls8:	movq 7*8(%rsi),%r10
-.Ld5:	movq %r11,4*8(%rdi)
-.Ld6:	movq %r8,5*8(%rdi)
-.Ld7:	movq %r9,6*8(%rdi)
-.Ld8:	movq %r10,7*8(%rdi)
-
-	decq %rdx
-
+	cmpl $8,%edx
+	jb 20f		/* less then 8 bytes, go to byte copy loop */
+	ALIGN_DESTINATION
+	movl %edx,%ecx
+	andl $63,%edx
+	shrl $6,%ecx
+	jz 17f
+1:	movq (%rsi),%r8
+2:	movq 1*8(%rsi),%r9
+3:	movq 2*8(%rsi),%r10
+4:	movq 3*8(%rsi),%r11
+5:	movq %r8,(%rdi)
+6:	movq %r9,1*8(%rdi)
+7:	movq %r10,2*8(%rdi)
+8:	movq %r11,3*8(%rdi)
+9:	movq 4*8(%rsi),%r8
+10:	movq 5*8(%rsi),%r9
+11:	movq 6*8(%rsi),%r10
+12:	movq 7*8(%rsi),%r11
+13:	movq %r8,4*8(%rdi)
+14:	movq %r9,5*8(%rdi)
+15:	movq %r10,6*8(%rdi)
+16:	movq %r11,7*8(%rdi)
 	leaq 64(%rsi),%rsi
 	leaq 64(%rdi),%rdi
-
-	jns  .Lloop
-
-	.p2align 4
-.Lhandle_tail:
-	movl %ecx,%edx
-	andl $63,%ecx
-	shrl $3,%ecx
-	jz   .Lhandle_7
-	movl $8,%ebx
-	.p2align 4
-.Lloop_8:
-.Ls9:	movq (%rsi),%r8
-.Ld9:	movq %r8,(%rdi)
 	decl %ecx
-	leaq 8(%rdi),%rdi
+	jnz 1b
+17:	movl %edx,%ecx
+	andl $7,%edx
+	shrl $3,%ecx
+	jz 20f
+18:	movq (%rsi),%r8
+19:	movq %r8,(%rdi)
 	leaq 8(%rsi),%rsi
-	jnz .Lloop_8
-
-.Lhandle_7:
+	leaq 8(%rdi),%rdi
+	decl %ecx
+	jnz 18b
+20:	andl %edx,%edx
+	jz 23f
 	movl %edx,%ecx
-	andl $7,%ecx
-	jz   .Lende
-	.p2align 4
-.Lloop_1:
-.Ls10:	movb (%rsi),%bl
-.Ld10:	movb %bl,(%rdi)
-	incq %rdi
+21:	movb (%rsi),%al
+22:	movb %al,(%rdi)
 	incq %rsi
+	incq %rdi
 	decl %ecx
-	jnz .Lloop_1
-
-	CFI_REMEMBER_STATE
-.Lende:
-	popq %rcx
-	CFI_ADJUST_CFA_OFFSET -8
-	CFI_RESTORE rcx
-	popq %rbx
-	CFI_ADJUST_CFA_OFFSET -8
-	CFI_RESTORE rbx
+	jnz 21b
+23:	xor %eax,%eax
 	ret
-	CFI_RESTORE_STATE
 
-#ifdef FIX_ALIGNMENT
-	/* align destination */
-	.p2align 4
-.Lbad_alignment:
-	movl $8,%r9d
-	subl %ecx,%r9d
-	movl %r9d,%ecx
-	cmpq %r9,%rdx
-	jz   .Lhandle_7
-	js   .Lhandle_7
-.Lalign_1:
-.Ls11:	movb (%rsi),%bl
-.Ld11:	movb %bl,(%rdi)
-	incq %rsi
-	incq %rdi
-	decl %ecx
-	jnz .Lalign_1
-	subq %r9,%rdx
-	jmp .Lafter_bad_alignment
-#endif
+	.section .fixup,"ax"
+30:	shll $6,%ecx
+	addl %ecx,%edx
+	jmp 60f
+40:	lea (%rdx,%rcx,8),%rdx
+	jmp 60f
+50:	movl %ecx,%edx
+60:	jmp copy_user_handle_tail /* ecx is zerorest also */
+	.previous
 
-	/* table sorted by exception address */
 	.section __ex_table,"a"
 	.align 8
-	.quad .Ls1,.Ls1e	/* Ls1-Ls4 have copied zero bytes */
-	.quad .Ls2,.Ls1e
-	.quad .Ls3,.Ls1e
-	.quad .Ls4,.Ls1e
-	.quad .Ld1,.Ls1e	/* Ld1-Ld4 have copied 0-24 bytes */
-	.quad .Ld2,.Ls2e
-	.quad .Ld3,.Ls3e
-	.quad .Ld4,.Ls4e
-	.quad .Ls5,.Ls5e	/* Ls5-Ls8 have copied 32 bytes */
-	.quad .Ls6,.Ls5e
-	.quad .Ls7,.Ls5e
-	.quad .Ls8,.Ls5e
-	.quad .Ld5,.Ls5e	/* Ld5-Ld8 have copied 32-56 bytes */
-	.quad .Ld6,.Ls6e
-	.quad .Ld7,.Ls7e
-	.quad .Ld8,.Ls8e
-	.quad .Ls9,.Le_quad
-	.quad .Ld9,.Le_quad
-	.quad .Ls10,.Le_byte
-	.quad .Ld10,.Le_byte
-#ifdef FIX_ALIGNMENT
-	.quad .Ls11,.Lzero_rest
-	.quad .Ld11,.Lzero_rest
-#endif
-	.quad .Le5,.Le_zero
+	.quad 1b,30b
+	.quad 2b,30b
+	.quad 3b,30b
+	.quad 4b,30b
+	.quad 5b,30b
+	.quad 6b,30b
+	.quad 7b,30b
+	.quad 8b,30b
+	.quad 9b,30b
+	.quad 10b,30b
+	.quad 11b,30b
+	.quad 12b,30b
+	.quad 13b,30b
+	.quad 14b,30b
+	.quad 15b,30b
+	.quad 16b,30b
+	.quad 18b,40b
+	.quad 19b,40b
+	.quad 21b,50b
+	.quad 22b,50b
 	.previous
-
-	/* eax: zero, ebx: 64 */
-.Ls1e: 	addl $8,%eax		/* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */
-.Ls2e: 	addl $8,%eax
-.Ls3e: 	addl $8,%eax
-.Ls4e: 	addl $8,%eax
-.Ls5e: 	addl $8,%eax
-.Ls6e: 	addl $8,%eax
-.Ls7e: 	addl $8,%eax
-.Ls8e: 	addl $8,%eax
-	addq %rbx,%rdi	/* +64 */
-	subq %rax,%rdi  /* correct destination with computed offset */
-
-	shlq $6,%rdx	/* loop counter * 64 (stride length) */
-	addq %rax,%rdx	/* add offset to loopcnt */
-	andl $63,%ecx	/* remaining bytes */
-	addq %rcx,%rdx	/* add them */
-	jmp .Lzero_rest
-
-	/* exception on quad word loop in tail handling */
-	/* ecx:	loopcnt/8, %edx: length, rdi: correct */
-.Le_quad:
-	shll $3,%ecx
-	andl $7,%edx
-	addl %ecx,%edx
-	/* edx: bytes to zero, rdi: dest, eax:zero */
-.Lzero_rest:
-	cmpl $0,(%rsp)
-	jz   .Le_zero
-	movq %rdx,%rcx
-.Le_byte:
-	xorl %eax,%eax
-.Le5:	rep
-	stosb
-	/* when there is another exception while zeroing the rest just return */
-.Le_zero:
-	movq %rdx,%rax
-	jmp .Lende
 	CFI_ENDPROC
-ENDPROC(copy_user_generic)
+ENDPROC(copy_user_generic_unrolled)
 
-
-	/* Some CPUs run faster using the string copy instructions.
-	   This is also a lot simpler. Use them when possible.
-	   Patch in jmps to this code instead of copying it fully
-	   to avoid unwanted aliasing in the exception tables. */
-
- /* rdi	destination
-  * rsi source
-  * rdx count
-  * ecx zero flag
-  *
-  * Output:
-  * eax uncopied bytes or 0 if successfull.
-  *
-  * Only 4GB of copy is supported. This shouldn't be a problem
-  * because the kernel normally only writes from/to page sized chunks
-  * even if user space passed a longer buffer.
-  * And more would be dangerous because both Intel and AMD have
-  * errata with rep movsq > 4GB. If someone feels the need to fix
-  * this please consider this.
-  */
+/* Some CPUs run faster using the string copy instructions.
+ * This is also a lot simpler. Use them when possible.
+ *
+ * Only 4GB of copy is supported. This shouldn't be a problem
+ * because the kernel normally only writes from/to page sized chunks
+ * even if user space passed a longer buffer.
+ * And more would be dangerous because both Intel and AMD have
+ * errata with rep movsq > 4GB. If someone feels the need to fix
+ * this please consider this.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count
+ *
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
 ENTRY(copy_user_generic_string)
 	CFI_STARTPROC
-	movl %ecx,%r8d		/* save zero flag */
+	andl %edx,%edx
+	jz 4f
+	cmpl $8,%edx
+	jb 2f		/* less than 8 bytes, go to byte copy loop */
+	ALIGN_DESTINATION
 	movl %edx,%ecx
 	shrl $3,%ecx
-	andl $7,%edx	
-	jz   10f
-1:	rep 
-	movsq 
-	movl %edx,%ecx
-2:	rep
-	movsb
-9:	movl %ecx,%eax
-	ret
-
-	/* multiple of 8 byte */
-10:	rep
+	andl $7,%edx
+1:	rep
 	movsq
-	xor %eax,%eax
+2:	movl %edx,%ecx
+3:	rep
+	movsb
+4:	xorl %eax,%eax
 	ret
 
-	/* exception handling */
-3:      lea (%rdx,%rcx,8),%rax	/* exception on quad loop */
-	jmp 6f
-5:	movl %ecx,%eax		/* exception on byte loop */
-	/* eax: left over bytes */
-6:	testl %r8d,%r8d		/* zero flag set? */
-	jz 7f
-	movl %eax,%ecx		/* initialize x86 loop counter */
-	push %rax
-	xorl %eax,%eax
-8:	rep
-	stosb 			/* zero the rest */
-11:	pop %rax
-7:	ret
-	CFI_ENDPROC
-END(copy_user_generic_c)
+	.section .fixup,"ax"
+11:	lea (%rdx,%rcx,8),%rcx
+12:	movl %ecx,%edx		/* ecx is zerorest also */
+	jmp copy_user_handle_tail
+	.previous
 
 	.section __ex_table,"a"
-	.quad 1b,3b
-	.quad 2b,5b
-	.quad 8b,11b
-	.quad 10b,3b
+	.align 8
+	.quad 1b,11b
+	.quad 3b,12b
 	.previous
+	CFI_ENDPROC
+ENDPROC(copy_user_generic_string)
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
index 9d3d1ab8376..40e0e309d27 100644
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -1,4 +1,6 @@
-/* Copyright 2002 Andi Kleen, SuSE Labs.
+/*
+ * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
+ * Copyright 2002 Andi Kleen, SuSE Labs.
  * Subject to the GNU Public License v2.
  *
  * Functions to copy from and to user space.
@@ -12,204 +14,125 @@
 #include <asm/current.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
-#include <asm/cpufeature.h>
-
-/*
- * copy_user_nocache - Uncached memory copy with exception handling
- * This will force destination/source out of cache for more performance.
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count
- * rcx zero flag	when 1 zero on exception
- *
- * Output:
- * eax uncopied bytes or 0 if successful.
- */
-ENTRY(__copy_user_nocache)
-	CFI_STARTPROC
-	pushq %rbx
-	CFI_ADJUST_CFA_OFFSET 8
-	CFI_REL_OFFSET rbx, 0
-	pushq %rcx		/* save zero flag */
-	CFI_ADJUST_CFA_OFFSET 8
-	CFI_REL_OFFSET rcx, 0
-
-	xorl %eax,%eax		/* zero for the exception handler */
 
+	.macro ALIGN_DESTINATION
 #ifdef FIX_ALIGNMENT
 	/* check for bad alignment of destination */
 	movl %edi,%ecx
 	andl $7,%ecx
-	jnz  .Lbad_alignment
-.Lafter_bad_alignment:
-#endif
-
-	movq %rdx,%rcx
-
-	movl $64,%ebx
-	shrq $6,%rdx
-	decq %rdx
-	js   .Lhandle_tail
-
-	.p2align 4
-.Lloop:
-.Ls1:	movq (%rsi),%r11
-.Ls2:	movq 1*8(%rsi),%r8
-.Ls3:	movq 2*8(%rsi),%r9
-.Ls4:	movq 3*8(%rsi),%r10
-.Ld1:	movnti %r11,(%rdi)
-.Ld2:	movnti %r8,1*8(%rdi)
-.Ld3:	movnti %r9,2*8(%rdi)
-.Ld4:	movnti %r10,3*8(%rdi)
-
-.Ls5:	movq 4*8(%rsi),%r11
-.Ls6:	movq 5*8(%rsi),%r8
-.Ls7:	movq 6*8(%rsi),%r9
-.Ls8:	movq 7*8(%rsi),%r10
-.Ld5:	movnti %r11,4*8(%rdi)
-.Ld6:	movnti %r8,5*8(%rdi)
-.Ld7:	movnti %r9,6*8(%rdi)
-.Ld8:	movnti %r10,7*8(%rdi)
+	jz 102f				/* already aligned */
+	subl $8,%ecx
+	negl %ecx
+	subl %ecx,%edx
+100:	movb (%rsi),%al
+101:	movb %al,(%rdi)
+	incq %rsi
+	incq %rdi
+	decl %ecx
+	jnz 100b
+102:
+	.section .fixup,"ax"
+103:	addl %r8d,%edx			/* ecx is zerorest also */
+	jmp copy_user_handle_tail
+	.previous
 
-	dec  %rdx
+	.section __ex_table,"a"
+	.align 8
+	.quad 100b,103b
+	.quad 101b,103b
+	.previous
+#endif
+	.endm
 
+/*
+ * copy_user_nocache - Uncached memory copy with exception handling
+ * This will force destination/source out of cache for more performance.
+ */
+ENTRY(__copy_user_nocache)
+	CFI_STARTPROC
+	cmpl $8,%edx
+	jb 20f		/* less then 8 bytes, go to byte copy loop */
+	ALIGN_DESTINATION
+	movl %edx,%ecx
+	andl $63,%edx
+	shrl $6,%ecx
+	jz 17f
+1:	movq (%rsi),%r8
+2:	movq 1*8(%rsi),%r9
+3:	movq 2*8(%rsi),%r10
+4:	movq 3*8(%rsi),%r11
+5:	movnti %r8,(%rdi)
+6:	movnti %r9,1*8(%rdi)
+7:	movnti %r10,2*8(%rdi)
+8:	movnti %r11,3*8(%rdi)
+9:	movq 4*8(%rsi),%r8
+10:	movq 5*8(%rsi),%r9
+11:	movq 6*8(%rsi),%r10
+12:	movq 7*8(%rsi),%r11
+13:	movnti %r8,4*8(%rdi)
+14:	movnti %r9,5*8(%rdi)
+15:	movnti %r10,6*8(%rdi)
+16:	movnti %r11,7*8(%rdi)
 	leaq 64(%rsi),%rsi
 	leaq 64(%rdi),%rdi
-
-	jns  .Lloop
-
-	.p2align 4
-.Lhandle_tail:
-	movl %ecx,%edx
-	andl $63,%ecx
-	shrl $3,%ecx
-	jz   .Lhandle_7
-	movl $8,%ebx
-	.p2align 4
-.Lloop_8:
-.Ls9:	movq (%rsi),%r8
-.Ld9:	movnti %r8,(%rdi)
 	decl %ecx
-	leaq 8(%rdi),%rdi
+	jnz 1b
+17:	movl %edx,%ecx
+	andl $7,%edx
+	shrl $3,%ecx
+	jz 20f
+18:	movq (%rsi),%r8
+19:	movnti %r8,(%rdi)
 	leaq 8(%rsi),%rsi
-	jnz .Lloop_8
-
-.Lhandle_7:
+	leaq 8(%rdi),%rdi
+	decl %ecx
+	jnz 18b
+20:	andl %edx,%edx
+	jz 23f
 	movl %edx,%ecx
-	andl $7,%ecx
-	jz   .Lende
-	.p2align 4
-.Lloop_1:
-.Ls10:	movb (%rsi),%bl
-.Ld10:	movb %bl,(%rdi)
-	incq %rdi
+21:	movb (%rsi),%al
+22:	movb %al,(%rdi)
 	incq %rsi
+	incq %rdi
 	decl %ecx
-	jnz .Lloop_1
-
-	CFI_REMEMBER_STATE
-.Lende:
-	popq %rcx
-	CFI_ADJUST_CFA_OFFSET -8
-	CFI_RESTORE %rcx
-	popq %rbx
-	CFI_ADJUST_CFA_OFFSET -8
-	CFI_RESTORE rbx
+	jnz 21b
+23:	xorl %eax,%eax
 	sfence
 	ret
-	CFI_RESTORE_STATE
 
-#ifdef FIX_ALIGNMENT
-	/* align destination */
-	.p2align 4
-.Lbad_alignment:
-	movl $8,%r9d
-	subl %ecx,%r9d
-	movl %r9d,%ecx
-	cmpq %r9,%rdx
-	jz   .Lhandle_7
-	js   .Lhandle_7
-.Lalign_1:
-.Ls11:	movb (%rsi),%bl
-.Ld11:	movb %bl,(%rdi)
-	incq %rsi
-	incq %rdi
-	decl %ecx
-	jnz .Lalign_1
-	subq %r9,%rdx
-	jmp .Lafter_bad_alignment
-#endif
+	.section .fixup,"ax"
+30:	shll $6,%ecx
+	addl %ecx,%edx
+	jmp 60f
+40:	lea (%rdx,%rcx,8),%rdx
+	jmp 60f
+50:	movl %ecx,%edx
+60:	sfence
+	movl %r8d,%ecx
+	jmp copy_user_handle_tail
+	.previous
 
-	/* table sorted by exception address */
 	.section __ex_table,"a"
-	.align 8
-	.quad .Ls1,.Ls1e	/* .Ls[1-4] - 0 bytes copied */
-	.quad .Ls2,.Ls1e
-	.quad .Ls3,.Ls1e
-	.quad .Ls4,.Ls1e
-	.quad .Ld1,.Ls1e	/* .Ld[1-4] - 0..24 bytes coped */
-	.quad .Ld2,.Ls2e
-	.quad .Ld3,.Ls3e
-	.quad .Ld4,.Ls4e
-	.quad .Ls5,.Ls5e	/* .Ls[5-8] - 32 bytes copied */
-	.quad .Ls6,.Ls5e
-	.quad .Ls7,.Ls5e
-	.quad .Ls8,.Ls5e
-	.quad .Ld5,.Ls5e	/* .Ld[5-8] - 32..56 bytes copied */
-	.quad .Ld6,.Ls6e
-	.quad .Ld7,.Ls7e
-	.quad .Ld8,.Ls8e
-	.quad .Ls9,.Le_quad
-	.quad .Ld9,.Le_quad
-	.quad .Ls10,.Le_byte
-	.quad .Ld10,.Le_byte
-#ifdef FIX_ALIGNMENT
-	.quad .Ls11,.Lzero_rest
-	.quad .Ld11,.Lzero_rest
-#endif
-	.quad .Le5,.Le_zero
+	.quad 1b,30b
+	.quad 2b,30b
+	.quad 3b,30b
+	.quad 4b,30b
+	.quad 5b,30b
+	.quad 6b,30b
+	.quad 7b,30b
+	.quad 8b,30b
+	.quad 9b,30b
+	.quad 10b,30b
+	.quad 11b,30b
+	.quad 12b,30b
+	.quad 13b,30b
+	.quad 14b,30b
+	.quad 15b,30b
+	.quad 16b,30b
+	.quad 18b,40b
+	.quad 19b,40b
+	.quad 21b,50b
+	.quad 22b,50b
 	.previous
-
-	/* eax: zero, ebx: 64 */
-.Ls1e: 	addl $8,%eax	/* eax: bytes left uncopied: Ls1e: 64 .. Ls8e: 8 */
-.Ls2e: 	addl $8,%eax
-.Ls3e: 	addl $8,%eax
-.Ls4e: 	addl $8,%eax
-.Ls5e: 	addl $8,%eax
-.Ls6e: 	addl $8,%eax
-.Ls7e: 	addl $8,%eax
-.Ls8e: 	addl $8,%eax
-	addq %rbx,%rdi	/* +64 */
-	subq %rax,%rdi  /* correct destination with computed offset */
-
-	shlq $6,%rdx	/* loop counter * 64 (stride length) */
-	addq %rax,%rdx	/* add offset to loopcnt */
-	andl $63,%ecx	/* remaining bytes */
-	addq %rcx,%rdx	/* add them */
-	jmp .Lzero_rest
-
-	/* exception on quad word loop in tail handling */
-	/* ecx:	loopcnt/8, %edx: length, rdi: correct */
-.Le_quad:
-	shll $3,%ecx
-	andl $7,%edx
-	addl %ecx,%edx
-	/* edx: bytes to zero, rdi: dest, eax:zero */
-.Lzero_rest:
-	cmpl $0,(%rsp)	/* zero flag set? */
-	jz   .Le_zero
-	movq %rdx,%rcx
-.Le_byte:
-	xorl %eax,%eax
-.Le5:	rep
-	stosb
-	/* when there is another exception while zeroing the rest just return */
-.Le_zero:
-	movq %rdx,%rax
-	jmp .Lende
 	CFI_ENDPROC
 ENDPROC(__copy_user_nocache)
-
-
diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay.c
index ef691316f8b..f4568605d7d 100644
--- a/arch/x86/lib/delay_32.c
+++ b/arch/x86/lib/delay.c
@@ -29,7 +29,7 @@
 /* simple loop based delay: */
 static void delay_loop(unsigned long loops)
 {
-	__asm__ __volatile__(
+	asm volatile(
 		"	test %0,%0	\n"
 		"	jz 3f		\n"
 		"	jmp 1f		\n"
@@ -38,9 +38,9 @@ static void delay_loop(unsigned long loops)
 		"1:	jmp 2f		\n"
 
 		".align 16		\n"
-		"2:	decl %0		\n"
+		"2:	dec %0		\n"
 		"	jnz 2b		\n"
-		"3:	decl %0		\n"
+		"3:	dec %0		\n"
 
 		: /* we don't need output */
 		:"a" (loops)
@@ -98,7 +98,7 @@ void use_tsc_delay(void)
 int __devinit read_current_timer(unsigned long *timer_val)
 {
 	if (delay_fn == delay_tsc) {
-		rdtscl(*timer_val);
+		rdtscll(*timer_val);
 		return 0;
 	}
 	return -1;
@@ -108,31 +108,30 @@ void __delay(unsigned long loops)
 {
 	delay_fn(loops);
 }
+EXPORT_SYMBOL(__delay);
 
 inline void __const_udelay(unsigned long xloops)
 {
 	int d0;
 
 	xloops *= 4;
-	__asm__("mull %0"
+	asm("mull %%edx"
 		:"=d" (xloops), "=&a" (d0)
 		:"1" (xloops), "0"
 		(cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4)));
 
 	__delay(++xloops);
 }
+EXPORT_SYMBOL(__const_udelay);
 
 void __udelay(unsigned long usecs)
 {
 	__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
 }
+EXPORT_SYMBOL(__udelay);
 
 void __ndelay(unsigned long nsecs)
 {
 	__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
 }
-
-EXPORT_SYMBOL(__delay);
-EXPORT_SYMBOL(__const_udelay);
-EXPORT_SYMBOL(__udelay);
 EXPORT_SYMBOL(__ndelay);
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c
deleted file mode 100644
index 4c441be9264..00000000000
--- a/arch/x86/lib/delay_64.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- *	Precise Delay Loops for x86-64
- *
- *	Copyright (C) 1993 Linus Torvalds
- *	Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
- *
- *	The __delay function must _NOT_ be inlined as its execution time
- *	depends wildly on alignment on many x86 processors. 
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/timex.h>
-#include <linux/preempt.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-
-#include <asm/delay.h>
-#include <asm/msr.h>
-
-#ifdef CONFIG_SMP
-#include <asm/smp.h>
-#endif
-
-int __devinit read_current_timer(unsigned long *timer_value)
-{
-	rdtscll(*timer_value);
-	return 0;
-}
-
-void __delay(unsigned long loops)
-{
-	unsigned bclock, now;
-	int cpu;
-
-	preempt_disable();
-	cpu = smp_processor_id();
-	rdtscl(bclock);
-	for (;;) {
-		rdtscl(now);
-		if ((now - bclock) >= loops)
-			break;
-
-		/* Allow RT tasks to run */
-		preempt_enable();
-		rep_nop();
-		preempt_disable();
-
-		/*
-		 * It is possible that we moved to another CPU, and
-		 * since TSC's are per-cpu we need to calculate
-		 * that. The delay must guarantee that we wait "at
-		 * least" the amount of time. Being moved to another
-		 * CPU could make the wait longer but we just need to
-		 * make sure we waited long enough. Rebalance the
-		 * counter for this CPU.
-		 */
-		if (unlikely(cpu != smp_processor_id())) {
-			loops -= (now - bclock);
-			cpu = smp_processor_id();
-			rdtscl(bclock);
-		}
-	}
-	preempt_enable();
-}
-EXPORT_SYMBOL(__delay);
-
-inline void __const_udelay(unsigned long xloops)
-{
-	__delay(((xloops * HZ *
-		cpu_data(raw_smp_processor_id()).loops_per_jiffy) >> 32) + 1);
-}
-EXPORT_SYMBOL(__const_udelay);
-
-void __udelay(unsigned long usecs)
-{
-	__const_udelay(usecs * 0x000010c7);  /* 2**32 / 1000000 (rounded up) */
-}
-EXPORT_SYMBOL(__udelay);
-
-void __ndelay(unsigned long nsecs)
-{
-	__const_udelay(nsecs * 0x00005);  /* 2**32 / 1000000000 (rounded up) */
-}
-EXPORT_SYMBOL(__ndelay);
diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser.S
index 5448876261f..ad374003742 100644
--- a/arch/x86/lib/getuser_64.S
+++ b/arch/x86/lib/getuser.S
@@ -3,6 +3,7 @@
  *
  * (C) Copyright 1998 Linus Torvalds
  * (C) Copyright 2005 Andi Kleen
+ * (C) Copyright 2008 Glauber Costa
  *
  * These functions have a non-standard call interface
  * to make them more efficient, especially as they
@@ -13,14 +14,13 @@
 /*
  * __get_user_X
  *
- * Inputs:	%rcx contains the address.
+ * Inputs:	%[r|e]ax contains the address.
  *		The register is modified, but all changes are undone
  *		before returning because the C code doesn't know about it.
  *
- * Outputs:	%rax is error code (0 or -EFAULT)
- *		%rdx contains zero-extended value
- * 
- * %r8 is destroyed.
+ * Outputs:	%[r|e]ax is error code (0 or -EFAULT)
+ *		%[r|e]dx contains zero-extended value
+ *
  *
  * These functions should not modify any other registers,
  * as they get called from within inline assembly.
@@ -32,78 +32,73 @@
 #include <asm/errno.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
+#include <asm/asm.h>
 
 	.text
 ENTRY(__get_user_1)
 	CFI_STARTPROC
-	GET_THREAD_INFO(%r8)
-	cmpq threadinfo_addr_limit(%r8),%rcx
+	GET_THREAD_INFO(%_ASM_DX)
+	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
-1:	movzb (%rcx),%edx
-	xorl %eax,%eax
+1:	movzb (%_ASM_AX),%edx
+	xor %eax,%eax
 	ret
 	CFI_ENDPROC
 ENDPROC(__get_user_1)
 
 ENTRY(__get_user_2)
 	CFI_STARTPROC
-	GET_THREAD_INFO(%r8)
-	addq $1,%rcx
-	jc 20f
-	cmpq threadinfo_addr_limit(%r8),%rcx
-	jae 20f
-	decq   %rcx
-2:	movzwl (%rcx),%edx
-	xorl %eax,%eax
+	add $1,%_ASM_AX
+	jc bad_get_user
+	GET_THREAD_INFO(%_ASM_DX)
+	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	jae bad_get_user
+2:	movzwl -1(%_ASM_AX),%edx
+	xor %eax,%eax
 	ret
-20:	decq    %rcx
-	jmp	bad_get_user
 	CFI_ENDPROC
 ENDPROC(__get_user_2)
 
 ENTRY(__get_user_4)
 	CFI_STARTPROC
-	GET_THREAD_INFO(%r8)
-	addq $3,%rcx
-	jc 30f
-	cmpq threadinfo_addr_limit(%r8),%rcx
-	jae 30f
-	subq $3,%rcx
-3:	movl (%rcx),%edx
-	xorl %eax,%eax
+	add $3,%_ASM_AX
+	jc bad_get_user
+	GET_THREAD_INFO(%_ASM_DX)
+	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	jae bad_get_user
+3:	mov -3(%_ASM_AX),%edx
+	xor %eax,%eax
 	ret
-30:	subq $3,%rcx
-	jmp bad_get_user
 	CFI_ENDPROC
 ENDPROC(__get_user_4)
 
+#ifdef CONFIG_X86_64
 ENTRY(__get_user_8)
 	CFI_STARTPROC
-	GET_THREAD_INFO(%r8)
-	addq $7,%rcx
-	jc 40f
-	cmpq threadinfo_addr_limit(%r8),%rcx
-	jae	40f
-	subq	$7,%rcx
-4:	movq (%rcx),%rdx
-	xorl %eax,%eax
+	add $7,%_ASM_AX
+	jc bad_get_user
+	GET_THREAD_INFO(%_ASM_DX)
+	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	jae	bad_get_user
+4:	movq -7(%_ASM_AX),%_ASM_DX
+	xor %eax,%eax
 	ret
-40:	subq $7,%rcx
-	jmp bad_get_user
 	CFI_ENDPROC
 ENDPROC(__get_user_8)
+#endif
 
 bad_get_user:
 	CFI_STARTPROC
-	xorl %edx,%edx
-	movq $(-EFAULT),%rax
+	xor %edx,%edx
+	mov $(-EFAULT),%_ASM_AX
 	ret
 	CFI_ENDPROC
 END(bad_get_user)
 
 .section __ex_table,"a"
-	.quad 1b,bad_get_user
-	.quad 2b,bad_get_user
-	.quad 3b,bad_get_user
-	.quad 4b,bad_get_user
-.previous
+	_ASM_PTR 1b,bad_get_user
+	_ASM_PTR 2b,bad_get_user
+	_ASM_PTR 3b,bad_get_user
+#ifdef CONFIG_X86_64
+	_ASM_PTR 4b,bad_get_user
+#endif
diff --git a/arch/x86/lib/getuser_32.S b/arch/x86/lib/getuser_32.S
deleted file mode 100644
index 6d84b53f12a..00000000000
--- a/arch/x86/lib/getuser_32.S
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * __get_user functions.
- *
- * (C) Copyright 1998 Linus Torvalds
- *
- * These functions have a non-standard call interface
- * to make them more efficient, especially as they
- * return an error value in addition to the "real"
- * return value.
- */
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-#include <asm/thread_info.h>
-
-
-/*
- * __get_user_X
- *
- * Inputs:	%eax contains the address
- *
- * Outputs:	%eax is error code (0 or -EFAULT)
- *		%edx contains zero-extended value
- *
- * These functions should not modify any other registers,
- * as they get called from within inline assembly.
- */
-
-.text
-ENTRY(__get_user_1)
-	CFI_STARTPROC
-	GET_THREAD_INFO(%edx)
-	cmpl TI_addr_limit(%edx),%eax
-	jae bad_get_user
-1:	movzbl (%eax),%edx
-	xorl %eax,%eax
-	ret
-	CFI_ENDPROC
-ENDPROC(__get_user_1)
-
-ENTRY(__get_user_2)
-	CFI_STARTPROC
-	addl $1,%eax
-	jc bad_get_user
-	GET_THREAD_INFO(%edx)
-	cmpl TI_addr_limit(%edx),%eax
-	jae bad_get_user
-2:	movzwl -1(%eax),%edx
-	xorl %eax,%eax
-	ret
-	CFI_ENDPROC
-ENDPROC(__get_user_2)
-
-ENTRY(__get_user_4)
-	CFI_STARTPROC
-	addl $3,%eax
-	jc bad_get_user
-	GET_THREAD_INFO(%edx)
-	cmpl TI_addr_limit(%edx),%eax
-	jae bad_get_user
-3:	movl -3(%eax),%edx
-	xorl %eax,%eax
-	ret
-	CFI_ENDPROC
-ENDPROC(__get_user_4)
-
-bad_get_user:
-	CFI_STARTPROC
-	xorl %edx,%edx
-	movl $-14,%eax
-	ret
-	CFI_ENDPROC
-END(bad_get_user)
-
-.section __ex_table,"a"
-	.long 1b,bad_get_user
-	.long 2b,bad_get_user
-	.long 3b,bad_get_user
-.previous
diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c
index 57d043fa893..d5a2b39f882 100644
--- a/arch/x86/lib/msr-on-cpu.c
+++ b/arch/x86/lib/msr-on-cpu.c
@@ -30,10 +30,10 @@ static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe)
 
 	rv.msr_no = msr_no;
 	if (safe) {
-		smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 0, 1);
+		smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
 		err = rv.err;
 	} else {
-		smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 0, 1);
+		smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
 	}
 	*l = rv.l;
 	*h = rv.h;
@@ -64,10 +64,10 @@ static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe)
 	rv.l = l;
 	rv.h = h;
 	if (safe) {
-		smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 0, 1);
+		smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
 		err = rv.err;
 	} else {
-		smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 0, 1);
+		smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
 	}
 
 	return err;
diff --git a/arch/x86/lib/putuser_32.S b/arch/x86/lib/putuser.S
index f58fba109d1..36b0d15ae6e 100644
--- a/arch/x86/lib/putuser_32.S
+++ b/arch/x86/lib/putuser.S
@@ -2,6 +2,8 @@
  * __put_user functions.
  *
  * (C) Copyright 2005 Linus Torvalds
+ * (C) Copyright 2005 Andi Kleen
+ * (C) Copyright 2008 Glauber Costa
  *
  * These functions have a non-standard call interface
  * to make them more efficient, especially as they
@@ -11,6 +13,8 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/thread_info.h>
+#include <asm/errno.h>
+#include <asm/asm.h>
 
 
 /*
@@ -26,73 +30,68 @@
  */
 
 #define ENTER	CFI_STARTPROC ; \
-		pushl %ebx ; \
-		CFI_ADJUST_CFA_OFFSET 4 ; \
-		CFI_REL_OFFSET ebx, 0 ; \
-		GET_THREAD_INFO(%ebx)
-#define EXIT	popl %ebx ; \
-		CFI_ADJUST_CFA_OFFSET -4 ; \
-		CFI_RESTORE ebx ; \
-		ret ; \
+		GET_THREAD_INFO(%_ASM_BX)
+#define EXIT	ret ; \
 		CFI_ENDPROC
 
 .text
 ENTRY(__put_user_1)
 	ENTER
-	cmpl TI_addr_limit(%ebx),%ecx
+	cmp TI_addr_limit(%_ASM_BX),%_ASM_CX
 	jae bad_put_user
-1:	movb %al,(%ecx)
-	xorl %eax,%eax
+1:	movb %al,(%_ASM_CX)
+	xor %eax,%eax
 	EXIT
 ENDPROC(__put_user_1)
 
 ENTRY(__put_user_2)
 	ENTER
-	movl TI_addr_limit(%ebx),%ebx
-	subl $1,%ebx
-	cmpl %ebx,%ecx
+	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+	sub $1,%_ASM_BX
+	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
-2:	movw %ax,(%ecx)
-	xorl %eax,%eax
+2:	movw %ax,(%_ASM_CX)
+	xor %eax,%eax
 	EXIT
 ENDPROC(__put_user_2)
 
 ENTRY(__put_user_4)
 	ENTER
-	movl TI_addr_limit(%ebx),%ebx
-	subl $3,%ebx
-	cmpl %ebx,%ecx
+	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+	sub $3,%_ASM_BX
+	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
-3:	movl %eax,(%ecx)
-	xorl %eax,%eax
+3:	movl %eax,(%_ASM_CX)
+	xor %eax,%eax
 	EXIT
 ENDPROC(__put_user_4)
 
 ENTRY(__put_user_8)
 	ENTER
-	movl TI_addr_limit(%ebx),%ebx
-	subl $7,%ebx
-	cmpl %ebx,%ecx
+	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+	sub $7,%_ASM_BX
+	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
-4:	movl %eax,(%ecx)
-5:	movl %edx,4(%ecx)
-	xorl %eax,%eax
+4:	mov %_ASM_AX,(%_ASM_CX)
+#ifdef CONFIG_X86_32
+5:	movl %edx,4(%_ASM_CX)
+#endif
+	xor %eax,%eax
 	EXIT
 ENDPROC(__put_user_8)
 
 bad_put_user:
-	CFI_STARTPROC simple
-	CFI_DEF_CFA esp, 2*4
-	CFI_OFFSET eip, -1*4
-	CFI_OFFSET ebx, -2*4
-	movl $-14,%eax
+	CFI_STARTPROC
+	movl $-EFAULT,%eax
 	EXIT
 END(bad_put_user)
 
 .section __ex_table,"a"
-	.long 1b,bad_put_user
-	.long 2b,bad_put_user
-	.long 3b,bad_put_user
-	.long 4b,bad_put_user
-	.long 5b,bad_put_user
+	_ASM_PTR 1b,bad_put_user
+	_ASM_PTR 2b,bad_put_user
+	_ASM_PTR 3b,bad_put_user
+	_ASM_PTR 4b,bad_put_user
+#ifdef CONFIG_X86_32
+	_ASM_PTR 5b,bad_put_user
+#endif
 .previous
diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S
deleted file mode 100644
index 4989f5a8fa9..00000000000
--- a/arch/x86/lib/putuser_64.S
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * __put_user functions.
- *
- * (C) Copyright 1998 Linus Torvalds
- * (C) Copyright 2005 Andi Kleen
- *
- * These functions have a non-standard call interface
- * to make them more efficient, especially as they
- * return an error value in addition to the "real"
- * return value.
- */
-
-/*
- * __put_user_X
- *
- * Inputs:	%rcx contains the address
- *		%rdx contains new value
- *
- * Outputs:	%rax is error code (0 or -EFAULT)
- *
- * %r8 is destroyed.
- *
- * These functions should not modify any other registers,
- * as they get called from within inline assembly.
- */
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-#include <asm/page.h>
-#include <asm/errno.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-
-	.text
-ENTRY(__put_user_1)
-	CFI_STARTPROC
-	GET_THREAD_INFO(%r8)
-	cmpq threadinfo_addr_limit(%r8),%rcx
-	jae bad_put_user
-1:	movb %dl,(%rcx)
-	xorl %eax,%eax
-	ret
-	CFI_ENDPROC
-ENDPROC(__put_user_1)
-
-ENTRY(__put_user_2)
-	CFI_STARTPROC
-	GET_THREAD_INFO(%r8)
-	addq $1,%rcx
-	jc 20f
-	cmpq threadinfo_addr_limit(%r8),%rcx
-	jae 20f
-	decq %rcx
-2:	movw %dx,(%rcx)
-	xorl %eax,%eax
-	ret
-20:	decq %rcx
-	jmp bad_put_user
-	CFI_ENDPROC
-ENDPROC(__put_user_2)
-
-ENTRY(__put_user_4)
-	CFI_STARTPROC
-	GET_THREAD_INFO(%r8)
-	addq $3,%rcx
-	jc 30f
-	cmpq threadinfo_addr_limit(%r8),%rcx
-	jae 30f
-	subq $3,%rcx
-3:	movl %edx,(%rcx)
-	xorl %eax,%eax
-	ret
-30:	subq $3,%rcx
-	jmp bad_put_user
-	CFI_ENDPROC
-ENDPROC(__put_user_4)
-
-ENTRY(__put_user_8)
-	CFI_STARTPROC
-	GET_THREAD_INFO(%r8)
-	addq $7,%rcx
-	jc 40f
-	cmpq threadinfo_addr_limit(%r8),%rcx
-	jae 40f
-	subq $7,%rcx
-4:	movq %rdx,(%rcx)
-	xorl %eax,%eax
-	ret
-40:	subq $7,%rcx
-	jmp bad_put_user
-	CFI_ENDPROC
-ENDPROC(__put_user_8)
-
-bad_put_user:
-	CFI_STARTPROC
-	movq $(-EFAULT),%rax
-	ret
-	CFI_ENDPROC
-END(bad_put_user)
-
-.section __ex_table,"a"
-	.quad 1b,bad_put_user
-	.quad 2b,bad_put_user
-	.quad 3b,bad_put_user
-	.quad 4b,bad_put_user
-.previous
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
new file mode 100644
index 00000000000..650b11e00ec
--- /dev/null
+++ b/arch/x86/lib/thunk_32.S
@@ -0,0 +1,47 @@
+/*
+ * Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash)
+ * Copyright 2008 by Steven Rostedt, Red Hat, Inc
+ *  (inspired by Andi Kleen's thunk_64.S)
+ * Subject to the GNU public license, v.2. No warranty of any kind.
+ */
+
+	#include <linux/linkage.h>
+
+#define ARCH_TRACE_IRQS_ON			\
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call trace_hardirqs_on;			\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+
+#define ARCH_TRACE_IRQS_OFF			\
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call trace_hardirqs_off;		\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	/* put return address in eax (arg1) */
+	.macro thunk_ra name,func
+	.globl \name
+\name:
+	pushl %eax
+	pushl %ecx
+	pushl %edx
+	/* Place EIP in the arg1 */
+	movl 3*4(%esp), %eax
+	call \func
+	popl %edx
+	popl %ecx
+	popl %eax
+	ret
+	.endm
+
+	thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
+	thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
+#endif
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index e009251d4e9..bf9a7d5a542 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -2,6 +2,7 @@
  * Save registers before calling assembly functions. This avoids
  * disturbance of register allocation in some inline assembly constructs.
  * Copyright 2001,2002 by Andi Kleen, SuSE Labs.
+ * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc.
  * Subject to the GNU public license, v.2. No warranty of any kind.
  */
 
@@ -42,8 +43,22 @@
 #endif	
 	
 #ifdef CONFIG_TRACE_IRQFLAGS
-	thunk trace_hardirqs_on_thunk,trace_hardirqs_on
-	thunk trace_hardirqs_off_thunk,trace_hardirqs_off
+	/* put return address in rdi (arg1) */
+	.macro thunk_ra name,func
+	.globl \name
+\name:
+	CFI_STARTPROC
+	SAVE_ARGS
+	/* SAVE_ARGS pushs 9 elements */
+	/* the next element would be the rip */
+	movq 9*8(%rsp), %rdi
+	call \func
+	jmp  restore
+	CFI_ENDPROC
+	.endm
+
+	thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
+	thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
 #endif
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 0c89d1bb028..f4df6e7c718 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -158,3 +158,26 @@ unsigned long copy_in_user(void __user *to, const void __user *from, unsigned le
 }
 EXPORT_SYMBOL(copy_in_user);
 
+/*
+ * Try to copy last bytes and clear the rest if needed.
+ * Since protection fault in copy_from/to_user is not a normal situation,
+ * it is not necessary to optimize tail handling.
+ */
+unsigned long
+copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
+{
+	char c;
+	unsigned zero_len;
+
+	for (; len; --len) {
+		if (__get_user_nocheck(c, from++, sizeof(char)))
+			break;
+		if (__put_user_nocheck(c, to++, sizeof(char)))
+			break;
+	}
+
+	for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
+		if (__put_user_nocheck(c, to++, sizeof(char)))
+			break;
+	return len;
+}
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 2f5e277686b..48278fa7d3d 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -10,6 +10,14 @@
 #include <asm/e820.h>
 #include <asm/setup.h>
 
+/*
+ * Any quirks to be performed to initialize timers/irqs/etc?
+ */
+int (*arch_time_init_quirk)(void);
+int (*arch_pre_intr_init_quirk)(void);
+int (*arch_intr_init_quirk)(void);
+int (*arch_trap_init_quirk)(void);
+
 #ifdef CONFIG_HOTPLUG_CPU
 #define DEFAULT_SEND_IPI	(1)
 #else
@@ -29,6 +37,10 @@ int no_broadcast=DEFAULT_SEND_IPI;
  **/
 void __init pre_intr_init_hook(void)
 {
+	if (arch_pre_intr_init_quirk) {
+		if (arch_pre_intr_init_quirk())
+			return;
+	}
 	init_ISA_irqs();
 }
 
@@ -52,6 +64,10 @@ static struct irqaction irq2 = {
  **/
 void __init intr_init_hook(void)
 {
+	if (arch_intr_init_quirk) {
+		if (arch_intr_init_quirk())
+			return;
+	}
 #ifdef CONFIG_X86_LOCAL_APIC
 	apic_intr_init();
 #endif
@@ -65,7 +81,7 @@ void __init intr_init_hook(void)
  *
  * Description:
  *	generally used to activate any machine specific identification
- *	routines that may be needed before setup_arch() runs.  On VISWS
+ *	routines that may be needed before setup_arch() runs.  On Voyager
  *	this is used to get the board revision and type.
  **/
 void __init pre_setup_arch_hook(void)
@@ -81,6 +97,10 @@ void __init pre_setup_arch_hook(void)
  **/
 void __init trap_init_hook(void)
 {
+	if (arch_trap_init_quirk) {
+		if (arch_trap_init_quirk())
+			return;
+	}
 }
 
 static struct irqaction irq0  = {
@@ -99,6 +119,16 @@ static struct irqaction irq0  = {
  **/
 void __init time_init_hook(void)
 {
+	if (arch_time_init_quirk) {
+		/*
+		 * A nonzero return code does not mean failure, it means
+		 * that the architecture quirk does not want any
+		 * generic (timer) setup to be performed after this:
+		 */
+		if (arch_time_init_quirk())
+			return;
+	}
+
 	irq0.mask = cpumask_of_cpu(0);
 	setup_irq(0, &irq0);
 }
diff --git a/arch/x86/mach-visws/Makefile b/arch/x86/mach-visws/Makefile
deleted file mode 100644
index 835fd96ad76..00000000000
--- a/arch/x86/mach-visws/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-obj-y				:= setup.o traps.o reboot.o
-
-obj-$(CONFIG_X86_VISWS_APIC)	+= visws_apic.o
-obj-$(CONFIG_X86_LOCAL_APIC)	+= mpparse.o
diff --git a/arch/x86/mach-visws/mpparse.c b/arch/x86/mach-visws/mpparse.c
deleted file mode 100644
index a2fb78c0d15..00000000000
--- a/arch/x86/mach-visws/mpparse.c
+++ /dev/null
@@ -1,85 +0,0 @@
-
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/smp.h>
-#include <asm/io.h>
-
-#include "cobalt.h"
-#include "mach_apic.h"
-
-extern unsigned int __cpuinitdata maxcpus;
-
-/*
- * The Visual Workstation is Intel MP compliant in the hardware
- * sense, but it doesn't have a BIOS(-configuration table).
- * No problem for Linux.
- */
-
-static void __init MP_processor_info (struct mpc_config_processor *m)
-{
-	int ver, logical_apicid;
-	physid_mask_t apic_cpus;
-
-	if (!(m->mpc_cpuflag & CPU_ENABLED))
-		return;
-
-	logical_apicid = m->mpc_apicid;
-	printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
-	       m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
-	       m->mpc_apicid,
-	       (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
-	       (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
-	       m->mpc_apicver);
-
-	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR)
-		boot_cpu_physical_apicid = m->mpc_apicid;
-
-	ver = m->mpc_apicver;
-	if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) {
-		printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
-			m->mpc_apicid, MAX_APICS);
-		return;
-	}
-
-	apic_cpus = apicid_to_cpu_present(m->mpc_apicid);
-	physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
-	/*
-	 * Validate version
-	 */
-	if (ver == 0x0) {
-		printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
-			"fixing up to 0x10. (tell your hw vendor)\n",
-			m->mpc_apicid);
-		ver = 0x10;
-	}
-	apic_version[m->mpc_apicid] = ver;
-}
-
-void __init find_smp_config(void)
-{
-	struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS);
-	unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
-
-	if (ncpus > CO_CPU_MAX) {
-		printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n",
-			ncpus, mp);
-
-		ncpus = CO_CPU_MAX;
-	}
-
-	if (ncpus > maxcpus)
-		ncpus = maxcpus;
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	smp_found_config = 1;
-#endif
-	while (ncpus--)
-		MP_processor_info(mp++);
-
-	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-}
-
-void __init get_smp_config (void)
-{
-}
diff --git a/arch/x86/mach-visws/reboot.c b/arch/x86/mach-visws/reboot.c
deleted file mode 100644
index 99332abfad4..00000000000
--- a/arch/x86/mach-visws/reboot.c
+++ /dev/null
@@ -1,55 +0,0 @@
-#include <linux/module.h>
-#include <linux/smp.h>
-#include <linux/delay.h>
-
-#include <asm/io.h>
-#include "piix4.h"
-
-void (*pm_power_off)(void);
-EXPORT_SYMBOL(pm_power_off);
-
-void machine_shutdown(void)
-{
-#ifdef CONFIG_SMP
-	smp_send_stop();
-#endif
-}
-
-void machine_emergency_restart(void)
-{
-	/*
-	 * Visual Workstations restart after this
-	 * register is poked on the PIIX4
-	 */
-	outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
-}
-
-void machine_restart(char * __unused)
-{
-	machine_shutdown();
-	machine_emergency_restart();
-}
-
-void machine_power_off(void)
-{
-	unsigned short pm_status;
-	extern unsigned int pci_bus0;
-
-	while ((pm_status = inw(PMSTS_PORT)) & 0x100)
-		outw(pm_status, PMSTS_PORT);
-
-	outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT);
-
-	mdelay(10);
-
-#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
-	(0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
-
-	outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8);
-	outl(PIIX_SPECIAL_STOP, 0xCFC);
-}
-
-void machine_halt(void)
-{
-}
-
diff --git a/arch/x86/mach-visws/setup.c b/arch/x86/mach-visws/setup.c
deleted file mode 100644
index d67868ec9b7..00000000000
--- a/arch/x86/mach-visws/setup.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- *  Unmaintained SGI Visual Workstation support.
- *  Split out from setup.c by davej@suse.de
- */
-
-#include <linux/smp.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-
-#include <asm/fixmap.h>
-#include <asm/arch_hooks.h>
-#include <asm/io.h>
-#include <asm/e820.h>
-#include <asm/setup.h>
-#include "cobalt.h"
-#include "piix4.h"
-
-int no_broadcast;
-
-char visws_board_type = -1;
-char visws_board_rev = -1;
-
-void __init visws_get_board_type_and_rev(void)
-{
-	int raw;
-
-	visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG)
-							 >> PIIX_GPI_BD_SHIFT;
-	/*
-	 * Get Board rev.
-	 * First, we have to initialize the 307 part to allow us access
-	 * to the GPIO registers.  Let's map them at 0x0fc0 which is right
-	 * after the PIIX4 PM section.
-	 */
-	outb_p(SIO_DEV_SEL, SIO_INDEX);
-	outb_p(SIO_GP_DEV, SIO_DATA);	/* Talk to GPIO regs. */
-
-	outb_p(SIO_DEV_MSB, SIO_INDEX);
-	outb_p(SIO_GP_MSB, SIO_DATA);	/* MSB of GPIO base address */
-
-	outb_p(SIO_DEV_LSB, SIO_INDEX);
-	outb_p(SIO_GP_LSB, SIO_DATA);	/* LSB of GPIO base address */
-
-	outb_p(SIO_DEV_ENB, SIO_INDEX);
-	outb_p(1, SIO_DATA);		/* Enable GPIO registers. */
-
-	/*
-	 * Now, we have to map the power management section to write
-	 * a bit which enables access to the GPIO registers.
-	 * What lunatic came up with this shit?
-	 */
-	outb_p(SIO_DEV_SEL, SIO_INDEX);
-	outb_p(SIO_PM_DEV, SIO_DATA);	/* Talk to GPIO regs. */
-
-	outb_p(SIO_DEV_MSB, SIO_INDEX);
-	outb_p(SIO_PM_MSB, SIO_DATA);	/* MSB of PM base address */
-
-	outb_p(SIO_DEV_LSB, SIO_INDEX);
-	outb_p(SIO_PM_LSB, SIO_DATA);	/* LSB of PM base address */
-
-	outb_p(SIO_DEV_ENB, SIO_INDEX);
-	outb_p(1, SIO_DATA);		/* Enable PM registers. */
-
-	/*
-	 * Now, write the PM register which enables the GPIO registers.
-	 */
-	outb_p(SIO_PM_FER2, SIO_PM_INDEX);
-	outb_p(SIO_PM_GP_EN, SIO_PM_DATA);
-
-	/*
-	 * Now, initialize the GPIO registers.
-	 * We want them all to be inputs which is the
-	 * power on default, so let's leave them alone.
-	 * So, let's just read the board rev!
-	 */
-	raw = inb_p(SIO_GP_DATA1);
-	raw &= 0x7f;	/* 7 bits of valid board revision ID. */
-
-	if (visws_board_type == VISWS_320) {
-		if (raw < 0x6) {
-			visws_board_rev = 4;
-		} else if (raw < 0xc) {
-			visws_board_rev = 5;
-		} else {
-			visws_board_rev = 6;
-		}
-	} else if (visws_board_type == VISWS_540) {
-			visws_board_rev = 2;
-		} else {
-			visws_board_rev = raw;
-		}
-
-	printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n",
-	       (visws_board_type == VISWS_320 ? "320" :
-	       (visws_board_type == VISWS_540 ? "540" :
-		"unknown")), visws_board_rev);
-}
-
-void __init pre_intr_init_hook(void)
-{
-	init_VISWS_APIC_irqs();
-}
-
-void __init intr_init_hook(void)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
-	apic_intr_init();
-#endif
-}
-
-void __init pre_setup_arch_hook()
-{
-	visws_get_board_type_and_rev();
-}
-
-static struct irqaction irq0 = {
-	.handler =	timer_interrupt,
-	.flags =	IRQF_DISABLED | IRQF_IRQPOLL,
-	.name =		"timer",
-};
-
-void __init time_init_hook(void)
-{
-	printk(KERN_INFO "Starting Cobalt Timer system clock\n");
-
-	/* Set the countdown value */
-	co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
-
-	/* Start the timer */
-	co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
-
-	/* Enable (unmask) the timer interrupt */
-	co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
-
-	/* Wire cpu IDT entry to s/w handler (and Cobalt APIC to IDT) */
-	setup_irq(0, &irq0);
-}
-
-/* Hook for machine specific memory setup. */
-
-#define MB (1024 * 1024)
-
-unsigned long sgivwfb_mem_phys;
-unsigned long sgivwfb_mem_size;
-EXPORT_SYMBOL(sgivwfb_mem_phys);
-EXPORT_SYMBOL(sgivwfb_mem_size);
-
-long long mem_size __initdata = 0;
-
-char * __init machine_specific_memory_setup(void)
-{
-	long long gfx_mem_size = 8 * MB;
-
-	mem_size = boot_params.alt_mem_k;
-
-	if (!mem_size) {
-		printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n");
-		mem_size = 128 * MB;
-	}
-
-	/*
-	 * this hardcodes the graphics memory to 8 MB
-	 * it really should be sized dynamically (or at least
-	 * set as a boot param)
-	 */
-	if (!sgivwfb_mem_size) {
-		printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n");
-		sgivwfb_mem_size = 8 * MB;
-	}
-
-	/*
-	 * Trim to nearest MB
-	 */
-	sgivwfb_mem_size &= ~((1 << 20) - 1);
-	sgivwfb_mem_phys = mem_size - gfx_mem_size;
-
-	e820_add_region(0, LOWMEMSIZE(), E820_RAM);
-	e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
-	e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
-
-	return "PROM";
-}
diff --git a/arch/x86/mach-visws/traps.c b/arch/x86/mach-visws/traps.c
deleted file mode 100644
index bfac6ba10f8..00000000000
--- a/arch/x86/mach-visws/traps.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/* VISWS traps */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/pci_ids.h>
-
-#include <asm/io.h>
-#include <asm/arch_hooks.h>
-#include <asm/apic.h>
-#include "cobalt.h"
-#include "lithium.h"
-
-
-#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4)
-#define BCD (LI_INTB | LI_INTC | LI_INTD)
-#define ALLDEVS (A01234 | BCD)
-
-static __init void lithium_init(void)
-{
-	set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS);
-	set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
-
-	if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
-	    (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
-		printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A');
-		panic("This machine is not SGI Visual Workstation 320/540");
-	}
-
-	if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
-	    (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
-		printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B');
-		panic("This machine is not SGI Visual Workstation 320/540");
-	}
-
-	li_pcia_write16(LI_PCI_INTEN, ALLDEVS);
-	li_pcib_write16(LI_PCI_INTEN, ALLDEVS);
-}
-
-static __init void cobalt_init(void)
-{
-	/*
-	 * On normal SMP PC this is used only with SMP, but we have to
-	 * use it and set it up here to start the Cobalt clock
-	 */
-	set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
-	setup_local_APIC();
-	printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
-		(unsigned int)apic_read(APIC_LVR),
-		(unsigned int)apic_read(APIC_ID));
-
-	set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
-	set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
-	printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n",
-		co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID));
-
-	/* Enable Cobalt APIC being careful to NOT change the ID! */
-	co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE);
-
-	printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n",
-		co_apic_read(CO_APIC_ID));
-}
-
-void __init trap_init_hook(void)
-{
-	lithium_init();
-	cobalt_init();
-}
diff --git a/arch/x86/mach-visws/visws_apic.c b/arch/x86/mach-visws/visws_apic.c
deleted file mode 100644
index d8b2cfd85d9..00000000000
--- a/arch/x86/mach-visws/visws_apic.c
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- *	Copyright (C) 1999 Bent Hagemark, Ingo Molnar
- *
- *  SGI Visual Workstation interrupt controller
- *
- *  The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC
- *  which serves as the main interrupt controller in the system.  Non-legacy
- *  hardware in the system uses this controller directly.  Legacy devices
- *  are connected to the PIIX4 which in turn has its 8259(s) connected to
- *  a of the Cobalt APIC entry.
- *
- *  09/02/2000 - Updated for 2.4 by jbarnes@sgi.com
- *
- *  25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru>
- */
-
-#include <linux/kernel_stat.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-
-#include <asm/io.h>
-#include <asm/apic.h>
-#include <asm/i8259.h>
-#include <asm/irq_vectors.h>
-
-#include "cobalt.h"
-
-static DEFINE_SPINLOCK(cobalt_lock);
-
-/*
- * Set the given Cobalt APIC Redirection Table entry to point
- * to the given IDT vector/index.
- */
-static inline void co_apic_set(int entry, int irq)
-{
-	co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR));
-	co_apic_write(CO_APIC_HI(entry), 0);
-}
-
-/*
- * Cobalt (IO)-APIC functions to handle PCI devices.
- */
-static inline int co_apic_ide0_hack(void)
-{
-	extern char visws_board_type;
-	extern char visws_board_rev;
-
-	if (visws_board_type == VISWS_320 && visws_board_rev == 5)
-		return 5;
-	return CO_APIC_IDE0;
-}
-
-static int is_co_apic(unsigned int irq)
-{
-	if (IS_CO_APIC(irq))
-		return CO_APIC(irq);
-
-	switch (irq) {
-		case 0: return CO_APIC_CPU;
-		case CO_IRQ_IDE0: return co_apic_ide0_hack();
-		case CO_IRQ_IDE1: return CO_APIC_IDE1;
-		default: return -1;
-	}
-}
-
-
-/*
- * This is the SGI Cobalt (IO-)APIC:
- */
-
-static void enable_cobalt_irq(unsigned int irq)
-{
-	co_apic_set(is_co_apic(irq), irq);
-}
-
-static void disable_cobalt_irq(unsigned int irq)
-{
-	int entry = is_co_apic(irq);
-
-	co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
-	co_apic_read(CO_APIC_LO(entry));
-}
-
-/*
- * "irq" really just serves to identify the device.  Here is where we
- * map this to the Cobalt APIC entry where it's physically wired.
- * This is called via request_irq -> setup_irq -> irq_desc->startup()
- */
-static unsigned int startup_cobalt_irq(unsigned int irq)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cobalt_lock, flags);
-	if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
-		irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
-	enable_cobalt_irq(irq);
-	spin_unlock_irqrestore(&cobalt_lock, flags);
-	return 0;
-}
-
-static void ack_cobalt_irq(unsigned int irq)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cobalt_lock, flags);
-	disable_cobalt_irq(irq);
-	apic_write(APIC_EOI, APIC_EIO_ACK);
-	spin_unlock_irqrestore(&cobalt_lock, flags);
-}
-
-static void end_cobalt_irq(unsigned int irq)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cobalt_lock, flags);
-	if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
-		enable_cobalt_irq(irq);
-	spin_unlock_irqrestore(&cobalt_lock, flags);
-}
-
-static struct irq_chip cobalt_irq_type = {
-	.typename =	"Cobalt-APIC",
-	.startup =	startup_cobalt_irq,
-	.shutdown =	disable_cobalt_irq,
-	.enable =	enable_cobalt_irq,
-	.disable =	disable_cobalt_irq,
-	.ack =		ack_cobalt_irq,
-	.end =		end_cobalt_irq,
-};
-
-
-/*
- * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt
- * -- not the manner expected by the code in i8259.c.
- *
- * there is a 'master' physical interrupt source that gets sent to
- * the CPU. But in the chipset there are various 'virtual' interrupts
- * waiting to be handled. We represent this to Linux through a 'master'
- * interrupt controller type, and through a special virtual interrupt-
- * controller. Device drivers only see the virtual interrupt sources.
- */
-static unsigned int startup_piix4_master_irq(unsigned int irq)
-{
-	init_8259A(0);
-
-	return startup_cobalt_irq(irq);
-}
-
-static void end_piix4_master_irq(unsigned int irq)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cobalt_lock, flags);
-	enable_cobalt_irq(irq);
-	spin_unlock_irqrestore(&cobalt_lock, flags);
-}
-
-static struct irq_chip piix4_master_irq_type = {
-	.typename =	"PIIX4-master",
-	.startup =	startup_piix4_master_irq,
-	.ack =		ack_cobalt_irq,
-	.end =		end_piix4_master_irq,
-};
-
-
-static struct irq_chip piix4_virtual_irq_type = {
-	.typename =	"PIIX4-virtual",
-	.shutdown =	disable_8259A_irq,
-	.enable =	enable_8259A_irq,
-	.disable =	disable_8259A_irq,
-};
-
-
-/*
- * PIIX4-8259 master/virtual functions to handle interrupt requests
- * from legacy devices: floppy, parallel, serial, rtc.
- *
- * None of these get Cobalt APIC entries, neither do they have IDT
- * entries. These interrupts are purely virtual and distributed from
- * the 'master' interrupt source: CO_IRQ_8259.
- *
- * When the 8259 interrupts its handler figures out which of these
- * devices is interrupting and dispatches to its handler.
- *
- * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/
- * enable_irq gets the right irq. This 'master' irq is never directly
- * manipulated by any driver.
- */
-static irqreturn_t piix4_master_intr(int irq, void *dev_id)
-{
-	int realirq;
-	irq_desc_t *desc;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i8259A_lock, flags);
-
-	/* Find out what's interrupting in the PIIX4 master 8259 */
-	outb(0x0c, 0x20);		/* OCW3 Poll command */
-	realirq = inb(0x20);
-
-	/*
-	 * Bit 7 == 0 means invalid/spurious
-	 */
-	if (unlikely(!(realirq & 0x80)))
-		goto out_unlock;
-
-	realirq &= 7;
-
-	if (unlikely(realirq == 2)) {
-		outb(0x0c, 0xa0);
-		realirq = inb(0xa0);
-
-		if (unlikely(!(realirq & 0x80)))
-			goto out_unlock;
-
-		realirq = (realirq & 7) + 8;
-	}
-
-	/* mask and ack interrupt */
-	cached_irq_mask |= 1 << realirq;
-	if (unlikely(realirq > 7)) {
-		inb(0xa1);
-		outb(cached_slave_mask, 0xa1);
-		outb(0x60 + (realirq & 7), 0xa0);
-		outb(0x60 + 2, 0x20);
-	} else {
-		inb(0x21);
-		outb(cached_master_mask, 0x21);
-		outb(0x60 + realirq, 0x20);
-	}
-
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-
-	desc = irq_desc + realirq;
-
-	/*
-	 * handle this 'virtual interrupt' as a Cobalt one now.
-	 */
-	kstat_cpu(smp_processor_id()).irqs[realirq]++;
-
-	if (likely(desc->action != NULL))
-		handle_IRQ_event(realirq, desc->action);
-
-	if (!(desc->status & IRQ_DISABLED))
-		enable_8259A_irq(realirq);
-
-	return IRQ_HANDLED;
-
-out_unlock:
-	spin_unlock_irqrestore(&i8259A_lock, flags);
-	return IRQ_NONE;
-}
-
-static struct irqaction master_action = {
-	.handler =	piix4_master_intr,
-	.name =		"PIIX4-8259",
-};
-
-static struct irqaction cascade_action = {
-	.handler = 	no_action,
-	.name =		"cascade",
-};
-
-
-void init_VISWS_APIC_irqs(void)
-{
-	int i;
-
-	for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
-		irq_desc[i].status = IRQ_DISABLED;
-		irq_desc[i].action = 0;
-		irq_desc[i].depth = 1;
-
-		if (i == 0) {
-			irq_desc[i].chip = &cobalt_irq_type;
-		}
-		else if (i == CO_IRQ_IDE0) {
-			irq_desc[i].chip = &cobalt_irq_type;
-		}
-		else if (i == CO_IRQ_IDE1) {
-			irq_desc[i].chip = &cobalt_irq_type;
-		}
-		else if (i == CO_IRQ_8259) {
-			irq_desc[i].chip = &piix4_master_irq_type;
-		}
-		else if (i < CO_IRQ_APIC0) {
-			irq_desc[i].chip = &piix4_virtual_irq_type;
-		}
-		else if (IS_CO_APIC(i)) {
-			irq_desc[i].chip = &cobalt_irq_type;
-		}
-	}
-
-	setup_irq(CO_IRQ_8259, &master_action);
-	setup_irq(2, &cascade_action);
-}
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 8dedd01e909..ee0fba09215 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -950,94 +950,24 @@ static void smp_stop_cpu_function(void *dummy)
 		halt();
 }
 
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
-	void (*func) (void *info);
-	void *info;
-	volatile unsigned long started;
-	volatile unsigned long finished;
-	int wait;
-};
-
-static struct call_data_struct *call_data;
-
 /* execute a thread on a new CPU.  The function to be called must be
  * previously set up.  This is used to schedule a function for
  * execution on all CPUs - set up the function then broadcast a
  * function_interrupt CPI to come here on each CPU */
 static void smp_call_function_interrupt(void)
 {
-	void (*func) (void *info) = call_data->func;
-	void *info = call_data->info;
-	/* must take copy of wait because call_data may be replaced
-	 * unless the function is waiting for us to finish */
-	int wait = call_data->wait;
-	__u8 cpu = smp_processor_id();
-
-	/*
-	 * Notify initiating CPU that I've grabbed the data and am
-	 * about to execute the function
-	 */
-	mb();
-	if (!test_and_clear_bit(cpu, &call_data->started)) {
-		/* If the bit wasn't set, this could be a replay */
-		printk(KERN_WARNING "VOYAGER SMP: CPU %d received call funtion"
-		       " with no call pending\n", cpu);
-		return;
-	}
-	/*
-	 * At this point the info structure may be out of scope unless wait==1
-	 */
 	irq_enter();
-	(*func) (info);
+	generic_smp_call_function_interrupt();
 	__get_cpu_var(irq_stat).irq_call_count++;
 	irq_exit();
-	if (wait) {
-		mb();
-		clear_bit(cpu, &call_data->finished);
-	}
 }
 
-static int
-voyager_smp_call_function_mask(cpumask_t cpumask,
-			       void (*func) (void *info), void *info, int wait)
+static void smp_call_function_single_interrupt(void)
 {
-	struct call_data_struct data;
-	u32 mask = cpus_addr(cpumask)[0];
-
-	mask &= ~(1 << smp_processor_id());
-
-	if (!mask)
-		return 0;
-
-	/* Can deadlock when called with interrupts disabled */
-	WARN_ON(irqs_disabled());
-
-	data.func = func;
-	data.info = info;
-	data.started = mask;
-	data.wait = wait;
-	if (wait)
-		data.finished = mask;
-
-	spin_lock(&call_lock);
-	call_data = &data;
-	wmb();
-	/* Send a message to all other CPUs and wait for them to respond */
-	send_CPI(mask, VIC_CALL_FUNCTION_CPI);
-
-	/* Wait for response */
-	while (data.started)
-		barrier();
-
-	if (wait)
-		while (data.finished)
-			barrier();
-
-	spin_unlock(&call_lock);
-
-	return 0;
+	irq_enter();
+	generic_smp_call_function_single_interrupt();
+	__get_cpu_var(irq_stat).irq_call_count++;
+	irq_exit();
 }
 
 /* Sorry about the name.  In an APIC based system, the APICs
@@ -1094,6 +1024,12 @@ void smp_qic_call_function_interrupt(struct pt_regs *regs)
 	smp_call_function_interrupt();
 }
 
+void smp_qic_call_function_single_interrupt(struct pt_regs *regs)
+{
+	ack_QIC_CPI(QIC_CALL_FUNCTION_SINGLE_CPI);
+	smp_call_function_single_interrupt();
+}
+
 void smp_vic_cpi_interrupt(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1114,6 +1050,8 @@ void smp_vic_cpi_interrupt(struct pt_regs *regs)
 		smp_enable_irq_interrupt();
 	if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu]))
 		smp_call_function_interrupt();
+	if (test_and_clear_bit(VIC_CALL_FUNCTION_SINGLE_CPI, &vic_cpi_mailbox[cpu]))
+		smp_call_function_single_interrupt();
 	set_irq_regs(old_regs);
 }
 
@@ -1129,7 +1067,7 @@ static void do_flush_tlb_all(void *info)
 /* flush the TLB of every active CPU in the system */
 void flush_tlb_all(void)
 {
-	on_each_cpu(do_flush_tlb_all, 0, 1, 1);
+	on_each_cpu(do_flush_tlb_all, 0, 1);
 }
 
 /* send a reschedule CPI to one CPU by physical CPU number*/
@@ -1161,7 +1099,7 @@ int safe_smp_processor_id(void)
 /* broadcast a halt to all other CPUs */
 static void voyager_smp_send_stop(void)
 {
-	smp_call_function(smp_stop_cpu_function, NULL, 1, 1);
+	smp_call_function(smp_stop_cpu_function, NULL, 1);
 }
 
 /* this function is triggered in time.c when a clock tick fires
@@ -1848,5 +1786,7 @@ struct smp_ops smp_ops = {
 
 	.smp_send_stop = voyager_smp_send_stop,
 	.smp_send_reschedule = voyager_smp_send_reschedule,
-	.smp_call_function_mask = voyager_smp_call_function_mask,
+
+	.send_call_func_ipi = native_send_call_func_ipi,
+	.send_call_func_single_ipi = native_send_call_func_single_ipi,
 };
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index c107641cd39..9873716e9f7 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,6 +8,11 @@ obj-$(CONFIG_X86_PTDUMP)	+= dump_pagetables.o
 
 obj-$(CONFIG_HIGHMEM)		+= highmem_32.o
 
+obj-$(CONFIG_MMIOTRACE_HOOKS)	+= kmmio.o
+obj-$(CONFIG_MMIOTRACE)		+= mmiotrace.o
+mmiotrace-y			:= pf_in.o mmio-mod.o
+obj-$(CONFIG_MMIOTRACE_TEST)	+= testmmiotrace.o
+
 ifeq ($(CONFIG_X86_32),y)
 obj-$(CONFIG_NUMA)		+= discontig_32.o
 else
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index d0f5fce77d9..455f3fe67b4 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -10,6 +10,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/ptrace.h>
+#include <linux/mmiotrace.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
@@ -49,6 +50,16 @@
 #define PF_RSVD		(1<<3)
 #define PF_INSTR	(1<<4)
 
+static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
+{
+#ifdef CONFIG_MMIOTRACE_HOOKS
+	if (unlikely(is_kmmio_active()))
+		if (kmmio_handler(regs, addr) == 1)
+			return -1;
+#endif
+	return 0;
+}
+
 static inline int notify_page_fault(struct pt_regs *regs)
 {
 #ifdef CONFIG_KPROBES
@@ -598,6 +609,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 
 	if (notify_page_fault(regs))
 		return;
+	if (unlikely(kmmio_fault(regs, address)))
+		return;
 
 	/*
 	 * We fault-in kernel-space virtual memory on-demand. The
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index b5a0fd5f4c5..9689a5138e6 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -50,6 +50,7 @@
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
+unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -1034,6 +1035,8 @@ void mark_rodata_ro(void)
 	unsigned long start = PFN_ALIGN(_text);
 	unsigned long size = PFN_ALIGN(_etext) - start;
 
+#ifndef CONFIG_DYNAMIC_FTRACE
+	/* Dynamic tracing modifies the kernel text section */
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 	printk(KERN_INFO "Write protecting the kernel text: %luk\n",
 		size >> 10);
@@ -1046,6 +1049,8 @@ void mark_rodata_ro(void)
 	printk(KERN_INFO "Testing CPA: write protecting again\n");
 	set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
 #endif
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
 	start += size;
 	size = (unsigned long)__end_rodata - start;
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 77d129d62c9..306049edd55 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -53,6 +53,7 @@
  * The direct mapping extends to max_pfn_mapped, so that we can directly access
  * apertures, ACPI and other tables without having to play with fixmaps.
  */
+unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 
 static unsigned long dma_reserve __initdata;
@@ -202,6 +203,46 @@ set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 }
 
 /*
+ * Create large page table mappings for a range of physical addresses.
+ */
+static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
+						pgprot_t prot)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK));
+	for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
+		pgd = pgd_offset_k((unsigned long)__va(phys));
+		if (pgd_none(*pgd)) {
+			pud = (pud_t *) spp_getpage();
+			set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
+						_PAGE_USER));
+		}
+		pud = pud_offset(pgd, (unsigned long)__va(phys));
+		if (pud_none(*pud)) {
+			pmd = (pmd_t *) spp_getpage();
+			set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
+						_PAGE_USER));
+		}
+		pmd = pmd_offset(pud, phys);
+		BUG_ON(!pmd_none(*pmd));
+		set_pmd(pmd, __pmd(phys | pgprot_val(prot)));
+	}
+}
+
+void __init init_extra_mapping_wb(unsigned long phys, unsigned long size)
+{
+	__init_extra_mapping(phys, size, PAGE_KERNEL_LARGE);
+}
+
+void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
+{
+	__init_extra_mapping(phys, size, PAGE_KERNEL_LARGE_NOCACHE);
+}
+
+/*
  * The head.S code sets up the kernel high mapping:
  *
  *   from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
@@ -262,11 +303,13 @@ static __meminit void unmap_low_page(void *adr)
 	early_iounmap(adr, PAGE_SIZE);
 }
 
-static void __meminit
+static unsigned long __meminit
 phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
 {
 	unsigned pages = 0;
+	unsigned long last_map_addr = end;
 	int i;
+
 	pte_t *pte = pte_page + pte_index(addr);
 
 	for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) {
@@ -286,23 +329,28 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
 			printk("   pte=%p addr=%lx pte=%016lx\n",
 			       pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
 		set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL));
+		last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
 		pages++;
 	}
 	update_page_count(PG_LEVEL_4K, pages);
+
+	return last_map_addr;
 }
 
-static void __meminit
+static unsigned long __meminit
 phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
 {
 	pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
 
-	phys_pte_init(pte, address, end);
+	return phys_pte_init(pte, address, end);
 }
 
 static unsigned long __meminit
-phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
+phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
+			 unsigned long page_size_mask)
 {
 	unsigned long pages = 0;
+	unsigned long last_map_addr = end;
 
 	int i = pmd_index(address);
 
@@ -321,42 +369,46 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
 
 		if (pmd_val(*pmd)) {
 			if (!pmd_large(*pmd))
-				phys_pte_update(pmd, address, end);
+				last_map_addr = phys_pte_update(pmd, address,
+								 end);
 			continue;
 		}
 
-		if (cpu_has_pse) {
+		if (page_size_mask & (1<<PG_LEVEL_2M)) {
 			pages++;
 			set_pte((pte_t *)pmd,
 				pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+			last_map_addr = (address & PMD_MASK) + PMD_SIZE;
 			continue;
 		}
 
 		pte = alloc_low_page(&pte_phys);
-		phys_pte_init(pte, address, end);
+		last_map_addr = phys_pte_init(pte, address, end);
 		unmap_low_page(pte);
 
 		pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
 	}
 	update_page_count(PG_LEVEL_2M, pages);
-	return address;
+	return last_map_addr;
 }
 
 static unsigned long __meminit
-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
+phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
+			 unsigned long page_size_mask)
 {
 	pmd_t *pmd = pmd_offset(pud, 0);
 	unsigned long last_map_addr;
 
 	spin_lock(&init_mm.page_table_lock);
-	last_map_addr = phys_pmd_init(pmd, address, end);
+	last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
 	spin_unlock(&init_mm.page_table_lock);
 	__flush_tlb_all();
 	return last_map_addr;
 }
 
 static unsigned long __meminit
-phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
+phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
+			 unsigned long page_size_mask)
 {
 	unsigned long pages = 0;
 	unsigned long last_map_addr = end;
@@ -378,11 +430,12 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
 
 		if (pud_val(*pud)) {
 			if (!pud_large(*pud))
-				last_map_addr = phys_pmd_update(pud, addr, end);
+				last_map_addr = phys_pmd_update(pud, addr, end,
+							 page_size_mask);
 			continue;
 		}
 
-		if (direct_gbpages) {
+		if (page_size_mask & (1<<PG_LEVEL_1G)) {
 			pages++;
 			set_pte((pte_t *)pud,
 				pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
@@ -393,7 +446,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
 		pmd = alloc_low_page(&pmd_phys);
 
 		spin_lock(&init_mm.page_table_lock);
-		last_map_addr = phys_pmd_init(pmd, addr, end);
+		last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
 		unmap_low_page(pmd);
 		pud_populate(&init_mm, pud, __va(pmd_phys));
 		spin_unlock(&init_mm.page_table_lock);
@@ -406,29 +459,37 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
 }
 
 static unsigned long __meminit
-phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end)
+phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
+		 unsigned long page_size_mask)
 {
 	pud_t *pud;
 
 	pud = (pud_t *)pgd_page_vaddr(*pgd);
 
-	return phys_pud_init(pud, addr, end);
+	return phys_pud_init(pud, addr, end, page_size_mask);
 }
 
 static void __init find_early_table_space(unsigned long end)
 {
-	unsigned long puds, tables, start;
+	unsigned long puds, pmds, ptes, tables, start;
 
 	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
 	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
-	if (!direct_gbpages) {
-		unsigned long pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
-		tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
-	}
-	if (!cpu_has_pse) {
-		unsigned long ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
-	}
+	if (direct_gbpages) {
+		unsigned long extra;
+		extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
+		pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
+	} else
+		pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+	tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
+
+	if (cpu_has_pse) {
+		unsigned long extra;
+		extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
+		ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	} else
+		ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
 
 	/*
 	 * RED-PEN putting page tables only on node 0 could
@@ -568,29 +629,12 @@ static void __init early_memtest(unsigned long start, unsigned long end)
 }
 #endif
 
-/*
- * Setup the direct mapping of the physical memory at PAGE_OFFSET.
- * This runs before bootmem is initialized and gets pages directly from
- * the physical memory. To access them they are temporarily mapped.
- */
-unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end)
+static unsigned long __init kernel_physical_mapping_init(unsigned long start,
+						unsigned long end,
+						unsigned long page_size_mask)
 {
-	unsigned long next, last_map_addr = end;
-	unsigned long start_phys = start, end_phys = end;
 
-	printk(KERN_INFO "init_memory_mapping\n");
-
-	/*
-	 * Find space for the kernel direct mapping tables.
-	 *
-	 * Later we should allocate these tables in the local node of the
-	 * memory mapped. Unfortunately this is done currently before the
-	 * nodes are discovered.
-	 */
-	if (!after_bootmem) {
-		init_gbpages();
-		find_early_table_space(end);
-	}
+	unsigned long next, last_map_addr = end;
 
 	start = (unsigned long)__va(start);
 	end = (unsigned long)__va(end);
@@ -600,12 +644,13 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned lon
 		unsigned long pud_phys;
 		pud_t *pud;
 
-		next = start + PGDIR_SIZE;
+		next = (start + PGDIR_SIZE) & PGDIR_MASK;
 		if (next > end)
 			next = end;
 
 		if (pgd_val(*pgd)) {
-			last_map_addr = phys_pud_update(pgd, __pa(start), __pa(end));
+			last_map_addr = phys_pud_update(pgd, __pa(start),
+						 __pa(end), page_size_mask);
 			continue;
 		}
 
@@ -614,22 +659,151 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned lon
 		else
 			pud = alloc_low_page(&pud_phys);
 
-		last_map_addr = phys_pud_init(pud, __pa(start), __pa(next));
+		last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
+						 page_size_mask);
 		unmap_low_page(pud);
 		pgd_populate(&init_mm, pgd_offset_k(start),
 			     __va(pud_phys));
 	}
 
+	return last_map_addr;
+}
+
+struct map_range {
+	unsigned long start;
+	unsigned long end;
+	unsigned page_size_mask;
+};
+
+#define NR_RANGE_MR 5
+
+static int save_mr(struct map_range *mr, int nr_range,
+		   unsigned long start_pfn, unsigned long end_pfn,
+		   unsigned long page_size_mask)
+{
+
+	if (start_pfn < end_pfn) {
+		if (nr_range >= NR_RANGE_MR)
+			panic("run out of range for init_memory_mapping\n");
+		mr[nr_range].start = start_pfn<<PAGE_SHIFT;
+		mr[nr_range].end   = end_pfn<<PAGE_SHIFT;
+		mr[nr_range].page_size_mask = page_size_mask;
+		nr_range++;
+	}
+
+	return nr_range;
+}
+
+/*
+ * Setup the direct mapping of the physical memory at PAGE_OFFSET.
+ * This runs before bootmem is initialized and gets pages directly from
+ * the physical memory. To access them they are temporarily mapped.
+ */
+unsigned long __init_refok init_memory_mapping(unsigned long start,
+					       unsigned long end)
+{
+	unsigned long last_map_addr = 0;
+	unsigned long page_size_mask = 0;
+	unsigned long start_pfn, end_pfn;
+
+	struct map_range mr[NR_RANGE_MR];
+	int nr_range, i;
+
+	printk(KERN_INFO "init_memory_mapping\n");
+
+	/*
+	 * Find space for the kernel direct mapping tables.
+	 *
+	 * Later we should allocate these tables in the local node of the
+	 * memory mapped. Unfortunately this is done currently before the
+	 * nodes are discovered.
+	 */
+	if (!after_bootmem)
+		init_gbpages();
+
+	if (direct_gbpages)
+		page_size_mask |= 1 << PG_LEVEL_1G;
+	if (cpu_has_pse)
+		page_size_mask |= 1 << PG_LEVEL_2M;
+
+	memset(mr, 0, sizeof(mr));
+	nr_range = 0;
+
+	/* head if not big page alignment ?*/
+	start_pfn = start >> PAGE_SHIFT;
+	end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT)
+			<< (PMD_SHIFT - PAGE_SHIFT);
+	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+
+	/* big page (2M) range*/
+	start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
+			 << (PMD_SHIFT - PAGE_SHIFT);
+	end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT)
+			 << (PUD_SHIFT - PAGE_SHIFT);
+	if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)))
+		end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT));
+	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+			page_size_mask & (1<<PG_LEVEL_2M));
+
+	/* big page (1G) range */
+	start_pfn = end_pfn;
+	end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
+	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+				page_size_mask &
+				 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
+
+	/* tail is not big page (1G) alignment */
+	start_pfn = end_pfn;
+	end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
+	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+			page_size_mask & (1<<PG_LEVEL_2M));
+
+	/* tail is not big page (2M) alignment */
+	start_pfn = end_pfn;
+	end_pfn = end>>PAGE_SHIFT;
+	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+
+	/* try to merge same page size and continuous */
+	for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
+		unsigned long old_start;
+		if (mr[i].end != mr[i+1].start ||
+		    mr[i].page_size_mask != mr[i+1].page_size_mask)
+			continue;
+		/* move it */
+		old_start = mr[i].start;
+		memmove(&mr[i], &mr[i+1],
+			 (nr_range - 1 - i) * sizeof (struct map_range));
+		mr[i].start = old_start;
+		nr_range--;
+	}
+
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_DEBUG " %010lx - %010lx page %s\n",
+				mr[i].start, mr[i].end,
+			(mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
+			 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
+
+	if (!after_bootmem)
+		find_early_table_space(end);
+
+	for (i = 0; i < nr_range; i++)
+		last_map_addr = kernel_physical_mapping_init(
+					mr[i].start, mr[i].end,
+					mr[i].page_size_mask);
+
 	if (!after_bootmem)
 		mmu_cr4_features = read_cr4();
 	__flush_tlb_all();
 
-	if (!after_bootmem)
+	if (!after_bootmem && table_end > table_start)
 		reserve_early(table_start << PAGE_SHIFT,
 				 table_end << PAGE_SHIFT, "PGTABLE");
 
+	printk(KERN_INFO "last_map_addr: %lx end: %lx\n",
+			 last_map_addr, end);
+
 	if (!after_bootmem)
-		early_memtest(start_phys, end_phys);
+		early_memtest(start, end);
 
 	return last_map_addr >> PAGE_SHIFT;
 }
@@ -817,6 +991,13 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
 void mark_rodata_ro(void)
 {
 	unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
+	unsigned long rodata_start =
+		((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+	/* Dynamic tracing modifies the kernel text section */
+	start = rodata_start;
+#endif
 
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
 	       (end - start) >> 10);
@@ -826,8 +1007,7 @@ void mark_rodata_ro(void)
 	 * The rodata section (but not the kernel text!) should also be
 	 * not-executable.
 	 */
-	start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
-	set_memory_nx(start, (end - start) >> PAGE_SHIFT);
+	set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);
 
 	rodata_test();
 
@@ -1036,9 +1216,6 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 						PAGE_KERNEL_LARGE);
 				set_pmd(pmd, __pmd(pte_val(entry)));
 
-				addr_end = addr + PMD_SIZE;
-				p_end = p + PMD_SIZE;
-
 				/* check to see if we have contiguous blocks */
 				if (p_end != p || node_start != node) {
 					if (p_start)
@@ -1048,6 +1225,9 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 					node_start = node;
 					p_start = p;
 				}
+
+				addr_end = addr + PMD_SIZE;
+				p_end = p + PMD_SIZE;
 			} else
 				vmemmap_verify((pte_t *)pmd, node, addr, next);
 		}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 115f13ee40c..24c1d3c3018 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/mmiotrace.h>
 
 #include <asm/cacheflush.h>
 #include <asm/e820.h>
@@ -122,10 +123,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 {
 	unsigned long pfn, offset, vaddr;
 	resource_size_t last_addr;
+	const resource_size_t unaligned_phys_addr = phys_addr;
+	const unsigned long unaligned_size = size;
 	struct vm_struct *area;
 	unsigned long new_prot_val;
 	pgprot_t prot;
 	int retval;
+	void __iomem *ret_addr;
 
 	/* Don't allow wraparound or zero size */
 	last_addr = phys_addr + size - 1;
@@ -233,7 +237,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 		return NULL;
 	}
 
-	return (void __iomem *) (vaddr + offset);
+	ret_addr = (void __iomem *) (vaddr + offset);
+	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
+
+	return ret_addr;
 }
 
 /**
@@ -348,6 +355,8 @@ void iounmap(volatile void __iomem *addr)
 	addr = (volatile void __iomem *)
 		(PAGE_MASK & (unsigned long __force)addr);
 
+	mmiotrace_iounmap(addr);
+
 	/* Use the vm area unlocked, assuming the caller
 	   ensures there isn't another iounmap for the same address
 	   in parallel. Reuse of the virtual address is prevented by
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
new file mode 100644
index 00000000000..93d82038af4
--- /dev/null
+++ b/arch/x86/mm/kmmio.c
@@ -0,0 +1,510 @@
+/* Support for MMIO probes.
+ * Benfit many code from kprobes
+ * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
+ *     2007 Alexander Eichner
+ *     2008 Pekka Paalanen <pq@iki.fi>
+ */
+
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include <linux/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <linux/percpu.h>
+#include <linux/kdebug.h>
+#include <linux/mutex.h>
+#include <linux/io.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <linux/errno.h>
+#include <asm/debugreg.h>
+#include <linux/mmiotrace.h>
+
+#define KMMIO_PAGE_HASH_BITS 4
+#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
+
+struct kmmio_fault_page {
+	struct list_head list;
+	struct kmmio_fault_page *release_next;
+	unsigned long page; /* location of the fault page */
+
+	/*
+	 * Number of times this page has been registered as a part
+	 * of a probe. If zero, page is disarmed and this may be freed.
+	 * Used only by writers (RCU).
+	 */
+	int count;
+};
+
+struct kmmio_delayed_release {
+	struct rcu_head rcu;
+	struct kmmio_fault_page *release_list;
+};
+
+struct kmmio_context {
+	struct kmmio_fault_page *fpage;
+	struct kmmio_probe *probe;
+	unsigned long saved_flags;
+	unsigned long addr;
+	int active;
+};
+
+static DEFINE_SPINLOCK(kmmio_lock);
+
+/* Protected by kmmio_lock */
+unsigned int kmmio_count;
+
+/* Read-protected by RCU, write-protected by kmmio_lock. */
+static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
+static LIST_HEAD(kmmio_probes);
+
+static struct list_head *kmmio_page_list(unsigned long page)
+{
+	return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
+}
+
+/* Accessed per-cpu */
+static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);
+
+/*
+ * this is basically a dynamic stabbing problem:
+ * Could use the existing prio tree code or
+ * Possible better implementations:
+ * The Interval Skip List: A Data Structure for Finding All Intervals That
+ * Overlap a Point (might be simple)
+ * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
+ */
+/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
+static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
+{
+	struct kmmio_probe *p;
+	list_for_each_entry_rcu(p, &kmmio_probes, list) {
+		if (addr >= p->addr && addr <= (p->addr + p->len))
+			return p;
+	}
+	return NULL;
+}
+
+/* You must be holding RCU read lock. */
+static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
+{
+	struct list_head *head;
+	struct kmmio_fault_page *p;
+
+	page &= PAGE_MASK;
+	head = kmmio_page_list(page);
+	list_for_each_entry_rcu(p, head, list) {
+		if (p->page == page)
+			return p;
+	}
+	return NULL;
+}
+
+static void set_page_present(unsigned long addr, bool present,
+							unsigned int *pglevel)
+{
+	pteval_t pteval;
+	pmdval_t pmdval;
+	unsigned int level;
+	pmd_t *pmd;
+	pte_t *pte = lookup_address(addr, &level);
+
+	if (!pte) {
+		pr_err("kmmio: no pte for page 0x%08lx\n", addr);
+		return;
+	}
+
+	if (pglevel)
+		*pglevel = level;
+
+	switch (level) {
+	case PG_LEVEL_2M:
+		pmd = (pmd_t *)pte;
+		pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT;
+		if (present)
+			pmdval |= _PAGE_PRESENT;
+		set_pmd(pmd, __pmd(pmdval));
+		break;
+
+	case PG_LEVEL_4K:
+		pteval = pte_val(*pte) & ~_PAGE_PRESENT;
+		if (present)
+			pteval |= _PAGE_PRESENT;
+		set_pte_atomic(pte, __pte(pteval));
+		break;
+
+	default:
+		pr_err("kmmio: unexpected page level 0x%x.\n", level);
+		return;
+	}
+
+	__flush_tlb_one(addr);
+}
+
+/** Mark the given page as not present. Access to it will trigger a fault. */
+static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
+{
+	set_page_present(page & PAGE_MASK, false, pglevel);
+}
+
+/** Mark the given page as present. */
+static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
+{
+	set_page_present(page & PAGE_MASK, true, pglevel);
+}
+
+/*
+ * This is being called from do_page_fault().
+ *
+ * We may be in an interrupt or a critical section. Also prefecthing may
+ * trigger a page fault. We may be in the middle of process switch.
+ * We cannot take any locks, because we could be executing especially
+ * within a kmmio critical section.
+ *
+ * Local interrupts are disabled, so preemption cannot happen.
+ * Do not enable interrupts, do not sleep, and watch out for other CPUs.
+ */
+/*
+ * Interrupts are disabled on entry as trap3 is an interrupt gate
+ * and they remain disabled thorough out this function.
+ */
+int kmmio_handler(struct pt_regs *regs, unsigned long addr)
+{
+	struct kmmio_context *ctx;
+	struct kmmio_fault_page *faultpage;
+	int ret = 0; /* default to fault not handled */
+
+	/*
+	 * Preemption is now disabled to prevent process switch during
+	 * single stepping. We can only handle one active kmmio trace
+	 * per cpu, so ensure that we finish it before something else
+	 * gets to run. We also hold the RCU read lock over single
+	 * stepping to avoid looking up the probe and kmmio_fault_page
+	 * again.
+	 */
+	preempt_disable();
+	rcu_read_lock();
+
+	faultpage = get_kmmio_fault_page(addr);
+	if (!faultpage) {
+		/*
+		 * Either this page fault is not caused by kmmio, or
+		 * another CPU just pulled the kmmio probe from under
+		 * our feet. The latter case should not be possible.
+		 */
+		goto no_kmmio;
+	}
+
+	ctx = &get_cpu_var(kmmio_ctx);
+	if (ctx->active) {
+		disarm_kmmio_fault_page(faultpage->page, NULL);
+		if (addr == ctx->addr) {
+			/*
+			 * On SMP we sometimes get recursive probe hits on the
+			 * same address. Context is already saved, fall out.
+			 */
+			pr_debug("kmmio: duplicate probe hit on CPU %d, for "
+						"address 0x%08lx.\n",
+						smp_processor_id(), addr);
+			ret = 1;
+			goto no_kmmio_ctx;
+		}
+		/*
+		 * Prevent overwriting already in-flight context.
+		 * This should not happen, let's hope disarming at least
+		 * prevents a panic.
+		 */
+		pr_emerg("kmmio: recursive probe hit on CPU %d, "
+					"for address 0x%08lx. Ignoring.\n",
+					smp_processor_id(), addr);
+		pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
+					ctx->addr);
+		goto no_kmmio_ctx;
+	}
+	ctx->active++;
+
+	ctx->fpage = faultpage;
+	ctx->probe = get_kmmio_probe(addr);
+	ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
+	ctx->addr = addr;
+
+	if (ctx->probe && ctx->probe->pre_handler)
+		ctx->probe->pre_handler(ctx->probe, regs, addr);
+
+	/*
+	 * Enable single-stepping and disable interrupts for the faulting
+	 * context. Local interrupts must not get enabled during stepping.
+	 */
+	regs->flags |= X86_EFLAGS_TF;
+	regs->flags &= ~X86_EFLAGS_IF;
+
+	/* Now we set present bit in PTE and single step. */
+	disarm_kmmio_fault_page(ctx->fpage->page, NULL);
+
+	/*
+	 * If another cpu accesses the same page while we are stepping,
+	 * the access will not be caught. It will simply succeed and the
+	 * only downside is we lose the event. If this becomes a problem,
+	 * the user should drop to single cpu before tracing.
+	 */
+
+	put_cpu_var(kmmio_ctx);
+	return 1; /* fault handled */
+
+no_kmmio_ctx:
+	put_cpu_var(kmmio_ctx);
+no_kmmio:
+	rcu_read_unlock();
+	preempt_enable_no_resched();
+	return ret;
+}
+
+/*
+ * Interrupts are disabled on entry as trap1 is an interrupt gate
+ * and they remain disabled thorough out this function.
+ * This must always get called as the pair to kmmio_handler().
+ */
+static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
+{
+	int ret = 0;
+	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
+
+	if (!ctx->active) {
+		pr_debug("kmmio: spurious debug trap on CPU %d.\n",
+							smp_processor_id());
+		goto out;
+	}
+
+	if (ctx->probe && ctx->probe->post_handler)
+		ctx->probe->post_handler(ctx->probe, condition, regs);
+
+	arm_kmmio_fault_page(ctx->fpage->page, NULL);
+
+	regs->flags &= ~X86_EFLAGS_TF;
+	regs->flags |= ctx->saved_flags;
+
+	/* These were acquired in kmmio_handler(). */
+	ctx->active--;
+	BUG_ON(ctx->active);
+	rcu_read_unlock();
+	preempt_enable_no_resched();
+
+	/*
+	 * if somebody else is singlestepping across a probe point, flags
+	 * will have TF set, in which case, continue the remaining processing
+	 * of do_debug, as if this is not a probe hit.
+	 */
+	if (!(regs->flags & X86_EFLAGS_TF))
+		ret = 1;
+out:
+	put_cpu_var(kmmio_ctx);
+	return ret;
+}
+
+/* You must be holding kmmio_lock. */
+static int add_kmmio_fault_page(unsigned long page)
+{
+	struct kmmio_fault_page *f;
+
+	page &= PAGE_MASK;
+	f = get_kmmio_fault_page(page);
+	if (f) {
+		if (!f->count)
+			arm_kmmio_fault_page(f->page, NULL);
+		f->count++;
+		return 0;
+	}
+
+	f = kmalloc(sizeof(*f), GFP_ATOMIC);
+	if (!f)
+		return -1;
+
+	f->count = 1;
+	f->page = page;
+	list_add_rcu(&f->list, kmmio_page_list(f->page));
+
+	arm_kmmio_fault_page(f->page, NULL);
+
+	return 0;
+}
+
+/* You must be holding kmmio_lock. */
+static void release_kmmio_fault_page(unsigned long page,
+				struct kmmio_fault_page **release_list)
+{
+	struct kmmio_fault_page *f;
+
+	page &= PAGE_MASK;
+	f = get_kmmio_fault_page(page);
+	if (!f)
+		return;
+
+	f->count--;
+	BUG_ON(f->count < 0);
+	if (!f->count) {
+		disarm_kmmio_fault_page(f->page, NULL);
+		f->release_next = *release_list;
+		*release_list = f;
+	}
+}
+
+/*
+ * With page-unaligned ioremaps, one or two armed pages may contain
+ * addresses from outside the intended mapping. Events for these addresses
+ * are currently silently dropped. The events may result only from programming
+ * mistakes by accessing addresses before the beginning or past the end of a
+ * mapping.
+ */
+int register_kmmio_probe(struct kmmio_probe *p)
+{
+	unsigned long flags;
+	int ret = 0;
+	unsigned long size = 0;
+	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
+
+	spin_lock_irqsave(&kmmio_lock, flags);
+	if (get_kmmio_probe(p->addr)) {
+		ret = -EEXIST;
+		goto out;
+	}
+	kmmio_count++;
+	list_add_rcu(&p->list, &kmmio_probes);
+	while (size < size_lim) {
+		if (add_kmmio_fault_page(p->addr + size))
+			pr_err("kmmio: Unable to set page fault.\n");
+		size += PAGE_SIZE;
+	}
+out:
+	spin_unlock_irqrestore(&kmmio_lock, flags);
+	/*
+	 * XXX: What should I do here?
+	 * Here was a call to global_flush_tlb(), but it does not exist
+	 * anymore. It seems it's not needed after all.
+	 */
+	return ret;
+}
+EXPORT_SYMBOL(register_kmmio_probe);
+
+static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
+{
+	struct kmmio_delayed_release *dr = container_of(
+						head,
+						struct kmmio_delayed_release,
+						rcu);
+	struct kmmio_fault_page *p = dr->release_list;
+	while (p) {
+		struct kmmio_fault_page *next = p->release_next;
+		BUG_ON(p->count);
+		kfree(p);
+		p = next;
+	}
+	kfree(dr);
+}
+
+static void remove_kmmio_fault_pages(struct rcu_head *head)
+{
+	struct kmmio_delayed_release *dr = container_of(
+						head,
+						struct kmmio_delayed_release,
+						rcu);
+	struct kmmio_fault_page *p = dr->release_list;
+	struct kmmio_fault_page **prevp = &dr->release_list;
+	unsigned long flags;
+	spin_lock_irqsave(&kmmio_lock, flags);
+	while (p) {
+		if (!p->count)
+			list_del_rcu(&p->list);
+		else
+			*prevp = p->release_next;
+		prevp = &p->release_next;
+		p = p->release_next;
+	}
+	spin_unlock_irqrestore(&kmmio_lock, flags);
+	/* This is the real RCU destroy call. */
+	call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
+}
+
+/*
+ * Remove a kmmio probe. You have to synchronize_rcu() before you can be
+ * sure that the callbacks will not be called anymore. Only after that
+ * you may actually release your struct kmmio_probe.
+ *
+ * Unregistering a kmmio fault page has three steps:
+ * 1. release_kmmio_fault_page()
+ *    Disarm the page, wait a grace period to let all faults finish.
+ * 2. remove_kmmio_fault_pages()
+ *    Remove the pages from kmmio_page_table.
+ * 3. rcu_free_kmmio_fault_pages()
+ *    Actally free the kmmio_fault_page structs as with RCU.
+ */
+void unregister_kmmio_probe(struct kmmio_probe *p)
+{
+	unsigned long flags;
+	unsigned long size = 0;
+	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
+	struct kmmio_fault_page *release_list = NULL;
+	struct kmmio_delayed_release *drelease;
+
+	spin_lock_irqsave(&kmmio_lock, flags);
+	while (size < size_lim) {
+		release_kmmio_fault_page(p->addr + size, &release_list);
+		size += PAGE_SIZE;
+	}
+	list_del_rcu(&p->list);
+	kmmio_count--;
+	spin_unlock_irqrestore(&kmmio_lock, flags);
+
+	drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
+	if (!drelease) {
+		pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
+		return;
+	}
+	drelease->release_list = release_list;
+
+	/*
+	 * This is not really RCU here. We have just disarmed a set of
+	 * pages so that they cannot trigger page faults anymore. However,
+	 * we cannot remove the pages from kmmio_page_table,
+	 * because a probe hit might be in flight on another CPU. The
+	 * pages are collected into a list, and they will be removed from
+	 * kmmio_page_table when it is certain that no probe hit related to
+	 * these pages can be in flight. RCU grace period sounds like a
+	 * good choice.
+	 *
+	 * If we removed the pages too early, kmmio page fault handler might
+	 * not find the respective kmmio_fault_page and determine it's not
+	 * a kmmio fault, when it actually is. This would lead to madness.
+	 */
+	call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
+}
+EXPORT_SYMBOL(unregister_kmmio_probe);
+
+static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
+								void *args)
+{
+	struct die_args *arg = args;
+
+	if (val == DIE_DEBUG && (arg->err & DR_STEP))
+		if (post_kmmio_handler(arg->err, arg->regs) == 1)
+			return NOTIFY_STOP;
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block nb_die = {
+	.notifier_call = kmmio_die_notifier
+};
+
+static int __init init_kmmio(void)
+{
+	int i;
+	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
+		INIT_LIST_HEAD(&kmmio_page_table[i]);
+	return register_die_notifier(&nb_die);
+}
+fs_initcall(init_kmmio); /* should be before device_initcall() */
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
new file mode 100644
index 00000000000..e7397e108be
--- /dev/null
+++ b/arch/x86/mm/mmio-mod.c
@@ -0,0 +1,515 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2005
+ *               Jeff Muizelaar, 2006, 2007
+ *               Pekka Paalanen, 2008 <pq@iki.fi>
+ *
+ * Derived from the read-mod example from relay-examples by Tom Zanussi.
+ */
+#define DEBUG 1
+
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/version.h>
+#include <linux/kallsyms.h>
+#include <asm/pgtable.h>
+#include <linux/mmiotrace.h>
+#include <asm/e820.h> /* for ISA_START_ADDRESS */
+#include <asm/atomic.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+
+#include "pf_in.h"
+
+#define NAME "mmiotrace: "
+
+struct trap_reason {
+	unsigned long addr;
+	unsigned long ip;
+	enum reason_type type;
+	int active_traces;
+};
+
+struct remap_trace {
+	struct list_head list;
+	struct kmmio_probe probe;
+	resource_size_t phys;
+	unsigned long id;
+};
+
+/* Accessed per-cpu. */
+static DEFINE_PER_CPU(struct trap_reason, pf_reason);
+static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace);
+
+#if 0 /* XXX: no way gather this info anymore */
+/* Access to this is not per-cpu. */
+static DEFINE_PER_CPU(atomic_t, dropped);
+#endif
+
+static struct dentry *marker_file;
+
+static DEFINE_MUTEX(mmiotrace_mutex);
+static DEFINE_SPINLOCK(trace_lock);
+static atomic_t mmiotrace_enabled;
+static LIST_HEAD(trace_list);		/* struct remap_trace */
+
+/*
+ * Locking in this file:
+ * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections.
+ * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex
+ *   and trace_lock.
+ * - Routines depending on is_enabled() must take trace_lock.
+ * - trace_list users must hold trace_lock.
+ * - is_enabled() guarantees that mmio_trace_record is allowed.
+ * - pre/post callbacks assume the effect of is_enabled() being true.
+ */
+
+/* module parameters */
+static unsigned long	filter_offset;
+static int		nommiotrace;
+static int		trace_pc;
+
+module_param(filter_offset, ulong, 0);
+module_param(nommiotrace, bool, 0);
+module_param(trace_pc, bool, 0);
+
+MODULE_PARM_DESC(filter_offset, "Start address of traced mappings.");
+MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing.");
+MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions.");
+
+static bool is_enabled(void)
+{
+	return atomic_read(&mmiotrace_enabled);
+}
+
+#if 0 /* XXX: needs rewrite */
+/*
+ * Write callback for the debugfs entry:
+ * Read a marker and write it to the mmio trace log
+ */
+static ssize_t write_marker(struct file *file, const char __user *buffer,
+						size_t count, loff_t *ppos)
+{
+	char *event = NULL;
+	struct mm_io_header *headp;
+	ssize_t len = (count > 65535) ? 65535 : count;
+
+	event = kzalloc(sizeof(*headp) + len, GFP_KERNEL);
+	if (!event)
+		return -ENOMEM;
+
+	headp = (struct mm_io_header *)event;
+	headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT);
+	headp->data_len = len;
+
+	if (copy_from_user(event + sizeof(*headp), buffer, len)) {
+		kfree(event);
+		return -EFAULT;
+	}
+
+	spin_lock_irq(&trace_lock);
+#if 0 /* XXX: convert this to use tracing */
+	if (is_enabled())
+		relay_write(chan, event, sizeof(*headp) + len);
+	else
+#endif
+		len = -EINVAL;
+	spin_unlock_irq(&trace_lock);
+	kfree(event);
+	return len;
+}
+#endif
+
+static void print_pte(unsigned long address)
+{
+	unsigned int level;
+	pte_t *pte = lookup_address(address, &level);
+
+	if (!pte) {
+		pr_err(NAME "Error in %s: no pte for page 0x%08lx\n",
+							__func__, address);
+		return;
+	}
+
+	if (level == PG_LEVEL_2M) {
+		pr_emerg(NAME "4MB pages are not currently supported: "
+							"0x%08lx\n", address);
+		BUG();
+	}
+	pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address,
+		(unsigned long long)pte_val(*pte),
+		(unsigned long long)pte_val(*pte) & _PAGE_PRESENT);
+}
+
+/*
+ * For some reason the pre/post pairs have been called in an
+ * unmatched order. Report and die.
+ */
+static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
+{
+	const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
+	pr_emerg(NAME "unexpected fault for address: 0x%08lx, "
+					"last fault for address: 0x%08lx\n",
+					addr, my_reason->addr);
+	print_pte(addr);
+	print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip);
+	print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip);
+#ifdef __i386__
+	pr_emerg("eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
+			regs->ax, regs->bx, regs->cx, regs->dx);
+	pr_emerg("esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
+			regs->si, regs->di, regs->bp, regs->sp);
+#else
+	pr_emerg("rax: %016lx   rcx: %016lx   rdx: %016lx\n",
+					regs->ax, regs->cx, regs->dx);
+	pr_emerg("rsi: %016lx   rdi: %016lx   rbp: %016lx   rsp: %016lx\n",
+				regs->si, regs->di, regs->bp, regs->sp);
+#endif
+	put_cpu_var(pf_reason);
+	BUG();
+}
+
+static void pre(struct kmmio_probe *p, struct pt_regs *regs,
+						unsigned long addr)
+{
+	struct trap_reason *my_reason = &get_cpu_var(pf_reason);
+	struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace);
+	const unsigned long instptr = instruction_pointer(regs);
+	const enum reason_type type = get_ins_type(instptr);
+	struct remap_trace *trace = p->private;
+
+	/* it doesn't make sense to have more than one active trace per cpu */
+	if (my_reason->active_traces)
+		die_kmmio_nesting_error(regs, addr);
+	else
+		my_reason->active_traces++;
+
+	my_reason->type = type;
+	my_reason->addr = addr;
+	my_reason->ip = instptr;
+
+	my_trace->phys = addr - trace->probe.addr + trace->phys;
+	my_trace->map_id = trace->id;
+
+	/*
+	 * Only record the program counter when requested.
+	 * It may taint clean-room reverse engineering.
+	 */
+	if (trace_pc)
+		my_trace->pc = instptr;
+	else
+		my_trace->pc = 0;
+
+	/*
+	 * XXX: the timestamp recorded will be *after* the tracing has been
+	 * done, not at the time we hit the instruction. SMP implications
+	 * on event ordering?
+	 */
+
+	switch (type) {
+	case REG_READ:
+		my_trace->opcode = MMIO_READ;
+		my_trace->width = get_ins_mem_width(instptr);
+		break;
+	case REG_WRITE:
+		my_trace->opcode = MMIO_WRITE;
+		my_trace->width = get_ins_mem_width(instptr);
+		my_trace->value = get_ins_reg_val(instptr, regs);
+		break;
+	case IMM_WRITE:
+		my_trace->opcode = MMIO_WRITE;
+		my_trace->width = get_ins_mem_width(instptr);
+		my_trace->value = get_ins_imm_val(instptr);
+		break;
+	default:
+		{
+			unsigned char *ip = (unsigned char *)instptr;
+			my_trace->opcode = MMIO_UNKNOWN_OP;
+			my_trace->width = 0;
+			my_trace->value = (*ip) << 16 | *(ip + 1) << 8 |
+								*(ip + 2);
+		}
+	}
+	put_cpu_var(cpu_trace);
+	put_cpu_var(pf_reason);
+}
+
+static void post(struct kmmio_probe *p, unsigned long condition,
+							struct pt_regs *regs)
+{
+	struct trap_reason *my_reason = &get_cpu_var(pf_reason);
+	struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace);
+
+	/* this should always return the active_trace count to 0 */
+	my_reason->active_traces--;
+	if (my_reason->active_traces) {
+		pr_emerg(NAME "unexpected post handler");
+		BUG();
+	}
+
+	switch (my_reason->type) {
+	case REG_READ:
+		my_trace->value = get_ins_reg_val(my_reason->ip, regs);
+		break;
+	default:
+		break;
+	}
+
+	mmio_trace_rw(my_trace);
+	put_cpu_var(cpu_trace);
+	put_cpu_var(pf_reason);
+}
+
+static void ioremap_trace_core(resource_size_t offset, unsigned long size,
+							void __iomem *addr)
+{
+	static atomic_t next_id;
+	struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL);
+	/* These are page-unaligned. */
+	struct mmiotrace_map map = {
+		.phys = offset,
+		.virt = (unsigned long)addr,
+		.len = size,
+		.opcode = MMIO_PROBE
+	};
+
+	if (!trace) {
+		pr_err(NAME "kmalloc failed in ioremap\n");
+		return;
+	}
+
+	*trace = (struct remap_trace) {
+		.probe = {
+			.addr = (unsigned long)addr,
+			.len = size,
+			.pre_handler = pre,
+			.post_handler = post,
+			.private = trace
+		},
+		.phys = offset,
+		.id = atomic_inc_return(&next_id)
+	};
+	map.map_id = trace->id;
+
+	spin_lock_irq(&trace_lock);
+	if (!is_enabled())
+		goto not_enabled;
+
+	mmio_trace_mapping(&map);
+	list_add_tail(&trace->list, &trace_list);
+	if (!nommiotrace)
+		register_kmmio_probe(&trace->probe);
+
+not_enabled:
+	spin_unlock_irq(&trace_lock);
+}
+
+void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
+						void __iomem *addr)
+{
+	if (!is_enabled()) /* recheck and proper locking in *_core() */
+		return;
+
+	pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n",
+				(unsigned long long)offset, size, addr);
+	if ((filter_offset) && (offset != filter_offset))
+		return;
+	ioremap_trace_core(offset, size, addr);
+}
+
+static void iounmap_trace_core(volatile void __iomem *addr)
+{
+	struct mmiotrace_map map = {
+		.phys = 0,
+		.virt = (unsigned long)addr,
+		.len = 0,
+		.opcode = MMIO_UNPROBE
+	};
+	struct remap_trace *trace;
+	struct remap_trace *tmp;
+	struct remap_trace *found_trace = NULL;
+
+	pr_debug(NAME "Unmapping %p.\n", addr);
+
+	spin_lock_irq(&trace_lock);
+	if (!is_enabled())
+		goto not_enabled;
+
+	list_for_each_entry_safe(trace, tmp, &trace_list, list) {
+		if ((unsigned long)addr == trace->probe.addr) {
+			if (!nommiotrace)
+				unregister_kmmio_probe(&trace->probe);
+			list_del(&trace->list);
+			found_trace = trace;
+			break;
+		}
+	}
+	map.map_id = (found_trace) ? found_trace->id : -1;
+	mmio_trace_mapping(&map);
+
+not_enabled:
+	spin_unlock_irq(&trace_lock);
+	if (found_trace) {
+		synchronize_rcu(); /* unregister_kmmio_probe() requirement */
+		kfree(found_trace);
+	}
+}
+
+void mmiotrace_iounmap(volatile void __iomem *addr)
+{
+	might_sleep();
+	if (is_enabled()) /* recheck and proper locking in *_core() */
+		iounmap_trace_core(addr);
+}
+
+static void clear_trace_list(void)
+{
+	struct remap_trace *trace;
+	struct remap_trace *tmp;
+
+	/*
+	 * No locking required, because the caller ensures we are in a
+	 * critical section via mutex, and is_enabled() is false,
+	 * i.e. nothing can traverse or modify this list.
+	 * Caller also ensures is_enabled() cannot change.
+	 */
+	list_for_each_entry(trace, &trace_list, list) {
+		pr_notice(NAME "purging non-iounmapped "
+					"trace @0x%08lx, size 0x%lx.\n",
+					trace->probe.addr, trace->probe.len);
+		if (!nommiotrace)
+			unregister_kmmio_probe(&trace->probe);
+	}
+	synchronize_rcu(); /* unregister_kmmio_probe() requirement */
+
+	list_for_each_entry_safe(trace, tmp, &trace_list, list) {
+		list_del(&trace->list);
+		kfree(trace);
+	}
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static cpumask_t downed_cpus;
+
+static void enter_uniprocessor(void)
+{
+	int cpu;
+	int err;
+
+	get_online_cpus();
+	downed_cpus = cpu_online_map;
+	cpu_clear(first_cpu(cpu_online_map), downed_cpus);
+	if (num_online_cpus() > 1)
+		pr_notice(NAME "Disabling non-boot CPUs...\n");
+	put_online_cpus();
+
+	for_each_cpu_mask(cpu, downed_cpus) {
+		err = cpu_down(cpu);
+		if (!err)
+			pr_info(NAME "CPU%d is down.\n", cpu);
+		else
+			pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err);
+	}
+	if (num_online_cpus() > 1)
+		pr_warning(NAME "multiple CPUs still online, "
+						"may miss events.\n");
+}
+
+static void leave_uniprocessor(void)
+{
+	int cpu;
+	int err;
+
+	if (cpus_weight(downed_cpus) == 0)
+		return;
+	pr_notice(NAME "Re-enabling CPUs...\n");
+	for_each_cpu_mask(cpu, downed_cpus) {
+		err = cpu_up(cpu);
+		if (!err)
+			pr_info(NAME "enabled CPU%d.\n", cpu);
+		else
+			pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err);
+	}
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+static void enter_uniprocessor(void)
+{
+	if (num_online_cpus() > 1)
+		pr_warning(NAME "multiple CPUs are online, may miss events. "
+			"Suggest booting with maxcpus=1 kernel argument.\n");
+}
+
+static void leave_uniprocessor(void)
+{
+}
+#endif
+
+#if 0 /* XXX: out of order */
+static struct file_operations fops_marker = {
+	.owner =	THIS_MODULE,
+	.write =	write_marker
+};
+#endif
+
+void enable_mmiotrace(void)
+{
+	mutex_lock(&mmiotrace_mutex);
+	if (is_enabled())
+		goto out;
+
+#if 0 /* XXX: tracing does not support text entries */
+	marker_file = debugfs_create_file("marker", 0660, dir, NULL,
+								&fops_marker);
+	if (!marker_file)
+		pr_err(NAME "marker file creation failed.\n");
+#endif
+
+	if (nommiotrace)
+		pr_info(NAME "MMIO tracing disabled.\n");
+	enter_uniprocessor();
+	spin_lock_irq(&trace_lock);
+	atomic_inc(&mmiotrace_enabled);
+	spin_unlock_irq(&trace_lock);
+	pr_info(NAME "enabled.\n");
+out:
+	mutex_unlock(&mmiotrace_mutex);
+}
+
+void disable_mmiotrace(void)
+{
+	mutex_lock(&mmiotrace_mutex);
+	if (!is_enabled())
+		goto out;
+
+	spin_lock_irq(&trace_lock);
+	atomic_dec(&mmiotrace_enabled);
+	BUG_ON(is_enabled());
+	spin_unlock_irq(&trace_lock);
+
+	clear_trace_list(); /* guarantees: no more kmmio callbacks */
+	leave_uniprocessor();
+	if (marker_file) {
+		debugfs_remove(marker_file);
+		marker_file = NULL;
+	}
+
+	pr_info(NAME "disabled.\n");
+out:
+	mutex_unlock(&mmiotrace_mutex);
+}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index afd40054d15..65c6e46bf05 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -141,7 +141,7 @@ static void cpa_flush_all(unsigned long cache)
 {
 	BUG_ON(irqs_disabled());
 
-	on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
+	on_each_cpu(__cpa_flush_all, (void *) cache, 1);
 }
 
 static void __cpa_flush_range(void *arg)
@@ -162,7 +162,7 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
 	BUG_ON(irqs_disabled());
 	WARN_ON(PAGE_ALIGN(start) != start);
 
-	on_each_cpu(__cpa_flush_range, NULL, 1, 1);
+	on_each_cpu(__cpa_flush_range, NULL, 1);
 
 	if (!cache)
 		return;
@@ -262,6 +262,7 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
 
 	return pte_offset_kernel(pmd, address);
 }
+EXPORT_SYMBOL_GPL(lookup_address);
 
 /*
  * Set the new pmd in all the pgds we know about:
@@ -536,8 +537,14 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 		set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
 
 	if (address >= (unsigned long)__va(0) &&
+		address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT))
+		split_page_count(level);
+
+#ifdef CONFIG_X86_64
+	if (address >= (unsigned long)__va(1UL<<32) &&
 		address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
 		split_page_count(level);
+#endif
 
 	/*
 	 * Install the new, split up pagetable. Important details here:
@@ -652,15 +659,24 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	struct cpa_data alias_cpa;
 	int ret = 0;
 
-	if (cpa->pfn > max_pfn_mapped)
+	if (cpa->pfn >= max_pfn_mapped)
 		return 0;
 
+#ifdef CONFIG_X86_64
+	if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
+		return 0;
+#endif
 	/*
 	 * No need to redo, when the primary call touched the direct
 	 * mapping already:
 	 */
-	if (!within(cpa->vaddr, PAGE_OFFSET,
-		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
+	if (!(within(cpa->vaddr, PAGE_OFFSET,
+		    PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
+#ifdef CONFIG_X86_64
+		|| within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
+		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
+#endif
+	)) {
 
 		alias_cpa = *cpa;
 		alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index a885a1019b8..d4585077977 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -449,7 +449,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 	if (retval < 0)
 		return 0;
 
-	if (pfn <= max_pfn_mapped &&
+	if (((pfn < max_low_pfn_mapped) ||
+	     (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) &&
 	    ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
 		free_memtype(offset, offset + size);
 		printk(KERN_INFO
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
new file mode 100644
index 00000000000..efa1911e20c
--- /dev/null
+++ b/arch/x86/mm/pf_in.c
@@ -0,0 +1,489 @@
+/*
+ *  Fault Injection Test harness (FI)
+ *  Copyright (C) Intel Crop.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ *  USA.
+ *
+ */
+
+/*  Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp
+ *  Copyright by Intel Crop., 2002
+ *  Louis Zhuang (louis.zhuang@intel.com)
+ *
+ *  Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007
+ */
+
+#include <linux/module.h>
+#include <linux/ptrace.h> /* struct pt_regs */
+#include "pf_in.h"
+
+#ifdef __i386__
+/* IA32 Manual 3, 2-1 */
+static unsigned char prefix_codes[] = {
+	0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64,
+	0x65, 0x2E, 0x3E, 0x66, 0x67
+};
+/* IA32 Manual 3, 3-432*/
+static unsigned int reg_rop[] = {
+	0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+};
+static unsigned int reg_wop[] = { 0x88, 0x89 };
+static unsigned int imm_wop[] = { 0xC6, 0xC7 };
+/* IA32 Manual 3, 3-432*/
+static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 };
+static unsigned int rw32[] = {
+	0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+};
+static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F };
+static unsigned int mw16[] = { 0xB70F, 0xBF0F };
+static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 };
+static unsigned int mw64[] = {};
+#else /* not __i386__ */
+static unsigned char prefix_codes[] = {
+	0x66, 0x67, 0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36,
+	0xF0, 0xF3, 0xF2,
+	/* REX Prefixes */
+	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f
+};
+/* AMD64 Manual 3, Appendix A*/
+static unsigned int reg_rop[] = {
+	0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+};
+static unsigned int reg_wop[] = { 0x88, 0x89 };
+static unsigned int imm_wop[] = { 0xC6, 0xC7 };
+static unsigned int rw8[] = { 0xC6, 0x88, 0x8A };
+static unsigned int rw32[] = {
+	0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+};
+/* 8 bit only */
+static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F };
+/* 16 bit only */
+static unsigned int mw16[] = { 0xB70F, 0xBF0F };
+/* 16 or 32 bit */
+static unsigned int mw32[] = { 0xC7 };
+/* 16, 32 or 64 bit */
+static unsigned int mw64[] = { 0x89, 0x8B };
+#endif /* not __i386__ */
+
+static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged,
+								int *rexr)
+{
+	int i;
+	unsigned char *p = addr;
+	*shorted = 0;
+	*enlarged = 0;
+	*rexr = 0;
+
+restart:
+	for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) {
+		if (*p == prefix_codes[i]) {
+			if (*p == 0x66)
+				*shorted = 1;
+#ifdef __amd64__
+			if ((*p & 0xf8) == 0x48)
+				*enlarged = 1;
+			if ((*p & 0xf4) == 0x44)
+				*rexr = 1;
+#endif
+			p++;
+			goto restart;
+		}
+	}
+
+	return (p - addr);
+}
+
+static int get_opcode(unsigned char *addr, unsigned int *opcode)
+{
+	int len;
+
+	if (*addr == 0x0F) {
+		/* 0x0F is extension instruction */
+		*opcode = *(unsigned short *)addr;
+		len = 2;
+	} else {
+		*opcode = *addr;
+		len = 1;
+	}
+
+	return len;
+}
+
+#define CHECK_OP_TYPE(opcode, array, type) \
+	for (i = 0; i < ARRAY_SIZE(array); i++) { \
+		if (array[i] == opcode) { \
+			rv = type; \
+			goto exit; \
+		} \
+	}
+
+enum reason_type get_ins_type(unsigned long ins_addr)
+{
+	unsigned int opcode;
+	unsigned char *p;
+	int shorted, enlarged, rexr;
+	int i;
+	enum reason_type rv = OTHERS;
+
+	p = (unsigned char *)ins_addr;
+	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += get_opcode(p, &opcode);
+
+	CHECK_OP_TYPE(opcode, reg_rop, REG_READ);
+	CHECK_OP_TYPE(opcode, reg_wop, REG_WRITE);
+	CHECK_OP_TYPE(opcode, imm_wop, IMM_WRITE);
+
+exit:
+	return rv;
+}
+#undef CHECK_OP_TYPE
+
+static unsigned int get_ins_reg_width(unsigned long ins_addr)
+{
+	unsigned int opcode;
+	unsigned char *p;
+	int i, shorted, enlarged, rexr;
+
+	p = (unsigned char *)ins_addr;
+	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += get_opcode(p, &opcode);
+
+	for (i = 0; i < ARRAY_SIZE(rw8); i++)
+		if (rw8[i] == opcode)
+			return 1;
+
+	for (i = 0; i < ARRAY_SIZE(rw32); i++)
+		if (rw32[i] == opcode)
+			return (shorted ? 2 : (enlarged ? 8 : 4));
+
+	printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
+	return 0;
+}
+
+unsigned int get_ins_mem_width(unsigned long ins_addr)
+{
+	unsigned int opcode;
+	unsigned char *p;
+	int i, shorted, enlarged, rexr;
+
+	p = (unsigned char *)ins_addr;
+	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += get_opcode(p, &opcode);
+
+	for (i = 0; i < ARRAY_SIZE(mw8); i++)
+		if (mw8[i] == opcode)
+			return 1;
+
+	for (i = 0; i < ARRAY_SIZE(mw16); i++)
+		if (mw16[i] == opcode)
+			return 2;
+
+	for (i = 0; i < ARRAY_SIZE(mw32); i++)
+		if (mw32[i] == opcode)
+			return shorted ? 2 : 4;
+
+	for (i = 0; i < ARRAY_SIZE(mw64); i++)
+		if (mw64[i] == opcode)
+			return shorted ? 2 : (enlarged ? 8 : 4);
+
+	printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
+	return 0;
+}
+
+/*
+ * Define register ident in mod/rm byte.
+ * Note: these are NOT the same as in ptrace-abi.h.
+ */
+enum {
+	arg_AL = 0,
+	arg_CL = 1,
+	arg_DL = 2,
+	arg_BL = 3,
+	arg_AH = 4,
+	arg_CH = 5,
+	arg_DH = 6,
+	arg_BH = 7,
+
+	arg_AX = 0,
+	arg_CX = 1,
+	arg_DX = 2,
+	arg_BX = 3,
+	arg_SP = 4,
+	arg_BP = 5,
+	arg_SI = 6,
+	arg_DI = 7,
+#ifdef __amd64__
+	arg_R8  = 8,
+	arg_R9  = 9,
+	arg_R10 = 10,
+	arg_R11 = 11,
+	arg_R12 = 12,
+	arg_R13 = 13,
+	arg_R14 = 14,
+	arg_R15 = 15
+#endif
+};
+
+static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
+{
+	unsigned char *rv = NULL;
+
+	switch (no) {
+	case arg_AL:
+		rv = (unsigned char *)&regs->ax;
+		break;
+	case arg_BL:
+		rv = (unsigned char *)&regs->bx;
+		break;
+	case arg_CL:
+		rv = (unsigned char *)&regs->cx;
+		break;
+	case arg_DL:
+		rv = (unsigned char *)&regs->dx;
+		break;
+	case arg_AH:
+		rv = 1 + (unsigned char *)&regs->ax;
+		break;
+	case arg_BH:
+		rv = 1 + (unsigned char *)&regs->bx;
+		break;
+	case arg_CH:
+		rv = 1 + (unsigned char *)&regs->cx;
+		break;
+	case arg_DH:
+		rv = 1 + (unsigned char *)&regs->dx;
+		break;
+#ifdef __amd64__
+	case arg_R8:
+		rv = (unsigned char *)&regs->r8;
+		break;
+	case arg_R9:
+		rv = (unsigned char *)&regs->r9;
+		break;
+	case arg_R10:
+		rv = (unsigned char *)&regs->r10;
+		break;
+	case arg_R11:
+		rv = (unsigned char *)&regs->r11;
+		break;
+	case arg_R12:
+		rv = (unsigned char *)&regs->r12;
+		break;
+	case arg_R13:
+		rv = (unsigned char *)&regs->r13;
+		break;
+	case arg_R14:
+		rv = (unsigned char *)&regs->r14;
+		break;
+	case arg_R15:
+		rv = (unsigned char *)&regs->r15;
+		break;
+#endif
+	default:
+		printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
+		break;
+	}
+	return rv;
+}
+
+static unsigned long *get_reg_w32(int no, struct pt_regs *regs)
+{
+	unsigned long *rv = NULL;
+
+	switch (no) {
+	case arg_AX:
+		rv = &regs->ax;
+		break;
+	case arg_BX:
+		rv = &regs->bx;
+		break;
+	case arg_CX:
+		rv = &regs->cx;
+		break;
+	case arg_DX:
+		rv = &regs->dx;
+		break;
+	case arg_SP:
+		rv = &regs->sp;
+		break;
+	case arg_BP:
+		rv = &regs->bp;
+		break;
+	case arg_SI:
+		rv = &regs->si;
+		break;
+	case arg_DI:
+		rv = &regs->di;
+		break;
+#ifdef __amd64__
+	case arg_R8:
+		rv = &regs->r8;
+		break;
+	case arg_R9:
+		rv = &regs->r9;
+		break;
+	case arg_R10:
+		rv = &regs->r10;
+		break;
+	case arg_R11:
+		rv = &regs->r11;
+		break;
+	case arg_R12:
+		rv = &regs->r12;
+		break;
+	case arg_R13:
+		rv = &regs->r13;
+		break;
+	case arg_R14:
+		rv = &regs->r14;
+		break;
+	case arg_R15:
+		rv = &regs->r15;
+		break;
+#endif
+	default:
+		printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
+	}
+
+	return rv;
+}
+
+unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
+{
+	unsigned int opcode;
+	unsigned char mod_rm;
+	int reg;
+	unsigned char *p;
+	int i, shorted, enlarged, rexr;
+	unsigned long rv;
+
+	p = (unsigned char *)ins_addr;
+	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += get_opcode(p, &opcode);
+	for (i = 0; i < ARRAY_SIZE(reg_rop); i++)
+		if (reg_rop[i] == opcode) {
+			rv = REG_READ;
+			goto do_work;
+		}
+
+	for (i = 0; i < ARRAY_SIZE(reg_wop); i++)
+		if (reg_wop[i] == opcode) {
+			rv = REG_WRITE;
+			goto do_work;
+		}
+
+	printk(KERN_ERR "mmiotrace: Not a register instruction, opcode "
+							"0x%02x\n", opcode);
+	goto err;
+
+do_work:
+	mod_rm = *p;
+	reg = ((mod_rm >> 3) & 0x7) | (rexr << 3);
+	switch (get_ins_reg_width(ins_addr)) {
+	case 1:
+		return *get_reg_w8(reg, regs);
+
+	case 2:
+		return *(unsigned short *)get_reg_w32(reg, regs);
+
+	case 4:
+		return *(unsigned int *)get_reg_w32(reg, regs);
+
+#ifdef __amd64__
+	case 8:
+		return *(unsigned long *)get_reg_w32(reg, regs);
+#endif
+
+	default:
+		printk(KERN_ERR "mmiotrace: Error width# %d\n", reg);
+	}
+
+err:
+	return 0;
+}
+
+unsigned long get_ins_imm_val(unsigned long ins_addr)
+{
+	unsigned int opcode;
+	unsigned char mod_rm;
+	unsigned char mod;
+	unsigned char *p;
+	int i, shorted, enlarged, rexr;
+	unsigned long rv;
+
+	p = (unsigned char *)ins_addr;
+	p += skip_prefix(p, &shorted, &enlarged, &rexr);
+	p += get_opcode(p, &opcode);
+	for (i = 0; i < ARRAY_SIZE(imm_wop); i++)
+		if (imm_wop[i] == opcode) {
+			rv = IMM_WRITE;
+			goto do_work;
+		}
+
+	printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode "
+							"0x%02x\n", opcode);
+	goto err;
+
+do_work:
+	mod_rm = *p;
+	mod = mod_rm >> 6;
+	p++;
+	switch (mod) {
+	case 0:
+		/* if r/m is 5 we have a 32 disp (IA32 Manual 3, Table 2-2)  */
+		/* AMD64: XXX Check for address size prefix? */
+		if ((mod_rm & 0x7) == 0x5)
+			p += 4;
+		break;
+
+	case 1:
+		p += 1;
+		break;
+
+	case 2:
+		p += 4;
+		break;
+
+	case 3:
+	default:
+		printk(KERN_ERR "mmiotrace: not a memory access instruction "
+						"at 0x%lx, rm_mod=0x%02x\n",
+						ins_addr, mod_rm);
+	}
+
+	switch (get_ins_reg_width(ins_addr)) {
+	case 1:
+		return *(unsigned char *)p;
+
+	case 2:
+		return *(unsigned short *)p;
+
+	case 4:
+		return *(unsigned int *)p;
+
+#ifdef __amd64__
+	case 8:
+		return *(unsigned long *)p;
+#endif
+
+	default:
+		printk(KERN_ERR "mmiotrace: Error: width.\n");
+	}
+
+err:
+	return 0;
+}
diff --git a/arch/x86/mm/pf_in.h b/arch/x86/mm/pf_in.h
new file mode 100644
index 00000000000..e05341a51a2
--- /dev/null
+++ b/arch/x86/mm/pf_in.h
@@ -0,0 +1,39 @@
+/*
+ *  Fault Injection Test harness (FI)
+ *  Copyright (C) Intel Crop.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ *  USA.
+ *
+ */
+
+#ifndef __PF_H_
+#define __PF_H_
+
+enum reason_type {
+	NOT_ME,	/* page fault is not in regions */
+	NOTHING,	/* access others point in regions */
+	REG_READ,	/* read from addr to reg */
+	REG_WRITE,	/* write from reg to addr */
+	IMM_WRITE,	/* write from imm to addr */
+	OTHERS	/* Other instructions can not intercept */
+};
+
+enum reason_type get_ins_type(unsigned long ins_addr);
+unsigned int get_ins_mem_width(unsigned long ins_addr);
+unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs);
+unsigned long get_ins_imm_val(unsigned long ins_addr);
+
+#endif /* __PF_H_ */
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 828907d001e..b4becbf8c57 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -141,7 +141,6 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
 	__flush_tlb_one(vaddr);
 }
 
-static int fixmaps;
 unsigned long __FIXADDR_TOP = 0xfffff000;
 EXPORT_SYMBOL(__FIXADDR_TOP);
 
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index f41d67f8f83..1eb2973a301 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -156,10 +156,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity)
 
 	num_memory_chunks++;
 
-	printk(KERN_DEBUG "Memory range %08lx to %08lx (type %x)"
+	printk(KERN_DEBUG "Memory range %08lx to %08lx"
 			  " in proximity domain %02x %s\n",
 		start_pfn, end_pfn,
-		memory_affinity->memory_type,
 		pxm,
 		((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
 		 "enabled and removable" : "enabled" ) );
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 0fd67b81a8b..1b4763e26ea 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -100,7 +100,19 @@ static __init inline int srat_disabled(void)
 /* Callback for SLIT parsing */
 void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
 {
-	acpi_slit = slit;
+	unsigned length;
+	unsigned long phys;
+
+	length = slit->header.length;
+	phys = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, length,
+		 PAGE_SIZE);
+
+	if (phys == -1L)
+		panic(" Can not save slit!\n");
+
+	acpi_slit = __va(phys);
+	memcpy(acpi_slit, slit, length);
+	reserve_early(phys, phys + length, "ACPI SLIT");
 }
 
 /* Callback for Proximity Domain -> LAPIC mapping */
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
new file mode 100644
index 00000000000..d877c5b423e
--- /dev/null
+++ b/arch/x86/mm/testmmiotrace.c
@@ -0,0 +1,71 @@
+/*
+ * Written by Pekka Paalanen, 2008 <pq@iki.fi>
+ */
+#include <linux/module.h>
+#include <linux/io.h>
+
+#define MODULE_NAME "testmmiotrace"
+
+static unsigned long mmio_address;
+module_param(mmio_address, ulong, 0);
+MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
+
+static void do_write_test(void __iomem *p)
+{
+	unsigned int i;
+	for (i = 0; i < 256; i++)
+		iowrite8(i, p + i);
+	for (i = 1024; i < (5 * 1024); i += 2)
+		iowrite16(i * 12 + 7, p + i);
+	for (i = (5 * 1024); i < (16 * 1024); i += 4)
+		iowrite32(i * 212371 + 13, p + i);
+}
+
+static void do_read_test(void __iomem *p)
+{
+	unsigned int i;
+	for (i = 0; i < 256; i++)
+		ioread8(p + i);
+	for (i = 1024; i < (5 * 1024); i += 2)
+		ioread16(p + i);
+	for (i = (5 * 1024); i < (16 * 1024); i += 4)
+		ioread32(p + i);
+}
+
+static void do_test(void)
+{
+	void __iomem *p = ioremap_nocache(mmio_address, 0x4000);
+	if (!p) {
+		pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
+		return;
+	}
+	do_write_test(p);
+	do_read_test(p);
+	iounmap(p);
+}
+
+static int __init init(void)
+{
+	if (mmio_address == 0) {
+		pr_err(MODULE_NAME ": you have to use the module argument "
+							"mmio_address.\n");
+		pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS"
+				" YOU REALLY KNOW WHAT YOU ARE DOING!\n");
+		return -ENXIO;
+	}
+
+	pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx "
+					"in PCI address space, and writing "
+					"rubbish in there.\n", mmio_address);
+	do_test();
+	return 0;
+}
+
+static void __exit cleanup(void)
+{
+	pr_debug(MODULE_NAME ": unloaded.\n");
+}
+
+module_init(init);
+module_exit(cleanup);
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 2b6ad5b9f9d..7f3329b55d2 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -218,8 +218,8 @@ static int nmi_setup(void)
 		}
 
 	}
-	on_each_cpu(nmi_save_registers, NULL, 0, 1);
-	on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
+	on_each_cpu(nmi_save_registers, NULL, 1);
+	on_each_cpu(nmi_cpu_setup, NULL, 1);
 	nmi_enabled = 1;
 	return 0;
 }
@@ -271,7 +271,7 @@ static void nmi_shutdown(void)
 {
 	struct op_msrs *msrs = &get_cpu_var(cpu_msrs);
 	nmi_enabled = 0;
-	on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
+	on_each_cpu(nmi_cpu_shutdown, NULL, 1);
 	unregister_die_notifier(&profile_exceptions_nb);
 	model->shutdown(msrs);
 	free_msrs();
@@ -286,7 +286,7 @@ static void nmi_cpu_start(void *dummy)
 
 static int nmi_start(void)
 {
-	on_each_cpu(nmi_cpu_start, NULL, 0, 1);
+	on_each_cpu(nmi_cpu_start, NULL, 1);
 	return 0;
 }
 
@@ -298,7 +298,7 @@ static void nmi_cpu_stop(void *dummy)
 
 static void nmi_stop(void)
 {
-	on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
+	on_each_cpu(nmi_cpu_stop, NULL, 1);
 }
 
 struct op_counter_config counter_config[OP_MAX_COUNTER];
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index c5c8e485fc4..e515e8db842 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -1,5 +1,17 @@
-ifeq ($(CONFIG_X86_32),y)
-include ${srctree}/arch/x86/pci/Makefile_32
-else
-include ${srctree}/arch/x86/pci/Makefile_64
-endif
+obj-y				:= i386.o init.o
+
+obj-$(CONFIG_PCI_BIOS)		+= pcbios.o
+obj-$(CONFIG_PCI_MMCONFIG)	+= mmconfig_$(BITS).o direct.o mmconfig-shared.o
+obj-$(CONFIG_PCI_DIRECT)	+= direct.o
+obj-$(CONFIG_PCI_OLPC)		+= olpc.o
+
+pci-y				:= fixup.o
+pci-$(CONFIG_ACPI)		+= acpi.o
+pci-y				+= legacy.o irq.o
+
+pci-$(CONFIG_X86_VISWS)		+= visws.o
+
+pci-$(CONFIG_X86_NUMAQ)		+= numa.o
+
+obj-y				+= $(pci-y) common.o early.o
+obj-y				+= amd_bus.o
diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32
deleted file mode 100644
index a34fbf55792..00000000000
--- a/arch/x86/pci/Makefile_32
+++ /dev/null
@@ -1,26 +0,0 @@
-obj-y				:= i386.o init.o
-
-obj-$(CONFIG_PCI_BIOS)		+= pcbios.o
-obj-$(CONFIG_PCI_MMCONFIG)	+= mmconfig_32.o direct.o mmconfig-shared.o
-obj-$(CONFIG_PCI_DIRECT)	+= direct.o
-obj-$(CONFIG_PCI_OLPC)		+= olpc.o
-
-pci-y				:= fixup.o
-
-# Do not change the ordering here. There is a nasty init function
-# ordering dependency which breaks when you move acpi.o below
-# legacy/irq.o
-pci-$(CONFIG_ACPI)		+= acpi.o
-pci-y				+= legacy.o irq.o
-
-# Careful: VISWS overrule the pci-y above. The colons are
-# therefor correct. This needs a proper fix by distangling the code.
-pci-$(CONFIG_X86_VISWS)		:= visws.o fixup.o
-
-pci-$(CONFIG_X86_NUMAQ)		+= numa.o
-
-# Necessary for NUMAQ as well
-pci-$(CONFIG_NUMA)		+= mp_bus_to_node.o
-
-obj-y				+= $(pci-y) common.o early.o
-obj-y				+= amd_bus.o
diff --git a/arch/x86/pci/Makefile_64 b/arch/x86/pci/Makefile_64
deleted file mode 100644
index fd47068c95d..00000000000
--- a/arch/x86/pci/Makefile_64
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# Makefile for X86_64 specific PCI routines
-#
-# Reuse the i386 PCI subsystem
-#
-EXTRA_CFLAGS += -Iarch/x86/pci
-
-obj-y		:= i386.o
-obj-$(CONFIG_PCI_DIRECT)+= direct.o
-obj-y		+= fixup.o init.o
-obj-$(CONFIG_ACPI)	+= acpi.o
-obj-y			+= legacy.o irq.o common.o early.o
-# mmconfig has a 64bit special
-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_64.o direct.o mmconfig-shared.o
-
-obj-y		+= amd_bus.o
-
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 4fa52d3dc84..19af06927fb 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -223,7 +223,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
 	return bus;
 }
 
-static int __init pci_acpi_init(void)
+int __init pci_acpi_init(void)
 {
 	struct pci_dev *dev = NULL;
 
@@ -257,4 +257,3 @@ static int __init pci_acpi_init(void)
 
 	return 0;
 }
-subsys_initcall(pci_acpi_init);
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index d02c598451e..dbf53236971 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -1,44 +1,25 @@
 #include <linux/init.h>
 #include <linux/pci.h>
+#include <linux/topology.h>
 #include "pci.h"
 
 #ifdef CONFIG_X86_64
-
 #include <asm/pci-direct.h>
 #include <asm/mpspec.h>
 #include <linux/cpumask.h>
-#include <linux/topology.h>
+#endif
 
 /*
  * This discovers the pcibus <-> node mapping on AMD K8.
  * also get peer root bus resource for io,mmio
  */
 
-
-/*
- * sub bus (transparent) will use entres from 3 to store extra from root,
- * so need to make sure have enought slot there, increase PCI_BUS_NUM_RESOURCES?
- */
-#define RES_NUM 16
-struct pci_root_info {
-	char name[12];
-	unsigned int res_num;
-	struct resource res[RES_NUM];
-	int bus_min;
-	int bus_max;
-	int node;
-	int link;
-};
-
-/* 4 at this time, it may become to 32 */
-#define PCI_ROOT_NR 4
-static int pci_root_num;
-static struct pci_root_info pci_root_info[PCI_ROOT_NR];
-
 #ifdef CONFIG_NUMA
 
 #define BUS_NR 256
 
+#ifdef CONFIG_X86_64
+
 static int mp_bus_to_node[BUS_NR];
 
 void set_mp_bus_to_node(int busnum, int node)
@@ -65,7 +46,52 @@ int get_mp_bus_to_node(int busnum)
 
 	return node;
 }
-#endif
+
+#else /* CONFIG_X86_32 */
+
+static unsigned char mp_bus_to_node[BUS_NR];
+
+void set_mp_bus_to_node(int busnum, int node)
+{
+	if (busnum >= 0 &&  busnum < BUS_NR)
+	mp_bus_to_node[busnum] = (unsigned char) node;
+}
+
+int get_mp_bus_to_node(int busnum)
+{
+	int node;
+
+	if (busnum < 0 || busnum > (BUS_NR - 1))
+		return 0;
+	node = mp_bus_to_node[busnum];
+	return node;
+}
+
+#endif /* CONFIG_X86_32 */
+
+#endif /* CONFIG_NUMA */
+
+#ifdef CONFIG_X86_64
+
+/*
+ * sub bus (transparent) will use entres from 3 to store extra from root,
+ * so need to make sure have enought slot there, increase PCI_BUS_NUM_RESOURCES?
+ */
+#define RES_NUM 16
+struct pci_root_info {
+	char name[12];
+	unsigned int res_num;
+	struct resource res[RES_NUM];
+	int bus_min;
+	int bus_max;
+	int node;
+	int link;
+};
+
+/* 4 at this time, it may become to 32 */
+#define PCI_ROOT_NR 4
+static int pci_root_num;
+static struct pci_root_info pci_root_info[PCI_ROOT_NR];
 
 void set_pci_bus_resources_arch_default(struct pci_bus *b)
 {
@@ -552,7 +578,7 @@ static int __init enable_pci_io_ecs(void)
 	/* assume all cpus from fam10h have IO ECS */
         if (boot_cpu_data.x86 < 0x10)
 		return 0;
-	on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1, 1);
+	on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1);
 	pci_probe |= PCI_HAS_IO_ECS;
 	return 0;
 }
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 00a319cd5be..1485a26ddce 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -20,6 +20,7 @@
 unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
 				PCI_PROBE_MMCONF;
 
+unsigned int pci_early_dump_regs;
 static int pci_bf_sort;
 int pci_routeirq;
 int noioapicquirk;
@@ -33,7 +34,7 @@ struct pci_raw_ops *raw_pci_ext_ops;
 int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
 						int reg, int len, u32 *val)
 {
-	if (reg < 256 && raw_pci_ops)
+	if (domain == 0 && reg < 256 && raw_pci_ops)
 		return raw_pci_ops->read(domain, bus, devfn, reg, len, val);
 	if (raw_pci_ext_ops)
 		return raw_pci_ext_ops->read(domain, bus, devfn, reg, len, val);
@@ -43,7 +44,7 @@ int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
 int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
 						int reg, int len, u32 val)
 {
-	if (reg < 256 && raw_pci_ops)
+	if (domain == 0 && reg < 256 && raw_pci_ops)
 		return raw_pci_ops->write(domain, bus, devfn, reg, len, val);
 	if (raw_pci_ext_ops)
 		return raw_pci_ext_ops->write(domain, bus, devfn, reg, len, val);
@@ -123,6 +124,21 @@ void __init dmi_check_skip_isa_align(void)
 	dmi_check_system(can_skip_pciprobe_dmi_table);
 }
 
+static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
+{
+	struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE];
+
+	if (pci_probe & PCI_NOASSIGN_ROMS) {
+		if (rom_r->parent)
+			return;
+		if (rom_r->start) {
+			/* we deal with BIOS assigned ROM later */
+			return;
+		}
+		rom_r->start = rom_r->end = rom_r->flags = 0;
+	}
+}
+
 /*
  *  Called after each bus is probed, but before its children
  *  are examined.
@@ -130,7 +146,11 @@ void __init dmi_check_skip_isa_align(void)
 
 void __devinit  pcibios_fixup_bus(struct pci_bus *b)
 {
+	struct pci_dev *dev;
+
 	pci_read_bridge_bases(b);
+	list_for_each_entry(dev, &b->devices, bus_list)
+		pcibios_fixup_device_resources(dev);
 }
 
 /*
@@ -386,7 +406,7 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
 
 extern u8 pci_cache_line_size;
 
-static int __init pcibios_init(void)
+int __init pcibios_init(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 
@@ -413,8 +433,6 @@ static int __init pcibios_init(void)
 	return 0;
 }
 
-subsys_initcall(pcibios_init);
-
 char * __devinit  pcibios_setup(char *str)
 {
 	if (!strcmp(str, "off")) {
@@ -485,12 +503,18 @@ char * __devinit  pcibios_setup(char *str)
 	else if (!strcmp(str, "rom")) {
 		pci_probe |= PCI_ASSIGN_ROMS;
 		return NULL;
+	} else if (!strcmp(str, "norom")) {
+		pci_probe |= PCI_NOASSIGN_ROMS;
+		return NULL;
 	} else if (!strcmp(str, "assign-busses")) {
 		pci_probe |= PCI_ASSIGN_ALL_BUSSES;
 		return NULL;
 	} else if (!strcmp(str, "use_crs")) {
 		pci_probe |= PCI_USE__CRS;
 		return NULL;
+	} else if (!strcmp(str, "earlydump")) {
+		pci_early_dump_regs = 1;
+		return NULL;
 	} else if (!strcmp(str, "routeirq")) {
 		pci_routeirq = 1;
 		return NULL;
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index 42df4b6606d..858dbe3399f 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -49,7 +49,14 @@ void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val)
 {
 	PDprintk("%x writing to %x: %x\n", slot, offset, val);
 	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
-	outb(val, 0xcfc);
+	outb(val, 0xcfc + (offset&3));
+}
+
+void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val)
+{
+	PDprintk("%x writing to %x: %x\n", slot, offset, val);
+	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
+	outw(val, 0xcfc + (offset&2));
 }
 
 int early_pci_allowed(void)
@@ -57,3 +64,54 @@ int early_pci_allowed(void)
 	return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) ==
 			PCI_PROBE_CONF1;
 }
+
+void early_dump_pci_device(u8 bus, u8 slot, u8 func)
+{
+	int i;
+	int j;
+	u32 val;
+
+	printk("PCI: %02x:%02x:%02x", bus, slot, func);
+
+	for (i = 0; i < 256; i += 4) {
+		if (!(i & 0x0f))
+			printk("\n%04x:",i);
+
+		val = read_pci_config(bus, slot, func, i);
+		for (j = 0; j < 4; j++) {
+			printk(" %02x", val & 0xff);
+			val >>= 8;
+		}
+	}
+	printk("\n");
+}
+
+void early_dump_pci_devices(void)
+{
+	unsigned bus, slot, func;
+
+	if (!early_pci_allowed())
+		return;
+
+	for (bus = 0; bus < 256; bus++) {
+		for (slot = 0; slot < 32; slot++) {
+			for (func = 0; func < 8; func++) {
+				u32 class;
+				u8 type;
+				class = read_pci_config(bus, slot, func,
+							PCI_CLASS_REVISION);
+				if (class == 0xffffffff)
+					break;
+
+				early_dump_pci_device(bus, slot, func);
+
+				/* No multi-function device? */
+				type = read_pci_config_byte(bus, slot, func,
+							       PCI_HEADER_TYPE);
+				if (!(type & 0x80))
+					break;
+			}
+		}
+	}
+}
+
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 6ccd7a108cd..2aafb67dc5f 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -334,7 +334,9 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 		flags = new_flags;
 	}
 
-	if (vma->vm_pgoff <= max_pfn_mapped &&
+	if (((vma->vm_pgoff < max_low_pfn_mapped) ||
+	     (vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) &&
+	      vma->vm_pgoff < max_pfn_mapped)) &&
 	    ioremap_change_attr((unsigned long)__va(addr), len, flags)) {
 		free_memtype(addr, addr + len);
 		return -EINVAL;
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index b821f4462d9..d6c950f8185 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -4,7 +4,7 @@
 
 /* arch_initcall has too random ordering, so call the initializers
    in the right sequence from here. */
-static __init int pci_access_init(void)
+static __init int pci_arch_init(void)
 {
 #ifdef CONFIG_PCI_DIRECT
 	int type = 0;
@@ -40,4 +40,4 @@ static __init int pci_access_init(void)
 
 	return 0;
 }
-arch_initcall(pci_access_init);
+arch_initcall(pci_arch_init);
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index f0859de23e2..6a06a2eb059 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -45,7 +45,8 @@ struct irq_router {
 	char *name;
 	u16 vendor, device;
 	int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
-	int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new);
+	int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq,
+		int new);
 };
 
 struct irq_router_handler {
@@ -77,7 +78,8 @@ static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr)
 	for (i = 0; i < rt->size; i++)
 		sum += addr[i];
 	if (!sum) {
-		DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt);
+		DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n",
+			rt);
 		return rt;
 	}
 	return NULL;
@@ -183,7 +185,8 @@ static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset,
 	return (nr & 1) ? (x >> 4) : (x & 0xf);
 }
 
-static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val)
+static void write_config_nybble(struct pci_dev *router, unsigned offset,
+	unsigned nr, unsigned int val)
 {
 	u8 x;
 	unsigned reg = offset + (nr >> 1);
@@ -467,7 +470,8 @@ static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int
 	return inb(0xc01) & 0xf;
 }
 
-static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev,
+	int pirq, int irq)
 {
 	outb(pirq, 0xc00);
 	outb(irq, 0xc01);
@@ -660,7 +664,8 @@ static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router
 }
 
 
-static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+static __init int serverworks_router_probe(struct irq_router *r,
+		struct pci_dev *router, u16 device)
 {
 	switch (device) {
 	case PCI_DEVICE_ID_SERVERWORKS_OSB4:
@@ -827,10 +832,12 @@ static void __init pirq_find_router(struct irq_router *r)
 
 	for (h = pirq_routers; h->vendor; h++) {
 		/* First look for a router match */
-		if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device))
+		if (rt->rtr_vendor == h->vendor &&
+			h->probe(r, pirq_router_dev, rt->rtr_device))
 			break;
 		/* Fall back to a device match */
-		if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device))
+		if (pirq_router_dev->vendor == h->vendor &&
+			h->probe(r, pirq_router_dev, pirq_router_dev->device))
 			break;
 	}
 	printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
@@ -845,11 +852,13 @@ static void __init pirq_find_router(struct irq_router *r)
 static struct irq_info *pirq_get_info(struct pci_dev *dev)
 {
 	struct irq_routing_table *rt = pirq_table;
-	int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info);
+	int entries = (rt->size - sizeof(struct irq_routing_table)) /
+		sizeof(struct irq_info);
 	struct irq_info *info;
 
 	for (info = rt->slots; entries--; info++)
-		if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
+		if (info->bus == dev->bus->number &&
+			PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
 			return info;
 	return NULL;
 }
@@ -890,7 +899,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 		DBG(" -> not routed\n" KERN_DEBUG);
 		return 0;
 	}
-	DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs);
+	DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask,
+		pirq_table->exclusive_irqs);
 	mask &= pcibios_irq_mask;
 
 	/* Work around broken HP Pavilion Notebooks which assign USB to
@@ -903,7 +913,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 	}
 
 	/* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */
-	if (acer_tm360_irqrouting && dev->irq == 11 && dev->vendor == PCI_VENDOR_ID_O2) {
+	if (acer_tm360_irqrouting && dev->irq == 11 &&
+		dev->vendor == PCI_VENDOR_ID_O2) {
 		pirq = 0x68;
 		mask = 0x400;
 		dev->irq = r->get(pirq_router_dev, dev, pirq);
@@ -920,15 +931,16 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 			newirq = 0;
 		else
 			printk("\n" KERN_WARNING
-			"PCI: IRQ %i for device %s doesn't match PIRQ mask "
-			"- try pci=usepirqmask\n" KERN_DEBUG, newirq,
-			pci_name(dev));
+				"PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n"
+				KERN_DEBUG, newirq,
+				pci_name(dev));
 	}
 	if (!newirq && assign) {
 		for (i = 0; i < 16; i++) {
 			if (!(mask & (1 << i)))
 				continue;
-			if (pirq_penalty[i] < pirq_penalty[newirq] && can_request_irq(i, IRQF_SHARED))
+			if (pirq_penalty[i] < pirq_penalty[newirq] &&
+				can_request_irq(i, IRQF_SHARED))
 				newirq = i;
 		}
 	}
@@ -944,7 +956,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 		DBG(" -> got IRQ %d\n", irq);
 		msg = "Found";
 		eisa_set_level_irq(irq);
-	} else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
+	} else if (newirq && r->set &&
+		(dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
 		DBG(" -> assigning IRQ %d", newirq);
 		if (r->set(pirq_router_dev, dev, pirq, newirq)) {
 			eisa_set_level_irq(newirq);
@@ -962,7 +975,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 		} else
 			return 0;
 	}
-	printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, pci_name(dev));
+	printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq,
+		pci_name(dev));
 
 	/* Update IRQ for all devices with the same pirq value */
 	while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) {
@@ -974,7 +988,10 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 		if (!info)
 			continue;
 		if (info->irq[pin].link == pirq) {
-			/* We refuse to override the dev->irq information. Give a warning! */
+			/*
+			 * We refuse to override the dev->irq
+			 * information. Give a warning!
+			 */
 			if (dev2->irq && dev2->irq != irq && \
 			(!(pci_probe & PCI_USE_PIRQ_MASK) || \
 			((1 << dev2->irq) & mask))) {
@@ -987,7 +1004,9 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 			dev2->irq = irq;
 			pirq_penalty[irq]++;
 			if (dev != dev2)
-				printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, pci_name(dev2));
+				printk(KERN_INFO
+					"PCI: Sharing IRQ %d with %s\n",
+					irq, pci_name(dev2));
 		}
 	}
 	return 1;
@@ -1001,15 +1020,21 @@ static void __init pcibios_fixup_irqs(void)
 	DBG(KERN_DEBUG "PCI: IRQ fixup\n");
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
 		/*
-		 * If the BIOS has set an out of range IRQ number, just ignore it.
-		 * Also keep track of which IRQ's are already in use.
+		 * If the BIOS has set an out of range IRQ number, just
+		 * ignore it.  Also keep track of which IRQ's are
+		 * already in use.
 		 */
 		if (dev->irq >= 16) {
-			DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq);
+			DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n",
+				pci_name(dev), dev->irq);
 			dev->irq = 0;
 		}
-		/* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */
-		if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000)
+		/*
+		 * If the IRQ is already assigned to a PCI device,
+		 * ignore its ISA use penalty
+		 */
+		if (pirq_penalty[dev->irq] >= 100 &&
+				pirq_penalty[dev->irq] < 100000)
 			pirq_penalty[dev->irq] = 0;
 		pirq_penalty[dev->irq]++;
 	}
@@ -1025,8 +1050,13 @@ static void __init pcibios_fixup_irqs(void)
 			int irq;
 
 			if (pin) {
-				pin--;		/* interrupt pins are numbered starting from 1 */
-				irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+				/*
+				 * interrupt pins are numbered starting
+				 * from 1
+				 */
+				pin--;
+				irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
+					PCI_SLOT(dev->devfn), pin);
 	/*
 	 * Busses behind bridges are typically not listed in the MP-table.
 	 * In this case we have to look up the IRQ based on the parent bus,
@@ -1067,7 +1097,8 @@ static int __init fix_broken_hp_bios_irq9(const struct dmi_system_id *d)
 {
 	if (!broken_hp_bios_irq9) {
 		broken_hp_bios_irq9 = 1;
-		printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
+		printk(KERN_INFO "%s detected - fixing broken IRQ routing\n",
+			d->ident);
 	}
 	return 0;
 }
@@ -1080,7 +1111,8 @@ static int __init fix_acer_tm360_irqrouting(const struct dmi_system_id *d)
 {
 	if (!acer_tm360_irqrouting) {
 		acer_tm360_irqrouting = 1;
-		printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
+		printk(KERN_INFO "%s detected - fixing broken IRQ routing\n",
+			d->ident);
 	}
 	return 0;
 }
@@ -1092,7 +1124,8 @@ static struct dmi_system_id __initdata pciirq_dmi_table[] = {
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
 			DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"),
-			DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook Model GE"),
+			DMI_MATCH(DMI_PRODUCT_VERSION,
+				"HP Pavilion Notebook Model GE"),
 			DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"),
 		},
 	},
@@ -1107,7 +1140,7 @@ static struct dmi_system_id __initdata pciirq_dmi_table[] = {
 	{ }
 };
 
-static int __init pcibios_irq_init(void)
+int __init pcibios_irq_init(void)
 {
 	DBG(KERN_DEBUG "PCI: IRQ init\n");
 
@@ -1131,7 +1164,10 @@ static int __init pcibios_irq_init(void)
 				if (!(pirq_table->exclusive_irqs & (1 << i)))
 					pirq_penalty[i] += 100;
 		}
-		/* If we're using the I/O APIC, avoid using the PCI IRQ routing table */
+		/*
+		 * If we're using the I/O APIC, avoid using the PCI IRQ
+		 * routing table
+		 */
 		if (io_apic_assign_pci_irqs)
 			pirq_table = NULL;
 	}
@@ -1142,9 +1178,6 @@ static int __init pcibios_irq_init(void)
 	return 0;
 }
 
-subsys_initcall(pcibios_irq_init);
-
-
 static void pirq_penalize_isa_irq(int irq, int active)
 {
 	/*
@@ -1178,7 +1211,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
 	if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
 		char *msg = "";
 
-		pin--;		/* interrupt pins are numbered starting from 1 */
+		pin--; /* interrupt pins are numbered starting from 1 */
 
 		if (io_apic_assign_pci_irqs) {
 			int irq;
@@ -1198,13 +1231,16 @@ static int pirq_enable_irq(struct pci_dev *dev)
 				irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
 						PCI_SLOT(bridge->devfn), pin);
 				if (irq >= 0)
-					printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
-						pci_name(bridge), 'A' + pin, irq);
+					printk(KERN_WARNING
+						"PCI: using PPB %s[%c] to get irq %d\n",
+						pci_name(bridge),
+						'A' + pin, irq);
 				dev = bridge;
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
-				printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
+				printk(KERN_INFO
+					"PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
 					pci_name(dev), 'A' + pin, irq);
 				dev->irq = irq;
 				return 0;
@@ -1215,12 +1251,17 @@ static int pirq_enable_irq(struct pci_dev *dev)
 		else
 			msg = " Please try using pci=biosirq.";
 
-		/* With IDE legacy devices the IRQ lookup failure is not a problem.. */
-		if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
+		/*
+		 * With IDE legacy devices the IRQ lookup failure is not
+		 * a problem..
+		 */
+		if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE &&
+				!(dev->class & 0x5))
 			return 0;
 
-		printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
-		       'A' + pin, pci_name(dev), msg);
+		printk(KERN_WARNING
+			"PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
+			'A' + pin, pci_name(dev), msg);
 	}
 	return 0;
 }
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index a67921ce60a..132876cc6fc 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -55,4 +55,18 @@ static int __init pci_legacy_init(void)
 	return 0;
 }
 
-subsys_initcall(pci_legacy_init);
+int __init pci_subsys_init(void)
+{
+#ifdef CONFIG_ACPI
+	pci_acpi_init();
+#endif
+	pci_legacy_init();
+	pcibios_irq_init();
+#ifdef CONFIG_X86_NUMAQ
+	pci_numa_init();
+#endif
+	pcibios_init();
+
+	return 0;
+}
+subsys_initcall(pci_subsys_init);
diff --git a/arch/x86/pci/mp_bus_to_node.c b/arch/x86/pci/mp_bus_to_node.c
deleted file mode 100644
index 022943999b8..00000000000
--- a/arch/x86/pci/mp_bus_to_node.c
+++ /dev/null
@@ -1,23 +0,0 @@
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/topology.h>
-
-#define BUS_NR 256
-
-static unsigned char mp_bus_to_node[BUS_NR];
-
-void set_mp_bus_to_node(int busnum, int node)
-{
-	if (busnum >= 0 &&  busnum < BUS_NR)
-	mp_bus_to_node[busnum] = (unsigned char) node;
-}
-
-int get_mp_bus_to_node(int busnum)
-{
-	int node;
-
-	if (busnum < 0 || busnum > (BUS_NR - 1))
-		return 0;
-	node = mp_bus_to_node[busnum];
-	return node;
-}
diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c
index 99f1ecd485b..8b5ca196673 100644
--- a/arch/x86/pci/numa.c
+++ b/arch/x86/pci/numa.c
@@ -151,7 +151,7 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx);
 
-static int __init pci_numa_init(void)
+int __init pci_numa_init(void)
 {
 	int quad;
 
@@ -176,5 +176,3 @@ static int __init pci_numa_init(void)
 		}
 	return 0;
 }
-
-subsys_initcall(pci_numa_init);
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index ba263e626a6..3e25deb821a 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -28,6 +28,7 @@
 #define PCI_USE__CRS		0x10000
 #define PCI_CHECK_ENABLE_AMD_MMCONF	0x20000
 #define PCI_HAS_IO_ECS		0x40000
+#define PCI_NOASSIGN_ROMS	0x80000
 
 extern unsigned int pci_probe;
 extern unsigned long pirq_table_addr;
@@ -39,9 +40,6 @@ enum pci_bf_sort_state {
 	pci_dmi_bf,
 };
 
-extern void __init dmi_check_pciprobe(void);
-extern void __init dmi_check_skip_isa_align(void);
-
 /* pci-i386.c */
 
 extern unsigned int pcibios_max_latency;
@@ -99,10 +97,19 @@ extern struct pci_raw_ops *raw_pci_ext_ops;
 
 extern struct pci_raw_ops pci_direct_conf1;
 
+/* arch_initcall level */
 extern int pci_direct_probe(void);
 extern void pci_direct_init(int type);
 extern void pci_pcbios_init(void);
 extern int pci_olpc_init(void);
+extern void __init dmi_check_pciprobe(void);
+extern void __init dmi_check_skip_isa_align(void);
+
+/* some common used subsys_initcalls */
+extern int __init pci_acpi_init(void);
+extern int __init pcibios_irq_init(void);
+extern int __init pci_numa_init(void);
+extern int __init pcibios_init(void);
 
 /* pci-mmconfig.c */
 
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index c2df4e97eed..1a7bed492bb 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -8,18 +8,19 @@
 #include <linux/pci.h>
 #include <linux/init.h>
 
-#include "cobalt.h"
-#include "lithium.h"
+#include <asm/setup.h>
+#include <asm/visws/cobalt.h>
+#include <asm/visws/lithium.h>
 
 #include "pci.h"
 
 static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
 static void pci_visws_disable_irq(struct pci_dev *dev) { }
 
-int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq;
-void (*pcibios_disable_irq)(struct pci_dev *dev) = &pci_visws_disable_irq;
+/* int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq; */
+/* void (*pcibios_disable_irq)(struct pci_dev *dev) = &pci_visws_disable_irq; */
 
-void __init pcibios_penalize_isa_irq(int irq, int active) {}
+/* void __init pcibios_penalize_isa_irq(int irq, int active) {} */
 
 
 unsigned int pci_bus0, pci_bus1;
@@ -85,7 +86,7 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq)
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
 
-static int __init pcibios_init(void)
+static int __init pci_visws_init(void)
 {
 	/* The VISWS supports configuration access type 1 only */
 	pci_probe = (pci_probe | PCI_PROBE_CONF1) &
@@ -105,4 +106,17 @@ static int __init pcibios_init(void)
 	return 0;
 }
 
-subsys_initcall(pcibios_init);
+static __init int pci_subsys_init(void)
+{
+	if (!is_visws_box())
+		return -1;
+
+	pcibios_enable_irq = &pci_visws_enable_irq;
+	pcibios_disable_irq = &pci_visws_disable_irq;
+
+	pci_visws_init();
+	pcibios_init();
+
+	return 0;
+}
+subsys_initcall(pci_subsys_init);
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index efa2ba7c600..1ef0f90813d 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -23,7 +23,7 @@
 
 #define gtod vdso_vsyscall_gtod_data
 
-static long vdso_fallback_gettime(long clock, struct timespec *ts)
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 {
 	long ret;
 	asm("syscall" : "=a" (ret) :
@@ -31,7 +31,7 @@ static long vdso_fallback_gettime(long clock, struct timespec *ts)
 	return ret;
 }
 
-static inline long vgetns(void)
+notrace static inline long vgetns(void)
 {
 	long v;
 	cycles_t (*vread)(void);
@@ -40,7 +40,7 @@ static inline long vgetns(void)
 	return (v * gtod->clock.mult) >> gtod->clock.shift;
 }
 
-static noinline int do_realtime(struct timespec *ts)
+notrace static noinline int do_realtime(struct timespec *ts)
 {
 	unsigned long seq, ns;
 	do {
@@ -54,7 +54,8 @@ static noinline int do_realtime(struct timespec *ts)
 }
 
 /* Copy of the version in kernel/time.c which we cannot directly access */
-static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
+notrace static void
+vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
 {
 	while (nsec >= NSEC_PER_SEC) {
 		nsec -= NSEC_PER_SEC;
@@ -68,7 +69,7 @@ static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
 	ts->tv_nsec = nsec;
 }
 
-static noinline int do_monotonic(struct timespec *ts)
+notrace static noinline int do_monotonic(struct timespec *ts)
 {
 	unsigned long seq, ns, secs;
 	do {
@@ -82,7 +83,7 @@ static noinline int do_monotonic(struct timespec *ts)
 	return 0;
 }
 
-int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 {
 	if (likely(gtod->sysctl_enabled && gtod->clock.vread))
 		switch (clock) {
@@ -96,7 +97,7 @@ int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 int clock_gettime(clockid_t, struct timespec *)
 	__attribute__((weak, alias("__vdso_clock_gettime")));
 
-int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
 	long ret;
 	if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index cf058fecfce..0bce5429a51 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -203,20 +203,11 @@ static struct page *vdso32_pages[1];
 
 #ifdef CONFIG_X86_64
 
-static int use_sysenter __read_mostly = -1;
-
-#define	vdso32_sysenter()	(use_sysenter > 0)
+#define	vdso32_sysenter()	(boot_cpu_has(X86_FEATURE_SYSENTER32))
 
 /* May not be __init: called during resume */
 void syscall32_cpu_init(void)
 {
-	if (use_sysenter < 0) {
-		if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-			use_sysenter = 1;
-		if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
-			use_sysenter = 1;
-	}
-
 	/* Load these always in case some future AMD CPU supports
 	   SYSENTER from compat mode too. */
 	checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index c8097f17f8a..9fbc6b20026 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -13,7 +13,8 @@
 #include <asm/vgtod.h>
 #include "vextern.h"
 
-long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
+notrace long
+__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
 {
 	unsigned int p;
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 3b980831602..bb508456ef5 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1062,7 +1062,7 @@ static const struct pv_time_ops xen_time_ops __initdata = {
 
 	.set_wallclock = xen_set_wallclock,
 	.get_wallclock = xen_get_wallclock,
-	.get_cpu_khz = xen_cpu_khz,
+	.get_tsc_khz = xen_tsc_khz,
 	.sched_clock = xen_sched_clock,
 };
 
@@ -1214,7 +1214,9 @@ static const struct smp_ops xen_smp_ops __initdata = {
 
 	.smp_send_stop = xen_smp_send_stop,
 	.smp_send_reschedule = xen_smp_send_reschedule,
-	.smp_call_function_mask = xen_smp_call_function_mask,
+
+	.send_call_func_ipi = xen_smp_send_call_function_ipi,
+	.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
 };
 #endif	/* CONFIG_SMP */
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 42b3b9ed641..ff0aa74afaa 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -796,7 +796,7 @@ static void drop_mm_ref(struct mm_struct *mm)
 	}
 
 	if (!cpus_empty(mask))
-		xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
+		smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
 }
 #else
 static void drop_mm_ref(struct mm_struct *mm)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d2e3c20127d..233156f39b7 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -36,27 +36,14 @@
 #include "mmu.h"
 
 cpumask_t xen_cpu_initialized_map;
-static DEFINE_PER_CPU(int, resched_irq) = -1;
-static DEFINE_PER_CPU(int, callfunc_irq) = -1;
-static DEFINE_PER_CPU(int, debug_irq) = -1;
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
 
-struct call_data_struct {
-	void (*func) (void *info);
-	void *info;
-	atomic_t started;
-	atomic_t finished;
-	int wait;
-};
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static DEFINE_PER_CPU(int, callfuncsingle_irq);
+static DEFINE_PER_CPU(int, debug_irq) = -1;
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
-
-static struct call_data_struct *call_data;
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
 
 /*
  * Reschedule call back. Nothing to do,
@@ -128,6 +115,17 @@ static int xen_smp_intr_init(unsigned int cpu)
 		goto fail;
 	per_cpu(debug_irq, cpu) = rc;
 
+	callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
+	rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
+				    cpu,
+				    xen_call_function_single_interrupt,
+				    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+				    callfunc_name,
+				    NULL);
+	if (rc < 0)
+		goto fail;
+	per_cpu(callfuncsingle_irq, cpu) = rc;
+
 	return 0;
 
  fail:
@@ -137,6 +135,9 @@ static int xen_smp_intr_init(unsigned int cpu)
 		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
 	if (per_cpu(debug_irq, cpu) >= 0)
 		unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
+	if (per_cpu(callfuncsingle_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+
 	return rc;
 }
 
@@ -336,7 +337,7 @@ static void stop_self(void *v)
 
 void xen_smp_send_stop(void)
 {
-	smp_call_function(stop_self, NULL, 0, 0);
+	smp_call_function(stop_self, NULL, 0);
 }
 
 void xen_smp_send_reschedule(int cpu)
@@ -344,7 +345,6 @@ void xen_smp_send_reschedule(int cpu)
 	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
 }
 
-
 static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
 {
 	unsigned cpu;
@@ -355,83 +355,42 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
 		xen_send_IPI_one(cpu, vector);
 }
 
+void xen_smp_send_call_function_ipi(cpumask_t mask)
+{
+	int cpu;
+
+	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
+
+	/* Make sure other vcpus get a chance to run if they need to. */
+	for_each_cpu_mask(cpu, mask) {
+		if (xen_vcpu_stolen(cpu)) {
+			HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+			break;
+		}
+	}
+}
+
+void xen_smp_send_call_function_single_ipi(int cpu)
+{
+	xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
+}
+
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
 {
-	void (*func) (void *info) = call_data->func;
-	void *info = call_data->info;
-	int wait = call_data->wait;
-
-	/*
-	 * Notify initiating CPU that I've grabbed the data and am
-	 * about to execute the function
-	 */
-	mb();
-	atomic_inc(&call_data->started);
-	/*
-	 * At this point the info structure may be out of scope unless wait==1
-	 */
 	irq_enter();
-	(*func)(info);
+	generic_smp_call_function_interrupt();
 	__get_cpu_var(irq_stat).irq_call_count++;
 	irq_exit();
 
-	if (wait) {
-		mb();		/* commit everything before setting finished */
-		atomic_inc(&call_data->finished);
-	}
-
 	return IRQ_HANDLED;
 }
 
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
-			       void *info, int wait)
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 {
-	struct call_data_struct data;
-	int cpus, cpu;
-	bool yield;
-
-	/* Holding any lock stops cpus from going down. */
-	spin_lock(&call_lock);
-
-	cpu_clear(smp_processor_id(), mask);
-
-	cpus = cpus_weight(mask);
-	if (!cpus) {
-		spin_unlock(&call_lock);
-		return 0;
-	}
-
-	/* Can deadlock when called with interrupts disabled */
-	WARN_ON(irqs_disabled());
-
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.started, 0);
-	data.wait = wait;
-	if (wait)
-		atomic_set(&data.finished, 0);
-
-	call_data = &data;
-	mb();			/* write everything before IPI */
-
-	/* Send a message to other CPUs and wait for them to respond */
-	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
-
-	/* Make sure other vcpus get a chance to run if they need to. */
-	yield = false;
-	for_each_cpu_mask(cpu, mask)
-		if (xen_vcpu_stolen(cpu))
-			yield = true;
-
-	if (yield)
-		HYPERVISOR_sched_op(SCHEDOP_yield, 0);
-
-	/* Wait for response */
-	while (atomic_read(&data.started) != cpus ||
-	       (wait && atomic_read(&data.finished) != cpus))
-		cpu_relax();
-
-	spin_unlock(&call_lock);
+	irq_enter();
+	generic_smp_call_function_single_interrupt();
+	__get_cpu_var(irq_stat).irq_call_count++;
+	irq_exit();
 
-	return 0;
+	return IRQ_HANDLED;
 }
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 64f0038b955..685b77470fc 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -197,8 +197,8 @@ unsigned long long xen_sched_clock(void)
 }
 
 
-/* Get the CPU speed from Xen */
-unsigned long xen_cpu_khz(void)
+/* Get the TSC speed from Xen */
+unsigned long xen_tsc_khz(void)
 {
 	u64 xen_khz = 1000000ULL << 32;
 	const struct pvclock_vcpu_time_info *info =
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9a055592a30..6f4b1045c1c 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -32,7 +32,7 @@ void __init xen_build_dynamic_phys_to_machine(void);
 
 void xen_setup_timer(int cpu);
 void xen_setup_cpu_clockevents(void);
-unsigned long xen_cpu_khz(void);
+unsigned long xen_tsc_khz(void);
 void __init xen_time_init(void);
 unsigned long xen_get_wallclock(void);
 int xen_set_wallclock(unsigned long time);
@@ -55,13 +55,8 @@ void xen_smp_cpus_done(unsigned int max_cpus);
 
 void xen_smp_send_stop(void);
 void xen_smp_send_reschedule(int cpu);
-int xen_smp_call_function (void (*func) (void *info), void *info, int nonatomic,
-			   int wait);
-int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-				 int nonatomic, int wait);
-
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
-			       void *info, int wait);
+void xen_smp_send_call_function_ipi(cpumask_t mask);
+void xen_smp_send_call_function_single_ipi(int cpu);
 
 extern cpumask_t xen_cpu_initialized_map;