46 files changed, 1666 insertions, 974 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index de5657c039e..339ce35648e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -70,7 +70,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
-obj-y				+= vsmp_64.o
+obj-$(CONFIG_X86_VSMP)		+= vsmp_64.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_MODULES)		+= module_$(BITS).o
 obj-$(CONFIG_EFI) 		+= efi.o efi_$(BITS).o efi_stub_$(BITS).o
@@ -111,7 +111,7 @@ obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb_64.o # NB rename without _64
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
-	obj-$(CONFIG_X86_UV)		+= tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o
+	obj-$(CONFIG_X86_UV)		+= tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
 	obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
 	obj-$(CONFIG_AUDIT)		+= audit_64.o
 
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 3b002995e14..f933822dba1 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -222,7 +222,6 @@ struct apic apic_flat =  {
 	.send_IPI_all			= flat_send_IPI_all,
 	.send_IPI_self			= apic_send_IPI_self,
 
-	.wakeup_cpu			= NULL,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= NULL,
@@ -373,7 +372,6 @@ struct apic apic_physflat =  {
 	.send_IPI_all			= physflat_send_IPI_all,
 	.send_IPI_self			= apic_send_IPI_self,
 
-	.wakeup_cpu			= NULL,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= NULL,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 0b1093394fd..d806ecaa948 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -16,17 +16,17 @@
 #include <asm/apic.h>
 #include <asm/ipi.h>
 
-static inline unsigned bigsmp_get_apic_id(unsigned long x)
+static unsigned bigsmp_get_apic_id(unsigned long x)
 {
 	return (x >> 24) & 0xFF;
 }
 
-static inline int bigsmp_apic_id_registered(void)
+static int bigsmp_apic_id_registered(void)
 {
 	return 1;
 }
 
-static inline const cpumask_t *bigsmp_target_cpus(void)
+static const cpumask_t *bigsmp_target_cpus(void)
 {
 #ifdef CONFIG_SMP
 	return &cpu_online_map;
@@ -35,13 +35,12 @@ static inline const cpumask_t *bigsmp_target_cpus(void)
 #endif
 }
 
-static inline unsigned long
-bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid)
 {
 	return 0;
 }
 
-static inline unsigned long bigsmp_check_apicid_present(int bit)
+static unsigned long bigsmp_check_apicid_present(int bit)
 {
 	return 1;
 }
@@ -64,7 +63,7 @@ static inline unsigned long calculate_ldr(int cpu)
  * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
  * document number 292116).  So here it goes...
  */
-static inline void bigsmp_init_apic_ldr(void)
+static void bigsmp_init_apic_ldr(void)
 {
 	unsigned long val;
 	int cpu = smp_processor_id();
@@ -74,19 +73,19 @@ static inline void bigsmp_init_apic_ldr(void)
 	apic_write(APIC_LDR, val);
 }
 
-static inline void bigsmp_setup_apic_routing(void)
+static void bigsmp_setup_apic_routing(void)
 {
 	printk(KERN_INFO
 		"Enabling APIC mode:  Physflat.  Using %d I/O APICs\n",
 		nr_ioapics);
 }
 
-static inline int bigsmp_apicid_to_node(int logical_apicid)
+static int bigsmp_apicid_to_node(int logical_apicid)
 {
 	return apicid_2_node[hard_smp_processor_id()];
 }
 
-static inline int bigsmp_cpu_present_to_apicid(int mps_cpu)
+static int bigsmp_cpu_present_to_apicid(int mps_cpu)
 {
 	if (mps_cpu < nr_cpu_ids)
 		return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
@@ -94,7 +93,7 @@ static inline int bigsmp_cpu_present_to_apicid(int mps_cpu)
 	return BAD_APICID;
 }
 
-static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid)
+static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid)
 {
 	return physid_mask_of_physid(phys_apicid);
 }
@@ -107,29 +106,24 @@ static inline int bigsmp_cpu_to_logical_apicid(int cpu)
 	return cpu_physical_id(cpu);
 }
 
-static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map)
+static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map)
 {
 	/* For clustered we don't have a good way to do this yet - hack */
 	return physids_promote(0xFFL);
 }
 
-static inline void bigsmp_setup_portio_remap(void)
-{
-}
-
-static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid)
+static int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid)
 {
 	return 1;
 }
 
 /* As we are using single CPU as destination, pick only one CPU here */
-static inline unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask)
+static unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask));
 }
 
-static inline unsigned int
-bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 			      const struct cpumask *andmask)
 {
 	int cpu;
@@ -148,7 +142,7 @@ bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	return BAD_APICID;
 }
 
-static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
+static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
 {
 	return cpuid_apic >> index_msb;
 }
@@ -158,12 +152,12 @@ static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector)
 	default_send_IPI_mask_sequence_phys(mask, vector);
 }
 
-static inline void bigsmp_send_IPI_allbutself(int vector)
+static void bigsmp_send_IPI_allbutself(int vector)
 {
 	default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
 }
 
-static inline void bigsmp_send_IPI_all(int vector)
+static void bigsmp_send_IPI_all(int vector)
 {
 	bigsmp_send_IPI_mask(cpu_online_mask, vector);
 }
@@ -256,7 +250,6 @@ struct apic apic_bigsmp = {
 	.send_IPI_all			= bigsmp_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= NULL,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 320f2d2e4e5..19588f2770e 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -163,22 +163,17 @@ static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
 	return 0;
 }
 
-static int __init es7000_update_apic(void)
+static int es7000_apic_is_cluster(void)
 {
-	apic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
-
 	/* MPENTIUMIII */
 	if (boot_cpu_data.x86 == 6 &&
-	    (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) {
-		es7000_update_apic_to_cluster();
-		apic->wait_for_init_deassert = NULL;
-		apic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
-	}
+	    (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11))
+		return 1;	/* family 6, models 7..11 only */
 
 	return 0;
 }
 
-static void __init setup_unisys(void)
+static void setup_unisys(void)
 {
 	/*
 	 * Determine the generation of the ES7000 currently running.
@@ -192,14 +187,12 @@ static void __init setup_unisys(void)
 	else
 		es7000_plat = ES7000_CLASSIC;
 	ioapic_renumber_irq = es7000_rename_gsi;
-
-	x86_quirks->update_apic = es7000_update_apic;
 }
 
 /*
  * Parse the OEM Table:
  */
-static int __init parse_unisys_oem(char *oemptr)
+static int parse_unisys_oem(char *oemptr)
 {
 	int			i;
 	int 			success = 0;
@@ -261,7 +254,7 @@ static int __init parse_unisys_oem(char *oemptr)
 }
 
 #ifdef CONFIG_ACPI
-static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
+static int find_unisys_acpi_oem_table(unsigned long *oem_addr)
 {
 	struct acpi_table_header *header = NULL;
 	struct es7000_oem_table *table;
@@ -292,7 +285,7 @@ static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
 	return 0;
 }
 
-static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
+static void unmap_unisys_acpi_oem_table(unsigned long oem_addr)
 {
 	if (!oem_addr)
 		return;
@@ -310,8 +303,10 @@ static int es7000_check_dsdt(void)
 	return 0;
 }
 
+static int es7000_acpi_ret;
+
 /* Hook from generic ACPI tables.c */
-static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
 	unsigned long oem_addr = 0;
 	int check_dsdt;
@@ -332,10 +327,26 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 		 */
 		unmap_unisys_acpi_oem_table(oem_addr);
 	}
-	return ret;
+
+	es7000_acpi_ret = ret;
+
+	return ret && !es7000_apic_is_cluster();
 }
+
+static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
+{
+	int ret = es7000_acpi_ret;	/* stored by es7000_acpi_madt_oem_check() */
+
+	return ret && es7000_apic_is_cluster();
+}
+
 #else /* !CONFIG_ACPI: */
-static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	return 0;
+}
+
+static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
 {
 	return 0;
 }
@@ -349,8 +360,7 @@ static void es7000_spin(int n)
 		rep_nop();
 }
 
-static int __init
-es7000_mip_write(struct mip_reg *mip_reg)
+static int es7000_mip_write(struct mip_reg *mip_reg)
 {
 	int status = 0;
 	int spin;
@@ -383,7 +393,7 @@ es7000_mip_write(struct mip_reg *mip_reg)
 	return status;
 }
 
-static void __init es7000_enable_apic_mode(void)
+static void es7000_enable_apic_mode(void)
 {
 	struct mip_reg es7000_mip_reg;
 	int mip_status;
@@ -416,11 +426,8 @@ static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask)
 
 static void es7000_wait_for_init_deassert(atomic_t *deassert)
 {
-#ifndef CONFIG_ES7000_CLUSTERED_APIC
 	while (!atomic_read(deassert))
 		cpu_relax();
-#endif
-	return;
 }
 
 static unsigned int es7000_get_apic_id(unsigned long x)
@@ -565,72 +572,24 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
 	return 1;
 }
 
-static unsigned int
-es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
-{
-	int cpus_found = 0;
-	int num_bits_set;
-	int apicid;
-	int cpu;
-
-	num_bits_set = cpumask_weight(cpumask);
-	/* Return id to all */
-	if (num_bits_set == nr_cpu_ids)
-		return 0xFF;
-	/*
-	 * The cpus in the mask must all be on the apic cluster.  If are not
-	 * on the same apicid cluster return default value of target_cpus():
-	 */
-	cpu = cpumask_first(cpumask);
-	apicid = es7000_cpu_to_logical_apicid(cpu);
-
-	while (cpus_found < num_bits_set) {
-		if (cpumask_test_cpu(cpu, cpumask)) {
-			int new_apicid = es7000_cpu_to_logical_apicid(cpu);
-
-			if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
-				WARN(1, "Not a valid mask!");
-
-				return 0xFF;
-			}
-			apicid = new_apicid;
-			cpus_found++;
-		}
-		cpu++;
-	}
-	return apicid;
-}
-
 static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
-	int cpus_found = 0;
-	int num_bits_set;
-	int apicid;
-	int cpu;
+	unsigned int round = 0;
+	int cpu, apicid = BAD_APICID;
 
-	num_bits_set = cpus_weight(*cpumask);
-	/* Return id to all */
-	if (num_bits_set == nr_cpu_ids)
-		return es7000_cpu_to_logical_apicid(0);
 	/*
-	 * The cpus in the mask must all be on the apic cluster.  If are not
-	 * on the same apicid cluster return default value of target_cpus():
+	 * All cpus in the mask must share one apic cluster; empty: BAD_APICID.
 	 */
-	cpu = first_cpu(*cpumask);
-	apicid = es7000_cpu_to_logical_apicid(cpu);
-	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, *cpumask)) {
-			int new_apicid = es7000_cpu_to_logical_apicid(cpu);
+	for_each_cpu(cpu, cpumask) {
+		int new_apicid = es7000_cpu_to_logical_apicid(cpu);
 
-			if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
-				printk("%s: Not a valid mask!\n", __func__);
+		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
+			WARN(1, "Not a valid mask!");
 
-				return es7000_cpu_to_logical_apicid(0);
-			}
-			apicid = new_apicid;
-			cpus_found++;
+			return BAD_APICID;
 		}
-		cpu++;
+		apicid = new_apicid;
+		round++;
 	}
 	return apicid;
 }
@@ -659,37 +618,103 @@ static int es7000_phys_pkg_id(int cpuid_apic, int index_msb)
 	return cpuid_apic >> index_msb;
 }
 
-void __init es7000_update_apic_to_cluster(void)
-{
-	apic->target_cpus = target_cpus_cluster;
-	apic->irq_delivery_mode = dest_LowestPrio;
-	/* logical delivery broadcast to all procs: */
-	apic->irq_dest_mode = 1;
-
-	apic->init_apic_ldr = es7000_init_apic_ldr_cluster;
-
-	apic->cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster;
-}
-
 static int probe_es7000(void)
 {
 	/* probed later in mptable/ACPI hooks */
 	return 0;
 }
 
-static __init int
-es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
+static int es7000_mps_ret;
+static int es7000_mps_oem_check(struct mpc_table *mpc, char *oem,
+		char *productid)
 {
+	int ret = 0;
+
 	if (mpc->oemptr) {
 		struct mpc_oemtable *oem_table =
 			(struct mpc_oemtable *)mpc->oemptr;
 
 		if (!strncmp(oem, "UNISYS", 6))
-			return parse_unisys_oem((char *)oem_table);
+			ret = parse_unisys_oem((char *)oem_table);
 	}
-	return 0;
+
+	es7000_mps_ret = ret;
+
+	return ret && !es7000_apic_is_cluster();
 }
 
+static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem,
+		char *productid)
+{
+	int ret = es7000_mps_ret;	/* stored by es7000_mps_oem_check() */
+
+	return ret && es7000_apic_is_cluster();
+}
+
+struct apic apic_es7000_cluster = {
+
+	.name				= "es7000",
+	.probe				= probe_es7000,
+	.acpi_madt_oem_check		= es7000_acpi_madt_oem_check_cluster,
+	.apic_id_registered		= es7000_apic_id_registered,
+
+	.irq_delivery_mode		= dest_LowestPrio,
+	/* logical delivery broadcast to all procs: */
+	.irq_dest_mode			= 1,
+
+	.target_cpus			= target_cpus_cluster,
+	.disable_esr			= 1,
+	.dest_logical			= 0,
+	.check_apicid_used		= es7000_check_apicid_used,
+	.check_apicid_present		= es7000_check_apicid_present,
+
+	.vector_allocation_domain	= es7000_vector_allocation_domain,
+	.init_apic_ldr			= es7000_init_apic_ldr_cluster,
+
+	.ioapic_phys_id_map		= es7000_ioapic_phys_id_map,
+	.setup_apic_routing		= es7000_setup_apic_routing,
+	.multi_timer_check		= NULL,
+	.apicid_to_node			= es7000_apicid_to_node,
+	.cpu_to_logical_apicid		= es7000_cpu_to_logical_apicid,
+	.cpu_present_to_apicid		= es7000_cpu_present_to_apicid,
+	.apicid_to_cpu_present		= es7000_apicid_to_cpu_present,
+	.setup_portio_remap		= NULL,
+	.check_phys_apicid_present	= es7000_check_phys_apicid_present,
+	.enable_apic_mode		= es7000_enable_apic_mode,
+	.phys_pkg_id			= es7000_phys_pkg_id,
+	.mps_oem_check			= es7000_mps_oem_check_cluster,
+
+	.get_apic_id			= es7000_get_apic_id,
+	.set_apic_id			= NULL,
+	.apic_id_mask			= 0xFF << 24,
+
+	.cpu_mask_to_apicid		= es7000_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and		= es7000_cpu_mask_to_apicid_and,
+
+	.send_IPI_mask			= es7000_send_IPI_mask,
+	.send_IPI_mask_allbutself	= NULL,
+	.send_IPI_allbutself		= es7000_send_IPI_allbutself,
+	.send_IPI_all			= es7000_send_IPI_all,
+	.send_IPI_self			= default_send_IPI_self,
+
+	.wakeup_secondary_cpu		= wakeup_secondary_cpu_via_mip,
+
+	.trampoline_phys_low		= 0x467,
+	.trampoline_phys_high		= 0x469,
+
+	.wait_for_init_deassert		= NULL,
+
+	/* Nothing to do for most platforms, since cleared by the INIT cycle: */
+	.smp_callin_clear_local_apic	= NULL,
+	.inquire_remote_apic		= default_inquire_remote_apic,
+
+	.read				= native_apic_mem_read,
+	.write				= native_apic_mem_write,
+	.icr_read			= native_apic_icr_read,
+	.icr_write			= native_apic_icr_write,
+	.wait_icr_idle			= native_apic_wait_icr_idle,
+	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+};
 
 struct apic apic_es7000 = {
 
@@ -737,8 +762,6 @@ struct apic apic_es7000 = {
 	.send_IPI_all			= es7000_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= NULL,
-
 	.trampoline_phys_low		= 0x467,
 	.trampoline_phys_high		= 0x469,
 
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index d9d6d61eed8..ba2fc646553 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -69,7 +69,7 @@ struct mpc_trans {
 /* x86_quirks member */
 static int				mpc_record;
 
-static __cpuinitdata struct mpc_trans	*translation_table[MAX_MPC_ENTRY];
+static struct mpc_trans			*translation_table[MAX_MPC_ENTRY];
 
 int					mp_bus_id_to_node[MAX_MP_BUSSES];
 int					mp_bus_id_to_local[MAX_MP_BUSSES];
@@ -256,13 +256,6 @@ static int __init numaq_setup_ioapic_ids(void)
 	return 1;
 }
 
-static int __init numaq_update_apic(void)
-{
-	apic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
-
-	return 0;
-}
-
 static struct x86_quirks numaq_x86_quirks __initdata = {
 	.arch_pre_time_init		= numaq_pre_time_init,
 	.arch_time_init			= NULL,
@@ -278,7 +271,6 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
 	.mpc_oem_pci_bus		= mpc_oem_pci_bus,
 	.smp_read_mpc_oem		= smp_read_mpc_oem,
 	.setup_ioapic_ids		= numaq_setup_ioapic_ids,
-	.update_apic			= numaq_update_apic,
 };
 
 static __init void early_check_numaq(void)
@@ -546,7 +538,7 @@ struct apic apic_numaq = {
 	.send_IPI_all			= numaq_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= NULL,
+	.wakeup_secondary_cpu		= wakeup_secondary_cpu_via_nmi,
 	.trampoline_phys_low		= NUMAQ_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= NUMAQ_TRAMPOLINE_PHYS_HIGH,
 
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 3a730fa574b..141c99a1c26 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -138,7 +138,6 @@ struct apic apic_default = {
 	.send_IPI_all			= default_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= NULL,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 
@@ -159,6 +158,7 @@ extern struct apic apic_numaq;
 extern struct apic apic_summit;
 extern struct apic apic_bigsmp;
 extern struct apic apic_es7000;
+extern struct apic apic_es7000_cluster;
 extern struct apic apic_default;
 
 struct apic *apic = &apic_default;
@@ -176,6 +176,7 @@ static struct apic *apic_probe[] __initdata = {
 #endif
 #ifdef CONFIG_X86_ES7000
 	&apic_es7000,
+	&apic_es7000_cluster,
 #endif
 	&apic_default,	/* must be last */
 	NULL,
@@ -197,9 +198,6 @@ static int __init parse_apic(char *arg)
 		}
 	}
 
-	if (x86_quirks->update_apic)
-		x86_quirks->update_apic();
-
 	/* Parsed again by __setup for debug/verbose */
 	return 0;
 }
@@ -218,8 +216,6 @@ void __init generic_bigsmp_probe(void)
 	if (!cmdline_apic && apic == &apic_default) {
 		if (apic_bigsmp.probe()) {
 			apic = &apic_bigsmp;
-			if (x86_quirks->update_apic)
-				x86_quirks->update_apic();
 			printk(KERN_INFO "Overriding APIC driver with %s\n",
 			       apic->name);
 		}
@@ -240,9 +236,6 @@ void __init generic_apic_probe(void)
 		/* Not visible without early console */
 		if (!apic_probe[i])
 			panic("Didn't find an APIC driver");
-
-		if (x86_quirks->update_apic)
-			x86_quirks->update_apic();
 	}
 	printk(KERN_INFO "Using APIC driver %s\n", apic->name);
 }
@@ -262,8 +255,6 @@ generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
 
 		if (!cmdline_apic) {
 			apic = apic_probe[i];
-			if (x86_quirks->update_apic)
-				x86_quirks->update_apic();
 			printk(KERN_INFO "Switched to APIC driver `%s'.\n",
 			       apic->name);
 		}
@@ -284,8 +275,6 @@ int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 
 		if (!cmdline_apic) {
 			apic = apic_probe[i];
-			if (x86_quirks->update_apic)
-				x86_quirks->update_apic();
 			printk(KERN_INFO "Switched to APIC driver `%s'.\n",
 			       apic->name);
 		}
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index e7c163661c7..8d7748efe6a 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -68,9 +68,6 @@ void __init default_setup_apic_routing(void)
 			apic = &apic_physflat;
 		printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
 	}
-
-	if (x86_quirks->update_apic)
-		x86_quirks->update_apic();
 }
 
 /* Same for both flat and physical. */
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 32838b57a94..aac52fa873f 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -77,9 +77,9 @@ static void summit_send_IPI_all(int vector)
 extern int use_cyclone;
 
 #ifdef CONFIG_X86_SUMMIT_NUMA
-extern void setup_summit(void);
+static void setup_summit(void);
 #else
-#define setup_summit()	{}
+static inline void setup_summit(void) {}
 #endif
 
 static int summit_mps_oem_check(struct mpc_table *mpc, char *oem,
@@ -291,33 +291,21 @@ static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid)
 
 static unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
-	int cpus_found = 0;
-	int num_bits_set;
-	int apicid;
-	int cpu;
+	unsigned int round = 0;
+	int cpu, apicid = 0;
 
-	num_bits_set = cpus_weight(*cpumask);
-	if (num_bits_set >= nr_cpu_ids)
-		return BAD_APICID;
 	/*
 	 * The cpus in the mask must all be on the apic cluster.
 	 */
-	cpu = first_cpu(*cpumask);
-	apicid = summit_cpu_to_logical_apicid(cpu);
-
-	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, *cpumask)) {
-			int new_apicid = summit_cpu_to_logical_apicid(cpu);
+	for_each_cpu(cpu, cpumask) {
+		int new_apicid = summit_cpu_to_logical_apicid(cpu);
 
-			if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
-				printk("%s: Not a valid mask!\n", __func__);
-
-				return BAD_APICID;
-			}
-			apicid = apicid | new_apicid;
-			cpus_found++;
+		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
+			printk("%s: Not a valid mask!\n", __func__);
+			return BAD_APICID;
 		}
-		cpu++;
+		apicid |= new_apicid;
+		round++;
 	}
 	return apicid;
 }
@@ -372,15 +360,15 @@ static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask)
 }
 
 #ifdef CONFIG_X86_SUMMIT_NUMA
-static struct rio_table_hdr *rio_table_hdr __initdata;
-static struct scal_detail   *scal_devs[MAX_NUMNODES] __initdata;
-static struct rio_detail    *rio_devs[MAX_NUMNODES*4] __initdata;
+static struct rio_table_hdr *rio_table_hdr;
+static struct scal_detail   *scal_devs[MAX_NUMNODES];
+static struct rio_detail    *rio_devs[MAX_NUMNODES*4];
 
 #ifndef CONFIG_X86_NUMAQ
-static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata;
+static int mp_bus_id_to_node[MAX_MP_BUSSES];
 #endif
 
-static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
+static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
 {
 	int twister = 0, node = 0;
 	int i, bus, num_buses;
@@ -442,7 +430,7 @@ static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
 	return bus;
 }
 
-static int __init build_detail_arrays(void)
+static int build_detail_arrays(void)
 {
 	unsigned long ptr;
 	int i, scal_detail_size, rio_detail_size;
@@ -476,7 +464,7 @@ static int __init build_detail_arrays(void)
 	return 1;
 }
 
-void __init setup_summit(void)
+void setup_summit(void)
 {
 	unsigned long		ptr;
 	unsigned short		offset;
@@ -574,7 +562,6 @@ struct apic apic_summit = {
 	.send_IPI_all			= summit_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= NULL,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 354b9c45601..8fb87b6dd63 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -224,7 +224,6 @@ struct apic apic_x2apic_cluster = {
 	.send_IPI_all			= x2apic_send_IPI_all,
 	.send_IPI_self			= x2apic_send_IPI_self,
 
-	.wakeup_cpu			= NULL,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 5bcb174409b..23625b9f98b 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -213,7 +213,6 @@ struct apic apic_x2apic_phys = {
 	.send_IPI_all			= x2apic_send_IPI_all,
 	.send_IPI_self			= x2apic_send_IPI_self,
 
-	.wakeup_cpu			= NULL,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 20b4ad07c3a..1bd6da1f8fa 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -7,28 +7,28 @@
  *
  * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
  */
-
-#include <linux/kernel.h>
-#include <linux/threads.h>
-#include <linux/cpu.h>
 #include <linux/cpumask.h>
+#include <linux/hardirq.h>
+#include <linux/proc_fs.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
-#include <linux/init.h>
 #include <linux/sched.h>
-#include <linux/module.h>
-#include <linux/hardirq.h>
 #include <linux/timer.h>
-#include <linux/proc_fs.h>
-#include <asm/current.h>
-#include <asm/smp.h>
-#include <asm/apic.h>
-#include <asm/ipi.h>
-#include <asm/pgtable.h>
-#include <asm/uv/uv.h>
+#include <linux/cpu.h>
+#include <linux/init.h>
+
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
+#include <asm/current.h>
+#include <asm/pgtable.h>
 #include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/smp.h>
 
 DEFINE_PER_CPU(int, x2apic_extra_bits);
 
@@ -91,24 +91,28 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
 	cpumask_set_cpu(cpu, retmask);
 }
 
-int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
+static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
 {
+#ifdef CONFIG_SMP
 	unsigned long val;
 	int pnode;
 
 	pnode = uv_apicid_to_pnode(phys_apicid);
 	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
 	    (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
-	    (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
+	    ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
 	    APIC_DM_INIT;
 	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
 	mdelay(10);
 
 	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
 	    (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
-	    (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
+	    ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
 	    APIC_DM_STARTUP;
 	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
+
+	atomic_set(&init_deasserted, 1);
+#endif
 	return 0;
 }
 
@@ -285,7 +289,7 @@ struct apic apic_x2apic_uv_x = {
 	.send_IPI_all			= uv_send_IPI_all,
 	.send_IPI_self			= uv_send_IPI_self,
 
-	.wakeup_cpu			= NULL,
+	.wakeup_secondary_cpu		= uv_wakeup_secondary,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= NULL,
@@ -365,7 +369,7 @@ static __init void map_high(char *id, unsigned long base, int shift,
 	paddr = base << shift;
 	bytes = (1UL << shift) * (max_pnode + 1);
 	printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
-	       					paddr + bytes);
+						paddr + bytes);
 	if (map_type == map_uc)
 		init_extra_mapping_uc(paddr, bytes);
 	else
@@ -528,7 +532,7 @@ late_initcall(uv_init_heartbeat);
 
 /*
  * Called on each cpu to initialize the per_cpu UV data area.
- * 	ZZZ hotplug not supported yet
+ * FIXME: hotplug not supported yet
  */
 void __cpuinit uv_cpu_init(void)
 {
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 25423a5b80e..f47df59016c 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/apic.h>
+#include <asm/cpu.h>
 
 #ifdef CONFIG_X86_64
 # include <asm/numa_64.h>
@@ -141,6 +142,55 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
 	}
 }
 
+static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+	/* Skip the boot cpu; only the other cpus are checked here: */
+	if (c->cpu_index == boot_cpu_id)
+		return;
+
+	/*
+	 * Certain Athlons might work (for various values of 'work') in SMP
+	 * but they are not certified as MP capable.
+	 */
+	/* Athlon 660/661 is valid. */
+	if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
+	    (c->x86_mask == 1)))
+		goto valid_k7;
+
+	/* Duron 670 is valid */
+	if ((c->x86_model == 7) && (c->x86_mask == 0))
+		goto valid_k7;
+
+	/*
+	 * Athlon 662, Duron 671, and Athlon >model 7 have capability
+	 * bit. It's worth noting that the A5 stepping (662) of some
+	 * Athlon XP's have the MP bit set.
+	 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
+	 * more.
+	 */
+	if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
+	    ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
+	     (c->x86_model > 7))
+		if (cpu_has_mp)
+			goto valid_k7;
+
+	/* If we get here, not a certified SMP capable AMD system. */
+
+	/*
+	 * Don't taint if we are running SMP kernel on a single non-MP
+	 * approved Athlon
+	 */
+	WARN_ONCE(1, "WARNING: This combination of AMD "
+		"processors is not suitable for SMP.\n");
+	if (!test_taint(TAINT_UNSAFE_SMP))
+		add_taint(TAINT_UNSAFE_SMP);
+
+valid_k7:
+	;
+#endif
+}
+
 static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
@@ -175,6 +225,8 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 	}
 
 	set_cpu_cap(c, X86_FEATURE_K7);
+
+	amd_k7_smp_check(c);
 }
 #endif
 
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 4b1c319d30c..22590cf688a 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -601,7 +601,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	if (!data)
 		return -ENOMEM;
 
-	data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
+	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
 	per_cpu(drv_data, cpu) = data;
 
 	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index b585e04cbc9..3178c3acd97 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -277,7 +277,6 @@ static struct cpufreq_driver p4clockmod_driver = {
 	.name		= "p4-clockmod",
 	.owner		= THIS_MODULE,
 	.attr		= p4clockmod_attr,
-	.hide_interface	= 1,
 };
 
 
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 25c559ba8d5..191117f1ad5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -13,6 +13,7 @@
 #include <asm/uaccess.h>
 #include <asm/ds.h>
 #include <asm/bugs.h>
+#include <asm/cpu.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/topology.h>
@@ -110,6 +111,28 @@ static void __cpuinit trap_init_f00f_bug(void)
 }
 #endif
 
+static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+	/* Skip the boot cpu; only the other cpus are checked here: */
+	if (c->cpu_index == boot_cpu_id)
+		return;
+
+	/*
+	 * Mask B, Pentium, but not Pentium MMX
+	 */
+	if (c->x86 == 5 &&
+	    c->x86_mask >= 1 && c->x86_mask <= 4 &&
+	    c->x86_model <= 3) {
+		/*
+		 * Remember we have B step Pentia with bugs
+		 */
+		WARN_ONCE(1, "WARNING: SMP operation may be unreliable "
+				    "with B stepping processors.\n");
+	}
+#endif
+}
+
 static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
 {
 	unsigned long lo, hi;
@@ -186,6 +209,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
 #ifdef CONFIG_X86_NUMAQ
 	numaq_tsc_disable();
 #endif
+
+	intel_smp_check(c);
 }
 #else
 static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 01b1244ef1c..d67e0e48bc2 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -7,11 +7,10 @@
 /*
  *	Get CPU information for use by the procfs.
  */
-#ifdef CONFIG_X86_32
 static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
 			      unsigned int cpu)
 {
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
 	if (c->x86_max_cores * smp_num_siblings > 1) {
 		seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
 		seq_printf(m, "siblings\t: %d\n",
@@ -24,6 +23,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
 #endif
 }
 
+#ifdef CONFIG_X86_32
 static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 {
 	/*
@@ -50,22 +50,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 		   c->wp_works_ok ? "yes" : "no");
 }
 #else
-static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
-			      unsigned int cpu)
-{
-#ifdef CONFIG_SMP
-	if (c->x86_max_cores * smp_num_siblings > 1) {
-		seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
-		seq_printf(m, "siblings\t: %d\n",
-			   cpus_weight(per_cpu(cpu_core_map, cpu)));
-		seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
-		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
-		seq_printf(m, "apicid\t\t: %d\n", c->apicid);
-		seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid);
-	}
-#endif
-}
-
 static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 {
 	seq_printf(m,
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 169a120587b..87b67e3a765 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -729,7 +729,7 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 
 	spin_unlock_irqrestore(&ds_lock, irq);
 
-	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
 	ds_resume_pebs(tracer);
 
 	return tracer;
@@ -1029,5 +1029,4 @@ void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
 
 void ds_exit_thread(struct task_struct *tsk)
 {
-	WARN_ON(tsk->thread.ds_ctx);
 }
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index b205272ad39..1736acc4d7a 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -469,7 +469,7 @@ void __init efi_enter_virtual_mode(void)
 	efi_memory_desc_t *md;
 	efi_status_t status;
 	unsigned long size;
-	u64 end, systab, addr, npages;
+	u64 end, systab, addr, npages, end_pfn;
 	void *p, *va;
 
 	efi.systab = NULL;
@@ -481,7 +481,10 @@ void __init efi_enter_virtual_mode(void)
 		size = md->num_pages << EFI_PAGE_SHIFT;
 		end = md->phys_addr + size;
 
-		if (PFN_UP(end) <= max_low_pfn_mapped)
+		end_pfn = PFN_UP(end);
+		if (end_pfn <= max_low_pfn_mapped
+		    || (end_pfn > (1UL << (32 - PAGE_SHIFT))
+			&& end_pfn <= max_pfn_mapped))
 			va = __va(md->phys_addr);
 		else
 			va = efi_ioremap(md->phys_addr, size);
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index a4ee29127fd..22c3b7828c5 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -100,24 +100,11 @@ void __init efi_call_phys_epilog(void)
 
 void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size)
 {
-	static unsigned pages_mapped __initdata;
-	unsigned i, pages;
-	unsigned long offset;
+	unsigned long last_map_pfn;
 
-	pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr);
-	offset = phys_addr & ~PAGE_MASK;
-	phys_addr &= PAGE_MASK;
-
-	if (pages_mapped + pages > MAX_EFI_IO_PAGES)
+	last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
+	if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size)
 		return NULL;
 
-	for (i = 0; i < pages; i++) {
-		__set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped,
-			     phys_addr, PAGE_KERNEL);
-		phys_addr += PAGE_SIZE;
-		pages_mapped++;
-	}
-
-	return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \
-					     (pages_mapped - pages)) + offset;
+	return (void __iomem *)__va(phys_addr);
 }
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 83d1836b946..7ba4621c0df 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -984,6 +984,8 @@ apicinterrupt UV_BAU_MESSAGE \
 #endif
 apicinterrupt LOCAL_TIMER_VECTOR \
 	apic_timer_interrupt smp_apic_timer_interrupt
+apicinterrupt GENERIC_INTERRUPT_VECTOR \
+	generic_interrupt smp_generic_interrupt
 
 #ifdef CONFIG_SMP
 apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index b0f61f0dcd0..f2f8540a7f3 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -136,7 +136,7 @@ int init_fpu(struct task_struct *tsk)
 #ifdef CONFIG_X86_32
 	if (!HAVE_HWFP) {
 		memset(tsk->thread.xstate, 0, xstate_size);
-		finit();
+		finit_task(tsk);
 		set_stopped_child_used_math(tsk);
 		return 0;
 	}
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index e41980a373a..99c4d308f16 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -85,19 +85,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 
 	t->io_bitmap_max = bytes;
 
-#ifdef CONFIG_X86_32
-	/*
-	 * Sets the lazy trigger so that the next I/O operation will
-	 * reload the correct bitmap.
-	 * Reset the owner so that a process switch will not set
-	 * tss->io_bitmap_base to IO_BITMAP_OFFSET.
-	 */
-	tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
-	tss->io_bitmap_owner = NULL;
-#else
 	/* Update the TSS: */
 	memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
-#endif
 
 	put_cpu();
 
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index f13ca1650aa..b864341dcc4 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -15,6 +15,9 @@
 
 atomic_t irq_err_count;
 
+/* Function pointer for generic interrupt vector handling */
+void (*generic_interrupt_extension)(void) = NULL;
+
 /*
  * 'what should we do if we get a hw irq event on an illegal vector'.
  * each architecture has to answer this themselves.
@@ -56,6 +59,12 @@ static int show_other_interrupts(struct seq_file *p)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
 	seq_printf(p, "  Local timer interrupts\n");
 #endif
+	if (generic_interrupt_extension) {
+		seq_printf(p, "PLT: ");
+		for_each_online_cpu(j)
+			seq_printf(p, "%10u ", irq_stats(j)->generic_irqs);
+		seq_printf(p, "  Platform interrupts\n");
+	}
 #ifdef CONFIG_SMP
 	seq_printf(p, "RES: ");
 	for_each_online_cpu(j)
@@ -163,6 +172,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 #ifdef CONFIG_X86_LOCAL_APIC
 	sum += irq_stats(cpu)->apic_timer_irqs;
 #endif
+	if (generic_interrupt_extension)
+		sum += irq_stats(cpu)->generic_irqs;
 #ifdef CONFIG_SMP
 	sum += irq_stats(cpu)->irq_resched_count;
 	sum += irq_stats(cpu)->irq_call_count;
@@ -226,4 +237,27 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
 	return 1;
 }
 
+/*
+ * GENERIC_INTERRUPT_VECTOR handler: calls generic_interrupt_extension if set.
+ */
+void smp_generic_interrupt(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);	/* put back on exit */
+
+	ack_APIC_irq();
+
+	exit_idle();
+
+	irq_enter();
+
+	inc_irq_stat(generic_irqs);	/* counted even with no extension set */
+
+	if (generic_interrupt_extension)
+		generic_interrupt_extension();
+
+	irq_exit();
+
+	set_irq_regs(old_regs);
+}
+
 EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 9dc6b2b2427..3b09634a515 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -16,6 +16,7 @@
 #include <linux/cpu.h>
 #include <linux/delay.h>
 #include <linux/uaccess.h>
+#include <linux/percpu.h>
 
 #include <asm/apic.h>
 
@@ -55,13 +56,13 @@ static inline void print_stack_overflow(void) { }
 union irq_ctx {
 	struct thread_info      tinfo;
 	u32                     stack[THREAD_SIZE/sizeof(u32)];
-};
+} __attribute__((aligned(PAGE_SIZE)));
 
-static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
-static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
+static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx);
+static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx);
 
-static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
-static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, hardirq_stack);
+static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, softirq_stack);
 
 static void call_on_stack(void *func, void *stack)
 {
@@ -81,7 +82,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
 	u32 *isp, arg1, arg2;
 
 	curctx = (union irq_ctx *) current_thread_info();
-	irqctx = hardirq_ctx[smp_processor_id()];
+	irqctx = __get_cpu_var(hardirq_ctx);
 
 	/*
 	 * this is where we switch to the IRQ stack. However, if we are
@@ -125,34 +126,34 @@ void __cpuinit irq_ctx_init(int cpu)
 {
 	union irq_ctx *irqctx;
 
-	if (hardirq_ctx[cpu])
+	if (per_cpu(hardirq_ctx, cpu))
 		return;
 
-	irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
+	irqctx = &per_cpu(hardirq_stack, cpu);
 	irqctx->tinfo.task		= NULL;
 	irqctx->tinfo.exec_domain	= NULL;
 	irqctx->tinfo.cpu		= cpu;
 	irqctx->tinfo.preempt_count	= HARDIRQ_OFFSET;
 	irqctx->tinfo.addr_limit	= MAKE_MM_SEG(0);
 
-	hardirq_ctx[cpu] = irqctx;
+	per_cpu(hardirq_ctx, cpu) = irqctx;
 
-	irqctx = (union irq_ctx *) &softirq_stack[cpu*THREAD_SIZE];
+	irqctx = &per_cpu(softirq_stack, cpu);
 	irqctx->tinfo.task		= NULL;
 	irqctx->tinfo.exec_domain	= NULL;
 	irqctx->tinfo.cpu		= cpu;
 	irqctx->tinfo.preempt_count	= 0;
 	irqctx->tinfo.addr_limit	= MAKE_MM_SEG(0);
 
-	softirq_ctx[cpu] = irqctx;
+	per_cpu(softirq_ctx, cpu) = irqctx;
 
 	printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
-	       cpu, hardirq_ctx[cpu], softirq_ctx[cpu]);
+	       cpu, per_cpu(hardirq_ctx, cpu),  per_cpu(softirq_ctx, cpu));
 }
 
 void irq_ctx_exit(int cpu)
 {
-	hardirq_ctx[cpu] = NULL;
+	per_cpu(hardirq_ctx, cpu) = NULL;
 }
 
 asmlinkage void do_softirq(void)
@@ -169,7 +170,7 @@ asmlinkage void do_softirq(void)
 
 	if (local_softirq_pending()) {
 		curctx = current_thread_info();
-		irqctx = softirq_ctx[smp_processor_id()];
+		irqctx = __get_cpu_var(softirq_ctx);
 		irqctx->tinfo.task = curctx->task;
 		irqctx->tinfo.previous_esp = current_stack_pointer;
 
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 50b8c3a3006..bc132610544 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -175,6 +175,9 @@ void __init native_init_IRQ(void)
 	/* self generated IPI for local APIC timer */
 	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
 
+	/* generic IPI for platform specific use */
+	alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt);
+
 	/* IPI vectors for APIC spurious and error interrupts */
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index da481a1e3f3..c7a49e0ffbf 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -147,6 +147,9 @@ static void __init apic_intr_init(void)
 	/* self generated IPI for local APIC timer */
 	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
 
+	/* generic IPI for platform specific use */
+	alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt);
+
 	/* IPI vectors for APIC spurious and error interrupts */
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index f5fc8c781a6..e7368c1da01 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -14,12 +14,12 @@
 #include <linux/ftrace.h>
 #include <linux/suspend.h>
 #include <linux/gfp.h>
+#include <linux/io.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
-#include <asm/io.h>
 #include <asm/apic.h>
 #include <asm/cpufeature.h>
 #include <asm/desc.h>
@@ -63,7 +63,7 @@ static void load_segments(void)
 		"\tmovl %%eax,%%fs\n"
 		"\tmovl %%eax,%%gs\n"
 		"\tmovl %%eax,%%ss\n"
-		::: "eax", "memory");
+		: : : "eax", "memory");
 #undef STR
 #undef __STR
 }
@@ -205,7 +205,8 @@ void machine_kexec(struct kimage *image)
 
 	if (image->preserve_context) {
 #ifdef CONFIG_X86_IO_APIC
-		/* We need to put APICs in legacy mode so that we can
+		/*
+		 * We need to put APICs in legacy mode so that we can
 		 * get timer interrupts in second kernel. kexec/kdump
 		 * paths already have calls to disable_IO_APIC() in
 		 * one form or other. kexec jump path also need
@@ -227,7 +228,8 @@ void machine_kexec(struct kimage *image)
 		page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
 						<< PAGE_SHIFT);
 
-	/* The segment registers are funny things, they have both a
+	/*
+	 * The segment registers are funny things, they have both a
 	 * visible and an invisible part.  Whenever the visible part is
 	 * set to a specific selector, the invisible part is loaded
 	 * with from a table in memory.  At no other time is the
@@ -237,11 +239,12 @@ void machine_kexec(struct kimage *image)
 	 * segments, before I zap the gdt with an invalid value.
 	 */
 	load_segments();
-	/* The gdt & idt are now invalid.
+	/*
+	 * The gdt & idt are now invalid.
 	 * If you want to load them you must set up your own idt & gdt.
 	 */
-	set_gdt(phys_to_virt(0),0);
-	set_idt(phys_to_virt(0),0);
+	set_gdt(phys_to_virt(0), 0);
+	set_idt(phys_to_virt(0), 0);
 
 	/* now call it */
 	image->start = relocate_kernel_ptr((unsigned long)image->head,
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 6993d51b7fd..89cea4d4467 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -12,11 +12,47 @@
 #include <linux/reboot.h>
 #include <linux/numa.h>
 #include <linux/ftrace.h>
+#include <linux/io.h>
+#include <linux/suspend.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
-#include <asm/io.h>
+
+static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
+				unsigned long addr)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+	struct page *page;
+	int result = -ENOMEM;	/* the only failure is a page allocation */
+
+	addr &= PMD_MASK;
+	pgd += pgd_index(addr);
+	if (!pgd_present(*pgd)) {	/* no pud table yet: allocate one */
+		page = kimage_alloc_control_pages(image, 0);
+		if (!page)
+			goto out;
+		pud = (pud_t *)page_address(page);
+		memset(pud, 0, PAGE_SIZE);
+		set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+	}
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud)) {	/* no pmd table yet: allocate one */
+		page = kimage_alloc_control_pages(image, 0);
+		if (!page)
+			goto out;
+		pmd = (pmd_t *)page_address(page);
+		memset(pmd, 0, PAGE_SIZE);
+		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+	}
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd))	/* an existing entry is left untouched */
+		set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
+	result = 0;
+out:
+	return result;	/* 0, or -ENOMEM if an allocation failed */
+}
 
 static void init_level2_page(pmd_t *level2p, unsigned long addr)
 {
@@ -83,9 +119,8 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p,
 		}
 		level3p = (pud_t *)page_address(page);
 		result = init_level3_page(image, level3p, addr, last_addr);
-		if (result) {
+		if (result)
 			goto out;
-		}
 		set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
 		addr += PGDIR_SIZE;
 	}
@@ -156,6 +191,13 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 	result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
 	if (result)
 		return result;
+	/*
+	 * image->start may be outside 0 ~ max_pfn, for example when
+	 * jump back to original kernel from kexeced kernel
+	 */
+	result = init_one_level2_page(image, level4p, image->start);
+	if (result)
+		return result;
 	return init_transition_pgtable(image, level4p);
 }
 
@@ -229,20 +271,45 @@ void machine_kexec(struct kimage *image)
 {
 	unsigned long page_list[PAGES_NR];
 	void *control_page;
+	int save_ftrace_enabled;
 
-	tracer_disable();
+#ifdef CONFIG_KEXEC_JUMP
+	if (image->preserve_context)
+		save_processor_state();
+#endif /* CONFIG_KEXEC_JUMP */
+
+	save_ftrace_enabled = __ftrace_enabled_save();
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
 
+	if (image->preserve_context) {
+#ifdef CONFIG_X86_IO_APIC
+		/*
+		 * We need to put APICs in legacy mode so that we can
+		 * get timer interrupts in second kernel. kexec/kdump
+		 * paths already have calls to disable_IO_APIC() in
+		 * one form or other. kexec jump path also need
+		 * one.
+		 */
+		disable_IO_APIC();
+#endif /* CONFIG_X86_IO_APIC */
+	}
+
 	control_page = page_address(image->control_code_page) + PAGE_SIZE;
-	memcpy(control_page, relocate_kernel, PAGE_SIZE);
+	memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
 
 	page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
+	page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
 	page_list[PA_TABLE_PAGE] =
 	  (unsigned long)__pa(page_address(image->control_code_page));
 
-	/* The segment registers are funny things, they have both a
+	if (image->type == KEXEC_TYPE_DEFAULT)
+		page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
+						<< PAGE_SHIFT);
+
+	/*
+	 * The segment registers are funny things, they have both a
 	 * visible and an invisible part.  Whenever the visible part is
 	 * set to a specific selector, the invisible part is loaded
 	 * with from a table in memory.  At no other time is the
@@ -252,15 +319,25 @@ void machine_kexec(struct kimage *image)
 	 * segments, before I zap the gdt with an invalid value.
 	 */
 	load_segments();
-	/* The gdt & idt are now invalid.
+	/*
+	 * The gdt & idt are now invalid.
 	 * If you want to load them you must set up your own idt & gdt.
 	 */
-	set_gdt(phys_to_virt(0),0);
-	set_idt(phys_to_virt(0),0);
+	set_gdt(phys_to_virt(0), 0);
+	set_idt(phys_to_virt(0), 0);
 
 	/* now call it */
-	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
-			image->start);
+	image->start = relocate_kernel((unsigned long)image->head,
+				       (unsigned long)page_list,
+				       image->start,
+				       image->preserve_context);
+
+#ifdef CONFIG_KEXEC_JUMP
+	if (image->preserve_context)
+		restore_processor_state();
+#endif /* CONFIG_KEXEC_JUMP */
+
+	__ftrace_enabled_restore(save_ftrace_enabled);
 }
 
 void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 37cb1bda1ba..e8192401da4 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -558,6 +558,19 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
 
 static struct mpf_intel *mpf_found;
 
+static unsigned long __init get_mpc_size(unsigned long physptr)
+{
+	struct mpc_table *mpc;
+	unsigned long size;
+
+	mpc = early_ioremap(physptr, PAGE_SIZE);	/* temporary mapping */
+	size = mpc->length;	/* NOTE(review): mpc is used unchecked - confirm */
+	early_iounmap(mpc, PAGE_SIZE);
+	apic_printk(APIC_VERBOSE, "  mpc: %lx-%lx\n", physptr, physptr + size);
+
+	return size;	/* the length field read from the table at physptr */
+}
+
 /*
  * Scan the memory blocks for an SMP configuration block.
  */
@@ -611,12 +624,16 @@ static void __init __get_smp_config(unsigned int early)
 		construct_default_ISA_mptable(mpf->feature1);
 
 	} else if (mpf->physptr) {
+		struct mpc_table *mpc;
+		unsigned long size;
 
+		size = get_mpc_size(mpf->physptr);
+		mpc = early_ioremap(mpf->physptr, size);
 		/*
 		 * Read the physical hardware table.  Anything here will
 		 * override the defaults.
 		 */
-		if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) {
+		if (!smp_read_mpc(mpc, early)) {
 #ifdef CONFIG_X86_LOCAL_APIC
 			smp_found_config = 0;
 #endif
@@ -624,8 +641,10 @@ static void __init __get_smp_config(unsigned int early)
 			       "BIOS bug, MP table errors detected!...\n");
 			printk(KERN_ERR "... disabling SMP support. "
 			       "(tell your hw vendor)\n");
+			early_iounmap(mpc, size);
 			return;
 		}
+		early_iounmap(mpc, size);
 
 		if (early)
 			return;
@@ -697,10 +716,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
 
 			if (!reserve)
 				return 1;
-			reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
+			reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf),
 					BOOTMEM_DEFAULT);
 			if (mpf->physptr) {
-				unsigned long size = PAGE_SIZE;
+				unsigned long size = get_mpc_size(mpf->physptr);
 #ifdef CONFIG_X86_32
 				/*
 				 * We cannot access to MPC table to compute
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 87b69d4fac1..6afa5232dbb 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -1,8 +1,8 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <asm/idle.h>
 #include <linux/smp.h>
+#include <linux/prctl.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/module.h>
@@ -11,6 +11,9 @@
 #include <linux/ftrace.h>
 #include <asm/system.h>
 #include <asm/apic.h>
+#include <asm/idle.h>
+#include <asm/uaccess.h>
+#include <asm/i387.h>
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
@@ -56,6 +59,192 @@ void arch_task_cache_init(void)
 }
 
 /*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+	struct task_struct *me = current;
+	struct thread_struct *t = &me->thread;
+
+	if (me->thread.io_bitmap_ptr) {
+		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
+
+		kfree(t->io_bitmap_ptr);
+		t->io_bitmap_ptr = NULL;
+		clear_thread_flag(TIF_IO_BITMAP);
+		/*
+		 * Careful, clear this in the TSS too:
+		 */
+		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
+		t->io_bitmap_max = 0;
+		put_cpu();
+	}
+
+	ds_exit_thread(current);
+}
+
+void flush_thread(void)
+{
+	struct task_struct *tsk = current;
+
+#ifdef CONFIG_X86_64
+	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
+		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
+		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
+			clear_tsk_thread_flag(tsk, TIF_IA32);
+		} else {
+			set_tsk_thread_flag(tsk, TIF_IA32);
+			current_thread_info()->status |= TS_COMPAT;
+		}
+	}
+#endif
+
+	clear_tsk_thread_flag(tsk, TIF_DEBUG);
+
+	tsk->thread.debugreg0 = 0;
+	tsk->thread.debugreg1 = 0;
+	tsk->thread.debugreg2 = 0;
+	tsk->thread.debugreg3 = 0;
+	tsk->thread.debugreg6 = 0;
+	tsk->thread.debugreg7 = 0;
+	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
+	/*
+	 * Forget coprocessor state..
+	 */
+	tsk->fpu_counter = 0;
+	clear_fpu(tsk);
+	clear_used_math();
+}
+
+static void hard_disable_TSC(void)
+{
+	write_cr4(read_cr4() | X86_CR4_TSD);
+}
+
+void disable_TSC(void)
+{
+	preempt_disable();
+	if (!test_and_set_thread_flag(TIF_NOTSC))
+		/*
+		 * Must flip the CPU state synchronously with
+		 * TIF_NOTSC in the current running context.
+		 */
+		hard_disable_TSC();
+	preempt_enable();
+}
+
+static void hard_enable_TSC(void)
+{
+	write_cr4(read_cr4() & ~X86_CR4_TSD);
+}
+
+static void enable_TSC(void)
+{
+	preempt_disable();
+	if (test_and_clear_thread_flag(TIF_NOTSC))
+		/*
+		 * Must flip the CPU state synchronously with
+		 * TIF_NOTSC in the current running context.
+		 */
+		hard_enable_TSC();
+	preempt_enable();
+}
+
+int get_tsc_mode(unsigned long adr)
+{
+	unsigned int val;
+
+	if (test_thread_flag(TIF_NOTSC))
+		val = PR_TSC_SIGSEGV;
+	else
+		val = PR_TSC_ENABLE;
+
+	return put_user(val, (unsigned int __user *)adr);
+}
+
+int set_tsc_mode(unsigned int val)
+{
+	if (val == PR_TSC_SIGSEGV)
+		disable_TSC();
+	else if (val == PR_TSC_ENABLE)
+		enable_TSC();
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+		      struct tss_struct *tss)
+{
+	struct thread_struct *prev, *next;
+
+	prev = &prev_p->thread;
+	next = &next_p->thread;
+
+	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
+	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
+		ds_switch_to(prev_p, next_p);
+	else if (next->debugctlmsr != prev->debugctlmsr)
+		update_debugctlmsr(next->debugctlmsr);
+
+	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
+		set_debugreg(next->debugreg0, 0);
+		set_debugreg(next->debugreg1, 1);
+		set_debugreg(next->debugreg2, 2);
+		set_debugreg(next->debugreg3, 3);
+		/* no 4 and 5 */
+		set_debugreg(next->debugreg6, 6);
+		set_debugreg(next->debugreg7, 7);
+	}
+
+	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
+	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
+		/* prev and next are different */
+		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
+			hard_disable_TSC();
+		else
+			hard_enable_TSC();
+	}
+
+	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
+		/*
+		 * Copy the relevant range of the IO bitmap.
+		 * Normally this is 128 bytes or less:
+		 */
+		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
+		       max(prev->io_bitmap_max, next->io_bitmap_max));
+	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
+		/*
+		 * Clear any possible leftover bits:
+		 */
+		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
+	}
+}
+
+int sys_fork(struct pt_regs *regs)
+{
+	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
+}
+
+/*
+ * This is trivial, and on the face of it looks like it
+ * could equally well be done in user mode.
+ *
+ * Not so, for quite unobvious reasons - register pressure.
+ * In user mode vfork() cannot have a stack frame, and if
+ * done by calling the "clone()" system call directly, you
+ * do not have enough call-clobbered registers to hold all
+ * the information you need.
+ */
+int sys_vfork(struct pt_regs *regs)
+{
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
+		       NULL, NULL);
+}
+
+
+/*
  * Idle related variables and functions
  */
 unsigned long boot_option_idle_override = 0;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 646da41a620..14014d766ca 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -230,55 +230,6 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 }
 EXPORT_SYMBOL(kernel_thread);
 
-/*
- * Free current thread data structures etc..
- */
-void exit_thread(void)
-{
-	/* The process may have allocated an io port bitmap... nuke it. */
-	if (unlikely(test_thread_flag(TIF_IO_BITMAP))) {
-		struct task_struct *tsk = current;
-		struct thread_struct *t = &tsk->thread;
-		int cpu = get_cpu();
-		struct tss_struct *tss = &per_cpu(init_tss, cpu);
-
-		kfree(t->io_bitmap_ptr);
-		t->io_bitmap_ptr = NULL;
-		clear_thread_flag(TIF_IO_BITMAP);
-		/*
-		 * Careful, clear this in the TSS too:
-		 */
-		memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
-		t->io_bitmap_max = 0;
-		tss->io_bitmap_owner = NULL;
-		tss->io_bitmap_max = 0;
-		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
-		put_cpu();
-	}
-
-	ds_exit_thread(current);
-}
-
-void flush_thread(void)
-{
-	struct task_struct *tsk = current;
-
-	tsk->thread.debugreg0 = 0;
-	tsk->thread.debugreg1 = 0;
-	tsk->thread.debugreg2 = 0;
-	tsk->thread.debugreg3 = 0;
-	tsk->thread.debugreg6 = 0;
-	tsk->thread.debugreg7 = 0;
-	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
-	clear_tsk_thread_flag(tsk, TIF_DEBUG);
-	/*
-	 * Forget coprocessor state..
-	 */
-	tsk->fpu_counter = 0;
-	clear_fpu(tsk);
-	clear_used_math();
-}
-
 void release_thread(struct task_struct *dead_task)
 {
 	BUG_ON(dead_task->mm);
@@ -366,127 +317,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
-static void hard_disable_TSC(void)
-{
-	write_cr4(read_cr4() | X86_CR4_TSD);
-}
-
-void disable_TSC(void)
-{
-	preempt_disable();
-	if (!test_and_set_thread_flag(TIF_NOTSC))
-		/*
-		 * Must flip the CPU state synchronously with
-		 * TIF_NOTSC in the current running context.
-		 */
-		hard_disable_TSC();
-	preempt_enable();
-}
-
-static void hard_enable_TSC(void)
-{
-	write_cr4(read_cr4() & ~X86_CR4_TSD);
-}
-
-static void enable_TSC(void)
-{
-	preempt_disable();
-	if (test_and_clear_thread_flag(TIF_NOTSC))
-		/*
-		 * Must flip the CPU state synchronously with
-		 * TIF_NOTSC in the current running context.
-		 */
-		hard_enable_TSC();
-	preempt_enable();
-}
-
-int get_tsc_mode(unsigned long adr)
-{
-	unsigned int val;
-
-	if (test_thread_flag(TIF_NOTSC))
-		val = PR_TSC_SIGSEGV;
-	else
-		val = PR_TSC_ENABLE;
-
-	return put_user(val, (unsigned int __user *)adr);
-}
-
-int set_tsc_mode(unsigned int val)
-{
-	if (val == PR_TSC_SIGSEGV)
-		disable_TSC();
-	else if (val == PR_TSC_ENABLE)
-		enable_TSC();
-	else
-		return -EINVAL;
-
-	return 0;
-}
-
-static noinline void
-__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
-		 struct tss_struct *tss)
-{
-	struct thread_struct *prev, *next;
-
-	prev = &prev_p->thread;
-	next = &next_p->thread;
-
-	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
-	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
-		ds_switch_to(prev_p, next_p);
-	else if (next->debugctlmsr != prev->debugctlmsr)
-		update_debugctlmsr(next->debugctlmsr);
-
-	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
-		set_debugreg(next->debugreg0, 0);
-		set_debugreg(next->debugreg1, 1);
-		set_debugreg(next->debugreg2, 2);
-		set_debugreg(next->debugreg3, 3);
-		/* no 4 and 5 */
-		set_debugreg(next->debugreg6, 6);
-		set_debugreg(next->debugreg7, 7);
-	}
-
-	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
-	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
-		/* prev and next are different */
-		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
-			hard_disable_TSC();
-		else
-			hard_enable_TSC();
-	}
-
-	if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
-		/*
-		 * Disable the bitmap via an invalid offset. We still cache
-		 * the previous bitmap owner and the IO bitmap contents:
-		 */
-		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
-		return;
-	}
-
-	if (likely(next == tss->io_bitmap_owner)) {
-		/*
-		 * Previous owner of the bitmap (hence the bitmap content)
-		 * matches the next task, we dont have to do anything but
-		 * to set a valid offset in the TSS:
-		 */
-		tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
-		return;
-	}
-	/*
-	 * Lazy TSS's I/O bitmap copy. We set an invalid offset here
-	 * and we let the task to get a GPF in case an I/O instruction
-	 * is performed.  The handler of the GPF will verify that the
-	 * faulting task has a valid I/O bitmap and, it true, does the
-	 * real copy and restart the instruction.  This will save us
-	 * redundant copies when the currently switched task does not
-	 * perform any I/O during its timeslice.
-	 */
-	tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
-}
 
 /*
  *	switch_to(x,yn) should switch tasks from x to y.
@@ -600,11 +430,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	return prev_p;
 }
 
-int sys_fork(struct pt_regs *regs)
-{
-	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
-}
-
 int sys_clone(struct pt_regs *regs)
 {
 	unsigned long clone_flags;
@@ -621,21 +446,6 @@ int sys_clone(struct pt_regs *regs)
 }
 
 /*
- * This is trivial, and on the face of it looks like it
- * could equally well be done in user mode.
- *
- * Not so, for quite unobvious reasons - register pressure.
- * In user mode vfork() cannot have a stack frame, and if
- * done by calling the "clone()" system call directly, you
- * do not have enough call-clobbered registers to hold all
- * the information you need.
- */
-int sys_vfork(struct pt_regs *regs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL);
-}
-
-/*
  * sys_execve() executes a new program.
  */
 int sys_execve(struct pt_regs *regs)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 836ef6575f0..abb7e6a7f0c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -237,61 +237,6 @@ void show_regs(struct pt_regs *regs)
 	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
 }
 
-/*
- * Free current thread data structures etc..
- */
-void exit_thread(void)
-{
-	struct task_struct *me = current;
-	struct thread_struct *t = &me->thread;
-
-	if (me->thread.io_bitmap_ptr) {
-		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
-
-		kfree(t->io_bitmap_ptr);
-		t->io_bitmap_ptr = NULL;
-		clear_thread_flag(TIF_IO_BITMAP);
-		/*
-		 * Careful, clear this in the TSS too:
-		 */
-		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
-		t->io_bitmap_max = 0;
-		put_cpu();
-	}
-
-	ds_exit_thread(current);
-}
-
-void flush_thread(void)
-{
-	struct task_struct *tsk = current;
-
-	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
-		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
-		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
-			clear_tsk_thread_flag(tsk, TIF_IA32);
-		} else {
-			set_tsk_thread_flag(tsk, TIF_IA32);
-			current_thread_info()->status |= TS_COMPAT;
-		}
-	}
-	clear_tsk_thread_flag(tsk, TIF_DEBUG);
-
-	tsk->thread.debugreg0 = 0;
-	tsk->thread.debugreg1 = 0;
-	tsk->thread.debugreg2 = 0;
-	tsk->thread.debugreg3 = 0;
-	tsk->thread.debugreg6 = 0;
-	tsk->thread.debugreg7 = 0;
-	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
-	/*
-	 * Forget coprocessor state..
-	 */
-	tsk->fpu_counter = 0;
-	clear_fpu(tsk);
-	clear_used_math();
-}
-
 void release_thread(struct task_struct *dead_task)
 {
 	if (dead_task->mm) {
@@ -425,118 +370,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
-static void hard_disable_TSC(void)
-{
-	write_cr4(read_cr4() | X86_CR4_TSD);
-}
-
-void disable_TSC(void)
-{
-	preempt_disable();
-	if (!test_and_set_thread_flag(TIF_NOTSC))
-		/*
-		 * Must flip the CPU state synchronously with
-		 * TIF_NOTSC in the current running context.
-		 */
-		hard_disable_TSC();
-	preempt_enable();
-}
-
-static void hard_enable_TSC(void)
-{
-	write_cr4(read_cr4() & ~X86_CR4_TSD);
-}
-
-static void enable_TSC(void)
-{
-	preempt_disable();
-	if (test_and_clear_thread_flag(TIF_NOTSC))
-		/*
-		 * Must flip the CPU state synchronously with
-		 * TIF_NOTSC in the current running context.
-		 */
-		hard_enable_TSC();
-	preempt_enable();
-}
-
-int get_tsc_mode(unsigned long adr)
-{
-	unsigned int val;
-
-	if (test_thread_flag(TIF_NOTSC))
-		val = PR_TSC_SIGSEGV;
-	else
-		val = PR_TSC_ENABLE;
-
-	return put_user(val, (unsigned int __user *)adr);
-}
-
-int set_tsc_mode(unsigned int val)
-{
-	if (val == PR_TSC_SIGSEGV)
-		disable_TSC();
-	else if (val == PR_TSC_ENABLE)
-		enable_TSC();
-	else
-		return -EINVAL;
-
-	return 0;
-}
-
-/*
- * This special macro can be used to load a debugging register
- */
-#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
-
-static inline void __switch_to_xtra(struct task_struct *prev_p,
-				    struct task_struct *next_p,
-				    struct tss_struct *tss)
-{
-	struct thread_struct *prev, *next;
-
-	prev = &prev_p->thread,
-	next = &next_p->thread;
-
-	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
-	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
-		ds_switch_to(prev_p, next_p);
-	else if (next->debugctlmsr != prev->debugctlmsr)
-		update_debugctlmsr(next->debugctlmsr);
-
-	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
-		loaddebug(next, 0);
-		loaddebug(next, 1);
-		loaddebug(next, 2);
-		loaddebug(next, 3);
-		/* no 4 and 5 */
-		loaddebug(next, 6);
-		loaddebug(next, 7);
-	}
-
-	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
-	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
-		/* prev and next are different */
-		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
-			hard_disable_TSC();
-		else
-			hard_enable_TSC();
-	}
-
-	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
-		/*
-		 * Copy the relevant range of the IO bitmap.
-		 * Normally this is 128 bytes or less:
-		 */
-		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
-		       max(prev->io_bitmap_max, next->io_bitmap_max));
-	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
-		/*
-		 * Clear any possible leftover bits:
-		 */
-		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
-	}
-}
-
 /*
  *	switch_to(x,y) should switch tasks from x to y.
  *
@@ -694,11 +527,6 @@ void set_personality_64bit(void)
 	current->personality &= ~READ_IMPLIES_EXEC;
 }
 
-asmlinkage long sys_fork(struct pt_regs *regs)
-{
-	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
-}
-
 asmlinkage long
 sys_clone(unsigned long clone_flags, unsigned long newsp,
 	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
@@ -708,22 +536,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
 	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
 }
 
-/*
- * This is trivial, and on the face of it looks like it
- * could equally well be done in user mode.
- *
- * Not so, for quite unobvious reasons - register pressure.
- * In user mode vfork() cannot have a stack frame, and if
- * done by calling the "clone()" system call directly, you
- * do not have enough call-clobbered registers to hold all
- * the information you need.
- */
-asmlinkage long sys_vfork(struct pt_regs *regs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
-		    NULL, NULL);
-}
-
 unsigned long get_wchan(struct task_struct *p)
 {
 	unsigned long stack;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index fb2159a5c81..3d9672e59c1 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1383,7 +1383,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
 #ifdef CONFIG_X86_32
 # define IS_IA32	1
 #elif defined CONFIG_IA32_EMULATION
-# define IS_IA32	test_thread_flag(TIF_IA32)
+# define IS_IA32	is_compat_task()
 #else
 # define IS_IA32	0
 #endif
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 1cc18d439bb..2aef36d8aca 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -216,6 +216,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"),
 		},
 	},
+	{	/* Handle problems with rebooting on Dell XPS710 */
+		.callback = set_bios_reboot,
+		.ident = "Dell XPS710",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"),
+		},
+	},
 	{ }
 };
 
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 2064d0aa8d2..41235531b11 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -17,7 +17,8 @@
 
 #define PTR(x) (x << 2)
 
-/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
+/*
+ * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
  * ~ control_page + PAGE_SIZE are used as data storage and stack for
  * jumping back
  */
@@ -76,8 +77,10 @@ relocate_kernel:
 	movl	%eax, CP_PA_SWAP_PAGE(%edi)
 	movl	%ebx, CP_PA_BACKUP_PAGES_MAP(%edi)
 
-	/* get physical address of control page now */
-	/* this is impossible after page table switch */
+	/*
+	 * get physical address of control page now
+	 * this is impossible after page table switch
+	 */
 	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edi
 
 	/* switch to new set of page tables */
@@ -97,7 +100,8 @@ identity_mapped:
 	/* store the start address on the stack */
 	pushl   %edx
 
-	/* Set cr0 to a known state:
+	/*
+	 * Set cr0 to a known state:
 	 *  - Paging disabled
 	 *  - Alignment check disabled
 	 *  - Write protect disabled
@@ -113,7 +117,8 @@ identity_mapped:
 	/* clear cr4 if applicable */
 	testl	%ecx, %ecx
 	jz	1f
-	/* Set cr4 to a known state:
+	/*
+	 * Set cr4 to a known state:
 	 * Setting everything to zero seems safe.
 	 */
 	xorl	%eax, %eax
@@ -132,15 +137,18 @@ identity_mapped:
 	call	swap_pages
 	addl	$8, %esp
 
-	/* To be certain of avoiding problems with self-modifying code
+	/*
+	 * To be certain of avoiding problems with self-modifying code
 	 * I need to execute a serializing instruction here.
 	 * So I flush the TLB, it's handy, and not processor dependent.
 	 */
 	xorl	%eax, %eax
 	movl	%eax, %cr3
 
-	/* set all of the registers to known values */
-	/* leave %esp alone */
+	/*
+	 * set all of the registers to known values
+	 * leave %esp alone
+	 */
 
 	testl	%esi, %esi
 	jnz 1f
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index d32cfb27a47..4de8f5b3d47 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -19,29 +19,77 @@
 #define PTR(x) (x << 3)
 #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
 
+/*
+ * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
+ * ~ control_page + PAGE_SIZE are used as data storage and stack for
+ * jumping back
+ */
+#define DATA(offset)		(KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
+
+/* Minimal CPU state */
+#define RSP			DATA(0x0)
+#define CR0			DATA(0x8)
+#define CR3			DATA(0x10)
+#define CR4			DATA(0x18)
+
+/* other data */
+#define CP_PA_TABLE_PAGE	DATA(0x20)
+#define CP_PA_SWAP_PAGE		DATA(0x28)
+#define CP_PA_BACKUP_PAGES_MAP	DATA(0x30)
+
 	.text
 	.align PAGE_SIZE
 	.code64
 	.globl relocate_kernel
 relocate_kernel:
-	/* %rdi indirection_page
+	/*
+	 * %rdi indirection_page
 	 * %rsi page_list
 	 * %rdx start address
+	 * %rcx preserve_context
 	 */
 
+	/* Save the CPU context, used for jumping back */
+	pushq %rbx
+	pushq %rbp
+	pushq %r12
+	pushq %r13
+	pushq %r14
+	pushq %r15
+	pushf
+
+	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
+	movq	%rsp, RSP(%r11)
+	movq	%cr0, %rax
+	movq	%rax, CR0(%r11)
+	movq	%cr3, %rax
+	movq	%rax, CR3(%r11)
+	movq	%cr4, %rax
+	movq	%rax, CR4(%r11)
+
 	/* zero out flags, and disable interrupts */
 	pushq $0
 	popfq
 
-	/* get physical address of control page now */
-	/* this is impossible after page table switch */
+	/*
+	 * get physical address of control page now
+	 * this is impossible after page table switch
+	 */
 	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8
 
 	/* get physical address of page table now too */
-	movq	PTR(PA_TABLE_PAGE)(%rsi), %rcx
+	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9
+
+	/* get physical address of swap page now */
+	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10
+
+	/* save some information for jumping back */
+	movq	%r9, CP_PA_TABLE_PAGE(%r11)
+	movq	%r10, CP_PA_SWAP_PAGE(%r11)
+	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
 
 	/* Switch to the identity mapped page tables */
-	movq	%rcx, %cr3
+	movq	%r9, %cr3
 
 	/* setup a new stack at the end of the physical control page */
 	lea	PAGE_SIZE(%r8), %rsp
@@ -55,7 +103,8 @@ identity_mapped:
 	/* store the start address on the stack */
 	pushq   %rdx
 
-	/* Set cr0 to a known state:
+	/*
+	 * Set cr0 to a known state:
 	 *  - Paging enabled
 	 *  - Alignment check disabled
 	 *  - Write protect disabled
@@ -68,7 +117,8 @@ identity_mapped:
 	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
 	movq	%rax, %cr0
 
-	/* Set cr4 to a known state:
+	/*
+	 * Set cr4 to a known state:
 	 *  - physical address extension enabled
 	 */
 	movq	$X86_CR4_PAE, %rax
@@ -78,9 +128,87 @@ identity_mapped:
 1:
 
 	/* Flush the TLB (needed?) */
-	movq	%rcx, %cr3
+	movq	%r9, %cr3
+
+	movq	%rcx, %r11
+	call	swap_pages
+
+	/*
+	 * To be certain of avoiding problems with self-modifying code
+	 * I need to execute a serializing instruction here.
+	 * So I flush the TLB by reloading %cr3 here, it's handy,
+	 * and not processor dependent.
+	 */
+	movq	%cr3, %rax
+	movq	%rax, %cr3
+
+	/*
+	 * %r11 was loaded from %rcx (preserve_context) before swap_pages.
+	 * If it is zero, clear every register except %rsp and return.
+	 */
+
+	testq	%r11, %r11
+	jnz 1f
+	xorq	%rax, %rax
+	xorq	%rbx, %rbx
+	xorq    %rcx, %rcx
+	xorq    %rdx, %rdx
+	xorq    %rsi, %rsi
+	xorq    %rdi, %rdi
+	xorq    %rbp, %rbp
+	xorq	%r8,  %r8
+	xorq	%r9,  %r9
+	xorq	%r10, %r10
+	xorq	%r11, %r11
+	xorq	%r12, %r12
+	xorq	%r13, %r13
+	xorq	%r14, %r14
+	xorq	%r15, %r15
+
+	ret
+
+1:
+	popq	%rdx
+	leaq	PAGE_SIZE(%r10), %rsp
+	call	*%rdx
+
+	/* get the re-entry point of the peer system */
+	movq	0(%rsp), %rbp
+	call	1f
+1:
+	popq	%r8
+	subq	$(1b - relocate_kernel), %r8
+	movq	CP_PA_SWAP_PAGE(%r8), %r10
+	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
+	movq	CP_PA_TABLE_PAGE(%r8), %rax
+	movq	%rax, %cr3
+	lea	PAGE_SIZE(%r8), %rsp
+	call	swap_pages
+	movq	$virtual_mapped, %rax
+	pushq	%rax
+	ret
+
+virtual_mapped:
+	movq	RSP(%r8), %rsp		/* stack pointer saved on entry */
+	movq	CR4(%r8), %rax
+	movq	%rax, %cr4
+	movq	CR3(%r8), %rax
+	movq	CR0(%r8), %r8		/* last read through %r8, done before the %cr3 write */
+	movq	%rax, %cr3
+	movq	%r8, %cr0
+	movq	%rbp, %rax		/* re-entry point of the peer system */
+
+	popf				/* undo the pushes made at relocate_kernel entry */
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbp
+	popq	%rbx
+	ret
 
 	/* Do the copies */
+swap_pages:
 	movq	%rdi, %rcx 	/* Put the page_list in %rcx */
 	xorq	%rdi, %rdi
 	xorq	%rsi, %rsi
@@ -112,36 +240,27 @@ identity_mapped:
 	movq	%rcx,   %rsi  /* For ever source page do a copy */
 	andq	$0xfffffffffffff000, %rsi
 
+	movq	%rdi, %rdx
+	movq	%rsi, %rax
+
+	movq	%r10, %rdi
 	movq	$512,   %rcx
 	rep ; movsq
-	jmp	0b
-3:
-
-	/* To be certain of avoiding problems with self-modifying code
-	 * I need to execute a serializing instruction here.
-	 * So I flush the TLB by reloading %cr3 here, it's handy,
-	 * and not processor dependent.
-	 */
-	movq	%cr3, %rax
-	movq	%rax, %cr3
 
-	/* set all of the registers to known values */
-	/* leave %rsp alone */
+	movq	%rax, %rdi
+	movq	%rdx, %rsi
+	movq	$512,   %rcx
+	rep ; movsq
 
-	xorq	%rax, %rax
-	xorq	%rbx, %rbx
-	xorq    %rcx, %rcx
-	xorq    %rdx, %rdx
-	xorq    %rsi, %rsi
-	xorq    %rdi, %rdi
-	xorq    %rbp, %rbp
-	xorq	%r8,  %r8
-	xorq	%r9,  %r9
-	xorq	%r10, %r9
-	xorq	%r11, %r11
-	xorq	%r12, %r12
-	xorq	%r13, %r13
-	xorq	%r14, %r14
-	xorq	%r15, %r15
+	movq	%rdx, %rdi
+	movq	%r10, %rsi
+	movq	$512,   %rcx
+	rep ; movsq
 
+	lea	PAGE_SIZE(%rax), %rsi
+	jmp	0b
+3:
 	ret
+
+	.globl kexec_control_code_size
+.set kexec_control_code_size, . - relocate_kernel
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 5b85759e797..f28c56e6bf9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -202,7 +202,9 @@ struct ist_info ist_info;
 #endif
 
 #else
-struct cpuinfo_x86 boot_cpu_data __read_mostly;
+struct cpuinfo_x86 boot_cpu_data __read_mostly = {
+	.x86_phys_bits = MAX_PHYSMEM_BITS,
+};
 EXPORT_SYMBOL(boot_cpu_data);
 #endif
 
@@ -600,19 +602,7 @@ static int __init setup_elfcorehdr(char *arg)
 early_param("elfcorehdr", setup_elfcorehdr);
 #endif
 
-static int __init default_update_apic(void)
-{
-#ifdef CONFIG_SMP
-	if (!apic->wakeup_cpu)
-		apic->wakeup_cpu = wakeup_secondary_cpu_via_init;
-#endif
-
-	return 0;
-}
-
-static struct x86_quirks default_x86_quirks __initdata = {
-	.update_apic         = default_update_apic,
-};
+static struct x86_quirks default_x86_quirks __initdata;
 
 struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
 
@@ -782,6 +772,9 @@ void __init setup_arch(char **cmdline_p)
 
 	finish_e820_parsing();
 
+	if (efi_enabled)
+		efi_init();
+
 	dmi_scan_machine();
 
 	dmi_check_system(bad_bios_dmi_table);
@@ -801,8 +794,6 @@ void __init setup_arch(char **cmdline_p)
 	insert_resource(&iomem_resource, &data_resource);
 	insert_resource(&iomem_resource, &bss_resource);
 
-	if (efi_enabled)
-		efi_init();
 
 #ifdef CONFIG_X86_32
 	if (ppro_with_ram_bug()) {
@@ -875,9 +866,7 @@ void __init setup_arch(char **cmdline_p)
 
 	reserve_initrd();
 
-#ifdef CONFIG_X86_64
 	vsmp_init();
-#endif
 
 	io_delay_init();
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index d992e6cff73..efa615f2bf4 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -7,6 +7,7 @@
 #include <linux/crash_dump.h>
 #include <linux/smp.h>
 #include <linux/topology.h>
+#include <linux/pfn.h>
 #include <asm/sections.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
@@ -41,6 +42,352 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
 };
 EXPORT_SYMBOL(__per_cpu_offset);
 
+/*
+ * On x86_64 symbols referenced from code should be reachable using
+ * 32bit relocations.  Reserve space for static percpu variables in
+ * modules so that they are always served from the first chunk which
+ * is located at the percpu segment base.  On x86_32, anything can
+ * address anywhere.  No need to reserve space in the first chunk.
+ */
+#ifdef CONFIG_X86_64
+#define PERCPU_FIRST_CHUNK_RESERVE	PERCPU_MODULE_RESERVE
+#else
+#define PERCPU_FIRST_CHUNK_RESERVE	0
+#endif
+
+/**
+ * pcpu_need_numa - determine percpu allocation needs to consider NUMA
+ *
+ * If NUMA is not configured or there is only one NUMA node available,
+ * there is no reason to consider NUMA.  This function determines
+ * whether percpu allocation should consider NUMA or not.
+ *
+ * RETURNS:
+ * true if NUMA should be considered; otherwise, false.
+ */
+static bool __init pcpu_need_numa(void)
+{
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	pg_data_t *prev = NULL;
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		int nid = early_cpu_to_node(cpu);
+		pg_data_t *pgdat = NODE_DATA(nid);
+
+		/* an online node whose data differs from the previous cpu's */
+		if (prev && pgdat && prev != pgdat && node_online(nid))
+			return true;
+		prev = pgdat;
+	}
+#endif
+	return false;
+}
+
+/**
+ * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
+ * @cpu: cpu to allocate for
+ * @size: size allocation in bytes
+ * @align: alignment
+ *
+ * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper
+ * does the right thing for NUMA regardless of the current
+ * configuration.
+ *
+ * RETURNS:
+ * Pointer to the allocated area on success, NULL on failure.
+ */
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
+					unsigned long align)
+{
+	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	int node = early_cpu_to_node(cpu);	/* node @cpu maps to */
+	void *ptr;
+
+	if (!node_online(node) || !NODE_DATA(node)) {
+		ptr = __alloc_bootmem_nopanic(size, align, goal);
+		pr_info("cpu %d has no node %d or node-local memory\n",
+			cpu, node);
+		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
+			 cpu, size, __pa(ptr));
+	} else {
+		ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node),
+						   size, align, goal);
+		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
+			 "%016lx\n", cpu, size, node, __pa(ptr));
+	}
+	return ptr;	/* not checked here; messages above print even if NULL */
+#else /* !CONFIG_NEED_MULTIPLE_NODES */
+	return __alloc_bootmem_nopanic(size, align, goal);
+#endif /* CONFIG_NEED_MULTIPLE_NODES */
+}
+
+/*
+ * Remap allocator
+ *
+ * This allocator uses PMD page as unit.  A PMD page is allocated for
+ * each cpu and each is remapped into vmalloc area using PMD mapping.
+ * As PMD page is quite large, only part of it is used for the first
+ * chunk.  Unused part is returned to the bootmem allocator.
+ *
+ * So, the PMD pages are mapped twice - once to the physical mapping
+ * and to the vmalloc area for the first percpu chunk.  The double
+ * mapping does add one more PMD TLB entry pressure but still is much
+ * better than only using 4k mappings while still being NUMA friendly.
+ */
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+static size_t pcpur_size __initdata;
+static void **pcpur_ptrs __initdata;
+
+static struct page * __init pcpur_get_page(unsigned int cpu, int pageno)
+{
+	size_t offset = (size_t)pageno << PAGE_SHIFT;
+
+	/* only the first pcpur_size bytes of each area are in use */
+	if (offset < pcpur_size)
+		return virt_to_page(pcpur_ptrs[cpu] + offset);
+	return NULL;
+}
+
+/**
+ * setup_pcpu_remap - set up the first percpu chunk with the remap allocator
+ * @static_size: size of the static percpu area in bytes
+ *
+ * Allocate a PMD_SIZE area for each possible cpu, copy the static percpu
+ * data into it and map the areas back to back using large pages.  Only
+ * applies when PSE is available and pcpu_need_numa() says that NUMA
+ * has to be considered.
+ *
+ * RETURNS:
+ * pcpu_setup_first_chunk()'s return value once the areas are mapped,
+ * -EINVAL if this allocator can't be used, -ENOMEM on allocation failure.
+ */
+static ssize_t __init setup_pcpu_remap(size_t static_size)
+{
+	static struct vm_struct vm;
+	size_t ptrs_size, dyn_size;
+	unsigned int cpu;
+	ssize_t ret;
+
+	/*
+	 * If large page isn't supported, there's no benefit in doing
+	 * this.  Also, on non-NUMA, embedding is better.
+	 */
+	if (!cpu_has_pse || !pcpu_need_numa())
+		return -EINVAL;
+
+	/*
+	 * Currently supports only single page.  Supporting multiple
+	 * pages won't be too difficult if it ever becomes necessary.
+	 */
+	pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
+			       PERCPU_DYNAMIC_RESERVE);
+	if (pcpur_size > PMD_SIZE) {
+		pr_warning("PERCPU: static data is larger than large page, "
+			   "can't use large page\n");
+		return -EINVAL;
+	}
+	dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
+
+	/* allocate pointer array and alloc large pages */
+	ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0]));
+	pcpur_ptrs = alloc_bootmem(ptrs_size);
+
+	for_each_possible_cpu(cpu) {
+		pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE);
+		if (!pcpur_ptrs[cpu])
+			goto enomem;
+
+		/*
+		 * Only use pcpur_size bytes and give back the rest.
+		 *
+		 * Ingo: The 2MB up-rounding bootmem is needed to make
+		 * sure the partial 2MB page is still fully RAM - it's
+		 * not well-specified to have a PAT-incompatible area
+		 * (unmapped RAM, device memory, etc.) in that hole.
+		 */
+		free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size),
+			     PMD_SIZE - pcpur_size);
+
+		memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size);
+	}
+
+	/* allocate address and map */
+	vm.flags = VM_ALLOC;
+	vm.size = num_possible_cpus() * PMD_SIZE;
+	vm_area_register_early(&vm, PMD_SIZE);
+
+	for_each_possible_cpu(cpu) {
+		pmd_t *pmd;
+
+		pmd = populate_extra_pmd((unsigned long)vm.addr
+					 + cpu * PMD_SIZE);
+		set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])),
+				     PAGE_KERNEL_LARGE));
+	}
+
+	/* we're ready, commit */
+	pr_info("PERCPU: Remapped at %p with large pages, static data "
+		"%zu bytes\n", vm.addr, static_size);
+
+	ret = pcpu_setup_first_chunk(pcpur_get_page, static_size,
+				     PERCPU_FIRST_CHUNK_RESERVE,
+				     PMD_SIZE, dyn_size, vm.addr, NULL);
+	goto out_free_ar;
+
+enomem:
+	/* the tails were already given back above, free only the used part */
+	for_each_possible_cpu(cpu)
+		if (pcpur_ptrs[cpu])
+			free_bootmem(__pa(pcpur_ptrs[cpu]), pcpur_size);
+	ret = -ENOMEM;
+out_free_ar:
+	free_bootmem(__pa(pcpur_ptrs), ptrs_size);
+	return ret;
+}
+#else
+static ssize_t __init setup_pcpu_remap(size_t static_size)
+{
+	return -EINVAL;	/* only built for CONFIG_NEED_MULTIPLE_NODES */
+}
+#endif
+
+/*
+ * Embedding allocator
+ *
+ * The first chunk is sized to just contain the static area plus
+ * module and dynamic reserves, and allocated as a contiguous area
+ * using bootmem allocator and used as-is without being mapped into
+ * vmalloc area.  This enables the first chunk to piggy back on the
+ * linear physical PMD mapping and doesn't add any additional pressure
+ * to TLB.  Note that if the needed size is smaller than the minimum
+ * unit size, the leftover is returned to the bootmem allocator.
+ */
+static void *pcpue_ptr __initdata;
+static size_t pcpue_size __initdata;
+static size_t pcpue_unit_size __initdata;
+
+static struct page * __init pcpue_get_page(unsigned int cpu, int pageno)
+{
+	void *unit = pcpue_ptr + cpu * pcpue_unit_size;
+	size_t offset = (size_t)pageno << PAGE_SHIFT;
+
+	if (offset < pcpue_size)
+		return virt_to_page(unit + offset);
+	return NULL;	/* beyond the used part of the unit */
+}
+
+static ssize_t __init setup_pcpu_embed(size_t static_size)
+{
+	unsigned int cpu;
+	size_t dyn_size;	/* pcpue_size minus static and reserved parts */
+
+	/*
+	 * If large page isn't supported, there's no benefit in doing
+	 * this.  Also, embedding allocation doesn't play well with
+	 * NUMA.
+	 */
+	if (!cpu_has_pse || pcpu_need_numa())
+		return -EINVAL;
+
+	/* size one unit, then allocate all units at once and copy */
+	pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
+			       PERCPU_DYNAMIC_RESERVE);
+	pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
+	dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
+
+	pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size,
+				       PAGE_SIZE);
+	if (!pcpue_ptr)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		void *ptr = pcpue_ptr + cpu * pcpue_unit_size;	/* cpu's unit */
+
+		free_bootmem(__pa(ptr + pcpue_size),
+			     pcpue_unit_size - pcpue_size);	/* unused tail */
+		memcpy(ptr, __per_cpu_load, static_size);
+	}
+
+	/* every unit is populated, commit via pcpu_setup_first_chunk() */
+	pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n",
+		pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size);
+
+	return pcpu_setup_first_chunk(pcpue_get_page, static_size,
+				      PERCPU_FIRST_CHUNK_RESERVE,
+				      pcpue_unit_size, dyn_size,
+				      pcpue_ptr, NULL);
+}
+
+/*
+ * 4k page allocator
+ *
+ * This is the basic allocator.  Static percpu area is allocated
+ * page-by-page and most of initialization is done by the generic
+ * setup function.
+ */
+static struct page **pcpu4k_pages __initdata;
+static int pcpu4k_nr_static_pages __initdata;
+
+static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno)
+{
+	if (pageno >= pcpu4k_nr_static_pages)
+		return NULL;	/* past the static area */
+	return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno];
+}
+
+static void __init pcpu4k_populate_pte(unsigned long addr)
+{
+	populate_extra_pte(addr);	/* callback for pcpu_setup_first_chunk() */
+}
+
+static ssize_t __init setup_pcpu_4k(size_t static_size)
+{
+	size_t pages_size;
+	unsigned int cpu;
+	int i, j;	/* i: page within a cpu, j: pages allocated so far */
+	ssize_t ret;
+
+	pcpu4k_nr_static_pages = PFN_UP(static_size);
+
+	/* unaligned allocations can't be freed, round up to page size */
+	pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus()
+			       * sizeof(pcpu4k_pages[0]));
+	pcpu4k_pages = alloc_bootmem(pages_size);
+
+	/* allocate one page at a time for each cpu and copy static data */
+	j = 0;
+	for_each_possible_cpu(cpu)
+		for (i = 0; i < pcpu4k_nr_static_pages; i++) {
+			void *ptr;
+
+			ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE);
+			if (!ptr)
+				goto enomem;
+
+			memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
+			pcpu4k_pages[j++] = virt_to_page(ptr);
+		}
+
+	/* all pages are populated, commit via pcpu_setup_first_chunk() */
+	pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n",
+		pcpu4k_nr_static_pages, static_size);
+
+	ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
+				     PERCPU_FIRST_CHUNK_RESERVE, -1, -1, NULL,
+				     pcpu4k_populate_pte);
+	goto out_free_ar;	/* success also frees the page array below */
+
+enomem:
+	while (--j >= 0)	/* give back the pages allocated so far */
+		free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE);
+	ret = -ENOMEM;
+out_free_ar:
+	free_bootmem(__pa(pcpu4k_pages), pages_size);
+	return ret;
+}
+
 static inline void setup_percpu_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
@@ -61,38 +408,35 @@ static inline void setup_percpu_segment(int cpu)
  */
 void __init setup_per_cpu_areas(void)
 {
-	ssize_t size;
-	char *ptr;
-	int cpu;
-
-	/* Copy section for each CPU (we discard the original) */
-	size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
+	size_t static_size = __per_cpu_end - __per_cpu_start;
+	unsigned int cpu;
+	unsigned long delta;
+	size_t pcpu_unit_size;
+	ssize_t ret;
 
 	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
 		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
 
-	pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size);
+	/*
+	 * Allocate percpu area.  If PSE is supported, try to make use
+	 * of large page mappings.  Please read comments on top of
+	 * each allocator for details.
+	 */
+	ret = setup_pcpu_remap(static_size);
+	if (ret < 0)
+		ret = setup_pcpu_embed(static_size);
+	if (ret < 0)
+		ret = setup_pcpu_4k(static_size);
+	if (ret < 0)
+		panic("cannot allocate static percpu area (%zu bytes, err=%zd)",
+		      static_size, ret);
 
-	for_each_possible_cpu(cpu) {
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-		ptr = alloc_bootmem_pages(size);
-#else
-		int node = early_cpu_to_node(cpu);
-		if (!node_online(node) || !NODE_DATA(node)) {
-			ptr = alloc_bootmem_pages(size);
-			pr_info("cpu %d has no node %d or node-local memory\n",
-				cpu, node);
-			pr_debug("per cpu data for cpu%d at %016lx\n",
-				 cpu, __pa(ptr));
-		} else {
-			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
-			pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
-				cpu, node, __pa(ptr));
-		}
-#endif
+	pcpu_unit_size = ret;
 
-		memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
-		per_cpu_offset(cpu) = ptr - __per_cpu_start;
+	/* alrighty, percpu areas up and running */
+	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+	for_each_possible_cpu(cpu) {
+		per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size;
 		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
 		per_cpu(cpu_number, cpu) = cpu;
 		setup_percpu_segment(cpu);
@@ -125,8 +469,6 @@ void __init setup_per_cpu_areas(void)
 		 */
 		if (cpu == boot_cpu_id)
 			switch_to_new_gdt(cpu);
-
-		DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
 	}
 
 	/* indicate the early static arrays will soon be gone */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 7cdcd16885e..d2cc6428c58 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -187,40 +187,35 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 /*
  * Set up a signal frame.
  */
-#ifdef CONFIG_X86_32
-static const struct {
-	u16 poplmovl;
-	u32 val;
-	u16 int80;
-} __attribute__((packed)) retcode = {
-	0xb858,		/* popl %eax; movl $..., %eax */
-	__NR_sigreturn,
-	0x80cd,		/* int $0x80 */
-};
-
-static const struct {
-	u8  movl;
-	u32 val;
-	u16 int80;
-	u8  pad;
-} __attribute__((packed)) rt_retcode = {
-	0xb8,		/* movl $..., %eax */
-	__NR_rt_sigreturn,
-	0x80cd,		/* int $0x80 */
-	0
-};
 
 /*
  * Determine which stack to use..
  */
+static unsigned long align_sigframe(unsigned long sp)
+{
+#ifndef CONFIG_X86_32
+	/* 64-bit: round down to 16 bytes, then go 8 bytes below that */
+	return round_down(sp, 16) - 8;
+#else /* CONFIG_X86_32 */
+	/*
+	 * i386 ABI: choose sp so that ((sp + 4) & 15) == 0 holds
+	 * on function entry.
+	 */
+	return ((sp + 4) & -16ul) - 4;
+#endif
+}
+
 static inline void __user *
 get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
-	     void **fpstate)
+	     void __user **fpstate)
 {
-	unsigned long sp;
-
 	/* Default to using normal stack */
-	sp = regs->sp;
+	unsigned long sp = regs->sp;
+
+#ifdef CONFIG_X86_64
+	/* redzone */
+	sp -= 128;
+#endif /* CONFIG_X86_64 */
 
 	/*
 	 * If we are on the alternate signal stack and would overflow it, don't.
@@ -234,30 +229,52 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 		if (sas_ss_flags(sp) == 0)
 			sp = current->sas_ss_sp + current->sas_ss_size;
 	} else {
+#ifdef CONFIG_X86_32
 		/* This is the legacy signal stack switching. */
 		if ((regs->ss & 0xffff) != __USER_DS &&
 			!(ka->sa.sa_flags & SA_RESTORER) &&
 				ka->sa.sa_restorer)
 			sp = (unsigned long) ka->sa.sa_restorer;
+#endif /* CONFIG_X86_32 */
 	}
 
 	if (used_math()) {
-		sp = sp - sig_xstate_size;
-		*fpstate = (struct _fpstate *) sp;
+		sp -= sig_xstate_size;
+#ifdef CONFIG_X86_64
+		sp = round_down(sp, 64);
+#endif /* CONFIG_X86_64 */
+		*fpstate = (void __user *)sp;
+
 		if (save_i387_xstate(*fpstate) < 0)
 			return (void __user *)-1L;
 	}
 
-	sp -= frame_size;
-	/*
-	 * Align the stack pointer according to the i386 ABI,
-	 * i.e. so that on function entry ((sp + 4) & 15) == 0.
-	 */
-	sp = ((sp + 4) & -16ul) - 4;
-
-	return (void __user *) sp;
+	return (void __user *)align_sigframe(sp - frame_size);
 }
 
+#ifdef CONFIG_X86_32
+static const struct {
+	u16 poplmovl;
+	u32 val;
+	u16 int80;
+} __attribute__((packed)) retcode = {
+	0xb858,		/* popl %eax; movl $..., %eax */
+	__NR_sigreturn,
+	0x80cd,		/* int $0x80 */
+};
+
+static const struct {
+	u8  movl;
+	u32 val;
+	u16 int80;
+	u8  pad;
+} __attribute__((packed)) rt_retcode = {
+	0xb8,		/* movl $..., %eax */
+	__NR_rt_sigreturn,
+	0x80cd,		/* int $0x80 */
+	0
+};
+
 static int
 __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
 	      struct pt_regs *regs)
@@ -388,24 +405,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	return 0;
 }
 #else /* !CONFIG_X86_32 */
-/*
- * Determine which stack to use..
- */
-static void __user *
-get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size)
-{
-	/* Default to using normal stack - redzone*/
-	sp -= 128;
-
-	/* This is the X/Open sanctioned signal stack switching.  */
-	if (ka->sa.sa_flags & SA_ONSTACK) {
-		if (sas_ss_flags(sp) == 0)
-			sp = current->sas_ss_sp + current->sas_ss_size;
-	}
-
-	return (void __user *)round_down(sp - size, 64);
-}
-
 static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 			    sigset_t *set, struct pt_regs *regs)
 {
@@ -414,15 +413,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	int err = 0;
 	struct task_struct *me = current;
 
-	if (used_math()) {
-		fp = get_stack(ka, regs->sp, sig_xstate_size);
-		frame = (void __user *)round_down(
-			(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
-
-		if (save_i387_xstate(fp) < 0)
-			return -EFAULT;
-	} else
-		frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8;
+	frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp);
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		return -EFAULT;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9ce666387f3..ef7d10170c3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -112,11 +112,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
-static atomic_t init_deasserted;
-
-
-/* Set if we find a B stepping CPU */
-static int __cpuinitdata smp_b_stepping;
+atomic_t init_deasserted;
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
 
@@ -271,8 +267,6 @@ static void __cpuinit smp_callin(void)
 	cpumask_set_cpu(cpuid, cpu_callin_mask);
 }
 
-static int __cpuinitdata unsafe_smp;
-
 /*
  * Activate a secondary processor.
  */
@@ -340,76 +334,6 @@ notrace static void __cpuinit start_secondary(void *unused)
 	cpu_idle();
 }
 
-static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c)
-{
-	/*
-	 * Mask B, Pentium, but not Pentium MMX
-	 */
-	if (c->x86_vendor == X86_VENDOR_INTEL &&
-	    c->x86 == 5 &&
-	    c->x86_mask >= 1 && c->x86_mask <= 4 &&
-	    c->x86_model <= 3)
-		/*
-		 * Remember we have B step Pentia with bugs
-		 */
-		smp_b_stepping = 1;
-
-	/*
-	 * Certain Athlons might work (for various values of 'work') in SMP
-	 * but they are not certified as MP capable.
-	 */
-	if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
-
-		if (num_possible_cpus() == 1)
-			goto valid_k7;
-
-		/* Athlon 660/661 is valid. */
-		if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
-		    (c->x86_mask == 1)))
-			goto valid_k7;
-
-		/* Duron 670 is valid */
-		if ((c->x86_model == 7) && (c->x86_mask == 0))
-			goto valid_k7;
-
-		/*
-		 * Athlon 662, Duron 671, and Athlon >model 7 have capability
-		 * bit. It's worth noting that the A5 stepping (662) of some
-		 * Athlon XP's have the MP bit set.
-		 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
-		 * more.
-		 */
-		if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
-		    ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
-		     (c->x86_model > 7))
-			if (cpu_has_mp)
-				goto valid_k7;
-
-		/* If we get here, not a certified SMP capable AMD system. */
-		unsafe_smp = 1;
-	}
-
-valid_k7:
-	;
-}
-
-static void __cpuinit smp_checks(void)
-{
-	if (smp_b_stepping)
-		printk(KERN_WARNING "WARNING: SMP operation may be unreliable"
-				    "with B stepping processors.\n");
-
-	/*
-	 * Don't taint if we are running SMP kernel on a single non-MP
-	 * approved Athlon
-	 */
-	if (unsafe_smp && num_online_cpus() > 1) {
-		printk(KERN_INFO "WARNING: This combination of AMD"
-			"processors is not suitable for SMP.\n");
-		add_taint(TAINT_UNSAFE_SMP);
-	}
-}
-
 /*
  * The bootstrap kernel entry code has set these up. Save them for
  * a given CPU
@@ -423,7 +347,6 @@ void __cpuinit smp_store_cpu_info(int id)
 	c->cpu_index = id;
 	if (id != 0)
 		identify_secondary_cpu(c);
-	smp_apply_quirks(c);
 }
 
 
@@ -614,12 +537,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 	unsigned long send_status, accept_status = 0;
 	int maxlvt, num_starts, j;
 
-	if (get_uv_system_type() == UV_NON_UNIQUE_APIC) {
-		send_status = uv_wakeup_secondary(phys_apicid, start_eip);
-		atomic_set(&init_deasserted, 1);
-		return send_status;
-	}
-
 	maxlvt = lapic_get_maxlvt();
 
 	/*
@@ -748,7 +665,8 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
- * Returns zero if CPU booted OK, else error code from ->wakeup_cpu.
+ * Returns zero if CPU booted OK, else error code from
+ * ->wakeup_secondary_cpu.
  */
 static int __cpuinit do_boot_cpu(int apicid, int cpu)
 {
@@ -835,9 +753,13 @@ do_rest:
 	}
 
 	/*
-	 * Starting actual IPI sequence...
+	 * Kick the secondary CPU. Use the method in the APIC driver
+	 * if it's defined - or use an INIT boot APIC message otherwise:
 	 */
-	boot_error = apic->wakeup_cpu(apicid, start_ip);
+	if (apic->wakeup_secondary_cpu)
+		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
+	else
+		boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
 
 	if (!boot_error) {
 		/*
@@ -1194,7 +1116,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 	pr_debug("Boot done.\n");
 
 	impress_friends();
-	smp_checks();
 #ifdef CONFIG_X86_IO_APIC
 	setup_ioapic_dest();
 #endif
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index f04549afcfe..d038b9c45cf 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -314,8 +314,6 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	int locals = 0;
 	struct bau_desc *bau_desc;
 
-	WARN_ON(!in_atomic());
-
 	cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
 
 	uv_cpu = uv_blade_processor_id();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c05430ac1b4..a1d288327ff 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -118,47 +118,6 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err)
 	if (!user_mode_vm(regs))
 		die(str, regs, err);
 }
-
-/*
- * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
- * invalid offset set (the LAZY one) and the faulting thread has
- * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS,
- * we set the offset field correctly and return 1.
- */
-static int lazy_iobitmap_copy(void)
-{
-	struct thread_struct *thread;
-	struct tss_struct *tss;
-	int cpu;
-
-	cpu = get_cpu();
-	tss = &per_cpu(init_tss, cpu);
-	thread = &current->thread;
-
-	if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
-	    thread->io_bitmap_ptr) {
-		memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
-		       thread->io_bitmap_max);
-		/*
-		 * If the previously set map was extending to higher ports
-		 * than the current one, pad extra space with 0xff (no access).
-		 */
-		if (thread->io_bitmap_max < tss->io_bitmap_max) {
-			memset((char *) tss->io_bitmap +
-				thread->io_bitmap_max, 0xff,
-				tss->io_bitmap_max - thread->io_bitmap_max);
-		}
-		tss->io_bitmap_max = thread->io_bitmap_max;
-		tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
-		tss->io_bitmap_owner = thread;
-		put_cpu();
-
-		return 1;
-	}
-	put_cpu();
-
-	return 0;
-}
 #endif
 
 static void __kprobes
@@ -309,11 +268,6 @@ do_general_protection(struct pt_regs *regs, long error_code)
 	conditional_sti(regs);
 
 #ifdef CONFIG_X86_32
-	if (lazy_iobitmap_copy()) {
-		/* restart the faulting instruction */
-		return;
-	}
-
 	if (regs->flags & X86_VM_MASK)
 		goto gp_in_vm86;
 #endif
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c
new file mode 100644
index 00000000000..2ffb6c53326
--- /dev/null
+++ b/arch/x86/kernel/uv_time.c
@@ -0,0 +1,393 @@
+/*
+ * SGI RTC clock/timer routines.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Copyright (c) 2009 Silicon Graphics, Inc.  All Rights Reserved.
+ *  Copyright (c) Dimitri Sivanich
+ */
+#include <linux/clockchips.h>
+
+#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+#include <asm/apic.h>
+#include <asm/cpu.h>
+
+#define RTC_NAME		"sgi_rtc"
+
+static cycle_t uv_read_rtc(void);
+static int uv_rtc_next_event(unsigned long, struct clock_event_device *);
+static void uv_rtc_timer_setup(enum clock_event_mode,
+				struct clock_event_device *);
+
+static struct clocksource clocksource_uv = {
+	.name		= RTC_NAME,
+	.rating		= 400,
+	.read		= uv_read_rtc,
+	.mask		= (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
+	.shift		= 10,	/* .mult is derived from this at init */
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static struct clock_event_device clock_event_device_uv = {
+	.name		= RTC_NAME,
+	.features	= CLOCK_EVT_FEAT_ONESHOT,
+	.shift		= 20,	/* .mult is derived from this at init */
+	.rating		= 400,
+	.irq		= -1,
+	.set_next_event	= uv_rtc_next_event,
+	.set_mode	= uv_rtc_timer_setup,
+	.event_handler	= NULL,	/* per-cpu copies are NULL-checked on irq */
+};
+
+static DEFINE_PER_CPU(struct clock_event_device, cpu_ced);
+
+/* There is one of these allocated per node */
+struct uv_rtc_timer_head {
+	spinlock_t	lock;		/* taken around every table update */
+	/* next cpu waiting for timer, local node relative: */
+	int		next_cpu;
+	/* number of cpus on this node: */
+	int		ncpus;
+	struct {
+		int	lcpu;		/* systemwide logical cpu number */
+		u64	expires;	/* next timer expiration for this cpu */
+	} cpu[];	/* ncpus slots, indexed by blade_processor_id */
+};
+
+/*
+ * Table of uv_rtc_timer_head pointers, indexed by blade id.
+ */
+static struct uv_rtc_timer_head		**blade_info __read_mostly;
+
+static int				uv_rtc_enable;
+
+/*
+ * Hardware interface routines
+ */
+
+/* Raise GENERIC_INTERRUPT_VECTOR on @cpu via its node's IPI register */
+static void uv_rtc_send_IPI(int cpu)
+{
+	unsigned long phys_id = cpu_physical_id(cpu);
+	int target_pnode = uv_apicid_to_pnode(phys_id);
+	unsigned long ipi_word;
+
+	/* SEND bit, destination apic id and vector share one MMR write */
+	ipi_word = (GENERIC_INTERRUPT_VECTOR << UVH_IPI_INT_VECTOR_SHFT) |
+		   (phys_id << UVH_IPI_INT_APIC_ID_SHFT) |
+		   (1UL << UVH_IPI_INT_SEND_SHFT);
+
+	uv_write_global_mmr64(target_pnode, UVH_IPI_INT, ipi_word);
+}
+
+/* RTC interrupt pending? Compared before narrowing to int: returns 0 or 1 */
+static int uv_intr_pending(int pnode)
+{
+	return (uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) &
+		UVH_EVENT_OCCURRED0_RTC1_MASK) != 0;
+}
+
+/* Setup interrupt; returns non-zero if @expires passed and none is pending. */
+static int uv_setup_intr(int cpu, u64 expires)
+{
+	u64 val;
+	int pnode = uv_cpu_to_pnode(cpu);
+
+	uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
+		UVH_RTC1_INT_CONFIG_M_MASK);
+	uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L);
+
+	uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
+		UVH_EVENT_OCCURRED0_RTC1_MASK);
+
+	val = (GENERIC_INTERRUPT_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
+		((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
+
+	/* Set configuration: interrupt vector and target apic id */
+	uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val);
+	/* Initialize comparator value with the requested expiry */
+	uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires);
+
+	return (expires < uv_read_rtc() && !uv_intr_pending(pnode));
+}
+
+/*
+ * Per-cpu timer tracking routines
+ */
+
+static __init void uv_rtc_deallocate_timers(void)
+{
+	int bid;
+
+	for_each_possible_blade(bid)
+		kfree(blade_info[bid]);	/* NULL slots are fine for kfree */
+	kfree(blade_info);
+	blade_info = NULL;		/* leave no dangling table pointer */
+}
+
+/* Allocate per-node lists of cpu timer expirations; returns 0 or -ENOMEM. */
+static __init int uv_rtc_allocate_timers(void)
+{
+	int cpu, c;
+
+	blade_info = kcalloc(uv_possible_blades, sizeof(void *), GFP_KERNEL);
+	if (!blade_info)
+		return -ENOMEM;
+
+	for_each_present_cpu(cpu) {
+		int nid = cpu_to_node(cpu);
+		int bid = uv_cpu_to_blade_id(cpu);
+		int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+		struct uv_rtc_timer_head *head = blade_info[bid];
+
+		if (!head) {
+			head = kmalloc_node(sizeof(*head) +
+				uv_blade_nr_possible_cpus(bid) *
+				sizeof(head->cpu[0]), GFP_KERNEL, nid);
+			if (!head) {
+				uv_rtc_deallocate_timers();
+				return -ENOMEM;
+			}
+			spin_lock_init(&head->lock);
+			head->ncpus = uv_blade_nr_possible_cpus(bid);
+			head->next_cpu = -1;
+			/* Idle every slot: the scan covers non-present cpus */
+			for (c = 0; c < head->ncpus; c++)
+				head->cpu[c].expires = ULLONG_MAX;
+			blade_info[bid] = head;
+		}
+
+		head->cpu[bcpu].lcpu = cpu;
+	}
+
+	return 0;
+}
+
+/* Re-arm the node for its earliest expiry, or write the M mask if none. */
+static void uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode)
+{
+	u64 earliest = ULLONG_MAX;
+	int slot, winner = -1;
+
+	for (slot = 0; slot < head->ncpus; slot++) {
+		if (head->cpu[slot].expires >= earliest)
+			continue;
+		earliest = head->cpu[slot].expires;
+		winner = slot;
+	}
+	head->next_cpu = winner;
+
+	if (winner < 0) {
+		/* No slot holds an expiry below ULLONG_MAX */
+		uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
+			UVH_RTC1_INT_CONFIG_M_MASK);
+		return;
+	}
+
+	/* If we didn't set it up in time, trigger */
+	if (uv_setup_intr(head->cpu[winner].lcpu, earliest))
+		uv_rtc_send_IPI(head->cpu[winner].lcpu);
+}
+
+/*
+ * Set expiration time for @cpu.
+ *
+ * Returns 1 if we missed the expiration time, 0 otherwise.
+ */
+static int uv_rtc_set_timer(int cpu, u64 expires)
+{
+	int pnode = uv_cpu_to_pnode(cpu);
+	int bid = uv_cpu_to_blade_id(cpu);
+	struct uv_rtc_timer_head *head = blade_info[bid];
+	int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+	unsigned long flags;
+	int missed = 0;
+	int armed;
+
+	spin_lock_irqsave(&head->lock, flags);
+
+	armed = head->next_cpu;
+	head->cpu[bcpu].expires = expires;
+	/* Will this one be next to go off? */
+	if (armed < 0 || armed == bcpu ||
+			expires < head->cpu[armed].expires) {
+		head->next_cpu = bcpu;
+		missed = uv_setup_intr(cpu, expires) ? 1 : 0;
+		if (missed) {
+			/* Too late: drop this entry and re-arm the rest */
+			head->cpu[bcpu].expires = ULLONG_MAX;
+			uv_rtc_find_next_timer(head, pnode);
+		}
+	}
+
+	spin_unlock_irqrestore(&head->lock, flags);
+	return missed;
+}
+
+/*
+ * Clear the expiration time recorded for @cpu.
+ *
+ * Returns 1 if this timer was the one set up in hardware and was due.
+ */
+static int uv_rtc_unset_timer(int cpu)
+{
+	int pnode = uv_cpu_to_pnode(cpu);
+	int bid = uv_cpu_to_blade_id(cpu);
+	struct uv_rtc_timer_head *head = blade_info[bid];
+	int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+	unsigned long flags;
+	int was_due = 0;
+
+	spin_lock_irqsave(&head->lock, flags);
+
+	if (head->next_cpu != bcpu) {
+		/* Hardware is not set up for us: just forget the expiry */
+		head->cpu[bcpu].expires = ULLONG_MAX;
+	} else {
+		was_due = uv_read_rtc() >= head->cpu[bcpu].expires;
+		head->cpu[bcpu].expires = ULLONG_MAX;
+		/* Hardware was set up for this timer: re-arm the rest */
+		uv_rtc_find_next_timer(head, pnode);
+	}
+
+	spin_unlock_irqrestore(&head->lock, flags);
+
+	return was_due;
+}
+
+
+/*
+ * Kernel interface routines.
+ */
+
+/* Return the current value of the local UVH_RTC MMR. */
+static cycle_t uv_read_rtc(void)
+{
+	cycle_t now = (cycle_t)uv_read_local_mmr(UVH_RTC);
+
+	return now;
+}
+
+/*
+ * Program the next event for @ced's cpu, @delta RTC counts from now
+ */
+static int uv_rtc_next_event(unsigned long delta,
+			     struct clock_event_device *ced)
+{
+	int target = cpumask_first(ced->cpumask);
+
+	return uv_rtc_set_timer(target, uv_read_rtc() + delta);
+}
+
+/*
+ * Mode-change hook (.set_mode) for the per-cpu RTC clock event device
+ */
+static void uv_rtc_timer_setup(enum clock_event_mode mode,
+			       struct clock_event_device *evt)
+{
+	int target = cpumask_first(evt->cpumask);
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	case CLOCK_EVT_MODE_UNUSED:
+		uv_rtc_unset_timer(target);
+		break;
+	case CLOCK_EVT_MODE_RESUME:
+	case CLOCK_EVT_MODE_ONESHOT:
+	case CLOCK_EVT_MODE_PERIODIC:
+		/* Nothing to do here yet */
+		break;
+	}
+}
+
+/* RTC interrupt hook: run this cpu's event handler if its timer was due */
+static void uv_rtc_interrupt(void)
+{
+	struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
+	int cpu = smp_processor_id();
+
+	/* ced is the address of a per-cpu variable, so it is never NULL */
+	if (!ced->event_handler || uv_rtc_unset_timer(cpu) != 1)
+		return;
+
+	/* Our timer was the one set up in hardware and it was due */
+	ced->event_handler(ced);
+}
+
+static int __init uv_enable_rtc(char *str)
+{
+	uv_rtc_enable = 1;	/* tested by uv_rtc_setup_clock() */
+
+	return 1;
+}
+__setup("uvrtc", uv_enable_rtc);
+
+static __init void uv_rtc_register_clockevents(struct work_struct *dummy)
+{
+	struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
+
+	*ced = clock_event_device_uv;	/* start from the shared template */
+	ced->cpumask = cpumask_of(smp_processor_id());
+	clockevents_register_device(ced);
+}
+
+/* Register the RTC clocksource and per-cpu clockevents; 0 or an error code */
+static __init int uv_rtc_setup_clock(void)
+{
+	int timers_allocated = 0;
+	int rc;
+
+	if (!uv_rtc_enable || !is_uv_system() || generic_interrupt_extension)
+		return -ENODEV;
+
+	/* Claim the hook first; every failure below releases it again */
+	generic_interrupt_extension = uv_rtc_interrupt;
+
+	clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
+				clocksource_uv.shift);
+	rc = clocksource_register(&clocksource_uv);
+	if (rc)
+		goto out_unhook;
+
+	/* Setup and register clockevents */
+	rc = uv_rtc_allocate_timers();
+	if (rc)
+		goto out_unregister;
+	timers_allocated = 1;
+
+	clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second,
+				NSEC_PER_SEC, clock_event_device_uv.shift);
+	clock_event_device_uv.min_delta_ns = NSEC_PER_SEC /
+						sn_rtc_cycles_per_second;
+	clock_event_device_uv.max_delta_ns = clocksource_uv.mask *
+				(NSEC_PER_SEC / sn_rtc_cycles_per_second);
+
+	rc = schedule_on_each_cpu(uv_rtc_register_clockevents);
+	if (!rc)
+		return 0;
+
+out_unregister:
+	clocksource_unregister(&clocksource_uv);
+out_unhook:
+	generic_interrupt_extension = NULL;
+	/* Timers are freed only once the interrupt hook is cleared */
+	if (timers_allocated)
+		uv_rtc_deallocate_timers();
+	return rc;
+}
+arch_initcall(uv_rtc_setup_clock);
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 191a876e9e8..31ffc24eec4 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -578,7 +578,7 @@ static struct irq_chip piix4_virtual_irq_type = {
 static irqreturn_t piix4_master_intr(int irq, void *dev_id)
 {
 	int realirq;
-	irq_desc_t *desc;
+	struct irq_desc *desc;
 	unsigned long flags;
 
 	spin_lock_irqsave(&i8259A_lock, flags);
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fbfced6f680..5bf54e40c6e 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -275,3 +275,10 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
 ASSERT((per_cpu__irq_stack_union == 0),
         "irq_stack_union is not at start of per-cpu area");
 #endif
+
+#ifdef CONFIG_KEXEC
+#include <asm/kexec.h>
+
+ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+       "kexec control code size is too big")
+#endif
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index c609205df59..74de562812c 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -22,7 +22,7 @@
 #include <asm/paravirt.h>
 #include <asm/setup.h>
 
-#if defined CONFIG_PCI && defined CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT
 /*
  * Interrupt control on vSMPowered systems:
  * ~AC is a shadow of IF.  If IF is 'on' AC should be 'off'
@@ -114,7 +114,6 @@ static void __init set_vsmp_pv_ops(void)
 }
 #endif
 
-#ifdef CONFIG_PCI
 static int is_vsmp = -1;
 
 static void __init detect_vsmp_box(void)
@@ -139,15 +138,6 @@ int is_vsmp_box(void)
 		return 0;
 	}
 }
-#else
-static void __init detect_vsmp_box(void)
-{
-}
-int is_vsmp_box(void)
-{
-	return 0;
-}
-#endif
 
 void __init vsmp_init(void)
 {