summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig6
-rw-r--r--arch/x86/Kconfig.cpu3
-rw-r--r--arch/x86/boot/compressed/Makefile5
-rw-r--r--arch/x86/boot/compressed/misc.c4
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c2
-rw-r--r--arch/x86/include/asm/acpi.h11
-rw-r--r--arch/x86/include/asm/amd_nb.h13
-rw-r--r--arch/x86/include/asm/apic.h2
-rw-r--r--arch/x86/include/asm/boot.h6
-rw-r--r--arch/x86/include/asm/debugreg.h2
-rw-r--r--arch/x86/include/asm/fixmap.h4
-rw-r--r--arch/x86/include/asm/gpio.h5
-rw-r--r--arch/x86/include/asm/hypervisor.h12
-rw-r--r--arch/x86/include/asm/io_apic.h3
-rw-r--r--arch/x86/include/asm/kdebug.h1
-rw-r--r--arch/x86/include/asm/mach_traps.h12
-rw-r--r--arch/x86/include/asm/nmi.h20
-rw-r--r--arch/x86/include/asm/numa_64.h2
-rw-r--r--arch/x86/include/asm/olpc.h10
-rw-r--r--arch/x86/include/asm/percpu.h158
-rw-r--r--arch/x86/include/asm/perf_event_p4.h3
-rw-r--r--arch/x86/include/asm/processor.h3
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h35
-rw-r--r--arch/x86/kernel/acpi/boot.c48
-rw-r--r--arch/x86/kernel/amd_nb.c7
-rw-r--r--arch/x86/kernel/aperture_64.c44
-rw-r--r--arch/x86/kernel/apic/apic.c73
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c3
-rw-r--r--arch/x86/kernel/apic/io_apic.c32
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c10
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c4
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c5
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c20
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event.c30
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c28
-rw-r--r--arch/x86/kernel/dumpstack.c6
-rw-r--r--arch/x86/kernel/entry_64.S36
-rw-r--r--arch/x86/kernel/ftrace.c6
-rw-r--r--arch/x86/kernel/hw_breakpoint.c12
-rw-r--r--arch/x86/kernel/irq.c6
-rw-r--r--arch/x86/kernel/irq_32.c4
-rw-r--r--arch/x86/kernel/kgdb.c7
-rw-r--r--arch/x86/kernel/kprobes.c14
-rw-r--r--arch/x86/kernel/mpparse.c114
-rw-r--r--arch/x86/kernel/process.c4
-rw-r--r--arch/x86/kernel/reboot.c5
-rw-r--r--arch/x86/kernel/rtc.c2
-rw-r--r--arch/x86/kernel/smpboot.c18
-rw-r--r--arch/x86/kernel/traps.c102
-rw-r--r--arch/x86/kernel/tsc.c4
-rw-r--r--arch/x86/kvm/x86.c8
-rw-r--r--arch/x86/lib/delay.c2
-rw-r--r--arch/x86/mm/amdtopology_64.c87
-rw-r--r--arch/x86/mm/numa_64.c157
-rw-r--r--arch/x86/mm/srat_64.c26
-rw-r--r--arch/x86/oprofile/nmi_int.c5
-rw-r--r--arch/x86/oprofile/nmi_timer_int.c2
-rw-r--r--arch/x86/oprofile/op_model_ppro.c8
-rw-r--r--arch/x86/pci/amd_bus.c33
-rw-r--r--arch/x86/platform/mrst/mrst.c30
-rw-r--r--arch/x86/platform/sfi/sfi.c13
-rw-r--r--arch/x86/xen/enlighten.c44
-rw-r--r--arch/x86/xen/multicalls.h2
-rw-r--r--arch/x86/xen/spinlock.c8
-rw-r--r--arch/x86/xen/time.c8
69 files changed, 848 insertions, 563 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b6fccb07123..6e15fa9455e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -51,6 +51,7 @@ config X86
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
+ select HAVE_KERNEL_XZ
select HAVE_KERNEL_LZO
select HAVE_HW_BREAKPOINT
select HAVE_MIXED_BREAKPOINTS_REGS
@@ -65,6 +66,7 @@ config X86
select HAVE_SPARSE_IRQ
select GENERIC_IRQ_PROBE
select GENERIC_PENDING_IRQ if SMP
+ select USE_GENERIC_SMP_HELPERS if SMP
config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS)
@@ -203,10 +205,6 @@ config HAVE_INTEL_TXT
def_bool y
depends on EXPERIMENTAL && DMAR && ACPI
-config USE_GENERIC_SMP_HELPERS
- def_bool y
- depends on SMP
-
config X86_32_SMP
def_bool y
depends on X86_32 && SMP
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2ac9069890c..15588a0ef46 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -310,6 +310,9 @@ config X86_INTERNODE_CACHE_SHIFT
config X86_CMPXCHG
def_bool X86_64 || (X86_32 && !M386)
+config CMPXCHG_LOCAL
+ def_bool X86_64 || (X86_32 && !M386)
+
config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 0c229551eea..09664efb9ce 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -4,7 +4,7 @@
# create a compressed vmlinux image from the original vmlinux
#
-targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.lzo head_$(BITS).o misc.o string.o cmdline.o early_serial_console.o piggy.o
+targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo head_$(BITS).o misc.o string.o cmdline.o early_serial_console.o piggy.o
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
@@ -49,12 +49,15 @@ $(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
$(call if_changed,bzip2)
$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lzma)
+$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,xzkern)
$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lzo)
suffix-$(CONFIG_KERNEL_GZIP) := gz
suffix-$(CONFIG_KERNEL_BZIP2) := bz2
suffix-$(CONFIG_KERNEL_LZMA) := lzma
+suffix-$(CONFIG_KERNEL_XZ) := xz
suffix-$(CONFIG_KERNEL_LZO) := lzo
quiet_cmd_mkpiggy = MKPIGGY $@
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 325c05294fc..3a19d04cebe 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -139,6 +139,10 @@ static int lines, cols;
#include "../../../../lib/decompress_unlzma.c"
#endif
+#ifdef CONFIG_KERNEL_XZ
+#include "../../../../lib/decompress_unxz.c"
+#endif
+
#ifdef CONFIG_KERNEL_LZO
#include "../../../../lib/decompress_unlzo.c"
#endif
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 5c228129d17..646aa78ba5f 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -74,7 +74,7 @@ int main(int argc, char *argv[])
offs = (olen > ilen) ? olen - ilen : 0;
offs += olen >> 12; /* Add 8 bytes for each 32K block */
- offs += 32*1024 + 18; /* Add 32K + 18 bytes slack */
+ offs += 64*1024 + 128; /* Add 64K + 128 bytes slack */
offs = (offs+4095) & ~4095; /* Round to a 4K boundary */
printf(".section \".rodata..compressed\",\"a\",@progbits\n");
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 55d106b5e31..211ca3f7fd1 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -185,17 +185,16 @@ struct bootnode;
#ifdef CONFIG_ACPI_NUMA
extern int acpi_numa;
-extern int acpi_get_nodes(struct bootnode *physnodes);
+extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
+ unsigned long end);
extern int acpi_scan_nodes(unsigned long start, unsigned long end);
#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
+
+#ifdef CONFIG_NUMA_EMU
extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
int num_nodes);
-#else
-static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,
- int num_nodes)
-{
-}
#endif
+#endif /* CONFIG_ACPI_NUMA */
#define acpi_unlazy_tlb(x) leave_mm(x)
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 6aee50d655d..64dc82ee19f 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -3,16 +3,27 @@
#include <linux/pci.h>
+struct amd_nb_bus_dev_range {
+ u8 bus;
+ u8 dev_base;
+ u8 dev_limit;
+};
+
extern struct pci_device_id amd_nb_misc_ids[];
+extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
struct bootnode;
extern int early_is_amd_nb(u32 value);
extern int amd_cache_northbridges(void);
extern void amd_flush_garts(void);
-extern int amd_get_nodes(struct bootnode *nodes);
extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
extern int amd_scan_nodes(void);
+#ifdef CONFIG_NUMA_EMU
+extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes);
+extern void amd_get_nodes(struct bootnode *nodes);
+#endif
+
struct amd_northbridge {
struct pci_dev *misc;
};
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index cf12007796d..5e3969c36d7 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -234,6 +234,7 @@ extern void init_bsp_APIC(void);
extern void setup_local_APIC(void);
extern void end_local_APIC_setup(void);
extern void init_apic_mappings(void);
+void register_lapic_address(unsigned long address);
extern void setup_boot_APIC_clock(void);
extern void setup_secondary_APIC_clock(void);
extern int APIC_init_uniprocessor(void);
@@ -244,7 +245,6 @@ extern int apic_force_enable(void);
* On 32bit this is mach-xxx local
*/
#ifdef CONFIG_X86_64
-extern void early_init_lapic_mapping(void);
extern int apic_is_clustered_box(void);
#else
static inline int apic_is_clustered_box(void)
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 3b62ab56c7a..5e1a2eef3e7 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -32,11 +32,7 @@
#define BOOT_HEAP_SIZE 0x400000
#else /* !CONFIG_KERNEL_BZIP2 */
-#ifdef CONFIG_X86_64
-#define BOOT_HEAP_SIZE 0x7000
-#else
-#define BOOT_HEAP_SIZE 0x4000
-#endif
+#define BOOT_HEAP_SIZE 0x8000
#endif /* !CONFIG_KERNEL_BZIP2 */
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index b81002f2361..078ad0caefc 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -94,7 +94,7 @@ static inline void hw_breakpoint_disable(void)
static inline int hw_breakpoint_active(void)
{
- return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
+ return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
}
extern void aout_dump_debugregs(struct user *dump);
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 0141b234406..4729b2b6311 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -116,11 +116,11 @@ enum fixed_addresses {
#endif
FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
- __end_of_permanent_fixed_addresses,
-
#ifdef CONFIG_X86_MRST
FIX_LNW_VRTC,
#endif
+ __end_of_permanent_fixed_addresses,
+
/*
* 256 temporary boot-time mappings, used by early_ioremap(),
* before ioremap() is functional.
diff --git a/arch/x86/include/asm/gpio.h b/arch/x86/include/asm/gpio.h
index 49dbfdfa50f..91d915a6525 100644
--- a/arch/x86/include/asm/gpio.h
+++ b/arch/x86/include/asm/gpio.h
@@ -38,12 +38,9 @@ static inline int gpio_cansleep(unsigned int gpio)
return __gpio_cansleep(gpio);
}
-/*
- * Not implemented, yet.
- */
static inline int gpio_to_irq(unsigned int gpio)
{
- return -ENOSYS;
+ return __gpio_to_irq(gpio);
}
static inline int irq_to_gpio(unsigned int irq)
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index ff2546ce717..7a15153c675 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -20,6 +20,9 @@
#ifndef _ASM_X86_HYPERVISOR_H
#define _ASM_X86_HYPERVISOR_H
+#include <asm/kvm_para.h>
+#include <asm/xen/hypervisor.h>
+
extern void init_hypervisor(struct cpuinfo_x86 *c);
extern void init_hypervisor_platform(void);
@@ -47,4 +50,13 @@ extern const struct hypervisor_x86 x86_hyper_vmware;
extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
extern const struct hypervisor_x86 x86_hyper_xen_hvm;
+static inline bool hypervisor_x2apic_available(void)
+{
+ if (kvm_para_available())
+ return true;
+ if (xen_x2apic_para_available())
+ return true;
+ return false;
+}
+
#endif
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 0c5ca4e30d7..f327d386d6c 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -169,6 +169,7 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern int get_nr_irqs_gsi(void);
+
extern void setup_ioapic_ids_from_mpc(void);
extern void setup_ioapic_ids_from_mpc_nocheck(void);
@@ -183,6 +184,8 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi);
void __init mp_register_ioapic(int id, u32 address, u32 gsi_base);
extern void __init pre_init_apic_IRQ0(void);
+extern void mp_save_irq(struct mpc_intsrc *m);
+
#else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index f23eb252846..ca242d35e87 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -18,7 +18,6 @@ enum die_val {
DIE_TRAP,
DIE_GPF,
DIE_CALL,
- DIE_NMI_IPI,
DIE_PAGE_FAULT,
DIE_NMIUNKNOWN,
};
diff --git a/arch/x86/include/asm/mach_traps.h b/arch/x86/include/asm/mach_traps.h
index f7920601e47..72a8b52e7df 100644
--- a/arch/x86/include/asm/mach_traps.h
+++ b/arch/x86/include/asm/mach_traps.h
@@ -7,9 +7,19 @@
#include <asm/mc146818rtc.h>
+#define NMI_REASON_PORT 0x61
+
+#define NMI_REASON_SERR 0x80
+#define NMI_REASON_IOCHK 0x40
+#define NMI_REASON_MASK (NMI_REASON_SERR | NMI_REASON_IOCHK)
+
+#define NMI_REASON_CLEAR_SERR 0x04
+#define NMI_REASON_CLEAR_IOCHK 0x08
+#define NMI_REASON_CLEAR_MASK 0x0f
+
static inline unsigned char get_nmi_reason(void)
{
- return inb(0x61);
+ return inb(NMI_REASON_PORT);
}
static inline void reassert_nmi(void)
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index c4021b95351..c76f5b92b84 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -23,6 +23,26 @@ void arch_trigger_all_cpu_backtrace(void);
#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
#endif
+/*
+ * Define some priorities for the nmi notifier call chain.
+ *
+ * Create a local nmi bit that has a higher priority than
+ * external nmis, because the local ones are more frequent.
+ *
+ * Also setup some default high/normal/low settings for
+ * subsystems to registers with. Using 4 bits to seperate
+ * the priorities. This can go alot higher if needed be.
+ */
+
+#define NMI_LOCAL_SHIFT 16 /* randomly picked */
+#define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT)
+#define NMI_HIGH_PRIOR (1ULL << 8)
+#define NMI_NORMAL_PRIOR (1ULL << 4)
+#define NMI_LOW_PRIOR (1ULL << 0)
+#define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR)
+#define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR)
+#define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR)
+
void stop_nmi(void);
void restart_nmi(void);
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 823e070e7c2..5ae87285a50 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -38,7 +38,7 @@ extern void __cpuinit numa_add_cpu(int cpu);
extern void __cpuinit numa_remove_cpu(int cpu);
#ifdef CONFIG_NUMA_EMU
-#define FAKE_NODE_MIN_SIZE ((u64)64 << 20)
+#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
#endif /* CONFIG_NUMA_EMU */
#else
diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h
index 42a978c0c1b..f482010350f 100644
--- a/arch/x86/include/asm/olpc.h
+++ b/arch/x86/include/asm/olpc.h
@@ -107,10 +107,14 @@ extern int olpc_ec_mask_unset(uint8_t bits);
/* GPIO assignments */
#define OLPC_GPIO_MIC_AC 1
-#define OLPC_GPIO_DCON_IRQ geode_gpio(7)
+#define OLPC_GPIO_DCON_STAT0 5
+#define OLPC_GPIO_DCON_STAT1 6
+#define OLPC_GPIO_DCON_IRQ 7
#define OLPC_GPIO_THRM_ALRM geode_gpio(10)
-#define OLPC_GPIO_SMB_CLK geode_gpio(14)
-#define OLPC_GPIO_SMB_DATA geode_gpio(15)
+#define OLPC_GPIO_DCON_LOAD 11
+#define OLPC_GPIO_DCON_BLANK 12
+#define OLPC_GPIO_SMB_CLK 14
+#define OLPC_GPIO_SMB_DATA 15
#define OLPC_GPIO_WORKAUX geode_gpio(24)
#define OLPC_GPIO_LID geode_gpio(26)
#define OLPC_GPIO_ECSCI geode_gpio(27)
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index f899e01a8ac..8ee45167e81 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -230,6 +230,125 @@ do { \
})
/*
+ * Add return operation
+ */
+#define percpu_add_return_op(var, val) \
+({ \
+ typeof(var) paro_ret__ = val; \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm("xaddb %0, "__percpu_arg(1) \
+ : "+q" (paro_ret__), "+m" (var) \
+ : : "memory"); \
+ break; \
+ case 2: \
+ asm("xaddw %0, "__percpu_arg(1) \
+ : "+r" (paro_ret__), "+m" (var) \
+ : : "memory"); \
+ break; \
+ case 4: \
+ asm("xaddl %0, "__percpu_arg(1) \
+ : "+r" (paro_ret__), "+m" (var) \
+ : : "memory"); \
+ break; \
+ case 8: \
+ asm("xaddq %0, "__percpu_arg(1) \
+ : "+re" (paro_ret__), "+m" (var) \
+ : : "memory"); \
+ break; \
+ default: __bad_percpu_size(); \
+ } \
+ paro_ret__ += val; \
+ paro_ret__; \
+})
+
+/*
+ * xchg is implemented using cmpxchg without a lock prefix. xchg is
+ * expensive due to the implied lock prefix. The processor cannot prefetch
+ * cachelines if xchg is used.
+ */
+#define percpu_xchg_op(var, nval) \
+({ \
+ typeof(var) pxo_ret__; \
+ typeof(var) pxo_new__ = (nval); \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm("\n1:mov "__percpu_arg(1)",%%al" \
+ "\n\tcmpxchgb %2, "__percpu_arg(1) \
+ "\n\tjnz 1b" \
+ : "=a" (pxo_ret__), "+m" (var) \
+ : "q" (pxo_new__) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm("\n1:mov "__percpu_arg(1)",%%ax" \
+ "\n\tcmpxchgw %2, "__percpu_arg(1) \
+ "\n\tjnz 1b" \
+ : "=a" (pxo_ret__), "+m" (var) \
+ : "r" (pxo_new__) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm("\n1:mov "__percpu_arg(1)",%%eax" \
+ "\n\tcmpxchgl %2, "__percpu_arg(1) \
+ "\n\tjnz 1b" \
+ : "=a" (pxo_ret__), "+m" (var) \
+ : "r" (pxo_new__) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm("\n1:mov "__percpu_arg(1)",%%rax" \
+ "\n\tcmpxchgq %2, "__percpu_arg(1) \
+ "\n\tjnz 1b" \
+ : "=a" (pxo_ret__), "+m" (var) \
+ : "r" (pxo_new__) \
+ : "memory"); \
+ break; \
+ default: __bad_percpu_size(); \
+ } \
+ pxo_ret__; \
+})
+
+/*
+ * cmpxchg has no such implied lock semantics as a result it is much
+ * more efficient for cpu local operations.
+ */
+#define percpu_cmpxchg_op(var, oval, nval) \
+({ \
+ typeof(var) pco_ret__; \
+ typeof(var) pco_old__ = (oval); \
+ typeof(var) pco_new__ = (nval); \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm("cmpxchgb %2, "__percpu_arg(1) \
+ : "=a" (pco_ret__), "+m" (var) \
+ : "q" (pco_new__), "0" (pco_old__) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm("cmpxchgw %2, "__percpu_arg(1) \
+ : "=a" (pco_ret__), "+m" (var) \
+ : "r" (pco_new__), "0" (pco_old__) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm("cmpxchgl %2, "__percpu_arg(1) \
+ : "=a" (pco_ret__), "+m" (var) \
+ : "r" (pco_new__), "0" (pco_old__) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm("cmpxchgq %2, "__percpu_arg(1) \
+ : "=a" (pco_ret__), "+m" (var) \
+ : "r" (pco_new__), "0" (pco_old__) \
+ : "memory"); \
+ break; \
+ default: __bad_percpu_size(); \
+ } \
+ pco_ret__; \
+})
+
+/*
* percpu_read() makes gcc load the percpu variable every time it is
* accessed while percpu_read_stable() allows the value to be cached.
* percpu_read_stable() is more efficient and can be used if its value
@@ -267,6 +386,12 @@ do { \
#define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
#define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
#define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
+/*
+ * Generic fallback operations for __this_cpu_xchg_[1-4] are okay and much
+ * faster than an xchg with forced lock semantics.
+ */
+#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
@@ -286,6 +411,11 @@ do { \
#define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
+#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
#define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
@@ -299,6 +429,31 @@ do { \
#define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
#define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
#define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
+#define irqsafe_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+
+#ifndef CONFIG_M386
+#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
+#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
+#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
+#define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+
+#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
+#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
+#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
+#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+
+#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#endif /* !CONFIG_M386 */
/*
* Per cpu atomic 64 bit operations are only available under 64 bit.
@@ -311,6 +466,7 @@ do { \
#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
+#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
@@ -318,12 +474,12 @@ do { \
#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
+#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
#define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
#define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
-
#endif
/* This is not atomic against other CPUs -- CPU preemption needs to be off */
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 295e2ff18a6..e2f6a99f14a 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -20,6 +20,9 @@
#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
#define ARCH_P4_MAX_CCCR (18)
+#define ARCH_P4_CNTRVAL_BITS (40)
+#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)
+
#define P4_ESCR_EVENT_MASK 0x7e000000U
#define P4_ESCR_EVENT_SHIFT 25
#define P4_ESCR_EVENTMASK_MASK 0x01fffe00U
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cae9c3cb95c..c6efecf85a6 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -141,10 +141,9 @@ extern __u32 cpu_caps_set[NCAPINTS];
#ifdef CONFIG_SMP
DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
#define cpu_data(cpu) per_cpu(cpu_info, cpu)
-#define current_cpu_data __get_cpu_var(cpu_info)
#else
+#define cpu_info boot_cpu_data
#define cpu_data(cpu) boot_cpu_data
-#define current_cpu_data boot_cpu_data
#endif
extern const struct seq_operations cpuinfo_op;
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 396ff4cc8ed..66d0fff1ee8 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -37,4 +37,39 @@
extern struct shared_info *HYPERVISOR_shared_info;
extern struct start_info *xen_start_info;
+#include <asm/processor.h>
+
+static inline uint32_t xen_cpuid_base(void)
+{
+ uint32_t base, eax, ebx, ecx, edx;
+ char signature[13];
+
+ for (base = 0x40000000; base < 0x40010000; base += 0x100) {
+ cpuid(base, &eax, &ebx, &ecx, &edx);
+ *(uint32_t *)(signature + 0) = ebx;
+ *(uint32_t *)(signature + 4) = ecx;
+ *(uint32_t *)(signature + 8) = edx;
+ signature[12] = 0;
+
+ if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2))
+ return base;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_XEN
+extern bool xen_hvm_need_lapic(void);
+
+static inline bool xen_x2apic_para_available(void)
+{
+ return xen_hvm_need_lapic();
+}
+#else
+static inline bool xen_x2apic_para_available(void)
+{
+ return (xen_cpuid_base() != 0);
+}
+#endif
+
#endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 17c8090fabd..ec881c6bfee 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -852,18 +852,6 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
* returns 0 on success, < 0 on error
*/
-static void __init acpi_register_lapic_address(unsigned long address)
-{
- mp_lapic_addr = address;
-
- set_fixmap_nocache(FIX_APIC_BASE, address);
- if (boot_cpu_physical_apicid == -1U) {
- boot_cpu_physical_apicid = read_apic_id();
- apic_version[boot_cpu_physical_apicid] =
- GET_APIC_VERSION(apic_read(APIC_LVR));
- }
-}
-
static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
{
int count;
@@ -885,7 +873,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
return count;
}
- acpi_register_lapic_address(acpi_lapic_addr);
+ register_lapic_address(acpi_lapic_addr);
return count;
}
@@ -912,7 +900,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
return count;
}
- acpi_register_lapic_address(acpi_lapic_addr);
+ register_lapic_address(acpi_lapic_addr);
count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
acpi_parse_sapic, MAX_LOCAL_APIC);
@@ -954,32 +942,6 @@ static int __init acpi_parse_madt_lapic_entries(void)
extern int es7000_plat;
#endif
-static void assign_to_mp_irq(struct mpc_intsrc *m,
- struct mpc_intsrc *mp_irq)
-{
- memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
-}
-
-static int mp_irq_cmp(struct mpc_intsrc *mp_irq,
- struct mpc_intsrc *m)
-{
- return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
-}
-
-static void save_mp_irq(struct mpc_intsrc *m)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++) {
- if (!mp_irq_cmp(&mp_irqs[i], m))
- return;
- }
-
- assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!!\n");
-}
-
void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
{
int ioapic;
@@ -1010,7 +972,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */
mp_irq.dstirq = pin; /* INTIN# */
- save_mp_irq(&mp_irq);
+ mp_save_irq(&mp_irq);
isa_irq_to_gsi[bus_irq] = gsi;
}
@@ -1085,7 +1047,7 @@ void __init mp_config_acpi_legacy_irqs(void)
mp_irq.srcbusirq = i; /* Identity mapped */
mp_irq.dstirq = pin;
- save_mp_irq(&mp_irq);
+ mp_save_irq(&mp_irq);
}
}
@@ -1122,7 +1084,7 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
mp_irq.dstapic = mp_ioapics[ioapic].apicid;
mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
- save_mp_irq(&mp_irq);
+ mp_save_irq(&mp_irq);
#endif
return 0;
}
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index affacb5e006..0a99f7198bc 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -20,6 +20,13 @@ struct pci_device_id amd_nb_misc_ids[] = {
};
EXPORT_SYMBOL(amd_nb_misc_ids);
+const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = {
+ { 0x00, 0x18, 0x20 },
+ { 0xff, 0x00, 0x20 },
+ { 0xfe, 0x00, 0x20 },
+ { }
+};
+
struct amd_northbridge_info amd_northbridges;
EXPORT_SYMBOL(amd_northbridges);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index dcd7c83e165..5955a7800a9 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -39,18 +39,6 @@ int fallback_aper_force __initdata;
int fix_aperture __initdata = 1;
-struct bus_dev_range {
- int bus;
- int dev_base;
- int dev_limit;
-};
-
-static struct bus_dev_range bus_dev_ranges[] __initdata = {
- { 0x00, 0x18, 0x20},
- { 0xff, 0x00, 0x20},
- { 0xfe, 0x00, 0x20}
-};
-
static struct resource gart_resource = {
.name = "GART",
.flags = IORESOURCE_MEM,
@@ -294,13 +282,13 @@ void __init early_gart_iommu_check(void)
search_agp_bridge(&agp_aper_order, &valid_agp);
fix = 0;
- for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+ for (i = 0; amd_nb_bus_dev_ranges[i].dev_limit; i++) {
int bus;
int dev_base, dev_limit;
- bus = bus_dev_ranges[i].bus;
- dev_base = bus_dev_ranges[i].dev_base;
- dev_limit = bus_dev_ranges[i].dev_limit;
+ bus = amd_nb_bus_dev_ranges[i].bus;
+ dev_base = amd_nb_bus_dev_ranges[i].dev_base;
+ dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
@@ -349,13 +337,13 @@ void __init early_gart_iommu_check(void)
return;
/* disable them all at first */
- for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+ for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
int bus;
int dev_base, dev_limit;
- bus = bus_dev_ranges[i].bus;
- dev_base = bus_dev_ranges[i].dev_base;
- dev_limit = bus_dev_ranges[i].dev_limit;
+ bus = amd_nb_bus_dev_ranges[i].bus;
+ dev_base = amd_nb_bus_dev_ranges[i].dev_base;
+ dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
@@ -390,14 +378,14 @@ int __init gart_iommu_hole_init(void)
fix = 0;
node = 0;
- for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+ for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
int bus;
int dev_base, dev_limit;
u32 ctl;
- bus = bus_dev_ranges[i].bus;
- dev_base = bus_dev_ranges[i].dev_base;
- dev_limit = bus_dev_ranges[i].dev_limit;
+ bus = amd_nb_bus_dev_ranges[i].bus;
+ dev_base = amd_nb_bus_dev_ranges[i].dev_base;
+ dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
@@ -505,7 +493,7 @@ out:
}
/* Fix up the north bridges */
- for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+ for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
int bus, dev_base, dev_limit;
/*
@@ -514,9 +502,9 @@ out:
*/
u32 ctl = DISTLBWALKPRB | aper_order << 1;
- bus = bus_dev_ranges[i].bus;
- dev_base = bus_dev_ranges[i].dev_base;
- dev_limit = bus_dev_ranges[i].dev_limit;
+ bus = amd_nb_bus_dev_ranges[i].bus;
+ dev_base = amd_nb_bus_dev_ranges[i].dev_base;
+ dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
continue;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 879999a5230..06c196d7e59 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -49,8 +49,8 @@
#include <asm/mtrr.h>
#include <asm/smp.h>
#include <asm/mce.h>
-#include <asm/kvm_para.h>
#include <asm/tsc.h>
+#include <asm/hypervisor.h>
unsigned int num_processors;
@@ -516,7 +516,7 @@ static void __cpuinit setup_APIC_timer(void)
{
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
- if (cpu_has(&current_cpu_data, X86_FEATURE_ARAT)) {
+ if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) {
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
/* Make LAPIC timer preferrable over percpu HPET */
lapic_clockevent.rating = 150;
@@ -684,7 +684,7 @@ static int __init calibrate_APIC_clock(void)
lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
lapic_clockevent.shift);
lapic_clockevent.max_delta_ns =
- clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+ clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
lapic_clockevent.min_delta_ns =
clockevent_delta2ns(0xF, &lapic_clockevent);
@@ -1191,12 +1191,15 @@ static void __cpuinit lapic_setup_esr(void)
oldvalue, value);
}
-
/**
* setup_local_APIC - setup the local APIC
+ *
+ * Used to setup local APIC while initializing BSP or bringin up APs.
+ * Always called with preemption disabled.
*/
void __cpuinit setup_local_APIC(void)
{
+ int cpu = smp_processor_id();
unsigned int value, queued;
int i, j, acked = 0;
unsigned long long tsc = 0, ntsc;
@@ -1221,8 +1224,6 @@ void __cpuinit setup_local_APIC(void)
#endif
perf_events_lapic_init();
- preempt_disable();
-
/*
* Double-check whether this APIC is really registered.
* This is meaningless in clustered apic mode, so we skip it.
@@ -1338,21 +1339,19 @@ void __cpuinit setup_local_APIC(void)
* TODO: set up through-local-APIC from through-I/O-APIC? --macro
*/
value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
- if (!smp_processor_id() && (pic_mode || !value)) {
+ if (!cpu && (pic_mode || !value)) {
value = APIC_DM_EXTINT;
- apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
- smp_processor_id());
+ apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
} else {
value = APIC_DM_EXTINT | APIC_LVT_MASKED;
- apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
- smp_processor_id());
+ apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
}
apic_write(APIC_LVT0, value);
/*
* only the BP should see the LINT1 NMI signal, obviously.
*/
- if (!smp_processor_id())
+ if (!cpu)
value = APIC_DM_NMI;
else
value = APIC_DM_NMI | APIC_LVT_MASKED;
@@ -1360,11 +1359,9 @@ void __cpuinit setup_local_APIC(void)
value |= APIC_LVT_LEVEL_TRIGGER;
apic_write(APIC_LVT1, value);
- preempt_enable();
-
#ifdef CONFIG_X86_MCE_INTEL
/* Recheck CMCI information after local APIC is up on CPU #0 */
- if (smp_processor_id() == 0)
+ if (!cpu)
cmci_recheck();
#endif
}
@@ -1479,7 +1476,8 @@ void __init enable_IR_x2apic(void)
/* IR is required if there is APIC ID > 255 even when running
* under KVM
*/
- if (max_physical_apicid > 255 || !kvm_para_available())
+ if (max_physical_apicid > 255 ||
+ !hypervisor_x2apic_available())
goto nox2apic;
/*
* without IR all CPUs can be addressed by IOAPIC/MSI
@@ -1633,28 +1631,6 @@ no_apic:
}
#endif
-#ifdef CONFIG_X86_64
-void __init early_init_lapic_mapping(void)
-{
- /*
- * If no local APIC can be found then go out
- * : it means there is no mpatable and MADT
- */
- if (!smp_found_config)
- return;
-
- set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
- apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
- APIC_BASE, mp_lapic_addr);
-
- /*
- * Fetch the APIC ID of the BSP in case we have a
- * default configuration (or the MP table is broken).
- */
- boot_cpu_physical_apicid = read_apic_id();
-}
-#endif
-
/**
* init_apic_mappings - initialize APIC mappings
*/
@@ -1680,10 +1656,7 @@ void __init init_apic_mappings(void)
* acpi_register_lapic_address()
*/
if (!acpi_lapic && !smp_found_config)
- set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-
- apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
- APIC_BASE, apic_phys);
+ register_lapic_address(apic_phys);
}
/*
@@ -1705,6 +1678,22 @@ void __init init_apic_mappings(void)
}
}
+void __init register_lapic_address(unsigned long address)
+{
+ mp_lapic_addr = address;
+
+ if (!x2apic_mode) {
+ set_fixmap_nocache(FIX_APIC_BASE, address);
+ apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+ APIC_BASE, mp_lapic_addr);
+ }
+ if (boot_cpu_physical_apicid == -1U) {
+ boot_cpu_physical_apicid = read_apic_id();
+ apic_version[boot_cpu_physical_apicid] =
+ GET_APIC_VERSION(apic_read(APIC_LVR));
+ }
+}
+
/*
* This initializes the IO-APIC and APIC hardware if this is
* a UP kernel.
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 72ec29e1ae0..79fd43ca6f9 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -68,7 +68,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
switch (cmd) {
case DIE_NMI:
- case DIE_NMI_IPI:
break;
default:
@@ -96,7 +95,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
static __read_mostly struct notifier_block backtrace_notifier = {
.notifier_call = arch_trigger_all_cpu_backtrace_handler,
.next = NULL,
- .priority = 1
+ .priority = NMI_LOCAL_LOW_PRIOR,
};
static int __init register_trigger_all_cpu_backtrace(void)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index f6cd5b41077..697dc34b7b8 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -125,6 +125,26 @@ static int __init parse_noapic(char *str)
}
early_param("noapic", parse_noapic);
+/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
+void mp_save_irq(struct mpc_intsrc *m)
+{
+ int i;
+
+ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
+ " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+ m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
+ m->srcbusirq, m->dstapic, m->dstirq);
+
+ for (i = 0; i < mp_irq_entries; i++) {
+ if (!memcmp(&mp_irqs[i], m, sizeof(*m)))
+ return;
+ }
+
+ memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m));
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!!\n");
+}
+
struct irq_pin_list {
int apic, pin;
struct irq_pin_list *next;
@@ -135,6 +155,7 @@ static struct irq_pin_list *alloc_irq_pin_list(int node)
return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
}
+
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
#ifdef CONFIG_SPARSE_IRQ
static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
@@ -2006,9 +2027,12 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
= mp_ioapics[apic_id].apicid;
/*
- * Read the right value from the MPC table and
- * write it into the ID register.
+ * Update the ID register according to the right value
+ * from the MPC table if they are different.
*/
+ if (mp_ioapics[apic_id].apicid == reg_00.bits.ID)
+ continue;
+
apic_printk(APIC_VERBOSE, KERN_INFO
"...changing IO-APIC physical APIC ID to %d ...",
mp_ioapics[apic_id].apicid);
@@ -2305,7 +2329,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
unsigned int irr;
struct irq_desc *desc;
struct irq_cfg *cfg;
- irq = __get_cpu_var(vector_irq)[vector];
+ irq = __this_cpu_read(vector_irq[vector]);
if (irq == -1)
continue;
@@ -2339,7 +2363,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
goto unlock;
}
- __get_cpu_var(vector_irq)[vector] = -1;
+ __this_cpu_write(vector_irq[vector], -1);
unlock:
raw_spin_unlock(&desc->lock);
}
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 2a3f2a7db24..bd16b58b885 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -120,8 +120,8 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
else if (!strcmp(oem_table_id, "UVX"))
uv_system_type = UV_X2APIC;
else if (!strcmp(oem_table_id, "UVH")) {
- __get_cpu_var(x2apic_extra_bits) =
- pnodeid << uvh_apicid.s.pnode_shift;
+ __this_cpu_write(x2apic_extra_bits,
+ pnodeid << uvh_apicid.s.pnode_shift);
uv_system_type = UV_NON_UNIQUE_APIC;
uv_set_apicid_hibit();
return 1;
@@ -286,7 +286,7 @@ static unsigned int x2apic_get_apic_id(unsigned long x)
unsigned int id;
WARN_ON(preemptible() && num_online_cpus() > 1);
- id = x | __get_cpu_var(x2apic_extra_bits);
+ id = x | __this_cpu_read(x2apic_extra_bits);
return id;
}
@@ -378,7 +378,7 @@ struct apic __refdata apic_x2apic_uv_x = {
static __cpuinit void set_x2apic_extra_bits(int pnode)
{
- __get_cpu_var(x2apic_extra_bits) = (pnode << 6);
+ __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift);
}
/*
@@ -641,7 +641,7 @@ void __cpuinit uv_cpu_init(void)
*/
int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
{
- if (reason != DIE_NMI_IPI)
+ if (reason != DIE_NMIUNKNOWN)
return NOTIFY_OK;
if (in_crash_kexec)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9e093f8fe78..7c7bedb83c5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -668,7 +668,7 @@ EXPORT_SYMBOL_GPL(amd_erratum_383);
bool cpu_has_amd_erratum(const int *erratum)
{
- struct cpuinfo_x86 *cpu = &current_cpu_data;
+ struct cpuinfo_x86 *cpu = __this_cpu_ptr(&cpu_info);
int osvw_id = *erratum++;
u32 range;
u32 ms;
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 491977baf6c..35c7e65e59b 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -521,7 +521,7 @@ static void check_supported_cpu(void *_rc)
*rc = -ENODEV;
- if (current_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ if (__this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_AMD)
return;
eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
@@ -1377,7 +1377,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
static void query_values_on_cpu(void *_err)
{
int *err = _err;
- struct powernow_k8_data *data = __get_cpu_var(powernow_data);
+ struct powernow_k8_data *data = __this_cpu_read(powernow_data);
*err = query_current_values_with_pending_wait(data);
}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 9ecf81f9b90..7283e98deaa 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -265,7 +265,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
line_size = l2.line_size;
lines_per_tag = l2.lines_per_tag;
/* cpu_data has errata corrections for K7 applied */
- size_in_kb = current_cpu_data.x86_cache_size;
+ size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
break;
case 3:
if (!l3.val)
@@ -287,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
eax->split.type = types[leaf];
eax->split.level = levels[leaf];
eax->split.num_threads_sharing = 0;
- eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
+ eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
if (assoc == 0xffff)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index e7dbde7bfed..a7797197956 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -25,6 +25,7 @@
#include <linux/gfp.h>
#include <asm/mce.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
/* Update fake mce registers on current CPU. */
static void inject_mce(struct mce *m)
@@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self,
struct die_args *args = (struct die_args *)data;
int cpu = smp_processor_id();
struct mce *m = &__get_cpu_var(injectm);
- if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
+ if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
return NOTIFY_DONE;
cpumask_clear_cpu(cpu, mce_inject_cpumask);
if (m->inject_flags & MCJ_EXCEPTION)
@@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self,
static struct notifier_block mce_raise_nb = {
.notifier_call = mce_raise_notify,
- .priority = 1000,
+ .priority = NMI_LOCAL_NORMAL_PRIOR,
};
/* Inject mce on current CPU */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 7a35b72d7c0..d916183b7f9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -326,7 +326,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
static int msr_to_offset(u32 msr)
{
- unsigned bank = __get_cpu_var(injectm.bank);
+ unsigned bank = __this_cpu_read(injectm.bank);
if (msr == rip_msr)
return offsetof(struct mce, ip);
@@ -346,7 +346,7 @@ static u64 mce_rdmsrl(u32 msr)
{
u64 v;
- if (__get_cpu_var(injectm).finished) {
+ if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
if (offset < 0)
@@ -369,7 +369,7 @@ static u64 mce_rdmsrl(u32 msr)
static void mce_wrmsrl(u32 msr, u64 v)
{
- if (__get_cpu_var(injectm).finished) {
+ if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
if (offset >= 0)
@@ -1159,7 +1159,7 @@ static void mce_start_timer(unsigned long data)
WARN_ON(smp_processor_id() != data);
- if (mce_available(&current_cpu_data)) {
+ if (mce_available(__this_cpu_ptr(&cpu_info))) {
machine_check_poll(MCP_TIMESTAMP,
&__get_cpu_var(mce_poll_banks));
}
@@ -1767,7 +1767,7 @@ static int mce_shutdown(struct sys_device *dev)
static int mce_resume(struct sys_device *dev)
{
__mcheck_cpu_init_generic();
- __mcheck_cpu_init_vendor(&current_cpu_data);
+ __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
return 0;
}
@@ -1775,7 +1775,7 @@ static int mce_resume(struct sys_device *dev)
static void mce_cpu_restart(void *data)
{
del_timer_sync(&__get_cpu_var(mce_timer));
- if (!mce_available(&current_cpu_data))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
__mcheck_cpu_init_generic();
__mcheck_cpu_init_timer();
@@ -1790,7 +1790,7 @@ static void mce_restart(void)
/* Toggle features for corrected errors */
static void mce_disable_ce(void *all)
{
- if (!mce_available(&current_cpu_data))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
if (all)
del_timer_sync(&__get_cpu_var(mce_timer));
@@ -1799,7 +1799,7 @@ static void mce_disable_ce(void *all)
static void mce_enable_ce(void *all)
{
- if (!mce_available(&current_cpu_data))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
cmci_reenable();
cmci_recheck();
@@ -2022,7 +2022,7 @@ static void __cpuinit mce_disable_cpu(void *h)
unsigned long action = *(unsigned long *)h;
int i;
- if (!mce_available(&current_cpu_data))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
if (!(action & CPU_TASKS_FROZEN))
@@ -2040,7 +2040,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
unsigned long action = *(unsigned long *)h;
int i;
- if (!mce_available(&current_cpu_data))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
if (!(action & CPU_TASKS_FROZEN))
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 6fcd0936194..8694ef56459 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -130,7 +130,7 @@ void cmci_recheck(void)
unsigned long flags;
int banks;
- if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
return;
local_irq_save(flags);
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 0a360d14659..9d977a2ea69 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -997,8 +997,7 @@ x86_perf_event_set_period(struct perf_event *event)
static void x86_pmu_enable_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- if (cpuc->enabled)
+ if (__this_cpu_read(cpu_hw_events.enabled))
__x86_pmu_enable_event(&event->hw,
ARCH_PERFMON_EVENTSEL_ENABLE);
}
@@ -1268,11 +1267,10 @@ perf_event_nmi_handler(struct notifier_block *self,
switch (cmd) {
case DIE_NMI:
- case DIE_NMI_IPI:
break;
case DIE_NMIUNKNOWN:
this_nmi = percpu_read(irq_stat.__nmi_count);
- if (this_nmi != __get_cpu_var(pmu_nmi).marked)
+ if (this_nmi != __this_cpu_read(pmu_nmi.marked))
/* let the kernel handle the unknown nmi */
return NOTIFY_DONE;
/*
@@ -1296,8 +1294,8 @@ perf_event_nmi_handler(struct notifier_block *self,
this_nmi = percpu_read(irq_stat.__nmi_count);
if ((handled > 1) ||
/* the next nmi could be a back-to-back nmi */
- ((__get_cpu_var(pmu_nmi).marked == this_nmi) &&
- (__get_cpu_var(pmu_nmi).handled > 1))) {
+ ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
+ (__this_cpu_read(pmu_nmi.handled) > 1))) {
/*
* We could have two subsequent back-to-back nmis: The
* first handles more than one counter, the 2nd
@@ -1308,8 +1306,8 @@ perf_event_nmi_handler(struct notifier_block *self,
* handling more than one counter. We will mark the
* next (3rd) and then drop it if unhandled.
*/
- __get_cpu_var(pmu_nmi).marked = this_nmi + 1;
- __get_cpu_var(pmu_nmi).handled = handled;
+ __this_cpu_write(pmu_nmi.marked, this_nmi + 1);
+ __this_cpu_write(pmu_nmi.handled, handled);
}
return NOTIFY_STOP;
@@ -1318,7 +1316,7 @@ perf_event_nmi_handler(struct notifier_block *self,
static __read_mostly struct notifier_block perf_event_nmi_notifier = {
.notifier_call = perf_event_nmi_handler,
.next = NULL,
- .priority = 1
+ .priority = NMI_LOCAL_LOW_PRIOR,
};
static struct event_constraint unconstrained;
@@ -1484,11 +1482,9 @@ static inline void x86_pmu_read(struct perf_event *event)
*/
static void x86_pmu_start_txn(struct pmu *pmu)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-
perf_pmu_disable(pmu);
- cpuc->group_flag |= PERF_EVENT_TXN;
- cpuc->n_txn = 0;
+ __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN);
+ __this_cpu_write(cpu_hw_events.n_txn, 0);
}
/*
@@ -1498,14 +1494,12 @@ static void x86_pmu_start_txn(struct pmu *pmu)
*/
static void x86_pmu_cancel_txn(struct pmu *pmu)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-
- cpuc->group_flag &= ~PERF_EVENT_TXN;
+ __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
/*
* Truncate the collected events.
*/
- cpuc->n_added -= cpuc->n_txn;
- cpuc->n_events -= cpuc->n_txn;
+ __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
+ __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
perf_pmu_enable(pmu);
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 24e390e40f2..008835c1d79 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -649,7 +649,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
- if (!__get_cpu_var(cpu_hw_events).enabled)
+ if (!__this_cpu_read(cpu_hw_events.enabled))
return;
intel_pmu_enable_bts(hwc->config);
@@ -679,7 +679,7 @@ static int intel_pmu_save_and_restart(struct perf_event *event)
static void intel_pmu_reset(void)
{
- struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
+ struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
unsigned long flags;
int idx;
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 81400b93e69..e56b9bfbabd 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -753,19 +753,21 @@ out:
static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
{
- int overflow = 0;
- u32 low, high;
+ u64 v;
- rdmsr(hwc->config_base + hwc->idx, low, high);
-
- /* we need to check high bit for unflagged overflows */
- if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) {
- overflow = 1;
- (void)checking_wrmsrl(hwc->config_base + hwc->idx,
- ((u64)low) & ~P4_CCCR_OVF);
+ /* an official way for overflow indication */
+ rdmsrl(hwc->config_base + hwc->idx, v);
+ if (v & P4_CCCR_OVF) {
+ wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF);
+ return 1;
}
- return overflow;
+ /* it might be unflagged overflow */
+ rdmsrl(hwc->event_base + hwc->idx, v);
+ if (!(v & ARCH_P4_CNTRVAL_MASK))
+ return 1;
+
+ return 0;
}
static void p4_pmu_disable_pebs(void)
@@ -1152,9 +1154,9 @@ static __initconst const struct x86_pmu p4_pmu = {
*/
.num_counters = ARCH_P4_MAX_CCCR,
.apic = 1,
- .cntval_bits = 40,
- .cntval_mask = (1ULL << 40) - 1,
- .max_period = (1ULL << 39) - 1,
+ .cntval_bits = ARCH_P4_CNTRVAL_BITS,
+ .cntval_mask = ARCH_P4_CNTRVAL_MASK,
+ .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
.hw_config = p4_hw_config,
.schedule_events = p4_pmu_schedule_events,
/*
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 8474c998cbd..d6fb146c0d8 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -197,14 +197,8 @@ void show_stack(struct task_struct *task, unsigned long *sp)
*/
void dump_stack(void)
{
- unsigned long bp = 0;
unsigned long stack;
-#ifdef CONFIG_FRAME_POINTER
- if (!bp)
- get_bp(bp);
-#endif
-
printk("Pid: %d, comm: %.20s %s %s %.*s\n",
current->pid, current->comm, print_tainted(),
init_utsname()->release,
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e3ba417e869..d3b895f375d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -299,17 +299,21 @@ ENDPROC(native_usergs_sysret64)
ENTRY(save_args)
XCPT_FRAME
cld
- movq_cfi rdi, RDI+16-ARGOFFSET
- movq_cfi rsi, RSI+16-ARGOFFSET
- movq_cfi rdx, RDX+16-ARGOFFSET
- movq_cfi rcx, RCX+16-ARGOFFSET
- movq_cfi rax, RAX+16-ARGOFFSET
- movq_cfi r8, R8+16-ARGOFFSET
- movq_cfi r9, R9+16-ARGOFFSET
- movq_cfi r10, R10+16-ARGOFFSET
- movq_cfi r11, R11+16-ARGOFFSET
-
- leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */
+ /*
+ * start from rbp in pt_regs and jump over
+ * return address.
+ */
+ movq_cfi rdi, RDI+8-RBP
+ movq_cfi rsi, RSI+8-RBP
+ movq_cfi rdx, RDX+8-RBP
+ movq_cfi rcx, RCX+8-RBP
+ movq_cfi rax, RAX+8-RBP
+ movq_cfi r8, R8+8-RBP
+ movq_cfi r9, R9+8-RBP
+ movq_cfi r10, R10+8-RBP
+ movq_cfi r11, R11+8-RBP
+
+ leaq -RBP+8(%rsp),%rdi /* arg1 for handler */
movq_cfi rbp, 8 /* push %rbp */
leaq 8(%rsp), %rbp /* mov %rsp, %ebp */
testl $3, CS(%rdi)
@@ -782,8 +786,9 @@ END(interrupt)
/* 0(%rsp): ~(interrupt number) */
.macro interrupt func
- subq $ORIG_RAX-ARGOFFSET+8, %rsp
- CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8
+ /* reserve pt_regs for scratch regs and rbp */
+ subq $ORIG_RAX-RBP, %rsp
+ CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
call save_args
PARTIAL_FRAME 0
call \func
@@ -808,9 +813,14 @@ ret_from_intr:
TRACE_IRQS_OFF
decl PER_CPU_VAR(irq_count)
leaveq
+
CFI_RESTORE rbp
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
+
+ /* we did not save rbx, restore only from ARGOFFSET */
+ addq $8, %rsp
+ CFI_ADJUST_CFA_OFFSET -8
exit_intr:
GET_THREAD_INFO(%rcx)
testl $3,CS-ARGOFFSET(%rsp)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 298448656b6..382eb2936d4 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -170,9 +170,9 @@ static void ftrace_mod_code(void)
void ftrace_nmi_enter(void)
{
- __get_cpu_var(save_modifying_code) = modifying_code;
+ __this_cpu_write(save_modifying_code, modifying_code);
- if (!__get_cpu_var(save_modifying_code))
+ if (!__this_cpu_read(save_modifying_code))
return;
if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
@@ -186,7 +186,7 @@ void ftrace_nmi_enter(void)
void ftrace_nmi_exit(void)
{
- if (!__get_cpu_var(save_modifying_code))
+ if (!__this_cpu_read(save_modifying_code))
return;
/* Finish all executions before clearing nmi_running */
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 42c59425450..02f07634d26 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -122,7 +122,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
return -EBUSY;
set_debugreg(info->address, i);
- __get_cpu_var(cpu_debugreg[i]) = info->address;
+ __this_cpu_write(cpu_debugreg[i], info->address);
dr7 = &__get_cpu_var(cpu_dr7);
*dr7 |= encode_dr7(i, info->len, info->type);
@@ -397,12 +397,12 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
void hw_breakpoint_restore(void)
{
- set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
- set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
- set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
- set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
+ set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0);
+ set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1);
+ set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2);
+ set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3);
set_debugreg(current->thread.debugreg6, 6);
- set_debugreg(__get_cpu_var(cpu_dr7), 7);
+ set_debugreg(__this_cpu_read(cpu_dr7), 7);
}
EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 83ec0175f98..3a43caa3beb 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -234,7 +234,7 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
exit_idle();
irq_enter();
- irq = __get_cpu_var(vector_irq)[vector];
+ irq = __this_cpu_read(vector_irq[vector]);
if (!handle_irq(irq, regs)) {
ack_APIC_irq();
@@ -350,12 +350,12 @@ void fixup_irqs(void)
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
unsigned int irr;
- if (__get_cpu_var(vector_irq)[vector] < 0)
+ if (__this_cpu_read(vector_irq[vector]) < 0)
continue;
irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
if (irr & (1 << (vector % 32))) {
- irq = __get_cpu_var(vector_irq)[vector];
+ irq = __this_cpu_read(vector_irq[vector]);
data = irq_get_irq_data(irq);
raw_spin_lock(&desc->lock);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 96656f20775..48ff6dcffa0 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -79,7 +79,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
u32 *isp, arg1, arg2;
curctx = (union irq_ctx *) current_thread_info();
- irqctx = __get_cpu_var(hardirq_ctx);
+ irqctx = __this_cpu_read(hardirq_ctx);
/*
* this is where we switch to the IRQ stack. However, if we are
@@ -166,7 +166,7 @@ asmlinkage void do_softirq(void)
if (local_softirq_pending()) {
curctx = current_thread_info();
- irqctx = __get_cpu_var(softirq_ctx);
+ irqctx = __this_cpu_read(softirq_ctx);
irqctx->tinfo.task = curctx->task;
irqctx->tinfo.previous_esp = current_stack_pointer;
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index cd21b654dec..a4130005028 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -48,6 +48,7 @@
#include <asm/apicdef.h>
#include <asm/system.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
{
@@ -525,10 +526,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
}
return NOTIFY_DONE;
- case DIE_NMI_IPI:
- /* Just ignore, we will handle the roundup on DIE_NMI. */
- return NOTIFY_DONE;
-
case DIE_NMIUNKNOWN:
if (was_in_debug_nmi[raw_smp_processor_id()]) {
was_in_debug_nmi[raw_smp_processor_id()] = 0;
@@ -606,7 +603,7 @@ static struct notifier_block kgdb_notifier = {
/*
* Lowest-prio notifier priority, we want to be notified last:
*/
- .priority = -INT_MAX,
+ .priority = NMI_LOCAL_LOW_PRIOR,
};
/**
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 5940282bd2f..d91c477b3f6 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -403,7 +403,7 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
- __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+ __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
kcb->kprobe_status = kcb->prev_kprobe.status;
kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags;
kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
@@ -412,7 +412,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
- __get_cpu_var(current_kprobe) = p;
+ __this_cpu_write(current_kprobe, p);
kcb->kprobe_saved_flags = kcb->kprobe_old_flags
= (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
if (is_IF_modifier(p->ainsn.insn))
@@ -586,7 +586,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
preempt_enable_no_resched();
return 1;
} else if (kprobe_running()) {
- p = __get_cpu_var(current_kprobe);
+ p = __this_cpu_read(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
setup_singlestep(p, regs, kcb, 0);
return 1;
@@ -759,11 +759,11 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
orig_ret_address = (unsigned long)ri->ret_addr;
if (ri->rp && ri->rp->handler) {
- __get_cpu_var(current_kprobe) = &ri->rp->kp;
+ __this_cpu_write(current_kprobe, &ri->rp->kp);
get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
ri->ret_addr = correct_ret_addr;
ri->rp->handler(ri, regs);
- __get_cpu_var(current_kprobe) = NULL;
+ __this_cpu_write(current_kprobe, NULL);
}
recycle_rp_inst(ri, &empty_rp);
@@ -1202,10 +1202,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
regs->orig_ax = ~0UL;
- __get_cpu_var(current_kprobe) = &op->kp;
+ __this_cpu_write(current_kprobe, &op->kp);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
opt_pre_handler(&op->kp, regs);
- __get_cpu_var(current_kprobe) = NULL;
+ __this_cpu_write(current_kprobe, NULL);
}
preempt_enable_no_resched();
}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 9af64d9c4b6..01b0f6d0645 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -118,21 +118,8 @@ static void __init MP_bus_info(struct mpc_bus *m)
static void __init MP_ioapic_info(struct mpc_ioapic *m)
{
- if (!(m->flags & MPC_APIC_USABLE))
- return;
-
- printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
- m->apicid, m->apicver, m->apicaddr);
-
- mp_register_ioapic(m->apicid, m->apicaddr, gsi_top);
-}
-
-static void print_MP_intsrc_info(struct mpc_intsrc *m)
-{
- apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
- " IRQ %02x, APIC ID %x, APIC INT %02x\n",
- m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
- m->srcbusirq, m->dstapic, m->dstirq);
+ if (m->flags & MPC_APIC_USABLE)
+ mp_register_ioapic(m->apicid, m->apicaddr, gsi_top);
}
static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
@@ -144,73 +131,11 @@ static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
}
-static void __init assign_to_mp_irq(struct mpc_intsrc *m,
- struct mpc_intsrc *mp_irq)
-{
- mp_irq->dstapic = m->dstapic;
- mp_irq->type = m->type;
- mp_irq->irqtype = m->irqtype;
- mp_irq->irqflag = m->irqflag;
- mp_irq->srcbus = m->srcbus;
- mp_irq->srcbusirq = m->srcbusirq;
- mp_irq->dstirq = m->dstirq;
-}
-
-static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq,
- struct mpc_intsrc *m)
-{
- m->dstapic = mp_irq->dstapic;
- m->type = mp_irq->type;
- m->irqtype = mp_irq->irqtype;
- m->irqflag = mp_irq->irqflag;
- m->srcbus = mp_irq->srcbus;
- m->srcbusirq = mp_irq->srcbusirq;
- m->dstirq = mp_irq->dstirq;
-}
-
-static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq,
- struct mpc_intsrc *m)
-{
- if (mp_irq->dstapic != m->dstapic)
- return 1;
- if (mp_irq->type != m->type)
- return 2;
- if (mp_irq->irqtype != m->irqtype)
- return 3;
- if (mp_irq->irqflag != m->irqflag)
- return 4;
- if (mp_irq->srcbus != m->srcbus)
- return 5;
- if (mp_irq->srcbusirq != m->srcbusirq)
- return 6;
- if (mp_irq->dstirq != m->dstirq)
- return 7;
-
- return 0;
-}
-
-static void __init MP_intsrc_info(struct mpc_intsrc *m)
-{
- int i;
-
- print_MP_intsrc_info(m);
-
- for (i = 0; i < mp_irq_entries; i++) {
- if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m))
- return;
- }
-
- assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!!\n");
-}
#else /* CONFIG_X86_IO_APIC */
static inline void __init MP_bus_info(struct mpc_bus *m) {}
static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {}
-static inline void __init MP_intsrc_info(struct mpc_intsrc *m) {}
#endif /* CONFIG_X86_IO_APIC */
-
static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
{
apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
@@ -222,7 +147,6 @@ static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
/*
* Read/parse the MPC
*/
-
static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str)
{
@@ -275,18 +199,6 @@ static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt)
void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { }
-static void __init smp_register_lapic_address(unsigned long address)
-{
- mp_lapic_addr = address;
-
- set_fixmap_nocache(FIX_APIC_BASE, address);
- if (boot_cpu_physical_apicid == -1U) {
- boot_cpu_physical_apicid = read_apic_id();
- apic_version[boot_cpu_physical_apicid] =
- GET_APIC_VERSION(apic_read(APIC_LVR));
- }
-}
-
static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
{
char str[16];
@@ -301,17 +213,13 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
#ifdef CONFIG_X86_32
generic_mps_oem_check(mpc, oem, str);
#endif
- /* save the local APIC address, it might be non-default */
+ /* Initialize the lapic mapping */
if (!acpi_lapic)
- mp_lapic_addr = mpc->lapic;
+ register_lapic_address(mpc->lapic);
if (early)
return 1;
- /* Initialize the lapic mapping */
- if (!acpi_lapic)
- smp_register_lapic_address(mpc->lapic);
-
if (mpc->oemptr)
x86_init.mpparse.smp_read_mpc_oem(mpc);
@@ -337,7 +245,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
skip_entry(&mpt, &count, sizeof(struct mpc_ioapic));
break;
case MP_INTSRC:
- MP_intsrc_info((struct mpc_intsrc *)mpt);
+ mp_save_irq((struct mpc_intsrc *)mpt);
skip_entry(&mpt, &count, sizeof(struct mpc_intsrc));
break;
case MP_LINTSRC:
@@ -429,13 +337,13 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
intsrc.srcbusirq = i;
intsrc.dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
- MP_intsrc_info(&intsrc);
+ mp_save_irq(&intsrc);
}
intsrc.irqtype = mp_ExtINT;
intsrc.srcbusirq = 0;
intsrc.dstirq = 0; /* 8259A to INTIN0 */
- MP_intsrc_info(&intsrc);
+ mp_save_irq(&intsrc);
}
@@ -784,11 +692,11 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
int i;
apic_printk(APIC_VERBOSE, "OLD ");
- print_MP_intsrc_info(m);
+ print_mp_irq_info(m);
i = get_MP_intsrc_index(m);
if (i > 0) {
- assign_to_mpc_intsrc(&mp_irqs[i], m);
+ memcpy(m, &mp_irqs[i], sizeof(*m));
apic_printk(APIC_VERBOSE, "NEW ");
print_mp_irq_info(&mp_irqs[i]);
return;
@@ -875,14 +783,14 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
if (nr_m_spare > 0) {
apic_printk(APIC_VERBOSE, "*NEW* found\n");
nr_m_spare--;
- assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]);
+ memcpy(m_spare[nr_m_spare], &mp_irqs[i], sizeof(mp_irqs[i]));
m_spare[nr_m_spare] = NULL;
} else {
struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
count += sizeof(struct mpc_intsrc);
if (check_slot(mpc_new_phys, mpc_new_length, count) < 0)
goto out;
- assign_to_mpc_intsrc(&mp_irqs[i], m);
+ memcpy(m, &mp_irqs[i], sizeof(*m));
mpc->length = count;
mpt += sizeof(struct mpc_intsrc);
}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c852041bfc3..09c08a1c706 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -446,7 +446,7 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
trace_cpu_idle((ax>>4)+1, smp_processor_id());
if (!need_resched()) {
- if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
+ if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
__monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -462,7 +462,7 @@ static void mwait_idle(void)
if (!need_resched()) {
trace_power_start(POWER_CSTATE, 1, smp_processor_id());
trace_cpu_idle(1, smp_processor_id());
- if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
+ if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
__monitor((void *)&current_thread_info()->flags, 0, 0);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index c495aa8d481..fc7aae1e2bc 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -18,6 +18,7 @@
#include <asm/pci_x86.h>
#include <asm/virtext.h>
#include <asm/cpu.h>
+#include <asm/nmi.h>
#ifdef CONFIG_X86_32
# include <linux/ctype.h>
@@ -747,7 +748,7 @@ static int crash_nmi_callback(struct notifier_block *self,
{
int cpu;
- if (val != DIE_NMI_IPI)
+ if (val != DIE_NMI)
return NOTIFY_OK;
cpu = raw_smp_processor_id();
@@ -778,6 +779,8 @@ static void smp_send_nmi_allbutself(void)
static struct notifier_block crash_nmi_nb = {
.notifier_call = crash_nmi_callback,
+ /* we want to be the first one called */
+ .priority = NMI_LOCAL_HIGH_PRIOR+1,
};
/* Halt all other CPUs, calling the specified function on each of them
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 1cfbbfc3ae2..6f39cab052d 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -76,7 +76,7 @@ int mach_set_rtc_mmss(unsigned long nowtime)
CMOS_WRITE(real_seconds, RTC_SECONDS);
CMOS_WRITE(real_minutes, RTC_MINUTES);
} else {
- printk(KERN_WARNING
+ printk_once(KERN_NOTICE
"set_rtc_mmss: can't update from %d to %d\n",
cmos_minutes, real_minutes);
retval = -1;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ee886fe10ef..763df77343d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -97,12 +97,12 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
*/
static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
-void cpu_hotplug_driver_lock()
+void cpu_hotplug_driver_lock(void)
{
mutex_lock(&x86_cpu_hotplug_driver_mutex);
}
-void cpu_hotplug_driver_unlock()
+void cpu_hotplug_driver_unlock(void)
{
mutex_unlock(&x86_cpu_hotplug_driver_mutex);
}
@@ -427,7 +427,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
cpumask_set_cpu(cpu, c->llc_shared_map);
- if (current_cpu_data.x86_max_cores == 1) {
+ if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
c->booted_cores = 1;
return;
@@ -1089,7 +1089,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
preempt_disable();
smp_cpu_index_default();
- current_cpu_data = boot_cpu_data;
+ memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info));
cpumask_copy(cpu_callin_mask, cpumask_of(0));
mb();
/*
@@ -1383,7 +1383,7 @@ void play_dead_common(void)
mb();
/* Ack it */
- __get_cpu_var(cpu_state) = CPU_DEAD;
+ __this_cpu_write(cpu_state, CPU_DEAD);
/*
* With physical CPU hotplug, we should halt the cpu
@@ -1403,11 +1403,11 @@ static inline void mwait_play_dead(void)
int i;
void *mwait_ptr;
- if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT))
+ if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_MWAIT))
return;
- if (!cpu_has(&current_cpu_data, X86_FEATURE_CLFLSH))
+ if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH))
return;
- if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+ if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
return;
eax = CPUID_MWAIT_LEAF;
@@ -1458,7 +1458,7 @@ static inline void mwait_play_dead(void)
static inline void hlt_play_dead(void)
{
- if (current_cpu_data.x86 >= 4)
+ if (__this_cpu_read(cpu_info.x86) >= 4)
wbinvd();
while (1) {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c76aaca5694..b9b67166f9d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -84,6 +84,11 @@ EXPORT_SYMBOL_GPL(used_vectors);
static int ignore_nmis;
int unknown_nmi_panic;
+/*
+ * Prevent NMI reason port (0x61) being accessed simultaneously, can
+ * only be used in NMI handler.
+ */
+static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
static inline void conditional_sti(struct pt_regs *regs)
{
@@ -310,15 +315,15 @@ static int __init setup_unknown_nmi_panic(char *str)
__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
static notrace __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs *regs)
+pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
- printk(KERN_EMERG
- "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
- reason, smp_processor_id());
-
- printk(KERN_EMERG
- "You have some hardware problem, likely on the PCI bus.\n");
+ pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
+ /*
+ * On some machines, PCI SERR line is used to report memory
+ * errors. EDAC makes use of it.
+ */
#if defined(CONFIG_EDAC)
if (edac_handler_set()) {
edac_atomic_assert_error();
@@ -329,11 +334,11 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs)
if (panic_on_unrecovered_nmi)
panic("NMI: Not continuing");
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ pr_emerg("Dazed and confused, but trying to continue\n");
- /* Clear and disable the memory parity error line. */
- reason = (reason & 0xf) | 4;
- outb(reason, 0x61);
+ /* Clear and disable the PCI SERR error line. */
+ reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
+ outb(reason, NMI_REASON_PORT);
}
static notrace __kprobes void
@@ -341,15 +346,17 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
{
unsigned long i;
- printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+ pr_emerg(
+ "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
show_registers(regs);
if (panic_on_io_nmi)
panic("NMI IOCK error: Not continuing");
/* Re-enable the IOCK line, wait for a few seconds */
- reason = (reason & 0xf) | 8;
- outb(reason, 0x61);
+ reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
+ outb(reason, NMI_REASON_PORT);
i = 20000;
while (--i) {
@@ -357,8 +364,8 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
udelay(100);
}
- reason &= ~8;
- outb(reason, 0x61);
+ reason &= ~NMI_REASON_CLEAR_IOCHK;
+ outb(reason, NMI_REASON_PORT);
}
static notrace __kprobes void
@@ -377,57 +384,50 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
return;
}
#endif
- printk(KERN_EMERG
- "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
- reason, smp_processor_id());
+ pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
- printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
+ pr_emerg("Do you have a strange power saving mode enabled?\n");
if (unknown_nmi_panic || panic_on_unrecovered_nmi)
panic("NMI: Not continuing");
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ pr_emerg("Dazed and confused, but trying to continue\n");
}
static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
{
unsigned char reason = 0;
- int cpu;
- cpu = smp_processor_id();
-
- /* Only the BSP gets external NMIs from the system. */
- if (!cpu)
- reason = get_nmi_reason();
+ /*
+ * CPU-specific NMI must be processed before non-CPU-specific
+ * NMI, otherwise we may lose it, because the CPU-specific
+ * NMI can not be detected/processed on other CPUs.
+ */
+ if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
+ return;
- if (!(reason & 0xc0)) {
- if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
- == NOTIFY_STOP)
- return;
+ /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
+ raw_spin_lock(&nmi_reason_lock);
+ reason = get_nmi_reason();
-#ifdef CONFIG_X86_LOCAL_APIC
- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
- == NOTIFY_STOP)
- return;
+ if (reason & NMI_REASON_MASK) {
+ if (reason & NMI_REASON_SERR)
+ pci_serr_error(reason, regs);
+ else if (reason & NMI_REASON_IOCHK)
+ io_check_error(reason, regs);
+#ifdef CONFIG_X86_32
+ /*
+ * Reassert NMI in case it became active
+ * meanwhile as it's edge-triggered:
+ */
+ reassert_nmi();
#endif
- unknown_nmi_error(reason, regs);
-
+ raw_spin_unlock(&nmi_reason_lock);
return;
}
- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
- return;
+ raw_spin_unlock(&nmi_reason_lock);
- /* AK: following checks seem to be broken on modern chipsets. FIXME */
- if (reason & 0x80)
- mem_parity_error(reason, regs);
- if (reason & 0x40)
- io_check_error(reason, regs);
-#ifdef CONFIG_X86_32
- /*
- * Reassert NMI in case it became active meanwhile
- * as it's edge-triggered:
- */
- reassert_nmi();
-#endif
+ unknown_nmi_error(reason, regs);
}
dotraplinkage notrace __kprobes void
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 356a0d455cf..823f79a17ad 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -659,7 +659,7 @@ void restore_sched_clock_state(void)
local_irq_save(flags);
- __get_cpu_var(cyc2ns_offset) = 0;
+ __this_cpu_write(cyc2ns_offset, 0);
offset = cyc2ns_suspend - sched_clock();
for_each_possible_cpu(cpu)
@@ -965,7 +965,7 @@ out:
static int __init init_tsc_clocksource(void)
{
- if (!cpu_has_tsc || tsc_disabled > 0)
+ if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
return 0;
if (tsc_clocksource_reliable)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b989e1f1e5d..46a368cb651 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -976,7 +976,7 @@ static inline u64 nsec_to_cycles(u64 nsec)
if (kvm_tsc_changes_freq())
printk_once(KERN_WARNING
"kvm: unreliable cycle conversion on adjustable rate TSC\n");
- ret = nsec * __get_cpu_var(cpu_tsc_khz);
+ ret = nsec * __this_cpu_read(cpu_tsc_khz);
do_div(ret, USEC_PER_SEC);
return ret;
}
@@ -1061,7 +1061,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
local_irq_save(flags);
kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp);
kernel_ns = get_kernel_ns();
- this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
+ this_tsc_khz = __this_cpu_read(cpu_tsc_khz);
if (unlikely(this_tsc_khz == 0)) {
local_irq_restore(flags);
@@ -4427,7 +4427,7 @@ EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
static void tsc_bad(void *info)
{
- __get_cpu_var(cpu_tsc_khz) = 0;
+ __this_cpu_write(cpu_tsc_khz, 0);
}
static void tsc_khz_changed(void *data)
@@ -4441,7 +4441,7 @@ static void tsc_khz_changed(void *data)
khz = cpufreq_quick_get(raw_smp_processor_id());
if (!khz)
khz = tsc_khz;
- __get_cpu_var(cpu_tsc_khz) = khz;
+ __this_cpu_write(cpu_tsc_khz, khz);
}
static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index ff485d36118..fc45ba887d0 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -121,7 +121,7 @@ inline void __const_udelay(unsigned long xloops)
asm("mull %%edx"
:"=d" (xloops), "=&a" (d0)
:"1" (xloops), "0"
- (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4)));
+ (this_cpu_read(cpu_info.loops_per_jiffy) * (HZ/4)));
__delay(++xloops);
}
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index 51fae9cfdec..f21962c435e 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -27,6 +27,7 @@
#include <asm/amd_nb.h>
static struct bootnode __initdata nodes[8];
+static unsigned char __initdata nodeids[8];
static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
static __init int find_northbridge(void)
@@ -66,20 +67,6 @@ static __init void early_get_boot_cpu_id(void)
if (smp_found_config)
early_get_smp_config();
#endif
- early_init_lapic_mapping();
-}
-
-int __init amd_get_nodes(struct bootnode *physnodes)
-{
- int i;
- int ret = 0;
-
- for_each_node_mask(i, nodes_parsed) {
- physnodes[ret].start = nodes[i].start;
- physnodes[ret].end = nodes[i].end;
- ret++;
- }
- return ret;
}
int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
@@ -114,7 +101,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
base = read_pci_config(0, nb, 1, 0x40 + i*8);
limit = read_pci_config(0, nb, 1, 0x44 + i*8);
- nodeid = limit & 7;
+ nodeids[i] = nodeid = limit & 7;
if ((base & 3) == 0) {
if (i < numnodes)
pr_info("Skipping disabled node %d\n", i);
@@ -194,6 +181,76 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
return 0;
}
+#ifdef CONFIG_NUMA_EMU
+static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
+ [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
+void __init amd_get_nodes(struct bootnode *physnodes)
+{
+ int i;
+
+ for_each_node_mask(i, nodes_parsed) {
+ physnodes[i].start = nodes[i].start;
+ physnodes[i].end = nodes[i].end;
+ }
+}
+
+static int __init find_node_by_addr(unsigned long addr)
+{
+ int ret = NUMA_NO_NODE;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ if (addr >= nodes[i].start && addr < nodes[i].end) {
+ ret = i;
+ break;
+ }
+ return ret;
+}
+
+/*
+ * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be
+ * setup to represent the physical topology but reflect the emulated
+ * environment. For each emulated node, the real node which it appears on is
+ * found and a fake pxm to nid mapping is created which mirrors the actual
+ * locality. node_distance() then represents the correct distances between
+ * emulated nodes by using the fake acpi mappings to pxms.
+ */
+void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
+{
+ unsigned int bits;
+ unsigned int cores;
+ unsigned int apicid_base = 0;
+ int i;
+
+ bits = boot_cpu_data.x86_coreid_bits;
+ cores = 1 << bits;
+ early_get_boot_cpu_id();
+ if (boot_cpu_physical_apicid > 0)
+ apicid_base = boot_cpu_physical_apicid;
+
+ for (i = 0; i < nr_nodes; i++) {
+ int index;
+ int nid;
+ int j;
+
+ nid = find_node_by_addr(nodes[i].start);
+ if (nid == NUMA_NO_NODE)
+ continue;
+
+ index = nodeids[nid] << bits;
+ if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)
+ for (j = apicid_base; j < cores + apicid_base; j++)
+ fake_apicid_to_node[index + j] = i;
+#ifdef CONFIG_ACPI_NUMA
+ __acpi_map_pxm_to_node(nid, i);
+#endif
+ }
+ memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+}
+#endif /* CONFIG_NUMA_EMU */
+
int __init amd_scan_nodes(void)
{
unsigned int bits;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 7762a517d69..1e72102e80c 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -260,30 +260,30 @@ void __init numa_init_array(void)
#ifdef CONFIG_NUMA_EMU
/* Numa emulation */
static struct bootnode nodes[MAX_NUMNODES] __initdata;
-static struct bootnode physnodes[MAX_NUMNODES] __initdata;
+static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata;
static char *cmdline __initdata;
static int __init setup_physnodes(unsigned long start, unsigned long end,
int acpi, int amd)
{
- int nr_nodes = 0;
int ret = 0;
int i;
+ memset(physnodes, 0, sizeof(physnodes));
#ifdef CONFIG_ACPI_NUMA
if (acpi)
- nr_nodes = acpi_get_nodes(physnodes);
+ acpi_get_nodes(physnodes, start, end);
#endif
#ifdef CONFIG_AMD_NUMA
if (amd)
- nr_nodes = amd_get_nodes(physnodes);
+ amd_get_nodes(physnodes);
#endif
/*
* Basic sanity checking on the physical node map: there may be errors
* if the SRAT or AMD code incorrectly reported the topology or the mem=
* kernel parameter is used.
*/
- for (i = 0; i < nr_nodes; i++) {
+ for (i = 0; i < MAX_NUMNODES; i++) {
if (physnodes[i].start == physnodes[i].end)
continue;
if (physnodes[i].start > end) {
@@ -298,17 +298,6 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
physnodes[i].start = start;
if (physnodes[i].end > end)
physnodes[i].end = end;
- }
-
- /*
- * Remove all nodes that have no memory or were truncated because of the
- * limited address range.
- */
- for (i = 0; i < nr_nodes; i++) {
- if (physnodes[i].start == physnodes[i].end)
- continue;
- physnodes[ret].start = physnodes[i].start;
- physnodes[ret].end = physnodes[i].end;
ret++;
}
@@ -324,6 +313,24 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
return ret;
}
+static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
+{
+ int i;
+
+ BUG_ON(acpi && amd);
+#ifdef CONFIG_ACPI_NUMA
+ if (acpi)
+ acpi_fake_nodes(nodes, nr_nodes);
+#endif
+#ifdef CONFIG_AMD_NUMA
+ if (amd)
+ amd_fake_nodes(nodes, nr_nodes);
+#endif
+ if (!acpi && !amd)
+ for (i = 0; i < nr_cpu_ids; i++)
+ numa_set_node(i, 0);
+}
+
/*
* Setups up nid to range from addr to addr + size. If the end
* boundary is greater than max_addr, then max_addr is used instead.
@@ -352,8 +359,7 @@ static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
* Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
* to max_addr. The return value is the number of nodes allocated.
*/
-static int __init split_nodes_interleave(u64 addr, u64 max_addr,
- int nr_phys_nodes, int nr_nodes)
+static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
{
nodemask_t physnode_mask = NODE_MASK_NONE;
u64 size;
@@ -384,7 +390,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
return -1;
}
- for (i = 0; i < nr_phys_nodes; i++)
+ for (i = 0; i < MAX_NUMNODES; i++)
if (physnodes[i].start != physnodes[i].end)
node_set(i, physnode_mask);
@@ -553,11 +559,9 @@ static int __init numa_emulation(unsigned long start_pfn,
{
u64 addr = start_pfn << PAGE_SHIFT;
u64 max_addr = last_pfn << PAGE_SHIFT;
- int num_phys_nodes;
int num_nodes;
int i;
- num_phys_nodes = setup_physnodes(addr, max_addr, acpi, amd);
/*
* If the numa=fake command-line contains a 'M' or 'G', it represents
* the fixed node size. Otherwise, if it is just a single number N,
@@ -572,7 +576,7 @@ static int __init numa_emulation(unsigned long start_pfn,
unsigned long n;
n = simple_strtoul(cmdline, NULL, 0);
- num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n);
+ num_nodes = split_nodes_interleave(addr, max_addr, n);
}
if (num_nodes < 0)
@@ -595,7 +599,8 @@ static int __init numa_emulation(unsigned long start_pfn,
nodes[i].end >> PAGE_SHIFT);
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
- acpi_fake_nodes(nodes, num_nodes);
+ setup_physnodes(addr, max_addr, acpi, amd);
+ fake_physnodes(acpi, amd, num_nodes);
numa_init_array();
return 0;
}
@@ -610,8 +615,12 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
nodes_clear(node_online_map);
#ifdef CONFIG_NUMA_EMU
+ setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
+ acpi, amd);
if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
return;
+ setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
+ acpi, amd);
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
#endif
@@ -767,6 +776,7 @@ void __cpuinit numa_clear_node(int cpu)
#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+#ifndef CONFIG_NUMA_EMU
void __cpuinit numa_add_cpu(int cpu)
{
cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
@@ -776,34 +786,115 @@ void __cpuinit numa_remove_cpu(int cpu)
{
cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
+#else
+void __cpuinit numa_add_cpu(int cpu)
+{
+ unsigned long addr;
+ u16 apicid;
+ int physnid;
+ int nid = NUMA_NO_NODE;
+
+ apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+ if (apicid != BAD_APICID)
+ nid = apicid_to_node[apicid];
+ if (nid == NUMA_NO_NODE)
+ nid = early_cpu_to_node(cpu);
+ BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
+
+ /*
+ * Use the starting address of the emulated node to find which physical
+ * node it is allocated on.
+ */
+ addr = node_start_pfn(nid) << PAGE_SHIFT;
+ for (physnid = 0; physnid < MAX_NUMNODES; physnid++)
+ if (addr >= physnodes[physnid].start &&
+ addr < physnodes[physnid].end)
+ break;
+
+ /*
+ * Map the cpu to each emulated node that is allocated on the physical
+ * node of the cpu's apic id.
+ */
+ for_each_online_node(nid) {
+ addr = node_start_pfn(nid) << PAGE_SHIFT;
+ if (addr >= physnodes[physnid].start &&
+ addr < physnodes[physnid].end)
+ cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
+ }
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ int i;
+
+ for_each_online_node(i)
+ cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
+}
+#endif /* !CONFIG_NUMA_EMU */
#else /* CONFIG_DEBUG_PER_CPU_MAPS */
+static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
+{
+ int node = early_cpu_to_node(cpu);
+ struct cpumask *mask;
+ char buf[64];
+
+ mask = node_to_cpumask_map[node];
+ if (!mask) {
+ pr_err("node_to_cpumask_map[%i] NULL\n", node);
+ dump_stack();
+ return NULL;
+ }
+
+ cpulist_scnprintf(buf, sizeof(buf), mask);
+ printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+ enable ? "numa_add_cpu" : "numa_remove_cpu",
+ cpu, node, buf);
+ return mask;
+}
/*
* --------- debug versions of the numa functions ---------
*/
+#ifndef CONFIG_NUMA_EMU
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
- int node = early_cpu_to_node(cpu);
struct cpumask *mask;
- char buf[64];
- mask = node_to_cpumask_map[node];
- if (mask == NULL) {
- printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node);
- dump_stack();
+ mask = debug_cpumask_set_cpu(cpu, enable);
+ if (!mask)
return;
- }
if (enable)
cpumask_set_cpu(cpu, mask);
else
cpumask_clear_cpu(cpu, mask);
+}
+#else
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+ int node = early_cpu_to_node(cpu);
+ struct cpumask *mask;
+ int i;
- cpulist_scnprintf(buf, sizeof(buf), mask);
- printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
- enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
+ for_each_online_node(i) {
+ unsigned long addr;
+
+ addr = node_start_pfn(i) << PAGE_SHIFT;
+ if (addr < physnodes[node].start ||
+ addr >= physnodes[node].end)
+ continue;
+ mask = debug_cpumask_set_cpu(cpu, enable);
+ if (!mask)
+ return;
+
+ if (enable)
+ cpumask_set_cpu(cpu, mask);
+ else
+ cpumask_clear_cpu(cpu, mask);
+ }
}
+#endif /* CONFIG_NUMA_EMU */
void __cpuinit numa_add_cpu(int cpu)
{
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 171a0aacb99..603d285d1da 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -349,18 +349,19 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
void __init acpi_numa_arch_fixup(void) {}
-int __init acpi_get_nodes(struct bootnode *physnodes)
+#ifdef CONFIG_NUMA_EMU
+void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
+ unsigned long end)
{
int i;
- int ret = 0;
for_each_node_mask(i, nodes_parsed) {
- physnodes[ret].start = nodes[i].start;
- physnodes[ret].end = nodes[i].end;
- ret++;
+ cutoff_node(i, start, end);
+ physnodes[i].start = nodes[i].start;
+ physnodes[i].end = nodes[i].end;
}
- return ret;
}
+#endif /* CONFIG_NUMA_EMU */
/* Use the information discovered above to actually set up the nodes. */
int __init acpi_scan_nodes(unsigned long start, unsigned long end)
@@ -505,8 +506,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
{
int i, j;
- printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
- "topology.\n");
for (i = 0; i < num_nodes; i++) {
int nid, pxm;
@@ -526,6 +525,17 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
fake_apicid_to_node[j] == NUMA_NO_NODE)
fake_apicid_to_node[j] = i;
}
+
+ /*
+ * If there are apicid-to-node mappings for physical nodes that do not
+ * have a corresponding emulated node, it should default to a guaranteed
+ * value.
+ */
+ for (i = 0; i < MAX_LOCAL_APIC; i++)
+ if (apicid_to_node[i] != NUMA_NO_NODE &&
+ fake_apicid_to_node[i] == NUMA_NO_NODE)
+ fake_apicid_to_node[i] = 0;
+
for (i = 0; i < num_nodes; i++)
__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 358c8b9c96a..e2b7b0c06cd 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -65,7 +65,6 @@ static int profile_exceptions_notify(struct notifier_block *self,
switch (val) {
case DIE_NMI:
- case DIE_NMI_IPI:
if (ctr_running)
model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
else if (!nmi_enabled)
@@ -143,7 +142,7 @@ static inline int has_mux(void)
inline int op_x86_phys_to_virt(int phys)
{
- return __get_cpu_var(switch_index) + phys;
+ return __this_cpu_read(switch_index) + phys;
}
inline int op_x86_virt_to_phys(int virt)
@@ -361,7 +360,7 @@ static void nmi_cpu_setup(void *dummy)
static struct notifier_block profile_exceptions_nb = {
.notifier_call = profile_exceptions_notify,
.next = NULL,
- .priority = 2
+ .priority = NMI_LOCAL_LOW_PRIOR,
};
static void nmi_cpu_restore_registers(struct op_msrs *msrs)
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
index 0636dd93cef..720bf5a53c5 100644
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self,
static struct notifier_block profile_timer_exceptions_nb = {
.notifier_call = profile_timer_exceptions_notify,
.next = NULL,
- .priority = 0
+ .priority = NMI_LOW_PRIOR,
};
static int timer_start(void)
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index d769cda5408..94b745045e4 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -95,8 +95,8 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
* counter width:
*/
if (!(eax.split.version_id == 0 &&
- current_cpu_data.x86 == 6 &&
- current_cpu_data.x86_model == 15)) {
+ __this_cpu_read(cpu_info.x86) == 6 &&
+ __this_cpu_read(cpu_info.x86_model) == 15)) {
if (counter_width < eax.split.bit_width)
counter_width = eax.split.bit_width;
@@ -235,8 +235,8 @@ static void arch_perfmon_setup_counters(void)
eax.full = cpuid_eax(0xa);
/* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
- if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
- current_cpu_data.x86_model == 15) {
+ if (eax.split.version_id == 0 && __this_cpu_read(cpu_info.x86) == 6 &&
+ __this_cpu_read(cpu_info.x86_model) == 15) {
eax.split.version_id = 2;
eax.split.num_counters = 2;
eax.split.bit_width = 40;
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index fc1e8fe07e5..e27dffbbb1a 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -4,6 +4,7 @@
#include <linux/cpu.h>
#include <linux/range.h>
+#include <asm/amd_nb.h>
#include <asm/pci_x86.h>
#include <asm/pci-direct.h>
@@ -378,6 +379,34 @@ static struct notifier_block __cpuinitdata amd_cpu_notifier = {
.notifier_call = amd_cpu_notify,
};
+static void __init pci_enable_pci_io_ecs(void)
+{
+#ifdef CONFIG_AMD_NB
+ unsigned int i, n;
+
+ for (n = i = 0; !n && amd_nb_bus_dev_ranges[i].dev_limit; ++i) {
+ u8 bus = amd_nb_bus_dev_ranges[i].bus;
+ u8 slot = amd_nb_bus_dev_ranges[i].dev_base;
+ u8 limit = amd_nb_bus_dev_ranges[i].dev_limit;
+
+ for (; slot < limit; ++slot) {
+ u32 val = read_pci_config(bus, slot, 3, 0);
+
+ if (!early_is_amd_nb(val))
+ continue;
+
+ val = read_pci_config(bus, slot, 3, 0x8c);
+ if (!(val & (ENABLE_CF8_EXT_CFG >> 32))) {
+ val |= ENABLE_CF8_EXT_CFG >> 32;
+ write_pci_config(bus, slot, 3, 0x8c, val);
+ }
+ ++n;
+ }
+ }
+ pr_info("Extended Config Space enabled on %u nodes\n", n);
+#endif
+}
+
static int __init pci_io_ecs_init(void)
{
int cpu;
@@ -386,6 +415,10 @@ static int __init pci_io_ecs_init(void)
if (boot_cpu_data.x86 < 0x10)
return 0;
+ /* Try the PCI method first. */
+ if (early_pci_allowed())
+ pci_enable_pci_io_ecs();
+
register_cpu_notifier(&amd_cpu_notifier);
for_each_online_cpu(cpu)
amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE,
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index fee0b4914e0..ea6529e93c6 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -71,32 +71,6 @@ struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
EXPORT_SYMBOL_GPL(sfi_mrtc_array);
int sfi_mrtc_num;
-static inline void assign_to_mp_irq(struct mpc_intsrc *m,
- struct mpc_intsrc *mp_irq)
-{
- memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
-}
-
-static inline int mp_irq_cmp(struct mpc_intsrc *mp_irq,
- struct mpc_intsrc *m)
-{
- return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
-}
-
-static void save_mp_irq(struct mpc_intsrc *m)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++) {
- if (!mp_irq_cmp(&mp_irqs[i], m))
- return;
- }
-
- assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!!\n");
-}
-
/* parse all the mtimer info to a static mtimer array */
static int __init sfi_parse_mtmr(struct sfi_table_header *table)
{
@@ -130,7 +104,7 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table)
mp_irq.srcbusirq = pentry->irq; /* IRQ */
mp_irq.dstapic = MP_APIC_ALL;
mp_irq.dstirq = pentry->irq;
- save_mp_irq(&mp_irq);
+ mp_save_irq(&mp_irq);
}
return 0;
@@ -200,7 +174,7 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
mp_irq.srcbusirq = pentry->irq; /* IRQ */
mp_irq.dstapic = MP_APIC_ALL;
mp_irq.dstirq = pentry->irq;
- save_mp_irq(&mp_irq);
+ mp_save_irq(&mp_irq);
}
return 0;
}
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
index ca54875ac79..7785b72ecc3 100644
--- a/arch/x86/platform/sfi/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
@@ -34,17 +34,6 @@
#ifdef CONFIG_X86_LOCAL_APIC
static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
-static void __init mp_sfi_register_lapic_address(unsigned long address)
-{
- mp_lapic_addr = address;
-
- set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
- if (boot_cpu_physical_apicid == -1U)
- boot_cpu_physical_apicid = read_apic_id();
-
- pr_info("Boot CPU = %d\n", boot_cpu_physical_apicid);
-}
-
/* All CPUs enumerated by SFI must be present and enabled */
static void __cpuinit mp_sfi_register_lapic(u8 id)
{
@@ -110,7 +99,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table)
int __init sfi_platform_init(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
- mp_sfi_register_lapic_address(sfi_lapic_addr);
+ register_lapic_address(sfi_lapic_addr);
sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus);
#endif
#ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 44dcad43989..7e8d3bc80af 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -574,8 +574,8 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
preempt_disable();
- start = __get_cpu_var(idt_desc).address;
- end = start + __get_cpu_var(idt_desc).size + 1;
+ start = __this_cpu_read(idt_desc.address);
+ end = start + __this_cpu_read(idt_desc.size) + 1;
xen_mc_flush();
@@ -1174,6 +1174,15 @@ asmlinkage void __init xen_start_kernel(void)
xen_smp_init();
+#ifdef CONFIG_ACPI_NUMA
+ /*
+ * The pages we from Xen are not related to machine pages, so
+ * any NUMA information the kernel tries to get from ACPI will
+ * be meaningless. Prevent it from trying.
+ */
+ acpi_numa = -1;
+#endif
+
pgd = (pgd_t *)xen_start_info->pt_base;
if (!xen_initial_domain())
@@ -1256,25 +1265,6 @@ asmlinkage void __init xen_start_kernel(void)
#endif
}
-static uint32_t xen_cpuid_base(void)
-{
- uint32_t base, eax, ebx, ecx, edx;
- char signature[13];
-
- for (base = 0x40000000; base < 0x40010000; base += 0x100) {
- cpuid(base, &eax, &ebx, &ecx, &edx);
- *(uint32_t *)(signature + 0) = ebx;
- *(uint32_t *)(signature + 4) = ecx;
- *(uint32_t *)(signature + 8) = edx;
- signature[12] = 0;
-
- if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2))
- return base;
- }
-
- return 0;
-}
-
static int init_hvm_pv_info(int *major, int *minor)
{
uint32_t eax, ebx, ecx, edx, pages, msr, base;
@@ -1384,6 +1374,18 @@ static bool __init xen_hvm_platform(void)
return true;
}
+bool xen_hvm_need_lapic(void)
+{
+ if (xen_pv_domain())
+ return false;
+ if (!xen_hvm_domain())
+ return false;
+ if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback)
+ return false;
+ return true;
+}
+EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
+
const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = {
.name = "Xen HVM",
.detect = xen_hvm_platform,
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 9e565da5d1f..4ec8035e321 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -22,7 +22,7 @@ static inline void xen_mc_batch(void)
unsigned long flags;
/* need to disable interrupts until this entry is complete */
local_irq_save(flags);
- __get_cpu_var(xen_mc_irq_flags) = flags;
+ __this_cpu_write(xen_mc_irq_flags, flags);
}
static inline struct multicall_space xen_mc_entry(size_t args)
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 23e061b9327..cc9b1e182fc 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -159,8 +159,8 @@ static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
{
struct xen_spinlock *prev;
- prev = __get_cpu_var(lock_spinners);
- __get_cpu_var(lock_spinners) = xl;
+ prev = __this_cpu_read(lock_spinners);
+ __this_cpu_write(lock_spinners, xl);
wmb(); /* set lock of interest before count */
@@ -179,14 +179,14 @@ static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock
asm(LOCK_PREFIX " decw %0"
: "+m" (xl->spinners) : : "memory");
wmb(); /* decrement count before restoring lock */
- __get_cpu_var(lock_spinners) = prev;
+ __this_cpu_write(lock_spinners, prev);
}
static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
struct xen_spinlock *prev;
- int irq = __get_cpu_var(lock_kicker_irq);
+ int irq = __this_cpu_read(lock_kicker_irq);
int ret;
u64 start;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 5da5e53fb94..067759e3d6a 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -135,24 +135,24 @@ static void do_stolen_accounting(void)
/* Add the appropriate number of ticks of stolen time,
including any left-overs from last time. */
- stolen = runnable + offline + __get_cpu_var(xen_residual_stolen);
+ stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);
if (stolen < 0)
stolen = 0;
ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
- __get_cpu_var(xen_residual_stolen) = stolen;
+ __this_cpu_write(xen_residual_stolen, stolen);
account_steal_ticks(ticks);
/* Add the appropriate number of ticks of blocked time,
including any left-overs from last time. */
- blocked += __get_cpu_var(xen_residual_blocked);
+ blocked += __this_cpu_read(xen_residual_blocked);
if (blocked < 0)
blocked = 0;
ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
- __get_cpu_var(xen_residual_blocked) = blocked;
+ __this_cpu_write(xen_residual_blocked, blocked);
account_idle_ticks(ticks);
}