summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/boot/Makefile1
-rw-r--r--arch/x86/boot/bioscall.S2
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S5
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c4
-rw-r--r--arch/x86/crypto/fpu.c4
-rw-r--r--arch/x86/include/asm/amd_iommu.h2
-rw-r--r--arch/x86/include/asm/atomic_32.h3
-rw-r--r--arch/x86/include/asm/desc.h26
-rw-r--r--arch/x86/include/asm/dma-mapping.h173
-rw-r--r--arch/x86/include/asm/iommu.h1
-rw-r--r--arch/x86/include/asm/mce.h63
-rw-r--r--arch/x86/include/asm/msr.h7
-rw-r--r--arch/x86/include/asm/page_64_types.h2
-rw-r--r--arch/x86/include/asm/pci.h1
-rw-r--r--arch/x86/include/asm/pci_x86.h2
-rw-r--r--arch/x86/include/asm/perf_counter.h5
-rw-r--r--arch/x86/include/asm/pgtable_32.h8
-rw-r--r--arch/x86/include/asm/pgtable_64.h5
-rw-r--r--arch/x86/include/asm/therm_throt.h9
-rw-r--r--arch/x86/include/asm/timer.h6
-rw-r--r--arch/x86/include/asm/uaccess.h2
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/realmode/Makefile1
-rw-r--r--arch/x86/kernel/amd_iommu.c16
-rw-r--r--arch/x86/kernel/amd_iommu_init.c26
-rw-r--r--arch/x86/kernel/apic/io_apic.c9
-rw-r--r--arch/x86/kernel/apic/probe_32.c11
-rw-r--r--arch/x86/kernel/apic/summit_32.c1
-rw-r--r--arch/x86/kernel/cpu/common.c5
-rw-r--r--arch/x86/kernel/cpu/mcheck/Makefile9
-rw-r--r--arch/x86/kernel/cpu/mcheck/k7.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c233
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.h38
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c (renamed from arch/x86/kernel/cpu/mcheck/mce_amd_64.c)0
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c250
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c248
-rw-r--r--arch/x86/kernel/cpu/mcheck/non-fatal.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c48
-rw-r--r--arch/x86/kernel/cpu/mcheck/p5.c15
-rw-r--r--arch/x86/kernel/cpu/mcheck/p6.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c106
-rw-r--r--arch/x86/kernel/cpu/mcheck/winchip.c3
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c138
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c12
-rw-r--r--arch/x86/kernel/crash.c6
-rw-r--r--arch/x86/kernel/efi.c31
-rw-r--r--arch/x86/kernel/entry_32.S66
-rw-r--r--arch/x86/kernel/entry_64.S2
-rw-r--r--arch/x86/kernel/ftrace.c6
-rw-r--r--arch/x86/kernel/head_32.S1
-rw-r--r--arch/x86/kernel/head_64.S1
-rw-r--r--arch/x86/kernel/hpet.c3
-rw-r--r--arch/x86/kernel/pci-dma.c8
-rw-r--r--arch/x86/kernel/pci-swiotlb.c3
-rw-r--r--arch/x86/kernel/traps.c3
-rw-r--r--arch/x86/kernel/tsc.c8
-rw-r--r--arch/x86/lib/usercopy_64.c2
-rw-r--r--arch/x86/mm/fault.c5
-rw-r--r--arch/x86/mm/gup.c67
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/pci/acpi.c35
-rw-r--r--arch/x86/pci/amd_bus.c2
-rw-r--r--arch/x86/pci/common.c4
-rw-r--r--arch/x86/pci/i386.c17
-rw-r--r--arch/x86/vdso/Makefile1
67 files changed, 906 insertions, 881 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cf42fc30541..d1430ef6b4f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -28,11 +28,13 @@ config X86
select HAVE_KPROBES
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_FRAME_POINTERS
+ select HAVE_DMA_ATTRS
select HAVE_KRETPROBES
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FUNCTION_GRAPH_FP_TEST
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
select HAVE_FTRACE_SYSCALLS
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 8d16ada2504..ec749c2bfdd 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -70,6 +70,7 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
$(call cc-option, -mpreferred-stack-boundary=2)
KBUILD_CFLAGS += $(call cc-option, -m32)
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
+GCOV_PROFILE := n
$(obj)/bzImage: asflags-y := $(SVGA_MODE)
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
index 507793739ea..1dfbf64e52a 100644
--- a/arch/x86/boot/bioscall.S
+++ b/arch/x86/boot/bioscall.S
@@ -13,7 +13,7 @@
* touching registers they shouldn't be.
*/
- .code16
+ .code16gcc
.text
.globl intcall
.type intcall, @function
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 49c8a4c37d7..e2ff504b4dd 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -15,6 +15,7 @@ KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
+GCOV_PROFILE := n
LDFLAGS := -m elf_$(UTS_MACHINE)
LDFLAGS_vmlinux := -T
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index caba9960170..eb0566e8331 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -845,7 +845,7 @@ ENTRY(aesni_cbc_enc)
*/
ENTRY(aesni_cbc_dec)
cmp $16, LEN
- jb .Lcbc_dec_ret
+ jb .Lcbc_dec_just_ret
mov 480(KEYP), KLEN
add $240, KEYP
movups (IVP), IV
@@ -891,6 +891,7 @@ ENTRY(aesni_cbc_dec)
add $16, OUTP
cmp $16, LEN
jge .Lcbc_dec_loop1
- movups IV, (IVP)
.Lcbc_dec_ret:
+ movups IV, (IVP)
+.Lcbc_dec_just_ret:
ret
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 4e663398f77..c580c5ec1ca 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -198,6 +198,7 @@ static int ecb_encrypt(struct blkcipher_desc *desc,
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
@@ -221,6 +222,7 @@ static int ecb_decrypt(struct blkcipher_desc *desc,
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
@@ -266,6 +268,7 @@ static int cbc_encrypt(struct blkcipher_desc *desc,
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
@@ -289,6 +292,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc,
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index 5f9781a3815..daef6cd2b45 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -48,7 +48,7 @@ static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in,
struct blkcipher_desc desc = {
.tfm = child,
.info = desc_in->info,
- .flags = desc_in->flags,
+ .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
};
kernel_fpu_begin();
@@ -67,7 +67,7 @@ static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in,
struct blkcipher_desc desc = {
.tfm = child,
.info = desc_in->info,
- .flags = desc_in->flags,
+ .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
};
kernel_fpu_begin();
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index 262e0282004..bdf96f119f0 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -29,9 +29,11 @@ extern void amd_iommu_detect(void);
extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
extern void amd_iommu_flush_all_domains(void);
extern void amd_iommu_flush_all_devices(void);
+extern void amd_iommu_shutdown(void);
#else
static inline int amd_iommu_init(void) { return -ENODEV; }
static inline void amd_iommu_detect(void) { }
+static inline void amd_iommu_shutdown(void) { }
#endif
#endif /* _ASM_X86_AMD_IOMMU_H */
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index 8cb9c814e12..2503d4e64c2 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -257,7 +257,7 @@ typedef struct {
/**
* atomic64_read - read atomic64 variable
- * @v: pointer of type atomic64_t
+ * @ptr: pointer of type atomic64_t
*
* Atomically reads the value of @v.
* Doesn't imply a read memory barrier.
@@ -294,7 +294,6 @@ atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
* atomic64_xchg - xchg atomic64 variable
* @ptr: pointer to type atomic64_t
* @new_val: value to assign
- * @old_val: old value that was there
*
* Atomically xchgs the value of @ptr to @new_val and returns
* the old value.
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index c45f415ce31..c993e9e0fed 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -1,7 +1,6 @@
#ifndef _ASM_X86_DESC_H
#define _ASM_X86_DESC_H
-#ifndef __ASSEMBLY__
#include <asm/desc_defs.h>
#include <asm/ldt.h>
#include <asm/mmu.h>
@@ -380,29 +379,4 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
}
-#else
-/*
- * GET_DESC_BASE reads the descriptor base of the specified segment.
- *
- * Args:
- * idx - descriptor index
- * gdt - GDT pointer
- * base - 32bit register to which the base will be written
- * lo_w - lo word of the "base" register
- * lo_b - lo byte of the "base" register
- * hi_b - hi byte of the low word of the "base" register
- *
- * Example:
- * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
- * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
- */
-#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
- movb idx * 8 + 4(gdt), lo_b; \
- movb idx * 8 + 7(gdt), hi_b; \
- shll $16, base; \
- movw idx * 8 + 2(gdt), lo_w;
-
-
-#endif /* __ASSEMBLY__ */
-
#endif /* _ASM_X86_DESC_H */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index b93405b228b..1c3f9435f1c 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -33,6 +33,8 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
#endif
}
+#include <asm-generic/dma-mapping-common.h>
+
/* Make sure we keep the same behaviour */
static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
@@ -53,177 +55,6 @@ extern int dma_set_mask(struct device *dev, u64 mask);
extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag);
-static inline dma_addr_t
-dma_map_single(struct device *hwdev, void *ptr, size_t size,
- enum dma_data_direction dir)
-{
- struct dma_map_ops *ops = get_dma_ops(hwdev);
- dma_addr_t addr;
-
- BUG_ON(!valid_dma_direction(dir));
- kmemcheck_mark_initialized(ptr, size);
- addr = ops->map_page(hwdev, virt_to_page(ptr),
- (unsigned long)ptr & ~PAGE_MASK, size,
- dir, NULL);
- debug_dma_map_page(hwdev, virt_to_page(ptr),
- (unsigned long)ptr & ~PAGE_MASK, size,
- dir, addr, true);
- return addr;
-}
-
-static inline void
-dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
- enum dma_data_direction dir)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- BUG_ON(!valid_dma_direction(dir));
- if (ops->unmap_page)
- ops->unmap_page(dev, addr, size, dir, NULL);
- debug_dma_unmap_page(dev, addr, size, dir, true);
-}
-
-static inline int
-dma_map_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, enum dma_data_direction dir)
-{
- struct dma_map_ops *ops = get_dma_ops(hwdev);
- int ents;
- struct scatterlist *s;
- int i;
-
- BUG_ON(!valid_dma_direction(dir));
- for_each_sg(sg, s, nents, i)
- kmemcheck_mark_initialized(sg_virt(s), s->length);
- ents = ops->map_sg(hwdev, sg, nents, dir, NULL);
- debug_dma_map_sg(hwdev, sg, nents, ents, dir);
-
- return ents;
-}
-
-static inline void
-dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
- enum dma_data_direction dir)
-{
- struct dma_map_ops *ops = get_dma_ops(hwdev);
-
- BUG_ON(!valid_dma_direction(dir));
- debug_dma_unmap_sg(hwdev, sg, nents, dir);
- if (ops->unmap_sg)
- ops->unmap_sg(hwdev, sg, nents, dir, NULL);
-}
-
-static inline void
-dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
- size_t size, enum dma_data_direction dir)
-{
- struct dma_map_ops *ops = get_dma_ops(hwdev);
-
- BUG_ON(!valid_dma_direction(dir));
- if (ops->sync_single_for_cpu)
- ops->sync_single_for_cpu(hwdev, dma_handle, size, dir);
- debug_dma_sync_single_for_cpu(hwdev, dma_handle, size, dir);
- flush_write_buffers();
-}
-
-static inline void
-dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
- size_t size, enum dma_data_direction dir)
-{
- struct dma_map_ops *ops = get_dma_ops(hwdev);
-
- BUG_ON(!valid_dma_direction(dir));
- if (ops->sync_single_for_device)
- ops->sync_single_for_device(hwdev, dma_handle, size, dir);
- debug_dma_sync_single_for_device(hwdev, dma_handle, size, dir);
- flush_write_buffers();
-}
-
-static inline void
-dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
- unsigned long offset, size_t size,
- enum dma_data_direction dir)
-{
- struct dma_map_ops *ops = get_dma_ops(hwdev);
-
- BUG_ON(!valid_dma_direction(dir));
- if (ops->sync_single_range_for_cpu)
- ops->sync_single_range_for_cpu(hwdev, dma_handle, offset,
- size, dir);
- debug_dma_sync_single_range_for_cpu(hwdev, dma_handle,
- offset, size, dir);
- flush_write_buffers();
-}
-
-static inline void
-dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
- unsigned long offset, size_t size,
- enum dma_data_direction dir)
-{
- struct dma_map_ops *ops = get_dma_ops(hwdev);
-
- BUG_ON(!valid_dma_direction(dir));
- if (ops->sync_single_range_for_device)
- ops->sync_single_range_for_device(hwdev, dma_handle,
- offset, size, dir);
- debug_dma_sync_single_range_for_device(hwdev, dma_handle,
- offset, size, dir);
- flush_write_buffers();
-}
-
-static inline void
-dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
- int nelems, enum dma_data_direction dir)
-{
- struct dma_map_ops *ops = get_dma_ops(hwdev);
-
- BUG_ON(!valid_dma_direction(dir));
- if (ops->sync_sg_for_cpu)
- ops->sync_sg_for_cpu(hwdev, sg, nelems, dir);
- debug_dma_sync_sg_for_cpu(hwdev, sg, nelems, dir);
- flush_write_buffers();
-}
-
-static inline void
-dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
- int nelems, enum dma_data_direction dir)
-{
- struct dma_map_ops *ops = get_dma_ops(hwdev);
-
- BUG_ON(!valid_dma_direction(dir));
- if (ops->sync_sg_for_device)
- ops->sync_sg_for_device(hwdev, sg, nelems, dir);
- debug_dma_sync_sg_for_device(hwdev, sg, nelems, dir);
-
- flush_write_buffers();
-}
-
-static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
- size_t offset, size_t size,
- enum dma_data_direction dir)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- dma_addr_t addr;
-
- BUG_ON(!valid_dma_direction(dir));
- kmemcheck_mark_initialized(page_address(page) + offset, size);
- addr = ops->map_page(dev, page, offset, size, dir, NULL);
- debug_dma_map_page(dev, page, offset, size, dir, addr, false);
-
- return addr;
-}
-
-static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
- size_t size, enum dma_data_direction dir)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- BUG_ON(!valid_dma_direction(dir));
- if (ops->unmap_page)
- ops->unmap_page(dev, addr, size, dir, NULL);
- debug_dma_unmap_page(dev, addr, size, dir, false);
-}
-
static inline void
dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction dir)
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index af326a2975b..fd6d21bbee6 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -6,6 +6,7 @@ extern void no_iommu_init(void);
extern struct dma_map_ops nommu_dma_ops;
extern int force_iommu, no_iommu;
extern int iommu_detected;
+extern int iommu_pass_through;
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 540a466e50f..5cdd8d100ec 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -102,15 +102,39 @@ struct mce_log {
#ifdef __KERNEL__
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <asm/atomic.h>
+
extern int mce_disabled;
+extern int mce_p5_enabled;
-#include <asm/atomic.h>
-#include <linux/percpu.h>
+#ifdef CONFIG_X86_MCE
+void mcheck_init(struct cpuinfo_x86 *c);
+#else
+static inline void mcheck_init(struct cpuinfo_x86 *c) {}
+#endif
+
+#ifdef CONFIG_X86_OLD_MCE
+extern int nr_mce_banks;
+void amd_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
+#endif
+
+#ifdef CONFIG_X86_ANCIENT_MCE
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
+void winchip_mcheck_init(struct cpuinfo_x86 *c);
+static inline void enable_p5_mce(void) { mce_p5_enabled = 1; }
+#else
+static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void enable_p5_mce(void) {}
+#endif
void mce_setup(struct mce *m);
void mce_log(struct mce *m);
DECLARE_PER_CPU(struct sys_device, mce_dev);
-extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
/*
* To support more than 128 would need to escape the predefined
@@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c);
DECLARE_PER_CPU(unsigned, mce_exception_count);
DECLARE_PER_CPU(unsigned, mce_poll_count);
-void mce_log_therm_throt_event(__u64 status);
-
extern atomic_t mce_entry;
-void do_machine_check(struct pt_regs *, long);
-
typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
@@ -167,13 +187,32 @@ void mce_notify_process(void);
DECLARE_PER_CPU(struct mce, injectm);
extern struct file_operations mce_chrdev_ops;
-#ifdef CONFIG_X86_MCE
-void mcheck_init(struct cpuinfo_x86 *c);
-#else
-#define mcheck_init(c) do { } while (0)
-#endif
+/*
+ * Exception handler
+ */
+
+/* Call the installed machine check handler for this CPU setup. */
+extern void (*machine_check_vector)(struct pt_regs *, long error_code);
+void do_machine_check(struct pt_regs *, long);
+
+/*
+ * Threshold handler
+ */
extern void (*mce_threshold_vector)(void);
+extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
+
+/*
+ * Thermal handler
+ */
+
+void intel_init_thermal(struct cpuinfo_x86 *c);
+
+#ifdef CONFIG_X86_NEW_MCE
+void mce_log_therm_throt_event(__u64 status);
+#else
+static inline void mce_log_therm_throt_event(__u64 status) {}
+#endif
#endif /* __KERNEL__ */
#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 22603764e7d..48ad9d29484 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -3,13 +3,10 @@
#include <asm/msr-index.h>
-#ifndef __ASSEMBLY__
-# include <linux/types.h>
-#endif
-
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
+#include <linux/types.h>
#include <asm/asm.h>
#include <asm/errno.h>
#include <asm/cpumask.h>
@@ -264,6 +261,4 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
#endif /* CONFIG_SMP */
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
-
-
#endif /* _ASM_X86_MSR_H */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 8d382d3abf3..7639dbf5d22 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -41,7 +41,7 @@
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
#define __PHYSICAL_MASK_SHIFT 46
-#define __VIRTUAL_MASK_SHIFT 48
+#define __VIRTUAL_MASK_SHIFT 47
/*
* Kernel image size is limited to 512 MB (see level2_kernel_pgt in
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index b51a1e8b0ba..927958d13c1 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -130,6 +130,7 @@ extern void pci_iommu_alloc(void);
/* generic pci stuff */
#include <asm-generic/pci.h>
+#define PCIBIOS_MAX_MEM_32 0xffffffff
#ifdef CONFIG_NUMA
/* Returns the node based on pci bus */
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index e60fd3e14bd..cb739cc0a08 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -25,7 +25,7 @@
#define PCI_BIOS_IRQ_SCAN 0x2000
#define PCI_ASSIGN_ALL_BUSSES 0x4000
#define PCI_CAN_SKIP_ISA_ALIGN 0x8000
-#define PCI_USE__CRS 0x10000
+#define PCI_NO_ROOT_CRS 0x10000
#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000
#define PCI_HAS_IO_ECS 0x40000
#define PCI_NOASSIGN_ROMS 0x80000
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index 876ed97147b..5fb33e160ea 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -84,11 +84,6 @@ union cpuid10_edx {
#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
-extern void set_perf_counter_pending(void);
-
-#define clear_perf_counter_pending() do { } while (0)
-#define test_perf_counter_pending() (0)
-
#ifdef CONFIG_PERF_COUNTERS
extern void init_hw_perf_counters(void);
extern void perf_counters_lapic_init(void);
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 31bd120cf2a..01fd9461d32 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -49,13 +49,17 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
#endif
#if defined(CONFIG_HIGHPTE)
+#define __KM_PTE \
+ (in_nmi() ? KM_NMI_PTE : \
+ in_irq() ? KM_IRQ_PTE : \
+ KM_PTE0)
#define pte_offset_map(dir, address) \
- ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \
+ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \
pte_index((address)))
#define pte_offset_map_nested(dir, address) \
((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \
pte_index((address)))
-#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0)
+#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
#else
#define pte_offset_map(dir, address) \
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index abde308fdb0..c57a3011714 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -165,10 +165,7 @@ extern void cleanup_highmap(void);
/* fs/proc/kcore.c */
#define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK)
-#define kc_offset_to_vaddr(o) \
- (((o) & (1UL << (__VIRTUAL_MASK_SHIFT - 1))) \
- ? ((o) | ~__VIRTUAL_MASK) \
- : (o))
+#define kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK)
#define __HAVE_ARCH_PTE_SAME
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/therm_throt.h b/arch/x86/include/asm/therm_throt.h
deleted file mode 100644
index c62349ee786..00000000000
--- a/arch/x86/include/asm/therm_throt.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _ASM_X86_THERM_THROT_H
-#define _ASM_X86_THERM_THROT_H
-
-#include <asm/atomic.h>
-
-extern atomic_t therm_throt_en;
-int therm_throt_process(int curr);
-
-#endif /* _ASM_X86_THERM_THROT_H */
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index bd37ed444a2..20ca9c4d468 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -45,12 +45,16 @@ extern int no_timer_check;
*/
DECLARE_PER_CPU(unsigned long, cyc2ns);
+DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
{
- return cyc * per_cpu(cyc2ns, smp_processor_id()) >> CYC2NS_SCALE_FACTOR;
+ int cpu = smp_processor_id();
+ unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
+ ns += cyc * per_cpu(cyc2ns, cpu) >> CYC2NS_SCALE_FACTOR;
+ return ns;
}
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index b685ece89d5..20e6a795e16 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -25,7 +25,7 @@
#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
#define KERNEL_DS MAKE_MM_SEG(-1UL)
-#define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
+#define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX)
#define get_ds() (KERNEL_DS)
#define get_fs() (current_thread_info()->addr_limit)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f3477bb8456..6c327b852e2 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -24,6 +24,8 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
CFLAGS_hpet.o := $(nostackp)
CFLAGS_tsc.o := $(nostackp)
CFLAGS_paravirt.o := $(nostackp)
+GCOV_PROFILE_vsyscall_64.o := n
+GCOV_PROFILE_hpet.o := n
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile
index 167bc16ce0e..6a564ac67ef 100644
--- a/arch/x86/kernel/acpi/realmode/Makefile
+++ b/arch/x86/kernel/acpi/realmode/Makefile
@@ -42,6 +42,7 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D_WAKEUP -D__KERNEL__ \
$(call cc-option, -mpreferred-stack-boundary=2)
KBUILD_CFLAGS += $(call cc-option, -m32)
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
+GCOV_PROFILE := n
WAKEUP_OBJS = $(addprefix $(obj)/,$(wakeup-y))
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 1c60554537c..9372f0406ad 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -434,6 +434,16 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
}
+/* Flush the whole IO/TLB for a given protection domain - including PDE */
+static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid)
+{
+ u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+
+ INC_STATS_COUNTER(domain_flush_single);
+
+ iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1);
+}
+
/*
* This function is used to flush the IO/TLB for a given protection domain
* on every IOMMU in the system
@@ -1078,7 +1088,13 @@ static void attach_device(struct amd_iommu *iommu,
amd_iommu_pd_table[devid] = domain;
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ /*
+ * We might boot into a crash-kernel here. The crashed kernel
+ * left the caches in the IOMMU dirty. So we have to flush
+ * here to evict all dirty stuff.
+ */
iommu_queue_inv_dev_entry(iommu, devid);
+ iommu_flush_tlb_pde(iommu, domain->id);
}
/*
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 238989ec077..10b2accd12e 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -260,6 +260,14 @@ static void iommu_enable(struct amd_iommu *iommu)
static void iommu_disable(struct amd_iommu *iommu)
{
+ /* Disable command buffer */
+ iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
+
+ /* Disable event logging and event interrupts */
+ iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
+ iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
+
+ /* Disable IOMMU hardware itself */
iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
}
@@ -478,6 +486,10 @@ static void iommu_enable_event_buffer(struct amd_iommu *iommu)
memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
&entry, sizeof(entry));
+ /* set head and tail to zero manually */
+ writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
+ writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
+
iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
}
@@ -1042,6 +1054,7 @@ static void enable_iommus(void)
struct amd_iommu *iommu;
for_each_iommu(iommu) {
+ iommu_disable(iommu);
iommu_set_device_table(iommu);
iommu_enable_command_buffer(iommu);
iommu_enable_event_buffer(iommu);
@@ -1066,12 +1079,6 @@ static void disable_iommus(void)
static int amd_iommu_resume(struct sys_device *dev)
{
- /*
- * Disable IOMMUs before reprogramming the hardware registers.
- * IOMMU is still enabled from the resume kernel.
- */
- disable_iommus();
-
/* re-load the hardware */
enable_iommus();
@@ -1079,8 +1086,8 @@ static int amd_iommu_resume(struct sys_device *dev)
* we have to flush after the IOMMUs are enabled because a
* disabled IOMMU will never execute the commands we send
*/
- amd_iommu_flush_all_domains();
amd_iommu_flush_all_devices();
+ amd_iommu_flush_all_domains();
return 0;
}
@@ -1273,6 +1280,11 @@ free:
goto out;
}
+void amd_iommu_shutdown(void)
+{
+ disable_iommus();
+}
+
/****************************************************************************
*
* Early detect code. This code runs at IOMMU detection time in the DMA
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ef8d9290c7e..b7a79207295 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -462,7 +462,8 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
static void
__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
- union entry_union eu;
+ union entry_union eu = {{0, 0}};
+
eu.entry = e;
io_apic_write(apic, 0x11 + 2*pin, eu.w2);
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
@@ -2003,7 +2004,9 @@ void disable_IO_APIC(void)
/*
* Use virtual wire A mode when interrupt remapping is enabled.
*/
- disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1);
+ if (cpu_has_apic)
+ disconnect_bsp_APIC(!intr_remapping_enabled &&
+ ioapic_i8259.pin != -1);
}
#ifdef CONFIG_X86_32
@@ -3567,7 +3570,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
#endif /* CONFIG_SMP */
-struct irq_chip dmar_msi_type = {
+static struct irq_chip dmar_msi_type = {
.name = "DMAR_MSI",
.unmask = dmar_msi_unmask,
.mask = dmar_msi_mask,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 440a8bccd91..0c0182cc947 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -20,23 +20,12 @@
#include <asm/apic.h>
#include <asm/setup.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <asm/mpspec.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
#include <linux/smp.h>
-#include <linux/init.h>
#include <asm/ipi.h>
-#include <linux/smp.h>
-#include <linux/init.h>
#include <linux/interrupt.h>
#include <asm/acpi.h>
#include <asm/e820.h>
-#include <asm/setup.h>
#ifdef CONFIG_HOTPLUG_CPU
#define DEFAULT_SEND_IPI (1)
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 344eee4ac0a..eafdfbd1ea9 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -44,7 +44,6 @@
#include <asm/ipi.h>
#include <linux/kernel.h>
#include <linux/string.h>
-#include <linux/init.h>
#include <linux/gfp.h>
#include <linux/smp.h>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9fa33886c0d..6b26d4deada 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -108,7 +108,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
/* data */
[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
- [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
+ [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } },
[GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
GDT_STACK_CANARY_INIT
#endif
@@ -848,6 +848,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
numa_add_cpu(smp_processor_id());
#endif
+
+ /* Cap the iomem address space to what is addressable on all CPUs */
+ iomem_resource.end &= (1ULL << c->x86_phys_bits) - 1;
}
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 45004faf67e..188a1ca5ad2 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,11 +1,12 @@
-obj-y = mce.o therm_throt.o
+obj-y = mce.o
obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o
obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o
obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
-obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o
-obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o
-obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
+obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
+obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
+
+obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index 89e51042415..b945d5dbc60 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -10,10 +10,9 @@
#include <asm/processor.h>
#include <asm/system.h>
+#include <asm/mce.h>
#include <asm/msr.h>
-#include "mce.h"
-
/* Machine Check Handler For AMD Athlon/Duron: */
static void k7_machine_check(struct pt_regs *regs, long error_code)
{
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index fabba15e455..284d1de968b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -44,7 +44,6 @@
#include <asm/msr.h>
#include "mce-internal.h"
-#include "mce.h"
/* Handle unconfigured int18 (should never happen) */
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
@@ -57,7 +56,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
void (*machine_check_vector)(struct pt_regs *, long error_code) =
unexpected_machine_check;
-int mce_disabled;
+int mce_disabled __read_mostly;
#ifdef CONFIG_X86_NEW_MCE
@@ -76,21 +75,22 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
* 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
* 3: never panic or SIGBUS, log all errors (for testing only)
*/
-static int tolerant = 1;
-static int banks;
-static u64 *bank;
-static unsigned long notify_user;
-static int rip_msr;
-static int mce_bootlog = -1;
-static int monarch_timeout = -1;
-static int mce_panic_timeout;
-static int mce_dont_log_ce;
-int mce_cmci_disabled;
-int mce_ignore_ce;
-int mce_ser;
-
-static char trigger[128];
-static char *trigger_argv[2] = { trigger, NULL };
+static int tolerant __read_mostly = 1;
+static int banks __read_mostly;
+static u64 *bank __read_mostly;
+static int rip_msr __read_mostly;
+static int mce_bootlog __read_mostly = -1;
+static int monarch_timeout __read_mostly = -1;
+static int mce_panic_timeout __read_mostly;
+static int mce_dont_log_ce __read_mostly;
+int mce_cmci_disabled __read_mostly;
+int mce_ignore_ce __read_mostly;
+int mce_ser __read_mostly;
+
+/* User mode helper program triggered by machine check event */
+static unsigned long mce_need_notify;
+static char mce_helper[128];
+static char *mce_helper_argv[2] = { mce_helper, NULL };
static unsigned long dont_init_banks;
@@ -180,7 +180,7 @@ void mce_log(struct mce *mce)
wmb();
mce->finished = 1;
- set_bit(0, &notify_user);
+ set_bit(0, &mce_need_notify);
}
static void print_mce(struct mce *m)
@@ -691,18 +691,21 @@ static atomic_t global_nwo;
* in the entry order.
* TBD double check parallel CPU hotunplug
*/
-static int mce_start(int no_way_out, int *order)
+static int mce_start(int *no_way_out)
{
- int nwo;
+ int order;
int cpus = num_online_cpus();
u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
- if (!timeout) {
- *order = -1;
- return no_way_out;
- }
+ if (!timeout)
+ return -1;
- atomic_add(no_way_out, &global_nwo);
+ atomic_add(*no_way_out, &global_nwo);
+ /*
+ * global_nwo should be updated before mce_callin
+ */
+ smp_wmb();
+ order = atomic_add_return(1, &mce_callin);
/*
* Wait for everyone.
@@ -710,40 +713,43 @@ static int mce_start(int no_way_out, int *order)
while (atomic_read(&mce_callin) != cpus) {
if (mce_timed_out(&timeout)) {
atomic_set(&global_nwo, 0);
- *order = -1;
- return no_way_out;
+ return -1;
}
ndelay(SPINUNIT);
}
/*
- * Cache the global no_way_out state.
+ * mce_callin should be read before global_nwo
*/
- nwo = atomic_read(&global_nwo);
+ smp_rmb();
- /*
- * Monarch starts executing now, the others wait.
- */
- if (*order == 1) {
+ if (order == 1) {
+ /*
+ * Monarch: Starts executing now, the others wait.
+ */
atomic_set(&mce_executing, 1);
- return nwo;
+ } else {
+ /*
+ * Subject: Now start the scanning loop one by one in
+ * the original callin order.
+ * This way when there are any shared banks it will be
+ * only seen by one CPU before cleared, avoiding duplicates.
+ */
+ while (atomic_read(&mce_executing) < order) {
+ if (mce_timed_out(&timeout)) {
+ atomic_set(&global_nwo, 0);
+ return -1;
+ }
+ ndelay(SPINUNIT);
+ }
}
/*
- * Now start the scanning loop one by one
- * in the original callin order.
- * This way when there are any shared banks it will
- * be only seen by one CPU before cleared, avoiding duplicates.
+ * Cache the global no_way_out state.
*/
- while (atomic_read(&mce_executing) < *order) {
- if (mce_timed_out(&timeout)) {
- atomic_set(&global_nwo, 0);
- *order = -1;
- return no_way_out;
- }
- ndelay(SPINUNIT);
- }
- return nwo;
+ *no_way_out = atomic_read(&global_nwo);
+
+ return order;
}
/*
@@ -863,7 +869,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* check handler.
*/
int order;
-
/*
* If no_way_out gets set, there is no safe way to recover from this
* MCE. If tolerant is cranked up, we'll try anyway.
@@ -887,7 +892,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
if (!banks)
goto out;
- order = atomic_add_return(1, &mce_callin);
mce_setup(&m);
m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
@@ -909,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* This way we don't report duplicated events on shared banks
* because the first one to see it will clear it.
*/
- no_way_out = mce_start(no_way_out, &order);
+ order = mce_start(&no_way_out);
for (i = 0; i < banks; i++) {
__clear_bit(i, toclear);
if (!bank[i])
@@ -1118,7 +1122,7 @@ static void mcheck_timer(unsigned long data)
static void mce_do_trigger(struct work_struct *work)
{
- call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
+ call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
}
static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
@@ -1135,7 +1139,7 @@ int mce_notify_irq(void)
clear_thread_flag(TIF_MCE_NOTIFY);
- if (test_and_clear_bit(0, &notify_user)) {
+ if (test_and_clear_bit(0, &mce_need_notify)) {
wake_up_interruptible(&mce_wait);
/*
@@ -1143,7 +1147,7 @@ int mce_notify_irq(void)
* work_pending is always cleared before the function is
* executed.
*/
- if (trigger[0] && !work_pending(&mce_trigger_work))
+ if (mce_helper[0] && !work_pending(&mce_trigger_work))
schedule_work(&mce_trigger_work);
if (__ratelimit(&ratelimit))
@@ -1245,7 +1249,7 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
* Various K7s with broken bank 0 around. Always disable
* by default.
*/
- if (c->x86 == 6)
+ if (c->x86 == 6 && banks > 0)
bank[0] = 0;
}
@@ -1282,8 +1286,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
return;
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
- if (mce_p5_enabled())
- intel_p5_mcheck_init(c);
+ intel_p5_mcheck_init(c);
break;
case X86_VENDOR_CENTAUR:
winchip_mcheck_init(c);
@@ -1609,8 +1612,9 @@ static int mce_resume(struct sys_device *dev)
static void mce_cpu_restart(void *data)
{
del_timer_sync(&__get_cpu_var(mce_timer));
- if (mce_available(&current_cpu_data))
- mce_init();
+ if (!mce_available(&current_cpu_data))
+ return;
+ mce_init();
mce_init_timer();
}
@@ -1620,6 +1624,26 @@ static void mce_restart(void)
on_each_cpu(mce_cpu_restart, NULL, 1);
}
+/* Toggle features for corrected errors */
+static void mce_disable_ce(void *all)
+{
+ if (!mce_available(&current_cpu_data))
+ return;
+ if (all)
+ del_timer_sync(&__get_cpu_var(mce_timer));
+ cmci_clear();
+}
+
+static void mce_enable_ce(void *all)
+{
+ if (!mce_available(&current_cpu_data))
+ return;
+ cmci_reenable();
+ cmci_recheck();
+ if (all)
+ mce_init_timer();
+}
+
static struct sysdev_class mce_sysclass = {
.suspend = mce_suspend,
.shutdown = mce_shutdown,
@@ -1659,9 +1683,9 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
static ssize_t
show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
{
- strcpy(buf, trigger);
+ strcpy(buf, mce_helper);
strcat(buf, "\n");
- return strlen(trigger) + 1;
+ return strlen(mce_helper) + 1;
}
static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
@@ -1670,10 +1694,10 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
char *p;
int len;
- strncpy(trigger, buf, sizeof(trigger));
- trigger[sizeof(trigger)-1] = 0;
- len = strlen(trigger);
- p = strchr(trigger, '\n');
+ strncpy(mce_helper, buf, sizeof(mce_helper));
+ mce_helper[sizeof(mce_helper)-1] = 0;
+ len = strlen(mce_helper);
+ p = strchr(mce_helper, '\n');
if (*p)
*p = 0;
@@ -1681,6 +1705,52 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
return len;
}
+static ssize_t set_ignore_ce(struct sys_device *s,
+ struct sysdev_attribute *attr,
+ const char *buf, size_t size)
+{
+ u64 new;
+
+ if (strict_strtoull(buf, 0, &new) < 0)
+ return -EINVAL;
+
+ if (mce_ignore_ce ^ !!new) {
+ if (new) {
+ /* disable ce features */
+ on_each_cpu(mce_disable_ce, (void *)1, 1);
+ mce_ignore_ce = 1;
+ } else {
+ /* enable ce features */
+ mce_ignore_ce = 0;
+ on_each_cpu(mce_enable_ce, (void *)1, 1);
+ }
+ }
+ return size;
+}
+
+static ssize_t set_cmci_disabled(struct sys_device *s,
+ struct sysdev_attribute *attr,
+ const char *buf, size_t size)
+{
+ u64 new;
+
+ if (strict_strtoull(buf, 0, &new) < 0)
+ return -EINVAL;
+
+ if (mce_cmci_disabled ^ !!new) {
+ if (new) {
+ /* disable cmci */
+ on_each_cpu(mce_disable_ce, NULL, 1);
+ mce_cmci_disabled = 1;
+ } else {
+ /* enable cmci */
+ mce_cmci_disabled = 0;
+ on_each_cpu(mce_enable_ce, NULL, 1);
+ }
+ }
+ return size;
+}
+
static ssize_t store_int_with_restart(struct sys_device *s,
struct sysdev_attribute *attr,
const char *buf, size_t size)
@@ -1693,6 +1763,7 @@ static ssize_t store_int_with_restart(struct sys_device *s,
static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
+static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
static struct sysdev_ext_attribute attr_check_interval = {
_SYSDEV_ATTR(check_interval, 0644, sysdev_show_int,
@@ -1700,9 +1771,24 @@ static struct sysdev_ext_attribute attr_check_interval = {
&check_interval
};
+static struct sysdev_ext_attribute attr_ignore_ce = {
+ _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce),
+ &mce_ignore_ce
+};
+
+static struct sysdev_ext_attribute attr_cmci_disabled = {
+ _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled),
+ &mce_cmci_disabled
+};
+
static struct sysdev_attribute *mce_attrs[] = {
- &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger,
+ &attr_tolerant.attr,
+ &attr_check_interval.attr,
+ &attr_trigger,
&attr_monarch_timeout.attr,
+ &attr_dont_log_ce.attr,
+ &attr_ignore_ce.attr,
+ &attr_cmci_disabled.attr,
NULL
};
@@ -1712,7 +1798,7 @@ static cpumask_var_t mce_dev_initialized;
static __cpuinit int mce_create_device(unsigned int cpu)
{
int err;
- int i;
+ int i, j;
if (!mce_available(&boot_cpu_data))
return -EIO;
@@ -1730,9 +1816,9 @@ static __cpuinit int mce_create_device(unsigned int cpu)
if (err)
goto error;
}
- for (i = 0; i < banks; i++) {
+ for (j = 0; j < banks; j++) {
err = sysdev_create_file(&per_cpu(mce_dev, cpu),
- &bank_attrs[i]);
+ &bank_attrs[j]);
if (err)
goto error2;
}
@@ -1740,8 +1826,8 @@ static __cpuinit int mce_create_device(unsigned int cpu)
return 0;
error2:
- while (--i >= 0)
- sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
+ while (--j >= 0)
+ sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]);
error:
while (--i >= 0)
sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
@@ -1883,7 +1969,7 @@ static __init int mce_init_device(void)
if (!mce_available(&boot_cpu_data))
return -EIO;
- alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
+ zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
err = mce_init_banks();
if (err)
@@ -1915,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
/* This has to be run for each processor */
void mcheck_init(struct cpuinfo_x86 *c)
{
- if (mce_disabled == 1)
+ if (mce_disabled)
return;
switch (c->x86_vendor) {
@@ -1945,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c)
static int __init mcheck_enable(char *str)
{
- mce_disabled = -1;
+ mce_p5_enabled = 1;
return 1;
}
-
__setup("mce", mcheck_enable);
#endif /* CONFIG_X86_OLD_MCE */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h
deleted file mode 100644
index 84a552b458c..00000000000
--- a/arch/x86/kernel/cpu/mcheck/mce.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#include <linux/init.h>
-#include <asm/mce.h>
-
-#ifdef CONFIG_X86_OLD_MCE
-void amd_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
-#endif
-
-#ifdef CONFIG_X86_ANCIENT_MCE
-void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
-void winchip_mcheck_init(struct cpuinfo_x86 *c);
-extern int mce_p5_enable;
-static inline int mce_p5_enabled(void) { return mce_p5_enable; }
-static inline void enable_p5_mce(void) { mce_p5_enable = 1; }
-#else
-static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
-static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
-static inline int mce_p5_enabled(void) { return 0; }
-static inline void enable_p5_mce(void) { }
-#endif
-
-/* Call the installed machine check handler for this CPU setup. */
-extern void (*machine_check_vector)(struct pt_regs *, long error_code);
-
-#ifdef CONFIG_X86_OLD_MCE
-
-extern int nr_mce_banks;
-
-void intel_set_thermal_handler(void);
-
-#else
-
-static inline void intel_set_thermal_handler(void) { }
-
-#endif
-
-void intel_init_thermal(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index ddae21620bd..ddae21620bd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 2b011d2d857..e1acec0f7a3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -1,74 +1,226 @@
/*
- * Common code for Intel machine checks
+ * Intel specific MCE features.
+ * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
+ * Copyright (C) 2008, 2009 Intel Corporation
+ * Author: Andi Kleen
*/
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <asm/therm_throt.h>
-#include <asm/processor.h>
-#include <asm/system.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
#include <asm/apic.h>
+#include <asm/processor.h>
#include <asm/msr.h>
+#include <asm/mce.h>
+
+/*
+ * Support for Intel Correct Machine Check Interrupts. This allows
+ * the CPU to raise an interrupt when a corrected machine check happened.
+ * Normally we pick those up using a regular polling timer.
+ * Also supports reliable discovery of shared banks.
+ */
-#include "mce.h"
+static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
-void intel_init_thermal(struct cpuinfo_x86 *c)
+/*
+ * cmci_discover_lock protects against parallel discovery attempts
+ * which could race against each other.
+ */
+static DEFINE_SPINLOCK(cmci_discover_lock);
+
+#define CMCI_THRESHOLD 1
+
+static int cmci_supported(int *banks)
{
- unsigned int cpu = smp_processor_id();
- int tm2 = 0;
- u32 l, h;
+ u64 cap;
- /* Thermal monitoring depends on ACPI and clock modulation*/
- if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
- return;
+ if (mce_cmci_disabled || mce_ignore_ce)
+ return 0;
/*
- * First check if its enabled already, in which case there might
- * be some SMM goo which handles it, so we can't even put a handler
- * since it might be delivered via SMI already:
+ * Vendor check is not strictly needed, but the initial
+ * initialization is vendor keyed and this
+ * makes sure none of the backdoors are entered otherwise.
*/
- rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- h = apic_read(APIC_LVTTHMR);
- if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
- printk(KERN_DEBUG
- "CPU%d: Thermal monitoring handled by SMI\n", cpu);
- return;
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return 0;
+ if (!cpu_has_apic || lapic_get_maxlvt() < 6)
+ return 0;
+ rdmsrl(MSR_IA32_MCG_CAP, cap);
+ *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
+ return !!(cap & MCG_CMCI_P);
+}
+
+/*
+ * The interrupt handler. This is called on every event.
+ * Just call the poller directly to log any events.
+ * This could in theory increase the threshold under high load,
+ * but doesn't for now.
+ */
+static void intel_threshold_interrupt(void)
+{
+ machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+ mce_notify_irq();
+}
+
+static void print_update(char *type, int *hdr, int num)
+{
+ if (*hdr == 0)
+ printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
+ *hdr = 1;
+ printk(KERN_CONT " %s:%d", type, num);
+}
+
+/*
+ * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
+ * on this CPU. Use the algorithm recommended in the SDM to discover shared
+ * banks.
+ */
+static void cmci_discover(int banks, int boot)
+{
+ unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
+ unsigned long flags;
+ int hdr = 0;
+ int i;
+
+ spin_lock_irqsave(&cmci_discover_lock, flags);
+ for (i = 0; i < banks; i++) {
+ u64 val;
+
+ if (test_bit(i, owned))
+ continue;
+
+ rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+
+ /* Already owned by someone else? */
+ if (val & CMCI_EN) {
+ if (test_and_clear_bit(i, owned) || boot)
+ print_update("SHD", &hdr, i);
+ __clear_bit(i, __get_cpu_var(mce_poll_banks));
+ continue;
+ }
+
+ val |= CMCI_EN | CMCI_THRESHOLD;
+ wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
+ rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+
+ /* Did the enable bit stick? -- the bank supports CMCI */
+ if (val & CMCI_EN) {
+ if (!test_and_set_bit(i, owned) || boot)
+ print_update("CMCI", &hdr, i);
+ __clear_bit(i, __get_cpu_var(mce_poll_banks));
+ } else {
+ WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
+ }
}
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
+ if (hdr)
+ printk(KERN_CONT "\n");
+}
+
+/*
+ * Just in case we missed an event during initialization check
+ * all the CMCI owned banks.
+ */
+void cmci_recheck(void)
+{
+ unsigned long flags;
+ int banks;
+
+ if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
+ return;
+ local_irq_save(flags);
+ machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+ local_irq_restore(flags);
+}
- if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
- tm2 = 1;
+/*
+ * Disable CMCI on this CPU for all banks it owns when it goes down.
+ * This allows other CPUs to claim the banks on rediscovery.
+ */
+void cmci_clear(void)
+{
+ unsigned long flags;
+ int i;
+ int banks;
+ u64 val;
- /* Check whether a vector already exists */
- if (h & APIC_VECTOR_MASK) {
- printk(KERN_DEBUG
- "CPU%d: Thermal LVT vector (%#x) already installed\n",
- cpu, (h & APIC_VECTOR_MASK));
+ if (!cmci_supported(&banks))
return;
+ spin_lock_irqsave(&cmci_discover_lock, flags);
+ for (i = 0; i < banks; i++) {
+ if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+ continue;
+ /* Disable CMCI */
+ rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+ val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
+ wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
+ __clear_bit(i, __get_cpu_var(mce_banks_owned));
}
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
+}
+
+/*
+ * After a CPU went down cycle through all the others and rediscover
+ * Must run in process context.
+ */
+void cmci_rediscover(int dying)
+{
+ int banks;
+ int cpu;
+ cpumask_var_t old;
+
+ if (!cmci_supported(&banks))
+ return;
+ if (!alloc_cpumask_var(&old, GFP_KERNEL))
+ return;
+ cpumask_copy(old, &current->cpus_allowed);
- /* We'll mask the thermal vector in the lapic till we're ready: */
- h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
- apic_write(APIC_LVTTHMR, h);
+ for_each_online_cpu(cpu) {
+ if (cpu == dying)
+ continue;
+ if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
+ continue;
+ /* Recheck banks in case CPUs don't all have the same */
+ if (cmci_supported(&banks))
+ cmci_discover(banks, 0);
+ }
- rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
- wrmsr(MSR_IA32_THERM_INTERRUPT,
- l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
+ set_cpus_allowed_ptr(current, old);
+ free_cpumask_var(old);
+}
- intel_set_thermal_handler();
+/*
+ * Reenable CMCI on this CPU in case a CPU down failed.
+ */
+void cmci_reenable(void)
+{
+ int banks;
+ if (cmci_supported(&banks))
+ cmci_discover(banks, 0);
+}
- rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
+static void intel_init_cmci(void)
+{
+ int banks;
- /* Unmask the thermal vector: */
- l = apic_read(APIC_LVTTHMR);
- apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+ if (!cmci_supported(&banks))
+ return;
- printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
- cpu, tm2 ? "TM2" : "TM1");
+ mce_threshold_vector = intel_threshold_interrupt;
+ cmci_discover(banks, 1);
+ /*
+ * For CPU #0 this runs with still disabled APIC, but that's
+ * ok because only the vector is set up. We still do another
+ * check for the banks later for CPU #0 just to make sure
+ * to not miss any events.
+ */
+ apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
+ cmci_recheck();
+}
- /* enable thermal throttle processing */
- atomic_set(&therm_throt_en, 1);
+void mce_intel_feature_init(struct cpuinfo_x86 *c)
+{
+ intel_init_thermal(c);
+ intel_init_cmci();
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
deleted file mode 100644
index f2ef6952c40..00000000000
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Intel specific MCE features.
- * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
- * Copyright (C) 2008, 2009 Intel Corporation
- * Author: Andi Kleen
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/percpu.h>
-#include <asm/processor.h>
-#include <asm/apic.h>
-#include <asm/msr.h>
-#include <asm/mce.h>
-#include <asm/hw_irq.h>
-#include <asm/idle.h>
-#include <asm/therm_throt.h>
-
-#include "mce.h"
-
-asmlinkage void smp_thermal_interrupt(void)
-{
- __u64 msr_val;
-
- ack_APIC_irq();
-
- exit_idle();
- irq_enter();
-
- rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
- if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
- mce_log_therm_throt_event(msr_val);
-
- inc_irq_stat(irq_thermal_count);
- irq_exit();
-}
-
-/*
- * Support for Intel Correct Machine Check Interrupts. This allows
- * the CPU to raise an interrupt when a corrected machine check happened.
- * Normally we pick those up using a regular polling timer.
- * Also supports reliable discovery of shared banks.
- */
-
-static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
-
-/*
- * cmci_discover_lock protects against parallel discovery attempts
- * which could race against each other.
- */
-static DEFINE_SPINLOCK(cmci_discover_lock);
-
-#define CMCI_THRESHOLD 1
-
-static int cmci_supported(int *banks)
-{
- u64 cap;
-
- if (mce_cmci_disabled || mce_ignore_ce)
- return 0;
-
- /*
- * Vendor check is not strictly needed, but the initial
- * initialization is vendor keyed and this
- * makes sure none of the backdoors are entered otherwise.
- */
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
- return 0;
- if (!cpu_has_apic || lapic_get_maxlvt() < 6)
- return 0;
- rdmsrl(MSR_IA32_MCG_CAP, cap);
- *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
- return !!(cap & MCG_CMCI_P);
-}
-
-/*
- * The interrupt handler. This is called on every event.
- * Just call the poller directly to log any events.
- * This could in theory increase the threshold under high load,
- * but doesn't for now.
- */
-static void intel_threshold_interrupt(void)
-{
- machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
- mce_notify_irq();
-}
-
-static void print_update(char *type, int *hdr, int num)
-{
- if (*hdr == 0)
- printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
- *hdr = 1;
- printk(KERN_CONT " %s:%d", type, num);
-}
-
-/*
- * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
- * on this CPU. Use the algorithm recommended in the SDM to discover shared
- * banks.
- */
-static void cmci_discover(int banks, int boot)
-{
- unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
- unsigned long flags;
- int hdr = 0;
- int i;
-
- spin_lock_irqsave(&cmci_discover_lock, flags);
- for (i = 0; i < banks; i++) {
- u64 val;
-
- if (test_bit(i, owned))
- continue;
-
- rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
-
- /* Already owned by someone else? */
- if (val & CMCI_EN) {
- if (test_and_clear_bit(i, owned) || boot)
- print_update("SHD", &hdr, i);
- __clear_bit(i, __get_cpu_var(mce_poll_banks));
- continue;
- }
-
- val |= CMCI_EN | CMCI_THRESHOLD;
- wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
- rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
-
- /* Did the enable bit stick? -- the bank supports CMCI */
- if (val & CMCI_EN) {
- if (!test_and_set_bit(i, owned) || boot)
- print_update("CMCI", &hdr, i);
- __clear_bit(i, __get_cpu_var(mce_poll_banks));
- } else {
- WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
- }
- }
- spin_unlock_irqrestore(&cmci_discover_lock, flags);
- if (hdr)
- printk(KERN_CONT "\n");
-}
-
-/*
- * Just in case we missed an event during initialization check
- * all the CMCI owned banks.
- */
-void cmci_recheck(void)
-{
- unsigned long flags;
- int banks;
-
- if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
- return;
- local_irq_save(flags);
- machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
- local_irq_restore(flags);
-}
-
-/*
- * Disable CMCI on this CPU for all banks it owns when it goes down.
- * This allows other CPUs to claim the banks on rediscovery.
- */
-void cmci_clear(void)
-{
- unsigned long flags;
- int i;
- int banks;
- u64 val;
-
- if (!cmci_supported(&banks))
- return;
- spin_lock_irqsave(&cmci_discover_lock, flags);
- for (i = 0; i < banks; i++) {
- if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
- continue;
- /* Disable CMCI */
- rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
- val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
- wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
- __clear_bit(i, __get_cpu_var(mce_banks_owned));
- }
- spin_unlock_irqrestore(&cmci_discover_lock, flags);
-}
-
-/*
- * After a CPU went down cycle through all the others and rediscover
- * Must run in process context.
- */
-void cmci_rediscover(int dying)
-{
- int banks;
- int cpu;
- cpumask_var_t old;
-
- if (!cmci_supported(&banks))
- return;
- if (!alloc_cpumask_var(&old, GFP_KERNEL))
- return;
- cpumask_copy(old, &current->cpus_allowed);
-
- for_each_online_cpu(cpu) {
- if (cpu == dying)
- continue;
- if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
- continue;
- /* Recheck banks in case CPUs don't all have the same */
- if (cmci_supported(&banks))
- cmci_discover(banks, 0);
- }
-
- set_cpus_allowed_ptr(current, old);
- free_cpumask_var(old);
-}
-
-/*
- * Reenable CMCI on this CPU in case a CPU down failed.
- */
-void cmci_reenable(void)
-{
- int banks;
- if (cmci_supported(&banks))
- cmci_discover(banks, 0);
-}
-
-static void intel_init_cmci(void)
-{
- int banks;
-
- if (!cmci_supported(&banks))
- return;
-
- mce_threshold_vector = intel_threshold_interrupt;
- cmci_discover(banks, 1);
- /*
- * For CPU #0 this runs with still disabled APIC, but that's
- * ok because only the vector is set up. We still do another
- * check for the banks later for CPU #0 just to make sure
- * to not miss any events.
- */
- apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
- cmci_recheck();
-}
-
-void mce_intel_feature_init(struct cpuinfo_x86 *c)
-{
- intel_init_thermal(c);
- intel_init_cmci();
-}
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index 70b710420f7..f5f2d6f71fb 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -17,10 +17,9 @@
#include <asm/processor.h>
#include <asm/system.h>
+#include <asm/mce.h>
#include <asm/msr.h>
-#include "mce.h"
-
static int firstbank;
#define MCE_RATE (15*HZ) /* timer rate is 15s */
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index 82cee108a2d..4482aea9aa2 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -1,21 +1,15 @@
/*
* P4 specific Machine Check Exception Reporting
*/
-
-#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
-#include <asm/therm_throt.h>
#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/apic.h>
+#include <asm/mce.h>
#include <asm/msr.h>
-#include "mce.h"
-
/* as supported by the P4/Xeon family */
struct intel_mce_extended_msrs {
u32 eax;
@@ -33,46 +27,6 @@ struct intel_mce_extended_msrs {
static int mce_num_extended_msrs;
-
-#ifdef CONFIG_X86_MCE_P4THERMAL
-
-static void unexpected_thermal_interrupt(struct pt_regs *regs)
-{
- printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
- smp_processor_id());
- add_taint(TAINT_MACHINE_CHECK);
-}
-
-/* P4/Xeon Thermal transition interrupt handler: */
-static void intel_thermal_interrupt(struct pt_regs *regs)
-{
- __u64 msr_val;
-
- ack_APIC_irq();
-
- rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
- therm_throt_process(msr_val & THERM_STATUS_PROCHOT);
-}
-
-/* Thermal interrupt handler for this CPU setup: */
-static void (*vendor_thermal_interrupt)(struct pt_regs *regs) =
- unexpected_thermal_interrupt;
-
-void smp_thermal_interrupt(struct pt_regs *regs)
-{
- irq_enter();
- vendor_thermal_interrupt(regs);
- __get_cpu_var(irq_stat).irq_thermal_count++;
- irq_exit();
-}
-
-void intel_set_thermal_handler(void)
-{
- vendor_thermal_interrupt = intel_thermal_interrupt;
-}
-
-#endif /* CONFIG_X86_MCE_P4THERMAL */
-
/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
{
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 015f481ab1b..5c0e6533d9b 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -10,12 +10,11 @@
#include <asm/processor.h>
#include <asm/system.h>
+#include <asm/mce.h>
#include <asm/msr.h>
-#include "mce.h"
-
/* By default disabled */
-int mce_p5_enable;
+int mce_p5_enabled __read_mostly;
/* Machine check handler for Pentium class Intel CPUs: */
static void pentium_machine_check(struct pt_regs *regs, long error_code)
@@ -43,15 +42,13 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
- /* Check for MCE support: */
- if (!cpu_has(c, X86_FEATURE_MCE))
+ /* Default P5 to off as its often misconnected: */
+ if (!mce_p5_enabled)
return;
-#ifdef CONFIG_X86_OLD_MCE
- /* Default P5 to off as its often misconnected: */
- if (mce_disabled != -1)
+ /* Check for MCE support: */
+ if (!cpu_has(c, X86_FEATURE_MCE))
return;
-#endif
machine_check_vector = pentium_machine_check;
/* Make sure the vector pointer is visible before we enable MCEs: */
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
index 43c24e66745..01e4f817818 100644
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -10,10 +10,9 @@
#include <asm/processor.h>
#include <asm/system.h>
+#include <asm/mce.h>
#include <asm/msr.h>
-#include "mce.h"
-
/* Machine Check Handler For PII/PIII */
static void intel_machine_check(struct pt_regs *regs, long error_code)
{
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 7b1ae2e20ba..bff8dd191dd 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -13,13 +13,23 @@
* Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
* Inspired by Ross Biro's and Al Borchers' counter code.
*/
+#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/jiffies.h>
+#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/sysdev.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/smp.h>
#include <linux/cpu.h>
-#include <asm/therm_throt.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/apic.h>
+#include <asm/idle.h>
+#include <asm/mce.h>
+#include <asm/msr.h>
/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL (300 * HZ)
@@ -27,7 +37,7 @@
static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
-atomic_t therm_throt_en = ATOMIC_INIT(0);
+static atomic_t therm_throt_en = ATOMIC_INIT(0);
#ifdef CONFIG_SYSFS
#define define_therm_throt_sysdev_one_ro(_name) \
@@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = {
* 1 : Event should be logged further, and a message has been
* printed to the syslog.
*/
-int therm_throt_process(int curr)
+static int therm_throt_process(int curr)
{
unsigned int cpu = smp_processor_id();
__u64 tmp_jiffs = get_jiffies_64();
@@ -186,6 +196,94 @@ static __init int thermal_throttle_init_device(void)
return 0;
}
-
device_initcall(thermal_throttle_init_device);
+
#endif /* CONFIG_SYSFS */
+
+/* Thermal transition interrupt handler */
+static void intel_thermal_interrupt(void)
+{
+ __u64 msr_val;
+
+ rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
+ if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
+ mce_log_therm_throt_event(msr_val);
+}
+
+static void unexpected_thermal_interrupt(void)
+{
+ printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
+ smp_processor_id());
+ add_taint(TAINT_MACHINE_CHECK);
+}
+
+static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
+
+asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
+{
+ exit_idle();
+ irq_enter();
+ inc_irq_stat(irq_thermal_count);
+ smp_thermal_vector();
+ irq_exit();
+ /* Ack only at the end to avoid potential reentry */
+ ack_APIC_irq();
+}
+
+void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+ unsigned int cpu = smp_processor_id();
+ int tm2 = 0;
+ u32 l, h;
+
+ /* Thermal monitoring depends on ACPI and clock modulation*/
+ if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
+ return;
+
+ /*
+ * First check if its enabled already, in which case there might
+ * be some SMM goo which handles it, so we can't even put a handler
+ * since it might be delivered via SMI already:
+ */
+ rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+ h = apic_read(APIC_LVTTHMR);
+ if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
+ printk(KERN_DEBUG
+ "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+ return;
+ }
+
+ if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
+ tm2 = 1;
+
+ /* Check whether a vector already exists */
+ if (h & APIC_VECTOR_MASK) {
+ printk(KERN_DEBUG
+ "CPU%d: Thermal LVT vector (%#x) already installed\n",
+ cpu, (h & APIC_VECTOR_MASK));
+ return;
+ }
+
+ /* We'll mask the thermal vector in the lapic till we're ready: */
+ h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
+ apic_write(APIC_LVTTHMR, h);
+
+ rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
+ wrmsr(MSR_IA32_THERM_INTERRUPT,
+ l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
+
+ smp_thermal_vector = intel_thermal_interrupt;
+
+ rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+ wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
+
+ /* Unmask the thermal vector: */
+ l = apic_read(APIC_LVTTHMR);
+ apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+
+ printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
+ cpu, tm2 ? "TM2" : "TM1");
+
+ /* enable thermal throttle processing */
+ atomic_set(&therm_throt_en, 1);
+}
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 81b02487090..54060f56597 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -9,10 +9,9 @@
#include <asm/processor.h>
#include <asm/system.h>
+#include <asm/mce.h>
#include <asm/msr.h>
-#include "mce.h"
-
/* Machine check handler for WinChip C6: */
static void winchip_machine_check(struct pt_regs *regs, long error_code)
{
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 275bc142cd5..76dfef23f78 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -19,6 +19,7 @@
#include <linux/kdebug.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
+#include <linux/highmem.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -389,23 +390,23 @@ static u64 intel_pmu_raw_event(u64 event)
return event & CORE_EVNTSEL_MASK;
}
-static const u64 amd_0f_hw_cache_event_ids
+static const u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
+ [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
},
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
+ [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
},
},
[ C(L1I ) ] = {
@@ -418,17 +419,17 @@ static const u64 amd_0f_hw_cache_event_ids
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
[ C(RESULT_MISS) ] = 0,
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
+ [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
},
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
@@ -438,8 +439,8 @@ static const u64 amd_0f_hw_cache_event_ids
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
+ [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
@@ -1223,6 +1224,8 @@ again:
if (!intel_pmu_save_and_restart(counter))
continue;
+ data.period = counter->hw.last_period;
+
if (perf_counter_overflow(counter, 1, &data))
intel_pmu_disable_counter(&counter->hw, bit);
}
@@ -1459,18 +1462,16 @@ static int intel_pmu_init(void)
static int amd_pmu_init(void)
{
+ /* Performance-monitoring supported from K7 and later: */
+ if (boot_cpu_data.x86 < 6)
+ return -ENODEV;
+
x86_pmu = amd_pmu;
- switch (boot_cpu_data.x86) {
- case 0x0f:
- case 0x10:
- case 0x11:
- memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids,
- sizeof(hw_cache_event_ids));
+ /* Events are common for all AMDs */
+ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
- pr_cont("AMD Family 0f/10/11 events, ");
- break;
- }
return 0;
}
@@ -1554,9 +1555,9 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
*/
static inline
-void callchain_store(struct perf_callchain_entry *entry, unsigned long ip)
+void callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
- if (entry->nr < MAX_STACK_DEPTH)
+ if (entry->nr < PERF_MAX_STACK_DEPTH)
entry->ip[entry->nr++] = ip;
}
@@ -1577,8 +1578,8 @@ static void backtrace_warning(void *data, char *msg)
static int backtrace_stack(void *data, char *name)
{
- /* Don't bother with IRQ stacks for now */
- return -1;
+ /* Process all stacks: */
+ return 0;
}
static void backtrace_address(void *data, unsigned long addr, int reliable)
@@ -1596,47 +1597,59 @@ static const struct stacktrace_ops backtrace_ops = {
.address = backtrace_address,
};
+#include "../dumpstack.h"
+
static void
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
- unsigned long bp;
- char *stack;
- int nr = entry->nr;
+ callchain_store(entry, PERF_CONTEXT_KERNEL);
+ callchain_store(entry, regs->ip);
- callchain_store(entry, instruction_pointer(regs));
+ dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
+}
- stack = ((char *)regs + sizeof(struct pt_regs));
-#ifdef CONFIG_FRAME_POINTER
- bp = frame_pointer(regs);
-#else
- bp = 0;
-#endif
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+ unsigned long offset, addr = (unsigned long)from;
+ int type = in_nmi() ? KM_NMI : KM_IRQ0;
+ unsigned long size, len = 0;
+ struct page *page;
+ void *map;
+ int ret;
- dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
+ do {
+ ret = __get_user_pages_fast(addr, 1, 0, &page);
+ if (!ret)
+ break;
- entry->kernel = entry->nr - nr;
-}
+ offset = addr & (PAGE_SIZE - 1);
+ size = min(PAGE_SIZE - offset, n - len);
+ map = kmap_atomic(page, type);
+ memcpy(to, map+offset, size);
+ kunmap_atomic(map, type);
+ put_page(page);
-struct stack_frame {
- const void __user *next_fp;
- unsigned long return_address;
-};
+ len += size;
+ to += size;
+ addr += size;
+
+ } while (len < n);
+
+ return len;
+}
static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
{
- int ret;
+ unsigned long bytes;
- if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
- return 0;
+ bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
- ret = 1;
- pagefault_disable();
- if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
- ret = 0;
- pagefault_enable();
-
- return ret;
+ return bytes == sizeof(*frame);
}
static void
@@ -1644,28 +1657,28 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
struct stack_frame frame;
const void __user *fp;
- int nr = entry->nr;
- regs = (struct pt_regs *)current->thread.sp0 - 1;
- fp = (void __user *)regs->bp;
+ if (!user_mode(regs))
+ regs = task_pt_regs(current);
+ fp = (void __user *)regs->bp;
+
+ callchain_store(entry, PERF_CONTEXT_USER);
callchain_store(entry, regs->ip);
- while (entry->nr < MAX_STACK_DEPTH) {
- frame.next_fp = NULL;
+ while (entry->nr < PERF_MAX_STACK_DEPTH) {
+ frame.next_frame = NULL;
frame.return_address = 0;
if (!copy_stack_frame(fp, &frame))
break;
- if ((unsigned long)fp < user_stack_pointer(regs))
+ if ((unsigned long)fp < regs->sp)
break;
callchain_store(entry, frame.return_address);
- fp = frame.next_fp;
+ fp = frame.next_frame;
}
-
- entry->user = entry->nr - nr;
}
static void
@@ -1701,9 +1714,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
entry = &__get_cpu_var(irq_entry);
entry->nr = 0;
- entry->hv = 0;
- entry->kernel = 0;
- entry->user = 0;
perf_do_callchain(regs, entry);
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d6f5b9fbde3..5c481f6205b 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -716,11 +716,15 @@ static void probe_nmi_watchdog(void)
wd_ops = &k7_wd_ops;
break;
case X86_VENDOR_INTEL:
- /*
- * Work around Core Duo (Yonah) errata AE49 where perfctr1
- * doesn't have a working enable bit.
+ /* Work around where perfctr1 doesn't have a working enable
+ * bit as described in the following errata:
+ * AE49 Core Duo and Intel Core Solo 65 nm
+ * AN49 Intel Pentium Dual-Core
+ * AF49 Dual-Core Intel Xeon Processor LV
*/
- if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
+ if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
+ ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
+ boot_cpu_data.x86_mask == 4))) {
intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
}
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index ff958248e61..5e409dc298a 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -27,6 +27,7 @@
#include <asm/cpu.h>
#include <asm/reboot.h>
#include <asm/virtext.h>
+#include <asm/iommu.h>
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
@@ -103,5 +104,10 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
#ifdef CONFIG_HPET_TIMER
hpet_disable();
#endif
+
+#ifdef CONFIG_X86_64
+ pci_iommu_shutdown();
+#endif
+
crash_save_cpu(regs, safe_smp_processor_id());
}
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1736acc4d7a..96f7ac0bbf0 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -240,10 +240,35 @@ static void __init do_add_efi_memmap(void)
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
int e820_type;
- if (md->attribute & EFI_MEMORY_WB)
- e820_type = E820_RAM;
- else
+ switch (md->type) {
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_CONVENTIONAL_MEMORY:
+ if (md->attribute & EFI_MEMORY_WB)
+ e820_type = E820_RAM;
+ else
+ e820_type = E820_RESERVED;
+ break;
+ case EFI_ACPI_RECLAIM_MEMORY:
+ e820_type = E820_ACPI;
+ break;
+ case EFI_ACPI_MEMORY_NVS:
+ e820_type = E820_NVS;
+ break;
+ case EFI_UNUSABLE_MEMORY:
+ e820_type = E820_UNUSABLE;
+ break;
+ default:
+ /*
+ * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
+ * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
+ * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
+ */
e820_type = E820_RESERVED;
+ break;
+ }
e820_add_region(start, size, e820_type);
}
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c929add475c..c097e7d607c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -48,7 +48,6 @@
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/page_types.h>
-#include <asm/desc.h>
#include <asm/percpu.h>
#include <asm/dwarf2.h>
#include <asm/processor-flags.h>
@@ -84,7 +83,7 @@
#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
#define preempt_stop(clobbers)
-#define resume_kernel restore_nocheck
+#define resume_kernel restore_all
#endif
.macro TRACE_IRQS_IRET
@@ -372,7 +371,7 @@ END(ret_from_exception)
ENTRY(resume_kernel)
DISABLE_INTERRUPTS(CLBR_ANY)
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
- jnz restore_nocheck
+ jnz restore_all
need_resched:
movl TI_flags(%ebp), %ecx # need_resched set ?
testb $_TIF_NEED_RESCHED, %cl
@@ -540,6 +539,8 @@ syscall_exit:
jne syscall_exit_work
restore_all:
+ TRACE_IRQS_IRET
+restore_all_notrace:
movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
# are returning to the kernel.
@@ -551,8 +552,6 @@ restore_all:
CFI_REMEMBER_STATE
je ldt_ss # returning to user-space with LDT SS
restore_nocheck:
- TRACE_IRQS_IRET
-restore_nocheck_notrace:
RESTORE_REGS 4 # skip orig_eax/error_code
CFI_ADJUST_CFA_OFFSET -4
irq_return:
@@ -588,22 +587,34 @@ ldt_ss:
jne restore_nocheck
#endif
- /* If returning to userspace with 16bit stack,
- * try to fix the higher word of ESP, as the CPU
- * won't restore it.
- * This is an "official" bug of all the x86-compatible
- * CPUs, which we can try to work around to make
- * dosemu and wine happy. */
- movl PT_OLDESP(%esp), %eax
- movl %esp, %edx
- call patch_espfix_desc
+/*
+ * Setup and switch to ESPFIX stack
+ *
+ * We're returning to userspace with a 16 bit stack. The CPU will not
+ * restore the high word of ESP for us on executing iret... This is an
+ * "official" bug of all the x86-compatible CPUs, which we can work
+ * around to make dosemu and wine happy. We do this by preloading the
+ * high word of ESP with the high word of the userspace ESP while
+ * compensating for the offset by changing to the ESPFIX segment with
+ * a base address that matches for the difference.
+ */
+ mov %esp, %edx /* load kernel esp */
+ mov PT_OLDESP(%esp), %eax /* load userspace esp */
+ mov %dx, %ax /* eax: new kernel esp */
+ sub %eax, %edx /* offset (low word is 0) */
+ PER_CPU(gdt_page, %ebx)
+ shr $16, %edx
+ mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */
+ mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */
pushl $__ESPFIX_SS
CFI_ADJUST_CFA_OFFSET 4
- pushl %eax
+ push %eax /* new kernel esp */
CFI_ADJUST_CFA_OFFSET 4
+ /* Disable interrupts, but do not irqtrace this section: we
+ * will soon execute iret and the tracer was already set to
+ * the irqstate after the iret */
DISABLE_INTERRUPTS(CLBR_EAX)
- TRACE_IRQS_OFF
- lss (%esp), %esp
+ lss (%esp), %esp /* switch to espfix segment */
CFI_ADJUST_CFA_OFFSET -8
jmp restore_nocheck
CFI_ENDPROC
@@ -716,15 +727,24 @@ PTREGSCALL(vm86)
PTREGSCALL(vm86old)
.macro FIXUP_ESPFIX_STACK
- /* since we are on a wrong stack, we cant make it a C code :( */
+/*
+ * Switch back for ESPFIX stack to the normal zerobased stack
+ *
+ * We can't call C functions using the ESPFIX stack. This code reads
+ * the high word of the segment base from the GDT and swiches to the
+ * normal stack and adjusts ESP with the matching offset.
+ */
+ /* fixup the stack */
PER_CPU(gdt_page, %ebx)
- GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
- addl %esp, %eax
+ mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */
+ mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */
+ shl $16, %eax
+ addl %esp, %eax /* the adjusted stack pointer */
pushl $__KERNEL_DS
CFI_ADJUST_CFA_OFFSET 4
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
- lss (%esp), %esp
+ lss (%esp), %esp /* switch to the normal stack segment */
CFI_ADJUST_CFA_OFFSET -8
.endm
.macro UNWIND_ESPFIX_STACK
@@ -1154,6 +1174,7 @@ ENTRY(ftrace_graph_caller)
pushl %edx
movl 0xc(%esp), %edx
lea 0x4(%ebp), %eax
+ movl (%ebp), %ecx
subl $MCOUNT_INSN_SIZE, %edx
call prepare_ftrace_return
popl %edx
@@ -1168,6 +1189,7 @@ return_to_handler:
pushl %eax
pushl %ecx
pushl %edx
+ movl %ebp, %eax
call ftrace_return_to_handler
movl %eax, 0xc(%esp)
popl %edx
@@ -1329,7 +1351,7 @@ nmi_stack_correct:
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_nmi
- jmp restore_nocheck_notrace
+ jmp restore_all_notrace
CFI_ENDPROC
nmi_stack_fixup:
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index de74f0a3e0e..c251be74510 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -135,6 +135,7 @@ ENTRY(ftrace_graph_caller)
leaq 8(%rbp), %rdi
movq 0x38(%rsp), %rsi
+ movq (%rbp), %rdx
subq $MCOUNT_INSN_SIZE, %rsi
call prepare_ftrace_return
@@ -150,6 +151,7 @@ GLOBAL(return_to_handler)
/* Save the return values */
movq %rax, (%rsp)
movq %rdx, 8(%rsp)
+ movq %rbp, %rdi
call ftrace_return_to_handler
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index b79c5533c42..d94e1ea3b9f 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -408,7 +408,8 @@ int ftrace_disable_ftrace_graph_caller(void)
* Hook the return address and push it in the stack of return addrs
* in current thread info.
*/
-void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ unsigned long frame_pointer)
{
unsigned long old;
int faulted;
@@ -453,7 +454,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
return;
}
- if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) {
+ if (ftrace_push_return_trace(old, self_addr, &trace.depth,
+ frame_pointer) == -EBUSY) {
*parent = old;
return;
}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index dc5ed4bdd88..8663afb5653 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -13,7 +13,6 @@
#include <asm/segment.h>
#include <asm/page_types.h>
#include <asm/pgtable_types.h>
-#include <asm/desc.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 54b29bb24e7..fa54f78e2a0 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -12,7 +12,6 @@
#include <linux/linkage.h>
#include <linux/threads.h>
#include <linux/init.h>
-#include <asm/desc.h>
#include <asm/segment.h>
#include <asm/pgtable.h>
#include <asm/page.h>
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 81408b93f88..dedc2bddf7a 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -510,7 +510,8 @@ static int hpet_setup_irq(struct hpet_dev *dev)
{
if (request_irq(dev->irq, hpet_interrupt_handler,
- IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev))
+ IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
+ dev->name, dev))
return -1;
disable_irq(dev->irq);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 745579bc825..47630479b06 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -32,6 +32,8 @@ int no_iommu __read_mostly;
/* Set this to 1 if there is a HW IOMMU in the system */
int iommu_detected __read_mostly = 0;
+int iommu_pass_through;
+
dma_addr_t bad_dma_address __read_mostly = 0;
EXPORT_SYMBOL(bad_dma_address);
@@ -209,6 +211,10 @@ static __init int iommu_setup(char *p)
#ifdef CONFIG_SWIOTLB
if (!strncmp(p, "soft", 4))
swiotlb = 1;
+ if (!strncmp(p, "pt", 2)) {
+ iommu_pass_through = 1;
+ return 1;
+ }
#endif
gart_parse_options(p);
@@ -290,6 +296,8 @@ static int __init pci_iommu_init(void)
void pci_iommu_shutdown(void)
{
gart_iommu_shutdown();
+
+ amd_iommu_shutdown();
}
/* Must execute after PCI subsystem */
fs_initcall(pci_iommu_init);
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index a1712f2b50f..6af96ee4420 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -71,7 +71,8 @@ void __init pci_swiotlb_init(void)
{
/* don't initialize swiotlb if iommu=off (no_iommu=1) */
#ifdef CONFIG_X86_64
- if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)
+ if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) ||
+ iommu_pass_through)
swiotlb = 1;
#endif
if (swiotlb_force)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5f935f0d586..a0f48f5671c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -54,6 +54,7 @@
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/i387.h>
+#include <asm/mce.h>
#include <asm/mach_traps.h>
@@ -65,8 +66,6 @@
#include <asm/setup.h>
#include <asm/traps.h>
-#include "cpu/mcheck/mce.h"
-
asmlinkage int system_call(void);
/* Do we ignore FPU interrupts ? */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index b0597ad02c9..6e1a368d21d 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -590,22 +590,26 @@ EXPORT_SYMBOL(recalibrate_cpu_khz);
*/
DEFINE_PER_CPU(unsigned long, cyc2ns);
+DEFINE_PER_CPU(unsigned long long, cyc2ns_offset);
static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
{
- unsigned long long tsc_now, ns_now;
+ unsigned long long tsc_now, ns_now, *offset;
unsigned long flags, *scale;
local_irq_save(flags);
sched_clock_idle_sleep_event();
scale = &per_cpu(cyc2ns, cpu);
+ offset = &per_cpu(cyc2ns_offset, cpu);
rdtscll(tsc_now);
ns_now = __cycles_2_ns(tsc_now);
- if (cpu_khz)
+ if (cpu_khz) {
*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
+ *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR);
+ }
sched_clock_idle_wakeup_event(0);
local_irq_restore(flags);
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index ec13cb5f17e..b7c2849ffb6 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -127,7 +127,7 @@ EXPORT_SYMBOL(__strnlen_user);
long strnlen_user(const char __user *s, long n)
{
- if (!access_ok(VERIFY_READ, s, n))
+ if (!access_ok(VERIFY_READ, s, 1))
return 0;
return __strnlen_user(s, n);
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index baa0e86adfb..78a5fff857b 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -952,8 +952,6 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
tsk = current;
mm = tsk->mm;
- prefetchw(&mm->mmap_sem);
-
/* Get the faulting address: */
address = read_cr2();
@@ -963,6 +961,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
*/
if (kmemcheck_active(regs))
kmemcheck_hide(regs);
+ prefetchw(&mm->mmap_sem);
if (unlikely(kmmio_fault(regs, address)))
return;
@@ -1114,7 +1113,7 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault:
*/
- fault = handle_mm_fault(mm, vma, address, write);
+ fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
if (unlikely(fault & VM_FAULT_ERROR)) {
mm_fault_error(regs, error_code, address, fault);
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 6340cef6798..71da1bca13c 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -14,7 +14,7 @@
static inline pte_t gup_get_pte(pte_t *ptep)
{
#ifndef CONFIG_X86_PAE
- return *ptep;
+ return ACCESS_ONCE(*ptep);
#else
/*
* With get_user_pages_fast, we walk down the pagetables without taking
@@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
return 1;
}
+/*
+ * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+ struct page **pages)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long addr, len, end;
+ unsigned long next;
+ unsigned long flags;
+ pgd_t *pgdp;
+ int nr = 0;
+
+ start &= PAGE_MASK;
+ addr = start;
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+ if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+ (void __user *)start, len)))
+ return 0;
+
+ /*
+ * XXX: batch / limit 'nr', to avoid large irq off latency
+ * needs some instrumenting to determine the common sizes used by
+ * important workloads (eg. DB2), and whether limiting the batch size
+ * will decrease performance.
+ *
+ * It seems like we're in the clear for the moment. Direct-IO is
+ * the main guy that batches up lots of get_user_pages, and even
+ * they are limited to 64-at-a-time which is not so many.
+ */
+ /*
+ * This doesn't prevent pagetable teardown, but does prevent
+ * the pagetables and pages from being freed on x86.
+ *
+ * So long as we atomically load page table pointers versus teardown
+ * (which we do on x86, with the above PAE exception), we can follow the
+ * address down to the the page and take a ref on it.
+ */
+ local_irq_save(flags);
+ pgdp = pgd_offset(mm, addr);
+ do {
+ pgd_t pgd = *pgdp;
+
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(pgd))
+ break;
+ if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+ break;
+ } while (pgdp++, addr = next, addr != end);
+ local_irq_restore(flags);
+
+ return nr;
+}
+
/**
* get_user_pages_fast() - pin user pages in memory
* @start: starting user address
@@ -247,11 +303,16 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
start &= PAGE_MASK;
addr = start;
len = (unsigned long) nr_pages << PAGE_SHIFT;
+
end = start + len;
- if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
- (void __user *)start, len)))
+ if (end < start)
goto slow_irqon;
+#ifdef CONFIG_X86_64
+ if (end >> __VIRTUAL_MASK_SHIFT)
+ goto slow_irqon;
+#endif
+
/*
* XXX: batch / limit 'nr', to avoid large irq off latency
* needs some instrumenting to determine the common sizes used by
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9c543290a81..c4378f4fd4a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -527,7 +527,7 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
return phys_pud_init(pud, addr, end, page_size_mask);
}
-unsigned long __init
+unsigned long __meminit
kernel_physical_mapping_init(unsigned long start,
unsigned long end,
unsigned long page_size_mask)
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index c0ecf250fe5..16c3fda85bb 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -38,15 +38,26 @@ count_resource(struct acpi_resource *acpi_res, void *data)
struct acpi_resource_address64 addr;
acpi_status status;
- if (info->res_num >= PCI_BUS_NUM_RESOURCES)
- return AE_OK;
-
status = resource_to_addr(acpi_res, &addr);
if (ACPI_SUCCESS(status))
info->res_num++;
return AE_OK;
}
+static int
+bus_has_transparent_bridge(struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ u16 class = dev->class >> 8;
+
+ if (class == PCI_CLASS_BRIDGE_PCI && dev->transparent)
+ return true;
+ }
+ return false;
+}
+
static acpi_status
setup_resource(struct acpi_resource *acpi_res, void *data)
{
@@ -56,9 +67,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
acpi_status status;
unsigned long flags;
struct resource *root;
-
- if (info->res_num >= PCI_BUS_NUM_RESOURCES)
- return AE_OK;
+ int max_root_bus_resources = PCI_BUS_NUM_RESOURCES;
status = resource_to_addr(acpi_res, &addr);
if (!ACPI_SUCCESS(status))
@@ -82,6 +91,18 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
res->end = res->start + addr.address_length - 1;
res->child = NULL;
+ if (bus_has_transparent_bridge(info->bus))
+ max_root_bus_resources -= 3;
+ if (info->res_num >= max_root_bus_resources) {
+ printk(KERN_WARNING "PCI: Failed to allocate 0x%lx-0x%lx "
+ "from %s for %s due to _CRS returning more than "
+ "%d resource descriptors\n", (unsigned long) res->start,
+ (unsigned long) res->end, root->name, info->name,
+ max_root_bus_resources);
+ info->res_num++;
+ return AE_OK;
+ }
+
if (insert_resource(root, res)) {
printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx "
"from %s for %s\n", (unsigned long) res->start,
@@ -217,7 +238,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
#endif
}
- if (bus && (pci_probe & PCI_USE__CRS))
+ if (bus && !(pci_probe & PCI_NO_ROOT_CRS))
get_current_resources(device, busnum, domain, bus);
return bus;
}
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index f893d6a6e80..2255f880678 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -101,7 +101,7 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b)
struct pci_root_info *info;
/* don't go for it if _CRS is used */
- if (pci_probe & PCI_USE__CRS)
+ if (!(pci_probe & PCI_NO_ROOT_CRS))
return;
/* if only one root bus, don't need to anything */
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 2202b6257b8..4740119e4bb 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -515,8 +515,8 @@ char * __devinit pcibios_setup(char *str)
} else if (!strcmp(str, "assign-busses")) {
pci_probe |= PCI_ASSIGN_ALL_BUSSES;
return NULL;
- } else if (!strcmp(str, "use_crs")) {
- pci_probe |= PCI_USE__CRS;
+ } else if (!strcmp(str, "nocrs")) {
+ pci_probe |= PCI_NO_ROOT_CRS;
return NULL;
} else if (!strcmp(str, "earlydump")) {
pci_early_dump_regs = 1;
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index a85bef20a3b..0fb56db16d1 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -116,7 +116,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
struct pci_bus *bus;
struct pci_dev *dev;
int idx;
- struct resource *r, *pr;
+ struct resource *r;
/* Depth-First Search on bus tree */
list_for_each_entry(bus, bus_list, node) {
@@ -126,9 +126,8 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
r = &dev->resource[idx];
if (!r->flags)
continue;
- pr = pci_find_parent_resource(dev, r);
- if (!r->start || !pr ||
- request_resource(pr, r) < 0) {
+ if (!r->start ||
+ pci_claim_resource(dev, idx) < 0) {
dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx);
/*
* Something is wrong with the region.
@@ -149,7 +148,7 @@ static void __init pcibios_allocate_resources(int pass)
struct pci_dev *dev = NULL;
int idx, disabled;
u16 command;
- struct resource *r, *pr;
+ struct resource *r;
for_each_pci_dev(dev) {
pci_read_config_word(dev, PCI_COMMAND, &command);
@@ -168,8 +167,7 @@ static void __init pcibios_allocate_resources(int pass)
(unsigned long long) r->start,
(unsigned long long) r->end,
r->flags, disabled, pass);
- pr = pci_find_parent_resource(dev, r);
- if (!pr || request_resource(pr, r) < 0) {
+ if (pci_claim_resource(dev, idx) < 0) {
dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx);
/* We'll assign a new address later */
r->end -= r->start;
@@ -197,7 +195,7 @@ static void __init pcibios_allocate_resources(int pass)
static int __init pcibios_assign_resources(void)
{
struct pci_dev *dev = NULL;
- struct resource *r, *pr;
+ struct resource *r;
if (!(pci_probe & PCI_ASSIGN_ROMS)) {
/*
@@ -209,8 +207,7 @@ static int __init pcibios_assign_resources(void)
r = &dev->resource[PCI_ROM_RESOURCE];
if (!r->flags || !r->start)
continue;
- pr = pci_find_parent_resource(dev, r);
- if (!pr || request_resource(pr, r) < 0) {
+ if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) {
r->end -= r->start;
r->start = 0;
}
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 16a9020c8f1..88112b49f02 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -123,6 +123,7 @@ quiet_cmd_vdso = VDSO $@
-Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv)
+GCOV_PROFILE := n
#
# Install the unstripped copy of vdso*.so listed in $(vdso-install-y).