From c33f543d320843e1732534c3931da4bbd18e6c14 Mon Sep 17 00:00:00 2001 From: Patrick Simmons Date: Wed, 8 Sep 2010 10:34:28 -0400 Subject: oprofile: Add Support for Intel CPU Family 6 / Model 22 (Intel Celeron 540) This patch adds CPU type detection for the Intel Celeron 540, which is part of the Core 2 family according to Wikipedia; the family and ID pair is absent from the Volume 3B table referenced in the source code comments. I have tested this patch on an Intel Celeron 540 machine reporting itself as Family 6 Model 22, and OProfile runs on the machine without issue. Spec: http://download.intel.com/design/mobile/SPECUPDT/317667.pdf Signed-off-by: Patrick Simmons Acked-by: Andi Kleen Acked-by: Arnd Bergmann Cc: stable@kernel.org Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cfe4faabb0f..009b819f48d 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -671,7 +671,9 @@ static int __init ppro_init(char **cpu_type) case 14: *cpu_type = "i386/core"; break; - case 15: case 23: + case 0x0f: + case 0x16: + case 0x17: *cpu_type = "i386/core_2"; break; case 0x1a: -- cgit v1.2.3-70-g09d2 From 89e45aac42d40426c97e6901811309bf49c4993f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 17 Sep 2010 03:24:13 +0200 Subject: x86: Fix instruction breakpoint encoding Lengths and types of breakpoints are encoded in a half byte into CPU registers. However when we extract these values and store them, we add a high half byte part to them: 0x40 to the length and 0x80 to the type. When that gets reloaded to the CPU registers, the high part is masked. While making the instruction breakpoints available for perf, I zapped that high part on instruction breakpoint encoding and that broke the arch -> generic translation used by ptrace instruction breakpoints. Writing dr7 to set an inst breakpoint was then failing. There is no apparent reason for these high parts so we could get rid of them altogether. That's an invasive change though so let's do that later and for now fix the problem by restoring that inst breakpoint high part encoding in this sole patch. Reported-by: Kelvie Wong Signed-off-by: Frederic Weisbecker Cc: Prasad Cc: Mahesh Salgaonkar Cc: Will Deacon --- arch/x86/include/asm/hw_breakpoint.h | 2 +- arch/x86/kernel/hw_breakpoint.c | 40 +++++++++++++++++------------------- 2 files changed, 20 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 528a11e8d3e..824ca07860d 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -20,7 +20,7 @@ struct arch_hw_breakpoint { #include /* Available HW breakpoint length encodings */ -#define X86_BREAKPOINT_LEN_X 0x00 +#define X86_BREAKPOINT_LEN_X 0x40 #define X86_BREAKPOINT_LEN_1 0x40 #define X86_BREAKPOINT_LEN_2 0x44 #define X86_BREAKPOINT_LEN_4 0x4c diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a474ec37c32..ff15c9dcc25 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -206,11 +206,27 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) int arch_bp_generic_fields(int x86_len, int x86_type, int *gen_len, int *gen_type) { - /* Len */ - switch (x86_len) { - case X86_BREAKPOINT_LEN_X: + /* Type */ + switch (x86_type) { + case X86_BREAKPOINT_EXECUTE: + if (x86_len != X86_BREAKPOINT_LEN_X) + return -EINVAL; + + *gen_type = HW_BREAKPOINT_X; *gen_len = sizeof(long); + return 0; + case X86_BREAKPOINT_WRITE: + *gen_type = HW_BREAKPOINT_W; break; + case X86_BREAKPOINT_RW: + *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; + break; + default: + return -EINVAL; + } + + /* Len */ + switch (x86_len) { case X86_BREAKPOINT_LEN_1: *gen_len = HW_BREAKPOINT_LEN_1; break; @@ -229,21 +245,6 @@ int arch_bp_generic_fields(int x86_len, int x86_type, return -EINVAL; } - /* Type */ - switch (x86_type) { - case X86_BREAKPOINT_EXECUTE: - *gen_type = HW_BREAKPOINT_X; - break; - case X86_BREAKPOINT_WRITE: - *gen_type = HW_BREAKPOINT_W; - break; - case X86_BREAKPOINT_RW: - *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; - break; - default: - return -EINVAL; - } - return 0; } @@ -316,9 +317,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) ret = -EINVAL; switch (info->len) { - case X86_BREAKPOINT_LEN_X: - align = sizeof(long) -1; - break; case X86_BREAKPOINT_LEN_1: align = 0; break; -- cgit v1.2.3-70-g09d2 From 9b6efcd2e2275e13403700913b7a1da92cf11ad2 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 21 Sep 2010 10:54:01 -0600 Subject: lguest: update comments to reflect LHCALL_LOAD_GDT_ENTRY. We used to have a hypercall which reloaded the entire GDT, then we switched to one which loaded a single entry (to match the IDT code). Some comments were not updated, so fix them. Signed-off-by: Rusty Russell Reported by: Eviatar Khen --- arch/x86/lguest/boot.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 9257510b483..9d5f5584845 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -324,9 +324,8 @@ static void lguest_load_gdt(const struct desc_ptr *desc) } /* - * For a single GDT entry which changes, we do the lazy thing: alter our GDT, - * then tell the Host to reload the entire thing. This operation is so rare - * that this naive implementation is reasonable. + * For a single GDT entry which changes, we simply change our copy and + * then tell the host about it. */ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, const void *desc, int type) @@ -338,9 +337,13 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, } /* - * OK, I lied. There are three "thread local storage" GDT entries which change + * There are three "thread local storage" GDT entries which change * on every context switch (these three entries are how glibc implements - * __thread variables). So we have a hypercall specifically for this case. + * __thread variables). As an optimization, we have a hypercall + * specifically for this case. + * + * Wouldn't it be nicer to have a general LOAD_GDT_ENTRIES hypercall + * which took a range of entries? */ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) { -- cgit v1.2.3-70-g09d2 From 83d9f65bdae6f6b34d75282c6618d3f50846849a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 29 Aug 2010 18:12:24 -0700 Subject: x86, setup: Fix earlyprintk=serial,ttyS0,115200 Torsten reported that there is garbage output, after commit 8fee13a48e4879fba57725f6d9513df4bfa8e9f3 (x86, setup: enable early console output from the decompressor) It turns out we missed the offset for that case. Reported-by: Torsten Kaiser Signed-off-by: Yinghai Lu LKML-Reference: <4C7B0578.8090807@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/boot/early_serial_console.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c index 030f4b93e25..407a8e26f0a 100644 --- a/arch/x86/boot/early_serial_console.c +++ b/arch/x86/boot/early_serial_console.c @@ -58,7 +58,7 @@ static void parse_earlyprintk(void) if (arg[pos] == ',') pos++; - if (!strncmp(arg, "ttyS", 4)) { + if (!strncmp(arg + pos, "ttyS", 4)) { static const int bases[] = { 0x3f8, 0x2f8 }; int idx = 0; -- cgit v1.2.3-70-g09d2 From 74b3c444a963ba55aef89b33a1bcaada9a4c206f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 29 Aug 2010 18:13:10 -0700 Subject: x86, setup: Fix earlyprintk=serial,0x3f8,115200 earlyprintk can take and I/O port, so we need to handle this case in the setup code too, otherwise 0x3f8 will be treated as a baud rate. Signed-off-by: Yinghai Lu LKML-Reference: <4C7B05A6.4010801@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/boot/early_serial_console.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c index 407a8e26f0a..5df2869c874 100644 --- a/arch/x86/boot/early_serial_console.c +++ b/arch/x86/boot/early_serial_console.c @@ -58,7 +58,19 @@ static void parse_earlyprintk(void) if (arg[pos] == ',') pos++; - if (!strncmp(arg + pos, "ttyS", 4)) { + /* + * make sure we have + * "serial,0x3f8,115200" + * "serial,ttyS0,115200" + * "ttyS0,115200" + */ + if (pos == 7 && !strncmp(arg + pos, "0x", 2)) { + port = simple_strtoull(arg + pos, &e, 16); + if (port == 0 || arg + pos == e) + port = DEFAULT_SERIAL_PORT; + else + pos = e - arg; + } else if (!strncmp(arg + pos, "ttyS", 4)) { static const int bases[] = { 0x3f8, 0x2f8 }; int idx = 0; -- cgit v1.2.3-70-g09d2 From 9ecd4e1689208afe9b059a5ce1333acb2f42c4d2 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 22 Sep 2010 17:07:27 -0700 Subject: tracing/x86: Don't use mcount in pvclock.c When using a paravirt clock, pvclock.c can be used by sched_clock(), which in turn is used by the tracing mechanism for timestamps, which leads to infinite recursion. Disable mcount/tracing for pvclock.o. Cc: stable@kernel.org Signed-off-by: Jeremy Fitzhardinge LKML-Reference: <4C9A9A3F.4040201@goop.org> Signed-off-by: Steven Rostedt --- arch/x86/kernel/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0925676266b..882bbff9d50 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -11,6 +11,7 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg CFLAGS_REMOVE_paravirt-spinlocks.o = -pg +CFLAGS_REMOVE_pvclock.o = -pg CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg endif -- cgit v1.2.3-70-g09d2 From 258af47479980d8238a04568b94a4e55aa1cb537 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 22 Sep 2010 22:22:25 -0400 Subject: tracing/x86: Don't use mcount in kvmclock.c The guest can use the paravirt clock in kvmclock.c which is used by sched_clock(), which in turn is used by the tracing mechanism for timestamps, which leads to infinite recursion. Disable mcount/tracing for kvmclock.o. Cc: stable@kernel.org Cc: Jeremy Fitzhardinge Cc: Avi Kivity Signed-off-by: Steven Rostedt --- arch/x86/kernel/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 882bbff9d50..fedf32a8c3e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -12,6 +12,7 @@ CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg CFLAGS_REMOVE_paravirt-spinlocks.o = -pg CFLAGS_REMOVE_pvclock.o = -pg +CFLAGS_REMOVE_kvmclock.o = -pg CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg endif -- cgit v1.2.3-70-g09d2 From e9bf51971157e367aabfc111a8219db010f69cd4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 20 Sep 2010 14:33:07 +0200 Subject: x86/amd-iommu: Set iommu configuration flags in enable-loop This patch moves the setting of the configuration and feature flags out out the acpi table parsing path and moves it into the iommu-enable path. This is needed to reliably fix resume-from-s3. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 3 +++ arch/x86/kernel/amd_iommu_init.c | 49 +++++++++++++++++++--------------- 2 files changed, 30 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 7014e88bc77..ef2d5cd7d7e 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -368,6 +368,9 @@ struct amd_iommu { /* capabilities of that IOMMU read from ACPI */ u32 cap; + /* flags read from acpi table */ + u8 acpi_flags; + /* * Capability pointer. There could be more than one IOMMU per PCI * device function if there are more than one AMD IOMMU capability diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 3cc63e2b8dd..85e9817ead4 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -649,29 +649,9 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, struct ivhd_entry *e; /* - * First set the recommended feature enable bits from ACPI - * into the IOMMU control registers + * First save the recommended feature enable bits from ACPI */ - h->flags & IVHD_FLAG_HT_TUN_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : - iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); - - h->flags & IVHD_FLAG_PASSPW_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : - iommu_feature_disable(iommu, CONTROL_PASSPW_EN); - - h->flags & IVHD_FLAG_RESPASSPW_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : - iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); - - h->flags & IVHD_FLAG_ISOC_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_ISOC_EN) : - iommu_feature_disable(iommu, CONTROL_ISOC_EN); - - /* - * make IOMMU memory accesses cache coherent - */ - iommu_feature_enable(iommu, CONTROL_COHERENT_EN); + iommu->acpi_flags = h->flags; /* * Done. Now parse the device entries @@ -1116,6 +1096,30 @@ static void init_device_table(void) } } +static void iommu_init_flags(struct amd_iommu *iommu) +{ + iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : + iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); + + iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_PASSPW_EN); + + iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); + + iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_ISOC_EN) : + iommu_feature_disable(iommu, CONTROL_ISOC_EN); + + /* + * make IOMMU memory accesses cache coherent + */ + iommu_feature_enable(iommu, CONTROL_COHERENT_EN); +} + /* * This function finally enables all IOMMUs found in the system after * they have been initialized @@ -1126,6 +1130,7 @@ static void enable_iommus(void) for_each_iommu(iommu) { iommu_disable(iommu); + iommu_init_flags(iommu); iommu_set_device_table(iommu); iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); -- cgit v1.2.3-70-g09d2 From 4c894f47bb49284008073d351c0ddaac8860864e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 23 Sep 2010 15:15:19 +0200 Subject: x86/amd-iommu: Work around S3 BIOS bug This patch adds a workaround for an IOMMU BIOS problem to the AMD IOMMU driver. The result of the bug is that the IOMMU does not execute commands anymore when the system comes out of the S3 state resulting in system failure. The bug in the BIOS is that is does not restore certain hardware specific registers correctly. This workaround reads out the contents of these registers at boot time and restores them on resume from S3. The workaround is limited to the specific IOMMU chipset where this problem occurs. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_proto.h | 6 ++++++ arch/x86/include/asm/amd_iommu_types.h | 9 +++++++++ arch/x86/kernel/amd_iommu_init.c | 18 ++++++++++++++++++ include/linux/pci_ids.h | 3 +++ 4 files changed, 36 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index d2544f1d705..cb030374b90 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -38,4 +38,10 @@ static inline void amd_iommu_stats_init(void) { } #endif /* !CONFIG_AMD_IOMMU_STATS */ +static inline bool is_rd890_iommu(struct pci_dev *pdev) +{ + return (pdev->vendor == PCI_VENDOR_ID_ATI) && + (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); +} + #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index ef2d5cd7d7e..08616180dea 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -414,6 +414,15 @@ struct amd_iommu { /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; + + /* + * This array is required to work around a potential BIOS bug. + * The BIOS may miss to restore parts of the PCI configuration + * space when the system resumes from S3. The result is that the + * IOMMU does not execute commands anymore which leads to system + * failure. + */ + u32 cache_cfg[4]; }; /* diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 85e9817ead4..5a170cbbbed 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -632,6 +632,13 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) iommu->last_device = calc_devid(MMIO_GET_BUS(range), MMIO_GET_LD(range)); iommu->evt_msi_num = MMIO_MSI_NUM(misc); + + if (is_rd890_iommu(iommu->dev)) { + pci_read_config_dword(iommu->dev, 0xf0, &iommu->cache_cfg[0]); + pci_read_config_dword(iommu->dev, 0xf4, &iommu->cache_cfg[1]); + pci_read_config_dword(iommu->dev, 0xf8, &iommu->cache_cfg[2]); + pci_read_config_dword(iommu->dev, 0xfc, &iommu->cache_cfg[3]); + } } /* @@ -1120,6 +1127,16 @@ static void iommu_init_flags(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_COHERENT_EN); } +static void iommu_apply_quirks(struct amd_iommu *iommu) +{ + if (is_rd890_iommu(iommu->dev)) { + pci_write_config_dword(iommu->dev, 0xf0, iommu->cache_cfg[0]); + pci_write_config_dword(iommu->dev, 0xf4, iommu->cache_cfg[1]); + pci_write_config_dword(iommu->dev, 0xf8, iommu->cache_cfg[2]); + pci_write_config_dword(iommu->dev, 0xfc, iommu->cache_cfg[3]); + } +} + /* * This function finally enables all IOMMUs found in the system after * they have been initialized @@ -1130,6 +1147,7 @@ static void enable_iommus(void) for_each_iommu(iommu) { iommu_disable(iommu); + iommu_apply_quirks(iommu); iommu_init_flags(iommu); iommu_set_device_table(iommu); iommu_enable_command_buffer(iommu); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 10d33309e9a..570fddeb038 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -393,6 +393,9 @@ #define PCI_DEVICE_ID_VLSI_82C147 0x0105 #define PCI_DEVICE_ID_VLSI_VAS96011 0x0702 +/* AMD RD890 Chipset */ +#define PCI_DEVICE_ID_RD890_IOMMU 0x5a23 + #define PCI_VENDOR_ID_ADL 0x1005 #define PCI_DEVICE_ID_ADL_2301 0x2301 -- cgit v1.2.3-70-g09d2 From 04e0463e088b41060c08c255eb0d3278a504f094 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 23 Sep 2010 16:12:48 +0200 Subject: x86/amd-iommu: Fix rounding-bug in __unmap_single In the __unmap_single function the dma_addr is rounded down to a page boundary before the dma pages are unmapped. The address is later also used to flush the TLB entries for that mapping. But without the offset into the dma page the amount of pages to flush might be miscalculated in the TLB flushing path. This patch fixes this bug by using the original address to flush the TLB. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index fa044e1e30a..679b6450382 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1953,6 +1953,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, size_t size, int dir) { + dma_addr_t flush_addr; dma_addr_t i, start; unsigned int pages; @@ -1960,6 +1961,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, (dma_addr + size > dma_dom->aperture_size)) return; + flush_addr = dma_addr; pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); dma_addr &= PAGE_MASK; start = dma_addr; @@ -1974,7 +1976,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, dma_ops_free_addresses(dma_dom, dma_addr, pages); if (amd_iommu_unmap_flush || dma_dom->need_flush) { - iommu_flush_pages(&dma_dom->domain, dma_addr, size); + iommu_flush_pages(&dma_dom->domain, flush_addr, size); dma_dom->need_flush = false; } } -- cgit v1.2.3-70-g09d2 From 63e6be6d98e1a2bcdca86872b67052e51ab6afa1 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 15 Sep 2010 18:20:34 +0200 Subject: perf, x86: Catch spurious interrupts after disabling counters Some cpus still deliver spurious interrupts after disabling a counter. This caused 'undelivered NMI' messages. This patch fixes this. Introduced by: 4177c42: perf, x86: Try to handle unknown nmis with an enabled PMU Reported-by: Ingo Molnar Signed-off-by: Robert Richter Cc: Don Zickus Cc: gorcunov@gmail.com Cc: fweisbec@gmail.com Cc: ying.huang@intel.com Cc: ming.m.lin@intel.com Cc: yinghai@kernel.org Cc: andi@firstfloor.org Cc: eranian@google.com Cc: Peter Zijlstra LKML-Reference: <20100915162034.GO13563@erda.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 3efdf2870a3..03a5b0385ad 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -102,6 +102,7 @@ struct cpu_hw_events { */ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; int enabled; int n_events; @@ -1010,6 +1011,7 @@ static int x86_pmu_start(struct perf_event *event) x86_perf_event_set_period(event); cpuc->events[idx] = event; __set_bit(idx, cpuc->active_mask); + __set_bit(idx, cpuc->running); x86_pmu.enable(event); perf_event_update_userpage(event); @@ -1141,8 +1143,16 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < x86_pmu.num_counters; idx++) { - if (!test_bit(idx, cpuc->active_mask)) + if (!test_bit(idx, cpuc->active_mask)) { + /* + * Though we deactivated the counter some cpus + * might still deliver spurious interrupts still + * in flight. Catch them: + */ + if (__test_and_clear_bit(idx, cpuc->running)) + handled++; continue; + } event = cpuc->events[idx]; hwc = &event->hw; -- cgit v1.2.3-70-g09d2 From a46590533ad7b0f3f640732081d7e1658145c0ba Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 23 Sep 2010 22:21:34 -0700 Subject: x86/hwmon: fix initialization of coretemp Using cpuid_eax() to determine feature availability on other than the current CPU is invalid. And feature availability should also be checked in the hotplug code path. Signed-off-by: Jan Beulich Cc: Rudolf Marek Cc: Fenghua Yu Signed-off-by: Guenter Roeck --- arch/x86/include/asm/cpufeature.h | 1 + arch/x86/kernel/cpu/scattered.c | 1 + drivers/hwmon/coretemp.c | 29 +++++++++++++---------------- 3 files changed, 15 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index c6fbb7b430d..3f76523589a 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -168,6 +168,7 @@ #define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ #define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ +#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 34b4dad6f0b..d4907951512 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -31,6 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) const struct cpuid_bit *cb; static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { + { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 }, { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 6b3ef553bd1..5850da64ae2 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -423,9 +423,18 @@ static int __cpuinit coretemp_device_add(unsigned int cpu) int err; struct platform_device *pdev; struct pdev_entry *pdev_entry; -#ifdef CONFIG_SMP struct cpuinfo_x86 *c = &cpu_data(cpu); -#endif + + /* + * CPUID.06H.EAX[0] indicates whether the CPU has thermal + * sensors. We check this bit only, all the early CPUs + * without thermal sensors will be filtered out. + */ + if (!cpu_has(c, X86_FEATURE_DTS)) { + printk(KERN_INFO DRVNAME ": CPU (model=0x%x)" + " has no thermal sensor.\n", c->x86_model); + return 0; + } mutex_lock(&pdev_list_mutex); @@ -527,20 +536,8 @@ static int __init coretemp_init(void) if (err) goto exit; - for_each_online_cpu(i) { - struct cpuinfo_x86 *c = &cpu_data(i); - /* - * CPUID.06H.EAX[0] indicates whether the CPU has thermal - * sensors. We check this bit only, all the early CPUs - * without thermal sensors will be filtered out. - */ - if (c->cpuid_level >= 6 && (cpuid_eax(0x06) & 0x01)) - coretemp_device_add(i); - else { - printk(KERN_INFO DRVNAME ": CPU (model=0x%x)" - " has no thermal sensor.\n", c->x86_model); - } - } + for_each_online_cpu(i) + coretemp_device_add(i); #ifndef CONFIG_HOTPLUG_CPU if (list_empty(&pdev_list)) { -- cgit v1.2.3-70-g09d2 From c9e2fbd909c20b165b2b9ffb59f8b674cf0a55b0 Mon Sep 17 00:00:00 2001 From: Alexander Chumachenko Date: Thu, 1 Apr 2010 15:34:52 +0300 Subject: x86: Avoid 'constant_test_bit()' misoptimization due to cast to non-volatile While debugging bit_spin_lock() hang, it was tracked down to gcc-4.4 misoptimization of non-inlined constant_test_bit() due to non-volatile addr when 'const volatile unsigned long *addr' cast to 'unsigned long *' with subsequent unconditional jump to pause (and not to the test) leading to hang. Compiling with gcc-4.3 or disabling CONFIG_OPTIMIZE_INLINING yields inlined constant_test_bit() and correct jump, thus working around the kernel bug. Other arches than asm-x86 may implement this slightly differently; 2.6.29 mitigates the misoptimization by changing the function prototype (commit c4295fbb6048d85f0b41c5ced5cbf63f6811c46c) but probably fixing the issue itself is better. Signed-off-by: Alexander Chumachenko Signed-off-by: Michael Shigorin Acked-by: Linus Torvalds Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 545776efeb1..bafd80defa4 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -309,7 +309,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr) static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) { return ((1UL << (nr % BITS_PER_LONG)) & - (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; + (addr[nr / BITS_PER_LONG])) != 0; } static inline int variable_test_bit(int nr, volatile const unsigned long *addr) -- cgit v1.2.3-70-g09d2