From 8558e3943df1c51c3377cb4e8a52ea484d6f357d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 6 Jan 2010 16:11:06 -0500 Subject: x86, ACPI: delete acpi_boot_table_init() return value cleanup only. setup_arch(), doesn't care care if ACPI initialization succeeded or failed, so delete acpi_boot_table_init()'s return value. Signed-off-by: Len Brown --- arch/x86/kernel/acpi/boot.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fb1035cd9a6..036d28adf59 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1529,16 +1529,10 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { * if acpi_blacklisted() acpi_disabled = 1; * acpi_irq_model=... * ... - * - * return value: (currently ignored) - * 0: success - * !0: failure */ -int __init acpi_boot_table_init(void) +void __init acpi_boot_table_init(void) { - int error; - dmi_check_system(acpi_dmi_table); /* @@ -1546,15 +1540,14 @@ int __init acpi_boot_table_init(void) * One exception: acpi=ht continues far enough to enumerate LAPICs */ if (acpi_disabled && !acpi_ht) - return 1; + return; /* * Initialize the ACPI boot-time table parser. */ - error = acpi_table_init(); - if (error) { + if (acpi_table_init()) { disable_acpi(); - return error; + return; } acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); @@ -1562,18 +1555,15 @@ int __init acpi_boot_table_init(void) /* * blacklist may disable ACPI entirely */ - error = acpi_blacklisted(); - if (error) { + if (acpi_blacklisted()) { if (acpi_force) { printk(KERN_WARNING PREFIX "acpi=force override\n"); } else { printk(KERN_WARNING PREFIX "Disabling ACPI support\n"); disable_acpi(); - return error; + return; } } - - return 0; } int __init early_acpi_boot_init(void) -- cgit v1.2.3-70-g09d2 From c2c5d45d46c8c0fd34291dec958670ad4816796f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 31 Dec 2009 03:52:25 +0100 Subject: perf: Stop stack frame walking off kernel addresses boundaries While processing kernel perf callchains, an bad entry can be considered as a valid stack pointer but not as a kernel address. In this case, we hang in an endless loop. This can happen in an x86-32 kernel after processing the last entry in a kernel stacktrace. Just stop the stack frame walking after we encounter an invalid kernel address. This fixes a hard lockup in x86-32. 
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras LKML-Reference: <1262227945-27014-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index c56bc287303..6d817554780 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -123,13 +123,15 @@ print_context_stack_bp(struct thread_info *tinfo, while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { unsigned long addr = *ret_addr; - if (__kernel_text_address(addr)) { - ops->address(data, addr, 1); - frame = frame->next_frame; - ret_addr = &frame->return_address; - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); - } + if (!__kernel_text_address(addr)) + break; + + ops->address(data, addr, 1); + frame = frame->next_frame; + ret_addr = &frame->return_address; + print_ftrace_graph_addr(addr, data, ops, tinfo, graph); } + return (unsigned long)frame; } EXPORT_SYMBOL_GPL(print_context_stack_bp); -- cgit v1.2.3-70-g09d2 From df39a2e48f99e2d706e8fa4dc99fd148eb59449d Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 4 Jan 2010 16:17:21 +0000 Subject: x86: mce.h: Fix warning in header checks Someone isn't reading their build output: Move the definition out of the exported header. Signed-off-by: Alan Cox Cc: linux-kernel@vger.kernelorg Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 858baa061cf..6c3fdd631ed 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -108,10 +108,11 @@ struct mce_log { #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) -extern struct atomic_notifier_head x86_mce_decoder_chain; #ifdef __KERNEL__ +extern struct atomic_notifier_head x86_mce_decoder_chain; + #include #include #include -- cgit v1.2.3-70-g09d2 From fcfbb2b5facd65efa7284cc315225bfe3d1856c2 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 8 Jan 2010 12:13:54 -0800 Subject: x86: SGI UV: Fix mapping of MMIO registers This fixes the problem of the initialization code not correctly mapping the entire MMIO space on a UV system. A side effect is the map_high() interface needed to be changed to accommodate different address and size shifts. 
Signed-off-by: Mike Travis Reviewed-by: Mike Habeck Cc: Cc: Jack Steiner Cc: Linus Torvalds LKML-Reference: <4B479202.7080705@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 5f92494dab6..b8bb869a661 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -374,13 +374,13 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) enum map_type {map_wb, map_uc}; -static __init void map_high(char *id, unsigned long base, int shift, - int max_pnode, enum map_type map_type) +static __init void map_high(char *id, unsigned long base, int pshift, + int bshift, int max_pnode, enum map_type map_type) { unsigned long bytes, paddr; - paddr = base << shift; - bytes = (1UL << shift) * (max_pnode + 1); + paddr = base << pshift; + bytes = (1UL << bshift) * (max_pnode + 1); printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes); if (map_type == map_uc) @@ -396,7 +396,7 @@ static __init void map_gru_high(int max_pnode) gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); if (gru.s.enable) { - map_high("GRU", gru.s.base, shift, max_pnode, map_wb); + map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb); gru_start_paddr = ((u64)gru.s.base << shift); gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); @@ -410,7 +410,7 @@ static __init void map_mmr_high(int max_pnode) mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); if (mmr.s.enable) - map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); + map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc); } static __init void map_mmioh_high(int max_pnode) @@ -420,7 +420,8 @@ static __init void map_mmioh_high(int max_pnode) mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); if (mmioh.s.enable) - map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); + map_high("MMIOH", mmioh.s.base, shift, mmioh.s.m_io, + max_pnode, map_uc); } static __init void map_low_mmrs(void) -- cgit v1.2.3-70-g09d2 From 42590a75019a50012f25a962246498dead428433 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 4 Jan 2010 16:16:23 +0900 Subject: x86/agp: Fix agp_amd64_init and agp_amd64_cleanup This fixes the regression introduced by the commit f405d2c02395a74d3883bd03ded36457aa3697ad. The above commit fixes the following issue: http://marc.info/?l=linux-kernel&m=126192729110083&w=2 However, it doesn't work properly when you remove and insert the agp_amd64 module again. agp_amd64_init() and agp_amd64_cleanup should be called only when gart_iommu is not called earlier (that is, the GART IOMMU is not enabled). We need to use 'gart_iommu_aperture' to see if GART IOMMU is enabled or not. 
Signed-off-by: FUJITA Tomonori Cc: mitov@issp.bas.bg Cc: davej@redhat.com LKML-Reference: <20100104161603L.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/aperture_64.c | 1 + drivers/char/agp/amd64-agp.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 3704997e8b2..f147a95fd84 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -31,6 +31,7 @@ #include int gart_iommu_aperture; +EXPORT_SYMBOL_GPL(gart_iommu_aperture); int gart_iommu_aperture_disabled __initdata; int gart_iommu_aperture_allowed __initdata; diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 5aa7a586a7f..1afb8968a34 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -725,12 +725,11 @@ static struct pci_driver agp_amd64_pci_driver = { int __init agp_amd64_init(void) { int err = 0; - static int done = 0; if (agp_off) return -EINVAL; - if (done++) + if (gart_iommu_aperture) return agp_bridges_found ? 0 : -ENODEV; err = pci_register_driver(&agp_amd64_pci_driver); @@ -771,6 +770,8 @@ int __init agp_amd64_init(void) static void __exit agp_amd64_cleanup(void) { + if (gart_iommu_aperture) + return; if (aperture_resource) release_resource(aperture_resource); pci_unregister_driver(&agp_amd64_pci_driver); -- cgit v1.2.3-70-g09d2 From 864a0922dd128392467611d9857e5138c6a91999 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 13 Jan 2010 10:16:07 +0000 Subject: x86: kernel_thread() -- initialize SS to a known state Before the kernel_thread was converted into "C" we had pt_regs::ss set to __KERNEL_DS (by SAVE_ALL asm macro). Though I must admit I didn't find any *explicit* load of %ss from this structure the better to be on a safe side and set it to a known value. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ian Campbell Cc: Christian Kujau Cc: Jeremy Fitzhardinge Cc: Brian Gerst LKML-Reference: <1263377768-19600-1-git-send-email-ian.campbell@citrix.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c6ee241c8a9..02c3ee013cc 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -288,6 +288,8 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.es = __USER_DS; regs.fs = __KERNEL_PERCPU; regs.gs = __KERNEL_STACK_CANARY; +#else + regs.ss = __KERNEL_DS; #endif regs.orig_ax = -1; -- cgit v1.2.3-70-g09d2 From e68266b7001a4e29af083716f0c36c0d6dbb1b39 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 13 Jan 2010 10:16:08 +0000 Subject: x86: xen: 64-bit kernel RPL should be 0 Under Xen 64 bit guests actually run their kernel in ring 3, however the hypervisor takes care of squashing descriptor the RPLs transparently (in order to allow them to continue to differentiate between user and kernel space CS using the RPL). Therefore the Xen paravirt backend should use RPL==0 instead of 1 (or 3). Using RPL==1 causes generic arch code to take incorrect code paths because it uses "testl $3, , je foo" type tests for a userspace CS and this considers 1==userspace. This issue was previously masked because get_kernel_rpl() was omitted when setting CS in kernel_thread(). This was fixed when kernel_thread() was unified with 32 bit in f443ff4201dd25cd4dec183f9919ecba90c8edc2. 
Signed-off-by: Ian Campbell Cc: Christian Kujau Cc: Jeremy Fitzhardinge Cc: Cyrill Gorcunov Cc: Brian Gerst LKML-Reference: <1263377768-19600-2-git-send-email-ian.campbell@citrix.com> Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 2b26dd5930c..36daccb6864 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1151,9 +1151,13 @@ asmlinkage void __init xen_start_kernel(void) /* keep using Xen gdt for now; no urgent need to change it */ +#ifdef CONFIG_X86_32 pv_info.kernel_rpl = 1; if (xen_feature(XENFEAT_supervisor_mode_kernel)) pv_info.kernel_rpl = 0; +#else + pv_info.kernel_rpl = 0; +#endif /* set the limit of our address space */ xen_reserve_top(); -- cgit v1.2.3-70-g09d2 From 7a1110e861b2666ac09f5708d6fbe71d18ce64bb Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Tue, 12 Jan 2010 15:09:04 -0600 Subject: x86, uv: Add function retrieving node controller revision number Add function for determining the revision id of the SGI UV node controller chip (HUB). This function is needed in a subsequent patch. Signed-off-by: Jack Steiner LKML-Reference: <20100112210904.GA24546@sgi.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uv/uv_hub.h | 12 ++++++++++++ arch/x86/kernel/apic/x2apic_uv_x.c | 6 ++++++ 2 files changed, 18 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index bc54fa965af..40be813fefb 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -495,5 +495,17 @@ static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } +/* + * Get the minimum revision number of the hub chips within the partition. + * 1 - initial rev 1.0 silicon + * 2 - rev 2.0 production silicon + */ +static inline int uv_get_min_hub_revision_id(void) +{ + extern int uv_min_hub_revision_id; + + return uv_min_hub_revision_id; +} + #endif /* CONFIG_X86_64 */ #endif /* _ASM_X86_UV_UV_HUB_H */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index b8bb869a661..0e48de9ff86 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -36,6 +36,8 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); static enum uv_system_type uv_system_type; static u64 gru_start_paddr, gru_end_paddr; +int uv_min_hub_revision_id; +EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); static inline bool is_GRU_range(u64 start, u64 end) { @@ -55,6 +57,10 @@ static int early_get_nodeid(void) mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr)); node_id.v = *mmr; early_iounmap(mmr, sizeof(*mmr)); + + /* Currently, all blades have same revision number */ + uv_min_hub_revision_id = node_id.s.revision; + return node_id.s.node_id; } -- cgit v1.2.3-70-g09d2 From 1d2c867c941d635e53e8ad7bf37d060bb5b25ec5 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 15 Jan 2010 12:09:09 -0600 Subject: x86, uv: Ensure hub revision set for all ACPI modes. Ensure that UV hub revision is set for all ACPI modes. Signed-off-by: Russ Anderson LKML-Reference: <20100115180908.GB7757@sgi.com> Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/apic/x2apic_uv_x.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 0e48de9ff86..21db3cbea7d 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -66,7 +66,10 @@ static int early_get_nodeid(void) static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { + int nodeid; + if (!strcmp(oem_id, "SGI")) { + nodeid = early_get_nodeid(); x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; if (!strcmp(oem_table_id, "UVL")) uv_system_type = UV_LEGACY_APIC; @@ -74,7 +77,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) uv_system_type = UV_X2APIC; else if (!strcmp(oem_table_id, "UVH")) { __get_cpu_var(x2apic_extra_bits) = - early_get_nodeid() << (UV_APIC_PNODE_SHIFT - 1); + nodeid << (UV_APIC_PNODE_SHIFT - 1); uv_system_type = UV_NON_UNIQUE_APIC; return 1; } -- cgit v1.2.3-70-g09d2 From 0bb7a95f5455cd87e6a69e5818bc1f509a98d187 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 16 Jan 2010 10:39:30 +0100 Subject: hw-breakpoints, perf: Fix broken mmiotrace due to dr6 by reference change Commit 62edab9056a6cf0c9207339c8892c923a5217e45 (from June 2009 but merged in 2.6.33) changes notify_die to pass dr6 by reference. However, it forgets to fix the check for DR_STEP in kmmio.c, breaking mmiotrace. It also passes a wrong value to the post handler. This simple fix makes mmiotrace work again. Signed-off-by: Luca Barbieri Acked-by: K.Prasad Cc: Frederic Weisbecker LKML-Reference: <1263634770-14578-1-git-send-email-luca@luca-barbieri.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index c0f6198565e..536fb682336 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -538,14 +538,15 @@ static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) { struct die_args *arg = args; + unsigned long* dr6_p = (unsigned long *)ERR_PTR(arg->err); - if (val == DIE_DEBUG && (arg->err & DR_STEP)) - if (post_kmmio_handler(arg->err, arg->regs) == 1) { + if (val == DIE_DEBUG && (*dr6_p & DR_STEP)) + if (post_kmmio_handler(*dr6_p, arg->regs) == 1) { /* * Reset the BS bit in dr6 (pointed by args->err) to * denote completion of processing */ - (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; + *dr6_p &= ~DR_STEP; return NOTIFY_STOP; } -- cgit v1.2.3-70-g09d2 From dfea91d5a7c795fd6f4e1a97489a98e4e767463e Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 18 Jan 2010 12:10:48 -0800 Subject: x86, apic: use physical mode for IBM summit platforms Chris McDermott from IBM confirmed that hurricane chipset in IBM summit platforms doesn't support logical flat mode. Irrespective of the other things like apic_id's, total number of logical cpu's, Linux kernel should default to physical mode for this system. The 32-bit kernel does so using the OEM checks for the IBM summit platform. Add a similar OEM platform check for the 64bit kernel too. Otherwise the linux kernel boot can hang on this platform under certain bios/platform settings. 
Signed-off-by: Suresh Siddha Tested-by: Ananth N Mavinakayanahalli Cc: Chris McDermott Cc: Yinghai Lu Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic/apic_flat_64.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index eacbd2b31d2..e3c3d820c32 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -240,6 +240,11 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) printk(KERN_DEBUG "system APIC only can use physical flat"); return 1; } + + if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "EXA", 3)) { + printk(KERN_DEBUG "IBM Summit detected, will use apic physical"); + return 1; + } #endif return 0; -- cgit v1.2.3-70-g09d2 From bb668da6d6f2bec8a63838c098d9515eccb22cc4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 18 Jan 2010 12:10:49 -0800 Subject: x86, apic: use logical flat for systems with <= 8 logical cpus We can use logical flat mode if there are <= 8 logical cpu's (irrespective of physical apic id values). This will enable simplified and efficient IPI and device interrupt routing on such platforms. This has been tested to work on both Intel and AMD platforms. Exceptions like IBM summit platform which can't use logical flat mode are addressed by using OEM platform checks. Signed-off-by: Suresh Siddha Signed-off-by: Yinghai Lu Cc: Ananth N Mavinakayanahalli Cc: Chris McDermott Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic/apic.c | 15 +-------------- arch/x86/kernel/apic/probe_64.c | 8 +++----- 2 files changed, 4 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index e80f291472a..3987e4408f7 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -61,12 +61,6 @@ unsigned int boot_cpu_physical_apicid = -1U; /* * The highest APIC ID seen during enumeration. - * - * This determines the messaging protocol we can use: if all APIC IDs - * are in the 0 ... 7 range, then we can use logical addressing which - * has some performance advantages (better broadcasting). - * - * If there's an APIC ID above 8, we use physical addressing. */ unsigned int max_physical_apicid; @@ -1898,14 +1892,7 @@ void __cpuinit generic_processor_info(int apicid, int version) max_physical_apicid = apicid; #ifdef CONFIG_X86_32 - /* - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y - * but we need to work other dependencies like SMP_SUSPEND etc - * before this can be done without some confusion. 
- * if (CPU_HOTPLUG_ENABLED || num_processors > 8) - * - Ashok Raj - */ - if (max_physical_apicid >= 8) { + if (num_processors > 8) { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: if (!APIC_XAPIC(version)) { diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 65edc180fc8..450fe2064a1 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -64,15 +64,13 @@ void __init default_setup_apic_routing(void) apic = &apic_x2apic_phys; else apic = &apic_x2apic_cluster; - printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } #endif - if (apic == &apic_flat) { - if (max_physical_apicid >= 8) + if (apic == &apic_flat && num_processors > 8) apic = &apic_physflat; - printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); - } + + printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); if (is_vsmp_box()) { /* need to update phys_pkg_id */ -- cgit v1.2.3-70-g09d2 From b27d515a49169e5e2a92d621faac761074a8c5b1 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 18 Jan 2010 10:58:01 +0200 Subject: perf: x86: Add support for the ANY bit Propagate the ANY bit into the fixed counter config for v3 and higher. Signed-off-by: Stephane Eranian [a.p.zijlstra@chello.nl: split from larger patch] Signed-off-by: Peter Zijlstra LKML-Reference: <4b5430c6.0f975e0a.1bf9.ffff85fe@mx.google.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 1 + arch/x86/kernel/cpu/perf_event.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8d9f8548a87..1380367dabd 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -19,6 +19,7 @@ #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 #define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +#define ARCH_PERFMON_EVENTSEL_ANY (1 << 21) #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index d616c06e99b..8c1c07073cc 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1343,6 +1343,13 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) bits |= 0x2; if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) bits |= 0x1; + + /* + * ANY bit is supported in v3 and up + */ + if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) + bits |= 0x4; + bits <<= (idx * 4); mask = 0xfULL << (idx * 4); -- cgit v1.2.3-70-g09d2 From d91afd15b041f27d34859c79afa9e172018a86f4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 Jan 2010 16:40:20 +0100 Subject: x86/amd-iommu: Fix possible integer overflow The variable i in this function could be increased to over 2**32 which would result in an integer overflow when using int. Fix it by changing i to unsigned long. 
Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 23824fef789..c2ccbd7b862 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -980,7 +980,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, { int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; struct amd_iommu *iommu; - int i; + unsigned long i; #ifdef CONFIG_IOMMU_STRESS populate = false; -- cgit v1.2.3-70-g09d2 From 2ca762790caf822f7b61430fbaffa3ae4219977f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 Jan 2010 16:45:31 +0100 Subject: x86/amd-iommu: Fix NULL pointer dereference in __detach_device() In the __detach_device function the reference count for a device-domain binding may become zero. This results in the device being removed from the domain and dev_data->domain will be NULL. This is bad because this pointer is dereferenced when trying to unlock the domain->lock. This patch fixes the issue by keeping the domain in a seperate variable. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c2ccbd7b862..4478a48198a 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1489,11 +1489,14 @@ static void __detach_device(struct device *dev) { struct iommu_dev_data *dev_data = get_dev_data(dev); struct iommu_dev_data *alias_data; + struct protection_domain *domain; unsigned long flags; BUG_ON(!dev_data->domain); - spin_lock_irqsave(&dev_data->domain->lock, flags); + domain = dev_data->domain; + + spin_lock_irqsave(&domain->lock, flags); if (dev_data->alias != dev) { alias_data = get_dev_data(dev_data->alias); @@ -1504,7 +1507,7 @@ static void __detach_device(struct device *dev) if (atomic_dec_and_test(&dev_data->bind)) do_detach(dev); - spin_unlock_irqrestore(&dev_data->domain->lock, flags); + spin_unlock_irqrestore(&domain->lock, flags); /* * If we run in passthrough mode the device must be assigned to the -- cgit v1.2.3-70-g09d2 From f5325094379158e6b876ea0010c807bf7890ec8f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 Jan 2010 17:44:35 +0100 Subject: x86/amd-iommu: Fix IOMMU-API initialization for iommu=pt This patch moves the initialization of the iommu-api out of the dma-ops initialization code. This ensures that the iommu-api is initialized even with iommu=pt. 
Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_proto.h | 1 + arch/x86/kernel/amd_iommu.c | 8 ++++++-- arch/x86/kernel/amd_iommu_init.c | 3 +++ 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index 4d817f9e6e7..d2544f1d705 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -31,6 +31,7 @@ extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); extern int amd_iommu_init_devices(void); extern void amd_iommu_uninit_devices(void); extern void amd_iommu_init_notifier(void); +extern void amd_iommu_init_api(void); #ifndef CONFIG_AMD_IOMMU_STATS static inline void amd_iommu_stats_init(void) { } diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 4478a48198a..751ce73c6e1 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -2221,6 +2221,12 @@ static struct dma_map_ops amd_iommu_dma_ops = { /* * The function which clues the AMD IOMMU driver into dma_ops. */ + +void __init amd_iommu_init_api(void) +{ + register_iommu(&amd_iommu_ops); +} + int __init amd_iommu_init_dma_ops(void) { struct amd_iommu *iommu; @@ -2256,8 +2262,6 @@ int __init amd_iommu_init_dma_ops(void) /* Make the driver finally visible to the drivers */ dma_ops = &amd_iommu_dma_ops; - register_iommu(&amd_iommu_ops); - amd_iommu_stats_init(); return 0; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index fb490ce7dd5..9dc91b43147 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1292,9 +1292,12 @@ static int __init amd_iommu_init(void) ret = amd_iommu_init_passthrough(); else ret = amd_iommu_init_dma_ops(); + if (ret) goto free; + amd_iommu_init_api(); + amd_iommu_init_notifier(); enable_iommus(); -- cgit v1.2.3-70-g09d2 From d3ad9373b7c29b63d5e8460a69453718d200cc3b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 Jan 2010 17:55:27 +0100 Subject: x86/amd-iommu: Fix deassignment of a device from the pt_domain Deassigning a device from the passthrough domain does not work and breaks device assignment to kvm guests. This patch fixes the issue. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 751ce73c6e1..adb0ba02570 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1511,9 +1511,11 @@ static void __detach_device(struct device *dev) /* * If we run in passthrough mode the device must be assigned to the - * passthrough domain if it is detached from any other domain + * passthrough domain if it is detached from any other domain. + * Make sure we can deassign from the pt_domain itself. */ - if (iommu_pass_through && dev_data->domain == NULL) + if (iommu_pass_through && + (dev_data->domain == NULL && domain != pt_domain)) __attach_device(dev, pt_domain); } -- cgit v1.2.3-70-g09d2 From 3a5fc0e40cb467e692737bc798bc99773c81e1e2 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 20 Jan 2010 12:10:47 -0800 Subject: x86: Set hotpluggable nodes in nodes_possible_map nodes_possible_map does not currently include nodes that have SRAT entries that are all ACPI_SRAT_MEM_HOT_PLUGGABLE since the bit is cleared in nodes_parsed if it does not have an online address range. 
Unequivocally setting the bit in nodes_parsed is insufficient since existing code, such as acpi_get_nodes(), assumes all nodes in the map have online address ranges. In fact, all code using nodes_parsed assumes such nodes represent an address range of online memory. nodes_possible_map is created by unioning nodes_parsed and cpu_nodes_parsed; the former represents nodes with online memory and the latter represents memoryless nodes. We now set the bit for hotpluggable nodes in cpu_nodes_parsed so that it also gets set in nodes_possible_map. [ hpa: Haicheng Li points out that this makes the naming of the variable cpu_nodes_parsed somewhat counterintuitive. However, leave it as is in the interest of keeping the pure bug fix patch small. ] Signed-off-by: David Rientjes Tested-by: Haicheng Li LKML-Reference: Cc: Signed-off-by: H. Peter Anvin --- arch/x86/mm/srat_64.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index a27124185fc..28c68762648 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -229,9 +229,11 @@ update_nodes_add(int node, unsigned long start, unsigned long end) printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); } - if (changed) + if (changed) { + node_set(node, cpu_nodes_parsed); printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end); + } } /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ -- cgit v1.2.3-70-g09d2 From 73472a46b5b28116b145fb5fc05242c1aa8e1461 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Thu, 21 Jan 2010 11:09:52 -0800 Subject: x86: Disable HPET MSI on ATI SB700/SB800 HPET MSI on platforms with ATI SB700/SB800 as they seem to have some side-effects on floppy DMA. Do not use HPET MSI on such platforms. Original problem report from Mark Hounschell http://lkml.indiana.edu/hypermail/linux/kernel/0912.2/01118.html [ This patch needs to go to stable as well. But, there are some conflicts that prevents the patch from going as is. I can rebase/resubmit to stable once the patch goes upstream. hpa: still Cc:'ing stable@ as an FYI. ] Tested-by: Mark Hounschell Signed-off-by: Venkatesh Pallipadi Cc: LKML-Reference: <20100121190952.GA32523@linux-os.sc.intel.com> Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/hpet.h | 1 + arch/x86/kernel/hpet.c | 8 ++++++++ arch/x86/kernel/quirks.c | 13 +++++++++++++ 3 files changed, 22 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 5d89fd2a369..1d5c08a1bdf 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -67,6 +67,7 @@ extern unsigned long hpet_address; extern unsigned long force_hpet_address; extern u8 hpet_blockid; extern int hpet_force_user; +extern u8 hpet_msi_disable; extern int is_hpet_enabled(void); extern int hpet_enable(void); extern void hpet_disable(void); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ba6e6588460..ad80a1c718c 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -34,6 +34,8 @@ */ unsigned long hpet_address; u8 hpet_blockid; /* OS timer block num */ +u8 hpet_msi_disable; + #ifdef CONFIG_PCI_MSI static unsigned long hpet_num_timers; #endif @@ -596,6 +598,9 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) unsigned int num_timers_used = 0; int i; + if (hpet_msi_disable) + return; + if (boot_cpu_has(X86_FEATURE_ARAT)) return; id = hpet_readl(HPET_ID); @@ -928,6 +933,9 @@ static __init int hpet_late_init(void) hpet_reserve_platform_timers(hpet_readl(HPET_ID)); hpet_print_config(); + if (hpet_msi_disable) + return 0; + if (boot_cpu_has(X86_FEATURE_ARAT)) return 0; diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 18093d7498f..12e9feaa2f7 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -491,6 +491,19 @@ void force_hpet_resume(void) break; } } + +/* + * HPET MSI on some boards (ATI SB700/SB800) has side effect on + * floppy DMA. Disable HPET MSI on such platforms. + */ +static void force_disable_hpet_msi(struct pci_dev *unused) +{ + hpet_msi_disable = 1; +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, + force_disable_hpet_msi); + #endif #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) -- cgit v1.2.3-70-g09d2 From 3b2e3d85aeb80769fb96c15ee4f6e14135328471 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 22 Jan 2010 21:34:56 +0100 Subject: Revert "x86: ucode-amd: Load ucode-patches once ..." Commit d1c84f79a6ba992dc01e312c44a21496303874d6 leads to a regression when microcode_amd.c is compiled into the kernel. It causes a big boot delay because the firmware is not available. See http://marc.info/?l=linux-kernel&m=126267290920060 It also renders the reload sysfs attribute useless. Fixing this is too intrusive for an -rc5 kernel. Thus I'd like to restore the microcode loading behaviour of kernel 2.6.32. CC: Gene Heskett Signed-off-by: Andreas Herrmann LKML-Reference: <20100122203456.GB13792@alberich.amd.com> Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/microcode.h | 2 -- arch/x86/kernel/microcode_amd.c | 44 ++++++++++++---------------------------- arch/x86/kernel/microcode_core.c | 6 ------ 3 files changed, 13 insertions(+), 39 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index c24ca9a5645..ef51b501e22 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -12,8 +12,6 @@ struct device; enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; struct microcode_ops { - void (*init)(struct device *device); - void (*fini)(void); enum ucode_state (*request_microcode_user) (int cpu, const void __user *buf, size_t size); diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 37542b67c57..e1af7c055c7 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -36,9 +36,6 @@ MODULE_LICENSE("GPL v2"); #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 #define UCODE_UCODE_TYPE 0x00000001 -const struct firmware *firmware; -static int supported_cpu; - struct equiv_cpu_entry { u32 installed_cpu; u32 fixed_errata_mask; @@ -77,12 +74,15 @@ static struct equiv_cpu_entry *equiv_cpu_table; static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { + struct cpuinfo_x86 *c = &cpu_data(cpu); u32 dummy; - if (!supported_cpu) - return -1; - memset(csig, 0, sizeof(*csig)); + if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { + pr_warning("microcode: CPU%d: AMD CPU family 0x%x not " + "supported\n", cpu, c->x86); + return -1; + } rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev); return 0; @@ -294,10 +294,14 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) static enum ucode_state request_microcode_fw(int cpu, struct device *device) { + const char *fw_name = "amd-ucode/microcode_amd.bin"; + const struct firmware *firmware; enum ucode_state ret; - if (firmware == NULL) + if (request_firmware(&firmware, fw_name, device)) { + printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); return UCODE_NFOUND; + } if (*(u32 *)firmware->data != UCODE_MAGIC) { pr_err("invalid UCODE_MAGIC (0x%08x)\n", @@ -307,6 +311,8 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) ret = generic_load_microcode(cpu, firmware->data, firmware->size); + release_firmware(firmware); + return ret; } @@ -325,31 +331,7 @@ static void microcode_fini_cpu_amd(int cpu) uci->mc = NULL; } -void init_microcode_amd(struct device *device) -{ - const char *fw_name = "amd-ucode/microcode_amd.bin"; - struct cpuinfo_x86 *c = &boot_cpu_data; - - WARN_ON(c->x86_vendor != X86_VENDOR_AMD); - - if (c->x86 < 0x10) { - pr_warning("AMD CPU family 0x%x not supported\n", c->x86); - return; - } - supported_cpu = 1; - - if (request_firmware(&firmware, fw_name, device)) - pr_err("failed to load file %s\n", fw_name); -} - -void fini_microcode_amd(void) -{ - release_firmware(firmware); -} - static struct microcode_ops microcode_amd_ops = { - .init = init_microcode_amd, - .fini = fini_microcode_amd, .request_microcode_user = request_microcode_user, .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info_amd, diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 0c863243309..cceb5bc3c3c 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -521,9 +521,6 @@ static int __init microcode_init(void) return PTR_ERR(microcode_pdev); } - 
if (microcode_ops->init) - microcode_ops->init(µcode_pdev->dev); - get_online_cpus(); mutex_lock(µcode_mutex); @@ -566,9 +563,6 @@ static void __exit microcode_exit(void) platform_device_unregister(microcode_pdev); - if (microcode_ops->fini) - microcode_ops->fini(); - microcode_ops = NULL; pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); -- cgit v1.2.3-70-g09d2 From b160091802d4a76dd063facb09fcf10bf5d5d747 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sat, 23 Jan 2010 18:27:47 -0800 Subject: x86: Remove "x86 CPU features in debugfs" (CONFIG_X86_CPU_DEBUG) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_X86_CPU_DEBUG, which provides some parsed versions of the x86 CPU configuration via debugfs, has caused boot failures on real hardware. The value of this feature has been marginal at best, as all this information is already available to userspace via generic interfaces. Causes crashes that have not been fixed + minimal utility -> remove. See the referenced LKML thread for more information. Reported-by: Ozan Çağlayan Signed-off-by: H. Peter Anvin LKML-Reference: Cc: Jaswinder Singh Rajput Cc: Linus Torvalds Cc: Rafael J. Wysocki Cc: Yinghai Lu Cc: --- arch/x86/Kconfig | 6 - arch/x86/include/asm/cpu_debug.h | 127 -------- arch/x86/kernel/cpu/Makefile | 2 - arch/x86/kernel/cpu/cpu_debug.c | 688 --------------------------------------- 4 files changed, 823 deletions(-) delete mode 100644 arch/x86/include/asm/cpu_debug.h delete mode 100644 arch/x86/kernel/cpu/cpu_debug.c (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cbcbfdee3ee..eb4092568f9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -989,12 +989,6 @@ config X86_CPUID with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to /dev/cpu/31/cpuid. -config X86_CPU_DEBUG - tristate "/sys/kernel/debug/x86/cpu/* - CPU Debug support" - ---help--- - If you select this option, this will provide various x86 CPUs - information through debugfs. 
- choice prompt "High Memory Support" default HIGHMEM4G if !X86_NUMAQ diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h deleted file mode 100644 index d96c1ee3a95..00000000000 --- a/arch/x86/include/asm/cpu_debug.h +++ /dev/null @@ -1,127 +0,0 @@ -#ifndef _ASM_X86_CPU_DEBUG_H -#define _ASM_X86_CPU_DEBUG_H - -/* - * CPU x86 architecture debug - * - * Copyright(C) 2009 Jaswinder Singh Rajput - */ - -/* Register flags */ -enum cpu_debug_bit { -/* Model Specific Registers (MSRs) */ - CPU_MC_BIT, /* Machine Check */ - CPU_MONITOR_BIT, /* Monitor */ - CPU_TIME_BIT, /* Time */ - CPU_PMC_BIT, /* Performance Monitor */ - CPU_PLATFORM_BIT, /* Platform */ - CPU_APIC_BIT, /* APIC */ - CPU_POWERON_BIT, /* Power-on */ - CPU_CONTROL_BIT, /* Control */ - CPU_FEATURES_BIT, /* Features control */ - CPU_LBRANCH_BIT, /* Last Branch */ - CPU_BIOS_BIT, /* BIOS */ - CPU_FREQ_BIT, /* Frequency */ - CPU_MTTR_BIT, /* MTRR */ - CPU_PERF_BIT, /* Performance */ - CPU_CACHE_BIT, /* Cache */ - CPU_SYSENTER_BIT, /* Sysenter */ - CPU_THERM_BIT, /* Thermal */ - CPU_MISC_BIT, /* Miscellaneous */ - CPU_DEBUG_BIT, /* Debug */ - CPU_PAT_BIT, /* PAT */ - CPU_VMX_BIT, /* VMX */ - CPU_CALL_BIT, /* System Call */ - CPU_BASE_BIT, /* BASE Address */ - CPU_VER_BIT, /* Version ID */ - CPU_CONF_BIT, /* Configuration */ - CPU_SMM_BIT, /* System mgmt mode */ - CPU_SVM_BIT, /*Secure Virtual Machine*/ - CPU_OSVM_BIT, /* OS-Visible Workaround*/ -/* Standard Registers */ - CPU_TSS_BIT, /* Task Stack Segment */ - CPU_CR_BIT, /* Control Registers */ - CPU_DT_BIT, /* Descriptor Table */ -/* End of Registers flags */ - CPU_REG_ALL_BIT, /* Select all Registers */ -}; - -#define CPU_REG_ALL (~0) /* Select all Registers */ - -#define CPU_MC (1 << CPU_MC_BIT) -#define CPU_MONITOR (1 << CPU_MONITOR_BIT) -#define CPU_TIME (1 << CPU_TIME_BIT) -#define CPU_PMC (1 << CPU_PMC_BIT) -#define CPU_PLATFORM (1 << CPU_PLATFORM_BIT) -#define CPU_APIC (1 << CPU_APIC_BIT) -#define CPU_POWERON (1 << CPU_POWERON_BIT) -#define CPU_CONTROL (1 << CPU_CONTROL_BIT) -#define CPU_FEATURES (1 << CPU_FEATURES_BIT) -#define CPU_LBRANCH (1 << CPU_LBRANCH_BIT) -#define CPU_BIOS (1 << CPU_BIOS_BIT) -#define CPU_FREQ (1 << CPU_FREQ_BIT) -#define CPU_MTRR (1 << CPU_MTTR_BIT) -#define CPU_PERF (1 << CPU_PERF_BIT) -#define CPU_CACHE (1 << CPU_CACHE_BIT) -#define CPU_SYSENTER (1 << CPU_SYSENTER_BIT) -#define CPU_THERM (1 << CPU_THERM_BIT) -#define CPU_MISC (1 << CPU_MISC_BIT) -#define CPU_DEBUG (1 << CPU_DEBUG_BIT) -#define CPU_PAT (1 << CPU_PAT_BIT) -#define CPU_VMX (1 << CPU_VMX_BIT) -#define CPU_CALL (1 << CPU_CALL_BIT) -#define CPU_BASE (1 << CPU_BASE_BIT) -#define CPU_VER (1 << CPU_VER_BIT) -#define CPU_CONF (1 << CPU_CONF_BIT) -#define CPU_SMM (1 << CPU_SMM_BIT) -#define CPU_SVM (1 << CPU_SVM_BIT) -#define CPU_OSVM (1 << CPU_OSVM_BIT) -#define CPU_TSS (1 << CPU_TSS_BIT) -#define CPU_CR (1 << CPU_CR_BIT) -#define CPU_DT (1 << CPU_DT_BIT) - -/* Register file flags */ -enum cpu_file_bit { - CPU_INDEX_BIT, /* index */ - CPU_VALUE_BIT, /* value */ -}; - -#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT) - -#define MAX_CPU_FILES 512 - -struct cpu_private { - unsigned cpu; - unsigned type; - unsigned reg; - unsigned file; -}; - -struct cpu_debug_base { - char *name; /* Register name */ - unsigned flag; /* Register flag */ - unsigned write; /* Register write flag */ -}; - -/* - * Currently it looks similar to cpu_debug_base but once we add more files - * cpu_file_base will go in different direction - */ -struct cpu_file_base { - char *name; /* Register file 
name */ - unsigned flag; /* Register file flag */ - unsigned write; /* Register write flag */ -}; - -struct cpu_cpuX_base { - struct dentry *dentry; /* Register dentry */ - int init; /* Register index file */ -}; - -struct cpu_debug_range { - unsigned min; /* Register range min */ - unsigned max; /* Register range max */ - unsigned flag; /* Supported flags */ -}; - -#endif /* _ASM_X86_CPU_DEBUG_H */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 1d2cb383410..c202b62f367 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -19,8 +19,6 @@ obj-y += vmware.o hypervisor.o sched.o obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += bugs_64.o -obj-$(CONFIG_X86_CPU_DEBUG) += cpu_debug.o - obj-$(CONFIG_CPU_SUP_INTEL) += intel.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c deleted file mode 100644 index b368cd86299..00000000000 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ /dev/null @@ -1,688 +0,0 @@ -/* - * CPU x86 architecture debug code - * - * Copyright(C) 2009 Jaswinder Singh Rajput - * - * For licencing details see kernel-base/COPYING - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpud_arr); -static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], cpud_priv_arr); -static DEFINE_PER_CPU(int, cpud_priv_count); - -static DEFINE_MUTEX(cpu_debug_lock); - -static struct dentry *cpu_debugfs_dir; - -static struct cpu_debug_base cpu_base[] = { - { "mc", CPU_MC, 0 }, - { "monitor", CPU_MONITOR, 0 }, - { "time", CPU_TIME, 0 }, - { "pmc", CPU_PMC, 1 }, - { "platform", CPU_PLATFORM, 0 }, - { "apic", CPU_APIC, 0 }, - { "poweron", CPU_POWERON, 0 }, - { "control", CPU_CONTROL, 0 }, - { "features", CPU_FEATURES, 0 }, - { "lastbranch", CPU_LBRANCH, 0 }, - { "bios", CPU_BIOS, 0 }, - { "freq", CPU_FREQ, 0 }, - { "mtrr", CPU_MTRR, 0 }, - { "perf", CPU_PERF, 0 }, - { "cache", CPU_CACHE, 0 }, - { "sysenter", CPU_SYSENTER, 0 }, - { "therm", CPU_THERM, 0 }, - { "misc", CPU_MISC, 0 }, - { "debug", CPU_DEBUG, 0 }, - { "pat", CPU_PAT, 0 }, - { "vmx", CPU_VMX, 0 }, - { "call", CPU_CALL, 0 }, - { "base", CPU_BASE, 0 }, - { "ver", CPU_VER, 0 }, - { "conf", CPU_CONF, 0 }, - { "smm", CPU_SMM, 0 }, - { "svm", CPU_SVM, 0 }, - { "osvm", CPU_OSVM, 0 }, - { "tss", CPU_TSS, 0 }, - { "cr", CPU_CR, 0 }, - { "dt", CPU_DT, 0 }, - { "registers", CPU_REG_ALL, 0 }, -}; - -static struct cpu_file_base cpu_file[] = { - { "index", CPU_REG_ALL, 0 }, - { "value", CPU_REG_ALL, 1 }, -}; - -/* CPU Registers Range */ -static struct cpu_debug_range cpu_reg_range[] = { - { 0x00000000, 0x00000001, CPU_MC, }, - { 0x00000006, 0x00000007, CPU_MONITOR, }, - { 0x00000010, 0x00000010, CPU_TIME, }, - { 0x00000011, 0x00000013, CPU_PMC, }, - { 0x00000017, 0x00000017, CPU_PLATFORM, }, - { 0x0000001B, 0x0000001B, CPU_APIC, }, - { 0x0000002A, 0x0000002B, CPU_POWERON, }, - { 0x0000002C, 0x0000002C, CPU_FREQ, }, - { 0x0000003A, 0x0000003A, CPU_CONTROL, }, - { 0x00000040, 0x00000047, CPU_LBRANCH, }, - { 0x00000060, 0x00000067, CPU_LBRANCH, }, - { 0x00000079, 0x00000079, CPU_BIOS, }, - { 0x00000088, 0x0000008A, CPU_CACHE, }, - { 0x0000008B, 0x0000008B, CPU_BIOS, }, - { 0x0000009B, 0x0000009B, CPU_MONITOR, }, - { 0x000000C1, 0x000000C4, CPU_PMC, 
}, - { 0x000000CD, 0x000000CD, CPU_FREQ, }, - { 0x000000E7, 0x000000E8, CPU_PERF, }, - { 0x000000FE, 0x000000FE, CPU_MTRR, }, - - { 0x00000116, 0x0000011E, CPU_CACHE, }, - { 0x00000174, 0x00000176, CPU_SYSENTER, }, - { 0x00000179, 0x0000017B, CPU_MC, }, - { 0x00000186, 0x00000189, CPU_PMC, }, - { 0x00000198, 0x00000199, CPU_PERF, }, - { 0x0000019A, 0x0000019A, CPU_TIME, }, - { 0x0000019B, 0x0000019D, CPU_THERM, }, - { 0x000001A0, 0x000001A0, CPU_MISC, }, - { 0x000001C9, 0x000001C9, CPU_LBRANCH, }, - { 0x000001D7, 0x000001D8, CPU_LBRANCH, }, - { 0x000001D9, 0x000001D9, CPU_DEBUG, }, - { 0x000001DA, 0x000001E0, CPU_LBRANCH, }, - - { 0x00000200, 0x0000020F, CPU_MTRR, }, - { 0x00000250, 0x00000250, CPU_MTRR, }, - { 0x00000258, 0x00000259, CPU_MTRR, }, - { 0x00000268, 0x0000026F, CPU_MTRR, }, - { 0x00000277, 0x00000277, CPU_PAT, }, - { 0x000002FF, 0x000002FF, CPU_MTRR, }, - - { 0x00000300, 0x00000311, CPU_PMC, }, - { 0x00000345, 0x00000345, CPU_PMC, }, - { 0x00000360, 0x00000371, CPU_PMC, }, - { 0x0000038D, 0x00000390, CPU_PMC, }, - { 0x000003A0, 0x000003BE, CPU_PMC, }, - { 0x000003C0, 0x000003CD, CPU_PMC, }, - { 0x000003E0, 0x000003E1, CPU_PMC, }, - { 0x000003F0, 0x000003F2, CPU_PMC, }, - - { 0x00000400, 0x00000417, CPU_MC, }, - { 0x00000480, 0x0000048B, CPU_VMX, }, - - { 0x00000600, 0x00000600, CPU_DEBUG, }, - { 0x00000680, 0x0000068F, CPU_LBRANCH, }, - { 0x000006C0, 0x000006CF, CPU_LBRANCH, }, - - { 0x000107CC, 0x000107D3, CPU_PMC, }, - - { 0xC0000080, 0xC0000080, CPU_FEATURES, }, - { 0xC0000081, 0xC0000084, CPU_CALL, }, - { 0xC0000100, 0xC0000102, CPU_BASE, }, - { 0xC0000103, 0xC0000103, CPU_TIME, }, - - { 0xC0010000, 0xC0010007, CPU_PMC, }, - { 0xC0010010, 0xC0010010, CPU_CONF, }, - { 0xC0010015, 0xC0010015, CPU_CONF, }, - { 0xC0010016, 0xC001001A, CPU_MTRR, }, - { 0xC001001D, 0xC001001D, CPU_MTRR, }, - { 0xC001001F, 0xC001001F, CPU_CONF, }, - { 0xC0010030, 0xC0010035, CPU_BIOS, }, - { 0xC0010044, 0xC0010048, CPU_MC, }, - { 0xC0010050, 0xC0010056, CPU_SMM, }, - { 0xC0010058, 0xC0010058, CPU_CONF, }, - { 0xC0010060, 0xC0010060, CPU_CACHE, }, - { 0xC0010061, 0xC0010068, CPU_SMM, }, - { 0xC0010069, 0xC001006B, CPU_SMM, }, - { 0xC0010070, 0xC0010071, CPU_SMM, }, - { 0xC0010111, 0xC0010113, CPU_SMM, }, - { 0xC0010114, 0xC0010118, CPU_SVM, }, - { 0xC0010140, 0xC0010141, CPU_OSVM, }, - { 0xC0011022, 0xC0011023, CPU_CONF, }, -}; - -static int is_typeflag_valid(unsigned cpu, unsigned flag) -{ - int i; - - /* Standard Registers should be always valid */ - if (flag >= CPU_TSS) - return 1; - - for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { - if (cpu_reg_range[i].flag == flag) - return 1; - } - - /* Invalid */ - return 0; -} - -static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max, - int index, unsigned flag) -{ - if (cpu_reg_range[index].flag == flag) { - *min = cpu_reg_range[index].min; - *max = cpu_reg_range[index].max; - } else - *max = 0; - - return *max; -} - -/* This function can also be called with seq = NULL for printk */ -static void print_cpu_data(struct seq_file *seq, unsigned type, - u32 low, u32 high) -{ - struct cpu_private *priv; - u64 val = high; - - if (seq) { - priv = seq->private; - if (priv->file) { - val = (val << 32) | low; - seq_printf(seq, "0x%llx\n", val); - } else - seq_printf(seq, " %08x: %08x_%08x\n", - type, high, low); - } else - printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low); -} - -/* This function can also be called with seq = NULL for printk */ -static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag) -{ - unsigned msr, 
msr_min, msr_max; - struct cpu_private *priv; - u32 low, high; - int i; - - if (seq) { - priv = seq->private; - if (priv->file) { - if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg, - &low, &high)) - print_cpu_data(seq, priv->reg, low, high); - return; - } - } - - for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { - if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag)) - continue; - - for (msr = msr_min; msr <= msr_max; msr++) { - if (rdmsr_safe_on_cpu(cpu, msr, &low, &high)) - continue; - print_cpu_data(seq, msr, low, high); - } - } -} - -static void print_tss(void *arg) -{ - struct pt_regs *regs = task_pt_regs(current); - struct seq_file *seq = arg; - unsigned int seg; - - seq_printf(seq, " RAX\t: %016lx\n", regs->ax); - seq_printf(seq, " RBX\t: %016lx\n", regs->bx); - seq_printf(seq, " RCX\t: %016lx\n", regs->cx); - seq_printf(seq, " RDX\t: %016lx\n", regs->dx); - - seq_printf(seq, " RSI\t: %016lx\n", regs->si); - seq_printf(seq, " RDI\t: %016lx\n", regs->di); - seq_printf(seq, " RBP\t: %016lx\n", regs->bp); - seq_printf(seq, " ESP\t: %016lx\n", regs->sp); - -#ifdef CONFIG_X86_64 - seq_printf(seq, " R08\t: %016lx\n", regs->r8); - seq_printf(seq, " R09\t: %016lx\n", regs->r9); - seq_printf(seq, " R10\t: %016lx\n", regs->r10); - seq_printf(seq, " R11\t: %016lx\n", regs->r11); - seq_printf(seq, " R12\t: %016lx\n", regs->r12); - seq_printf(seq, " R13\t: %016lx\n", regs->r13); - seq_printf(seq, " R14\t: %016lx\n", regs->r14); - seq_printf(seq, " R15\t: %016lx\n", regs->r15); -#endif - - asm("movl %%cs,%0" : "=r" (seg)); - seq_printf(seq, " CS\t: %04x\n", seg); - asm("movl %%ds,%0" : "=r" (seg)); - seq_printf(seq, " DS\t: %04x\n", seg); - seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff); - asm("movl %%es,%0" : "=r" (seg)); - seq_printf(seq, " ES\t: %04x\n", seg); - asm("movl %%fs,%0" : "=r" (seg)); - seq_printf(seq, " FS\t: %04x\n", seg); - asm("movl %%gs,%0" : "=r" (seg)); - seq_printf(seq, " GS\t: %04x\n", seg); - - seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags); - - seq_printf(seq, " EIP\t: %016lx\n", regs->ip); -} - -static void print_cr(void *arg) -{ - struct seq_file *seq = arg; - - seq_printf(seq, " cr0\t: %016lx\n", read_cr0()); - seq_printf(seq, " cr2\t: %016lx\n", read_cr2()); - seq_printf(seq, " cr3\t: %016lx\n", read_cr3()); - seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe()); -#ifdef CONFIG_X86_64 - seq_printf(seq, " cr8\t: %016lx\n", read_cr8()); -#endif -} - -static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt) -{ - seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size)); -} - -static void print_dt(void *seq) -{ - struct desc_ptr dt; - unsigned long ldt; - - /* IDT */ - store_idt((struct desc_ptr *)&dt); - print_desc_ptr("IDT", seq, dt); - - /* GDT */ - store_gdt((struct desc_ptr *)&dt); - print_desc_ptr("GDT", seq, dt); - - /* LDT */ - store_ldt(ldt); - seq_printf(seq, " LDT\t: %016lx\n", ldt); - - /* TR */ - store_tr(ldt); - seq_printf(seq, " TR\t: %016lx\n", ldt); -} - -static void print_dr(void *arg) -{ - struct seq_file *seq = arg; - unsigned long dr; - int i; - - for (i = 0; i < 8; i++) { - /* Ignore db4, db5 */ - if ((i == 4) || (i == 5)) - continue; - get_debugreg(dr, i); - seq_printf(seq, " dr%d\t: %016lx\n", i, dr); - } - - seq_printf(seq, "\n MSR\t:\n"); -} - -static void print_apic(void *arg) -{ - struct seq_file *seq = arg; - -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(seq, " LAPIC\t:\n"); - seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24); - seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR)); - 
seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI)); - seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI)); - seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI)); - seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR)); - seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR)); - seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV)); - seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR)); - seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR)); - seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR)); - seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2)); - seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT)); - seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR)); - seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC)); - seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0)); - seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1)); - seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR)); - seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT)); - seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT)); - seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR)); - if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { - unsigned int i, v, maxeilvt; - - v = apic_read(APIC_EFEAT); - maxeilvt = (v >> 16) & 0xff; - seq_printf(seq, " EFEAT\t\t: %08x\n", v); - seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL)); - - for (i = 0; i < maxeilvt; i++) { - v = apic_read(APIC_EILVTn(i)); - seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v); - } - } -#endif /* CONFIG_X86_LOCAL_APIC */ - seq_printf(seq, "\n MSR\t:\n"); -} - -static int cpu_seq_show(struct seq_file *seq, void *v) -{ - struct cpu_private *priv = seq->private; - - if (priv == NULL) - return -EINVAL; - - switch (cpu_base[priv->type].flag) { - case CPU_TSS: - smp_call_function_single(priv->cpu, print_tss, seq, 1); - break; - case CPU_CR: - smp_call_function_single(priv->cpu, print_cr, seq, 1); - break; - case CPU_DT: - smp_call_function_single(priv->cpu, print_dt, seq, 1); - break; - case CPU_DEBUG: - if (priv->file == CPU_INDEX_BIT) - smp_call_function_single(priv->cpu, print_dr, seq, 1); - print_msr(seq, priv->cpu, cpu_base[priv->type].flag); - break; - case CPU_APIC: - if (priv->file == CPU_INDEX_BIT) - smp_call_function_single(priv->cpu, print_apic, seq, 1); - print_msr(seq, priv->cpu, cpu_base[priv->type].flag); - break; - - default: - print_msr(seq, priv->cpu, cpu_base[priv->type].flag); - break; - } - seq_printf(seq, "\n"); - - return 0; -} - -static void *cpu_seq_start(struct seq_file *seq, loff_t *pos) -{ - if (*pos == 0) /* One time is enough ;-) */ - return seq; - - return NULL; -} - -static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - (*pos)++; - - return cpu_seq_start(seq, pos); -} - -static void cpu_seq_stop(struct seq_file *seq, void *v) -{ -} - -static const struct seq_operations cpu_seq_ops = { - .start = cpu_seq_start, - .next = cpu_seq_next, - .stop = cpu_seq_stop, - .show = cpu_seq_show, -}; - -static int cpu_seq_open(struct inode *inode, struct file *file) -{ - struct cpu_private *priv = inode->i_private; - struct seq_file *seq; - int err; - - err = seq_open(file, &cpu_seq_ops); - if (!err) { - seq = file->private_data; - seq->private = priv; - } - - return err; -} - -static int write_msr(struct cpu_private *priv, u64 val) -{ - u32 low, high; - - high = (val >> 32) & 0xffffffff; - low = val & 0xffffffff; - - if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high)) - return 0; - - return -EPERM; -} 
- -static int write_cpu_register(struct cpu_private *priv, const char *buf) -{ - int ret = -EPERM; - u64 val; - - ret = strict_strtoull(buf, 0, &val); - if (ret < 0) - return ret; - - /* Supporting only MSRs */ - if (priv->type < CPU_TSS_BIT) - return write_msr(priv, val); - - return ret; -} - -static ssize_t cpu_write(struct file *file, const char __user *ubuf, - size_t count, loff_t *off) -{ - struct seq_file *seq = file->private_data; - struct cpu_private *priv = seq->private; - char buf[19]; - - if ((priv == NULL) || (count >= sizeof(buf))) - return -EINVAL; - - if (copy_from_user(&buf, ubuf, count)) - return -EFAULT; - - buf[count] = 0; - - if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write)) - if (!write_cpu_register(priv, buf)) - return count; - - return -EACCES; -} - -static const struct file_operations cpu_fops = { - .owner = THIS_MODULE, - .open = cpu_seq_open, - .read = seq_read, - .write = cpu_write, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg, - unsigned file, struct dentry *dentry) -{ - struct cpu_private *priv = NULL; - - /* Already intialized */ - if (file == CPU_INDEX_BIT) - if (per_cpu(cpud_arr[type].init, cpu)) - return 0; - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (priv == NULL) - return -ENOMEM; - - priv->cpu = cpu; - priv->type = type; - priv->reg = reg; - priv->file = file; - mutex_lock(&cpu_debug_lock); - per_cpu(cpud_priv_arr[type], cpu) = priv; - per_cpu(cpud_priv_count, cpu)++; - mutex_unlock(&cpu_debug_lock); - - if (file) - debugfs_create_file(cpu_file[file].name, S_IRUGO, - dentry, (void *)priv, &cpu_fops); - else { - debugfs_create_file(cpu_base[type].name, S_IRUGO, - per_cpu(cpud_arr[type].dentry, cpu), - (void *)priv, &cpu_fops); - mutex_lock(&cpu_debug_lock); - per_cpu(cpud_arr[type].init, cpu) = 1; - mutex_unlock(&cpu_debug_lock); - } - - return 0; -} - -static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg, - struct dentry *dentry) -{ - unsigned file; - int err = 0; - - for (file = 0; file < ARRAY_SIZE(cpu_file); file++) { - err = cpu_create_file(cpu, type, reg, file, dentry); - if (err) - return err; - } - - return err; -} - -static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry) -{ - struct dentry *cpu_dentry = NULL; - unsigned reg, reg_min, reg_max; - int i, err = 0; - char reg_dir[12]; - u32 low, high; - - for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { - if (!get_cpu_range(cpu, &reg_min, &reg_max, i, - cpu_base[type].flag)) - continue; - - for (reg = reg_min; reg <= reg_max; reg++) { - if (rdmsr_safe_on_cpu(cpu, reg, &low, &high)) - continue; - - sprintf(reg_dir, "0x%x", reg); - cpu_dentry = debugfs_create_dir(reg_dir, dentry); - err = cpu_init_regfiles(cpu, type, reg, cpu_dentry); - if (err) - return err; - } - } - - return err; -} - -static int cpu_init_allreg(unsigned cpu, struct dentry *dentry) -{ - struct dentry *cpu_dentry = NULL; - unsigned type; - int err = 0; - - for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) { - if (!is_typeflag_valid(cpu, cpu_base[type].flag)) - continue; - cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry); - per_cpu(cpud_arr[type].dentry, cpu) = cpu_dentry; - - if (type < CPU_TSS_BIT) - err = cpu_init_msr(cpu, type, cpu_dentry); - else - err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT, - cpu_dentry); - if (err) - return err; - } - - return err; -} - -static int cpu_init_cpu(void) -{ - struct dentry *cpu_dentry = NULL; - struct cpuinfo_x86 *cpui; - char
cpu_dir[12]; - unsigned cpu; - int err = 0; - - for (cpu = 0; cpu < nr_cpu_ids; cpu++) { - cpui = &cpu_data(cpu); - if (!cpu_has(cpui, X86_FEATURE_MSR)) - continue; - - sprintf(cpu_dir, "cpu%d", cpu); - cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir); - err = cpu_init_allreg(cpu, cpu_dentry); - - pr_info("cpu%d(%d) debug files %d\n", - cpu, nr_cpu_ids, per_cpu(cpud_priv_count, cpu)); - if (per_cpu(cpud_priv_count, cpu) > MAX_CPU_FILES) { - pr_err("Register files count %d exceeds limit %d\n", - per_cpu(cpud_priv_count, cpu), MAX_CPU_FILES); - per_cpu(cpud_priv_count, cpu) = MAX_CPU_FILES; - err = -ENFILE; - } - if (err) - return err; - } - - return err; -} - -static int __init cpu_debug_init(void) -{ - cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir); - - return cpu_init_cpu(); -} - -static void __exit cpu_debug_exit(void) -{ - int i, cpu; - - if (cpu_debugfs_dir) - debugfs_remove_recursive(cpu_debugfs_dir); - - for (cpu = 0; cpu < nr_cpu_ids; cpu++) - for (i = 0; i < per_cpu(cpud_priv_count, cpu); i++) - kfree(per_cpu(cpud_priv_arr[i], cpu)); -} - -module_init(cpu_debug_init); -module_exit(cpu_debug_exit); - -MODULE_AUTHOR("Jaswinder Singh Rajput"); -MODULE_DESCRIPTION("CPU Debug module"); -MODULE_LICENSE("GPL"); -- cgit v1.2.3-70-g09d2 From a5d36f82c4f3e852b61fdf1fee13463c8aa91b90 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 29 Dec 2009 12:42:16 +0200 Subject: KVM: Fix race between APIC TMR and IRR When we queue an interrupt to the local apic, we set the IRR before the TMR. The vcpu can pick up the IRR and inject the interrupt before setting the TMR, and perhaps even EOI it, causing incorrect behaviour. The race is really insignificant since it can only occur on the first interrupt (usually following interrupts will not change TMR), but it's better closed than open. Fixed by reordering setting the TMR vs IRR. Cc: stable@kernel.org Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/lapic.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3063a0c4858..ba8c045da78 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -373,6 +373,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (unlikely(!apic_enabled(apic))) break; + if (trig_mode) { + apic_debug("level trig mode for vector %d", vector); + apic_set_vector(vector, apic->regs + APIC_TMR); + } else + apic_clear_vector(vector, apic->regs + APIC_TMR); + result = !apic_test_and_set_irr(vector, apic); trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, trig_mode, vector, !result); @@ -383,11 +389,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, break; } - if (trig_mode) { - apic_debug("level trig mode for vector %d", vector); - apic_set_vector(vector, apic->regs + APIC_TMR); - } else - apic_clear_vector(vector, apic->regs + APIC_TMR); kvm_vcpu_kick(vcpu); break; -- cgit v1.2.3-70-g09d2 From 82b7005f0e72d8d1a8226e4c192cbb0850d10b3f Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 5 Jan 2010 19:02:28 +0800 Subject: KVM: x86: Fix host_mapping_level() When found a error hva, should not return PAGE_SIZE but the level... Also clean up the coding style of the following loop. 
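Put differently, once gfn_to_hva() hands back an error address the function has to fall back to the smallest mapping level rather than a byte count such as PAGE_SIZE. A simplified sketch of the corrected path (the hunk below is the authoritative change):

	addr = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(addr))
		return PT_PAGE_TABLE_LEVEL;	/* smallest (4K) level, not a size */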
Cc: stable@kernel.org Signed-off-by: Sheng Yang Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4c3e5b2314c..89a49fb46a2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -477,7 +477,7 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn) addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) - return page_size; + return PT_PAGE_TABLE_LEVEL; down_read(&current->mm->mmap_sem); vma = find_vma(current->mm, addr); @@ -515,11 +515,9 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) if (host_level == PT_PAGE_TABLE_LEVEL) return host_level; - for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) { - + for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) break; - } return level - 1; } -- cgit v1.2.3-70-g09d2 From a6085fbaf65ab09bfb5ec8d902d6d21680fe1895 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 14 Jan 2010 17:41:27 -0200 Subject: KVM: MMU: bail out pagewalk on kvm_read_guest error Exit the guest pagetable walk loop if reading the gpte failed. Otherwise it's possible to enter an endless loop processing the previous present pte. Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/paging_tmpl.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 58a0f1e8859..ede2131a922 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -150,7 +150,9 @@ walk: walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; - kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)); + if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) + goto not_present; + trace_kvm_mmu_paging_element(pte, walker->level); if (!is_present_gpte(pte)) -- cgit v1.2.3-70-g09d2 From 36cb93fd6b6bf7e9163a69a8bf20207aed5fea44 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Jan 2010 14:18:47 +0800 Subject: KVM: x86: Fix probable memory leak of vcpu->arch.mce_banks vcpu->arch.mce_banks is allocated in kvm_arch_vcpu_init() but never freed anywhere, which may cause a memory leak. Fix this by freeing it in kvm_arch_vcpu_uninit(). Cc: stable@kernel.org Signed-off-by: Wei Yongjun Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6651dbf5867..b265eecc741 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5088,6 +5088,7 @@ fail: void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { + kfree(vcpu->arch.mce_banks); kvm_free_lapic(vcpu); down_read(&vcpu->kvm->slots_lock); kvm_mmu_destroy(vcpu); -- cgit v1.2.3-70-g09d2 From 443c39bc9ef7d8f648408d74c97e943f3bb3f48a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Jan 2010 14:21:29 +0800 Subject: KVM: x86: Fix leak of lapic data in kvm_arch_vcpu_init() In kvm_arch_vcpu_init(), if the allocation of vcpu->arch.mce_banks fails, the lapic data is not freed. This patch fixes that.
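For readability, the error handling that these two mce_banks fixes establish follows the usual reverse-order unwind; a condensed sketch of the pattern (allocation arguments elided, labels as in the hunk that follows):

	vcpu->arch.mce_banks = kzalloc(..., GFP_KERNEL);
	if (!vcpu->arch.mce_banks) {
		r = -ENOMEM;
		goto fail_free_lapic;		/* free the lapic allocated earlier */
	}
	...
fail_free_lapic:
	kvm_free_lapic(vcpu);
fail_mmu_destroy:
	kvm_mmu_destroy(vcpu);

and the matching kfree(vcpu->arch.mce_banks) in kvm_arch_vcpu_uninit() releases the banks on the normal teardown path.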
Cc: stable@kernel.org Signed-off-by: Wei Yongjun Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b265eecc741..1ddcad452ad 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5072,12 +5072,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) GFP_KERNEL); if (!vcpu->arch.mce_banks) { r = -ENOMEM; - goto fail_mmu_destroy; + goto fail_free_lapic; } vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; return 0; - +fail_free_lapic: + kvm_free_lapic(vcpu); fail_mmu_destroy: kvm_mmu_destroy(vcpu); fail_free_pio_data: -- cgit v1.2.3-70-g09d2 From d8cc108f4fab42b380c6b3f3356f99e8dd5372e2 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 18 Jan 2010 11:25:36 -0600 Subject: oprofile/x86: fix crash when profiling more than 28 events With multiplexing enabled oprofile crashs when profiling more than 28 events. This patch fixes this. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cb88b1a0bd5..76d4f566ade 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -222,7 +222,7 @@ static void nmi_cpu_switch(void *dummy) /* move to next set */ si += model->num_counters; - if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) + if ((si >= model->num_virt_counters) || (counter_config[si].count == 0)) per_cpu(switch_index, cpu) = 0; else per_cpu(switch_index, cpu) = si; -- cgit v1.2.3-70-g09d2 From e83e452b0692c9c13372540deb88a77d4ae2553d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 21 Jan 2010 23:26:27 +0100 Subject: oprofile/x86: add Xeon 7500 series support Add Xeon 7500 series support to oprofile. Straight forward: it's the same as Core i7, so just detect the model number. No user space changes needed. Signed-off-by: Andi Kleen Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 76d4f566ade..3347f696edc 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -598,6 +598,7 @@ static int __init ppro_init(char **cpu_type) case 15: case 23: *cpu_type = "i386/core_2"; break; + case 0x2e: case 26: spec = &op_arch_perfmon_spec; *cpu_type = "i386/core_i7"; -- cgit v1.2.3-70-g09d2 From da482474b8396e1a099c37ffc6541b78775aedb4 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Tue, 26 Jan 2010 20:37:22 -0600 Subject: x86, msr/cpuid: Pass the number of minors when unregistering MSR and CPUID drivers. Pass the number of minors when unregistering MSR and CPUID drivers. Reported-by: Dean Nelson Signed-off-by: Dean Nelson LKML-Reference: <20100127023722.GA22305@sgi.com> Signed-off-by: Russ Anderson Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/cpuid.c | 2 +- arch/x86/kernel/msr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index cb27fd6136c..83e5e628de7 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -229,7 +229,7 @@ static void __exit cpuid_exit(void) for_each_online_cpu(cpu) cpuid_device_destroy(cpu); class_destroy(cpuid_class); - unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); + __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 4bd93c9b2b2..206735ac8cb 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -285,7 +285,7 @@ static void __exit msr_exit(void) for_each_online_cpu(cpu) msr_device_destroy(cpu); class_destroy(msr_class); - unregister_chrdev(MSR_MAJOR, "cpu/msr"); + __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); unregister_hotcpu_notifier(&msr_class_cpu_notifier); } -- cgit v1.2.3-70-g09d2 From aca3bb5910119d4cf6c28568a642582efb4cc14a Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Fri, 22 Jan 2010 09:41:40 -0600 Subject: x86, UV: Fix RTC latency bug by reading replicated cachelines For SGI UV node controllers (HUB) rev 2.0 or greater, use replicated cachelines to read the RTC timer. This optimization allows faster simulataneous reads from a given socket. Signed-off-by: Dimitri Sivanich Cc: Jack Steiner LKML-Reference: <20100122154140.GB4975@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/uv_time.c | 13 ++++++++++++- drivers/char/uv_mmtimer.c | 18 +++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c index 3c84aa001c1..2b75ef638db 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/kernel/uv_time.c @@ -282,10 +282,21 @@ static int uv_rtc_unset_timer(int cpu, int force) /* * Read the RTC. + * + * Starting with HUB rev 2.0, the UV RTC register is replicated across all + * cachelines of it's own page. This allows faster simultaneous reads + * from a given socket. */ static cycle_t uv_read_rtc(struct clocksource *cs) { - return (cycle_t)uv_read_local_mmr(UVH_RTC); + unsigned long offset; + + if (uv_get_min_hub_revision_id() == 1) + offset = 0; + else + offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; + + return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); } /* diff --git a/drivers/char/uv_mmtimer.c b/drivers/char/uv_mmtimer.c index 867b67be9f0..c7072ba14f4 100644 --- a/drivers/char/uv_mmtimer.c +++ b/drivers/char/uv_mmtimer.c @@ -89,13 +89,17 @@ static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case MMTIMER_GETOFFSET: /* offset of the counter */ /* - * UV RTC register is on its own page + * Starting with HUB rev 2.0, the UV RTC register is + * replicated across all cachelines of it's own page. + * This allows faster simultaneous reads from a given socket. + * + * The offset returned is in 64 bit units. 
*/ - if (PAGE_SIZE <= (1 << 16)) - ret = ((UV_LOCAL_MMR_BASE | UVH_RTC) & (PAGE_SIZE-1)) - / 8; + if (uv_get_min_hub_revision_id() == 1) + ret = 0; else - ret = -ENOSYS; + ret = ((uv_blade_processor_id() * L1_CACHE_BYTES) % + PAGE_SIZE) / 8; break; case MMTIMER_GETRES: /* resolution of the clock in 10^-15 s */ @@ -115,8 +119,8 @@ static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd, ret = hweight64(UVH_RTC_REAL_TIME_CLOCK_MASK); break; - case MMTIMER_MMAPAVAIL: /* can we mmap the clock into userspace? */ - ret = (PAGE_SIZE <= (1 << 16)) ? 1 : 0; + case MMTIMER_MMAPAVAIL: + ret = 1; break; case MMTIMER_GETCOUNTER: -- cgit v1.2.3-70-g09d2 From 35ea63d70f827a26c150993b4b940925bb02b03f Mon Sep 17 00:00:00 2001 From: Leann Ogasawara Date: Wed, 27 Jan 2010 15:29:18 -0800 Subject: x86: Add Dell OptiPlex 760 reboot quirk Dell OptiPlex 760 hangs on reboot unless reboot=bios is used. Add quirk to reboot through the BIOS. BugLink: https://bugs.launchpad.net/bugs/488319 Signed-off-by: Leann Ogasawara LKML-Reference: <1264634958.27335.1091.camel@emiko> Cc: Signed-off-by: H. Peter Anvin --- arch/x86/kernel/reboot.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 1545bc0c984..704bddcdf64 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -203,6 +203,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "0T656F"), }, }, + { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G*/ + .callback = set_bios_reboot, + .ident = "Dell OptiPlex 760", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 760"), + DMI_MATCH(DMI_BOARD_NAME, "0G919G"), + }, + }, { /* Handle problems with rebooting on Dell 2400's */ .callback = set_bios_reboot, .ident = "Dell PowerEdge 2400", -- cgit v1.2.3-70-g09d2 From e8e06eae4ffd683931b928f460c11c40cd3f7fd8 Mon Sep 17 00:00:00 2001 From: Jeff Garrett Date: Wed, 27 Jan 2010 22:02:26 -0600 Subject: x86/PCI: remove IOH range fetching Turned out to cause trouble on single IOH machines, and is superceded by _CRS on multi-IOH machines with production BIOSes. 
Signed-off-by: Jeff Garrett Signed-off-by: Jesse Barnes --- arch/x86/pci/Makefile | 2 +- arch/x86/pci/intel_bus.c | 94 ------------------------------------------------ 2 files changed, 1 insertion(+), 95 deletions(-) delete mode 100644 arch/x86/pci/intel_bus.c (limited to 'arch/x86') diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 564b008a51c..39fba37f702 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -15,7 +15,7 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-y += common.o early.o obj-y += amd_bus.o -obj-$(CONFIG_X86_64) += bus_numa.o intel_bus.o +obj-$(CONFIG_X86_64) += bus_numa.o ifeq ($(CONFIG_PCI_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/arch/x86/pci/intel_bus.c b/arch/x86/pci/intel_bus.c deleted file mode 100644 index f81a2fa8fe2..00000000000 --- a/arch/x86/pci/intel_bus.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * to read io range from IOH pci conf, need to do it after mmconfig is there - */ - -#include -#include -#include -#include -#include - -#include "bus_numa.h" - -static inline void print_ioh_resources(struct pci_root_info *info) -{ - int res_num; - int busnum; - int i; - - printk(KERN_DEBUG "IOH bus: [%02x, %02x]\n", - info->bus_min, info->bus_max); - res_num = info->res_num; - busnum = info->bus_min; - for (i = 0; i < res_num; i++) { - struct resource *res; - - res = &info->res[i]; - printk(KERN_DEBUG "IOH bus: %02x index %x %s: [%llx, %llx]\n", - busnum, i, - (res->flags & IORESOURCE_IO) ? "io port" : - "mmio", - res->start, res->end); - } -} - -#define IOH_LIO 0x108 -#define IOH_LMMIOL 0x10c -#define IOH_LMMIOH 0x110 -#define IOH_LMMIOH_BASEU 0x114 -#define IOH_LMMIOH_LIMITU 0x118 -#define IOH_LCFGBUS 0x11c - -static void __devinit pci_root_bus_res(struct pci_dev *dev) -{ - u16 word; - u32 dword; - struct pci_root_info *info; - u16 io_base, io_end; - u32 mmiol_base, mmiol_end; - u64 mmioh_base, mmioh_end; - int bus_base, bus_end; - - /* some sys doesn't get mmconf enabled */ - if (dev->cfg_size < 0x120) - return; - - if (pci_root_num >= PCI_ROOT_NR) { - printk(KERN_DEBUG "intel_bus.c: PCI_ROOT_NR is too small\n"); - return; - } - - info = &pci_root_info[pci_root_num]; - pci_root_num++; - - pci_read_config_word(dev, IOH_LCFGBUS, &word); - bus_base = (word & 0xff); - bus_end = (word & 0xff00) >> 8; - sprintf(info->name, "PCI Bus #%02x", bus_base); - info->bus_min = bus_base; - info->bus_max = bus_end; - - pci_read_config_word(dev, IOH_LIO, &word); - io_base = (word & 0xf0) << (12 - 4); - io_end = (word & 0xf000) | 0xfff; - update_res(info, io_base, io_end, IORESOURCE_IO, 0); - - pci_read_config_dword(dev, IOH_LMMIOL, &dword); - mmiol_base = (dword & 0xff00) << (24 - 8); - mmiol_end = (dword & 0xff000000) | 0xffffff; - update_res(info, mmiol_base, mmiol_end, IORESOURCE_MEM, 0); - - pci_read_config_dword(dev, IOH_LMMIOH, &dword); - mmioh_base = ((u64)(dword & 0xfc00)) << (26 - 10); - mmioh_end = ((u64)(dword & 0xfc000000) | 0x3ffffff); - pci_read_config_dword(dev, IOH_LMMIOH_BASEU, &dword); - mmioh_base |= ((u64)(dword & 0x7ffff)) << 32; - pci_read_config_dword(dev, IOH_LMMIOH_LIMITU, &dword); - mmioh_end |= ((u64)(dword & 0x7ffff)) << 32; - update_res(info, mmioh_base, mmioh_end, IORESOURCE_MEM, 0); - - print_ioh_resources(info); -} - -/* intel IOH */ -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, pci_root_bus_res); -- cgit v1.2.3-70-g09d2 From 221af7f87b97431e3ee21ce4b0e77d5411cf1549 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 28 Jan 2010 22:14:42 -0800 Subject: Split 'flush_old_exec' into two functions 'flush_old_exec()' 
is the point of no return when doing an execve(), and it is pretty badly misnamed. It doesn't just flush the old executable environment, it also starts up the new one. Which is very inconvenient for things like setting up the new personality, because we want the new personality to affect the starting of the new environment, but at the same time we do _not_ want the new personality to take effect if flushing the old one fails. As a result, the x86-64 '32-bit' personality is actually done using this insane "I'm going to change the ABI, but I haven't done it yet" bit (TIF_ABI_PENDING), with SET_PERSONALITY() not actually setting the personality, but just the "pending" bit, so that "flush_thread()" can do the actual personality magic. This patch in no way changes any of that insanity, but it does split the 'flush_old_exec()' function up into a preparatory part that can fail (still called flush_old_exec()), and a new part that will actually set up the new exec environment (setup_new_exec()). All callers are changed to trivially comply with the new world order. Signed-off-by: H. Peter Anvin Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/sh/kernel/process_64.c | 2 +- arch/x86/ia32/ia32_aout.c | 10 ++++++---- fs/binfmt_aout.c | 1 + fs/binfmt_elf.c | 27 ++------------------------- fs/binfmt_elf_fdpic.c | 3 +++ fs/binfmt_flat.c | 1 + fs/binfmt_som.c | 1 + fs/exec.c | 26 ++++++++++++++++---------- include/linux/binfmts.h | 1 + include/linux/sched.h | 2 +- 10 files changed, 33 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 31f80c61b03..ec79faf6f02 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -368,7 +368,7 @@ void exit_thread(void) void flush_thread(void) { - /* Called by fs/exec.c (flush_old_exec) to remove traces of a + /* Called by fs/exec.c (setup_new_exec) to remove traces of a * previously running executable. 
*/ #ifdef CONFIG_SH_FPU if (last_task_used_math == current) { diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 2a4d073d2cf..435d2a5323d 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -308,15 +308,17 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (retval) return retval; - regs->cs = __USER32_CS; - regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = - regs->r13 = regs->r14 = regs->r15 = 0; - /* OK, This is the point of no return */ set_personality(PER_LINUX); set_thread_flag(TIF_IA32); clear_thread_flag(TIF_ABI_PENDING); + setup_new_exec(bprm); + + regs->cs = __USER32_CS; + regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = + regs->r13 = regs->r14 = regs->r15 = 0; + current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); current->mm->end_data = ex.a_data + diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 346b6940536..fdd39709917 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -264,6 +264,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) #else set_personality(PER_LINUX); #endif + setup_new_exec(bprm); current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index edd90c49003..fd5b2ea5d29 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -662,27 +662,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') goto out_free_interp; - /* - * The early SET_PERSONALITY here is so that the lookup - * for the interpreter happens in the namespace of the - * to-be-execed image. SET_PERSONALITY can select an - * alternate root. - * - * However, SET_PERSONALITY is NOT allowed to switch - * this task into the new images's memory mapping - * policy - that is, TASK_SIZE must still evaluate to - * that which is appropriate to the execing application. - * This is because exit_mmap() needs to have TASK_SIZE - * evaluate to the size of the old image. - * - * So if (say) a 64-bit application is execing a 32-bit - * application it is the architecture's responsibility - * to defer changing the value of TASK_SIZE until the - * switch really is going to happen - do this in - * flush_thread(). - akpm - */ - SET_PERSONALITY(loc->elf_ex); - interpreter = open_exec(elf_interpreter); retval = PTR_ERR(interpreter); if (IS_ERR(interpreter)) @@ -730,9 +709,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* Verify the interpreter has a valid arch */ if (!elf_check_arch(&loc->interp_elf_ex)) goto out_free_dentry; - } else { - /* Executables without an interpreter also need a personality */ - SET_PERSONALITY(loc->elf_ex); } /* Flush all traces of the currently running executable */ @@ -752,7 +728,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) current->flags |= PF_RANDOMIZE; - arch_pick_mmap_layout(current->mm); + + setup_new_exec(bprm); /* Do this so that we can load the interpreter, if need be. 
We will change some of these later */ diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index c57d9ce5ff7..18d77297ccc 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -321,6 +321,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, set_personality(PER_LINUX_FDPIC); if (elf_read_implies_exec(&exec_params.hdr, executable_stack)) current->personality |= READ_IMPLIES_EXEC; + + setup_new_exec(bprm); + set_binfmt(&elf_fdpic_format); current->mm->start_code = 0; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index d4a00ea1054..42c6b4a5444 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -519,6 +519,7 @@ static int load_flat_file(struct linux_binprm * bprm, /* OK, This is the point of no return */ set_personality(PER_LINUX_32BIT); + setup_new_exec(bprm); } /* diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 2a9b5330cc5..cc8560f6c9b 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -227,6 +227,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* OK, This is the point of no return */ current->flags &= ~PF_FORKNOEXEC; current->personality = PER_HPUX; + setup_new_exec(bprm); /* Set the task size for HP-UX processes such that * the gateway page is outside the address space. diff --git a/fs/exec.c b/fs/exec.c index 632b02e34ec..675c3f44c2e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -941,9 +941,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) int flush_old_exec(struct linux_binprm * bprm) { - char * name; - int i, ch, retval; - char tcomm[sizeof(current->comm)]; + int retval; /* * Make sure we have a private signal table and that @@ -963,6 +961,20 @@ int flush_old_exec(struct linux_binprm * bprm) goto out; bprm->mm = NULL; /* We're using it now */ + return 0; + +out: + return retval; +} +EXPORT_SYMBOL(flush_old_exec); + +void setup_new_exec(struct linux_binprm * bprm) +{ + int i, ch; + char * name; + char tcomm[sizeof(current->comm)]; + + arch_pick_mmap_layout(current->mm); /* This is the point of no return */ current->sas_ss_sp = current->sas_ss_size = 0; @@ -1019,14 +1031,8 @@ int flush_old_exec(struct linux_binprm * bprm) flush_signal_handlers(current, 0); flush_old_files(current->files); - - return 0; - -out: - return retval; } - -EXPORT_SYMBOL(flush_old_exec); +EXPORT_SYMBOL(setup_new_exec); /* * Prepare credentials and lock ->cred_guard_mutex. diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index cd4349bdc34..89c6249fc56 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -109,6 +109,7 @@ extern int prepare_binprm(struct linux_binprm *); extern int __must_check remove_arg_zero(struct linux_binprm *); extern int search_binary_handler(struct linux_binprm *,struct pt_regs *); extern int flush_old_exec(struct linux_binprm * bprm); +extern void setup_new_exec(struct linux_binprm * bprm); extern int suid_dumpable; #define SUID_DUMP_DISABLE 0 /* No setuid dumping */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 6f7bba93929..abdfacc5865 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1369,7 +1369,7 @@ struct task_struct { char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock it with task_lock()) - - initialized normally by flush_old_exec */ + - initialized normally by setup_new_exec */ /* file system info */ int link_count, total_link_count; #ifdef CONFIG_SYSVIPC -- cgit v1.2.3-70-g09d2 From 05d43ed8a89c159ff641d472f970e3f1baa66318 Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Thu, 28 Jan 2010 22:14:43 -0800 Subject: x86: get rid of the insane TIF_ABI_PENDING bit Now that the previous commit made it possible to do the personality setting at the point of no return, we do just that for ELF binaries. And suddenly all the reasons for that insane TIF_ABI_PENDING bit go away, and we can just make SET_PERSONALITY() just do the obvious thing for a 32-bit compat process. Everything becomes much more straightforward this way. Signed-off-by: H. Peter Anvin Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/ia32/ia32_aout.c | 1 - arch/x86/include/asm/elf.h | 10 ++-------- arch/x86/include/asm/thread_info.h | 2 -- arch/x86/kernel/process.c | 12 ------------ arch/x86/kernel/process_64.c | 11 +++++++++++ 5 files changed, 13 insertions(+), 23 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 435d2a5323d..f9f47246275 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -311,7 +311,6 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* OK, This is the point of no return */ set_personality(PER_LINUX); set_thread_flag(TIF_IA32); - clear_thread_flag(TIF_ABI_PENDING); setup_new_exec(bprm); diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index b4501ee223a..1994d3f5844 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -181,14 +181,8 @@ do { \ void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp); #define compat_start_thread start_thread_ia32 -#define COMPAT_SET_PERSONALITY(ex) \ -do { \ - if (test_thread_flag(TIF_IA32)) \ - clear_thread_flag(TIF_ABI_PENDING); \ - else \ - set_thread_flag(TIF_ABI_PENDING); \ - current->personality |= force_personality32; \ -} while (0) +void set_personality_ia32(void); +#define COMPAT_SET_PERSONALITY(ex) set_personality_ia32() #define COMPAT_ELF_PLATFORM ("i686") diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 375c917c37d..e0d28901e96 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -87,7 +87,6 @@ struct thread_info { #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ -#define TIF_ABI_PENDING 19 #define TIF_MEMDIE 20 #define TIF_DEBUG 21 /* uses debug registers */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ @@ -112,7 +111,6 @@ struct thread_info { #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) -#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING) #define _TIF_DEBUG (1 << TIF_DEBUG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FREEZE (1 << TIF_FREEZE) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 02c3ee013cc..c9b3522b6b4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -115,18 +115,6 @@ void flush_thread(void) { struct task_struct *tsk = current; -#ifdef CONFIG_X86_64 - if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { - clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); - if (test_tsk_thread_flag(tsk, TIF_IA32)) { - clear_tsk_thread_flag(tsk, TIF_IA32); - } else { - set_tsk_thread_flag(tsk, TIF_IA32); - current_thread_info()->status |= TS_COMPAT; - } - } -#endif - flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index f9e033150cd..41a26a82470 
100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -521,6 +521,17 @@ void set_personality_64bit(void) current->personality &= ~READ_IMPLIES_EXEC; } +void set_personality_ia32(void) +{ + /* inherit personality from parent */ + + /* Make sure to be in 32bit mode */ + set_thread_flag(TIF_IA32); + + /* Prepare the first "return" to user space */ + current_thread_info()->status |= TS_COMPAT; +} + unsigned long get_wchan(struct task_struct *p) { unsigned long stack; -- cgit v1.2.3-70-g09d2 From 7c099ce1575126395f186ecf58b51a60d5c3be7d Mon Sep 17 00:00:00 2001 From: David Härdeman Date: Thu, 28 Jan 2010 21:02:54 +0100 Subject: x86: Add quirk for Intel DG45FC board to avoid low memory corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 6aa542a694dc9ea4344a8a590d2628c33d1b9431 added a quirk for the Intel DG45ID board due to low memory corruption. The Intel DG45FC shares the same BIOS (and the same bug) as noted in: http://bugzilla.kernel.org/show_bug.cgi?id=13736 Signed-off-by: David Härdeman LKML-Reference: <20100128200254.GA9134@hardeman.nu> Cc: Cc: Alexey Fisher Cc: ykzhao Cc: Tony Bones Cc: Ingo Molnar Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f7b8b9894b2..5d9e40c5862 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -642,19 +642,27 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"), }, }, - { /* - * AMI BIOS with low memory corruption was found on Intel DG45ID board. - * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will + * AMI BIOS with low memory corruption was found on Intel DG45ID and + * DG45FC boards. + * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will * match only DMI_BOARD_NAME and see if there is more bad products * with this vendor. */ + { .callback = dmi_low_memory_corruption, .ident = "AMI BIOS", .matches = { DMI_MATCH(DMI_BOARD_NAME, "DG45ID"), }, }, + { + .callback = dmi_low_memory_corruption, + .ident = "AMI BIOS", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "DG45FC"), + }, + }, #endif {} }; -- cgit v1.2.3-70-g09d2 From cc0967490c1c3824bc5b75718b6ca8a51d9f2617 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 28 Jan 2010 17:04:42 -0600 Subject: x86, hw_breakpoints, kgdb: Fix kgdb to use hw_breakpoint API In the 2.6.33 kernel, the hw_breakpoint API is now used for the performance event counters. The hw_breakpoint_handler() now consumes the hw breakpoints that were previously set by kgdb arch specific code. In order for kgdb to work in conjunction with this core API change, kgdb must use some of the low level functions of the hw_breakpoint API to install, uninstall, and deal with hw breakpoint reservations. The kgdb core required a change to call kgdb_disable_hw_debug anytime a slave cpu enters kgdb_wait() in order to keep all the hw breakpoints in sync as well as to prevent hitting a hw breakpoint while kgdb is active. During the architecture specific initialization of kgdb, it will pre-allocate 4 disabled (struct perf event **) structures. Kgdb will use these to manage the capabilities for the 4 hw breakpoint registers, per cpu. 
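Concretely, the pre-allocation amounts to registering a wide (one event per cpu) breakpoint that starts out disabled; a simplified extract of the kgdb_arch_init() hunk shown further below:

	struct perf_event_attr attr;

	attr.bp_addr  = (unsigned long)kgdb_arch_init;	/* placeholder address */
	attr.type     = PERF_TYPE_BREAKPOINT;
	attr.bp_len   = HW_BREAKPOINT_LEN_1;
	attr.bp_type  = HW_BREAKPOINT_W;
	attr.disabled = 1;
	breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL);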
Right now the hw_breakpoint API does not have a way to ask how many breakpoints are available, on each CPU so it is possible that the install of a breakpoint might fail when kgdb restores the system to the run state. The intent of this patch is to first get the basic functionality of hw breakpoints working and leave it to the person debugging the kernel to understand what hw breakpoints are in use and what restrictions have been imposed as a result. Breakpoint constraints will be dealt with in a future patch. While atomic, the x86 specific kgdb code will call arch_uninstall_hw_breakpoint() and arch_install_hw_breakpoint() to manage the cpu specific hw breakpoints. The net result of these changes allow kgdb to use the same pool of hw_breakpoints that are used by the perf event API, but neither knows about future reservations for the available hw breakpoint slots. Signed-off-by: Jason Wessel Acked-by: Frederic Weisbecker Cc: kgdb-bugreport@lists.sourceforge.net Cc: K.Prasad Cc: Peter Zijlstra Cc: Alan Stern Cc: torvalds@linux-foundation.org LKML-Reference: <1264719883-7285-2-git-send-email-jason.wessel@windriver.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/kgdb.c | 171 ++++++++++++++++++++++++++++++++----------------- kernel/kgdb.c | 3 + 2 files changed, 117 insertions(+), 57 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index dd74fe7273b..62bea7307ea 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -204,40 +205,38 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) static struct hw_breakpoint { unsigned enabled; - unsigned type; - unsigned len; unsigned long addr; + int len; + int type; + struct perf_event **pev; } breakinfo[4]; static void kgdb_correct_hw_break(void) { - unsigned long dr7; - int correctit = 0; - int breakbit; int breakno; - get_debugreg(dr7, 7); for (breakno = 0; breakno < 4; breakno++) { - breakbit = 2 << (breakno << 1); - if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { - correctit = 1; - dr7 |= breakbit; - dr7 &= ~(0xf0000 << (breakno << 2)); - dr7 |= ((breakinfo[breakno].len << 2) | - breakinfo[breakno].type) << - ((breakno << 2) + 16); - set_debugreg(breakinfo[breakno].addr, breakno); - - } else { - if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { - correctit = 1; - dr7 &= ~breakbit; - dr7 &= ~(0xf0000 << (breakno << 2)); - } - } + struct perf_event *bp; + struct arch_hw_breakpoint *info; + int val; + int cpu = raw_smp_processor_id(); + if (!breakinfo[breakno].enabled) + continue; + bp = *per_cpu_ptr(breakinfo[breakno].pev, cpu); + info = counter_arch_bp(bp); + if (bp->attr.disabled != 1) + continue; + bp->attr.bp_addr = breakinfo[breakno].addr; + bp->attr.bp_len = breakinfo[breakno].len; + bp->attr.bp_type = breakinfo[breakno].type; + info->address = breakinfo[breakno].addr; + info->len = breakinfo[breakno].len; + info->type = breakinfo[breakno].type; + val = arch_install_hw_breakpoint(bp); + if (!val) + bp->attr.disabled = 0; } - if (correctit) - set_debugreg(dr7, 7); + hw_breakpoint_restore(); } static int @@ -259,15 +258,23 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) static void kgdb_remove_all_hw_break(void) { int i; + int cpu = raw_smp_processor_id(); + struct perf_event *bp; - for (i = 0; i < 4; i++) - memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint)); + for (i = 0; i < 4; i++) { + if (!breakinfo[i].enabled) + continue; + bp = 
*per_cpu_ptr(breakinfo[i].pev, cpu); + if (bp->attr.disabled == 1) + continue; + arch_uninstall_hw_breakpoint(bp); + bp->attr.disabled = 1; + } } static int kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) { - unsigned type; int i; for (i = 0; i < 4; i++) @@ -278,27 +285,38 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) switch (bptype) { case BP_HARDWARE_BREAKPOINT: - type = 0; - len = 1; + len = 1; + breakinfo[i].type = X86_BREAKPOINT_EXECUTE; break; case BP_WRITE_WATCHPOINT: - type = 1; + breakinfo[i].type = X86_BREAKPOINT_WRITE; break; case BP_ACCESS_WATCHPOINT: - type = 3; + breakinfo[i].type = X86_BREAKPOINT_RW; break; default: return -1; } - - if (len == 1 || len == 2 || len == 4) - breakinfo[i].len = len - 1; - else + switch (len) { + case 1: + breakinfo[i].len = X86_BREAKPOINT_LEN_1; + break; + case 2: + breakinfo[i].len = X86_BREAKPOINT_LEN_2; + break; + case 4: + breakinfo[i].len = X86_BREAKPOINT_LEN_4; + break; +#ifdef CONFIG_X86_64 + case 8: + breakinfo[i].len = X86_BREAKPOINT_LEN_8; + break; +#endif + default: return -1; - - breakinfo[i].enabled = 1; + } breakinfo[i].addr = addr; - breakinfo[i].type = type; + breakinfo[i].enabled = 1; return 0; } @@ -313,8 +331,21 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) */ void kgdb_disable_hw_debug(struct pt_regs *regs) { + int i; + int cpu = raw_smp_processor_id(); + struct perf_event *bp; + /* Disable hardware debugging while we are in kgdb: */ set_debugreg(0UL, 7); + for (i = 0; i < 4; i++) { + if (!breakinfo[i].enabled) + continue; + bp = *per_cpu_ptr(breakinfo[i].pev, cpu); + if (bp->attr.disabled == 1) + continue; + arch_uninstall_hw_breakpoint(bp); + bp->attr.disabled = 1; + } } /** @@ -378,7 +409,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, struct pt_regs *linux_regs) { unsigned long addr; - unsigned long dr6; char *ptr; int newPC; @@ -404,20 +434,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, raw_smp_processor_id()); } - get_debugreg(dr6, 6); - if (!(dr6 & 0x4000)) { - int breakno; - - for (breakno = 0; breakno < 4; breakno++) { - if (dr6 & (1 << breakno) && - breakinfo[breakno].type == 0) { - /* Set restore flag: */ - linux_regs->flags |= X86_EFLAGS_RF; - break; - } - } - } - set_debugreg(0UL, 6); kgdb_correct_hw_break(); return 0; @@ -485,8 +501,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) break; case DIE_DEBUG: - if (atomic_read(&kgdb_cpu_doing_single_step) == - raw_smp_processor_id()) { + if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { if (user_mode(regs)) return single_step_cont(regs, args); break; @@ -539,7 +554,42 @@ static struct notifier_block kgdb_notifier = { */ int kgdb_arch_init(void) { - return register_die_notifier(&kgdb_notifier); + int i, cpu; + int ret; + struct perf_event_attr attr; + struct perf_event **pevent; + + ret = register_die_notifier(&kgdb_notifier); + if (ret != 0) + return ret; + /* + * Pre-allocate the hw breakpoint structions in the non-atomic + * portion of kgdb because this operation requires mutexs to + * complete. 
+ */ + attr.bp_addr = (unsigned long)kgdb_arch_init; + attr.type = PERF_TYPE_BREAKPOINT; + attr.bp_len = HW_BREAKPOINT_LEN_1; + attr.bp_type = HW_BREAKPOINT_W; + attr.disabled = 1; + for (i = 0; i < 4; i++) { + breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); + if (IS_ERR(breakinfo[i].pev)) { + printk(KERN_ERR "kgdb: Could not allocate hw breakpoints\n"); + breakinfo[i].pev = NULL; + kgdb_arch_exit(); + return -1; + } + for_each_online_cpu(cpu) { + pevent = per_cpu_ptr(breakinfo[i].pev, cpu); + pevent[0]->hw.sample_period = 1; + if (pevent[0]->destroy != NULL) { + pevent[0]->destroy = NULL; + release_bp_slot(*pevent); + } + } + } + return ret; } /** @@ -550,6 +600,13 @@ int kgdb_arch_init(void) */ void kgdb_arch_exit(void) { + int i; + for (i = 0; i < 4; i++) { + if (breakinfo[i].pev) { + unregister_wide_hw_breakpoint(breakinfo[i].pev); + breakinfo[i].pev = NULL; + } + } unregister_die_notifier(&kgdb_notifier); } diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 2eb517e2351..c7ade62e4ef 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -583,6 +583,9 @@ static void kgdb_wait(struct pt_regs *regs) smp_wmb(); atomic_set(&cpu_in_kgdb[cpu], 1); + /* Disable any cpu specific hw breakpoints */ + kgdb_disable_hw_debug(regs); + /* Wait till primary CPU is done with debugging */ while (atomic_read(&passive_cpu_wait[cpu])) cpu_relax(); -- cgit v1.2.3-70-g09d2 From 5352ae638e2d7d5c9b2e4d528676bbf2af6fd6f3 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 28 Jan 2010 17:04:43 -0600 Subject: perf, hw_breakpoint, kgdb: Do not take mutex for kernel debugger This patch fixes the regression in functionality where the kernel debugger and the perf API do not nicely share hw breakpoint reservations. The kernel debugger cannot use any mutex_lock() calls because it can start the kernel running from an invalid context. A mutex free version of the reservation API needed to get created for the kernel debugger to safely update hw breakpoint reservations. The possibility for a breakpoint reservation to be concurrently processed at the time that kgdb interrupts the system is improbable. Should this corner case occur the end user is warned, and the kernel debugger will prohibit updating the hardware breakpoint reservations. Any time the kernel debugger reserves a hardware breakpoint it will be a system wide reservation. 
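In practice the mutex-free variants simply refuse to touch the constraint bookkeeping when the mutex is already held, leaving the debugger to report the failure and retry; condensed from the kernel/hw_breakpoint.c hunk below:

	int dbg_reserve_bp_slot(struct perf_event *bp)
	{
		if (mutex_is_locked(&nr_bp_mutex))
			return -1;

		return __reserve_bp_slot(bp);
	}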
Signed-off-by: Jason Wessel Acked-by: Frederic Weisbecker Cc: kgdb-bugreport@lists.sourceforge.net Cc: K.Prasad Cc: Peter Zijlstra Cc: Alan Stern Cc: torvalds@linux-foundation.org LKML-Reference: <1264719883-7285-3-git-send-email-jason.wessel@windriver.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/kgdb.c | 51 ++++++++++++++++++++++++++++++++++++++++++ include/linux/hw_breakpoint.h | 2 ++ kernel/hw_breakpoint.c | 52 ++++++++++++++++++++++++++++++++++--------- 3 files changed, 95 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 62bea7307ea..bfba6019d76 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -239,6 +239,49 @@ static void kgdb_correct_hw_break(void) hw_breakpoint_restore(); } +static int hw_break_reserve_slot(int breakno) +{ + int cpu; + int cnt = 0; + struct perf_event **pevent; + + for_each_online_cpu(cpu) { + cnt++; + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + if (dbg_reserve_bp_slot(*pevent)) + goto fail; + } + + return 0; + +fail: + for_each_online_cpu(cpu) { + cnt--; + if (!cnt) + break; + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + dbg_release_bp_slot(*pevent); + } + return -1; +} + +static int hw_break_release_slot(int breakno) +{ + struct perf_event **pevent; + int cpu; + + for_each_online_cpu(cpu) { + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + if (dbg_release_bp_slot(*pevent)) + /* + * The debugger is responisble for handing the retry on + * remove failure. + */ + return -1; + } + return 0; +} + static int kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) { @@ -250,6 +293,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) if (i == 4) return -1; + if (hw_break_release_slot(i)) { + printk(KERN_ERR "Cannot remove hw breakpoint at %lx\n", addr); + return -1; + } breakinfo[i].enabled = 0; return 0; @@ -316,6 +363,10 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) return -1; } breakinfo[i].addr = addr; + if (hw_break_reserve_slot(i)) { + breakinfo[i].addr = 0; + return -1; + } breakinfo[i].enabled = 1; return 0; diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 41235c93e4e..070ba062173 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -75,6 +75,8 @@ extern int __register_perf_hw_breakpoint(struct perf_event *bp); extern void unregister_hw_breakpoint(struct perf_event *bp); extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events); +extern int dbg_reserve_bp_slot(struct perf_event *bp); +extern int dbg_release_bp_slot(struct perf_event *bp); extern int reserve_bp_slot(struct perf_event *bp); extern void release_bp_slot(struct perf_event *bp); diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index c030ae657f2..8a5c7d55ac9 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -243,38 +243,70 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM */ -int reserve_bp_slot(struct perf_event *bp) +static int __reserve_bp_slot(struct perf_event *bp) { struct bp_busy_slots slots = {0}; - int ret = 0; - - mutex_lock(&nr_bp_mutex); fetch_bp_busy_slots(&slots, bp); /* Flexible counters need to keep at least one slot */ - if (slots.pinned + (!!slots.flexible) == HBP_NUM) { - ret = -ENOSPC; - goto end; - } + if (slots.pinned + (!!slots.flexible) == 
HBP_NUM) + return -ENOSPC; toggle_bp_slot(bp, true); -end: + return 0; +} + +int reserve_bp_slot(struct perf_event *bp) +{ + int ret; + + mutex_lock(&nr_bp_mutex); + + ret = __reserve_bp_slot(bp); + mutex_unlock(&nr_bp_mutex); return ret; } +static void __release_bp_slot(struct perf_event *bp) +{ + toggle_bp_slot(bp, false); +} + void release_bp_slot(struct perf_event *bp) { mutex_lock(&nr_bp_mutex); - toggle_bp_slot(bp, false); + __release_bp_slot(bp); mutex_unlock(&nr_bp_mutex); } +/* + * Allow the kernel debugger to reserve breakpoint slots without + * taking a lock using the dbg_* variant of for the reserve and + * release breakpoint slots. + */ +int dbg_reserve_bp_slot(struct perf_event *bp) +{ + if (mutex_is_locked(&nr_bp_mutex)) + return -1; + + return __reserve_bp_slot(bp); +} + +int dbg_release_bp_slot(struct perf_event *bp) +{ + if (mutex_is_locked(&nr_bp_mutex)) + return -1; + + __release_bp_slot(bp); + + return 0; +} int register_perf_hw_breakpoint(struct perf_event *bp) { -- cgit v1.2.3-70-g09d2 From ea0854170c95245a258b386c7a9314399c949fe0 Mon Sep 17 00:00:00 2001 From: Shaohui Zheng Date: Tue, 2 Feb 2010 13:44:16 -0800 Subject: memory hotplug: fix a bug on /dev/mem for 64-bit kernels Newly added memory can not be accessed via /dev/mem, because we do not update the variables high_memory, max_pfn and max_low_pfn. Add a function update_end_of_memory_vars() to update these variables for 64-bit kernels. [akpm@linux-foundation.org: simplify comment] Signed-off-by: Shaohui Zheng Cc: Andi Kleen Cc: Li Haicheng Reviewed-by: Wu Fengguang Reviewed-by: KAMEZAWA Hiroyuki Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5198b9bb34e..69ddfbd9113 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -49,6 +49,7 @@ #include #include #include +#include static unsigned long dma_reserve __initdata; @@ -615,6 +616,21 @@ void __init paging_init(void) * Memory hotplug specific functions */ #ifdef CONFIG_MEMORY_HOTPLUG +/* + * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need + * updating. + */ +static void update_end_of_memory_vars(u64 start, u64 size) +{ + unsigned long end_pfn = PFN_UP(start + size); + + if (end_pfn > max_pfn) { + max_pfn = end_pfn; + max_low_pfn = end_pfn; + high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; + } +} + /* * Memory is added always to NORMAL zone. This means you will never get * additional DMA/DMA32 memory. @@ -634,6 +650,9 @@ int arch_add_memory(int nid, u64 start, u64 size) ret = __add_pages(nid, zone, start_pfn, nr_pages); WARN_ON_ONCE(ret); + /* update max_pfn, max_low_pfn and high_memory */ + update_end_of_memory_vars(start, size); + return ret; } EXPORT_SYMBOL_GPL(arch_add_memory); -- cgit v1.2.3-70-g09d2
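For context on the /dev/mem symptom the last patch addresses: the character device rejects reads beyond high_memory, so until the variables are updated a hot-added range fails that check. The generic helper in drivers/char/mem.c is roughly of this shape (a paraphrase for illustration, not part of the patch):

	static inline int valid_phys_addr_range(unsigned long addr, size_t count)
	{
		return addr + count <= __pa(high_memory);
	}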