From 9f646389aa7727a2fd8f9ae6337b92af9cfbc264 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 29 May 2012 16:39:09 +0200 Subject: sched/x86: Use cpu_llc_shared_mask(cpu) for coregroup_mask Commit 8e7fbcbc2 ("sched: Remove stale power aware scheduling remnants and dysfunctional knobs") made a boo-boo with removing the power aware scheduling muck from the x86 topology bits. We should unconditionally use the llc_shared mask for multi-core. Reported-and-tested-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Borislav Petkov Cc: Andreas Herrmann Link: http://lkml.kernel.org/n/tip-lsksc2kfyeveb13avh327p0d@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f56f96da77f..fd019d78b1f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -410,15 +410,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) /* maps the cpu to the sched domain representing multi-core */ const struct cpumask *cpu_coregroup_mask(int cpu) { - struct cpuinfo_x86 *c = &cpu_data(cpu); - /* - * For perf, we return last level cache shared map. - * And for power savings, we return cpu_core_map - */ - if (!(cpu_has(c, X86_FEATURE_AMD_DCM))) - return cpu_core_mask(cpu); - else - return cpu_llc_shared_mask(cpu); + return cpu_llc_shared_mask(cpu); } static void impress_friends(void) -- cgit v1.2.3-70-g09d2 From 7c8d51848a88aafdb68f42b6b650c83485ea2f84 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 30 May 2012 01:43:08 +0200 Subject: crypto: aesni-intel - fix unaligned cbc decrypt for x86-32 The 32 bit variant of cbc(aes) decrypt uses instructions requiring 128 bit aligned memory locations but fails to ensure this constraint in the code. Fix this by loading the data into intermediate registers with load unaligned instructions. This fixes reported general protection faults related to aesni. References: https://bugzilla.kernel.org/show_bug.cgi?id=43223 Reported-by: Daniel Cc: stable@kernel.org [v2.6.39+] Signed-off-by: Mathias Krause Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index be6d9e365a8..3470624d783 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -2460,10 +2460,12 @@ ENTRY(aesni_cbc_dec) pxor IN3, STATE4 movaps IN4, IV #else - pxor (INP), STATE2 - pxor 0x10(INP), STATE3 pxor IN1, STATE4 movaps IN2, IV + movups (INP), IN1 + pxor IN1, STATE2 + movups 0x10(INP), IN2 + pxor IN2, STATE3 #endif movups STATE1, (OUTP) movups STATE2, 0x10(OUTP) -- cgit v1.2.3-70-g09d2 From c2238f10e0c34a85a2a555c8a197316d1ca3fb7e Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Tue, 5 Jun 2012 10:35:02 +0800 Subject: x86/mce: Fix the MCE poll timer logic In commit 82f7af09 ("x86/mce: Cleanup timer mess"), Thomas just forgot the "/ 2" there while cleaning up.
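The effect of the restored "/ 2" is an adaptive poll interval: when mce_notify_irq() found work, the interval is halved (floored at HZ/100) so polling speeds up; otherwise it is doubled, capped at check_interval * HZ. A minimal sketch of that backoff in isolation (simplified C with hypothetical helper and parameter names, not the kernel code):

	/* Adaptive poll backoff (sketch): halve on a hit, double on a miss. */
	static unsigned long next_poll_interval(unsigned long iv, int found,
						unsigned long floor,
						unsigned long ceiling)
	{
		if (found)
			return iv / 2 > floor ? iv / 2 : floor;	/* poll faster */
		return iv * 2 < ceiling ? iv * 2 : ceiling;	/* back off */
	}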
Signed-off-by: Chen Gong Acked-by: Thomas Gleixner Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 98003bfc555..d6b18a4d0b9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1266,7 +1266,7 @@ static void mce_timer_fn(unsigned long data) */ iv = __this_cpu_read(mce_next_interval); if (mce_notify_irq()) - iv = max(iv, (unsigned long) HZ/100); + iv = max(iv / 2, (unsigned long) HZ/100); else iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); __this_cpu_write(mce_next_interval, iv); -- cgit v1.2.3-70-g09d2 From 958fb3c51295764599d6abce87e1a01ace897a3e Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Tue, 5 Jun 2012 10:35:02 +0800 Subject: x86/mce: Fix the MCE poll timer logic In commit 82f7af09 ("x86/mce: Cleanup timer mess"), Thomas just forgot the "/ 2" there while cleaning up. Signed-off-by: Chen Gong Acked-by: Thomas Gleixner Cc: bp@amd64.org Cc: tony.luck@intel.com Link: http://lkml.kernel.org/r/1338863702-9245-1-git-send-email-gong.chen@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0a687fd185e..a97f3c4a394 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1274,7 +1274,7 @@ static void mce_timer_fn(unsigned long data) */ iv = __this_cpu_read(mce_next_interval); if (mce_notify_irq()) - iv = max(iv, (unsigned long) HZ/100); + iv = max(iv / 2, (unsigned long) HZ/100); else iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); __this_cpu_write(mce_next_interval, iv); -- cgit v1.2.3-70-g09d2 From 436d03faf6961b30e13b2d0967aea9d772d6cf44 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jun 2012 00:09:11 +0900 Subject: x86/decoder: Fix bsr/bsf/jmpe decoding with operand-size prefix Fix the x86 instruction decoder to decode bsr/bsf/jmpe with operand-size prefix (66h). This fixes the test case failure reported by Linus, attached below. bsf/bsr/jmpe have a special encoding. The opcode map in the Intel Software Developer's Manual vol. 2 says they have TZCNT/LZCNT variants with the F3h prefix, but gives no information about the other 66h or F2h prefixes. The current instruction decoder treats those as bad instructions, but the hardware actually accepts at least operand-size prefixes. H. Peter Anvin further explains: " TZCNT/LZCNT are F3 + BSF/BSR exactly because the F2 and F3 prefixes have historically been no-ops with most instructions. This allows software to unconditionally use the prefixed versions and get TZCNT/LZCNT on the processors that have them if they don't care about the difference. " This fixes errors reported by test_get_len: Warning: arch/x86/tools/test_get_len found difference at :ffffffff81036d87 Warning: ffffffff81036de5: 66 0f bc c2 bsf %dx,%ax Warning: objdump says 4 bytes, but insn_get_length() says 3 Warning: arch/x86/tools/test_get_len found difference at :ffffffff81036ea6 Warning: ffffffff81036f04: 66 0f bd c2 bsr %dx,%ax Warning: objdump says 4 bytes, but insn_get_length() says 3 Warning: decoded and checked 13298882 instructions with 2 warnings Reported-by: Linus Torvalds Reported-by: Pekka Enberg Signed-off-by: Masami Hiramatsu Cc: "H.
Peter Anvin" Cc: Link: http://lkml.kernel.org/r/20120604150911.22338.43296.stgit@localhost.localdomain Signed-off-by: Ingo Molnar --- arch/x86/lib/x86-opcode-map.txt | 8 ++++---- arch/x86/tools/gen-insn-attr-x86.awk | 14 +++++++++----- 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 81913790442..5d7e51f3fd2 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -28,7 +28,7 @@ # - (66): the last prefix is 0x66 # - (F3): the last prefix is 0xF3 # - (F2): the last prefix is 0xF2 -# +# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) Table: one byte opcode Referrer: @@ -515,12 +515,12 @@ b4: LFS Gv,Mp b5: LGS Gv,Mp b6: MOVZX Gv,Eb b7: MOVZX Gv,Ew -b8: JMPE | POPCNT Gv,Ev (F3) +b8: JMPE (!F3) | POPCNT Gv,Ev (F3) b9: Grp10 (1A) ba: Grp8 Ev,Ib (1A) bb: BTC Ev,Gv -bc: BSF Gv,Ev | TZCNT Gv,Ev (F3) -bd: BSR Gv,Ev | LZCNT Gv,Ev (F3) +bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3) +bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3) be: MOVSX Gv,Eb bf: MOVSX Gv,Ew # 0x0f 0xc0-0xcf diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 5f6a5b6c3a1..ddcf39b1a18 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -66,9 +66,10 @@ BEGIN { rex_expr = "^REX(\\.[XRWB]+)*" fpu_expr = "^ESC" # TODO - lprefix1_expr = "\\(66\\)" + lprefix1_expr = "\\((66|!F3)\\)" lprefix2_expr = "\\(F3\\)" - lprefix3_expr = "\\(F2\\)" + lprefix3_expr = "\\((F2|!F3)\\)" + lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 # All opcodes starting with lower-case 'v' or with (v1) superscript @@ -333,13 +334,16 @@ function convert_operands(count,opnd, i,j,imm,mod) if (match(ext, lprefix1_expr)) { lptable1[idx] = add_flags(lptable1[idx],flags) variant = "INAT_VARIANT" - } else if (match(ext, lprefix2_expr)) { + } + if (match(ext, lprefix2_expr)) { lptable2[idx] = add_flags(lptable2[idx],flags) variant = "INAT_VARIANT" - } else if (match(ext, lprefix3_expr)) { + } + if (match(ext, lprefix3_expr)) { lptable3[idx] = add_flags(lptable3[idx],flags) variant = "INAT_VARIANT" - } else { + } + if (!match(ext, lprefix_expr)){ table[idx] = add_flags(table[idx],flags) } } -- cgit v1.2.3-70-g09d2 From 1a87fc1ec7b05b9bc60df9dc52297d4c225d7f1a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Jun 2012 11:33:21 +0200 Subject: x86: mce: Add the dropped timer interval init back commit 82f7af09 ("x86/mce: Cleanup timer mess) dropped the initialization of the per cpu timer interval. Duh :( Restore the previous behaviour. 
Reported-by: Chen Gong Cc: bp@amd64.org Cc: tony.luck@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a97f3c4a394..da27c5d2168 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1557,7 +1557,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) static void __mcheck_cpu_init_timer(void) { struct timer_list *t = &__get_cpu_var(mce_timer); - unsigned long iv = __this_cpu_read(mce_next_interval); + unsigned long iv = check_interval * HZ; setup_timer(t, mce_timer_fn, smp_processor_id()); -- cgit v1.2.3-70-g09d2 From aff5a62d52ff03956ff6992b9fe4b561fd855804 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Tue, 5 Jun 2012 15:00:31 -0400 Subject: x86/gart: Fix kmemleak warning aperture_64.c now uses memblock, so the previous kmemleak_ignore() for alloc_bootmem() should be removed. Otherwise, with kmemleak enabled, the kernel will throw warnings like: [ 0.000000] kmemleak: Trying to color unknown object at 0xffff8800c4000000 as Black [ 0.000000] Pid: 0, comm: swapper/0 Not tainted 3.5.0-rc1-next-20120605+ #130 [ 0.000000] Call Trace: [ 0.000000] [] paint_ptr+0x66/0xc0 [ 0.000000] [] kmemleak_ignore+0x2b/0x60 [ 0.000000] [] kmemleak_init+0x217/0x2c1 [ 0.000000] [] start_kernel+0x32d/0x3eb [ 0.000000] [] ? repair_env_string+0x5a/0x5a [ 0.000000] [] x86_64_start_reservations+0x131/0x135 [ 0.000000] [] ? early_idt_handlers+0x120/0x120 [ 0.000000] [] x86_64_start_kernel+0x102/0x111 [ 0.000000] kmemleak: Early log backtrace: [ 0.000000] [] kmemleak_ignore+0x4b/0x60 [ 0.000000] [] gart_iommu_hole_init+0x3e7/0x547 [ 0.000000] [] pci_iommu_alloc+0x44/0x6f [ 0.000000] [] mem_init+0x19/0xec [ 0.000000] [] start_kernel+0x1ea/0x3eb [ 0.000000] [] x86_64_start_reservations+0x131/0x135 [ 0.000000] [] x86_64_start_kernel+0x102/0x111 [ 0.000000] [] 0xffffffffffffffff Signed-off-by: Xiaotian Feng Cc: Xiaotian Feng Cc: Tejun Heo Link: http://lkml.kernel.org/r/1338922831-2847-1-git-send-email-xtfeng@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/aperture_64.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 6e76c191a83..d5fd66f0d4c 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -95,11 +94,6 @@ static u32 __init allocate_aperture(void) return 0; } memblock_reserve(addr, aper_size); - /* - * Kmemleak should not scan this block as it may not be mapped via the - * kernel direct mapping. - */ - kmemleak_ignore(phys_to_virt(addr)); printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", aper_size >> 10, addr); insert_aperture_resource((u32)addr, aper_size); -- cgit v1.2.3-70-g09d2 From 4af463d28f1a026e25c0b879fac2a0d2b7bff599 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Mon, 4 Jun 2012 11:42:32 +0900 Subject: x86/numa: Set numa_nodes_parsed at acpi_numa_memory_affinity_init() When hot-adding a CPU, the system outputs the following messages, since node_to_cpumask_map[2] was never allocated: Booting Node 2 Processor 32 APIC 0xc0 node_to_cpumask_map[2] NULL Pid: 0, comm: swapper/32 Tainted: G A 3.3.5-acd #21 Call Trace: [] debug_cpumask_set_cpu+0x155/0x160 [] ?
add_timer_on+0xaa/0x120 [] numa_add_cpu+0x1e/0x22 [] identify_cpu+0x1df/0x1e4 [] identify_secondary_cpu+0x16/0x1d [] smp_store_cpu_info+0x3c/0x3e [] smp_callin+0x139/0x1be [] start_secondary+0x13/0xeb The reason is that the bit for node 2 was not set in numa_nodes_parsed. numa_nodes_parsed is set only by acpi_numa_processor_affinity_init / acpi_numa_x2apic_affinity_init. Thus even if hot-added memory in the same PXM as a hot-added CPU is described in the ACPI SRAT table, numa_nodes_parsed is not set when the hot-added CPU itself is not described there. But the ACPI Spec Rev 5.0 says the following about the SRAT table: This optional table provides information that allows OSPM to associate processors and memory ranges, including ranges of memory provided by hot-added memory devices, with system localities / proximity domains and clock domains. This means the SRAT table provides information only for CPUs present at boot time, while for memory it also covers hot-added devices. So hot-added memory is described in the SRAT table, but a hot-added CPU is not. Thus numa_nodes_parsed should be set not only by acpi_numa_processor_affinity_init / acpi_numa_x2apic_affinity_init but also by acpi_numa_memory_affinity_init for this case. Additionally, if the system has a CPU-less memory node, acpi_numa_processor_affinity_init / acpi_numa_x2apic_affinity_init cannot set numa_nodes_parsed, since these functions cannot find a CPU description for the node. In this case, numa_nodes_parsed needs to be set by acpi_numa_memory_affinity_init. Signed-off-by: Yasuaki Ishimatsu Acked-by: David Rientjes Acked-by: KOSAKI Motohiro Cc: liuj97@gmail.com Cc: kosaki.motohiro@gmail.com Link: http://lkml.kernel.org/r/4FCC2098.4030007@jp.fujitsu.com [ merged it ] Signed-off-by: Ingo Molnar --- arch/x86/mm/srat.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 732af3a9618..4599c3e8bcb 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -176,6 +176,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) return; } + node_set(node, numa_nodes_parsed); + printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", node, pxm, (unsigned long long) start, (unsigned long long) end - 1); -- cgit v1.2.3-70-g09d2 From 7071f6b2889bb41bea61891d8a3e6e70517ef5e6 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 31 May 2012 23:20:25 +0200 Subject: x86/intel/moorestown: Change intel_scu_devices_create() to __devinit The allmodconfig hits: WARNING: vmlinux.o(.text+0x6553d): Section mismatch in reference from the function intel_scu_devices_create() to the function .devinit.text: spi_register_board_info() [...] This patch marks intel_scu_devices_create() as devinit because it only calls a devinit function, spi_register_board_info(). Signed-off-by: Sebastian Andrzej Siewior Cc: Alan Cox Cc: Kirill A.
Shutemov Cc: Mika Westerberg Cc: Samuel Ortiz Cc: Feng Tang Link: http://lkml.kernel.org/r/20120531212025.GA8519@breakpoint.cc Signed-off-by: Ingo Molnar --- arch/x86/platform/mrst/mrst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index e31bcd8f2ee..fd41a9262d6 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -782,7 +782,7 @@ BLOCKING_NOTIFIER_HEAD(intel_scu_notifier); EXPORT_SYMBOL_GPL(intel_scu_notifier); /* Called by IPC driver */ -void intel_scu_devices_create(void) +void __devinit intel_scu_devices_create(void) { int i; -- cgit v1.2.3-70-g09d2 From 55c844a4dd16a4d1fdc0cf2a283ec631a02ec448 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 30 May 2012 23:15:41 +0800 Subject: x86/reboot: Fix a warning message triggered by stop_other_cpus() When rebooting our 24 CPU Westmere servers with 3.4-rc6, we always see this warning msg: Restarting system. machine restart ------------[ cut here ]------------ WARNING: at arch/x86/kernel/smp.c:125 native_smp_send_reschedule+0x74/0xa7() Hardware name: X8DTN Modules linked in: igb [last unloaded: scsi_wait_scan] Pid: 1, comm: systemd-shutdow Not tainted 3.4.0-rc6+ #22 Call Trace: [] warn_slowpath_common+0x7e/0x96 [] warn_slowpath_null+0x15/0x17 [] native_smp_send_reschedule+0x74/0xa7 [] trigger_load_balance+0x279/0x2a6 [] scheduler_tick+0xe0/0xe9 [] update_process_times+0x60/0x70 [] tick_sched_timer+0x68/0x92 [] __run_hrtimer+0xb3/0x13c [] ? tick_nohz_handler+0xd0/0xd0 [] hrtimer_interrupt+0xdb/0x198 [] smp_apic_timer_interrupt+0x81/0x94 [] apic_timer_interrupt+0x67/0x70 [] ? default_send_IPI_mask_allbutself_phys+0xb4/0xc4 [] physflat_send_IPI_allbutself+0x12/0x14 [] native_nmi_stop_other_cpus+0x8a/0xd6 [] native_machine_shutdown+0x50/0x67 [] machine_shutdown+0xa/0xc [] native_machine_restart+0x20/0x32 [] machine_restart+0xa/0xc [] kernel_restart+0x47/0x4c [] sys_reboot+0x13e/0x17c [] ? _raw_spin_unlock_bh+0x10/0x12 [] ? bdi_queue_work+0xcf/0xd8 [] ? __bdi_start_writeback+0xae/0xb7 [] ? iterate_supers+0xa3/0xb7 [] system_call_fastpath+0x16/0x1b ---[ end trace 320af5cb1cb60c5b ]--- The root cause seems to be that default_send_IPI_mask_allbutself_phys() takes quite some time (I measured it could be several ms) to complete sending NMIs to all the other 23 CPUs, and for an HZ=250/1000 system that is long enough for a timer interrupt to happen, which in turn kicks load balancing toward a stopped CPU and causes this warning in native_smp_send_reschedule(). So disabling local irqs before stop_other_cpus() fixes this problem (tested: 25 reboots ok), and it is fine as nobody should care about the timer interrupt at this stage of the reboot. The latest 3.4 kernel slightly changes this behavior by sending REBOOT_VECTOR first and only sending NMI_VECTOR if the REBOOT_VECTOR fails, and this patch is still needed to prevent the problem.
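The shape of the fix is small: mask local interrupts on the rebooting CPU before parking the others, so no tick can arrive in the window where sibling CPUs are already stopped. Schematically (a simplified sketch of the ordering, not the actual shutdown path):

	/* Sketch: ordering that avoids the reschedule-IPI warning. */
	static void shutdown_secondary_cpus(void)
	{
		local_irq_disable();	/* no more local timer ticks, hence no
					 * load-balance IPIs aimed at CPUs that
					 * are already (or about to be) stopped */
		stop_other_cpus();	/* park every other CPU */
	}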
Signed-off-by: Feng Tang Acked-by: Don Zickus Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120530231541.4c13433a@feng-i7 Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 79c45af8160..25b48edb847 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -639,9 +639,11 @@ void native_machine_shutdown(void) set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); /* - * O.K Now that I'm on the appropriate processor, - * stop all of the others. + * O.K Now that I'm on the appropriate processor, stop all of the + * others. Also disable the local irq to not receive the per-cpu + * timer interrupt which may trigger scheduler's load balance. */ + local_irq_disable(); stop_other_cpus(); #endif -- cgit v1.2.3-70-g09d2 From f6175f5bfb4c9f2ed32758c95f765b529b1a7f15 Mon Sep 17 00:00:00 2001 From: Tomoki Sekiyama Date: Mon, 28 May 2012 18:09:18 +0900 Subject: x86/ioapic: Fix NULL pointer dereference on CPU hotplug after disabling irqs In current Linux, percpu variable `vector_irq' is not cleared on offlined cpus while disabling devices' irqs. If the cpu that has the disabled irqs in vector_irq is hotplugged, __setup_vector_irq() hits an invalid irq vector and may crash. This bug can be reproduced as follows: # echo 0 > /sys/devices/system/cpu/cpu7/online # modprobe -r some_driver_using_interrupts # vector_irq@cpu7 uncleared # echo 1 > /sys/devices/system/cpu/cpu7/online # kernel may crash This patch fixes this bug by clearing vector_irq in __clear_irq_vector() even if the cpu is offlined. Signed-off-by: Tomoki Sekiyama Acked-by: Thomas Gleixner Cc: yrl.pp-manager.tt@hitachi.com Cc: ltc-kernel@ml.yrl.intra.hitachi.co.jp Cc: Suresh Siddha Cc: Yinghai Lu Cc: Alexander Gordeev Link: http://lkml.kernel.org/r/4FC340BE.7080101@hitachi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ac96561d1a9..5f0ff597437 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1195,7 +1195,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) BUG_ON(!cfg->vector); vector = cfg->vector; - for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) + for_each_cpu(cpu, cfg->domain) per_cpu(vector_irq, cpu)[vector] = -1; cfg->vector = 0; @@ -1203,7 +1203,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) if (likely(!cfg->move_in_progress)) return; - for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { + for_each_cpu(cpu, cfg->old_domain) { for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] != irq) -- cgit v1.2.3-70-g09d2 From ceb1cbac8eda66cf0f889def226b4e82f8ff857b Mon Sep 17 00:00:00 2001 From: Kamalesh Babulal Date: Thu, 31 May 2012 13:07:38 +0530 Subject: sched/x86: Calculate booted cores after construction of sibling_mask Commit 316ad248307fb ("sched/x86: Rewrite set_cpu_sibling_map()") broke the booted_cores accounting. The problem is that the booted_cores accounting needs all the sibling links set up. So restore the second loop and add a comment as to why it's needed.
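The dependency is easy to see in miniature: counting cores means collapsing each group of thread siblings into one core, which is only correct once every sibling link exists, hence the separate second pass in the hunk below. A toy sketch of the counting rule (hypothetical representation: sibling_mask[cpu] is a bitmask of cpu's thread siblings, nr_cpus <= BITS_PER_LONG):

	/* Sketch: one core per sibling group; undercounts or overcounts
	 * if any sibling link has not been set up yet. */
	static int count_cores(int nr_cpus, unsigned long sibling_mask[])
	{
		int cpu, cores = 0;

		for (cpu = 0; cpu < nr_cpus; cpu++)
			/* count only the lowest-numbered CPU of each group */
			if ((sibling_mask[cpu] & ((1UL << cpu) - 1)) == 0)
				cores++;
		return cores;
	}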
On qemu booted with -smp sockets=1,cores=2,threads=2; Before: $ grep cores /proc/cpuinfo cpu cores : 2 cpu cores : 1 cpu cores : 4 cpu cores : 3 With the patch: $ grep cores /proc/cpuinfo cpu cores : 2 cpu cores : 2 cpu cores : 2 cpu cores : 2 Reported-by: Prarit Bhargava Reported-by: Borislav Petkov Signed-off-by: Kamalesh Babulal Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120531073738.GH7511@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fd019d78b1f..3fab55bea29 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -382,6 +382,15 @@ void __cpuinit set_cpu_sibling_map(int cpu) if ((i == cpu) || (has_mc && match_llc(c, o))) link_mask(llc_shared, cpu, i); + } + + /* + * This needs a separate iteration over the cpus because we rely on all + * cpu_sibling_mask links to be set-up. + */ + for_each_cpu(i, cpu_sibling_setup_mask) { + o = &cpu_data(i); + if ((i == cpu) || (has_mc && match_mc(c, o))) { link_mask(core, cpu, i); -- cgit v1.2.3-70-g09d2 From b430f7c4706aeba4270c7ab7744fc504b9315e1c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Jun 2012 15:30:31 +0200 Subject: perf/x86: Fix Intel shared extra MSR allocation Zheng Yan reported that event group validation can wreck event state when Intel extra_reg allocation changes event state. Validation shouldn't change any persistent state. Cloning events in validate_{event,group}() isn't really pretty either, so add a few special cases to avoid modifying the event state. The code is restructured to minimize the special case impact. Reported-by: Zheng Yan Acked-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1338903031.28282.175.camel@twins Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 1 + arch/x86/kernel/cpu/perf_event.h | 1 + arch/x86/kernel/cpu/perf_event_intel.c | 92 +++++++++++++++++++++++----------- 3 files changed, 66 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e049d6da018..cb608383e4f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void) if (!cpuc->shared_regs) goto error; } + cpuc->is_fake = 1; return cpuc; error: free_fake_cpuc(cpuc); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6638aaf5449..83794d8e6af 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -117,6 +117,7 @@ struct cpu_hw_events { struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ unsigned int group_flag; + int is_fake; /* * Intel DebugStore bits diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 166546ec6ae..965baa2fa79 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event) return NULL; } -static bool intel_try_alt_er(struct perf_event *event, int orig_idx) +static int intel_alt_er(int idx) { if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) - return false; + return idx; - if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) { - event->hw.config &= ~INTEL_ARCH_EVENT_MASK; - event->hw.config |= 0x01bb; - event->hw.extra_reg.idx = EXTRA_REG_RSP_1; - 
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; - } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) { + if (idx == EXTRA_REG_RSP_0) + return EXTRA_REG_RSP_1; + + if (idx == EXTRA_REG_RSP_1) + return EXTRA_REG_RSP_0; + + return idx; +} + +static void intel_fixup_er(struct perf_event *event, int idx) +{ + event->hw.extra_reg.idx = idx; + + if (idx == EXTRA_REG_RSP_0) { event->hw.config &= ~INTEL_ARCH_EVENT_MASK; event->hw.config |= 0x01b7; - event->hw.extra_reg.idx = EXTRA_REG_RSP_0; event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; + } else if (idx == EXTRA_REG_RSP_1) { + event->hw.config &= ~INTEL_ARCH_EVENT_MASK; + event->hw.config |= 0x01bb; + event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; } - - if (event->hw.extra_reg.idx == orig_idx) - return false; - - return true; } /* @@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, struct event_constraint *c = &emptyconstraint; struct er_account *era; unsigned long flags; - int orig_idx = reg->idx; + int idx = reg->idx; - /* already allocated shared msr */ - if (reg->alloc) + /* + * reg->alloc can be set due to existing state, so for fake cpuc we + * need to ignore this, otherwise we might fail to allocate proper fake + * state for this extra reg constraint. Also see the comment below. + */ + if (reg->alloc && !cpuc->is_fake) return NULL; /* call x86_get_event_constraint() */ again: - era = &cpuc->shared_regs->regs[reg->idx]; + era = &cpuc->shared_regs->regs[idx]; /* * we use spin_lock_irqsave() to avoid lockdep issues when * passing a fake cpuc @@ -1173,6 +1183,29 @@ again: if (!atomic_read(&era->ref) || era->config == reg->config) { + /* + * If its a fake cpuc -- as per validate_{group,event}() we + * shouldn't touch event state and we can avoid doing so + * since both will only call get_event_constraints() once + * on each event, this avoids the need for reg->alloc. + * + * Not doing the ER fixup will only result in era->reg being + * wrong, but since we won't actually try and program hardware + * this isn't a problem either. + */ + if (!cpuc->is_fake) { + if (idx != reg->idx) + intel_fixup_er(event, idx); + + /* + * x86_schedule_events() can call get_event_constraints() + * multiple times on events in the case of incremental + * scheduling(). reg->alloc ensures we only do the ER + * allocation once. + */ + reg->alloc = 1; + } + /* lock in msr value */ era->config = reg->config; era->reg = reg->reg; @@ -1180,17 +1213,17 @@ again: /* one more user */ atomic_inc(&era->ref); - /* no need to reallocate during incremental event scheduling */ - reg->alloc = 1; - /* * need to call x86_get_event_constraint() * to check if associated event has constraints */ c = NULL; - } else if (intel_try_alt_er(event, orig_idx)) { - raw_spin_unlock_irqrestore(&era->lock, flags); - goto again; + } else { + idx = intel_alt_er(idx); + if (idx != reg->idx) { + raw_spin_unlock_irqrestore(&era->lock, flags); + goto again; + } } raw_spin_unlock_irqrestore(&era->lock, flags); @@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, struct er_account *era; /* - * only put constraint if extra reg was actually - * allocated. Also takes care of event which do - * not use an extra shared reg + * Only put constraint if extra reg was actually allocated. Also takes + * care of event which do not use an extra shared reg. + * + * Also, if this is a fake cpuc we shouldn't touch any event state + * (reg->alloc) and we don't care about leaving inconsistent cpuc state + * either since it'll be thrown out. 
*/ - if (!reg->alloc) + if (!reg->alloc || cpuc->is_fake) return; era = &cpuc->shared_regs->regs[reg->idx]; -- cgit v1.2.3-70-g09d2 From cccb9ba9e4ee0d750265f53de9258df69655c40b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Jun 2012 10:26:43 +0200 Subject: perf/x86: Implement cycles:p for SNB/IVB Now that there's finally a chip with working PEBS (IvyBridge), we can enable the hardware and implement cycles:p for SNB/IVB. Cc: Stephane Eranian Requested-and-tested-by: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1338884803.28282.153.camel@twins Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 1 + arch/x86/kernel/cpu/perf_event_intel.c | 50 ++++++++++++++++++++++++++++------ 2 files changed, 43 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 83794d8e6af..7241e2fc3c1 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -365,6 +365,7 @@ struct x86_pmu { int pebs_record_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; + void (*pebs_aliases)(struct perf_event *event); /* * Intel LBR diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 965baa2fa79..2312c1ff1b1 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1336,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc, intel_put_shared_regs_event_constraints(cpuc, event); } -static int intel_pmu_hw_config(struct perf_event *event) +static void intel_pebs_aliases_core2(struct perf_event *event) { - int ret = x86_pmu_hw_config(event); - - if (ret) - return ret; - - if (event->attr.precise_ip && - (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { + if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { /* * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P * (0x003c) so that we can use it with PEBS. @@ -1365,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event) */ u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); + alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); + event->hw.config = alt_config; + } +} + +static void intel_pebs_aliases_snb(struct perf_event *event) +{ + if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { + /* + * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P + * (0x003c) so that we can use it with PEBS. + * + * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't + * PEBS capable. However we can use UOPS_RETIRED.ALL + * (0x01c2), which is a PEBS capable event, to get the same + * count. + * + * UOPS_RETIRED.ALL counts the number of cycles that retires + * CNTMASK micro-ops. By setting CNTMASK to a value (16) + * larger than the maximum number of micro-ops that can be + * retired per cycle (4) and then inverting the condition, we + * count all cycles that retire 16 or less micro-ops, which + * is every cycle. + * + * Thereby we gain a PEBS capable cycle counter. 
+ */ + u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16); alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); event->hw.config = alt_config; } +} + +static int intel_pmu_hw_config(struct perf_event *event) +{ + int ret = x86_pmu_hw_config(event); + + if (ret) + return ret; + + if (event->attr.precise_ip && x86_pmu.pebs_aliases) + x86_pmu.pebs_aliases(event); if (intel_pmu_needs_lbr_smpl(event)) { ret = intel_pmu_setup_lbr_filter(event); @@ -1643,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .get_event_constraints = intel_get_event_constraints, .put_event_constraints = intel_put_event_constraints, + .pebs_aliases = intel_pebs_aliases_core2, .format_attrs = intel_arch3_formats_attr, @@ -1885,6 +1918,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; + x86_pmu.pebs_aliases = intel_pebs_aliases_snb; x86_pmu.extra_regs = intel_snb_extra_regs; /* all extra regs are per-cpu when HT is on */ x86_pmu.er_flags |= ERF_HAS_RSP_1; -- cgit v1.2.3-70-g09d2 From b6db437ba8322f5cee0bd355ad2ef9f73c413754 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Jun 2012 10:26:43 +0200 Subject: perf/x86: Enable/Add IvyBridge hardware support Implement rudimentary IVB perf support. The SDM states it's identical to SNB with the exception of the exact event tables, but a quick look suggests they're similar enough. Also mark SNB-EP as broken for now. Requested-and-tested-by: Linus Torvalds Cc: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1338884803.28282.153.camel@twins Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 2312c1ff1b1..187c294bc65 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1909,8 +1909,9 @@ __init int intel_pmu_init(void) break; case 42: /* SandyBridge */ - x86_add_quirk(intel_sandybridge_quirk); case 45: /* SandyBridge, "Romely-EP" */ + x86_add_quirk(intel_sandybridge_quirk); + case 58: /* IvyBridge */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); -- cgit v1.2.3-70-g09d2 From 8440ccb43fc0ecffcf1acee0273d766e6a8cd51d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Jun 2012 10:26:43 +0200 Subject: perf/x86: Update SNB PEBS constraints Afaict there's no need to (incompletely) iterate the MEM_UOPS_RETIRED.* umask state.
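The simplification leans on constraint match width: an INTEL_EVENT_CONSTRAINT matches only the 8 event-select bits, so a single entry for event 0xd0 covers every MEM_UOP_RETIRED umask, whereas an INTEL_UEVENT_CONSTRAINT must match event code and umask exactly. A standalone sketch of that distinction (simplified masks and a hypothetical helper, not the kernel macros):

	/* Sketch: event-code vs. event+umask constraint matching. */
	#define EVENT_MASK	0x00ffULL	/* bits 0-7: event select   */
	#define UEVENT_MASK	0xffffULL	/* bits 0-15: event + umask */

	static int constraint_matches(unsigned long long config,
				      unsigned long long code,
				      unsigned long long mask)
	{
		return (config & mask) == code;
	}

	/* constraint_matches(0x81d0, 0xd0, EVENT_MASK) == 1:
	 *	one event 0xd0 entry covers ANY_LOADS (umask 0x81)
	 * constraint_matches(0x81d0, 0x11d0, UEVENT_MASK) == 0:
	 *	a uevent entry covers only its exact umask */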
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1338884803.28282.153.camel@twins Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 5a3edc27f6e..35e2192df9f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */ - INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */ - INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */ - INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ -- cgit v1.2.3-70-g09d2 From 302fa4b58ac754a6da13f4f5546f710fecc3b945 Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Fri, 20 Apr 2012 15:41:33 -0700 Subject: perf/x86: Allow multiple stacks Without this patch, applications with two different stack regions (eg: native stack vs JIT stack) get truncated callchains even when RBP chaining is present. GDB shows proper stack traces and the frame pointer chaining is intact. This patch disables the (fp < RSP) check, hoping that other checks in the code save the day for us. In our limited testing, this didn't seem to break anything. In the long term, we could potentially have userspace advise the kernel on the range of valid stack addresses, so we don't spend a lot of time unwinding from bogus addresses. 
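For reference, the dropped check sits in the frame-pointer chain walk, which in outline looks like this (a simplified rendition; the real code copies each frame with copy_from_user_nmi(), but the struct layout matches the kernel's stack_frame):

	/* Simplified FP-chain walk; the removed test is kept as a comment. */
	struct stack_frame {
		struct stack_frame *next_frame;
		unsigned long return_address;
	};

	static void walk_callchain(struct stack_frame *fp, unsigned long sp,
				   void (*emit)(unsigned long), int max_depth)
	{
		while (fp && max_depth--) {
			/* removed: if ((unsigned long)fp < sp) break;
			 * a JIT stack may legitimately live below sp */
			emit(fp->return_address);
			fp = fp->next_frame;
		}
	}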
Signed-off-by: Arun Sharma CC: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Stephane Eranian Cc: Namhyung Kim Cc: Tom Zanussi Cc: linux-kernel@vger.kernel.org Cc: linux-perf-users@vger.kernel.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1334961696-19580-2-git-send-email-asharma@fb.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index cb608383e4f..e78bc256aea 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1781,9 +1781,6 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) if (bytes != sizeof(frame)) break; - if (fp < compat_ptr(regs->sp)) - break; - perf_callchain_store(entry, frame.return_address); fp = compat_ptr(frame.next_frame); } @@ -1827,9 +1824,6 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) if (bytes != sizeof(frame)) break; - if ((unsigned long)fp < regs->sp) - break; - perf_callchain_store(entry, frame.return_address); fp = frame.next_frame; } -- cgit v1.2.3-70-g09d2 From bc6ca7b342d5ae15c3ba3081fd40271b8039fb25 Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Fri, 20 Apr 2012 15:41:35 -0700 Subject: perf/x86: Check if user fp is valid Signed-off-by: Arun Sharma Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1334961696-19580-4-git-send-email-asharma@fb.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 12 ++++++------ arch/x86/kernel/cpu/perf_event.c | 12 ++++++++++++ 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 04cd6882308..e1f3a17034f 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -33,9 +33,8 @@ #define segment_eq(a, b) ((a).seg == (b).seg) #define user_addr_max() (current_thread_info()->addr_limit.seg) -#define __addr_ok(addr) \ - ((unsigned long __force)(addr) < \ - (current_thread_info()->addr_limit.seg)) +#define __addr_ok(addr) \ + ((unsigned long __force)(addr) < user_addr_max()) /* * Test whether a block of memory is a valid user space address. @@ -47,14 +46,14 @@ * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... */ -#define __range_not_ok(addr, size) \ +#define __range_not_ok(addr, size, limit) \ ({ \ unsigned long flag, roksum; \ __chk_user_ptr(addr); \ asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \ : "=&r" (flag), "=r" (roksum) \ : "1" (addr), "g" ((long)(size)), \ - "rm" (current_thread_info()->addr_limit.seg)); \ + "rm" (limit)); \ flag; \ }) @@ -77,7 +76,8 @@ * checks that the pointer is in the user space range - after calling * this function, memory access functions may still return -EFAULT. 
*/ -#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0)) +#define access_ok(type, addr, size) \ + (likely(__range_not_ok(addr, size, user_addr_max()) == 0)) /* * The exception table consists of pairs of addresses relative to the diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e78bc256aea..c4706cf9c01 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1757,6 +1757,12 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); } +static inline int +valid_user_frame(const void __user *fp, unsigned long size) +{ + return (__range_not_ok(fp, size, TASK_SIZE) == 0); +} + #ifdef CONFIG_COMPAT #include @@ -1781,6 +1787,9 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) if (bytes != sizeof(frame)) break; + if (!valid_user_frame(fp, sizeof(frame))) + break; + perf_callchain_store(entry, frame.return_address); fp = compat_ptr(frame.next_frame); } @@ -1824,6 +1833,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) if (bytes != sizeof(frame)) break; + if (!valid_user_frame(fp, sizeof(frame))) + break; + perf_callchain_store(entry, frame.return_address); fp = frame.next_frame; } -- cgit v1.2.3-70-g09d2 From db0dc75d6403b6663c0eab4c6ccb672eb9b2ed72 Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Fri, 20 Apr 2012 15:41:36 -0700 Subject: perf/x86: Check user address explicitly in copy_from_user_nmi() Signed-off-by: Arun Sharma Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1334961696-19580-5-git-send-email-asharma@fb.com Signed-off-by: Ingo Molnar --- arch/x86/lib/usercopy.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index f61ee67ec00..677b1ed184c 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -8,6 +8,7 @@ #include #include +#include /* * best effort, GUP based copy_from_user() that is NMI-safe @@ -21,6 +22,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) void *map; int ret; + if (__range_not_ok(from, n, TASK_SIZE) == 0) + return len; + do { ret = __get_user_pages_fast(addr, 1, 0, &page); if (!ret) -- cgit v1.2.3-70-g09d2 From 743628e868c5992354fc80b4d1e9a6143da1c0e6 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Thu, 7 Jun 2012 09:05:21 -0700 Subject: x86, efi stub: Add .reloc section back into image Some UEFI firmware will not load a .efi with a .reloc section with a size of 0. Therefore, we create a .efi image with 4 main areas and 3 sections. 1. PE/COFF file header 2. .setup section (covers all setup code following the first sector) 3. .reloc section (contains 1 dummy reloc entry, created in build.c) 4. .text section (covers the remaining kernel image) To make room for the new .setup section data, the header bugger_off_msg had to be shortened. Reported-by: Henrik Rydberg Signed-off-by: Jordan Justen Link: http://lkml.kernel.org/r/1339085121-12760-1-git-send-email-jordan.l.justen@intel.com Tested-by: Lee G Rosenbaum Tested-by: Henrik Rydberg Cc: Matt Fleming Signed-off-by: H. 
Peter Anvin --- arch/x86/boot/header.S | 42 ++++++++--- arch/x86/boot/tools/build.c | 172 ++++++++++++++++++++++++++++---------------- 2 files changed, 140 insertions(+), 74 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 8bbea6aa40d..efe5acfc79c 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -94,10 +94,10 @@ bs_die: .section ".bsdata", "a" bugger_off_msg: - .ascii "Direct booting from floppy is no longer supported.\r\n" - .ascii "Please use a boot loader program instead.\r\n" + .ascii "Direct floppy boot is not supported. " + .ascii "Use a boot loader program instead.\r\n" .ascii "\n" - .ascii "Remove disk and press any key to reboot . . .\r\n" + .ascii "Remove disk and press any key to reboot ...\r\n" .byte 0 #ifdef CONFIG_EFI_STUB @@ -111,7 +111,7 @@ coff_header: #else .word 0x8664 # x86-64 #endif - .word 2 # nr_sections + .word 3 # nr_sections .long 0 # TimeDateStamp .long 0 # PointerToSymbolTable .long 1 # NumberOfSymbols @@ -158,8 +158,8 @@ extra_header_fields: #else .quad 0 # ImageBase #endif - .long 0x1000 # SectionAlignment - .long 0x200 # FileAlignment + .long 0x20 # SectionAlignment + .long 0x20 # FileAlignment .word 0 # MajorOperatingSystemVersion .word 0 # MinorOperatingSystemVersion .word 0 # MajorImageVersion @@ -200,8 +200,10 @@ extra_header_fields: # Section table section_table: - .ascii ".text" - .byte 0 + # + # The offset & size fields are filled in by build.c. + # + .ascii ".setup" .byte 0 .byte 0 .long 0 @@ -217,9 +219,8 @@ section_table: # # The EFI application loader requires a relocation section - # because EFI applications must be relocatable. But since - # we don't need the loader to fixup any relocs for us, we - # just create an empty (zero-length) .reloc section header. + # because EFI applications must be relocatable. The .reloc + # offset & size fields are filled in by build.c. # .ascii ".reloc" .byte 0 @@ -233,6 +234,25 @@ section_table: .word 0 # NumberOfRelocations .word 0 # NumberOfLineNumbers .long 0x42100040 # Characteristics (section flags) + + # + # The offset & size fields are filled in by build.c. + # + .ascii ".text" + .byte 0 + .byte 0 + .byte 0 + .long 0 + .long 0x0 # startup_{32,64} + .long 0 # Size of initialized data + # on disk + .long 0x0 # startup_{32,64} + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long 0x60500020 # Characteristics (section flags) + #endif /* CONFIG_EFI_STUB */ # Kernel attributes; used by setup. 
This is part 1 of the header, from the old boot sector. diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 3f61f6e2b46..4b8e165ee57 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -50,6 +50,8 @@ typedef unsigned int u32; u8 buf[SETUP_SECT_MAX*512]; int is_big_kernel; +#define PECOFF_RELOC_RESERVE 0x20 + /*----------------------------------------------------------------------*/ static const u32 crctab32[] = { @@ -133,11 +135,103 @@ static void usage(void) die("Usage: build setup system [> image]"); } -int main(int argc, char ** argv) -{ #ifdef CONFIG_EFI_STUB - unsigned int file_sz, pe_header; + +static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +{ + unsigned int pe_header; + unsigned short num_sections; + u8 *section; + + pe_header = get_unaligned_le32(&buf[0x3c]); + num_sections = get_unaligned_le16(&buf[pe_header + 6]); + +#ifdef CONFIG_X86_32 + section = &buf[pe_header + 0xa8]; +#else + section = &buf[pe_header + 0xb8]; #endif + + while (num_sections > 0) { + if (strncmp((char*)section, section_name, 8) == 0) { + /* section header size field */ + put_unaligned_le32(size, section + 0x8); + + /* section header vma field */ + put_unaligned_le32(offset, section + 0xc); + + /* section header 'size of initialised data' field */ + put_unaligned_le32(size, section + 0x10); + + /* section header 'file offset' field */ + put_unaligned_le32(offset, section + 0x14); + + break; + } + section += 0x28; + num_sections--; + } +} + +static void update_pecoff_setup_and_reloc(unsigned int size) +{ + u32 setup_offset = 0x200; + u32 reloc_offset = size - PECOFF_RELOC_RESERVE; + u32 setup_size = reloc_offset - setup_offset; + + update_pecoff_section_header(".setup", setup_offset, setup_size); + update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE); + + /* + * Modify .reloc section contents with a single entry. The + * relocation is applied to offset 10 of the relocation section. + */ + put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]); + put_unaligned_le32(10, &buf[reloc_offset + 4]); +} + +static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) +{ + unsigned int pe_header; + unsigned int text_sz = file_sz - text_start; + + pe_header = get_unaligned_le32(&buf[0x3c]); + + /* Size of image */ + put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); + + /* + * Size of code: Subtract the size of the first sector (512 bytes) + * which includes the header. + */ + put_unaligned_le32(file_sz - 512, &buf[pe_header + 0x1c]); + +#ifdef CONFIG_X86_32 + /* + * Address of entry point. + * + * The EFI stub entry point is +16 bytes from the start of + * the .text section. + */ + put_unaligned_le32(text_start + 16, &buf[pe_header + 0x28]); +#else + /* + * Address of entry point. startup_32 is at the beginning and + * the 64-bit entry point (startup_64) is always 512 bytes + * after.
The EFI stub entry point is 16 bytes after that, as + * the first instruction allows legacy loaders to jump over + * the EFI stub initialisation + */ + put_unaligned_le32(text_start + 528, &buf[pe_header + 0x28]); +#endif /* CONFIG_X86_32 */ + + update_pecoff_section_header(".text", text_start, text_sz); +} + +#endif /* CONFIG_EFI_STUB */ + +int main(int argc, char ** argv) +{ unsigned int i, sz, setup_sectors; int c; u32 sys_size; @@ -163,6 +257,12 @@ int main(int argc, char ** argv) die("Boot block hasn't got boot flag (0xAA55)"); fclose(file); +#ifdef CONFIG_EFI_STUB + /* Reserve 0x20 bytes for .reloc section */ + memset(buf+c, 0, PECOFF_RELOC_RESERVE); + c += PECOFF_RELOC_RESERVE; +#endif + /* Pad unused space with zeros */ setup_sectors = (c + 511) / 512; if (setup_sectors < SETUP_SECT_MIN) @@ -170,6 +270,10 @@ int main(int argc, char ** argv) i = setup_sectors*512; memset(buf+c, 0, i-c); +#ifdef CONFIG_EFI_STUB + update_pecoff_setup_and_reloc(i); +#endif + /* Set the default root device */ put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]); @@ -194,66 +298,8 @@ int main(int argc, char ** argv) put_unaligned_le32(sys_size, &buf[0x1f4]); #ifdef CONFIG_EFI_STUB - file_sz = sz + i + ((sys_size * 16) - sz); - - pe_header = get_unaligned_le32(&buf[0x3c]); - - /* Size of image */ - put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); - - /* - * Subtract the size of the first section (512 bytes) which - * includes the header and .reloc section. The remaining size - * is that of the .text section. - */ - file_sz -= 512; - - /* Size of code */ - put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]); - -#ifdef CONFIG_X86_32 - /* - * Address of entry point. - * - * The EFI stub entry point is +16 bytes from the start of - * the .text section. - */ - put_unaligned_le32(i + 16, &buf[pe_header + 0x28]); - - /* .text size */ - put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]); - - /* .text vma */ - put_unaligned_le32(0x200, &buf[pe_header + 0xb4]); - - /* .text size of initialised data */ - put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]); - - /* .text file offset */ - put_unaligned_le32(0x200, &buf[pe_header + 0xbc]); -#else - /* - * Address of entry point. startup_32 is at the beginning and - * the 64-bit entry point (startup_64) is always 512 bytes - * after. The EFI stub entry point is 16 bytes after that, as - * the first instruction allows legacy loaders to jump over - * the EFI stub initialisation - */ - put_unaligned_le32(i + 528, &buf[pe_header + 0x28]); - - /* .text size */ - put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]); - - /* .text vma */ - put_unaligned_le32(0x200, &buf[pe_header + 0xc4]); - - /* .text size of initialised data */ - put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]); - - /* .text file offset */ - put_unaligned_le32(0x200, &buf[pe_header + 0xcc]); -#endif /* CONFIG_X86_32 */ -#endif /* CONFIG_EFI_STUB */ + update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz)); +#endif crc = partial_crc32(buf, i, crc); if (fwrite(buf, 1, i, stdout) != i) -- cgit v1.2.3-70-g09d2 From bd2753b2dda7bb43c7468826de75f49c6a7e8965 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 6 Jun 2012 10:55:40 -0700 Subject: x86/mm: Only add extra pages count for the first memory range during pre-allocation early page table space Robin found this regression: | I just tried to boot an 8TB system. It fails very early in boot with: | Kernel panic - not syncing: Cannot find space for the kernel page tables git bisect commit 722bc6b16771ed80871e1fd81c86d3627dda2ac8. 
A git revert of that commit does boot past that point on the 8TB configuration. That commit adds the extra pages for every memory range, even those above 4G. Limit that extra page count to the first entry only. Bisected-by: Robin Holt Tested-by: Robin Holt Signed-off-by: Yinghai Lu Cc: WANG Cong Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/CAE9FiQUj3wyzQxtq9yzBNc9u220p8JZ1FYHG7t%3DMOzJ%3D9BZMYA@mail.gmail.com Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 97141c26a13..bc4e9d84157 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -62,7 +62,8 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en extra += PMD_SIZE; #endif /* The first 2/4M doesn't use large pages. */ - extra += mr->end - mr->start; + if (mr->start < PMD_SIZE) + extra += mr->end - mr->start; ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; } else -- cgit v1.2.3-70-g09d2 From d5d2d2eea84b0d8450b082edbc3dbde41fb8bfd8 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Thu, 7 Jun 2012 08:31:40 -0500 Subject: x86/uv: Fix UV2 BAU legacy mode The SGI Altix UV2 BAU (Broadcast Assist Unit), as used for tlb-shootdown (selective broadcast mode), always uses the UV2 broadcast descriptor format. There is no need to clear the 'legacy' (UV1) mode, because the hardware always uses UV2 mode for selective broadcast. But the BIOS uses general broadcast and legacy mode, and the hardware pays attention to the legacy mode bit for general broadcast. So the kernel must not clear that mode bit. Signed-off-by: Cliff Wickman Cc: Link: http://lkml.kernel.org/r/E1SccoO-0002Lh-Cb@eag09.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 1 - arch/x86/platform/uv/tlb_uv.c | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index becf47b8173..6149b476d9d 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -149,7 +149,6 @@ /* 4 bits of software ack period */ #define UV2_ACK_MASK 0x7UL #define UV2_ACK_UNITS_SHFT 3 -#define UV2_LEG_SHFT UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT #define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT /* diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 3ae0e61abd2..59880afa851 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1295,7 +1295,6 @@ static void __init enable_timeouts(void) */ mmr_image |= (1L << SOFTACK_MSHIFT); if (is_uv2_hub()) { - mmr_image &= ~(1L << UV2_LEG_SHFT); mmr_image |= (1L << UV2_EXT_SHFT); } write_mmr_misc_control(pnode, mmr_image); -- cgit v1.2.3-70-g09d2 From eeaaa96a3a2134a174100afd129bb0891d05f4b2 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Wed, 6 Jun 2012 10:05:42 -0400 Subject: x86/nmi: Fix section mismatch warnings on 32-bit It was reported that compiling for 32-bit caused a bunch of section mismatch warnings: VDSOSYM arch/x86/vdso/vdso32-syms.lds LD arch/x86/vdso/built-in.o LD arch/x86/built-in.o WARNING: arch/x86/built-in.o(.data+0x5af0): Section mismatch in reference from the variable test_nmi_ipi_callback_na.10451 to the function .init.text:test_nmi_ipi_callback() [...]
WARNING: arch/x86/built-in.o(.data+0x5b04): Section mismatch in reference from the variable nmi_unk_cb_na.10399 to the function .init.text:nmi_unk_cb() The variable nmi_unk_cb_na.10399 references the function __init nmi_unk_cb() [...] Both of these are attributed to the internal representation of the nmiaction struct created during register_nmi_handler. The reason for this is that those structs are not defined in the init section whereas the rest of the code in nmi_selftest.c is. To resolve this, I created a new #define, register_nmi_handler_initonly, that tags the struct as __initdata, fixing the mismatch. This #define should only be used in rare situations where the register/unregister is called during init of the kernel. Big thanks to Jan Beulich for decoding this for me as I didn't have a clue what was going on. Reported-by: Witold Baryluk Tested-by: Witold Baryluk Cc: Jan Beulich Signed-off-by: Don Zickus Link: http://lkml.kernel.org/r/1338991542-23000-1-git-send-email-dzickus@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 14 ++++++++++++++ arch/x86/kernel/nmi_selftest.c | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 0e3793b821e..dc580c42851 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -54,6 +54,20 @@ struct nmiaction { __register_nmi_handler((t), &fn##_na); \ }) +/* + * For special handlers that register/unregister in the + * init section only. This should be considered rare. + */ +#define register_nmi_handler_initonly(t, fn, fg, n) \ +({ \ + static struct nmiaction fn##_na __initdata = { \ + .handler = (fn), \ + .name = (n), \ + .flags = (fg), \ + }; \ + __register_nmi_handler((t), &fn##_na); \ +}) + int __register_nmi_handler(unsigned int, struct nmiaction *); void unregister_nmi_handler(unsigned int, const char *); diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c index e31bf8d5c4d..149b8d9c6ad 100644 --- a/arch/x86/kernel/nmi_selftest.c +++ b/arch/x86/kernel/nmi_selftest.c @@ -42,7 +42,7 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs) static void __init init_nmi_testsuite(void) { /* trap all the unknown NMIs we may generate */ - register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); + register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); } static void __init cleanup_nmi_testsuite(void) @@ -64,7 +64,7 @@ static void __init test_nmi_ipi(struct cpumask *mask) { unsigned long timeout; - if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, + if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback, NMI_FLAG_FIRST, "nmi_selftest")) { nmi_fail = FAILURE; return; -- cgit v1.2.3-70-g09d2 From e32025a56403df4386cd61a741c0a36afe79ae8a Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 11 Jun 2012 23:18:33 -0300 Subject: x86: kvmclock: remove check_and_clear_guest_paused warning The CPU offline path calls the hrtimer interrupt handler with interrupts disabled, without touching preempt_count, triggering this warning. Remove the warning, since the function is supposed to be called from hrtimer interrupt context only.
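The subtlety: in_atomic() inspects only preempt_count, and disabling interrupts alone never raises preempt_count, so the offline path (irqs off, preempt_count untouched) trips the assertion even though the per-cpu access is migration-safe. Schematically (a sketch with hypothetical helpers, not kernel code):

	/* Sketch: what WARN_ON(!in_atomic()) tested vs. actual safety. */
	static int old_warn_fires(int preempt_count)
	{
		return preempt_count == 0;	/* all the old check saw */
	}

	static int percpu_access_safe(int irqs_disabled, int preempt_count)
	{
		/* safe whenever the task cannot migrate off this CPU */
		return irqs_disabled || preempt_count > 0;
	}

	/* CPU offline path: irqs_disabled == 1, preempt_count == 0,
	 * so percpu_access_safe() holds, yet old_warn_fires() too. */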
Signed-off-by: Marcelo Tosatti --- arch/x86/kernel/kvmclock.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 086eb58c6e8..f1b42b3a186 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -120,11 +120,6 @@ bool kvm_check_and_clear_guest_paused(void) bool ret = false; struct pvclock_vcpu_time_info *src; - /* - * per_cpu() is safe here because this function is only called from - * timer functions where preemption is already disabled. - */ - WARN_ON(!in_atomic()); src = &__get_cpu_var(hv_clock); if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { __this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED); -- cgit v1.2.3-70-g09d2