From 6ec3cfeca04622e3d80c9270191cd7f5f88214af Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Mon, 13 Apr 2009 15:20:58 -0700 Subject: x86, irq: Remove IRQ_DISABLED check in process context IRQ move As discussed in the thread here: http://marc.info/?l=linux-kernel&m=123964468521142&w=2 Eric W. Biederman observed: > It looks like some additional bugs have slipped in since last I looked. > > set_irq_affinity does this: > ifdef CONFIG_GENERIC_PENDING_IRQ > if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) { > cpumask_copy(desc->affinity, cpumask); > desc->chip->set_affinity(irq, cpumask); > } else { > desc->status |= IRQ_MOVE_PENDING; > cpumask_copy(desc->pending_mask, cpumask); > } > #else > > That IRQ_DISABLED case is a software state and as such it has nothing to > do with how safe it is to move an irq in process context. [...] > > The only reason we migrate MSIs in interrupt context today is that there > wasn't infrastructure for support migration both in interrupt context > and outside of it. Yes. The idea here was to force the MSI migration to happen in process context. One of the patches in the series did disable_irq(dev->irq); irq_set_affinity(dev->irq, cpumask_of(dev->cpu)); enable_irq(dev->irq); with the above patch adding irq/manage code check for interrupt disabled and moving the interrupt in process context. IIRC, there was no IRQ_MOVE_PCNTXT when we were developing this HPET code and we ended up having this ugly hack. IRQ_MOVE_PCNTXT was there when we eventually submitted the patch upstream. But, looks like I did a blind rebasing instead of using IRQ_MOVE_PCNTXT in hpet MSI code. Below patch fixes this. i.e., revert commit 932775a4ab622e3c99bd59f14cc and add PCNTXT to HPET MSI setup. Also removes copying of desc->affinity in generic code as set_affinity routines are doing it internally. Reported-by: "Eric W. Biederman" Signed-off-by: Venkatesh Pallipadi Acked-by: "Eric W. Biederman" Cc: "Li Shaohua" Cc: Gary Hade Cc: "lcm@us.ibm.com" Cc: suresh.b.siddha@intel.com LKML-Reference: <20090413222058.GB8211@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index a2789e42e16..30da617d18e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3670,12 +3670,14 @@ int arch_setup_hpet_msi(unsigned int irq) { int ret; struct msi_msg msg; + struct irq_desc *desc = irq_to_desc(irq); ret = msi_compose_msg(NULL, irq, &msg); if (ret < 0) return ret; hpet_msi_write(irq, &msg); + desc->status |= IRQ_MOVE_PCNTXT; set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq, "edge"); -- cgit v1.2.3-70-g09d2 From ca713c2ab0eea3458962983e4a7e13430ea479b8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 15 Apr 2009 18:39:13 -0700 Subject: x86/irq: mark NUMA_MIGRATE_IRQ_DESC broken It causes crash on system with lots of cards with MSI-X when irq_balancer enabled... The patches fixing it were both complex and fragile, according to Eric they were also doing quite dangerous things to the hardware. Instead we now have patches that solve this problem via static NUMA node mappings - not dynamic allocation and balancing. The patches are much simpler than this method but are still too large outside of the merge window, so we mark the dynamic balancer as broken for now, and queue up the new approach for v2.6.31. [ Impact: deactivate broken kernel feature ] Reported-by: Suresh Siddha Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: "Eric W. Biederman" Cc: Rusty Russell LKML-Reference: <49E68C41.4020801@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bc25b9f5e4c..b1d2be7d91a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -277,6 +277,7 @@ config SPARSE_IRQ config NUMA_MIGRATE_IRQ_DESC bool "Move irq desc when changing irq smp_affinity" depends on SPARSE_IRQ && NUMA + depends on BROKEN default n ---help--- This enables moving irq_desc to cpu/node that irq will use handled. -- cgit v1.2.3-70-g09d2 From dc098551918093901d8ac8936e9d1a1b891b56ed Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Fri, 17 Apr 2009 09:22:42 -0500 Subject: x86/uv: fix init of memory-less nodes Add support for nodes that have cpus but no memory. The current code was failing to add these nodes to the nodes_present_map. v2: Fixes case caught by David Rientjes - missed support for the x2apic SRAT table. [ Impact: fix potential boot crash on memory-less UV nodes. ] Reported-by: David Rientjes Signed-off-by: Jack Steiner LKML-Reference: <20090417142242.GA23743@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/srat_64.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index c7d272b8574..33c5fa57e43 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -28,6 +28,7 @@ int acpi_numa __initdata; static struct acpi_table_slit *acpi_slit; static nodemask_t nodes_parsed __initdata; +static nodemask_t cpu_nodes_parsed __initdata; static struct bootnode nodes[MAX_NUMNODES] __initdata; static struct bootnode nodes_add[MAX_NUMNODES]; static int found_add_area __initdata; @@ -141,6 +142,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) apic_id = pa->apic_id; apicid_to_node[apic_id] = node; + node_set(node, cpu_nodes_parsed); acpi_numa = 1; printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", pxm, apic_id, node); @@ -174,6 +176,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) else apic_id = pa->apic_id; apicid_to_node[apic_id] = node; + node_set(node, cpu_nodes_parsed); acpi_numa = 1; printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", pxm, apic_id, node); @@ -402,7 +405,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) return -1; } - node_possible_map = nodes_parsed; + /* Account for nodes with cpus and no memory */ + nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); /* Finally register nodes */ for_each_node_mask(i, node_possible_map) -- cgit v1.2.3-70-g09d2 From 27229ca63269c1be0a710f074fa9de4024f283f1 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Fri, 17 Apr 2009 09:24:47 -0500 Subject: x86/uv: fix init of cpu-less nodes Fix an endcase in the UV initialization code for the "UV large system mode" of apicids. If node zero contains no cpus, cpus on another node will be the boot cpu. The percpu data that contains the extra apicid bits was not being initialized early enough. [ Impact: fix potential boot crash on cpu-less UV nodes ] Signed-off-by: Jack Steiner LKML-Reference: <20090417142447.GA23759@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 1248318436e..49d2f5c739b 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -34,6 +35,17 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); static enum uv_system_type uv_system_type; +static int early_get_nodeid(void) +{ + union uvh_node_id_u node_id; + unsigned long *mmr; + + mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr)); + node_id.v = *mmr; + early_iounmap(mmr, sizeof(*mmr)); + return node_id.s.node_id; +} + static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (!strcmp(oem_id, "SGI")) { @@ -42,6 +54,8 @@ static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) else if (!strcmp(oem_table_id, "UVX")) uv_system_type = UV_X2APIC; else if (!strcmp(oem_table_id, "UVH")) { + __get_cpu_var(x2apic_extra_bits) = + early_get_nodeid() << (UV_APIC_PNODE_SHIFT - 1); uv_system_type = UV_NON_UNIQUE_APIC; return 1; } -- cgit v1.2.3-70-g09d2 From 1648e4f805063137b55b869666e936ffd4419cdf Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 17 Apr 2009 10:46:37 -0700 Subject: x86, kbuild: make "make install" not depend on vmlinux It is common to use "make install" in restricted environments which differ from the one which was actually used to build the kernel. In such environments it is highly undesirable to trigger a rebuild of any part of the system. Worse, the rebuild may be spurious, triggered by differences in the environment. Signed-off-by: H. Peter Anvin Cc: Sam Ravnborg LKML-Reference: <20090415234642.GA28531@uranus.ravnborg.org> --- arch/x86/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index f05d8c91d9e..8c86b72afdc 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -153,7 +153,7 @@ endif boot := arch/x86/boot -BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage install +BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage PHONY += bzImage $(BOOT_TARGETS) @@ -171,6 +171,10 @@ bzImage: vmlinux $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) $@ +PHONY += install +install: + $(Q)$(MAKE) $(build)=$(boot) $@ + PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/x86/vdso $@ -- cgit v1.2.3-70-g09d2 From a81b6314e0aa480b8ac6dd02779d44cd0bee0a34 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 17 Apr 2009 23:31:20 +0530 Subject: x86: mm/numa_32.c calculate_numa_remap_pages should use __init calculate_numa_remap_pages() is called only by __init initmem_init() further calculate_numa_remap_pages is calling: __init find_e820_area() and __init reserve_early() So calculate_numa_remap_pages() should be __init calculate_numa_remap_pages(). WARNING: arch/x86/built-in.o(.text+0x82ea3): Section mismatch in reference from the function calculate_numa_remap_pages() to the function .init.text:find_e820_area() The function calculate_numa_remap_pages() references the function __init find_e820_area(). This is often because calculate_numa_remap_pages lacks a __init annotation or the annotation of find_e820_area is wrong. WARNING: arch/x86/built-in.o(.text+0x82f5f): Section mismatch in reference from the function calculate_numa_remap_pages() to the function .init.text:reserve_early() The function calculate_numa_remap_pages() references the function __init reserve_early(). This is often because calculate_numa_remap_pages lacks a __init annotation or the annotation of reserve_early is wrong. [ Impact: save memory, address Section mismatch warning ] Signed-off-by: Jaswinder Singh Rajput Cc: Sam Ravnborg LKML-Reference: <1239991281.3153.4.camel@ht.satnam> Signed-off-by: Ingo Molnar --- arch/x86/mm/numa_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 3daefa04ace..d2530062fe0 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -257,7 +257,7 @@ void resume_map_numa_kva(pgd_t *pgd_base) } #endif -static unsigned long calculate_numa_remap_pages(void) +static __init unsigned long calculate_numa_remap_pages(void) { int nid; unsigned long size, reserve_pages = 0; -- cgit v1.2.3-70-g09d2 From 0300e7f1a525ae4e4ac05344624adf0e5f13ea52 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 17 Apr 2009 08:33:52 -0400 Subject: lockdep, x86: account for irqs enabled in paranoid_exit I hit the check_flags error of lockdep: WARNING: at kernel/lockdep.c:2893 check_flags+0x1a7/0x1d0() [...] hardirqs last enabled at (12567): [] local_bh_enable+0xaa/0x110 hardirqs last disabled at (12569): [] int3+0x16/0x40 softirqs last enabled at (12566): [] lock_sock_nested+0xfb/0x110 softirqs last disabled at (12568): [] tcp_prequeue_process+0x2e/0xa0 The check_flags warning of lockdep tells me that lockdep thought interrupts were disabled, but they were really enabled. The numbers in the above parenthesis show the order of events: 12566: softirqs last enabled: lock_sock_nested 12567: hardirqs last enabled: local_bh_enable 12568: softirqs last disabled: tcp_prequeue_process 12566: hardirqs last disabled: int3 int3 is a breakpoint! Examining this further, I have CONFIG_NET_TCPPROBE enabled which adds break points into the kernel. The paranoid_exit of the return of int3 does not account for enabling interrupts on return to kernel. This code is a bit tricky since it is also used by the nmi handler (when lockdep is off), and we must be careful about the swapgs. We can not call kernel code after the swapgs has been performed. [ Impact: fix lockdep check_flags warning + self-turn-off ] Acked-by: Peter Zijlsta Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index a331ec38af9..38946c6e843 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1410,7 +1410,10 @@ ENTRY(paranoid_exit) paranoid_swapgs: TRACE_IRQS_IRETQ 0 SWAPGS_UNSAFE_STACK + RESTORE_ALL 8 + jmp irq_return paranoid_restore: + TRACE_IRQS_IRETQ 0 RESTORE_ALL 8 jmp irq_return paranoid_userspace: -- cgit v1.2.3-70-g09d2 From 093f13e23137b9e5f7629dd5932ceea1419e2b61 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Wed, 15 Apr 2009 10:37:33 -0700 Subject: x86, acpi_cpufreq: Fix the NULL pointer dereference in get_measured_perf Fix for a regression that was introduced by earlier commit 18b2646fe3babeb40b34a0c1751e0bf5adfdc64c on Mon Apr 6 11:26:08 2009 Regression resulted in the below error happened on systems with software coordination where per_cpu acpi data will not be initiated for secondary CPUs in a P-state domain. On Tue, 2009-04-14 at 23:01 -0700, Zhang, Yanmin wrote: My machine hanged with kernel 2.6.30-rc2 when script read > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor. > > opps happens in get_measured_perf: > > cur.aperf.whole = readin.aperf.whole - > per_cpu(drv_data, cpu)->saved_aperf; > > Because per_cpu(drv_data, cpu)=NULL. > > So function get_measured_perf should check if (per_cpu(drv_data, > cpu)==NULL) > and return 0 if it's NULL. --------------sys log------------------ BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 IP: [] get_measured_perf+0x4a/0xf9 PGD a7dd88067 PUD a7ccf5067 PMD 0 Oops: 0000 [#1] SMP last sysfs file: /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor CPU 0 Modules linked in: video output Pid: 2091, comm: kondemand/0 Not tainted 2.6.30-rc2 #1 MP Server RIP: 0010:[] [] get_measured_perf+0x4a/0xf9 RSP: 0018:ffff880a7d56de20 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00000046241a42b6 RCX: ffff88004d219000 RDX: 000000000000b660 RSI: 0000000000000020 RDI: 0000000000000001 RBP: ffff880a7f052000 R08: 00000046241a42b6 R09: ffffffff807639f0 R10: 00000000ffffffea R11: ffffffff802207f4 R12: ffff880a7f052000 R13: ffff88004d20e460 R14: 0000000000ddd5a6 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff88004d200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 0000000000000020 CR3: 0000000a7f1bf000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process kondemand/0 (pid: 2091, threadinfo ffff880a7d56c000, task ffff880a7d4d18c0) Stack: ffff880a7f052078 ffffffff803efd54 00000046241a42b6 000000462ffa9e95 0000000000000001 0000000000000001 00000000ffffffea ffffffff8064f41a 0000000000000012 0000000000000012 ffff880a7f052000 ffffffff80650547 Call Trace: [] ? kobject_get+0x12/0x17 [] ? __cpufreq_driver_getavg+0x42/0x57 [] ? do_dbs_timer+0x147/0x272 [] ? do_dbs_timer+0x0/0x272 [] ? worker_thread+0x15b/0x1f5 [] ? autoremove_wake_function+0x0/0x2e [] ? worker_thread+0x0/0x1f5 [] ? kthread+0x54/0x83 [] ? child_rip+0xa/0x20 [] ? kthread+0x0/0x83 [] ? child_rip+0x0/0x20 Code: 99 a6 03 00 31 c9 85 c0 0f 85 c3 00 00 00 89 df 4c 8b 44 24 10 48 c7 c2 60 b6 00 00 48 8b 0c fd e0 30 a5 80 4c 89 c3 48 8b 04 0a <48> 2b 58 20 48 8b 44 24 18 48 89 1c 24 48 8b 34 0a 48 2b 46 28 RIP [] get_measured_perf+0x4a/0xf9 RSP CR2: 0000000000000020 ---[ end trace 2b8fac9a49e19ad4 ]--- Tested-by: "Zhang, Yanmin" Signed-off-by: Venkatesh Pallipadi Signed-off-by: Len Brown --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ecdb682ab51..dd0bd76d14c 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -68,11 +68,16 @@ struct acpi_cpufreq_data { unsigned int max_freq; unsigned int resume; unsigned int cpu_feature; - u64 saved_aperf, saved_mperf; }; static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); +struct acpi_msr_data { + u64 saved_aperf, saved_mperf; +}; + +static DEFINE_PER_CPU(struct acpi_msr_data, msr_data); + DEFINE_TRACE(power_mark); /* acpi_perf_data is a pointer to percpu data. */ @@ -287,11 +292,11 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, return 0; cur.aperf.whole = readin.aperf.whole - - per_cpu(drv_data, cpu)->saved_aperf; + per_cpu(msr_data, cpu).saved_aperf; cur.mperf.whole = readin.mperf.whole - - per_cpu(drv_data, cpu)->saved_mperf; - per_cpu(drv_data, cpu)->saved_aperf = readin.aperf.whole; - per_cpu(drv_data, cpu)->saved_mperf = readin.mperf.whole; + per_cpu(msr_data, cpu).saved_mperf; + per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole; + per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole; #ifdef __i386__ /* -- cgit v1.2.3-70-g09d2 From e0e8c4e512e92bc25c19bd8d4926de17d2f8fbf2 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 17 Apr 2009 16:22:06 +0200 Subject: acpi-cpufreq: Cleanup: Use printk_once Signed-off-by: Thomas Renninger Signed-off-by: Len Brown --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index dd0bd76d14c..4eab747a896 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -693,13 +693,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) /* Check for high latency (>20uS) from buggy BIOSes, like on T42 */ if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE && policy->cpuinfo.transition_latency > 20 * 1000) { - static int print_once; policy->cpuinfo.transition_latency = 20 * 1000; - if (!print_once) { - print_once = 1; - printk(KERN_INFO "Capping off P-state tranision latency" - " at 20 uS\n"); - } + printk_once(KERN_INFO "Capping off P-state tranision" + " latency at 20 uS\n"); } data->max_freq = perf->states[0].core_frequency * 1000; -- cgit v1.2.3-70-g09d2 From d91758f5ddb80e91176fa2cf80c88c1633950b3d Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 17 Apr 2009 16:22:07 +0200 Subject: acpi-cpufreq: style-only: add parens to math expression Signed-off-by: Thomas Renninger Signed-off-by: Len Brown --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 4eab747a896..aec3161abed 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -340,7 +340,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, #endif - retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100; + retval = (per_cpu(drv_data, policy->cpu)->max_freq * perf_percent) / 100; return retval; } -- cgit v1.2.3-70-g09d2 From d876dfbbf5c8728102fb4f683450fa9ae3259cda Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 17 Apr 2009 16:22:08 +0200 Subject: acpi-cpufreq: Do not let get_measured perf depend on internal variable Take already available policy->cpuinfo.max_freq and get rid of acpi-cpufreq specific max_freq variable. This implies that P0 is always the highest frequency which should always be true as ACPI spec says: As a result, the zeroth entry describes the highest performance state Signed-off-by: Thomas Renninger Acked-by: Venkatesh Pallipadi Signed-off-by: Len Brown --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index aec3161abed..208ecf6643d 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -65,7 +65,6 @@ enum { struct acpi_cpufreq_data { struct acpi_processor_performance *acpi_data; struct cpufreq_frequency_table *freq_table; - unsigned int max_freq; unsigned int resume; unsigned int cpu_feature; }; @@ -340,7 +339,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, #endif - retval = (per_cpu(drv_data, policy->cpu)->max_freq * perf_percent) / 100; + retval = (policy->cpuinfo.max_freq * perf_percent) / 100; return retval; } @@ -698,7 +697,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) " latency at 20 uS\n"); } - data->max_freq = perf->states[0].core_frequency * 1000; /* table init */ for (i = 0; i < perf->state_count; i++) { if (i > 0 && perf->states[i].core_frequency >= @@ -717,6 +715,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (result) goto err_freqfree; + if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq) + printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n"); + switch (perf->control_register.space_id) { case ACPI_ADR_SPACE_SYSTEM_IO: /* Current speed is unknown and not detectable by IO port */ -- cgit v1.2.3-70-g09d2 From fc61e6636d13eb3a23eb29b4327eeee9de0ef3bc Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Mon, 20 Apr 2009 08:25:31 -0500 Subject: x86/uv: fix for no memory at paddr 0 Fix endcase where the memory at physical address 0 does not really exist AND one of the sockets on blade 0 has no active cpus. The memory that _appears_ to be at physical address 0 is actually memory that located at a different address but has been remapped by the chipset so that it appears to be at physical address 0. When determining the UV pnode, the algorithm for determining the pnode incorrectly used the relocated physical address instead of the actual (global) address. [ Impact: boot failure on partitioned systems ] Signed-off-by: Jack Steiner LKML-Reference: <20090420132530.GA23156@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d6712334ce4..2bda6935297 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -652,6 +652,7 @@ void __init uv_system_init(void) if (uv_node_to_blade[nid] >= 0) continue; paddr = node_start_pfn(nid) << PAGE_SHIFT; + paddr = uv_soc_phys_ram_to_gpa(paddr); pnode = (paddr >> m_val) & pnode_mask; blade = boot_pnode_to_blade(pnode); uv_node_to_blade[nid] = blade; -- cgit v1.2.3-70-g09d2 From 06c38d5e36b12d040839ff224e805146c0368556 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 9 Apr 2009 15:24:34 -0700 Subject: x86-64: fix FPU corruption with signals and preemption In 64bit signal delivery path, clear_used_math() was happening before saving the current active FPU state on to the user stack for signal handling. Between clear_used_math() and the state store on to the user stack, potentially we can get a page fault for the user address and can block. Infact, while testing we were hitting the might_fault() in __clear_user() which can do a schedule(). At a later point in time, we will schedule back into this process and resume the save state (using "xsave/fxsave" instruction) which can lead to DNA fault. And as used_math was cleared before, we will reinit the FP state in the DNA fault and continue. This reinit will result in loosing the FPU state of the process. Move clear_used_math() to a point after the FPU state has been stored onto the user stack. This issue is present from a long time (even before the xsave changes and the x86 merge). But it can easily be exposed in 2.6.28.x and 2.6.29.x series because of the __clear_user() in this path, which has an explicit __cond_resched() leading to a context switch with CONFIG_PREEMPT_VOLUNTARY. [ Impact: fix FPU state corruption ] Signed-off-by: Suresh Siddha Cc: [2.6.28.x, 2.6.29.x] Signed-off-by: H. Peter Anvin --- arch/x86/kernel/xsave.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 0a5b04aa98f..c5ee17e8c6d 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -89,7 +89,7 @@ int save_i387_xstate(void __user *buf) if (!used_math()) return 0; - clear_used_math(); /* trigger finit */ + if (task_thread_info(tsk)->status & TS_USEDFPU) { /* * Start with clearing the user buffer. This will present a @@ -114,6 +114,8 @@ int save_i387_xstate(void __user *buf) return -1; } + clear_used_math(); /* trigger finit */ + if (task_thread_info(tsk)->status & TS_XSAVE) { struct _fpstate __user *fx = buf; struct _xstate __user *x = buf; -- cgit v1.2.3-70-g09d2 From 0112fc2229847feb6c4eb011e6833d8f1742a375 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Wed, 8 Apr 2009 20:05:42 +0400 Subject: Separate out common fstatat code into vfs_fstatat This is a version incorporating Christoph's suggestion. Separate out common *fstatat functionality into a single function instead of duplicating it all over the code. Signed-off-by: Oleg Drokin Signed-off-by: Al Viro --- arch/arm/kernel/sys_oabi-compat.c | 19 ++++--------- arch/s390/kernel/compat_linux.c | 18 ++++--------- arch/sparc/kernel/sys_sparc32.c | 19 ++++--------- arch/x86/ia32/sys_ia32.c | 19 ++++--------- fs/compat.c | 19 ++++--------- fs/stat.c | 56 +++++++++++++++++++-------------------- include/linux/fs.h | 1 + 7 files changed, 54 insertions(+), 97 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index e04173c7e62..d59a0cd537f 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -177,21 +177,12 @@ asmlinkage long sys_oabi_fstatat64(int dfd, int flag) { struct kstat stat; - int error = -EINVAL; + int error; - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_oldabi_stat64(&stat, statbuf); - -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_oldabi_stat64(&stat, statbuf); } struct oabi_flock64 { diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 6cc87d8c868..002c70d3cb7 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -702,20 +702,12 @@ asmlinkage long sys32_fstatat64(unsigned int dfd, char __user *filename, struct stat64_emu31 __user* statbuf, int flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); + int error; - if (!error) - error = cp_stat64(statbuf, &stat); -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_stat64(statbuf, &stat); } /* diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index e800503879e..f5000a460c0 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -206,21 +206,12 @@ asmlinkage long compat_sys_fstatat64(unsigned int dfd, char __user *filename, struct compat_stat64 __user * statbuf, int flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_compat_stat64(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_compat_stat64(&stat, statbuf); } asmlinkage long compat_sys_sysfs(int option, u32 arg1, u32 arg2) diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index efac92fd1ef..085a8c35f14 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -129,21 +129,12 @@ asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename, struct stat64 __user *statbuf, int flag) { struct kstat stat; - int error = -EINVAL; + int error; - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_stat64(statbuf, &stat); - -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_stat64(statbuf, &stat); } /* diff --git a/fs/compat.c b/fs/compat.c index 3f84d5f1588..dda72e26709 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -204,21 +204,12 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename, struct compat_stat __user *statbuf, int flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_compat_stat(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_compat_stat(&stat, statbuf); } #endif diff --git a/fs/stat.c b/fs/stat.c index 2db740a0cfb..54711662b85 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -109,6 +109,24 @@ int vfs_fstat(unsigned int fd, struct kstat *stat) EXPORT_SYMBOL(vfs_fstat); +int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag) +{ + int error = -EINVAL; + + if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) + goto out; + + if (flag & AT_SYMLINK_NOFOLLOW) + error = vfs_lstat_fd(dfd, filename, stat); + else + error = vfs_stat_fd(dfd, filename, stat); +out: + return error; +} + +EXPORT_SYMBOL(vfs_fstatat); + + #ifdef __ARCH_WANT_OLD_STAT /* @@ -264,21 +282,12 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, char __user *, filename, struct stat __user *, statbuf, int, flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_new_stat(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_new_stat(&stat, statbuf); } #endif @@ -404,21 +413,12 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename, struct stat64 __user *, statbuf, int, flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_new_stat64(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_new_stat64(&stat, statbuf); } #endif /* __ARCH_WANT_STAT64 */ diff --git a/include/linux/fs.h b/include/linux/fs.h index e766be0d432..257f4d37ad2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2302,6 +2302,7 @@ extern int vfs_lstat(char __user *, struct kstat *); extern int vfs_stat_fd(int dfd, char __user *, struct kstat *); extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); +extern int vfs_fstatat(int , char __user *, struct kstat *, int); extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg); -- cgit v1.2.3-70-g09d2 From 2f537a9f8e82f55c241b002c8cfbf34303b45ada Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 21 Apr 2009 16:00:15 +0930 Subject: x86: fix boot crash in NMI watchdog with CONFIG_CPUMASK_OFFSTACK=y and flat APIC fcef8576d8a64fc603e719c97d423f9f6d4e0e8b converted backtrace_mask to a cpumask_var_t, and assumed check_nmi_watchdog was called before nmi_watchdog_tick was ever called. Steven's oops shows I was wrong. This is something of a bandaid: I'm not sure we *should* be calling nmi_watchdog_tick before check_nmi_watchdog. Note that gcc eliminates this test for the CONFIG_CPUMASK_OFFSTACK=n case. [ Impact: fix boot crash in rare configs ] Reported-by: Steven Rostedt Signed-off-by: Rusty Russell LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/nmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index d6bd6240715..2ba52f35a88 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -414,7 +414,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) touched = 1; } - if (cpumask_test_cpu(cpu, backtrace_mask)) { + /* We can be called before check_nmi_watchdog, hence NULL check. */ + if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) { static DEFINE_SPINLOCK(lock); /* Serialise the printks */ spin_lock(&lock); -- cgit v1.2.3-70-g09d2 From fcc5c4a2feea3886dc058498b28508b2731720d5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 21 Apr 2009 16:03:41 +0930 Subject: x86: avoid theoretical spurious NMI backtraces with CONFIG_CPUMASK_OFFSTACK=y In theory (though not shown in practice) alloc_cpumask_var() doesn't zero memory, so CPUs might print an "NMI backtrace for cpu %d" once on boot. (Bug introduced in fcef8576d8a64fc603e719c97d423f9f6d4e0e8b). [ Impact: avoid theoretical syslog noise in rare configs ] Signed-off-by: Rusty Russell Cc: Steven Rostedt LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/nmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 2ba52f35a88..ce4fbfa315a 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -138,7 +138,7 @@ int __init check_nmi_watchdog(void) if (!prev_nmi_count) goto error; - alloc_cpumask_var(&backtrace_mask, GFP_KERNEL); + alloc_cpumask_var(&backtrace_mask, GFP_KERNEL|__GFP_ZERO); printk(KERN_INFO "Testing NMI watchdog ... "); #ifdef CONFIG_SMP -- cgit v1.2.3-70-g09d2 From 8e19608e8b5c001e4a66ce482edc474f05fb7355 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 21 Apr 2009 12:24:00 -0700 Subject: clocksource: pass clocksource to read() callback Pass clocksource pointer to the read() callback for clocksources. This allows us to share the callback between multiple instances. [hugh@veritas.com: fix powerpc build of clocksource pass clocksource mods] [akpm@linux-foundation.org: cleanup] Signed-off-by: Magnus Damm Acked-by: John Stultz Cc: Thomas Gleixner Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-at91/at91rm9200_time.c | 2 +- arch/arm/mach-at91/at91sam926x_time.c | 2 +- arch/arm/mach-davinci/time.c | 2 +- arch/arm/mach-imx/time.c | 2 +- arch/arm/mach-ixp4xx/common.c | 2 +- arch/arm/mach-msm/timer.c | 4 ++-- arch/arm/mach-netx/time.c | 2 +- arch/arm/mach-ns9xxx/time-ns9360.c | 2 +- arch/arm/mach-omap1/time.c | 2 +- arch/arm/mach-omap2/timer-gp.c | 2 +- arch/arm/mach-pxa/time.c | 2 +- arch/arm/mach-realview/core.c | 2 +- arch/arm/mach-versatile/core.c | 2 +- arch/arm/plat-mxc/time.c | 2 +- arch/arm/plat-omap/common.c | 4 ++-- arch/arm/plat-orion/time.c | 2 +- arch/avr32/kernel/time.c | 2 +- arch/blackfin/kernel/time-ts.c | 12 ++++++------ arch/ia64/kernel/cyclone.c | 2 +- arch/ia64/kernel/time.c | 4 ++-- arch/ia64/sn/kernel/sn2/timer.c | 2 +- arch/m68knommu/platform/68328/timers.c | 2 +- arch/m68knommu/platform/coldfire/dma_timer.c | 2 +- arch/m68knommu/platform/coldfire/pit.c | 2 +- arch/m68knommu/platform/coldfire/timers.c | 2 +- arch/mips/kernel/cevt-txx9.c | 2 +- arch/mips/kernel/csrc-bcm1480.c | 2 +- arch/mips/kernel/csrc-ioasic.c | 6 +++--- arch/mips/kernel/csrc-r4k.c | 2 +- arch/mips/kernel/csrc-sb1250.c | 2 +- arch/mips/kernel/i8253.c | 2 +- arch/mips/nxp/pnx8550/common/time.c | 2 +- arch/mips/sgi-ip27/ip27-timer.c | 2 +- arch/powerpc/kernel/time.c | 8 ++++---- arch/s390/kernel/time.c | 2 +- arch/sh/kernel/time_32.c | 2 +- arch/sh/kernel/timers/timer-tmu.c | 2 +- arch/sparc/kernel/time_64.c | 7 ++++++- arch/um/kernel/time.c | 2 +- arch/x86/kernel/hpet.c | 6 +++--- arch/x86/kernel/i8253.c | 2 +- arch/x86/kernel/kvmclock.c | 7 ++++++- arch/x86/kernel/tsc.c | 2 +- arch/x86/kernel/vmiclock_32.c | 2 +- arch/x86/lguest/boot.c | 2 +- arch/x86/xen/time.c | 7 ++++++- drivers/char/hpet.c | 2 +- drivers/clocksource/acpi_pm.c | 12 ++++++------ drivers/clocksource/cyclone.c | 2 +- drivers/clocksource/scx200_hrt.c | 2 +- drivers/clocksource/tcb_clksrc.c | 2 +- include/linux/clocksource.h | 6 +++--- kernel/time/clocksource.c | 8 ++++---- kernel/time/jiffies.c | 2 +- 54 files changed, 94 insertions(+), 79 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c index 1ff1bda0a89..309f3511aa2 100644 --- a/arch/arm/mach-at91/at91rm9200_time.c +++ b/arch/arm/mach-at91/at91rm9200_time.c @@ -85,7 +85,7 @@ static struct irqaction at91rm9200_timer_irq = { .handler = at91rm9200_timer_interrupt }; -static cycle_t read_clk32k(void) +static cycle_t read_clk32k(struct clocksource *cs) { return read_CRTR(); } diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c index b63e1d5f1ba..4bd56aee437 100644 --- a/arch/arm/mach-at91/at91sam926x_time.c +++ b/arch/arm/mach-at91/at91sam926x_time.c @@ -31,7 +31,7 @@ static u32 pit_cnt; /* access only w/system irq blocked */ * Clocksource: just a monotonic counter of MCK/16 cycles. * We don't care whether or not PIT irqs are enabled. */ -static cycle_t read_pit_clk(void) +static cycle_t read_pit_clk(struct clocksource *cs) { unsigned long flags; u32 elapsed; diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c index f8bcd29d17a..6c227d4ba99 100644 --- a/arch/arm/mach-davinci/time.c +++ b/arch/arm/mach-davinci/time.c @@ -238,7 +238,7 @@ static void __init timer_init(void) /* * clocksource */ -static cycle_t read_cycles(void) +static cycle_t read_cycles(struct clocksource *cs) { struct timer_s *t = &timers[TID_CLOCKSOURCE]; diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c index aff0ebcfa84..5aef18b599e 100644 --- a/arch/arm/mach-imx/time.c +++ b/arch/arm/mach-imx/time.c @@ -73,7 +73,7 @@ static void __init imx_timer_hardware_init(void) IMX_TCTL(TIMER_BASE) = TCTL_FRR | TCTL_CLK_PCLK1 | TCTL_TEN; } -cycle_t imx_get_cycles(void) +cycle_t imx_get_cycles(struct clocksource *cs) { return IMX_TCN(TIMER_BASE); } diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index f4656d2ac8a..1e93dfee754 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -401,7 +401,7 @@ void __init ixp4xx_sys_init(void) /* * clocksource */ -cycle_t ixp4xx_get_cycles(void) +cycle_t ixp4xx_get_cycles(struct clocksource *cs) { return *IXP4XX_OSTS; } diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c index 444d9c0f5ca..4855b8ca510 100644 --- a/arch/arm/mach-msm/timer.c +++ b/arch/arm/mach-msm/timer.c @@ -57,12 +57,12 @@ static irqreturn_t msm_timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static cycle_t msm_gpt_read(void) +static cycle_t msm_gpt_read(struct clocksource *cs) { return readl(MSM_GPT_BASE + TIMER_COUNT_VAL); } -static cycle_t msm_dgt_read(void) +static cycle_t msm_dgt_read(struct clocksource *cs) { return readl(MSM_DGT_BASE + TIMER_COUNT_VAL) >> MSM_DGT_SHIFT; } diff --git a/arch/arm/mach-netx/time.c b/arch/arm/mach-netx/time.c index f201fddb594..82801dbf057 100644 --- a/arch/arm/mach-netx/time.c +++ b/arch/arm/mach-netx/time.c @@ -104,7 +104,7 @@ static struct irqaction netx_timer_irq = { .handler = netx_timer_interrupt, }; -cycle_t netx_get_cycles(void) +cycle_t netx_get_cycles(struct clocksource *cs) { return readl(NETX_GPIO_COUNTER_CURRENT(TIMER_CLOCKSOURCE)); } diff --git a/arch/arm/mach-ns9xxx/time-ns9360.c b/arch/arm/mach-ns9xxx/time-ns9360.c index 41df6972176..77281260358 100644 --- a/arch/arm/mach-ns9xxx/time-ns9360.c +++ b/arch/arm/mach-ns9xxx/time-ns9360.c @@ -25,7 +25,7 @@ #define TIMER_CLOCKEVENT 1 static u32 latch; -static cycle_t ns9360_clocksource_read(void) +static cycle_t ns9360_clocksource_read(struct clocksource *cs) { return __raw_readl(SYS_TR(TIMER_CLOCKSOURCE)); } diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c index 495a32c287b..4d56408d3cf 100644 --- a/arch/arm/mach-omap1/time.c +++ b/arch/arm/mach-omap1/time.c @@ -198,7 +198,7 @@ static struct irqaction omap_mpu_timer2_irq = { .handler = omap_mpu_timer2_interrupt, }; -static cycle_t mpu_read(void) +static cycle_t mpu_read(struct clocksource *cs) { return ~omap_mpu_timer_read(1); } diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c index 9fc13a2cc3f..1cb2c0909c2 100644 --- a/arch/arm/mach-omap2/timer-gp.c +++ b/arch/arm/mach-omap2/timer-gp.c @@ -138,7 +138,7 @@ static inline void __init omap2_gp_clocksource_init(void) {} * clocksource */ static struct omap_dm_timer *gpt_clocksource; -static cycle_t clocksource_read_cycles(void) +static cycle_t clocksource_read_cycles(struct clocksource *cs) { return (cycle_t)omap_dm_timer_read_counter(gpt_clocksource); } diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c index 8eb3830fbb0..750c448db67 100644 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@ -125,7 +125,7 @@ static struct clock_event_device ckevt_pxa_osmr0 = { .set_mode = pxa_osmr0_set_mode, }; -static cycle_t pxa_read_oscr(void) +static cycle_t pxa_read_oscr(struct clocksource *cs) { return OSCR; } diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 9ab947c14f2..942e1a7eb9b 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -715,7 +715,7 @@ static struct irqaction realview_timer_irq = { .handler = realview_timer_interrupt, }; -static cycle_t realview_get_cycles(void) +static cycle_t realview_get_cycles(struct clocksource *cs) { return ~readl(timer3_va_base + TIMER_VALUE); } diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index 565776680d8..1f929c391af 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -948,7 +948,7 @@ static struct irqaction versatile_timer_irq = { .handler = versatile_timer_interrupt, }; -static cycle_t versatile_get_cycles(void) +static cycle_t versatile_get_cycles(struct clocksource *cs) { return ~readl(TIMER3_VA_BASE + TIMER_VALUE); } diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c index ef1b3cd85bd..dab3357196f 100644 --- a/arch/arm/plat-mxc/time.c +++ b/arch/arm/plat-mxc/time.c @@ -36,7 +36,7 @@ static enum clock_event_mode clockevent_mode = CLOCK_EVT_MODE_UNUSED; /* clock source */ -static cycle_t mxc_get_cycles(void) +static cycle_t mxc_get_cycles(struct clocksource *cs) { return __raw_readl(TIMER_BASE + MXC_TCN); } diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c index d1797147732..433021f3d7c 100644 --- a/arch/arm/plat-omap/common.c +++ b/arch/arm/plat-omap/common.c @@ -185,7 +185,7 @@ console_initcall(omap_add_serial_console); #include -static cycle_t omap_32k_read(void) +static cycle_t omap_32k_read(struct clocksource *cs) { return omap_readl(TIMER_32K_SYNCHRONIZED); } @@ -207,7 +207,7 @@ unsigned long long sched_clock(void) { unsigned long long ret; - ret = (unsigned long long)omap_32k_read(); + ret = (unsigned long long)omap_32k_read(&clocksource_32k); ret = (ret * clocksource_32k.mult_orig) >> clocksource_32k.shift; return ret; } diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c index 6fa2923e6dc..2faf9dba4ef 100644 --- a/arch/arm/plat-orion/time.c +++ b/arch/arm/plat-orion/time.c @@ -41,7 +41,7 @@ static u32 ticks_per_jiffy; /* * Clocksource handling. */ -static cycle_t orion_clksrc_read(void) +static cycle_t orion_clksrc_read(struct clocksource *cs) { return 0xffffffff - readl(TIMER0_VAL); } diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c index 0ff46bf873b..f27aa3b259f 100644 --- a/arch/avr32/kernel/time.c +++ b/arch/avr32/kernel/time.c @@ -18,7 +18,7 @@ #include -static cycle_t read_cycle_count(void) +static cycle_t read_cycle_count(struct clocksource *cs) { return (cycle_t)sysreg_read(COUNT); } diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c index 0ed2badfd74..27646121280 100644 --- a/arch/blackfin/kernel/time-ts.c +++ b/arch/blackfin/kernel/time-ts.c @@ -58,16 +58,11 @@ static inline unsigned long long cycles_2_ns(cycle_t cyc) return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; } -static cycle_t read_cycles(void) +static cycle_t read_cycles(struct clocksource *cs) { return __bfin_cycles_off + (get_cycles() << __bfin_cycles_mod); } -unsigned long long sched_clock(void) -{ - return cycles_2_ns(read_cycles()); -} - static struct clocksource clocksource_bfin = { .name = "bfin_cycles", .rating = 350, @@ -77,6 +72,11 @@ static struct clocksource clocksource_bfin = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +unsigned long long sched_clock(void) +{ + return cycles_2_ns(read_cycles(&clocksource_bfin)); +} + static int __init bfin_clocksource_init(void) { set_cyc2ns_scale(get_cclk() / 1000); diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c index 790ef0d87e1..71e35864d2e 100644 --- a/arch/ia64/kernel/cyclone.c +++ b/arch/ia64/kernel/cyclone.c @@ -21,7 +21,7 @@ void __init cyclone_setup(void) static void __iomem *cyclone_mc; -static cycle_t read_cyclone(void) +static cycle_t read_cyclone(struct clocksource *cs) { return (cycle_t)readq((void __iomem *)cyclone_mc); } diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 641c8b61c4f..604c1a35db3 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -33,7 +33,7 @@ #include "fsyscall_gtod_data.h" -static cycle_t itc_get_cycles(void); +static cycle_t itc_get_cycles(struct clocksource *cs); struct fsyscall_gtod_data_t fsyscall_gtod_data = { .lock = SEQLOCK_UNLOCKED, @@ -383,7 +383,7 @@ ia64_init_itm (void) } } -static cycle_t itc_get_cycles(void) +static cycle_t itc_get_cycles(struct clocksource *cs) { u64 lcycle, now, ret; diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c index cf67fc56205..21d6f09e344 100644 --- a/arch/ia64/sn/kernel/sn2/timer.c +++ b/arch/ia64/sn/kernel/sn2/timer.c @@ -23,7 +23,7 @@ extern unsigned long sn_rtc_cycles_per_second; -static cycle_t read_sn2(void) +static cycle_t read_sn2(struct clocksource *cs) { return (cycle_t)readq(RTC_COUNTER_ADDR); } diff --git a/arch/m68knommu/platform/68328/timers.c b/arch/m68knommu/platform/68328/timers.c index 6bafefa546e..309f725995b 100644 --- a/arch/m68knommu/platform/68328/timers.c +++ b/arch/m68knommu/platform/68328/timers.c @@ -75,7 +75,7 @@ static struct irqaction m68328_timer_irq = { /***************************************************************************/ -static cycle_t m68328_read_clk(void) +static cycle_t m68328_read_clk(struct clocksource *cs) { unsigned long flags; u32 cycles; diff --git a/arch/m68knommu/platform/coldfire/dma_timer.c b/arch/m68knommu/platform/coldfire/dma_timer.c index 772578b1084..a5f562823d7 100644 --- a/arch/m68knommu/platform/coldfire/dma_timer.c +++ b/arch/m68knommu/platform/coldfire/dma_timer.c @@ -34,7 +34,7 @@ #define DMA_DTMR_CLK_DIV_16 (2 << 1) #define DMA_DTMR_ENABLE (1 << 0) -static cycle_t cf_dt_get_cycles(void) +static cycle_t cf_dt_get_cycles(struct clocksource *cs) { return __raw_readl(DTCN0); } diff --git a/arch/m68knommu/platform/coldfire/pit.c b/arch/m68knommu/platform/coldfire/pit.c index 2a12e7fa974..61b96211f8f 100644 --- a/arch/m68knommu/platform/coldfire/pit.c +++ b/arch/m68knommu/platform/coldfire/pit.c @@ -125,7 +125,7 @@ static struct irqaction pit_irq = { /***************************************************************************/ -static cycle_t pit_read_clk(void) +static cycle_t pit_read_clk(struct clocksource *cs) { unsigned long flags; u32 cycles; diff --git a/arch/m68knommu/platform/coldfire/timers.c b/arch/m68knommu/platform/coldfire/timers.c index 454f2549349..1ba8a373165 100644 --- a/arch/m68knommu/platform/coldfire/timers.c +++ b/arch/m68knommu/platform/coldfire/timers.c @@ -78,7 +78,7 @@ static struct irqaction mcftmr_timer_irq = { /***************************************************************************/ -static cycle_t mcftmr_read_clk(void) +static cycle_t mcftmr_read_clk(struct clocksource *cs) { unsigned long flags; u32 cycles; diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c index eccf7d6096b..2e911e3da8d 100644 --- a/arch/mips/kernel/cevt-txx9.c +++ b/arch/mips/kernel/cevt-txx9.c @@ -22,7 +22,7 @@ static struct txx9_tmr_reg __iomem *txx9_cs_tmrptr; -static cycle_t txx9_cs_read(void) +static cycle_t txx9_cs_read(struct clocksource *cs) { return __raw_readl(&txx9_cs_tmrptr->trr); } diff --git a/arch/mips/kernel/csrc-bcm1480.c b/arch/mips/kernel/csrc-bcm1480.c index 868745e7184..51489f8a825 100644 --- a/arch/mips/kernel/csrc-bcm1480.c +++ b/arch/mips/kernel/csrc-bcm1480.c @@ -28,7 +28,7 @@ #include -static cycle_t bcm1480_hpt_read(void) +static cycle_t bcm1480_hpt_read(struct clocksource *cs) { return (cycle_t) __raw_readq(IOADDR(A_SCD_ZBBUS_CYCLE_COUNT)); } diff --git a/arch/mips/kernel/csrc-ioasic.c b/arch/mips/kernel/csrc-ioasic.c index 1d5f63cf899..b551f48d3a0 100644 --- a/arch/mips/kernel/csrc-ioasic.c +++ b/arch/mips/kernel/csrc-ioasic.c @@ -25,7 +25,7 @@ #include #include -static cycle_t dec_ioasic_hpt_read(void) +static cycle_t dec_ioasic_hpt_read(struct clocksource *cs) { return ioasic_read(IO_REG_FCTR); } @@ -47,13 +47,13 @@ void __init dec_ioasic_clocksource_init(void) while (!ds1287_timer_state()) ; - start = dec_ioasic_hpt_read(); + start = dec_ioasic_hpt_read(&clocksource_dec); while (i--) while (!ds1287_timer_state()) ; - end = dec_ioasic_hpt_read(); + end = dec_ioasic_hpt_read(&clocksource_dec); freq = (end - start) * 10; printk(KERN_INFO "I/O ASIC clock frequency %dHz\n", freq); diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c index f1a2893931e..e95a3cd48ee 100644 --- a/arch/mips/kernel/csrc-r4k.c +++ b/arch/mips/kernel/csrc-r4k.c @@ -10,7 +10,7 @@ #include -static cycle_t c0_hpt_read(void) +static cycle_t c0_hpt_read(struct clocksource *cs) { return read_c0_count(); } diff --git a/arch/mips/kernel/csrc-sb1250.c b/arch/mips/kernel/csrc-sb1250.c index 92212bbb8e4..d14d3d1907f 100644 --- a/arch/mips/kernel/csrc-sb1250.c +++ b/arch/mips/kernel/csrc-sb1250.c @@ -33,7 +33,7 @@ * The HPT is free running from SB1250_HPT_VALUE down to 0 then starts over * again. */ -static cycle_t sb1250_hpt_read(void) +static cycle_t sb1250_hpt_read(struct clocksource *cs) { unsigned int count; diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c index 689719e34f0..ed20e7fe65e 100644 --- a/arch/mips/kernel/i8253.c +++ b/arch/mips/kernel/i8253.c @@ -128,7 +128,7 @@ void __init setup_pit_timer(void) * to just read by itself. So use jiffies to emulate a free * running counter: */ -static cycle_t pit_read(void) +static cycle_t pit_read(struct clocksource *cs) { unsigned long flags; int count; diff --git a/arch/mips/nxp/pnx8550/common/time.c b/arch/mips/nxp/pnx8550/common/time.c index cf293b27909..8df43e9e4d9 100644 --- a/arch/mips/nxp/pnx8550/common/time.c +++ b/arch/mips/nxp/pnx8550/common/time.c @@ -35,7 +35,7 @@ static unsigned long cpj; -static cycle_t hpt_read(void) +static cycle_t hpt_read(struct clocksource *cs) { return read_c0_count2(); } diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index f024057a35f..f10a7cd64f7 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -159,7 +159,7 @@ static void __init hub_rt_clock_event_global_init(void) setup_irq(irq, &hub_rt_irqaction); } -static cycle_t hub_rt_read(void) +static cycle_t hub_rt_read(struct clocksource *cs) { return REMOTE_HUB_L(cputonasid(0), PI_RT_COUNT); } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 926ea864e34..48571ac56fb 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -77,7 +77,7 @@ #include #include -static cycle_t rtc_read(void); +static cycle_t rtc_read(struct clocksource *); static struct clocksource clocksource_rtc = { .name = "rtc", .rating = 400, @@ -88,7 +88,7 @@ static struct clocksource clocksource_rtc = { .read = rtc_read, }; -static cycle_t timebase_read(void); +static cycle_t timebase_read(struct clocksource *); static struct clocksource clocksource_timebase = { .name = "timebase", .rating = 400, @@ -766,12 +766,12 @@ unsigned long read_persistent_clock(void) } /* clocksource code */ -static cycle_t rtc_read(void) +static cycle_t rtc_read(struct clocksource *cs) { return (cycle_t)get_rtc(); } -static cycle_t timebase_read(void) +static cycle_t timebase_read(struct clocksource *cs) { return (cycle_t)get_tb(); } diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 6ded50dfa75..ef596d02057 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -201,7 +201,7 @@ unsigned long read_persistent_clock(void) return ts.tv_sec; } -static cycle_t read_tod_clock(void) +static cycle_t read_tod_clock(struct clocksource *cs) { return get_clock(); } diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c index c34e1e0f9b0..1700d2465f6 100644 --- a/arch/sh/kernel/time_32.c +++ b/arch/sh/kernel/time_32.c @@ -208,7 +208,7 @@ unsigned long long sched_clock(void) if (!clocksource_sh.rating) return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ); - cycles = clocksource_sh.read(); + cycles = clocksource_sh.read(&clocksource_sh); return cyc2ns(&clocksource_sh, cycles); } #endif diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c index c5d3396f596..fe8d8930ccb 100644 --- a/arch/sh/kernel/timers/timer-tmu.c +++ b/arch/sh/kernel/timers/timer-tmu.c @@ -81,7 +81,7 @@ static int tmu_timer_stop(void) */ static int tmus_are_scaled; -static cycle_t tmu_timer_read(void) +static cycle_t tmu_timer_read(struct clocksource *cs) { return ((cycle_t)(~_tmu_read(TMU1)))<get_tick(); +} + void __init time_init(void) { unsigned long freq = sparc64_init_timers(); @@ -827,7 +832,7 @@ void __init time_init(void) clocksource_tick.mult = clocksource_hz2mult(freq, clocksource_tick.shift); - clocksource_tick.read = tick_ops->get_tick; + clocksource_tick.read = clocksource_tick_read; printk("clocksource: mult[%x] shift[%d]\n", clocksource_tick.mult, clocksource_tick.shift); diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index b13a87a3ec9..c8b9c469fcd 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -65,7 +65,7 @@ static irqreturn_t um_timer(int irq, void *dev) return IRQ_HANDLED; } -static cycle_t itimer_read(void) +static cycle_t itimer_read(struct clocksource *cs) { return os_nsecs() / 1000; } diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 648b3a2a3a4..3f0019e0a22 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -722,7 +722,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n, /* * Clock source related code */ -static cycle_t read_hpet(void) +static cycle_t read_hpet(struct clocksource *cs) { return (cycle_t)hpet_readl(HPET_COUNTER); } @@ -756,7 +756,7 @@ static int hpet_clocksource_register(void) hpet_restart_counter(); /* Verify whether hpet counter works */ - t1 = read_hpet(); + t1 = hpet_readl(HPET_COUNTER); rdtscll(start); /* @@ -770,7 +770,7 @@ static int hpet_clocksource_register(void) rdtscll(now); } while ((now - start) < 200000UL); - if (t1 == read_hpet()) { + if (t1 == hpet_readl(HPET_COUNTER)) { printk(KERN_WARNING "HPET counter not counting. HPET disabled\n"); return -ENODEV; diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 3475440baa5..c2e0bb0890d 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -129,7 +129,7 @@ void __init setup_pit_timer(void) * to just read by itself. So use jiffies to emulate a free * running counter: */ -static cycle_t pit_read(void) +static cycle_t pit_read(struct clocksource *cs) { static int old_count; static u32 old_jifs; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 137f2e8132d..223af43f152 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -77,6 +77,11 @@ static cycle_t kvm_clock_read(void) return ret; } +static cycle_t kvm_clock_get_cycles(struct clocksource *cs) +{ + return kvm_clock_read(); +} + /* * If we don't do that, there is the possibility that the guest * will calibrate under heavy load - thus, getting a lower lpj - @@ -107,7 +112,7 @@ static void kvm_get_preset_lpj(void) static struct clocksource kvm_clock = { .name = "kvm-clock", - .read = kvm_clock_read, + .read = kvm_clock_get_cycles, .rating = 400, .mask = CLOCKSOURCE_MASK(64), .mult = 1 << KVM_SCALE, diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7a567ebe636..d57de05dc43 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -699,7 +699,7 @@ static struct clocksource clocksource_tsc; * code, which is necessary to support wrapping clocksources like pm * timer. */ -static cycle_t read_tsc(void) +static cycle_t read_tsc(struct clocksource *cs) { cycle_t ret = (cycle_t)get_cycles(); diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index d303369a7ba..2b3eb82efee 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -283,7 +283,7 @@ void __devinit vmi_time_ap_init(void) /** vmi clocksource */ static struct clocksource clocksource_vmi; -static cycle_t read_real_cycles(void) +static cycle_t read_real_cycles(struct clocksource *cs) { cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); return max(ret, clocksource_vmi.cycle_last); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index a2085368a3d..ca7ec44bafc 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -663,7 +663,7 @@ static unsigned long lguest_tsc_khz(void) /* If we can't use the TSC, the kernel falls back to our lower-priority * "lguest_clock", where we read the time value given to us by the Host. */ -static cycle_t lguest_clock_read(void) +static cycle_t lguest_clock_read(struct clocksource *cs) { unsigned long sec, nsec; diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 14f24062349..0a5aa44299a 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -213,6 +213,11 @@ cycle_t xen_clocksource_read(void) return ret; } +static cycle_t xen_clocksource_get_cycles(struct clocksource *cs) +{ + return xen_clocksource_read(); +} + static void xen_read_wallclock(struct timespec *ts) { struct shared_info *s = HYPERVISOR_shared_info; @@ -241,7 +246,7 @@ int xen_set_wallclock(unsigned long now) static struct clocksource xen_clocksource __read_mostly = { .name = "xen", .rating = 400, - .read = xen_clocksource_read, + .read = xen_clocksource_get_cycles, .mask = ~0, .mult = 1< value1) diff --git a/drivers/clocksource/cyclone.c b/drivers/clocksource/cyclone.c index 8615059a872..64e528e8bfa 100644 --- a/drivers/clocksource/cyclone.c +++ b/drivers/clocksource/cyclone.c @@ -19,7 +19,7 @@ int use_cyclone = 0; static void __iomem *cyclone_ptr; -static cycle_t read_cyclone(void) +static cycle_t read_cyclone(struct clocksource *cs) { return (cycle_t)readl(cyclone_ptr); } diff --git a/drivers/clocksource/scx200_hrt.c b/drivers/clocksource/scx200_hrt.c index b92da677aa5..27f4d9637b6 100644 --- a/drivers/clocksource/scx200_hrt.c +++ b/drivers/clocksource/scx200_hrt.c @@ -43,7 +43,7 @@ MODULE_PARM_DESC(ppm, "+-adjust to actual XO freq (ppm)"); /* The base timer frequency, * 27 if selected */ #define HRT_FREQ 1000000 -static cycle_t read_hrt(void) +static cycle_t read_hrt(struct clocksource *cs) { /* Read the timer value */ return (cycle_t) inl(scx200_cb_base + SCx200_TIMER_OFFSET); diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index 254f1064d97..01b886e6882 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -39,7 +39,7 @@ static void __iomem *tcaddr; -static cycle_t tc_get_cycles(void) +static cycle_t tc_get_cycles(struct clocksource *cs) { unsigned long flags; u32 lower, upper; diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 573819ef4cc..0d96cde9ee5 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -143,7 +143,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * 400-499: Perfect * The ideal clocksource. A must-use where * available. - * @read: returns a cycle value + * @read: returns a cycle value, passes clocksource as argument * @mask: bitmask for two's complement * subtraction of non 64 bit counters * @mult: cycle to nanosecond multiplier (adjusted by NTP) @@ -162,7 +162,7 @@ struct clocksource { char *name; struct list_head list; int rating; - cycle_t (*read)(void); + cycle_t (*read)(struct clocksource *cs); cycle_t mask; u32 mult; u32 mult_orig; @@ -271,7 +271,7 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) */ static inline cycle_t clocksource_read(struct clocksource *cs) { - return cs->read(); + return cs->read(cs); } /** diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c46c931a7fe..ecfd7b5187e 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -181,12 +181,12 @@ static void clocksource_watchdog(unsigned long data) resumed = test_and_clear_bit(0, &watchdog_resumed); - wdnow = watchdog->read(); + wdnow = watchdog->read(watchdog); wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); watchdog_last = wdnow; list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { - csnow = cs->read(); + csnow = cs->read(cs); if (unlikely(resumed)) { cs->wd_last = csnow; @@ -247,7 +247,7 @@ static void clocksource_check_watchdog(struct clocksource *cs) list_add(&cs->wd_list, &watchdog_list); if (!started && watchdog) { - watchdog_last = watchdog->read(); + watchdog_last = watchdog->read(watchdog); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask)); @@ -268,7 +268,7 @@ static void clocksource_check_watchdog(struct clocksource *cs) cse->flags &= ~CLOCK_SOURCE_WATCHDOG; /* Start if list is not empty */ if (!list_empty(&watchdog_list)) { - watchdog_last = watchdog->read(); + watchdog_last = watchdog->read(watchdog); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; add_timer_on(&watchdog_timer, diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 06f197560f3..c3f6c30816e 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -50,7 +50,7 @@ */ #define JIFFIES_SHIFT 8 -static cycle_t jiffies_read(void) +static cycle_t jiffies_read(struct clocksource *cs) { return (cycle_t) jiffies; } -- cgit v1.2.3-70-g09d2 From 2a3313f494c2f3f74a27d66f0f14b38558b7dba2 Mon Sep 17 00:00:00 2001 From: "Michael K. Johnson" Date: Tue, 21 Apr 2009 21:44:48 -0400 Subject: x86: more than 8 32-bit CPUs requires X86_BIGSMP $ cat x86-more-than-8-cpus-requires-bigsmp.patch Enforce NR_CPUS <= 8 limitation if X86_BIGSMP not set Configuring more than 8 logical CPUs on 32-bit x86 requires X86_BIGSMP to be set in order to boot successfully, if more than 8 logical CPUs are actually found at boot time. The X86_BIGSMP help text describes that it is required to be set if more than 8 CPUs are configured, but this was previously not enforced. This configuration error has affected multiple distributions: https://bugzilla.redhat.com/show_bug.cgi?id=480844 https://issues.rpath.com/browse/RPL-3022 Signed-off-by: Michael K Johnson LKML-Reference: <20090422014448.GB32541@logo.rdu.rpath.com> Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c9086e6307a..b5cda6c03d1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -664,6 +664,7 @@ config MAXSMP config NR_CPUS int "Maximum number of CPUs" if SMP && !MAXSMP + range 2 8 if SMP && X86_32 && !X86_BIGSMP range 2 512 if SMP && !MAXSMP default "1" if !SMP default "4096" if MAXSMP -- cgit v1.2.3-70-g09d2 From 9b8de7479d0dbab1ed98b5b015d44232c9d3d08e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 21 Apr 2009 23:00:24 +0100 Subject: FRV: Fix the section attribute on UP DECLARE_PER_CPU() In non-SMP mode, the variable section attribute specified by DECLARE_PER_CPU() does not agree with that specified by DEFINE_PER_CPU(). This means that architectures that have a small data section references relative to a base register may throw up linkage errors due to too great a displacement between where the base register points and the per-CPU variable. On FRV, the .h declaration says that the variable is in the .sdata section, but the .c definition says it's actually in the .data section. The linker throws up the following errors: kernel/built-in.o: In function `release_task': kernel/exit.c:78: relocation truncated to fit: R_FRV_GPREL12 against symbol `per_cpu__process_counts' defined in .data section in kernel/built-in.o kernel/exit.c:78: relocation truncated to fit: R_FRV_GPREL12 against symbol `per_cpu__process_counts' defined in .data section in kernel/built-in.o To fix this, DECLARE_PER_CPU() should simply apply the same section attribute as does DEFINE_PER_CPU(). However, this is made slightly more complex by virtue of the fact that there are several variants on DEFINE, so these need to be matched by variants on DECLARE. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/percpu.h | 2 +- arch/ia64/include/asm/smp.h | 2 +- arch/x86/include/asm/desc.h | 2 +- arch/x86/include/asm/hardirq.h | 2 +- arch/x86/include/asm/processor.h | 6 +++--- arch/x86/include/asm/tlbflush.h | 2 +- include/asm-generic/percpu.h | 43 ++++++++++++++++++++++++++++++++++++++-- include/linux/percpu.h | 24 ---------------------- net/rds/rds.h | 2 +- 9 files changed, 50 insertions(+), 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/include/asm/percpu.h b/arch/alpha/include/asm/percpu.h index 3495e8e00d7..e9e0bb5a23b 100644 --- a/arch/alpha/include/asm/percpu.h +++ b/arch/alpha/include/asm/percpu.h @@ -73,6 +73,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #endif /* SMP */ -#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu_var(name) +#include #endif /* __ALPHA_PERCPU_H */ diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h index 59840833625..d217d1d4e05 100644 --- a/arch/ia64/include/asm/smp.h +++ b/arch/ia64/include/asm/smp.h @@ -58,7 +58,7 @@ extern struct smp_boot_data { extern char no_int_routing __devinitdata; extern cpumask_t cpu_core_map[NR_CPUS]; -DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); +DECLARE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map); extern int smp_num_siblings; extern void __iomem *ipi_base_addr; extern unsigned char smp_int_redirect; diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 5623c50d67b..c45f415ce31 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -37,7 +37,7 @@ extern gate_desc idt_table[]; struct gdt_page { struct desc_struct gdt[GDT_ENTRIES]; } __attribute__((aligned(PAGE_SIZE))); -DECLARE_PER_CPU(struct gdt_page, gdt_page); +DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 039db6aa8e0..37555e52f98 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -26,7 +26,7 @@ typedef struct { #endif } ____cacheline_aligned irq_cpustat_t; -DECLARE_PER_CPU(irq_cpustat_t, irq_stat); +DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); /* We can have at most NR_VECTORS irqs routed to a cpu at a time */ #define MAX_HARDIRQS_PER_CPU NR_VECTORS diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index fcf4d92e7e0..c2cceae709c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -138,7 +138,7 @@ extern struct tss_struct doublefault_tss; extern __u32 cleared_cpu_caps[NCAPINTS]; #ifdef CONFIG_SMP -DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); +DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) per_cpu(cpu_info, cpu) #define current_cpu_data __get_cpu_var(cpu_info) #else @@ -270,7 +270,7 @@ struct tss_struct { } ____cacheline_aligned; -DECLARE_PER_CPU(struct tss_struct, init_tss); +DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss); /* * Save the original ist values for checking stack pointers during debugging @@ -393,7 +393,7 @@ union irq_stack_union { }; }; -DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); +DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union); DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index d3539f998f8..16a5c84b032 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -152,7 +152,7 @@ struct tlb_state { struct mm_struct *active_mm; int state; }; -DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); +DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); static inline void reset_lazy_tlbstate(void) { diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index b0e63c672eb..af47b9e1006 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -73,11 +73,50 @@ extern void setup_per_cpu_areas(void); #endif /* SMP */ +#ifndef PER_CPU_BASE_SECTION +#ifdef CONFIG_SMP +#define PER_CPU_BASE_SECTION ".data.percpu" +#else +#define PER_CPU_BASE_SECTION ".data" +#endif +#endif + +#ifdef CONFIG_SMP + +#ifdef MODULE +#define PER_CPU_SHARED_ALIGNED_SECTION "" +#else +#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned" +#endif +#define PER_CPU_FIRST_SECTION ".first" + +#else + +#define PER_CPU_SHARED_ALIGNED_SECTION "" +#define PER_CPU_FIRST_SECTION "" + +#endif + #ifndef PER_CPU_ATTRIBUTES #define PER_CPU_ATTRIBUTES #endif -#define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \ - __typeof__(type) per_cpu_var(name) +#define DECLARE_PER_CPU_SECTION(type, name, section) \ + extern \ + __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +#define DECLARE_PER_CPU(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, "") + +#define DECLARE_PER_CPU_SHARED_ALIGNED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + +#define DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") + +#define DECLARE_PER_CPU_FIRST(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index cfda2d5ad31..f052d818499 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -9,30 +9,6 @@ #include -#ifndef PER_CPU_BASE_SECTION -#ifdef CONFIG_SMP -#define PER_CPU_BASE_SECTION ".data.percpu" -#else -#define PER_CPU_BASE_SECTION ".data" -#endif -#endif - -#ifdef CONFIG_SMP - -#ifdef MODULE -#define PER_CPU_SHARED_ALIGNED_SECTION "" -#else -#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned" -#endif -#define PER_CPU_FIRST_SECTION ".first" - -#else - -#define PER_CPU_SHARED_ALIGNED_SECTION "" -#define PER_CPU_FIRST_SECTION "" - -#endif - #define DEFINE_PER_CPU_SECTION(type, name, section) \ __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name diff --git a/net/rds/rds.h b/net/rds/rds.h index 619f0a30a4e..71794449ca4 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -638,7 +638,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *, void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); /* stats.c */ -DECLARE_PER_CPU(struct rds_statistics, rds_stats); +DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); #define rds_stats_inc_which(which, member) do { \ per_cpu(which, get_cpu()).member++; \ put_cpu(); \ -- cgit v1.2.3-70-g09d2 From bf47a760f66add7870fba33ab50f58b550d6bbd1 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Sun, 5 Apr 2009 14:54:46 -0300 Subject: KVM: MMU: disable global page optimization Complexity to fix it not worthwhile the gains, as discussed in http://article.gmane.org/gmane.comp.emulators.kvm.devel/28649. Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2a36f7f7c4c..b6caf1329b1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1248,7 +1248,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word); sp->gfn = gfn; sp->role = role; - sp->global = role.cr4_pge; + sp->global = 0; hlist_add_head(&sp->hash_link, bucket); if (!direct) { if (rmap_write_protect(vcpu->kvm, gfn)) -- cgit v1.2.3-70-g09d2 From 7f1ea208968f021943d4103ba59e06bb6d8239cb Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 25 Feb 2009 16:08:31 +0100 Subject: KVM: x86: release time_page on vcpu destruction Not releasing the time_page causes a leak of that page or the compound page it is situated in. Cc: stable@kernel.org Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8ca100a9eca..a1ecec5c03e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4159,6 +4159,11 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { + if (vcpu->arch.time_page) { + kvm_release_page_dirty(vcpu->arch.time_page); + vcpu->arch.time_page = NULL; + } + kvm_x86_ops->vcpu_free(vcpu); } -- cgit v1.2.3-70-g09d2 From 888d256e9c565cb61505bd218eb37c81fe77a325 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 17 Apr 2009 19:24:58 +0200 Subject: KVM: Unregister cpufreq notifier on unload Properly unregister cpufreq notifier on onload if it was registered during init. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a1ecec5c03e..7c1ce5ac613 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2775,6 +2775,9 @@ out: void kvm_arch_exit(void) { + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); kvm_x86_ops = NULL; kvm_mmu_module_exit(); } -- cgit v1.2.3-70-g09d2 From 7a6f9cbb37120c745fc187083fb5c3de4dca4f97 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 21 Apr 2009 20:00:37 +0200 Subject: x86: hpet: fix periodic mode programming on AMD 81xx (See http://bugzilla.kernel.org/show_bug.cgi?id=12961) It partially reverts commit c23e253e67c9d8a91a0ffa33c1f571a17f0a2403 (x86: hpet: stop HPET_COUNTER when programming periodic mode) HPET on AMD 81xx chipset needs a second write (with HPET_TN_SETVAL cleared) to T0_CMP register to set the period in periodic mode. With this patch HPET_COUNTER is still stopped but not reset when HPET is programmed in periodic mode. This should help to avoid races when HPET is programmed in periodic mode and fixes a boot time hang that I've observed on a machine when using 1000HZ. [ Impact: fix boot time hang on machines with AMD 81xx chipset ] Reported-by: Jeff Mahoney Signed-off-by: Andreas Herrmann Tested-by: Jeff Mahoney LKML-Reference: <20090421180037.GA2763@alberich.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 3f0019e0a22..81408b93f88 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -236,6 +236,10 @@ static void hpet_stop_counter(void) unsigned long cfg = hpet_readl(HPET_CFG); cfg &= ~HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); +} + +static void hpet_reset_counter(void) +{ hpet_writel(0, HPET_COUNTER); hpet_writel(0, HPET_COUNTER + 4); } @@ -250,6 +254,7 @@ static void hpet_start_counter(void) static void hpet_restart_counter(void) { hpet_stop_counter(); + hpet_reset_counter(); hpet_start_counter(); } @@ -309,7 +314,7 @@ static int hpet_setup_msi_irq(unsigned int irq); static void hpet_set_mode(enum clock_event_mode mode, struct clock_event_device *evt, int timer) { - unsigned long cfg; + unsigned long cfg, cmp, now; uint64_t delta; switch (mode) { @@ -317,12 +322,23 @@ static void hpet_set_mode(enum clock_event_mode mode, hpet_stop_counter(); delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; delta >>= evt->shift; + now = hpet_readl(HPET_COUNTER); + cmp = now + (unsigned long) delta; cfg = hpet_readl(HPET_Tn_CFG(timer)); /* Make sure we use edge triggered interrupts */ cfg &= ~HPET_TN_LEVEL; cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(timer)); + hpet_writel(cmp, HPET_Tn_CMP(timer)); + udelay(1); + /* + * HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL + * cleared) to T0_CMP to set the period. The HPET_TN_SETVAL + * bit is automatically cleared after the first write. + * (See AMD-8111 HyperTransport I/O Hub Data Sheet, + * Publication # 24674) + */ hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); hpet_start_counter(); hpet_print_config(); -- cgit v1.2.3-70-g09d2 From c5428e950ad42640f00092949fd17e356dfdeafa Mon Sep 17 00:00:00 2001 From: Coly Li Date: Wed, 22 Apr 2009 23:21:56 +0800 Subject: uv_time: add parameter to uv_read_rtc() uv_read_rtc() is referenced by read member of struct clocksource clocksource_uv. In include/linux/clocksource.h, read of struct clocksource is declared as: cycle_t (*read)(struct clocksource *cs) This got introduced recently in: 8e19608: clocksource: pass clocksource to read() callback But arch/x86/kernel/uv_time.c was not properly converted by that pach. This patch adds a dummy parameter (struct clocksource type) to uv_read_rtc() to fix the incompatible reference in clocksource_uv, and add a NULL parameter in all places where uv_read_rtc() gets called. [ Impact: cleanup, address compiler warning ] Signed-off-by: Coly Li Cc: Dimitri Sivanich Cc: Magnus Damm Cc: Andrew Morton Cc: Hugh Dickins LKML-Reference: <49EF3614.1050806@suse.de> Signed-off-by: Ingo Molnar Cc: Dimitri Sivanich --- arch/x86/kernel/uv_time.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c index 2ffb6c53326..583f11d5c48 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/kernel/uv_time.c @@ -29,7 +29,7 @@ #define RTC_NAME "sgi_rtc" -static cycle_t uv_read_rtc(void); +static cycle_t uv_read_rtc(struct clocksource *cs); static int uv_rtc_next_event(unsigned long, struct clock_event_device *); static void uv_rtc_timer_setup(enum clock_event_mode, struct clock_event_device *); @@ -123,7 +123,7 @@ static int uv_setup_intr(int cpu, u64 expires) /* Initialize comparator value */ uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires); - return (expires < uv_read_rtc() && !uv_intr_pending(pnode)); + return (expires < uv_read_rtc(NULL) && !uv_intr_pending(pnode)); } /* @@ -256,7 +256,7 @@ static int uv_rtc_unset_timer(int cpu) spin_lock_irqsave(&head->lock, flags); - if (head->next_cpu == bcpu && uv_read_rtc() >= *t) + if (head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) rc = 1; *t = ULLONG_MAX; @@ -278,7 +278,7 @@ static int uv_rtc_unset_timer(int cpu) /* * Read the RTC. */ -static cycle_t uv_read_rtc(void) +static cycle_t uv_read_rtc(struct clocksource *cs) { return (cycle_t)uv_read_local_mmr(UVH_RTC); } @@ -291,7 +291,7 @@ static int uv_rtc_next_event(unsigned long delta, { int ced_cpu = cpumask_first(ced->cpumask); - return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc()); + return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc(NULL)); } /* -- cgit v1.2.3-70-g09d2 From 6298c512bc1007c3ff5c9ce20e6996781651cc45 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Apr 2009 12:28:22 +0200 Subject: x86, mce: make polling timer interval per CPU The polling timer while running per CPU still uses a global next_interval variable, which lead to some CPUs either polling too fast or too slow. This was not a serious problem because all errors get picked up eventually, but it's still better to avoid it. Turn next_interval into a per cpu variable. v2: Fix check_interval == 0 case (Hidetoshi Seto) [ Impact: minor bug fix ] Signed-off-by: Andi Kleen Reviewed-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_64.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 863f89568b1..82614f1b923 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -452,13 +452,14 @@ void mce_log_therm_throt_event(__u64 status) */ static int check_interval = 5 * 60; /* 5 minutes */ -static int next_interval; /* in jiffies */ +static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ static void mcheck_timer(unsigned long); static DEFINE_PER_CPU(struct timer_list, mce_timer); static void mcheck_timer(unsigned long data) { struct timer_list *t = &per_cpu(mce_timer, data); + int *n; WARN_ON(smp_processor_id() != data); @@ -470,14 +471,14 @@ static void mcheck_timer(unsigned long data) * Alert userspace if needed. If we logged an MCE, reduce the * polling interval, otherwise increase the polling interval. */ + n = &__get_cpu_var(next_interval); if (mce_notify_user()) { - next_interval = max(next_interval/2, HZ/100); + *n = max(*n/2, HZ/100); } else { - next_interval = min(next_interval * 2, - (int)round_jiffies_relative(check_interval*HZ)); + *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); } - t->expires = jiffies + next_interval; + t->expires = jiffies + *n; add_timer(t); } @@ -632,14 +633,13 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) static void mce_init_timer(void) { struct timer_list *t = &__get_cpu_var(mce_timer); + int *n = &__get_cpu_var(next_interval); - /* data race harmless because everyone sets to the same value */ - if (!next_interval) - next_interval = check_interval * HZ; - if (!next_interval) + *n = check_interval * HZ; + if (!*n) return; setup_timer(t, mcheck_timer, smp_processor_id()); - t->expires = round_jiffies(jiffies + next_interval); + t->expires = round_jiffies(jiffies + *n); add_timer(t); } @@ -907,7 +907,6 @@ static void mce_cpu_restart(void *data) /* Reinit MCEs after user configuration changes */ static void mce_restart(void) { - next_interval = check_interval * HZ; on_each_cpu(mce_cpu_restart, NULL, 1); } @@ -1110,7 +1109,8 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, break; case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: - t->expires = round_jiffies(jiffies + next_interval); + t->expires = round_jiffies(jiffies + + __get_cpu_var(next_interval)); add_timer_on(t, cpu); smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); break; -- cgit v1.2.3-70-g09d2 From 5679af4c1625a1534a4321e1ecc3c48a1cf65eb8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 7 Apr 2009 17:06:55 +0200 Subject: x86, mce: fix boot logging logic The earlier patch to change the poller to a separate function subtly broke the boot logging logic. This could lead to machine checks getting logged at boot even when disabled or defaulting to off on some systems. Fix that. [ Impact: bug fix - avoid spurious MCE in log ] Signed-off-by: Andi Kleen Reviewed-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce_64.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 563933e06a3..4f8c199584e 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -137,6 +137,7 @@ DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); enum mcp_flags { MCP_TIMESTAMP = (1 << 0), /* log time stamp */ MCP_UC = (1 << 1), /* log uncorrected errors */ + MCP_DONTLOG = (1 << 2), /* only clear, don't log */ }; extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 82614f1b923..6fb0b359d2a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -239,9 +239,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) * Don't get the IP here because it's unlikely to * have anything to do with the actual error location. */ - - mce_log(&m); - add_taint(TAINT_MACHINE_CHECK); + if (!(flags & MCP_DONTLOG)) { + mce_log(&m); + add_taint(TAINT_MACHINE_CHECK); + } /* * Clear state for this bank. @@ -585,7 +586,7 @@ static void mce_init(void *dummy) * Log the machine checks left over from the previous reset. */ bitmap_fill(all_banks, MAX_NR_BANKS); - machine_check_poll(MCP_UC, &all_banks); + machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); set_in_cr4(X86_CR4_MCE); -- cgit v1.2.3-70-g09d2 From 044cd80942e47b9de0915b627902adf05c52377f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 18 Apr 2009 01:43:46 -0700 Subject: x86/PCI: don't call e820_all_mapped with -1 in the mmconfig case e820_all_mapped need end is (addr + size) instead of (addr + size - 1) Cc: stable@kernel.org Acked-by: Ingo Molnar Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 905bb526b13..5fa10bb9604 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -375,7 +375,7 @@ static acpi_status __init check_mcfg_resource(struct acpi_resource *res, if (!fixmem32) return AE_OK; if ((mcfg_res->start >= fixmem32->address) && - (mcfg_res->end < (fixmem32->address + + (mcfg_res->end <= (fixmem32->address + fixmem32->address_length))) { mcfg_res->flags = 1; return AE_CTRL_TERMINATE; @@ -392,7 +392,7 @@ static acpi_status __init check_mcfg_resource(struct acpi_resource *res, return AE_OK; if ((mcfg_res->start >= address.minimum) && - (mcfg_res->end < (address.minimum + address.address_length))) { + (mcfg_res->end <= (address.minimum + address.address_length))) { mcfg_res->flags = 1; return AE_CTRL_TERMINATE; } @@ -439,7 +439,7 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, u64 old_size = size; int valid = 0; - while (!is_reserved(addr, addr + size - 1, E820_RESERVED)) { + while (!is_reserved(addr, addr + size, E820_RESERVED)) { size >>= 1; if (size < (16UL<<20)) break; -- cgit v1.2.3-70-g09d2 From 0bb1be3e30bfc3e09fa0ff1e887ac7da4a16c3a2 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 16 Apr 2009 13:31:10 -0600 Subject: x86/PCI: Move set_pci_bus_resources_arch_default into arch/x86 Commit 30a18d6c3f1e774de656ebd8ff219d53e2ba4029 introduced a new function to set the PCI bus resources. Unfortunately, neither the author, nor the committers seemed to know that we already have somewhere to do that -- pcibios_fixup_bus(). This patch moves the hook (used only by the K8 code) into x86-specific code where it should have been in the first place. Cc: Yinghai Lu Signed-off-by: Matthew Wilcox Acked-by: Ingo Molnar Signed-off-by: Jesse Barnes --- arch/x86/pci/common.c | 7 ++++++- drivers/pci/probe.c | 6 ------ 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 8c362b96b64..f80ece51305 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -142,15 +142,20 @@ static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) } } +void __attribute__((weak)) set_pci_bus_resources_arch_default(struct pci_bus *b) +{ +} + /* * Called after each bus is probed, but before its children * are examined. */ -void __devinit pcibios_fixup_bus(struct pci_bus *b) +void __devinit pcibios_fixup_bus(struct pci_bus *b) { struct pci_dev *dev; + set_pci_bus_resources_arch_default(b); pci_read_bridge_bases(b); list_for_each_entry(dev, &b->devices, bus_list) pcibios_fixup_device_resources(dev); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 8eb50dffb78..e3c3e081b83 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1118,10 +1118,6 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) return max; } -void __attribute__((weak)) set_pci_bus_resources_arch_default(struct pci_bus *b) -{ -} - struct pci_bus * pci_create_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata) { @@ -1180,8 +1176,6 @@ struct pci_bus * pci_create_bus(struct device *parent, b->resource[0] = &ioport_resource; b->resource[1] = &iomem_resource; - set_pci_bus_resources_arch_default(b); - return b; dev_create_file_err: -- cgit v1.2.3-70-g09d2 From 0e94ecd098347874e776f7818728613a335880d1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 18 Apr 2009 10:11:25 -0700 Subject: x86/PCI: set_pci_bus_resources_arch_default cleanups Rename set_pci_bus_resources_arch_default to x86_pci_root_bus_res_quirks, move the weak version from common.c to i386.c, and before calling, make sure it's a root bus. Reviewed-by: Matthew Wilcox Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/include/asm/topology.h | 2 +- arch/x86/pci/amd_bus.c | 2 +- arch/x86/pci/common.c | 8 +++----- arch/x86/pci/i386.c | 4 ++++ 4 files changed, 9 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 892b119dba6..f44b49abca4 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -200,7 +200,7 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) } struct pci_bus; -void set_pci_bus_resources_arch_default(struct pci_bus *b); +void x86_pci_root_bus_res_quirks(struct pci_bus *b); #ifdef CONFIG_SMP #define mc_capable() (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids) diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 9bb09823b36..e121ee050f7 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -94,7 +94,7 @@ struct pci_root_info { static int pci_root_num; static struct pci_root_info pci_root_info[PCI_ROOT_NR]; -void set_pci_bus_resources_arch_default(struct pci_bus *b) +void x86_pci_root_bus_res_quirks(struct pci_bus *b) { int i; int j; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index f80ece51305..2202b6257b8 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -142,10 +142,6 @@ static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) } } -void __attribute__((weak)) set_pci_bus_resources_arch_default(struct pci_bus *b) -{ -} - /* * Called after each bus is probed, but before its children * are examined. @@ -155,7 +151,9 @@ void __devinit pcibios_fixup_bus(struct pci_bus *b) { struct pci_dev *dev; - set_pci_bus_resources_arch_default(b); + /* root bus? */ + if (!b->parent) + x86_pci_root_bus_res_quirks(b); pci_read_bridge_bases(b); list_for_each_entry(dev, &b->devices, bus_list) pcibios_fixup_device_resources(dev); diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index f1817f71e00..a85bef20a3b 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -238,6 +238,10 @@ void __init pcibios_resource_survey(void) */ fs_initcall(pcibios_assign_resources); +void __weak x86_pci_root_bus_res_quirks(struct pci_bus *b) +{ +} + /* * If we set up a device for bus mastering, we need to check the latency * timer as certain crappy BIOSes forget to set it properly. -- cgit v1.2.3-70-g09d2 From b10ceb5530df7ee6e81f92910589a34dd3e5690b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 20 Apr 2009 18:35:40 -0700 Subject: x86/PCI: don't bother with root quirks if _CRS is used It will be overwriten later if _CRS is used, so don't bother to set it. Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/pci/amd_bus.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index e121ee050f7..f893d6a6e80 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -100,6 +100,10 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) int j; struct pci_root_info *info; + /* don't go for it if _CRS is used */ + if (pci_probe & PCI_USE__CRS) + return; + /* if only one root bus, don't need to anything */ if (pci_root_num < 2) return; -- cgit v1.2.3-70-g09d2 From 4c31e92b97b6d7e7b19ee5e54a22571ffdebb305 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 22 Apr 2009 14:19:27 -0700 Subject: x86: check boundary in setup_node_bootmem() Commit dc09855 ("x86/uv: fix init of memory-less nodes") causes a two sockets system (where node-1 doesn't have RAM installed) to crash. That commit makes node_possible include cpu nodes that do not have memory. So check boundary in setup_node_bootmem(). [ Impact: fix boot crash on RAM-less NUMA node system ] Signed-off-by: Yinghai Lu Cc: Jack Steiner LKML-Reference: <49EF89DF.9090404@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/numa_64.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index d73aaa89237..2d05a12029d 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -188,6 +188,9 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); int nid; + if (!end) + return; + start = roundup(start, ZONE_ALIGN); printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, -- cgit v1.2.3-70-g09d2 From d2c8604121648b744ebb127991f1c5876931885e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 23 Apr 2009 19:19:42 -0400 Subject: x86, hpet: Stop soliciting hpet=force users on ICH4M The HPET in the ICH4M is not documented in the data sheet because it was not officially validated. While it is fine for hackers to continue to use "hpet=force" to enable the hardware that they have, it is not prudent to solicit additional "hpet=force" users on this hardware. [ Impact: remove hpet=force syslog message on old-ICH systems ] Signed-off-by: Len Brown Acked-by: Venkatesh Pallipadi LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/quirks.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index e95022e4f5d..7563b31b4f0 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -261,8 +261,6 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev) { if (hpet_force_user) old_ich_force_enable_hpet(dev); - else - hpet_print_force_info(); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1, -- cgit v1.2.3-70-g09d2 From 33015c85995716d03f6293346cf05a1908b0fb9a Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Tue, 28 Apr 2009 20:17:48 +0100 Subject: tracing: x86, mmiotrace: fix range test Matching on (addr == (p->addr + p->len)) causes problems when mappings are adjacent. [ Impact: fix mmiotrace confusion on adjacent iomaps ] Signed-off-by: Stuart Bennett Acked-by: Pekka Paalanen Cc: Steven Rostedt LKML-Reference: <1240946271-7083-2-git-send-email-stuart@freedesktop.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 4f115e00486..50dc802a1c4 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -87,7 +87,7 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr) { struct kmmio_probe *p; list_for_each_entry_rcu(p, &kmmio_probes, list) { - if (addr >= p->addr && addr <= (p->addr + p->len)) + if (addr >= p->addr && addr < (p->addr + p->len)) return p; } return NULL; -- cgit v1.2.3-70-g09d2 From 2f65dd475c6a8a997145ea83cc3d2d5e6dc55af1 Mon Sep 17 00:00:00 2001 From: John Wright Date: Wed, 29 Apr 2009 14:32:01 -0600 Subject: x86: gettimeofday() vDSO: fix segfault when tv == NULL According to the gettimeofday(2) manual: If either tv or tz is NULL, the corresponding structure is not set or returned. Since it is legal to give NULL as the tv argument, the code should make sure tv is not NULL before trying to dereference it. This issue manifests itself on x86_64 when vdso=0 is not on the kernel command-line and libc uses the vDSO for gettimeofday() (e.g. glibc >= 2.7). A simple reproducer: #include #include int main(void) { struct timezone tz; gettimeofday(NULL, &tz); return 0; } See http://bugs.debian.org/466491 for more details. [ Impact: fix gettimeofday(NULL, &tz) segfault ] Signed-off-by: John Wright Cc: Andi Kleen Cc: John Wright LKML-Reference: <1241037121-14805-1-git-send-email-john.wright@hp.com> Signed-off-by: Ingo Molnar --- arch/x86/vdso/vclock_gettime.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index d9d35824c56..6a40b78b46a 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -104,11 +104,13 @@ notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) { long ret; if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { - BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != - offsetof(struct timespec, tv_nsec) || - sizeof(*tv) != sizeof(struct timespec)); - do_realtime((struct timespec *)tv); - tv->tv_usec /= 1000; + if (likely(tv != NULL)) { + BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != + offsetof(struct timespec, tv_nsec) || + sizeof(*tv) != sizeof(struct timespec)); + do_realtime((struct timespec *)tv); + tv->tv_usec /= 1000; + } if (unlikely(tz != NULL)) { /* Avoid memcpy. Some old compilers fail to inline it */ tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest; -- cgit v1.2.3-70-g09d2 From f9a196b8dceba3c1e5fe885b81e45043ad7c60fc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 1 May 2009 20:59:25 +0200 Subject: x86: initialize io_bitmap_base on 32bit commit db949bba3c7cf2e664ac12e237c6d4c914f0c69d (x86-32: use non-lazy io bitmap context switching) broke ioperm for 32bit because it removed the lazy initialization of io_bitmap_base and did not set it to the real bitmap offset. [ Impact: fix non-working sys_ioperm() on 32-bit kernels ] Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/common.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c4f667896c2..c1caefc82e6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1203,6 +1203,8 @@ void __cpuinit cpu_init(void) load_TR_desc(); load_LDT(&init_mm.context); + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + #ifdef CONFIG_DOUBLEFAULT /* Set up doublefault TSS pointer in the GDT */ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); -- cgit v1.2.3-70-g09d2 From 6da7342ff1c5274c51ada084974668d10f769c16 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 May 2009 11:44:38 +0200 Subject: amd-iommu: fix iommu flag masks The feature bits should be set via bitmasks, not via feature IDs. [ Impact: fix feature enabling in newer IOMMU versions ] Signed-off-by: Joerg Roedel LKML-Reference: <20090504102028.GA30307@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 42c33cebf00..8c0be0902da 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -49,10 +49,10 @@ #define IVHD_DEV_EXT_SELECT 0x46 #define IVHD_DEV_EXT_SELECT_RANGE 0x47 -#define IVHD_FLAG_HT_TUN_EN 0x00 -#define IVHD_FLAG_PASSPW_EN 0x01 -#define IVHD_FLAG_RESPASSPW_EN 0x02 -#define IVHD_FLAG_ISOC_EN 0x03 +#define IVHD_FLAG_HT_TUN_EN_MASK 0x01 +#define IVHD_FLAG_PASSPW_EN_MASK 0x02 +#define IVHD_FLAG_RESPASSPW_EN_MASK 0x04 +#define IVHD_FLAG_ISOC_EN_MASK 0x08 #define IVMD_FLAG_EXCL_RANGE 0x08 #define IVMD_FLAG_UNITY_MAP 0x01 @@ -569,19 +569,19 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, * First set the recommended feature enable bits from ACPI * into the IOMMU control registers */ - h->flags & IVHD_FLAG_HT_TUN_EN ? + h->flags & IVHD_FLAG_HT_TUN_EN_MASK ? iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); - h->flags & IVHD_FLAG_PASSPW_EN ? + h->flags & IVHD_FLAG_PASSPW_EN_MASK ? iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : iommu_feature_disable(iommu, CONTROL_PASSPW_EN); - h->flags & IVHD_FLAG_RESPASSPW_EN ? + h->flags & IVHD_FLAG_RESPASSPW_EN_MASK ? iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); - h->flags & IVHD_FLAG_ISOC_EN ? + h->flags & IVHD_FLAG_ISOC_EN_MASK ? iommu_feature_enable(iommu, CONTROL_ISOC_EN) : iommu_feature_disable(iommu, CONTROL_ISOC_EN); -- cgit v1.2.3-70-g09d2 From 35d11680a9d82c93eb92f08f9702b72877427b4a Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Mon, 4 May 2009 20:28:59 +0200 Subject: x86: show number of core_siblings instead of thread_siblings in /proc/cpuinfo Commit 7ad728f98162cb1af06a85b2a5fc422dddd4fb78 (cpumask: x86: convert cpu_sibling_map/cpu_core_map to cpumask_var_t) changed the output of /proc/cpuinfo for siblings: Example on an AMD Phenom: physical id : 0 siblings : 1 core id : 3 cpu cores : 4 Before that commit it was: physical id : 0 siblings : 4 core id : 3 cpu cores : 4 Instead of cpu_core_mask it now uses cpu_sibling_mask to count siblings. This is due to the following hunk of above commit: | --- a/arch/x86/kernel/cpu/proc.c | +++ b/arch/x86/kernel/cpu/proc.c | @@ -14,7 +14,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinf | if (c->x86_max_cores * smp_num_siblings > 1) { | seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); | seq_printf(m, "siblings\t: %d\n", | - cpus_weight(per_cpu(cpu_core_map, cpu))); | + cpumask_weight(cpu_sibling_mask(cpu))); | seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); | seq_printf(m, "apicid\t\t: %d\n", c->apicid); This was a mistake, because the impact line shows that this side-effect was not anticipated: Impact: reduce per-cpu size for CONFIG_CPUMASK_OFFSTACK=y So revert the respective hunk to restore the old behavior. [ Impact: fix sibling-info regression in /proc/cpuinfo ] Signed-off-by: Andreas Herrmann Cc: Rusty Russell LKML-Reference: <20090504182859.GA29045@alberich.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index f93047fed79..d5e30397246 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -14,7 +14,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, if (c->x86_max_cores * smp_num_siblings > 1) { seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); seq_printf(m, "siblings\t: %d\n", - cpumask_weight(cpu_sibling_mask(cpu))); + cpumask_weight(cpu_core_mask(cpu))); seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); seq_printf(m, "apicid\t\t: %d\n", c->apicid); -- cgit v1.2.3-70-g09d2 From 7eccf7b227b6d3b1745b937ce35efc9c27f9b0e5 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 5 May 2009 12:50:02 -0700 Subject: x86, srat: do not register nodes beyond e820 map The mem= option will truncate the memory map at a specified address so it's not possible to register nodes with memory beyond the e820 upper bound. unparse_node() is only called when then node had memory associated with it, although with the mem= option it is no longer addressable. [ Impact: fix boot hang on certain (large) systems ] Reported-by: "Zhang, Yanmin" Signed-off-by: David Rientjes Acked-by: Jack Steiner LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/mm/srat_64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 33c5fa57e43..01765955baa 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -361,6 +361,7 @@ static void __init unparse_node(int node) { int i; node_clear(node, nodes_parsed); + node_clear(node, cpu_nodes_parsed); for (i = 0; i < MAX_LOCAL_APIC; i++) { if (apicid_to_node[i] == node) apicid_to_node[i] = NUMA_NO_NODE; -- cgit v1.2.3-70-g09d2 From e0e5ea3268db428d19e1c5fa00e6f583861cbdbd Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Mon, 4 May 2009 09:08:26 +0530 Subject: x86: Fix a typo in a printk message [ Impact: printk message cleanup ] Signed-off-by: Nikanth Karthikesan LKML-Reference: <200905040908.27299.knikanth@suse.de> Signed-off-by: Ingo Molnar --- arch/x86/mm/srat_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 16ae70fc57e..29a0e37114f 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -216,7 +216,7 @@ int __init get_memcfg_from_srat(void) if (num_memory_chunks == 0) { printk(KERN_WARNING - "could not finy any ACPI SRAT memory areas.\n"); + "could not find any ACPI SRAT memory areas.\n"); goto out_fail; } -- cgit v1.2.3-70-g09d2 From 61438766514a2d7f191ce1b3cf6812eabbef4ef7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 6 May 2009 13:02:19 +0100 Subject: x86: fix boot hang in early_reserve_e820() If the first non-reserved (sub-)range doesn't fit the size requested, an endless loop will be entered. If a range returned from find_e820_area_size() turns out insufficient in size, the range must be skipped before calling the function again. [ Impact: fixes boot hang on some platforms ] Signed-off-by: Jan Beulich Signed-off-by: H. Peter Anvin --- arch/x86/kernel/e820.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index ef2c3563357..00628130292 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1074,12 +1074,13 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) u64 addr; u64 start; - start = startt; - while (size < sizet && (start + 1)) + for (start = startt; ; start += size) { start = find_e820_area_size(start, &size, align); - - if (size < sizet) - return 0; + if (!(start + 1)) + return 0; + if (size >= sizet) + break; + } #ifdef CONFIG_X86_32 if (start >= MAXMEM) -- cgit v1.2.3-70-g09d2 From 498343967613183611ac37dccb2846496d954c06 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 6 May 2009 13:06:47 +0100 Subject: x86-64: finish cleanup_highmaps()'s job wrt. _brk_end With the introduction of the .brk section, special care must be taken that no unused page table entries remain if _brk_end and _end are separated by a 2M page boundary. cleanup_highmap() runs very early and hence cannot take care of that, hence potential entries needing to be removed past _brk_end must be cleared once the brk allocator has done its job. [ Impact: avoids undesirable TLB aliases ] Signed-off-by: Jan Beulich Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index fd3da1dda1c..ae4f7b5d710 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -304,8 +305,23 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, #endif #ifdef CONFIG_X86_64 - if (!after_bootmem) + if (!after_bootmem && !start) { + pud_t *pud; + pmd_t *pmd; + mmu_cr4_features = read_cr4(); + + /* + * _brk_end cannot change anymore, but it and _end may be + * located on different 2M pages. cleanup_highmap(), however, + * can only consider _end when it runs, so destroy any + * mappings beyond _brk_end here. + */ + pud = pud_offset(pgd_offset_k(_brk_end), _brk_end); + pmd = pmd_offset(pud, _brk_end - 1); + while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1)) + pmd_clear(pmd); + } #endif __flush_tlb_all(); -- cgit v1.2.3-70-g09d2 From 6407df5ca54a511054200a1eb23f78f723ca1de4 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 8 May 2009 10:51:41 +0800 Subject: x86, kexec: fix crashdump panic with CONFIG_KEXEC_JUMP Tim Starling reported that crashdump will panic with kernel compiled with CONFIG_KEXEC_JUMP due to null pointer deference in machine_kexec_32.c: machine_kexec(), when deferencing kexec_image. Refering to: http://bugzilla.kernel.org/show_bug.cgi?id=13265 This patch fixes the BUG via replacing global variable reference: kexec_image in machine_kexec() with local variable reference: image, which is more appropriate, and will not be null. Same BUG is in machine_kexec_64.c too, so fixed too in the same way. [ Impact: fix crash on kexec ] Reported-by: Tim Starling Signed-off-by: Huang Ying LKML-Reference: <1241751101.6259.85.camel@yhuang-dev.sh.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/machine_kexec_32.c | 4 ++-- arch/x86/kernel/machine_kexec_64.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index e7368c1da01..c1c429d0013 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -194,7 +194,7 @@ void machine_kexec(struct kimage *image) unsigned int preserve_context); #ifdef CONFIG_KEXEC_JUMP - if (kexec_image->preserve_context) + if (image->preserve_context) save_processor_state(); #endif @@ -253,7 +253,7 @@ void machine_kexec(struct kimage *image) image->preserve_context); #ifdef CONFIG_KEXEC_JUMP - if (kexec_image->preserve_context) + if (image->preserve_context) restore_processor_state(); #endif diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 89cea4d4467..84c3bf209e9 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -274,7 +274,7 @@ void machine_kexec(struct kimage *image) int save_ftrace_enabled; #ifdef CONFIG_KEXEC_JUMP - if (kexec_image->preserve_context) + if (image->preserve_context) save_processor_state(); #endif @@ -333,7 +333,7 @@ void machine_kexec(struct kimage *image) image->preserve_context); #ifdef CONFIG_KEXEC_JUMP - if (kexec_image->preserve_context) + if (image->preserve_context) restore_processor_state(); #endif -- cgit v1.2.3-70-g09d2 From 33df4db04a79660150e1948e3296eeb451ac121b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 7 May 2009 11:56:44 -0700 Subject: x86: xen, i386: reserve Xen pagetables The Xen pagetables are no longer implicitly reserved as part of the other i386_start_kernel reservations, so make sure we explicitly reserve them. This prevents them from being released into the general kernel free page pool and reused. [ Impact: fix Xen guest crash ] Also-Bisected-by: Bryan Donlan Signed-off-by: Jeremy Fitzhardinge Cc: Xen-devel Cc: Linus Torvalds LKML-Reference: <4A032EEC.30509@goop.org> Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 9842b121240..e25a78e1113 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1794,6 +1794,11 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); + reserve_early(__pa(xen_start_info->pt_base), + __pa(xen_start_info->pt_base + + xen_start_info->nr_pt_frames * PAGE_SIZE), + "XEN PAGETABLES"); + return swapper_pg_dir; } #endif /* CONFIG_X86_64 */ -- cgit v1.2.3-70-g09d2 From e5299926d7459d9fa7c7f856983147817aedb10e Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Fri, 8 May 2009 17:28:40 +0900 Subject: x86: MCE: make cmci_discover_lock irq-safe Lockdep reports the warning below when Li tries to offline one cpu: [ 110.835487] ================================= [ 110.835616] [ INFO: inconsistent lock state ] [ 110.835688] 2.6.30-rc4-00336-g8c9ed89 #52 [ 110.835757] --------------------------------- [ 110.835828] inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. [ 110.835908] swapper/0 [HC1[1]:SC0[0]:HE0:SE1] takes: [ 110.835982] (cmci_discover_lock){?.+...}, at: [] cmci_clear+0x30/0x9b cmci_clear() can be called via smp_call_function_single(). It is better to disable interrupt while holding cmci_discover_lock, to turn it into an irq-safe lock - we can deadlock otherwise. [ Impact: fix possible deadlock in the MCE code ] Reported-by: Shaohua Li Signed-off-by: Hidetoshi Seto Cc: Andi Kleen Cc: Andrew Morton LKML-Reference: <4A03ED38.8000700@jp.fujitsu.com> Signed-off-by: Ingo Molnar Reported-by: Shaohua Li --- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index d6b72df89d6..cef3ee30744 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -151,10 +151,11 @@ static void print_update(char *type, int *hdr, int num) static void cmci_discover(int banks, int boot) { unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); + unsigned long flags; int hdr = 0; int i; - spin_lock(&cmci_discover_lock); + spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) { u64 val; @@ -184,7 +185,7 @@ static void cmci_discover(int banks, int boot) WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); } } - spin_unlock(&cmci_discover_lock); + spin_unlock_irqrestore(&cmci_discover_lock, flags); if (hdr) printk(KERN_CONT "\n"); } @@ -211,13 +212,14 @@ void cmci_recheck(void) */ void cmci_clear(void) { + unsigned long flags; int i; int banks; u64 val; if (!cmci_supported(&banks)) return; - spin_lock(&cmci_discover_lock); + spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) { if (!test_bit(i, __get_cpu_var(mce_banks_owned))) continue; @@ -227,7 +229,7 @@ void cmci_clear(void) wrmsrl(MSR_IA32_MC0_CTL2 + i, val); __clear_bit(i, __get_cpu_var(mce_banks_owned)); } - spin_unlock(&cmci_discover_lock); + spin_unlock_irqrestore(&cmci_discover_lock, flags); } /* -- cgit v1.2.3-70-g09d2