Diffstat (limited to 'arch/i386/kernel')
64 files changed, 1167 insertions, 917 deletions
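A recurring theme in this changeset is the new text_poke() primitive added to arch/i386/kernel/alternative.c (see the alternative.c hunks below): code patching no longer writes into kernel text directly, but goes through a helper that remaps write-protected pages with vmap() and issues sync_core() afterwards. As a rough illustration of the calling convention only (example_make_locked() and its argument are invented for this sketch, not taken from the patch), a caller that flips a single byte, the way alternatives_smp_lock() does below when it adds a LOCK prefix, might look like this:

	/* Sketch only: patch one byte of an instruction via text_poke().
	 * A single-byte store is the safe case; the warning comment on
	 * text_poke() in the hunk below covers what multi-byte callers
	 * must guarantee. Assumes the text_poke() declaration is visible,
	 * e.g. via asm/alternative.h after this change. */
	static void example_make_locked(u8 *insn_site)
	{
		unsigned char lock_prefix = 0xf0;	/* x86 LOCK prefix */

		text_poke(insn_site, &lock_prefix, 1);
	}

For patches longer than one byte the constraints from that warning comment apply: no other CPU may run the code being patched, no thread may be preempted inside it, and NMI/MCE handlers must be held off, which is why alternative_instructions() below brackets its work with stop_nmi()/stop_mce() and restart_nmi()/restart_mce().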
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 06da59f6f83..dbe5e87e0d6 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_VM86) += vm86.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_K8_NB) += k8.o +obj-$(CONFIG_MGEODE_LX) += geode.o obj-$(CONFIG_VMI) += vmi.o vmiclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index a574cd2c8b6..cacdd883bf2 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -618,6 +618,8 @@ static int __init acpi_parse_sbf(struct acpi_table_header *table) #ifdef CONFIG_HPET_TIMER #include <asm/hpet.h> +static struct __initdata resource *hpet_res; + static int __init acpi_parse_hpet(struct acpi_table_header *table) { struct acpi_table_hpet *hpet_tbl; @@ -638,8 +640,42 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", hpet_tbl->id, hpet_address); + /* + * Allocate and initialize the HPET firmware resource for adding into + * the resource tree during the lateinit timeframe. + */ +#define HPET_RESOURCE_NAME_SIZE 9 + hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); + + if (!hpet_res) + return 0; + + memset(hpet_res, 0, sizeof(*hpet_res)); + hpet_res->name = (void *)&hpet_res[1]; + hpet_res->flags = IORESOURCE_MEM; + snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u", + hpet_tbl->sequence); + + hpet_res->start = hpet_address; + hpet_res->end = hpet_address + (1 * 1024) - 1; + return 0; } + +/* + * hpet_insert_resource inserts the HPET resources used into the resource + * tree. + */ +static __init int hpet_insert_resource(void) +{ + if (!hpet_res) + return 1; + + return insert_resource(&iomem_resource, hpet_res); +} + +late_initcall(hpet_insert_resource); + #else #define acpi_parse_hpet NULL #endif @@ -950,14 +986,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { }, { .callback = force_acpi_ht, - .ident = "DELL GX240", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"), - }, - }, - { - .callback = force_acpi_ht, .ident = "HP VISUALIZE NT Workstation", .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c index 4ee83577bf6..c42b5ab49de 100644 --- a/arch/i386/kernel/acpi/sleep.c +++ b/arch/i386/kernel/acpi/sleep.c @@ -14,7 +14,7 @@ /* address in low memory of the wakeup routine. */ unsigned long acpi_wakeup_address = 0; -unsigned long acpi_video_flags; +unsigned long acpi_realmode_flags; extern char wakeup_start, wakeup_end; extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); @@ -68,9 +68,11 @@ static int __init acpi_sleep_setup(char *str) { while ((str != NULL) && (*str != '\0')) { if (strncmp(str, "s3_bios", 7) == 0) - acpi_video_flags = 1; + acpi_realmode_flags |= 1; if (strncmp(str, "s3_mode", 7) == 0) - acpi_video_flags |= 2; + acpi_realmode_flags |= 2; + if (strncmp(str, "s3_beep", 7) == 0) + acpi_realmode_flags |= 4; str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); @@ -80,9 +82,11 @@ static int __init acpi_sleep_setup(char *str) __setup("acpi_sleep=", acpi_sleep_setup); +/* Ouch, we want to delete this. 
We already have better version in userspace, in + s2ram from suspend.sf.net project */ static __init int reset_videomode_after_s3(struct dmi_system_id *d) { - acpi_video_flags |= 2; + acpi_realmode_flags |= 2; return 0; } diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S index a2295a34b2c..ed0a0f2c159 100644 --- a/arch/i386/kernel/acpi/wakeup.S +++ b/arch/i386/kernel/acpi/wakeup.S @@ -13,6 +13,21 @@ # cs = 0x1234, eip = 0x05 # +#define BEEP \ + inb $97, %al; \ + outb %al, $0x80; \ + movb $3, %al; \ + outb %al, $97; \ + outb %al, $0x80; \ + movb $-74, %al; \ + outb %al, $67; \ + outb %al, $0x80; \ + movb $-119, %al; \ + outb %al, $66; \ + outb %al, $0x80; \ + movb $15, %al; \ + outb %al, $66; + ALIGN .align 4096 ENTRY(wakeup_start) @@ -31,6 +46,11 @@ wakeup_code: movw %cs, %ax movw %ax, %ds # Make ds:0 point to wakeup_start movw %ax, %ss + + testl $4, realmode_flags - wakeup_code + jz 1f + BEEP +1: mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board movw $0x0e00 + 'S', %fs:(0x12) @@ -41,7 +61,7 @@ wakeup_code: cmpl $0x12345678, %eax jne bogus_real_magic - testl $1, video_flags - wakeup_code + testl $1, realmode_flags - wakeup_code jz 1f lcall $0xc000,$3 movw %cs, %ax @@ -49,7 +69,7 @@ wakeup_code: movw %ax, %ss 1: - testl $2, video_flags - wakeup_code + testl $2, realmode_flags - wakeup_code jz 1f mov video_mode - wakeup_code, %ax call mode_set @@ -88,7 +108,11 @@ wakeup_code: cmpl $0x12345678, %eax jne bogus_real_magic - ljmpl $__KERNEL_CS,$wakeup_pmode_return + testl $8, realmode_flags - wakeup_code + jz 1f + BEEP +1: + ljmpl $__KERNEL_CS, $wakeup_pmode_return real_save_gdt: .word 0 .long 0 @@ -97,7 +121,8 @@ real_save_cr3: .long 0 real_save_cr4: .long 0 real_magic: .long 0 video_mode: .long 0 -video_flags: .long 0 +realmode_flags: .long 0 +beep_flags: .long 0 real_efer_save_restore: .long 0 real_save_efer_edx: .long 0 real_save_efer_eax: .long 0 @@ -260,8 +285,8 @@ ENTRY(acpi_copy_wakeup_routine) movl saved_videomode, %edx movl %edx, video_mode - wakeup_start (%eax) - movl acpi_video_flags, %edx - movl %edx, video_flags - wakeup_start (%eax) + movl acpi_realmode_flags, %edx + movl %edx, realmode_flags - wakeup_start (%eax) movl $0x12345678, real_magic - wakeup_start (%eax) movl $0x12345678, saved_magic popl %ebx diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c index d8cda14fff8..c3750c2c411 100644 --- a/arch/i386/kernel/alternative.c +++ b/arch/i386/kernel/alternative.c @@ -2,12 +2,17 @@ #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/list.h> +#include <linux/kprobes.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> #include <asm/alternative.h> #include <asm/sections.h> +#include <asm/pgtable.h> +#include <asm/mce.h> +#include <asm/nmi.h> -static int noreplace_smp = 0; -static int smp_alt_once = 0; -static int debug_alternative = 0; +#ifdef CONFIG_HOTPLUG_CPU +static int smp_alt_once; static int __init bootonly(char *str) { @@ -15,6 +20,11 @@ static int __init bootonly(char *str) return 1; } __setup("smp-alt-boot", bootonly); +#else +#define smp_alt_once 1 +#endif + +static int debug_alternative; static int __init debug_alt(char *str) { @@ -23,6 +33,8 @@ static int __init debug_alt(char *str) } __setup("debug-alternative", debug_alt); +static int noreplace_smp; + static int __init setup_noreplace_smp(char *str) { noreplace_smp = 1; @@ -144,7 +156,7 @@ static void nop_out(void *insns, unsigned int len) unsigned int noplen = len; if (noplen > ASM_NOP_MAX) noplen = ASM_NOP_MAX; - 
memcpy(insns, noptable[noplen], noplen); + text_poke(insns, noptable[noplen], noplen); insns += noplen; len -= noplen; } } @@ -196,7 +208,7 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) continue; if (*ptr > text_end) continue; - **ptr = 0xf0; /* lock prefix */ + text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */ }; } @@ -354,10 +366,6 @@ void apply_paravirt(struct paravirt_patch_site *start, /* Pad the rest with nops */ nop_out(p->instr + used, p->len - used); } - - /* Sync to be conservative, in case we patched following - * instructions */ - sync_core(); } extern struct paravirt_patch_site __start_parainstructions[], __stop_parainstructions[]; @@ -367,6 +375,14 @@ void __init alternative_instructions(void) { unsigned long flags; + /* The patching is not fully atomic, so try to avoid local interruptions that might execute the to be patched code. Other CPUs are not running. */ + stop_nmi(); +#ifdef CONFIG_MCE + stop_mce(); +#endif + local_irq_save(flags); apply_alternatives(__alt_instructions, __alt_instructions_end); @@ -376,8 +392,6 @@ void __init alternative_instructions(void) #ifdef CONFIG_HOTPLUG_CPU if (num_possible_cpus() < 2) smp_alt_once = 1; -#else - smp_alt_once = 1; #endif #ifdef CONFIG_SMP @@ -401,4 +415,37 @@ void __init alternative_instructions(void) #endif apply_paravirt(__parainstructions, __parainstructions_end); local_irq_restore(flags); + + restart_nmi(); +#ifdef CONFIG_MCE + restart_mce(); +#endif +} + +/* + * Warning: + * When you use this code to patch more than one byte of an instruction + * you need to make sure that other CPUs cannot execute this code in parallel. + * Also no thread must be currently preempted in the middle of these instructions. + * And on the local CPU you need to be protected against NMI or MCE handlers + * seeing an inconsistent instruction while you patch. + */ +void __kprobes text_poke(void *oaddr, unsigned char *opcode, int len) +{ + u8 *addr = oaddr; + if (!pte_write(*lookup_address((unsigned long)addr))) { + struct page *p[2] = { virt_to_page(addr), virt_to_page(addr+PAGE_SIZE) }; + addr = vmap(p, 2, VM_MAP, PAGE_KERNEL); + if (!addr) + return; + addr += ((unsigned long)oaddr) % PAGE_SIZE; + } + memcpy(addr, opcode, len); + sync_core(); + /* Not strictly needed, but can speed CPU recovery up. Ignore cross cacheline case. 
*/ + if (cpu_has_clflush) + asm("clflush (%0) " :: "r" (oaddr) : "memory"); + if (addr != oaddr) + vunmap(addr); } diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 67824f3bb97..bfc6cb7df7e 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -263,6 +263,9 @@ static void lapic_timer_setup(enum clock_event_mode mode, v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write_around(APIC_LVTT, v); break; + case CLOCK_EVT_MODE_RESUME: + /* Nothing to do here */ + break; } local_irq_restore(flags); @@ -315,7 +318,7 @@ static void __devinit setup_APIC_timer(void) #define LAPIC_CAL_LOOPS (HZ/10) -static __initdata volatile int lapic_cal_loops = -1; +static __initdata int lapic_cal_loops = -1; static __initdata long lapic_cal_t1, lapic_cal_t2; static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; @@ -485,7 +488,7 @@ void __init setup_boot_APIC_clock(void) /* Let the interrupts run */ local_irq_enable(); - while(lapic_cal_loops <= LAPIC_CAL_LOOPS) + while (lapic_cal_loops <= LAPIC_CAL_LOOPS) cpu_relax(); local_irq_disable(); @@ -521,6 +524,9 @@ void __init setup_boot_APIC_clock(void) */ if (nmi_watchdog != NMI_IO_APIC) lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + else + printk(KERN_WARNING "APIC timer registered as dummy," + " due to nmi_watchdog=1!\n"); } /* Setup the lapic or request the broadcast */ diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 4112afe712b..47001d50a08 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -222,6 +222,7 @@ #include <linux/capability.h> #include <linux/device.h> #include <linux/kernel.h> +#include <linux/freezer.h> #include <linux/smp.h> #include <linux/dmi.h> #include <linux/suspend.h> @@ -2311,7 +2312,6 @@ static int __init apm_init(void) remove_proc_entry("apm", NULL); return err; } - kapmd_task->flags |= PF_NOFREEZE; wake_up_process(kapmd_task); if (num_online_cpus() > 1 && !smp ) { diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 27a776c9044..7288ac88d74 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c @@ -17,6 +17,13 @@ #include <asm/thread_info.h> #include <asm/elf.h> +#include <xen/interface/xen.h> + +#ifdef CONFIG_LGUEST_GUEST +#include <linux/lguest.h> +#include "../../../drivers/lguest/lg.h" +#endif + #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) @@ -59,6 +66,7 @@ void foo(void) OFFSET(TI_addr_limit, thread_info, addr_limit); OFFSET(TI_restart_block, thread_info, restart_block); OFFSET(TI_sysenter_return, thread_info, sysenter_return); + OFFSET(TI_cpu, thread_info, cpu); BLANK(); OFFSET(GDS_size, Xgt_desc_struct, size); @@ -115,4 +123,25 @@ void foo(void) OFFSET(PARAVIRT_iret, paravirt_ops, iret); OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0); #endif + +#ifdef CONFIG_XEN + BLANK(); + OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); + OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); +#endif + +#ifdef CONFIG_LGUEST_GUEST + BLANK(); + OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); + OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc); + OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc); + OFFSET(LGUEST_PAGES_host_cr3, lguest_pages, state.host_cr3); + OFFSET(LGUEST_PAGES_host_sp, lguest_pages, state.host_sp); + OFFSET(LGUEST_PAGES_guest_gdt_desc, lguest_pages,state.guest_gdt_desc); + OFFSET(LGUEST_PAGES_guest_idt_desc, 
lguest_pages,state.guest_idt_desc); + OFFSET(LGUEST_PAGES_guest_gdt, lguest_pages, state.guest_gdt); + OFFSET(LGUEST_PAGES_regs_trapnum, lguest_pages, regs.trapnum); + OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); + OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); +#endif } diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile index 74f27a463db..778396c78d6 100644 --- a/arch/i386/kernel/cpu/Makefile +++ b/arch/i386/kernel/cpu/Makefile @@ -8,8 +8,7 @@ obj-y += amd.o obj-y += cyrix.o obj-y += centaur.o obj-y += transmeta.o -obj-y += intel.o intel_cacheinfo.o -obj-y += rise.o +obj-y += intel.o intel_cacheinfo.o addon_cpuid_features.o obj-y += nexgen.o obj-y += umc.o diff --git a/arch/i386/kernel/cpu/addon_cpuid_features.c b/arch/i386/kernel/cpu/addon_cpuid_features.c new file mode 100644 index 00000000000..3e91d3ee26e --- /dev/null +++ b/arch/i386/kernel/cpu/addon_cpuid_features.c @@ -0,0 +1,50 @@ + +/* + * Routines to identify additional cpu features that are scattered in + * cpuid space. + */ + +#include <linux/cpu.h> + +#include <asm/processor.h> + +struct cpuid_bit { + u16 feature; + u8 reg; + u8 bit; + u32 level; +}; + +enum cpuid_regs { + CR_EAX = 0, + CR_ECX, + CR_EDX, + CR_EBX }; + +void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) +{ + u32 max_level; + u32 regs[4]; + const struct cpuid_bit *cb; + + static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, + { 0, 0, 0, 0 } + }; + + for (cb = cpuid_bits; cb->feature; cb++) { + + /* Verify that the level is valid */ + max_level = cpuid_eax(cb->level & 0xffff0000); + if (max_level < cb->level || + max_level > (cb->level | 0xffff)) + continue; + + cpuid(cb->level, &regs[CR_EAX], &regs[CR_EBX], + &regs[CR_ECX], &regs[CR_EDX]); + + if (regs[cb->reg] & (1 << cb->bit)) + set_bit(cb->feature, c->x86_capability); + } +} diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 6f47eeeb93e..c7ba455d5ac 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -231,6 +231,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) switch (c->x86) { case 15: + /* Use K8 tuning for Fam10h and Fam11h */ + case 0x10: + case 0x11: set_bit(X86_FEATURE_K8, c->x86_capability); break; case 6: @@ -272,8 +275,12 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } #endif - if (cpuid_eax(0x80000000) >= 0x80000006) - num_cache_leaves = 3; + if (cpuid_eax(0x80000000) >= 0x80000006) { + if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) + num_cache_leaves = 4; + else + num_cache_leaves = 3; + } if (amd_apic_timer_broken()) set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability); diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 794d593c47e..d506201d397 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -353,6 +353,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c) if ( xlvl >= 0x80000004 ) get_model_name(c); /* Default name */ } + + init_scattered_cpuid_features(c); } early_intel_workaround(c); @@ -604,7 +606,6 @@ extern int nsc_init_cpu(void); extern int amd_init_cpu(void); extern int centaur_init_cpu(void); extern int transmeta_init_cpu(void); -extern int rise_init_cpu(void); extern int nexgen_init_cpu(void); extern int umc_init_cpu(void); @@ -616,7 +617,6 @@ void __init early_cpu_init(void) amd_init_cpu(); centaur_init_cpu(); transmeta_init_cpu(); - rise_init_cpu(); nexgen_init_cpu(); umc_init_cpu(); early_cpu_detect(); } diff --git 
a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig index e912aae9473..094118ba00d 100644 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig @@ -90,10 +90,17 @@ config X86_POWERNOW_K8 If in doubt, say N. config X86_POWERNOW_K8_ACPI - bool - depends on X86_POWERNOW_K8 && ACPI_PROCESSOR - depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m) + bool "ACPI Support" + select ACPI_PROCESSOR + depends on X86_POWERNOW_K8 default y + help + This provides access to the K8s Processor Performance States via ACPI. + This driver is probably required for CPUFreq to work with multi-socket and + SMP systems. It is not required on at least some single-socket yet + multi-core systems, even if SMP is enabled. + + It is safe to say Y here. config X86_GX_SUSPMOD tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" @@ -109,7 +116,7 @@ config X86_GX_SUSPMOD config X86_SPEEDSTEP_CENTRINO tristate "Intel Enhanced SpeedStep" select CPU_FREQ_TABLE - select X86_SPEEDSTEP_CENTRINO_TABLE if (!X86_SPEEDSTEP_CENTRINO_ACPI) + select X86_SPEEDSTEP_CENTRINO_TABLE help This adds the CPUFreq driver for Enhanced SpeedStep enabled mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However, @@ -121,20 +128,6 @@ config X86_SPEEDSTEP_CENTRINO If in doubt, say N. -config X86_SPEEDSTEP_CENTRINO_ACPI - bool "Use ACPI tables to decode valid frequency/voltage (deprecated)" - depends on X86_SPEEDSTEP_CENTRINO && ACPI_PROCESSOR - depends on !(X86_SPEEDSTEP_CENTRINO = y && ACPI_PROCESSOR = m) - help - This is deprecated and this functionality is now merged into - acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of - speedstep_centrino. - Use primarily the information provided in the BIOS ACPI tables - to determine valid CPU frequency and voltage pairings. It is - required for the driver to work on non-Banias CPUs. - - If in doubt, say Y. - config X86_SPEEDSTEP_CENTRINO_TABLE bool "Built-in tables for Banias CPUs" depends on X86_SPEEDSTEP_CENTRINO @@ -230,7 +223,7 @@ comment "shared options" config X86_ACPI_CPUFREQ_PROC_INTF bool "/proc/acpi/processor/../performance interface (deprecated)" depends on PROC_FS - depends on X86_ACPI_CPUFREQ || X86_SPEEDSTEP_CENTRINO_ACPI || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI + depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI help This enables the deprecated /proc/acpi/processor/../performance interface. 
While it is helpful for debugging, the generic, diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 10baa3501ed..6f846bee210 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -167,11 +167,13 @@ static void do_drv_read(struct drv_cmd *cmd) static void do_drv_write(struct drv_cmd *cmd) { - u32 h = 0; + u32 lo, hi; switch (cmd->type) { case SYSTEM_INTEL_MSR_CAPABLE: - wrmsr(cmd->addr.msr.reg, cmd->val, h); + rdmsr(cmd->addr.msr.reg, lo, hi); + lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE); + wrmsr(cmd->addr.msr.reg, lo, hi); break; case SYSTEM_IO_CAPABLE: acpi_os_write_port((acpi_io_address)cmd->addr.io.port, @@ -372,7 +374,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, struct cpufreq_freqs freqs; cpumask_t online_policy_cpus; struct drv_cmd cmd; - unsigned int msr; unsigned int next_state = 0; /* Index into freq_table */ unsigned int next_perf_state = 0; /* Index into perf table */ unsigned int i; @@ -417,11 +418,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; cmd.addr.msr.reg = MSR_IA32_PERF_CTL; - msr = - (u32) perf->states[next_perf_state]. - control & INTEL_MSR_RANGE; - cmd.val = get_cur_val(online_policy_cpus); - cmd.val = (cmd.val & ~INTEL_MSR_RANGE) | msr; + cmd.val = (u32) perf->states[next_perf_state].control; break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; @@ -668,8 +665,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) data->max_freq = perf->states[0].core_frequency * 1000; /* table init */ for (i=0; i<perf->state_count; i++) { - if (i>0 && perf->states[i].core_frequency == - perf->states[i-1].core_frequency) + if (i>0 && perf->states[i].core_frequency >= + data->freq_table[valid_states-1].frequency / 1000) continue; data->freq_table[valid_states].index = i; diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c index 0d49d73d1b7..66acd503991 100644 --- a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -391,8 +391,6 @@ static struct cpufreq_driver nforce2_driver = { */ static unsigned int nforce2_detect_chipset(void) { - u8 revision; - nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, PCI_ANY_ID, PCI_ANY_ID, NULL); @@ -400,10 +398,8 @@ static unsigned int nforce2_detect_chipset(void) if (nforce2_chipset_dev == NULL) return -ENODEV; - pci_read_config_byte(nforce2_chipset_dev, PCI_REVISION_ID, &revision); - printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n", - revision); + nforce2_chipset_dev->revision); printk(KERN_INFO "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n"); diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c index 6667e9cceb9..461dabc4e49 100644 --- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c @@ -79,7 +79,7 @@ #include <linux/smp.h> #include <linux/cpufreq.h> #include <linux/pci.h> -#include <asm/processor.h> +#include <asm/processor-cyrix.h> #include <asm/errno.h> /* PCI config registers, all at F0 */ @@ -115,7 +115,6 @@ struct gxfreq_params { u8 pci_suscfg; u8 pci_pmer1; u8 pci_pmer2; - u8 pci_rev; struct pci_dev *cs55x0; }; @@ -276,7 +275,7 @@ static void gx_set_cpuspeed(unsigned int khz) 
pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */ pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1); - if (gx_params->pci_rev < 0x10) { /* CS5530(rev 1.2, 1.3) */ + if (gx_params->cs55x0->revision < 0x10) { /* CS5530(rev 1.2, 1.3) */ suscfg = gx_params->pci_suscfg | SUSMOD; } else { /* CS5530A,B.. */ suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE; @@ -471,7 +470,6 @@ static int __init cpufreq_gx_init(void) pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration)); - pci_read_config_byte(params->cs55x0, PCI_REVISION_ID, &params->pci_rev); if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { kfree(params); diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index a3df9c039bd..ef8f0bc3fc7 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -29,6 +29,7 @@ #include <linux/pci.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/delay.h> #include <asm/msr.h> #include <asm/timex.h> @@ -55,7 +56,6 @@ /* Flags */ #define USE_ACPI_C3 (1 << 1) #define USE_NORTHBRIDGE (1 << 2) -#define USE_VT8235 (1 << 3) static int cpu_model; static unsigned int numscales=16; @@ -63,22 +63,19 @@ static unsigned int fsb; static const struct mV_pos *vrm_mV_table; static const unsigned char *mV_vrm_table; -struct f_msr { - u8 vrm; - u8 pos; -}; -static struct f_msr f_msr_table[32]; static unsigned int highest_speed, lowest_speed; /* kHz */ static unsigned int minmult, maxmult; static int can_scale_voltage; static struct acpi_processor *pr = NULL; static struct acpi_processor_cx *cx = NULL; +static u32 acpi_regs_addr; static u8 longhaul_flags; -static u8 longhaul_pos; +static unsigned int longhaul_index; /* Module parameters */ static int scale_voltage; +static int disable_acpi_c3; #define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) @@ -144,7 +141,7 @@ static void do_longhaul1(unsigned int clock_ratio_index) rdmsrl(MSR_VIA_BCR2, bcr2.val); /* Enable software clock multiplier */ bcr2.bits.ESOFTBF = 1; - bcr2.bits.CLOCKMUL = clock_ratio_index; + bcr2.bits.CLOCKMUL = clock_ratio_index & 0xff; /* Sync to timer tick */ safe_halt(); @@ -163,14 +160,12 @@ static void do_longhaul1(unsigned int clock_ratio_index) /* For processor with Longhaul MSR */ -static void do_powersaver(int cx_address, unsigned int clock_ratio_index) +static void do_powersaver(int cx_address, unsigned int clock_ratio_index, + unsigned int dir) { union msr_longhaul longhaul; - u8 dest_pos; u32 t; - dest_pos = f_msr_table[clock_ratio_index].pos; - rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); /* Setup new frequency */ longhaul.bits.RevisionKey = longhaul.bits.RevisionID; @@ -178,11 +173,11 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; /* Setup new voltage */ if (can_scale_voltage) - longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm; + longhaul.bits.SoftVID = (clock_ratio_index >> 8) & 0x1f; /* Sync to timer tick */ safe_halt(); /* Raise voltage if necessary */ - if (can_scale_voltage && longhaul_pos < dest_pos) { + if (can_scale_voltage && dir) { longhaul.bits.EnableSoftVID = 1; wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); /* Change voltage */ @@ -199,7 +194,6 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) } longhaul.bits.EnableSoftVID = 0; wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - longhaul_pos = dest_pos; } /* Change frequency on next halt or sleep */ @@ -220,7 +214,7 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); /* Reduce voltage if necessary */ - if (can_scale_voltage && longhaul_pos > dest_pos) { + if (can_scale_voltage && !dir) { longhaul.bits.EnableSoftVID = 1; wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); /* Change voltage */ @@ -237,7 +231,6 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) } longhaul.bits.EnableSoftVID = 0; wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - longhaul_pos = dest_pos; } } @@ -248,25 +241,28 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) * Sets a new clock ratio. */ -static void longhaul_setstate(unsigned int clock_ratio_index) +static void longhaul_setstate(unsigned int table_index) { + unsigned int clock_ratio_index; int speed, mult; struct cpufreq_freqs freqs; - static unsigned int old_ratio=-1; unsigned long flags; unsigned int pic1_mask, pic2_mask; + u16 bm_status = 0; + u32 bm_timeout = 1000; + unsigned int dir = 0; - if (old_ratio == clock_ratio_index) - return; - old_ratio = clock_ratio_index; - - mult = clock_ratio[clock_ratio_index]; + clock_ratio_index = longhaul_table[table_index].index; + /* Safety precautions */ + mult = clock_ratio[clock_ratio_index & 0x1f]; if (mult == -1) return; - speed = calc_speed(mult); if ((speed > highest_speed) || (speed < lowest_speed)) return; + /* Voltage transition before frequency transition? */ + if (can_scale_voltage && longhaul_index < table_index) + dir = 1; freqs.old = calc_speed(longhaul_get_cpu_mult()); freqs.new = speed; @@ -285,11 +281,24 @@ static void longhaul_setstate(unsigned int clock_ratio_index) outb(0xFF,0xA1); /* Overkill */ outb(0xFE,0x21); /* TMR0 only */ + /* Wait while PCI bus is busy. 
*/ + if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE + || ((pr != NULL) && pr->flags.bm_control))) { + bm_status = inw(acpi_regs_addr); + bm_status &= 1 << 4; + while (bm_status && bm_timeout) { + outw(1 << 4, acpi_regs_addr); + bm_timeout--; + bm_status = inw(acpi_regs_addr); + bm_status &= 1 << 4; + } + } + if (longhaul_flags & USE_NORTHBRIDGE) { /* Disable AGP and PCI arbiters */ outb(3, 0x22); } else if ((pr != NULL) && pr->flags.bm_control) { - /* Disable bus master arbitration */ + /* Disable bus master arbitration */ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1); } switch (longhaul_version) { @@ -314,9 +323,9 @@ static void longhaul_setstate(unsigned int clock_ratio_index) if (longhaul_flags & USE_ACPI_C3) { /* Don't allow wakeup */ acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); - do_powersaver(cx->address, clock_ratio_index); + do_powersaver(cx->address, clock_ratio_index, dir); } else { - do_powersaver(0, clock_ratio_index); + do_powersaver(0, clock_ratio_index, dir); } break; } @@ -336,6 +345,9 @@ static void longhaul_setstate(unsigned int clock_ratio_index) freqs.new = calc_speed(longhaul_get_cpu_mult()); cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + if (!bm_timeout) + printk(KERN_INFO PFX "Warning: Timeout while waiting for idle PCI bus.\n"); } /* @@ -369,7 +381,8 @@ static int guess_fsb(int mult) static int __init longhaul_get_ranges(void) { - unsigned int j, k = 0; + unsigned int i, j, k = 0; + unsigned int ratio; int mult; /* Get current frequency */ @@ -423,8 +436,7 @@ static int __init longhaul_get_ranges(void) if(!longhaul_table) return -ENOMEM; - for (j=0; j < numscales; j++) { - unsigned int ratio; + for (j = 0; j < numscales; j++) { ratio = clock_ratio[j]; if (ratio == -1) continue; @@ -434,13 +446,41 @@ static int __init longhaul_get_ranges(void) longhaul_table[k].index = j; k++; } + if (k <= 1) { + kfree(longhaul_table); + return -ENODEV; + } + /* Sort */ + for (j = 0; j < k - 1; j++) { + unsigned int min_f, min_i; + min_f = longhaul_table[j].frequency; + min_i = j; + for (i = j + 1; i < k; i++) { + if (longhaul_table[i].frequency < min_f) { + min_f = longhaul_table[i].frequency; + min_i = i; + } + } + if (min_i != j) { + unsigned int temp; + temp = longhaul_table[j].frequency; + longhaul_table[j].frequency = longhaul_table[min_i].frequency; + longhaul_table[min_i].frequency = temp; + temp = longhaul_table[j].index; + longhaul_table[j].index = longhaul_table[min_i].index; + longhaul_table[min_i].index = temp; + } + } longhaul_table[k].frequency = CPUFREQ_TABLE_END; - if (!k) { - kfree (longhaul_table); - return -EINVAL; - } + /* Find index we are running on */ + for (j = 0; j < k; j++) { + if (clock_ratio[longhaul_table[j].index & 0x1f] == mult) { + longhaul_index = j; + break; + } + } return 0; } @@ -448,7 +488,7 @@ static int __init longhaul_get_ranges(void) static void __init longhaul_setup_voltagescaling(void) { union msr_longhaul longhaul; - struct mV_pos minvid, maxvid; + struct mV_pos minvid, maxvid, vid; unsigned int j, speed, pos, kHz_step, numvscales; int min_vid_speed; @@ -459,11 +499,11 @@ static void __init longhaul_setup_voltagescaling(void) } if (!longhaul.bits.VRMRev) { - printk (KERN_INFO PFX "VRM 8.5\n"); + printk(KERN_INFO PFX "VRM 8.5\n"); vrm_mV_table = &vrm85_mV[0]; mV_vrm_table = &mV_vrm85[0]; } else { - printk (KERN_INFO PFX "Mobile VRM\n"); + printk(KERN_INFO PFX "Mobile VRM\n"); if (cpu_model < CPU_NEHEMIAH) return; vrm_mV_table = &mobilevrm_mV[0]; @@ -523,7 +563,6 @@ static void __init 
longhaul_setup_voltagescaling(void) /* Calculate kHz for one voltage step */ kHz_step = (highest_speed - min_vid_speed) / numvscales; - j = 0; while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { speed = longhaul_table[j].frequency; @@ -531,15 +570,14 @@ static void __init longhaul_setup_voltagescaling(void) pos = (speed - min_vid_speed) / kHz_step + minvid.pos; else pos = minvid.pos; - f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos]; - f_msr_table[longhaul_table[j].index].pos = pos; + longhaul_table[j].index |= mV_vrm_table[pos] << 8; + vid = vrm_mV_table[mV_vrm_table[pos]]; + printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", speed, j, vid.mV); j++; } - longhaul_pos = maxvid.pos; can_scale_voltage = 1; - printk(KERN_INFO PFX "Voltage scaling enabled. " - "Use of \"conservative\" governor is highly recommended.\n"); + printk(KERN_INFO PFX "Voltage scaling enabled.\n"); } @@ -553,15 +591,44 @@ static int longhaul_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { unsigned int table_index = 0; - unsigned int new_clock_ratio = 0; + unsigned int i; + unsigned int dir = 0; + u8 vid, current_vid; if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index)) return -EINVAL; - new_clock_ratio = longhaul_table[table_index].index & 0xFF; - - longhaul_setstate(new_clock_ratio); + /* Don't set same frequency again */ + if (longhaul_index == table_index) + return 0; + if (!can_scale_voltage) + longhaul_setstate(table_index); + else { + /* On test system voltage transitions exceeding single + * step up or down were turning motherboard off. Both + * "ondemand" and "userspace" are unsafe. C7 is doing + * this in hardware, C3 is old and we need to do this + * in software. 
*/ + i = longhaul_index; + current_vid = (longhaul_table[longhaul_index].index >> 8) & 0x1f; + if (table_index > longhaul_index) + dir = 1; + while (i != table_index) { + vid = (longhaul_table[i].index >> 8) & 0x1f; + if (vid != current_vid) { + longhaul_setstate(i); + current_vid = vid; + msleep(200); + } + if (dir) + i++; + else + i--; + } + longhaul_setstate(table_index); + } + longhaul_index = table_index; return 0; } @@ -590,11 +657,10 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle, static int enable_arbiter_disable(void) { struct pci_dev *dev; - int status; + int status = 1; int reg; u8 pci_cmd; - status = 1; /* Find PLE133 host bridge */ reg = 0x78; dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, @@ -627,13 +693,17 @@ static int enable_arbiter_disable(void) return 0; } -static int longhaul_setup_vt8235(void) +static int longhaul_setup_southbridge(void) { struct pci_dev *dev; u8 pci_cmd; /* Find VT8235 southbridge */ dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); + if (dev == NULL) + /* Find VT8237 southbridge */ + dev = pci_get_device(PCI_VENDOR_ID_VIA, + PCI_DEVICE_ID_VIA_8237, NULL); if (dev != NULL) { /* Set transition time to max */ pci_read_config_byte(dev, 0xec, &pci_cmd); @@ -645,6 +715,14 @@ static int longhaul_setup_vt8235(void) pci_read_config_byte(dev, 0xe5, &pci_cmd); pci_cmd |= 1 << 7; pci_write_config_byte(dev, 0xe5, pci_cmd); + /* Get address of ACPI registers block*/ + pci_read_config_byte(dev, 0x81, &pci_cmd); + if (pci_cmd & 1 << 7) { + pci_read_config_dword(dev, 0x88, &acpi_regs_addr); + acpi_regs_addr &= 0xff00; + printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", acpi_regs_addr); + } + pci_dev_put(dev); return 1; } @@ -657,7 +735,6 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) char *cpuname=NULL; int ret; u32 lo, hi; - int vt8235_present; /* Check what we have on this motherboard */ switch (c->x86_model) { @@ -755,7 +832,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) }; /* Doesn't hurt */ - vt8235_present = longhaul_setup_vt8235(); + longhaul_setup_southbridge(); /* Find ACPI data for processor */ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, @@ -765,35 +842,29 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) /* Check ACPI support for C3 state */ if (pr != NULL && longhaul_version == TYPE_POWERSAVER) { cx = &pr->power.states[ACPI_STATE_C3]; - if (cx->address > 0 && cx->latency <= 1000) { + if (cx->address > 0 && cx->latency <= 1000) longhaul_flags |= USE_ACPI_C3; - goto print_support_type; - } } + /* Disable if it isn't working */ + if (disable_acpi_c3) + longhaul_flags &= ~USE_ACPI_C3; /* Check if northbridge is friendly */ - if (enable_arbiter_disable()) { + if (enable_arbiter_disable()) longhaul_flags |= USE_NORTHBRIDGE; - goto print_support_type; - } - /* Use VT8235 southbridge if present */ - if (longhaul_version == TYPE_POWERSAVER && vt8235_present) { - longhaul_flags |= USE_VT8235; - goto print_support_type; - } + /* Check ACPI support for bus master arbiter disable */ - if ((pr == NULL) || !(pr->flags.bm_control)) { + if (!(longhaul_flags & USE_ACPI_C3 + || longhaul_flags & USE_NORTHBRIDGE) + && ((pr == NULL) || !(pr->flags.bm_control))) { printk(KERN_ERR PFX "No ACPI support. 
Unsupported northbridge.\n"); return -ENODEV; } -print_support_type: if (longhaul_flags & USE_NORTHBRIDGE) - printk (KERN_INFO PFX "Using northbridge support.\n"); - else if (longhaul_flags & USE_VT8235) - printk (KERN_INFO PFX "Using VT8235 support.\n"); - else - printk (KERN_INFO PFX "Using ACPI support.\n"); + printk(KERN_INFO PFX "Using northbridge support.\n"); + if (longhaul_flags & USE_ACPI_C3) + printk(KERN_INFO PFX "Using ACPI support.\n"); ret = longhaul_get_ranges(); if (ret != 0) @@ -885,6 +956,9 @@ static void __exit longhaul_exit(void) kfree(longhaul_table); } +module_param (disable_acpi_c3, int, 0644); +MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); + module_param (scale_voltage, int, 0644); MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h index 102548f1284..4fcc320997d 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.h +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h @@ -180,7 +180,7 @@ static const int __initdata ezrat_clock_ratio[32] = { -1, /* 0000 -> RESERVED (10.0x) */ 110, /* 0001 -> 11.0x */ - 120, /* 0010 -> 12.0x */ + -1, /* 0010 -> 12.0x */ -1, /* 0011 -> RESERVED (9.0x)*/ 105, /* 0100 -> 10.5x */ 115, /* 0101 -> 11.5x */ @@ -237,7 +237,7 @@ static const int __initdata ezrat_eblcr[32] = { static const int __initdata nehemiah_clock_ratio[32] = { 100, /* 0000 -> 10.0x */ - 160, /* 0001 -> 16.0x */ + -1, /* 0001 -> 16.0x */ 40, /* 0010 -> 4.0x */ 90, /* 0011 -> 9.0x */ 95, /* 0100 -> 9.5x */ @@ -252,10 +252,10 @@ static const int __initdata nehemiah_clock_ratio[32] = { 75, /* 1101 -> 7.5x */ 85, /* 1110 -> 8.5x */ 120, /* 1111 -> 12.0x */ - 100, /* 0000 -> 10.0x */ + -1, /* 0000 -> 10.0x */ 110, /* 0001 -> 11.0x */ - 120, /* 0010 -> 12.0x */ - 90, /* 0011 -> 9.0x */ + -1, /* 0010 -> 12.0x */ + -1, /* 0011 -> 9.0x */ 105, /* 0100 -> 10.5x */ 115, /* 0101 -> 11.5x */ 125, /* 0110 -> 12.5x */ @@ -267,7 +267,7 @@ static const int __initdata nehemiah_clock_ratio[32] = { 145, /* 1100 -> 14.5x */ 155, /* 1101 -> 15.5x */ -1, /* 1110 -> RESERVED (13.0x) */ - 120, /* 1111 -> 12.0x */ + -1, /* 1111 -> 12.0x */ }; static const int __initdata nehemiah_eblcr[32] = { diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 4ade55c5f33..34ed53a0673 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -599,14 +599,17 @@ static void print_basics(struct powernow_k8_data *data) for (j = 0; j < data->numps; j++) { if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) { if (cpu_family == CPU_HW_PSTATE) { - printk(KERN_INFO PFX " %d : fid 0x%x gid 0x%x (%d MHz)\n", j, (data->powernow_table[j].index & 0xff00) >> 8, - (data->powernow_table[j].index & 0xff0000) >> 16, - data->powernow_table[j].frequency/1000); + printk(KERN_INFO PFX " %d : fid 0x%x did 0x%x (%d MHz)\n", + j, + (data->powernow_table[j].index & 0xff00) >> 8, + (data->powernow_table[j].index & 0xff0000) >> 16, + data->powernow_table[j].frequency/1000); } else { - printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", j, - data->powernow_table[j].index & 0xff, - data->powernow_table[j].frequency/1000, - data->powernow_table[j].index >> 8); + printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", + j, + data->powernow_table[j].index & 0xff, + data->powernow_table[j].frequency/1000, + data->powernow_table[j].index >> 8); } } } @@ -1086,7 +1089,7 @@ static int powernowk8_target(struct 
cpufreq_policy *pol, unsigned targfreq, unsi if (cpu_family == CPU_HW_PSTATE) dprintk("targ: curr fid 0x%x, did 0x%x\n", - data->currfid, data->currvid); + data->currfid, data->currdid); else { dprintk("targ: curr fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); @@ -1322,16 +1325,22 @@ static struct cpufreq_driver cpufreq_amd64_driver = { static int __cpuinit powernowk8_init(void) { unsigned int i, supported_cpus = 0; + unsigned int booted_cores = 1; for_each_online_cpu(i) { if (check_supported_cpu(i)) supported_cpus++; } +#ifdef CONFIG_SMP + booted_cores = cpu_data[0].booted_cores; +#endif + if (supported_cpus == num_online_cpus()) { printk(KERN_INFO PFX "Found %d %s " - "processors (" VERSION ")\n", supported_cpus, - boot_cpu_data.x86_model_id); + "processors (%d cpu cores) (" VERSION ")\n", + supported_cpus/booted_cores, + boot_cpu_data.x86_model_id, supported_cpus); return cpufreq_register_driver(&cpufreq_amd64_driver); } diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index 35489fd6885..6c5dc2c85ae 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -21,12 +21,6 @@ #include <linux/delay.h> #include <linux/compiler.h> -#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI -#include <linux/acpi.h> -#include <linux/dmi.h> -#include <acpi/processor.h> -#endif - #include <asm/msr.h> #include <asm/processor.h> #include <asm/cpufeature.h> @@ -257,9 +251,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) /* Matched a non-match */ dprintk("no table support for CPU model \"%s\"\n", cpu->x86_model_id); -#ifndef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI - dprintk("try compiling with CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI enabled\n"); -#endif + dprintk("try using the acpi-cpufreq driver\n"); return -ENOENT; } @@ -346,213 +338,6 @@ static unsigned int get_cur_freq(unsigned int cpu) } -#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI - -static struct acpi_processor_performance *acpi_perf_data[NR_CPUS]; - -/* - * centrino_cpu_early_init_acpi - Do the preregistering with ACPI P-States - * library - * - * Before doing the actual init, we need to do _PSD related setup whenever - * supported by the BIOS. These are handled by this early_init routine. - */ -static int centrino_cpu_early_init_acpi(void) -{ - unsigned int i, j; - struct acpi_processor_performance *data; - - for_each_possible_cpu(i) { - data = kzalloc(sizeof(struct acpi_processor_performance), - GFP_KERNEL); - if (!data) { - for_each_possible_cpu(j) { - kfree(acpi_perf_data[j]); - acpi_perf_data[j] = NULL; - } - return (-ENOMEM); - } - acpi_perf_data[i] = data; - } - - acpi_processor_preregister_performance(acpi_perf_data); - return 0; -} - - -#ifdef CONFIG_SMP -/* - * Some BIOSes do SW_ANY coordination internally, either set it up in hw - * or do it in BIOS firmware and won't inform about it to OS. If not - * detected, this has a side effect of making CPU run at a different speed - * than OS intended it to run at. Detect it and handle it cleanly. 
- */ -static int bios_with_sw_any_bug; -static int sw_any_bug_found(struct dmi_system_id *d) -{ - bios_with_sw_any_bug = 1; - return 0; -} - -static struct dmi_system_id sw_any_bug_dmi_table[] = { - { - .callback = sw_any_bug_found, - .ident = "Supermicro Server X6DLP", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), - DMI_MATCH(DMI_BIOS_VERSION, "080010"), - DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), - }, - }, - { } -}; -#endif - -/* - * centrino_cpu_init_acpi - register with ACPI P-States library - * - * Register with the ACPI P-States library (part of drivers/acpi/processor.c) - * in order to determine correct frequency and voltage pairings by reading - * the _PSS of the ACPI DSDT or SSDT tables. - */ -static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) -{ - unsigned long cur_freq; - int result = 0, i; - unsigned int cpu = policy->cpu; - struct acpi_processor_performance *p; - - p = acpi_perf_data[cpu]; - - /* register with ACPI core */ - if (acpi_processor_register_performance(p, cpu)) { - dprintk(PFX "obtaining ACPI data failed\n"); - return -EIO; - } - - policy->shared_type = p->shared_type; - /* - * Will let policy->cpus know about dependency only when software - * coordination is required. - */ - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || - policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { - policy->cpus = p->shared_cpu_map; - } - -#ifdef CONFIG_SMP - dmi_check_system(sw_any_bug_dmi_table); - if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { - policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; - policy->cpus = cpu_core_map[cpu]; - } -#endif - - /* verify the acpi_data */ - if (p->state_count <= 1) { - dprintk("No P-States\n"); - result = -ENODEV; - goto err_unreg; - } - - if ((p->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || - (p->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { - dprintk("Invalid control/status registers (%x - %x)\n", - p->control_register.space_id, p->status_register.space_id); - result = -EIO; - goto err_unreg; - } - - for (i=0; i<p->state_count; i++) { - if ((p->states[i].control & INTEL_MSR_RANGE) != - (p->states[i].status & INTEL_MSR_RANGE)) { - dprintk("Different MSR bits in control (%llu) and status (%llu)\n", - p->states[i].control, p->states[i].status); - result = -EINVAL; - goto err_unreg; - } - - if (!p->states[i].core_frequency) { - dprintk("Zero core frequency for state %u\n", i); - result = -EINVAL; - goto err_unreg; - } - - if (p->states[i].core_frequency > p->states[0].core_frequency) { - dprintk("P%u has larger frequency (%llu) than P0 (%llu), skipping\n", i, - p->states[i].core_frequency, p->states[0].core_frequency); - p->states[i].core_frequency = 0; - continue; - } - } - - centrino_model[cpu] = kzalloc(sizeof(struct cpu_model), GFP_KERNEL); - if (!centrino_model[cpu]) { - result = -ENOMEM; - goto err_unreg; - } - - centrino_model[cpu]->model_name=NULL; - centrino_model[cpu]->max_freq = p->states[0].core_frequency * 1000; - centrino_model[cpu]->op_points = kmalloc(sizeof(struct cpufreq_frequency_table) * - (p->state_count + 1), GFP_KERNEL); - if (!centrino_model[cpu]->op_points) { - result = -ENOMEM; - goto err_kfree; - } - - for (i=0; i<p->state_count; i++) { - centrino_model[cpu]->op_points[i].index = p->states[i].control & INTEL_MSR_RANGE; - centrino_model[cpu]->op_points[i].frequency = p->states[i].core_frequency * 1000; - dprintk("adding state %i with frequency %u and control value %04x\n", - i, centrino_model[cpu]->op_points[i].frequency, 
centrino_model[cpu]->op_points[i].index); - } - centrino_model[cpu]->op_points[p->state_count].frequency = CPUFREQ_TABLE_END; - - cur_freq = get_cur_freq(cpu); - - for (i=0; i<p->state_count; i++) { - if (!p->states[i].core_frequency) { - dprintk("skipping state %u\n", i); - centrino_model[cpu]->op_points[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } - - if (extract_clock(centrino_model[cpu]->op_points[i].index, cpu, 0) != - (centrino_model[cpu]->op_points[i].frequency)) { - dprintk("Invalid encoded frequency (%u vs. %u)\n", - extract_clock(centrino_model[cpu]->op_points[i].index, cpu, 0), - centrino_model[cpu]->op_points[i].frequency); - result = -EINVAL; - goto err_kfree_all; - } - - if (cur_freq == centrino_model[cpu]->op_points[i].frequency) - p->state = i; - } - - /* notify BIOS that we exist */ - acpi_processor_notify_smm(THIS_MODULE); - printk("speedstep-centrino with X86_SPEEDSTEP_CENTRINO_ACPI " - "config is deprecated.\n " - "Use X86_ACPI_CPUFREQ (acpi-cpufreq) instead.\n" ); - - return 0; - - err_kfree_all: - kfree(centrino_model[cpu]->op_points); - err_kfree: - kfree(centrino_model[cpu]); - err_unreg: - acpi_processor_unregister_performance(p, cpu); - dprintk(PFX "invalid ACPI data\n"); - return (result); -} -#else -static inline int centrino_cpu_init_acpi(struct cpufreq_policy *policy) { return -ENODEV; } -static inline int centrino_cpu_early_init_acpi(void) { return 0; } -#endif - static int centrino_cpu_init(struct cpufreq_policy *policy) { struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; @@ -568,27 +353,25 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) centrino_driver.flags |= CPUFREQ_CONST_LOOPS; - if (centrino_cpu_init_acpi(policy)) { - if (policy->cpu != 0) - return -ENODEV; + if (policy->cpu != 0) + return -ENODEV; - for (i = 0; i < N_IDS; i++) - if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) - break; + for (i = 0; i < N_IDS; i++) + if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) + break; - if (i != N_IDS) - centrino_cpu[policy->cpu] = &cpu_ids[i]; + if (i != N_IDS) + centrino_cpu[policy->cpu] = &cpu_ids[i]; - if (!centrino_cpu[policy->cpu]) { - dprintk("found unsupported CPU with " - "Enhanced SpeedStep: send /proc/cpuinfo to " - MAINTAINER "\n"); - return -ENODEV; - } + if (!centrino_cpu[policy->cpu]) { + dprintk("found unsupported CPU with " + "Enhanced SpeedStep: send /proc/cpuinfo to " + MAINTAINER "\n"); + return -ENODEV; + } - if (centrino_cpu_init_table(policy)) { - return -ENODEV; - } + if (centrino_cpu_init_table(policy)) { + return -ENODEV; } /* Check to see if Enhanced SpeedStep is enabled, and try to @@ -634,20 +417,6 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy) cpufreq_frequency_table_put_attr(cpu); -#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI - if (!centrino_model[cpu]->model_name) { - static struct acpi_processor_performance *p; - - if (acpi_perf_data[cpu]) { - p = acpi_perf_data[cpu]; - dprintk("unregistering and freeing ACPI data\n"); - acpi_processor_unregister_performance(p, cpu); - kfree(centrino_model[cpu]->op_points); - kfree(centrino_model[cpu]); - } - } -#endif - centrino_model[cpu] = NULL; return 0; @@ -849,25 +618,12 @@ static int __init centrino_init(void) if (!cpu_has(cpu, X86_FEATURE_EST)) return -ENODEV; - centrino_cpu_early_init_acpi(); - return cpufreq_register_driver(&centrino_driver); } static void __exit centrino_exit(void) { -#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI - unsigned int j; -#endif - cpufreq_unregister_driver(&centrino_driver); - -#ifdef 
CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI - for_each_possible_cpu(j) { - kfree(acpi_perf_data[j]); - acpi_perf_data[j] = NULL; - } -#endif } MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>"); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c index 698f980eb44..a5b2346faf1 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c @@ -205,7 +205,6 @@ static unsigned int speedstep_detect_chipset (void) * host brige. Abort on these systems. */ static struct pci_dev *hostbridge; - u8 rev = 0; hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_MC, @@ -216,8 +215,7 @@ static unsigned int speedstep_detect_chipset (void) if (!hostbridge) return 2; /* 2-M */ - pci_read_config_byte(hostbridge, PCI_REVISION_ID, &rev); - if (rev < 5) { + if (hostbridge->revision < 5) { dprintk("hostbridge does not support speedstep\n"); speedstep_chipset_dev = NULL; pci_dev_put(hostbridge); diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index e88d2fba156..122d2d75aa9 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c @@ -4,7 +4,7 @@ #include <linux/pci.h> #include <asm/dma.h> #include <asm/io.h> -#include <asm/processor.h> +#include <asm/processor-cyrix.h> #include <asm/timer.h> #include <asm/pci-direct.h> #include <asm/tsc.h> diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index e5be819492e..d5a456d27d8 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -4,7 +4,7 @@ * Changes: * Venkatesh Pallipadi : Adding cache identification through cpuid(4) * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. - * Andi Kleen : CPUID4 emulation on AMD. + * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. */ #include <linux/init.h> @@ -135,7 +135,7 @@ unsigned short num_cache_leaves; /* AMD doesn't have CPUID4. Emulate it here to report the same information to the user. This makes some assumptions about the machine: - No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs. + L2 not shared, no SMT etc. that is currently true on AMD CPUs. In theory the TLBs could be reported as fake type (they are in "dummy"). Maybe later */ @@ -159,13 +159,26 @@ union l2_cache { unsigned val; }; +union l3_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 4; + unsigned assoc : 4; + unsigned res : 2; + unsigned size_encoded : 14; + }; + unsigned val; +}; + static const unsigned short assocs[] = { [1] = 1, [2] = 2, [4] = 4, [6] = 8, - [8] = 16, + [8] = 16, [0xa] = 32, [0xb] = 48, + [0xc] = 64, [0xf] = 0xffff // ?? 
- }; -static const unsigned char levels[] = { 1, 1, 2 }; -static const unsigned char types[] = { 1, 2, 3 }; +}; + +static const unsigned char levels[] = { 1, 1, 2, 3 }; +static const unsigned char types[] = { 1, 2, 3, 3 }; static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, union _cpuid4_leaf_ebx *ebx, @@ -175,37 +188,58 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, unsigned line_size, lines_per_tag, assoc, size_in_kb; union l1_cache l1i, l1d; union l2_cache l2; + union l3_cache l3; + union l1_cache *l1 = &l1d; eax->full = 0; ebx->full = 0; ecx->full = 0; cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); - cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy); - - if (leaf > 2 || !l1d.val || !l1i.val || !l2.val) - return; - - eax->split.is_self_initializing = 1; - eax->split.type = types[leaf]; - eax->split.level = levels[leaf]; - eax->split.num_threads_sharing = 0; - eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; - - if (leaf <= 1) { - union l1_cache *l1 = leaf == 0 ? &l1d : &l1i; + cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val); + + switch (leaf) { + case 1: + l1 = &l1i; + case 0: + if (!l1->val) + return; assoc = l1->assoc; line_size = l1->line_size; lines_per_tag = l1->lines_per_tag; size_in_kb = l1->size_in_kb; - } else { + break; + case 2: + if (!l2.val) + return; assoc = l2.assoc; line_size = l2.line_size; lines_per_tag = l2.lines_per_tag; /* cpu_data has errata corrections for K7 applied */ size_in_kb = current_cpu_data.x86_cache_size; + break; + case 3: + if (!l3.val) + return; + assoc = l3.assoc; + line_size = l3.line_size; + lines_per_tag = l3.lines_per_tag; + size_in_kb = l3.size_encoded * 512; + break; + default: + return; } + eax->split.is_self_initializing = 1; + eax->split.type = types[leaf]; + eax->split.level = levels[leaf]; + if (leaf == 3) + eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1; + else + eax->split.num_threads_sharing = 0; + eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; + + if (assoc == 0xf) eax->split.is_fully_associative = 1; ebx->split.coherency_line_size = line_size - 1; @@ -239,8 +273,7 @@ static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le return 0; } -/* will only be called once; __init is safe here */ -static int __init find_num_cache_leaves(void) +static int __cpuinit find_num_cache_leaves(void) { unsigned int eax, ebx, ecx, edx; union _cpuid4_leaf_eax cache_eax; @@ -710,7 +743,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) return retval; } -static void __cpuexit cache_remove_dev(struct sys_device * sys_dev) +static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) { unsigned int cpu = sys_dev->id; unsigned long i; diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c index 56cd485b127..34c781eddee 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.c +++ b/arch/i386/kernel/cpu/mcheck/mce.c @@ -60,6 +60,20 @@ void mcheck_init(struct cpuinfo_x86 *c) } } +static unsigned long old_cr4 __initdata; + +void __init stop_mce(void) +{ + old_cr4 = read_cr4(); + clear_in_cr4(X86_CR4_MCE); +} + +void __init restart_mce(void) +{ + if (old_cr4 & X86_CR4_MCE) + set_in_cr4(X86_CR4_MCE); +} + static int __init mcheck_disable(char *str) { mce_disabled = 1; diff --git a/arch/i386/kernel/cpu/mcheck/non-fatal.c b/arch/i386/kernel/cpu/mcheck/non-fatal.c index 6b5d3518a1c..bf39409b383 100644 --- a/arch/i386/kernel/cpu/mcheck/non-fatal.c +++ 
b/arch/i386/kernel/cpu/mcheck/non-fatal.c @@ -57,7 +57,7 @@ static DECLARE_DELAYED_WORK(mce_work, mce_work_fn); static void mce_work_fn(struct work_struct *work) { on_each_cpu(mce_checkregs, NULL, 1, 1); - schedule_delayed_work(&mce_work, MCE_RATE); + schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); } static int __init init_nonfatal_mce_checker(void) @@ -82,7 +82,7 @@ static int __init init_nonfatal_mce_checker(void) /* * Check for non-fatal errors every MCE_RATE s */ - schedule_delayed_work(&mce_work, MCE_RATE); + schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); printk(KERN_INFO "Machine check exception polling timer started.\n"); return 0; } diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c index 7ba7c3abd3a..1203dc5ab87 100644 --- a/arch/i386/kernel/cpu/mcheck/therm_throt.c +++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c @@ -134,19 +134,21 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, int err; sys_dev = get_cpu_sysdev(cpu); - mutex_lock(&therm_cpu_lock); switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: + mutex_lock(&therm_cpu_lock); err = thermal_throttle_add_dev(sys_dev); + mutex_unlock(&therm_cpu_lock); WARN_ON(err); break; case CPU_DEAD: case CPU_DEAD_FROZEN: + mutex_lock(&therm_cpu_lock); thermal_throttle_remove_dev(sys_dev); + mutex_unlock(&therm_cpu_lock); break; } - mutex_unlock(&therm_cpu_lock); return NOTIFY_OK; } diff --git a/arch/i386/kernel/cpu/mtrr/cyrix.c b/arch/i386/kernel/cpu/mtrr/cyrix.c index 1001f1e0fe6..2287d4863a8 100644 --- a/arch/i386/kernel/cpu/mtrr/cyrix.c +++ b/arch/i386/kernel/cpu/mtrr/cyrix.c @@ -3,6 +3,7 @@ #include <asm/mtrr.h> #include <asm/msr.h> #include <asm/io.h> +#include <asm/processor-cyrix.h> #include "mtrr.h" int arr3_protected; diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c index f6e46943e6e..56f64e34829 100644 --- a/arch/i386/kernel/cpu/mtrr/generic.c +++ b/arch/i386/kernel/cpu/mtrr/generic.c @@ -79,7 +79,7 @@ static void print_fixed(unsigned base, unsigned step, const mtrr_type*types) } /* Grab all of the MTRR state for this CPU into *state */ -void get_mtrr_state(void) +void __init get_mtrr_state(void) { unsigned int i; struct mtrr_var_range *vrs; diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index 75dc6d5214b..c48b6fea5ab 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -643,7 +643,7 @@ static struct sysdev_driver mtrr_sysdev_driver = { * initialized (i.e. before smp_init()). 
* */ -__init void mtrr_bp_init(void) +void __init mtrr_bp_init(void) { init_ifs(); diff --git a/arch/i386/kernel/cpu/mtrr/state.c b/arch/i386/kernel/cpu/mtrr/state.c index 7b39a2f954d..c9014ca4a57 100644 --- a/arch/i386/kernel/cpu/mtrr/state.c +++ b/arch/i386/kernel/cpu/mtrr/state.c @@ -3,6 +3,7 @@ #include <asm/io.h> #include <asm/mtrr.h> #include <asm/msr.h> +#include <asm-i386/processor-cyrix.h> #include "mtrr.h" diff --git a/arch/i386/kernel/cpu/perfctr-watchdog.c b/arch/i386/kernel/cpu/perfctr-watchdog.c index 4d26d514c56..4be488e73be 100644 --- a/arch/i386/kernel/cpu/perfctr-watchdog.c +++ b/arch/i386/kernel/cpu/perfctr-watchdog.c @@ -325,7 +325,7 @@ static struct wd_ops k7_wd_ops = { .stop = single_msr_stop_watchdog, .perfctr = MSR_K7_PERFCTR0, .evntsel = MSR_K7_EVNTSEL0, - .checkbit = 1ULL<<63, + .checkbit = 1ULL<<47, }; /* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */ @@ -346,7 +346,9 @@ static int setup_p6_watchdog(unsigned nmi_hz) perfctr_msr = MSR_P6_PERFCTR0; evntsel_msr = MSR_P6_EVNTSEL0; - wrmsrl(perfctr_msr, 0UL); + /* KVM doesn't implement this MSR */ + if (wrmsr_safe(perfctr_msr, 0, 0) < 0) + return 0; evntsel = P6_EVNTSEL_INT | P6_EVNTSEL_OS @@ -599,8 +601,8 @@ static struct wd_ops intel_arch_wd_ops = { .setup = setup_intel_arch_watchdog, .rearm = p6_rearm, .stop = single_msr_stop_watchdog, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .evntsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR1, + .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, }; static void probe_nmi_watchdog(void) diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 89d91e6cc97..1e31b6caffb 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -29,7 +29,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, - NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", "3dnowext", "3dnow", + NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", + "3dnowext", "3dnow", /* Transmeta-defined */ "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, @@ -40,8 +41,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* Other (Linux-defined) */ "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL, - "constant_tsc", "up", NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "constant_tsc", "up", NULL, "arch_perfmon", + "pebs", "bts", NULL, "sync_rdtsc", + "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ @@ -57,9 +59,16 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8legacy", "abm", - "sse4a", "misalignsse", - "3dnowprefetch", "osvw", "ibs", NULL, NULL, NULL, NULL, NULL, + "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", + "altmovcr8", "abm", "sse4a", + "misalignsse", "3dnowprefetch", + "osvw", "ibs", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Auxiliary (Linux-defined) */ + "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c deleted file mode 100644 
index 50076f22e90..00000000000 --- a/arch/i386/kernel/cpu/rise.c +++ /dev/null @@ -1,52 +0,0 @@ -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/bitops.h> -#include <asm/processor.h> - -#include "cpu.h" - -static void __cpuinit init_rise(struct cpuinfo_x86 *c) -{ - printk("CPU: Rise iDragon"); - if (c->x86_model > 2) - printk(" II"); - printk("\n"); - - /* Unhide possibly hidden capability flags - The mp6 iDragon family don't have MSRs. - We switch on extra features with this cpuid weirdness: */ - __asm__ ( - "movl $0x6363452a, %%eax\n\t" - "movl $0x3231206c, %%ecx\n\t" - "movl $0x2a32313a, %%edx\n\t" - "cpuid\n\t" - "movl $0x63634523, %%eax\n\t" - "movl $0x32315f6c, %%ecx\n\t" - "movl $0x2333313a, %%edx\n\t" - "cpuid\n\t" : : : "eax", "ebx", "ecx", "edx" - ); - set_bit(X86_FEATURE_CX8, c->x86_capability); -} - -static struct cpu_dev rise_cpu_dev __cpuinitdata = { - .c_vendor = "Rise", - .c_ident = { "RiseRiseRise" }, - .c_models = { - { .vendor = X86_VENDOR_RISE, .family = 5, .model_names = - { - [0] = "iDragon", - [2] = "iDragon", - [8] = "iDragon II", - [9] = "iDragon II" - } - }, - }, - .c_init = init_rise, -}; - -int __init rise_init_cpu(void) -{ - cpu_devs[X86_VENDOR_RISE] = &rise_cpu_dev; - return 0; -} - diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index 9645bb51f76..e60cddbc4cf 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c @@ -10,6 +10,7 @@ #include <linux/efi.h> #include <linux/pfn.h> #include <linux/uaccess.h> +#include <linux/suspend.h> #include <asm/pgtable.h> #include <asm/page.h> @@ -320,6 +321,37 @@ static int __init request_standard_resources(void) subsys_initcall(request_standard_resources); +#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) +/** + * e820_mark_nosave_regions - Find the ranges of physical addresses that do not + * correspond to e820 RAM areas and mark the corresponding pages as nosave for + * hibernation. + * + * This function requires the e820 map to be sorted and without any + * overlapping entries and assumes the first e820 area to be RAM. 
+ */ +void __init e820_mark_nosave_regions(void) +{ + int i; + unsigned long pfn; + + pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); + for (i = 1; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (pfn < PFN_UP(ei->addr)) + register_nosave_region(pfn, PFN_UP(ei->addr)); + + pfn = PFN_DOWN(ei->addr + ei->size); + if (ei->type != E820_RAM) + register_nosave_region(PFN_UP(ei->addr), pfn); + + if (pfn >= max_low_pfn) + break; + } +} +#endif + void __init add_memory_region(unsigned long long start, unsigned long long size, int type) { @@ -734,7 +766,7 @@ void __init print_memory_map(char *who) case E820_NVS: printk("(ACPI NVS)\n"); break; - default: printk("type %lu\n", e820.map[i].type); + default: printk("type %u\n", e820.map[i].type); break; } } diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index a1808022ea1..2452c6fbe99 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -278,7 +278,7 @@ void efi_memmap_walk(efi_freemem_callback_t callback, void *arg) struct range { unsigned long start; unsigned long end; - } prev, curr; + } uninitialized_var(prev), curr; efi_memory_desc_t *md; unsigned long start, end; void *p; diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 3c3c220488c..a714d6b4350 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -409,8 +409,6 @@ restore_nocheck_notrace: 1: INTERRUPT_RETURN .section .fixup,"ax" iret_exc: - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) pushl $0 # no error code pushl $do_iret_error jmp error_code @@ -1023,6 +1021,91 @@ ENTRY(kernel_thread_helper) CFI_ENDPROC ENDPROC(kernel_thread_helper) +#ifdef CONFIG_XEN +ENTRY(xen_hypervisor_callback) + CFI_STARTPROC + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + + /* Check to see if we got the event in the critical + region in xen_iret_direct, after we've reenabled + events and checked for pending events. This simulates + iret instruction's behaviour where it delivers a + pending interrupt when enabling interrupts. */ + movl PT_EIP(%esp),%eax + cmpl $xen_iret_start_crit,%eax + jb 1f + cmpl $xen_iret_end_crit,%eax + jae 1f + + call xen_iret_crit_fixup + +1: mov %esp, %eax + call xen_evtchn_do_upcall + jmp ret_from_intr + CFI_ENDPROC +ENDPROC(xen_hypervisor_callback) + +# Hypervisor uses this for application faults while it executes. +# We get here for two reasons: +# 1. Fault while reloading DS, ES, FS or GS +# 2. Fault while executing IRET +# Category 1 we fix up by reattempting the load, and zeroing the segment +# register if the load fails. +# Category 2 we fix up by jumping to do_iret_error. We cannot use the +# normal Linux return path in this case because if we use the IRET hypercall +# to pop the stack frame we end up in an infinite loop of failsafe callbacks. +# We distinguish between categories by maintaining a status value in EAX. 
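For reference, the e820_mark_nosave_regions() hunk above depends on a sorted, non-overlapping map whose first entry is RAM; every hole between entries and every non-RAM entry becomes a nosave PFN range. A stand-alone sketch of the same walk over a fabricated map (the entry values are made up):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PFN_UP(x)   (((x) + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
#define E820_RAM 1

struct entry { unsigned long long addr, size; int type; };

int main(void)
{
	/* sorted, non-overlapping, first entry is RAM */
	struct entry map[] = {
		{ 0x0000000, 0x9f000,    E820_RAM },
		{ 0x00f0000, 0x10000,    2 },		/* reserved */
		{ 0x0100000, 0x7ef0000,  E820_RAM },
	};
	int n = sizeof(map) / sizeof(map[0]), i;
	unsigned long pfn = PFN_DOWN(map[0].addr + map[0].size);

	for (i = 1; i < n; i++) {
		struct entry *ei = &map[i];

		if (pfn < PFN_UP(ei->addr))	/* hole before this entry */
			printf("nosave: pfn %lx-%lx\n", pfn,
			       (unsigned long)PFN_UP(ei->addr));
		pfn = PFN_DOWN(ei->addr + ei->size);
		if (ei->type != E820_RAM)	/* non-RAM entry itself */
			printf("nosave: pfn %lx-%lx\n",
			       (unsigned long)PFN_UP(ei->addr), pfn);
	}
	return 0;
}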
+ENTRY(xen_failsafe_callback) + CFI_STARTPROC + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + movl $1,%eax +1: mov 4(%esp),%ds +2: mov 8(%esp),%es +3: mov 12(%esp),%fs +4: mov 16(%esp),%gs + testl %eax,%eax + popl %eax + CFI_ADJUST_CFA_OFFSET -4 + lea 16(%esp),%esp + CFI_ADJUST_CFA_OFFSET -16 + jz 5f + addl $16,%esp + jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) +5: pushl $0 # EAX == 0 => Category 1 (Bad segment) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + jmp ret_from_exception + CFI_ENDPROC + +.section .fixup,"ax" +6: xorl %eax,%eax + movl %eax,4(%esp) + jmp 1b +7: xorl %eax,%eax + movl %eax,8(%esp) + jmp 2b +8: xorl %eax,%eax + movl %eax,12(%esp) + jmp 3b +9: xorl %eax,%eax + movl %eax,16(%esp) + jmp 4b +.previous +.section __ex_table,"a" + .align 4 + .long 1b,6b + .long 2b,7b + .long 3b,8b + .long 4b,9b +.previous +ENDPROC(xen_failsafe_callback) + +#endif /* CONFIG_XEN */ + .section .rodata,"a" #include "syscall_table.S" diff --git a/arch/i386/kernel/geode.c b/arch/i386/kernel/geode.c new file mode 100644 index 00000000000..41e8aec4c61 --- /dev/null +++ b/arch/i386/kernel/geode.c @@ -0,0 +1,155 @@ +/* + * AMD Geode southbridge support code + * Copyright (C) 2006, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/ioport.h> +#include <linux/io.h> +#include <asm/msr.h> +#include <asm/geode.h> + +static struct { + char *name; + u32 msr; + int size; + u32 base; +} lbars[] = { + { "geode-pms", MSR_LBAR_PMS, LBAR_PMS_SIZE, 0 }, + { "geode-acpi", MSR_LBAR_ACPI, LBAR_ACPI_SIZE, 0 }, + { "geode-gpio", MSR_LBAR_GPIO, LBAR_GPIO_SIZE, 0 }, + { "geode-mfgpt", MSR_LBAR_MFGPT, LBAR_MFGPT_SIZE, 0 } +}; + +static void __init init_lbars(void) +{ + u32 lo, hi; + int i; + + for (i = 0; i < ARRAY_SIZE(lbars); i++) { + rdmsr(lbars[i].msr, lo, hi); + if (hi & 0x01) + lbars[i].base = lo & 0x0000ffff; + + if (lbars[i].base == 0) + printk(KERN_ERR "geode: Couldn't initialize '%s'\n", + lbars[i].name); + } +} + +int geode_get_dev_base(unsigned int dev) +{ + BUG_ON(dev >= ARRAY_SIZE(lbars)); + return lbars[dev].base; +} +EXPORT_SYMBOL_GPL(geode_get_dev_base); + +/* === GPIO API === */ + +void geode_gpio_set(unsigned int gpio, unsigned int reg) +{ + u32 base = geode_get_dev_base(GEODE_DEV_GPIO); + + if (!base) + return; + + if (gpio < 16) + outl(1 << gpio, base + reg); + else + outl(1 << (gpio - 16), base + 0x80 + reg); +} +EXPORT_SYMBOL_GPL(geode_gpio_set); + +void geode_gpio_clear(unsigned int gpio, unsigned int reg) +{ + u32 base = geode_get_dev_base(GEODE_DEV_GPIO); + + if (!base) + return; + + if (gpio < 16) + outl(1 << (gpio + 16), base + reg); + else + outl(1 << gpio, base + 0x80 + reg); +} +EXPORT_SYMBOL_GPL(geode_gpio_clear); + +int geode_gpio_isset(unsigned int gpio, unsigned int reg) +{ + u32 base = geode_get_dev_base(GEODE_DEV_GPIO); + + if (!base) + return 0; + + if (gpio < 16) + return (inl(base + reg) & (1 << gpio)) ? 1 : 0; + else + return (inl(base + 0x80 + reg) & (1 << (gpio - 16))) ? 
1 : 0; +} +EXPORT_SYMBOL_GPL(geode_gpio_isset); + +void geode_gpio_set_irq(unsigned int group, unsigned int irq) +{ + u32 lo, hi; + + if (group > 7 || irq > 15) + return; + + rdmsr(MSR_PIC_ZSEL_HIGH, lo, hi); + + lo &= ~(0xF << (group * 4)); + lo |= (irq & 0xF) << (group * 4); + + wrmsr(MSR_PIC_ZSEL_HIGH, lo, hi); +} +EXPORT_SYMBOL_GPL(geode_gpio_set_irq); + +void geode_gpio_setup_event(unsigned int gpio, int pair, int pme) +{ + u32 base = geode_get_dev_base(GEODE_DEV_GPIO); + u32 offset, shift, val; + + if (gpio >= 24) + offset = GPIO_MAP_W; + else if (gpio >= 16) + offset = GPIO_MAP_Z; + else if (gpio >= 8) + offset = GPIO_MAP_Y; + else + offset = GPIO_MAP_X; + + shift = (gpio % 8) * 4; + + val = inl(base + offset); + + /* Clear whatever was there before */ + val &= ~(0xF << shift); + + /* And set the new value */ + + val |= ((pair & 7) << shift); + + /* Set the PME bit if this is a PME event */ + + if (pme) + val |= (1 << (shift + 3)); + + outl(val, base + offset); +} +EXPORT_SYMBOL_GPL(geode_gpio_setup_event); + +static int __init geode_southbridge_init(void) +{ + if (!is_geode()) + return -ENODEV; + + init_lbars(); + return 0; +} + +postcore_initcall(geode_southbridge_init); diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index f74dfc419b5..7c52b222207 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -168,6 +168,12 @@ page_pde_offset = (__PAGE_OFFSET >> 20); .section .init.text,"ax",@progbits #endif + /* Do an early initialization of the fixmap area */ + movl $(swapper_pg_dir - __PAGE_OFFSET), %edx + movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax + addl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ + movl %eax, 4092(%edx) + #ifdef CONFIG_SMP ENTRY(startup_32_smp) cld @@ -504,9 +510,12 @@ ENTRY(_stext) /* * BSS section */ -.section ".bss.page_aligned","w" +.section ".bss.page_aligned","wa" + .align PAGE_SIZE_asm ENTRY(swapper_pg_dir) .fill 1024,4,0 +ENTRY(swapper_pg_pmd) + .fill 1024,4,0 ENTRY(empty_zero_page) .fill 4096,1,0 @@ -530,6 +539,8 @@ fault_msg: .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n" .asciz "Stack: %p %p %p %p %p %p %p %p\n" +#include "../xen/xen-head.S" + /* * The IDT and GDT 'descriptors' are a strange 48-bit object * only used by the lidt and lgdt instructions. 
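The new geode.c above splits the 32 GPIO lines into two banks (base and base + 0x80), and the output registers take write-one-to-set in the low 16 bits and write-one-to-clear in the upper 16, which is why geode_gpio_clear() shifts by 16 and no read-modify-write is needed. A sketch that models that register convention in software (the register model is an illustrative assumption, not the actual southbridge):

#include <stdio.h>

static unsigned int gpio_out;	/* fake 16-line GPIO output register */

/*
 * Model of one Geode GPIO atomic register: writing bit n drives line n
 * high, writing bit n+16 drives it low, so no read-modify-write race.
 */
static void reg_write(unsigned int val)
{
	gpio_out |= val & 0xffff;
	gpio_out &= ~((val >> 16) & 0xffff);
}

int main(void)
{
	reg_write(1 << 3);		/* like geode_gpio_set(3, reg)   */
	reg_write(1 << (3 + 16));	/* like geode_gpio_clear(3, reg) */
	reg_write(1 << 5);
	printf("register now %#x (only line 5 high)\n", gpio_out);
	return 0;
}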
They are not diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c index 17d73459fc5..533d4932bc7 100644 --- a/arch/i386/kernel/hpet.c +++ b/arch/i386/kernel/hpet.c @@ -5,6 +5,7 @@ #include <linux/init.h> #include <linux/sysdev.h> #include <linux/pm.h> +#include <linux/delay.h> #include <asm/hpet.h> #include <asm/io.h> @@ -187,6 +188,10 @@ static void hpet_set_mode(enum clock_event_mode mode, cfg &= ~HPET_TN_ENABLE; hpet_writel(cfg, HPET_T0_CFG); break; + + case CLOCK_EVT_MODE_RESUME: + hpet_enable_int(); + break; } } @@ -217,6 +222,7 @@ static struct clocksource clocksource_hpet = { .mask = HPET_MASK, .shift = HPET_SHIFT, .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .resume = hpet_start_counter, }; /* @@ -226,7 +232,8 @@ int __init hpet_enable(void) { unsigned long id; uint64_t hpet_freq; - u64 tmp; + u64 tmp, start, now; + cycle_t t1; if (!is_hpet_capable()) return 0; @@ -273,6 +280,27 @@ int __init hpet_enable(void) /* Start the counter */ hpet_start_counter(); + /* Verify whether hpet counter works */ + t1 = read_hpet(); + rdtscll(start); + + /* + * We don't know the TSC frequency yet, but waiting for + * 200000 TSC cycles is safe: + * 4 GHz == 50us + * 1 GHz == 200us + */ + do { + rep_nop(); + rdtscll(now); + } while ((now - start) < 200000UL); + + if (t1 == read_hpet()) { + printk(KERN_WARNING + "HPET counter not counting. HPET disabled\n"); + goto out_nohpet; + } + /* Initialize and register HPET clocksource * * hpet period is in femto seconds per cycle @@ -291,7 +319,6 @@ int __init hpet_enable(void) clocksource_register(&clocksource_hpet); - if (id & HPET_ID_LEGSUP) { hpet_enable_int(); hpet_reserve_platform_timers(id); @@ -299,7 +326,7 @@ int __init hpet_enable(void) * Start hpet with the boot cpu mask and make it * global after the IO_APIC has been initialized. 
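The hpet_enable() hunk above sanity-checks that the HPET counter actually advances before trusting it, spinning on the TSC because no calibrated clock exists that early in boot. The same pattern in a stand-alone sketch (read_counter() is a stand-in for the MMIO read; x86 only because of __rdtsc()):

#include <stdio.h>
#include <x86intrin.h>		/* __rdtsc(), x86 only */

/* Stand-in for read_hpet(); a real check reads the MMIO counter. */
static unsigned long long read_counter(void)
{
	static unsigned long long c;
	return c += 17;		/* pretend the counter ticks */
}

int main(void)
{
	unsigned long long t1 = read_counter();
	unsigned long long start = __rdtsc(), now;

	/*
	 * The TSC frequency is unknown this early, but 200000 cycles
	 * is long enough anywhere from 1 GHz (200us) to 4 GHz (50us)
	 * for a working counter to have moved.
	 */
	do {
		now = __rdtsc();
	} while (now - start < 200000ULL);

	if (t1 == read_counter())
		printf("counter not counting - would disable it\n");
	else
		printf("counter is alive\n");
	return 0;
}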
*/ - hpet_clockevent.cpumask =cpumask_of_cpu(0); + hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); clockevents_register_device(&hpet_clockevent); global_clock_event = &hpet_clockevent; return 1; @@ -524,68 +551,3 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } #endif - - -/* - * Suspend/resume part - */ - -#ifdef CONFIG_PM - -static int hpet_suspend(struct sys_device *sys_device, pm_message_t state) -{ - unsigned long cfg = hpet_readl(HPET_CFG); - - cfg &= ~(HPET_CFG_ENABLE|HPET_CFG_LEGACY); - hpet_writel(cfg, HPET_CFG); - - return 0; -} - -static int hpet_resume(struct sys_device *sys_device) -{ - unsigned int id; - - hpet_start_counter(); - - id = hpet_readl(HPET_ID); - - if (id & HPET_ID_LEGSUP) - hpet_enable_int(); - - return 0; -} - -static struct sysdev_class hpet_class = { - set_kset_name("hpet"), - .suspend = hpet_suspend, - .resume = hpet_resume, -}; - -static struct sys_device hpet_device = { - .id = 0, - .cls = &hpet_class, -}; - - -static __init int hpet_register_sysfs(void) -{ - int err; - - if (!is_hpet_capable()) - return 0; - - err = sysdev_class_register(&hpet_class); - - if (!err) { - err = sysdev_register(&hpet_device); - if (err) - sysdev_class_unregister(&hpet_class); - } - - return err; -} - -device_initcall(hpet_register_sysfs); - -#endif diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c index f8a3c4054c7..6d839f2f1b1 100644 --- a/arch/i386/kernel/i8253.c +++ b/arch/i386/kernel/i8253.c @@ -3,18 +3,17 @@ * */ #include <linux/clockchips.h> -#include <linux/spinlock.h> +#include <linux/init.h> +#include <linux/interrupt.h> #include <linux/jiffies.h> -#include <linux/sysdev.h> #include <linux/module.h> -#include <linux/init.h> +#include <linux/spinlock.h> #include <asm/smp.h> #include <asm/delay.h> #include <asm/i8253.h> #include <asm/io.h> - -#include "io_ports.h" +#include <asm/timer.h> DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); @@ -41,26 +40,27 @@ static void init_pit_timer(enum clock_event_mode mode, case CLOCK_EVT_MODE_PERIODIC: /* binary, mode 2, LSB/MSB, ch 0 */ outb_p(0x34, PIT_MODE); - udelay(10); outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ - udelay(10); outb(LATCH >> 8 , PIT_CH0); /* MSB */ break; - /* - * Avoid unnecessary state transitions, as it confuses - * Geode / Cyrix based boxen. - */ case CLOCK_EVT_MODE_SHUTDOWN: - if (evt->mode == CLOCK_EVT_MODE_UNUSED) - break; case CLOCK_EVT_MODE_UNUSED: - if (evt->mode == CLOCK_EVT_MODE_SHUTDOWN) - break; + if (evt->mode == CLOCK_EVT_MODE_PERIODIC || + evt->mode == CLOCK_EVT_MODE_ONESHOT) { + outb_p(0x30, PIT_MODE); + outb_p(0, PIT_CH0); + outb_p(0, PIT_CH0); + } + break; + case CLOCK_EVT_MODE_ONESHOT: /* One shot setup */ outb_p(0x38, PIT_MODE); - udelay(10); + break; + + case CLOCK_EVT_MODE_RESUME: + /* Nothing to do here */ break; } spin_unlock_irqrestore(&i8253_lock, flags); diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c index cff95d10a4d..d26fc063a76 100644 --- a/arch/i386/kernel/init_task.c +++ b/arch/i386/kernel/init_task.c @@ -42,5 +42,5 @@ EXPORT_SYMBOL(init_task); * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. 
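The i8253.c rework above reprograms channel 0 explicitly on shutdown and drops the udelay() calls between the mode byte and the 16-bit reload value. The reload-value arithmetic, worked through in a sketch (the HZ value is an assumed example):

#include <stdio.h>

#define PIT_TICK_RATE 1193182UL		/* i8254 input clock, Hz */
#define HZ 250				/* assumed tick rate */
#define LATCH ((PIT_TICK_RATE + HZ / 2) / HZ)

int main(void)
{
	/*
	 * Mode byte 0x34: binary, rate generator (mode 2), lobyte/hibyte
	 * access, channel 0 - then the 16-bit reload value goes out LSB
	 * first, exactly the sequence init_pit_timer() uses above.
	 */
	printf("outb 0x34   -> mode port\n");
	printf("outb 0x%02lx -> ch0 (LSB)\n", LATCH & 0xff);
	printf("outb 0x%02lx -> ch0 (MSB)\n", LATCH >> 8);
	return 0;
}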
*/ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; +DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 7f8b7af2b95..893df828075 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -353,14 +353,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) # include <linux/slab.h> /* kmalloc() */ # include <linux/timer.h> /* time_after() */ -#ifdef CONFIG_BALANCED_IRQ_DEBUG -# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0) -# define Dprintk(x...) do { TDprintk(x); } while (0) -# else -# define TDprintk(x...) -# define Dprintk(x...) -# endif - #define IRQBALANCE_CHECK_ARCH -999 #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) #define MIN_BALANCED_IRQ_INTERVAL (HZ/2) @@ -443,7 +435,7 @@ static inline void balance_irq(int cpu, int irq) static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) { int i, j; - Dprintk("Rotating IRQs among CPUs.\n"); + for_each_online_cpu(i) { for (j = 0; j < NR_IRQS; j++) { if (!irq_desc[j].action) @@ -560,19 +552,11 @@ tryanothercpu: max_loaded = tmp_loaded; /* processor */ imbalance = (max_cpu_irq - min_cpu_irq) / 2; - Dprintk("max_loaded cpu = %d\n", max_loaded); - Dprintk("min_loaded cpu = %d\n", min_loaded); - Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq); - Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq); - Dprintk("load imbalance = %lu\n", imbalance); - /* if imbalance is less than approx 10% of max load, then * observe diminishing returns action. - quit */ - if (imbalance < (max_cpu_irq >> 3)) { - Dprintk("Imbalance too trivial\n"); + if (imbalance < (max_cpu_irq >> 3)) goto not_worth_the_effort; - } tryanotherirq: /* if we select an IRQ to move that can't go where we want, then @@ -629,9 +613,6 @@ tryanotherirq: cpus_and(tmp, target_cpu_mask, allowed_mask); if (!cpus_empty(tmp)) { - - Dprintk("irq = %d moved to cpu = %d\n", - selected_irq, min_loaded); /* mark for change destination */ set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); @@ -651,7 +632,6 @@ not_worth_the_effort: */ balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); - Dprintk("IRQ worth rotating not found\n"); return; } @@ -667,6 +647,7 @@ static int balanced_irq(void *unused) set_pending_irq(i, cpumask_of_cpu(0)); } + set_freezable(); for ( ; ; ) { time_remaining = schedule_timeout_interruptible(time_remaining); try_to_freeze(); @@ -1901,7 +1882,7 @@ __setup("no_timer_check", notimercheck); * - if this function detects that timer IRQs are defunct, then we fall * back to ISA timer IRQs */ -int __init timer_irq_works(void) +static int __init timer_irq_works(void) { unsigned long t1 = jiffies; diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index d2daf672f4a..dd2b97fc00b 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -21,7 +21,7 @@ #include <asm/apic.h> #include <asm/uaccess.h> -DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); DEFINE_PER_CPU(struct pt_regs *, irq_regs); @@ -149,15 +149,11 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs) #ifdef CONFIG_4KSTACKS -/* - * These should really be __section__(".bss.page_aligned") as well, but - * gcc's 3.0 and earlier don't handle that correctly. 
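The irq.c hunk above moves the 4K-stacks soft/hard IRQ stacks into .bss.page_aligned rather than relying on an alignment attribute alone. A compilable illustration of placing a zero-filled array in a named section (reusing the section name here is just for the demo):

#include <stdio.h>

#define THREAD_SIZE 4096

/*
 * Zero-filled array placed in an explicitly named section; the aligned
 * attribute supplies the alignment the old declaration relied on.
 */
static char softirq_stack[2 * THREAD_SIZE]
	__attribute__((__section__(".bss.page_aligned"),
		       __aligned__(THREAD_SIZE)));

int main(void)
{
	printf("stack at %p, page aligned: %d\n", (void *)softirq_stack,
	       (unsigned long)softirq_stack % THREAD_SIZE == 0);
	return 0;
}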
- */ static char softirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__aligned__(THREAD_SIZE))); + __attribute__((__section__(".bss.page_aligned"))); static char hardirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__aligned__(THREAD_SIZE))); + __attribute__((__section__(".bss.page_aligned"))); /* * allocate per-cpu stacks for hardirq and for softirq processing diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index dde828a333c..448a50b1324 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -35,6 +35,7 @@ #include <asm/cacheflush.h> #include <asm/desc.h> #include <asm/uaccess.h> +#include <asm/alternative.h> void jprobe_return_end(void); @@ -169,16 +170,12 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) void __kprobes arch_arm_kprobe(struct kprobe *p) { - *p->addr = BREAKPOINT_INSTRUCTION; - flush_icache_range((unsigned long) p->addr, - (unsigned long) p->addr + sizeof(kprobe_opcode_t)); + text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); } void __kprobes arch_disarm_kprobe(struct kprobe *p) { - *p->addr = p->opcode; - flush_icache_range((unsigned long) p->addr, - (unsigned long) p->addr + sizeof(kprobe_opcode_t)); + text_poke(p->addr, &p->opcode, 1); } void __kprobes arch_remove_kprobe(struct kprobe *p) diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index fba121f7973..99beac7f96c 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -295,7 +295,7 @@ static unsigned int last_irq_sums [NR_CPUS], alert_counter [NR_CPUS]; -void touch_nmi_watchdog (void) +void touch_nmi_watchdog(void) { if (nmi_watchdog > 0) { unsigned cpu; @@ -304,8 +304,10 @@ void touch_nmi_watchdog (void) * Just reset the alert counters, (other CPUs might be * spinning on locks we hold): */ - for_each_present_cpu (cpu) - alert_counter[cpu] = 0; + for_each_present_cpu(cpu) { + if (alert_counter[cpu]) + alert_counter[cpu] = 0; + } } /* @@ -351,7 +353,7 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) * Take the local apic timer and PIT/HPET into account. 
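arch_arm_kprobe() above now routes the int3 byte through text_poke() and builds its one-byte source buffer with a C99 compound literal. The idiom in isolation (patch_byte() is a stand-in; the real text_poke() writes through a temporary mapping so read-only kernel text can be patched):

#include <stdio.h>
#include <string.h>

#define BREAKPOINT_INSTRUCTION 0xcc	/* x86 int3 */

/* Stand-in for text_poke(): just copies the bytes. */
static void patch_byte(unsigned char *addr, const unsigned char *opcode,
		       size_t len)
{
	memcpy(addr, opcode, len);
}

int main(void)
{
	unsigned char insn[4] = { 0x55, 0x89, 0xe5, 0xc3 };

	/* same idiom as arch_arm_kprobe() above: an unnamed one-element
	 * array created in place at the call site */
	patch_byte(insn, (unsigned char[]){ BREAKPOINT_INSTRUCTION }, 1);

	printf("first byte now %#x\n", insn[0]);
	return 0;
}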
We don't * know which one is active, when we have highres/dyntick on */ - sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0); + sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_cpu(cpu).irqs[0]; /* if the none of the timers isn't firing, this cpu isn't doing much */ if (!touched && last_irq_sums[cpu] == sum) { diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index faab09abca5..ea962c0667d 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -124,20 +124,28 @@ unsigned paravirt_patch_ignore(unsigned len) return len; } +struct branch { + unsigned char opcode; + u32 delta; +} __attribute__((packed)); + unsigned paravirt_patch_call(void *target, u16 tgt_clobbers, void *site, u16 site_clobbers, unsigned len) { unsigned char *call = site; unsigned long delta = (unsigned long)target - (unsigned long)(call+5); + struct branch b; if (tgt_clobbers & ~site_clobbers) return len; /* target would clobber too much for this site */ if (len < 5) return len; /* call too long for patch site */ - *call++ = 0xe8; /* call */ - *(unsigned long *)call = delta; + b.opcode = 0xe8; /* call */ + b.delta = delta; + BUILD_BUG_ON(sizeof(b) != 5); + text_poke(call, (unsigned char *)&b, 5); return 5; } @@ -146,12 +154,14 @@ unsigned paravirt_patch_jmp(void *target, void *site, unsigned len) { unsigned char *jmp = site; unsigned long delta = (unsigned long)target - (unsigned long)(jmp+5); + struct branch b; if (len < 5) return len; /* call too long for patch site */ - *jmp++ = 0xe9; /* jmp */ - *(unsigned long *)jmp = delta; + b.opcode = 0xe9; /* jmp */ + b.delta = delta; + text_poke(jmp, (unsigned char *)&b, 5); return 5; } @@ -228,6 +238,41 @@ static int __init print_banner(void) } core_initcall(print_banner); +static struct resource reserve_ioports = { + .start = 0, + .end = IO_SPACE_LIMIT, + .name = "paravirt-ioport", + .flags = IORESOURCE_IO | IORESOURCE_BUSY, +}; + +static struct resource reserve_iomem = { + .start = 0, + .end = -1, + .name = "paravirt-iomem", + .flags = IORESOURCE_MEM | IORESOURCE_BUSY, +}; + +/* + * Reserve the whole legacy IO space to prevent any legacy drivers + * from wasting time probing for their hardware. This is a fairly + * brute-force approach to disabling all non-virtual drivers. + * + * Note that this must be called very early to have any effect. 
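paravirt_patch_call()/paravirt_patch_jmp() above assemble the 5-byte rel32 branch in a packed struct and size-check it, instead of storing opcode and displacement through unaligned pointer casts. A user-space sketch of the encoding (the addresses are fabricated):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct branch {
	unsigned char opcode;	/* 0xe8 = call, 0xe9 = jmp */
	uint32_t delta;		/* rel32, relative to end of insn */
} __attribute__((packed));

int main(void)
{
	unsigned char site[5];
	uint32_t target = 0x1000, addr = 0x2000;	/* fake addresses */
	struct branch b;

	/* displacement is measured from the byte after the 5-byte insn */
	b.opcode = 0xe8;
	b.delta = target - (addr + 5);

	_Static_assert(sizeof(b) == 5, "encoding must be 5 bytes");
	memcpy(site, &b, 5);	/* the kernel uses text_poke() here */

	printf("%02x %02x %02x %02x %02x\n",
	       site[0], site[1], site[2], site[3], site[4]);
	return 0;
}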
+ */ +int paravirt_disable_iospace(void) +{ + int ret; + + ret = request_resource(&ioport_resource, &reserve_ioports); + if (ret == 0) { + ret = request_resource(&iomem_resource, &reserve_iomem); + if (ret) + release_resource(&reserve_ioports); + } + + return ret; +} + struct paravirt_ops paravirt_ops = { .name = "bare hardware", .paravirt_enabled = 0, @@ -267,7 +312,7 @@ struct paravirt_ops paravirt_ops = { .write_msr = native_write_msr_safe, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, - .get_scheduled_cycles = native_read_tsc, + .sched_clock = native_sched_clock, .get_cpu_khz = native_calculate_cpu_khz, .load_tr_desc = native_load_tr_desc, .set_ldt = native_set_ldt, diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 06dfa65ad18..84664710b78 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -300,6 +300,7 @@ early_param("idle", idle_setup); void show_regs(struct pt_regs * regs) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; + unsigned long d0, d1, d2, d3, d6, d7; printk("\n"); printk("Pid: %d, comm: %20s\n", current->pid, current->comm); @@ -324,6 +325,17 @@ void show_regs(struct pt_regs * regs) cr3 = read_cr3(); cr4 = read_cr4_safe(); printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); + + get_debugreg(d0, 0); + get_debugreg(d1, 1); + get_debugreg(d2, 2); + get_debugreg(d3, 3); + printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", + d0, d1, d2, d3); + get_debugreg(d6, 6); + get_debugreg(d7, 7); + printk("DR6: %08lx DR7: %08lx\n", d6, d7); + show_trace(NULL, regs, ®s->esp); } @@ -538,8 +550,31 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) return 1; } -static noinline void __switch_to_xtra(struct task_struct *next_p, - struct tss_struct *tss) +#ifdef CONFIG_SECCOMP +void hard_disable_TSC(void) +{ + write_cr4(read_cr4() | X86_CR4_TSD); +} +void disable_TSC(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ + hard_disable_TSC(); + preempt_enable(); +} +void hard_enable_TSC(void) +{ + write_cr4(read_cr4() & ~X86_CR4_TSD); +} +#endif /* CONFIG_SECCOMP */ + +static noinline void +__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss) { struct thread_struct *next; @@ -555,6 +590,17 @@ static noinline void __switch_to_xtra(struct task_struct *next_p, set_debugreg(next->debugreg[7], 7); } +#ifdef CONFIG_SECCOMP + if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ + test_tsk_thread_flag(next_p, TIF_NOTSC)) { + /* prev and next are different */ + if (test_tsk_thread_flag(next_p, TIF_NOTSC)) + hard_disable_TSC(); + else + hard_enable_TSC(); + } +#endif + if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { /* * Disable the bitmap via an invalid offset. We still cache @@ -586,33 +632,6 @@ static noinline void __switch_to_xtra(struct task_struct *next_p, } /* - * This function selects if the context switch from prev to next - * has to tweak the TSC disable bit in the cr4. - */ -static inline void disable_tsc(struct task_struct *prev_p, - struct task_struct *next_p) -{ - struct thread_info *prev, *next; - - /* - * gcc should eliminate the ->thread_info dereference if - * has_secure_computing returns 0 at compile time (SECCOMP=n). 
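In the process.c hunk above, __switch_to_xtra() only touches CR4.TSD when prev and next disagree on TIF_NOTSC; the XOR of the two flags is the "did it change" test, so the common case writes no control register at all. The control flow in a sketch (the flag and register are faked):

#include <stdio.h>

#define TIF_NOTSC	0x1
#define X86_CR4_TSD	0x4

static unsigned int cr4;	/* fake control register */

static void hard_disable_TSC(void) { cr4 |= X86_CR4_TSD; }
static void hard_enable_TSC(void) { cr4 &= ~X86_CR4_TSD; }

/* Only write the register when prev and next actually differ. */
static void switch_tsc(unsigned int prev, unsigned int next)
{
	if ((prev & TIF_NOTSC) ^ (next & TIF_NOTSC)) {
		if (next & TIF_NOTSC)
			hard_disable_TSC();
		else
			hard_enable_TSC();
		printf("cr4 written, now %#x\n", cr4);
	}
}

int main(void)
{
	switch_tsc(0, TIF_NOTSC);		/* flips TSD on */
	switch_tsc(TIF_NOTSC, TIF_NOTSC);	/* no write at all */
	switch_tsc(TIF_NOTSC, 0);		/* flips TSD off */
	return 0;
}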
- */ - prev = task_thread_info(prev_p); - next = task_thread_info(next_p); - - if (has_secure_computing(prev) || has_secure_computing(next)) { - /* slow path here */ - if (has_secure_computing(prev) && - !has_secure_computing(next)) { - write_cr4(read_cr4() & ~X86_CR4_TSD); - } else if (!has_secure_computing(prev) && - has_secure_computing(next)) - write_cr4(read_cr4() | X86_CR4_TSD); - } -} - -/* * switch_to(x,yn) should switch tasks from x to y. * * We fsave/fwait so that an exception goes off at the right time @@ -689,11 +708,9 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas /* * Now maybe handle debug registers and/or IO bitmaps */ - if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW) - || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))) - __switch_to_xtra(next_p, tss); - - disable_tsc(prev_p, next_p); + if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || + task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) + __switch_to_xtra(prev_p, next_p, tss); /* * Leave lazy mode, flushing any hypercalls made here. diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 0c0ceec5de0..0c8f00e69c4 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -164,14 +164,22 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_ u32 *desc; unsigned long base; - down(&child->mm->context.sem); - desc = child->mm->context.ldt + (seg & ~7); - base = (desc[0] >> 16) | ((desc[1] & 0xff) << 16) | (desc[1] & 0xff000000); + seg &= ~7UL; - /* 16-bit code segment? */ - if (!((desc[1] >> 22) & 1)) - addr &= 0xffff; - addr += base; + down(&child->mm->context.sem); + if (unlikely((seg >> 3) >= child->mm->context.size)) + addr = -1L; /* bogus selector, access would fault */ + else { + desc = child->mm->context.ldt + seg; + base = ((desc[0] >> 16) | + ((desc[1] & 0xff) << 16) | + (desc[1] & 0xff000000)); + + /* 16-bit code segment? */ + if (!((desc[1] >> 22) & 1)) + addr &= 0xffff; + addr += base; + } up(&child->mm->context.sem); } return addr; @@ -358,17 +366,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ - case PTRACE_PEEKDATA: { - unsigned long tmp; - int copied; - - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); - ret = -EIO; - if (copied != sizeof(tmp)) - break; - ret = put_user(tmp, datap); + case PTRACE_PEEKDATA: + ret = generic_ptrace_peekdata(child, addr, data); break; - } /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: { @@ -395,10 +395,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) /* when I and D space are separate, this will have to be fixed. */ case PTRACE_POKETEXT: /* write the word at location addr. 
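The ptrace hunk above validates the LDT selector against context.size before decoding; the descriptor's base address is scattered over the two 32-bit words exactly as the expression reconstructs it. A self-contained check of that decode (the sample base is arbitrary):

#include <stdio.h>
#include <stdint.h>

/*
 * Rebuild the 32-bit segment base from an x86 descriptor: bits 0-15
 * live in desc[0] bits 16-31, bits 16-23 in desc[1] bits 0-7, and
 * bits 24-31 in desc[1] bits 24-31 - the same expression as the patch.
 */
static uint32_t desc_base(const uint32_t desc[2])
{
	return (desc[0] >> 16) |
	       ((desc[1] & 0xff) << 16) |
	       (desc[1] & 0xff000000);
}

int main(void)
{
	/* encode base 0x12345678 into a fake descriptor */
	uint32_t base = 0x12345678;
	uint32_t desc[2] = {
		(base & 0xffff) << 16,			    /* base 15..0 */
		((base >> 16) & 0xff) | (base & 0xff000000) /* 23..16, 31..24 */
	};

	printf("decoded base = %#x\n", desc_base(desc));
	return 0;
}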
*/ case PTRACE_POKEDATA: - ret = 0; - if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data)) - break; - ret = -EIO; + ret = generic_ptrace_pokedata(child, addr, data); break; case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 5513f8d5b5b..0d796248866 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -113,6 +113,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"), }, }, + { /* Handle problems with rebooting on Dell Optiplex 745's SFF*/ + .callback = set_bios_reboot, + .ident = "Dell OptiPlex 745", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"), + DMI_MATCH(DMI_BOARD_NAME, "0WF810"), + }, + }, { /* Handle problems with rebooting on Dell 2400's */ .callback = set_bios_reboot, .ident = "Dell PowerEdge 2400", diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 698c24fe482..d474cd639bc 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -102,19 +102,10 @@ static unsigned int highmem_pages = -1; /* * Setup options */ -struct drive_info_struct { char dummy[32]; } drive_info; -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \ - defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) -EXPORT_SYMBOL(drive_info); -#endif struct screen_info screen_info; EXPORT_SYMBOL(screen_info); struct apm_info apm_info; EXPORT_SYMBOL(apm_info); -struct sys_desc_table_struct { - unsigned short length; - unsigned char table[0]; -}; struct edid_info edid_info; EXPORT_SYMBOL_GPL(edid_info); struct ist_info ist_info; @@ -134,7 +125,7 @@ unsigned long saved_videomode; static char __initdata command_line[COMMAND_LINE_SIZE]; -unsigned char __initdata boot_params[PARAM_SIZE]; +struct boot_params __initdata boot_params; #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) struct edd edd; @@ -282,18 +273,18 @@ unsigned long __init find_max_low_pfn(void) printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20); if (max_pfn > MAX_NONPAE_PFN) - printk(KERN_WARNING "Use a PAE enabled kernel.\n"); + printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n"); else printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); max_pfn = MAXMEM_PFN; #else /* !CONFIG_HIGHMEM */ -#ifndef CONFIG_X86_PAE +#ifndef CONFIG_HIGHMEM64G if (max_pfn > MAX_NONPAE_PFN) { max_pfn = MAX_NONPAE_PFN; printk(KERN_WARNING "Warning only 4GB will be used.\n"); - printk(KERN_WARNING "Use a PAE enabled kernel.\n"); + printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n"); } -#endif /* !CONFIG_X86_PAE */ +#endif /* !CONFIG_HIGHMEM64G */ #endif /* !CONFIG_HIGHMEM */ } else { if (highmem_pages == -1) @@ -475,7 +466,7 @@ void __init setup_bootmem_allocator(void) * * This should all compile down to nothing when NUMA is off. */ -void __init remapped_pgdat_init(void) +static void __init remapped_pgdat_init(void) { int nid; @@ -528,7 +519,6 @@ void __init setup_arch(char **cmdline_p) #endif ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); - drive_info = DRIVE_INFO; screen_info = SCREEN_INFO; edid_info = EDID_INFO; apm_info.bios = APM_BIOS_INFO; @@ -611,6 +601,8 @@ void __init setup_arch(char **cmdline_p) * NOTE: at this point the bootmem allocator is fully available. 
*/ + paravirt_post_allocator_init(); + dmi_scan_machine(); #ifdef CONFIG_X86_GENERICARCH @@ -648,6 +640,7 @@ void __init setup_arch(char **cmdline_p) #endif e820_register_memory(); + e820_mark_nosave_regions(); #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index d574e38f0f7..f5dd85656c1 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -199,6 +199,13 @@ asmlinkage int sys_sigreturn(unsigned long __unused) return eax; badframe: + if (show_unhandled_signals && printk_ratelimit()) + printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx" + " esp:%lx oeax:%lx\n", + current->pid > 1 ? KERN_INFO : KERN_EMERG, + current->comm, current->pid, frame, regs->eip, + regs->esp, regs->orig_eax); + force_sig(SIGSEGV, current); return 0; } diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 6299c080f6e..2d35d850202 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -22,6 +22,7 @@ #include <asm/mtrr.h> #include <asm/tlbflush.h> +#include <asm/mmu_context.h> #include <mach_apic.h> /* @@ -249,13 +250,13 @@ static unsigned long flush_va; static DEFINE_SPINLOCK(tlbstate_lock); /* - * We cannot call mmdrop() because we are in interrupt context, + * We cannot call mmdrop() because we are in interrupt context, * instead update mm->cpu_vm_mask. * * We need to reload %cr3 since the page tables may be going * away from under us.. */ -static inline void leave_mm (unsigned long cpu) +void leave_mm(unsigned long cpu) { if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) BUG(); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 0b2954534b8..e4f61d1c624 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -148,7 +148,7 @@ void __init smp_alloc_memory(void) * a given CPU */ -static void __cpuinit smp_store_cpu_info(int id) +void __cpuinit smp_store_cpu_info(int id) { struct cpuinfo_x86 *c = cpu_data + id; @@ -308,8 +308,7 @@ cpumask_t cpu_coregroup_map(int cpu) /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; -static inline void -set_cpu_sibling_map(int cpu) +void __cpuinit set_cpu_sibling_map(int cpu) { int i; struct cpuinfo_x86 *c = cpu_data; @@ -1144,8 +1143,7 @@ void __init native_smp_prepare_boot_cpu(void) } #ifdef CONFIG_HOTPLUG_CPU -static void -remove_siblinginfo(int cpu) +void remove_siblinginfo(int cpu) { int sibling; struct cpuinfo_x86 *c = cpu_data; diff --git a/arch/i386/kernel/smpcommon.c b/arch/i386/kernel/smpcommon.c index 1868ae18eb4..bbfe85a0f69 100644 --- a/arch/i386/kernel/smpcommon.c +++ b/arch/i386/kernel/smpcommon.c @@ -47,7 +47,7 @@ int smp_call_function(void (*func) (void *info), void *info, int nonatomic, EXPORT_SYMBOL(smp_call_function); /** - * smp_call_function_single - Run a function on another CPU + * smp_call_function_single - Run a function on a specific CPU * @cpu: The target CPU. Cannot be the calling CPU. * @func: The function to run. This must be fast and non-blocking. * @info: An arbitrary pointer to pass to the function. 
@@ -66,9 +66,11 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, int ret; int me = get_cpu(); if (cpu == me) { - WARN_ON(1); + local_irq_disable(); + func(info); + local_irq_enable(); put_cpu(); - return -EBUSY; + return 0; } ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index bf6adce5226..8344c70adf6 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -323,3 +323,4 @@ ENTRY(sys_call_table) .long sys_signalfd .long sys_timerfd .long sys_eventfd + .long sys_fallocate diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index ff4ee6f3326..6deb159d08e 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -336,7 +336,9 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk) int in_gate_area(struct task_struct *task, unsigned long addr) { - return 0; + const struct vm_area_struct *vma = get_gate_vma(task); + + return vma && addr >= vma->vm_start && addr < vma->vm_end; } int in_gate_area_no_task(unsigned long addr) diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index a665df61f08..19a6c678d02 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -207,55 +207,9 @@ unsigned long read_persistent_clock(void) return retval; } -static void sync_cmos_clock(unsigned long dummy); - -static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); -int no_sync_cmos_clock; - -static void sync_cmos_clock(unsigned long dummy) -{ - struct timeval now, next; - int fail = 1; - - /* - * If we have an externally synchronized Linux clock, then update - * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be - * called as close as possible to 500 ms before the new second starts. - * This code is run on a timer. If the clock is set, that timer - * may not expire at the correct time. Thus, we adjust... - */ - if (!ntp_synced()) - /* - * Not synced, exit, do not restart a timer (if one is - * running, let it run out). 
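The smpcommon.c fix above makes smp_call_function_single() on the calling CPU run func directly with interrupts disabled, mimicking IPI context, instead of WARNing and returning -EBUSY. The shape of that fast path in a sketch (the helpers are user-space stand-ins):

#include <stdio.h>

static int this_cpu = 0;	/* pretend we run on CPU 0 */

static void irq_disable(void) { puts("  irqs off"); }
static void irq_enable(void) { puts("  irqs on"); }

/* cpu == self runs func locally under irq-off, like a real IPI
 * handler would; any other cpu takes the cross-call path. */
static int call_single(int cpu, void (*func)(void *), void *info)
{
	if (cpu == this_cpu) {
		irq_disable();
		func(info);
		irq_enable();
		return 0;
	}
	printf("IPI to cpu %d\n", cpu);
	return 0;
}

static void hello(void *info)
{
	printf("  running \"%s\"\n", (const char *)info);
}

int main(void)
{
	call_single(0, hello, "local fast path");
	call_single(1, hello, "remote path");
	return 0;
}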
- */ - return; - - do_gettimeofday(&now); - if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 && - now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) - fail = set_rtc_mmss(now.tv_sec); - - next.tv_usec = USEC_AFTER - now.tv_usec; - if (next.tv_usec <= 0) - next.tv_usec += USEC_PER_SEC; - - if (!fail) - next.tv_sec = 659; - else - next.tv_sec = 0; - - if (next.tv_usec >= USEC_PER_SEC) { - next.tv_sec++; - next.tv_usec -= USEC_PER_SEC; - } - mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next)); -} - -void notify_arch_cmos_timer(void) +int update_persistent_clock(struct timespec now) { - if (!no_sync_cmos_clock) - mod_timer(&sync_cmos_timer, jiffies + 1); + return set_rtc_mmss(now.tv_sec); } extern void (*late_time_init)(void); diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 90da0575fcf..cfffe3dd9e8 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -41,6 +41,10 @@ #include <linux/mca.h> #endif +#if defined(CONFIG_EDAC) +#include <linux/edac.h> +#endif + #include <asm/processor.h> #include <asm/system.h> #include <asm/io.h> @@ -148,7 +152,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!stack) { unsigned long dummy; stack = &dummy; - if (task && task != current) + if (task != current) stack = (unsigned long *)task->thread.esp; } @@ -207,6 +211,7 @@ static void print_trace_address(void *data, unsigned long addr) { printk("%s [<%08lx>] ", (char *)data, addr); print_symbol("%s\n", addr); + touch_nmi_watchdog(); } static struct stacktrace_ops print_trace_ops = { @@ -390,7 +395,7 @@ void die(const char * str, struct pt_regs * regs, long err) unsigned long esp; unsigned short ss; - report_bug(regs->eip); + report_bug(regs->eip, regs); printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); #ifdef CONFIG_PREEMPT @@ -433,6 +438,7 @@ void die(const char * str, struct pt_regs * regs, long err) bust_spinlocks(0); die.lock_owner = -1; + add_taint(TAINT_DIE); spin_unlock_irqrestore(&die.lock, flags); if (!regs) @@ -517,10 +523,12 @@ fastcall void do_##name(struct pt_regs * regs, long error_code) \ do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ } -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \ fastcall void do_##name(struct pt_regs * regs, long error_code) \ { \ siginfo_t info; \ + if (irq) \ + local_irq_enable(); \ info.si_signo = signr; \ info.si_errno = 0; \ info.si_code = sicode; \ @@ -560,13 +568,13 @@ DO_VM86_ERROR( 3, SIGTRAP, "int3", int3) #endif DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip) +DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip, 0) DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR(12, SIGBUS, "stack segment", stack_segment) -DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) -DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0) +DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) +DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1) fastcall void __kprobes do_general_protection(struct pt_regs * regs, long error_code) @@ -610,6 +618,13 @@ fastcall void __kprobes 
do_general_protection(struct pt_regs * regs, current->thread.error_code = error_code; current->thread.trap_no = 13; + if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) && + printk_ratelimit()) + printk(KERN_INFO + "%s[%d] general protection eip:%lx esp:%lx error:%lx\n", + current->comm, current->pid, + regs->eip, regs->esp, error_code); + force_sig(SIGSEGV, current); return; @@ -635,6 +650,14 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " "CPU %d.\n", reason, smp_processor_id()); printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); + +#if defined(CONFIG_EDAC) + if(edac_handler_set()) { + edac_atomic_assert_error(); + return; + } +#endif + if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); @@ -752,6 +775,8 @@ static __kprobes void default_do_nmi(struct pt_regs * regs) reassert_nmi(); } +static int ignore_nmis; + fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code) { int cpu; @@ -762,11 +787,24 @@ fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code) ++nmi_count(cpu); - default_do_nmi(regs); + if (!ignore_nmis) + default_do_nmi(regs); nmi_exit(); } +void stop_nmi(void) +{ + acpi_nmi_disable(); + ignore_nmis++; +} + +void restart_nmi(void) +{ + ignore_nmis--; + acpi_nmi_enable(); +} + #ifdef CONFIG_KPROBES fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) { @@ -1053,6 +1091,7 @@ asmlinkage void math_state_restore(void) thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ tsk->fpu_counter++; } +EXPORT_SYMBOL_GPL(math_state_restore); #ifndef CONFIG_MATH_EMULATION diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index ea63a30ca3e..debd7dbb415 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c @@ -27,6 +27,7 @@ static int tsc_enabled; * an extra value to store the TSC freq */ unsigned int tsc_khz; +EXPORT_SYMBOL_GPL(tsc_khz); int tsc_disable; @@ -58,10 +59,11 @@ __setup("notsc", tsc_setup); */ static int tsc_unstable; -static inline int check_tsc_unstable(void) +int check_tsc_unstable(void) { return tsc_unstable; } +EXPORT_SYMBOL_GPL(check_tsc_unstable); /* Accellerators for sched_clock() * convert from cycles(64bits) => nanoseconds (64bits) @@ -84,7 +86,7 @@ static inline int check_tsc_unstable(void) * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -static unsigned long cyc2ns_scale __read_mostly; +unsigned long cyc2ns_scale __read_mostly; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ @@ -93,15 +95,10 @@ static inline void set_cyc2ns_scale(unsigned long cpu_khz) cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; } -static inline unsigned long long cycles_2_ns(unsigned long long cyc) -{ - return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; -} - /* * Scheduler clock - returns current time in nanosec units. 
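tsc.c above un-statics cyc2ns_scale so the fixed-point cycles-to-nanoseconds conversion can be shared (vmi_sched_clock() now feeds its cycle counter through cycles_2_ns() too). The arithmetic, worked through for an assumed 2 GHz part:

#include <stdio.h>

#define CYC2NS_SCALE_FACTOR 10	/* 2^10, as in the patch */

static unsigned long cyc2ns_scale;

static void set_cyc2ns_scale(unsigned long cpu_khz)
{
	/* ns per cycle = 10^6 / cpu_khz, kept with 10 fractional bits */
	cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR) / cpu_khz;
}

static unsigned long long cycles_2_ns(unsigned long long cyc)
{
	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
}

int main(void)
{
	set_cyc2ns_scale(2000000);	/* 2 GHz example */
	/* 2e9 cycles should come out as ~1 second */
	printf("%llu ns\n", cycles_2_ns(2000000000ULL));
	return 0;
}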
*/ -unsigned long long sched_clock(void) +unsigned long long native_sched_clock(void) { unsigned long long this_offset; @@ -118,12 +115,24 @@ unsigned long long sched_clock(void) return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); /* read the Time Stamp Counter: */ - get_scheduled_cycles(this_offset); + rdtscll(this_offset); /* return the value in ns */ return cycles_2_ns(this_offset); } +/* We need to define a real function for sched_clock, to override the + weak default version */ +#ifdef CONFIG_PARAVIRT +unsigned long long sched_clock(void) +{ + return paravirt_sched_clock(); +} +#else +unsigned long long sched_clock(void) + __attribute__((alias("native_sched_clock"))); +#endif + unsigned long native_calculate_cpu_khz(void) { unsigned long long start, end; diff --git a/arch/i386/kernel/verify_cpu.S b/arch/i386/kernel/verify_cpu.S deleted file mode 100644 index f1d1eacf4ab..00000000000 --- a/arch/i386/kernel/verify_cpu.S +++ /dev/null @@ -1,94 +0,0 @@ -/* Check if CPU has some minimum CPUID bits - This runs in 16bit mode so that the caller can still use the BIOS - to output errors on the screen */ -#include <asm/cpufeature.h> -#include <asm/msr.h> - -verify_cpu: - pushfl # Save caller passed flags - pushl $0 # Kill any dangerous flags - popfl - -#if CONFIG_X86_MINIMUM_CPU_MODEL >= 4 - pushfl - pop %eax - orl $(1<<18),%eax # try setting AC - push %eax - popfl - pushfl - popl %eax - testl $(1<<18),%eax - jz bad -#endif -#if REQUIRED_MASK1 != 0 - pushfl # standard way to check for cpuid - popl %eax - movl %eax,%ebx - xorl $0x200000,%eax - pushl %eax - popfl - pushfl - popl %eax - cmpl %eax,%ebx - pushfl # standard way to check for cpuid - popl %eax - movl %eax,%ebx - xorl $0x200000,%eax - pushl %eax - popfl - pushfl - popl %eax - cmpl %eax,%ebx - jz bad # REQUIRED_MASK1 != 0 requires CPUID - - movl $0x0,%eax # See if cpuid 1 is implemented - cpuid - cmpl $0x1,%eax - jb bad # no cpuid 1 - -#if REQUIRED_MASK1 & NEED_CMPXCHG64 - /* Some VIA C3s need magic MSRs to enable CX64. 
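For the !CONFIG_PARAVIRT case above, sched_clock() is declared as an alias of native_sched_clock(), overriding the kernel's weak default with no wrapper call. The same linkage trick in a stand-alone file (the return value is a stand-in for the TSC path):

#include <stdio.h>

/* In the kernel, a weak default sched_clock() lives in another file;
 * the strong alias below is what overrides it at link time. */
static unsigned long long native_sched_clock(void)
{
	return 123456789ULL;	/* stand-in for rdtscll() + cycles_2_ns() */
}

/* sched_clock becomes another name for native_sched_clock: callers
 * reach it directly, with no extra call frame. */
unsigned long long sched_clock(void)
	__attribute__((alias("native_sched_clock")));

int main(void)
{
	printf("sched_clock() = %llu\n", sched_clock());
	return 0;
}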
Do this here */ - cmpl $0x746e6543,%ebx # Cent - jne 1f - cmpl $0x48727561,%edx # aurH - jne 1f - cmpl $0x736c7561,%ecx # auls - jne 1f - movl $1,%eax # check model - cpuid - movl %eax,%ebx - shr $8,%ebx - andl $0xf,%ebx - cmp $6,%ebx # check family == 6 - jne 1f - shr $4,%eax - andl $0xf,%eax - cmpl $6,%eax # check model >= 6 - jb 1f - # assume models >= 6 all support this MSR - movl $MSR_VIA_FCR,%ecx - rdmsr - orl $((1<<1)|(1<<7)),%eax # enable CMPXCHG64 and PGE - wrmsr -1: -#endif - movl $0x1,%eax # Does the cpu have what it takes - cpuid - -#if CONFIG_X86_MINIMUM_CPU_MODEL > 4 -#error add proper model checking here -#endif - - andl $REQUIRED_MASK1,%edx - xorl $REQUIRED_MASK1,%edx - jnz bad -#endif /* REQUIRED_MASK1 */ - - popfl - xor %eax,%eax - ret - -bad: - popfl - movl $1,%eax - ret diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c index c12720d7cbc..72042bb7ec9 100644 --- a/arch/i386/kernel/vmi.c +++ b/arch/i386/kernel/vmi.c @@ -362,7 +362,7 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) } #endif -static void vmi_allocate_pt(u32 pfn) +static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn) { vmi_set_page_type(pfn, VMI_PAGE_L1); vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); @@ -891,7 +891,7 @@ static inline int __init activate_vmi(void) paravirt_ops.setup_boot_clock = vmi_time_bsp_init; paravirt_ops.setup_secondary_clock = vmi_time_ap_init; #endif - paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; + paravirt_ops.sched_clock = vmi_sched_clock; paravirt_ops.get_cpu_khz = vmi_cpu_khz; /* We have true wallclock functions; disable CMOS clock sync */ diff --git a/arch/i386/kernel/vmiclock.c b/arch/i386/kernel/vmiclock.c index 26a37f8a876..b1b5ab08b26 100644 --- a/arch/i386/kernel/vmiclock.c +++ b/arch/i386/kernel/vmiclock.c @@ -32,6 +32,7 @@ #include <asm/apicdef.h> #include <asm/apic.h> #include <asm/timer.h> +#include <asm/i8253.h> #include <irq_vectors.h> #include "io_ports.h" @@ -64,10 +65,10 @@ int vmi_set_wallclock(unsigned long now) return 0; } -/* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */ -unsigned long long vmi_get_sched_cycles(void) +/* paravirt_ops.sched_clock = vmi_sched_clock */ +unsigned long long vmi_sched_clock(void) { - return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE); + return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); } /* paravirt_ops.get_cpu_khz = vmi_cpu_khz */ @@ -142,6 +143,7 @@ static void vmi_timer_set_mode(enum clock_event_mode mode, switch (mode) { case CLOCK_EVT_MODE_ONESHOT: + case CLOCK_EVT_MODE_RESUME: break; case CLOCK_EVT_MODE_PERIODIC: cycles_per_hz = vmi_timer_ops.get_cycle_frequency(); diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index aa87b06c7c8..7d72cce0052 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -60,7 +60,9 @@ SECTIONS __stop___ex_table = .; } - BUG_TABLE + NOTES :text :note + + BUG_TABLE :text . = ALIGN(4); .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { @@ -88,6 +90,7 @@ SECTIONS . = ALIGN(4096); .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { + *(.data.page_aligned) *(.data.idt) } @@ -180,6 +183,7 @@ SECTIONS .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { __per_cpu_start = .; *(.data.percpu) + *(.data.percpu.shared_aligned) __per_cpu_end = .; } . 
= ALIGN(4096); @@ -206,6 +210,4 @@ SECTIONS STABS_DEBUG DWARF_DEBUG - - NOTES } diff --git a/arch/i386/kernel/vsyscall-note.S b/arch/i386/kernel/vsyscall-note.S index d4b5be4f3d5..07c0daf7823 100644 --- a/arch/i386/kernel/vsyscall-note.S +++ b/arch/i386/kernel/vsyscall-note.S @@ -3,23 +3,43 @@ * Here we can supply some information useful to userland. */ -#include <linux/uts.h> #include <linux/version.h> +#include <linux/elfnote.h> -#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \ - .section name, flags; \ - .balign 4; \ - .long 1f - 0f; /* name length */ \ - .long 3f - 2f; /* data length */ \ - .long type; /* note type */ \ -0: .asciz vendor; /* vendor name */ \ -1: .balign 4; \ -2: +/* Ideally this would use UTS_NAME, but using a quoted string here + doesn't work. Remember to change this when changing the + kernel's name. */ +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END -#define ASM_ELF_NOTE_END \ -3: .balign 4; /* pad out section */ \ - .previous +#ifdef CONFIG_XEN +/* + * Add a special note telling glibc's dynamic linker a fake hardware + * flavor that it will use to choose the search path for libraries in the + * same way it uses real hardware capabilities like "mmx". + * We supply "nosegneg" as the fake capability, to indicate that we + * do not like negative offsets in instructions using segment overrides, + * since we implement those inefficiently. This makes it possible to + * install libraries optimized to avoid those access patterns in someplace + * like /lib/i686/tls/nosegneg. Note that an /etc/ld.so.conf.d/file + * corresponding to the bits here is needed to make ldconfig work right. + * It should contain: + * hwcap 1 nosegneg + * to match the mapping of bit to name that we give here. + * + * At runtime, the fake hardware feature will be considered to be present + * if its bit is set in the mask word. So, we start with the mask 0, and + * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen. + */ - ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0) - .long LINUX_VERSION_CODE - ASM_ELF_NOTE_END +#include "../xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */ + + .globl VDSO_NOTE_MASK +ELFNOTE_START(GNU, 2, "a") + .long 1 /* ncaps */ +VDSO_NOTE_MASK: + .long 0 /* mask */ + .byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */ +ELFNOTE_END +#endif |
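The vsyscall-note.S rewrite above emits standard ELF notes via ELFNOTE_START/ELFNOTE_END instead of hand-rolled macros; each note is namesz/descsz/type followed by a 4-byte-padded name and descriptor. A sketch of the kernel-version note's layout (the version value is only an example):

#include <stdio.h>
#include <stdint.h>

/* Standard ELF note header, followed by padded name and descriptor -
 * the byte layout that ELFNOTE_START/ELFNOTE_END emit above. */
struct elf_note {
	uint32_t namesz;	/* strlen("Linux") + 1 */
	uint32_t descsz;	/* sizeof(uint32_t)    */
	uint32_t type;		/* 0 for this note     */
};

int main(void)
{
	struct {
		struct elf_note hdr;
		char name[8];		/* "Linux\0" padded to 4 bytes */
		uint32_t version;	/* LINUX_VERSION_CODE */
	} note = {
		{ 6, 4, 0 }, "Linux",
		(2 << 16) | (6 << 8) | 22	/* e.g. 2.6.22 */
	};

	printf("note name %s, version %#x, %zu bytes\n",
	       note.name, (unsigned)note.version, sizeof(note));
	return 0;
}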