diff options
Diffstat (limited to 'arch/i386/kernel')
45 files changed, 633 insertions, 659 deletions
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 280898b045b..a574cd2c8b6 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -621,8 +621,6 @@ static int __init acpi_parse_sbf(struct acpi_table_header *table) static int __init acpi_parse_hpet(struct acpi_table_header *table) { struct acpi_table_hpet *hpet_tbl; - struct resource *hpet_res; - resource_size_t res_start; hpet_tbl = (struct acpi_table_hpet *)table; if (!hpet_tbl) { @@ -636,29 +634,10 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) return -1; } -#define HPET_RESOURCE_NAME_SIZE 9 - hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); - if (hpet_res) { - memset(hpet_res, 0, sizeof(*hpet_res)); - hpet_res->name = (void *)&hpet_res[1]; - hpet_res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, - "HPET %u", hpet_tbl->sequence); - hpet_res->end = (1 * 1024) - 1; - } - hpet_address = hpet_tbl->address.address; printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", hpet_tbl->id, hpet_address); - res_start = hpet_address; - - if (hpet_res) { - hpet_res->start = res_start; - hpet_res->end += res_start; - insert_resource(&iomem_resource, hpet_res); - } - return 0; } #else diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S index b781b38131c..a2295a34b2c 100644 --- a/arch/i386/kernel/acpi/wakeup.S +++ b/arch/i386/kernel/acpi/wakeup.S @@ -230,6 +230,7 @@ bogus_magic: # ENTRY(acpi_copy_wakeup_routine) + pushl %ebx sgdt saved_gdt sidt saved_idt sldt saved_ldt @@ -263,6 +264,7 @@ ENTRY(acpi_copy_wakeup_routine) movl %edx, video_flags - wakeup_start (%eax) movl $0x12345678, real_magic - wakeup_start (%eax) movl $0x12345678, saved_magic + popl %ebx ret save_registers: diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 4112afe712b..47001d50a08 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -222,6 +222,7 @@ 
#include <linux/capability.h> #include <linux/device.h> #include <linux/kernel.h> +#include <linux/freezer.h> #include <linux/smp.h> #include <linux/dmi.h> #include <linux/suspend.h> @@ -2311,7 +2312,6 @@ static int __init apm_init(void) remove_proc_entry("apm", NULL); return err; } - kapmd_task->flags |= PF_NOFREEZE; wake_up_process(kapmd_task); if (num_online_cpus() > 1 && !smp ) { diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 27a776c9044..25f7eb51392 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c @@ -17,6 +17,8 @@ #include <asm/thread_info.h> #include <asm/elf.h> +#include <xen/interface/xen.h> + #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) @@ -59,6 +61,7 @@ void foo(void) OFFSET(TI_addr_limit, thread_info, addr_limit); OFFSET(TI_restart_block, thread_info, restart_block); OFFSET(TI_sysenter_return, thread_info, sysenter_return); + OFFSET(TI_cpu, thread_info, cpu); BLANK(); OFFSET(GDS_size, Xgt_desc_struct, size); @@ -115,4 +118,10 @@ void foo(void) OFFSET(PARAVIRT_iret, paravirt_ops, iret); OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0); #endif + +#ifdef CONFIG_XEN + BLANK(); + OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); + OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); +#endif } diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile index 74f27a463db..0b6a8551e9e 100644 --- a/arch/i386/kernel/cpu/Makefile +++ b/arch/i386/kernel/cpu/Makefile @@ -8,7 +8,7 @@ obj-y += amd.o obj-y += cyrix.o obj-y += centaur.o obj-y += transmeta.o -obj-y += intel.o intel_cacheinfo.o +obj-y += intel.o intel_cacheinfo.o addon_cpuid_features.o obj-y += rise.o obj-y += nexgen.o obj-y += umc.o diff --git a/arch/i386/kernel/cpu/addon_cpuid_features.c b/arch/i386/kernel/cpu/addon_cpuid_features.c new file mode 100644 index 00000000000..3e91d3ee26e --- /dev/null +++ b/arch/i386/kernel/cpu/addon_cpuid_features.c @@ -0,0 +1,50 @@ + 
+/* + * Routines to indentify additional cpu features that are scattered in + * cpuid space. + */ + +#include <linux/cpu.h> + +#include <asm/processor.h> + +struct cpuid_bit { + u16 feature; + u8 reg; + u8 bit; + u32 level; +}; + +enum cpuid_regs { + CR_EAX = 0, + CR_ECX, + CR_EDX, + CR_EBX +}; + +void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) +{ + u32 max_level; + u32 regs[4]; + const struct cpuid_bit *cb; + + static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, + { 0, 0, 0, 0 } + }; + + for (cb = cpuid_bits; cb->feature; cb++) { + + /* Verify that the level is valid */ + max_level = cpuid_eax(cb->level & 0xffff0000); + if (max_level < cb->level || + max_level > (cb->level | 0xffff)) + continue; + + cpuid(cb->level, &regs[CR_EAX], &regs[CR_EBX], + &regs[CR_ECX], &regs[CR_EDX]); + + if (regs[cb->reg] & (1 << cb->bit)) + set_bit(cb->feature, c->x86_capability); + } +} diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 794d593c47e..e5419a9dec8 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -353,6 +353,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c) if ( xlvl >= 0x80000004 ) get_model_name(c); /* Default name */ } + + init_scattered_cpuid_features(c); } early_intel_workaround(c); diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig index e912aae9473..094118ba00d 100644 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig @@ -90,10 +90,17 @@ config X86_POWERNOW_K8 If in doubt, say N. config X86_POWERNOW_K8_ACPI - bool - depends on X86_POWERNOW_K8 && ACPI_PROCESSOR - depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m) + bool "ACPI Support" + select ACPI_PROCESSOR + depends on X86_POWERNOW_K8 default y + help + This provides access to the K8s Processor Performance States via ACPI. 
+ This driver is probably required for CPUFreq to work with multi-socket and + SMP systems. It is not required on at least some single-socket yet + multi-core systems, even if SMP is enabled. + + It is safe to say Y here. config X86_GX_SUSPMOD tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" @@ -109,7 +116,7 @@ config X86_GX_SUSPMOD config X86_SPEEDSTEP_CENTRINO tristate "Intel Enhanced SpeedStep" select CPU_FREQ_TABLE - select X86_SPEEDSTEP_CENTRINO_TABLE if (!X86_SPEEDSTEP_CENTRINO_ACPI) + select X86_SPEEDSTEP_CENTRINO_TABLE help This adds the CPUFreq driver for Enhanced SpeedStep enabled mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However, @@ -121,20 +128,6 @@ config X86_SPEEDSTEP_CENTRINO If in doubt, say N. -config X86_SPEEDSTEP_CENTRINO_ACPI - bool "Use ACPI tables to decode valid frequency/voltage (deprecated)" - depends on X86_SPEEDSTEP_CENTRINO && ACPI_PROCESSOR - depends on !(X86_SPEEDSTEP_CENTRINO = y && ACPI_PROCESSOR = m) - help - This is deprecated and this functionality is now merged into - acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of - speedstep_centrino. - Use primarily the information provided in the BIOS ACPI tables - to determine valid CPU frequency and voltage pairings. It is - required for the driver to work on non-Banias CPUs. - - If in doubt, say Y. - config X86_SPEEDSTEP_CENTRINO_TABLE bool "Built-in tables for Banias CPUs" depends on X86_SPEEDSTEP_CENTRINO @@ -230,7 +223,7 @@ comment "shared options" config X86_ACPI_CPUFREQ_PROC_INTF bool "/proc/acpi/processor/../performance interface (deprecated)" depends on PROC_FS - depends on X86_ACPI_CPUFREQ || X86_SPEEDSTEP_CENTRINO_ACPI || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI + depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI help This enables the deprecated /proc/acpi/processor/../performance interface. 
While it is helpful for debugging, the generic, diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 10baa3501ed..18c8b67ea3a 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -167,11 +167,13 @@ static void do_drv_read(struct drv_cmd *cmd) static void do_drv_write(struct drv_cmd *cmd) { - u32 h = 0; + u32 lo, hi; switch (cmd->type) { case SYSTEM_INTEL_MSR_CAPABLE: - wrmsr(cmd->addr.msr.reg, cmd->val, h); + rdmsr(cmd->addr.msr.reg, lo, hi); + lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE); + wrmsr(cmd->addr.msr.reg, lo, hi); break; case SYSTEM_IO_CAPABLE: acpi_os_write_port((acpi_io_address)cmd->addr.io.port, @@ -372,7 +374,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, struct cpufreq_freqs freqs; cpumask_t online_policy_cpus; struct drv_cmd cmd; - unsigned int msr; unsigned int next_state = 0; /* Index into freq_table */ unsigned int next_perf_state = 0; /* Index into perf table */ unsigned int i; @@ -417,11 +418,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; cmd.addr.msr.reg = MSR_IA32_PERF_CTL; - msr = - (u32) perf->states[next_perf_state]. 
- control & INTEL_MSR_RANGE; - cmd.val = get_cur_val(online_policy_cpus); - cmd.val = (cmd.val & ~INTEL_MSR_RANGE) | msr; + cmd.val = (u32) perf->states[next_perf_state].control; break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c index 0d49d73d1b7..66acd503991 100644 --- a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -391,8 +391,6 @@ static struct cpufreq_driver nforce2_driver = { */ static unsigned int nforce2_detect_chipset(void) { - u8 revision; - nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, PCI_ANY_ID, PCI_ANY_ID, NULL); @@ -400,10 +398,8 @@ static unsigned int nforce2_detect_chipset(void) if (nforce2_chipset_dev == NULL) return -ENODEV; - pci_read_config_byte(nforce2_chipset_dev, PCI_REVISION_ID, &revision); - printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n", - revision); + nforce2_chipset_dev->revision); printk(KERN_INFO "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n"); diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c index 6667e9cceb9..194144539a6 100644 --- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c @@ -115,7 +115,6 @@ struct gxfreq_params { u8 pci_suscfg; u8 pci_pmer1; u8 pci_pmer2; - u8 pci_rev; struct pci_dev *cs55x0; }; @@ -276,7 +275,7 @@ static void gx_set_cpuspeed(unsigned int khz) pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */ pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1); - if (gx_params->pci_rev < 0x10) { /* CS5530(rev 1.2, 1.3) */ + if (gx_params->cs55x0->revision < 0x10) { /* CS5530(rev 1.2, 1.3) */ suscfg = gx_params->pci_suscfg | SUSMOD; } else { /* CS5530A,B.. 
*/ suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE; @@ -471,7 +470,6 @@ static int __init cpufreq_gx_init(void) pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration)); - pci_read_config_byte(params->cs55x0, PCI_REVISION_ID, &params->pci_rev); if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { kfree(params); diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index a3df9c039bd..ef8f0bc3fc7 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -29,6 +29,7 @@ #include <linux/pci.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/delay.h> #include <asm/msr.h> #include <asm/timex.h> @@ -55,7 +56,6 @@ /* Flags */ #define USE_ACPI_C3 (1 << 1) #define USE_NORTHBRIDGE (1 << 2) -#define USE_VT8235 (1 << 3) static int cpu_model; static unsigned int numscales=16; @@ -63,22 +63,19 @@ static unsigned int fsb; static const struct mV_pos *vrm_mV_table; static const unsigned char *mV_vrm_table; -struct f_msr { - u8 vrm; - u8 pos; -}; -static struct f_msr f_msr_table[32]; static unsigned int highest_speed, lowest_speed; /* kHz */ static unsigned int minmult, maxmult; static int can_scale_voltage; static struct acpi_processor *pr = NULL; static struct acpi_processor_cx *cx = NULL; +static u32 acpi_regs_addr; static u8 longhaul_flags; -static u8 longhaul_pos; +static unsigned int longhaul_index; /* Module parameters */ static int scale_voltage; +static int disable_acpi_c3; #define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) @@ -144,7 +141,7 @@ static void do_longhaul1(unsigned int clock_ratio_index) rdmsrl(MSR_VIA_BCR2, bcr2.val); /* Enable software clock multiplier */ bcr2.bits.ESOFTBF = 1; - bcr2.bits.CLOCKMUL = clock_ratio_index; + bcr2.bits.CLOCKMUL = clock_ratio_index & 0xff; /* Sync to timer tick */ safe_halt(); @@ -163,14 +160,12 @@ static void do_longhaul1(unsigned int clock_ratio_index) /* For processor with Longhaul MSR */ -static void do_powersaver(int cx_address, unsigned int clock_ratio_index) +static void do_powersaver(int cx_address, unsigned int clock_ratio_index, + unsigned int dir) { union msr_longhaul longhaul; - u8 dest_pos; u32 t; - dest_pos = f_msr_table[clock_ratio_index].pos; - rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); /* Setup new frequency */ longhaul.bits.RevisionKey = longhaul.bits.RevisionID; @@ -178,11 +173,11 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; /* Setup new voltage */ if (can_scale_voltage) - longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm; + longhaul.bits.SoftVID = (clock_ratio_index >> 8) & 0x1f; /* Sync to timer tick */ safe_halt(); /* Raise voltage if necessary */ - if (can_scale_voltage && longhaul_pos < dest_pos) { + if (can_scale_voltage && dir) { longhaul.bits.EnableSoftVID = 1; wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); /* Change voltage */ @@ -199,7 +194,6 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) } longhaul.bits.EnableSoftVID = 0; wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - longhaul_pos = dest_pos; } /* Change frequency on next halt or sleep */ @@ -220,7 +214,7 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); /* Reduce voltage if necessary */ - if (can_scale_voltage && longhaul_pos > dest_pos) { + if (can_scale_voltage && !dir) { longhaul.bits.EnableSoftVID = 1; 
wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); /* Change voltage */ @@ -237,7 +231,6 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) } longhaul.bits.EnableSoftVID = 0; wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - longhaul_pos = dest_pos; } } @@ -248,25 +241,28 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) * Sets a new clock ratio. */ -static void longhaul_setstate(unsigned int clock_ratio_index) +static void longhaul_setstate(unsigned int table_index) { + unsigned int clock_ratio_index; int speed, mult; struct cpufreq_freqs freqs; - static unsigned int old_ratio=-1; unsigned long flags; unsigned int pic1_mask, pic2_mask; + u16 bm_status = 0; + u32 bm_timeout = 1000; + unsigned int dir = 0; - if (old_ratio == clock_ratio_index) - return; - old_ratio = clock_ratio_index; - - mult = clock_ratio[clock_ratio_index]; + clock_ratio_index = longhaul_table[table_index].index; + /* Safety precautions */ + mult = clock_ratio[clock_ratio_index & 0x1f]; if (mult == -1) return; - speed = calc_speed(mult); if ((speed > highest_speed) || (speed < lowest_speed)) return; + /* Voltage transition before frequency transition? */ + if (can_scale_voltage && longhaul_index < table_index) + dir = 1; freqs.old = calc_speed(longhaul_get_cpu_mult()); freqs.new = speed; @@ -285,11 +281,24 @@ static void longhaul_setstate(unsigned int clock_ratio_index) outb(0xFF,0xA1); /* Overkill */ outb(0xFE,0x21); /* TMR0 only */ + /* Wait while PCI bus is busy. 
*/ + if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE + || ((pr != NULL) && pr->flags.bm_control))) { + bm_status = inw(acpi_regs_addr); + bm_status &= 1 << 4; + while (bm_status && bm_timeout) { + outw(1 << 4, acpi_regs_addr); + bm_timeout--; + bm_status = inw(acpi_regs_addr); + bm_status &= 1 << 4; + } + } + if (longhaul_flags & USE_NORTHBRIDGE) { /* Disable AGP and PCI arbiters */ outb(3, 0x22); } else if ((pr != NULL) && pr->flags.bm_control) { - /* Disable bus master arbitration */ + /* Disable bus master arbitration */ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1); } switch (longhaul_version) { @@ -314,9 +323,9 @@ static void longhaul_setstate(unsigned int clock_ratio_index) if (longhaul_flags & USE_ACPI_C3) { /* Don't allow wakeup */ acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); - do_powersaver(cx->address, clock_ratio_index); + do_powersaver(cx->address, clock_ratio_index, dir); } else { - do_powersaver(0, clock_ratio_index); + do_powersaver(0, clock_ratio_index, dir); } break; } @@ -336,6 +345,9 @@ static void longhaul_setstate(unsigned int clock_ratio_index) freqs.new = calc_speed(longhaul_get_cpu_mult()); cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + if (!bm_timeout) + printk(KERN_INFO PFX "Warning: Timeout while waiting for idle PCI bus.\n"); } /* @@ -369,7 +381,8 @@ static int guess_fsb(int mult) static int __init longhaul_get_ranges(void) { - unsigned int j, k = 0; + unsigned int i, j, k = 0; + unsigned int ratio; int mult; /* Get current frequency */ @@ -423,8 +436,7 @@ static int __init longhaul_get_ranges(void) if(!longhaul_table) return -ENOMEM; - for (j=0; j < numscales; j++) { - unsigned int ratio; + for (j = 0; j < numscales; j++) { ratio = clock_ratio[j]; if (ratio == -1) continue; @@ -434,13 +446,41 @@ static int __init longhaul_get_ranges(void) longhaul_table[k].index = j; k++; } + if (k <= 1) { + kfree(longhaul_table); + return -ENODEV; + } + /* Sort */ + for (j = 0; j < k - 1; j++) { + unsigned int min_f, min_i; + 
min_f = longhaul_table[j].frequency; + min_i = j; + for (i = j + 1; i < k; i++) { + if (longhaul_table[i].frequency < min_f) { + min_f = longhaul_table[i].frequency; + min_i = i; + } + } + if (min_i != j) { + unsigned int temp; + temp = longhaul_table[j].frequency; + longhaul_table[j].frequency = longhaul_table[min_i].frequency; + longhaul_table[min_i].frequency = temp; + temp = longhaul_table[j].index; + longhaul_table[j].index = longhaul_table[min_i].index; + longhaul_table[min_i].index = temp; + } + } longhaul_table[k].frequency = CPUFREQ_TABLE_END; - if (!k) { - kfree (longhaul_table); - return -EINVAL; - } + /* Find index we are running on */ + for (j = 0; j < k; j++) { + if (clock_ratio[longhaul_table[j].index & 0x1f] == mult) { + longhaul_index = j; + break; + } + } return 0; } @@ -448,7 +488,7 @@ static int __init longhaul_get_ranges(void) static void __init longhaul_setup_voltagescaling(void) { union msr_longhaul longhaul; - struct mV_pos minvid, maxvid; + struct mV_pos minvid, maxvid, vid; unsigned int j, speed, pos, kHz_step, numvscales; int min_vid_speed; @@ -459,11 +499,11 @@ static void __init longhaul_setup_voltagescaling(void) } if (!longhaul.bits.VRMRev) { - printk (KERN_INFO PFX "VRM 8.5\n"); + printk(KERN_INFO PFX "VRM 8.5\n"); vrm_mV_table = &vrm85_mV[0]; mV_vrm_table = &mV_vrm85[0]; } else { - printk (KERN_INFO PFX "Mobile VRM\n"); + printk(KERN_INFO PFX "Mobile VRM\n"); if (cpu_model < CPU_NEHEMIAH) return; vrm_mV_table = &mobilevrm_mV[0]; @@ -523,7 +563,6 @@ static void __init longhaul_setup_voltagescaling(void) /* Calculate kHz for one voltage step */ kHz_step = (highest_speed - min_vid_speed) / numvscales; - j = 0; while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { speed = longhaul_table[j].frequency; @@ -531,15 +570,14 @@ static void __init longhaul_setup_voltagescaling(void) pos = (speed - min_vid_speed) / kHz_step + minvid.pos; else pos = minvid.pos; - f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos]; - 
f_msr_table[longhaul_table[j].index].pos = pos; + longhaul_table[j].index |= mV_vrm_table[pos] << 8; + vid = vrm_mV_table[mV_vrm_table[pos]]; + printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", speed, j, vid.mV); j++; } - longhaul_pos = maxvid.pos; can_scale_voltage = 1; - printk(KERN_INFO PFX "Voltage scaling enabled. " - "Use of \"conservative\" governor is highly recommended.\n"); + printk(KERN_INFO PFX "Voltage scaling enabled.\n"); } @@ -553,15 +591,44 @@ static int longhaul_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { unsigned int table_index = 0; - unsigned int new_clock_ratio = 0; + unsigned int i; + unsigned int dir = 0; + u8 vid, current_vid; if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index)) return -EINVAL; - new_clock_ratio = longhaul_table[table_index].index & 0xFF; - - longhaul_setstate(new_clock_ratio); + /* Don't set same frequency again */ + if (longhaul_index == table_index) + return 0; + if (!can_scale_voltage) + longhaul_setstate(table_index); + else { + /* On test system voltage transitions exceeding single + * step up or down were turning motherboard off. Both + * "ondemand" and "userspace" are unsafe. C7 is doing + * this in hardware, C3 is old and we need to do this + * in software. 
*/ + i = longhaul_index; + current_vid = (longhaul_table[longhaul_index].index >> 8) & 0x1f; + if (table_index > longhaul_index) + dir = 1; + while (i != table_index) { + vid = (longhaul_table[i].index >> 8) & 0x1f; + if (vid != current_vid) { + longhaul_setstate(i); + current_vid = vid; + msleep(200); + } + if (dir) + i++; + else + i--; + } + longhaul_setstate(table_index); + } + longhaul_index = table_index; return 0; } @@ -590,11 +657,10 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle, static int enable_arbiter_disable(void) { struct pci_dev *dev; - int status; + int status = 1; int reg; u8 pci_cmd; - status = 1; /* Find PLE133 host bridge */ reg = 0x78; dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, @@ -627,13 +693,17 @@ static int enable_arbiter_disable(void) return 0; } -static int longhaul_setup_vt8235(void) +static int longhaul_setup_southbridge(void) { struct pci_dev *dev; u8 pci_cmd; /* Find VT8235 southbridge */ dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); + if (dev == NULL) + /* Find VT8237 southbridge */ + dev = pci_get_device(PCI_VENDOR_ID_VIA, + PCI_DEVICE_ID_VIA_8237, NULL); if (dev != NULL) { /* Set transition time to max */ pci_read_config_byte(dev, 0xec, &pci_cmd); @@ -645,6 +715,14 @@ static int longhaul_setup_vt8235(void) pci_read_config_byte(dev, 0xe5, &pci_cmd); pci_cmd |= 1 << 7; pci_write_config_byte(dev, 0xe5, pci_cmd); + /* Get address of ACPI registers block*/ + pci_read_config_byte(dev, 0x81, &pci_cmd); + if (pci_cmd & 1 << 7) { + pci_read_config_dword(dev, 0x88, &acpi_regs_addr); + acpi_regs_addr &= 0xff00; + printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", acpi_regs_addr); + } + pci_dev_put(dev); return 1; } @@ -657,7 +735,6 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) char *cpuname=NULL; int ret; u32 lo, hi; - int vt8235_present; /* Check what we have on this motherboard */ switch (c->x86_model) { @@ -755,7 +832,7 @@ static int __init 
longhaul_cpu_init(struct cpufreq_policy *policy) }; /* Doesn't hurt */ - vt8235_present = longhaul_setup_vt8235(); + longhaul_setup_southbridge(); /* Find ACPI data for processor */ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, @@ -765,35 +842,29 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) /* Check ACPI support for C3 state */ if (pr != NULL && longhaul_version == TYPE_POWERSAVER) { cx = &pr->power.states[ACPI_STATE_C3]; - if (cx->address > 0 && cx->latency <= 1000) { + if (cx->address > 0 && cx->latency <= 1000) longhaul_flags |= USE_ACPI_C3; - goto print_support_type; - } } + /* Disable if it isn't working */ + if (disable_acpi_c3) + longhaul_flags &= ~USE_ACPI_C3; /* Check if northbridge is friendly */ - if (enable_arbiter_disable()) { + if (enable_arbiter_disable()) longhaul_flags |= USE_NORTHBRIDGE; - goto print_support_type; - } - /* Use VT8235 southbridge if present */ - if (longhaul_version == TYPE_POWERSAVER && vt8235_present) { - longhaul_flags |= USE_VT8235; - goto print_support_type; - } + /* Check ACPI support for bus master arbiter disable */ - if ((pr == NULL) || !(pr->flags.bm_control)) { + if (!(longhaul_flags & USE_ACPI_C3 + || longhaul_flags & USE_NORTHBRIDGE) + && ((pr == NULL) || !(pr->flags.bm_control))) { printk(KERN_ERR PFX "No ACPI support. 
Unsupported northbridge.\n"); return -ENODEV; } -print_support_type: if (longhaul_flags & USE_NORTHBRIDGE) - printk (KERN_INFO PFX "Using northbridge support.\n"); - else if (longhaul_flags & USE_VT8235) - printk (KERN_INFO PFX "Using VT8235 support.\n"); - else - printk (KERN_INFO PFX "Using ACPI support.\n"); + printk(KERN_INFO PFX "Using northbridge support.\n"); + if (longhaul_flags & USE_ACPI_C3) + printk(KERN_INFO PFX "Using ACPI support.\n"); ret = longhaul_get_ranges(); if (ret != 0) @@ -885,6 +956,9 @@ static void __exit longhaul_exit(void) kfree(longhaul_table); } +module_param (disable_acpi_c3, int, 0644); +MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); + module_param (scale_voltage, int, 0644); MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h index 102548f1284..4fcc320997d 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.h +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h @@ -180,7 +180,7 @@ static const int __initdata ezrat_clock_ratio[32] = { -1, /* 0000 -> RESERVED (10.0x) */ 110, /* 0001 -> 11.0x */ - 120, /* 0010 -> 12.0x */ + -1, /* 0010 -> 12.0x */ -1, /* 0011 -> RESERVED (9.0x)*/ 105, /* 0100 -> 10.5x */ 115, /* 0101 -> 11.5x */ @@ -237,7 +237,7 @@ static const int __initdata ezrat_eblcr[32] = { static const int __initdata nehemiah_clock_ratio[32] = { 100, /* 0000 -> 10.0x */ - 160, /* 0001 -> 16.0x */ + -1, /* 0001 -> 16.0x */ 40, /* 0010 -> 4.0x */ 90, /* 0011 -> 9.0x */ 95, /* 0100 -> 9.5x */ @@ -252,10 +252,10 @@ static const int __initdata nehemiah_clock_ratio[32] = { 75, /* 1101 -> 7.5x */ 85, /* 1110 -> 8.5x */ 120, /* 1111 -> 12.0x */ - 100, /* 0000 -> 10.0x */ + -1, /* 0000 -> 10.0x */ 110, /* 0001 -> 11.0x */ - 120, /* 0010 -> 12.0x */ - 90, /* 0011 -> 9.0x */ + -1, /* 0010 -> 12.0x */ + -1, /* 0011 -> 9.0x */ 105, /* 0100 -> 10.5x */ 115, /* 0101 -> 11.5x */ 125, /* 0110 -> 12.5x */ @@ -267,7 +267,7 @@ static 
const int __initdata nehemiah_clock_ratio[32] = { 145, /* 1100 -> 14.5x */ 155, /* 1101 -> 15.5x */ -1, /* 1110 -> RESERVED (13.0x) */ - 120, /* 1111 -> 12.0x */ + -1, /* 1111 -> 12.0x */ }; static const int __initdata nehemiah_eblcr[32] = { diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 4ade55c5f33..34ed53a0673 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -599,14 +599,17 @@ static void print_basics(struct powernow_k8_data *data) for (j = 0; j < data->numps; j++) { if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) { if (cpu_family == CPU_HW_PSTATE) { - printk(KERN_INFO PFX " %d : fid 0x%x gid 0x%x (%d MHz)\n", j, (data->powernow_table[j].index & 0xff00) >> 8, - (data->powernow_table[j].index & 0xff0000) >> 16, - data->powernow_table[j].frequency/1000); + printk(KERN_INFO PFX " %d : fid 0x%x did 0x%x (%d MHz)\n", + j, + (data->powernow_table[j].index & 0xff00) >> 8, + (data->powernow_table[j].index & 0xff0000) >> 16, + data->powernow_table[j].frequency/1000); } else { - printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", j, - data->powernow_table[j].index & 0xff, - data->powernow_table[j].frequency/1000, - data->powernow_table[j].index >> 8); + printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", + j, + data->powernow_table[j].index & 0xff, + data->powernow_table[j].frequency/1000, + data->powernow_table[j].index >> 8); } } } @@ -1086,7 +1089,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi if (cpu_family == CPU_HW_PSTATE) dprintk("targ: curr fid 0x%x, did 0x%x\n", - data->currfid, data->currvid); + data->currfid, data->currdid); else { dprintk("targ: curr fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); @@ -1322,16 +1325,22 @@ static struct cpufreq_driver cpufreq_amd64_driver = { static int __cpuinit powernowk8_init(void) { unsigned int i, supported_cpus = 0; + unsigned int 
booted_cores = 1; for_each_online_cpu(i) { if (check_supported_cpu(i)) supported_cpus++; } +#ifdef CONFIG_SMP + booted_cores = cpu_data[0].booted_cores; +#endif + if (supported_cpus == num_online_cpus()) { printk(KERN_INFO PFX "Found %d %s " - "processors (" VERSION ")\n", supported_cpus, - boot_cpu_data.x86_model_id); + "processors (%d cpu cores) (" VERSION ")\n", + supported_cpus/booted_cores, + boot_cpu_data.x86_model_id, supported_cpus); return cpufreq_register_driver(&cpufreq_amd64_driver); } diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index 35489fd6885..6c5dc2c85ae 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -21,12 +21,6 @@ #include <linux/delay.h> #include <linux/compiler.h> -#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI -#include <linux/acpi.h> -#include <linux/dmi.h> -#include <acpi/processor.h> -#endif - #include <asm/msr.h> #include <asm/processor.h> #include <asm/cpufeature.h> @@ -257,9 +251,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) /* Matched a non-match */ dprintk("no table support for CPU model \"%s\"\n", cpu->x86_model_id); -#ifndef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI - dprintk("try compiling with CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI enabled\n"); -#endif + dprintk("try using the acpi-cpufreq driver\n"); return -ENOENT; } @@ -346,213 +338,6 @@ static unsigned int get_cur_freq(unsigned int cpu) } -#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI - -static struct acpi_processor_performance *acpi_perf_data[NR_CPUS]; - -/* - * centrino_cpu_early_init_acpi - Do the preregistering with ACPI P-States - * library - * - * Before doing the actual init, we need to do _PSD related setup whenever - * supported by the BIOS. These are handled by this early_init routine. 
- */ -static int centrino_cpu_early_init_acpi(void) -{ - unsigned int i, j; - struct acpi_processor_performance *data; - - for_each_possible_cpu(i) { - data = kzalloc(sizeof(struct acpi_processor_performance), - GFP_KERNEL); - if (!data) { - for_each_possible_cpu(j) { - kfree(acpi_perf_data[j]); - acpi_perf_data[j] = NULL; - } - return (-ENOMEM); - } - acpi_perf_data[i] = data; - } - - acpi_processor_preregister_performance(acpi_perf_data); - return 0; -} - - -#ifdef CONFIG_SMP -/* - * Some BIOSes do SW_ANY coordination internally, either set it up in hw - * or do it in BIOS firmware and won't inform about it to OS. If not - * detected, this has a side effect of making CPU run at a different speed - * than OS intended it to run at. Detect it and handle it cleanly. - */ -static int bios_with_sw_any_bug; -static int sw_any_bug_found(struct dmi_system_id *d) -{ - bios_with_sw_any_bug = 1; - return 0; -} - -static struct dmi_system_id sw_any_bug_dmi_table[] = { - { - .callback = sw_any_bug_found, - .ident = "Supermicro Server X6DLP", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), - DMI_MATCH(DMI_BIOS_VERSION, "080010"), - DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), - }, - }, - { } -}; -#endif - -/* - * centrino_cpu_init_acpi - register with ACPI P-States library - * - * Register with the ACPI P-States library (part of drivers/acpi/processor.c) - * in order to determine correct frequency and voltage pairings by reading - * the _PSS of the ACPI DSDT or SSDT tables. 
- */ -static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) -{ - unsigned long cur_freq; - int result = 0, i; - unsigned int cpu = policy->cpu; - struct acpi_processor_performance *p; - - p = acpi_perf_data[cpu]; - - /* register with ACPI core */ - if (acpi_processor_register_performance(p, cpu)) { - dprintk(PFX "obtaining ACPI data failed\n"); - return -EIO; - } - - policy->shared_type = p->shared_type; - /* - * Will let policy->cpus know about dependency only when software - * coordination is required. - */ - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || - policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { - policy->cpus = p->shared_cpu_map; - } - -#ifdef CONFIG_SMP - dmi_check_system(sw_any_bug_dmi_table); - if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { - policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; - policy->cpus = cpu_core_map[cpu]; - } -#endif - - /* verify the acpi_data */ - if (p->state_count <= 1) { - dprintk("No P-States\n"); - result = -ENODEV; - goto err_unreg; - } - - if ((p->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || - (p->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { - dprintk("Invalid control/status registers (%x - %x)\n", - p->control_register.space_id, p->status_register.space_id); - result = -EIO; - goto err_unreg; - } - - for (i=0; i<p->state_count; i++) { - if ((p->states[i].control & INTEL_MSR_RANGE) != - (p->states[i].status & INTEL_MSR_RANGE)) { - dprintk("Different MSR bits in control (%llu) and status (%llu)\n", - p->states[i].control, p->states[i].status); - result = -EINVAL; - goto err_unreg; - } - - if (!p->states[i].core_frequency) { - dprintk("Zero core frequency for state %u\n", i); - result = -EINVAL; - goto err_unreg; - } - - if (p->states[i].core_frequency > p->states[0].core_frequency) { - dprintk("P%u has larger frequency (%llu) than P0 (%llu), skipping\n", i, - p->states[i].core_frequency, p->states[0].core_frequency); - p->states[i].core_frequency = 0; - 
continue; - } - } - - centrino_model[cpu] = kzalloc(sizeof(struct cpu_model), GFP_KERNEL); - if (!centrino_model[cpu]) { - result = -ENOMEM; - goto err_unreg; - } - - centrino_model[cpu]->model_name=NULL; - centrino_model[cpu]->max_freq = p->states[0].core_frequency * 1000; - centrino_model[cpu]->op_points = kmalloc(sizeof(struct cpufreq_frequency_table) * - (p->state_count + 1), GFP_KERNEL); - if (!centrino_model[cpu]->op_points) { - result = -ENOMEM; - goto err_kfree; - } - - for (i=0; i<p->state_count; i++) { - centrino_model[cpu]->op_points[i].index = p->states[i].control & INTEL_MSR_RANGE; - centrino_model[cpu]->op_points[i].frequency = p->states[i].core_frequency * 1000; - dprintk("adding state %i with frequency %u and control value %04x\n", - i, centrino_model[cpu]->op_points[i].frequency, centrino_model[cpu]->op_points[i].index); - } - centrino_model[cpu]->op_points[p->state_count].frequency = CPUFREQ_TABLE_END; - - cur_freq = get_cur_freq(cpu); - - for (i=0; i<p->state_count; i++) { - if (!p->states[i].core_frequency) { - dprintk("skipping state %u\n", i); - centrino_model[cpu]->op_points[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } - - if (extract_clock(centrino_model[cpu]->op_points[i].index, cpu, 0) != - (centrino_model[cpu]->op_points[i].frequency)) { - dprintk("Invalid encoded frequency (%u vs. 
%u)\n", - extract_clock(centrino_model[cpu]->op_points[i].index, cpu, 0), - centrino_model[cpu]->op_points[i].frequency); - result = -EINVAL; - goto err_kfree_all; - } - - if (cur_freq == centrino_model[cpu]->op_points[i].frequency) - p->state = i; - } - - /* notify BIOS that we exist */ - acpi_processor_notify_smm(THIS_MODULE); - printk("speedstep-centrino with X86_SPEEDSTEP_CENTRINO_ACPI " - "config is deprecated.\n " - "Use X86_ACPI_CPUFREQ (acpi-cpufreq) instead.\n" ); - - return 0; - - err_kfree_all: - kfree(centrino_model[cpu]->op_points); - err_kfree: - kfree(centrino_model[cpu]); - err_unreg: - acpi_processor_unregister_performance(p, cpu); - dprintk(PFX "invalid ACPI data\n"); - return (result); -} -#else -static inline int centrino_cpu_init_acpi(struct cpufreq_policy *policy) { return -ENODEV; } -static inline int centrino_cpu_early_init_acpi(void) { return 0; } -#endif - static int centrino_cpu_init(struct cpufreq_policy *policy) { struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; @@ -568,27 +353,25 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) centrino_driver.flags |= CPUFREQ_CONST_LOOPS; - if (centrino_cpu_init_acpi(policy)) { - if (policy->cpu != 0) - return -ENODEV; + if (policy->cpu != 0) + return -ENODEV; - for (i = 0; i < N_IDS; i++) - if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) - break; + for (i = 0; i < N_IDS; i++) + if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) + break; - if (i != N_IDS) - centrino_cpu[policy->cpu] = &cpu_ids[i]; + if (i != N_IDS) + centrino_cpu[policy->cpu] = &cpu_ids[i]; - if (!centrino_cpu[policy->cpu]) { - dprintk("found unsupported CPU with " - "Enhanced SpeedStep: send /proc/cpuinfo to " - MAINTAINER "\n"); - return -ENODEV; - } + if (!centrino_cpu[policy->cpu]) { + dprintk("found unsupported CPU with " + "Enhanced SpeedStep: send /proc/cpuinfo to " + MAINTAINER "\n"); + return -ENODEV; + } - if (centrino_cpu_init_table(policy)) { - return -ENODEV; - } + if 
(centrino_cpu_init_table(policy)) { + return -ENODEV; } /* Check to see if Enhanced SpeedStep is enabled, and try to @@ -634,20 +417,6 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy) cpufreq_frequency_table_put_attr(cpu); -#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI - if (!centrino_model[cpu]->model_name) { - static struct acpi_processor_performance *p; - - if (acpi_perf_data[cpu]) { - p = acpi_perf_data[cpu]; - dprintk("unregistering and freeing ACPI data\n"); - acpi_processor_unregister_performance(p, cpu); - kfree(centrino_model[cpu]->op_points); - kfree(centrino_model[cpu]); - } - } -#endif - centrino_model[cpu] = NULL; return 0; @@ -849,25 +618,12 @@ static int __init centrino_init(void) if (!cpu_has(cpu, X86_FEATURE_EST)) return -ENODEV; - centrino_cpu_early_init_acpi(); - return cpufreq_register_driver(&centrino_driver); } static void __exit centrino_exit(void) { -#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI - unsigned int j; -#endif - cpufreq_unregister_driver(&centrino_driver); - -#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI - for_each_possible_cpu(j) { - kfree(acpi_perf_data[j]); - acpi_perf_data[j] = NULL; - } -#endif } MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>"); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c index 698f980eb44..a5b2346faf1 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c @@ -205,7 +205,6 @@ static unsigned int speedstep_detect_chipset (void) * host brige. Abort on these systems. 
*/ static struct pci_dev *hostbridge; - u8 rev = 0; hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_MC, @@ -216,8 +215,7 @@ static unsigned int speedstep_detect_chipset (void) if (!hostbridge) return 2; /* 2-M */ - pci_read_config_byte(hostbridge, PCI_REVISION_ID, &rev); - if (rev < 5) { + if (hostbridge->revision < 5) { dprintk("hostbridge does not support speedstep\n"); speedstep_chipset_dev = NULL; pci_dev_put(hostbridge); diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c index 7ba7c3abd3a..1203dc5ab87 100644 --- a/arch/i386/kernel/cpu/mcheck/therm_throt.c +++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c @@ -134,19 +134,21 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, int err; sys_dev = get_cpu_sysdev(cpu); - mutex_lock(&therm_cpu_lock); switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: + mutex_lock(&therm_cpu_lock); err = thermal_throttle_add_dev(sys_dev); + mutex_unlock(&therm_cpu_lock); WARN_ON(err); break; case CPU_DEAD: case CPU_DEAD_FROZEN: + mutex_lock(&therm_cpu_lock); thermal_throttle_remove_dev(sys_dev); + mutex_unlock(&therm_cpu_lock); break; } - mutex_unlock(&therm_cpu_lock); return NOTIFY_OK; } diff --git a/arch/i386/kernel/cpu/mtrr/cyrix.c b/arch/i386/kernel/cpu/mtrr/cyrix.c index 9edf5625584..1001f1e0fe6 100644 --- a/arch/i386/kernel/cpu/mtrr/cyrix.c +++ b/arch/i386/kernel/cpu/mtrr/cyrix.c @@ -233,12 +233,12 @@ typedef struct { mtrr_type type; } arr_state_t; -static arr_state_t arr_state[8] __devinitdata = { +static arr_state_t arr_state[8] = { {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL} }; -static unsigned char ccr_state[7] __devinitdata = { 0, 0, 0, 0, 0, 0, 0 }; +static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 }; static void cyrix_set_all(void) { diff --git a/arch/i386/kernel/cpu/mtrr/generic.c 
b/arch/i386/kernel/cpu/mtrr/generic.c index 6d5937891b4..f6e46943e6e 100644 --- a/arch/i386/kernel/cpu/mtrr/generic.c +++ b/arch/i386/kernel/cpu/mtrr/generic.c @@ -65,7 +65,8 @@ get_fixed_ranges(mtrr_type * frs) void mtrr_save_fixed_ranges(void *info) { - get_fixed_ranges(mtrr_state.fixed_ranges); + if (cpu_has_mtrr) + get_fixed_ranges(mtrr_state.fixed_ranges); } static void print_fixed(unsigned base, unsigned step, const mtrr_type*types) @@ -469,11 +470,6 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i } } - if (base < 0x100) { - printk(KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%lx000,0x%lx000)\n", - base, size); - return -EINVAL; - } /* Check upper bits of base and last are equal and lower bits are 0 for base and 1 for last */ last = base + size - 1; diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index 55b005152a1..75dc6d5214b 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -229,6 +229,8 @@ static void set_mtrr(unsigned int reg, unsigned long base, data.smp_size = size; data.smp_type = type; atomic_set(&data.count, num_booting_cpus() - 1); + /* make sure data.count is visible before unleashing other CPUs */ + smp_wmb(); atomic_set(&data.gate,0); /* Start the ball rolling on other CPUs */ @@ -242,6 +244,7 @@ static void set_mtrr(unsigned int reg, unsigned long base, /* ok, reset count and toggle gate */ atomic_set(&data.count, num_booting_cpus() - 1); + smp_wmb(); atomic_set(&data.gate,1); /* do our MTRR business */ @@ -260,6 +263,7 @@ static void set_mtrr(unsigned int reg, unsigned long base, cpu_relax(); atomic_set(&data.count, num_booting_cpus() - 1); + smp_wmb(); atomic_set(&data.gate,0); /* diff --git a/arch/i386/kernel/cpu/perfctr-watchdog.c b/arch/i386/kernel/cpu/perfctr-watchdog.c index f0b67630b90..4d26d514c56 100644 --- a/arch/i386/kernel/cpu/perfctr-watchdog.c +++ b/arch/i386/kernel/cpu/perfctr-watchdog.c @@ -55,14 +55,45 @@ static 
DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); /* converts an msr to an appropriate reservation bit */ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) { - return wd_ops ? msr - wd_ops->perfctr : 0; + /* returns the bit offset of the performance counter register */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + return (msr - MSR_K7_PERFCTR0); + case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return (msr - MSR_ARCH_PERFMON_PERFCTR0); + + switch (boot_cpu_data.x86) { + case 6: + return (msr - MSR_P6_PERFCTR0); + case 15: + return (msr - MSR_P4_BPU_PERFCTR0); + } + } + return 0; } /* converts an msr to an appropriate reservation bit */ /* returns the bit offset of the event selection register */ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) { - return wd_ops ? msr - wd_ops->evntsel : 0; + /* returns the bit offset of the event selection register */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + return (msr - MSR_K7_EVNTSEL0); + case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return (msr - MSR_ARCH_PERFMON_EVENTSEL0); + + switch (boot_cpu_data.x86) { + case 6: + return (msr - MSR_P6_EVNTSEL0); + case 15: + return (msr - MSR_P4_BSU_ESCR0); + } + } + return 0; + } /* checks for a bit availability (hack for oprofile) */ diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 89d91e6cc97..1e31b6caffb 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -29,7 +29,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, - NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", "3dnowext", "3dnow", + NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", + "3dnowext", "3dnow", /* Transmeta-defined */ "recovery", "longrun", NULL, 
"lrti", NULL, NULL, NULL, NULL, @@ -40,8 +41,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* Other (Linux-defined) */ "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL, - "constant_tsc", "up", NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "constant_tsc", "up", NULL, "arch_perfmon", + "pebs", "bts", NULL, "sync_rdtsc", + "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ @@ -57,9 +59,16 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8legacy", "abm", - "sse4a", "misalignsse", - "3dnowprefetch", "osvw", "ibs", NULL, NULL, NULL, NULL, NULL, + "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", + "altmovcr8", "abm", "sse4a", + "misalignsse", "3dnowprefetch", + "osvw", "ibs", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Auxiliary (Linux-defined) */ + "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index 9645bb51f76..fc822a46897 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c @@ -734,7 +734,7 @@ void __init print_memory_map(char *who) case E820_NVS: printk("(ACPI NVS)\n"); break; - default: printk("type %lu\n", e820.map[i].type); + default: printk("type %u\n", e820.map[i].type); break; } } diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index a1808022ea1..2452c6fbe99 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -278,7 +278,7 @@ void efi_memmap_walk(efi_freemem_callback_t callback, void *arg) struct range { unsigned long start; unsigned long end; 
- } prev, curr; + } uninitialized_var(prev), curr; efi_memory_desc_t *md; unsigned long start, end; void *p; diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index b1f16ee65e4..a714d6b4350 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -367,10 +367,6 @@ ENTRY(system_call) CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL GET_THREAD_INFO(%ebp) - testl $TF_MASK,PT_EFLAGS(%esp) - jz no_singlestep - orl $_TIF_SINGLESTEP,TI_flags(%ebp) -no_singlestep: # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) @@ -385,6 +381,10 @@ syscall_exit: # setting need_resched or sigpending # between sampling and the iret TRACE_IRQS_OFF + testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit + jz no_singlestep + orl $_TIF_SINGLESTEP,TI_flags(%ebp) +no_singlestep: movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx # current->work jne syscall_exit_work @@ -409,8 +409,6 @@ restore_nocheck_notrace: 1: INTERRUPT_RETURN .section .fixup,"ax" iret_exc: - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) pushl $0 # no error code pushl $do_iret_error jmp error_code @@ -1023,6 +1021,91 @@ ENTRY(kernel_thread_helper) CFI_ENDPROC ENDPROC(kernel_thread_helper) +#ifdef CONFIG_XEN +ENTRY(xen_hypervisor_callback) + CFI_STARTPROC + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + + /* Check to see if we got the event in the critical + region in xen_iret_direct, after we've reenabled + events and checked for pending events. This simulates + iret instruction's behaviour where it delivers a + pending interrupt when enabling interrupts. 
*/ + movl PT_EIP(%esp),%eax + cmpl $xen_iret_start_crit,%eax + jb 1f + cmpl $xen_iret_end_crit,%eax + jae 1f + + call xen_iret_crit_fixup + +1: mov %esp, %eax + call xen_evtchn_do_upcall + jmp ret_from_intr + CFI_ENDPROC +ENDPROC(xen_hypervisor_callback) + +# Hypervisor uses this for application faults while it executes. +# We get here for two reasons: +# 1. Fault while reloading DS, ES, FS or GS +# 2. Fault while executing IRET +# Category 1 we fix up by reattempting the load, and zeroing the segment +# register if the load fails. +# Category 2 we fix up by jumping to do_iret_error. We cannot use the +# normal Linux return path in this case because if we use the IRET hypercall +# to pop the stack frame we end up in an infinite loop of failsafe callbacks. +# We distinguish between categories by maintaining a status value in EAX. +ENTRY(xen_failsafe_callback) + CFI_STARTPROC + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + movl $1,%eax +1: mov 4(%esp),%ds +2: mov 8(%esp),%es +3: mov 12(%esp),%fs +4: mov 16(%esp),%gs + testl %eax,%eax + popl %eax + CFI_ADJUST_CFA_OFFSET -4 + lea 16(%esp),%esp + CFI_ADJUST_CFA_OFFSET -16 + jz 5f + addl $16,%esp + jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) +5: pushl $0 # EAX == 0 => Category 1 (Bad segment) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + jmp ret_from_exception + CFI_ENDPROC + +.section .fixup,"ax" +6: xorl %eax,%eax + movl %eax,4(%esp) + jmp 1b +7: xorl %eax,%eax + movl %eax,8(%esp) + jmp 2b +8: xorl %eax,%eax + movl %eax,12(%esp) + jmp 3b +9: xorl %eax,%eax + movl %eax,16(%esp) + jmp 4b +.previous +.section __ex_table,"a" + .align 4 + .long 1b,6b + .long 2b,7b + .long 3b,8b + .long 4b,9b +.previous +ENDPROC(xen_failsafe_callback) + +#endif /* CONFIG_XEN */ + .section .rodata,"a" #include "syscall_table.S" diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index f74dfc419b5..7c52b222207 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -168,6 +168,12 @@ page_pde_offset = (__PAGE_OFFSET >> 20); 
.section .init.text,"ax",@progbits #endif + /* Do an early initialization of the fixmap area */ + movl $(swapper_pg_dir - __PAGE_OFFSET), %edx + movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax + addl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ + movl %eax, 4092(%edx) + #ifdef CONFIG_SMP ENTRY(startup_32_smp) cld @@ -504,9 +510,12 @@ ENTRY(_stext) /* * BSS section */ -.section ".bss.page_aligned","w" +.section ".bss.page_aligned","wa" + .align PAGE_SIZE_asm ENTRY(swapper_pg_dir) .fill 1024,4,0 +ENTRY(swapper_pg_pmd) + .fill 1024,4,0 ENTRY(empty_zero_page) .fill 4096,1,0 @@ -530,6 +539,8 @@ fault_msg: .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n" .asciz "Stack: %p %p %p %p %p %p %p %p\n" +#include "../xen/xen-head.S" + /* * The IDT and GDT 'descriptors' are a strange 48-bit object * only used by the lidt and lgdt instructions. They are not diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 7f8b7af2b95..21db8f56c9a 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -667,6 +667,7 @@ static int balanced_irq(void *unused) set_pending_irq(i, cpumask_of_cpu(0)); } + set_freezable(); for ( ; ; ) { time_remaining = schedule_timeout_interruptible(time_remaining); try_to_freeze(); diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index fba121f7973..03b7f5584d7 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -295,7 +295,7 @@ static unsigned int last_irq_sums [NR_CPUS], alert_counter [NR_CPUS]; -void touch_nmi_watchdog (void) +void touch_nmi_watchdog(void) { if (nmi_watchdog > 0) { unsigned cpu; @@ -304,8 +304,10 @@ void touch_nmi_watchdog (void) * Just reset the alert counters, (other CPUs might be * spinning on locks we hold): */ - for_each_present_cpu (cpu) - alert_counter[cpu] = 0; + for_each_present_cpu(cpu) { + if (alert_counter[cpu]) + alert_counter[cpu] = 0; + } } /* diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index faab09abca5..53f07a8275e 100644 --- 
a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -228,6 +228,41 @@ static int __init print_banner(void) } core_initcall(print_banner); +static struct resource reserve_ioports = { + .start = 0, + .end = IO_SPACE_LIMIT, + .name = "paravirt-ioport", + .flags = IORESOURCE_IO | IORESOURCE_BUSY, +}; + +static struct resource reserve_iomem = { + .start = 0, + .end = -1, + .name = "paravirt-iomem", + .flags = IORESOURCE_MEM | IORESOURCE_BUSY, +}; + +/* + * Reserve the whole legacy IO space to prevent any legacy drivers + * from wasting time probing for their hardware. This is a fairly + * brute-force approach to disabling all non-virtual drivers. + * + * Note that this must be called very early to have any effect. + */ +int paravirt_disable_iospace(void) +{ + int ret; + + ret = request_resource(&ioport_resource, &reserve_ioports); + if (ret == 0) { + ret = request_resource(&iomem_resource, &reserve_iomem); + if (ret) + release_resource(&reserve_ioports); + } + + return ret; +} + struct paravirt_ops paravirt_ops = { .name = "bare hardware", .paravirt_enabled = 0, @@ -267,7 +302,7 @@ struct paravirt_ops paravirt_ops = { .write_msr = native_write_msr_safe, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, - .get_scheduled_cycles = native_read_tsc, + .sched_clock = native_sched_clock, .get_cpu_khz = native_calculate_cpu_khz, .load_tr_desc = native_load_tr_desc, .set_ldt = native_set_ldt, diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 06dfa65ad18..6c49acb9698 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -538,8 +538,31 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) return 1; } -static noinline void __switch_to_xtra(struct task_struct *next_p, - struct tss_struct *tss) +#ifdef CONFIG_SECCOMP +void hard_disable_TSC(void) +{ + write_cr4(read_cr4() | X86_CR4_TSD); +} +void disable_TSC(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOTSC)) + /* + * Must flip 
the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ + hard_disable_TSC(); + preempt_enable(); +} +void hard_enable_TSC(void) +{ + write_cr4(read_cr4() & ~X86_CR4_TSD); +} +#endif /* CONFIG_SECCOMP */ + +static noinline void +__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss) { struct thread_struct *next; @@ -555,6 +578,17 @@ static noinline void __switch_to_xtra(struct task_struct *next_p, set_debugreg(next->debugreg[7], 7); } +#ifdef CONFIG_SECCOMP + if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ + test_tsk_thread_flag(next_p, TIF_NOTSC)) { + /* prev and next are different */ + if (test_tsk_thread_flag(next_p, TIF_NOTSC)) + hard_disable_TSC(); + else + hard_enable_TSC(); + } +#endif + if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { /* * Disable the bitmap via an invalid offset. We still cache @@ -586,33 +620,6 @@ static noinline void __switch_to_xtra(struct task_struct *next_p, } /* - * This function selects if the context switch from prev to next - * has to tweak the TSC disable bit in the cr4. - */ -static inline void disable_tsc(struct task_struct *prev_p, - struct task_struct *next_p) -{ - struct thread_info *prev, *next; - - /* - * gcc should eliminate the ->thread_info dereference if - * has_secure_computing returns 0 at compile time (SECCOMP=n). - */ - prev = task_thread_info(prev_p); - next = task_thread_info(next_p); - - if (has_secure_computing(prev) || has_secure_computing(next)) { - /* slow path here */ - if (has_secure_computing(prev) && - !has_secure_computing(next)) { - write_cr4(read_cr4() & ~X86_CR4_TSD); - } else if (!has_secure_computing(prev) && - has_secure_computing(next)) - write_cr4(read_cr4() | X86_CR4_TSD); - } -} - -/* * switch_to(x,yn) should switch tasks from x to y. 
* * We fsave/fwait so that an exception goes off at the right time @@ -689,11 +696,9 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas /* * Now maybe handle debug registers and/or IO bitmaps */ - if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW) - || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))) - __switch_to_xtra(next_p, tss); - - disable_tsc(prev_p, next_p); + if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || + task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) + __switch_to_xtra(prev_p, next_p, tss); /* * Leave lazy mode, flushing any hypercalls made here. diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 0c0ceec5de0..0c8f00e69c4 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -164,14 +164,22 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_ u32 *desc; unsigned long base; - down(&child->mm->context.sem); - desc = child->mm->context.ldt + (seg & ~7); - base = (desc[0] >> 16) | ((desc[1] & 0xff) << 16) | (desc[1] & 0xff000000); + seg &= ~7UL; - /* 16-bit code segment? */ - if (!((desc[1] >> 22) & 1)) - addr &= 0xffff; - addr += base; + down(&child->mm->context.sem); + if (unlikely((seg >> 3) >= child->mm->context.size)) + addr = -1L; /* bogus selector, access would fault */ + else { + desc = child->mm->context.ldt + seg; + base = ((desc[0] >> 16) | + ((desc[1] & 0xff) << 16) | + (desc[1] & 0xff000000)); + + /* 16-bit code segment? */ + if (!((desc[1] >> 22) & 1)) + addr &= 0xffff; + addr += base; + } up(&child->mm->context.sem); } return addr; @@ -358,17 +366,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. 
*/ - case PTRACE_PEEKDATA: { - unsigned long tmp; - int copied; - - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); - ret = -EIO; - if (copied != sizeof(tmp)) - break; - ret = put_user(tmp, datap); + case PTRACE_PEEKDATA: + ret = generic_ptrace_peekdata(child, addr, data); break; - } /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: { @@ -395,10 +395,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) /* when I and D space are separate, this will have to be fixed. */ case PTRACE_POKETEXT: /* write the word at location addr. */ case PTRACE_POKEDATA: - ret = 0; - if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data)) - break; - ret = -EIO; + ret = generic_ptrace_pokedata(child, addr, data); break; case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c index 9f6ab1789bb..6722469c263 100644 --- a/arch/i386/kernel/quirks.c +++ b/arch/i386/kernel/quirks.c @@ -20,8 +20,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) if (rev > 0x9) return; - printk(KERN_INFO "Intel E7520/7320/7525 detected."); - /* enable access to config space*/ pci_read_config_byte(dev, 0xf4, &config); pci_write_config_byte(dev, 0xf4, config|0x2); @@ -30,7 +28,8 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); if (!(word & (1 << 13))) { - printk(KERN_INFO "Disabling irq balancing and affinity\n"); + printk(KERN_INFO "Intel E7520/7320/7525 detected. 
" + "Disabling irq balancing and affinity\n"); #ifdef CONFIG_IRQBALANCE irqbalance_disable(""); #endif diff --git a/arch/i386/kernel/reboot_fixups.c b/arch/i386/kernel/reboot_fixups.c index 2d78d918340..03e1cce58f4 100644 --- a/arch/i386/kernel/reboot_fixups.c +++ b/arch/i386/kernel/reboot_fixups.c @@ -5,12 +5,14 @@ * * List of supported fixups: * geode-gx1/cs5530a - Jaya Kumar <jayalk@intworks.biz> + * geode-gx/lx/cs5536 - Andres Salomon <dilinger@debian.org> * */ #include <asm/delay.h> #include <linux/pci.h> #include <asm/reboot_fixups.h> +#include <asm/msr.h> static void cs5530a_warm_reset(struct pci_dev *dev) { @@ -21,6 +23,16 @@ static void cs5530a_warm_reset(struct pci_dev *dev) return; } +static void cs5536_warm_reset(struct pci_dev *dev) +{ + /* + * 6.6.2.12 Soft Reset (DIVIL_SOFT_RESET) + * writing 1 to the LSB of this MSR causes a hard reset. + */ + wrmsrl(0x51400017, 1ULL); + udelay(50); /* shouldn't get here but be safe and spin a while */ +} + struct device_fixup { unsigned int vendor; unsigned int device; @@ -29,6 +41,7 @@ struct device_fixup { static struct device_fixup fixups_table[] = { { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, +{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, }; /* diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 698c24fe482..74871d066c2 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -102,19 +102,10 @@ static unsigned int highmem_pages = -1; /* * Setup options */ -struct drive_info_struct { char dummy[32]; } drive_info; -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \ - defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) -EXPORT_SYMBOL(drive_info); -#endif struct screen_info screen_info; EXPORT_SYMBOL(screen_info); struct apm_info apm_info; EXPORT_SYMBOL(apm_info); -struct sys_desc_table_struct { - unsigned short length; - unsigned char table[0]; -}; struct edid_info edid_info; 
EXPORT_SYMBOL_GPL(edid_info); struct ist_info ist_info; @@ -134,7 +125,7 @@ unsigned long saved_videomode; static char __initdata command_line[COMMAND_LINE_SIZE]; -unsigned char __initdata boot_params[PARAM_SIZE]; +struct boot_params __initdata boot_params; #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) struct edd edd; @@ -528,7 +519,6 @@ void __init setup_arch(char **cmdline_p) #endif ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); - drive_info = DRIVE_INFO; screen_info = SCREEN_INFO; edid_info = EDID_INFO; apm_info.bios = APM_BIOS_INFO; @@ -611,6 +601,8 @@ void __init setup_arch(char **cmdline_p) * NOTE: at this point the bootmem allocator is fully available. */ + paravirt_post_allocator_init(); + dmi_scan_machine(); #ifdef CONFIG_X86_GENERICARCH diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 6299c080f6e..2d35d850202 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -22,6 +22,7 @@ #include <asm/mtrr.h> #include <asm/tlbflush.h> +#include <asm/mmu_context.h> #include <mach_apic.h> /* @@ -249,13 +250,13 @@ static unsigned long flush_va; static DEFINE_SPINLOCK(tlbstate_lock); /* - * We cannot call mmdrop() because we are in interrupt context, + * We cannot call mmdrop() because we are in interrupt context, * instead update mm->cpu_vm_mask. * * We need to reload %cr3 since the page tables may be going * away from under us.. 
*/ -static inline void leave_mm (unsigned long cpu) +void leave_mm(unsigned long cpu) { if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) BUG(); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 88baed1e7e8..5910d3fac56 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -148,7 +148,7 @@ void __init smp_alloc_memory(void) * a given CPU */ -static void __cpuinit smp_store_cpu_info(int id) +void __cpuinit smp_store_cpu_info(int id) { struct cpuinfo_x86 *c = cpu_data + id; @@ -308,8 +308,7 @@ cpumask_t cpu_coregroup_map(int cpu) /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; -static inline void -set_cpu_sibling_map(int cpu) +void set_cpu_sibling_map(int cpu) { int i; struct cpuinfo_x86 *c = cpu_data; @@ -941,17 +940,6 @@ exit: } #endif -static void smp_tune_scheduling(void) -{ - if (cpu_khz) { - /* cache size in kB */ - long cachesize = boot_cpu_data.x86_cache_size; - - if (cachesize > 0) - max_cache_size = cachesize * 1024; - } -} - /* * Cycle through the processors sending APIC IPIs to boot each. 
*/ @@ -980,7 +968,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus) x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; current_thread_info()->cpu = 0; - smp_tune_scheduling(); set_cpu_sibling_map(0); @@ -1156,8 +1143,7 @@ void __init native_smp_prepare_boot_cpu(void) } #ifdef CONFIG_HOTPLUG_CPU -static void -remove_siblinginfo(int cpu) +void remove_siblinginfo(int cpu) { int sibling; struct cpuinfo_x86 *c = cpu_data; diff --git a/arch/i386/kernel/smpcommon.c b/arch/i386/kernel/smpcommon.c index 1868ae18eb4..bbfe85a0f69 100644 --- a/arch/i386/kernel/smpcommon.c +++ b/arch/i386/kernel/smpcommon.c @@ -47,7 +47,7 @@ int smp_call_function(void (*func) (void *info), void *info, int nonatomic, EXPORT_SYMBOL(smp_call_function); /** - * smp_call_function_single - Run a function on another CPU + * smp_call_function_single - Run a function on a specific CPU * @cpu: The target CPU. Cannot be the calling CPU. * @func: The function to run. This must be fast and non-blocking. * @info: An arbitrary pointer to pass to the function. 
@@ -66,9 +66,11 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, int ret; int me = get_cpu(); if (cpu == me) { - WARN_ON(1); + local_irq_disable(); + func(info); + local_irq_enable(); put_cpu(); - return -EBUSY; + return 0; } ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index bf6adce5226..8344c70adf6 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -323,3 +323,4 @@ ENTRY(sys_call_table) .long sys_signalfd .long sys_timerfd .long sys_eventfd + .long sys_fallocate diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 90da0575fcf..d32fd4b6f78 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -390,7 +390,7 @@ void die(const char * str, struct pt_regs * regs, long err) unsigned long esp; unsigned short ss; - report_bug(regs->eip); + report_bug(regs->eip, regs); printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); #ifdef CONFIG_PREEMPT @@ -433,6 +433,7 @@ void die(const char * str, struct pt_regs * regs, long err) bust_spinlocks(0); die.lock_owner = -1; + add_taint(TAINT_DIE); spin_unlock_irqrestore(&die.lock, flags); if (!regs) @@ -517,10 +518,12 @@ fastcall void do_##name(struct pt_regs * regs, long error_code) \ do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ } -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \ fastcall void do_##name(struct pt_regs * regs, long error_code) \ { \ siginfo_t info; \ + if (irq) \ + local_irq_enable(); \ info.si_signo = signr; \ info.si_errno = 0; \ info.si_code = sicode; \ @@ -560,13 +563,13 @@ DO_VM86_ERROR( 3, SIGTRAP, "int3", int3) #endif DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, 
regs->eip) +DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip, 0) DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR(12, SIGBUS, "stack segment", stack_segment) -DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) -DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0) +DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) +DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1) fastcall void __kprobes do_general_protection(struct pt_regs * regs, long error_code) diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index f64b81f3033..252f9010f28 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c @@ -4,6 +4,7 @@ * See comments there for proper credits. */ +#include <linux/sched.h> #include <linux/clocksource.h> #include <linux/workqueue.h> #include <linux/cpufreq.h> @@ -83,7 +84,7 @@ static inline int check_tsc_unstable(void) * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -static unsigned long cyc2ns_scale __read_mostly; +unsigned long cyc2ns_scale __read_mostly; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ @@ -92,32 +93,44 @@ static inline void set_cyc2ns_scale(unsigned long cpu_khz) cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; } -static inline unsigned long long cycles_2_ns(unsigned long long cyc) -{ - return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; -} - /* * Scheduler clock - returns current time in nanosec units. */ -unsigned long long sched_clock(void) +unsigned long long native_sched_clock(void) { unsigned long long this_offset; /* * Fall back to jiffies if there's no TSC available: + * ( But note that we still use it if the TSC is marked + * unstable. 
We do this because unlike Time Of Day, + * the scheduler clock tolerates small errors and it's + * very important for it to be as fast as the platform + * can achieve it. ) */ - if (unlikely(!tsc_enabled)) + if (unlikely(!tsc_enabled && !tsc_unstable)) /* No locking but a rare wrong value is not a big deal: */ return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); /* read the Time Stamp Counter: */ - get_scheduled_cycles(this_offset); + rdtscll(this_offset); /* return the value in ns */ return cycles_2_ns(this_offset); } +/* We need to define a real function for sched_clock, to override the + weak default version */ +#ifdef CONFIG_PARAVIRT +unsigned long long sched_clock(void) +{ + return paravirt_sched_clock(); +} +#else +unsigned long long sched_clock(void) + __attribute__((alias("native_sched_clock"))); +#endif + unsigned long native_calculate_cpu_khz(void) { unsigned long long start, end; @@ -277,6 +290,7 @@ static struct clocksource clocksource_tsc = { void mark_tsc_unstable(char *reason) { + sched_clock_unstable_event(); if (!tsc_unstable) { tsc_unstable = 1; tsc_enabled = 0; diff --git a/arch/i386/kernel/verify_cpu.S b/arch/i386/kernel/verify_cpu.S deleted file mode 100644 index f1d1eacf4ab..00000000000 --- a/arch/i386/kernel/verify_cpu.S +++ /dev/null @@ -1,94 +0,0 @@ -/* Check if CPU has some minimum CPUID bits - This runs in 16bit mode so that the caller can still use the BIOS - to output errors on the screen */ -#include <asm/cpufeature.h> -#include <asm/msr.h> - -verify_cpu: - pushfl # Save caller passed flags - pushl $0 # Kill any dangerous flags - popfl - -#if CONFIG_X86_MINIMUM_CPU_MODEL >= 4 - pushfl - pop %eax - orl $(1<<18),%eax # try setting AC - push %eax - popfl - pushfl - popl %eax - testl $(1<<18),%eax - jz bad -#endif -#if REQUIRED_MASK1 != 0 - pushfl # standard way to check for cpuid - popl %eax - movl %eax,%ebx - xorl $0x200000,%eax - pushl %eax - popfl - pushfl - popl %eax - cmpl %eax,%ebx - pushfl # standard way to check for cpuid - 
popl %eax - movl %eax,%ebx - xorl $0x200000,%eax - pushl %eax - popfl - pushfl - popl %eax - cmpl %eax,%ebx - jz bad # REQUIRED_MASK1 != 0 requires CPUID - - movl $0x0,%eax # See if cpuid 1 is implemented - cpuid - cmpl $0x1,%eax - jb bad # no cpuid 1 - -#if REQUIRED_MASK1 & NEED_CMPXCHG64 - /* Some VIA C3s need magic MSRs to enable CX64. Do this here */ - cmpl $0x746e6543,%ebx # Cent - jne 1f - cmpl $0x48727561,%edx # aurH - jne 1f - cmpl $0x736c7561,%ecx # auls - jne 1f - movl $1,%eax # check model - cpuid - movl %eax,%ebx - shr $8,%ebx - andl $0xf,%ebx - cmp $6,%ebx # check family == 6 - jne 1f - shr $4,%eax - andl $0xf,%eax - cmpl $6,%eax # check model >= 6 - jb 1f - # assume models >= 6 all support this MSR - movl $MSR_VIA_FCR,%ecx - rdmsr - orl $((1<<1)|(1<<7)),%eax # enable CMPXCHG64 and PGE - wrmsr -1: -#endif - movl $0x1,%eax # Does the cpu have what it takes - cpuid - -#if CONFIG_X86_MINIMUM_CPU_MODEL > 4 -#error add proper model checking here -#endif - - andl $REQUIRED_MASK1,%edx - xorl $REQUIRED_MASK1,%edx - jnz bad -#endif /* REQUIRED_MASK1 */ - - popfl - xor %eax,%eax - ret - -bad: - popfl - movl $1,%eax - ret diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c index c12720d7cbc..72042bb7ec9 100644 --- a/arch/i386/kernel/vmi.c +++ b/arch/i386/kernel/vmi.c @@ -362,7 +362,7 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) } #endif -static void vmi_allocate_pt(u32 pfn) +static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn) { vmi_set_page_type(pfn, VMI_PAGE_L1); vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); @@ -891,7 +891,7 @@ static inline int __init activate_vmi(void) paravirt_ops.setup_boot_clock = vmi_time_bsp_init; paravirt_ops.setup_secondary_clock = vmi_time_ap_init; #endif - paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; + paravirt_ops.sched_clock = vmi_sched_clock; paravirt_ops.get_cpu_khz = vmi_cpu_khz; /* We have true wallclock functions; disable CMOS clock sync */ diff --git 
a/arch/i386/kernel/vmiclock.c b/arch/i386/kernel/vmiclock.c index 26a37f8a876..f9b845f4e69 100644 --- a/arch/i386/kernel/vmiclock.c +++ b/arch/i386/kernel/vmiclock.c @@ -64,10 +64,10 @@ int vmi_set_wallclock(unsigned long now) return 0; } -/* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */ -unsigned long long vmi_get_sched_cycles(void) +/* paravirt_ops.sched_clock = vmi_sched_clock */ +unsigned long long vmi_sched_clock(void) { - return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE); + return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); } /* paravirt_ops.get_cpu_khz = vmi_cpu_khz */ diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index aa87b06c7c8..00f1bc47d3a 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -88,6 +88,7 @@ SECTIONS . = ALIGN(4096); .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { + *(.data.page_aligned) *(.data.idt) } diff --git a/arch/i386/kernel/vsyscall-note.S b/arch/i386/kernel/vsyscall-note.S index d4b5be4f3d5..271f16a8ca0 100644 --- a/arch/i386/kernel/vsyscall-note.S +++ b/arch/i386/kernel/vsyscall-note.S @@ -3,23 +3,40 @@ * Here we can supply some information useful to userland. */ -#include <linux/uts.h> #include <linux/version.h> +#include <linux/elfnote.h> -#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \ - .section name, flags; \ - .balign 4; \ - .long 1f - 0f; /* name length */ \ - .long 3f - 2f; /* data length */ \ - .long type; /* note type */ \ -0: .asciz vendor; /* vendor name */ \ -1: .balign 4; \ -2: +/* Ideally this would use UTS_NAME, but using a quoted string here + doesn't work. Remember to change this when changing the + kernel's name. 
*/ +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END -#define ASM_ELF_NOTE_END \ -3: .balign 4; /* pad out section */ \ - .previous +#ifdef CONFIG_XEN - ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0) - .long LINUX_VERSION_CODE - ASM_ELF_NOTE_END +/* + * Add a special note telling glibc's dynamic linker a fake hardware + * flavor that it will use to choose the search path for libraries in the + * same way it uses real hardware capabilities like "mmx". + * We supply "nosegneg" as the fake capability, to indicate that we + * do not like negative offsets in instructions using segment overrides, + * since we implement those inefficiently. This makes it possible to + * install libraries optimized to avoid those access patterns in someplace + * like /lib/i686/tls/nosegneg. Note that an /etc/ld.so.conf.d/file + * corresponding to the bits here is needed to make ldconfig work right. + * It should contain: + * hwcap 1 nosegneg + * to match the mapping of bit to name that we give here. + */ + +/* Bit used for the pseudo-hwcap for non-negative segments. We use + bit 1 to avoid bugs in some versions of glibc when bit 0 is + used; the choice is otherwise arbitrary. */ +#define VDSO_NOTE_NONEGSEG_BIT 1 + +ELFNOTE_START(GNU, 2, "a") + .long 1, 1<<VDSO_NOTE_NONEGSEG_BIT /* ncaps, mask */ + .byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */ +ELFNOTE_END +#endif |