diff options
Diffstat (limited to 'arch/x86/kernel/cpu')
56 files changed, 17536 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile new file mode 100644 index 00000000000..778396c78d6 --- /dev/null +++ b/arch/x86/kernel/cpu/Makefile @@ -0,0 +1,20 @@ +# +# Makefile for x86-compatible CPU details and quirks +# + +obj-y := common.o proc.o bugs.o + +obj-y += amd.o +obj-y += cyrix.o +obj-y += centaur.o +obj-y += transmeta.o +obj-y += intel.o intel_cacheinfo.o addon_cpuid_features.o +obj-y += nexgen.o +obj-y += umc.o + +obj-$(CONFIG_X86_MCE) += mcheck/ + +obj-$(CONFIG_MTRR) += mtrr/ +obj-$(CONFIG_CPU_FREQ) += cpufreq/ + +obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c new file mode 100644 index 00000000000..3e91d3ee26e --- /dev/null +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -0,0 +1,50 @@ + +/* + * Routines to indentify additional cpu features that are scattered in + * cpuid space. + */ + +#include <linux/cpu.h> + +#include <asm/processor.h> + +struct cpuid_bit { + u16 feature; + u8 reg; + u8 bit; + u32 level; +}; + +enum cpuid_regs { + CR_EAX = 0, + CR_ECX, + CR_EDX, + CR_EBX +}; + +void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) +{ + u32 max_level; + u32 regs[4]; + const struct cpuid_bit *cb; + + static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, + { 0, 0, 0, 0 } + }; + + for (cb = cpuid_bits; cb->feature; cb++) { + + /* Verify that the level is valid */ + max_level = cpuid_eax(cb->level & 0xffff0000); + if (max_level < cb->level || + max_level > (cb->level | 0xffff)) + continue; + + cpuid(cb->level, ®s[CR_EAX], ®s[CR_EBX], + ®s[CR_ECX], ®s[CR_EDX]); + + if (regs[cb->reg] & (1 << cb->bit)) + set_bit(cb->feature, c->x86_capability); + } +} diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c new file mode 100644 index 00000000000..dcf6bbb1c7c --- /dev/null +++ b/arch/x86/kernel/cpu/amd.c @@ -0,0 +1,337 @@ +#include <linux/init.h> +#include <linux/bitops.h> +#include <linux/mm.h> +#include <asm/io.h> +#include <asm/processor.h> +#include <asm/apic.h> + +#include "cpu.h" + +/* + * B step AMD K6 before B 9730xxxx have hardware bugs that can cause + * misexecution of code under Linux. Owners of such processors should + * contact AMD for precise details and a CPU swap. + * + * See http://www.multimania.com/poulot/k6bug.html + * http://www.amd.com/K6/k6docs/revgd.html + * + * The following test is erm.. interesting. AMD neglected to up + * the chip setting when fixing the bug but they also tweaked some + * performance at the same time.. + */ + +extern void vide(void); +__asm__(".align 4\nvide: ret"); + +#ifdef CONFIG_X86_LOCAL_APIC +#define ENABLE_C1E_MASK 0x18000000 +#define CPUID_PROCESSOR_SIGNATURE 1 +#define CPUID_XFAM 0x0ff00000 +#define CPUID_XFAM_K8 0x00000000 +#define CPUID_XFAM_10H 0x00100000 +#define CPUID_XFAM_11H 0x00200000 +#define CPUID_XMOD 0x000f0000 +#define CPUID_XMOD_REV_F 0x00040000 + +/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ +static __cpuinit int amd_apic_timer_broken(void) +{ + u32 lo, hi; + u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + switch (eax & CPUID_XFAM) { + case CPUID_XFAM_K8: + if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) + break; + case CPUID_XFAM_10H: + case CPUID_XFAM_11H: + rdmsr(MSR_K8_ENABLE_C1E, lo, hi); + if (lo & ENABLE_C1E_MASK) + return 1; + break; + default: + /* err on the side of caution */ + return 1; + } + return 0; +} +#endif + +int force_mwait __cpuinitdata; + +static void __cpuinit init_amd(struct cpuinfo_x86 *c) +{ + u32 l, h; + int mbytes = num_physpages >> (20-PAGE_SHIFT); + int r; + +#ifdef CONFIG_SMP + unsigned long long value; + + /* Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 15) { + rdmsrl(MSR_K7_HWCR, value); + value |= 1 << 6; + wrmsrl(MSR_K7_HWCR, value); + } +#endif + + /* + * FIXME: We should handle the K5 here. Set up the write + * range and also turn on MSR 83 bits 4 and 31 (write alloc, + * no bus pipeline) + */ + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, c->x86_capability); + + r = get_model_name(c); + + switch(c->x86) + { + case 4: + /* + * General Systems BIOSen alias the cpu frequency registers + * of the Elan at 0x000df000. Unfortuantly, one of the Linux + * drivers subsequently pokes it, and changes the CPU speed. + * Workaround : Remove the unneeded alias. + */ +#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ +#define CBAR_ENB (0x80000000) +#define CBAR_KEY (0X000000CB) + if (c->x86_model==9 || c->x86_model == 10) { + if (inl (CBAR) & CBAR_ENB) + outl (0 | CBAR_KEY, CBAR); + } + break; + case 5: + if( c->x86_model < 6 ) + { + /* Based on AMD doc 20734R - June 2000 */ + if ( c->x86_model == 0 ) { + clear_bit(X86_FEATURE_APIC, c->x86_capability); + set_bit(X86_FEATURE_PGE, c->x86_capability); + } + break; + } + + if ( c->x86_model == 6 && c->x86_mask == 1 ) { + const int K6_BUG_LOOP = 1000000; + int n; + void (*f_vide)(void); + unsigned long d, d2; + + printk(KERN_INFO "AMD K6 stepping B detected - "); + + /* + * It looks like AMD fixed the 2.6.2 bug and improved indirect + * calls at the same time. + */ + + n = K6_BUG_LOOP; + f_vide = vide; + rdtscl(d); + while (n--) + f_vide(); + rdtscl(d2); + d = d2-d; + + if (d > 20*K6_BUG_LOOP) + printk("system stability may be impaired when more than 32 MB are used.\n"); + else + printk("probably OK (after B9730xxxx).\n"); + printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); + } + + /* K6 with old style WHCR */ + if (c->x86_model < 8 || + (c->x86_model== 8 && c->x86_mask < 8)) { + /* We can only write allocate on the low 508Mb */ + if(mbytes>508) + mbytes=508; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0x0000FFFF)==0) { + unsigned long flags; + l=(1<<0)|((mbytes/4)<<1); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + mbytes); + } + break; + } + + if ((c->x86_model == 8 && c->x86_mask >7) || + c->x86_model == 9 || c->x86_model == 13) { + /* The more serious chips .. */ + + if(mbytes>4092) + mbytes=4092; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0xFFFF0000)==0) { + unsigned long flags; + l=((mbytes>>2)<<22)|(1<<16); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + mbytes); + } + + /* Set MTRR capability flag if appropriate */ + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_bit(X86_FEATURE_K6_MTRR, c->x86_capability); + break; + } + + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + break; + } + break; + case 6: /* An Athlon/Duron */ + + /* Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. + * If the BIOS didn't enable it already, enable it here. + */ + if (c->x86_model >= 6 && c->x86_model <= 10) { + if (!cpu_has(c, X86_FEATURE_XMM)) { + printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + rdmsr(MSR_K7_HWCR, l, h); + l &= ~0x00008000; + wrmsr(MSR_K7_HWCR, l, h); + set_bit(X86_FEATURE_XMM, c->x86_capability); + } + } + + /* It's been determined by AMD that Athlons since model 8 stepping 1 + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ + if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, + ((l & 0x000fffff)|0x20000000)); + wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); + } + } + break; + } + + switch (c->x86) { + case 15: + /* Use K8 tuning for Fam10h and Fam11h */ + case 0x10: + case 0x11: + set_bit(X86_FEATURE_K8, c->x86_capability); + break; + case 6: + set_bit(X86_FEATURE_K7, c->x86_capability); + break; + } + if (c->x86 >= 6) + set_bit(X86_FEATURE_FXSAVE_LEAK, c->x86_capability); + + display_cacheinfo(c); + + if (cpuid_eax(0x80000000) >= 0x80000008) { + c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; + } + + if (cpuid_eax(0x80000000) >= 0x80000007) { + c->x86_power = cpuid_edx(0x80000007); + if (c->x86_power & (1<<8)) + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); + } + +#ifdef CONFIG_X86_HT + /* + * On a AMD multi core setup the lower bits of the APIC id + * distingush the cores. + */ + if (c->x86_max_cores > 1) { + int cpu = smp_processor_id(); + unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; + + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } + c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1); + c->phys_proc_id >>= bits; + printk(KERN_INFO "CPU %d(%d) -> Core %d\n", + cpu, c->x86_max_cores, c->cpu_core_id); + } +#endif + + if (cpuid_eax(0x80000000) >= 0x80000006) { + if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) + num_cache_leaves = 4; + else + num_cache_leaves = 3; + } + +#ifdef CONFIG_X86_LOCAL_APIC + if (amd_apic_timer_broken()) + local_apic_timer_disabled = 1; +#endif + + if (c->x86 == 0x10 && !force_mwait) + clear_bit(X86_FEATURE_MWAIT, c->x86_capability); + + /* K6s reports MCEs but don't actually have all the MSRs */ + if (c->x86 < 6) + clear_bit(X86_FEATURE_MCE, c->x86_capability); +} + +static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) +{ + /* AMD errata T13 (order #21922) */ + if ((c->x86 == 6)) { + if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ + size = 64; + if (c->x86_model == 4 && + (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */ + size = 256; + } + return size; +} + +static struct cpu_dev amd_cpu_dev __cpuinitdata = { + .c_vendor = "AMD", + .c_ident = { "AuthenticAMD" }, + .c_models = { + { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = + { + [3] = "486 DX/2", + [7] = "486 DX/2-WB", + [8] = "486 DX/4", + [9] = "486 DX/4-WB", + [14] = "Am5x86-WT", + [15] = "Am5x86-WB" + } + }, + }, + .c_init = init_amd, + .c_size_cache = amd_size_cache, +}; + +int __init amd_init_cpu(void) +{ + cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev; + return 0; +} diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c new file mode 100644 index 00000000000..59266f03d1c --- /dev/null +++ b/arch/x86/kernel/cpu/bugs.c @@ -0,0 +1,192 @@ +/* + * arch/i386/cpu/bugs.c + * + * Copyright (C) 1994 Linus Torvalds + * + * Cyrix stuff, June 1998 by: + * - Rafael R. Reilova (moved everything from head.S), + * <rreilova@ececs.uc.edu> + * - Channing Corn (tests & fixes), + * - Andrew D. Balsa (code cleanup). + */ +#include <linux/init.h> +#include <linux/utsname.h> +#include <asm/bugs.h> +#include <asm/processor.h> +#include <asm/i387.h> +#include <asm/msr.h> +#include <asm/paravirt.h> +#include <asm/alternative.h> + +static int __init no_halt(char *s) +{ + boot_cpu_data.hlt_works_ok = 0; + return 1; +} + +__setup("no-hlt", no_halt); + +static int __init mca_pentium(char *s) +{ + mca_pentium_flag = 1; + return 1; +} + +__setup("mca-pentium", mca_pentium); + +static int __init no_387(char *s) +{ + boot_cpu_data.hard_math = 0; + write_cr0(0xE | read_cr0()); + return 1; +} + +__setup("no387", no_387); + +static double __initdata x = 4195835.0; +static double __initdata y = 3145727.0; + +/* + * This used to check for exceptions.. + * However, it turns out that to support that, + * the XMM trap handlers basically had to + * be buggy. So let's have a correct XMM trap + * handler, and forget about printing out + * some status at boot. + * + * We should really only care about bugs here + * anyway. Not features. + */ +static void __init check_fpu(void) +{ + if (!boot_cpu_data.hard_math) { +#ifndef CONFIG_MATH_EMULATION + printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); + printk(KERN_EMERG "Giving up.\n"); + for (;;) ; +#endif + return; + } + +/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ + /* Test for the divl bug.. */ + __asm__("fninit\n\t" + "fldl %1\n\t" + "fdivl %2\n\t" + "fmull %2\n\t" + "fldl %1\n\t" + "fsubp %%st,%%st(1)\n\t" + "fistpl %0\n\t" + "fwait\n\t" + "fninit" + : "=m" (*&boot_cpu_data.fdiv_bug) + : "m" (*&x), "m" (*&y)); + if (boot_cpu_data.fdiv_bug) + printk("Hmm, FPU with FDIV bug.\n"); +} + +static void __init check_hlt(void) +{ + if (paravirt_enabled()) + return; + + printk(KERN_INFO "Checking 'hlt' instruction... "); + if (!boot_cpu_data.hlt_works_ok) { + printk("disabled\n"); + return; + } + halt(); + halt(); + halt(); + halt(); + printk("OK.\n"); +} + +/* + * Most 386 processors have a bug where a POPAD can lock the + * machine even from user space. + */ + +static void __init check_popad(void) +{ +#ifndef CONFIG_X86_POPAD_OK + int res, inp = (int) &res; + + printk(KERN_INFO "Checking for popad bug... "); + __asm__ __volatile__( + "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " + : "=&a" (res) + : "d" (inp) + : "ecx", "edi" ); + /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ + if (res != 12345678) printk( "Buggy.\n" ); + else printk( "OK.\n" ); +#endif +} + +/* + * Check whether we are able to run this kernel safely on SMP. + * + * - In order to run on a i386, we need to be compiled for i386 + * (for due to lack of "invlpg" and working WP on a i386) + * - In order to run on anything without a TSC, we need to be + * compiled for a i486. + * - In order to support the local APIC on a buggy Pentium machine, + * we need to be compiled with CONFIG_X86_GOOD_APIC disabled, + * which happens implicitly if compiled for a Pentium or lower + * (unless an advanced selection of CPU features is used) as an + * otherwise config implies a properly working local APIC without + * the need to do extra reads from the APIC. +*/ + +static void __init check_config(void) +{ +/* + * We'd better not be a i386 if we're configured to use some + * i486+ only features! (WP works in supervisor mode and the + * new "invlpg" and "bswap" instructions) + */ +#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) + if (boot_cpu_data.x86 == 3) + panic("Kernel requires i486+ for 'invlpg' and other features"); +#endif + +/* + * If we configured ourselves for a TSC, we'd better have one! + */ +#ifdef CONFIG_X86_TSC + if (!cpu_has_tsc && !tsc_disable) + panic("Kernel compiled for Pentium+, requires TSC feature!"); +#endif + +/* + * If we were told we had a good local APIC, check for buggy Pentia, + * i.e. all B steppings and the C2 stepping of P54C when using their + * integrated APIC (see 11AP erratum in "Pentium Processor + * Specification Update"). + */ +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC) + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL + && cpu_has_apic + && boot_cpu_data.x86 == 5 + && boot_cpu_data.x86_model == 2 + && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) + panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!"); +#endif +} + + +void __init check_bugs(void) +{ + identify_boot_cpu(); +#ifndef CONFIG_SMP + printk("CPU: "); + print_cpu_info(&boot_cpu_data); +#endif + check_config(); + check_fpu(); + check_hlt(); + check_popad(); + init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); + alternative_instructions(); +} diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c new file mode 100644 index 00000000000..473eac883c7 --- /dev/null +++ b/arch/x86/kernel/cpu/centaur.c @@ -0,0 +1,471 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/bitops.h> +#include <asm/processor.h> +#include <asm/msr.h> +#include <asm/e820.h> +#include <asm/mtrr.h> +#include "cpu.h" + +#ifdef CONFIG_X86_OOSTORE + +static u32 __cpuinit power2(u32 x) +{ + u32 s=1; + while(s<=x) + s<<=1; + return s>>=1; +} + + +/* + * Set up an actual MCR + */ + +static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key) +{ + u32 lo, hi; + + hi = base & ~0xFFF; + lo = ~(size-1); /* Size is a power of 2 so this makes a mask */ + lo &= ~0xFFF; /* Remove the ctrl value bits */ + lo |= key; /* Attribute we wish to set */ + wrmsr(reg+MSR_IDT_MCR0, lo, hi); + mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */ +} + +/* + * Figure what we can cover with MCR's + * + * Shortcut: We know you can't put 4Gig of RAM on a winchip + */ + +static u32 __cpuinit ramtop(void) /* 16388 */ +{ + int i; + u32 top = 0; + u32 clip = 0xFFFFFFFFUL; + + for (i = 0; i < e820.nr_map; i++) { + unsigned long start, end; + + if (e820.map[i].addr > 0xFFFFFFFFUL) + continue; + /* + * Don't MCR over reserved space. Ignore the ISA hole + * we frob around that catastrophy already + */ + + if (e820.map[i].type == E820_RESERVED) + { + if(e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip) + clip = e820.map[i].addr; + continue; + } + start = e820.map[i].addr; + end = e820.map[i].addr + e820.map[i].size; + if (start >= end) + continue; + if (end > top) + top = end; + } + /* Everything below 'top' should be RAM except for the ISA hole. + Because of the limited MCR's we want to map NV/ACPI into our + MCR range for gunk in RAM + + Clip might cause us to MCR insufficient RAM but that is an + acceptable failure mode and should only bite obscure boxes with + a VESA hole at 15Mb + + The second case Clip sometimes kicks in is when the EBDA is marked + as reserved. Again we fail safe with reasonable results + */ + + if(top>clip) + top=clip; + + return top; +} + +/* + * Compute a set of MCR's to give maximum coverage + */ + +static int __cpuinit centaur_mcr_compute(int nr, int key) +{ + u32 mem = ramtop(); + u32 root = power2(mem); + u32 base = root; + u32 top = root; + u32 floor = 0; + int ct = 0; + + while(ct<nr) + { + u32 fspace = 0; + + /* + * Find the largest block we will fill going upwards + */ + + u32 high = power2(mem-top); + + /* + * Find the largest block we will fill going downwards + */ + + u32 low = base/2; + + /* + * Don't fill below 1Mb going downwards as there + * is an ISA hole in the way. + */ + + if(base <= 1024*1024) + low = 0; + + /* + * See how much space we could cover by filling below + * the ISA hole + */ + + if(floor == 0) + fspace = 512*1024; + else if(floor ==512*1024) + fspace = 128*1024; + + /* And forget ROM space */ + + /* + * Now install the largest coverage we get + */ + + if(fspace > high && fspace > low) + { + centaur_mcr_insert(ct, floor, fspace, key); + floor += fspace; + } + else if(high > low) + { + centaur_mcr_insert(ct, top, high, key); + top += high; + } + else if(low > 0) + { + base -= low; + centaur_mcr_insert(ct, base, low, key); + } + else break; + ct++; + } + /* + * We loaded ct values. We now need to set the mask. The caller + * must do this bit. + */ + + return ct; +} + +static void __cpuinit centaur_create_optimal_mcr(void) +{ + int i; + /* + * Allocate up to 6 mcrs to mark as much of ram as possible + * as write combining and weak write ordered. + * + * To experiment with: Linux never uses stack operations for + * mmio spaces so we could globally enable stack operation wc + * + * Load the registers with type 31 - full write combining, all + * writes weakly ordered. + */ + int used = centaur_mcr_compute(6, 31); + + /* + * Wipe unused MCRs + */ + + for(i=used;i<8;i++) + wrmsr(MSR_IDT_MCR0+i, 0, 0); +} + +static void __cpuinit winchip2_create_optimal_mcr(void) +{ + u32 lo, hi; + int i; + + /* + * Allocate up to 6 mcrs to mark as much of ram as possible + * as write combining, weak store ordered. + * + * Load the registers with type 25 + * 8 - weak write ordering + * 16 - weak read ordering + * 1 - write combining + */ + + int used = centaur_mcr_compute(6, 25); + + /* + * Mark the registers we are using. + */ + + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + for(i=0;i<used;i++) + lo|=1<<(9+i); + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); + + /* + * Wipe unused MCRs + */ + + for(i=used;i<8;i++) + wrmsr(MSR_IDT_MCR0+i, 0, 0); +} + +/* + * Handle the MCR key on the Winchip 2. + */ + +static void __cpuinit winchip2_unprotect_mcr(void) +{ + u32 lo, hi; + u32 key; + + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + lo&=~0x1C0; /* blank bits 8-6 */ + key = (lo>>17) & 7; + lo |= key<<6; /* replace with unlock key */ + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); +} + +static void __cpuinit winchip2_protect_mcr(void) +{ + u32 lo, hi; + + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + lo&=~0x1C0; /* blank bits 8-6 */ + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); +} +#endif /* CONFIG_X86_OOSTORE */ + +#define ACE_PRESENT (1 << 6) +#define ACE_ENABLED (1 << 7) +#define ACE_FCR (1 << 28) /* MSR_VIA_FCR */ + +#define RNG_PRESENT (1 << 2) +#define RNG_ENABLED (1 << 3) +#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ + +static void __cpuinit init_c3(struct cpuinfo_x86 *c) +{ + u32 lo, hi; + + /* Test for Centaur Extended Feature Flags presence */ + if (cpuid_eax(0xC0000000) >= 0xC0000001) { + u32 tmp = cpuid_edx(0xC0000001); + + /* enable ACE unit, if present and disabled */ + if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) { + rdmsr (MSR_VIA_FCR, lo, hi); + lo |= ACE_FCR; /* enable ACE unit */ + wrmsr (MSR_VIA_FCR, lo, hi); + printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n"); + } + + /* enable RNG unit, if present and disabled */ + if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) { + rdmsr (MSR_VIA_RNG, lo, hi); + lo |= RNG_ENABLE; /* enable RNG unit */ + wrmsr (MSR_VIA_RNG, lo, hi); + printk(KERN_INFO "CPU: Enabled h/w RNG\n"); + } + + /* store Centaur Extended Feature Flags as + * word 5 of the CPU capability bit array + */ + c->x86_capability[5] = cpuid_edx(0xC0000001); + } + + /* Cyrix III family needs CX8 & PGE explicity enabled. */ + if (c->x86_model >=6 && c->x86_model <= 9) { + rdmsr (MSR_VIA_FCR, lo, hi); + lo |= (1<<1 | 1<<7); + wrmsr (MSR_VIA_FCR, lo, hi); + set_bit(X86_FEATURE_CX8, c->x86_capability); + } + + /* Before Nehemiah, the C3's had 3dNOW! */ + if (c->x86_model >=6 && c->x86_model <9) + set_bit(X86_FEATURE_3DNOW, c->x86_capability); + + get_model_name(c); + display_cacheinfo(c); +} + +static void __cpuinit init_centaur(struct cpuinfo_x86 *c) +{ + enum { + ECX8=1<<1, + EIERRINT=1<<2, + DPM=1<<3, + DMCE=1<<4, + DSTPCLK=1<<5, + ELINEAR=1<<6, + DSMC=1<<7, + DTLOCK=1<<8, + EDCTLB=1<<8, + EMMX=1<<9, + DPDC=1<<11, + EBRPRED=1<<12, + DIC=1<<13, + DDC=1<<14, + DNA=1<<15, + ERETSTK=1<<16, + E2MMX=1<<19, + EAMD3D=1<<20, + }; + + char *name; + u32 fcr_set=0; + u32 fcr_clr=0; + u32 lo,hi,newlo; + u32 aa,bb,cc,dd; + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, c->x86_capability); + + switch (c->x86) { + + case 5: + switch(c->x86_model) { + case 4: + name="C6"; + fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK; + fcr_clr=DPDC; + printk(KERN_NOTICE "Disabling bugged TSC.\n"); + clear_bit(X86_FEATURE_TSC, c->x86_capability); +#ifdef CONFIG_X86_OOSTORE + centaur_create_optimal_mcr(); + /* Enable + write combining on non-stack, non-string + write combining on string, all types + weak write ordering + + The C6 original lacks weak read order + + Note 0x120 is write only on Winchip 1 */ + + wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); +#endif + break; + case 8: + switch(c->x86_mask) { + default: + name="2"; + break; + case 7 ... 9: + name="2A"; + break; + case 10 ... 15: + name="2B"; + break; + } + fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; + fcr_clr=DPDC; +#ifdef CONFIG_X86_OOSTORE + winchip2_unprotect_mcr(); + winchip2_create_optimal_mcr(); + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + /* Enable + write combining on non-stack, non-string + write combining on string, all types + weak write ordering + */ + lo|=31; + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); + winchip2_protect_mcr(); +#endif + break; + case 9: + name="3"; + fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; + fcr_clr=DPDC; +#ifdef CONFIG_X86_OOSTORE + winchip2_unprotect_mcr(); + winchip2_create_optimal_mcr(); + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + /* Enable + write combining on non-stack, non-string + write combining on string, all types + weak write ordering + */ + lo|=31; + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); + winchip2_protect_mcr(); +#endif + break; + default: + name="??"; + } + + rdmsr(MSR_IDT_FCR1, lo, hi); + newlo=(lo|fcr_set) & (~fcr_clr); + + if (newlo!=lo) { + printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo ); + wrmsr(MSR_IDT_FCR1, newlo, hi ); + } else { + printk(KERN_INFO "Centaur FCR is 0x%X\n",lo); + } + /* Emulate MTRRs using Centaur's MCR. */ + set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability); + /* Report CX8 */ + set_bit(X86_FEATURE_CX8, c->x86_capability); + /* Set 3DNow! on Winchip 2 and above. */ + if (c->x86_model >=8) + set_bit(X86_FEATURE_3DNOW, c->x86_capability); + /* See if we can find out some more. */ + if ( cpuid_eax(0x80000000) >= 0x80000005 ) { + /* Yes, we can. */ + cpuid(0x80000005,&aa,&bb,&cc,&dd); + /* Add L1 data and code cache sizes. */ + c->x86_cache_size = (cc>>24)+(dd>>24); + } + sprintf( c->x86_model_id, "WinChip %s", name ); + break; + + case 6: + init_c3(c); + break; + } +} + +static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size) +{ + /* VIA C3 CPUs (670-68F) need further shifting. */ + if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) + size >>= 8; + + /* VIA also screwed up Nehemiah stepping 1, and made + it return '65KB' instead of '64KB' + - Note, it seems this may only be in engineering samples. */ + if ((c->x86==6) && (c->x86_model==9) && (c->x86_mask==1) && (size==65)) + size -=1; + + return size; +} + +static struct cpu_dev centaur_cpu_dev __cpuinitdata = { + .c_vendor = "Centaur", + .c_ident = { "CentaurHauls" }, + .c_init = init_centaur, + .c_size_cache = centaur_size_cache, +}; + +int __init centaur_init_cpu(void) +{ + cpu_devs[X86_VENDOR_CENTAUR] = ¢aur_cpu_dev; + return 0; +} diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c new file mode 100644 index 00000000000..d506201d397 --- /dev/null +++ b/arch/x86/kernel/cpu/common.c @@ -0,0 +1,733 @@ +#include <linux/init.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/smp.h> +#include <linux/module.h> +#include <linux/percpu.h> +#include <linux/bootmem.h> +#include <asm/semaphore.h> +#include <asm/processor.h> +#include <asm/i387.h> +#include <asm/msr.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/mtrr.h> +#include <asm/mce.h> +#ifdef CONFIG_X86_LOCAL_APIC +#include <asm/mpspec.h> +#include <asm/apic.h> +#include <mach_apic.h> +#endif + +#include "cpu.h" + +DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 }, + [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 }, + [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 }, + [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 }, + /* + * Segments used for calling PnP BIOS have byte granularity. + * They code segments and data segments have fixed 64k limits, + * the transfer segment sizes are set at run time. + */ + [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ + [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */ + [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */ + /* + * The APM segments have byte granularity and their bases + * are set at run time. All have 64k limits. + */ + [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ + /* 16-bit code */ + [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 }, + [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */ + + [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 }, + [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 }, +} }; +EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); + +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_fxsr __cpuinitdata; +static int disable_x86_serial_nr __cpuinitdata = 1; +static int disable_x86_sep __cpuinitdata; + +struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; + +extern int disable_pse; + +static void __cpuinit default_init(struct cpuinfo_x86 * c) +{ + /* Not much we can do here... */ + /* Check if at least it has cpuid */ + if (c->cpuid_level == -1) { + /* No cpuid. It must be an ancient CPU */ + if (c->x86 == 4) + strcpy(c->x86_model_id, "486"); + else if (c->x86 == 3) + strcpy(c->x86_model_id, "386"); + } +} + +static struct cpu_dev __cpuinitdata default_cpu = { + .c_init = default_init, + .c_vendor = "Unknown", +}; +static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu; + +static int __init cachesize_setup(char *str) +{ + get_option (&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + +int __cpuinit get_model_name(struct cpuinfo_x86 *c) +{ + unsigned int *v; + char *p, *q; + + if (cpuid_eax(0x80000000) < 0x80000004) + return 0; + + v = (unsigned int *) c->x86_model_id; + cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); + cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); + cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); + c->x86_model_id[48] = 0; + + /* Intel chips right-justify this string for some dumb reason; + undo that brain damage */ + p = q = &c->x86_model_id[0]; + while ( *p == ' ' ) + p++; + if ( p != q ) { + while ( *p ) + *q++ = *p++; + while ( q <= &c->x86_model_id[48] ) + *q++ = '\0'; /* Zero-pad the rest */ + } + + return 1; +} + + +void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) +{ + unsigned int n, dummy, ecx, edx, l2size; + + n = cpuid_eax(0x80000000); + + if (n >= 0x80000005) { + cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size=(ecx>>24)+(edx>>24); + } + + if (n < 0x80000006) /* Some chips just has a large L1. */ + return; + + ecx = cpuid_ecx(0x80000006); + l2size = ecx >> 16; + + /* do processor-specific cache resizing */ + if (this_cpu->c_size_cache) + l2size = this_cpu->c_size_cache(c,l2size); + + /* Allow user to override all this if necessary. */ + if (cachesize_override != -1) + l2size = cachesize_override; + + if ( l2size == 0 ) + return; /* Again, no L2 cache is possible */ + + c->x86_cache_size = l2size; + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", + l2size, ecx & 0xFF); +} + +/* Naming convention should be: <Name> [(<Codename>)] */ +/* This table only is used unless init_<vendor>() below doesn't set it; */ +/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if ( c->x86_model >= 16 ) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + + +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) +{ + char *v = c->x86_vendor_id; + int i; + static int printed; + + for (i = 0; i < X86_VENDOR_NUM; i++) { + if (cpu_devs[i]) { + if (!strcmp(v,cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v,cpu_devs[i]->c_ident[1]))) { + c->x86_vendor = i; + if (!early) + this_cpu = cpu_devs[i]; + return; + } + } + } + if (!printed) { + printed++; + printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); + printk(KERN_ERR "CPU: Your system may be unstable.\n"); + } + c->x86_vendor = X86_VENDOR_UNKNOWN; + this_cpu = &default_cpu; +} + + +static int __init x86_fxsr_setup(char * s) +{ + /* Tell all the other CPU's to not use it... */ + disable_x86_fxsr = 1; + + /* + * ... and clear the bits early in the boot_cpu_data + * so that the bootup process doesn't try to do this + * either. + */ + clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability); + clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability); + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + + +static int __init x86_sep_setup(char * s) +{ + disable_x86_sep = 1; + return 1; +} +__setup("nosep", x86_sep_setup); + + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + + +/* Probe for the CPUID instruction */ +static int __cpuinit have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +void __init cpu_detect(struct cpuinfo_x86 *c) +{ + /* Get vendor name */ + cpuid(0x00000000, &c->cpuid_level, + (int *)&c->x86_vendor_id[0], + (int *)&c->x86_vendor_id[8], + (int *)&c->x86_vendor_id[4]); + + c->x86 = 4; + if (c->cpuid_level >= 0x00000001) { + u32 junk, tfms, cap0, misc; + cpuid(0x00000001, &tfms, &misc, &junk, &cap0); + c->x86 = (tfms >> 8) & 15; + c->x86_model = (tfms >> 4) & 15; + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) + c->x86_model += ((tfms >> 16) & 0xF) << 4; + c->x86_mask = tfms & 15; + if (cap0 & (1<<19)) + c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; + } +} + +/* Do minimum CPU detection early. + Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. + The others are not touched to avoid unwanted side effects. + + WARNING: this function is only called on the BP. Don't add code here + that is supposed to run on all CPUs. */ +static void __init early_cpu_detect(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + c->x86_cache_alignment = 32; + + if (!have_cpuid_p()) + return; + + cpu_detect(c); + + get_cpu_vendor(c, 1); +} + +static void __cpuinit generic_identify(struct cpuinfo_x86 * c) +{ + u32 tfms, xlvl; + int ebx; + + if (have_cpuid_p()) { + /* Get vendor name */ + cpuid(0x00000000, &c->cpuid_level, + (int *)&c->x86_vendor_id[0], + (int *)&c->x86_vendor_id[8], + (int *)&c->x86_vendor_id[4]); + + get_cpu_vendor(c, 0); + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + + /* Intel-defined flags: level 0x00000001 */ + if ( c->cpuid_level >= 0x00000001 ) { + u32 capability, excap; + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; + c->x86 = (tfms >> 8) & 15; + c->x86_model = (tfms >> 4) & 15; + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) + c->x86_model += ((tfms >> 16) & 0xF) << 4; + c->x86_mask = tfms & 15; +#ifdef CONFIG_X86_HT + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); +#else + c->apicid = (ebx >> 24) & 0xFF; +#endif + if (c->x86_capability[0] & (1<<19)) + c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; + } + + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + if ( (xlvl & 0xffff0000) == 0x80000000 ) { + if ( xlvl >= 0x80000001 ) { + c->x86_capability[1] = cpuid_edx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); + } + if ( xlvl >= 0x80000004 ) + get_model_name(c); /* Default name */ + } + + init_scattered_cpuid_features(c); + } + + early_intel_workaround(c); + +#ifdef CONFIG_X86_HT + c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; +#endif +} + +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { + /* Disable processor serial number */ + unsigned long lo,hi; + rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi); + lo |= 0x200000; + wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi); + printk(KERN_NOTICE "CPU serial number disabled.\n"); + clear_bit(X86_FEATURE_PN, c->x86_capability); + + /* Disabling the serial number may affect the cpuid level */ + c->cpuid_level = cpuid_eax(0); + } +} + +static int __init x86_serial_nr_setup(char *s) +{ + disable_x86_serial_nr = 0; + return 1; +} +__setup("serialnumber", x86_serial_nr_setup); + + + +/* + * This does the hard work of actually picking apart the CPU stuff... + */ +static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) +{ + int i; + + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + c->x86_max_cores = 1; + c->x86_clflush_size = 32; + memset(&c->x86_capability, 0, sizeof c->x86_capability); + + if (!have_cpuid_p()) { + /* First of all, decide if this is a 486 or higher */ + /* It's a 486 if we can modify the AC flag */ + if ( flag_is_changeable_p(X86_EFLAGS_AC) ) + c->x86 = 4; + else + c->x86 = 3; + } + + generic_identify(c); + + printk(KERN_DEBUG "CPU: After generic identify, caps:"); + for (i = 0; i < NCAPINTS; i++) + printk(" %08lx", c->x86_capability[i]); + printk("\n"); + + if (this_cpu->c_identify) { + this_cpu->c_identify(c); + + printk(KERN_DEBUG "CPU: After vendor identify, caps:"); + for (i = 0; i < NCAPINTS; i++) + printk(" %08lx", c->x86_capability[i]); + printk("\n"); + } + + /* + * Vendor-specific initialization. In this section we + * canonicalize the feature flags, meaning if there are + * features a certain CPU supports which CPUID doesn't + * tell us, CPUID claiming incorrect flags, or other bugs, + * we handle them here. + * + * At the end of this section, c->x86_capability better + * indicate the features this CPU genuinely supports! + */ + if (this_cpu->c_init) + this_cpu->c_init(c); + + /* Disable the PN if appropriate */ + squash_the_stupid_serial_number(c); + + /* + * The vendor-specific functions might have changed features. Now + * we do "generic changes." + */ + + /* TSC disabled? */ + if ( tsc_disable ) + clear_bit(X86_FEATURE_TSC, c->x86_capability); + + /* FXSR disabled? */ + if (disable_x86_fxsr) { + clear_bit(X86_FEATURE_FXSR, c->x86_capability); + clear_bit(X86_FEATURE_XMM, c->x86_capability); + } + + /* SEP disabled? */ + if (disable_x86_sep) + clear_bit(X86_FEATURE_SEP, c->x86_capability); + + if (disable_pse) + clear_bit(X86_FEATURE_PSE, c->x86_capability); + + /* If the model name is still unset, do table lookup. */ + if ( !c->x86_model_id[0] ) { + char *p; + p = table_lookup_model(c); + if ( p ) + strcpy(c->x86_model_id, p); + else + /* Last resort... */ + sprintf(c->x86_model_id, "%02x/%02x", + c->x86, c->x86_model); + } + + /* Now the feature flags better reflect actual CPU features! */ + + printk(KERN_DEBUG "CPU: After all inits, caps:"); + for (i = 0; i < NCAPINTS; i++) + printk(" %08lx", c->x86_capability[i]); + printk("\n"); + + /* + * On SMP, boot_cpu_data holds the common feature set between + * all CPUs; so make sure that we indicate which features are + * common between the CPUs. The first time this routine gets + * executed, c == &boot_cpu_data. + */ + if ( c != &boot_cpu_data ) { + /* AND the already accumulated flags with these */ + for ( i = 0 ; i < NCAPINTS ; i++ ) + boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; + } + + /* Init Machine Check Exception if available. */ + mcheck_init(c); +} + +void __init identify_boot_cpu(void) +{ + identify_cpu(&boot_cpu_data); + sysenter_setup(); + enable_sep_cpu(); + mtrr_bp_init(); +} + +void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) +{ + BUG_ON(c == &boot_cpu_data); + identify_cpu(c); + enable_sep_cpu(); + mtrr_ap_init(); +} + +#ifdef CONFIG_X86_HT +void __cpuinit detect_ht(struct cpuinfo_x86 *c) +{ + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; + + cpuid(1, &eax, &ebx, &ecx, &edx); + + if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) + return; + + smp_num_siblings = (ebx & 0xff0000) >> 16; + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1 ) { + + if (smp_num_siblings > NR_CPUS) { + printk(KERN_WARNING "CPU: Unsupported number of the " + "siblings %d", smp_num_siblings); + smp_num_siblings = 1; + return; + } + + index_msb = get_count_order(smp_num_siblings); + c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); + + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + + smp_num_siblings = smp_num_siblings / c->x86_max_cores; + + index_msb = get_count_order(smp_num_siblings) ; + + core_bits = get_count_order(c->x86_max_cores); + + c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & + ((1 << core_bits) - 1); + + if (c->x86_max_cores > 1) + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + } +} +#endif + +void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) +{ + char *vendor = NULL; + + if (c->x86_vendor < X86_VENDOR_NUM) + vendor = this_cpu->c_vendor; + else if (c->cpuid_level >= 0) + vendor = c->x86_vendor_id; + + if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) + printk("%s ", vendor); + + if (!c->x86_model_id[0]) + printk("%d86", c->x86); + else + printk("%s", c->x86_model_id); + + if (c->x86_mask || c->cpuid_level >= 0) + printk(" stepping %02x\n", c->x86_mask); + else + printk("\n"); +} + +cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; + +/* This is hacky. :) + * We're emulating future behavior. + * In the future, the cpu-specific init functions will be called implicitly + * via the magic of initcalls. + * They will insert themselves into the cpu_devs structure. + * Then, when cpu_init() is called, we can just iterate over that array. + */ + +extern int intel_cpu_init(void); +extern int cyrix_init_cpu(void); +extern int nsc_init_cpu(void); +extern int amd_init_cpu(void); +extern int centaur_init_cpu(void); +extern int transmeta_init_cpu(void); +extern int nexgen_init_cpu(void); +extern int umc_init_cpu(void); + +void __init early_cpu_init(void) +{ + intel_cpu_init(); + cyrix_init_cpu(); + nsc_init_cpu(); + amd_init_cpu(); + centaur_init_cpu(); + transmeta_init_cpu(); + nexgen_init_cpu(); + umc_init_cpu(); + early_cpu_detect(); + +#ifdef CONFIG_DEBUG_PAGEALLOC + /* pse is not compatible with on-the-fly unmapping, + * disable it even if the cpus claim to support it. + */ + clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); + disable_pse = 1; +#endif +} + +/* Make sure %fs is initialized properly in idle threads */ +struct pt_regs * __devinit idle_regs(struct pt_regs *regs) +{ + memset(regs, 0, sizeof(struct pt_regs)); + regs->xfs = __KERNEL_PERCPU; + return regs; +} + +/* Current gdt points %fs at the "master" per-cpu area: after this, + * it's on the real one. */ +void switch_to_new_gdt(void) +{ + struct Xgt_desc_struct gdt_descr; + + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +} + +/* + * cpu_init() initializes state that is per-CPU. Some data is already + * initialized (naturally) in the bootstrap process, such as the GDT + * and IDT. We reload them nevertheless, this function acts as a + * 'CPU state barrier', nothing should get across. + */ +void __cpuinit cpu_init(void) +{ + int cpu = smp_processor_id(); + struct task_struct *curr = current; + struct tss_struct * t = &per_cpu(init_tss, cpu); + struct thread_struct *thread = &curr->thread; + + if (cpu_test_and_set(cpu, cpu_initialized)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); + for (;;) local_irq_enable(); + } + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + if (cpu_has_vme || cpu_has_tsc || cpu_has_de) + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + if (tsc_disable && cpu_has_tsc) { + printk(KERN_NOTICE "Disabling TSC...\n"); + /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ + clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); + set_in_cr4(X86_CR4_TSD); + } + + load_idt(&idt_descr); + switch_to_new_gdt(); + + /* + * Set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); + curr->active_mm = &init_mm; + if (curr->mm) + BUG(); + enter_lazy_tlb(&init_mm, curr); + + load_esp0(t, thread); + set_tss_desc(cpu,t); + load_TR_desc(); + load_LDT(&init_mm.context); + +#ifdef CONFIG_DOUBLEFAULT + /* Set up doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); +#endif + + /* Clear %gs. */ + asm volatile ("mov %0, %%gs" : : "r" (0)); + + /* Clear all 6 debug registers: */ + set_debugreg(0, 0); + set_debugreg(0, 1); + set_debugreg(0, 2); + set_debugreg(0, 3); + set_debugreg(0, 6); + set_debugreg(0, 7); + + /* + * Force FPU initialization: + */ + current_thread_info()->status = 0; + clear_used_math(); + mxcsr_feature_mask_init(); +} + +#ifdef CONFIG_HOTPLUG_CPU +void __cpuinit cpu_uninit(void) +{ + int cpu = raw_smp_processor_id(); + cpu_clear(cpu, cpu_initialized); + + /* lazy TLB state */ + per_cpu(cpu_tlbstate, cpu).state = 0; + per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; +} +#endif diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h new file mode 100644 index 00000000000..2f6432cef6f --- /dev/null +++ b/arch/x86/kernel/cpu/cpu.h @@ -0,0 +1,28 @@ + +struct cpu_model_info { + int vendor; + int family; + char *model_names[16]; +}; + +/* attempt to consolidate cpu attributes */ +struct cpu_dev { + char * c_vendor; + + /* some have two possibilities for cpuid string */ + char * c_ident[2]; + + struct cpu_model_info c_models[4]; + + void (*c_init)(struct cpuinfo_x86 * c); + void (*c_identify)(struct cpuinfo_x86 * c); + unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); +}; + +extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; + +extern int get_model_name(struct cpuinfo_x86 *c); +extern void display_cacheinfo(struct cpuinfo_x86 *c); + +extern void early_intel_workaround(struct cpuinfo_x86 *c); + diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig new file mode 100644 index 00000000000..d8c6f132dc7 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -0,0 +1,250 @@ +# +# CPU Frequency scaling +# + +menu "CPU Frequency scaling" + +source "drivers/cpufreq/Kconfig" + +if CPU_FREQ + +comment "CPUFreq processor drivers" + +config X86_ACPI_CPUFREQ + tristate "ACPI Processor P-States driver" + select CPU_FREQ_TABLE + depends on ACPI_PROCESSOR + help + This driver adds a CPUFreq driver which utilizes the ACPI + Processor Performance States. + This driver also supports Intel Enhanced Speedstep. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + +config ELAN_CPUFREQ + tristate "AMD Elan SC400 and SC410" + select CPU_FREQ_TABLE + depends on X86_ELAN + ---help--- + This adds the CPUFreq driver for AMD Elan SC400 and SC410 + processors. + + You need to specify the processor maximum speed as boot + parameter: elanfreq=maxspeed (in kHz) or as module + parameter "max_freq". + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + +config SC520_CPUFREQ + tristate "AMD Elan SC520" + select CPU_FREQ_TABLE + depends on X86_ELAN + ---help--- + This adds the CPUFreq driver for AMD Elan SC520 processor. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + + +config X86_POWERNOW_K6 + tristate "AMD Mobile K6-2/K6-3 PowerNow!" + select CPU_FREQ_TABLE + help + This adds the CPUFreq driver for mobile AMD K6-2+ and mobile + AMD K6-3+ processors. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + +config X86_POWERNOW_K7 + tristate "AMD Mobile Athlon/Duron PowerNow!" + select CPU_FREQ_TABLE + help + This adds the CPUFreq driver for mobile AMD K7 mobile processors. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + +config X86_POWERNOW_K7_ACPI + bool + depends on X86_POWERNOW_K7 && ACPI_PROCESSOR + depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m) + default y + +config X86_POWERNOW_K8 + tristate "AMD Opteron/Athlon64 PowerNow!" + select CPU_FREQ_TABLE + depends on EXPERIMENTAL + help + This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + +config X86_POWERNOW_K8_ACPI + bool "ACPI Support" + select ACPI_PROCESSOR + depends on ACPI && X86_POWERNOW_K8 + default y + help + This provides access to the K8s Processor Performance States via ACPI. + This driver is probably required for CPUFreq to work with multi-socket and + SMP systems. It is not required on at least some single-socket yet + multi-core systems, even if SMP is enabled. + + It is safe to say Y here. + +config X86_GX_SUSPMOD + tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" + depends on PCI + help + This add the CPUFreq driver for NatSemi Geode processors which + support suspend modulation. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + +config X86_SPEEDSTEP_CENTRINO + tristate "Intel Enhanced SpeedStep" + select CPU_FREQ_TABLE + select X86_SPEEDSTEP_CENTRINO_TABLE + help + This adds the CPUFreq driver for Enhanced SpeedStep enabled + mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However, + you also need to say Y to "Use ACPI tables to decode..." below + [which might imply enabling ACPI] if you want to use this driver + on non-Banias CPUs. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + +config X86_SPEEDSTEP_CENTRINO_TABLE + bool "Built-in tables for Banias CPUs" + depends on X86_SPEEDSTEP_CENTRINO + default y + help + Use built-in tables for Banias CPUs if ACPI encoding + is not available. + + If in doubt, say N. + +config X86_SPEEDSTEP_ICH + tristate "Intel Speedstep on ICH-M chipsets (ioport interface)" + select CPU_FREQ_TABLE + help + This adds the CPUFreq driver for certain mobile Intel Pentium III + (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all + mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2, + ICH3 or ICH4 southbridge. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + +config X86_SPEEDSTEP_SMI + tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)" + select CPU_FREQ_TABLE + depends on EXPERIMENTAL + help + This adds the CPUFreq driver for certain mobile Intel Pentium III + (Coppermine), all mobile Intel Pentium III-M (Tualatin) + on systems which have an Intel 440BX/ZX/MX southbridge. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + +config X86_P4_CLOCKMOD + tristate "Intel Pentium 4 clock modulation" + select CPU_FREQ_TABLE + help + This adds the CPUFreq driver for Intel Pentium 4 / XEON + processors. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + +config X86_CPUFREQ_NFORCE2 + tristate "nVidia nForce2 FSB changing" + depends on EXPERIMENTAL + help + This adds the CPUFreq driver for FSB changing on nVidia nForce2 + platforms. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + +config X86_LONGRUN + tristate "Transmeta LongRun" + help + This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors + which support LongRun. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + +config X86_LONGHAUL + tristate "VIA Cyrix III Longhaul" + select CPU_FREQ_TABLE + depends on ACPI_PROCESSOR + help + This adds the CPUFreq driver for VIA Samuel/CyrixIII, + VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T + processors. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + +config X86_E_POWERSAVER + tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" + select CPU_FREQ_TABLE + depends on EXPERIMENTAL + help + This adds the CPUFreq driver for VIA C7 processors. + + If in doubt, say N. + +comment "shared options" + +config X86_ACPI_CPUFREQ_PROC_INTF + bool "/proc/acpi/processor/../performance interface (deprecated)" + depends on PROC_FS + depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI + help + This enables the deprecated /proc/acpi/processor/../performance + interface. While it is helpful for debugging, the generic, + cross-architecture cpufreq interfaces should be used. + + If in doubt, say N. + +config X86_SPEEDSTEP_LIB + tristate + default X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD + +config X86_SPEEDSTEP_RELAXED_CAP_CHECK + bool "Relaxed speedstep capability checks" + depends on (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) + help + Don't perform all checks for a speedstep capable system which would + normally be done. Some ancient or strange systems, though speedstep + capable, don't always indicate that they are speedstep capable. This + option lets the probing code bypass some of those checks if the + parameter "relaxed_check=1" is passed to the module. + +endif # CPU_FREQ + +endmenu diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile new file mode 100644 index 00000000000..560f7760dae --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/Makefile @@ -0,0 +1,16 @@ +obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o +obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o +obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o +obj-$(CONFIG_X86_LONGHAUL) += longhaul.o +obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o +obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o +obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o +obj-$(CONFIG_X86_LONGRUN) += longrun.o +obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o +obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o +obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o +obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o +obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o +obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o +obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o +obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c new file mode 100644 index 00000000000..ffd01e5dcb5 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -0,0 +1,798 @@ +/* + * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $) + * + * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> + * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> + * Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de> + * Copyright (C) 2006 Denis Sadykov <denis.m.sadykov@intel.com> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/sched.h> +#include <linux/cpufreq.h> +#include <linux/compiler.h> +#include <linux/dmi.h> + +#include <linux/acpi.h> +#include <acpi/processor.h> + +#include <asm/io.h> +#include <asm/msr.h> +#include <asm/processor.h> +#include <asm/cpufeature.h> +#include <asm/delay.h> +#include <asm/uaccess.h> + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) + +MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); +MODULE_DESCRIPTION("ACPI Processor P-States Driver"); +MODULE_LICENSE("GPL"); + +enum { + UNDEFINED_CAPABLE = 0, + SYSTEM_INTEL_MSR_CAPABLE, + SYSTEM_IO_CAPABLE, +}; + +#define INTEL_MSR_RANGE (0xffff) +#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) + +struct acpi_cpufreq_data { + struct acpi_processor_performance *acpi_data; + struct cpufreq_frequency_table *freq_table; + unsigned int max_freq; + unsigned int resume; + unsigned int cpu_feature; +}; + +static struct acpi_cpufreq_data *drv_data[NR_CPUS]; +/* acpi_perf_data is a pointer to percpu data. */ +static struct acpi_processor_performance *acpi_perf_data; + +static struct cpufreq_driver acpi_cpufreq_driver; + +static unsigned int acpi_pstate_strict; + +static int check_est_cpu(unsigned int cpuid) +{ + struct cpuinfo_x86 *cpu = &cpu_data[cpuid]; + + if (cpu->x86_vendor != X86_VENDOR_INTEL || + !cpu_has(cpu, X86_FEATURE_EST)) + return 0; + + return 1; +} + +static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) +{ + struct acpi_processor_performance *perf; + int i; + + perf = data->acpi_data; + + for (i=0; i<perf->state_count; i++) { + if (value == perf->states[i].status) + return data->freq_table[i].frequency; + } + return 0; +} + +static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) +{ + int i; + struct acpi_processor_performance *perf; + + msr &= INTEL_MSR_RANGE; + perf = data->acpi_data; + + for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { + if (msr == perf->states[data->freq_table[i].index].status) + return data->freq_table[i].frequency; + } + return data->freq_table[0].frequency; +} + +static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) +{ + switch (data->cpu_feature) { + case SYSTEM_INTEL_MSR_CAPABLE: + return extract_msr(val, data); + case SYSTEM_IO_CAPABLE: + return extract_io(val, data); + default: + return 0; + } +} + +struct msr_addr { + u32 reg; +}; + +struct io_addr { + u16 port; + u8 bit_width; +}; + +typedef union { + struct msr_addr msr; + struct io_addr io; +} drv_addr_union; + +struct drv_cmd { + unsigned int type; + cpumask_t mask; + drv_addr_union addr; + u32 val; +}; + +static void do_drv_read(struct drv_cmd *cmd) +{ + u32 h; + + switch (cmd->type) { + case SYSTEM_INTEL_MSR_CAPABLE: + rdmsr(cmd->addr.msr.reg, cmd->val, h); + break; + case SYSTEM_IO_CAPABLE: + acpi_os_read_port((acpi_io_address)cmd->addr.io.port, + &cmd->val, + (u32)cmd->addr.io.bit_width); + break; + default: + break; + } +} + +static void do_drv_write(struct drv_cmd *cmd) +{ + u32 lo, hi; + + switch (cmd->type) { + case SYSTEM_INTEL_MSR_CAPABLE: + rdmsr(cmd->addr.msr.reg, lo, hi); + lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE); + wrmsr(cmd->addr.msr.reg, lo, hi); + break; + case SYSTEM_IO_CAPABLE: + acpi_os_write_port((acpi_io_address)cmd->addr.io.port, + cmd->val, + (u32)cmd->addr.io.bit_width); + break; + default: + break; + } +} + +static void drv_read(struct drv_cmd *cmd) +{ + cpumask_t saved_mask = current->cpus_allowed; + cmd->val = 0; + + set_cpus_allowed(current, cmd->mask); + do_drv_read(cmd); + set_cpus_allowed(current, saved_mask); +} + +static void drv_write(struct drv_cmd *cmd) +{ + cpumask_t saved_mask = current->cpus_allowed; + unsigned int i; + + for_each_cpu_mask(i, cmd->mask) { + set_cpus_allowed(current, cpumask_of_cpu(i)); + do_drv_write(cmd); + } + + set_cpus_allowed(current, saved_mask); + return; +} + +static u32 get_cur_val(cpumask_t mask) +{ + struct acpi_processor_performance *perf; + struct drv_cmd cmd; + + if (unlikely(cpus_empty(mask))) + return 0; + + switch (drv_data[first_cpu(mask)]->cpu_feature) { + case SYSTEM_INTEL_MSR_CAPABLE: + cmd.type = SYSTEM_INTEL_MSR_CAPABLE; + cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; + break; + case SYSTEM_IO_CAPABLE: + cmd.type = SYSTEM_IO_CAPABLE; + perf = drv_data[first_cpu(mask)]->acpi_data; + cmd.addr.io.port = perf->control_register.address; + cmd.addr.io.bit_width = perf->control_register.bit_width; + break; + default: + return 0; + } + + cmd.mask = mask; + + drv_read(&cmd); + + dprintk("get_cur_val = %u\n", cmd.val); + + return cmd.val; +} + +/* + * Return the measured active (C0) frequency on this CPU since last call + * to this function. + * Input: cpu number + * Return: Average CPU frequency in terms of max frequency (zero on error) + * + * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance + * over a period of time, while CPU is in C0 state. + * IA32_MPERF counts at the rate of max advertised frequency + * IA32_APERF counts at the rate of actual CPU frequency + * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and + * no meaning should be associated with absolute values of these MSRs. + */ +static unsigned int get_measured_perf(unsigned int cpu) +{ + union { + struct { + u32 lo; + u32 hi; + } split; + u64 whole; + } aperf_cur, mperf_cur; + + cpumask_t saved_mask; + unsigned int perf_percent; + unsigned int retval; + + saved_mask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (get_cpu() != cpu) { + /* We were not able to run on requested processor */ + put_cpu(); + return 0; + } + + rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi); + rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi); + + wrmsr(MSR_IA32_APERF, 0,0); + wrmsr(MSR_IA32_MPERF, 0,0); + +#ifdef __i386__ + /* + * We dont want to do 64 bit divide with 32 bit kernel + * Get an approximate value. Return failure in case we cannot get + * an approximate value. + */ + if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) { + int shift_count; + u32 h; + + h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi); + shift_count = fls(h); + + aperf_cur.whole >>= shift_count; + mperf_cur.whole >>= shift_count; + } + + if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) { + int shift_count = 7; + aperf_cur.split.lo >>= shift_count; + mperf_cur.split.lo >>= shift_count; + } + + if (aperf_cur.split.lo && mperf_cur.split.lo) + perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo; + else + perf_percent = 0; + +#else + if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) { + int shift_count = 7; + aperf_cur.whole >>= shift_count; + mperf_cur.whole >>= shift_count; + } + + if (aperf_cur.whole && mperf_cur.whole) + perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole; + else + perf_percent = 0; + +#endif + + retval = drv_data[cpu]->max_freq * perf_percent / 100; + + put_cpu(); + set_cpus_allowed(current, saved_mask); + + dprintk("cpu %d: performance percent %d\n", cpu, perf_percent); + return retval; +} + +static unsigned int get_cur_freq_on_cpu(unsigned int cpu) +{ + struct acpi_cpufreq_data *data = drv_data[cpu]; + unsigned int freq; + + dprintk("get_cur_freq_on_cpu (%d)\n", cpu); + + if (unlikely(data == NULL || + data->acpi_data == NULL || data->freq_table == NULL)) { + return 0; + } + + freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data); + dprintk("cur freq = %u\n", freq); + + return freq; +} + +static unsigned int check_freqs(cpumask_t mask, unsigned int freq, + struct acpi_cpufreq_data *data) +{ + unsigned int cur_freq; + unsigned int i; + + for (i=0; i<100; i++) { + cur_freq = extract_freq(get_cur_val(mask), data); + if (cur_freq == freq) + return 1; + udelay(10); + } + return 0; +} + +static int acpi_cpufreq_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + struct acpi_processor_performance *perf; + struct cpufreq_freqs freqs; + cpumask_t online_policy_cpus; + struct drv_cmd cmd; + unsigned int next_state = 0; /* Index into freq_table */ + unsigned int next_perf_state = 0; /* Index into perf table */ + unsigned int i; + int result = 0; + + dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); + + if (unlikely(data == NULL || + data->acpi_data == NULL || data->freq_table == NULL)) { + return -ENODEV; + } + + perf = data->acpi_data; + result = cpufreq_frequency_table_target(policy, + data->freq_table, + target_freq, + relation, &next_state); + if (unlikely(result)) + return -ENODEV; + +#ifdef CONFIG_HOTPLUG_CPU + /* cpufreq holds the hotplug lock, so we are safe from here on */ + cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); +#else + online_policy_cpus = policy->cpus; +#endif + + next_perf_state = data->freq_table[next_state].index; + if (perf->state == next_perf_state) { + if (unlikely(data->resume)) { + dprintk("Called after resume, resetting to P%d\n", + next_perf_state); + data->resume = 0; + } else { + dprintk("Already at target state (P%d)\n", + next_perf_state); + return 0; + } + } + + switch (data->cpu_feature) { + case SYSTEM_INTEL_MSR_CAPABLE: + cmd.type = SYSTEM_INTEL_MSR_CAPABLE; + cmd.addr.msr.reg = MSR_IA32_PERF_CTL; + cmd.val = (u32) perf->states[next_perf_state].control; + break; + case SYSTEM_IO_CAPABLE: + cmd.type = SYSTEM_IO_CAPABLE; + cmd.addr.io.port = perf->control_register.address; + cmd.addr.io.bit_width = perf->control_register.bit_width; + cmd.val = (u32) perf->states[next_perf_state].control; + break; + default: + return -ENODEV; + } + + cpus_clear(cmd.mask); + + if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) + cmd.mask = online_policy_cpus; + else + cpu_set(policy->cpu, cmd.mask); + + freqs.old = perf->states[perf->state].core_frequency * 1000; + freqs.new = data->freq_table[next_state].frequency; + for_each_cpu_mask(i, cmd.mask) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + drv_write(&cmd); + + if (acpi_pstate_strict) { + if (!check_freqs(cmd.mask, freqs.new, data)) { + dprintk("acpi_cpufreq_target failed (%d)\n", + policy->cpu); + return -EAGAIN; + } + } + + for_each_cpu_mask(i, cmd.mask) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + perf->state = next_perf_state; + + return result; +} + +static int acpi_cpufreq_verify(struct cpufreq_policy *policy) +{ + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + + dprintk("acpi_cpufreq_verify\n"); + + return cpufreq_frequency_table_verify(policy, data->freq_table); +} + +static unsigned long +acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) +{ + struct acpi_processor_performance *perf = data->acpi_data; + + if (cpu_khz) { + /* search the closest match to cpu_khz */ + unsigned int i; + unsigned long freq; + unsigned long freqn = perf->states[0].core_frequency * 1000; + + for (i=0; i<(perf->state_count-1); i++) { + freq = freqn; + freqn = perf->states[i+1].core_frequency * 1000; + if ((2 * cpu_khz) > (freqn + freq)) { + perf->state = i; + return freq; + } + } + perf->state = perf->state_count-1; + return freqn; + } else { + /* assume CPU is at P0... */ + perf->state = 0; + return perf->states[0].core_frequency * 1000; + } +} + +/* + * acpi_cpufreq_early_init - initialize ACPI P-States library + * + * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c) + * in order to determine correct frequency and voltage pairings. We can + * do _PDC and _PSD and find out the processor dependency for the + * actual init that will happen later... + */ +static int __init acpi_cpufreq_early_init(void) +{ + dprintk("acpi_cpufreq_early_init\n"); + + acpi_perf_data = alloc_percpu(struct acpi_processor_performance); + if (!acpi_perf_data) { + dprintk("Memory allocation error for acpi_perf_data.\n"); + return -ENOMEM; + } + + /* Do initialization in ACPI core */ + acpi_processor_preregister_performance(acpi_perf_data); + return 0; +} + +#ifdef CONFIG_SMP +/* + * Some BIOSes do SW_ANY coordination internally, either set it up in hw + * or do it in BIOS firmware and won't inform about it to OS. If not + * detected, this has a side effect of making CPU run at a different speed + * than OS intended it to run at. Detect it and handle it cleanly. + */ +static int bios_with_sw_any_bug; + +static int sw_any_bug_found(const struct dmi_system_id *d) +{ + bios_with_sw_any_bug = 1; + return 0; +} + +static const struct dmi_system_id sw_any_bug_dmi_table[] = { + { + .callback = sw_any_bug_found, + .ident = "Supermicro Server X6DLP", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), + DMI_MATCH(DMI_BIOS_VERSION, "080010"), + DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), + }, + }, + { } +}; +#endif + +static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int i; + unsigned int valid_states = 0; + unsigned int cpu = policy->cpu; + struct acpi_cpufreq_data *data; + unsigned int result = 0; + struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; + struct acpi_processor_performance *perf; + + dprintk("acpi_cpufreq_cpu_init\n"); + + data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->acpi_data = percpu_ptr(acpi_perf_data, cpu); + drv_data[cpu] = data; + + if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) + acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; + + result = acpi_processor_register_performance(data->acpi_data, cpu); + if (result) + goto err_free; + + perf = data->acpi_data; + policy->shared_type = perf->shared_type; + + /* + * Will let policy->cpus know about dependency only when software + * coordination is required. + */ + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || + policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { + policy->cpus = perf->shared_cpu_map; + } + +#ifdef CONFIG_SMP + dmi_check_system(sw_any_bug_dmi_table); + if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { + policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; + policy->cpus = cpu_core_map[cpu]; + } +#endif + + /* capability check */ + if (perf->state_count <= 1) { + dprintk("No P-States\n"); + result = -ENODEV; + goto err_unreg; + } + + if (perf->control_register.space_id != perf->status_register.space_id) { + result = -ENODEV; + goto err_unreg; + } + + switch (perf->control_register.space_id) { + case ACPI_ADR_SPACE_SYSTEM_IO: + dprintk("SYSTEM IO addr space\n"); + data->cpu_feature = SYSTEM_IO_CAPABLE; + break; + case ACPI_ADR_SPACE_FIXED_HARDWARE: + dprintk("HARDWARE addr space\n"); + if (!check_est_cpu(cpu)) { + result = -ENODEV; + goto err_unreg; + } + data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; + break; + default: + dprintk("Unknown addr space %d\n", + (u32) (perf->control_register.space_id)); + result = -ENODEV; + goto err_unreg; + } + + data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * + (perf->state_count+1), GFP_KERNEL); + if (!data->freq_table) { + result = -ENOMEM; + goto err_unreg; + } + + /* detect transition latency */ + policy->cpuinfo.transition_latency = 0; + for (i=0; i<perf->state_count; i++) { + if ((perf->states[i].transition_latency * 1000) > + policy->cpuinfo.transition_latency) + policy->cpuinfo.transition_latency = + perf->states[i].transition_latency * 1000; + } + + data->max_freq = perf->states[0].core_frequency * 1000; + /* table init */ + for (i=0; i<perf->state_count; i++) { + if (i>0 && perf->states[i].core_frequency >= + data->freq_table[valid_states-1].frequency / 1000) + continue; + + data->freq_table[valid_states].index = i; + data->freq_table[valid_states].frequency = + perf->states[i].core_frequency * 1000; + valid_states++; + } + data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END; + perf->state = 0; + + result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table); + if (result) + goto err_freqfree; + + switch (perf->control_register.space_id) { + case ACPI_ADR_SPACE_SYSTEM_IO: + /* Current speed is unknown and not detectable by IO port */ + policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu); + break; + case ACPI_ADR_SPACE_FIXED_HARDWARE: + acpi_cpufreq_driver.get = get_cur_freq_on_cpu; + policy->cur = get_cur_freq_on_cpu(cpu); + break; + default: + break; + } + + /* notify BIOS that we exist */ + acpi_processor_notify_smm(THIS_MODULE); + + /* Check for APERF/MPERF support in hardware */ + if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) { + unsigned int ecx; + ecx = cpuid_ecx(6); + if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) + acpi_cpufreq_driver.getavg = get_measured_perf; + } + + dprintk("CPU%u - ACPI performance management activated.\n", cpu); + for (i = 0; i < perf->state_count; i++) + dprintk(" %cP%d: %d MHz, %d mW, %d uS\n", + (i == perf->state ? '*' : ' '), i, + (u32) perf->states[i].core_frequency, + (u32) perf->states[i].power, + (u32) perf->states[i].transition_latency); + + cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu); + + /* + * the first call to ->target() should result in us actually + * writing something to the appropriate registers. + */ + data->resume = 1; + + return result; + +err_freqfree: + kfree(data->freq_table); +err_unreg: + acpi_processor_unregister_performance(perf, cpu); +err_free: + kfree(data); + drv_data[cpu] = NULL; + + return result; +} + +static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) +{ + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + + dprintk("acpi_cpufreq_cpu_exit\n"); + + if (data) { + cpufreq_frequency_table_put_attr(policy->cpu); + drv_data[policy->cpu] = NULL; + acpi_processor_unregister_performance(data->acpi_data, + policy->cpu); + kfree(data); + } + + return 0; +} + +static int acpi_cpufreq_resume(struct cpufreq_policy *policy) +{ + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + + dprintk("acpi_cpufreq_resume\n"); + + data->resume = 1; + + return 0; +} + +static struct freq_attr *acpi_cpufreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver acpi_cpufreq_driver = { + .verify = acpi_cpufreq_verify, + .target = acpi_cpufreq_target, + .init = acpi_cpufreq_cpu_init, + .exit = acpi_cpufreq_cpu_exit, + .resume = acpi_cpufreq_resume, + .name = "acpi-cpufreq", + .owner = THIS_MODULE, + .attr = acpi_cpufreq_attr, +}; + +static int __init acpi_cpufreq_init(void) +{ + int ret; + + dprintk("acpi_cpufreq_init\n"); + + ret = acpi_cpufreq_early_init(); + if (ret) + return ret; + + return cpufreq_register_driver(&acpi_cpufreq_driver); +} + +static void __exit acpi_cpufreq_exit(void) +{ + dprintk("acpi_cpufreq_exit\n"); + + cpufreq_unregister_driver(&acpi_cpufreq_driver); + + free_percpu(acpi_perf_data); + + return; +} + +module_param(acpi_pstate_strict, uint, 0644); +MODULE_PARM_DESC(acpi_pstate_strict, + "value 0 or non-zero. non-zero -> strict ACPI checks are " + "performed during frequency changes."); + +late_initcall(acpi_cpufreq_init); +module_exit(acpi_cpufreq_exit); + +MODULE_ALIAS("acpi"); diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c new file mode 100644 index 00000000000..32f0bda3fc9 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -0,0 +1,440 @@ +/* + * (C) 2004-2006 Sebastian Witt <se.witt@gmx.net> + * + * Licensed under the terms of the GNU GPL License version 2. + * Based upon reverse engineered information + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/init.h> +#include <linux/cpufreq.h> +#include <linux/pci.h> +#include <linux/delay.h> + +#define NFORCE2_XTAL 25 +#define NFORCE2_BOOTFSB 0x48 +#define NFORCE2_PLLENABLE 0xa8 +#define NFORCE2_PLLREG 0xa4 +#define NFORCE2_PLLADR 0xa0 +#define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div) + +#define NFORCE2_MIN_FSB 50 +#define NFORCE2_SAFE_DISTANCE 50 + +/* Delay in ms between FSB changes */ +//#define NFORCE2_DELAY 10 + +/* nforce2_chipset: + * FSB is changed using the chipset + */ +static struct pci_dev *nforce2_chipset_dev; + +/* fid: + * multiplier * 10 + */ +static int fid = 0; + +/* min_fsb, max_fsb: + * minimum and maximum FSB (= FSB at boot time) + */ +static int min_fsb = 0; +static int max_fsb = 0; + +MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>"); +MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver"); +MODULE_LICENSE("GPL"); + +module_param(fid, int, 0444); +module_param(min_fsb, int, 0444); + +MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); +MODULE_PARM_DESC(min_fsb, + "Minimum FSB to use, if not defined: current FSB - 50"); + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) + +/** + * nforce2_calc_fsb - calculate FSB + * @pll: PLL value + * + * Calculates FSB from PLL value + */ +static int nforce2_calc_fsb(int pll) +{ + unsigned char mul, div; + + mul = (pll >> 8) & 0xff; + div = pll & 0xff; + + if (div > 0) + return NFORCE2_XTAL * mul / div; + + return 0; +} + +/** + * nforce2_calc_pll - calculate PLL value + * @fsb: FSB + * + * Calculate PLL value for given FSB + */ +static int nforce2_calc_pll(unsigned int fsb) +{ + unsigned char xmul, xdiv; + unsigned char mul = 0, div = 0; + int tried = 0; + + /* Try to calculate multiplier and divider up to 4 times */ + while (((mul == 0) || (div == 0)) && (tried <= 3)) { + for (xdiv = 2; xdiv <= 0x80; xdiv++) + for (xmul = 1; xmul <= 0xfe; xmul++) + if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) == + fsb + tried) { + mul = xmul; + div = xdiv; + } + tried++; + } + + if ((mul == 0) || (div == 0)) + return -1; + + return NFORCE2_PLL(mul, div); +} + +/** + * nforce2_write_pll - write PLL value to chipset + * @pll: PLL value + * + * Writes new FSB PLL value to chipset + */ +static void nforce2_write_pll(int pll) +{ + int temp; + + /* Set the pll addr. to 0x00 */ + pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, 0); + + /* Now write the value in all 64 registers */ + for (temp = 0; temp <= 0x3f; temp++) + pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, pll); + + return; +} + +/** + * nforce2_fsb_read - Read FSB + * + * Read FSB from chipset + * If bootfsb != 0, return FSB at boot-time + */ +static unsigned int nforce2_fsb_read(int bootfsb) +{ + struct pci_dev *nforce2_sub5; + u32 fsb, temp = 0; + + /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ + nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, + 0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL); + if (!nforce2_sub5) + return 0; + + pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb); + fsb /= 1000000; + + /* Check if PLL register is already set */ + pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); + + if(bootfsb || !temp) + return fsb; + + /* Use PLL register FSB value */ + pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp); + fsb = nforce2_calc_fsb(temp); + + return fsb; +} + +/** + * nforce2_set_fsb - set new FSB + * @fsb: New FSB + * + * Sets new FSB + */ +static int nforce2_set_fsb(unsigned int fsb) +{ + u32 temp = 0; + unsigned int tfsb; + int diff; + int pll = 0; + + if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { + printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb); + return -EINVAL; + } + + tfsb = nforce2_fsb_read(0); + if (!tfsb) { + printk(KERN_ERR "cpufreq: Error while reading the FSB\n"); + return -EINVAL; + } + + /* First write? Then set actual value */ + pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); + if (!temp) { + pll = nforce2_calc_pll(tfsb); + + if (pll < 0) + return -EINVAL; + + nforce2_write_pll(pll); + } + + /* Enable write access */ + temp = 0x01; + pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8)temp); + + diff = tfsb - fsb; + + if (!diff) + return 0; + + while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) { + if (diff < 0) + tfsb++; + else + tfsb--; + + /* Calculate the PLL reg. value */ + if ((pll = nforce2_calc_pll(tfsb)) == -1) + return -EINVAL; + + nforce2_write_pll(pll); +#ifdef NFORCE2_DELAY + mdelay(NFORCE2_DELAY); +#endif + } + + temp = 0x40; + pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLADR, (u8)temp); + + return 0; +} + +/** + * nforce2_get - get the CPU frequency + * @cpu: CPU number + * + * Returns the CPU frequency + */ +static unsigned int nforce2_get(unsigned int cpu) +{ + if (cpu) + return 0; + return nforce2_fsb_read(0) * fid * 100; +} + +/** + * nforce2_target - set a new CPUFreq policy + * @policy: new policy + * @target_freq: the target frequency + * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * + * Sets a new CPUFreq policy. + */ +static int nforce2_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ +// unsigned long flags; + struct cpufreq_freqs freqs; + unsigned int target_fsb; + + if ((target_freq > policy->max) || (target_freq < policy->min)) + return -EINVAL; + + target_fsb = target_freq / (fid * 100); + + freqs.old = nforce2_get(policy->cpu); + freqs.new = target_fsb * fid * 100; + freqs.cpu = 0; /* Only one CPU on nForce2 plattforms */ + + if (freqs.old == freqs.new) + return 0; + + dprintk("Old CPU frequency %d kHz, new %d kHz\n", + freqs.old, freqs.new); + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + /* Disable IRQs */ + //local_irq_save(flags); + + if (nforce2_set_fsb(target_fsb) < 0) + printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", + target_fsb); + else + dprintk("Changed FSB successfully to %d\n", + target_fsb); + + /* Enable IRQs */ + //local_irq_restore(flags); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + return 0; +} + +/** + * nforce2_verify - verifies a new CPUFreq policy + * @policy: new policy + */ +static int nforce2_verify(struct cpufreq_policy *policy) +{ + unsigned int fsb_pol_max; + + fsb_pol_max = policy->max / (fid * 100); + + if (policy->min < (fsb_pol_max * fid * 100)) + policy->max = (fsb_pol_max + 1) * fid * 100; + + cpufreq_verify_within_limits(policy, + policy->cpuinfo.min_freq, + policy->cpuinfo.max_freq); + return 0; +} + +static int nforce2_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int fsb; + unsigned int rfid; + + /* capability check */ + if (policy->cpu != 0) + return -ENODEV; + + /* Get current FSB */ + fsb = nforce2_fsb_read(0); + + if (!fsb) + return -EIO; + + /* FIX: Get FID from CPU */ + if (!fid) { + if (!cpu_khz) { + printk(KERN_WARNING + "cpufreq: cpu_khz not set, can't calculate multiplier!\n"); + return -ENODEV; + } + + fid = cpu_khz / (fsb * 100); + rfid = fid % 5; + + if (rfid) { + if (rfid > 2) + fid += 5 - rfid; + else + fid -= rfid; + } + } + + printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb, + fid / 10, fid % 10); + + /* Set maximum FSB to FSB at boot time */ + max_fsb = nforce2_fsb_read(1); + + if(!max_fsb) + return -EIO; + + if (!min_fsb) + min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE; + + if (min_fsb < NFORCE2_MIN_FSB) + min_fsb = NFORCE2_MIN_FSB; + + /* cpuinfo and default policy values */ + policy->cpuinfo.min_freq = min_fsb * fid * 100; + policy->cpuinfo.max_freq = max_fsb * fid * 100; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = nforce2_get(policy->cpu); + policy->min = policy->cpuinfo.min_freq; + policy->max = policy->cpuinfo.max_freq; + + return 0; +} + +static int nforce2_cpu_exit(struct cpufreq_policy *policy) +{ + return 0; +} + +static struct cpufreq_driver nforce2_driver = { + .name = "nforce2", + .verify = nforce2_verify, + .target = nforce2_target, + .get = nforce2_get, + .init = nforce2_cpu_init, + .exit = nforce2_cpu_exit, + .owner = THIS_MODULE, +}; + +/** + * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic + * + * Detects nForce2 A2 and C1 stepping + * + */ +static unsigned int nforce2_detect_chipset(void) +{ + nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, + PCI_DEVICE_ID_NVIDIA_NFORCE2, + PCI_ANY_ID, PCI_ANY_ID, NULL); + + if (nforce2_chipset_dev == NULL) + return -ENODEV; + + printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n", + nforce2_chipset_dev->revision); + printk(KERN_INFO + "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n"); + + return 0; +} + +/** + * nforce2_init - initializes the nForce2 CPUFreq driver + * + * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported + * devices, -EINVAL on problems during initiatization, and zero on + * success. + */ +static int __init nforce2_init(void) +{ + /* TODO: do we need to detect the processor? */ + + /* detect chipset */ + if (nforce2_detect_chipset()) { + printk(KERN_ERR "cpufreq: No nForce2 chipset.\n"); + return -ENODEV; + } + + return cpufreq_register_driver(&nforce2_driver); +} + +/** + * nforce2_exit - unregisters cpufreq module + * + * Unregisters nForce2 FSB change support. + */ +static void __exit nforce2_exit(void) +{ + cpufreq_unregister_driver(&nforce2_driver); +} + +module_init(nforce2_init); +module_exit(nforce2_exit); + diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c new file mode 100644 index 00000000000..c11baaf9f2b --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c @@ -0,0 +1,333 @@ +/* + * Based on documentation provided by Dave Jones. Thanks! + * + * Licensed under the terms of the GNU GPL License version 2. + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/cpufreq.h> +#include <linux/ioport.h> +#include <linux/slab.h> + +#include <asm/msr.h> +#include <asm/tsc.h> +#include <asm/timex.h> +#include <asm/io.h> +#include <asm/delay.h> + +#define EPS_BRAND_C7M 0 +#define EPS_BRAND_C7 1 +#define EPS_BRAND_EDEN 2 +#define EPS_BRAND_C3 3 + +struct eps_cpu_data { + u32 fsb; + struct cpufreq_frequency_table freq_table[]; +}; + +static struct eps_cpu_data *eps_cpu[NR_CPUS]; + + +static unsigned int eps_get(unsigned int cpu) +{ + struct eps_cpu_data *centaur; + u32 lo, hi; + + if (cpu) + return 0; + centaur = eps_cpu[cpu]; + if (centaur == NULL) + return 0; + + /* Return current frequency */ + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + return centaur->fsb * ((lo >> 8) & 0xff); +} + +static int eps_set_state(struct eps_cpu_data *centaur, + unsigned int cpu, + u32 dest_state) +{ + struct cpufreq_freqs freqs; + u32 lo, hi; + int err = 0; + int i; + + freqs.old = eps_get(cpu); + freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff); + freqs.cpu = cpu; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + /* Wait while CPU is busy */ + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + i = 0; + while (lo & ((1 << 16) | (1 << 17))) { + udelay(16); + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + i++; + if (unlikely(i > 64)) { + err = -ENODEV; + goto postchange; + } + } + /* Set new multiplier and voltage */ + wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0); + /* Wait until transition end */ + i = 0; + do { + udelay(16); + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + i++; + if (unlikely(i > 64)) { + err = -ENODEV; + goto postchange; + } + } while (lo & ((1 << 16) | (1 << 17))); + + /* Return current frequency */ +postchange: + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + freqs.new = centaur->fsb * ((lo >> 8) & 0xff); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + return err; +} + +static int eps_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + struct eps_cpu_data *centaur; + unsigned int newstate = 0; + unsigned int cpu = policy->cpu; + unsigned int dest_state; + int ret; + + if (unlikely(eps_cpu[cpu] == NULL)) + return -ENODEV; + centaur = eps_cpu[cpu]; + + if (unlikely(cpufreq_frequency_table_target(policy, + &eps_cpu[cpu]->freq_table[0], + target_freq, + relation, + &newstate))) { + return -EINVAL; + } + + /* Make frequency transition */ + dest_state = centaur->freq_table[newstate].index & 0xffff; + ret = eps_set_state(centaur, cpu, dest_state); + if (ret) + printk(KERN_ERR "eps: Timeout!\n"); + return ret; +} + +static int eps_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, + &eps_cpu[policy->cpu]->freq_table[0]); +} + +static int eps_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int i; + u32 lo, hi; + u64 val; + u8 current_multiplier, current_voltage; + u8 max_multiplier, max_voltage; + u8 min_multiplier, min_voltage; + u8 brand; + u32 fsb; + struct eps_cpu_data *centaur; + struct cpufreq_frequency_table *f_table; + int k, step, voltage; + int ret; + int states; + + if (policy->cpu != 0) + return -ENODEV; + + /* Check brand */ + printk("eps: Detected VIA "); + rdmsr(0x1153, lo, hi); + brand = (((lo >> 2) ^ lo) >> 18) & 3; + switch(brand) { + case EPS_BRAND_C7M: + printk("C7-M\n"); + break; + case EPS_BRAND_C7: + printk("C7\n"); + break; + case EPS_BRAND_EDEN: + printk("Eden\n"); + break; + case EPS_BRAND_C3: + printk("C3\n"); + return -ENODEV; + break; + } + /* Enable Enhanced PowerSaver */ + rdmsrl(MSR_IA32_MISC_ENABLE, val); + if (!(val & 1 << 16)) { + val |= 1 << 16; + wrmsrl(MSR_IA32_MISC_ENABLE, val); + /* Can be locked at 0 */ + rdmsrl(MSR_IA32_MISC_ENABLE, val); + if (!(val & 1 << 16)) { + printk("eps: Can't enable Enhanced PowerSaver\n"); + return -ENODEV; + } + } + + /* Print voltage and multiplier */ + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + current_voltage = lo & 0xff; + printk("eps: Current voltage = %dmV\n", current_voltage * 16 + 700); + current_multiplier = (lo >> 8) & 0xff; + printk("eps: Current multiplier = %d\n", current_multiplier); + + /* Print limits */ + max_voltage = hi & 0xff; + printk("eps: Highest voltage = %dmV\n", max_voltage * 16 + 700); + max_multiplier = (hi >> 8) & 0xff; + printk("eps: Highest multiplier = %d\n", max_multiplier); + min_voltage = (hi >> 16) & 0xff; + printk("eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700); + min_multiplier = (hi >> 24) & 0xff; + printk("eps: Lowest multiplier = %d\n", min_multiplier); + + /* Sanity checks */ + if (current_multiplier == 0 || max_multiplier == 0 + || min_multiplier == 0) + return -EINVAL; + if (current_multiplier > max_multiplier + || max_multiplier <= min_multiplier) + return -EINVAL; + if (current_voltage > 0x1c || max_voltage > 0x1c) + return -EINVAL; + if (max_voltage < min_voltage) + return -EINVAL; + + /* Calc FSB speed */ + fsb = cpu_khz / current_multiplier; + /* Calc number of p-states supported */ + if (brand == EPS_BRAND_C7M) + states = max_multiplier - min_multiplier + 1; + else + states = 2; + + /* Allocate private data and frequency table for current cpu */ + centaur = kzalloc(sizeof(struct eps_cpu_data) + + (states + 1) * sizeof(struct cpufreq_frequency_table), + GFP_KERNEL); + if (!centaur) + return -ENOMEM; + eps_cpu[0] = centaur; + + /* Copy basic values */ + centaur->fsb = fsb; + + /* Fill frequency and MSR value table */ + f_table = ¢aur->freq_table[0]; + if (brand != EPS_BRAND_C7M) { + f_table[0].frequency = fsb * min_multiplier; + f_table[0].index = (min_multiplier << 8) | min_voltage; + f_table[1].frequency = fsb * max_multiplier; + f_table[1].index = (max_multiplier << 8) | max_voltage; + f_table[2].frequency = CPUFREQ_TABLE_END; + } else { + k = 0; + step = ((max_voltage - min_voltage) * 256) + / (max_multiplier - min_multiplier); + for (i = min_multiplier; i <= max_multiplier; i++) { + voltage = (k * step) / 256 + min_voltage; + f_table[k].frequency = fsb * i; + f_table[k].index = (i << 8) | voltage; + k++; + } + f_table[k].frequency = CPUFREQ_TABLE_END; + } + + policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */ + policy->cur = fsb * current_multiplier; + + ret = cpufreq_frequency_table_cpuinfo(policy, ¢aur->freq_table[0]); + if (ret) { + kfree(centaur); + return ret; + } + + cpufreq_frequency_table_get_attr(¢aur->freq_table[0], policy->cpu); + return 0; +} + +static int eps_cpu_exit(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + struct eps_cpu_data *centaur; + u32 lo, hi; + + if (eps_cpu[cpu] == NULL) + return -ENODEV; + centaur = eps_cpu[cpu]; + + /* Get max frequency */ + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + /* Set max frequency */ + eps_set_state(centaur, cpu, hi & 0xffff); + /* Bye */ + cpufreq_frequency_table_put_attr(policy->cpu); + kfree(eps_cpu[cpu]); + eps_cpu[cpu] = NULL; + return 0; +} + +static struct freq_attr* eps_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver eps_driver = { + .verify = eps_verify, + .target = eps_target, + .init = eps_cpu_init, + .exit = eps_cpu_exit, + .get = eps_get, + .name = "e_powersaver", + .owner = THIS_MODULE, + .attr = eps_attr, +}; + +static int __init eps_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + /* This driver will work only on Centaur C7 processors with + * Enhanced SpeedStep/PowerSaver registers */ + if (c->x86_vendor != X86_VENDOR_CENTAUR + || c->x86 != 6 || c->x86_model != 10) + return -ENODEV; + if (!cpu_has(c, X86_FEATURE_EST)) + return -ENODEV; + + if (cpufreq_register_driver(&eps_driver)) + return -EINVAL; + return 0; +} + +static void __exit eps_exit(void) +{ + cpufreq_unregister_driver(&eps_driver); +} + +MODULE_AUTHOR("Rafa³ Bilski <rafalbilski@interia.pl>"); +MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's."); +MODULE_LICENSE("GPL"); + +module_init(eps_init); +module_exit(eps_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c new file mode 100644 index 00000000000..1e7ae7dafcf --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -0,0 +1,308 @@ +/* + * elanfreq: cpufreq driver for the AMD ELAN family + * + * (c) Copyright 2002 Robert Schwebel <r.schwebel@pengutronix.de> + * + * Parts of this code are (c) Sven Geggus <sven@geggus.net> + * + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * 2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> + +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/cpufreq.h> + +#include <asm/msr.h> +#include <asm/timex.h> +#include <asm/io.h> + +#define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ +#define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ + +/* Module parameter */ +static int max_freq; + +struct s_elan_multiplier { + int clock; /* frequency in kHz */ + int val40h; /* PMU Force Mode register */ + int val80h; /* CPU Clock Speed Register */ +}; + +/* + * It is important that the frequencies + * are listed in ascending order here! + */ +struct s_elan_multiplier elan_multiplier[] = { + {1000, 0x02, 0x18}, + {2000, 0x02, 0x10}, + {4000, 0x02, 0x08}, + {8000, 0x00, 0x00}, + {16000, 0x00, 0x02}, + {33000, 0x00, 0x04}, + {66000, 0x01, 0x04}, + {99000, 0x01, 0x05} +}; + +static struct cpufreq_frequency_table elanfreq_table[] = { + {0, 1000}, + {1, 2000}, + {2, 4000}, + {3, 8000}, + {4, 16000}, + {5, 33000}, + {6, 66000}, + {7, 99000}, + {0, CPUFREQ_TABLE_END}, +}; + + +/** + * elanfreq_get_cpu_frequency: determine current cpu speed + * + * Finds out at which frequency the CPU of the Elan SOC runs + * at the moment. Frequencies from 1 to 33 MHz are generated + * the normal way, 66 and 99 MHz are called "Hyperspeed Mode" + * and have the rest of the chip running with 33 MHz. + */ + +static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) +{ + u8 clockspeed_reg; /* Clock Speed Register */ + + local_irq_disable(); + outb_p(0x80,REG_CSCIR); + clockspeed_reg = inb_p(REG_CSCDR); + local_irq_enable(); + + if ((clockspeed_reg & 0xE0) == 0xE0) + return 0; + + /* Are we in CPU clock multiplied mode (66/99 MHz)? */ + if ((clockspeed_reg & 0xE0) == 0xC0) { + if ((clockspeed_reg & 0x01) == 0) + return 66000; + else + return 99000; + } + + /* 33 MHz is not 32 MHz... */ + if ((clockspeed_reg & 0xE0)==0xA0) + return 33000; + + return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000); +} + + +/** + * elanfreq_set_cpu_frequency: Change the CPU core frequency + * @cpu: cpu number + * @freq: frequency in kHz + * + * This function takes a frequency value and changes the CPU frequency + * according to this. Note that the frequency has to be checked by + * elanfreq_validatespeed() for correctness! + * + * There is no return value. + */ + +static void elanfreq_set_cpu_state (unsigned int state) +{ + struct cpufreq_freqs freqs; + + freqs.old = elanfreq_get_cpu_frequency(0); + freqs.new = elan_multiplier[state].clock; + freqs.cpu = 0; /* elanfreq.c is UP only driver */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n", + elan_multiplier[state].clock); + + + /* + * Access to the Elan's internal registers is indexed via + * 0x22: Chip Setup & Control Register Index Register (CSCI) + * 0x23: Chip Setup & Control Register Data Register (CSCD) + * + */ + + /* + * 0x40 is the Power Management Unit's Force Mode Register. + * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency) + */ + + local_irq_disable(); + outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */ + outb_p(0x00,REG_CSCDR); + local_irq_enable(); /* wait till internal pipelines and */ + udelay(1000); /* buffers have cleaned up */ + + local_irq_disable(); + + /* now, set the CPU clock speed register (0x80) */ + outb_p(0x80,REG_CSCIR); + outb_p(elan_multiplier[state].val80h,REG_CSCDR); + + /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ + outb_p(0x40,REG_CSCIR); + outb_p(elan_multiplier[state].val40h,REG_CSCDR); + udelay(10000); + local_irq_enable(); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); +}; + + +/** + * elanfreq_validatespeed: test if frequency range is valid + * @policy: the policy to validate + * + * This function checks if a given frequency range in kHz is valid + * for the hardware supported by the driver. + */ + +static int elanfreq_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); +} + +static int elanfreq_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate)) + return -EINVAL; + + elanfreq_set_cpu_state(newstate); + + return 0; +} + + +/* + * Module init and exit code + */ + +static int elanfreq_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *c = cpu_data; + unsigned int i; + int result; + + /* capability check */ + if ((c->x86_vendor != X86_VENDOR_AMD) || + (c->x86 != 4) || (c->x86_model!=10)) + return -ENODEV; + + /* max freq */ + if (!max_freq) + max_freq = elanfreq_get_cpu_frequency(0); + + /* table init */ + for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { + if (elanfreq_table[i].frequency > max_freq) + elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; + } + + /* cpuinfo and default policy values */ + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = elanfreq_get_cpu_frequency(0); + + result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); + return 0; +} + + +static int elanfreq_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + + +#ifndef MODULE +/** + * elanfreq_setup - elanfreq command line parameter parsing + * + * elanfreq command line parameter. Use: + * elanfreq=66000 + * to set the maximum CPU frequency to 66 MHz. Note that in + * case you do not give this boot parameter, the maximum + * frequency will fall back to _current_ CPU frequency which + * might be lower. If you build this as a module, use the + * max_freq module parameter instead. + */ +static int __init elanfreq_setup(char *str) +{ + max_freq = simple_strtoul(str, &str, 0); + printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n"); + return 1; +} +__setup("elanfreq=", elanfreq_setup); +#endif + + +static struct freq_attr* elanfreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + + +static struct cpufreq_driver elanfreq_driver = { + .get = elanfreq_get_cpu_frequency, + .verify = elanfreq_verify, + .target = elanfreq_target, + .init = elanfreq_cpu_init, + .exit = elanfreq_cpu_exit, + .name = "elanfreq", + .owner = THIS_MODULE, + .attr = elanfreq_attr, +}; + + +static int __init elanfreq_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + /* Test if we have the right hardware */ + if ((c->x86_vendor != X86_VENDOR_AMD) || + (c->x86 != 4) || (c->x86_model!=10)) { + printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); + return -ENODEV; + } + return cpufreq_register_driver(&elanfreq_driver); +} + + +static void __exit elanfreq_exit(void) +{ + cpufreq_unregister_driver(&elanfreq_driver); +} + + +module_param (max_freq, int, 0444); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, Sven Geggus <sven@geggus.net>"); +MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs"); + +module_init(elanfreq_init); +module_exit(elanfreq_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c new file mode 100644 index 00000000000..ed2bda127c4 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c @@ -0,0 +1,494 @@ +/* + * Cyrix MediaGX and NatSemi Geode Suspend Modulation + * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com> + * (C) 2002 Hiroshi Miura <miura@da-cha.org> + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation + * + * The author(s) of this software shall not be held liable for damages + * of any nature resulting due to the use of this software. This + * software is provided AS-IS with no warranties. + * + * Theoritical note: + * + * (see Geode(tm) CS5530 manual (rev.4.1) page.56) + * + * CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0 + * are based on Suspend Moduration. + * + * Suspend Modulation works by asserting and de-asserting the SUSP# pin + * to CPU(GX1/GXLV) for configurable durations. When asserting SUSP# + * the CPU enters an idle state. GX1 stops its core clock when SUSP# is + * asserted then power consumption is reduced. + * + * Suspend Modulation's OFF/ON duration are configurable + * with 'Suspend Modulation OFF Count Register' + * and 'Suspend Modulation ON Count Register'. + * These registers are 8bit counters that represent the number of + * 32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF) + * to the processor. + * + * These counters define a ratio which is the effective frequency + * of operation of the system. + * + * OFF Count + * F_eff = Fgx * ---------------------- + * OFF Count + ON Count + * + * 0 <= On Count, Off Count <= 255 + * + * From these limits, we can get register values + * + * off_duration + on_duration <= MAX_DURATION + * on_duration = off_duration * (stock_freq - freq) / freq + * + * off_duration = (freq * DURATION) / stock_freq + * on_duration = DURATION - off_duration + * + * + *--------------------------------------------------------------------------- + * + * ChangeLog: + * Dec. 12, 2003 Hiroshi Miura <miura@da-cha.org> + * - fix on/off register mistake + * - fix cpu_khz calc when it stops cpu modulation. + * + * Dec. 11, 2002 Hiroshi Miura <miura@da-cha.org> + * - rewrite for Cyrix MediaGX Cx5510/5520 and + * NatSemi Geode Cs5530(A). + * + * Jul. ??, 2002 Zwane Mwaikambo <zwane@commfireservices.com> + * - cs5530_mod patch for 2.4.19-rc1. + * + *--------------------------------------------------------------------------- + * + * Todo + * Test on machines with 5510, 5530, 5530A + */ + +/************************************************************************ + * Suspend Modulation - Definitions * + ************************************************************************/ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/cpufreq.h> +#include <linux/pci.h> +#include <asm/processor-cyrix.h> +#include <asm/errno.h> + +/* PCI config registers, all at F0 */ +#define PCI_PMER1 0x80 /* power management enable register 1 */ +#define PCI_PMER2 0x81 /* power management enable register 2 */ +#define PCI_PMER3 0x82 /* power management enable register 3 */ +#define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */ +#define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */ +#define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */ +#define PCI_MODON 0x95 /* suspend modulation ON counter register */ +#define PCI_SUSCFG 0x96 /* suspend configuration register */ + +/* PMER1 bits */ +#define GPM (1<<0) /* global power management */ +#define GIT (1<<1) /* globally enable PM device idle timers */ +#define GTR (1<<2) /* globally enable IO traps */ +#define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */ +#define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */ + +/* SUSCFG bits */ +#define SUSMOD (1<<0) /* enable/disable suspend modulation */ +/* the belows support only with cs5530 (after rev.1.2)/cs5530A */ +#define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */ + /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */ +#define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */ +/* the belows support only with cs5530A */ +#define PWRSVE_ISA (1<<3) /* stop ISA clock */ +#define PWRSVE (1<<4) /* active idle */ + +struct gxfreq_params { + u8 on_duration; + u8 off_duration; + u8 pci_suscfg; + u8 pci_pmer1; + u8 pci_pmer2; + struct pci_dev *cs55x0; +}; + +static struct gxfreq_params *gx_params; +static int stock_freq; + +/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */ +static int pci_busclk = 0; +module_param (pci_busclk, int, 0444); + +/* maximum duration for which the cpu may be suspended + * (32us * MAX_DURATION). If no parameter is given, this defaults + * to 255. + * Note that this leads to a maximum of 8 ms(!) where the CPU clock + * is suspended -- processing power is just 0.39% of what it used to be, + * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */ +static int max_duration = 255; +module_param (max_duration, int, 0444); + +/* For the default policy, we want at least some processing power + * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV) + */ +#define POLICY_MIN_DIV 20 + + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg) + +/** + * we can detect a core multipiler from dir0_lsb + * from GX1 datasheet p.56, + * MULT[3:0]: + * 0000 = SYSCLK multiplied by 4 (test only) + * 0001 = SYSCLK multiplied by 10 + * 0010 = SYSCLK multiplied by 4 + * 0011 = SYSCLK multiplied by 6 + * 0100 = SYSCLK multiplied by 9 + * 0101 = SYSCLK multiplied by 5 + * 0110 = SYSCLK multiplied by 7 + * 0111 = SYSCLK multiplied by 8 + * of 33.3MHz + **/ +static int gx_freq_mult[16] = { + 4, 10, 4, 6, 9, 5, 7, 8, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + + +/**************************************************************** + * Low Level chipset interface * + ****************************************************************/ +static struct pci_device_id gx_chipset_tbl[] __initdata = { + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID }, + { 0, }, +}; + +/** + * gx_detect_chipset: + * + **/ +static __init struct pci_dev *gx_detect_chipset(void) +{ + struct pci_dev *gx_pci = NULL; + + /* check if CPU is a MediaGX or a Geode. */ + if ((current_cpu_data.x86_vendor != X86_VENDOR_NSC) && + (current_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) { + dprintk("error: no MediaGX/Geode processor found!\n"); + return NULL; + } + + /* detect which companion chip is used */ + while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) { + if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) + return gx_pci; + } + + dprintk("error: no supported chipset found!\n"); + return NULL; +} + +/** + * gx_get_cpuspeed: + * + * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi Geode CPU runs. + */ +static unsigned int gx_get_cpuspeed(unsigned int cpu) +{ + if ((gx_params->pci_suscfg & SUSMOD) == 0) + return stock_freq; + + return (stock_freq * gx_params->off_duration) + / (gx_params->on_duration + gx_params->off_duration); +} + +/** + * gx_validate_speed: + * determine current cpu speed + * + **/ + +static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration) +{ + unsigned int i; + u8 tmp_on, tmp_off; + int old_tmp_freq = stock_freq; + int tmp_freq; + + *off_duration=1; + *on_duration=0; + + for (i=max_duration; i>0; i--) { + tmp_off = ((khz * i) / stock_freq) & 0xff; + tmp_on = i - tmp_off; + tmp_freq = (stock_freq * tmp_off) / i; + /* if this relation is closer to khz, use this. If it's equal, + * prefer it, too - lower latency */ + if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) { + *on_duration = tmp_on; + *off_duration = tmp_off; + old_tmp_freq = tmp_freq; + } + } + + return old_tmp_freq; +} + + +/** + * gx_set_cpuspeed: + * set cpu speed in khz. + **/ + +static void gx_set_cpuspeed(unsigned int khz) +{ + u8 suscfg, pmer1; + unsigned int new_khz; + unsigned long flags; + struct cpufreq_freqs freqs; + + freqs.cpu = 0; + freqs.old = gx_get_cpuspeed(0); + + new_khz = gx_validate_speed(khz, &gx_params->on_duration, &gx_params->off_duration); + + freqs.new = new_khz; + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + local_irq_save(flags); + + if (new_khz != stock_freq) { /* if new khz == 100% of CPU speed, it is special case */ + switch (gx_params->cs55x0->device) { + case PCI_DEVICE_ID_CYRIX_5530_LEGACY: + pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP; + /* FIXME: need to test other values -- Zwane,Miura */ + pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */ + pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */ + pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1); + + if (gx_params->cs55x0->revision < 0x10) { /* CS5530(rev 1.2, 1.3) */ + suscfg = gx_params->pci_suscfg | SUSMOD; + } else { /* CS5530A,B.. */ + suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE; + } + break; + case PCI_DEVICE_ID_CYRIX_5520: + case PCI_DEVICE_ID_CYRIX_5510: + suscfg = gx_params->pci_suscfg | SUSMOD; + break; + default: + local_irq_restore(flags); + dprintk("fatal: try to set unknown chipset.\n"); + return; + } + } else { + suscfg = gx_params->pci_suscfg & ~(SUSMOD); + gx_params->off_duration = 0; + gx_params->on_duration = 0; + dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n"); + } + + pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration); + pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration); + + pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg); + pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); + + local_irq_restore(flags); + + gx_params->pci_suscfg = suscfg; + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", + gx_params->on_duration * 32, gx_params->off_duration * 32); + dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); +} + +/**************************************************************** + * High level functions * + ****************************************************************/ + +/* + * cpufreq_gx_verify: test if frequency range is valid + * + * This function checks if a given frequency range in kHz is valid + * for the hardware supported by the driver. + */ + +static int cpufreq_gx_verify(struct cpufreq_policy *policy) +{ + unsigned int tmp_freq = 0; + u8 tmp1, tmp2; + + if (!stock_freq || !policy) + return -EINVAL; + + policy->cpu = 0; + cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); + + /* it needs to be assured that at least one supported frequency is + * within policy->min and policy->max. If it is not, policy->max + * needs to be increased until one freuqency is supported. + * policy->min may not be decreased, though. This way we guarantee a + * specific processing capacity. + */ + tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2); + if (tmp_freq < policy->min) + tmp_freq += stock_freq / max_duration; + policy->min = tmp_freq; + if (policy->min > policy->max) + policy->max = tmp_freq; + tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2); + if (tmp_freq > policy->max) + tmp_freq -= stock_freq / max_duration; + policy->max = tmp_freq; + if (policy->max < policy->min) + policy->max = policy->min; + cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); + + return 0; +} + +/* + * cpufreq_gx_target: + * + */ +static int cpufreq_gx_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + u8 tmp1, tmp2; + unsigned int tmp_freq; + + if (!stock_freq || !policy) + return -EINVAL; + + policy->cpu = 0; + + tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2); + while (tmp_freq < policy->min) { + tmp_freq += stock_freq / max_duration; + tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); + } + while (tmp_freq > policy->max) { + tmp_freq -= stock_freq / max_duration; + tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); + } + + gx_set_cpuspeed(tmp_freq); + + return 0; +} + +static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int maxfreq, curfreq; + + if (!policy || policy->cpu != 0) + return -ENODEV; + + /* determine maximum frequency */ + if (pci_busclk) { + maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; + } else if (cpu_khz) { + maxfreq = cpu_khz; + } else { + maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; + } + stock_freq = maxfreq; + curfreq = gx_get_cpuspeed(0); + + dprintk("cpu max frequency is %d.\n", maxfreq); + dprintk("cpu current frequency is %dkHz.\n",curfreq); + + /* setup basic struct for cpufreq API */ + policy->cpu = 0; + + if (max_duration < POLICY_MIN_DIV) + policy->min = maxfreq / max_duration; + else + policy->min = maxfreq / POLICY_MIN_DIV; + policy->max = maxfreq; + policy->cur = curfreq; + policy->cpuinfo.min_freq = maxfreq / max_duration; + policy->cpuinfo.max_freq = maxfreq; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + + return 0; +} + +/* + * cpufreq_gx_init: + * MediaGX/Geode GX initialize cpufreq driver + */ +static struct cpufreq_driver gx_suspmod_driver = { + .get = gx_get_cpuspeed, + .verify = cpufreq_gx_verify, + .target = cpufreq_gx_target, + .init = cpufreq_gx_cpu_init, + .name = "gx-suspmod", + .owner = THIS_MODULE, +}; + +static int __init cpufreq_gx_init(void) +{ + int ret; + struct gxfreq_params *params; + struct pci_dev *gx_pci; + + /* Test if we have the right hardware */ + if ((gx_pci = gx_detect_chipset()) == NULL) + return -ENODEV; + + /* check whether module parameters are sane */ + if (max_duration > 0xff) + max_duration = 0xff; + + dprintk("geode suspend modulation available.\n"); + + params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL); + if (params == NULL) + return -ENOMEM; + + params->cs55x0 = gx_pci; + gx_params = params; + + /* keep cs55x0 configurations */ + pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg)); + pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1)); + pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); + pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); + pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration)); + + if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { + kfree(params); + return ret; /* register error! */ + } + + return 0; +} + +static void __exit cpufreq_gx_exit(void) +{ + cpufreq_unregister_driver(&gx_suspmod_driver); + pci_dev_put(gx_params->cs55x0); + kfree(gx_params); +} + +MODULE_AUTHOR ("Hiroshi Miura <miura@da-cha.org>"); +MODULE_DESCRIPTION ("Cpufreq driver for Cyrix MediaGX and NatSemi Geode"); +MODULE_LICENSE ("GPL"); + +module_init(cpufreq_gx_init); +module_exit(cpufreq_gx_exit); + diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c new file mode 100644 index 00000000000..5045f5d583c --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -0,0 +1,1027 @@ +/* + * (C) 2001-2004 Dave Jones. <davej@codemonkey.org.uk> + * (C) 2002 Padraig Brady. <padraig@antefacto.com> + * + * Licensed under the terms of the GNU GPL License version 2. + * Based upon datasheets & sample CPUs kindly provided by VIA. + * + * VIA have currently 3 different versions of Longhaul. + * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147. + * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0. + * Version 2 of longhaul is backward compatible with v1, but adds + * LONGHAUL MSR for purpose of both frequency and voltage scaling. + * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C). + * Version 3 of longhaul got renamed to Powersaver and redesigned + * to use only the POWERSAVER MSR at 0x110a. + * It is present in Ezra-T (C5M), Nehemiah (C5X) and above. + * It's pretty much the same feature wise to longhaul v2, though + * there is provision for scaling FSB too, but this doesn't work + * too well in practice so we don't even try to use this. + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/init.h> +#include <linux/cpufreq.h> +#include <linux/pci.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/delay.h> + +#include <asm/msr.h> +#include <asm/timex.h> +#include <asm/io.h> +#include <asm/acpi.h> +#include <linux/acpi.h> +#include <acpi/processor.h> + +#include "longhaul.h" + +#define PFX "longhaul: " + +#define TYPE_LONGHAUL_V1 1 +#define TYPE_LONGHAUL_V2 2 +#define TYPE_POWERSAVER 3 + +#define CPU_SAMUEL 1 +#define CPU_SAMUEL2 2 +#define CPU_EZRA 3 +#define CPU_EZRA_T 4 +#define CPU_NEHEMIAH 5 +#define CPU_NEHEMIAH_C 6 + +/* Flags */ +#define USE_ACPI_C3 (1 << 1) +#define USE_NORTHBRIDGE (1 << 2) + +static int cpu_model; +static unsigned int numscales=16; +static unsigned int fsb; + +static const struct mV_pos *vrm_mV_table; +static const unsigned char *mV_vrm_table; + +static unsigned int highest_speed, lowest_speed; /* kHz */ +static unsigned int minmult, maxmult; +static int can_scale_voltage; +static struct acpi_processor *pr = NULL; +static struct acpi_processor_cx *cx = NULL; +static u32 acpi_regs_addr; +static u8 longhaul_flags; +static unsigned int longhaul_index; + +/* Module parameters */ +static int scale_voltage; +static int disable_acpi_c3; +static int revid_errata; + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) + + +/* Clock ratios multiplied by 10 */ +static int clock_ratio[32]; +static int eblcr_table[32]; +static int longhaul_version; +static struct cpufreq_frequency_table *longhaul_table; + +#ifdef CONFIG_CPU_FREQ_DEBUG +static char speedbuffer[8]; + +static char *print_speed(int speed) +{ + if (speed < 1000) { + snprintf(speedbuffer, sizeof(speedbuffer),"%dMHz", speed); + return speedbuffer; + } + + if (speed%1000 == 0) + snprintf(speedbuffer, sizeof(speedbuffer), + "%dGHz", speed/1000); + else + snprintf(speedbuffer, sizeof(speedbuffer), + "%d.%dGHz", speed/1000, (speed%1000)/100); + + return speedbuffer; +} +#endif + + +static unsigned int calc_speed(int mult) +{ + int khz; + khz = (mult/10)*fsb; + if (mult%10) + khz += fsb/2; + khz *= 1000; + return khz; +} + + +static int longhaul_get_cpu_mult(void) +{ + unsigned long invalue=0,lo, hi; + + rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); + invalue = (lo & (1<<22|1<<23|1<<24|1<<25)) >>22; + if (longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) { + if (lo & (1<<27)) + invalue+=16; + } + return eblcr_table[invalue]; +} + +/* For processor with BCR2 MSR */ + +static void do_longhaul1(unsigned int clock_ratio_index) +{ + union msr_bcr2 bcr2; + + rdmsrl(MSR_VIA_BCR2, bcr2.val); + /* Enable software clock multiplier */ + bcr2.bits.ESOFTBF = 1; + bcr2.bits.CLOCKMUL = clock_ratio_index & 0xff; + + /* Sync to timer tick */ + safe_halt(); + /* Change frequency on next halt or sleep */ + wrmsrl(MSR_VIA_BCR2, bcr2.val); + /* Invoke transition */ + ACPI_FLUSH_CPU_CACHE(); + halt(); + + /* Disable software clock multiplier */ + local_irq_disable(); + rdmsrl(MSR_VIA_BCR2, bcr2.val); + bcr2.bits.ESOFTBF = 0; + wrmsrl(MSR_VIA_BCR2, bcr2.val); +} + +/* For processor with Longhaul MSR */ + +static void do_powersaver(int cx_address, unsigned int clock_ratio_index, + unsigned int dir) +{ + union msr_longhaul longhaul; + u32 t; + + rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); + /* Setup new frequency */ + if (!revid_errata) + longhaul.bits.RevisionKey = longhaul.bits.RevisionID; + else + longhaul.bits.RevisionKey = 0; + longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; + longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; + /* Setup new voltage */ + if (can_scale_voltage) + longhaul.bits.SoftVID = (clock_ratio_index >> 8) & 0x1f; + /* Sync to timer tick */ + safe_halt(); + /* Raise voltage if necessary */ + if (can_scale_voltage && dir) { + longhaul.bits.EnableSoftVID = 1; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + /* Change voltage */ + if (!cx_address) { + ACPI_FLUSH_CPU_CACHE(); + halt(); + } else { + ACPI_FLUSH_CPU_CACHE(); + /* Invoke C3 */ + inb(cx_address); + /* Dummy op - must do something useless after P_LVL3 + * read */ + t = inl(acpi_gbl_FADT.xpm_timer_block.address); + } + longhaul.bits.EnableSoftVID = 0; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + } + + /* Change frequency on next halt or sleep */ + longhaul.bits.EnableSoftBusRatio = 1; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + if (!cx_address) { + ACPI_FLUSH_CPU_CACHE(); + halt(); + } else { + ACPI_FLUSH_CPU_CACHE(); + /* Invoke C3 */ + inb(cx_address); + /* Dummy op - must do something useless after P_LVL3 read */ + t = inl(acpi_gbl_FADT.xpm_timer_block.address); + } + /* Disable bus ratio bit */ + longhaul.bits.EnableSoftBusRatio = 0; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + + /* Reduce voltage if necessary */ + if (can_scale_voltage && !dir) { + longhaul.bits.EnableSoftVID = 1; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + /* Change voltage */ + if (!cx_address) { + ACPI_FLUSH_CPU_CACHE(); + halt(); + } else { + ACPI_FLUSH_CPU_CACHE(); + /* Invoke C3 */ + inb(cx_address); + /* Dummy op - must do something useless after P_LVL3 + * read */ + t = inl(acpi_gbl_FADT.xpm_timer_block.address); + } + longhaul.bits.EnableSoftVID = 0; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + } +} + +/** + * longhaul_set_cpu_frequency() + * @clock_ratio_index : bitpattern of the new multiplier. + * + * Sets a new clock ratio. + */ + +static void longhaul_setstate(unsigned int table_index) +{ + unsigned int clock_ratio_index; + int speed, mult; + struct cpufreq_freqs freqs; + unsigned long flags; + unsigned int pic1_mask, pic2_mask; + u16 bm_status = 0; + u32 bm_timeout = 1000; + unsigned int dir = 0; + + clock_ratio_index = longhaul_table[table_index].index; + /* Safety precautions */ + mult = clock_ratio[clock_ratio_index & 0x1f]; + if (mult == -1) + return; + speed = calc_speed(mult); + if ((speed > highest_speed) || (speed < lowest_speed)) + return; + /* Voltage transition before frequency transition? */ + if (can_scale_voltage && longhaul_index < table_index) + dir = 1; + + freqs.old = calc_speed(longhaul_get_cpu_mult()); + freqs.new = speed; + freqs.cpu = 0; /* longhaul.c is UP only driver */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", + fsb, mult/10, mult%10, print_speed(speed/1000)); +retry_loop: + preempt_disable(); + local_irq_save(flags); + + pic2_mask = inb(0xA1); + pic1_mask = inb(0x21); /* works on C3. save mask. */ + outb(0xFF,0xA1); /* Overkill */ + outb(0xFE,0x21); /* TMR0 only */ + + /* Wait while PCI bus is busy. */ + if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE + || ((pr != NULL) && pr->flags.bm_control))) { + bm_status = inw(acpi_regs_addr); + bm_status &= 1 << 4; + while (bm_status && bm_timeout) { + outw(1 << 4, acpi_regs_addr); + bm_timeout--; + bm_status = inw(acpi_regs_addr); + bm_status &= 1 << 4; + } + } + + if (longhaul_flags & USE_NORTHBRIDGE) { + /* Disable AGP and PCI arbiters */ + outb(3, 0x22); + } else if ((pr != NULL) && pr->flags.bm_control) { + /* Disable bus master arbitration */ + acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1); + } + switch (longhaul_version) { + + /* + * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B]) + * Software controlled multipliers only. + */ + case TYPE_LONGHAUL_V1: + do_longhaul1(clock_ratio_index); + break; + + /* + * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C] + * + * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N]) + * Nehemiah can do FSB scaling too, but this has never been proven + * to work in practice. + */ + case TYPE_LONGHAUL_V2: + case TYPE_POWERSAVER: + if (longhaul_flags & USE_ACPI_C3) { + /* Don't allow wakeup */ + acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); + do_powersaver(cx->address, clock_ratio_index, dir); + } else { + do_powersaver(0, clock_ratio_index, dir); + } + break; + } + + if (longhaul_flags & USE_NORTHBRIDGE) { + /* Enable arbiters */ + outb(0, 0x22); + } else if ((pr != NULL) && pr->flags.bm_control) { + /* Enable bus master arbitration */ + acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); + } + outb(pic2_mask,0xA1); /* restore mask */ + outb(pic1_mask,0x21); + + local_irq_restore(flags); + preempt_enable(); + + freqs.new = calc_speed(longhaul_get_cpu_mult()); + /* Check if requested frequency is set. */ + if (unlikely(freqs.new != speed)) { + printk(KERN_INFO PFX "Failed to set requested frequency!\n"); + /* Revision ID = 1 but processor is expecting revision key + * equal to 0. Jumpers at the bottom of processor will change + * multiplier and FSB, but will not change bits in Longhaul + * MSR nor enable voltage scaling. */ + if (!revid_errata) { + printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" " + "option.\n"); + revid_errata = 1; + msleep(200); + goto retry_loop; + } + /* Why ACPI C3 sometimes doesn't work is a mystery for me. + * But it does happen. Processor is entering ACPI C3 state, + * but it doesn't change frequency. I tried poking various + * bits in northbridge registers, but without success. */ + if (longhaul_flags & USE_ACPI_C3) { + printk(KERN_INFO PFX "Disabling ACPI C3 support.\n"); + longhaul_flags &= ~USE_ACPI_C3; + if (revid_errata) { + printk(KERN_INFO PFX "Disabling \"Ignore " + "Revision ID\" option.\n"); + revid_errata = 0; + } + msleep(200); + goto retry_loop; + } + /* This shouldn't happen. Longhaul ver. 2 was reported not + * working on processors without voltage scaling, but with + * RevID = 1. RevID errata will make things right. Just + * to be 100% sure. */ + if (longhaul_version == TYPE_LONGHAUL_V2) { + printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n"); + longhaul_version = TYPE_LONGHAUL_V1; + msleep(200); + goto retry_loop; + } + } + /* Report true CPU frequency */ + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + if (!bm_timeout) + printk(KERN_INFO PFX "Warning: Timeout while waiting for idle PCI bus.\n"); +} + +/* + * Centaur decided to make life a little more tricky. + * Only longhaul v1 is allowed to read EBLCR BSEL[0:1]. + * Samuel2 and above have to try and guess what the FSB is. + * We do this by assuming we booted at maximum multiplier, and interpolate + * between that value multiplied by possible FSBs and cpu_mhz which + * was calculated at boot time. Really ugly, but no other way to do this. + */ + +#define ROUNDING 0xf + +static int guess_fsb(int mult) +{ + int speed = cpu_khz / 1000; + int i; + int speeds[] = { 666, 1000, 1333, 2000 }; + int f_max, f_min; + + for (i = 0; i < 4; i++) { + f_max = ((speeds[i] * mult) + 50) / 100; + f_max += (ROUNDING / 2); + f_min = f_max - ROUNDING; + if ((speed <= f_max) && (speed >= f_min)) + return speeds[i] / 10; + } + return 0; +} + + +static int __init longhaul_get_ranges(void) +{ + unsigned int i, j, k = 0; + unsigned int ratio; + int mult; + + /* Get current frequency */ + mult = longhaul_get_cpu_mult(); + if (mult == -1) { + printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n"); + return -EINVAL; + } + fsb = guess_fsb(mult); + if (fsb == 0) { + printk(KERN_INFO PFX "Invalid (reserved) FSB!\n"); + return -EINVAL; + } + /* Get max multiplier - as we always did. + * Longhaul MSR is usefull only when voltage scaling is enabled. + * C3 is booting at max anyway. */ + maxmult = mult; + /* Get min multiplier */ + switch (cpu_model) { + case CPU_NEHEMIAH: + minmult = 50; + break; + case CPU_NEHEMIAH_C: + minmult = 40; + break; + default: + minmult = 30; + break; + } + + dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", + minmult/10, minmult%10, maxmult/10, maxmult%10); + + highest_speed = calc_speed(maxmult); + lowest_speed = calc_speed(minmult); + dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, + print_speed(lowest_speed/1000), + print_speed(highest_speed/1000)); + + if (lowest_speed == highest_speed) { + printk (KERN_INFO PFX "highestspeed == lowest, aborting.\n"); + return -EINVAL; + } + if (lowest_speed > highest_speed) { + printk (KERN_INFO PFX "nonsense! lowest (%d > %d) !\n", + lowest_speed, highest_speed); + return -EINVAL; + } + + longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL); + if(!longhaul_table) + return -ENOMEM; + + for (j = 0; j < numscales; j++) { + ratio = clock_ratio[j]; + if (ratio == -1) + continue; + if (ratio > maxmult || ratio < minmult) + continue; + longhaul_table[k].frequency = calc_speed(ratio); + longhaul_table[k].index = j; + k++; + } + if (k <= 1) { + kfree(longhaul_table); + return -ENODEV; + } + /* Sort */ + for (j = 0; j < k - 1; j++) { + unsigned int min_f, min_i; + min_f = longhaul_table[j].frequency; + min_i = j; + for (i = j + 1; i < k; i++) { + if (longhaul_table[i].frequency < min_f) { + min_f = longhaul_table[i].frequency; + min_i = i; + } + } + if (min_i != j) { + unsigned int temp; + temp = longhaul_table[j].frequency; + longhaul_table[j].frequency = longhaul_table[min_i].frequency; + longhaul_table[min_i].frequency = temp; + temp = longhaul_table[j].index; + longhaul_table[j].index = longhaul_table[min_i].index; + longhaul_table[min_i].index = temp; + } + } + + longhaul_table[k].frequency = CPUFREQ_TABLE_END; + + /* Find index we are running on */ + for (j = 0; j < k; j++) { + if (clock_ratio[longhaul_table[j].index & 0x1f] == mult) { + longhaul_index = j; + break; + } + } + return 0; +} + + +static void __init longhaul_setup_voltagescaling(void) +{ + union msr_longhaul longhaul; + struct mV_pos minvid, maxvid, vid; + unsigned int j, speed, pos, kHz_step, numvscales; + int min_vid_speed; + + rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); + if (!(longhaul.bits.RevisionID & 1)) { + printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n"); + return; + } + + if (!longhaul.bits.VRMRev) { + printk(KERN_INFO PFX "VRM 8.5\n"); + vrm_mV_table = &vrm85_mV[0]; + mV_vrm_table = &mV_vrm85[0]; + } else { + printk(KERN_INFO PFX "Mobile VRM\n"); + if (cpu_model < CPU_NEHEMIAH) + return; + vrm_mV_table = &mobilevrm_mV[0]; + mV_vrm_table = &mV_mobilevrm[0]; + } + + minvid = vrm_mV_table[longhaul.bits.MinimumVID]; + maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; + + if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { + printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " + "Voltage scaling disabled.\n", + minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000); + return; + } + + if (minvid.mV == maxvid.mV) { + printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are " + "both %d.%03d. Voltage scaling disabled\n", + maxvid.mV/1000, maxvid.mV%1000); + return; + } + + /* How many voltage steps */ + numvscales = maxvid.pos - minvid.pos + 1; + printk(KERN_INFO PFX + "Max VID=%d.%03d " + "Min VID=%d.%03d, " + "%d possible voltage scales\n", + maxvid.mV/1000, maxvid.mV%1000, + minvid.mV/1000, minvid.mV%1000, + numvscales); + + /* Calculate max frequency at min voltage */ + j = longhaul.bits.MinMHzBR; + if (longhaul.bits.MinMHzBR4) + j += 16; + min_vid_speed = eblcr_table[j]; + if (min_vid_speed == -1) + return; + switch (longhaul.bits.MinMHzFSB) { + case 0: + min_vid_speed *= 13333; + break; + case 1: + min_vid_speed *= 10000; + break; + case 3: + min_vid_speed *= 6666; + break; + default: + return; + break; + } + if (min_vid_speed >= highest_speed) + return; + /* Calculate kHz for one voltage step */ + kHz_step = (highest_speed - min_vid_speed) / numvscales; + + j = 0; + while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { + speed = longhaul_table[j].frequency; + if (speed > min_vid_speed) + pos = (speed - min_vid_speed) / kHz_step + minvid.pos; + else + pos = minvid.pos; + longhaul_table[j].index |= mV_vrm_table[pos] << 8; + vid = vrm_mV_table[mV_vrm_table[pos]]; + printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", speed, j, vid.mV); + j++; + } + + can_scale_voltage = 1; + printk(KERN_INFO PFX "Voltage scaling enabled.\n"); +} + + +static int longhaul_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, longhaul_table); +} + + +static int longhaul_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + unsigned int table_index = 0; + unsigned int i; + unsigned int dir = 0; + u8 vid, current_vid; + + if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index)) + return -EINVAL; + + /* Don't set same frequency again */ + if (longhaul_index == table_index) + return 0; + + if (!can_scale_voltage) + longhaul_setstate(table_index); + else { + /* On test system voltage transitions exceeding single + * step up or down were turning motherboard off. Both + * "ondemand" and "userspace" are unsafe. C7 is doing + * this in hardware, C3 is old and we need to do this + * in software. */ + i = longhaul_index; + current_vid = (longhaul_table[longhaul_index].index >> 8) & 0x1f; + if (table_index > longhaul_index) + dir = 1; + while (i != table_index) { + vid = (longhaul_table[i].index >> 8) & 0x1f; + if (vid != current_vid) { + longhaul_setstate(i); + current_vid = vid; + msleep(200); + } + if (dir) + i++; + else + i--; + } + longhaul_setstate(table_index); + } + longhaul_index = table_index; + return 0; +} + + +static unsigned int longhaul_get(unsigned int cpu) +{ + if (cpu) + return 0; + return calc_speed(longhaul_get_cpu_mult()); +} + +static acpi_status longhaul_walk_callback(acpi_handle obj_handle, + u32 nesting_level, + void *context, void **return_value) +{ + struct acpi_device *d; + + if ( acpi_bus_get_device(obj_handle, &d) ) { + return 0; + } + *return_value = (void *)acpi_driver_data(d); + return 1; +} + +/* VIA don't support PM2 reg, but have something similar */ +static int enable_arbiter_disable(void) +{ + struct pci_dev *dev; + int status = 1; + int reg; + u8 pci_cmd; + + /* Find PLE133 host bridge */ + reg = 0x78; + dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, + NULL); + /* Find PM133/VT8605 host bridge */ + if (dev == NULL) + dev = pci_get_device(PCI_VENDOR_ID_VIA, + PCI_DEVICE_ID_VIA_8605_0, NULL); + /* Find CLE266 host bridge */ + if (dev == NULL) { + reg = 0x76; + dev = pci_get_device(PCI_VENDOR_ID_VIA, + PCI_DEVICE_ID_VIA_862X_0, NULL); + /* Find CN400 V-Link host bridge */ + if (dev == NULL) + dev = pci_get_device(PCI_VENDOR_ID_VIA, 0x7259, NULL); + } + if (dev != NULL) { + /* Enable access to port 0x22 */ + pci_read_config_byte(dev, reg, &pci_cmd); + if (!(pci_cmd & 1<<7)) { + pci_cmd |= 1<<7; + pci_write_config_byte(dev, reg, pci_cmd); + pci_read_config_byte(dev, reg, &pci_cmd); + if (!(pci_cmd & 1<<7)) { + printk(KERN_ERR PFX + "Can't enable access to port 0x22.\n"); + status = 0; + } + } + pci_dev_put(dev); + return status; + } + return 0; +} + +static int longhaul_setup_southbridge(void) +{ + struct pci_dev *dev; + u8 pci_cmd; + + /* Find VT8235 southbridge */ + dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); + if (dev == NULL) + /* Find VT8237 southbridge */ + dev = pci_get_device(PCI_VENDOR_ID_VIA, + PCI_DEVICE_ID_VIA_8237, NULL); + if (dev != NULL) { + /* Set transition time to max */ + pci_read_config_byte(dev, 0xec, &pci_cmd); + pci_cmd &= ~(1 << 2); + pci_write_config_byte(dev, 0xec, pci_cmd); + pci_read_config_byte(dev, 0xe4, &pci_cmd); + pci_cmd &= ~(1 << 7); + pci_write_config_byte(dev, 0xe4, pci_cmd); + pci_read_config_byte(dev, 0xe5, &pci_cmd); + pci_cmd |= 1 << 7; + pci_write_config_byte(dev, 0xe5, pci_cmd); + /* Get address of ACPI registers block*/ + pci_read_config_byte(dev, 0x81, &pci_cmd); + if (pci_cmd & 1 << 7) { + pci_read_config_dword(dev, 0x88, &acpi_regs_addr); + acpi_regs_addr &= 0xff00; + printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", acpi_regs_addr); + } + + pci_dev_put(dev); + return 1; + } + return 0; +} + +static int __init longhaul_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *c = cpu_data; + char *cpuname=NULL; + int ret; + u32 lo, hi; + + /* Check what we have on this motherboard */ + switch (c->x86_model) { + case 6: + cpu_model = CPU_SAMUEL; + cpuname = "C3 'Samuel' [C5A]"; + longhaul_version = TYPE_LONGHAUL_V1; + memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio)); + memcpy (eblcr_table, samuel1_eblcr, sizeof(samuel1_eblcr)); + break; + + case 7: + switch (c->x86_mask) { + case 0: + longhaul_version = TYPE_LONGHAUL_V1; + cpu_model = CPU_SAMUEL2; + cpuname = "C3 'Samuel 2' [C5B]"; + /* Note, this is not a typo, early Samuel2's had + * Samuel1 ratios. */ + memcpy(clock_ratio, samuel1_clock_ratio, + sizeof(samuel1_clock_ratio)); + memcpy(eblcr_table, samuel2_eblcr, + sizeof(samuel2_eblcr)); + break; + case 1 ... 15: + longhaul_version = TYPE_LONGHAUL_V1; + if (c->x86_mask < 8) { + cpu_model = CPU_SAMUEL2; + cpuname = "C3 'Samuel 2' [C5B]"; + } else { + cpu_model = CPU_EZRA; + cpuname = "C3 'Ezra' [C5C]"; + } + memcpy(clock_ratio, ezra_clock_ratio, + sizeof(ezra_clock_ratio)); + memcpy(eblcr_table, ezra_eblcr, + sizeof(ezra_eblcr)); + break; + } + break; + + case 8: + cpu_model = CPU_EZRA_T; + cpuname = "C3 'Ezra-T' [C5M]"; + longhaul_version = TYPE_POWERSAVER; + numscales=32; + memcpy (clock_ratio, ezrat_clock_ratio, sizeof(ezrat_clock_ratio)); + memcpy (eblcr_table, ezrat_eblcr, sizeof(ezrat_eblcr)); + break; + + case 9: + longhaul_version = TYPE_POWERSAVER; + numscales = 32; + memcpy(clock_ratio, + nehemiah_clock_ratio, + sizeof(nehemiah_clock_ratio)); + memcpy(eblcr_table, nehemiah_eblcr, sizeof(nehemiah_eblcr)); + switch (c->x86_mask) { + case 0 ... 1: + cpu_model = CPU_NEHEMIAH; + cpuname = "C3 'Nehemiah A' [C5XLOE]"; + break; + case 2 ... 4: + cpu_model = CPU_NEHEMIAH; + cpuname = "C3 'Nehemiah B' [C5XLOH]"; + break; + case 5 ... 15: + cpu_model = CPU_NEHEMIAH_C; + cpuname = "C3 'Nehemiah C' [C5P]"; + break; + } + break; + + default: + cpuname = "Unknown"; + break; + } + /* Check Longhaul ver. 2 */ + if (longhaul_version == TYPE_LONGHAUL_V2) { + rdmsr(MSR_VIA_LONGHAUL, lo, hi); + if (lo == 0 && hi == 0) + /* Looks like MSR isn't present */ + longhaul_version = TYPE_LONGHAUL_V1; + } + + printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname); + switch (longhaul_version) { + case TYPE_LONGHAUL_V1: + case TYPE_LONGHAUL_V2: + printk ("Longhaul v%d supported.\n", longhaul_version); + break; + case TYPE_POWERSAVER: + printk ("Powersaver supported.\n"); + break; + }; + + /* Doesn't hurt */ + longhaul_setup_southbridge(); + + /* Find ACPI data for processor */ + acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, + ACPI_UINT32_MAX, &longhaul_walk_callback, + NULL, (void *)&pr); + + /* Check ACPI support for C3 state */ + if (pr != NULL && longhaul_version == TYPE_POWERSAVER) { + cx = &pr->power.states[ACPI_STATE_C3]; + if (cx->address > 0 && cx->latency <= 1000) + longhaul_flags |= USE_ACPI_C3; + } + /* Disable if it isn't working */ + if (disable_acpi_c3) + longhaul_flags &= ~USE_ACPI_C3; + /* Check if northbridge is friendly */ + if (enable_arbiter_disable()) + longhaul_flags |= USE_NORTHBRIDGE; + + /* Check ACPI support for bus master arbiter disable */ + if (!(longhaul_flags & USE_ACPI_C3 + || longhaul_flags & USE_NORTHBRIDGE) + && ((pr == NULL) || !(pr->flags.bm_control))) { + printk(KERN_ERR PFX + "No ACPI support. Unsupported northbridge.\n"); + return -ENODEV; + } + + if (longhaul_flags & USE_NORTHBRIDGE) + printk(KERN_INFO PFX "Using northbridge support.\n"); + if (longhaul_flags & USE_ACPI_C3) + printk(KERN_INFO PFX "Using ACPI support.\n"); + + ret = longhaul_get_ranges(); + if (ret != 0) + return ret; + + if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0)) + longhaul_setup_voltagescaling(); + + policy->cpuinfo.transition_latency = 200000; /* nsec */ + policy->cur = calc_speed(longhaul_get_cpu_mult()); + + ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table); + if (ret) + return ret; + + cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu); + + return 0; +} + +static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static struct freq_attr* longhaul_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver longhaul_driver = { + .verify = longhaul_verify, + .target = longhaul_target, + .get = longhaul_get, + .init = longhaul_cpu_init, + .exit = __devexit_p(longhaul_cpu_exit), + .name = "longhaul", + .owner = THIS_MODULE, + .attr = longhaul_attr, +}; + + +static int __init longhaul_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6) + return -ENODEV; + +#ifdef CONFIG_SMP + if (num_online_cpus() > 1) { + printk(KERN_ERR PFX "More than 1 CPU detected, longhaul disabled.\n"); + return -ENODEV; + } +#endif +#ifdef CONFIG_X86_IO_APIC + if (cpu_has_apic) { + printk(KERN_ERR PFX "APIC detected. Longhaul is currently broken in this configuration.\n"); + return -ENODEV; + } +#endif + switch (c->x86_model) { + case 6 ... 9: + return cpufreq_register_driver(&longhaul_driver); + case 10: + printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); + default: + ;; + } + + return -ENODEV; +} + + +static void __exit longhaul_exit(void) +{ + int i; + + for (i=0; i < numscales; i++) { + if (clock_ratio[i] == maxmult) { + longhaul_setstate(i); + break; + } + } + + cpufreq_unregister_driver(&longhaul_driver); + kfree(longhaul_table); +} + +/* Even if BIOS is exporting ACPI C3 state, and it is used + * with success when CPU is idle, this state doesn't + * trigger frequency transition in some cases. */ +module_param (disable_acpi_c3, int, 0644); +MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); +/* Change CPU voltage with frequency. Very usefull to save + * power, but most VIA C3 processors aren't supporting it. */ +module_param (scale_voltage, int, 0644); +MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); +/* Force revision key to 0 for processors which doesn't + * support voltage scaling, but are introducing itself as + * such. */ +module_param(revid_errata, int, 0644); +MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); + +MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); +MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); +MODULE_LICENSE ("GPL"); + +late_initcall(longhaul_init); +module_exit(longhaul_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h new file mode 100644 index 00000000000..4fcc320997d --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h @@ -0,0 +1,353 @@ +/* + * longhaul.h + * (C) 2003 Dave Jones. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * VIA-specific information + */ + +union msr_bcr2 { + struct { + unsigned Reseved:19, // 18:0 + ESOFTBF:1, // 19 + Reserved2:3, // 22:20 + CLOCKMUL:4, // 26:23 + Reserved3:5; // 31:27 + } bits; + unsigned long val; +}; + +union msr_longhaul { + struct { + unsigned RevisionID:4, // 3:0 + RevisionKey:4, // 7:4 + EnableSoftBusRatio:1, // 8 + EnableSoftVID:1, // 9 + EnableSoftBSEL:1, // 10 + Reserved:3, // 11:13 + SoftBusRatio4:1, // 14 + VRMRev:1, // 15 + SoftBusRatio:4, // 19:16 + SoftVID:5, // 24:20 + Reserved2:3, // 27:25 + SoftBSEL:2, // 29:28 + Reserved3:2, // 31:30 + MaxMHzBR:4, // 35:32 + MaximumVID:5, // 40:36 + MaxMHzFSB:2, // 42:41 + MaxMHzBR4:1, // 43 + Reserved4:4, // 47:44 + MinMHzBR:4, // 51:48 + MinimumVID:5, // 56:52 + MinMHzFSB:2, // 58:57 + MinMHzBR4:1, // 59 + Reserved5:4; // 63:60 + } bits; + unsigned long long val; +}; + +/* + * Clock ratio tables. Div/Mod by 10 to get ratio. + * The eblcr ones specify the ratio read from the CPU. + * The clock_ratio ones specify what to write to the CPU. + */ + +/* + * VIA C3 Samuel 1 & Samuel 2 (stepping 0) + */ +static const int __initdata samuel1_clock_ratio[16] = { + -1, /* 0000 -> RESERVED */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + -1, /* 0011 -> RESERVED */ + -1, /* 0100 -> RESERVED */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 55, /* 0111 -> 5.5x */ + 60, /* 1000 -> 6.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 50, /* 1011 -> 5.0x */ + 65, /* 1100 -> 6.5x */ + 75, /* 1101 -> 7.5x */ + -1, /* 1110 -> RESERVED */ + -1, /* 1111 -> RESERVED */ +}; + +static const int __initdata samuel1_eblcr[16] = { + 50, /* 0000 -> RESERVED */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + -1, /* 0011 -> RESERVED */ + 55, /* 0100 -> 5.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + -1, /* 0111 -> RESERVED */ + -1, /* 1000 -> RESERVED */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 60, /* 1011 -> 6.0x */ + -1, /* 1100 -> RESERVED */ + 75, /* 1101 -> 7.5x */ + -1, /* 1110 -> RESERVED */ + 65, /* 1111 -> 6.5x */ +}; + +/* + * VIA C3 Samuel2 Stepping 1->15 + */ +static const int __initdata samuel2_eblcr[16] = { + 50, /* 0000 -> 5.0x */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + 100, /* 0011 -> 10.0x */ + 55, /* 0100 -> 5.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 110, /* 0111 -> 11.0x */ + 90, /* 1000 -> 9.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 60, /* 1011 -> 6.0x */ + 120, /* 1100 -> 12.0x */ + 75, /* 1101 -> 7.5x */ + 130, /* 1110 -> 13.0x */ + 65, /* 1111 -> 6.5x */ +}; + +/* + * VIA C3 Ezra + */ +static const int __initdata ezra_clock_ratio[16] = { + 100, /* 0000 -> 10.0x */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + 90, /* 0011 -> 9.0x */ + 95, /* 0100 -> 9.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 55, /* 0111 -> 5.5x */ + 60, /* 1000 -> 6.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 50, /* 1011 -> 5.0x */ + 65, /* 1100 -> 6.5x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 120, /* 1111 -> 12.0x */ +}; + +static const int __initdata ezra_eblcr[16] = { + 50, /* 0000 -> 5.0x */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + 100, /* 0011 -> 10.0x */ + 55, /* 0100 -> 5.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 95, /* 0111 -> 9.5x */ + 90, /* 1000 -> 9.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 60, /* 1011 -> 6.0x */ + 120, /* 1100 -> 12.0x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 65, /* 1111 -> 6.5x */ +}; + +/* + * VIA C3 (Ezra-T) [C5M]. + */ +static const int __initdata ezrat_clock_ratio[32] = { + 100, /* 0000 -> 10.0x */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + 90, /* 0011 -> 9.0x */ + 95, /* 0100 -> 9.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 55, /* 0111 -> 5.5x */ + 60, /* 1000 -> 6.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 50, /* 1011 -> 5.0x */ + 65, /* 1100 -> 6.5x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 120, /* 1111 -> 12.0x */ + + -1, /* 0000 -> RESERVED (10.0x) */ + 110, /* 0001 -> 11.0x */ + -1, /* 0010 -> 12.0x */ + -1, /* 0011 -> RESERVED (9.0x)*/ + 105, /* 0100 -> 10.5x */ + 115, /* 0101 -> 11.5x */ + 125, /* 0110 -> 12.5x */ + 135, /* 0111 -> 13.5x */ + 140, /* 1000 -> 14.0x */ + 150, /* 1001 -> 15.0x */ + 160, /* 1010 -> 16.0x */ + 130, /* 1011 -> 13.0x */ + 145, /* 1100 -> 14.5x */ + 155, /* 1101 -> 15.5x */ + -1, /* 1110 -> RESERVED (13.0x) */ + -1, /* 1111 -> RESERVED (12.0x) */ +}; + +static const int __initdata ezrat_eblcr[32] = { + 50, /* 0000 -> 5.0x */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + 100, /* 0011 -> 10.0x */ + 55, /* 0100 -> 5.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 95, /* 0111 -> 9.5x */ + 90, /* 1000 -> 9.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 60, /* 1011 -> 6.0x */ + 120, /* 1100 -> 12.0x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 65, /* 1111 -> 6.5x */ + + -1, /* 0000 -> RESERVED (9.0x) */ + 110, /* 0001 -> 11.0x */ + 120, /* 0010 -> 12.0x */ + -1, /* 0011 -> RESERVED (10.0x)*/ + 135, /* 0100 -> 13.5x */ + 115, /* 0101 -> 11.5x */ + 125, /* 0110 -> 12.5x */ + 105, /* 0111 -> 10.5x */ + 130, /* 1000 -> 13.0x */ + 150, /* 1001 -> 15.0x */ + 160, /* 1010 -> 16.0x */ + 140, /* 1011 -> 14.0x */ + -1, /* 1100 -> RESERVED (12.0x) */ + 155, /* 1101 -> 15.5x */ + -1, /* 1110 -> RESERVED (13.0x) */ + 145, /* 1111 -> 14.5x */ +}; + +/* + * VIA C3 Nehemiah */ + +static const int __initdata nehemiah_clock_ratio[32] = { + 100, /* 0000 -> 10.0x */ + -1, /* 0001 -> 16.0x */ + 40, /* 0010 -> 4.0x */ + 90, /* 0011 -> 9.0x */ + 95, /* 0100 -> 9.5x */ + -1, /* 0101 -> RESERVED */ + 45, /* 0110 -> 4.5x */ + 55, /* 0111 -> 5.5x */ + 60, /* 1000 -> 6.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 50, /* 1011 -> 5.0x */ + 65, /* 1100 -> 6.5x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 120, /* 1111 -> 12.0x */ + -1, /* 0000 -> 10.0x */ + 110, /* 0001 -> 11.0x */ + -1, /* 0010 -> 12.0x */ + -1, /* 0011 -> 9.0x */ + 105, /* 0100 -> 10.5x */ + 115, /* 0101 -> 11.5x */ + 125, /* 0110 -> 12.5x */ + 135, /* 0111 -> 13.5x */ + 140, /* 1000 -> 14.0x */ + 150, /* 1001 -> 15.0x */ + 160, /* 1010 -> 16.0x */ + 130, /* 1011 -> 13.0x */ + 145, /* 1100 -> 14.5x */ + 155, /* 1101 -> 15.5x */ + -1, /* 1110 -> RESERVED (13.0x) */ + -1, /* 1111 -> 12.0x */ +}; + +static const int __initdata nehemiah_eblcr[32] = { + 50, /* 0000 -> 5.0x */ + 160, /* 0001 -> 16.0x */ + 40, /* 0010 -> 4.0x */ + 100, /* 0011 -> 10.0x */ + 55, /* 0100 -> 5.5x */ + -1, /* 0101 -> RESERVED */ + 45, /* 0110 -> 4.5x */ + 95, /* 0111 -> 9.5x */ + 90, /* 1000 -> 9.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 60, /* 1011 -> 6.0x */ + 120, /* 1100 -> 12.0x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 65, /* 1111 -> 6.5x */ + 90, /* 0000 -> 9.0x */ + 110, /* 0001 -> 11.0x */ + 120, /* 0010 -> 12.0x */ + 100, /* 0011 -> 10.0x */ + 135, /* 0100 -> 13.5x */ + 115, /* 0101 -> 11.5x */ + 125, /* 0110 -> 12.5x */ + 105, /* 0111 -> 10.5x */ + 130, /* 1000 -> 13.0x */ + 150, /* 1001 -> 15.0x */ + 160, /* 1010 -> 16.0x */ + 140, /* 1011 -> 14.0x */ + 120, /* 1100 -> 12.0x */ + 155, /* 1101 -> 15.5x */ + -1, /* 1110 -> RESERVED (13.0x) */ + 145 /* 1111 -> 14.5x */ +}; + +/* + * Voltage scales. Div/Mod by 1000 to get actual voltage. + * Which scale to use depends on the VRM type in use. + */ + +struct mV_pos { + unsigned short mV; + unsigned short pos; +}; + +static const struct mV_pos __initdata vrm85_mV[32] = { + {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, + {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, + {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, + {1450, 16}, {1400, 14}, {1350, 12}, {1300, 10}, + {1275, 9}, {1225, 7}, {1175, 5}, {1125, 3}, + {1075, 1}, {1825, 31}, {1775, 29}, {1725, 27}, + {1675, 25}, {1625, 23}, {1575, 21}, {1525, 19}, + {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} +}; + +static const unsigned char __initdata mV_vrm85[32] = { + 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, + 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, + 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, + 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 +}; + +static const struct mV_pos __initdata mobilevrm_mV[32] = { + {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, + {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, + {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, + {1150, 19}, {1100, 18}, {1050, 17}, {1000, 16}, + {975, 15}, {950, 14}, {925, 13}, {900, 12}, + {875, 11}, {850, 10}, {825, 9}, {800, 8}, + {775, 7}, {750, 6}, {725, 5}, {700, 4}, + {675, 3}, {650, 2}, {625, 1}, {600, 0} +}; + +static const unsigned char __initdata mV_mobilevrm[32] = { + 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, + 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, + 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 +}; + diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c new file mode 100644 index 00000000000..b2689514295 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -0,0 +1,325 @@ +/* + * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> + * + * Licensed under the terms of the GNU GPL License version 2. + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/cpufreq.h> + +#include <asm/msr.h> +#include <asm/processor.h> +#include <asm/timex.h> + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longrun", msg) + +static struct cpufreq_driver longrun_driver; + +/** + * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz + * values into per cent values. In TMTA microcode, the following is valid: + * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) + */ +static unsigned int longrun_low_freq, longrun_high_freq; + + +/** + * longrun_get_policy - get the current LongRun policy + * @policy: struct cpufreq_policy where current policy is written into + * + * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS + * and MSR_TMTA_LONGRUN_CTRL + */ +static void __init longrun_get_policy(struct cpufreq_policy *policy) +{ + u32 msr_lo, msr_hi; + + rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); + dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi); + if (msr_lo & 0x01) + policy->policy = CPUFREQ_POLICY_PERFORMANCE; + else + policy->policy = CPUFREQ_POLICY_POWERSAVE; + + rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); + dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi); + msr_lo &= 0x0000007F; + msr_hi &= 0x0000007F; + + if ( longrun_high_freq <= longrun_low_freq ) { + /* Assume degenerate Longrun table */ + policy->min = policy->max = longrun_high_freq; + } else { + policy->min = longrun_low_freq + msr_lo * + ((longrun_high_freq - longrun_low_freq) / 100); + policy->max = longrun_low_freq + msr_hi * + ((longrun_high_freq - longrun_low_freq) / 100); + } + policy->cpu = 0; +} + + +/** + * longrun_set_policy - sets a new CPUFreq policy + * @policy: new policy + * + * Sets a new CPUFreq policy on LongRun-capable processors. This function + * has to be called with cpufreq_driver locked. + */ +static int longrun_set_policy(struct cpufreq_policy *policy) +{ + u32 msr_lo, msr_hi; + u32 pctg_lo, pctg_hi; + + if (!policy) + return -EINVAL; + + if ( longrun_high_freq <= longrun_low_freq ) { + /* Assume degenerate Longrun table */ + pctg_lo = pctg_hi = 100; + } else { + pctg_lo = (policy->min - longrun_low_freq) / + ((longrun_high_freq - longrun_low_freq) / 100); + pctg_hi = (policy->max - longrun_low_freq) / + ((longrun_high_freq - longrun_low_freq) / 100); + } + + if (pctg_hi > 100) + pctg_hi = 100; + if (pctg_lo > pctg_hi) + pctg_lo = pctg_hi; + + /* performance or economy mode */ + rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); + msr_lo &= 0xFFFFFFFE; + switch (policy->policy) { + case CPUFREQ_POLICY_PERFORMANCE: + msr_lo |= 0x00000001; + break; + case CPUFREQ_POLICY_POWERSAVE: + break; + } + wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); + + /* lower and upper boundary */ + rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); + msr_lo &= 0xFFFFFF80; + msr_hi &= 0xFFFFFF80; + msr_lo |= pctg_lo; + msr_hi |= pctg_hi; + wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); + + return 0; +} + + +/** + * longrun_verify_poliy - verifies a new CPUFreq policy + * @policy: the policy to verify + * + * Validates a new CPUFreq policy. This function has to be called with + * cpufreq_driver locked. + */ +static int longrun_verify_policy(struct cpufreq_policy *policy) +{ + if (!policy) + return -EINVAL; + + policy->cpu = 0; + cpufreq_verify_within_limits(policy, + policy->cpuinfo.min_freq, + policy->cpuinfo.max_freq); + + if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) && + (policy->policy != CPUFREQ_POLICY_PERFORMANCE)) + return -EINVAL; + + return 0; +} + +static unsigned int longrun_get(unsigned int cpu) +{ + u32 eax, ebx, ecx, edx; + + if (cpu) + return 0; + + cpuid(0x80860007, &eax, &ebx, &ecx, &edx); + dprintk("cpuid eax is %u\n", eax); + + return (eax * 1000); +} + +/** + * longrun_determine_freqs - determines the lowest and highest possible core frequency + * @low_freq: an int to put the lowest frequency into + * @high_freq: an int to put the highest frequency into + * + * Determines the lowest and highest possible core frequencies on this CPU. + * This is necessary to calculate the performance percentage according to + * TMTA rules: + * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) + */ +static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, + unsigned int *high_freq) +{ + u32 msr_lo, msr_hi; + u32 save_lo, save_hi; + u32 eax, ebx, ecx, edx; + u32 try_hi; + struct cpuinfo_x86 *c = cpu_data; + + if (!low_freq || !high_freq) + return -EINVAL; + + if (cpu_has(c, X86_FEATURE_LRTI)) { + /* if the LongRun Table Interface is present, the + * detection is a bit easier: + * For minimum frequency, read out the maximum + * level (msr_hi), write that into "currently + * selected level", and read out the frequency. + * For maximum frequency, read out level zero. + */ + /* minimum */ + rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi); + wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi); + rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); + *low_freq = msr_lo * 1000; /* to kHz */ + + /* maximum */ + wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi); + rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); + *high_freq = msr_lo * 1000; /* to kHz */ + + dprintk("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq); + + if (*low_freq > *high_freq) + *low_freq = *high_freq; + return 0; + } + + /* set the upper border to the value determined during TSC init */ + *high_freq = (cpu_khz / 1000); + *high_freq = *high_freq * 1000; + dprintk("high frequency is %u kHz\n", *high_freq); + + /* get current borders */ + rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); + save_lo = msr_lo & 0x0000007F; + save_hi = msr_hi & 0x0000007F; + + /* if current perf_pctg is larger than 90%, we need to decrease the + * upper limit to make the calculation more accurate. + */ + cpuid(0x80860007, &eax, &ebx, &ecx, &edx); + /* try decreasing in 10% steps, some processors react only + * on some barrier values */ + for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -=10) { + /* set to 0 to try_hi perf_pctg */ + msr_lo &= 0xFFFFFF80; + msr_hi &= 0xFFFFFF80; + msr_hi |= try_hi; + wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); + + /* read out current core MHz and current perf_pctg */ + cpuid(0x80860007, &eax, &ebx, &ecx, &edx); + + /* restore values */ + wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi); + } + dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax); + + /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) + * eqals + * low_freq * ( 1 - perf_pctg) = (cur_freq - high_freq * perf_pctg) + * + * high_freq * perf_pctg is stored tempoarily into "ebx". + */ + ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */ + + if ((ecx > 95) || (ecx == 0) || (eax < ebx)) + return -EIO; + + edx = (eax - ebx) / (100 - ecx); + *low_freq = edx * 1000; /* back to kHz */ + + dprintk("low frequency is %u kHz\n", *low_freq); + + if (*low_freq > *high_freq) + *low_freq = *high_freq; + + return 0; +} + + +static int __init longrun_cpu_init(struct cpufreq_policy *policy) +{ + int result = 0; + + /* capability check */ + if (policy->cpu != 0) + return -ENODEV; + + /* detect low and high frequency */ + result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq); + if (result) + return result; + + /* cpuinfo and default policy values */ + policy->cpuinfo.min_freq = longrun_low_freq; + policy->cpuinfo.max_freq = longrun_high_freq; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + longrun_get_policy(policy); + + return 0; +} + + +static struct cpufreq_driver longrun_driver = { + .flags = CPUFREQ_CONST_LOOPS, + .verify = longrun_verify_policy, + .setpolicy = longrun_set_policy, + .get = longrun_get, + .init = longrun_cpu_init, + .name = "longrun", + .owner = THIS_MODULE, +}; + + +/** + * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver + * + * Initializes the LongRun support. + */ +static int __init longrun_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + if (c->x86_vendor != X86_VENDOR_TRANSMETA || + !cpu_has(c, X86_FEATURE_LONGRUN)) + return -ENODEV; + + return cpufreq_register_driver(&longrun_driver); +} + + +/** + * longrun_exit - unregisters LongRun support + */ +static void __exit longrun_exit(void) +{ + cpufreq_unregister_driver(&longrun_driver); +} + + +MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>"); +MODULE_DESCRIPTION ("LongRun driver for Transmeta Crusoe and Efficeon processors."); +MODULE_LICENSE ("GPL"); + +module_init(longrun_init); +module_exit(longrun_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c new file mode 100644 index 00000000000..8eb414b906d --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -0,0 +1,315 @@ +/* + * Pentium 4/Xeon CPU on demand clock modulation/speed scaling + * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> + * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com> + * (C) 2002 Arjan van de Ven <arjanv@redhat.com> + * (C) 2002 Tora T. Engstad + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * The author(s) of this software shall not be held liable for damages + * of any nature resulting due to the use of this software. This + * software is provided AS-IS with no warranties. + * + * Date Errata Description + * 20020525 N44, O17 12.5% or 25% DC causes lockup + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/cpufreq.h> +#include <linux/slab.h> +#include <linux/cpumask.h> + +#include <asm/processor.h> +#include <asm/msr.h> +#include <asm/timex.h> + +#include "speedstep-lib.h" + +#define PFX "p4-clockmod: " +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg) + +/* + * Duty Cycle (3bits), note DC_DISABLE is not specified in + * intel docs i just use it to mean disable + */ +enum { + DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT, + DC_64PT, DC_75PT, DC_88PT, DC_DISABLE +}; + +#define DC_ENTRIES 8 + + +static int has_N44_O17_errata[NR_CPUS]; +static unsigned int stock_freq; +static struct cpufreq_driver p4clockmod_driver; +static unsigned int cpufreq_p4_get(unsigned int cpu); + +static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) +{ + u32 l, h; + + if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV)) + return -EINVAL; + + rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h); + + if (l & 0x01) + dprintk("CPU#%d currently thermal throttled\n", cpu); + + if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT)) + newstate = DC_38PT; + + rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); + if (newstate == DC_DISABLE) { + dprintk("CPU#%d disabling modulation\n", cpu); + wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h); + } else { + dprintk("CPU#%d setting duty cycle to %d%%\n", + cpu, ((125 * newstate) / 10)); + /* bits 63 - 5 : reserved + * bit 4 : enable/disable + * bits 3-1 : duty cycle + * bit 0 : reserved + */ + l = (l & ~14); + l = l | (1<<4) | ((newstate & 0x7)<<1); + wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l, h); + } + + return 0; +} + + +static struct cpufreq_frequency_table p4clockmod_table[] = { + {DC_RESV, CPUFREQ_ENTRY_INVALID}, + {DC_DFLT, 0}, + {DC_25PT, 0}, + {DC_38PT, 0}, + {DC_50PT, 0}, + {DC_64PT, 0}, + {DC_75PT, 0}, + {DC_88PT, 0}, + {DC_DISABLE, 0}, + {DC_RESV, CPUFREQ_TABLE_END}, +}; + + +static int cpufreq_p4_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = DC_RESV; + struct cpufreq_freqs freqs; + int i; + + if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate)) + return -EINVAL; + + freqs.old = cpufreq_p4_get(policy->cpu); + freqs.new = stock_freq * p4clockmod_table[newstate].index / 8; + + if (freqs.new == freqs.old) + return 0; + + /* notifiers */ + for_each_cpu_mask(i, policy->cpus) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software + * Developer's Manual, Volume 3 + */ + for_each_cpu_mask(i, policy->cpus) + cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); + + /* notifiers */ + for_each_cpu_mask(i, policy->cpus) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + + return 0; +} + + +static int cpufreq_p4_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]); +} + + +static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) +{ + if (c->x86 == 0x06) { + if (cpu_has(c, X86_FEATURE_EST)) + printk(KERN_WARNING PFX "Warning: EST-capable CPU detected. " + "The acpi-cpufreq module offers voltage scaling" + " in addition of frequency scaling. You should use " + "that instead of p4-clockmod, if possible.\n"); + switch (c->x86_model) { + case 0x0E: /* Core */ + case 0x0F: /* Core Duo */ + p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; + return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); + case 0x0D: /* Pentium M (Dothan) */ + p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; + /* fall through */ + case 0x09: /* Pentium M (Banias) */ + return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); + } + } + + if (c->x86 != 0xF) { + printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@lists.linux.org.uk>\n"); + return 0; + } + + /* on P-4s, the TSC runs with constant frequency independent whether + * throttling is active or not. */ + p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; + + if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) { + printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. " + "The speedstep-ich or acpi cpufreq modules offer " + "voltage scaling in addition of frequency scaling. " + "You should use either one instead of p4-clockmod, " + "if possible.\n"); + return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M); + } + + return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D); +} + + + +static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; + int cpuid = 0; + unsigned int i; + +#ifdef CONFIG_SMP + policy->cpus = cpu_sibling_map[policy->cpu]; +#endif + + /* Errata workaround */ + cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask; + switch (cpuid) { + case 0x0f07: + case 0x0f0a: + case 0x0f11: + case 0x0f12: + has_N44_O17_errata[policy->cpu] = 1; + dprintk("has errata -- disabling low frequencies\n"); + } + + /* get max frequency */ + stock_freq = cpufreq_p4_get_frequency(c); + if (!stock_freq) + return -EINVAL; + + /* table init */ + for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { + if ((i<2) && (has_N44_O17_errata[policy->cpu])) + p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; + else + p4clockmod_table[i].frequency = (stock_freq * i)/8; + } + cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu); + + /* cpuinfo and default policy values */ + policy->cpuinfo.transition_latency = 1000000; /* assumed */ + policy->cur = stock_freq; + + return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]); +} + + +static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static unsigned int cpufreq_p4_get(unsigned int cpu) +{ + u32 l, h; + + rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); + + if (l & 0x10) { + l = l >> 1; + l &= 0x7; + } else + l = DC_DISABLE; + + if (l != DC_DISABLE) + return (stock_freq * l / 8); + + return stock_freq; +} + +static struct freq_attr* p4clockmod_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver p4clockmod_driver = { + .verify = cpufreq_p4_verify, + .target = cpufreq_p4_target, + .init = cpufreq_p4_cpu_init, + .exit = cpufreq_p4_cpu_exit, + .get = cpufreq_p4_get, + .name = "p4-clockmod", + .owner = THIS_MODULE, + .attr = p4clockmod_attr, +}; + + +static int __init cpufreq_p4_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + int ret; + + /* + * THERM_CONTROL is architectural for IA32 now, so + * we can rely on the capability checks + */ + if (c->x86_vendor != X86_VENDOR_INTEL) + return -ENODEV; + + if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) || + !test_bit(X86_FEATURE_ACC, c->x86_capability)) + return -ENODEV; + + ret = cpufreq_register_driver(&p4clockmod_driver); + if (!ret) + printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n"); + + return (ret); +} + + +static void __exit cpufreq_p4_exit(void) +{ + cpufreq_unregister_driver(&p4clockmod_driver); +} + + +MODULE_AUTHOR ("Zwane Mwaikambo <zwane@commfireservices.com>"); +MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)"); +MODULE_LICENSE ("GPL"); + +late_initcall(cpufreq_p4_init); +module_exit(cpufreq_p4_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c new file mode 100644 index 00000000000..6d028533931 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -0,0 +1,255 @@ +/* + * This file was based upon code in Powertweak Linux (http://powertweak.sf.net) + * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/cpufreq.h> +#include <linux/ioport.h> +#include <linux/slab.h> + +#include <asm/msr.h> +#include <asm/timex.h> +#include <asm/io.h> + + +#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long + as it is unused */ + +static unsigned int busfreq; /* FSB, in 10 kHz */ +static unsigned int max_multiplier; + + +/* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */ +static struct cpufreq_frequency_table clock_ratio[] = { + {45, /* 000 -> 4.5x */ 0}, + {50, /* 001 -> 5.0x */ 0}, + {40, /* 010 -> 4.0x */ 0}, + {55, /* 011 -> 5.5x */ 0}, + {20, /* 100 -> 2.0x */ 0}, + {30, /* 101 -> 3.0x */ 0}, + {60, /* 110 -> 6.0x */ 0}, + {35, /* 111 -> 3.5x */ 0}, + {0, CPUFREQ_TABLE_END} +}; + + +/** + * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier + * + * Returns the current setting of the frequency multiplier. Core clock + * speed is frequency of the Front-Side Bus multiplied with this value. + */ +static int powernow_k6_get_cpu_multiplier(void) +{ + u64 invalue = 0; + u32 msrval; + + msrval = POWERNOW_IOPORT + 0x1; + wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ + invalue=inl(POWERNOW_IOPORT + 0x8); + msrval = POWERNOW_IOPORT + 0x0; + wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ + + return clock_ratio[(invalue >> 5)&7].index; +} + + +/** + * powernow_k6_set_state - set the PowerNow! multiplier + * @best_i: clock_ratio[best_i] is the target multiplier + * + * Tries to change the PowerNow! multiplier + */ +static void powernow_k6_set_state (unsigned int best_i) +{ + unsigned long outvalue=0, invalue=0; + unsigned long msrval; + struct cpufreq_freqs freqs; + + if (clock_ratio[best_i].index > max_multiplier) { + printk(KERN_ERR "cpufreq: invalid target frequency\n"); + return; + } + + freqs.old = busfreq * powernow_k6_get_cpu_multiplier(); + freqs.new = busfreq * clock_ratio[best_i].index; + freqs.cpu = 0; /* powernow-k6.c is UP only driver */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + /* we now need to transform best_i to the BVC format, see AMD#23446 */ + + outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5); + + msrval = POWERNOW_IOPORT + 0x1; + wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ + invalue=inl(POWERNOW_IOPORT + 0x8); + invalue = invalue & 0xf; + outvalue = outvalue | invalue; + outl(outvalue ,(POWERNOW_IOPORT + 0x8)); + msrval = POWERNOW_IOPORT + 0x0; + wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + return; +} + + +/** + * powernow_k6_verify - verifies a new CPUfreq policy + * @policy: new policy + * + * Policy must be within lowest and highest possible CPU Frequency, + * and at least one possible state must be within min and max. + */ +static int powernow_k6_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &clock_ratio[0]); +} + + +/** + * powernow_k6_setpolicy - sets a new CPUFreq policy + * @policy: new policy + * @target_freq: the target frequency + * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * + * sets a new CPUFreq policy + */ +static int powernow_k6_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, &clock_ratio[0], target_freq, relation, &newstate)) + return -EINVAL; + + powernow_k6_set_state(newstate); + + return 0; +} + + +static int powernow_k6_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int i; + int result; + + if (policy->cpu != 0) + return -ENODEV; + + /* get frequencies */ + max_multiplier = powernow_k6_get_cpu_multiplier(); + busfreq = cpu_khz / max_multiplier; + + /* table init */ + for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { + if (clock_ratio[i].index > max_multiplier) + clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; + else + clock_ratio[i].frequency = busfreq * clock_ratio[i].index; + } + + /* cpuinfo and default policy values */ + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = busfreq * max_multiplier; + + result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); + + return 0; +} + + +static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) +{ + unsigned int i; + for (i=0; i<8; i++) { + if (i==max_multiplier) + powernow_k6_set_state(i); + } + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static unsigned int powernow_k6_get(unsigned int cpu) +{ + return busfreq * powernow_k6_get_cpu_multiplier(); +} + +static struct freq_attr* powernow_k6_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver powernow_k6_driver = { + .verify = powernow_k6_verify, + .target = powernow_k6_target, + .init = powernow_k6_cpu_init, + .exit = powernow_k6_cpu_exit, + .get = powernow_k6_get, + .name = "powernow-k6", + .owner = THIS_MODULE, + .attr = powernow_k6_attr, +}; + + +/** + * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver + * + * Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported + * devices, -EINVAL or -ENOMEM on problems during initiatization, and zero + * on success. + */ +static int __init powernow_k6_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) || + ((c->x86_model != 12) && (c->x86_model != 13))) + return -ENODEV; + + if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) { + printk("cpufreq: PowerNow IOPORT region already used.\n"); + return -EIO; + } + + if (cpufreq_register_driver(&powernow_k6_driver)) { + release_region (POWERNOW_IOPORT, 16); + return -EINVAL; + } + + return 0; +} + + +/** + * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support + * + * Unregisters AMD K6-2+ / K6-3+ PowerNow! support. + */ +static void __exit powernow_k6_exit(void) +{ + cpufreq_unregister_driver(&powernow_k6_driver); + release_region (POWERNOW_IOPORT, 16); +} + + +MODULE_AUTHOR ("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>"); +MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); +MODULE_LICENSE ("GPL"); + +module_init(powernow_k6_init); +module_exit(powernow_k6_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c new file mode 100644 index 00000000000..7decd6a50ff --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -0,0 +1,701 @@ +/* + * AMD K7 Powernow driver. + * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs. + * (C) 2003-2004 Dave Jones <davej@redhat.com> + * + * Licensed under the terms of the GNU GPL License version 2. + * Based upon datasheets & sample CPUs kindly provided by AMD. + * + * Errata 5: Processor may fail to execute a FID/VID change in presence of interrupt. + * - We cli/sti on stepping A0 CPUs around the FID/VID transition. + * Errata 15: Processors with half frequency multipliers may hang upon wakeup from disconnect. + * - We disable half multipliers if ACPI is used on A0 stepping CPUs. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/init.h> +#include <linux/cpufreq.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/dmi.h> + +#include <asm/msr.h> +#include <asm/timer.h> +#include <asm/timex.h> +#include <asm/io.h> +#include <asm/system.h> + +#ifdef CONFIG_X86_POWERNOW_K7_ACPI +#include <linux/acpi.h> +#include <acpi/processor.h> +#endif + +#include "powernow-k7.h" + +#define PFX "powernow: " + + +struct psb_s { + u8 signature[10]; + u8 tableversion; + u8 flags; + u16 settlingtime; + u8 reserved1; + u8 numpst; +}; + +struct pst_s { + u32 cpuid; + u8 fsbspeed; + u8 maxfid; + u8 startvid; + u8 numpstates; +}; + +#ifdef CONFIG_X86_POWERNOW_K7_ACPI +union powernow_acpi_control_t { + struct { + unsigned long fid:5, + vid:5, + sgtc:20, + res1:2; + } bits; + unsigned long val; +}; +#endif + +#ifdef CONFIG_CPU_FREQ_DEBUG +/* divide by 1000 to get VCore voltage in V. */ +static const int mobile_vid_table[32] = { + 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, + 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0, + 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, + 1075, 1050, 1025, 1000, 975, 950, 925, 0, +}; +#endif + +/* divide by 10 to get FID. */ +static const int fid_codes[32] = { + 110, 115, 120, 125, 50, 55, 60, 65, + 70, 75, 80, 85, 90, 95, 100, 105, + 30, 190, 40, 200, 130, 135, 140, 210, + 150, 225, 160, 165, 170, 180, -1, -1, +}; + +/* This parameter is used in order to force ACPI instead of legacy method for + * configuration purpose. + */ + +static int acpi_force; + +static struct cpufreq_frequency_table *powernow_table; + +static unsigned int can_scale_bus; +static unsigned int can_scale_vid; +static unsigned int minimum_speed=-1; +static unsigned int maximum_speed; +static unsigned int number_scales; +static unsigned int fsb; +static unsigned int latency; +static char have_a0; + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k7", msg) + +static int check_fsb(unsigned int fsbspeed) +{ + int delta; + unsigned int f = fsb / 1000; + + delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed; + return (delta < 5); +} + +static int check_powernow(void) +{ + struct cpuinfo_x86 *c = cpu_data; + unsigned int maxei, eax, ebx, ecx, edx; + + if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) { +#ifdef MODULE + printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n"); +#endif + return 0; + } + + /* Get maximum capabilities */ + maxei = cpuid_eax (0x80000000); + if (maxei < 0x80000007) { /* Any powernow info ? */ +#ifdef MODULE + printk (KERN_INFO PFX "No powernow capabilities detected\n"); +#endif + return 0; + } + + if ((c->x86_model == 6) && (c->x86_mask == 0)) { + printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n"); + have_a0 = 1; + } + + cpuid(0x80000007, &eax, &ebx, &ecx, &edx); + + /* Check we can actually do something before we say anything.*/ + if (!(edx & (1 << 1 | 1 << 2))) + return 0; + + printk (KERN_INFO PFX "PowerNOW! Technology present. Can scale: "); + + if (edx & 1 << 1) { + printk ("frequency"); + can_scale_bus=1; + } + + if ((edx & (1 << 1 | 1 << 2)) == 0x6) + printk (" and "); + + if (edx & 1 << 2) { + printk ("voltage"); + can_scale_vid=1; + } + + printk (".\n"); + return 1; +} + + +static int get_ranges (unsigned char *pst) +{ + unsigned int j; + unsigned int speed; + u8 fid, vid; + + powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL); + if (!powernow_table) + return -ENOMEM; + + for (j=0 ; j < number_scales; j++) { + fid = *pst++; + + powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10; + powernow_table[j].index = fid; /* lower 8 bits */ + + speed = powernow_table[j].frequency; + + if ((fid_codes[fid] % 10)==5) { +#ifdef CONFIG_X86_POWERNOW_K7_ACPI + if (have_a0 == 1) + powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID; +#endif + } + + if (speed < minimum_speed) + minimum_speed = speed; + if (speed > maximum_speed) + maximum_speed = speed; + + vid = *pst++; + powernow_table[j].index |= (vid << 8); /* upper 8 bits */ + + dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " + "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, + fid_codes[fid] % 10, speed/1000, vid, + mobile_vid_table[vid]/1000, + mobile_vid_table[vid]%1000); + } + powernow_table[number_scales].frequency = CPUFREQ_TABLE_END; + powernow_table[number_scales].index = 0; + + return 0; +} + + +static void change_FID(int fid) +{ + union msr_fidvidctl fidvidctl; + + rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + if (fidvidctl.bits.FID != fid) { + fidvidctl.bits.SGTC = latency; + fidvidctl.bits.FID = fid; + fidvidctl.bits.VIDC = 0; + fidvidctl.bits.FIDC = 1; + wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + } +} + + +static void change_VID(int vid) +{ + union msr_fidvidctl fidvidctl; + + rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + if (fidvidctl.bits.VID != vid) { + fidvidctl.bits.SGTC = latency; + fidvidctl.bits.VID = vid; + fidvidctl.bits.FIDC = 0; + fidvidctl.bits.VIDC = 1; + wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + } +} + + +static void change_speed (unsigned int index) +{ + u8 fid, vid; + struct cpufreq_freqs freqs; + union msr_fidvidstatus fidvidstatus; + int cfid; + + /* fid are the lower 8 bits of the index we stored into + * the cpufreq frequency table in powernow_decode_bios, + * vid are the upper 8 bits. + */ + + fid = powernow_table[index].index & 0xFF; + vid = (powernow_table[index].index & 0xFF00) >> 8; + + freqs.cpu = 0; + + rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); + cfid = fidvidstatus.bits.CFID; + freqs.old = fsb * fid_codes[cfid] / 10; + + freqs.new = powernow_table[index].frequency; + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + /* Now do the magic poking into the MSRs. */ + + if (have_a0 == 1) /* A0 errata 5 */ + local_irq_disable(); + + if (freqs.old > freqs.new) { + /* Going down, so change FID first */ + change_FID(fid); + change_VID(vid); + } else { + /* Going up, so change VID first */ + change_VID(vid); + change_FID(fid); + } + + + if (have_a0 == 1) + local_irq_enable(); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); +} + + +#ifdef CONFIG_X86_POWERNOW_K7_ACPI + +static struct acpi_processor_performance *acpi_processor_perf; + +static int powernow_acpi_init(void) +{ + int i; + int retval = 0; + union powernow_acpi_control_t pc; + + if (acpi_processor_perf != NULL && powernow_table != NULL) { + retval = -EINVAL; + goto err0; + } + + acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance), + GFP_KERNEL); + if (!acpi_processor_perf) { + retval = -ENOMEM; + goto err0; + } + + if (acpi_processor_register_performance(acpi_processor_perf, 0)) { + retval = -EIO; + goto err1; + } + + if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { + retval = -ENODEV; + goto err2; + } + + if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { + retval = -ENODEV; + goto err2; + } + + number_scales = acpi_processor_perf->state_count; + + if (number_scales < 2) { + retval = -ENODEV; + goto err2; + } + + powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL); + if (!powernow_table) { + retval = -ENOMEM; + goto err2; + } + + pc.val = (unsigned long) acpi_processor_perf->states[0].control; + for (i = 0; i < number_scales; i++) { + u8 fid, vid; + struct acpi_processor_px *state = + &acpi_processor_perf->states[i]; + unsigned int speed, speed_mhz; + + pc.val = (unsigned long) state->control; + dprintk ("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", + i, + (u32) state->core_frequency, + (u32) state->power, + (u32) state->transition_latency, + (u32) state->control, + pc.bits.sgtc); + + vid = pc.bits.vid; + fid = pc.bits.fid; + + powernow_table[i].frequency = fsb * fid_codes[fid] / 10; + powernow_table[i].index = fid; /* lower 8 bits */ + powernow_table[i].index |= (vid << 8); /* upper 8 bits */ + + speed = powernow_table[i].frequency; + speed_mhz = speed / 1000; + + /* processor_perflib will multiply the MHz value by 1000 to + * get a KHz value (e.g. 1266000). However, powernow-k7 works + * with true KHz values (e.g. 1266768). To ensure that all + * powernow frequencies are available, we must ensure that + * ACPI doesn't restrict them, so we round up the MHz value + * to ensure that perflib's computed KHz value is greater than + * or equal to powernow's KHz value. + */ + if (speed % 1000 > 0) + speed_mhz++; + + if ((fid_codes[fid] % 10)==5) { + if (have_a0 == 1) + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + } + + dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " + "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, + fid_codes[fid] % 10, speed_mhz, vid, + mobile_vid_table[vid]/1000, + mobile_vid_table[vid]%1000); + + if (state->core_frequency != speed_mhz) { + state->core_frequency = speed_mhz; + dprintk(" Corrected ACPI frequency to %d\n", + speed_mhz); + } + + if (latency < pc.bits.sgtc) + latency = pc.bits.sgtc; + + if (speed < minimum_speed) + minimum_speed = speed; + if (speed > maximum_speed) + maximum_speed = speed; + } + + powernow_table[i].frequency = CPUFREQ_TABLE_END; + powernow_table[i].index = 0; + + /* notify BIOS that we exist */ + acpi_processor_notify_smm(THIS_MODULE); + + return 0; + +err2: + acpi_processor_unregister_performance(acpi_processor_perf, 0); +err1: + kfree(acpi_processor_perf); +err0: + printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n"); + acpi_processor_perf = NULL; + return retval; +} +#else +static int powernow_acpi_init(void) +{ + printk(KERN_INFO PFX "no support for ACPI processor found." + " Please recompile your kernel with ACPI processor\n"); + return -EINVAL; +} +#endif + +static int powernow_decode_bios (int maxfid, int startvid) +{ + struct psb_s *psb; + struct pst_s *pst; + unsigned int i, j; + unsigned char *p; + unsigned int etuple; + unsigned int ret; + + etuple = cpuid_eax(0x80000001); + + for (i=0xC0000; i < 0xffff0 ; i+=16) { + + p = phys_to_virt(i); + + if (memcmp(p, "AMDK7PNOW!", 10) == 0){ + dprintk ("Found PSB header at %p\n", p); + psb = (struct psb_s *) p; + dprintk ("Table version: 0x%x\n", psb->tableversion); + if (psb->tableversion != 0x12) { + printk (KERN_INFO PFX "Sorry, only v1.2 tables supported right now\n"); + return -ENODEV; + } + + dprintk ("Flags: 0x%x\n", psb->flags); + if ((psb->flags & 1)==0) { + dprintk ("Mobile voltage regulator\n"); + } else { + dprintk ("Desktop voltage regulator\n"); + } + + latency = psb->settlingtime; + if (latency < 100) { + printk (KERN_INFO PFX "BIOS set settling time to %d microseconds." + "Should be at least 100. Correcting.\n", latency); + latency = 100; + } + dprintk ("Settling Time: %d microseconds.\n", psb->settlingtime); + dprintk ("Has %d PST tables. (Only dumping ones relevant to this CPU).\n", psb->numpst); + + p += sizeof (struct psb_s); + + pst = (struct pst_s *) p; + + for (j=0; j<psb->numpst; j++) { + pst = (struct pst_s *) p; + number_scales = pst->numpstates; + + if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) && + (maxfid==pst->maxfid) && (startvid==pst->startvid)) + { + dprintk ("PST:%d (@%p)\n", j, pst); + dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", + pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); + + ret = get_ranges ((char *) pst + sizeof (struct pst_s)); + return ret; + } else { + unsigned int k; + p = (char *) pst + sizeof (struct pst_s); + for (k=0; k<number_scales; k++) + p+=2; + } + } + printk (KERN_INFO PFX "No PST tables match this cpuid (0x%x)\n", etuple); + printk (KERN_INFO PFX "This is indicative of a broken BIOS.\n"); + + return -EINVAL; + } + p++; + } + + return -ENODEV; +} + + +static int powernow_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate; + + if (cpufreq_frequency_table_target(policy, powernow_table, target_freq, relation, &newstate)) + return -EINVAL; + + change_speed(newstate); + + return 0; +} + + +static int powernow_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, powernow_table); +} + +/* + * We use the fact that the bus frequency is somehow + * a multiple of 100000/3 khz, then we compute sgtc according + * to this multiple. + * That way, we match more how AMD thinks all of that work. + * We will then get the same kind of behaviour already tested under + * the "well-known" other OS. + */ +static int __init fixup_sgtc(void) +{ + unsigned int sgtc; + unsigned int m; + + m = fsb / 3333; + if ((m % 10) >= 5) + m += 5; + + m /= 10; + + sgtc = 100 * m * latency; + sgtc = sgtc / 3; + if (sgtc > 0xfffff) { + printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc); + sgtc = 0xfffff; + } + return sgtc; +} + +static unsigned int powernow_get(unsigned int cpu) +{ + union msr_fidvidstatus fidvidstatus; + unsigned int cfid; + + if (cpu) + return 0; + rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); + cfid = fidvidstatus.bits.CFID; + + return (fsb * fid_codes[cfid] / 10); +} + + +static int __init acer_cpufreq_pst(struct dmi_system_id *d) +{ + printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident); + printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n"); + printk(KERN_WARNING "cpufreq scaling has been disabled as a result of this.\n"); + return 0; +} + +/* + * Some Athlon laptops have really fucked PST tables. + * A BIOS update is all that can save them. + * Mention this, and disable cpufreq. + */ +static struct dmi_system_id __initdata powernow_dmi_table[] = { + { + .callback = acer_cpufreq_pst, + .ident = "Acer Aspire", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"), + DMI_MATCH(DMI_BIOS_VERSION, "3A71"), + }, + }, + { } +}; + +static int __init powernow_cpu_init (struct cpufreq_policy *policy) +{ + union msr_fidvidstatus fidvidstatus; + int result; + + if (policy->cpu != 0) + return -ENODEV; + + rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); + + recalibrate_cpu_khz(); + + fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; + if (!fsb) { + printk(KERN_WARNING PFX "can not determine bus frequency\n"); + return -EINVAL; + } + dprintk("FSB: %3dMHz\n", fsb/1000); + + if (dmi_check_system(powernow_dmi_table) || acpi_force) { + printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n"); + result = powernow_acpi_init(); + } else { + result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID); + if (result) { + printk (KERN_INFO PFX "Trying ACPI perflib\n"); + maximum_speed = 0; + minimum_speed = -1; + latency = 0; + result = powernow_acpi_init(); + if (result) { + printk (KERN_INFO PFX "ACPI and legacy methods failed\n"); + printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.html\n"); + } + } else { + /* SGTC use the bus clock as timer */ + latency = fixup_sgtc(); + printk(KERN_INFO PFX "SGTC: %d\n", latency); + } + } + + if (result) + return result; + + printk (KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", + minimum_speed/1000, maximum_speed/1000); + + policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency); + + policy->cur = powernow_get(0); + + cpufreq_frequency_table_get_attr(powernow_table, policy->cpu); + + return cpufreq_frequency_table_cpuinfo(policy, powernow_table); +} + +static int powernow_cpu_exit (struct cpufreq_policy *policy) { + cpufreq_frequency_table_put_attr(policy->cpu); + +#ifdef CONFIG_X86_POWERNOW_K7_ACPI + if (acpi_processor_perf) { + acpi_processor_unregister_performance(acpi_processor_perf, 0); + kfree(acpi_processor_perf); + } +#endif + + kfree(powernow_table); + return 0; +} + +static struct freq_attr* powernow_table_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver powernow_driver = { + .verify = powernow_verify, + .target = powernow_target, + .get = powernow_get, + .init = powernow_cpu_init, + .exit = powernow_cpu_exit, + .name = "powernow-k7", + .owner = THIS_MODULE, + .attr = powernow_table_attr, +}; + +static int __init powernow_init (void) +{ + if (check_powernow()==0) + return -ENODEV; + return cpufreq_register_driver(&powernow_driver); +} + + +static void __exit powernow_exit (void) +{ + cpufreq_unregister_driver(&powernow_driver); +} + +module_param(acpi_force, int, 0444); +MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); + +MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); +MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); +MODULE_LICENSE ("GPL"); + +late_initcall(powernow_init); +module_exit(powernow_exit); + diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h new file mode 100644 index 00000000000..f8a63b3664e --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h @@ -0,0 +1,44 @@ +/* + * $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $ + * (C) 2003 Dave Jones. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * AMD-specific information + * + */ + +union msr_fidvidctl { + struct { + unsigned FID:5, // 4:0 + reserved1:3, // 7:5 + VID:5, // 12:8 + reserved2:3, // 15:13 + FIDC:1, // 16 + VIDC:1, // 17 + reserved3:2, // 19:18 + FIDCHGRATIO:1, // 20 + reserved4:11, // 31-21 + SGTC:20, // 32:51 + reserved5:12; // 63:52 + } bits; + unsigned long long val; +}; + +union msr_fidvidstatus { + struct { + unsigned CFID:5, // 4:0 + reserved1:3, // 7:5 + SFID:5, // 12:8 + reserved2:3, // 15:13 + MFID:5, // 20:16 + reserved3:11, // 31:21 + CVID:5, // 36:32 + reserved4:3, // 39:37 + SVID:5, // 44:40 + reserved5:3, // 47:45 + MVID:5, // 52:48 + reserved6:11; // 63:53 + } bits; + unsigned long long val; +}; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c new file mode 100644 index 00000000000..b273b69cfdd --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -0,0 +1,1360 @@ +/* + * (c) 2003-2006 Advanced Micro Devices, Inc. + * Your use of this code is subject to the terms and conditions of the + * GNU general public license version 2. See "COPYING" or + * http://www.gnu.org/licenses/gpl.html + * + * Support : mark.langsdorf@amd.com + * + * Based on the powernow-k7.c module written by Dave Jones. + * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs + * (C) 2004 Dominik Brodowski <linux@brodo.de> + * (C) 2004 Pavel Machek <pavel@suse.cz> + * Licensed under the terms of the GNU GPL License version 2. + * Based upon datasheets & sample CPUs kindly provided by AMD. + * + * Valuable input gratefully received from Dave Jones, Pavel Machek, + * Dominik Brodowski, Jacob Shin, and others. + * Originally developed by Paul Devriendt. + * Processor information obtained from Chapter 9 (Power and Thermal Management) + * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD + * Opteron Processors" available for download from www.amd.com + * + * Tables for specific CPUs can be inferred from + * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf + */ + +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/cpufreq.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/cpumask.h> +#include <linux/sched.h> /* for current / set_cpus_allowed() */ + +#include <asm/msr.h> +#include <asm/io.h> +#include <asm/delay.h> + +#ifdef CONFIG_X86_POWERNOW_K8_ACPI +#include <linux/acpi.h> +#include <linux/mutex.h> +#include <acpi/processor.h> +#endif + +#define PFX "powernow-k8: " +#define BFX PFX "BIOS error: " +#define VERSION "version 2.00.00" +#include "powernow-k8.h" + +/* serialize freq changes */ +static DEFINE_MUTEX(fidvid_mutex); + +static struct powernow_k8_data *powernow_data[NR_CPUS]; + +static int cpu_family = CPU_OPTERON; + +#ifndef CONFIG_SMP +static cpumask_t cpu_core_map[1]; +#endif + +/* Return a frequency in MHz, given an input fid */ +static u32 find_freq_from_fid(u32 fid) +{ + return 800 + (fid * 100); +} + + +/* Return a frequency in KHz, given an input fid */ +static u32 find_khz_freq_from_fid(u32 fid) +{ + return 1000 * find_freq_from_fid(fid); +} + +/* Return a frequency in MHz, given an input fid and did */ +static u32 find_freq_from_fiddid(u32 fid, u32 did) +{ + if (current_cpu_data.x86 == 0x10) + return 100 * (fid + 0x10) >> did; + else + return 100 * (fid + 0x8) >> did; +} + +static u32 find_khz_freq_from_fiddid(u32 fid, u32 did) +{ + return 1000 * find_freq_from_fiddid(fid, did); +} + +static u32 find_fid_from_pstate(u32 pstate) +{ + u32 hi, lo; + rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); + return lo & HW_PSTATE_FID_MASK; +} + +static u32 find_did_from_pstate(u32 pstate) +{ + u32 hi, lo; + rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); + return (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; +} + +/* Return the vco fid for an input fid + * + * Each "low" fid has corresponding "high" fid, and you can get to "low" fids + * only from corresponding high fids. This returns "high" fid corresponding to + * "low" one. + */ +static u32 convert_fid_to_vco_fid(u32 fid) +{ + if (fid < HI_FID_TABLE_BOTTOM) + return 8 + (2 * fid); + else + return fid; +} + +/* + * Return 1 if the pending bit is set. Unless we just instructed the processor + * to transition to a new state, seeing this bit set is really bad news. + */ +static int pending_bit_stuck(void) +{ + u32 lo, hi; + + if (cpu_family == CPU_HW_PSTATE) + return 0; + + rdmsr(MSR_FIDVID_STATUS, lo, hi); + return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; +} + +/* + * Update the global current fid / vid values from the status msr. + * Returns 1 on error. + */ +static int query_current_values_with_pending_wait(struct powernow_k8_data *data) +{ + u32 lo, hi; + u32 i = 0; + + if (cpu_family == CPU_HW_PSTATE) { + rdmsr(MSR_PSTATE_STATUS, lo, hi); + i = lo & HW_PSTATE_MASK; + rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi); + data->currfid = lo & HW_PSTATE_FID_MASK; + data->currdid = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; + return 0; + } + do { + if (i++ > 10000) { + dprintk("detected change pending stuck\n"); + return 1; + } + rdmsr(MSR_FIDVID_STATUS, lo, hi); + } while (lo & MSR_S_LO_CHANGE_PENDING); + + data->currvid = hi & MSR_S_HI_CURRENT_VID; + data->currfid = lo & MSR_S_LO_CURRENT_FID; + + return 0; +} + +/* the isochronous relief time */ +static void count_off_irt(struct powernow_k8_data *data) +{ + udelay((1 << data->irt) * 10); + return; +} + +/* the voltage stabalization time */ +static void count_off_vst(struct powernow_k8_data *data) +{ + udelay(data->vstable * VST_UNITS_20US); + return; +} + +/* need to init the control msr to a safe value (for each cpu) */ +static void fidvid_msr_init(void) +{ + u32 lo, hi; + u8 fid, vid; + + rdmsr(MSR_FIDVID_STATUS, lo, hi); + vid = hi & MSR_S_HI_CURRENT_VID; + fid = lo & MSR_S_LO_CURRENT_FID; + lo = fid | (vid << MSR_C_LO_VID_SHIFT); + hi = MSR_C_HI_STP_GNT_BENIGN; + dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi); + wrmsr(MSR_FIDVID_CTL, lo, hi); +} + + +/* write the new fid value along with the other control fields to the msr */ +static int write_new_fid(struct powernow_k8_data *data, u32 fid) +{ + u32 lo; + u32 savevid = data->currvid; + u32 i = 0; + + if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) { + printk(KERN_ERR PFX "internal error - overflow on fid write\n"); + return 1; + } + + lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; + + dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", + fid, lo, data->plllock * PLL_LOCK_CONVERSION); + + do { + wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); + if (i++ > 100) { + printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n"); + return 1; + } + } while (query_current_values_with_pending_wait(data)); + + count_off_irt(data); + + if (savevid != data->currvid) { + printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n", + savevid, data->currvid); + return 1; + } + + if (fid != data->currfid) { + printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid, + data->currfid); + return 1; + } + + return 0; +} + +/* Write a new vid to the hardware */ +static int write_new_vid(struct powernow_k8_data *data, u32 vid) +{ + u32 lo; + u32 savefid = data->currfid; + int i = 0; + + if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { + printk(KERN_ERR PFX "internal error - overflow on vid write\n"); + return 1; + } + + lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; + + dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", + vid, lo, STOP_GRANT_5NS); + + do { + wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); + if (i++ > 100) { + printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); + return 1; + } + } while (query_current_values_with_pending_wait(data)); + + if (savefid != data->currfid) { + printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n", + savefid, data->currfid); + return 1; + } + + if (vid != data->currvid) { + printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid, + data->currvid); + return 1; + } + + return 0; +} + +/* + * Reduce the vid by the max of step or reqvid. + * Decreasing vid codes represent increasing voltages: + * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. + */ +static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step) +{ + if ((data->currvid - reqvid) > step) + reqvid = data->currvid - step; + + if (write_new_vid(data, reqvid)) + return 1; + + count_off_vst(data); + + return 0; +} + +/* Change hardware pstate by single MSR write */ +static int transition_pstate(struct powernow_k8_data *data, u32 pstate) +{ + wrmsr(MSR_PSTATE_CTRL, pstate, 0); + data->currfid = find_fid_from_pstate(pstate); + return 0; +} + +/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ +static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid) +{ + if (core_voltage_pre_transition(data, reqvid)) + return 1; + + if (core_frequency_transition(data, reqfid)) + return 1; + + if (core_voltage_post_transition(data, reqvid)) + return 1; + + if (query_current_values_with_pending_wait(data)) + return 1; + + if ((reqfid != data->currfid) || (reqvid != data->currvid)) { + printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n", + smp_processor_id(), + reqfid, reqvid, data->currfid, data->currvid); + return 1; + } + + dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n", + smp_processor_id(), data->currfid, data->currvid); + + return 0; +} + +/* Phase 1 - core voltage transition ... setup voltage */ +static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid) +{ + u32 rvosteps = data->rvo; + u32 savefid = data->currfid; + u32 maxvid, lo; + + dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n", + smp_processor_id(), + data->currfid, data->currvid, reqvid, data->rvo); + + rdmsr(MSR_FIDVID_STATUS, lo, maxvid); + maxvid = 0x1f & (maxvid >> 16); + dprintk("ph1 maxvid=0x%x\n", maxvid); + if (reqvid < maxvid) /* lower numbers are higher voltages */ + reqvid = maxvid; + + while (data->currvid > reqvid) { + dprintk("ph1: curr 0x%x, req vid 0x%x\n", + data->currvid, reqvid); + if (decrease_vid_code_by_step(data, reqvid, data->vidmvs)) + return 1; + } + + while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { + if (data->currvid == maxvid) { + rvosteps = 0; + } else { + dprintk("ph1: changing vid for rvo, req 0x%x\n", + data->currvid - 1); + if (decrease_vid_code_by_step(data, data->currvid - 1, 1)) + return 1; + rvosteps--; + } + } + + if (query_current_values_with_pending_wait(data)) + return 1; + + if (savefid != data->currfid) { + printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid); + return 1; + } + + dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n", + data->currfid, data->currvid); + + return 0; +} + +/* Phase 2 - core frequency transition */ +static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) +{ + u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid; + + if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { + printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n", + reqfid, data->currfid); + return 1; + } + + if (data->currfid == reqfid) { + printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid); + return 0; + } + + dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n", + smp_processor_id(), + data->currfid, data->currvid, reqfid); + + vcoreqfid = convert_fid_to_vco_fid(reqfid); + vcocurrfid = convert_fid_to_vco_fid(data->currfid); + vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid + : vcoreqfid - vcocurrfid; + + while (vcofiddiff > 2) { + (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); + + if (reqfid > data->currfid) { + if (data->currfid > LO_FID_TABLE_TOP) { + if (write_new_fid(data, data->currfid + fid_interval)) { + return 1; + } + } else { + if (write_new_fid + (data, 2 + convert_fid_to_vco_fid(data->currfid))) { + return 1; + } + } + } else { + if (write_new_fid(data, data->currfid - fid_interval)) + return 1; + } + + vcocurrfid = convert_fid_to_vco_fid(data->currfid); + vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid + : vcoreqfid - vcocurrfid; + } + + if (write_new_fid(data, reqfid)) + return 1; + + if (query_current_values_with_pending_wait(data)) + return 1; + + if (data->currfid != reqfid) { + printk(KERN_ERR PFX + "ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n", + data->currfid, reqfid); + return 1; + } + + if (savevid != data->currvid) { + printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n", + savevid, data->currvid); + return 1; + } + + dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n", + data->currfid, data->currvid); + + return 0; +} + +/* Phase 3 - core voltage transition flow ... jump to the final vid. */ +static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid) +{ + u32 savefid = data->currfid; + u32 savereqvid = reqvid; + + dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n", + smp_processor_id(), + data->currfid, data->currvid); + + if (reqvid != data->currvid) { + if (write_new_vid(data, reqvid)) + return 1; + + if (savefid != data->currfid) { + printk(KERN_ERR PFX + "ph3: bad fid change, save 0x%x, curr 0x%x\n", + savefid, data->currfid); + return 1; + } + + if (data->currvid != reqvid) { + printk(KERN_ERR PFX + "ph3: failed vid transition\n, req 0x%x, curr 0x%x", + reqvid, data->currvid); + return 1; + } + } + + if (query_current_values_with_pending_wait(data)) + return 1; + + if (savereqvid != data->currvid) { + dprintk("ph3 failed, currvid 0x%x\n", data->currvid); + return 1; + } + + if (savefid != data->currfid) { + dprintk("ph3 failed, currfid changed 0x%x\n", + data->currfid); + return 1; + } + + dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n", + data->currfid, data->currvid); + + return 0; +} + +static int check_supported_cpu(unsigned int cpu) +{ + cpumask_t oldmask = CPU_MASK_ALL; + u32 eax, ebx, ecx, edx; + unsigned int rc = 0; + + oldmask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + + if (smp_processor_id() != cpu) { + printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); + goto out; + } + + if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) + goto out; + + eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && + ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) + goto out; + + if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { + if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || + ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { + printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); + goto out; + } + + eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); + if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { + printk(KERN_INFO PFX + "No frequency change capabilities detected\n"); + goto out; + } + + cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); + if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) { + printk(KERN_INFO PFX "Power state transitions not supported\n"); + goto out; + } + } else { /* must be a HW Pstate capable processor */ + cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); + if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) + cpu_family = CPU_HW_PSTATE; + else + goto out; + } + + rc = 1; + +out: + set_cpus_allowed(current, oldmask); + return rc; +} + +static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) +{ + unsigned int j; + u8 lastfid = 0xff; + + for (j = 0; j < data->numps; j++) { + if (pst[j].vid > LEAST_VID) { + printk(KERN_ERR PFX "vid %d invalid : 0x%x\n", j, pst[j].vid); + return -EINVAL; + } + if (pst[j].vid < data->rvo) { /* vid + rvo >= 0 */ + printk(KERN_ERR BFX "0 vid exceeded with pstate %d\n", j); + return -ENODEV; + } + if (pst[j].vid < maxvid + data->rvo) { /* vid + rvo >= maxvid */ + printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j); + return -ENODEV; + } + if (pst[j].fid > MAX_FID) { + printk(KERN_ERR BFX "maxfid exceeded with pstate %d\n", j); + return -ENODEV; + } + if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { + /* Only first fid is allowed to be in "low" range */ + printk(KERN_ERR BFX "two low fids - %d : 0x%x\n", j, pst[j].fid); + return -EINVAL; + } + if (pst[j].fid < lastfid) + lastfid = pst[j].fid; + } + if (lastfid & 1) { + printk(KERN_ERR BFX "lastfid invalid\n"); + return -EINVAL; + } + if (lastfid > LO_FID_TABLE_TOP) + printk(KERN_INFO BFX "first fid not from lo freq table\n"); + + return 0; +} + +static void print_basics(struct powernow_k8_data *data) +{ + int j; + for (j = 0; j < data->numps; j++) { + if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) { + if (cpu_family == CPU_HW_PSTATE) { + printk(KERN_INFO PFX " %d : fid 0x%x did 0x%x (%d MHz)\n", + j, + (data->powernow_table[j].index & 0xff00) >> 8, + (data->powernow_table[j].index & 0xff0000) >> 16, + data->powernow_table[j].frequency/1000); + } else { + printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", + j, + data->powernow_table[j].index & 0xff, + data->powernow_table[j].frequency/1000, + data->powernow_table[j].index >> 8); + } + } + } + if (data->batps) + printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps); +} + +static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) +{ + struct cpufreq_frequency_table *powernow_table; + unsigned int j; + + if (data->batps) { /* use ACPI support to get full speed on mains power */ + printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range\n", data->batps); + data->numps = data->batps; + } + + for ( j=1; j<data->numps; j++ ) { + if (pst[j-1].fid >= pst[j].fid) { + printk(KERN_ERR PFX "PST out of sequence\n"); + return -EINVAL; + } + } + + if (data->numps < 2) { + printk(KERN_ERR PFX "no p states to transition\n"); + return -ENODEV; + } + + if (check_pst_table(data, pst, maxvid)) + return -EINVAL; + + powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) + * (data->numps + 1)), GFP_KERNEL); + if (!powernow_table) { + printk(KERN_ERR PFX "powernow_table memory alloc failure\n"); + return -ENOMEM; + } + + for (j = 0; j < data->numps; j++) { + powernow_table[j].index = pst[j].fid; /* lower 8 bits */ + powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ + powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid); + } + powernow_table[data->numps].frequency = CPUFREQ_TABLE_END; + powernow_table[data->numps].index = 0; + + if (query_current_values_with_pending_wait(data)) { + kfree(powernow_table); + return -EIO; + } + + dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); + data->powernow_table = powernow_table; + if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) + print_basics(data); + + for (j = 0; j < data->numps; j++) + if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid)) + return 0; + + dprintk("currfid/vid do not match PST, ignoring\n"); + return 0; +} + +/* Find and validate the PSB/PST table in BIOS. */ +static int find_psb_table(struct powernow_k8_data *data) +{ + struct psb_s *psb; + unsigned int i; + u32 mvs; + u8 maxvid; + u32 cpst = 0; + u32 thiscpuid; + + for (i = 0xc0000; i < 0xffff0; i += 0x10) { + /* Scan BIOS looking for the signature. */ + /* It can not be at ffff0 - it is too big. */ + + psb = phys_to_virt(i); + if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0) + continue; + + dprintk("found PSB header at 0x%p\n", psb); + + dprintk("table vers: 0x%x\n", psb->tableversion); + if (psb->tableversion != PSB_VERSION_1_4) { + printk(KERN_ERR BFX "PSB table is not v1.4\n"); + return -ENODEV; + } + + dprintk("flags: 0x%x\n", psb->flags1); + if (psb->flags1) { + printk(KERN_ERR BFX "unknown flags\n"); + return -ENODEV; + } + + data->vstable = psb->vstable; + dprintk("voltage stabilization time: %d(*20us)\n", data->vstable); + + dprintk("flags2: 0x%x\n", psb->flags2); + data->rvo = psb->flags2 & 3; + data->irt = ((psb->flags2) >> 2) & 3; + mvs = ((psb->flags2) >> 4) & 3; + data->vidmvs = 1 << mvs; + data->batps = ((psb->flags2) >> 6) & 3; + + dprintk("ramp voltage offset: %d\n", data->rvo); + dprintk("isochronous relief time: %d\n", data->irt); + dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs); + + dprintk("numpst: 0x%x\n", psb->num_tables); + cpst = psb->num_tables; + if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0) ){ + thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0) ) { + cpst = 1; + } + } + if (cpst != 1) { + printk(KERN_ERR BFX "numpst must be 1\n"); + return -ENODEV; + } + + data->plllock = psb->plllocktime; + dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime); + dprintk("maxfid: 0x%x\n", psb->maxfid); + dprintk("maxvid: 0x%x\n", psb->maxvid); + maxvid = psb->maxvid; + + data->numps = psb->numps; + dprintk("numpstates: 0x%x\n", data->numps); + return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid); + } + /* + * If you see this message, complain to BIOS manufacturer. If + * he tells you "we do not support Linux" or some similar + * nonsense, remember that Windows 2000 uses the same legacy + * mechanism that the old Linux PSB driver uses. Tell them it + * is broken with Windows 2000. + * + * The reference to the AMD documentation is chapter 9 in the + * BIOS and Kernel Developer's Guide, which is available on + * www.amd.com + */ + printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n"); + return -ENODEV; +} + +#ifdef CONFIG_X86_POWERNOW_K8_ACPI +static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) +{ + if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) + return; + + data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; + data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; + data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; + data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; + data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); + data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; +} + +static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) +{ + struct cpufreq_frequency_table *powernow_table; + int ret_val; + + if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { + dprintk("register performance failed: bad ACPI data\n"); + return -EIO; + } + + /* verify the data contained in the ACPI structures */ + if (data->acpi_data.state_count <= 1) { + dprintk("No ACPI P-States\n"); + goto err_out; + } + + if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || + (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { + dprintk("Invalid control/status registers (%x - %x)\n", + data->acpi_data.control_register.space_id, + data->acpi_data.status_register.space_id); + goto err_out; + } + + /* fill in data->powernow_table */ + powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) + * (data->acpi_data.state_count + 1)), GFP_KERNEL); + if (!powernow_table) { + dprintk("powernow_table memory alloc failure\n"); + goto err_out; + } + + if (cpu_family == CPU_HW_PSTATE) + ret_val = fill_powernow_table_pstate(data, powernow_table); + else + ret_val = fill_powernow_table_fidvid(data, powernow_table); + if (ret_val) + goto err_out_mem; + + powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; + powernow_table[data->acpi_data.state_count].index = 0; + data->powernow_table = powernow_table; + + /* fill in data */ + data->numps = data->acpi_data.state_count; + if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) + print_basics(data); + powernow_k8_acpi_pst_values(data, 0); + + /* notify BIOS that we exist */ + acpi_processor_notify_smm(THIS_MODULE); + + return 0; + +err_out_mem: + kfree(powernow_table); + +err_out: + acpi_processor_unregister_performance(&data->acpi_data, data->cpu); + + /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ + data->acpi_data.state_count = 0; + + return -ENODEV; +} + +static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) +{ + int i; + + for (i = 0; i < data->acpi_data.state_count; i++) { + u32 index; + u32 hi = 0, lo = 0; + u32 fid; + u32 did; + + index = data->acpi_data.states[i].control & HW_PSTATE_MASK; + if (index > MAX_HW_PSTATE) { + printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); + printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); + } + rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); + if (!(hi & HW_PSTATE_VALID_MASK)) { + dprintk("invalid pstate %d, ignoring\n", index); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + + fid = lo & HW_PSTATE_FID_MASK; + did = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; + + dprintk(" %d : fid 0x%x, did 0x%x\n", index, fid, did); + + powernow_table[i].index = index | (fid << HW_FID_INDEX_SHIFT) | (did << HW_DID_INDEX_SHIFT); + + powernow_table[i].frequency = find_khz_freq_from_fiddid(fid, did); + + if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { + printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", + powernow_table[i].frequency, + (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + } + return 0; +} + +static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) +{ + int i; + int cntlofreq = 0; + for (i = 0; i < data->acpi_data.state_count; i++) { + u32 fid; + u32 vid; + + if (data->exttype) { + fid = data->acpi_data.states[i].status & EXT_FID_MASK; + vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; + } else { + fid = data->acpi_data.states[i].control & FID_MASK; + vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; + } + + dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); + + powernow_table[i].index = fid; /* lower 8 bits */ + powernow_table[i].index |= (vid << 8); /* upper 8 bits */ + powernow_table[i].frequency = find_khz_freq_from_fid(fid); + + /* verify frequency is OK */ + if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) || + (powernow_table[i].frequency < (MIN_FREQ * 1000))) { + dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + + /* verify voltage is OK - BIOSs are using "off" to indicate invalid */ + if (vid == VID_OFF) { + dprintk("invalid vid %u, ignoring\n", vid); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + + /* verify only 1 entry from the lo frequency table */ + if (fid < HI_FID_TABLE_BOTTOM) { + if (cntlofreq) { + /* if both entries are the same, ignore this one ... */ + if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || + (powernow_table[i].index != powernow_table[cntlofreq].index)) { + printk(KERN_ERR PFX "Too many lo freq table entries\n"); + return 1; + } + + dprintk("double low frequency table entry, ignoring it.\n"); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } else + cntlofreq = i; + } + + if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { + printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", + powernow_table[i].frequency, + (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + } + return 0; +} + +static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) +{ + if (data->acpi_data.state_count) + acpi_processor_unregister_performance(&data->acpi_data, data->cpu); +} + +#else +static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } +static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } +static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } +#endif /* CONFIG_X86_POWERNOW_K8_ACPI */ + +/* Take a frequency, and issue the fid/vid transition command */ +static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index) +{ + u32 fid = 0; + u32 vid = 0; + int res, i; + struct cpufreq_freqs freqs; + + dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); + + /* fid/vid correctness check for k8 */ + /* fid are the lower 8 bits of the index we stored into + * the cpufreq frequency table in find_psb_table, vid + * are the upper 8 bits. + */ + fid = data->powernow_table[index].index & 0xFF; + vid = (data->powernow_table[index].index & 0xFF00) >> 8; + + dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid); + + if (query_current_values_with_pending_wait(data)) + return 1; + + if ((data->currvid == vid) && (data->currfid == fid)) { + dprintk("target matches current values (fid 0x%x, vid 0x%x)\n", + fid, vid); + return 0; + } + + if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { + printk(KERN_ERR PFX + "ignoring illegal change in lo freq table-%x to 0x%x\n", + data->currfid, fid); + return 1; + } + + dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", + smp_processor_id(), fid, vid); + freqs.old = find_khz_freq_from_fid(data->currfid); + freqs.new = find_khz_freq_from_fid(fid); + + for_each_cpu_mask(i, *(data->available_cores)) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + res = transition_fid_vid(data, fid, vid); + freqs.new = find_khz_freq_from_fid(data->currfid); + + for_each_cpu_mask(i, *(data->available_cores)) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + return res; +} + +/* Take a frequency, and issue the hardware pstate transition command */ +static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index) +{ + u32 fid = 0; + u32 did = 0; + u32 pstate = 0; + int res, i; + struct cpufreq_freqs freqs; + + dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); + + /* get fid did for hardware pstate transition */ + pstate = index & HW_PSTATE_MASK; + if (pstate > MAX_HW_PSTATE) + return 0; + fid = (index & HW_FID_INDEX_MASK) >> HW_FID_INDEX_SHIFT; + did = (index & HW_DID_INDEX_MASK) >> HW_DID_INDEX_SHIFT; + freqs.old = find_khz_freq_from_fiddid(data->currfid, data->currdid); + freqs.new = find_khz_freq_from_fiddid(fid, did); + + for_each_cpu_mask(i, *(data->available_cores)) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + res = transition_pstate(data, pstate); + data->currfid = find_fid_from_pstate(pstate); + data->currdid = find_did_from_pstate(pstate); + freqs.new = find_khz_freq_from_fiddid(data->currfid, data->currdid); + + for_each_cpu_mask(i, *(data->available_cores)) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + return res; +} + +/* Driver entry point to switch to the target frequency */ +static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) +{ + cpumask_t oldmask = CPU_MASK_ALL; + struct powernow_k8_data *data = powernow_data[pol->cpu]; + u32 checkfid; + u32 checkvid; + unsigned int newstate; + int ret = -EIO; + + if (!data) + return -EINVAL; + + checkfid = data->currfid; + checkvid = data->currvid; + + /* only run on specific CPU from here on */ + oldmask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); + + if (smp_processor_id() != pol->cpu) { + printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); + goto err_out; + } + + if (pending_bit_stuck()) { + printk(KERN_ERR PFX "failing targ, change pending bit set\n"); + goto err_out; + } + + dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", + pol->cpu, targfreq, pol->min, pol->max, relation); + + if (query_current_values_with_pending_wait(data)) + goto err_out; + + if (cpu_family == CPU_HW_PSTATE) + dprintk("targ: curr fid 0x%x, did 0x%x\n", + data->currfid, data->currdid); + else { + dprintk("targ: curr fid 0x%x, vid 0x%x\n", + data->currfid, data->currvid); + + if ((checkvid != data->currvid) || (checkfid != data->currfid)) { + printk(KERN_INFO PFX + "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n", + checkfid, data->currfid, checkvid, data->currvid); + } + } + + if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) + goto err_out; + + mutex_lock(&fidvid_mutex); + + powernow_k8_acpi_pst_values(data, newstate); + + if (cpu_family == CPU_HW_PSTATE) + ret = transition_frequency_pstate(data, newstate); + else + ret = transition_frequency_fidvid(data, newstate); + if (ret) { + printk(KERN_ERR PFX "transition frequency failed\n"); + ret = 1; + mutex_unlock(&fidvid_mutex); + goto err_out; + } + mutex_unlock(&fidvid_mutex); + + if (cpu_family == CPU_HW_PSTATE) + pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); + else + pol->cur = find_khz_freq_from_fid(data->currfid); + ret = 0; + +err_out: + set_cpus_allowed(current, oldmask); + return ret; +} + +/* Driver entry point to verify the policy and range of frequencies */ +static int powernowk8_verify(struct cpufreq_policy *pol) +{ + struct powernow_k8_data *data = powernow_data[pol->cpu]; + + if (!data) + return -EINVAL; + + return cpufreq_frequency_table_verify(pol, data->powernow_table); +} + +/* per CPU init entry point to the driver */ +static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) +{ + struct powernow_k8_data *data; + cpumask_t oldmask = CPU_MASK_ALL; + int rc; + + if (!cpu_online(pol->cpu)) + return -ENODEV; + + if (!check_supported_cpu(pol->cpu)) + return -ENODEV; + + data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); + if (!data) { + printk(KERN_ERR PFX "unable to alloc powernow_k8_data"); + return -ENOMEM; + } + + data->cpu = pol->cpu; + + if (powernow_k8_cpu_init_acpi(data)) { + /* + * Use the PSB BIOS structure. This is only availabe on + * an UP version, and is deprecated by AMD. + */ + if (num_online_cpus() != 1) { + printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n"); + kfree(data); + return -ENODEV; + } + if (pol->cpu != 0) { + printk(KERN_ERR PFX "No _PSS objects for CPU other than CPU0\n"); + kfree(data); + return -ENODEV; + } + rc = find_psb_table(data); + if (rc) { + kfree(data); + return -ENODEV; + } + } + + /* only run on specific CPU from here on */ + oldmask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); + + if (smp_processor_id() != pol->cpu) { + printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); + goto err_out; + } + + if (pending_bit_stuck()) { + printk(KERN_ERR PFX "failing init, change pending bit set\n"); + goto err_out; + } + + if (query_current_values_with_pending_wait(data)) + goto err_out; + + if (cpu_family == CPU_OPTERON) + fidvid_msr_init(); + + /* run on any CPU again */ + set_cpus_allowed(current, oldmask); + + if (cpu_family == CPU_HW_PSTATE) + pol->cpus = cpumask_of_cpu(pol->cpu); + else + pol->cpus = cpu_core_map[pol->cpu]; + data->available_cores = &(pol->cpus); + + /* Take a crude guess here. + * That guess was in microseconds, so multiply with 1000 */ + pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US) + + (3 * (1 << data->irt) * 10)) * 1000; + + if (cpu_family == CPU_HW_PSTATE) + pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); + else + pol->cur = find_khz_freq_from_fid(data->currfid); + dprintk("policy current frequency %d kHz\n", pol->cur); + + /* min/max the cpu is capable of */ + if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { + printk(KERN_ERR PFX "invalid powernow_table\n"); + powernow_k8_cpu_exit_acpi(data); + kfree(data->powernow_table); + kfree(data); + return -EINVAL; + } + + cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); + + if (cpu_family == CPU_HW_PSTATE) + dprintk("cpu_init done, current fid 0x%x, did 0x%x\n", + data->currfid, data->currdid); + else + dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", + data->currfid, data->currvid); + + powernow_data[pol->cpu] = data; + + return 0; + +err_out: + set_cpus_allowed(current, oldmask); + powernow_k8_cpu_exit_acpi(data); + + kfree(data); + return -ENODEV; +} + +static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) +{ + struct powernow_k8_data *data = powernow_data[pol->cpu]; + + if (!data) + return -EINVAL; + + powernow_k8_cpu_exit_acpi(data); + + cpufreq_frequency_table_put_attr(pol->cpu); + + kfree(data->powernow_table); + kfree(data); + + return 0; +} + +static unsigned int powernowk8_get (unsigned int cpu) +{ + struct powernow_k8_data *data; + cpumask_t oldmask = current->cpus_allowed; + unsigned int khz = 0; + + data = powernow_data[first_cpu(cpu_core_map[cpu])]; + + if (!data) + return -EINVAL; + + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (smp_processor_id() != cpu) { + printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); + set_cpus_allowed(current, oldmask); + return 0; + } + + if (query_current_values_with_pending_wait(data)) + goto out; + + if (cpu_family == CPU_HW_PSTATE) + khz = find_khz_freq_from_fiddid(data->currfid, data->currdid); + else + khz = find_khz_freq_from_fid(data->currfid); + + +out: + set_cpus_allowed(current, oldmask); + return khz; +} + +static struct freq_attr* powernow_k8_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver cpufreq_amd64_driver = { + .verify = powernowk8_verify, + .target = powernowk8_target, + .init = powernowk8_cpu_init, + .exit = __devexit_p(powernowk8_cpu_exit), + .get = powernowk8_get, + .name = "powernow-k8", + .owner = THIS_MODULE, + .attr = powernow_k8_attr, +}; + +/* driver entry point for init */ +static int __cpuinit powernowk8_init(void) +{ + unsigned int i, supported_cpus = 0; + + for_each_online_cpu(i) { + if (check_supported_cpu(i)) + supported_cpus++; + } + + if (supported_cpus == num_online_cpus()) { + printk(KERN_INFO PFX "Found %d %s " + "processors (%d cpu cores) (" VERSION ")\n", + num_online_nodes(), + boot_cpu_data.x86_model_id, supported_cpus); + return cpufreq_register_driver(&cpufreq_amd64_driver); + } + + return -ENODEV; +} + +/* driver entry point for term */ +static void __exit powernowk8_exit(void) +{ + dprintk("exit\n"); + + cpufreq_unregister_driver(&cpufreq_amd64_driver); +} + +MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>"); +MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); +MODULE_LICENSE("GPL"); + +late_initcall(powernowk8_init); +module_exit(powernowk8_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h new file mode 100644 index 00000000000..b06c812208c --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -0,0 +1,232 @@ +/* + * (c) 2003-2006 Advanced Micro Devices, Inc. + * Your use of this code is subject to the terms and conditions of the + * GNU general public license version 2. See "COPYING" or + * http://www.gnu.org/licenses/gpl.html + */ + +struct powernow_k8_data { + unsigned int cpu; + + u32 numps; /* number of p-states */ + u32 batps; /* number of p-states supported on battery */ + + /* these values are constant when the PSB is used to determine + * vid/fid pairings, but are modified during the ->target() call + * when ACPI is used */ + u32 rvo; /* ramp voltage offset */ + u32 irt; /* isochronous relief time */ + u32 vidmvs; /* usable value calculated from mvs */ + u32 vstable; /* voltage stabilization time, units 20 us */ + u32 plllock; /* pll lock time, units 1 us */ + u32 exttype; /* extended interface = 1 */ + + /* keep track of the current fid / vid or did */ + u32 currvid, currfid, currdid; + + /* the powernow_table includes all frequency and vid/fid pairings: + * fid are the lower 8 bits of the index, vid are the upper 8 bits. + * frequency is in kHz */ + struct cpufreq_frequency_table *powernow_table; + +#ifdef CONFIG_X86_POWERNOW_K8_ACPI + /* the acpi table needs to be kept. it's only available if ACPI was + * used to determine valid frequency/vid/fid states */ + struct acpi_processor_performance acpi_data; +#endif + /* we need to keep track of associated cores, but let cpufreq + * handle hotplug events - so just point at cpufreq pol->cpus + * structure */ + cpumask_t *available_cores; +}; + + +/* processor's cpuid instruction support */ +#define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ +#define CPUID_XFAM 0x0ff00000 /* extended family */ +#define CPUID_XFAM_K8 0 +#define CPUID_XMOD 0x000f0000 /* extended model */ +#define CPUID_XMOD_REV_MASK 0x00080000 +#define CPUID_XFAM_10H 0x00100000 /* family 0x10 */ +#define CPUID_USE_XFAM_XMOD 0x00000f00 +#define CPUID_GET_MAX_CAPABILITIES 0x80000000 +#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 +#define P_STATE_TRANSITION_CAPABLE 6 + +/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For */ +/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and */ +/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */ +/* the register number is placed in ecx, and the data is returned in edx:eax. */ + +#define MSR_FIDVID_CTL 0xc0010041 +#define MSR_FIDVID_STATUS 0xc0010042 + +/* Field definitions within the FID VID Low Control MSR : */ +#define MSR_C_LO_INIT_FID_VID 0x00010000 +#define MSR_C_LO_NEW_VID 0x00003f00 +#define MSR_C_LO_NEW_FID 0x0000003f +#define MSR_C_LO_VID_SHIFT 8 + +/* Field definitions within the FID VID High Control MSR : */ +#define MSR_C_HI_STP_GNT_TO 0x000fffff + +/* Field definitions within the FID VID Low Status MSR : */ +#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */ +#define MSR_S_LO_MAX_RAMP_VID 0x3f000000 +#define MSR_S_LO_MAX_FID 0x003f0000 +#define MSR_S_LO_START_FID 0x00003f00 +#define MSR_S_LO_CURRENT_FID 0x0000003f + +/* Field definitions within the FID VID High Status MSR : */ +#define MSR_S_HI_MIN_WORKING_VID 0x3f000000 +#define MSR_S_HI_MAX_WORKING_VID 0x003f0000 +#define MSR_S_HI_START_VID 0x00003f00 +#define MSR_S_HI_CURRENT_VID 0x0000003f +#define MSR_C_HI_STP_GNT_BENIGN 0x00000001 + + +/* Hardware Pstate _PSS and MSR definitions */ +#define USE_HW_PSTATE 0x00000080 +#define HW_PSTATE_FID_MASK 0x0000003f +#define HW_PSTATE_DID_MASK 0x000001c0 +#define HW_PSTATE_DID_SHIFT 6 +#define HW_PSTATE_MASK 0x00000007 +#define HW_PSTATE_VALID_MASK 0x80000000 +#define HW_FID_INDEX_SHIFT 8 +#define HW_FID_INDEX_MASK 0x0000ff00 +#define HW_DID_INDEX_SHIFT 16 +#define HW_DID_INDEX_MASK 0x00ff0000 +#define HW_WATTS_MASK 0xff +#define HW_PWR_DVR_MASK 0x300 +#define HW_PWR_DVR_SHIFT 8 +#define HW_PWR_MAX_MULT 3 +#define MAX_HW_PSTATE 8 /* hw pstate supports up to 8 */ +#define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */ +#define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */ +#define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */ + +/* define the two driver architectures */ +#define CPU_OPTERON 0 +#define CPU_HW_PSTATE 1 + + +/* + * There are restrictions frequencies have to follow: + * - only 1 entry in the low fid table ( <=1.4GHz ) + * - lowest entry in the high fid table must be >= 2 * the entry in the + * low fid table + * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry + * in the low fid table + * - the parts can only step at <= 200 MHz intervals, odd fid values are + * supported in revision G and later revisions. + * - lowest frequency must be >= interprocessor hypertransport link speed + * (only applies to MP systems obviously) + */ + +/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */ +#define LO_FID_TABLE_TOP 7 /* fid values marking the boundary */ +#define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */ + +#define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */ +#define HI_VCOFREQ_TABLE_BOTTOM 1600 + +#define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */ + +#define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */ +#define LEAST_VID 0x3e /* Lowest (numerically highest) useful vid value */ + +#define MIN_FREQ 800 /* Min and max freqs, per spec */ +#define MAX_FREQ 5000 + +#define INVALID_FID_MASK 0xffffffc0 /* not a valid fid if these bits are set */ +#define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */ + +#define VID_OFF 0x3f + +#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */ + +#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */ + +#define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */ +#define VST_UNITS_20US 20 /* Voltage Stabalization Time is in units of 20us */ + +/* + * Most values of interest are enocoded in a single field of the _PSS + * entries: the "control" value. + */ + +#define IRT_SHIFT 30 +#define RVO_SHIFT 28 +#define EXT_TYPE_SHIFT 27 +#define PLL_L_SHIFT 20 +#define MVS_SHIFT 18 +#define VST_SHIFT 11 +#define VID_SHIFT 6 +#define IRT_MASK 3 +#define RVO_MASK 3 +#define EXT_TYPE_MASK 1 +#define PLL_L_MASK 0x7f +#define MVS_MASK 3 +#define VST_MASK 0x7f +#define VID_MASK 0x1f +#define FID_MASK 0x1f +#define EXT_VID_MASK 0x3f +#define EXT_FID_MASK 0x3f + + +/* + * Version 1.4 of the PSB table. This table is constructed by BIOS and is + * to tell the OS's power management driver which VIDs and FIDs are + * supported by this particular processor. + * If the data in the PSB / PST is wrong, then this driver will program the + * wrong values into hardware, which is very likely to lead to a crash. + */ + +#define PSB_ID_STRING "AMDK7PNOW!" +#define PSB_ID_STRING_LEN 10 + +#define PSB_VERSION_1_4 0x14 + +struct psb_s { + u8 signature[10]; + u8 tableversion; + u8 flags1; + u16 vstable; + u8 flags2; + u8 num_tables; + u32 cpuid; + u8 plllocktime; + u8 maxfid; + u8 maxvid; + u8 numps; +}; + +/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */ +struct pst_s { + u8 fid; + u8 vid; +}; + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg) + +static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid); +static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid); +static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); + +static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); + +#ifdef CONFIG_X86_POWERNOW_K8_ACPI +static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); +static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); +#endif + +#ifdef CONFIG_SMP +static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) +{ +} +#else +static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) +{ + cpu_set(0, cpu_sharedcore_mask[0]); +} +#endif diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c new file mode 100644 index 00000000000..d9f3e90a7ae --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c @@ -0,0 +1,190 @@ +/* + * sc520_freq.c: cpufreq driver for the AMD Elan sc520 + * + * Copyright (C) 2005 Sean Young <sean@mess.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on elanfreq.c + * + * 2005-03-30: - initial revision + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> + +#include <linux/delay.h> +#include <linux/cpufreq.h> + +#include <asm/msr.h> +#include <asm/timex.h> +#include <asm/io.h> + +#define MMCR_BASE 0xfffef000 /* The default base address */ +#define OFFS_CPUCTL 0x2 /* CPU Control Register */ + +static __u8 __iomem *cpuctl; + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "sc520_freq", msg) + +static struct cpufreq_frequency_table sc520_freq_table[] = { + {0x01, 100000}, + {0x02, 133000}, + {0, CPUFREQ_TABLE_END}, +}; + +static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) +{ + u8 clockspeed_reg = *cpuctl; + + switch (clockspeed_reg & 0x03) { + default: + printk(KERN_ERR "sc520_freq: error: cpuctl register has unexpected value %02x\n", clockspeed_reg); + case 0x01: + return 100000; + case 0x02: + return 133000; + } +} + +static void sc520_freq_set_cpu_state (unsigned int state) +{ + + struct cpufreq_freqs freqs; + u8 clockspeed_reg; + + freqs.old = sc520_freq_get_cpu_frequency(0); + freqs.new = sc520_freq_table[state].frequency; + freqs.cpu = 0; /* AMD Elan is UP */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + dprintk("attempting to set frequency to %i kHz\n", + sc520_freq_table[state].frequency); + + local_irq_disable(); + + clockspeed_reg = *cpuctl & ~0x03; + *cpuctl = clockspeed_reg | sc520_freq_table[state].index; + + local_irq_enable(); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); +}; + +static int sc520_freq_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]); +} + +static int sc520_freq_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, sc520_freq_table, target_freq, relation, &newstate)) + return -EINVAL; + + sc520_freq_set_cpu_state(newstate); + + return 0; +} + + +/* + * Module init and exit code + */ + +static int sc520_freq_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *c = cpu_data; + int result; + + /* capability check */ + if (c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 4 || c->x86_model != 9) + return -ENODEV; + + /* cpuinfo and default policy values */ + policy->cpuinfo.transition_latency = 1000000; /* 1ms */ + policy->cur = sc520_freq_get_cpu_frequency(0); + + result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu); + + return 0; +} + + +static int sc520_freq_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + + +static struct freq_attr* sc520_freq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + + +static struct cpufreq_driver sc520_freq_driver = { + .get = sc520_freq_get_cpu_frequency, + .verify = sc520_freq_verify, + .target = sc520_freq_target, + .init = sc520_freq_cpu_init, + .exit = sc520_freq_cpu_exit, + .name = "sc520_freq", + .owner = THIS_MODULE, + .attr = sc520_freq_attr, +}; + + +static int __init sc520_freq_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + int err; + + /* Test if we have the right hardware */ + if(c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 4 || c->x86_model != 9) { + dprintk("no Elan SC520 processor found!\n"); + return -ENODEV; + } + cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); + if(!cpuctl) { + printk(KERN_ERR "sc520_freq: error: failed to remap memory\n"); + return -ENOMEM; + } + + err = cpufreq_register_driver(&sc520_freq_driver); + if (err) + iounmap(cpuctl); + + return err; +} + + +static void __exit sc520_freq_exit(void) +{ + cpufreq_unregister_driver(&sc520_freq_driver); + iounmap(cpuctl); +} + + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Sean Young <sean@mess.org>"); +MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU"); + +module_init(sc520_freq_init); +module_exit(sc520_freq_exit); + diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c new file mode 100644 index 00000000000..811d4743854 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -0,0 +1,633 @@ +/* + * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium + * M (part of the Centrino chipset). + * + * Since the original Pentium M, most new Intel CPUs support Enhanced + * SpeedStep. + * + * Despite the "SpeedStep" in the name, this is almost entirely unlike + * traditional SpeedStep. + * + * Modelled on speedstep.c + * + * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/cpufreq.h> +#include <linux/sched.h> /* current */ +#include <linux/delay.h> +#include <linux/compiler.h> + +#include <asm/msr.h> +#include <asm/processor.h> +#include <asm/cpufeature.h> + +#define PFX "speedstep-centrino: " +#define MAINTAINER "cpufreq@lists.linux.org.uk" + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) + +#define INTEL_MSR_RANGE (0xffff) + +struct cpu_id +{ + __u8 x86; /* CPU family */ + __u8 x86_model; /* model */ + __u8 x86_mask; /* stepping */ +}; + +enum { + CPU_BANIAS, + CPU_DOTHAN_A1, + CPU_DOTHAN_A2, + CPU_DOTHAN_B0, + CPU_MP4HT_D0, + CPU_MP4HT_E0, +}; + +static const struct cpu_id cpu_ids[] = { + [CPU_BANIAS] = { 6, 9, 5 }, + [CPU_DOTHAN_A1] = { 6, 13, 1 }, + [CPU_DOTHAN_A2] = { 6, 13, 2 }, + [CPU_DOTHAN_B0] = { 6, 13, 6 }, + [CPU_MP4HT_D0] = {15, 3, 4 }, + [CPU_MP4HT_E0] = {15, 4, 1 }, +}; +#define N_IDS ARRAY_SIZE(cpu_ids) + +struct cpu_model +{ + const struct cpu_id *cpu_id; + const char *model_name; + unsigned max_freq; /* max clock in kHz */ + + struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ +}; +static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x); + +/* Operating points for current CPU */ +static struct cpu_model *centrino_model[NR_CPUS]; +static const struct cpu_id *centrino_cpu[NR_CPUS]; + +static struct cpufreq_driver centrino_driver; + +#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE + +/* Computes the correct form for IA32_PERF_CTL MSR for a particular + frequency/voltage operating point; frequency in MHz, volts in mV. + This is stored as "index" in the structure. */ +#define OP(mhz, mv) \ + { \ + .frequency = (mhz) * 1000, \ + .index = (((mhz)/100) << 8) | ((mv - 700) / 16) \ + } + +/* + * These voltage tables were derived from the Intel Pentium M + * datasheet, document 25261202.pdf, Table 5. I have verified they + * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium + * M. + */ + +/* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */ +static struct cpufreq_frequency_table banias_900[] = +{ + OP(600, 844), + OP(800, 988), + OP(900, 1004), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */ +static struct cpufreq_frequency_table banias_1000[] = +{ + OP(600, 844), + OP(800, 972), + OP(900, 988), + OP(1000, 1004), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */ +static struct cpufreq_frequency_table banias_1100[] = +{ + OP( 600, 956), + OP( 800, 1020), + OP( 900, 1100), + OP(1000, 1164), + OP(1100, 1180), + { .frequency = CPUFREQ_TABLE_END } +}; + + +/* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */ +static struct cpufreq_frequency_table banias_1200[] = +{ + OP( 600, 956), + OP( 800, 1004), + OP( 900, 1020), + OP(1000, 1100), + OP(1100, 1164), + OP(1200, 1180), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Intel Pentium M processor 1.30GHz (Banias) */ +static struct cpufreq_frequency_table banias_1300[] = +{ + OP( 600, 956), + OP( 800, 1260), + OP(1000, 1292), + OP(1200, 1356), + OP(1300, 1388), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Intel Pentium M processor 1.40GHz (Banias) */ +static struct cpufreq_frequency_table banias_1400[] = +{ + OP( 600, 956), + OP( 800, 1180), + OP(1000, 1308), + OP(1200, 1436), + OP(1400, 1484), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Intel Pentium M processor 1.50GHz (Banias) */ +static struct cpufreq_frequency_table banias_1500[] = +{ + OP( 600, 956), + OP( 800, 1116), + OP(1000, 1228), + OP(1200, 1356), + OP(1400, 1452), + OP(1500, 1484), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Intel Pentium M processor 1.60GHz (Banias) */ +static struct cpufreq_frequency_table banias_1600[] = +{ + OP( 600, 956), + OP( 800, 1036), + OP(1000, 1164), + OP(1200, 1276), + OP(1400, 1420), + OP(1600, 1484), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Intel Pentium M processor 1.70GHz (Banias) */ +static struct cpufreq_frequency_table banias_1700[] = +{ + OP( 600, 956), + OP( 800, 1004), + OP(1000, 1116), + OP(1200, 1228), + OP(1400, 1308), + OP(1700, 1484), + { .frequency = CPUFREQ_TABLE_END } +}; +#undef OP + +#define _BANIAS(cpuid, max, name) \ +{ .cpu_id = cpuid, \ + .model_name = "Intel(R) Pentium(R) M processor " name "MHz", \ + .max_freq = (max)*1000, \ + .op_points = banias_##max, \ +} +#define BANIAS(max) _BANIAS(&cpu_ids[CPU_BANIAS], max, #max) + +/* CPU models, their operating frequency range, and freq/voltage + operating points */ +static struct cpu_model models[] = +{ + _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"), + BANIAS(1000), + BANIAS(1100), + BANIAS(1200), + BANIAS(1300), + BANIAS(1400), + BANIAS(1500), + BANIAS(1600), + BANIAS(1700), + + /* NULL model_name is a wildcard */ + { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL }, + { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL }, + { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL }, + { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL }, + { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, + + { NULL, } +}; +#undef _BANIAS +#undef BANIAS + +static int centrino_cpu_init_table(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; + struct cpu_model *model; + + for(model = models; model->cpu_id != NULL; model++) + if (centrino_verify_cpu_id(cpu, model->cpu_id) && + (model->model_name == NULL || + strcmp(cpu->x86_model_id, model->model_name) == 0)) + break; + + if (model->cpu_id == NULL) { + /* No match at all */ + dprintk("no support for CPU model \"%s\": " + "send /proc/cpuinfo to " MAINTAINER "\n", + cpu->x86_model_id); + return -ENOENT; + } + + if (model->op_points == NULL) { + /* Matched a non-match */ + dprintk("no table support for CPU model \"%s\"\n", + cpu->x86_model_id); + dprintk("try using the acpi-cpufreq driver\n"); + return -ENOENT; + } + + centrino_model[policy->cpu] = model; + + dprintk("found \"%s\": max frequency: %dkHz\n", + model->model_name, model->max_freq); + + return 0; +} + +#else +static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; } +#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ + +static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x) +{ + if ((c->x86 == x->x86) && + (c->x86_model == x->x86_model) && + (c->x86_mask == x->x86_mask)) + return 1; + return 0; +} + +/* To be called only after centrino_model is initialized */ +static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe) +{ + int i; + + /* + * Extract clock in kHz from PERF_CTL value + * for centrino, as some DSDTs are buggy. + * Ideally, this can be done using the acpi_data structure. + */ + if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) || + (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) || + (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) { + msr = (msr >> 8) & 0xff; + return msr * 100000; + } + + if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points)) + return 0; + + msr &= 0xffff; + for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) { + if (msr == centrino_model[cpu]->op_points[i].index) + return centrino_model[cpu]->op_points[i].frequency; + } + if (failsafe) + return centrino_model[cpu]->op_points[i-1].frequency; + else + return 0; +} + +/* Return the current CPU frequency in kHz */ +static unsigned int get_cur_freq(unsigned int cpu) +{ + unsigned l, h; + unsigned clock_freq; + cpumask_t saved_mask; + + saved_mask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (smp_processor_id() != cpu) + return 0; + + rdmsr(MSR_IA32_PERF_STATUS, l, h); + clock_freq = extract_clock(l, cpu, 0); + + if (unlikely(clock_freq == 0)) { + /* + * On some CPUs, we can see transient MSR values (which are + * not present in _PSS), while CPU is doing some automatic + * P-state transition (like TM2). Get the last freq set + * in PERF_CTL. + */ + rdmsr(MSR_IA32_PERF_CTL, l, h); + clock_freq = extract_clock(l, cpu, 1); + } + + set_cpus_allowed(current, saved_mask); + return clock_freq; +} + + +static int centrino_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; + unsigned freq; + unsigned l, h; + int ret; + int i; + + /* Only Intel makes Enhanced Speedstep-capable CPUs */ + if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) + return -ENODEV; + + if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) + centrino_driver.flags |= CPUFREQ_CONST_LOOPS; + + if (policy->cpu != 0) + return -ENODEV; + + for (i = 0; i < N_IDS; i++) + if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) + break; + + if (i != N_IDS) + centrino_cpu[policy->cpu] = &cpu_ids[i]; + + if (!centrino_cpu[policy->cpu]) { + dprintk("found unsupported CPU with " + "Enhanced SpeedStep: send /proc/cpuinfo to " + MAINTAINER "\n"); + return -ENODEV; + } + + if (centrino_cpu_init_table(policy)) { + return -ENODEV; + } + + /* Check to see if Enhanced SpeedStep is enabled, and try to + enable it if not. */ + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + + if (!(l & (1<<16))) { + l |= (1<<16); + dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); + wrmsr(MSR_IA32_MISC_ENABLE, l, h); + + /* check to see if it stuck */ + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + if (!(l & (1<<16))) { + printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n"); + return -ENODEV; + } + } + + freq = get_cur_freq(policy->cpu); + + policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */ + policy->cur = freq; + + dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); + + ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points); + if (ret) + return (ret); + + cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu); + + return 0; +} + +static int centrino_cpu_exit(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + + if (!centrino_model[cpu]) + return -ENODEV; + + cpufreq_frequency_table_put_attr(cpu); + + centrino_model[cpu] = NULL; + + return 0; +} + +/** + * centrino_verify - verifies a new CPUFreq policy + * @policy: new policy + * + * Limit must be within this model's frequency range at least one + * border included. + */ +static int centrino_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points); +} + +/** + * centrino_setpolicy - set a new CPUFreq policy + * @policy: new policy + * @target_freq: the target frequency + * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * + * Sets a new CPUFreq policy. + */ +static int centrino_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; + struct cpufreq_freqs freqs; + cpumask_t online_policy_cpus; + cpumask_t saved_mask; + cpumask_t set_mask; + cpumask_t covered_cpus; + int retval = 0; + unsigned int j, k, first_cpu, tmp; + + if (unlikely(centrino_model[cpu] == NULL)) + return -ENODEV; + + if (unlikely(cpufreq_frequency_table_target(policy, + centrino_model[cpu]->op_points, + target_freq, + relation, + &newstate))) { + return -EINVAL; + } + +#ifdef CONFIG_HOTPLUG_CPU + /* cpufreq holds the hotplug lock, so we are safe from here on */ + cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); +#else + online_policy_cpus = policy->cpus; +#endif + + saved_mask = current->cpus_allowed; + first_cpu = 1; + cpus_clear(covered_cpus); + for_each_cpu_mask(j, online_policy_cpus) { + /* + * Support for SMP systems. + * Make sure we are running on CPU that wants to change freq + */ + cpus_clear(set_mask); + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) + cpus_or(set_mask, set_mask, online_policy_cpus); + else + cpu_set(j, set_mask); + + set_cpus_allowed(current, set_mask); + preempt_disable(); + if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { + dprintk("couldn't limit to CPUs in this domain\n"); + retval = -EAGAIN; + if (first_cpu) { + /* We haven't started the transition yet. */ + goto migrate_end; + } + preempt_enable(); + break; + } + + msr = centrino_model[cpu]->op_points[newstate].index; + + if (first_cpu) { + rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); + if (msr == (oldmsr & 0xffff)) { + dprintk("no change needed - msr was and needs " + "to be %x\n", oldmsr); + retval = 0; + goto migrate_end; + } + + freqs.old = extract_clock(oldmsr, cpu, 0); + freqs.new = extract_clock(msr, cpu, 0); + + dprintk("target=%dkHz old=%d new=%d msr=%04x\n", + target_freq, freqs.old, freqs.new, msr); + + for_each_cpu_mask(k, online_policy_cpus) { + freqs.cpu = k; + cpufreq_notify_transition(&freqs, + CPUFREQ_PRECHANGE); + } + + first_cpu = 0; + /* all but 16 LSB are reserved, treat them with care */ + oldmsr &= ~0xffff; + msr &= 0xffff; + oldmsr |= msr; + } + + wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { + preempt_enable(); + break; + } + + cpu_set(j, covered_cpus); + preempt_enable(); + } + + for_each_cpu_mask(k, online_policy_cpus) { + freqs.cpu = k; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + + if (unlikely(retval)) { + /* + * We have failed halfway through the frequency change. + * We have sent callbacks to policy->cpus and + * MSRs have already been written on coverd_cpus. + * Best effort undo.. + */ + + if (!cpus_empty(covered_cpus)) { + for_each_cpu_mask(j, covered_cpus) { + set_cpus_allowed(current, cpumask_of_cpu(j)); + wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); + } + } + + tmp = freqs.new; + freqs.new = freqs.old; + freqs.old = tmp; + for_each_cpu_mask(j, online_policy_cpus) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + } + set_cpus_allowed(current, saved_mask); + return 0; + +migrate_end: + preempt_enable(); + set_cpus_allowed(current, saved_mask); + return 0; +} + +static struct freq_attr* centrino_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver centrino_driver = { + .name = "centrino", /* should be speedstep-centrino, + but there's a 16 char limit */ + .init = centrino_cpu_init, + .exit = centrino_cpu_exit, + .verify = centrino_verify, + .target = centrino_target, + .get = get_cur_freq, + .attr = centrino_attr, + .owner = THIS_MODULE, +}; + + +/** + * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver + * + * Initializes the Enhanced SpeedStep support. Returns -ENODEV on + * unsupported devices, -ENOENT if there's no voltage table for this + * particular CPU model, -EINVAL on problems during initiatization, + * and zero on success. + * + * This is quite picky. Not only does the CPU have to advertise the + * "est" flag in the cpuid capability flags, we look for a specific + * CPU model and stepping, and we need to have the exact model name in + * our voltage tables. That is, be paranoid about not releasing + * someone's valuable magic smoke. + */ +static int __init centrino_init(void) +{ + struct cpuinfo_x86 *cpu = cpu_data; + + if (!cpu_has(cpu, X86_FEATURE_EST)) + return -ENODEV; + + return cpufreq_register_driver(¢rino_driver); +} + +static void __exit centrino_exit(void) +{ + cpufreq_unregister_driver(¢rino_driver); +} + +MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>"); +MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors."); +MODULE_LICENSE ("GPL"); + +late_initcall(centrino_init); +module_exit(centrino_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c new file mode 100644 index 00000000000..36685e8f7be --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -0,0 +1,439 @@ +/* + * (C) 2001 Dave Jones, Arjan van de ven. + * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> + * + * Licensed under the terms of the GNU GPL License version 2. + * Based upon reverse engineered information, and on Intel documentation + * for chipsets ICH2-M and ICH3-M. + * + * Many thanks to Ducrot Bruno for finding and fixing the last + * "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler + * for extensive testing. + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + + +/********************************************************************* + * SPEEDSTEP - DEFINITIONS * + *********************************************************************/ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/cpufreq.h> +#include <linux/pci.h> +#include <linux/slab.h> +#include <linux/sched.h> + +#include "speedstep-lib.h" + + +/* speedstep_chipset: + * It is necessary to know which chipset is used. As accesses to + * this device occur at various places in this module, we need a + * static struct pci_dev * pointing to that device. + */ +static struct pci_dev *speedstep_chipset_dev; + + +/* speedstep_processor + */ +static unsigned int speedstep_processor = 0; + +static u32 pmbase; + +/* + * There are only two frequency states for each processor. Values + * are in kHz for the time being. + */ +static struct cpufreq_frequency_table speedstep_freqs[] = { + {SPEEDSTEP_HIGH, 0}, + {SPEEDSTEP_LOW, 0}, + {0, CPUFREQ_TABLE_END}, +}; + + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg) + + +/** + * speedstep_find_register - read the PMBASE address + * + * Returns: -ENODEV if no register could be found + */ +static int speedstep_find_register (void) +{ + if (!speedstep_chipset_dev) + return -ENODEV; + + /* get PMBASE */ + pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase); + if (!(pmbase & 0x01)) { + printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); + return -ENODEV; + } + + pmbase &= 0xFFFFFFFE; + if (!pmbase) { + printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); + return -ENODEV; + } + + dprintk("pmbase is 0x%x\n", pmbase); + return 0; +} + +/** + * speedstep_set_state - set the SpeedStep state + * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) + * + * Tries to change the SpeedStep state. + */ +static void speedstep_set_state (unsigned int state) +{ + u8 pm2_blk; + u8 value; + unsigned long flags; + + if (state > 0x1) + return; + + /* Disable IRQs */ + local_irq_save(flags); + + /* read state */ + value = inb(pmbase + 0x50); + + dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); + + /* write new state */ + value &= 0xFE; + value |= state; + + dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase); + + /* Disable bus master arbitration */ + pm2_blk = inb(pmbase + 0x20); + pm2_blk |= 0x01; + outb(pm2_blk, (pmbase + 0x20)); + + /* Actual transition */ + outb(value, (pmbase + 0x50)); + + /* Restore bus master arbitration */ + pm2_blk &= 0xfe; + outb(pm2_blk, (pmbase + 0x20)); + + /* check if transition was successful */ + value = inb(pmbase + 0x50); + + /* Enable IRQs */ + local_irq_restore(flags); + + dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); + + if (state == (value & 0x1)) { + dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000)); + } else { + printk (KERN_ERR "cpufreq: change failed - I/O error\n"); + } + + return; +} + + +/** + * speedstep_activate - activate SpeedStep control in the chipset + * + * Tries to activate the SpeedStep status and control registers. + * Returns -EINVAL on an unsupported chipset, and zero on success. + */ +static int speedstep_activate (void) +{ + u16 value = 0; + + if (!speedstep_chipset_dev) + return -EINVAL; + + pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value); + if (!(value & 0x08)) { + value |= 0x08; + dprintk("activating SpeedStep (TM) registers\n"); + pci_write_config_word(speedstep_chipset_dev, 0x00A0, value); + } + + return 0; +} + + +/** + * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic + * + * Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to + * the LPC bridge / PM module which contains all power-management + * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected + * chipset, or zero on failure. + */ +static unsigned int speedstep_detect_chipset (void) +{ + speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82801DB_12, + PCI_ANY_ID, + PCI_ANY_ID, + NULL); + if (speedstep_chipset_dev) + return 4; /* 4-M */ + + speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82801CA_12, + PCI_ANY_ID, + PCI_ANY_ID, + NULL); + if (speedstep_chipset_dev) + return 3; /* 3-M */ + + + speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82801BA_10, + PCI_ANY_ID, + PCI_ANY_ID, + NULL); + if (speedstep_chipset_dev) { + /* speedstep.c causes lockups on Dell Inspirons 8000 and + * 8100 which use a pretty old revision of the 82815 + * host brige. Abort on these systems. + */ + static struct pci_dev *hostbridge; + + hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82815_MC, + PCI_ANY_ID, + PCI_ANY_ID, + NULL); + + if (!hostbridge) + return 2; /* 2-M */ + + if (hostbridge->revision < 5) { + dprintk("hostbridge does not support speedstep\n"); + speedstep_chipset_dev = NULL; + pci_dev_put(hostbridge); + return 0; + } + + pci_dev_put(hostbridge); + return 2; /* 2-M */ + } + + return 0; +} + +static unsigned int _speedstep_get(cpumask_t cpus) +{ + unsigned int speed; + cpumask_t cpus_allowed; + + cpus_allowed = current->cpus_allowed; + set_cpus_allowed(current, cpus); + speed = speedstep_get_processor_frequency(speedstep_processor); + set_cpus_allowed(current, cpus_allowed); + dprintk("detected %u kHz as current frequency\n", speed); + return speed; +} + +static unsigned int speedstep_get(unsigned int cpu) +{ + return _speedstep_get(cpumask_of_cpu(cpu)); +} + +/** + * speedstep_target - set a new CPUFreq policy + * @policy: new policy + * @target_freq: the target frequency + * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * + * Sets a new CPUFreq policy. + */ +static int speedstep_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + struct cpufreq_freqs freqs; + cpumask_t cpus_allowed; + int i; + + if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) + return -EINVAL; + + freqs.old = _speedstep_get(policy->cpus); + freqs.new = speedstep_freqs[newstate].frequency; + freqs.cpu = policy->cpu; + + dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new); + + /* no transition necessary */ + if (freqs.old == freqs.new) + return 0; + + cpus_allowed = current->cpus_allowed; + + for_each_cpu_mask(i, policy->cpus) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + /* switch to physical CPU where state is to be changed */ + set_cpus_allowed(current, policy->cpus); + + speedstep_set_state(newstate); + + /* allow to be run on all CPUs */ + set_cpus_allowed(current, cpus_allowed); + + for_each_cpu_mask(i, policy->cpus) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + + return 0; +} + + +/** + * speedstep_verify - verifies a new CPUFreq policy + * @policy: new policy + * + * Limit must be within speedstep_low_freq and speedstep_high_freq, with + * at least one border included. + */ +static int speedstep_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); +} + + +static int speedstep_cpu_init(struct cpufreq_policy *policy) +{ + int result = 0; + unsigned int speed; + cpumask_t cpus_allowed; + + /* only run on CPU to be set, or on its sibling */ +#ifdef CONFIG_SMP + policy->cpus = cpu_sibling_map[policy->cpu]; +#endif + + cpus_allowed = current->cpus_allowed; + set_cpus_allowed(current, policy->cpus); + + /* detect low and high frequency and transition latency */ + result = speedstep_get_freqs(speedstep_processor, + &speedstep_freqs[SPEEDSTEP_LOW].frequency, + &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + &policy->cpuinfo.transition_latency, + &speedstep_set_state); + set_cpus_allowed(current, cpus_allowed); + if (result) + return result; + + /* get current speed setting */ + speed = _speedstep_get(policy->cpus); + if (!speed) + return -EIO; + + dprintk("currently at %s speed setting - %i MHz\n", + (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", + (speed / 1000)); + + /* cpuinfo and default policy values */ + policy->cur = speed; + + result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); + + return 0; +} + + +static int speedstep_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static struct freq_attr* speedstep_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + + +static struct cpufreq_driver speedstep_driver = { + .name = "speedstep-ich", + .verify = speedstep_verify, + .target = speedstep_target, + .init = speedstep_cpu_init, + .exit = speedstep_cpu_exit, + .get = speedstep_get, + .owner = THIS_MODULE, + .attr = speedstep_attr, +}; + + +/** + * speedstep_init - initializes the SpeedStep CPUFreq driver + * + * Initializes the SpeedStep support. Returns -ENODEV on unsupported + * devices, -EINVAL on problems during initiatization, and zero on + * success. + */ +static int __init speedstep_init(void) +{ + /* detect processor */ + speedstep_processor = speedstep_detect_processor(); + if (!speedstep_processor) { + dprintk("Intel(R) SpeedStep(TM) capable processor not found\n"); + return -ENODEV; + } + + /* detect chipset */ + if (!speedstep_detect_chipset()) { + dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n"); + return -ENODEV; + } + + /* activate speedstep support */ + if (speedstep_activate()) { + pci_dev_put(speedstep_chipset_dev); + return -EINVAL; + } + + if (speedstep_find_register()) + return -ENODEV; + + return cpufreq_register_driver(&speedstep_driver); +} + + +/** + * speedstep_exit - unregisters SpeedStep support + * + * Unregisters SpeedStep support. + */ +static void __exit speedstep_exit(void) +{ + pci_dev_put(speedstep_chipset_dev); + cpufreq_unregister_driver(&speedstep_driver); +} + + +MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>"); +MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); +MODULE_LICENSE ("GPL"); + +module_init(speedstep_init); +module_exit(speedstep_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c new file mode 100644 index 00000000000..b1acc8ce316 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -0,0 +1,444 @@ +/* + * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> + * + * Licensed under the terms of the GNU GPL License version 2. + * + * Library for common functions for Intel SpeedStep v.1 and v.2 support + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/init.h> +#include <linux/cpufreq.h> +#include <linux/slab.h> + +#include <asm/msr.h> +#include "speedstep-lib.h" + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-lib", msg) + +#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK +static int relaxed_check = 0; +#else +#define relaxed_check 0 +#endif + +/********************************************************************* + * GET PROCESSOR CORE SPEED IN KHZ * + *********************************************************************/ + +static unsigned int pentium3_get_frequency (unsigned int processor) +{ + /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ + struct { + unsigned int ratio; /* Frequency Multiplier (x10) */ + u8 bitmap; /* power on configuration bits + [27, 25:22] (in MSR 0x2a) */ + } msr_decode_mult [] = { + { 30, 0x01 }, + { 35, 0x05 }, + { 40, 0x02 }, + { 45, 0x06 }, + { 50, 0x00 }, + { 55, 0x04 }, + { 60, 0x0b }, + { 65, 0x0f }, + { 70, 0x09 }, + { 75, 0x0d }, + { 80, 0x0a }, + { 85, 0x26 }, + { 90, 0x20 }, + { 100, 0x2b }, + { 0, 0xff } /* error or unknown value */ + }; + + /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */ + struct { + unsigned int value; /* Front Side Bus speed in MHz */ + u8 bitmap; /* power on configuration bits [18: 19] + (in MSR 0x2a) */ + } msr_decode_fsb [] = { + { 66, 0x0 }, + { 100, 0x2 }, + { 133, 0x1 }, + { 0, 0xff} + }; + + u32 msr_lo, msr_tmp; + int i = 0, j = 0; + + /* read MSR 0x2a - we only need the low 32 bits */ + rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); + dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); + msr_tmp = msr_lo; + + /* decode the FSB */ + msr_tmp &= 0x00c0000; + msr_tmp >>= 18; + while (msr_tmp != msr_decode_fsb[i].bitmap) { + if (msr_decode_fsb[i].bitmap == 0xff) + return 0; + i++; + } + + /* decode the multiplier */ + if (processor == SPEEDSTEP_PROCESSOR_PIII_C_EARLY) { + dprintk("workaround for early PIIIs\n"); + msr_lo &= 0x03c00000; + } else + msr_lo &= 0x0bc00000; + msr_lo >>= 22; + while (msr_lo != msr_decode_mult[j].bitmap) { + if (msr_decode_mult[j].bitmap == 0xff) + return 0; + j++; + } + + dprintk("speed is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); + + return (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100); +} + + +static unsigned int pentiumM_get_frequency(void) +{ + u32 msr_lo, msr_tmp; + + rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); + dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); + + /* see table B-2 of 24547212.pdf */ + if (msr_lo & 0x00040000) { + printk(KERN_DEBUG "speedstep-lib: PM - invalid FSB: 0x%x 0x%x\n", msr_lo, msr_tmp); + return 0; + } + + msr_tmp = (msr_lo >> 22) & 0x1f; + dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000)); + + return (msr_tmp * 100 * 1000); +} + +static unsigned int pentium_core_get_frequency(void) +{ + u32 fsb = 0; + u32 msr_lo, msr_tmp; + + rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp); + /* see table B-2 of 25366920.pdf */ + switch (msr_lo & 0x07) { + case 5: + fsb = 100000; + break; + case 1: + fsb = 133333; + break; + case 3: + fsb = 166667; + break; + default: + printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value"); + } + + rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); + dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); + + msr_tmp = (msr_lo >> 22) & 0x1f; + dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * fsb)); + + return (msr_tmp * fsb); +} + + +static unsigned int pentium4_get_frequency(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + u32 msr_lo, msr_hi, mult; + unsigned int fsb = 0; + + rdmsr(0x2c, msr_lo, msr_hi); + + dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi); + + /* decode the FSB: see IA-32 Intel (C) Architecture Software + * Developer's Manual, Volume 3: System Prgramming Guide, + * revision #12 in Table B-1: MSRs in the Pentium 4 and + * Intel Xeon Processors, on page B-4 and B-5. + */ + if (c->x86_model < 2) + fsb = 100 * 1000; + else { + u8 fsb_code = (msr_lo >> 16) & 0x7; + switch (fsb_code) { + case 0: + fsb = 100 * 1000; + break; + case 1: + fsb = 13333 * 10; + break; + case 2: + fsb = 200 * 1000; + break; + } + } + + if (!fsb) + printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to <linux@brodo.de>\n"); + + /* Multiplier. */ + if (c->x86_model < 2) + mult = msr_lo >> 27; + else + mult = msr_lo >> 24; + + dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult)); + + return (fsb * mult); +} + + +unsigned int speedstep_get_processor_frequency(unsigned int processor) +{ + switch (processor) { + case SPEEDSTEP_PROCESSOR_PCORE: + return pentium_core_get_frequency(); + case SPEEDSTEP_PROCESSOR_PM: + return pentiumM_get_frequency(); + case SPEEDSTEP_PROCESSOR_P4D: + case SPEEDSTEP_PROCESSOR_P4M: + return pentium4_get_frequency(); + case SPEEDSTEP_PROCESSOR_PIII_T: + case SPEEDSTEP_PROCESSOR_PIII_C: + case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: + return pentium3_get_frequency(processor); + default: + return 0; + }; + return 0; +} +EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency); + + +/********************************************************************* + * DETECT SPEEDSTEP-CAPABLE PROCESSOR * + *********************************************************************/ + +unsigned int speedstep_detect_processor (void) +{ + struct cpuinfo_x86 *c = cpu_data; + u32 ebx, msr_lo, msr_hi; + + dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); + + if ((c->x86_vendor != X86_VENDOR_INTEL) || + ((c->x86 != 6) && (c->x86 != 0xF))) + return 0; + + if (c->x86 == 0xF) { + /* Intel Mobile Pentium 4-M + * or Intel Mobile Pentium 4 with 533 MHz FSB */ + if (c->x86_model != 2) + return 0; + + ebx = cpuid_ebx(0x00000001); + ebx &= 0x000000FF; + + dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); + + switch (c->x86_mask) { + case 4: + /* + * B-stepping [M-P4-M] + * sample has ebx = 0x0f, production has 0x0e. + */ + if ((ebx == 0x0e) || (ebx == 0x0f)) + return SPEEDSTEP_PROCESSOR_P4M; + break; + case 7: + /* + * C-stepping [M-P4-M] + * needs to have ebx=0x0e, else it's a celeron: + * cf. 25130917.pdf / page 7, footnote 5 even + * though 25072120.pdf / page 7 doesn't say + * samples are only of B-stepping... + */ + if (ebx == 0x0e) + return SPEEDSTEP_PROCESSOR_P4M; + break; + case 9: + /* + * D-stepping [M-P4-M or M-P4/533] + * + * this is totally strange: CPUID 0x0F29 is + * used by M-P4-M, M-P4/533 and(!) Celeron CPUs. + * The latter need to be sorted out as they don't + * support speedstep. + * Celerons with CPUID 0x0F29 may have either + * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything + * specific. + * M-P4-Ms may have either ebx=0xe or 0xf [see above] + * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf] + * also, M-P4M HTs have ebx=0x8, too + * For now, they are distinguished by the model_id string + */ + if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL)) + return SPEEDSTEP_PROCESSOR_P4M; + break; + default: + break; + } + return 0; + } + + switch (c->x86_model) { + case 0x0B: /* Intel PIII [Tualatin] */ + /* cpuid_ebx(1) is 0x04 for desktop PIII, 0x06 for mobile PIII-M */ + ebx = cpuid_ebx(0x00000001); + dprintk("ebx is %x\n", ebx); + + ebx &= 0x000000FF; + + if (ebx != 0x06) + return 0; + + /* So far all PIII-M processors support SpeedStep. See + * Intel's 24540640.pdf of June 2003 + */ + return SPEEDSTEP_PROCESSOR_PIII_T; + + case 0x08: /* Intel PIII [Coppermine] */ + + /* all mobile PIII Coppermines have FSB 100 MHz + * ==> sort out a few desktop PIIIs. */ + rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi); + dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi); + msr_lo &= 0x00c0000; + if (msr_lo != 0x0080000) + return 0; + + /* + * If the processor is a mobile version, + * platform ID has bit 50 set + * it has SpeedStep technology if either + * bit 56 or 57 is set + */ + rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi); + dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi); + if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) { + if (c->x86_mask == 0x01) { + dprintk("early PIII version\n"); + return SPEEDSTEP_PROCESSOR_PIII_C_EARLY; + } else + return SPEEDSTEP_PROCESSOR_PIII_C; + } + + default: + return 0; + } +} +EXPORT_SYMBOL_GPL(speedstep_detect_processor); + + +/********************************************************************* + * DETECT SPEEDSTEP SPEEDS * + *********************************************************************/ + +unsigned int speedstep_get_freqs(unsigned int processor, + unsigned int *low_speed, + unsigned int *high_speed, + unsigned int *transition_latency, + void (*set_state) (unsigned int state)) +{ + unsigned int prev_speed; + unsigned int ret = 0; + unsigned long flags; + struct timeval tv1, tv2; + + if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) + return -EINVAL; + + dprintk("trying to determine both speeds\n"); + + /* get current speed */ + prev_speed = speedstep_get_processor_frequency(processor); + if (!prev_speed) + return -EIO; + + dprintk("previous speed is %u\n", prev_speed); + + local_irq_save(flags); + + /* switch to low state */ + set_state(SPEEDSTEP_LOW); + *low_speed = speedstep_get_processor_frequency(processor); + if (!*low_speed) { + ret = -EIO; + goto out; + } + + dprintk("low speed is %u\n", *low_speed); + + /* start latency measurement */ + if (transition_latency) + do_gettimeofday(&tv1); + + /* switch to high state */ + set_state(SPEEDSTEP_HIGH); + + /* end latency measurement */ + if (transition_latency) + do_gettimeofday(&tv2); + + *high_speed = speedstep_get_processor_frequency(processor); + if (!*high_speed) { + ret = -EIO; + goto out; + } + + dprintk("high speed is %u\n", *high_speed); + + if (*low_speed == *high_speed) { + ret = -ENODEV; + goto out; + } + + /* switch to previous state, if necessary */ + if (*high_speed != prev_speed) + set_state(SPEEDSTEP_LOW); + + if (transition_latency) { + *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC + + tv2.tv_usec - tv1.tv_usec; + dprintk("transition latency is %u uSec\n", *transition_latency); + + /* convert uSec to nSec and add 20% for safety reasons */ + *transition_latency *= 1200; + + /* check if the latency measurement is too high or too low + * and set it to a safe value (500uSec) in that case + */ + if (*transition_latency > 10000000 || *transition_latency < 50000) { + printk (KERN_WARNING "speedstep: frequency transition measured seems out of " + "range (%u nSec), falling back to a safe one of %u nSec.\n", + *transition_latency, 500000); + *transition_latency = 500000; + } + } + +out: + local_irq_restore(flags); + return (ret); +} +EXPORT_SYMBOL_GPL(speedstep_get_freqs); + +#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK +module_param(relaxed_check, int, 0444); +MODULE_PARM_DESC(relaxed_check, "Don't do all checks for speedstep capability."); +#endif + +MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>"); +MODULE_DESCRIPTION ("Library for Intel SpeedStep 1 or 2 cpufreq drivers."); +MODULE_LICENSE ("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h new file mode 100644 index 00000000000..b11bcc608ca --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h @@ -0,0 +1,49 @@ +/* + * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> + * + * Licensed under the terms of the GNU GPL License version 2. + * + * Library for common functions for Intel SpeedStep v.1 and v.2 support + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + + + +/* processors */ + +#define SPEEDSTEP_PROCESSOR_PIII_C_EARLY 0x00000001 /* Coppermine core */ +#define SPEEDSTEP_PROCESSOR_PIII_C 0x00000002 /* Coppermine core */ +#define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */ +#define SPEEDSTEP_PROCESSOR_P4M 0x00000004 /* P4-M */ + +/* the following processors are not speedstep-capable and are not auto-detected + * in speedstep_detect_processor(). However, their speed can be detected using + * the speedstep_get_processor_frequency() call. */ +#define SPEEDSTEP_PROCESSOR_PM 0xFFFFFF03 /* Pentium M */ +#define SPEEDSTEP_PROCESSOR_P4D 0xFFFFFF04 /* desktop P4 */ +#define SPEEDSTEP_PROCESSOR_PCORE 0xFFFFFF05 /* Core */ + +/* speedstep states -- only two of them */ + +#define SPEEDSTEP_HIGH 0x00000000 +#define SPEEDSTEP_LOW 0x00000001 + + +/* detect a speedstep-capable processor */ +extern unsigned int speedstep_detect_processor (void); + +/* detect the current speed (in khz) of the processor */ +extern unsigned int speedstep_get_processor_frequency(unsigned int processor); + + +/* detect the low and high speeds of the processor. The callback + * set_state"'s first argument is either SPEEDSTEP_HIGH or + * SPEEDSTEP_LOW; the second argument is zero so that no + * cpufreq_notify_transition calls are initiated. + */ +extern unsigned int speedstep_get_freqs(unsigned int processor, + unsigned int *low_speed, + unsigned int *high_speed, + unsigned int *transition_latency, + void (*set_state) (unsigned int state)); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c new file mode 100644 index 00000000000..f2b5a621d27 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c @@ -0,0 +1,423 @@ +/* + * Intel SpeedStep SMI driver. + * + * (C) 2003 Hiroshi Miura <miura@da-cha.org> + * + * Licensed under the terms of the GNU GPL License version 2. + * + */ + + +/********************************************************************* + * SPEEDSTEP - DEFINITIONS * + *********************************************************************/ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/init.h> +#include <linux/cpufreq.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <asm/ist.h> +#include <asm/io.h> + +#include "speedstep-lib.h" + +/* speedstep system management interface port/command. + * + * These parameters are got from IST-SMI BIOS call. + * If user gives it, these are used. + * + */ +static int smi_port = 0; +static int smi_cmd = 0; +static unsigned int smi_sig = 0; + +/* info about the processor */ +static unsigned int speedstep_processor = 0; + +/* + * There are only two frequency states for each processor. Values + * are in kHz for the time being. + */ +static struct cpufreq_frequency_table speedstep_freqs[] = { + {SPEEDSTEP_HIGH, 0}, + {SPEEDSTEP_LOW, 0}, + {0, CPUFREQ_TABLE_END}, +}; + +#define GET_SPEEDSTEP_OWNER 0 +#define GET_SPEEDSTEP_STATE 1 +#define SET_SPEEDSTEP_STATE 2 +#define GET_SPEEDSTEP_FREQS 4 + +/* how often shall the SMI call be tried if it failed, e.g. because + * of DMA activity going on? */ +#define SMI_TRIES 5 + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-smi", msg) + +/** + * speedstep_smi_ownership + */ +static int speedstep_smi_ownership (void) +{ + u32 command, result, magic; + u32 function = GET_SPEEDSTEP_OWNER; + unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation"; + + command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); + magic = virt_to_phys(magic_data); + + dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port); + + __asm__ __volatile__( + "out %%al, (%%dx)\n" + : "=D" (result) + : "a" (command), "b" (function), "c" (0), "d" (smi_port), + "D" (0), "S" (magic) + : "memory" + ); + + dprintk("result is %x\n", result); + + return result; +} + +/** + * speedstep_smi_get_freqs - get SpeedStep preferred & current freq. + * @low: the low frequency value is placed here + * @high: the high frequency value is placed here + * + * Only available on later SpeedStep-enabled systems, returns false results or + * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing + * shows that the latter occurs if !(ist_info.event & 0xFFFF). + */ +static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) +{ + u32 command, result = 0, edi, high_mhz, low_mhz; + u32 state=0; + u32 function = GET_SPEEDSTEP_FREQS; + + if (!(ist_info.event & 0xFFFF)) { + dprintk("bug #1422 -- can't read freqs from BIOS\n"); + return -ENODEV; + } + + command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); + + dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port); + + __asm__ __volatile__("movl $0, %%edi\n" + "out %%al, (%%dx)\n" + : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi) + : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0) + ); + + dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz); + + /* abort if results are obviously incorrect... */ + if ((high_mhz + low_mhz) < 600) + return -EINVAL; + + *high = high_mhz * 1000; + *low = low_mhz * 1000; + + return result; +} + +/** + * speedstep_get_state - set the SpeedStep state + * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) + * + */ +static int speedstep_get_state (void) +{ + u32 function=GET_SPEEDSTEP_STATE; + u32 result, state, edi, command; + + command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); + + dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port); + + __asm__ __volatile__("movl $0, %%edi\n" + "out %%al, (%%dx)\n" + : "=a" (result), "=b" (state), "=D" (edi) + : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0) + ); + + dprintk("state is %x, result is %x\n", state, result); + + return (state & 1); +} + + +/** + * speedstep_set_state - set the SpeedStep state + * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) + * + */ +static void speedstep_set_state (unsigned int state) +{ + unsigned int result = 0, command, new_state; + unsigned long flags; + unsigned int function=SET_SPEEDSTEP_STATE; + unsigned int retry = 0; + + if (state > 0x1) + return; + + /* Disable IRQs */ + local_irq_save(flags); + + command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); + + dprintk("trying to set frequency to state %u with command %x at port %x\n", state, command, smi_port); + + do { + if (retry) { + dprintk("retry %u, previous result %u, waiting...\n", retry, result); + mdelay(retry * 50); + } + retry++; + __asm__ __volatile__( + "movl $0, %%edi\n" + "out %%al, (%%dx)\n" + : "=b" (new_state), "=D" (result) + : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0) + ); + } while ((new_state != state) && (retry <= SMI_TRIES)); + + /* enable IRQs */ + local_irq_restore(flags); + + if (new_state == state) { + dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result); + } else { + printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result); + } + + return; +} + + +/** + * speedstep_target - set a new CPUFreq policy + * @policy: new policy + * @target_freq: new freq + * @relation: + * + * Sets a new CPUFreq policy/freq. + */ +static int speedstep_target (struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + unsigned int newstate = 0; + struct cpufreq_freqs freqs; + + if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) + return -EINVAL; + + freqs.old = speedstep_freqs[speedstep_get_state()].frequency; + freqs.new = speedstep_freqs[newstate].frequency; + freqs.cpu = 0; /* speedstep.c is UP only driver */ + + if (freqs.old == freqs.new) + return 0; + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + speedstep_set_state(newstate); + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + return 0; +} + + +/** + * speedstep_verify - verifies a new CPUFreq policy + * @policy: new policy + * + * Limit must be within speedstep_low_freq and speedstep_high_freq, with + * at least one border included. + */ +static int speedstep_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); +} + + +static int speedstep_cpu_init(struct cpufreq_policy *policy) +{ + int result; + unsigned int speed,state; + + /* capability check */ + if (policy->cpu != 0) + return -ENODEV; + + result = speedstep_smi_ownership(); + if (result) { + dprintk("fails in aquiring ownership of a SMI interface.\n"); + return -EINVAL; + } + + /* detect low and high frequency */ + result = speedstep_smi_get_freqs(&speedstep_freqs[SPEEDSTEP_LOW].frequency, + &speedstep_freqs[SPEEDSTEP_HIGH].frequency); + if (result) { + /* fall back to speedstep_lib.c dection mechanism: try both states out */ + dprintk("could not detect low and high frequencies by SMI call.\n"); + result = speedstep_get_freqs(speedstep_processor, + &speedstep_freqs[SPEEDSTEP_LOW].frequency, + &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + NULL, + &speedstep_set_state); + + if (result) { + dprintk("could not detect two different speeds -- aborting.\n"); + return result; + } else + dprintk("workaround worked.\n"); + } + + /* get current speed setting */ + state = speedstep_get_state(); + speed = speedstep_freqs[state].frequency; + + dprintk("currently at %s speed setting - %i MHz\n", + (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", + (speed / 1000)); + + /* cpuinfo and default policy values */ + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = speed; + + result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); + + return 0; +} + +static int speedstep_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static unsigned int speedstep_get(unsigned int cpu) +{ + if (cpu) + return -ENODEV; + return speedstep_get_processor_frequency(speedstep_processor); +} + + +static int speedstep_resume(struct cpufreq_policy *policy) +{ + int result = speedstep_smi_ownership(); + + if (result) + dprintk("fails in re-aquiring ownership of a SMI interface.\n"); + + return result; +} + +static struct freq_attr* speedstep_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver speedstep_driver = { + .name = "speedstep-smi", + .verify = speedstep_verify, + .target = speedstep_target, + .init = speedstep_cpu_init, + .exit = speedstep_cpu_exit, + .get = speedstep_get, + .resume = speedstep_resume, + .owner = THIS_MODULE, + .attr = speedstep_attr, +}; + +/** + * speedstep_init - initializes the SpeedStep CPUFreq driver + * + * Initializes the SpeedStep support. Returns -ENODEV on unsupported + * BIOS, -EINVAL on problems during initiatization, and zero on + * success. + */ +static int __init speedstep_init(void) +{ + speedstep_processor = speedstep_detect_processor(); + + switch (speedstep_processor) { + case SPEEDSTEP_PROCESSOR_PIII_T: + case SPEEDSTEP_PROCESSOR_PIII_C: + case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: + break; + default: + speedstep_processor = 0; + } + + if (!speedstep_processor) { + dprintk ("No supported Intel CPU detected.\n"); + return -ENODEV; + } + + dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n", + ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level); + + /* Error if no IST-SMI BIOS or no PARM + sig= 'ISGE' aka 'Intel Speedstep Gate E' */ + if ((ist_info.signature != 0x47534943) && ( + (smi_port == 0) || (smi_cmd == 0))) + return -ENODEV; + + if (smi_sig == 1) + smi_sig = 0x47534943; + else + smi_sig = ist_info.signature; + + /* setup smi_port from MODLULE_PARM or BIOS */ + if ((smi_port > 0xff) || (smi_port < 0)) + return -EINVAL; + else if (smi_port == 0) + smi_port = ist_info.command & 0xff; + + if ((smi_cmd > 0xff) || (smi_cmd < 0)) + return -EINVAL; + else if (smi_cmd == 0) + smi_cmd = (ist_info.command >> 16) & 0xff; + + return cpufreq_register_driver(&speedstep_driver); +} + + +/** + * speedstep_exit - unregisters SpeedStep support + * + * Unregisters SpeedStep support. + */ +static void __exit speedstep_exit(void) +{ + cpufreq_unregister_driver(&speedstep_driver); +} + +module_param(smi_port, int, 0444); +module_param(smi_cmd, int, 0444); +module_param(smi_sig, uint, 0444); + +MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value -- Intel's default setting is 0xb2"); +MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value -- Intel's default setting is 0x82"); +MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the SMI interface."); + +MODULE_AUTHOR ("Hiroshi Miura"); +MODULE_DESCRIPTION ("Speedstep driver for IST applet SMI interface."); +MODULE_LICENSE ("GPL"); + +module_init(speedstep_init); +module_exit(speedstep_exit); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c new file mode 100644 index 00000000000..122d2d75aa9 --- /dev/null +++ b/arch/x86/kernel/cpu/cyrix.c @@ -0,0 +1,463 @@ +#include <linux/init.h> +#include <linux/bitops.h> +#include <linux/delay.h> +#include <linux/pci.h> +#include <asm/dma.h> +#include <asm/io.h> +#include <asm/processor-cyrix.h> +#include <asm/timer.h> +#include <asm/pci-direct.h> +#include <asm/tsc.h> + +#include "cpu.h" + +/* + * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU + */ +static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +{ + unsigned char ccr2, ccr3; + unsigned long flags; + + /* we test for DEVID by checking whether CCR3 is writable */ + local_irq_save(flags); + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, ccr3 ^ 0x80); + getCx86(0xc0); /* dummy to change bus */ + + if (getCx86(CX86_CCR3) == ccr3) { /* no DEVID regs. */ + ccr2 = getCx86(CX86_CCR2); + setCx86(CX86_CCR2, ccr2 ^ 0x04); + getCx86(0xc0); /* dummy */ + + if (getCx86(CX86_CCR2) == ccr2) /* old Cx486SLC/DLC */ + *dir0 = 0xfd; + else { /* Cx486S A step */ + setCx86(CX86_CCR2, ccr2); + *dir0 = 0xfe; + } + } + else { + setCx86(CX86_CCR3, ccr3); /* restore CCR3 */ + + /* read DIR0 and DIR1 CPU registers */ + *dir0 = getCx86(CX86_DIR0); + *dir1 = getCx86(CX86_DIR1); + } + local_irq_restore(flags); +} + +/* + * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in + * order to identify the Cyrix CPU model after we're out of setup.c + * + * Actually since bugs.h doesn't even reference this perhaps someone should + * fix the documentation ??? + */ +static unsigned char Cx86_dir0_msb __cpuinitdata = 0; + +static char Cx86_model[][9] __cpuinitdata = { + "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", + "M II ", "Unknown" +}; +static char Cx486_name[][5] __cpuinitdata = { + "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", + "SRx2", "DRx2" +}; +static char Cx486S_name[][4] __cpuinitdata = { + "S", "S2", "Se", "S2e" +}; +static char Cx486D_name[][4] __cpuinitdata = { + "DX", "DX2", "?", "?", "?", "DX4" +}; +static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock"; +static char cyrix_model_mult1[] __cpuinitdata = "12??43"; +static char cyrix_model_mult2[] __cpuinitdata = "12233445"; + +/* + * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old + * BIOSes for compatibility with DOS games. This makes the udelay loop + * work correctly, and improves performance. + * + * FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP + */ + +extern void calibrate_delay(void) __init; + +static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c) +{ + unsigned long flags; + + if (Cx86_dir0_msb == 3) { + unsigned char ccr3, ccr5; + + local_irq_save(flags); + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + ccr5 = getCx86(CX86_CCR5); + if (ccr5 & 2) + setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */ + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + local_irq_restore(flags); + + if (ccr5 & 2) { /* possible wrong calibration done */ + printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n"); + calibrate_delay(); + c->loops_per_jiffy = loops_per_jiffy; + } + } +} + + +static void __cpuinit set_cx86_reorder(void) +{ + u8 ccr3; + + printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n"); + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + + /* Load/Store Serialize to mem access disable (=reorder it) */ + setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); + /* set load/store serialize from 1GB to 4GB */ + ccr3 |= 0xe0; + setCx86(CX86_CCR3, ccr3); +} + +static void __cpuinit set_cx86_memwb(void) +{ + u32 cr0; + + printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); + + /* CCR2 bit 2: unlock NW bit */ + setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); + /* set 'Not Write-through' */ + cr0 = 0x20000000; + write_cr0(read_cr0() | cr0); + /* CCR2 bit 2: lock NW bit and set WT1 */ + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); +} + +static void __cpuinit set_cx86_inc(void) +{ + unsigned char ccr3; + + printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n"); + + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + /* PCR1 -- Performance Control */ + /* Incrementor on, whatever that is */ + setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); + /* PCR0 -- Performance Control */ + /* Incrementor Margin 10 */ + setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ +} + +/* + * Configure later MediaGX and/or Geode processor. + */ + +static void __cpuinit geode_configure(void) +{ + unsigned long flags; + u8 ccr3; + local_irq_save(flags); + + /* Suspend on halt power saving and enable #SUSP pin */ + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); + + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + + + /* FPU fast, DTE cache, Mem bypass */ + setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + + set_cx86_memwb(); + set_cx86_reorder(); + set_cx86_inc(); + + local_irq_restore(flags); +} + + +static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) +{ + unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; + char *buf = c->x86_model_id; + const char *p = NULL; + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, c->x86_capability); + + /* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */ + if ( test_bit(1*32+24, c->x86_capability) ) { + clear_bit(1*32+24, c->x86_capability); + set_bit(X86_FEATURE_CXMMX, c->x86_capability); + } + + do_cyrix_devid(&dir0, &dir1); + + check_cx686_slop(c); + + Cx86_dir0_msb = dir0_msn = dir0 >> 4; /* identifies CPU "family" */ + dir0_lsn = dir0 & 0xf; /* model or clock multiplier */ + + /* common case step number/rev -- exceptions handled below */ + c->x86_model = (dir1 >> 4) + 1; + c->x86_mask = dir1 & 0xf; + + /* Now cook; the original recipe is by Channing Corn, from Cyrix. + * We do the same thing for each generation: we work out + * the model, multiplier and stepping. Black magic included, + * to make the silicon step/rev numbers match the printed ones. + */ + + switch (dir0_msn) { + unsigned char tmp; + + case 0: /* Cx486SLC/DLC/SRx/DRx */ + p = Cx486_name[dir0_lsn & 7]; + break; + + case 1: /* Cx486S/DX/DX2/DX4 */ + p = (dir0_lsn & 8) ? Cx486D_name[dir0_lsn & 5] + : Cx486S_name[dir0_lsn & 3]; + break; + + case 2: /* 5x86 */ + Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; + p = Cx86_cb+2; + break; + + case 3: /* 6x86/6x86L */ + Cx86_cb[1] = ' '; + Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; + if (dir1 > 0x21) { /* 686L */ + Cx86_cb[0] = 'L'; + p = Cx86_cb; + (c->x86_model)++; + } else /* 686 */ + p = Cx86_cb+1; + /* Emulate MTRRs using Cyrix's ARRs. */ + set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); + /* 6x86's contain this bug */ + c->coma_bug = 1; + break; + + case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */ +#ifdef CONFIG_PCI + { + u32 vendor, device; + /* It isn't really a PCI quirk directly, but the cure is the + same. The MediaGX has deep magic SMM stuff that handles the + SB emulation. It thows away the fifo on disable_dma() which + is wrong and ruins the audio. + + Bug2: VSA1 has a wrap bug so that using maximum sized DMA + causes bad things. According to NatSemi VSA2 has another + bug to do with 'hlt'. I've not seen any boards using VSA2 + and X doesn't seem to support it either so who cares 8). + VSA1 we work around however. + */ + + printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); + isa_dma_bridge_buggy = 2; + + /* We do this before the PCI layer is running. However we + are safe here as we know the bridge must be a Cyrix + companion and must be present */ + vendor = read_pci_config_16(0, 0, 0x12, PCI_VENDOR_ID); + device = read_pci_config_16(0, 0, 0x12, PCI_DEVICE_ID); + + /* + * The 5510/5520 companion chips have a funky PIT. + */ + if (vendor == PCI_VENDOR_ID_CYRIX && + (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) + mark_tsc_unstable("cyrix 5510/5520 detected"); + } +#endif + c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ + + /* GXm supports extended cpuid levels 'ala' AMD */ + if (c->cpuid_level == 2) { + /* Enable cxMMX extensions (GX1 Datasheet 54) */ + setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); + + /* + * GXm : 0x30 ... 0x5f GXm datasheet 51 + * GXlv: 0x6x GXlv datasheet 54 + * ? : 0x7x + * GX1 : 0x8x GX1 datasheet 56 + */ + if((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <=dir1 && dir1 <= 0x8f)) + geode_configure(); + get_model_name(c); /* get CPU marketing name */ + return; + } + else { /* MediaGX */ + Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; + p = Cx86_cb+2; + c->x86_model = (dir1 & 0x20) ? 1 : 2; + } + break; + + case 5: /* 6x86MX/M II */ + if (dir1 > 7) + { + dir0_msn++; /* M II */ + /* Enable MMX extensions (App note 108) */ + setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); + } + else + { + c->coma_bug = 1; /* 6x86MX, it has the bug. */ + } + tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0; + Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7]; + p = Cx86_cb+tmp; + if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20)) + (c->x86_model)++; + /* Emulate MTRRs using Cyrix's ARRs. */ + set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); + break; + + case 0xf: /* Cyrix 486 without DEVID registers */ + switch (dir0_lsn) { + case 0xd: /* either a 486SLC or DLC w/o DEVID */ + dir0_msn = 0; + p = Cx486_name[(c->hard_math) ? 1 : 0]; + break; + + case 0xe: /* a 486S A step */ + dir0_msn = 0; + p = Cx486S_name[0]; + break; + } + break; + + default: /* unknown (shouldn't happen, we know everyone ;-) */ + dir0_msn = 7; + break; + } + strcpy(buf, Cx86_model[dir0_msn & 7]); + if (p) strcat(buf, p); + return; +} + +/* + * Handle National Semiconductor branded processors + */ +static void __cpuinit init_nsc(struct cpuinfo_x86 *c) +{ + /* There may be GX1 processors in the wild that are branded + * NSC and not Cyrix. + * + * This function only handles the GX processor, and kicks every + * thing else to the Cyrix init function above - that should + * cover any processors that might have been branded differently + * after NSC acquired Cyrix. + * + * If this breaks your GX1 horribly, please e-mail + * info-linux@ldcmail.amd.com to tell us. + */ + + /* Handle the GX (Formally known as the GX2) */ + + if (c->x86 == 5 && c->x86_model == 5) + display_cacheinfo(c); + else + init_cyrix(c); +} + +/* + * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected + * by the fact that they preserve the flags across the division of 5/2. + * PII and PPro exhibit this behavior too, but they have cpuid available. + */ + +/* + * Perform the Cyrix 5/2 test. A Cyrix won't change + * the flags, while other 486 chips will. + */ +static inline int test_cyrix_52div(void) +{ + unsigned int test; + + __asm__ __volatile__( + "sahf\n\t" /* clear flags (%eax = 0x0005) */ + "div %b2\n\t" /* divide 5 by 2 */ + "lahf" /* store flags into %ah */ + : "=a" (test) + : "0" (5), "q" (2) + : "cc"); + + /* AH is 0x02 on Cyrix after the divide.. */ + return (unsigned char) (test >> 8) == 0x02; +} + +static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c) +{ + /* Detect Cyrix with disabled CPUID */ + if ( c->x86 == 4 && test_cyrix_52div() ) { + unsigned char dir0, dir1; + + strcpy(c->x86_vendor_id, "CyrixInstead"); + c->x86_vendor = X86_VENDOR_CYRIX; + + /* Actually enable cpuid on the older cyrix */ + + /* Retrieve CPU revisions */ + + do_cyrix_devid(&dir0, &dir1); + + dir0>>=4; + + /* Check it is an affected model */ + + if (dir0 == 5 || dir0 == 3) + { + unsigned char ccr3; + unsigned long flags; + printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); + local_irq_save(flags); + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + local_irq_restore(flags); + } + } +} + +static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { + .c_vendor = "Cyrix", + .c_ident = { "CyrixInstead" }, + .c_init = init_cyrix, + .c_identify = cyrix_identify, +}; + +int __init cyrix_init_cpu(void) +{ + cpu_devs[X86_VENDOR_CYRIX] = &cyrix_cpu_dev; + return 0; +} + +static struct cpu_dev nsc_cpu_dev __cpuinitdata = { + .c_vendor = "NSC", + .c_ident = { "Geode by NSC" }, + .c_init = init_nsc, +}; + +int __init nsc_init_cpu(void) +{ + cpu_devs[X86_VENDOR_NSC] = &nsc_cpu_dev; + return 0; +} + diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c new file mode 100644 index 00000000000..dc4e08147b1 --- /dev/null +++ b/arch/x86/kernel/cpu/intel.c @@ -0,0 +1,333 @@ +#include <linux/init.h> +#include <linux/kernel.h> + +#include <linux/string.h> +#include <linux/bitops.h> +#include <linux/smp.h> +#include <linux/thread_info.h> +#include <linux/module.h> + +#include <asm/processor.h> +#include <asm/msr.h> +#include <asm/uaccess.h> + +#include "cpu.h" + +#ifdef CONFIG_X86_LOCAL_APIC +#include <asm/mpspec.h> +#include <asm/apic.h> +#include <mach_apic.h> +#endif + +extern int trap_init_f00f_bug(void); + +#ifdef CONFIG_X86_INTEL_USERCOPY +/* + * Alignment at which movsl is preferred for bulk memory copies. + */ +struct movsl_mask movsl_mask __read_mostly; +#endif + +void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c) +{ + if (c->x86_vendor != X86_VENDOR_INTEL) + return; + /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ + if (c->x86 == 15 && c->x86_cache_alignment == 64) + c->x86_cache_alignment = 128; +} + +/* + * Early probe support logic for ppro memory erratum #50 + * + * This is called before we do cpu ident work + */ + +int __cpuinit ppro_with_ram_bug(void) +{ + /* Uses data from early_cpu_detect now */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == 1 && + boot_cpu_data.x86_mask < 8) { + printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); + return 1; + } + return 0; +} + + +/* + * P4 Xeon errata 037 workaround. + * Hardware prefetcher may cause stale data to be loaded into the cache. + */ +static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) +{ + unsigned long lo, hi; + + if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { + rdmsr (MSR_IA32_MISC_ENABLE, lo, hi); + if ((lo & (1<<9)) == 0) { + printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); + printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); + lo |= (1<<9); /* Disable hw prefetching */ + wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); + } + } +} + + +/* + * find out the number of processor cores on the die + */ +static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) +{ + unsigned int eax, ebx, ecx, edx; + + if (c->cpuid_level < 4) + return 1; + + /* Intel has a non-standard dependency on %ecx for this CPUID level. */ + cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); + if (eax & 0x1f) + return ((eax >> 26) + 1); + else + return 1; +} + +static void __cpuinit init_intel(struct cpuinfo_x86 *c) +{ + unsigned int l2 = 0; + char *p = NULL; + +#ifdef CONFIG_X86_F00F_BUG + /* + * All current models of Pentium and Pentium with MMX technology CPUs + * have the F0 0F bug, which lets nonprivileged users lock up the system. + * Note that the workaround only should be initialized once... + */ + c->f00f_bug = 0; + if (!paravirt_enabled() && c->x86 == 5) { + static int f00f_workaround_enabled = 0; + + c->f00f_bug = 1; + if ( !f00f_workaround_enabled ) { + trap_init_f00f_bug(); + printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + f00f_workaround_enabled = 1; + } + } +#endif + + select_idle_routine(c); + l2 = init_intel_cacheinfo(c); + if (c->cpuid_level > 9 ) { + unsigned eax = cpuid_eax(10); + /* Check for version and the number of counters */ + if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) + set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); + } + + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ + if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + clear_bit(X86_FEATURE_SEP, c->x86_capability); + + /* Names for the Pentium II/Celeron processors + detectable only by also checking the cache size. + Dixon is NOT a Celeron. */ + if (c->x86 == 6) { + switch (c->x86_model) { + case 5: + if (c->x86_mask == 0) { + if (l2 == 0) + p = "Celeron (Covington)"; + else if (l2 == 256) + p = "Mobile Pentium II (Dixon)"; + } + break; + + case 6: + if (l2 == 128) + p = "Celeron (Mendocino)"; + else if (c->x86_mask == 0 || c->x86_mask == 5) + p = "Celeron-A"; + break; + + case 8: + if (l2 == 128) + p = "Celeron (Coppermine)"; + break; + } + } + + if ( p ) + strcpy(c->x86_model_id, p); + + c->x86_max_cores = num_cpu_cores(c); + + detect_ht(c); + + /* Work around errata */ + Intel_errata_workarounds(c); + +#ifdef CONFIG_X86_INTEL_USERCOPY + /* + * Set up the preferred alignment for movsl bulk memory moves + */ + switch (c->x86) { + case 4: /* 486: untested */ + break; + case 5: /* Old Pentia: untested */ + break; + case 6: /* PII/PIII only like movsl with 8-byte alignment */ + movsl_mask.mask = 7; + break; + case 15: /* P4 is OK down to 8-byte alignment */ + movsl_mask.mask = 7; + break; + } +#endif + + if (c->x86 == 15) { + set_bit(X86_FEATURE_P4, c->x86_capability); + set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability); + } + if (c->x86 == 6) + set_bit(X86_FEATURE_P3, c->x86_capability); + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); + + if (cpu_has_ds) { + unsigned int l1; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_bit(X86_FEATURE_BTS, c->x86_capability); + if (!(l1 & (1<<12))) + set_bit(X86_FEATURE_PEBS, c->x86_capability); + } +} + +static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) +{ + /* Intel PIII Tualatin. This comes in two flavours. + * One has 256kb of cache, the other 512. We have no way + * to determine which, so we use a boottime override + * for the 512kb model, and assume 256 otherwise. + */ + if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) + size = 256; + return size; +} + +static struct cpu_dev intel_cpu_dev __cpuinitdata = { + .c_vendor = "Intel", + .c_ident = { "GenuineIntel" }, + .c_models = { + { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = + { + [0] = "486 DX-25/33", + [1] = "486 DX-50", + [2] = "486 SX", + [3] = "486 DX/2", + [4] = "486 SL", + [5] = "486 SX/2", + [7] = "486 DX/2-WB", + [8] = "486 DX/4", + [9] = "486 DX/4-WB" + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = + { + [0] = "Pentium 60/66 A-step", + [1] = "Pentium 60/66", + [2] = "Pentium 75 - 200", + [3] = "OverDrive PODP5V83", + [4] = "Pentium MMX", + [7] = "Mobile Pentium 75 - 200", + [8] = "Mobile Pentium MMX" + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = + { + [0] = "Pentium Pro A-step", + [1] = "Pentium Pro", + [3] = "Pentium II (Klamath)", + [4] = "Pentium II (Deschutes)", + [5] = "Pentium II (Deschutes)", + [6] = "Mobile Pentium II", + [7] = "Pentium III (Katmai)", + [8] = "Pentium III (Coppermine)", + [10] = "Pentium III (Cascades)", + [11] = "Pentium III (Tualatin)", + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names = + { + [0] = "Pentium 4 (Unknown)", + [1] = "Pentium 4 (Willamette)", + [2] = "Pentium 4 (Northwood)", + [4] = "Pentium 4 (Foster)", + [5] = "Pentium 4 (Foster)", + } + }, + }, + .c_init = init_intel, + .c_size_cache = intel_size_cache, +}; + +__init int intel_cpu_init(void) +{ + cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev; + return 0; +} + +#ifndef CONFIG_X86_CMPXCHG +unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) +{ + u8 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u8 *)ptr; + if (prev == old) + *(u8 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u8); + +unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) +{ + u16 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u16 *)ptr; + if (prev == old) + *(u16 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u16); + +unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) +{ + u32 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u32 *)ptr; + if (prev == old) + *(u32 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u32); +#endif + +// arch_initcall(intel_cpu_init); + diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c new file mode 100644 index 00000000000..db6c25aa577 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -0,0 +1,806 @@ +/* + * Routines to indentify caches on Intel CPU. + * + * Changes: + * Venkatesh Pallipadi : Adding cache identification through cpuid(4) + * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. + * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. + */ + +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/device.h> +#include <linux/compiler.h> +#include <linux/cpu.h> +#include <linux/sched.h> + +#include <asm/processor.h> +#include <asm/smp.h> + +#define LVL_1_INST 1 +#define LVL_1_DATA 2 +#define LVL_2 3 +#define LVL_3 4 +#define LVL_TRACE 5 + +struct _cache_table +{ + unsigned char descriptor; + char cache_type; + short size; +}; + +/* all the cache descriptor types we care about (no TLB or trace cache entries) */ +static struct _cache_table cache_table[] __cpuinitdata = +{ + { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ + { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ + { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ + { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ + { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ + { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ + { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */ + { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */ + { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */ + { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ + { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ + { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ + { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */ + { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */ + { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */ + { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */ + { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ + { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */ + { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ + { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ + { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ + { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */ + { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ + { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ + { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ + { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */ + { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */ + { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ + { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ + { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ + { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */ + { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ + { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ + { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ + { 0x00, 0, 0} +}; + + +enum _cache_type +{ + CACHE_TYPE_NULL = 0, + CACHE_TYPE_DATA = 1, + CACHE_TYPE_INST = 2, + CACHE_TYPE_UNIFIED = 3 +}; + +union _cpuid4_leaf_eax { + struct { + enum _cache_type type:5; + unsigned int level:3; + unsigned int is_self_initializing:1; + unsigned int is_fully_associative:1; + unsigned int reserved:4; + unsigned int num_threads_sharing:12; + unsigned int num_cores_on_die:6; + } split; + u32 full; +}; + +union _cpuid4_leaf_ebx { + struct { + unsigned int coherency_line_size:12; + unsigned int physical_line_partition:10; + unsigned int ways_of_associativity:10; + } split; + u32 full; +}; + +union _cpuid4_leaf_ecx { + struct { + unsigned int number_of_sets:32; + } split; + u32 full; +}; + +struct _cpuid4_info { + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned long size; + cpumask_t shared_cpu_map; +}; + +unsigned short num_cache_leaves; + +/* AMD doesn't have CPUID4. Emulate it here to report the same + information to the user. This makes some assumptions about the machine: + L2 not shared, no SMT etc. that is currently true on AMD CPUs. + + In theory the TLBs could be reported as fake type (they are in "dummy"). + Maybe later */ +union l1_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 8; + unsigned assoc : 8; + unsigned size_in_kb : 8; + }; + unsigned val; +}; + +union l2_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 4; + unsigned assoc : 4; + unsigned size_in_kb : 16; + }; + unsigned val; +}; + +union l3_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 4; + unsigned assoc : 4; + unsigned res : 2; + unsigned size_encoded : 14; + }; + unsigned val; +}; + +static const unsigned short assocs[] = { + [1] = 1, [2] = 2, [4] = 4, [6] = 8, + [8] = 16, [0xa] = 32, [0xb] = 48, + [0xc] = 64, + [0xf] = 0xffff // ?? +}; + +static const unsigned char levels[] = { 1, 1, 2, 3 }; +static const unsigned char types[] = { 1, 2, 3, 3 }; + +static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, + union _cpuid4_leaf_ebx *ebx, + union _cpuid4_leaf_ecx *ecx) +{ + unsigned dummy; + unsigned line_size, lines_per_tag, assoc, size_in_kb; + union l1_cache l1i, l1d; + union l2_cache l2; + union l3_cache l3; + union l1_cache *l1 = &l1d; + + eax->full = 0; + ebx->full = 0; + ecx->full = 0; + + cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); + cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val); + + switch (leaf) { + case 1: + l1 = &l1i; + case 0: + if (!l1->val) + return; + assoc = l1->assoc; + line_size = l1->line_size; + lines_per_tag = l1->lines_per_tag; + size_in_kb = l1->size_in_kb; + break; + case 2: + if (!l2.val) + return; + assoc = l2.assoc; + line_size = l2.line_size; + lines_per_tag = l2.lines_per_tag; + /* cpu_data has errata corrections for K7 applied */ + size_in_kb = current_cpu_data.x86_cache_size; + break; + case 3: + if (!l3.val) + return; + assoc = l3.assoc; + line_size = l3.line_size; + lines_per_tag = l3.lines_per_tag; + size_in_kb = l3.size_encoded * 512; + break; + default: + return; + } + + eax->split.is_self_initializing = 1; + eax->split.type = types[leaf]; + eax->split.level = levels[leaf]; + if (leaf == 3) + eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1; + else + eax->split.num_threads_sharing = 0; + eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; + + + if (assoc == 0xf) + eax->split.is_fully_associative = 1; + ebx->split.coherency_line_size = line_size - 1; + ebx->split.ways_of_associativity = assocs[assoc] - 1; + ebx->split.physical_line_partition = lines_per_tag - 1; + ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / + (ebx->split.ways_of_associativity + 1) - 1; +} + +static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +{ + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned edx; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + amd_cpuid4(index, &eax, &ebx, &ecx); + else + cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + if (eax.split.type == CACHE_TYPE_NULL) + return -EIO; /* better error ? */ + + this_leaf->eax = eax; + this_leaf->ebx = ebx; + this_leaf->ecx = ecx; + this_leaf->size = (ecx.split.number_of_sets + 1) * + (ebx.split.coherency_line_size + 1) * + (ebx.split.physical_line_partition + 1) * + (ebx.split.ways_of_associativity + 1); + return 0; +} + +static int __cpuinit find_num_cache_leaves(void) +{ + unsigned int eax, ebx, ecx, edx; + union _cpuid4_leaf_eax cache_eax; + int i = -1; + + do { + ++i; + /* Do cpuid(4) loop to find out num_cache_leaves */ + cpuid_count(4, i, &eax, &ebx, &ecx, &edx); + cache_eax.full = eax; + } while (cache_eax.split.type != CACHE_TYPE_NULL); + return i; +} + +unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) +{ + unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ + unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ + unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ + unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; +#ifdef CONFIG_X86_HT + unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); +#endif + + if (c->cpuid_level > 3) { + static int is_initialized; + + if (is_initialized == 0) { + /* Init num_cache_leaves from boot CPU */ + num_cache_leaves = find_num_cache_leaves(); + is_initialized++; + } + + /* + * Whenever possible use cpuid(4), deterministic cache + * parameters cpuid leaf to find the cache details + */ + for (i = 0; i < num_cache_leaves; i++) { + struct _cpuid4_info this_leaf; + + int retval; + + retval = cpuid4_cache_lookup(i, &this_leaf); + if (retval >= 0) { + switch(this_leaf.eax.split.level) { + case 1: + if (this_leaf.eax.split.type == + CACHE_TYPE_DATA) + new_l1d = this_leaf.size/1024; + else if (this_leaf.eax.split.type == + CACHE_TYPE_INST) + new_l1i = this_leaf.size/1024; + break; + case 2: + new_l2 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l2_id = c->apicid >> index_msb; + break; + case 3: + new_l3 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l3_id = c->apicid >> index_msb; + break; + default: + break; + } + } + } + } + /* + * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for + * trace cache + */ + if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { + /* supports eax=2 call */ + int i, j, n; + int regs[4]; + unsigned char *dp = (unsigned char *)regs; + int only_trace = 0; + + if (num_cache_leaves != 0 && c->x86 == 15) + only_trace = 1; + + /* Number of times to iterate */ + n = cpuid_eax(2) & 0xFF; + + for ( i = 0 ; i < n ; i++ ) { + cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); + + /* If bit 31 is set, this is an unknown format */ + for ( j = 0 ; j < 3 ; j++ ) { + if ( regs[j] < 0 ) regs[j] = 0; + } + + /* Byte 0 is level count, not a descriptor */ + for ( j = 1 ; j < 16 ; j++ ) { + unsigned char des = dp[j]; + unsigned char k = 0; + + /* look up this descriptor in the table */ + while (cache_table[k].descriptor != 0) + { + if (cache_table[k].descriptor == des) { + if (only_trace && cache_table[k].cache_type != LVL_TRACE) + break; + switch (cache_table[k].cache_type) { + case LVL_1_INST: + l1i += cache_table[k].size; + break; + case LVL_1_DATA: + l1d += cache_table[k].size; + break; + case LVL_2: + l2 += cache_table[k].size; + break; + case LVL_3: + l3 += cache_table[k].size; + break; + case LVL_TRACE: + trace += cache_table[k].size; + break; + } + + break; + } + + k++; + } + } + } + } + + if (new_l1d) + l1d = new_l1d; + + if (new_l1i) + l1i = new_l1i; + + if (new_l2) { + l2 = new_l2; +#ifdef CONFIG_X86_HT + cpu_llc_id[cpu] = l2_id; +#endif + } + + if (new_l3) { + l3 = new_l3; +#ifdef CONFIG_X86_HT + cpu_llc_id[cpu] = l3_id; +#endif + } + + if (trace) + printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); + else if ( l1i ) + printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); + + if (l1d) + printk(", L1 D cache: %dK\n", l1d); + else + printk("\n"); + + if (l2) + printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); + + if (l3) + printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + + c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); + + return l2; +} + +/* pointer to _cpuid4_info array (for each cache leaf) */ +static struct _cpuid4_info *cpuid4_info[NR_CPUS]; +#define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y])) + +#ifdef CONFIG_SMP +static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) +{ + struct _cpuid4_info *this_leaf, *sibling_leaf; + unsigned long num_threads_sharing; + int index_msb, i; + struct cpuinfo_x86 *c = cpu_data; + + this_leaf = CPUID4_INFO_IDX(cpu, index); + num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; + + if (num_threads_sharing == 1) + cpu_set(cpu, this_leaf->shared_cpu_map); + else { + index_msb = get_count_order(num_threads_sharing); + + for_each_online_cpu(i) { + if (c[i].apicid >> index_msb == + c[cpu].apicid >> index_msb) { + cpu_set(i, this_leaf->shared_cpu_map); + if (i != cpu && cpuid4_info[i]) { + sibling_leaf = CPUID4_INFO_IDX(i, index); + cpu_set(cpu, sibling_leaf->shared_cpu_map); + } + } + } + } +} +static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) +{ + struct _cpuid4_info *this_leaf, *sibling_leaf; + int sibling; + + this_leaf = CPUID4_INFO_IDX(cpu, index); + for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { + sibling_leaf = CPUID4_INFO_IDX(sibling, index); + cpu_clear(cpu, sibling_leaf->shared_cpu_map); + } +} +#else +static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} +static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {} +#endif + +static void free_cache_attributes(unsigned int cpu) +{ + kfree(cpuid4_info[cpu]); + cpuid4_info[cpu] = NULL; +} + +static int __cpuinit detect_cache_attributes(unsigned int cpu) +{ + struct _cpuid4_info *this_leaf; + unsigned long j; + int retval; + cpumask_t oldmask; + + if (num_cache_leaves == 0) + return -ENOENT; + + cpuid4_info[cpu] = kzalloc( + sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); + if (cpuid4_info[cpu] == NULL) + return -ENOMEM; + + oldmask = current->cpus_allowed; + retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (retval) + goto out; + + /* Do cpuid and store the results */ + retval = 0; + for (j = 0; j < num_cache_leaves; j++) { + this_leaf = CPUID4_INFO_IDX(cpu, j); + retval = cpuid4_cache_lookup(j, this_leaf); + if (unlikely(retval < 0)) + break; + cache_shared_cpu_map_setup(cpu, j); + } + set_cpus_allowed(current, oldmask); + +out: + if (retval) + free_cache_attributes(cpu); + return retval; +} + +#ifdef CONFIG_SYSFS + +#include <linux/kobject.h> +#include <linux/sysfs.h> + +extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ + +/* pointer to kobject for cpuX/cache */ +static struct kobject * cache_kobject[NR_CPUS]; + +struct _index_kobject { + struct kobject kobj; + unsigned int cpu; + unsigned short index; +}; + +/* pointer to array of kobjects for cpuX/cache/indexY */ +static struct _index_kobject *index_kobject[NR_CPUS]; +#define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y])) + +#define show_one_plus(file_name, object, val) \ +static ssize_t show_##file_name \ + (struct _cpuid4_info *this_leaf, char *buf) \ +{ \ + return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \ +} + +show_one_plus(level, eax.split.level, 0); +show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1); +show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1); +show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1); +show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); + +static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) +{ + return sprintf (buf, "%luK\n", this_leaf->size / 1024); +} + +static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf) +{ + char mask_str[NR_CPUS]; + cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map); + return sprintf(buf, "%s\n", mask_str); +} + +static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { + switch(this_leaf->eax.split.type) { + case CACHE_TYPE_DATA: + return sprintf(buf, "Data\n"); + break; + case CACHE_TYPE_INST: + return sprintf(buf, "Instruction\n"); + break; + case CACHE_TYPE_UNIFIED: + return sprintf(buf, "Unified\n"); + break; + default: + return sprintf(buf, "Unknown\n"); + break; + } +} + +struct _cache_attr { + struct attribute attr; + ssize_t (*show)(struct _cpuid4_info *, char *); + ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); +}; + +#define define_one_ro(_name) \ +static struct _cache_attr _name = \ + __ATTR(_name, 0444, show_##_name, NULL) + +define_one_ro(level); +define_one_ro(type); +define_one_ro(coherency_line_size); +define_one_ro(physical_line_partition); +define_one_ro(ways_of_associativity); +define_one_ro(number_of_sets); +define_one_ro(size); +define_one_ro(shared_cpu_map); + +static struct attribute * default_attrs[] = { + &type.attr, + &level.attr, + &coherency_line_size.attr, + &physical_line_partition.attr, + &ways_of_associativity.attr, + &number_of_sets.attr, + &size.attr, + &shared_cpu_map.attr, + NULL +}; + +#define to_object(k) container_of(k, struct _index_kobject, kobj) +#define to_attr(a) container_of(a, struct _cache_attr, attr) + +static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) +{ + struct _cache_attr *fattr = to_attr(attr); + struct _index_kobject *this_leaf = to_object(kobj); + ssize_t ret; + + ret = fattr->show ? + fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), + buf) : + 0; + return ret; +} + +static ssize_t store(struct kobject * kobj, struct attribute * attr, + const char * buf, size_t count) +{ + return 0; +} + +static struct sysfs_ops sysfs_ops = { + .show = show, + .store = store, +}; + +static struct kobj_type ktype_cache = { + .sysfs_ops = &sysfs_ops, + .default_attrs = default_attrs, +}; + +static struct kobj_type ktype_percpu_entry = { + .sysfs_ops = &sysfs_ops, +}; + +static void cpuid4_cache_sysfs_exit(unsigned int cpu) +{ + kfree(cache_kobject[cpu]); + kfree(index_kobject[cpu]); + cache_kobject[cpu] = NULL; + index_kobject[cpu] = NULL; + free_cache_attributes(cpu); +} + +static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) +{ + + if (num_cache_leaves == 0) + return -ENOENT; + + detect_cache_attributes(cpu); + if (cpuid4_info[cpu] == NULL) + return -ENOENT; + + /* Allocate all required memory */ + cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL); + if (unlikely(cache_kobject[cpu] == NULL)) + goto err_out; + + index_kobject[cpu] = kzalloc( + sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); + if (unlikely(index_kobject[cpu] == NULL)) + goto err_out; + + return 0; + +err_out: + cpuid4_cache_sysfs_exit(cpu); + return -ENOMEM; +} + +/* Add/Remove cache interface for CPU device */ +static int __cpuinit cache_add_dev(struct sys_device * sys_dev) +{ + unsigned int cpu = sys_dev->id; + unsigned long i, j; + struct _index_kobject *this_object; + int retval = 0; + + retval = cpuid4_cache_sysfs_init(cpu); + if (unlikely(retval < 0)) + return retval; + + cache_kobject[cpu]->parent = &sys_dev->kobj; + kobject_set_name(cache_kobject[cpu], "%s", "cache"); + cache_kobject[cpu]->ktype = &ktype_percpu_entry; + retval = kobject_register(cache_kobject[cpu]); + + for (i = 0; i < num_cache_leaves; i++) { + this_object = INDEX_KOBJECT_PTR(cpu,i); + this_object->cpu = cpu; + this_object->index = i; + this_object->kobj.parent = cache_kobject[cpu]; + kobject_set_name(&(this_object->kobj), "index%1lu", i); + this_object->kobj.ktype = &ktype_cache; + retval = kobject_register(&(this_object->kobj)); + if (unlikely(retval)) { + for (j = 0; j < i; j++) { + kobject_unregister( + &(INDEX_KOBJECT_PTR(cpu,j)->kobj)); + } + kobject_unregister(cache_kobject[cpu]); + cpuid4_cache_sysfs_exit(cpu); + break; + } + } + return retval; +} + +static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) +{ + unsigned int cpu = sys_dev->id; + unsigned long i; + + if (cpuid4_info[cpu] == NULL) + return; + for (i = 0; i < num_cache_leaves; i++) { + cache_remove_shared_cpu_map(cpu, i); + kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); + } + kobject_unregister(cache_kobject[cpu]); + cpuid4_cache_sysfs_exit(cpu); + return; +} + +static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + cache_add_dev(sys_dev); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + cache_remove_dev(sys_dev); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = +{ + .notifier_call = cacheinfo_cpu_callback, +}; + +static int __cpuinit cache_sysfs_init(void) +{ + int i; + + if (num_cache_leaves == 0) + return 0; + + register_hotcpu_notifier(&cacheinfo_cpu_notifier); + + for_each_online_cpu(i) { + cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE, + (void *)(long)i); + } + + return 0; +} + +device_initcall(cache_sysfs_init); + +#endif diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile new file mode 100644 index 00000000000..f1ebe1c1c17 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -0,0 +1,2 @@ +obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o +obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c new file mode 100644 index 00000000000..eef63e3630c --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/k7.c @@ -0,0 +1,102 @@ +/* + * Athlon/Hammer specific Machine Check Exception Reporting + * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk> + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/smp.h> + +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/msr.h> + +#include "mce.h" + +/* Machine Check Handler For AMD Athlon/Duron */ +static fastcall void k7_machine_check(struct pt_regs * regs, long error_code) +{ + int recover=1; + u32 alow, ahigh, high, low; + u32 mcgstl, mcgsth; + int i; + + rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + if (mcgstl & (1<<0)) /* Recoverable ? */ + recover=0; + + printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + smp_processor_id(), mcgsth, mcgstl); + + for (i=1; i<nr_mce_banks; i++) { + rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); + if (high&(1<<31)) { + if (high & (1<<29)) + recover |= 1; + if (high & (1<<25)) + recover |= 2; + printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); + high &= ~(1<<31); + if (high & (1<<27)) { + rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); + printk ("[%08x%08x]", ahigh, alow); + } + if (high & (1<<26)) { + rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); + printk (" at %08x%08x", ahigh, alow); + } + printk ("\n"); + /* Clear it */ + wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); + /* Serialize */ + wmb(); + add_taint(TAINT_MACHINE_CHECK); + } + } + + if (recover&2) + panic ("CPU context corrupt"); + if (recover&1) + panic ("Unable to continue"); + printk (KERN_EMERG "Attempting to continue.\n"); + mcgstl &= ~(1<<2); + wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); +} + + +/* AMD K7 machine check is Intel like */ +void amd_mcheck_init(struct cpuinfo_x86 *c) +{ + u32 l, h; + int i; + + if (!cpu_has(c, X86_FEATURE_MCE)) + return; + + machine_check_vector = k7_machine_check; + wmb(); + + printk (KERN_INFO "Intel machine check architecture supported.\n"); + rdmsr (MSR_IA32_MCG_CAP, l, h); + if (l & (1<<8)) /* Control register present ? */ + wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); + nr_mce_banks = l & 0xff; + + /* Clear status for MC index 0 separately, we don't touch CTL, + * as some K7 Athlons cause spurious MCEs when its enabled. */ + if (boot_cpu_data.x86 == 6) { + wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); + i = 1; + } else + i = 0; + for (; i<nr_mce_banks; i++) { + wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); + wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); + } + + set_in_cr4 (X86_CR4_MCE); + printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", + smp_processor_id()); +} diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c new file mode 100644 index 00000000000..34c781eddee --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -0,0 +1,90 @@ +/* + * mce.c - x86 Machine Check Exception Reporting + * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk> + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/smp.h> +#include <linux/thread_info.h> + +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/mce.h> + +#include "mce.h" + +int mce_disabled = 0; +int nr_mce_banks; + +EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ + +/* Handle unconfigured int18 (should never happen) */ +static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code) +{ + printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); +} + +/* Call the installed machine check handler for this CPU setup. */ +void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; + +/* This has to be run for each processor */ +void mcheck_init(struct cpuinfo_x86 *c) +{ + if (mce_disabled==1) + return; + + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + amd_mcheck_init(c); + break; + + case X86_VENDOR_INTEL: + if (c->x86==5) + intel_p5_mcheck_init(c); + if (c->x86==6) + intel_p6_mcheck_init(c); + if (c->x86==15) + intel_p4_mcheck_init(c); + break; + + case X86_VENDOR_CENTAUR: + if (c->x86==5) + winchip_mcheck_init(c); + break; + + default: + break; + } +} + +static unsigned long old_cr4 __initdata; + +void __init stop_mce(void) +{ + old_cr4 = read_cr4(); + clear_in_cr4(X86_CR4_MCE); +} + +void __init restart_mce(void) +{ + if (old_cr4 & X86_CR4_MCE) + set_in_cr4(X86_CR4_MCE); +} + +static int __init mcheck_disable(char *str) +{ + mce_disabled = 1; + return 1; +} + +static int __init mcheck_enable(char *str) +{ + mce_disabled = -1; + return 1; +} + +__setup("nomce", mcheck_disable); +__setup("mce", mcheck_enable); diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h new file mode 100644 index 00000000000..81fb6e2d35f --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce.h @@ -0,0 +1,14 @@ +#include <linux/init.h> +#include <asm/mce.h> + +void amd_mcheck_init(struct cpuinfo_x86 *c); +void intel_p4_mcheck_init(struct cpuinfo_x86 *c); +void intel_p5_mcheck_init(struct cpuinfo_x86 *c); +void intel_p6_mcheck_init(struct cpuinfo_x86 *c); +void winchip_mcheck_init(struct cpuinfo_x86 *c); + +/* Call the installed machine check handler for this CPU setup. */ +extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); + +extern int nr_mce_banks; + diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c new file mode 100644 index 00000000000..bf39409b383 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c @@ -0,0 +1,91 @@ +/* + * Non Fatal Machine Check Exception Reporting + * + * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk> + * + * This file contains routines to check for non-fatal MCEs every 15s + * + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/jiffies.h> +#include <linux/workqueue.h> +#include <linux/interrupt.h> +#include <linux/smp.h> +#include <linux/module.h> + +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/msr.h> + +#include "mce.h" + +static int firstbank; + +#define MCE_RATE 15*HZ /* timer rate is 15s */ + +static void mce_checkregs (void *info) +{ + u32 low, high; + int i; + + for (i=firstbank; i<nr_mce_banks; i++) { + rdmsr (MSR_IA32_MC0_STATUS+i*4, low, high); + + if (high & (1<<31)) { + printk(KERN_INFO "MCE: The hardware reports a non " + "fatal, correctable incident occurred on " + "CPU %d.\n", + smp_processor_id()); + printk (KERN_INFO "Bank %d: %08x%08x\n", i, high, low); + + /* Scrub the error so we don't pick it up in MCE_RATE seconds time. */ + wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); + + /* Serialize */ + wmb(); + add_taint(TAINT_MACHINE_CHECK); + } + } +} + +static void mce_work_fn(struct work_struct *work); +static DECLARE_DELAYED_WORK(mce_work, mce_work_fn); + +static void mce_work_fn(struct work_struct *work) +{ + on_each_cpu(mce_checkregs, NULL, 1, 1); + schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); +} + +static int __init init_nonfatal_mce_checker(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + /* Check for MCE support */ + if (!cpu_has(c, X86_FEATURE_MCE)) + return -ENODEV; + + /* Check for PPro style MCA */ + if (!cpu_has(c, X86_FEATURE_MCA)) + return -ENODEV; + + /* Some Athlons misbehave when we frob bank 0 */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 == 6) + firstbank = 1; + else + firstbank = 0; + + /* + * Check for non-fatal errors every MCE_RATE s + */ + schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); + printk(KERN_INFO "Machine check exception polling timer started.\n"); + return 0; +} +module_init(init_nonfatal_mce_checker); + +MODULE_LICENSE("GPL"); diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c new file mode 100644 index 00000000000..1509edfb231 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -0,0 +1,253 @@ +/* + * P4 specific Machine Check Exception Reporting + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/smp.h> + +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/msr.h> +#include <asm/apic.h> + +#include <asm/therm_throt.h> + +#include "mce.h" + +/* as supported by the P4/Xeon family */ +struct intel_mce_extended_msrs { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + u32 esi; + u32 edi; + u32 ebp; + u32 esp; + u32 eflags; + u32 eip; + /* u32 *reserved[]; */ +}; + +static int mce_num_extended_msrs = 0; + + +#ifdef CONFIG_X86_MCE_P4THERMAL +static void unexpected_thermal_interrupt(struct pt_regs *regs) +{ + printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", + smp_processor_id()); + add_taint(TAINT_MACHINE_CHECK); +} + +/* P4/Xeon Thermal transition interrupt handler */ +static void intel_thermal_interrupt(struct pt_regs *regs) +{ + __u64 msr_val; + + ack_APIC_irq(); + + rdmsrl(MSR_IA32_THERM_STATUS, msr_val); + therm_throt_process(msr_val & 0x1); +} + +/* Thermal interrupt handler for this CPU setup */ +static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; + +fastcall void smp_thermal_interrupt(struct pt_regs *regs) +{ + irq_enter(); + vendor_thermal_interrupt(regs); + irq_exit(); +} + +/* P4/Xeon Thermal regulation detect and init */ +static void intel_init_thermal(struct cpuinfo_x86 *c) +{ + u32 l, h; + unsigned int cpu = smp_processor_id(); + + /* Thermal monitoring */ + if (!cpu_has(c, X86_FEATURE_ACPI)) + return; /* -ENODEV */ + + /* Clock modulation */ + if (!cpu_has(c, X86_FEATURE_ACC)) + return; /* -ENODEV */ + + /* first check if its enabled already, in which case there might + * be some SMM goo which handles it, so we can't even put a handler + * since it might be delivered via SMI already -zwanem. + */ + rdmsr (MSR_IA32_MISC_ENABLE, l, h); + h = apic_read(APIC_LVTTHMR); + if ((l & (1<<3)) && (h & APIC_DM_SMI)) { + printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", + cpu); + return; /* -EBUSY */ + } + + /* check whether a vector already exists, temporarily masked? */ + if (h & APIC_VECTOR_MASK) { + printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " + "installed\n", + cpu, (h & APIC_VECTOR_MASK)); + return; /* -EBUSY */ + } + + /* The temperature transition interrupt handler setup */ + h = THERMAL_APIC_VECTOR; /* our delivery vector */ + h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ + apic_write_around(APIC_LVTTHMR, h); + + rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); + wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); + + /* ok we're good to go... */ + vendor_thermal_interrupt = intel_thermal_interrupt; + + rdmsr (MSR_IA32_MISC_ENABLE, l, h); + wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); + + l = apic_read (APIC_LVTTHMR); + apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); + + /* enable thermal throttle processing */ + atomic_set(&therm_throt_en, 1); + return; +} +#endif /* CONFIG_X86_MCE_P4THERMAL */ + + +/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ +static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) +{ + u32 h; + + rdmsr (MSR_IA32_MCG_EAX, r->eax, h); + rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); + rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); + rdmsr (MSR_IA32_MCG_EDX, r->edx, h); + rdmsr (MSR_IA32_MCG_ESI, r->esi, h); + rdmsr (MSR_IA32_MCG_EDI, r->edi, h); + rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); + rdmsr (MSR_IA32_MCG_ESP, r->esp, h); + rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); + rdmsr (MSR_IA32_MCG_EIP, r->eip, h); +} + +static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) +{ + int recover=1; + u32 alow, ahigh, high, low; + u32 mcgstl, mcgsth; + int i; + + rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + if (mcgstl & (1<<0)) /* Recoverable ? */ + recover=0; + + printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + smp_processor_id(), mcgsth, mcgstl); + + if (mce_num_extended_msrs > 0) { + struct intel_mce_extended_msrs dbg; + intel_get_extended_msrs(&dbg); + printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", + smp_processor_id(), dbg.eip, dbg.eflags); + printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", + dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); + printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", + dbg.esi, dbg.edi, dbg.ebp, dbg.esp); + } + + for (i=0; i<nr_mce_banks; i++) { + rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); + if (high & (1<<31)) { + if (high & (1<<29)) + recover |= 1; + if (high & (1<<25)) + recover |= 2; + printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); + high &= ~(1<<31); + if (high & (1<<27)) { + rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); + printk ("[%08x%08x]", ahigh, alow); + } + if (high & (1<<26)) { + rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); + printk (" at %08x%08x", ahigh, alow); + } + printk ("\n"); + } + } + + if (recover & 2) + panic ("CPU context corrupt"); + if (recover & 1) + panic ("Unable to continue"); + + printk(KERN_EMERG "Attempting to continue.\n"); + /* + * Do not clear the MSR_IA32_MCi_STATUS if the error is not + * recoverable/continuable.This will allow BIOS to look at the MSRs + * for errors if the OS could not log the error. + */ + for (i=0; i<nr_mce_banks; i++) { + u32 msr; + msr = MSR_IA32_MC0_STATUS+i*4; + rdmsr (msr, low, high); + if (high&(1<<31)) { + /* Clear it */ + wrmsr(msr, 0UL, 0UL); + /* Serialize */ + wmb(); + add_taint(TAINT_MACHINE_CHECK); + } + } + mcgstl &= ~(1<<2); + wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); +} + + +void intel_p4_mcheck_init(struct cpuinfo_x86 *c) +{ + u32 l, h; + int i; + + machine_check_vector = intel_machine_check; + wmb(); + + printk (KERN_INFO "Intel machine check architecture supported.\n"); + rdmsr (MSR_IA32_MCG_CAP, l, h); + if (l & (1<<8)) /* Control register present ? */ + wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); + nr_mce_banks = l & 0xff; + + for (i=0; i<nr_mce_banks; i++) { + wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); + wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); + } + + set_in_cr4 (X86_CR4_MCE); + printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", + smp_processor_id()); + + /* Check for P4/Xeon extended MCE MSRs */ + rdmsr (MSR_IA32_MCG_CAP, l, h); + if (l & (1<<9)) {/* MCG_EXT_P */ + mce_num_extended_msrs = (l >> 16) & 0xff; + printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" + " available\n", + smp_processor_id(), mce_num_extended_msrs); + +#ifdef CONFIG_X86_MCE_P4THERMAL + /* Check for P4/Xeon Thermal monitor */ + intel_init_thermal(c); +#endif + } +} diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c new file mode 100644 index 00000000000..94bc43d950c --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -0,0 +1,53 @@ +/* + * P5 specific Machine Check Exception Reporting + * (C) Copyright 2002 Alan Cox <alan@redhat.com> + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/smp.h> + +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/msr.h> + +#include "mce.h" + +/* Machine check handler for Pentium class Intel */ +static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code) +{ + u32 loaddr, hi, lotype; + rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); + rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); + printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype); + if(lotype&(1<<5)) + printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id()); + add_taint(TAINT_MACHINE_CHECK); +} + +/* Set up machine check reporting for processors with Intel style MCE */ +void intel_p5_mcheck_init(struct cpuinfo_x86 *c) +{ + u32 l, h; + + /*Check for MCE support */ + if( !cpu_has(c, X86_FEATURE_MCE) ) + return; + + /* Default P5 to off as its often misconnected */ + if(mce_disabled != -1) + return; + machine_check_vector = pentium_machine_check; + wmb(); + + /* Read registers before enabling */ + rdmsr(MSR_IA32_P5_MC_ADDR, l, h); + rdmsr(MSR_IA32_P5_MC_TYPE, l, h); + printk(KERN_INFO "Intel old style machine check architecture supported.\n"); + + /* Enable MCE */ + set_in_cr4(X86_CR4_MCE); + printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id()); +} diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c new file mode 100644 index 00000000000..deeae42ce19 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/p6.c @@ -0,0 +1,119 @@ +/* + * P6 specific Machine Check Exception Reporting + * (C) Copyright 2002 Alan Cox <alan@redhat.com> + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/smp.h> + +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/msr.h> + +#include "mce.h" + +/* Machine Check Handler For PII/PIII */ +static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) +{ + int recover=1; + u32 alow, ahigh, high, low; + u32 mcgstl, mcgsth; + int i; + + rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + if (mcgstl & (1<<0)) /* Recoverable ? */ + recover=0; + + printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + smp_processor_id(), mcgsth, mcgstl); + + for (i=0; i<nr_mce_banks; i++) { + rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); + if (high & (1<<31)) { + if (high & (1<<29)) + recover |= 1; + if (high & (1<<25)) + recover |= 2; + printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); + high &= ~(1<<31); + if (high & (1<<27)) { + rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); + printk ("[%08x%08x]", ahigh, alow); + } + if (high & (1<<26)) { + rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); + printk (" at %08x%08x", ahigh, alow); + } + printk ("\n"); + } + } + + if (recover & 2) + panic ("CPU context corrupt"); + if (recover & 1) + panic ("Unable to continue"); + + printk (KERN_EMERG "Attempting to continue.\n"); + /* + * Do not clear the MSR_IA32_MCi_STATUS if the error is not + * recoverable/continuable.This will allow BIOS to look at the MSRs + * for errors if the OS could not log the error. + */ + for (i=0; i<nr_mce_banks; i++) { + unsigned int msr; + msr = MSR_IA32_MC0_STATUS+i*4; + rdmsr (msr,low, high); + if (high & (1<<31)) { + /* Clear it */ + wrmsr (msr, 0UL, 0UL); + /* Serialize */ + wmb(); + add_taint(TAINT_MACHINE_CHECK); + } + } + mcgstl &= ~(1<<2); + wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); +} + +/* Set up machine check reporting for processors with Intel style MCE */ +void intel_p6_mcheck_init(struct cpuinfo_x86 *c) +{ + u32 l, h; + int i; + + /* Check for MCE support */ + if (!cpu_has(c, X86_FEATURE_MCE)) + return; + + /* Check for PPro style MCA */ + if (!cpu_has(c, X86_FEATURE_MCA)) + return; + + /* Ok machine check is available */ + machine_check_vector = intel_machine_check; + wmb(); + + printk (KERN_INFO "Intel machine check architecture supported.\n"); + rdmsr (MSR_IA32_MCG_CAP, l, h); + if (l & (1<<8)) /* Control register present ? */ + wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); + nr_mce_banks = l & 0xff; + + /* + * Following the example in IA-32 SDM Vol 3: + * - MC0_CTL should not be written + * - Status registers on all banks should be cleared on reset + */ + for (i=1; i<nr_mce_banks; i++) + wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); + + for (i=0; i<nr_mce_banks; i++) + wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); + + set_in_cr4 (X86_CR4_MCE); + printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", + smp_processor_id()); +} diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c new file mode 100644 index 00000000000..1203dc5ab87 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -0,0 +1,186 @@ +/* + * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c + * + * Thermal throttle event support code (such as syslog messaging and rate + * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). + * This allows consistent reporting of CPU thermal throttle events. + * + * Maintains a counter in /sys that keeps track of the number of thermal + * events, such that the user knows how bad the thermal problem might be + * (since the logging to syslog and mcelog is rate limited). + * + * Author: Dmitriy Zavin (dmitriyz@google.com) + * + * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. + * Inspired by Ross Biro's and Al Borchers' counter code. + */ + +#include <linux/percpu.h> +#include <linux/sysdev.h> +#include <linux/cpu.h> +#include <asm/cpu.h> +#include <linux/notifier.h> +#include <linux/jiffies.h> +#include <asm/therm_throt.h> + +/* How long to wait between reporting thermal events */ +#define CHECK_INTERVAL (300 * HZ) + +static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; +static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); +atomic_t therm_throt_en = ATOMIC_INIT(0); + +#ifdef CONFIG_SYSFS +#define define_therm_throt_sysdev_one_ro(_name) \ + static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) + +#define define_therm_throt_sysdev_show_func(name) \ +static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ + char *buf) \ +{ \ + unsigned int cpu = dev->id; \ + ssize_t ret; \ + \ + preempt_disable(); /* CPU hotplug */ \ + if (cpu_online(cpu)) \ + ret = sprintf(buf, "%lu\n", \ + per_cpu(thermal_throttle_##name, cpu)); \ + else \ + ret = 0; \ + preempt_enable(); \ + \ + return ret; \ +} + +define_therm_throt_sysdev_show_func(count); +define_therm_throt_sysdev_one_ro(count); + +static struct attribute *thermal_throttle_attrs[] = { + &attr_count.attr, + NULL +}; + +static struct attribute_group thermal_throttle_attr_group = { + .attrs = thermal_throttle_attrs, + .name = "thermal_throttle" +}; +#endif /* CONFIG_SYSFS */ + +/*** + * therm_throt_process - Process thermal throttling event from interrupt + * @curr: Whether the condition is current or not (boolean), since the + * thermal interrupt normally gets called both when the thermal + * event begins and once the event has ended. + * + * This function is called by the thermal interrupt after the + * IRQ has been acknowledged. + * + * It will take care of rate limiting and printing messages to the syslog. + * + * Returns: 0 : Event should NOT be further logged, i.e. still in + * "timeout" from previous log message. + * 1 : Event should be logged further, and a message has been + * printed to the syslog. + */ +int therm_throt_process(int curr) +{ + unsigned int cpu = smp_processor_id(); + __u64 tmp_jiffs = get_jiffies_64(); + + if (curr) + __get_cpu_var(thermal_throttle_count)++; + + if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) + return 0; + + __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; + + /* if we just entered the thermal event */ + if (curr) { + printk(KERN_CRIT "CPU%d: Temperature above threshold, " + "cpu clock throttled (total events = %lu)\n", cpu, + __get_cpu_var(thermal_throttle_count)); + + add_taint(TAINT_MACHINE_CHECK); + } else { + printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); + } + + return 1; +} + +#ifdef CONFIG_SYSFS +/* Add/Remove thermal_throttle interface for CPU device */ +static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) +{ + return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); +} + +static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) +{ + return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); +} + +/* Mutex protecting device creation against CPU hotplug */ +static DEFINE_MUTEX(therm_cpu_lock); + +/* Get notified when a cpu comes on/off. Be hotplug friendly. */ +static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + int err; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + mutex_lock(&therm_cpu_lock); + err = thermal_throttle_add_dev(sys_dev); + mutex_unlock(&therm_cpu_lock); + WARN_ON(err); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + mutex_lock(&therm_cpu_lock); + thermal_throttle_remove_dev(sys_dev); + mutex_unlock(&therm_cpu_lock); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block thermal_throttle_cpu_notifier = +{ + .notifier_call = thermal_throttle_cpu_callback, +}; + +static __init int thermal_throttle_init_device(void) +{ + unsigned int cpu = 0; + int err; + + if (!atomic_read(&therm_throt_en)) + return 0; + + register_hotcpu_notifier(&thermal_throttle_cpu_notifier); + +#ifdef CONFIG_HOTPLUG_CPU + mutex_lock(&therm_cpu_lock); +#endif + /* connect live CPUs to sysfs */ + for_each_online_cpu(cpu) { + err = thermal_throttle_add_dev(get_cpu_sysdev(cpu)); + WARN_ON(err); + } +#ifdef CONFIG_HOTPLUG_CPU + mutex_unlock(&therm_cpu_lock); +#endif + + return 0; +} + +device_initcall(thermal_throttle_init_device); +#endif /* CONFIG_SYSFS */ diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c new file mode 100644 index 00000000000..9e424b6c293 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -0,0 +1,36 @@ +/* + * IDT Winchip specific Machine Check Exception Reporting + * (C) Copyright 2002 Alan Cox <alan@redhat.com> + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> + +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/msr.h> + +#include "mce.h" + +/* Machine check handler for WinChip C6 */ +static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code) +{ + printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); + add_taint(TAINT_MACHINE_CHECK); +} + +/* Set up machine check reporting on the Winchip C6 series */ +void winchip_mcheck_init(struct cpuinfo_x86 *c) +{ + u32 lo, hi; + machine_check_vector = winchip_machine_check; + wmb(); + rdmsr(MSR_IDT_FCR1, lo, hi); + lo|= (1<<2); /* Enable EIERRINT (int 18 MCE) */ + lo&= ~(1<<4); /* Enable MCE */ + wrmsr(MSR_IDT_FCR1, lo, hi); + set_in_cr4(X86_CR4_MCE); + printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n"); +} diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile new file mode 100644 index 00000000000..191fc053364 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/Makefile @@ -0,0 +1,3 @@ +obj-y := main.o if.o generic.o state.o +obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o + diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c new file mode 100644 index 00000000000..0949cdbf848 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/amd.c @@ -0,0 +1,121 @@ +#include <linux/init.h> +#include <linux/mm.h> +#include <asm/mtrr.h> +#include <asm/msr.h> + +#include "mtrr.h" + +static void +amd_get_mtrr(unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type * type) +{ + unsigned long low, high; + + rdmsr(MSR_K6_UWCCR, low, high); + /* Upper dword is region 1, lower is region 0 */ + if (reg == 1) + low = high; + /* The base masks off on the right alignment */ + *base = (low & 0xFFFE0000) >> PAGE_SHIFT; + *type = 0; + if (low & 1) + *type = MTRR_TYPE_UNCACHABLE; + if (low & 2) + *type = MTRR_TYPE_WRCOMB; + if (!(low & 3)) { + *size = 0; + return; + } + /* + * This needs a little explaining. The size is stored as an + * inverted mask of bits of 128K granularity 15 bits long offset + * 2 bits + * + * So to get a size we do invert the mask and add 1 to the lowest + * mask bit (4 as its 2 bits in). This gives us a size we then shift + * to turn into 128K blocks + * + * eg 111 1111 1111 1100 is 512K + * + * invert 000 0000 0000 0011 + * +1 000 0000 0000 0100 + * *128K ... + */ + low = (~low) & 0x1FFFC; + *size = (low + 4) << (15 - PAGE_SHIFT); + return; +} + +static void amd_set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +/* [SUMMARY] Set variable MTRR register on the local CPU. + <reg> The register to set. + <base> The base address of the region. + <size> The size of the region. If this is 0 the region is disabled. + <type> The type of the region. + <do_safe> If TRUE, do the change safely. If FALSE, safety measures should + be done externally. + [RETURNS] Nothing. +*/ +{ + u32 regs[2]; + + /* + * Low is MTRR0 , High MTRR 1 + */ + rdmsr(MSR_K6_UWCCR, regs[0], regs[1]); + /* + * Blank to disable + */ + if (size == 0) + regs[reg] = 0; + else + /* Set the register to the base, the type (off by one) and an + inverted bitmask of the size The size is the only odd + bit. We are fed say 512K We invert this and we get 111 1111 + 1111 1011 but if you subtract one and invert you get the + desired 111 1111 1111 1100 mask + + But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */ + regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC) + | (base << PAGE_SHIFT) | (type + 1); + + /* + * The writeback rule is quite specific. See the manual. Its + * disable local interrupts, write back the cache, set the mtrr + */ + wbinvd(); + wrmsr(MSR_K6_UWCCR, regs[0], regs[1]); +} + +static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) +{ + /* Apply the K6 block alignment and size rules + In order + o Uncached or gathering only + o 128K or bigger block + o Power of 2 block + o base suitably aligned to the power + */ + if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT)) + || (size & ~(size - 1)) - size || (base & (size - 1))) + return -EINVAL; + return 0; +} + +static struct mtrr_ops amd_mtrr_ops = { + .vendor = X86_VENDOR_AMD, + .set = amd_set_mtrr, + .get = amd_get_mtrr, + .get_free_region = generic_get_free_region, + .validate_add_page = amd_validate_add_page, + .have_wrcomb = positive_have_wrcomb, +}; + +int __init amd_init_mtrr(void) +{ + set_mtrr_ops(&amd_mtrr_ops); + return 0; +} + +//arch_initcall(amd_mtrr_init); diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c new file mode 100644 index 00000000000..cb9aa3a7a7a --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/centaur.c @@ -0,0 +1,224 @@ +#include <linux/init.h> +#include <linux/mm.h> +#include <asm/mtrr.h> +#include <asm/msr.h> +#include "mtrr.h" + +static struct { + unsigned long high; + unsigned long low; +} centaur_mcr[8]; + +static u8 centaur_mcr_reserved; +static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */ + +/* + * Report boot time MCR setups + */ + +static int +centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) +/* [SUMMARY] Get a free MTRR. + <base> The starting (base) address of the region. + <size> The size (in bytes) of the region. + [RETURNS] The index of the region on success, else -1 on error. +*/ +{ + int i, max; + mtrr_type ltype; + unsigned long lbase, lsize; + + max = num_var_ranges; + if (replace_reg >= 0 && replace_reg < max) + return replace_reg; + for (i = 0; i < max; ++i) { + if (centaur_mcr_reserved & (1 << i)) + continue; + mtrr_if->get(i, &lbase, &lsize, <ype); + if (lsize == 0) + return i; + } + return -ENOSPC; +} + +void +mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) +{ + centaur_mcr[mcr].low = lo; + centaur_mcr[mcr].high = hi; +} + +static void +centaur_get_mcr(unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type * type) +{ + *base = centaur_mcr[reg].high >> PAGE_SHIFT; + *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT; + *type = MTRR_TYPE_WRCOMB; /* If it is there, it is write-combining */ + if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2)) + *type = MTRR_TYPE_UNCACHABLE; + if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25) + *type = MTRR_TYPE_WRBACK; + if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31) + *type = MTRR_TYPE_WRBACK; + +} + +static void centaur_set_mcr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +{ + unsigned long low, high; + + if (size == 0) { + /* Disable */ + high = low = 0; + } else { + high = base << PAGE_SHIFT; + if (centaur_mcr_type == 0) + low = -size << PAGE_SHIFT | 0x1f; /* only support write-combining... */ + else { + if (type == MTRR_TYPE_UNCACHABLE) + low = -size << PAGE_SHIFT | 0x02; /* NC */ + else + low = -size << PAGE_SHIFT | 0x09; /* WWO,WC */ + } + } + centaur_mcr[reg].high = high; + centaur_mcr[reg].low = low; + wrmsr(MSR_IDT_MCR0 + reg, low, high); +} + +#if 0 +/* + * Initialise the later (saner) Winchip MCR variant. In this version + * the BIOS can pass us the registers it has used (but not their values) + * and the control register is read/write + */ + +static void __init +centaur_mcr1_init(void) +{ + unsigned i; + u32 lo, hi; + + /* Unfortunately, MCR's are read-only, so there is no way to + * find out what the bios might have done. + */ + + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */ + lo &= ~0x1C0; /* clear key */ + lo |= 0x040; /* set key to 1 */ + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */ + } + + centaur_mcr_type = 1; + + /* + * Clear any unconfigured MCR's. + */ + + for (i = 0; i < 8; ++i) { + if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) { + if (!(lo & (1 << (9 + i)))) + wrmsr(MSR_IDT_MCR0 + i, 0, 0); + else + /* + * If the BIOS set up an MCR we cannot see it + * but we don't wish to obliterate it + */ + centaur_mcr_reserved |= (1 << i); + } + } + /* + * Throw the main write-combining switch... + * However if OOSTORE is enabled then people have already done far + * cleverer things and we should behave. + */ + + lo |= 15; /* Write combine enables */ + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); +} + +/* + * Initialise the original winchip with read only MCR registers + * no used bitmask for the BIOS to pass on and write only control + */ + +static void __init +centaur_mcr0_init(void) +{ + unsigned i; + + /* Unfortunately, MCR's are read-only, so there is no way to + * find out what the bios might have done. + */ + + /* Clear any unconfigured MCR's. + * This way we are sure that the centaur_mcr array contains the actual + * values. The disadvantage is that any BIOS tweaks are thus undone. + * + */ + for (i = 0; i < 8; ++i) { + if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) + wrmsr(MSR_IDT_MCR0 + i, 0, 0); + } + + wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */ +} + +/* + * Initialise Winchip series MCR registers + */ + +static void __init +centaur_mcr_init(void) +{ + struct set_mtrr_context ctxt; + + set_mtrr_prepare_save(&ctxt); + set_mtrr_cache_disable(&ctxt); + + if (boot_cpu_data.x86_model == 4) + centaur_mcr0_init(); + else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9) + centaur_mcr1_init(); + + set_mtrr_done(&ctxt); +} +#endif + +static int centaur_validate_add_page(unsigned long base, + unsigned long size, unsigned int type) +{ + /* + * FIXME: Winchip2 supports uncached + */ + if (type != MTRR_TYPE_WRCOMB && + (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { + printk(KERN_WARNING + "mtrr: only write-combining%s supported\n", + centaur_mcr_type ? " and uncacheable are" + : " is"); + return -EINVAL; + } + return 0; +} + +static struct mtrr_ops centaur_mtrr_ops = { + .vendor = X86_VENDOR_CENTAUR, +// .init = centaur_mcr_init, + .set = centaur_set_mcr, + .get = centaur_get_mcr, + .get_free_region = centaur_get_free_region, + .validate_add_page = centaur_validate_add_page, + .have_wrcomb = positive_have_wrcomb, +}; + +int __init centaur_init_mtrr(void) +{ + set_mtrr_ops(¢aur_mtrr_ops); + return 0; +} + +//arch_initcall(centaur_init_mtrr); diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c new file mode 100644 index 00000000000..2287d4863a8 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -0,0 +1,380 @@ +#include <linux/init.h> +#include <linux/mm.h> +#include <asm/mtrr.h> +#include <asm/msr.h> +#include <asm/io.h> +#include <asm/processor-cyrix.h> +#include "mtrr.h" + +int arr3_protected; + +static void +cyrix_get_arr(unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type * type) +{ + unsigned long flags; + unsigned char arr, ccr3, rcr, shift; + + arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ + + /* Save flags and disable interrupts */ + local_irq_save(flags); + + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + ((unsigned char *) base)[3] = getCx86(arr); + ((unsigned char *) base)[2] = getCx86(arr + 1); + ((unsigned char *) base)[1] = getCx86(arr + 2); + rcr = getCx86(CX86_RCR_BASE + reg); + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + + /* Enable interrupts if it was enabled previously */ + local_irq_restore(flags); + shift = ((unsigned char *) base)[1] & 0x0f; + *base >>= PAGE_SHIFT; + + /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 + * Note: shift==0xf means 4G, this is unsupported. + */ + if (shift) + *size = (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1); + else + *size = 0; + + /* Bit 0 is Cache Enable on ARR7, Cache Disable on ARR0-ARR6 */ + if (reg < 7) { + switch (rcr) { + case 1: + *type = MTRR_TYPE_UNCACHABLE; + break; + case 8: + *type = MTRR_TYPE_WRBACK; + break; + case 9: + *type = MTRR_TYPE_WRCOMB; + break; + case 24: + default: + *type = MTRR_TYPE_WRTHROUGH; + break; + } + } else { + switch (rcr) { + case 0: + *type = MTRR_TYPE_UNCACHABLE; + break; + case 8: + *type = MTRR_TYPE_WRCOMB; + break; + case 9: + *type = MTRR_TYPE_WRBACK; + break; + case 25: + default: + *type = MTRR_TYPE_WRTHROUGH; + break; + } + } +} + +static int +cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) +/* [SUMMARY] Get a free ARR. + <base> The starting (base) address of the region. + <size> The size (in bytes) of the region. + [RETURNS] The index of the region on success, else -1 on error. +*/ +{ + int i; + mtrr_type ltype; + unsigned long lbase, lsize; + + switch (replace_reg) { + case 7: + if (size < 0x40) + break; + case 6: + case 5: + case 4: + return replace_reg; + case 3: + if (arr3_protected) + break; + case 2: + case 1: + case 0: + return replace_reg; + } + /* If we are to set up a region >32M then look at ARR7 immediately */ + if (size > 0x2000) { + cyrix_get_arr(7, &lbase, &lsize, <ype); + if (lsize == 0) + return 7; + /* Else try ARR0-ARR6 first */ + } else { + for (i = 0; i < 7; i++) { + cyrix_get_arr(i, &lbase, &lsize, <ype); + if ((i == 3) && arr3_protected) + continue; + if (lsize == 0) + return i; + } + /* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */ + cyrix_get_arr(i, &lbase, &lsize, <ype); + if ((lsize == 0) && (size >= 0x40)) + return i; + } + return -ENOSPC; +} + +static u32 cr4 = 0; +static u32 ccr3; + +static void prepare_set(void) +{ + u32 cr0; + + /* Save value of CR4 and clear Page Global Enable (bit 7) */ + if ( cpu_has_pge ) { + cr4 = read_cr4(); + write_cr4(cr4 & ~X86_CR4_PGE); + } + + /* Disable and flush caches. Note that wbinvd flushes the TLBs as + a side-effect */ + cr0 = read_cr0() | 0x40000000; + wbinvd(); + write_cr0(cr0); + wbinvd(); + + /* Cyrix ARRs - everything else were excluded at the top */ + ccr3 = getCx86(CX86_CCR3); + + /* Cyrix ARRs - everything else were excluded at the top */ + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); + +} + +static void post_set(void) +{ + /* Flush caches and TLBs */ + wbinvd(); + + /* Cyrix ARRs - everything else was excluded at the top */ + setCx86(CX86_CCR3, ccr3); + + /* Enable caches */ + write_cr0(read_cr0() & 0xbfffffff); + + /* Restore value of CR4 */ + if ( cpu_has_pge ) + write_cr4(cr4); +} + +static void cyrix_set_arr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +{ + unsigned char arr, arr_type, arr_size; + + arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ + + /* count down from 32M (ARR0-ARR6) or from 2G (ARR7) */ + if (reg >= 7) + size >>= 6; + + size &= 0x7fff; /* make sure arr_size <= 14 */ + for (arr_size = 0; size; arr_size++, size >>= 1) ; + + if (reg < 7) { + switch (type) { + case MTRR_TYPE_UNCACHABLE: + arr_type = 1; + break; + case MTRR_TYPE_WRCOMB: + arr_type = 9; + break; + case MTRR_TYPE_WRTHROUGH: + arr_type = 24; + break; + default: + arr_type = 8; + break; + } + } else { + switch (type) { + case MTRR_TYPE_UNCACHABLE: + arr_type = 0; + break; + case MTRR_TYPE_WRCOMB: + arr_type = 8; + break; + case MTRR_TYPE_WRTHROUGH: + arr_type = 25; + break; + default: + arr_type = 9; + break; + } + } + + prepare_set(); + + base <<= PAGE_SHIFT; + setCx86(arr, ((unsigned char *) &base)[3]); + setCx86(arr + 1, ((unsigned char *) &base)[2]); + setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size); + setCx86(CX86_RCR_BASE + reg, arr_type); + + post_set(); +} + +typedef struct { + unsigned long base; + unsigned long size; + mtrr_type type; +} arr_state_t; + +static arr_state_t arr_state[8] = { + {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, + {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL} +}; + +static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 }; + +static void cyrix_set_all(void) +{ + int i; + + prepare_set(); + + /* the CCRs are not contiguous */ + for (i = 0; i < 4; i++) + setCx86(CX86_CCR0 + i, ccr_state[i]); + for (; i < 7; i++) + setCx86(CX86_CCR4 + i, ccr_state[i]); + for (i = 0; i < 8; i++) + cyrix_set_arr(i, arr_state[i].base, + arr_state[i].size, arr_state[i].type); + + post_set(); +} + +#if 0 +/* + * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection + * with the SMM (System Management Mode) mode. So we need the following: + * Check whether SMI_LOCK (CCR3 bit 0) is set + * if it is set, write a warning message: ARR3 cannot be changed! + * (it cannot be changed until the next processor reset) + * if it is reset, then we can change it, set all the needed bits: + * - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset) + * - disable access to SMM memory (CCR1 bit 2 reset) + * - disable SMM mode (CCR1 bit 1 reset) + * - disable write protection of ARR3 (CCR6 bit 1 reset) + * - (maybe) disable ARR3 + * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set) + */ +static void __init +cyrix_arr_init(void) +{ + struct set_mtrr_context ctxt; + unsigned char ccr[7]; + int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 }; +#ifdef CONFIG_SMP + int i; +#endif + + /* flush cache and enable MAPEN */ + set_mtrr_prepare_save(&ctxt); + set_mtrr_cache_disable(&ctxt); + + /* Save all CCRs locally */ + ccr[0] = getCx86(CX86_CCR0); + ccr[1] = getCx86(CX86_CCR1); + ccr[2] = getCx86(CX86_CCR2); + ccr[3] = ctxt.ccr3; + ccr[4] = getCx86(CX86_CCR4); + ccr[5] = getCx86(CX86_CCR5); + ccr[6] = getCx86(CX86_CCR6); + + if (ccr[3] & 1) { + ccrc[3] = 1; + arr3_protected = 1; + } else { + /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and + * access to SMM memory through ARR3 (bit 7). + */ + if (ccr[1] & 0x80) { + ccr[1] &= 0x7f; + ccrc[1] |= 0x80; + } + if (ccr[1] & 0x04) { + ccr[1] &= 0xfb; + ccrc[1] |= 0x04; + } + if (ccr[1] & 0x02) { + ccr[1] &= 0xfd; + ccrc[1] |= 0x02; + } + arr3_protected = 0; + if (ccr[6] & 0x02) { + ccr[6] &= 0xfd; + ccrc[6] = 1; /* Disable write protection of ARR3 */ + setCx86(CX86_CCR6, ccr[6]); + } + /* Disable ARR3. This is safe now that we disabled SMM. */ + /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */ + } + /* If we changed CCR1 in memory, change it in the processor, too. */ + if (ccrc[1]) + setCx86(CX86_CCR1, ccr[1]); + + /* Enable ARR usage by the processor */ + if (!(ccr[5] & 0x20)) { + ccr[5] |= 0x20; + ccrc[5] = 1; + setCx86(CX86_CCR5, ccr[5]); + } +#ifdef CONFIG_SMP + for (i = 0; i < 7; i++) + ccr_state[i] = ccr[i]; + for (i = 0; i < 8; i++) + cyrix_get_arr(i, + &arr_state[i].base, &arr_state[i].size, + &arr_state[i].type); +#endif + + set_mtrr_done(&ctxt); /* flush cache and disable MAPEN */ + + if (ccrc[5]) + printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n"); + if (ccrc[3]) + printk(KERN_INFO "mtrr: ARR3 cannot be changed\n"); +/* + if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n"); + if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n"); + if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n"); +*/ + if (ccrc[6]) + printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n"); +} +#endif + +static struct mtrr_ops cyrix_mtrr_ops = { + .vendor = X86_VENDOR_CYRIX, +// .init = cyrix_arr_init, + .set_all = cyrix_set_all, + .set = cyrix_set_arr, + .get = cyrix_get_arr, + .get_free_region = cyrix_get_free_region, + .validate_add_page = generic_validate_add_page, + .have_wrcomb = positive_have_wrcomb, +}; + +int __init cyrix_init_mtrr(void) +{ + set_mtrr_ops(&cyrix_mtrr_ops); + return 0; +} + +//arch_initcall(cyrix_init_mtrr); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c new file mode 100644 index 00000000000..56f64e34829 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -0,0 +1,509 @@ +/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong + because MTRRs can span upto 40 bits (36bits on most modern x86) */ +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <asm/io.h> +#include <asm/mtrr.h> +#include <asm/msr.h> +#include <asm/system.h> +#include <asm/cpufeature.h> +#include <asm/tlbflush.h> +#include "mtrr.h" + +struct mtrr_state { + struct mtrr_var_range *var_ranges; + mtrr_type fixed_ranges[NUM_FIXED_RANGES]; + unsigned char enabled; + unsigned char have_fixed; + mtrr_type def_type; +}; + +struct fixed_range_block { + int base_msr; /* start address of an MTRR block */ + int ranges; /* number of MTRRs in this block */ +}; + +static struct fixed_range_block fixed_range_blocks[] = { + { MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */ + { MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */ + { MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */ + {} +}; + +static unsigned long smp_changes_mask; +static struct mtrr_state mtrr_state = {}; + +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "mtrr." + +static int mtrr_show; +module_param_named(show, mtrr_show, bool, 0); + +/* Get the MSR pair relating to a var range */ +static void +get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) +{ + rdmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); + rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); +} + +static void +get_fixed_ranges(mtrr_type * frs) +{ + unsigned int *p = (unsigned int *) frs; + int i; + + rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]); + + for (i = 0; i < 2; i++) + rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]); + for (i = 0; i < 8; i++) + rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]); +} + +void mtrr_save_fixed_ranges(void *info) +{ + if (cpu_has_mtrr) + get_fixed_ranges(mtrr_state.fixed_ranges); +} + +static void print_fixed(unsigned base, unsigned step, const mtrr_type*types) +{ + unsigned i; + + for (i = 0; i < 8; ++i, ++types, base += step) + printk(KERN_INFO "MTRR %05X-%05X %s\n", + base, base + step - 1, mtrr_attrib_to_str(*types)); +} + +/* Grab all of the MTRR state for this CPU into *state */ +void __init get_mtrr_state(void) +{ + unsigned int i; + struct mtrr_var_range *vrs; + unsigned lo, dummy; + + if (!mtrr_state.var_ranges) { + mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range), + GFP_KERNEL); + if (!mtrr_state.var_ranges) + return; + } + vrs = mtrr_state.var_ranges; + + rdmsr(MTRRcap_MSR, lo, dummy); + mtrr_state.have_fixed = (lo >> 8) & 1; + + for (i = 0; i < num_var_ranges; i++) + get_mtrr_var_range(i, &vrs[i]); + if (mtrr_state.have_fixed) + get_fixed_ranges(mtrr_state.fixed_ranges); + + rdmsr(MTRRdefType_MSR, lo, dummy); + mtrr_state.def_type = (lo & 0xff); + mtrr_state.enabled = (lo & 0xc00) >> 10; + + if (mtrr_show) { + int high_width; + + printk(KERN_INFO "MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type)); + if (mtrr_state.have_fixed) { + printk(KERN_INFO "MTRR fixed ranges %sabled:\n", + mtrr_state.enabled & 1 ? "en" : "dis"); + print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); + for (i = 0; i < 2; ++i) + print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); + for (i = 0; i < 8; ++i) + print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); + } + printk(KERN_INFO "MTRR variable ranges %sabled:\n", + mtrr_state.enabled & 2 ? "en" : "dis"); + high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4; + for (i = 0; i < num_var_ranges; ++i) { + if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) + printk(KERN_INFO "MTRR %u base %0*X%05X000 mask %0*X%05X000 %s\n", + i, + high_width, + mtrr_state.var_ranges[i].base_hi, + mtrr_state.var_ranges[i].base_lo >> 12, + high_width, + mtrr_state.var_ranges[i].mask_hi, + mtrr_state.var_ranges[i].mask_lo >> 12, + mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); + else + printk(KERN_INFO "MTRR %u disabled\n", i); + } + } +} + +/* Some BIOS's are fucked and don't set all MTRRs the same! */ +void __init mtrr_state_warn(void) +{ + unsigned long mask = smp_changes_mask; + + if (!mask) + return; + if (mask & MTRR_CHANGE_MASK_FIXED) + printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n"); + if (mask & MTRR_CHANGE_MASK_VARIABLE) + printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n"); + if (mask & MTRR_CHANGE_MASK_DEFTYPE) + printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n"); + printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); + printk(KERN_INFO "mtrr: corrected configuration.\n"); +} + +/* Doesn't attempt to pass an error out to MTRR users + because it's quite complicated in some cases and probably not + worth it because the best error handling is to ignore it. */ +void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) +{ + if (wrmsr_safe(msr, a, b) < 0) + printk(KERN_ERR + "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", + smp_processor_id(), msr, a, b); +} + +/** + * Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs + * see AMD publication no. 24593, chapter 3.2.1 for more information + */ +static inline void k8_enable_fixed_iorrs(void) +{ + unsigned lo, hi; + + rdmsr(MSR_K8_SYSCFG, lo, hi); + mtrr_wrmsr(MSR_K8_SYSCFG, lo + | K8_MTRRFIXRANGE_DRAM_ENABLE + | K8_MTRRFIXRANGE_DRAM_MODIFY, hi); +} + +/** + * Checks and updates an fixed-range MTRR if it differs from the value it + * should have. If K8 extenstions are wanted, update the K8 SYSCFG MSR also. + * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information + * \param msr MSR address of the MTTR which should be checked and updated + * \param changed pointer which indicates whether the MTRR needed to be changed + * \param msrwords pointer to the MSR values which the MSR should have + */ +static void set_fixed_range(int msr, int * changed, unsigned int * msrwords) +{ + unsigned lo, hi; + + rdmsr(msr, lo, hi); + + if (lo != msrwords[0] || hi != msrwords[1]) { + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 == 15 && + ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) + k8_enable_fixed_iorrs(); + mtrr_wrmsr(msr, msrwords[0], msrwords[1]); + *changed = TRUE; + } +} + +int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) +/* [SUMMARY] Get a free MTRR. + <base> The starting (base) address of the region. + <size> The size (in bytes) of the region. + [RETURNS] The index of the region on success, else -1 on error. +*/ +{ + int i, max; + mtrr_type ltype; + unsigned long lbase, lsize; + + max = num_var_ranges; + if (replace_reg >= 0 && replace_reg < max) + return replace_reg; + for (i = 0; i < max; ++i) { + mtrr_if->get(i, &lbase, &lsize, <ype); + if (lsize == 0) + return i; + } + return -ENOSPC; +} + +static void generic_get_mtrr(unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type *type) +{ + unsigned int mask_lo, mask_hi, base_lo, base_hi; + + rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); + if ((mask_lo & 0x800) == 0) { + /* Invalid (i.e. free) range */ + *base = 0; + *size = 0; + *type = 0; + return; + } + + rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); + + /* Work out the shifted address mask. */ + mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) + | mask_lo >> PAGE_SHIFT; + + /* This works correctly if size is a power of two, i.e. a + contiguous range. */ + *size = -mask_lo; + *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; + *type = base_lo & 0xff; +} + +/** + * Checks and updates the fixed-range MTRRs if they differ from the saved set + * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges() + */ +static int set_fixed_ranges(mtrr_type * frs) +{ + unsigned long long *saved = (unsigned long long *) frs; + int changed = FALSE; + int block=-1, range; + + while (fixed_range_blocks[++block].ranges) + for (range=0; range < fixed_range_blocks[block].ranges; range++) + set_fixed_range(fixed_range_blocks[block].base_msr + range, + &changed, (unsigned int *) saved++); + + return changed; +} + +/* Set the MSR pair relating to a var range. Returns TRUE if + changes are made */ +static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) +{ + unsigned int lo, hi; + int changed = FALSE; + + rdmsr(MTRRphysBase_MSR(index), lo, hi); + if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) + || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != + (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { + mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); + changed = TRUE; + } + + rdmsr(MTRRphysMask_MSR(index), lo, hi); + + if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL) + || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != + (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { + mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); + changed = TRUE; + } + return changed; +} + +static u32 deftype_lo, deftype_hi; + +static unsigned long set_mtrr_state(void) +/* [SUMMARY] Set the MTRR state for this CPU. + <state> The MTRR state information to read. + <ctxt> Some relevant CPU context. + [NOTE] The CPU must already be in a safe state for MTRR changes. + [RETURNS] 0 if no changes made, else a mask indication what was changed. +*/ +{ + unsigned int i; + unsigned long change_mask = 0; + + for (i = 0; i < num_var_ranges; i++) + if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i])) + change_mask |= MTRR_CHANGE_MASK_VARIABLE; + + if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges)) + change_mask |= MTRR_CHANGE_MASK_FIXED; + + /* Set_mtrr_restore restores the old value of MTRRdefType, + so to set it we fiddle with the saved value */ + if ((deftype_lo & 0xff) != mtrr_state.def_type + || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { + deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10); + change_mask |= MTRR_CHANGE_MASK_DEFTYPE; + } + + return change_mask; +} + + +static unsigned long cr4 = 0; +static DEFINE_SPINLOCK(set_atomicity_lock); + +/* + * Since we are disabling the cache don't allow any interrupts - they + * would run extremely slow and would only increase the pain. The caller must + * ensure that local interrupts are disabled and are reenabled after post_set() + * has been called. + */ + +static void prepare_set(void) __acquires(set_atomicity_lock) +{ + unsigned long cr0; + + /* Note that this is not ideal, since the cache is only flushed/disabled + for this CPU while the MTRRs are changed, but changing this requires + more invasive changes to the way the kernel boots */ + + spin_lock(&set_atomicity_lock); + + /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ + cr0 = read_cr0() | 0x40000000; /* set CD flag */ + write_cr0(cr0); + wbinvd(); + + /* Save value of CR4 and clear Page Global Enable (bit 7) */ + if ( cpu_has_pge ) { + cr4 = read_cr4(); + write_cr4(cr4 & ~X86_CR4_PGE); + } + + /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ + __flush_tlb(); + + /* Save MTRR state */ + rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); + + /* Disable MTRRs, and set the default type to uncached */ + mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi); +} + +static void post_set(void) __releases(set_atomicity_lock) +{ + /* Flush TLBs (no need to flush caches - they are disabled) */ + __flush_tlb(); + + /* Intel (P6) standard MTRRs */ + mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); + + /* Enable caches */ + write_cr0(read_cr0() & 0xbfffffff); + + /* Restore value of CR4 */ + if ( cpu_has_pge ) + write_cr4(cr4); + spin_unlock(&set_atomicity_lock); +} + +static void generic_set_all(void) +{ + unsigned long mask, count; + unsigned long flags; + + local_irq_save(flags); + prepare_set(); + + /* Actually set the state */ + mask = set_mtrr_state(); + + post_set(); + local_irq_restore(flags); + + /* Use the atomic bitops to update the global mask */ + for (count = 0; count < sizeof mask * 8; ++count) { + if (mask & 0x01) + set_bit(count, &smp_changes_mask); + mask >>= 1; + } + +} + +static void generic_set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +/* [SUMMARY] Set variable MTRR register on the local CPU. + <reg> The register to set. + <base> The base address of the region. + <size> The size of the region. If this is 0 the region is disabled. + <type> The type of the region. + <do_safe> If TRUE, do the change safely. If FALSE, safety measures should + be done externally. + [RETURNS] Nothing. +*/ +{ + unsigned long flags; + struct mtrr_var_range *vr; + + vr = &mtrr_state.var_ranges[reg]; + + local_irq_save(flags); + prepare_set(); + + if (size == 0) { + /* The invalid bit is kept in the mask, so we simply clear the + relevant mask register to disable a range. */ + mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); + memset(vr, 0, sizeof(struct mtrr_var_range)); + } else { + vr->base_lo = base << PAGE_SHIFT | type; + vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT); + vr->mask_lo = -size << PAGE_SHIFT | 0x800; + vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT); + + mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi); + mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi); + } + + post_set(); + local_irq_restore(flags); +} + +int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) +{ + unsigned long lbase, last; + + /* For Intel PPro stepping <= 7, must be 4 MiB aligned + and not touch 0x70000000->0x7003FFFF */ + if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == 1 && + boot_cpu_data.x86_mask <= 7) { + if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { + printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); + return -EINVAL; + } + if (!(base + size < 0x70000 || base > 0x7003F) && + (type == MTRR_TYPE_WRCOMB + || type == MTRR_TYPE_WRBACK)) { + printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); + return -EINVAL; + } + } + + /* Check upper bits of base and last are equal and lower bits are 0 + for base and 1 for last */ + last = base + size - 1; + for (lbase = base; !(lbase & 1) && (last & 1); + lbase = lbase >> 1, last = last >> 1) ; + if (lbase != last) { + printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", + base, size); + return -EINVAL; + } + return 0; +} + + +static int generic_have_wrcomb(void) +{ + unsigned long config, dummy; + rdmsr(MTRRcap_MSR, config, dummy); + return (config & (1 << 10)); +} + +int positive_have_wrcomb(void) +{ + return 1; +} + +/* generic structure... + */ +struct mtrr_ops generic_mtrr_ops = { + .use_intel_if = 1, + .set_all = generic_set_all, + .get = generic_get_mtrr, + .get_free_region = generic_get_free_region, + .set = generic_set_mtrr, + .validate_add_page = generic_validate_add_page, + .have_wrcomb = generic_have_wrcomb, +}; diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c new file mode 100644 index 00000000000..c7d8f175674 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -0,0 +1,439 @@ +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/capability.h> +#include <linux/ctype.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <asm/uaccess.h> + +#define LINE_SIZE 80 + +#include <asm/mtrr.h> +#include "mtrr.h" + +/* RED-PEN: this is accessed without any locking */ +extern unsigned int *usage_table; + + +#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) + +static const char *const mtrr_strings[MTRR_NUM_TYPES] = +{ + "uncachable", /* 0 */ + "write-combining", /* 1 */ + "?", /* 2 */ + "?", /* 3 */ + "write-through", /* 4 */ + "write-protect", /* 5 */ + "write-back", /* 6 */ +}; + +const char *mtrr_attrib_to_str(int x) +{ + return (x <= 6) ? mtrr_strings[x] : "?"; +} + +#ifdef CONFIG_PROC_FS + +static int +mtrr_file_add(unsigned long base, unsigned long size, + unsigned int type, char increment, struct file *file, int page) +{ + int reg, max; + unsigned int *fcount = FILE_FCOUNT(file); + + max = num_var_ranges; + if (fcount == NULL) { + fcount = kzalloc(max * sizeof *fcount, GFP_KERNEL); + if (!fcount) + return -ENOMEM; + FILE_FCOUNT(file) = fcount; + } + if (!page) { + if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) + return -EINVAL; + base >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + } + reg = mtrr_add_page(base, size, type, 1); + if (reg >= 0) + ++fcount[reg]; + return reg; +} + +static int +mtrr_file_del(unsigned long base, unsigned long size, + struct file *file, int page) +{ + int reg; + unsigned int *fcount = FILE_FCOUNT(file); + + if (!page) { + if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) + return -EINVAL; + base >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + } + reg = mtrr_del_page(-1, base, size); + if (reg < 0) + return reg; + if (fcount == NULL) + return reg; + if (fcount[reg] < 1) + return -EINVAL; + --fcount[reg]; + return reg; +} + +/* RED-PEN: seq_file can seek now. this is ignored. */ +static ssize_t +mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) +/* Format of control line: + "base=%Lx size=%Lx type=%s" OR: + "disable=%d" +*/ +{ + int i, err; + unsigned long reg; + unsigned long long base, size; + char *ptr; + char line[LINE_SIZE]; + size_t linelen; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!len) + return -EINVAL; + memset(line, 0, LINE_SIZE); + if (len > LINE_SIZE) + len = LINE_SIZE; + if (copy_from_user(line, buf, len - 1)) + return -EFAULT; + linelen = strlen(line); + ptr = line + linelen - 1; + if (linelen && *ptr == '\n') + *ptr = '\0'; + if (!strncmp(line, "disable=", 8)) { + reg = simple_strtoul(line + 8, &ptr, 0); + err = mtrr_del_page(reg, 0, 0); + if (err < 0) + return err; + return len; + } + if (strncmp(line, "base=", 5)) + return -EINVAL; + base = simple_strtoull(line + 5, &ptr, 0); + for (; isspace(*ptr); ++ptr) ; + if (strncmp(ptr, "size=", 5)) + return -EINVAL; + size = simple_strtoull(ptr + 5, &ptr, 0); + if ((base & 0xfff) || (size & 0xfff)) + return -EINVAL; + for (; isspace(*ptr); ++ptr) ; + if (strncmp(ptr, "type=", 5)) + return -EINVAL; + ptr += 5; + for (; isspace(*ptr); ++ptr) ; + for (i = 0; i < MTRR_NUM_TYPES; ++i) { + if (strcmp(ptr, mtrr_strings[i])) + continue; + base >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + err = + mtrr_add_page((unsigned long) base, (unsigned long) size, i, + 1); + if (err < 0) + return err; + return len; + } + return -EINVAL; +} + +static long +mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) +{ + int err = 0; + mtrr_type type; + unsigned long size; + struct mtrr_sentry sentry; + struct mtrr_gentry gentry; + void __user *arg = (void __user *) __arg; + + switch (cmd) { + case MTRRIOC_ADD_ENTRY: + case MTRRIOC_SET_ENTRY: + case MTRRIOC_DEL_ENTRY: + case MTRRIOC_KILL_ENTRY: + case MTRRIOC_ADD_PAGE_ENTRY: + case MTRRIOC_SET_PAGE_ENTRY: + case MTRRIOC_DEL_PAGE_ENTRY: + case MTRRIOC_KILL_PAGE_ENTRY: + if (copy_from_user(&sentry, arg, sizeof sentry)) + return -EFAULT; + break; + case MTRRIOC_GET_ENTRY: + case MTRRIOC_GET_PAGE_ENTRY: + if (copy_from_user(&gentry, arg, sizeof gentry)) + return -EFAULT; + break; +#ifdef CONFIG_COMPAT + case MTRRIOC32_ADD_ENTRY: + case MTRRIOC32_SET_ENTRY: + case MTRRIOC32_DEL_ENTRY: + case MTRRIOC32_KILL_ENTRY: + case MTRRIOC32_ADD_PAGE_ENTRY: + case MTRRIOC32_SET_PAGE_ENTRY: + case MTRRIOC32_DEL_PAGE_ENTRY: + case MTRRIOC32_KILL_PAGE_ENTRY: { + struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg; + err = get_user(sentry.base, &s32->base); + err |= get_user(sentry.size, &s32->size); + err |= get_user(sentry.type, &s32->type); + if (err) + return err; + break; + } + case MTRRIOC32_GET_ENTRY: + case MTRRIOC32_GET_PAGE_ENTRY: { + struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; + err = get_user(gentry.regnum, &g32->regnum); + err |= get_user(gentry.base, &g32->base); + err |= get_user(gentry.size, &g32->size); + err |= get_user(gentry.type, &g32->type); + if (err) + return err; + break; + } +#endif + } + + switch (cmd) { + default: + return -ENOTTY; + case MTRRIOC_ADD_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_ADD_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = + mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, + file, 0); + break; + case MTRRIOC_SET_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_SET_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); + break; + case MTRRIOC_DEL_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_DEL_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_file_del(sentry.base, sentry.size, file, 0); + break; + case MTRRIOC_KILL_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_KILL_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_del(-1, sentry.base, sentry.size); + break; + case MTRRIOC_GET_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_GET_ENTRY: +#endif + if (gentry.regnum >= num_var_ranges) + return -EINVAL; + mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); + + /* Hide entries that go above 4GB */ + if (gentry.base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)) + || size >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))) + gentry.base = gentry.size = gentry.type = 0; + else { + gentry.base <<= PAGE_SHIFT; + gentry.size = size << PAGE_SHIFT; + gentry.type = type; + } + + break; + case MTRRIOC_ADD_PAGE_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_ADD_PAGE_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = + mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, + file, 1); + break; + case MTRRIOC_SET_PAGE_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_SET_PAGE_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); + break; + case MTRRIOC_DEL_PAGE_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_DEL_PAGE_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_file_del(sentry.base, sentry.size, file, 1); + break; + case MTRRIOC_KILL_PAGE_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_KILL_PAGE_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_del_page(-1, sentry.base, sentry.size); + break; + case MTRRIOC_GET_PAGE_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_GET_PAGE_ENTRY: +#endif + if (gentry.regnum >= num_var_ranges) + return -EINVAL; + mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); + /* Hide entries that would overflow */ + if (size != (__typeof__(gentry.size))size) + gentry.base = gentry.size = gentry.type = 0; + else { + gentry.size = size; + gentry.type = type; + } + break; + } + + if (err) + return err; + + switch(cmd) { + case MTRRIOC_GET_ENTRY: + case MTRRIOC_GET_PAGE_ENTRY: + if (copy_to_user(arg, &gentry, sizeof gentry)) + err = -EFAULT; + break; +#ifdef CONFIG_COMPAT + case MTRRIOC32_GET_ENTRY: + case MTRRIOC32_GET_PAGE_ENTRY: { + struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; + err = put_user(gentry.base, &g32->base); + err |= put_user(gentry.size, &g32->size); + err |= put_user(gentry.regnum, &g32->regnum); + err |= put_user(gentry.type, &g32->type); + break; + } +#endif + } + return err; +} + +static int +mtrr_close(struct inode *ino, struct file *file) +{ + int i, max; + unsigned int *fcount = FILE_FCOUNT(file); + + if (fcount != NULL) { + max = num_var_ranges; + for (i = 0; i < max; ++i) { + while (fcount[i] > 0) { + mtrr_del(i, 0, 0); + --fcount[i]; + } + } + kfree(fcount); + FILE_FCOUNT(file) = NULL; + } + return single_release(ino, file); +} + +static int mtrr_seq_show(struct seq_file *seq, void *offset); + +static int mtrr_open(struct inode *inode, struct file *file) +{ + if (!mtrr_if) + return -EIO; + if (!mtrr_if->get) + return -ENXIO; + return single_open(file, mtrr_seq_show, NULL); +} + +static const struct file_operations mtrr_fops = { + .owner = THIS_MODULE, + .open = mtrr_open, + .read = seq_read, + .llseek = seq_lseek, + .write = mtrr_write, + .unlocked_ioctl = mtrr_ioctl, + .compat_ioctl = mtrr_ioctl, + .release = mtrr_close, +}; + + +static struct proc_dir_entry *proc_root_mtrr; + + +static int mtrr_seq_show(struct seq_file *seq, void *offset) +{ + char factor; + int i, max, len; + mtrr_type type; + unsigned long base, size; + + len = 0; + max = num_var_ranges; + for (i = 0; i < max; i++) { + mtrr_if->get(i, &base, &size, &type); + if (size == 0) + usage_table[i] = 0; + else { + if (size < (0x100000 >> PAGE_SHIFT)) { + /* less than 1MB */ + factor = 'K'; + size <<= PAGE_SHIFT - 10; + } else { + factor = 'M'; + size >>= 20 - PAGE_SHIFT; + } + /* RED-PEN: base can be > 32bit */ + len += seq_printf(seq, + "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", + i, base, base >> (20 - PAGE_SHIFT), size, factor, + mtrr_attrib_to_str(type), usage_table[i]); + } + } + return 0; +} + +static int __init mtrr_if_init(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + if ((!cpu_has(c, X86_FEATURE_MTRR)) && + (!cpu_has(c, X86_FEATURE_K6_MTRR)) && + (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) && + (!cpu_has(c, X86_FEATURE_CENTAUR_MCR))) + return -ENODEV; + + proc_root_mtrr = + create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root); + if (proc_root_mtrr) { + proc_root_mtrr->owner = THIS_MODULE; + proc_root_mtrr->proc_fops = &mtrr_fops; + } + return 0; +} + +arch_initcall(mtrr_if_init); +#endif /* CONFIG_PROC_FS */ diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c new file mode 100644 index 00000000000..c48b6fea5ab --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -0,0 +1,768 @@ +/* Generic MTRR (Memory Type Range Register) driver. + + Copyright (C) 1997-2000 Richard Gooch + Copyright (c) 2002 Patrick Mochel + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Richard Gooch may be reached by email at rgooch@atnf.csiro.au + The postal address is: + Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. + + Source: "Pentium Pro Family Developer's Manual, Volume 3: + Operating System Writer's Guide" (Intel document number 242692), + section 11.11.7 + + This was cleaned and made readable by Patrick Mochel <mochel@osdl.org> + on 6-7 March 2002. + Source: Intel Architecture Software Developers Manual, Volume 3: + System Programming Guide; Section 9.11. (1997 edition - PPro). +*/ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/smp.h> +#include <linux/cpu.h> +#include <linux/mutex.h> + +#include <asm/mtrr.h> + +#include <asm/uaccess.h> +#include <asm/processor.h> +#include <asm/msr.h> +#include "mtrr.h" + +u32 num_var_ranges = 0; + +unsigned int *usage_table; +static DEFINE_MUTEX(mtrr_mutex); + +u64 size_or_mask, size_and_mask; + +static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; + +struct mtrr_ops * mtrr_if = NULL; + +static void set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type); + +#ifndef CONFIG_X86_64 +extern int arr3_protected; +#else +#define arr3_protected 0 +#endif + +void set_mtrr_ops(struct mtrr_ops * ops) +{ + if (ops->vendor && ops->vendor < X86_VENDOR_NUM) + mtrr_ops[ops->vendor] = ops; +} + +/* Returns non-zero if we have the write-combining memory type */ +static int have_wrcomb(void) +{ + struct pci_dev *dev; + u8 rev; + + if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) { + /* ServerWorks LE chipsets < rev 6 have problems with write-combining + Don't allow it and leave room for other chipsets to be tagged */ + if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && + dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { + pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); + if (rev <= 5) { + printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); + pci_dev_put(dev); + return 0; + } + } + /* Intel 450NX errata # 23. Non ascending cacheline evictions to + write combining memory may resulting in data corruption */ + if (dev->vendor == PCI_VENDOR_ID_INTEL && + dev->device == PCI_DEVICE_ID_INTEL_82451NX) { + printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); + pci_dev_put(dev); + return 0; + } + pci_dev_put(dev); + } + return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0); +} + +/* This function returns the number of variable MTRRs */ +static void __init set_num_var_ranges(void) +{ + unsigned long config = 0, dummy; + + if (use_intel()) { + rdmsr(MTRRcap_MSR, config, dummy); + } else if (is_cpu(AMD)) + config = 2; + else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) + config = 8; + num_var_ranges = config & 0xff; +} + +static void __init init_table(void) +{ + int i, max; + + max = num_var_ranges; + if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL)) + == NULL) { + printk(KERN_ERR "mtrr: could not allocate\n"); + return; + } + for (i = 0; i < max; i++) + usage_table[i] = 1; +} + +struct set_mtrr_data { + atomic_t count; + atomic_t gate; + unsigned long smp_base; + unsigned long smp_size; + unsigned int smp_reg; + mtrr_type smp_type; +}; + +#ifdef CONFIG_SMP + +static void ipi_handler(void *info) +/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. + [RETURNS] Nothing. +*/ +{ + struct set_mtrr_data *data = info; + unsigned long flags; + + local_irq_save(flags); + + atomic_dec(&data->count); + while(!atomic_read(&data->gate)) + cpu_relax(); + + /* The master has cleared me to execute */ + if (data->smp_reg != ~0U) + mtrr_if->set(data->smp_reg, data->smp_base, + data->smp_size, data->smp_type); + else + mtrr_if->set_all(); + + atomic_dec(&data->count); + while(atomic_read(&data->gate)) + cpu_relax(); + + atomic_dec(&data->count); + local_irq_restore(flags); +} + +#endif + +static inline int types_compatible(mtrr_type type1, mtrr_type type2) { + return type1 == MTRR_TYPE_UNCACHABLE || + type2 == MTRR_TYPE_UNCACHABLE || + (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || + (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH); +} + +/** + * set_mtrr - update mtrrs on all processors + * @reg: mtrr in question + * @base: mtrr base + * @size: mtrr size + * @type: mtrr type + * + * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: + * + * 1. Send IPI to do the following: + * 2. Disable Interrupts + * 3. Wait for all procs to do so + * 4. Enter no-fill cache mode + * 5. Flush caches + * 6. Clear PGE bit + * 7. Flush all TLBs + * 8. Disable all range registers + * 9. Update the MTRRs + * 10. Enable all range registers + * 11. Flush all TLBs and caches again + * 12. Enter normal cache mode and reenable caching + * 13. Set PGE + * 14. Wait for buddies to catch up + * 15. Enable interrupts. + * + * What does that mean for us? Well, first we set data.count to the number + * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait + * until it hits 0 and proceed. We set the data.gate flag and reset data.count. + * Meanwhile, they are waiting for that flag to be set. Once it's set, each + * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it + * differently, so we call mtrr_if->set() callback and let them take care of it. + * When they're done, they again decrement data->count and wait for data.gate to + * be reset. + * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag. + * Everyone then enables interrupts and we all continue on. + * + * Note that the mechanism is the same for UP systems, too; all the SMP stuff + * becomes nops. + */ +static void set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +{ + struct set_mtrr_data data; + unsigned long flags; + + data.smp_reg = reg; + data.smp_base = base; + data.smp_size = size; + data.smp_type = type; + atomic_set(&data.count, num_booting_cpus() - 1); + /* make sure data.count is visible before unleashing other CPUs */ + smp_wmb(); + atomic_set(&data.gate,0); + + /* Start the ball rolling on other CPUs */ + if (smp_call_function(ipi_handler, &data, 1, 0) != 0) + panic("mtrr: timed out waiting for other CPUs\n"); + + local_irq_save(flags); + + while(atomic_read(&data.count)) + cpu_relax(); + + /* ok, reset count and toggle gate */ + atomic_set(&data.count, num_booting_cpus() - 1); + smp_wmb(); + atomic_set(&data.gate,1); + + /* do our MTRR business */ + + /* HACK! + * We use this same function to initialize the mtrrs on boot. + * The state of the boot cpu's mtrrs has been saved, and we want + * to replicate across all the APs. + * If we're doing that @reg is set to something special... + */ + if (reg != ~0U) + mtrr_if->set(reg,base,size,type); + + /* wait for the others */ + while(atomic_read(&data.count)) + cpu_relax(); + + atomic_set(&data.count, num_booting_cpus() - 1); + smp_wmb(); + atomic_set(&data.gate,0); + + /* + * Wait here for everyone to have seen the gate change + * So we're the last ones to touch 'data' + */ + while(atomic_read(&data.count)) + cpu_relax(); + + local_irq_restore(flags); +} + +/** + * mtrr_add_page - Add a memory type region + * @base: Physical base address of region in pages (in units of 4 kB!) + * @size: Physical size of region in pages (4 kB) + * @type: Type of MTRR desired + * @increment: If this is true do usage counting on the region + * + * Memory type region registers control the caching on newer Intel and + * non Intel processors. This function allows drivers to request an + * MTRR is added. The details and hardware specifics of each processor's + * implementation are hidden from the caller, but nevertheless the + * caller should expect to need to provide a power of two size on an + * equivalent power of two boundary. + * + * If the region cannot be added either because all regions are in use + * or the CPU cannot support it a negative value is returned. On success + * the register number for this entry is returned, but should be treated + * as a cookie only. + * + * On a multiprocessor machine the changes are made to all processors. + * This is required on x86 by the Intel processors. + * + * The available types are + * + * %MTRR_TYPE_UNCACHABLE - No caching + * + * %MTRR_TYPE_WRBACK - Write data back in bursts whenever + * + * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts + * + * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes + * + * BUGS: Needs a quiet flag for the cases where drivers do not mind + * failures and do not wish system log messages to be sent. + */ + +int mtrr_add_page(unsigned long base, unsigned long size, + unsigned int type, char increment) +{ + int i, replace, error; + mtrr_type ltype; + unsigned long lbase, lsize; + + if (!mtrr_if) + return -ENXIO; + + if ((error = mtrr_if->validate_add_page(base,size,type))) + return error; + + if (type >= MTRR_NUM_TYPES) { + printk(KERN_WARNING "mtrr: type: %u invalid\n", type); + return -EINVAL; + } + + /* If the type is WC, check that this processor supports it */ + if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { + printk(KERN_WARNING + "mtrr: your processor doesn't support write-combining\n"); + return -ENOSYS; + } + + if (!size) { + printk(KERN_WARNING "mtrr: zero sized request\n"); + return -EINVAL; + } + + if (base & size_or_mask || size & size_or_mask) { + printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n"); + return -EINVAL; + } + + error = -EINVAL; + replace = -1; + + /* No CPU hotplug when we change MTRR entries */ + lock_cpu_hotplug(); + /* Search for existing MTRR */ + mutex_lock(&mtrr_mutex); + for (i = 0; i < num_var_ranges; ++i) { + mtrr_if->get(i, &lbase, &lsize, <ype); + if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase) + continue; + /* At this point we know there is some kind of overlap/enclosure */ + if (base < lbase || base + size - 1 > lbase + lsize - 1) { + if (base <= lbase && base + size - 1 >= lbase + lsize - 1) { + /* New region encloses an existing region */ + if (type == ltype) { + replace = replace == -1 ? i : -2; + continue; + } + else if (types_compatible(type, ltype)) + continue; + } + printk(KERN_WARNING + "mtrr: 0x%lx000,0x%lx000 overlaps existing" + " 0x%lx000,0x%lx000\n", base, size, lbase, + lsize); + goto out; + } + /* New region is enclosed by an existing region */ + if (ltype != type) { + if (types_compatible(type, ltype)) + continue; + printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", + base, size, mtrr_attrib_to_str(ltype), + mtrr_attrib_to_str(type)); + goto out; + } + if (increment) + ++usage_table[i]; + error = i; + goto out; + } + /* Search for an empty MTRR */ + i = mtrr_if->get_free_region(base, size, replace); + if (i >= 0) { + set_mtrr(i, base, size, type); + if (likely(replace < 0)) + usage_table[i] = 1; + else { + usage_table[i] = usage_table[replace] + !!increment; + if (unlikely(replace != i)) { + set_mtrr(replace, 0, 0, 0); + usage_table[replace] = 0; + } + } + } else + printk(KERN_INFO "mtrr: no more MTRRs available\n"); + error = i; + out: + mutex_unlock(&mtrr_mutex); + unlock_cpu_hotplug(); + return error; +} + +static int mtrr_check(unsigned long base, unsigned long size) +{ + if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { + printk(KERN_WARNING + "mtrr: size and base must be multiples of 4 kiB\n"); + printk(KERN_DEBUG + "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + dump_stack(); + return -1; + } + return 0; +} + +/** + * mtrr_add - Add a memory type region + * @base: Physical base address of region + * @size: Physical size of region + * @type: Type of MTRR desired + * @increment: If this is true do usage counting on the region + * + * Memory type region registers control the caching on newer Intel and + * non Intel processors. This function allows drivers to request an + * MTRR is added. The details and hardware specifics of each processor's + * implementation are hidden from the caller, but nevertheless the + * caller should expect to need to provide a power of two size on an + * equivalent power of two boundary. + * + * If the region cannot be added either because all regions are in use + * or the CPU cannot support it a negative value is returned. On success + * the register number for this entry is returned, but should be treated + * as a cookie only. + * + * On a multiprocessor machine the changes are made to all processors. + * This is required on x86 by the Intel processors. + * + * The available types are + * + * %MTRR_TYPE_UNCACHABLE - No caching + * + * %MTRR_TYPE_WRBACK - Write data back in bursts whenever + * + * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts + * + * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes + * + * BUGS: Needs a quiet flag for the cases where drivers do not mind + * failures and do not wish system log messages to be sent. + */ + +int +mtrr_add(unsigned long base, unsigned long size, unsigned int type, + char increment) +{ + if (mtrr_check(base, size)) + return -EINVAL; + return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, + increment); +} + +/** + * mtrr_del_page - delete a memory type region + * @reg: Register returned by mtrr_add + * @base: Physical base address + * @size: Size of region + * + * If register is supplied then base and size are ignored. This is + * how drivers should call it. + * + * Releases an MTRR region. If the usage count drops to zero the + * register is freed and the region returns to default state. + * On success the register is returned, on failure a negative error + * code. + */ + +int mtrr_del_page(int reg, unsigned long base, unsigned long size) +{ + int i, max; + mtrr_type ltype; + unsigned long lbase, lsize; + int error = -EINVAL; + + if (!mtrr_if) + return -ENXIO; + + max = num_var_ranges; + /* No CPU hotplug when we change MTRR entries */ + lock_cpu_hotplug(); + mutex_lock(&mtrr_mutex); + if (reg < 0) { + /* Search for existing MTRR */ + for (i = 0; i < max; ++i) { + mtrr_if->get(i, &lbase, &lsize, <ype); + if (lbase == base && lsize == size) { + reg = i; + break; + } + } + if (reg < 0) { + printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base, + size); + goto out; + } + } + if (reg >= max) { + printk(KERN_WARNING "mtrr: register: %d too big\n", reg); + goto out; + } + if (is_cpu(CYRIX) && !use_intel()) { + if ((reg == 3) && arr3_protected) { + printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n"); + goto out; + } + } + mtrr_if->get(reg, &lbase, &lsize, <ype); + if (lsize < 1) { + printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); + goto out; + } + if (usage_table[reg] < 1) { + printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); + goto out; + } + if (--usage_table[reg] < 1) + set_mtrr(reg, 0, 0, 0); + error = reg; + out: + mutex_unlock(&mtrr_mutex); + unlock_cpu_hotplug(); + return error; +} +/** + * mtrr_del - delete a memory type region + * @reg: Register returned by mtrr_add + * @base: Physical base address + * @size: Size of region + * + * If register is supplied then base and size are ignored. This is + * how drivers should call it. + * + * Releases an MTRR region. If the usage count drops to zero the + * register is freed and the region returns to default state. + * On success the register is returned, on failure a negative error + * code. + */ + +int +mtrr_del(int reg, unsigned long base, unsigned long size) +{ + if (mtrr_check(base, size)) + return -EINVAL; + return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); +} + +EXPORT_SYMBOL(mtrr_add); +EXPORT_SYMBOL(mtrr_del); + +/* HACK ALERT! + * These should be called implicitly, but we can't yet until all the initcall + * stuff is done... + */ +extern void amd_init_mtrr(void); +extern void cyrix_init_mtrr(void); +extern void centaur_init_mtrr(void); + +static void __init init_ifs(void) +{ +#ifndef CONFIG_X86_64 + amd_init_mtrr(); + cyrix_init_mtrr(); + centaur_init_mtrr(); +#endif +} + +/* The suspend/resume methods are only for CPU without MTRR. CPU using generic + * MTRR driver doesn't require this + */ +struct mtrr_value { + mtrr_type ltype; + unsigned long lbase; + unsigned long lsize; +}; + +static struct mtrr_value * mtrr_state; + +static int mtrr_save(struct sys_device * sysdev, pm_message_t state) +{ + int i; + int size = num_var_ranges * sizeof(struct mtrr_value); + + mtrr_state = kzalloc(size,GFP_ATOMIC); + if (!mtrr_state) + return -ENOMEM; + + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, + &mtrr_state[i].lbase, + &mtrr_state[i].lsize, + &mtrr_state[i].ltype); + } + return 0; +} + +static int mtrr_restore(struct sys_device * sysdev) +{ + int i; + + for (i = 0; i < num_var_ranges; i++) { + if (mtrr_state[i].lsize) + set_mtrr(i, + mtrr_state[i].lbase, + mtrr_state[i].lsize, + mtrr_state[i].ltype); + } + kfree(mtrr_state); + return 0; +} + + + +static struct sysdev_driver mtrr_sysdev_driver = { + .suspend = mtrr_save, + .resume = mtrr_restore, +}; + + +/** + * mtrr_bp_init - initialize mtrrs on the boot CPU + * + * This needs to be called early; before any of the other CPUs are + * initialized (i.e. before smp_init()). + * + */ +void __init mtrr_bp_init(void) +{ + init_ifs(); + + if (cpu_has_mtrr) { + mtrr_if = &generic_mtrr_ops; + size_or_mask = 0xff000000; /* 36 bits */ + size_and_mask = 0x00f00000; + + /* This is an AMD specific MSR, but we assume(hope?) that + Intel will implement it to when they extend the address + bus of the Xeon. */ + if (cpuid_eax(0x80000000) >= 0x80000008) { + u32 phys_addr; + phys_addr = cpuid_eax(0x80000008) & 0xff; + /* CPUID workaround for Intel 0F33/0F34 CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 0xF && + boot_cpu_data.x86_model == 0x3 && + (boot_cpu_data.x86_mask == 0x3 || + boot_cpu_data.x86_mask == 0x4)) + phys_addr = 36; + + size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1); + size_and_mask = ~size_or_mask & 0xfffff00000ULL; + } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && + boot_cpu_data.x86 == 6) { + /* VIA C* family have Intel style MTRRs, but + don't support PAE */ + size_or_mask = 0xfff00000; /* 32 bits */ + size_and_mask = 0; + } + } else { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + if (cpu_has_k6_mtrr) { + /* Pre-Athlon (K6) AMD CPU MTRRs */ + mtrr_if = mtrr_ops[X86_VENDOR_AMD]; + size_or_mask = 0xfff00000; /* 32 bits */ + size_and_mask = 0; + } + break; + case X86_VENDOR_CENTAUR: + if (cpu_has_centaur_mcr) { + mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR]; + size_or_mask = 0xfff00000; /* 32 bits */ + size_and_mask = 0; + } + break; + case X86_VENDOR_CYRIX: + if (cpu_has_cyrix_arr) { + mtrr_if = mtrr_ops[X86_VENDOR_CYRIX]; + size_or_mask = 0xfff00000; /* 32 bits */ + size_and_mask = 0; + } + break; + default: + break; + } + } + + if (mtrr_if) { + set_num_var_ranges(); + init_table(); + if (use_intel()) + get_mtrr_state(); + } +} + +void mtrr_ap_init(void) +{ + unsigned long flags; + + if (!mtrr_if || !use_intel()) + return; + /* + * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed, + * but this routine will be called in cpu boot time, holding the lock + * breaks it. This routine is called in two cases: 1.very earily time + * of software resume, when there absolutely isn't mtrr entry changes; + * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to + * prevent mtrr entry changes + */ + local_irq_save(flags); + + mtrr_if->set_all(); + + local_irq_restore(flags); +} + +/** + * Save current fixed-range MTRR state of the BSP + */ +void mtrr_save_state(void) +{ + int cpu = get_cpu(); + + if (cpu == 0) + mtrr_save_fixed_ranges(NULL); + else + smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); + put_cpu(); +} + +static int __init mtrr_init_finialize(void) +{ + if (!mtrr_if) + return 0; + if (use_intel()) + mtrr_state_warn(); + else { + /* The CPUs haven't MTRR and seemes not support SMP. They have + * specific drivers, we use a tricky method to support + * suspend/resume for them. + * TBD: is there any system with such CPU which supports + * suspend/resume? if no, we should remove the code. + */ + sysdev_driver_register(&cpu_sysdev_class, + &mtrr_sysdev_driver); + } + return 0; +} +subsys_initcall(mtrr_init_finialize); diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h new file mode 100644 index 00000000000..289dfe6030e --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -0,0 +1,98 @@ +/* + * local mtrr defines. + */ + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif + +#define MTRRcap_MSR 0x0fe +#define MTRRdefType_MSR 0x2ff + +#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) +#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) + +#define NUM_FIXED_RANGES 88 +#define MTRRfix64K_00000_MSR 0x250 +#define MTRRfix16K_80000_MSR 0x258 +#define MTRRfix16K_A0000_MSR 0x259 +#define MTRRfix4K_C0000_MSR 0x268 +#define MTRRfix4K_C8000_MSR 0x269 +#define MTRRfix4K_D0000_MSR 0x26a +#define MTRRfix4K_D8000_MSR 0x26b +#define MTRRfix4K_E0000_MSR 0x26c +#define MTRRfix4K_E8000_MSR 0x26d +#define MTRRfix4K_F0000_MSR 0x26e +#define MTRRfix4K_F8000_MSR 0x26f + +#define MTRR_CHANGE_MASK_FIXED 0x01 +#define MTRR_CHANGE_MASK_VARIABLE 0x02 +#define MTRR_CHANGE_MASK_DEFTYPE 0x04 + +/* In the Intel processor's MTRR interface, the MTRR type is always held in + an 8 bit field: */ +typedef u8 mtrr_type; + +struct mtrr_ops { + u32 vendor; + u32 use_intel_if; +// void (*init)(void); + void (*set)(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type); + void (*set_all)(void); + + void (*get)(unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type * type); + int (*get_free_region)(unsigned long base, unsigned long size, + int replace_reg); + int (*validate_add_page)(unsigned long base, unsigned long size, + unsigned int type); + int (*have_wrcomb)(void); +}; + +extern int generic_get_free_region(unsigned long base, unsigned long size, + int replace_reg); +extern int generic_validate_add_page(unsigned long base, unsigned long size, + unsigned int type); + +extern struct mtrr_ops generic_mtrr_ops; + +extern int positive_have_wrcomb(void); + +/* library functions for processor-specific routines */ +struct set_mtrr_context { + unsigned long flags; + unsigned long cr4val; + u32 deftype_lo; + u32 deftype_hi; + u32 ccr3; +}; + +struct mtrr_var_range { + u32 base_lo; + u32 base_hi; + u32 mask_lo; + u32 mask_hi; +}; + +void set_mtrr_done(struct set_mtrr_context *ctxt); +void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); +void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); + +void get_mtrr_state(void); + +extern void set_mtrr_ops(struct mtrr_ops * ops); + +extern u64 size_or_mask, size_and_mask; +extern struct mtrr_ops * mtrr_if; + +#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) +#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) + +extern unsigned int num_var_ranges; + +void mtrr_state_warn(void); +const char *mtrr_attrib_to_str(int x); +void mtrr_wrmsr(unsigned, unsigned, unsigned); + diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c new file mode 100644 index 00000000000..49e20c2afcd --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/state.c @@ -0,0 +1,79 @@ +#include <linux/mm.h> +#include <linux/init.h> +#include <asm/io.h> +#include <asm/mtrr.h> +#include <asm/msr.h> +#include <asm/processor-cyrix.h> +#include "mtrr.h" + + +/* Put the processor into a state where MTRRs can be safely set */ +void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) +{ + unsigned int cr0; + + /* Disable interrupts locally */ + local_irq_save(ctxt->flags); + + if (use_intel() || is_cpu(CYRIX)) { + + /* Save value of CR4 and clear Page Global Enable (bit 7) */ + if ( cpu_has_pge ) { + ctxt->cr4val = read_cr4(); + write_cr4(ctxt->cr4val & ~X86_CR4_PGE); + } + + /* Disable and flush caches. Note that wbinvd flushes the TLBs as + a side-effect */ + cr0 = read_cr0() | 0x40000000; + wbinvd(); + write_cr0(cr0); + wbinvd(); + + if (use_intel()) + /* Save MTRR state */ + rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); + else + /* Cyrix ARRs - everything else were excluded at the top */ + ctxt->ccr3 = getCx86(CX86_CCR3); + } +} + +void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) +{ + if (use_intel()) + /* Disable MTRRs, and set the default type to uncached */ + mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, + ctxt->deftype_hi); + else if (is_cpu(CYRIX)) + /* Cyrix ARRs - everything else were excluded at the top */ + setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); +} + +/* Restore the processor after a set_mtrr_prepare */ +void set_mtrr_done(struct set_mtrr_context *ctxt) +{ + if (use_intel() || is_cpu(CYRIX)) { + + /* Flush caches and TLBs */ + wbinvd(); + + /* Restore MTRRdefType */ + if (use_intel()) + /* Intel (P6) standard MTRRs */ + mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); + else + /* Cyrix ARRs - everything else was excluded at the top */ + setCx86(CX86_CCR3, ctxt->ccr3); + + /* Enable caches */ + write_cr0(read_cr0() & 0xbfffffff); + + /* Restore value of CR4 */ + if ( cpu_has_pge ) + write_cr4(ctxt->cr4val); + } + /* Re-enable interrupts locally (if enabled previously) */ + local_irq_restore(ctxt->flags); +} + diff --git a/arch/x86/kernel/cpu/nexgen.c b/arch/x86/kernel/cpu/nexgen.c new file mode 100644 index 00000000000..961fbe1a748 --- /dev/null +++ b/arch/x86/kernel/cpu/nexgen.c @@ -0,0 +1,60 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/string.h> +#include <asm/processor.h> + +#include "cpu.h" + +/* + * Detect a NexGen CPU running without BIOS hypercode new enough + * to have CPUID. (Thanks to Herbert Oppmann) + */ + +static int __cpuinit deep_magic_nexgen_probe(void) +{ + int ret; + + __asm__ __volatile__ ( + " movw $0x5555, %%ax\n" + " xorw %%dx,%%dx\n" + " movw $2, %%cx\n" + " divw %%cx\n" + " movl $0, %%eax\n" + " jnz 1f\n" + " movl $1, %%eax\n" + "1:\n" + : "=a" (ret) : : "cx", "dx" ); + return ret; +} + +static void __cpuinit init_nexgen(struct cpuinfo_x86 * c) +{ + c->x86_cache_size = 256; /* A few had 1 MB... */ +} + +static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c) +{ + /* Detect NexGen with old hypercode */ + if ( deep_magic_nexgen_probe() ) { + strcpy(c->x86_vendor_id, "NexGenDriven"); + } +} + +static struct cpu_dev nexgen_cpu_dev __cpuinitdata = { + .c_vendor = "Nexgen", + .c_ident = { "NexGenDriven" }, + .c_models = { + { .vendor = X86_VENDOR_NEXGEN, + .family = 5, + .model_names = { [1] = "Nx586" } + }, + }, + .c_init = init_nexgen, + .c_identify = nexgen_identify, +}; + +int __init nexgen_init_cpu(void) +{ + cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev; + return 0; +} diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c new file mode 100644 index 00000000000..93fecd4b03d --- /dev/null +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -0,0 +1,713 @@ +/* local apic based NMI watchdog for various CPUs. + This file also handles reservation of performance counters for coordination + with other users (like oprofile). + + Note that these events normally don't tick when the CPU idles. This means + the frequency varies with CPU load. + + Original code for K7/P6 written by Keith Owens */ + +#include <linux/percpu.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/bitops.h> +#include <linux/smp.h> +#include <linux/nmi.h> +#include <asm/apic.h> +#include <asm/intel_arch_perfmon.h> + +struct nmi_watchdog_ctlblk { + unsigned int cccr_msr; + unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ + unsigned int evntsel_msr; /* the MSR to select the events to handle */ +}; + +/* Interface defining a CPU specific perfctr watchdog */ +struct wd_ops { + int (*reserve)(void); + void (*unreserve)(void); + int (*setup)(unsigned nmi_hz); + void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); + void (*stop)(void); + unsigned perfctr; + unsigned evntsel; + u64 checkbit; +}; + +static struct wd_ops *wd_ops; + +/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's + * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) + */ +#define NMI_MAX_COUNTER_BITS 66 + +/* perfctr_nmi_owner tracks the ownership of the perfctr registers: + * evtsel_nmi_owner tracks the ownership of the event selection + * - different performance counters/ event selection may be reserved for + * different subsystems this reservation system just tries to coordinate + * things a little + */ +static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); +static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); + +static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); + +/* converts an msr to an appropriate reservation bit */ +static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) +{ + /* returns the bit offset of the performance counter register */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + return (msr - MSR_K7_PERFCTR0); + case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return (msr - MSR_ARCH_PERFMON_PERFCTR0); + + switch (boot_cpu_data.x86) { + case 6: + return (msr - MSR_P6_PERFCTR0); + case 15: + return (msr - MSR_P4_BPU_PERFCTR0); + } + } + return 0; +} + +/* converts an msr to an appropriate reservation bit */ +/* returns the bit offset of the event selection register */ +static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) +{ + /* returns the bit offset of the event selection register */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + return (msr - MSR_K7_EVNTSEL0); + case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return (msr - MSR_ARCH_PERFMON_EVENTSEL0); + + switch (boot_cpu_data.x86) { + case 6: + return (msr - MSR_P6_EVNTSEL0); + case 15: + return (msr - MSR_P4_BSU_ESCR0); + } + } + return 0; + +} + +/* checks for a bit availability (hack for oprofile) */ +int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) +{ + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + return (!test_bit(counter, perfctr_nmi_owner)); +} + +/* checks the an msr for availability */ +int avail_to_resrv_perfctr_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_perfctr_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + return (!test_bit(counter, perfctr_nmi_owner)); +} + +int reserve_perfctr_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_perfctr_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + if (!test_and_set_bit(counter, perfctr_nmi_owner)) + return 1; + return 0; +} + +void release_perfctr_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_perfctr_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + clear_bit(counter, perfctr_nmi_owner); +} + +int reserve_evntsel_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_evntsel_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + if (!test_and_set_bit(counter, evntsel_nmi_owner)) + return 1; + return 0; +} + +void release_evntsel_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_evntsel_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + clear_bit(counter, evntsel_nmi_owner); +} + +EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); +EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); +EXPORT_SYMBOL(reserve_perfctr_nmi); +EXPORT_SYMBOL(release_perfctr_nmi); +EXPORT_SYMBOL(reserve_evntsel_nmi); +EXPORT_SYMBOL(release_evntsel_nmi); + +void disable_lapic_nmi_watchdog(void) +{ + BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); + + if (atomic_read(&nmi_active) <= 0) + return; + + on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); + wd_ops->unreserve(); + + BUG_ON(atomic_read(&nmi_active) != 0); +} + +void enable_lapic_nmi_watchdog(void) +{ + BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); + + /* are we already enabled */ + if (atomic_read(&nmi_active) != 0) + return; + + /* are we lapic aware */ + if (!wd_ops) + return; + if (!wd_ops->reserve()) { + printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n"); + return; + } + + on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); + touch_nmi_watchdog(); +} + +/* + * Activate the NMI watchdog via the local APIC. + */ + +static unsigned int adjust_for_32bit_ctr(unsigned int hz) +{ + u64 counter_val; + unsigned int retval = hz; + + /* + * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter + * are writable, with higher bits sign extending from bit 31. + * So, we can only program the counter with 31 bit values and + * 32nd bit should be 1, for 33.. to be 1. + * Find the appropriate nmi_hz + */ + counter_val = (u64)cpu_khz * 1000; + do_div(counter_val, retval); + if (counter_val > 0x7fffffffULL) { + u64 count = (u64)cpu_khz * 1000; + do_div(count, 0x7fffffffUL); + retval = count + 1; + } + return retval; +} + +static void +write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz) +{ + u64 count = (u64)cpu_khz * 1000; + + do_div(count, nmi_hz); + if(descr) + Dprintk("setting %s to -0x%08Lx\n", descr, count); + wrmsrl(perfctr_msr, 0 - count); +} + +static void write_watchdog_counter32(unsigned int perfctr_msr, + const char *descr, unsigned nmi_hz) +{ + u64 count = (u64)cpu_khz * 1000; + + do_div(count, nmi_hz); + if(descr) + Dprintk("setting %s to -0x%08Lx\n", descr, count); + wrmsr(perfctr_msr, (u32)(-count), 0); +} + +/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface + nicely stable so there is not much variety */ + +#define K7_EVNTSEL_ENABLE (1 << 22) +#define K7_EVNTSEL_INT (1 << 20) +#define K7_EVNTSEL_OS (1 << 17) +#define K7_EVNTSEL_USR (1 << 16) +#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 +#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING + +static int setup_k7_watchdog(unsigned nmi_hz) +{ + unsigned int perfctr_msr, evntsel_msr; + unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + perfctr_msr = wd_ops->perfctr; + evntsel_msr = wd_ops->evntsel; + + wrmsrl(perfctr_msr, 0UL); + + evntsel = K7_EVNTSEL_INT + | K7_EVNTSEL_OS + | K7_EVNTSEL_USR + | K7_NMI_EVENT; + + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= K7_EVNTSEL_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; //unused + return 1; +} + +static void single_msr_stop_watchdog(void) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + wrmsr(wd->evntsel_msr, 0, 0); +} + +static int single_msr_reserve(void) +{ + if (!reserve_perfctr_nmi(wd_ops->perfctr)) + return 0; + + if (!reserve_evntsel_nmi(wd_ops->evntsel)) { + release_perfctr_nmi(wd_ops->perfctr); + return 0; + } + return 1; +} + +static void single_msr_unreserve(void) +{ + release_evntsel_nmi(wd_ops->evntsel); + release_perfctr_nmi(wd_ops->perfctr); +} + +static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +{ + /* start the cycle over again */ + write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); +} + +static struct wd_ops k7_wd_ops = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_k7_watchdog, + .rearm = single_msr_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_K7_PERFCTR0, + .evntsel = MSR_K7_EVNTSEL0, + .checkbit = 1ULL<<47, +}; + +/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */ + +#define P6_EVNTSEL0_ENABLE (1 << 22) +#define P6_EVNTSEL_INT (1 << 20) +#define P6_EVNTSEL_OS (1 << 17) +#define P6_EVNTSEL_USR (1 << 16) +#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 +#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED + +static int setup_p6_watchdog(unsigned nmi_hz) +{ + unsigned int perfctr_msr, evntsel_msr; + unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + perfctr_msr = wd_ops->perfctr; + evntsel_msr = wd_ops->evntsel; + + /* KVM doesn't implement this MSR */ + if (wrmsr_safe(perfctr_msr, 0, 0) < 0) + return 0; + + evntsel = P6_EVNTSEL_INT + | P6_EVNTSEL_OS + | P6_EVNTSEL_USR + | P6_NMI_EVENT; + + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + nmi_hz = adjust_for_32bit_ctr(nmi_hz); + write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= P6_EVNTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; //unused + return 1; +} + +static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +{ + /* P6 based Pentium M need to re-unmask + * the apic vector but it doesn't hurt + * other P6 variant. + * ArchPerfom/Core Duo also needs this */ + apic_write(APIC_LVTPC, APIC_DM_NMI); + /* P6/ARCH_PERFMON has 32 bit counter write */ + write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); +} + +static struct wd_ops p6_wd_ops = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_p6_watchdog, + .rearm = p6_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_P6_PERFCTR0, + .evntsel = MSR_P6_EVNTSEL0, + .checkbit = 1ULL<<39, +}; + +/* Intel P4 performance counters. By far the most complicated of all. */ + +#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) +#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) +#define P4_ESCR_OS (1<<3) +#define P4_ESCR_USR (1<<2) +#define P4_CCCR_OVF_PMI0 (1<<26) +#define P4_CCCR_OVF_PMI1 (1<<27) +#define P4_CCCR_THRESHOLD(N) ((N)<<20) +#define P4_CCCR_COMPLEMENT (1<<19) +#define P4_CCCR_COMPARE (1<<18) +#define P4_CCCR_REQUIRED (3<<16) +#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) +#define P4_CCCR_ENABLE (1<<12) +#define P4_CCCR_OVF (1<<31) + +/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter + CRU_ESCR0 (with any non-null event selector) through a complemented + max threshold. [IA32-Vol3, Section 14.9.9] */ + +static int setup_p4_watchdog(unsigned nmi_hz) +{ + unsigned int perfctr_msr, evntsel_msr, cccr_msr; + unsigned int evntsel, cccr_val; + unsigned int misc_enable, dummy; + unsigned int ht_num; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); + if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) + return 0; + +#ifdef CONFIG_SMP + /* detect which hyperthread we are on */ + if (smp_num_siblings == 2) { + unsigned int ebx, apicid; + + ebx = cpuid_ebx(1); + apicid = (ebx >> 24) & 0xff; + ht_num = apicid & 1; + } else +#endif + ht_num = 0; + + /* performance counters are shared resources + * assign each hyperthread its own set + * (re-use the ESCR0 register, seems safe + * and keeps the cccr_val the same) + */ + if (!ht_num) { + /* logical cpu 0 */ + perfctr_msr = MSR_P4_IQ_PERFCTR0; + evntsel_msr = MSR_P4_CRU_ESCR0; + cccr_msr = MSR_P4_IQ_CCCR0; + cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); + } else { + /* logical cpu 1 */ + perfctr_msr = MSR_P4_IQ_PERFCTR1; + evntsel_msr = MSR_P4_CRU_ESCR0; + cccr_msr = MSR_P4_IQ_CCCR1; + cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); + } + + evntsel = P4_ESCR_EVENT_SELECT(0x3F) + | P4_ESCR_OS + | P4_ESCR_USR; + + cccr_val |= P4_CCCR_THRESHOLD(15) + | P4_CCCR_COMPLEMENT + | P4_CCCR_COMPARE + | P4_CCCR_REQUIRED; + + wrmsr(evntsel_msr, evntsel, 0); + wrmsr(cccr_msr, cccr_val, 0); + write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); + apic_write(APIC_LVTPC, APIC_DM_NMI); + cccr_val |= P4_CCCR_ENABLE; + wrmsr(cccr_msr, cccr_val, 0); + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = cccr_msr; + return 1; +} + +static void stop_p4_watchdog(void) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + wrmsr(wd->cccr_msr, 0, 0); + wrmsr(wd->evntsel_msr, 0, 0); +} + +static int p4_reserve(void) +{ + if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0)) + return 0; +#ifdef CONFIG_SMP + if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1)) + goto fail1; +#endif + if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) + goto fail2; + /* RED-PEN why is ESCR1 not reserved here? */ + return 1; + fail2: +#ifdef CONFIG_SMP + if (smp_num_siblings > 1) + release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); + fail1: +#endif + release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); + return 0; +} + +static void p4_unreserve(void) +{ +#ifdef CONFIG_SMP + if (smp_num_siblings > 1) + release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); +#endif + release_evntsel_nmi(MSR_P4_CRU_ESCR0); + release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); +} + +static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +{ + unsigned dummy; + /* + * P4 quirks: + * - An overflown perfctr will assert its interrupt + * until the OVF flag in its CCCR is cleared. + * - LVTPC is masked on interrupt and must be + * unmasked by the LVTPC handler. + */ + rdmsrl(wd->cccr_msr, dummy); + dummy &= ~P4_CCCR_OVF; + wrmsrl(wd->cccr_msr, dummy); + apic_write(APIC_LVTPC, APIC_DM_NMI); + /* start the cycle over again */ + write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); +} + +static struct wd_ops p4_wd_ops = { + .reserve = p4_reserve, + .unreserve = p4_unreserve, + .setup = setup_p4_watchdog, + .rearm = p4_rearm, + .stop = stop_p4_watchdog, + /* RED-PEN this is wrong for the other sibling */ + .perfctr = MSR_P4_BPU_PERFCTR0, + .evntsel = MSR_P4_BSU_ESCR0, + .checkbit = 1ULL<<39, +}; + +/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully + all future Intel CPUs. */ + +#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL +#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK + +static int setup_intel_arch_watchdog(unsigned nmi_hz) +{ + unsigned int ebx; + union cpuid10_eax eax; + unsigned int unused; + unsigned int perfctr_msr, evntsel_msr; + unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + /* + * Check whether the Architectural PerfMon supports + * Unhalted Core Cycles Event or not. + * NOTE: Corresponding bit = 0 in ebx indicates event present. + */ + cpuid(10, &(eax.full), &ebx, &unused, &unused); + if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || + (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) + return 0; + + perfctr_msr = wd_ops->perfctr; + evntsel_msr = wd_ops->evntsel; + + wrmsrl(perfctr_msr, 0UL); + + evntsel = ARCH_PERFMON_EVENTSEL_INT + | ARCH_PERFMON_EVENTSEL_OS + | ARCH_PERFMON_EVENTSEL_USR + | ARCH_PERFMON_NMI_EVENT_SEL + | ARCH_PERFMON_NMI_EVENT_UMASK; + + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + nmi_hz = adjust_for_32bit_ctr(nmi_hz); + write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; //unused + wd_ops->checkbit = 1ULL << (eax.split.bit_width - 1); + return 1; +} + +static struct wd_ops intel_arch_wd_ops = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_intel_arch_watchdog, + .rearm = p6_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_ARCH_PERFMON_PERFCTR1, + .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, +}; + +static struct wd_ops coreduo_wd_ops = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_intel_arch_watchdog, + .rearm = p6_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .evntsel = MSR_ARCH_PERFMON_EVENTSEL0, +}; + +static void probe_nmi_watchdog(void) +{ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && + boot_cpu_data.x86 != 16) + return; + wd_ops = &k7_wd_ops; + break; + case X86_VENDOR_INTEL: + /* Work around Core Duo (Yonah) errata AE49 where perfctr1 + doesn't have a working enable bit. */ + if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { + wd_ops = &coreduo_wd_ops; + break; + } + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + wd_ops = &intel_arch_wd_ops; + break; + } + switch (boot_cpu_data.x86) { + case 6: + if (boot_cpu_data.x86_model > 0xd) + return; + + wd_ops = &p6_wd_ops; + break; + case 15: + if (boot_cpu_data.x86_model > 0x4) + return; + + wd_ops = &p4_wd_ops; + break; + default: + return; + } + break; + } +} + +/* Interface to nmi.c */ + +int lapic_watchdog_init(unsigned nmi_hz) +{ + if (!wd_ops) { + probe_nmi_watchdog(); + if (!wd_ops) + return -1; + + if (!wd_ops->reserve()) { + printk(KERN_ERR + "NMI watchdog: cannot reserve perfctrs\n"); + return -1; + } + } + + if (!(wd_ops->setup(nmi_hz))) { + printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n", + raw_smp_processor_id()); + return -1; + } + + return 0; +} + +void lapic_watchdog_stop(void) +{ + if (wd_ops) + wd_ops->stop(); +} + +unsigned lapic_adjust_nmi_hz(unsigned hz) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + if (wd->perfctr_msr == MSR_P6_PERFCTR0 || + wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) + hz = adjust_for_32bit_ctr(hz); + return hz; +} + +int lapic_wd_event(unsigned nmi_hz) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + u64 ctr; + rdmsrl(wd->perfctr_msr, ctr); + if (ctr & wd_ops->checkbit) { /* perfctr still running? */ + return 0; + } + wd_ops->rearm(wd, nmi_hz); + return 1; +} + +int lapic_watchdog_ok(void) +{ + return wd_ops != NULL; +} diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c new file mode 100644 index 00000000000..1e31b6caffb --- /dev/null +++ b/arch/x86/kernel/cpu/proc.c @@ -0,0 +1,192 @@ +#include <linux/smp.h> +#include <linux/timex.h> +#include <linux/string.h> +#include <asm/semaphore.h> +#include <linux/seq_file.h> +#include <linux/cpufreq.h> + +/* + * Get CPU information for use by the procfs. + */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + /* + * These flag bits must match the definitions in <asm/cpufeature.h>. + * NULL means this bit is undefined or reserved; either way it doesn't + * have meaning as far as Linux is concerned. Note that it's important + * to realize there is a difference between this table and CPUID -- if + * applications want to get the raw CPUID data, they should access + * /dev/cpu/<cpu_nr>/cpuid instead. + */ + static const char * const x86_cap_flags[] = { + /* Intel-defined */ + "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", + "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", + "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", + "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", + + /* AMD-defined */ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, + NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", + "3dnowext", "3dnow", + + /* Transmeta-defined */ + "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Other (Linux-defined) */ + "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", + NULL, NULL, NULL, NULL, + "constant_tsc", "up", NULL, "arch_perfmon", + "pebs", "bts", NULL, "sync_rdtsc", + "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Intel-defined (#2) */ + "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", + "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, + NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* VIA/Cyrix/Centaur-defined */ + NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", + "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* AMD-defined (#2) */ + "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", + "altmovcr8", "abm", "sse4a", + "misalignsse", "3dnowprefetch", + "osvw", "ibs", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Auxiliary (Linux-defined) */ + "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + }; + static const char * const x86_power_flags[] = { + "ts", /* temperature sensor */ + "fid", /* frequency id control */ + "vid", /* voltage id control */ + "ttp", /* thermal trip */ + "tm", + "stc", + "100mhzsteps", + "hwpstate", + "", /* constant_tsc - moved to flags */ + /* nothing */ + }; + struct cpuinfo_x86 *c = v; + int i, n = c - cpu_data; + int fpu_exception; + +#ifdef CONFIG_SMP + if (!cpu_online(n)) + return 0; +#endif + seq_printf(m, "processor\t: %d\n" + "vendor_id\t: %s\n" + "cpu family\t: %d\n" + "model\t\t: %d\n" + "model name\t: %s\n", + n, + c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", + c->x86, + c->x86_model, + c->x86_model_id[0] ? c->x86_model_id : "unknown"); + + if (c->x86_mask || c->cpuid_level >= 0) + seq_printf(m, "stepping\t: %d\n", c->x86_mask); + else + seq_printf(m, "stepping\t: unknown\n"); + + if ( cpu_has(c, X86_FEATURE_TSC) ) { + unsigned int freq = cpufreq_quick_get(n); + if (!freq) + freq = cpu_khz; + seq_printf(m, "cpu MHz\t\t: %u.%03u\n", + freq / 1000, (freq % 1000)); + } + + /* Cache size */ + if (c->x86_cache_size >= 0) + seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); +#ifdef CONFIG_X86_HT + if (c->x86_max_cores * smp_num_siblings > 1) { + seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); + seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); + seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); + } +#endif + + /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */ + fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu); + seq_printf(m, "fdiv_bug\t: %s\n" + "hlt_bug\t\t: %s\n" + "f00f_bug\t: %s\n" + "coma_bug\t: %s\n" + "fpu\t\t: %s\n" + "fpu_exception\t: %s\n" + "cpuid level\t: %d\n" + "wp\t\t: %s\n" + "flags\t\t:", + c->fdiv_bug ? "yes" : "no", + c->hlt_works_ok ? "no" : "yes", + c->f00f_bug ? "yes" : "no", + c->coma_bug ? "yes" : "no", + c->hard_math ? "yes" : "no", + fpu_exception ? "yes" : "no", + c->cpuid_level, + c->wp_works_ok ? "yes" : "no"); + + for ( i = 0 ; i < 32*NCAPINTS ; i++ ) + if ( test_bit(i, c->x86_capability) && + x86_cap_flags[i] != NULL ) + seq_printf(m, " %s", x86_cap_flags[i]); + + for (i = 0; i < 32; i++) + if (c->x86_power & (1 << i)) { + if (i < ARRAY_SIZE(x86_power_flags) && + x86_power_flags[i]) + seq_printf(m, "%s%s", + x86_power_flags[i][0]?" ":"", + x86_power_flags[i]); + else + seq_printf(m, " [%d]", i); + } + + seq_printf(m, "\nbogomips\t: %lu.%02lu\n", + c->loops_per_jiffy/(500000/HZ), + (c->loops_per_jiffy/(5000/HZ)) % 100); + seq_printf(m, "clflush size\t: %u\n\n", c->x86_clflush_size); + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < NR_CPUS ? cpu_data + *pos : NULL; +} +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} +static void c_stop(struct seq_file *m, void *v) +{ +} +struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c new file mode 100644 index 00000000000..200fb3f9ebf --- /dev/null +++ b/arch/x86/kernel/cpu/transmeta.c @@ -0,0 +1,116 @@ +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <asm/processor.h> +#include <asm/msr.h> +#include "cpu.h" + +static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) +{ + unsigned int cap_mask, uk, max, dummy; + unsigned int cms_rev1, cms_rev2; + unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; + char cpu_info[65]; + + get_model_name(c); /* Same as AMD/Cyrix */ + display_cacheinfo(c); + + /* Print CMS and CPU revision */ + max = cpuid_eax(0x80860000); + cpu_rev = 0; + if ( max >= 0x80860001 ) { + cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); + if (cpu_rev != 0x02000000) { + printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n", + (cpu_rev >> 24) & 0xff, + (cpu_rev >> 16) & 0xff, + (cpu_rev >> 8) & 0xff, + cpu_rev & 0xff, + cpu_freq); + } + } + if ( max >= 0x80860002 ) { + cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy); + if (cpu_rev == 0x02000000) { + printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n", + new_cpu_rev, cpu_freq); + } + printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n", + (cms_rev1 >> 24) & 0xff, + (cms_rev1 >> 16) & 0xff, + (cms_rev1 >> 8) & 0xff, + cms_rev1 & 0xff, + cms_rev2); + } + if ( max >= 0x80860006 ) { + cpuid(0x80860003, + (void *)&cpu_info[0], + (void *)&cpu_info[4], + (void *)&cpu_info[8], + (void *)&cpu_info[12]); + cpuid(0x80860004, + (void *)&cpu_info[16], + (void *)&cpu_info[20], + (void *)&cpu_info[24], + (void *)&cpu_info[28]); + cpuid(0x80860005, + (void *)&cpu_info[32], + (void *)&cpu_info[36], + (void *)&cpu_info[40], + (void *)&cpu_info[44]); + cpuid(0x80860006, + (void *)&cpu_info[48], + (void *)&cpu_info[52], + (void *)&cpu_info[56], + (void *)&cpu_info[60]); + cpu_info[64] = '\0'; + printk(KERN_INFO "CPU: %s\n", cpu_info); + } + + /* Unhide possibly hidden capability flags */ + rdmsr(0x80860004, cap_mask, uk); + wrmsr(0x80860004, ~0, uk); + c->x86_capability[0] = cpuid_edx(0x00000001); + wrmsr(0x80860004, cap_mask, uk); + + /* All Transmeta CPUs have a constant TSC */ + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); + + /* If we can run i686 user-space code, call us an i686 */ +#define USER686 ((1 << X86_FEATURE_TSC)|\ + (1 << X86_FEATURE_CX8)|\ + (1 << X86_FEATURE_CMOV)) + if (c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686) + c->x86 = 6; + +#ifdef CONFIG_SYSCTL + /* randomize_va_space slows us down enormously; + it probably triggers retranslation of x86->native bytecode */ + randomize_va_space = 0; +#endif +} + +static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c) +{ + u32 xlvl; + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ( (xlvl & 0xffff0000) == 0x80860000 ) { + if ( xlvl >= 0x80860001 ) + c->x86_capability[2] = cpuid_edx(0x80860001); + } +} + +static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { + .c_vendor = "Transmeta", + .c_ident = { "GenuineTMx86", "TransmetaCPU" }, + .c_init = init_transmeta, + .c_identify = transmeta_identify, +}; + +int __init transmeta_init_cpu(void) +{ + cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev; + return 0; +} diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c new file mode 100644 index 00000000000..a7a4e75bdcd --- /dev/null +++ b/arch/x86/kernel/cpu/umc.c @@ -0,0 +1,26 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <asm/processor.h> +#include "cpu.h" + +/* UMC chips appear to be only either 386 or 486, so no special init takes place. + */ + +static struct cpu_dev umc_cpu_dev __cpuinitdata = { + .c_vendor = "UMC", + .c_ident = { "UMC UMC UMC" }, + .c_models = { + { .vendor = X86_VENDOR_UMC, .family = 4, .model_names = + { + [1] = "U5D", + [2] = "U5S", + } + }, + }, +}; + +int __init umc_init_cpu(void) +{ + cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev; + return 0; +} |