56 files changed, 17536 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
new file mode 100644
index 00000000000..778396c78d6
--- /dev/null
+++ b/arch/x86/kernel/cpu/Makefile
@@ -0,0 +1,20 @@
+#
+# Makefile for x86-compatible CPU details and quirks
+#
+
+obj-y	:=	common.o proc.o bugs.o
+
+obj-y	+=	amd.o
+obj-y	+=	cyrix.o
+obj-y	+=	centaur.o
+obj-y	+=	transmeta.o
+obj-y	+=	intel.o intel_cacheinfo.o addon_cpuid_features.o
+obj-y	+=	nexgen.o
+obj-y	+=	umc.o
+
+obj-$(CONFIG_X86_MCE)	+=	mcheck/
+
+obj-$(CONFIG_MTRR)	+= 	mtrr/
+obj-$(CONFIG_CPU_FREQ)	+=	cpufreq/
+
+obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
new file mode 100644
index 00000000000..3e91d3ee26e
--- /dev/null
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -0,0 +1,50 @@
+
+/*
+ *	Routines to indentify additional cpu features that are scattered in
+ *	cpuid space.
+ */
+
+#include <linux/cpu.h>
+
+#include <asm/processor.h>
+
+struct cpuid_bit {
+	u16 feature;
+	u8 reg;
+	u8 bit;
+	u32 level;
+};
+
+enum cpuid_regs {
+	CR_EAX = 0,
+	CR_ECX,
+	CR_EDX,
+	CR_EBX
+};
+
+void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
+{
+	u32 max_level;
+	u32 regs[4];
+	const struct cpuid_bit *cb;
+
+	static const struct cpuid_bit cpuid_bits[] = {
+		{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
+		{ 0, 0, 0, 0 }
+	};
+
+	for (cb = cpuid_bits; cb->feature; cb++) {
+
+		/* Verify that the level is valid */
+		max_level = cpuid_eax(cb->level & 0xffff0000);
+		if (max_level < cb->level ||
+		    max_level > (cb->level | 0xffff))
+			continue;
+
+		cpuid(cb->level, &regs[CR_EAX], &regs[CR_EBX],
+			&regs[CR_ECX], &regs[CR_EDX]);
+
+		if (regs[cb->reg] & (1 << cb->bit))
+			set_bit(cb->feature, c->x86_capability);
+	}
+}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
new file mode 100644
index 00000000000..dcf6bbb1c7c
--- /dev/null
+++ b/arch/x86/kernel/cpu/amd.c
@@ -0,0 +1,337 @@
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/mm.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/apic.h>
+
+#include "cpu.h"
+
+/*
+ *	B step AMD K6 before B 9730xxxx have hardware bugs that can cause
+ *	misexecution of code under Linux. Owners of such processors should
+ *	contact AMD for precise details and a CPU swap.
+ *
+ *	See	http://www.multimania.com/poulot/k6bug.html
+ *		http://www.amd.com/K6/k6docs/revgd.html
+ *
+ *	The following test is erm.. interesting. AMD neglected to up
+ *	the chip setting when fixing the bug but they also tweaked some
+ *	performance at the same time..
+ */
+ 
+extern void vide(void);
+__asm__(".align 4\nvide: ret");
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#define ENABLE_C1E_MASK         0x18000000
+#define CPUID_PROCESSOR_SIGNATURE       1
+#define CPUID_XFAM              0x0ff00000
+#define CPUID_XFAM_K8           0x00000000
+#define CPUID_XFAM_10H          0x00100000
+#define CPUID_XFAM_11H          0x00200000
+#define CPUID_XMOD              0x000f0000
+#define CPUID_XMOD_REV_F        0x00040000
+
+/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
+static __cpuinit int amd_apic_timer_broken(void)
+{
+	u32 lo, hi;
+	u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+	switch (eax & CPUID_XFAM) {
+	case CPUID_XFAM_K8:
+		if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
+			break;
+	case CPUID_XFAM_10H:
+	case CPUID_XFAM_11H:
+		rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
+		if (lo & ENABLE_C1E_MASK)
+			return 1;
+                break;
+        default:
+                /* err on the side of caution */
+		return 1;
+        }
+	return 0;
+}
+#endif
+
+int force_mwait __cpuinitdata;
+
+static void __cpuinit init_amd(struct cpuinfo_x86 *c)
+{
+	u32 l, h;
+	int mbytes = num_physpages >> (20-PAGE_SHIFT);
+	int r;
+
+#ifdef CONFIG_SMP
+	unsigned long long value;
+
+	/* Disable TLB flush filter by setting HWCR.FFDIS on K8
+	 * bit 6 of msr C001_0015
+	 *
+	 * Errata 63 for SH-B3 steppings
+	 * Errata 122 for all steppings (F+ have it disabled by default)
+	 */
+	if (c->x86 == 15) {
+		rdmsrl(MSR_K7_HWCR, value);
+		value |= 1 << 6;
+		wrmsrl(MSR_K7_HWCR, value);
+	}
+#endif
+
+	/*
+	 *	FIXME: We should handle the K5 here. Set up the write
+	 *	range and also turn on MSR 83 bits 4 and 31 (write alloc,
+	 *	no bus pipeline)
+	 */
+
+	/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+	   3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+	clear_bit(0*32+31, c->x86_capability);
+	
+	r = get_model_name(c);
+
+	switch(c->x86)
+	{
+		case 4:
+		/*
+		 * General Systems BIOSen alias the cpu frequency registers
+		 * of the Elan at 0x000df000. Unfortuantly, one of the Linux
+		 * drivers subsequently pokes it, and changes the CPU speed.
+		 * Workaround : Remove the unneeded alias.
+		 */
+#define CBAR		(0xfffc) /* Configuration Base Address  (32-bit) */
+#define CBAR_ENB	(0x80000000)
+#define CBAR_KEY	(0X000000CB)
+			if (c->x86_model==9 || c->x86_model == 10) {
+				if (inl (CBAR) & CBAR_ENB)
+					outl (0 | CBAR_KEY, CBAR);
+			}
+			break;
+		case 5:
+			if( c->x86_model < 6 )
+			{
+				/* Based on AMD doc 20734R - June 2000 */
+				if ( c->x86_model == 0 ) {
+					clear_bit(X86_FEATURE_APIC, c->x86_capability);
+					set_bit(X86_FEATURE_PGE, c->x86_capability);
+				}
+				break;
+			}
+			
+			if ( c->x86_model == 6 && c->x86_mask == 1 ) {
+				const int K6_BUG_LOOP = 1000000;
+				int n;
+				void (*f_vide)(void);
+				unsigned long d, d2;
+				
+				printk(KERN_INFO "AMD K6 stepping B detected - ");
+				
+				/*
+				 * It looks like AMD fixed the 2.6.2 bug and improved indirect 
+				 * calls at the same time.
+				 */
+
+				n = K6_BUG_LOOP;
+				f_vide = vide;
+				rdtscl(d);
+				while (n--) 
+					f_vide();
+				rdtscl(d2);
+				d = d2-d;
+
+				if (d > 20*K6_BUG_LOOP) 
+					printk("system stability may be impaired when more than 32 MB are used.\n");
+				else 
+					printk("probably OK (after B9730xxxx).\n");
+				printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
+			}
+
+			/* K6 with old style WHCR */
+			if (c->x86_model < 8 ||
+			   (c->x86_model== 8 && c->x86_mask < 8)) {
+				/* We can only write allocate on the low 508Mb */
+				if(mbytes>508)
+					mbytes=508;
+
+				rdmsr(MSR_K6_WHCR, l, h);
+				if ((l&0x0000FFFF)==0) {
+					unsigned long flags;
+					l=(1<<0)|((mbytes/4)<<1);
+					local_irq_save(flags);
+					wbinvd();
+					wrmsr(MSR_K6_WHCR, l, h);
+					local_irq_restore(flags);
+					printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
+						mbytes);
+				}
+				break;
+			}
+
+			if ((c->x86_model == 8 && c->x86_mask >7) ||
+			     c->x86_model == 9 || c->x86_model == 13) {
+				/* The more serious chips .. */
+
+				if(mbytes>4092)
+					mbytes=4092;
+
+				rdmsr(MSR_K6_WHCR, l, h);
+				if ((l&0xFFFF0000)==0) {
+					unsigned long flags;
+					l=((mbytes>>2)<<22)|(1<<16);
+					local_irq_save(flags);
+					wbinvd();
+					wrmsr(MSR_K6_WHCR, l, h);
+					local_irq_restore(flags);
+					printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
+						mbytes);
+				}
+
+				/*  Set MTRR capability flag if appropriate */
+				if (c->x86_model == 13 || c->x86_model == 9 ||
+				   (c->x86_model == 8 && c->x86_mask >= 8))
+					set_bit(X86_FEATURE_K6_MTRR, c->x86_capability);
+				break;
+			}
+
+			if (c->x86_model == 10) {
+				/* AMD Geode LX is model 10 */
+				/* placeholder for any needed mods */
+				break;
+			}
+			break;
+		case 6: /* An Athlon/Duron */
+ 
+			/* Bit 15 of Athlon specific MSR 15, needs to be 0
+ 			 * to enable SSE on Palomino/Morgan/Barton CPU's.
+			 * If the BIOS didn't enable it already, enable it here.
+			 */
+			if (c->x86_model >= 6 && c->x86_model <= 10) {
+				if (!cpu_has(c, X86_FEATURE_XMM)) {
+					printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
+					rdmsr(MSR_K7_HWCR, l, h);
+					l &= ~0x00008000;
+					wrmsr(MSR_K7_HWCR, l, h);
+					set_bit(X86_FEATURE_XMM, c->x86_capability);
+				}
+			}
+
+			/* It's been determined by AMD that Athlons since model 8 stepping 1
+			 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
+			 * As per AMD technical note 27212 0.2
+			 */
+			if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) {
+				rdmsr(MSR_K7_CLK_CTL, l, h);
+				if ((l & 0xfff00000) != 0x20000000) {
+					printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
+						((l & 0x000fffff)|0x20000000));
+					wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
+				}
+			}
+			break;
+	}
+
+	switch (c->x86) {
+	case 15:
+	/* Use K8 tuning for Fam10h and Fam11h */
+	case 0x10:
+	case 0x11:
+		set_bit(X86_FEATURE_K8, c->x86_capability);
+		break;
+	case 6:
+		set_bit(X86_FEATURE_K7, c->x86_capability); 
+		break;
+	}
+	if (c->x86 >= 6)
+		set_bit(X86_FEATURE_FXSAVE_LEAK, c->x86_capability);
+
+	display_cacheinfo(c);
+
+	if (cpuid_eax(0x80000000) >= 0x80000008) {
+		c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
+	}
+
+	if (cpuid_eax(0x80000000) >= 0x80000007) {
+		c->x86_power = cpuid_edx(0x80000007);
+		if (c->x86_power & (1<<8))
+			set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+	}
+
+#ifdef CONFIG_X86_HT
+	/*
+	 * On a AMD multi core setup the lower bits of the APIC id
+	 * distingush the cores.
+	 */
+	if (c->x86_max_cores > 1) {
+		int cpu = smp_processor_id();
+		unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
+
+		if (bits == 0) {
+			while ((1 << bits) < c->x86_max_cores)
+				bits++;
+		}
+		c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1);
+		c->phys_proc_id >>= bits;
+		printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
+		       cpu, c->x86_max_cores, c->cpu_core_id);
+	}
+#endif
+
+	if (cpuid_eax(0x80000000) >= 0x80000006) {
+		if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000))
+			num_cache_leaves = 4;
+		else
+			num_cache_leaves = 3;
+	}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	if (amd_apic_timer_broken())
+		local_apic_timer_disabled = 1;
+#endif
+
+	if (c->x86 == 0x10 && !force_mwait)
+		clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
+
+	/* K6s reports MCEs but don't actually have all the MSRs */
+	if (c->x86 < 6)
+		clear_bit(X86_FEATURE_MCE, c->x86_capability);
+}
+
+static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+{
+	/* AMD errata T13 (order #21922) */
+	if ((c->x86 == 6)) {
+		if (c->x86_model == 3 && c->x86_mask == 0)	/* Duron Rev A0 */
+			size = 64;
+		if (c->x86_model == 4 &&
+		    (c->x86_mask==0 || c->x86_mask==1))	/* Tbird rev A1/A2 */
+			size = 256;
+	}
+	return size;
+}
+
+static struct cpu_dev amd_cpu_dev __cpuinitdata = {
+	.c_vendor	= "AMD",
+	.c_ident 	= { "AuthenticAMD" },
+	.c_models = {
+		{ .vendor = X86_VENDOR_AMD, .family = 4, .model_names =
+		  {
+			  [3] = "486 DX/2",
+			  [7] = "486 DX/2-WB",
+			  [8] = "486 DX/4", 
+			  [9] = "486 DX/4-WB", 
+			  [14] = "Am5x86-WT",
+			  [15] = "Am5x86-WB" 
+		  }
+		},
+	},
+	.c_init		= init_amd,
+	.c_size_cache	= amd_size_cache,
+};
+
+int __init amd_init_cpu(void)
+{
+	cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
+	return 0;
+}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
new file mode 100644
index 00000000000..59266f03d1c
--- /dev/null
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -0,0 +1,192 @@
+/*
+ *  arch/i386/cpu/bugs.c
+ *
+ *  Copyright (C) 1994  Linus Torvalds
+ *
+ *  Cyrix stuff, June 1998 by:
+ *	- Rafael R. Reilova (moved everything from head.S),
+ *        <rreilova@ececs.uc.edu>
+ *	- Channing Corn (tests & fixes),
+ *	- Andrew D. Balsa (code cleanup).
+ */
+#include <linux/init.h>
+#include <linux/utsname.h>
+#include <asm/bugs.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/msr.h>
+#include <asm/paravirt.h>
+#include <asm/alternative.h>
+
+static int __init no_halt(char *s)
+{
+	boot_cpu_data.hlt_works_ok = 0;
+	return 1;
+}
+
+__setup("no-hlt", no_halt);
+
+static int __init mca_pentium(char *s)
+{
+	mca_pentium_flag = 1;
+	return 1;
+}
+
+__setup("mca-pentium", mca_pentium);
+
+static int __init no_387(char *s)
+{
+	boot_cpu_data.hard_math = 0;
+	write_cr0(0xE | read_cr0());
+	return 1;
+}
+
+__setup("no387", no_387);
+
+static double __initdata x = 4195835.0;
+static double __initdata y = 3145727.0;
+
+/*
+ * This used to check for exceptions..
+ * However, it turns out that to support that,
+ * the XMM trap handlers basically had to
+ * be buggy. So let's have a correct XMM trap
+ * handler, and forget about printing out
+ * some status at boot.
+ *
+ * We should really only care about bugs here
+ * anyway. Not features.
+ */
+static void __init check_fpu(void)
+{
+	if (!boot_cpu_data.hard_math) {
+#ifndef CONFIG_MATH_EMULATION
+		printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
+		printk(KERN_EMERG "Giving up.\n");
+		for (;;) ;
+#endif
+		return;
+	}
+
+/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */
+	/* Test for the divl bug.. */
+	__asm__("fninit\n\t"
+		"fldl %1\n\t"
+		"fdivl %2\n\t"
+		"fmull %2\n\t"
+		"fldl %1\n\t"
+		"fsubp %%st,%%st(1)\n\t"
+		"fistpl %0\n\t"
+		"fwait\n\t"
+		"fninit"
+		: "=m" (*&boot_cpu_data.fdiv_bug)
+		: "m" (*&x), "m" (*&y));
+	if (boot_cpu_data.fdiv_bug)
+		printk("Hmm, FPU with FDIV bug.\n");
+}
+
+static void __init check_hlt(void)
+{
+	if (paravirt_enabled())
+		return;
+
+	printk(KERN_INFO "Checking 'hlt' instruction... ");
+	if (!boot_cpu_data.hlt_works_ok) {
+		printk("disabled\n");
+		return;
+	}
+	halt();
+	halt();
+	halt();
+	halt();
+	printk("OK.\n");
+}
+
+/*
+ *	Most 386 processors have a bug where a POPAD can lock the
+ *	machine even from user space.
+ */
+
+static void __init check_popad(void)
+{
+#ifndef CONFIG_X86_POPAD_OK
+	int res, inp = (int) &res;
+
+	printk(KERN_INFO "Checking for popad bug... ");
+	__asm__ __volatile__(
+	  "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
+	  : "=&a" (res)
+	  : "d" (inp)
+	  : "ecx", "edi" );
+	/* If this fails, it means that any user program may lock the CPU hard. Too bad. */
+	if (res != 12345678) printk( "Buggy.\n" );
+		        else printk( "OK.\n" );
+#endif
+}
+
+/*
+ * Check whether we are able to run this kernel safely on SMP.
+ *
+ * - In order to run on a i386, we need to be compiled for i386
+ *   (for due to lack of "invlpg" and working WP on a i386)
+ * - In order to run on anything without a TSC, we need to be
+ *   compiled for a i486.
+ * - In order to support the local APIC on a buggy Pentium machine,
+ *   we need to be compiled with CONFIG_X86_GOOD_APIC disabled,
+ *   which happens implicitly if compiled for a Pentium or lower
+ *   (unless an advanced selection of CPU features is used) as an
+ *   otherwise config implies a properly working local APIC without
+ *   the need to do extra reads from the APIC.
+*/
+
+static void __init check_config(void)
+{
+/*
+ * We'd better not be a i386 if we're configured to use some
+ * i486+ only features! (WP works in supervisor mode and the
+ * new "invlpg" and "bswap" instructions)
+ */
+#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP)
+	if (boot_cpu_data.x86 == 3)
+		panic("Kernel requires i486+ for 'invlpg' and other features");
+#endif
+
+/*
+ * If we configured ourselves for a TSC, we'd better have one!
+ */
+#ifdef CONFIG_X86_TSC
+	if (!cpu_has_tsc && !tsc_disable)
+		panic("Kernel compiled for Pentium+, requires TSC feature!");
+#endif
+
+/*
+ * If we were told we had a good local APIC, check for buggy Pentia,
+ * i.e. all B steppings and the C2 stepping of P54C when using their
+ * integrated APIC (see 11AP erratum in "Pentium Processor
+ * Specification Update").
+ */
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC)
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
+	    && cpu_has_apic
+	    && boot_cpu_data.x86 == 5
+	    && boot_cpu_data.x86_model == 2
+	    && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11))
+		panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!");
+#endif
+}
+
+
+void __init check_bugs(void)
+{
+	identify_boot_cpu();
+#ifndef CONFIG_SMP
+	printk("CPU: ");
+	print_cpu_info(&boot_cpu_data);
+#endif
+	check_config();
+	check_fpu();
+	check_hlt();
+	check_popad();
+	init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+	alternative_instructions();
+}
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
new file mode 100644
index 00000000000..473eac883c7
--- /dev/null
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -0,0 +1,471 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/e820.h>
+#include <asm/mtrr.h>
+#include "cpu.h"
+
+#ifdef CONFIG_X86_OOSTORE
+
+static u32 __cpuinit power2(u32 x)
+{
+	u32 s=1;
+	while(s<=x)
+		s<<=1;
+	return s>>=1;
+}
+
+
+/*
+ *	Set up an actual MCR
+ */
+ 
+static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key)
+{
+	u32 lo, hi;
+	
+	hi = base & ~0xFFF;
+	lo = ~(size-1);		/* Size is a power of 2 so this makes a mask */
+	lo &= ~0xFFF;		/* Remove the ctrl value bits */
+	lo |= key;		/* Attribute we wish to set */
+	wrmsr(reg+MSR_IDT_MCR0, lo, hi);
+	mtrr_centaur_report_mcr(reg, lo, hi);	/* Tell the mtrr driver */
+}
+
+/*
+ *	Figure what we can cover with MCR's
+ *
+ *	Shortcut: We know you can't put 4Gig of RAM on a winchip
+ */
+
+static u32 __cpuinit ramtop(void)		/* 16388 */
+{
+	int i;
+	u32 top = 0;
+	u32 clip = 0xFFFFFFFFUL;
+	
+	for (i = 0; i < e820.nr_map; i++) {
+		unsigned long start, end;
+
+		if (e820.map[i].addr > 0xFFFFFFFFUL)
+			continue;
+		/*
+		 *	Don't MCR over reserved space. Ignore the ISA hole
+		 *	we frob around that catastrophy already
+		 */
+		 			
+		if (e820.map[i].type == E820_RESERVED)
+		{
+			if(e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip)
+				clip = e820.map[i].addr;
+			continue;
+		}
+		start = e820.map[i].addr;
+		end = e820.map[i].addr + e820.map[i].size;
+		if (start >= end)
+			continue;
+		if (end > top)
+			top = end;
+	}
+	/* Everything below 'top' should be RAM except for the ISA hole.
+	   Because of the limited MCR's we want to map NV/ACPI into our
+	   MCR range for gunk in RAM 
+	   
+	   Clip might cause us to MCR insufficient RAM but that is an
+	   acceptable failure mode and should only bite obscure boxes with
+	   a VESA hole at 15Mb
+	   
+	   The second case Clip sometimes kicks in is when the EBDA is marked
+	   as reserved. Again we fail safe with reasonable results
+	*/
+	
+	if(top>clip)
+		top=clip;
+		
+	return top;
+}
+
+/*
+ *	Compute a set of MCR's to give maximum coverage
+ */
+
+static int __cpuinit centaur_mcr_compute(int nr, int key)
+{
+	u32 mem = ramtop();
+	u32 root = power2(mem);
+	u32 base = root;
+	u32 top = root;
+	u32 floor = 0;
+	int ct = 0;
+	
+	while(ct<nr)
+	{
+		u32 fspace = 0;
+
+		/*
+		 *	Find the largest block we will fill going upwards
+		 */
+
+		u32 high = power2(mem-top);	
+
+		/*
+		 *	Find the largest block we will fill going downwards
+		 */
+
+		u32 low = base/2;
+
+		/*
+		 *	Don't fill below 1Mb going downwards as there
+		 *	is an ISA hole in the way.
+		 */		
+		 
+		if(base <= 1024*1024)
+			low = 0;
+			
+		/*
+		 *	See how much space we could cover by filling below
+		 *	the ISA hole
+		 */
+		 
+		if(floor == 0)
+			fspace = 512*1024;
+		else if(floor ==512*1024)
+			fspace = 128*1024;
+
+		/* And forget ROM space */
+		
+		/*
+		 *	Now install the largest coverage we get
+		 */
+		 
+		if(fspace > high && fspace > low)
+		{
+			centaur_mcr_insert(ct, floor, fspace, key);
+			floor += fspace;
+		}
+		else if(high > low)
+		{
+			centaur_mcr_insert(ct, top, high, key);
+			top += high;
+		}
+		else if(low > 0)
+		{
+			base -= low;
+			centaur_mcr_insert(ct, base, low, key);
+		}
+		else break;
+		ct++;
+	}
+	/*
+	 *	We loaded ct values. We now need to set the mask. The caller
+	 *	must do this bit.
+	 */
+	 
+	return ct;
+}
+
+static void __cpuinit centaur_create_optimal_mcr(void)
+{
+	int i;
+	/*
+	 *	Allocate up to 6 mcrs to mark as much of ram as possible
+	 *	as write combining and weak write ordered.
+	 *
+	 *	To experiment with: Linux never uses stack operations for 
+	 *	mmio spaces so we could globally enable stack operation wc
+	 *
+	 *	Load the registers with type 31 - full write combining, all
+	 *	writes weakly ordered.
+	 */
+	int used = centaur_mcr_compute(6, 31);
+
+	/*
+	 *	Wipe unused MCRs
+	 */
+	 
+	for(i=used;i<8;i++)
+		wrmsr(MSR_IDT_MCR0+i, 0, 0);
+}
+
+static void __cpuinit winchip2_create_optimal_mcr(void)
+{
+	u32 lo, hi;
+	int i;
+
+	/*
+	 *	Allocate up to 6 mcrs to mark as much of ram as possible
+	 *	as write combining, weak store ordered.
+	 *
+	 *	Load the registers with type 25
+	 *		8	-	weak write ordering
+	 *		16	-	weak read ordering
+	 *		1	-	write combining
+	 */
+
+	int used = centaur_mcr_compute(6, 25);
+	
+	/*
+	 *	Mark the registers we are using.
+	 */
+	 
+	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+	for(i=0;i<used;i++)
+		lo|=1<<(9+i);
+	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+	
+	/*
+	 *	Wipe unused MCRs
+	 */
+	 
+	for(i=used;i<8;i++)
+		wrmsr(MSR_IDT_MCR0+i, 0, 0);
+}
+
+/*
+ *	Handle the MCR key on the Winchip 2.
+ */
+
+static void __cpuinit winchip2_unprotect_mcr(void)
+{
+	u32 lo, hi;
+	u32 key;
+	
+	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+	lo&=~0x1C0;	/* blank bits 8-6 */
+	key = (lo>>17) & 7;
+	lo |= key<<6;	/* replace with unlock key */
+	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+}
+
+static void __cpuinit winchip2_protect_mcr(void)
+{
+	u32 lo, hi;
+	
+	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+	lo&=~0x1C0;	/* blank bits 8-6 */
+	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+}
+#endif /* CONFIG_X86_OOSTORE */
+
+#define ACE_PRESENT	(1 << 6)
+#define ACE_ENABLED	(1 << 7)
+#define ACE_FCR		(1 << 28)	/* MSR_VIA_FCR */
+
+#define RNG_PRESENT	(1 << 2)
+#define RNG_ENABLED	(1 << 3)
+#define RNG_ENABLE	(1 << 6)	/* MSR_VIA_RNG */
+
+static void __cpuinit init_c3(struct cpuinfo_x86 *c)
+{
+	u32  lo, hi;
+
+	/* Test for Centaur Extended Feature Flags presence */
+	if (cpuid_eax(0xC0000000) >= 0xC0000001) {
+		u32 tmp = cpuid_edx(0xC0000001);
+
+		/* enable ACE unit, if present and disabled */
+		if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
+			rdmsr (MSR_VIA_FCR, lo, hi);
+			lo |= ACE_FCR;		/* enable ACE unit */
+			wrmsr (MSR_VIA_FCR, lo, hi);
+			printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
+		}
+
+		/* enable RNG unit, if present and disabled */
+		if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
+			rdmsr (MSR_VIA_RNG, lo, hi);
+			lo |= RNG_ENABLE;	/* enable RNG unit */
+			wrmsr (MSR_VIA_RNG, lo, hi);
+			printk(KERN_INFO "CPU: Enabled h/w RNG\n");
+		}
+
+		/* store Centaur Extended Feature Flags as
+		 * word 5 of the CPU capability bit array
+		 */
+		c->x86_capability[5] = cpuid_edx(0xC0000001);
+	}
+
+	/* Cyrix III family needs CX8 & PGE explicity enabled. */
+	if (c->x86_model >=6 && c->x86_model <= 9) {
+		rdmsr (MSR_VIA_FCR, lo, hi);
+		lo |= (1<<1 | 1<<7);
+		wrmsr (MSR_VIA_FCR, lo, hi);
+		set_bit(X86_FEATURE_CX8, c->x86_capability);
+	}
+
+	/* Before Nehemiah, the C3's had 3dNOW! */
+	if (c->x86_model >=6 && c->x86_model <9)
+		set_bit(X86_FEATURE_3DNOW, c->x86_capability);
+
+	get_model_name(c);
+	display_cacheinfo(c);
+}
+
+static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
+{
+	enum {
+		ECX8=1<<1,
+		EIERRINT=1<<2,
+		DPM=1<<3,
+		DMCE=1<<4,
+		DSTPCLK=1<<5,
+		ELINEAR=1<<6,
+		DSMC=1<<7,
+		DTLOCK=1<<8,
+		EDCTLB=1<<8,
+		EMMX=1<<9,
+		DPDC=1<<11,
+		EBRPRED=1<<12,
+		DIC=1<<13,
+		DDC=1<<14,
+		DNA=1<<15,
+		ERETSTK=1<<16,
+		E2MMX=1<<19,
+		EAMD3D=1<<20,
+	};
+
+	char *name;
+	u32  fcr_set=0;
+	u32  fcr_clr=0;
+	u32  lo,hi,newlo;
+	u32  aa,bb,cc,dd;
+
+	/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+	   3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+	clear_bit(0*32+31, c->x86_capability);
+
+	switch (c->x86) {
+
+		case 5:
+			switch(c->x86_model) {
+			case 4:
+				name="C6";
+				fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
+				fcr_clr=DPDC;
+				printk(KERN_NOTICE "Disabling bugged TSC.\n");
+				clear_bit(X86_FEATURE_TSC, c->x86_capability);
+#ifdef CONFIG_X86_OOSTORE
+				centaur_create_optimal_mcr();
+				/* Enable
+					write combining on non-stack, non-string
+					write combining on string, all types
+					weak write ordering 
+					
+				   The C6 original lacks weak read order 
+				   
+				   Note 0x120 is write only on Winchip 1 */
+				   
+				wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);
+#endif				
+				break;
+			case 8:
+				switch(c->x86_mask) {
+				default:
+					name="2";
+					break;
+				case 7 ... 9:
+					name="2A";
+					break;
+				case 10 ... 15:
+					name="2B";
+					break;
+				}
+				fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D;
+				fcr_clr=DPDC;
+#ifdef CONFIG_X86_OOSTORE
+				winchip2_unprotect_mcr();
+				winchip2_create_optimal_mcr();
+				rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+				/* Enable
+					write combining on non-stack, non-string
+					write combining on string, all types
+					weak write ordering 
+				*/
+				lo|=31;				
+				wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+				winchip2_protect_mcr();
+#endif
+				break;
+			case 9:
+				name="3";
+				fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D;
+				fcr_clr=DPDC;
+#ifdef CONFIG_X86_OOSTORE
+				winchip2_unprotect_mcr();
+				winchip2_create_optimal_mcr();
+				rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+				/* Enable
+					write combining on non-stack, non-string
+					write combining on string, all types
+					weak write ordering 
+				*/
+				lo|=31;				
+				wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+				winchip2_protect_mcr();
+#endif
+				break;
+			default:
+				name="??";
+			}
+
+			rdmsr(MSR_IDT_FCR1, lo, hi);
+			newlo=(lo|fcr_set) & (~fcr_clr);
+
+			if (newlo!=lo) {
+				printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo );
+				wrmsr(MSR_IDT_FCR1, newlo, hi );
+			} else {
+				printk(KERN_INFO "Centaur FCR is 0x%X\n",lo);
+			}
+			/* Emulate MTRRs using Centaur's MCR. */
+			set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability);
+			/* Report CX8 */
+			set_bit(X86_FEATURE_CX8, c->x86_capability);
+			/* Set 3DNow! on Winchip 2 and above. */
+			if (c->x86_model >=8)
+				set_bit(X86_FEATURE_3DNOW, c->x86_capability);
+			/* See if we can find out some more. */
+			if ( cpuid_eax(0x80000000) >= 0x80000005 ) {
+				/* Yes, we can. */
+				cpuid(0x80000005,&aa,&bb,&cc,&dd);
+				/* Add L1 data and code cache sizes. */
+				c->x86_cache_size = (cc>>24)+(dd>>24);
+			}
+			sprintf( c->x86_model_id, "WinChip %s", name );
+			break;
+
+		case 6:
+			init_c3(c);
+			break;
+	}
+}
+
+static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+{
+	/* VIA C3 CPUs (670-68F) need further shifting. */
+	if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
+		size >>= 8;
+
+	/* VIA also screwed up Nehemiah stepping 1, and made
+	   it return '65KB' instead of '64KB'
+	   - Note, it seems this may only be in engineering samples. */
+	if ((c->x86==6) && (c->x86_model==9) && (c->x86_mask==1) && (size==65))
+		size -=1;
+
+	return size;
+}
+
+static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
+	.c_vendor	= "Centaur",
+	.c_ident	= { "CentaurHauls" },
+	.c_init		= init_centaur,
+	.c_size_cache	= centaur_size_cache,
+};
+
+int __init centaur_init_cpu(void)
+{
+	cpu_devs[X86_VENDOR_CENTAUR] = &centaur_cpu_dev;
+	return 0;
+}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
new file mode 100644
index 00000000000..d506201d397
--- /dev/null
+++ b/arch/x86/kernel/cpu/common.c
@@ -0,0 +1,733 @@
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/bootmem.h>
+#include <asm/semaphore.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/mtrr.h>
+#include <asm/mce.h>
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <mach_apic.h>
+#endif
+
+#include "cpu.h"
+
+DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
+	[GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 },
+	[GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 },
+	[GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 },
+	[GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 },
+	/*
+	 * Segments used for calling PnP BIOS have byte granularity.
+	 * They code segments and data segments have fixed 64k limits,
+	 * the transfer segment sizes are set at run time.
+	 */
+	[GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
+	[GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */
+	[GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */
+	[GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */
+	[GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */
+	/*
+	 * The APM segments have byte granularity and their bases
+	 * are set at run time.  All have 64k limits.
+	 */
+	[GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
+	/* 16-bit code */
+	[GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 },
+	[GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */
+
+	[GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 },
+	[GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 },
+} };
+EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
+
+static int cachesize_override __cpuinitdata = -1;
+static int disable_x86_fxsr __cpuinitdata;
+static int disable_x86_serial_nr __cpuinitdata = 1;
+static int disable_x86_sep __cpuinitdata;
+
+struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
+
+extern int disable_pse;
+
+static void __cpuinit default_init(struct cpuinfo_x86 * c)
+{
+	/* Not much we can do here... */
+	/* Check if at least it has cpuid */
+	if (c->cpuid_level == -1) {
+		/* No cpuid. It must be an ancient CPU */
+		if (c->x86 == 4)
+			strcpy(c->x86_model_id, "486");
+		else if (c->x86 == 3)
+			strcpy(c->x86_model_id, "386");
+	}
+}
+
+static struct cpu_dev __cpuinitdata default_cpu = {
+	.c_init	= default_init,
+	.c_vendor = "Unknown",
+};
+static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu;
+
+static int __init cachesize_setup(char *str)
+{
+	get_option (&str, &cachesize_override);
+	return 1;
+}
+__setup("cachesize=", cachesize_setup);
+
+int __cpuinit get_model_name(struct cpuinfo_x86 *c)
+{
+	unsigned int *v;
+	char *p, *q;
+
+	if (cpuid_eax(0x80000000) < 0x80000004)
+		return 0;
+
+	v = (unsigned int *) c->x86_model_id;
+	cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
+	cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
+	cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
+	c->x86_model_id[48] = 0;
+
+	/* Intel chips right-justify this string for some dumb reason;
+	   undo that brain damage */
+	p = q = &c->x86_model_id[0];
+	while ( *p == ' ' )
+	     p++;
+	if ( p != q ) {
+	     while ( *p )
+		  *q++ = *p++;
+	     while ( q <= &c->x86_model_id[48] )
+		  *q++ = '\0';	/* Zero-pad the rest */
+	}
+
+	return 1;
+}
+
+
+void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
+{
+	unsigned int n, dummy, ecx, edx, l2size;
+
+	n = cpuid_eax(0x80000000);
+
+	if (n >= 0x80000005) {
+		cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
+		printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
+			edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+		c->x86_cache_size=(ecx>>24)+(edx>>24);	
+	}
+
+	if (n < 0x80000006)	/* Some chips just has a large L1. */
+		return;
+
+	ecx = cpuid_ecx(0x80000006);
+	l2size = ecx >> 16;
+	
+	/* do processor-specific cache resizing */
+	if (this_cpu->c_size_cache)
+		l2size = this_cpu->c_size_cache(c,l2size);
+
+	/* Allow user to override all this if necessary. */
+	if (cachesize_override != -1)
+		l2size = cachesize_override;
+
+	if ( l2size == 0 )
+		return;		/* Again, no L2 cache is possible */
+
+	c->x86_cache_size = l2size;
+
+	printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
+	       l2size, ecx & 0xFF);
+}
+
+/* Naming convention should be: <Name> [(<Codename>)] */
+/* This table only is used unless init_<vendor>() below doesn't set it; */
+/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
+
+/* Look up CPU names by table lookup. */
+static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
+{
+	struct cpu_model_info *info;
+
+	if ( c->x86_model >= 16 )
+		return NULL;	/* Range check */
+
+	if (!this_cpu)
+		return NULL;
+
+	info = this_cpu->c_models;
+
+	while (info && info->family) {
+		if (info->family == c->x86)
+			return info->model_names[c->x86_model];
+		info++;
+	}
+	return NULL;		/* Not found */
+}
+
+
+static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+{
+	char *v = c->x86_vendor_id;
+	int i;
+	static int printed;
+
+	for (i = 0; i < X86_VENDOR_NUM; i++) {
+		if (cpu_devs[i]) {
+			if (!strcmp(v,cpu_devs[i]->c_ident[0]) ||
+			    (cpu_devs[i]->c_ident[1] && 
+			     !strcmp(v,cpu_devs[i]->c_ident[1]))) {
+				c->x86_vendor = i;
+				if (!early)
+					this_cpu = cpu_devs[i];
+				return;
+			}
+		}
+	}
+	if (!printed) {
+		printed++;
+		printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
+		printk(KERN_ERR "CPU: Your system may be unstable.\n");
+	}
+	c->x86_vendor = X86_VENDOR_UNKNOWN;
+	this_cpu = &default_cpu;
+}
+
+
+static int __init x86_fxsr_setup(char * s)
+{
+	/* Tell all the other CPU's to not use it... */
+	disable_x86_fxsr = 1;
+
+	/*
+	 * ... and clear the bits early in the boot_cpu_data
+	 * so that the bootup process doesn't try to do this
+	 * either.
+	 */
+	clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability);
+	clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability);
+	return 1;
+}
+__setup("nofxsr", x86_fxsr_setup);
+
+
+static int __init x86_sep_setup(char * s)
+{
+	disable_x86_sep = 1;
+	return 1;
+}
+__setup("nosep", x86_sep_setup);
+
+
+/* Standard macro to see if a specific flag is changeable */
+static inline int flag_is_changeable_p(u32 flag)
+{
+	u32 f1, f2;
+
+	asm("pushfl\n\t"
+	    "pushfl\n\t"
+	    "popl %0\n\t"
+	    "movl %0,%1\n\t"
+	    "xorl %2,%0\n\t"
+	    "pushl %0\n\t"
+	    "popfl\n\t"
+	    "pushfl\n\t"
+	    "popl %0\n\t"
+	    "popfl\n\t"
+	    : "=&r" (f1), "=&r" (f2)
+	    : "ir" (flag));
+
+	return ((f1^f2) & flag) != 0;
+}
+
+
+/* Probe for the CPUID instruction */
+static int __cpuinit have_cpuid_p(void)
+{
+	return flag_is_changeable_p(X86_EFLAGS_ID);
+}
+
+void __init cpu_detect(struct cpuinfo_x86 *c)
+{
+	/* Get vendor name */
+	cpuid(0x00000000, &c->cpuid_level,
+	      (int *)&c->x86_vendor_id[0],
+	      (int *)&c->x86_vendor_id[8],
+	      (int *)&c->x86_vendor_id[4]);
+
+	c->x86 = 4;
+	if (c->cpuid_level >= 0x00000001) {
+		u32 junk, tfms, cap0, misc;
+		cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
+		c->x86 = (tfms >> 8) & 15;
+		c->x86_model = (tfms >> 4) & 15;
+		if (c->x86 == 0xf)
+			c->x86 += (tfms >> 20) & 0xff;
+		if (c->x86 >= 0x6)
+			c->x86_model += ((tfms >> 16) & 0xF) << 4;
+		c->x86_mask = tfms & 15;
+		if (cap0 & (1<<19))
+			c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
+	}
+}
+
+/* Do minimum CPU detection early.
+   Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
+   The others are not touched to avoid unwanted side effects.
+
+   WARNING: this function is only called on the BP.  Don't add code here
+   that is supposed to run on all CPUs. */
+static void __init early_cpu_detect(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	c->x86_cache_alignment = 32;
+
+	if (!have_cpuid_p())
+		return;
+
+	cpu_detect(c);
+
+	get_cpu_vendor(c, 1);
+}
+
+static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
+{
+	u32 tfms, xlvl;
+	int ebx;
+
+	if (have_cpuid_p()) {
+		/* Get vendor name */
+		cpuid(0x00000000, &c->cpuid_level,
+		      (int *)&c->x86_vendor_id[0],
+		      (int *)&c->x86_vendor_id[8],
+		      (int *)&c->x86_vendor_id[4]);
+		
+		get_cpu_vendor(c, 0);
+		/* Initialize the standard set of capabilities */
+		/* Note that the vendor-specific code below might override */
+	
+		/* Intel-defined flags: level 0x00000001 */
+		if ( c->cpuid_level >= 0x00000001 ) {
+			u32 capability, excap;
+			cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
+			c->x86_capability[0] = capability;
+			c->x86_capability[4] = excap;
+			c->x86 = (tfms >> 8) & 15;
+			c->x86_model = (tfms >> 4) & 15;
+			if (c->x86 == 0xf)
+				c->x86 += (tfms >> 20) & 0xff;
+			if (c->x86 >= 0x6)
+				c->x86_model += ((tfms >> 16) & 0xF) << 4;
+			c->x86_mask = tfms & 15;
+#ifdef CONFIG_X86_HT
+			c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
+#else
+			c->apicid = (ebx >> 24) & 0xFF;
+#endif
+			if (c->x86_capability[0] & (1<<19))
+				c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
+		} else {
+			/* Have CPUID level 0 only - unheard of */
+			c->x86 = 4;
+		}
+
+		/* AMD-defined flags: level 0x80000001 */
+		xlvl = cpuid_eax(0x80000000);
+		if ( (xlvl & 0xffff0000) == 0x80000000 ) {
+			if ( xlvl >= 0x80000001 ) {
+				c->x86_capability[1] = cpuid_edx(0x80000001);
+				c->x86_capability[6] = cpuid_ecx(0x80000001);
+			}
+			if ( xlvl >= 0x80000004 )
+				get_model_name(c); /* Default name */
+		}
+
+		init_scattered_cpuid_features(c);
+	}
+
+	early_intel_workaround(c);
+
+#ifdef CONFIG_X86_HT
+	c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
+#endif
+}
+
+static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+{
+	if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
+		/* Disable processor serial number */
+		unsigned long lo,hi;
+		rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
+		lo |= 0x200000;
+		wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
+		printk(KERN_NOTICE "CPU serial number disabled.\n");
+		clear_bit(X86_FEATURE_PN, c->x86_capability);
+
+		/* Disabling the serial number may affect the cpuid level */
+		c->cpuid_level = cpuid_eax(0);
+	}
+}
+
+static int __init x86_serial_nr_setup(char *s)
+{
+	disable_x86_serial_nr = 0;
+	return 1;
+}
+__setup("serialnumber", x86_serial_nr_setup);
+
+
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+{
+	int i;
+
+	c->loops_per_jiffy = loops_per_jiffy;
+	c->x86_cache_size = -1;
+	c->x86_vendor = X86_VENDOR_UNKNOWN;
+	c->cpuid_level = -1;	/* CPUID not detected */
+	c->x86_model = c->x86_mask = 0;	/* So far unknown... */
+	c->x86_vendor_id[0] = '\0'; /* Unset */
+	c->x86_model_id[0] = '\0';  /* Unset */
+	c->x86_max_cores = 1;
+	c->x86_clflush_size = 32;
+	memset(&c->x86_capability, 0, sizeof c->x86_capability);
+
+	if (!have_cpuid_p()) {
+		/* First of all, decide if this is a 486 or higher */
+		/* It's a 486 if we can modify the AC flag */
+		if ( flag_is_changeable_p(X86_EFLAGS_AC) )
+			c->x86 = 4;
+		else
+			c->x86 = 3;
+	}
+
+	generic_identify(c);
+
+	printk(KERN_DEBUG "CPU: After generic identify, caps:");
+	for (i = 0; i < NCAPINTS; i++)
+		printk(" %08lx", c->x86_capability[i]);
+	printk("\n");
+
+	if (this_cpu->c_identify) {
+		this_cpu->c_identify(c);
+
+		printk(KERN_DEBUG "CPU: After vendor identify, caps:");
+		for (i = 0; i < NCAPINTS; i++)
+			printk(" %08lx", c->x86_capability[i]);
+		printk("\n");
+	}
+
+	/*
+	 * Vendor-specific initialization.  In this section we
+	 * canonicalize the feature flags, meaning if there are
+	 * features a certain CPU supports which CPUID doesn't
+	 * tell us, CPUID claiming incorrect flags, or other bugs,
+	 * we handle them here.
+	 *
+	 * At the end of this section, c->x86_capability better
+	 * indicate the features this CPU genuinely supports!
+	 */
+	if (this_cpu->c_init)
+		this_cpu->c_init(c);
+
+	/* Disable the PN if appropriate */
+	squash_the_stupid_serial_number(c);
+
+	/*
+	 * The vendor-specific functions might have changed features.  Now
+	 * we do "generic changes."
+	 */
+
+	/* TSC disabled? */
+	if ( tsc_disable )
+		clear_bit(X86_FEATURE_TSC, c->x86_capability);
+
+	/* FXSR disabled? */
+	if (disable_x86_fxsr) {
+		clear_bit(X86_FEATURE_FXSR, c->x86_capability);
+		clear_bit(X86_FEATURE_XMM, c->x86_capability);
+	}
+
+	/* SEP disabled? */
+	if (disable_x86_sep)
+		clear_bit(X86_FEATURE_SEP, c->x86_capability);
+
+	if (disable_pse)
+		clear_bit(X86_FEATURE_PSE, c->x86_capability);
+
+	/* If the model name is still unset, do table lookup. */
+	if ( !c->x86_model_id[0] ) {
+		char *p;
+		p = table_lookup_model(c);
+		if ( p )
+			strcpy(c->x86_model_id, p);
+		else
+			/* Last resort... */
+			sprintf(c->x86_model_id, "%02x/%02x",
+				c->x86, c->x86_model);
+	}
+
+	/* Now the feature flags better reflect actual CPU features! */
+
+	printk(KERN_DEBUG "CPU: After all inits, caps:");
+	for (i = 0; i < NCAPINTS; i++)
+		printk(" %08lx", c->x86_capability[i]);
+	printk("\n");
+
+	/*
+	 * On SMP, boot_cpu_data holds the common feature set between
+	 * all CPUs; so make sure that we indicate which features are
+	 * common between the CPUs.  The first time this routine gets
+	 * executed, c == &boot_cpu_data.
+	 */
+	if ( c != &boot_cpu_data ) {
+		/* AND the already accumulated flags with these */
+		for ( i = 0 ; i < NCAPINTS ; i++ )
+			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+	}
+
+	/* Init Machine Check Exception if available. */
+	mcheck_init(c);
+}
+
+void __init identify_boot_cpu(void)
+{
+	identify_cpu(&boot_cpu_data);
+	sysenter_setup();
+	enable_sep_cpu();
+	mtrr_bp_init();
+}
+
+void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
+{
+	BUG_ON(c == &boot_cpu_data);
+	identify_cpu(c);
+	enable_sep_cpu();
+	mtrr_ap_init();
+}
+
+#ifdef CONFIG_X86_HT
+void __cpuinit detect_ht(struct cpuinfo_x86 *c)
+{
+	u32 	eax, ebx, ecx, edx;
+	int 	index_msb, core_bits;
+
+	cpuid(1, &eax, &ebx, &ecx, &edx);
+
+	if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
+		return;
+
+	smp_num_siblings = (ebx & 0xff0000) >> 16;
+
+	if (smp_num_siblings == 1) {
+		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
+	} else if (smp_num_siblings > 1 ) {
+
+		if (smp_num_siblings > NR_CPUS) {
+			printk(KERN_WARNING "CPU: Unsupported number of the "
+					"siblings %d", smp_num_siblings);
+			smp_num_siblings = 1;
+			return;
+		}
+
+		index_msb = get_count_order(smp_num_siblings);
+		c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+
+		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
+		       c->phys_proc_id);
+
+		smp_num_siblings = smp_num_siblings / c->x86_max_cores;
+
+		index_msb = get_count_order(smp_num_siblings) ;
+
+		core_bits = get_count_order(c->x86_max_cores);
+
+		c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+					       ((1 << core_bits) - 1);
+
+		if (c->x86_max_cores > 1)
+			printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
+			       c->cpu_core_id);
+	}
+}
+#endif
+
+void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
+{
+	char *vendor = NULL;
+
+	if (c->x86_vendor < X86_VENDOR_NUM)
+		vendor = this_cpu->c_vendor;
+	else if (c->cpuid_level >= 0)
+		vendor = c->x86_vendor_id;
+
+	if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
+		printk("%s ", vendor);
+
+	if (!c->x86_model_id[0])
+		printk("%d86", c->x86);
+	else
+		printk("%s", c->x86_model_id);
+
+	if (c->x86_mask || c->cpuid_level >= 0) 
+		printk(" stepping %02x\n", c->x86_mask);
+	else
+		printk("\n");
+}
+
+cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
+
+/* This is hacky. :)
+ * We're emulating future behavior.
+ * In the future, the cpu-specific init functions will be called implicitly
+ * via the magic of initcalls.
+ * They will insert themselves into the cpu_devs structure.
+ * Then, when cpu_init() is called, we can just iterate over that array.
+ */
+
+extern int intel_cpu_init(void);
+extern int cyrix_init_cpu(void);
+extern int nsc_init_cpu(void);
+extern int amd_init_cpu(void);
+extern int centaur_init_cpu(void);
+extern int transmeta_init_cpu(void);
+extern int nexgen_init_cpu(void);
+extern int umc_init_cpu(void);
+
+void __init early_cpu_init(void)
+{
+	intel_cpu_init();
+	cyrix_init_cpu();
+	nsc_init_cpu();
+	amd_init_cpu();
+	centaur_init_cpu();
+	transmeta_init_cpu();
+	nexgen_init_cpu();
+	umc_init_cpu();
+	early_cpu_detect();
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/* pse is not compatible with on-the-fly unmapping,
+	 * disable it even if the cpus claim to support it.
+	 */
+	clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+	disable_pse = 1;
+#endif
+}
+
+/* Make sure %fs is initialized properly in idle threads */
+struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
+{
+	memset(regs, 0, sizeof(struct pt_regs));
+	regs->xfs = __KERNEL_PERCPU;
+	return regs;
+}
+
+/* Current gdt points %fs at the "master" per-cpu area: after this,
+ * it's on the real one. */
+void switch_to_new_gdt(void)
+{
+	struct Xgt_desc_struct gdt_descr;
+
+	gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
+	gdt_descr.size = GDT_SIZE - 1;
+	load_gdt(&gdt_descr);
+	asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
+}
+
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless, this function acts as a
+ * 'CPU state barrier', nothing should get across.
+ */
+void __cpuinit cpu_init(void)
+{
+	int cpu = smp_processor_id();
+	struct task_struct *curr = current;
+	struct tss_struct * t = &per_cpu(init_tss, cpu);
+	struct thread_struct *thread = &curr->thread;
+
+	if (cpu_test_and_set(cpu, cpu_initialized)) {
+		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
+		for (;;) local_irq_enable();
+	}
+
+	printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+
+	if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
+		clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+	if (tsc_disable && cpu_has_tsc) {
+		printk(KERN_NOTICE "Disabling TSC...\n");
+		/**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
+		clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
+		set_in_cr4(X86_CR4_TSD);
+	}
+
+	load_idt(&idt_descr);
+	switch_to_new_gdt();
+
+	/*
+	 * Set up and load the per-CPU TSS and LDT
+	 */
+	atomic_inc(&init_mm.mm_count);
+	curr->active_mm = &init_mm;
+	if (curr->mm)
+		BUG();
+	enter_lazy_tlb(&init_mm, curr);
+
+	load_esp0(t, thread);
+	set_tss_desc(cpu,t);
+	load_TR_desc();
+	load_LDT(&init_mm.context);
+
+#ifdef CONFIG_DOUBLEFAULT
+	/* Set up doublefault TSS pointer in the GDT */
+	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
+#endif
+
+	/* Clear %gs. */
+	asm volatile ("mov %0, %%gs" : : "r" (0));
+
+	/* Clear all 6 debug registers: */
+	set_debugreg(0, 0);
+	set_debugreg(0, 1);
+	set_debugreg(0, 2);
+	set_debugreg(0, 3);
+	set_debugreg(0, 6);
+	set_debugreg(0, 7);
+
+	/*
+	 * Force FPU initialization:
+	 */
+	current_thread_info()->status = 0;
+	clear_used_math();
+	mxcsr_feature_mask_init();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void __cpuinit cpu_uninit(void)
+{
+	int cpu = raw_smp_processor_id();
+	cpu_clear(cpu, cpu_initialized);
+
+	/* lazy TLB state */
+	per_cpu(cpu_tlbstate, cpu).state = 0;
+	per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
+}
+#endif
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
new file mode 100644
index 00000000000..2f6432cef6f
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -0,0 +1,28 @@
+
+struct cpu_model_info {
+	int vendor;
+	int family;
+	char *model_names[16];
+};
+
+/* attempt to consolidate cpu attributes */
+struct cpu_dev {
+	char	* c_vendor;
+
+	/* some have two possibilities for cpuid string */
+	char	* c_ident[2];	
+
+	struct		cpu_model_info c_models[4];
+
+	void		(*c_init)(struct cpuinfo_x86 * c);
+	void		(*c_identify)(struct cpuinfo_x86 * c);
+	unsigned int	(*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size);
+};
+
+extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM];
+
+extern int get_model_name(struct cpuinfo_x86 *c);
+extern void display_cacheinfo(struct cpuinfo_x86 *c);
+
+extern void early_intel_workaround(struct cpuinfo_x86 *c);
+
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
new file mode 100644
index 00000000000..d8c6f132dc7
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -0,0 +1,250 @@
+#
+# CPU Frequency scaling
+#
+
+menu "CPU Frequency scaling"
+
+source "drivers/cpufreq/Kconfig"
+
+if CPU_FREQ
+
+comment "CPUFreq processor drivers"
+
+config X86_ACPI_CPUFREQ
+	tristate "ACPI Processor P-States driver"
+	select CPU_FREQ_TABLE
+	depends on ACPI_PROCESSOR
+	help
+	  This driver adds a CPUFreq driver which utilizes the ACPI
+	  Processor Performance States.
+	  This driver also supports Intel Enhanced Speedstep.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config ELAN_CPUFREQ
+	tristate "AMD Elan SC400 and SC410"
+	select CPU_FREQ_TABLE
+	depends on X86_ELAN
+	---help---
+	  This adds the CPUFreq driver for AMD Elan SC400 and SC410
+	  processors.
+
+	  You need to specify the processor maximum speed as boot
+	  parameter: elanfreq=maxspeed (in kHz) or as module
+	  parameter "max_freq".
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config SC520_CPUFREQ
+	tristate "AMD Elan SC520"
+	select CPU_FREQ_TABLE
+	depends on X86_ELAN
+	---help---
+	  This adds the CPUFreq driver for AMD Elan SC520 processor.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+
+config X86_POWERNOW_K6
+	tristate "AMD Mobile K6-2/K6-3 PowerNow!"
+	select CPU_FREQ_TABLE
+	help
+	  This adds the CPUFreq driver for mobile AMD K6-2+ and mobile
+	  AMD K6-3+ processors.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_POWERNOW_K7
+	tristate "AMD Mobile Athlon/Duron PowerNow!"
+	select CPU_FREQ_TABLE
+	help
+	  This adds the CPUFreq driver for mobile AMD K7 mobile processors.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_POWERNOW_K7_ACPI
+	bool
+	depends on X86_POWERNOW_K7 && ACPI_PROCESSOR
+	depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m)
+	default y
+
+config X86_POWERNOW_K8
+	tristate "AMD Opteron/Athlon64 PowerNow!"
+	select CPU_FREQ_TABLE
+	depends on EXPERIMENTAL
+	help
+	  This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_POWERNOW_K8_ACPI
+	bool "ACPI Support"
+	select ACPI_PROCESSOR
+	depends on ACPI && X86_POWERNOW_K8
+	default y
+	help
+	  This provides access to the K8s Processor Performance States via ACPI.
+	  This driver is probably required for CPUFreq to work with multi-socket and
+	  SMP systems.  It is not required on at least some single-socket yet
+	  multi-core systems, even if SMP is enabled.
+
+	  It is safe to say Y here.
+
+config X86_GX_SUSPMOD
+	tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
+	depends on PCI
+	help
+	 This add the CPUFreq driver for NatSemi Geode processors which
+	 support suspend modulation.
+
+	 For details, take a look at <file:Documentation/cpu-freq/>.
+
+	 If in doubt, say N.
+
+config X86_SPEEDSTEP_CENTRINO
+	tristate "Intel Enhanced SpeedStep"
+	select CPU_FREQ_TABLE
+	select X86_SPEEDSTEP_CENTRINO_TABLE
+	help
+	  This adds the CPUFreq driver for Enhanced SpeedStep enabled
+	  mobile CPUs.  This means Intel Pentium M (Centrino) CPUs. However,
+	  you also need to say Y to "Use ACPI tables to decode..." below
+	  [which might imply enabling ACPI] if you want to use this driver
+	  on non-Banias CPUs.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_SPEEDSTEP_CENTRINO_TABLE
+	bool "Built-in tables for Banias CPUs"
+	depends on X86_SPEEDSTEP_CENTRINO
+	default y
+	help
+	  Use built-in tables for Banias CPUs if ACPI encoding
+	  is not available.
+
+	  If in doubt, say N.
+
+config X86_SPEEDSTEP_ICH
+	tristate "Intel Speedstep on ICH-M chipsets (ioport interface)"
+	select CPU_FREQ_TABLE
+	help
+	  This adds the CPUFreq driver for certain mobile Intel Pentium III
+	  (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all
+	  mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2,
+	  ICH3 or ICH4 southbridge.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_SPEEDSTEP_SMI
+	tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)"
+	select CPU_FREQ_TABLE
+	depends on EXPERIMENTAL
+	help
+	  This adds the CPUFreq driver for certain mobile Intel Pentium III
+	  (Coppermine), all mobile Intel Pentium III-M (Tualatin)
+	  on systems which have an Intel 440BX/ZX/MX southbridge.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_P4_CLOCKMOD
+	tristate "Intel Pentium 4 clock modulation"
+	select CPU_FREQ_TABLE
+	help
+	  This adds the CPUFreq driver for Intel Pentium 4 / XEON
+	  processors.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_CPUFREQ_NFORCE2
+	tristate "nVidia nForce2 FSB changing"
+	depends on EXPERIMENTAL
+	help
+	  This adds the CPUFreq driver for FSB changing on nVidia nForce2
+	  platforms.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_LONGRUN
+	tristate "Transmeta LongRun"
+	help
+	  This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors
+	  which support LongRun.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_LONGHAUL
+	tristate "VIA Cyrix III Longhaul"
+	select CPU_FREQ_TABLE
+	depends on ACPI_PROCESSOR
+	help
+	  This adds the CPUFreq driver for VIA Samuel/CyrixIII,
+	  VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T
+	  processors.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_E_POWERSAVER
+	tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)"
+	select CPU_FREQ_TABLE
+	depends on EXPERIMENTAL
+	help
+	  This adds the CPUFreq driver for VIA C7 processors.
+
+	  If in doubt, say N.
+
+comment "shared options"
+
+config X86_ACPI_CPUFREQ_PROC_INTF
+	bool "/proc/acpi/processor/../performance interface (deprecated)"
+	depends on PROC_FS
+	depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI
+	help
+	  This enables the deprecated /proc/acpi/processor/../performance
+	  interface. While it is helpful for debugging, the generic,
+	  cross-architecture cpufreq interfaces should be used.
+
+	  If in doubt, say N.
+
+config X86_SPEEDSTEP_LIB
+	tristate
+	default X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD
+
+config X86_SPEEDSTEP_RELAXED_CAP_CHECK
+	bool "Relaxed speedstep capability checks"
+	depends on (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH)
+	help
+	  Don't perform all checks for a speedstep capable system which would
+	  normally be done. Some ancient or strange systems, though speedstep
+	  capable, don't always indicate that they are speedstep capable. This
+	  option lets the probing code bypass some of those checks if the
+	  parameter "relaxed_check=1" is passed to the module.
+
+endif	# CPU_FREQ
+
+endmenu
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
new file mode 100644
index 00000000000..560f7760dae
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/Makefile
@@ -0,0 +1,16 @@
+obj-$(CONFIG_X86_POWERNOW_K6)		+= powernow-k6.o
+obj-$(CONFIG_X86_POWERNOW_K7)		+= powernow-k7.o
+obj-$(CONFIG_X86_POWERNOW_K8)		+= powernow-k8.o
+obj-$(CONFIG_X86_LONGHAUL)		+= longhaul.o
+obj-$(CONFIG_X86_E_POWERSAVER)		+= e_powersaver.o
+obj-$(CONFIG_ELAN_CPUFREQ)		+= elanfreq.o
+obj-$(CONFIG_SC520_CPUFREQ)		+= sc520_freq.o
+obj-$(CONFIG_X86_LONGRUN)		+= longrun.o  
+obj-$(CONFIG_X86_GX_SUSPMOD)		+= gx-suspmod.o
+obj-$(CONFIG_X86_SPEEDSTEP_ICH)		+= speedstep-ich.o
+obj-$(CONFIG_X86_SPEEDSTEP_LIB)		+= speedstep-lib.o
+obj-$(CONFIG_X86_SPEEDSTEP_SMI)		+= speedstep-smi.o
+obj-$(CONFIG_X86_ACPI_CPUFREQ)		+= acpi-cpufreq.o
+obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO)	+= speedstep-centrino.o
+obj-$(CONFIG_X86_P4_CLOCKMOD)		+= p4-clockmod.o
+obj-$(CONFIG_X86_CPUFREQ_NFORCE2)	+= cpufreq-nforce2.o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
new file mode 100644
index 00000000000..ffd01e5dcb5
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -0,0 +1,798 @@
+/*
+ * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $)
+ *
+ *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
+ *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/sched.h>
+#include <linux/cpufreq.h>
+#include <linux/compiler.h>
+#include <linux/dmi.h>
+
+#include <linux/acpi.h>
+#include <acpi/processor.h>
+
+#include <asm/io.h>
+#include <asm/msr.h>
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
+#include <asm/delay.h>
+#include <asm/uaccess.h>
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
+
+MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
+MODULE_DESCRIPTION("ACPI Processor P-States Driver");
+MODULE_LICENSE("GPL");
+
+enum {
+	UNDEFINED_CAPABLE = 0,
+	SYSTEM_INTEL_MSR_CAPABLE,
+	SYSTEM_IO_CAPABLE,
+};
+
+#define INTEL_MSR_RANGE		(0xffff)
+#define CPUID_6_ECX_APERFMPERF_CAPABILITY	(0x1)
+
+struct acpi_cpufreq_data {
+	struct acpi_processor_performance *acpi_data;
+	struct cpufreq_frequency_table *freq_table;
+	unsigned int max_freq;
+	unsigned int resume;
+	unsigned int cpu_feature;
+};
+
+static struct acpi_cpufreq_data *drv_data[NR_CPUS];
+/* acpi_perf_data is a pointer to percpu data. */
+static struct acpi_processor_performance *acpi_perf_data;
+
+static struct cpufreq_driver acpi_cpufreq_driver;
+
+static unsigned int acpi_pstate_strict;
+
+static int check_est_cpu(unsigned int cpuid)
+{
+	struct cpuinfo_x86 *cpu = &cpu_data[cpuid];
+
+	if (cpu->x86_vendor != X86_VENDOR_INTEL ||
+	    !cpu_has(cpu, X86_FEATURE_EST))
+		return 0;
+
+	return 1;
+}
+
+static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
+{
+	struct acpi_processor_performance *perf;
+	int i;
+
+	perf = data->acpi_data;
+
+	for (i=0; i<perf->state_count; i++) {
+		if (value == perf->states[i].status)
+			return data->freq_table[i].frequency;
+	}
+	return 0;
+}
+
+static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
+{
+	int i;
+	struct acpi_processor_performance *perf;
+
+	msr &= INTEL_MSR_RANGE;
+	perf = data->acpi_data;
+
+	for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		if (msr == perf->states[data->freq_table[i].index].status)
+			return data->freq_table[i].frequency;
+	}
+	return data->freq_table[0].frequency;
+}
+
+static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
+{
+	switch (data->cpu_feature) {
+	case SYSTEM_INTEL_MSR_CAPABLE:
+		return extract_msr(val, data);
+	case SYSTEM_IO_CAPABLE:
+		return extract_io(val, data);
+	default:
+		return 0;
+	}
+}
+
+struct msr_addr {
+	u32 reg;
+};
+
+struct io_addr {
+	u16 port;
+	u8 bit_width;
+};
+
+typedef union {
+	struct msr_addr msr;
+	struct io_addr io;
+} drv_addr_union;
+
+struct drv_cmd {
+	unsigned int type;
+	cpumask_t mask;
+	drv_addr_union addr;
+	u32 val;
+};
+
+static void do_drv_read(struct drv_cmd *cmd)
+{
+	u32 h;
+
+	switch (cmd->type) {
+	case SYSTEM_INTEL_MSR_CAPABLE:
+		rdmsr(cmd->addr.msr.reg, cmd->val, h);
+		break;
+	case SYSTEM_IO_CAPABLE:
+		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
+				&cmd->val,
+				(u32)cmd->addr.io.bit_width);
+		break;
+	default:
+		break;
+	}
+}
+
+static void do_drv_write(struct drv_cmd *cmd)
+{
+	u32 lo, hi;
+
+	switch (cmd->type) {
+	case SYSTEM_INTEL_MSR_CAPABLE:
+		rdmsr(cmd->addr.msr.reg, lo, hi);
+		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
+		wrmsr(cmd->addr.msr.reg, lo, hi);
+		break;
+	case SYSTEM_IO_CAPABLE:
+		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
+				cmd->val,
+				(u32)cmd->addr.io.bit_width);
+		break;
+	default:
+		break;
+	}
+}
+
+static void drv_read(struct drv_cmd *cmd)
+{
+	cpumask_t saved_mask = current->cpus_allowed;
+	cmd->val = 0;
+
+	set_cpus_allowed(current, cmd->mask);
+	do_drv_read(cmd);
+	set_cpus_allowed(current, saved_mask);
+}
+
+static void drv_write(struct drv_cmd *cmd)
+{
+	cpumask_t saved_mask = current->cpus_allowed;
+	unsigned int i;
+
+	for_each_cpu_mask(i, cmd->mask) {
+		set_cpus_allowed(current, cpumask_of_cpu(i));
+		do_drv_write(cmd);
+	}
+
+	set_cpus_allowed(current, saved_mask);
+	return;
+}
+
+static u32 get_cur_val(cpumask_t mask)
+{
+	struct acpi_processor_performance *perf;
+	struct drv_cmd cmd;
+
+	if (unlikely(cpus_empty(mask)))
+		return 0;
+
+	switch (drv_data[first_cpu(mask)]->cpu_feature) {
+	case SYSTEM_INTEL_MSR_CAPABLE:
+		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
+		cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
+		break;
+	case SYSTEM_IO_CAPABLE:
+		cmd.type = SYSTEM_IO_CAPABLE;
+		perf = drv_data[first_cpu(mask)]->acpi_data;
+		cmd.addr.io.port = perf->control_register.address;
+		cmd.addr.io.bit_width = perf->control_register.bit_width;
+		break;
+	default:
+		return 0;
+	}
+
+	cmd.mask = mask;
+
+	drv_read(&cmd);
+
+	dprintk("get_cur_val = %u\n", cmd.val);
+
+	return cmd.val;
+}
+
+/*
+ * Return the measured active (C0) frequency on this CPU since last call
+ * to this function.
+ * Input: cpu number
+ * Return: Average CPU frequency in terms of max frequency (zero on error)
+ *
+ * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
+ * over a period of time, while CPU is in C0 state.
+ * IA32_MPERF counts at the rate of max advertised frequency
+ * IA32_APERF counts at the rate of actual CPU frequency
+ * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
+ * no meaning should be associated with absolute values of these MSRs.
+ */
+static unsigned int get_measured_perf(unsigned int cpu)
+{
+	union {
+		struct {
+			u32 lo;
+			u32 hi;
+		} split;
+		u64 whole;
+	} aperf_cur, mperf_cur;
+
+	cpumask_t saved_mask;
+	unsigned int perf_percent;
+	unsigned int retval;
+
+	saved_mask = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	if (get_cpu() != cpu) {
+		/* We were not able to run on requested processor */
+		put_cpu();
+		return 0;
+	}
+
+	rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi);
+	rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi);
+
+	wrmsr(MSR_IA32_APERF, 0,0);
+	wrmsr(MSR_IA32_MPERF, 0,0);
+
+#ifdef __i386__
+	/*
+	 * We dont want to do 64 bit divide with 32 bit kernel
+	 * Get an approximate value. Return failure in case we cannot get
+	 * an approximate value.
+	 */
+	if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) {
+		int shift_count;
+		u32 h;
+
+		h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi);
+		shift_count = fls(h);
+
+		aperf_cur.whole >>= shift_count;
+		mperf_cur.whole >>= shift_count;
+	}
+
+	if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) {
+		int shift_count = 7;
+		aperf_cur.split.lo >>= shift_count;
+		mperf_cur.split.lo >>= shift_count;
+	}
+
+	if (aperf_cur.split.lo && mperf_cur.split.lo)
+		perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo;
+	else
+		perf_percent = 0;
+
+#else
+	if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) {
+		int shift_count = 7;
+		aperf_cur.whole >>= shift_count;
+		mperf_cur.whole >>= shift_count;
+	}
+
+	if (aperf_cur.whole && mperf_cur.whole)
+		perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole;
+	else
+		perf_percent = 0;
+
+#endif
+
+	retval = drv_data[cpu]->max_freq * perf_percent / 100;
+
+	put_cpu();
+	set_cpus_allowed(current, saved_mask);
+
+	dprintk("cpu %d: performance percent %d\n", cpu, perf_percent);
+	return retval;
+}
+
+static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
+{
+	struct acpi_cpufreq_data *data = drv_data[cpu];
+	unsigned int freq;
+
+	dprintk("get_cur_freq_on_cpu (%d)\n", cpu);
+
+	if (unlikely(data == NULL ||
+		     data->acpi_data == NULL || data->freq_table == NULL)) {
+		return 0;
+	}
+
+	freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data);
+	dprintk("cur freq = %u\n", freq);
+
+	return freq;
+}
+
+static unsigned int check_freqs(cpumask_t mask, unsigned int freq,
+				struct acpi_cpufreq_data *data)
+{
+	unsigned int cur_freq;
+	unsigned int i;
+
+	for (i=0; i<100; i++) {
+		cur_freq = extract_freq(get_cur_val(mask), data);
+		if (cur_freq == freq)
+			return 1;
+		udelay(10);
+	}
+	return 0;
+}
+
+static int acpi_cpufreq_target(struct cpufreq_policy *policy,
+			       unsigned int target_freq, unsigned int relation)
+{
+	struct acpi_cpufreq_data *data = drv_data[policy->cpu];
+	struct acpi_processor_performance *perf;
+	struct cpufreq_freqs freqs;
+	cpumask_t online_policy_cpus;
+	struct drv_cmd cmd;
+	unsigned int next_state = 0; /* Index into freq_table */
+	unsigned int next_perf_state = 0; /* Index into perf table */
+	unsigned int i;
+	int result = 0;
+
+	dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
+
+	if (unlikely(data == NULL ||
+	     data->acpi_data == NULL || data->freq_table == NULL)) {
+		return -ENODEV;
+	}
+
+	perf = data->acpi_data;
+	result = cpufreq_frequency_table_target(policy,
+						data->freq_table,
+						target_freq,
+						relation, &next_state);
+	if (unlikely(result))
+		return -ENODEV;
+
+#ifdef CONFIG_HOTPLUG_CPU
+	/* cpufreq holds the hotplug lock, so we are safe from here on */
+	cpus_and(online_policy_cpus, cpu_online_map, policy->cpus);
+#else
+	online_policy_cpus = policy->cpus;
+#endif
+
+	next_perf_state = data->freq_table[next_state].index;
+	if (perf->state == next_perf_state) {
+		if (unlikely(data->resume)) {
+			dprintk("Called after resume, resetting to P%d\n",
+				next_perf_state);
+			data->resume = 0;
+		} else {
+			dprintk("Already at target state (P%d)\n",
+				next_perf_state);
+			return 0;
+		}
+	}
+
+	switch (data->cpu_feature) {
+	case SYSTEM_INTEL_MSR_CAPABLE:
+		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
+		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
+		cmd.val = (u32) perf->states[next_perf_state].control;
+		break;
+	case SYSTEM_IO_CAPABLE:
+		cmd.type = SYSTEM_IO_CAPABLE;
+		cmd.addr.io.port = perf->control_register.address;
+		cmd.addr.io.bit_width = perf->control_register.bit_width;
+		cmd.val = (u32) perf->states[next_perf_state].control;
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	cpus_clear(cmd.mask);
+
+	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
+		cmd.mask = online_policy_cpus;
+	else
+		cpu_set(policy->cpu, cmd.mask);
+
+	freqs.old = perf->states[perf->state].core_frequency * 1000;
+	freqs.new = data->freq_table[next_state].frequency;
+	for_each_cpu_mask(i, cmd.mask) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	}
+
+	drv_write(&cmd);
+
+	if (acpi_pstate_strict) {
+		if (!check_freqs(cmd.mask, freqs.new, data)) {
+			dprintk("acpi_cpufreq_target failed (%d)\n",
+				policy->cpu);
+			return -EAGAIN;
+		}
+	}
+
+	for_each_cpu_mask(i, cmd.mask) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	}
+	perf->state = next_perf_state;
+
+	return result;
+}
+
+static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
+{
+	struct acpi_cpufreq_data *data = drv_data[policy->cpu];
+
+	dprintk("acpi_cpufreq_verify\n");
+
+	return cpufreq_frequency_table_verify(policy, data->freq_table);
+}
+
+static unsigned long
+acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
+{
+	struct acpi_processor_performance *perf = data->acpi_data;
+
+	if (cpu_khz) {
+		/* search the closest match to cpu_khz */
+		unsigned int i;
+		unsigned long freq;
+		unsigned long freqn = perf->states[0].core_frequency * 1000;
+
+		for (i=0; i<(perf->state_count-1); i++) {
+			freq = freqn;
+			freqn = perf->states[i+1].core_frequency * 1000;
+			if ((2 * cpu_khz) > (freqn + freq)) {
+				perf->state = i;
+				return freq;
+			}
+		}
+		perf->state = perf->state_count-1;
+		return freqn;
+	} else {
+		/* assume CPU is at P0... */
+		perf->state = 0;
+		return perf->states[0].core_frequency * 1000;
+	}
+}
+
+/*
+ * acpi_cpufreq_early_init - initialize ACPI P-States library
+ *
+ * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
+ * in order to determine correct frequency and voltage pairings. We can
+ * do _PDC and _PSD and find out the processor dependency for the
+ * actual init that will happen later...
+ */
+static int __init acpi_cpufreq_early_init(void)
+{
+	dprintk("acpi_cpufreq_early_init\n");
+
+	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
+	if (!acpi_perf_data) {
+		dprintk("Memory allocation error for acpi_perf_data.\n");
+		return -ENOMEM;
+	}
+
+	/* Do initialization in ACPI core */
+	acpi_processor_preregister_performance(acpi_perf_data);
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Some BIOSes do SW_ANY coordination internally, either set it up in hw
+ * or do it in BIOS firmware and won't inform about it to OS. If not
+ * detected, this has a side effect of making CPU run at a different speed
+ * than OS intended it to run at. Detect it and handle it cleanly.
+ */
+static int bios_with_sw_any_bug;
+
+static int sw_any_bug_found(const struct dmi_system_id *d)
+{
+	bios_with_sw_any_bug = 1;
+	return 0;
+}
+
+static const struct dmi_system_id sw_any_bug_dmi_table[] = {
+	{
+		.callback = sw_any_bug_found,
+		.ident = "Supermicro Server X6DLP",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
+			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
+		},
+	},
+	{ }
+};
+#endif
+
+static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int i;
+	unsigned int valid_states = 0;
+	unsigned int cpu = policy->cpu;
+	struct acpi_cpufreq_data *data;
+	unsigned int result = 0;
+	struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
+	struct acpi_processor_performance *perf;
+
+	dprintk("acpi_cpufreq_cpu_init\n");
+
+	data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
+	drv_data[cpu] = data;
+
+	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
+		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
+
+	result = acpi_processor_register_performance(data->acpi_data, cpu);
+	if (result)
+		goto err_free;
+
+	perf = data->acpi_data;
+	policy->shared_type = perf->shared_type;
+
+	/*
+	 * Will let policy->cpus know about dependency only when software
+	 * coordination is required.
+	 */
+	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
+	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
+		policy->cpus = perf->shared_cpu_map;
+	}
+
+#ifdef CONFIG_SMP
+	dmi_check_system(sw_any_bug_dmi_table);
+	if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
+		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
+		policy->cpus = cpu_core_map[cpu];
+	}
+#endif
+
+	/* capability check */
+	if (perf->state_count <= 1) {
+		dprintk("No P-States\n");
+		result = -ENODEV;
+		goto err_unreg;
+	}
+
+	if (perf->control_register.space_id != perf->status_register.space_id) {
+		result = -ENODEV;
+		goto err_unreg;
+	}
+
+	switch (perf->control_register.space_id) {
+	case ACPI_ADR_SPACE_SYSTEM_IO:
+		dprintk("SYSTEM IO addr space\n");
+		data->cpu_feature = SYSTEM_IO_CAPABLE;
+		break;
+	case ACPI_ADR_SPACE_FIXED_HARDWARE:
+		dprintk("HARDWARE addr space\n");
+		if (!check_est_cpu(cpu)) {
+			result = -ENODEV;
+			goto err_unreg;
+		}
+		data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
+		break;
+	default:
+		dprintk("Unknown addr space %d\n",
+			(u32) (perf->control_register.space_id));
+		result = -ENODEV;
+		goto err_unreg;
+	}
+
+	data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) *
+		    (perf->state_count+1), GFP_KERNEL);
+	if (!data->freq_table) {
+		result = -ENOMEM;
+		goto err_unreg;
+	}
+
+	/* detect transition latency */
+	policy->cpuinfo.transition_latency = 0;
+	for (i=0; i<perf->state_count; i++) {
+		if ((perf->states[i].transition_latency * 1000) >
+		    policy->cpuinfo.transition_latency)
+			policy->cpuinfo.transition_latency =
+			    perf->states[i].transition_latency * 1000;
+	}
+
+	data->max_freq = perf->states[0].core_frequency * 1000;
+	/* table init */
+	for (i=0; i<perf->state_count; i++) {
+		if (i>0 && perf->states[i].core_frequency >=
+		    data->freq_table[valid_states-1].frequency / 1000)
+			continue;
+
+		data->freq_table[valid_states].index = i;
+		data->freq_table[valid_states].frequency =
+		    perf->states[i].core_frequency * 1000;
+		valid_states++;
+	}
+	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
+	perf->state = 0;
+
+	result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
+	if (result)
+		goto err_freqfree;
+
+	switch (perf->control_register.space_id) {
+	case ACPI_ADR_SPACE_SYSTEM_IO:
+		/* Current speed is unknown and not detectable by IO port */
+		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
+		break;
+	case ACPI_ADR_SPACE_FIXED_HARDWARE:
+		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
+		policy->cur = get_cur_freq_on_cpu(cpu);
+		break;
+	default:
+		break;
+	}
+
+	/* notify BIOS that we exist */
+	acpi_processor_notify_smm(THIS_MODULE);
+
+	/* Check for APERF/MPERF support in hardware */
+	if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) {
+		unsigned int ecx;
+		ecx = cpuid_ecx(6);
+		if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY)
+			acpi_cpufreq_driver.getavg = get_measured_perf;
+	}
+
+	dprintk("CPU%u - ACPI performance management activated.\n", cpu);
+	for (i = 0; i < perf->state_count; i++)
+		dprintk("     %cP%d: %d MHz, %d mW, %d uS\n",
+			(i == perf->state ? '*' : ' '), i,
+			(u32) perf->states[i].core_frequency,
+			(u32) perf->states[i].power,
+			(u32) perf->states[i].transition_latency);
+
+	cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
+
+	/*
+	 * the first call to ->target() should result in us actually
+	 * writing something to the appropriate registers.
+	 */
+	data->resume = 1;
+
+	return result;
+
+err_freqfree:
+	kfree(data->freq_table);
+err_unreg:
+	acpi_processor_unregister_performance(perf, cpu);
+err_free:
+	kfree(data);
+	drv_data[cpu] = NULL;
+
+	return result;
+}
+
+static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+	struct acpi_cpufreq_data *data = drv_data[policy->cpu];
+
+	dprintk("acpi_cpufreq_cpu_exit\n");
+
+	if (data) {
+		cpufreq_frequency_table_put_attr(policy->cpu);
+		drv_data[policy->cpu] = NULL;
+		acpi_processor_unregister_performance(data->acpi_data,
+						      policy->cpu);
+		kfree(data);
+	}
+
+	return 0;
+}
+
+static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
+{
+	struct acpi_cpufreq_data *data = drv_data[policy->cpu];
+
+	dprintk("acpi_cpufreq_resume\n");
+
+	data->resume = 1;
+
+	return 0;
+}
+
+static struct freq_attr *acpi_cpufreq_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver acpi_cpufreq_driver = {
+	.verify = acpi_cpufreq_verify,
+	.target = acpi_cpufreq_target,
+	.init = acpi_cpufreq_cpu_init,
+	.exit = acpi_cpufreq_cpu_exit,
+	.resume = acpi_cpufreq_resume,
+	.name = "acpi-cpufreq",
+	.owner = THIS_MODULE,
+	.attr = acpi_cpufreq_attr,
+};
+
+static int __init acpi_cpufreq_init(void)
+{
+	int ret;
+
+	dprintk("acpi_cpufreq_init\n");
+
+	ret = acpi_cpufreq_early_init();
+	if (ret)
+		return ret;
+
+	return cpufreq_register_driver(&acpi_cpufreq_driver);
+}
+
+static void __exit acpi_cpufreq_exit(void)
+{
+	dprintk("acpi_cpufreq_exit\n");
+
+	cpufreq_unregister_driver(&acpi_cpufreq_driver);
+
+	free_percpu(acpi_perf_data);
+
+	return;
+}
+
+module_param(acpi_pstate_strict, uint, 0644);
+MODULE_PARM_DESC(acpi_pstate_strict,
+	"value 0 or non-zero. non-zero -> strict ACPI checks are "
+	"performed during frequency changes.");
+
+late_initcall(acpi_cpufreq_init);
+module_exit(acpi_cpufreq_exit);
+
+MODULE_ALIAS("acpi");
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
new file mode 100644
index 00000000000..32f0bda3fc9
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -0,0 +1,440 @@
+/*
+ * (C) 2004-2006  Sebastian Witt <se.witt@gmx.net>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *  Based upon reverse engineered information
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#define NFORCE2_XTAL 25
+#define NFORCE2_BOOTFSB 0x48
+#define NFORCE2_PLLENABLE 0xa8
+#define NFORCE2_PLLREG 0xa4
+#define NFORCE2_PLLADR 0xa0
+#define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div)
+
+#define NFORCE2_MIN_FSB 50
+#define NFORCE2_SAFE_DISTANCE 50
+
+/* Delay in ms between FSB changes */
+//#define NFORCE2_DELAY 10
+
+/* nforce2_chipset:
+ * FSB is changed using the chipset
+ */
+static struct pci_dev *nforce2_chipset_dev;
+
+/* fid:
+ * multiplier * 10
+ */
+static int fid = 0;
+
+/* min_fsb, max_fsb:
+ * minimum and maximum FSB (= FSB at boot time)
+ */
+static int min_fsb = 0;
+static int max_fsb = 0;
+
+MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>");
+MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver");
+MODULE_LICENSE("GPL");
+
+module_param(fid, int, 0444);
+module_param(min_fsb, int, 0444);
+
+MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
+MODULE_PARM_DESC(min_fsb,
+                 "Minimum FSB to use, if not defined: current FSB - 50");
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg)
+
+/**
+ * nforce2_calc_fsb - calculate FSB
+ * @pll: PLL value
+ *
+ *   Calculates FSB from PLL value
+ */
+static int nforce2_calc_fsb(int pll)
+{
+	unsigned char mul, div;
+
+	mul = (pll >> 8) & 0xff;
+	div = pll & 0xff;
+
+	if (div > 0)
+		return NFORCE2_XTAL * mul / div;
+
+	return 0;
+}
+
+/**
+ * nforce2_calc_pll - calculate PLL value
+ * @fsb: FSB
+ *
+ *   Calculate PLL value for given FSB
+ */
+static int nforce2_calc_pll(unsigned int fsb)
+{
+	unsigned char xmul, xdiv;
+	unsigned char mul = 0, div = 0;
+	int tried = 0;
+
+	/* Try to calculate multiplier and divider up to 4 times */
+	while (((mul == 0) || (div == 0)) && (tried <= 3)) {
+		for (xdiv = 2; xdiv <= 0x80; xdiv++)
+			for (xmul = 1; xmul <= 0xfe; xmul++)
+				if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) ==
+				    fsb + tried) {
+					mul = xmul;
+					div = xdiv;
+				}
+		tried++;
+	}
+
+	if ((mul == 0) || (div == 0))
+		return -1;
+
+	return NFORCE2_PLL(mul, div);
+}
+
+/**
+ * nforce2_write_pll - write PLL value to chipset
+ * @pll: PLL value
+ *
+ *   Writes new FSB PLL value to chipset
+ */
+static void nforce2_write_pll(int pll)
+{
+	int temp;
+
+	/* Set the pll addr. to 0x00 */
+	pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, 0);
+
+	/* Now write the value in all 64 registers */
+	for (temp = 0; temp <= 0x3f; temp++)
+		pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, pll);
+
+	return;
+}
+
+/**
+ * nforce2_fsb_read - Read FSB
+ *
+ *   Read FSB from chipset
+ *   If bootfsb != 0, return FSB at boot-time
+ */
+static unsigned int nforce2_fsb_read(int bootfsb)
+{
+	struct pci_dev *nforce2_sub5;
+	u32 fsb, temp = 0;
+
+	/* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
+	nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
+						0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL);
+	if (!nforce2_sub5)
+		return 0;
+
+	pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb);
+	fsb /= 1000000;
+
+	/* Check if PLL register is already set */
+	pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp);
+
+	if(bootfsb || !temp)
+		return fsb;
+		
+	/* Use PLL register FSB value */
+	pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp);
+	fsb = nforce2_calc_fsb(temp);
+
+	return fsb;
+}
+
+/**
+ * nforce2_set_fsb - set new FSB
+ * @fsb: New FSB
+ *
+ *   Sets new FSB
+ */
+static int nforce2_set_fsb(unsigned int fsb)
+{
+	u32 temp = 0;
+	unsigned int tfsb;
+	int diff;
+	int pll = 0;
+
+	if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
+		printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb);
+		return -EINVAL;
+	}
+
+	tfsb = nforce2_fsb_read(0);
+	if (!tfsb) {
+		printk(KERN_ERR "cpufreq: Error while reading the FSB\n");
+		return -EINVAL;
+	}
+
+	/* First write? Then set actual value */
+	pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp);
+	if (!temp) {
+		pll = nforce2_calc_pll(tfsb);
+
+		if (pll < 0)
+			return -EINVAL;
+
+		nforce2_write_pll(pll);
+	}
+
+	/* Enable write access */
+	temp = 0x01;
+	pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8)temp);
+
+	diff = tfsb - fsb;
+
+	if (!diff)
+		return 0;
+
+	while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) {
+		if (diff < 0)
+			tfsb++;
+		else
+			tfsb--;
+
+		/* Calculate the PLL reg. value */
+		if ((pll = nforce2_calc_pll(tfsb)) == -1)
+			return -EINVAL;
+
+		nforce2_write_pll(pll);
+#ifdef NFORCE2_DELAY
+		mdelay(NFORCE2_DELAY);
+#endif
+	}
+
+	temp = 0x40;
+	pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLADR, (u8)temp);
+
+	return 0;
+}
+
+/**
+ * nforce2_get - get the CPU frequency
+ * @cpu: CPU number
+ *
+ * Returns the CPU frequency
+ */
+static unsigned int nforce2_get(unsigned int cpu)
+{
+	if (cpu)
+		return 0;
+	return nforce2_fsb_read(0) * fid * 100;
+}
+
+/**
+ * nforce2_target - set a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: the target frequency
+ * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ *
+ * Sets a new CPUFreq policy.
+ */
+static int nforce2_target(struct cpufreq_policy *policy,
+			  unsigned int target_freq, unsigned int relation)
+{
+//        unsigned long         flags;
+	struct cpufreq_freqs freqs;
+	unsigned int target_fsb;
+
+	if ((target_freq > policy->max) || (target_freq < policy->min))
+		return -EINVAL;
+
+	target_fsb = target_freq / (fid * 100);
+
+	freqs.old = nforce2_get(policy->cpu);
+	freqs.new = target_fsb * fid * 100;
+	freqs.cpu = 0;		/* Only one CPU on nForce2 plattforms */
+
+	if (freqs.old == freqs.new)
+		return 0;
+
+	dprintk("Old CPU frequency %d kHz, new %d kHz\n",
+	       freqs.old, freqs.new);
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	/* Disable IRQs */
+	//local_irq_save(flags);
+
+	if (nforce2_set_fsb(target_fsb) < 0)
+		printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n",
+                       target_fsb);
+	else
+		dprintk("Changed FSB successfully to %d\n",
+                       target_fsb);
+
+	/* Enable IRQs */
+	//local_irq_restore(flags);
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	return 0;
+}
+
+/**
+ * nforce2_verify - verifies a new CPUFreq policy
+ * @policy: new policy
+ */
+static int nforce2_verify(struct cpufreq_policy *policy)
+{
+	unsigned int fsb_pol_max;
+
+	fsb_pol_max = policy->max / (fid * 100);
+
+	if (policy->min < (fsb_pol_max * fid * 100))
+		policy->max = (fsb_pol_max + 1) * fid * 100;
+
+	cpufreq_verify_within_limits(policy,
+                                     policy->cpuinfo.min_freq,
+                                     policy->cpuinfo.max_freq);
+	return 0;
+}
+
+static int nforce2_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int fsb;
+	unsigned int rfid;
+
+	/* capability check */
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	/* Get current FSB */
+	fsb = nforce2_fsb_read(0);
+
+	if (!fsb)
+		return -EIO;
+
+	/* FIX: Get FID from CPU */
+	if (!fid) {
+		if (!cpu_khz) {
+			printk(KERN_WARNING
+			       "cpufreq: cpu_khz not set, can't calculate multiplier!\n");
+			return -ENODEV;
+		}
+
+		fid = cpu_khz / (fsb * 100);
+		rfid = fid % 5;
+
+		if (rfid) {
+			if (rfid > 2)
+				fid += 5 - rfid;
+			else
+				fid -= rfid;
+		}
+	}
+
+	printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb,
+	       fid / 10, fid % 10);
+
+	/* Set maximum FSB to FSB at boot time */
+	max_fsb = nforce2_fsb_read(1);
+
+	if(!max_fsb)
+		return -EIO;
+
+	if (!min_fsb)
+		min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE;
+
+	if (min_fsb < NFORCE2_MIN_FSB)
+		min_fsb = NFORCE2_MIN_FSB;
+
+	/* cpuinfo and default policy values */
+	policy->cpuinfo.min_freq = min_fsb * fid * 100;
+	policy->cpuinfo.max_freq = max_fsb * fid * 100;
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	policy->cur = nforce2_get(policy->cpu);
+	policy->min = policy->cpuinfo.min_freq;
+	policy->max = policy->cpuinfo.max_freq;
+
+	return 0;
+}
+
+static int nforce2_cpu_exit(struct cpufreq_policy *policy)
+{
+	return 0;
+}
+
+static struct cpufreq_driver nforce2_driver = {
+	.name = "nforce2",
+	.verify = nforce2_verify,
+	.target = nforce2_target,
+	.get = nforce2_get,
+	.init = nforce2_cpu_init,
+	.exit = nforce2_cpu_exit,
+	.owner = THIS_MODULE,
+};
+
+/**
+ * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic
+ *
+ * Detects nForce2 A2 and C1 stepping
+ *
+ */
+static unsigned int nforce2_detect_chipset(void)
+{
+	nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
+					PCI_DEVICE_ID_NVIDIA_NFORCE2,
+					PCI_ANY_ID, PCI_ANY_ID, NULL);
+
+	if (nforce2_chipset_dev == NULL)
+		return -ENODEV;
+
+	printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n",
+	       nforce2_chipset_dev->revision);
+	printk(KERN_INFO
+	       "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n");
+
+	return 0;
+}
+
+/**
+ * nforce2_init - initializes the nForce2 CPUFreq driver
+ *
+ * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported
+ * devices, -EINVAL on problems during initiatization, and zero on
+ * success.
+ */
+static int __init nforce2_init(void)
+{
+	/* TODO: do we need to detect the processor? */
+
+	/* detect chipset */
+	if (nforce2_detect_chipset()) {
+		printk(KERN_ERR "cpufreq: No nForce2 chipset.\n");
+		return -ENODEV;
+	}
+
+	return cpufreq_register_driver(&nforce2_driver);
+}
+
+/**
+ * nforce2_exit - unregisters cpufreq module
+ *
+ *   Unregisters nForce2 FSB change support.
+ */
+static void __exit nforce2_exit(void)
+{
+	cpufreq_unregister_driver(&nforce2_driver);
+}
+
+module_init(nforce2_init);
+module_exit(nforce2_exit);
+
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
new file mode 100644
index 00000000000..c11baaf9f2b
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -0,0 +1,333 @@
+/*
+ *  Based on documentation provided by Dave Jones. Thanks!
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+
+#include <asm/msr.h>
+#include <asm/tsc.h>
+#include <asm/timex.h>
+#include <asm/io.h>
+#include <asm/delay.h>
+
+#define EPS_BRAND_C7M	0
+#define EPS_BRAND_C7	1
+#define EPS_BRAND_EDEN	2
+#define EPS_BRAND_C3	3
+
+struct eps_cpu_data {
+	u32 fsb;
+	struct cpufreq_frequency_table freq_table[];
+};
+
+static struct eps_cpu_data *eps_cpu[NR_CPUS];
+
+
+static unsigned int eps_get(unsigned int cpu)
+{
+	struct eps_cpu_data *centaur;
+	u32 lo, hi;
+
+	if (cpu)
+		return 0;
+	centaur = eps_cpu[cpu];
+	if (centaur == NULL)
+		return 0;
+
+	/* Return current frequency */
+	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+	return centaur->fsb * ((lo >> 8) & 0xff);
+}
+
+static int eps_set_state(struct eps_cpu_data *centaur,
+			 unsigned int cpu,
+			 u32 dest_state)
+{
+	struct cpufreq_freqs freqs;
+	u32 lo, hi;
+	int err = 0;
+	int i;
+
+	freqs.old = eps_get(cpu);
+	freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff);
+	freqs.cpu = cpu;
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	/* Wait while CPU is busy */
+	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+	i = 0;
+	while (lo & ((1 << 16) | (1 << 17))) {
+		udelay(16);
+		rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+		i++;
+		if (unlikely(i > 64)) {
+			err = -ENODEV;
+			goto postchange;
+		}
+	}
+	/* Set new multiplier and voltage */
+	wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0);
+	/* Wait until transition end */
+	i = 0;
+	do {
+		udelay(16);
+		rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+		i++;
+		if (unlikely(i > 64)) {
+			err = -ENODEV;
+			goto postchange;
+		}
+	} while (lo & ((1 << 16) | (1 << 17)));
+
+	/* Return current frequency */
+postchange:
+	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+	freqs.new = centaur->fsb * ((lo >> 8) & 0xff);
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	return err;
+}
+
+static int eps_target(struct cpufreq_policy *policy,
+			       unsigned int target_freq,
+			       unsigned int relation)
+{
+	struct eps_cpu_data *centaur;
+	unsigned int newstate = 0;
+	unsigned int cpu = policy->cpu;
+	unsigned int dest_state;
+	int ret;
+
+	if (unlikely(eps_cpu[cpu] == NULL))
+		return -ENODEV;
+	centaur = eps_cpu[cpu];
+
+	if (unlikely(cpufreq_frequency_table_target(policy,
+			&eps_cpu[cpu]->freq_table[0],
+			target_freq,
+			relation,
+			&newstate))) {
+		return -EINVAL;
+	}
+
+	/* Make frequency transition */
+	dest_state = centaur->freq_table[newstate].index & 0xffff;
+	ret = eps_set_state(centaur, cpu, dest_state);
+	if (ret)
+		printk(KERN_ERR "eps: Timeout!\n");
+	return ret;
+}
+
+static int eps_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy,
+			&eps_cpu[policy->cpu]->freq_table[0]);
+}
+
+static int eps_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int i;
+	u32 lo, hi;
+	u64 val;
+	u8 current_multiplier, current_voltage;
+	u8 max_multiplier, max_voltage;
+	u8 min_multiplier, min_voltage;
+	u8 brand;
+	u32 fsb;
+	struct eps_cpu_data *centaur;
+	struct cpufreq_frequency_table *f_table;
+	int k, step, voltage;
+	int ret;
+	int states;
+
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	/* Check brand */
+	printk("eps: Detected VIA ");
+	rdmsr(0x1153, lo, hi);
+	brand = (((lo >> 2) ^ lo) >> 18) & 3;
+	switch(brand) {
+	case EPS_BRAND_C7M:
+		printk("C7-M\n");
+		break;
+	case EPS_BRAND_C7:
+		printk("C7\n");
+		break;
+	case EPS_BRAND_EDEN:
+		printk("Eden\n");
+		break;
+	case EPS_BRAND_C3:
+		printk("C3\n");
+		return -ENODEV;
+		break;
+	}
+	/* Enable Enhanced PowerSaver */
+	rdmsrl(MSR_IA32_MISC_ENABLE, val);
+	if (!(val & 1 << 16)) {
+		val |= 1 << 16;
+		wrmsrl(MSR_IA32_MISC_ENABLE, val);
+		/* Can be locked at 0 */
+		rdmsrl(MSR_IA32_MISC_ENABLE, val);
+		if (!(val & 1 << 16)) {
+			printk("eps: Can't enable Enhanced PowerSaver\n");
+			return -ENODEV;
+		}
+	}
+
+	/* Print voltage and multiplier */
+	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+	current_voltage = lo & 0xff;
+	printk("eps: Current voltage = %dmV\n", current_voltage * 16 + 700);
+	current_multiplier = (lo >> 8) & 0xff;
+	printk("eps: Current multiplier = %d\n", current_multiplier);
+
+	/* Print limits */
+	max_voltage = hi & 0xff;
+	printk("eps: Highest voltage = %dmV\n", max_voltage * 16 + 700);
+	max_multiplier = (hi >> 8) & 0xff;
+	printk("eps: Highest multiplier = %d\n", max_multiplier);
+	min_voltage = (hi >> 16) & 0xff;
+	printk("eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700);
+	min_multiplier = (hi >> 24) & 0xff;
+	printk("eps: Lowest multiplier = %d\n", min_multiplier);
+
+	/* Sanity checks */
+	if (current_multiplier == 0 || max_multiplier == 0
+	    || min_multiplier == 0)
+		return -EINVAL;
+	if (current_multiplier > max_multiplier
+	    || max_multiplier <= min_multiplier)
+		return -EINVAL;
+	if (current_voltage > 0x1c || max_voltage > 0x1c)
+		return -EINVAL;
+	if (max_voltage < min_voltage)
+		return -EINVAL;
+
+	/* Calc FSB speed */
+	fsb = cpu_khz / current_multiplier;
+	/* Calc number of p-states supported */
+	if (brand == EPS_BRAND_C7M)
+		states = max_multiplier - min_multiplier + 1;
+	else
+		states = 2;
+
+	/* Allocate private data and frequency table for current cpu */
+	centaur = kzalloc(sizeof(struct eps_cpu_data)
+		    + (states + 1) * sizeof(struct cpufreq_frequency_table),
+		    GFP_KERNEL);
+	if (!centaur)
+		return -ENOMEM;
+	eps_cpu[0] = centaur;
+
+	/* Copy basic values */
+	centaur->fsb = fsb;
+
+	/* Fill frequency and MSR value table */
+	f_table = &centaur->freq_table[0];
+	if (brand != EPS_BRAND_C7M) {
+		f_table[0].frequency = fsb * min_multiplier;
+		f_table[0].index = (min_multiplier << 8) | min_voltage;
+		f_table[1].frequency = fsb * max_multiplier;
+		f_table[1].index = (max_multiplier << 8) | max_voltage;
+		f_table[2].frequency = CPUFREQ_TABLE_END;
+	} else {
+		k = 0;
+		step = ((max_voltage - min_voltage) * 256)
+			/ (max_multiplier - min_multiplier);
+		for (i = min_multiplier; i <= max_multiplier; i++) {
+			voltage = (k * step) / 256 + min_voltage;
+			f_table[k].frequency = fsb * i;
+			f_table[k].index = (i << 8) | voltage;
+			k++;
+		}
+		f_table[k].frequency = CPUFREQ_TABLE_END;
+	}
+
+	policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */
+	policy->cur = fsb * current_multiplier;
+
+	ret = cpufreq_frequency_table_cpuinfo(policy, &centaur->freq_table[0]);
+	if (ret) {
+		kfree(centaur);
+		return ret;
+	}
+
+	cpufreq_frequency_table_get_attr(&centaur->freq_table[0], policy->cpu);
+	return 0;
+}
+
+static int eps_cpu_exit(struct cpufreq_policy *policy)
+{
+	unsigned int cpu = policy->cpu;
+	struct eps_cpu_data *centaur;
+	u32 lo, hi;
+
+	if (eps_cpu[cpu] == NULL)
+		return -ENODEV;
+	centaur = eps_cpu[cpu];
+
+	/* Get max frequency */
+	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+	/* Set max frequency */
+	eps_set_state(centaur, cpu, hi & 0xffff);
+	/* Bye */
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	kfree(eps_cpu[cpu]);
+	eps_cpu[cpu] = NULL;
+	return 0;
+}
+
+static struct freq_attr* eps_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver eps_driver = {
+	.verify		= eps_verify,
+	.target		= eps_target,
+	.init		= eps_cpu_init,
+	.exit		= eps_cpu_exit,
+	.get		= eps_get,
+	.name		= "e_powersaver",
+	.owner		= THIS_MODULE,
+	.attr		= eps_attr,
+};
+
+static int __init eps_init(void)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+
+	/* This driver will work only on Centaur C7 processors with
+	 * Enhanced SpeedStep/PowerSaver registers */
+	if (c->x86_vendor != X86_VENDOR_CENTAUR
+	    || c->x86 != 6 || c->x86_model != 10)
+		return -ENODEV;
+	if (!cpu_has(c, X86_FEATURE_EST))
+		return -ENODEV;
+
+	if (cpufreq_register_driver(&eps_driver))
+		return -EINVAL;
+	return 0;
+}
+
+static void __exit eps_exit(void)
+{
+	cpufreq_unregister_driver(&eps_driver);
+}
+
+MODULE_AUTHOR("Rafa� Bilski <rafalbilski@interia.pl>");
+MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's.");
+MODULE_LICENSE("GPL");
+
+module_init(eps_init);
+module_exit(eps_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
new file mode 100644
index 00000000000..1e7ae7dafcf
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -0,0 +1,308 @@
+/*
+ *	elanfreq:	cpufreq driver for the AMD ELAN family
+ *
+ *	(c) Copyright 2002 Robert Schwebel <r.schwebel@pengutronix.de>
+ *
+ *	Parts of this code are (c) Sven Geggus <sven@geggus.net>
+ *
+ *      All Rights Reserved.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/cpufreq.h>
+
+#include <asm/msr.h>
+#include <asm/timex.h>
+#include <asm/io.h>
+
+#define REG_CSCIR 0x22		/* Chip Setup and Control Index Register    */
+#define REG_CSCDR 0x23		/* Chip Setup and Control Data  Register    */
+
+/* Module parameter */
+static int max_freq;
+
+struct s_elan_multiplier {
+	int clock;		/* frequency in kHz                         */
+	int val40h;		/* PMU Force Mode register                  */
+	int val80h;		/* CPU Clock Speed Register                 */
+};
+
+/*
+ * It is important that the frequencies
+ * are listed in ascending order here!
+ */
+struct s_elan_multiplier elan_multiplier[] = {
+	{1000,	0x02,	0x18},
+	{2000,	0x02,	0x10},
+	{4000,	0x02,	0x08},
+	{8000,	0x00,	0x00},
+	{16000,	0x00,	0x02},
+	{33000,	0x00,	0x04},
+	{66000,	0x01,	0x04},
+	{99000,	0x01,	0x05}
+};
+
+static struct cpufreq_frequency_table elanfreq_table[] = {
+	{0,	1000},
+	{1,	2000},
+	{2,	4000},
+	{3,	8000},
+	{4,	16000},
+	{5,	33000},
+	{6,	66000},
+	{7,	99000},
+	{0,	CPUFREQ_TABLE_END},
+};
+
+
+/**
+ *	elanfreq_get_cpu_frequency: determine current cpu speed
+ *
+ *	Finds out at which frequency the CPU of the Elan SOC runs
+ *	at the moment. Frequencies from 1 to 33 MHz are generated
+ *	the normal way, 66 and 99 MHz are called "Hyperspeed Mode"
+ *	and have the rest of the chip running with 33 MHz.
+ */
+
+static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
+{
+	u8 clockspeed_reg;    /* Clock Speed Register */
+
+	local_irq_disable();
+	outb_p(0x80,REG_CSCIR);
+	clockspeed_reg = inb_p(REG_CSCDR);
+	local_irq_enable();
+
+	if ((clockspeed_reg & 0xE0) == 0xE0)
+		return 0;
+
+	/* Are we in CPU clock multiplied mode (66/99 MHz)? */
+	if ((clockspeed_reg & 0xE0) == 0xC0) {
+		if ((clockspeed_reg & 0x01) == 0)
+			return 66000;
+		else
+			return 99000;
+	}
+
+	/* 33 MHz is not 32 MHz... */
+	if ((clockspeed_reg & 0xE0)==0xA0)
+		return 33000;
+
+	return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000);
+}
+
+
+/**
+ *	elanfreq_set_cpu_frequency: Change the CPU core frequency
+ *	@cpu: cpu number
+ *	@freq: frequency in kHz
+ *
+ *	This function takes a frequency value and changes the CPU frequency
+ *	according to this. Note that the frequency has to be checked by
+ *	elanfreq_validatespeed() for correctness!
+ *
+ *	There is no return value.
+ */
+
+static void elanfreq_set_cpu_state (unsigned int state)
+{
+	struct cpufreq_freqs    freqs;
+
+	freqs.old = elanfreq_get_cpu_frequency(0);
+	freqs.new = elan_multiplier[state].clock;
+	freqs.cpu = 0; /* elanfreq.c is UP only driver */
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n",
+			elan_multiplier[state].clock);
+
+
+	/*
+	 * Access to the Elan's internal registers is indexed via
+	 * 0x22: Chip Setup & Control Register Index Register (CSCI)
+	 * 0x23: Chip Setup & Control Register Data  Register (CSCD)
+	 *
+	 */
+
+	/*
+	 * 0x40 is the Power Management Unit's Force Mode Register.
+	 * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency)
+	 */
+
+	local_irq_disable();
+	outb_p(0x40,REG_CSCIR);		/* Disable hyperspeed mode */
+	outb_p(0x00,REG_CSCDR);
+	local_irq_enable();		/* wait till internal pipelines and */
+	udelay(1000);			/* buffers have cleaned up          */
+
+	local_irq_disable();
+
+	/* now, set the CPU clock speed register (0x80) */
+	outb_p(0x80,REG_CSCIR);
+	outb_p(elan_multiplier[state].val80h,REG_CSCDR);
+
+	/* now, the hyperspeed bit in PMU Force Mode Register (0x40) */
+	outb_p(0x40,REG_CSCIR);
+	outb_p(elan_multiplier[state].val40h,REG_CSCDR);
+	udelay(10000);
+	local_irq_enable();
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+};
+
+
+/**
+ *	elanfreq_validatespeed: test if frequency range is valid
+ *	@policy: the policy to validate
+ *
+ *	This function checks if a given frequency range in kHz is valid
+ *	for the hardware supported by the driver.
+ */
+
+static int elanfreq_verify (struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]);
+}
+
+static int elanfreq_target (struct cpufreq_policy *policy,
+			    unsigned int target_freq,
+			    unsigned int relation)
+{
+	unsigned int newstate = 0;
+
+	if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate))
+		return -EINVAL;
+
+	elanfreq_set_cpu_state(newstate);
+
+	return 0;
+}
+
+
+/*
+ *	Module init and exit code
+ */
+
+static int elanfreq_cpu_init(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+	unsigned int i;
+	int result;
+
+	/* capability check */
+	if ((c->x86_vendor != X86_VENDOR_AMD) ||
+	    (c->x86 != 4) || (c->x86_model!=10))
+		return -ENODEV;
+
+	/* max freq */
+	if (!max_freq)
+		max_freq = elanfreq_get_cpu_frequency(0);
+
+	/* table init */
+	for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) {
+		if (elanfreq_table[i].frequency > max_freq)
+			elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+	}
+
+	/* cpuinfo and default policy values */
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	policy->cur = elanfreq_get_cpu_frequency(0);
+
+	result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table);
+	if (result)
+		return (result);
+
+	cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu);
+	return 0;
+}
+
+
+static int elanfreq_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+
+#ifndef MODULE
+/**
+ * elanfreq_setup - elanfreq command line parameter parsing
+ *
+ * elanfreq command line parameter.  Use:
+ *  elanfreq=66000
+ * to set the maximum CPU frequency to 66 MHz. Note that in
+ * case you do not give this boot parameter, the maximum
+ * frequency will fall back to _current_ CPU frequency which
+ * might be lower. If you build this as a module, use the
+ * max_freq module parameter instead.
+ */
+static int __init elanfreq_setup(char *str)
+{
+	max_freq = simple_strtoul(str, &str, 0);
+	printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n");
+	return 1;
+}
+__setup("elanfreq=", elanfreq_setup);
+#endif
+
+
+static struct freq_attr* elanfreq_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+
+static struct cpufreq_driver elanfreq_driver = {
+	.get		= elanfreq_get_cpu_frequency,
+	.verify		= elanfreq_verify,
+	.target		= elanfreq_target,
+	.init		= elanfreq_cpu_init,
+	.exit		= elanfreq_cpu_exit,
+	.name		= "elanfreq",
+	.owner		= THIS_MODULE,
+	.attr		= elanfreq_attr,
+};
+
+
+static int __init elanfreq_init(void)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+
+	/* Test if we have the right hardware */
+	if ((c->x86_vendor != X86_VENDOR_AMD) ||
+		(c->x86 != 4) || (c->x86_model!=10)) {
+		printk(KERN_INFO "elanfreq: error: no Elan processor found!\n");
+                return -ENODEV;
+	}
+	return cpufreq_register_driver(&elanfreq_driver);
+}
+
+
+static void __exit elanfreq_exit(void)
+{
+	cpufreq_unregister_driver(&elanfreq_driver);
+}
+
+
+module_param (max_freq, int, 0444);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, Sven Geggus <sven@geggus.net>");
+MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs");
+
+module_init(elanfreq_init);
+module_exit(elanfreq_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
new file mode 100644
index 00000000000..ed2bda127c4
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
@@ -0,0 +1,494 @@
+/*
+ *	Cyrix MediaGX and NatSemi Geode Suspend Modulation
+ *	(C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
+ *	(C) 2002 Hiroshi Miura   <miura@da-cha.org>
+ *	All Rights Reserved
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      version 2 as published by the Free Software Foundation
+ *
+ *      The author(s) of this software shall not be held liable for damages
+ *      of any nature resulting due to the use of this software. This
+ *      software is provided AS-IS with no warranties.
+ *
+ * Theoritical note:
+ *
+ *	(see Geode(tm) CS5530 manual (rev.4.1) page.56)
+ *
+ *	CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0
+ *	are based on Suspend Moduration.
+ *
+ *	Suspend Modulation works by asserting and de-asserting the SUSP# pin
+ *	to CPU(GX1/GXLV) for configurable durations. When asserting SUSP#
+ *	the CPU enters an idle state. GX1 stops its core clock when SUSP# is
+ *	asserted then power consumption is reduced.
+ *
+ *	Suspend Modulation's OFF/ON duration are configurable
+ *	with 'Suspend Modulation OFF Count Register'
+ *	and 'Suspend Modulation ON Count Register'.
+ *	These registers are 8bit counters that represent the number of
+ *	32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF)
+ *	to the processor.
+ *
+ *	These counters define a ratio which is the effective frequency
+ *	of operation of the system.
+ *
+ *			       OFF Count
+ *	F_eff = Fgx * ----------------------
+ *	                OFF Count + ON Count
+ *
+ *	0 <= On Count, Off Count <= 255
+ *
+ *	From these limits, we can get register values
+ *
+ *	off_duration + on_duration <= MAX_DURATION
+ *	on_duration = off_duration * (stock_freq - freq) / freq
+ *
+ *      off_duration  =  (freq * DURATION) / stock_freq
+ *      on_duration = DURATION - off_duration
+ *
+ *
+ *---------------------------------------------------------------------------
+ *
+ * ChangeLog:
+ *	Dec. 12, 2003	Hiroshi Miura <miura@da-cha.org>
+ *		- fix on/off register mistake
+ *		- fix cpu_khz calc when it stops cpu modulation.
+ *
+ *	Dec. 11, 2002	Hiroshi Miura <miura@da-cha.org>
+ *		- rewrite for Cyrix MediaGX Cx5510/5520 and
+ *		  NatSemi Geode Cs5530(A).
+ *
+ *	Jul. ??, 2002  Zwane Mwaikambo <zwane@commfireservices.com>
+ *		- cs5530_mod patch for 2.4.19-rc1.
+ *
+ *---------------------------------------------------------------------------
+ *
+ * Todo
+ *	Test on machines with 5510, 5530, 5530A
+ */
+
+/************************************************************************
+ *			Suspend Modulation - Definitions		*
+ ************************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <asm/processor-cyrix.h>
+#include <asm/errno.h>
+
+/* PCI config registers, all at F0 */
+#define PCI_PMER1	0x80	/* power management enable register 1 */
+#define PCI_PMER2	0x81	/* power management enable register 2 */
+#define PCI_PMER3	0x82	/* power management enable register 3 */
+#define PCI_IRQTC	0x8c	/* irq speedup timer counter register:typical 2 to 4ms */
+#define PCI_VIDTC	0x8d	/* video speedup timer counter register: typical 50 to 100ms */
+#define PCI_MODOFF	0x94	/* suspend modulation OFF counter register, 1 = 32us */
+#define PCI_MODON	0x95	/* suspend modulation ON counter register */
+#define PCI_SUSCFG	0x96	/* suspend configuration register */
+
+/* PMER1 bits */
+#define GPM		(1<<0)	/* global power management */
+#define GIT		(1<<1)	/* globally enable PM device idle timers */
+#define GTR		(1<<2)	/* globally enable IO traps */
+#define IRQ_SPDUP	(1<<3)	/* disable clock throttle during interrupt handling */
+#define VID_SPDUP	(1<<4)	/* disable clock throttle during vga video handling */
+
+/* SUSCFG bits */
+#define SUSMOD		(1<<0)	/* enable/disable suspend modulation */
+/* the belows support only with cs5530 (after rev.1.2)/cs5530A */
+#define SMISPDUP	(1<<1)	/* select how SMI re-enable suspend modulation: */
+				/* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */
+#define SUSCFG		(1<<2)	/* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */
+/* the belows support only with cs5530A */
+#define PWRSVE_ISA	(1<<3)	/* stop ISA clock  */
+#define PWRSVE		(1<<4)	/* active idle */
+
+struct gxfreq_params {
+	u8 on_duration;
+	u8 off_duration;
+	u8 pci_suscfg;
+	u8 pci_pmer1;
+	u8 pci_pmer2;
+	struct pci_dev *cs55x0;
+};
+
+static struct gxfreq_params *gx_params;
+static int stock_freq;
+
+/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */
+static int pci_busclk = 0;
+module_param (pci_busclk, int, 0444);
+
+/* maximum duration for which the cpu may be suspended
+ * (32us * MAX_DURATION). If no parameter is given, this defaults
+ * to 255.
+ * Note that this leads to a maximum of 8 ms(!) where the CPU clock
+ * is suspended -- processing power is just 0.39% of what it used to be,
+ * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */
+static int max_duration = 255;
+module_param (max_duration, int, 0444);
+
+/* For the default policy, we want at least some processing power
+ * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV)
+ */
+#define POLICY_MIN_DIV 20
+
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg)
+
+/**
+ * we can detect a core multipiler from dir0_lsb
+ * from GX1 datasheet p.56,
+ *	MULT[3:0]:
+ *	0000 = SYSCLK multiplied by 4 (test only)
+ *	0001 = SYSCLK multiplied by 10
+ *	0010 = SYSCLK multiplied by 4
+ *	0011 = SYSCLK multiplied by 6
+ *	0100 = SYSCLK multiplied by 9
+ *	0101 = SYSCLK multiplied by 5
+ *	0110 = SYSCLK multiplied by 7
+ *	0111 = SYSCLK multiplied by 8
+ *              of 33.3MHz
+ **/
+static int gx_freq_mult[16] = {
+		4, 10, 4, 6, 9, 5, 7, 8,
+		0, 0, 0, 0, 0, 0, 0, 0
+};
+
+
+/****************************************************************
+ *	Low Level chipset interface				*
+ ****************************************************************/
+static struct pci_device_id gx_chipset_tbl[] __initdata = {
+	{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID },
+	{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID },
+	{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID },
+	{ 0, },
+};
+
+/**
+ * gx_detect_chipset:
+ *
+ **/
+static __init struct pci_dev *gx_detect_chipset(void)
+{
+	struct pci_dev *gx_pci = NULL;
+
+	/* check if CPU is a MediaGX or a Geode. */
+	if ((current_cpu_data.x86_vendor != X86_VENDOR_NSC) &&
+	    (current_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) {
+		dprintk("error: no MediaGX/Geode processor found!\n");
+		return NULL;
+	}
+
+	/* detect which companion chip is used */
+	while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) {
+		if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL)
+			return gx_pci;
+	}
+
+	dprintk("error: no supported chipset found!\n");
+	return NULL;
+}
+
+/**
+ * gx_get_cpuspeed:
+ *
+ * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi Geode CPU runs.
+ */
+static unsigned int gx_get_cpuspeed(unsigned int cpu)
+{
+	if ((gx_params->pci_suscfg & SUSMOD) == 0)
+		return stock_freq;
+
+	return (stock_freq * gx_params->off_duration)
+		/ (gx_params->on_duration + gx_params->off_duration);
+}
+
+/**
+ *      gx_validate_speed:
+ *      determine current cpu speed
+ *
+ **/
+
+static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration)
+{
+	unsigned int i;
+	u8 tmp_on, tmp_off;
+	int old_tmp_freq = stock_freq;
+	int tmp_freq;
+
+	*off_duration=1;
+	*on_duration=0;
+
+	for (i=max_duration; i>0; i--) {
+		tmp_off = ((khz * i) / stock_freq) & 0xff;
+		tmp_on = i - tmp_off;
+		tmp_freq = (stock_freq * tmp_off) / i;
+		/* if this relation is closer to khz, use this. If it's equal,
+		 * prefer it, too - lower latency */
+		if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) {
+			*on_duration = tmp_on;
+			*off_duration = tmp_off;
+			old_tmp_freq = tmp_freq;
+		}
+	}
+
+	return old_tmp_freq;
+}
+
+
+/**
+ * gx_set_cpuspeed:
+ * set cpu speed in khz.
+ **/
+
+static void gx_set_cpuspeed(unsigned int khz)
+{
+	u8 suscfg, pmer1;
+	unsigned int new_khz;
+	unsigned long flags;
+	struct cpufreq_freqs freqs;
+
+	freqs.cpu = 0;
+	freqs.old = gx_get_cpuspeed(0);
+
+	new_khz = gx_validate_speed(khz, &gx_params->on_duration, &gx_params->off_duration);
+
+	freqs.new = new_khz;
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	local_irq_save(flags);
+
+	if (new_khz != stock_freq) {  /* if new khz == 100% of CPU speed, it is special case */
+		switch (gx_params->cs55x0->device) {
+		case PCI_DEVICE_ID_CYRIX_5530_LEGACY:
+			pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP;
+			/* FIXME: need to test other values -- Zwane,Miura */
+			pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */
+			pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */
+			pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1);
+
+			if (gx_params->cs55x0->revision < 0x10) {   /* CS5530(rev 1.2, 1.3) */
+				suscfg = gx_params->pci_suscfg | SUSMOD;
+			} else {                           /* CS5530A,B.. */
+				suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE;
+			}
+			break;
+		case PCI_DEVICE_ID_CYRIX_5520:
+		case PCI_DEVICE_ID_CYRIX_5510:
+			suscfg = gx_params->pci_suscfg | SUSMOD;
+			break;
+		default:
+			local_irq_restore(flags);
+			dprintk("fatal: try to set unknown chipset.\n");
+			return;
+		}
+	} else {
+		suscfg = gx_params->pci_suscfg & ~(SUSMOD);
+		gx_params->off_duration = 0;
+		gx_params->on_duration = 0;
+		dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n");
+	}
+
+	pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration);
+	pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration);
+
+	pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg);
+	pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg);
+
+	local_irq_restore(flags);
+
+	gx_params->pci_suscfg = suscfg;
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n",
+		gx_params->on_duration * 32, gx_params->off_duration * 32);
+	dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new);
+}
+
+/****************************************************************
+ *             High level functions                             *
+ ****************************************************************/
+
+/*
+ *	cpufreq_gx_verify: test if frequency range is valid
+ *
+ *	This function checks if a given frequency range in kHz is valid
+ *      for the hardware supported by the driver.
+ */
+
+static int cpufreq_gx_verify(struct cpufreq_policy *policy)
+{
+	unsigned int tmp_freq = 0;
+	u8 tmp1, tmp2;
+
+	if (!stock_freq || !policy)
+		return -EINVAL;
+
+	policy->cpu = 0;
+	cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq);
+
+	/* it needs to be assured that at least one supported frequency is
+	 * within policy->min and policy->max. If it is not, policy->max
+	 * needs to be increased until one freuqency is supported.
+	 * policy->min may not be decreased, though. This way we guarantee a
+	 * specific processing capacity.
+	 */
+	tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2);
+	if (tmp_freq < policy->min)
+		tmp_freq += stock_freq / max_duration;
+	policy->min = tmp_freq;
+	if (policy->min > policy->max)
+		policy->max = tmp_freq;
+	tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2);
+	if (tmp_freq > policy->max)
+		tmp_freq -= stock_freq / max_duration;
+	policy->max = tmp_freq;
+	if (policy->max < policy->min)
+		policy->max = policy->min;
+	cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq);
+
+	return 0;
+}
+
+/*
+ *      cpufreq_gx_target:
+ *
+ */
+static int cpufreq_gx_target(struct cpufreq_policy *policy,
+			     unsigned int target_freq,
+			     unsigned int relation)
+{
+	u8 tmp1, tmp2;
+	unsigned int tmp_freq;
+
+	if (!stock_freq || !policy)
+		return -EINVAL;
+
+	policy->cpu = 0;
+
+	tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2);
+	while (tmp_freq < policy->min) {
+		tmp_freq += stock_freq / max_duration;
+		tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
+	}
+	while (tmp_freq > policy->max) {
+		tmp_freq -= stock_freq / max_duration;
+		tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
+	}
+
+	gx_set_cpuspeed(tmp_freq);
+
+	return 0;
+}
+
+static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int maxfreq, curfreq;
+
+	if (!policy || policy->cpu != 0)
+		return -ENODEV;
+
+	/* determine maximum frequency */
+	if (pci_busclk) {
+		maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
+	} else if (cpu_khz) {
+		maxfreq = cpu_khz;
+	} else {
+		maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
+	}
+	stock_freq = maxfreq;
+	curfreq = gx_get_cpuspeed(0);
+
+	dprintk("cpu max frequency is %d.\n", maxfreq);
+	dprintk("cpu current frequency is %dkHz.\n",curfreq);
+
+	/* setup basic struct for cpufreq API */
+	policy->cpu = 0;
+
+	if (max_duration < POLICY_MIN_DIV)
+		policy->min = maxfreq / max_duration;
+	else
+		policy->min = maxfreq / POLICY_MIN_DIV;
+	policy->max = maxfreq;
+	policy->cur = curfreq;
+	policy->cpuinfo.min_freq = maxfreq / max_duration;
+	policy->cpuinfo.max_freq = maxfreq;
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+
+	return 0;
+}
+
+/*
+ * cpufreq_gx_init:
+ *   MediaGX/Geode GX initialize cpufreq driver
+ */
+static struct cpufreq_driver gx_suspmod_driver = {
+	.get		= gx_get_cpuspeed,
+	.verify		= cpufreq_gx_verify,
+	.target		= cpufreq_gx_target,
+	.init		= cpufreq_gx_cpu_init,
+	.name		= "gx-suspmod",
+	.owner		= THIS_MODULE,
+};
+
+static int __init cpufreq_gx_init(void)
+{
+	int ret;
+	struct gxfreq_params *params;
+	struct pci_dev *gx_pci;
+
+	/* Test if we have the right hardware */
+	if ((gx_pci = gx_detect_chipset()) == NULL)
+		return -ENODEV;
+
+	/* check whether module parameters are sane */
+	if (max_duration > 0xff)
+		max_duration = 0xff;
+
+	dprintk("geode suspend modulation available.\n");
+
+	params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL);
+	if (params == NULL)
+		return -ENOMEM;
+
+	params->cs55x0 = gx_pci;
+	gx_params = params;
+
+	/* keep cs55x0 configurations */
+	pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg));
+	pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1));
+	pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2));
+	pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration));
+	pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration));
+
+	if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) {
+		kfree(params);
+		return ret;                   /* register error! */
+	}
+
+	return 0;
+}
+
+static void __exit cpufreq_gx_exit(void)
+{
+	cpufreq_unregister_driver(&gx_suspmod_driver);
+	pci_dev_put(gx_params->cs55x0);
+	kfree(gx_params);
+}
+
+MODULE_AUTHOR ("Hiroshi Miura <miura@da-cha.org>");
+MODULE_DESCRIPTION ("Cpufreq driver for Cyrix MediaGX and NatSemi Geode");
+MODULE_LICENSE ("GPL");
+
+module_init(cpufreq_gx_init);
+module_exit(cpufreq_gx_exit);
+
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
new file mode 100644
index 00000000000..5045f5d583c
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -0,0 +1,1027 @@
+/*
+ *  (C) 2001-2004  Dave Jones. <davej@codemonkey.org.uk>
+ *  (C) 2002  Padraig Brady. <padraig@antefacto.com>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *  Based upon datasheets & sample CPUs kindly provided by VIA.
+ *
+ *  VIA have currently 3 different versions of Longhaul.
+ *  Version 1 (Longhaul) uses the BCR2 MSR at 0x1147.
+ *   It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0.
+ *  Version 2 of longhaul is backward compatible with v1, but adds
+ *   LONGHAUL MSR for purpose of both frequency and voltage scaling.
+ *   Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C).
+ *  Version 3 of longhaul got renamed to Powersaver and redesigned
+ *   to use only the POWERSAVER MSR at 0x110a.
+ *   It is present in Ezra-T (C5M), Nehemiah (C5X) and above.
+ *   It's pretty much the same feature wise to longhaul v2, though
+ *   there is provision for scaling FSB too, but this doesn't work
+ *   too well in practice so we don't even try to use this.
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+
+#include <asm/msr.h>
+#include <asm/timex.h>
+#include <asm/io.h>
+#include <asm/acpi.h>
+#include <linux/acpi.h>
+#include <acpi/processor.h>
+
+#include "longhaul.h"
+
+#define PFX "longhaul: "
+
+#define TYPE_LONGHAUL_V1	1
+#define TYPE_LONGHAUL_V2	2
+#define TYPE_POWERSAVER		3
+
+#define	CPU_SAMUEL	1
+#define	CPU_SAMUEL2	2
+#define	CPU_EZRA	3
+#define	CPU_EZRA_T	4
+#define	CPU_NEHEMIAH	5
+#define	CPU_NEHEMIAH_C	6
+
+/* Flags */
+#define USE_ACPI_C3		(1 << 1)
+#define USE_NORTHBRIDGE		(1 << 2)
+
+static int cpu_model;
+static unsigned int numscales=16;
+static unsigned int fsb;
+
+static const struct mV_pos *vrm_mV_table;
+static const unsigned char *mV_vrm_table;
+
+static unsigned int highest_speed, lowest_speed; /* kHz */
+static unsigned int minmult, maxmult;
+static int can_scale_voltage;
+static struct acpi_processor *pr = NULL;
+static struct acpi_processor_cx *cx = NULL;
+static u32 acpi_regs_addr;
+static u8 longhaul_flags;
+static unsigned int longhaul_index;
+
+/* Module parameters */
+static int scale_voltage;
+static int disable_acpi_c3;
+static int revid_errata;
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
+
+
+/* Clock ratios multiplied by 10 */
+static int clock_ratio[32];
+static int eblcr_table[32];
+static int longhaul_version;
+static struct cpufreq_frequency_table *longhaul_table;
+
+#ifdef CONFIG_CPU_FREQ_DEBUG
+static char speedbuffer[8];
+
+static char *print_speed(int speed)
+{
+	if (speed < 1000) {
+		snprintf(speedbuffer, sizeof(speedbuffer),"%dMHz", speed);
+		return speedbuffer;
+	}
+
+	if (speed%1000 == 0)
+		snprintf(speedbuffer, sizeof(speedbuffer),
+			"%dGHz", speed/1000);
+	else
+		snprintf(speedbuffer, sizeof(speedbuffer),
+			"%d.%dGHz", speed/1000, (speed%1000)/100);
+
+	return speedbuffer;
+}
+#endif
+
+
+static unsigned int calc_speed(int mult)
+{
+	int khz;
+	khz = (mult/10)*fsb;
+	if (mult%10)
+		khz += fsb/2;
+	khz *= 1000;
+	return khz;
+}
+
+
+static int longhaul_get_cpu_mult(void)
+{
+	unsigned long invalue=0,lo, hi;
+
+	rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi);
+	invalue = (lo & (1<<22|1<<23|1<<24|1<<25)) >>22;
+	if (longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) {
+		if (lo & (1<<27))
+			invalue+=16;
+	}
+	return eblcr_table[invalue];
+}
+
+/* For processor with BCR2 MSR */
+
+static void do_longhaul1(unsigned int clock_ratio_index)
+{
+	union msr_bcr2 bcr2;
+
+	rdmsrl(MSR_VIA_BCR2, bcr2.val);
+	/* Enable software clock multiplier */
+	bcr2.bits.ESOFTBF = 1;
+	bcr2.bits.CLOCKMUL = clock_ratio_index & 0xff;
+
+	/* Sync to timer tick */
+	safe_halt();
+	/* Change frequency on next halt or sleep */
+	wrmsrl(MSR_VIA_BCR2, bcr2.val);
+	/* Invoke transition */
+	ACPI_FLUSH_CPU_CACHE();
+	halt();
+
+	/* Disable software clock multiplier */
+	local_irq_disable();
+	rdmsrl(MSR_VIA_BCR2, bcr2.val);
+	bcr2.bits.ESOFTBF = 0;
+	wrmsrl(MSR_VIA_BCR2, bcr2.val);
+}
+
+/* For processor with Longhaul MSR */
+
+static void do_powersaver(int cx_address, unsigned int clock_ratio_index,
+			  unsigned int dir)
+{
+	union msr_longhaul longhaul;
+	u32 t;
+
+	rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+	/* Setup new frequency */
+	if (!revid_errata)
+		longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
+	else
+		longhaul.bits.RevisionKey = 0;
+	longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf;
+	longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4;
+	/* Setup new voltage */
+	if (can_scale_voltage)
+		longhaul.bits.SoftVID = (clock_ratio_index >> 8) & 0x1f;
+	/* Sync to timer tick */
+	safe_halt();
+	/* Raise voltage if necessary */
+	if (can_scale_voltage && dir) {
+		longhaul.bits.EnableSoftVID = 1;
+		wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+		/* Change voltage */
+		if (!cx_address) {
+			ACPI_FLUSH_CPU_CACHE();
+			halt();
+		} else {
+			ACPI_FLUSH_CPU_CACHE();
+			/* Invoke C3 */
+			inb(cx_address);
+			/* Dummy op - must do something useless after P_LVL3
+			 * read */
+			t = inl(acpi_gbl_FADT.xpm_timer_block.address);
+		}
+		longhaul.bits.EnableSoftVID = 0;
+		wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+	}
+
+	/* Change frequency on next halt or sleep */
+	longhaul.bits.EnableSoftBusRatio = 1;
+	wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+	if (!cx_address) {
+		ACPI_FLUSH_CPU_CACHE();
+		halt();
+	} else {
+		ACPI_FLUSH_CPU_CACHE();
+		/* Invoke C3 */
+		inb(cx_address);
+		/* Dummy op - must do something useless after P_LVL3 read */
+		t = inl(acpi_gbl_FADT.xpm_timer_block.address);
+	}
+	/* Disable bus ratio bit */
+	longhaul.bits.EnableSoftBusRatio = 0;
+	wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+
+	/* Reduce voltage if necessary */
+	if (can_scale_voltage && !dir) {
+		longhaul.bits.EnableSoftVID = 1;
+		wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+		/* Change voltage */
+		if (!cx_address) {
+			ACPI_FLUSH_CPU_CACHE();
+			halt();
+		} else {
+			ACPI_FLUSH_CPU_CACHE();
+			/* Invoke C3 */
+			inb(cx_address);
+			/* Dummy op - must do something useless after P_LVL3
+			 * read */
+			t = inl(acpi_gbl_FADT.xpm_timer_block.address);
+		}
+		longhaul.bits.EnableSoftVID = 0;
+		wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+	}
+}
+
+/**
+ * longhaul_set_cpu_frequency()
+ * @clock_ratio_index : bitpattern of the new multiplier.
+ *
+ * Sets a new clock ratio.
+ */
+
+static void longhaul_setstate(unsigned int table_index)
+{
+	unsigned int clock_ratio_index;
+	int speed, mult;
+	struct cpufreq_freqs freqs;
+	unsigned long flags;
+	unsigned int pic1_mask, pic2_mask;
+	u16 bm_status = 0;
+	u32 bm_timeout = 1000;
+	unsigned int dir = 0;
+
+	clock_ratio_index = longhaul_table[table_index].index;
+	/* Safety precautions */
+	mult = clock_ratio[clock_ratio_index & 0x1f];
+	if (mult == -1)
+		return;
+	speed = calc_speed(mult);
+	if ((speed > highest_speed) || (speed < lowest_speed))
+		return;
+	/* Voltage transition before frequency transition? */
+	if (can_scale_voltage && longhaul_index < table_index)
+		dir = 1;
+
+	freqs.old = calc_speed(longhaul_get_cpu_mult());
+	freqs.new = speed;
+	freqs.cpu = 0; /* longhaul.c is UP only driver */
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
+			fsb, mult/10, mult%10, print_speed(speed/1000));
+retry_loop:
+	preempt_disable();
+	local_irq_save(flags);
+
+	pic2_mask = inb(0xA1);
+	pic1_mask = inb(0x21);	/* works on C3. save mask. */
+	outb(0xFF,0xA1);	/* Overkill */
+	outb(0xFE,0x21);	/* TMR0 only */
+
+	/* Wait while PCI bus is busy. */
+	if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE
+	    || ((pr != NULL) && pr->flags.bm_control))) {
+		bm_status = inw(acpi_regs_addr);
+		bm_status &= 1 << 4;
+		while (bm_status && bm_timeout) {
+			outw(1 << 4, acpi_regs_addr);
+			bm_timeout--;
+			bm_status = inw(acpi_regs_addr);
+			bm_status &= 1 << 4;
+		}
+	}
+
+	if (longhaul_flags & USE_NORTHBRIDGE) {
+		/* Disable AGP and PCI arbiters */
+		outb(3, 0x22);
+	} else if ((pr != NULL) && pr->flags.bm_control) {
+		/* Disable bus master arbitration */
+		acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
+	}
+	switch (longhaul_version) {
+
+	/*
+	 * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B])
+	 * Software controlled multipliers only.
+	 */
+	case TYPE_LONGHAUL_V1:
+		do_longhaul1(clock_ratio_index);
+		break;
+
+	/*
+	 * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C]
+	 *
+	 * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N])
+	 * Nehemiah can do FSB scaling too, but this has never been proven
+	 * to work in practice.
+	 */
+	case TYPE_LONGHAUL_V2:
+	case TYPE_POWERSAVER:
+		if (longhaul_flags & USE_ACPI_C3) {
+			/* Don't allow wakeup */
+			acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+			do_powersaver(cx->address, clock_ratio_index, dir);
+		} else {
+			do_powersaver(0, clock_ratio_index, dir);
+		}
+		break;
+	}
+
+	if (longhaul_flags & USE_NORTHBRIDGE) {
+		/* Enable arbiters */
+		outb(0, 0x22);
+	} else if ((pr != NULL) && pr->flags.bm_control) {
+		/* Enable bus master arbitration */
+		acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
+	}
+	outb(pic2_mask,0xA1);	/* restore mask */
+	outb(pic1_mask,0x21);
+
+	local_irq_restore(flags);
+	preempt_enable();
+
+	freqs.new = calc_speed(longhaul_get_cpu_mult());
+	/* Check if requested frequency is set. */
+	if (unlikely(freqs.new != speed)) {
+		printk(KERN_INFO PFX "Failed to set requested frequency!\n");
+		/* Revision ID = 1 but processor is expecting revision key
+		 * equal to 0. Jumpers at the bottom of processor will change
+		 * multiplier and FSB, but will not change bits in Longhaul
+		 * MSR nor enable voltage scaling. */
+		if (!revid_errata) {
+			printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" "
+						"option.\n");
+			revid_errata = 1;
+			msleep(200);
+			goto retry_loop;
+		}
+		/* Why ACPI C3 sometimes doesn't work is a mystery for me.
+		 * But it does happen. Processor is entering ACPI C3 state,
+		 * but it doesn't change frequency. I tried poking various
+		 * bits in northbridge registers, but without success. */
+		if (longhaul_flags & USE_ACPI_C3) {
+			printk(KERN_INFO PFX "Disabling ACPI C3 support.\n");
+			longhaul_flags &= ~USE_ACPI_C3;
+			if (revid_errata) {
+				printk(KERN_INFO PFX "Disabling \"Ignore "
+						"Revision ID\" option.\n");
+				revid_errata = 0;
+			}
+			msleep(200);
+			goto retry_loop;
+		}
+		/* This shouldn't happen. Longhaul ver. 2 was reported not
+		 * working on processors without voltage scaling, but with
+		 * RevID = 1. RevID errata will make things right. Just
+		 * to be 100% sure. */
+		if (longhaul_version == TYPE_LONGHAUL_V2) {
+			printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n");
+			longhaul_version = TYPE_LONGHAUL_V1;
+			msleep(200);
+			goto retry_loop;
+		}
+	}
+	/* Report true CPU frequency */
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	if (!bm_timeout)
+		printk(KERN_INFO PFX "Warning: Timeout while waiting for idle PCI bus.\n");
+}
+
+/*
+ * Centaur decided to make life a little more tricky.
+ * Only longhaul v1 is allowed to read EBLCR BSEL[0:1].
+ * Samuel2 and above have to try and guess what the FSB is.
+ * We do this by assuming we booted at maximum multiplier, and interpolate
+ * between that value multiplied by possible FSBs and cpu_mhz which
+ * was calculated at boot time. Really ugly, but no other way to do this.
+ */
+
+#define ROUNDING	0xf
+
+static int guess_fsb(int mult)
+{
+	int speed = cpu_khz / 1000;
+	int i;
+	int speeds[] = { 666, 1000, 1333, 2000 };
+	int f_max, f_min;
+
+	for (i = 0; i < 4; i++) {
+		f_max = ((speeds[i] * mult) + 50) / 100;
+		f_max += (ROUNDING / 2);
+		f_min = f_max - ROUNDING;
+		if ((speed <= f_max) && (speed >= f_min))
+			return speeds[i] / 10;
+	}
+	return 0;
+}
+
+
+static int __init longhaul_get_ranges(void)
+{
+	unsigned int i, j, k = 0;
+	unsigned int ratio;
+	int mult;
+
+	/* Get current frequency */
+	mult = longhaul_get_cpu_mult();
+	if (mult == -1) {
+		printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n");
+		return -EINVAL;
+	}
+	fsb = guess_fsb(mult);
+	if (fsb == 0) {
+		printk(KERN_INFO PFX "Invalid (reserved) FSB!\n");
+		return -EINVAL;
+	}
+	/* Get max multiplier - as we always did.
+	 * Longhaul MSR is usefull only when voltage scaling is enabled.
+	 * C3 is booting at max anyway. */
+	maxmult = mult;
+	/* Get min multiplier */
+	switch (cpu_model) {
+	case CPU_NEHEMIAH:
+		minmult = 50;
+		break;
+	case CPU_NEHEMIAH_C:
+		minmult = 40;
+		break;
+	default:
+		minmult = 30;
+		break;
+	}
+
+	dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n",
+		 minmult/10, minmult%10, maxmult/10, maxmult%10);
+
+	highest_speed = calc_speed(maxmult);
+	lowest_speed = calc_speed(minmult);
+	dprintk ("FSB:%dMHz  Lowest speed: %s   Highest speed:%s\n", fsb,
+		 print_speed(lowest_speed/1000),
+		 print_speed(highest_speed/1000));
+
+	if (lowest_speed == highest_speed) {
+		printk (KERN_INFO PFX "highestspeed == lowest, aborting.\n");
+		return -EINVAL;
+	}
+	if (lowest_speed > highest_speed) {
+		printk (KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
+			lowest_speed, highest_speed);
+		return -EINVAL;
+	}
+
+	longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL);
+	if(!longhaul_table)
+		return -ENOMEM;
+
+	for (j = 0; j < numscales; j++) {
+		ratio = clock_ratio[j];
+		if (ratio == -1)
+			continue;
+		if (ratio > maxmult || ratio < minmult)
+			continue;
+		longhaul_table[k].frequency = calc_speed(ratio);
+		longhaul_table[k].index	= j;
+		k++;
+	}
+	if (k <= 1) {
+		kfree(longhaul_table);
+		return -ENODEV;
+	}
+	/* Sort */
+	for (j = 0; j < k - 1; j++) {
+		unsigned int min_f, min_i;
+		min_f = longhaul_table[j].frequency;
+		min_i = j;
+		for (i = j + 1; i < k; i++) {
+			if (longhaul_table[i].frequency < min_f) {
+				min_f = longhaul_table[i].frequency;
+				min_i = i;
+			}
+		}
+		if (min_i != j) {
+			unsigned int temp;
+			temp = longhaul_table[j].frequency;
+			longhaul_table[j].frequency = longhaul_table[min_i].frequency;
+			longhaul_table[min_i].frequency = temp;
+			temp = longhaul_table[j].index;
+			longhaul_table[j].index = longhaul_table[min_i].index;
+			longhaul_table[min_i].index = temp;
+		}
+	}
+
+	longhaul_table[k].frequency = CPUFREQ_TABLE_END;
+
+	/* Find index we are running on */
+	for (j = 0; j < k; j++) {
+		if (clock_ratio[longhaul_table[j].index & 0x1f] == mult) {
+			longhaul_index = j;
+			break;
+		}
+	}
+	return 0;
+}
+
+
+static void __init longhaul_setup_voltagescaling(void)
+{
+	union msr_longhaul longhaul;
+	struct mV_pos minvid, maxvid, vid;
+	unsigned int j, speed, pos, kHz_step, numvscales;
+	int min_vid_speed;
+
+	rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+	if (!(longhaul.bits.RevisionID & 1)) {
+		printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n");
+		return;
+	}
+
+	if (!longhaul.bits.VRMRev) {
+		printk(KERN_INFO PFX "VRM 8.5\n");
+		vrm_mV_table = &vrm85_mV[0];
+		mV_vrm_table = &mV_vrm85[0];
+	} else {
+		printk(KERN_INFO PFX "Mobile VRM\n");
+		if (cpu_model < CPU_NEHEMIAH)
+			return;
+		vrm_mV_table = &mobilevrm_mV[0];
+		mV_vrm_table = &mV_mobilevrm[0];
+	}
+
+	minvid = vrm_mV_table[longhaul.bits.MinimumVID];
+	maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
+
+	if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
+		printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
+					"Voltage scaling disabled.\n",
+					minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000);
+		return;
+	}
+
+	if (minvid.mV == maxvid.mV) {
+		printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are "
+				"both %d.%03d. Voltage scaling disabled\n",
+				maxvid.mV/1000, maxvid.mV%1000);
+		return;
+	}
+
+	/* How many voltage steps */
+	numvscales = maxvid.pos - minvid.pos + 1;
+	printk(KERN_INFO PFX
+		"Max VID=%d.%03d  "
+		"Min VID=%d.%03d, "
+		"%d possible voltage scales\n",
+		maxvid.mV/1000, maxvid.mV%1000,
+		minvid.mV/1000, minvid.mV%1000,
+		numvscales);
+
+	/* Calculate max frequency at min voltage */
+	j = longhaul.bits.MinMHzBR;
+	if (longhaul.bits.MinMHzBR4)
+		j += 16;
+	min_vid_speed = eblcr_table[j];
+	if (min_vid_speed == -1)
+		return;
+	switch (longhaul.bits.MinMHzFSB) {
+	case 0:
+		min_vid_speed *= 13333;
+		break;
+	case 1:
+		min_vid_speed *= 10000;
+		break;
+	case 3:
+		min_vid_speed *= 6666;
+		break;
+	default:
+		return;
+		break;
+	}
+	if (min_vid_speed >= highest_speed)
+		return;
+	/* Calculate kHz for one voltage step */
+	kHz_step = (highest_speed - min_vid_speed) / numvscales;
+
+	j = 0;
+	while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) {
+		speed = longhaul_table[j].frequency;
+		if (speed > min_vid_speed)
+			pos = (speed - min_vid_speed) / kHz_step + minvid.pos;
+		else
+			pos = minvid.pos;
+		longhaul_table[j].index |= mV_vrm_table[pos] << 8;
+		vid = vrm_mV_table[mV_vrm_table[pos]];
+		printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", speed, j, vid.mV);
+		j++;
+	}
+
+	can_scale_voltage = 1;
+	printk(KERN_INFO PFX "Voltage scaling enabled.\n");
+}
+
+
+static int longhaul_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, longhaul_table);
+}
+
+
+static int longhaul_target(struct cpufreq_policy *policy,
+			    unsigned int target_freq, unsigned int relation)
+{
+	unsigned int table_index = 0;
+	unsigned int i;
+	unsigned int dir = 0;
+	u8 vid, current_vid;
+
+	if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index))
+		return -EINVAL;
+
+	/* Don't set same frequency again */
+	if (longhaul_index == table_index)
+		return 0;
+
+	if (!can_scale_voltage)
+		longhaul_setstate(table_index);
+	else {
+		/* On test system voltage transitions exceeding single
+		 * step up or down were turning motherboard off. Both
+		 * "ondemand" and "userspace" are unsafe. C7 is doing
+		 * this in hardware, C3 is old and we need to do this
+		 * in software. */
+		i = longhaul_index;
+		current_vid = (longhaul_table[longhaul_index].index >> 8) & 0x1f;
+		if (table_index > longhaul_index)
+			dir = 1;
+		while (i != table_index) {
+			vid = (longhaul_table[i].index >> 8) & 0x1f;
+			if (vid != current_vid) {
+				longhaul_setstate(i);
+				current_vid = vid;
+				msleep(200);
+			}
+			if (dir)
+				i++;
+			else
+				i--;
+		}
+		longhaul_setstate(table_index);
+	}
+	longhaul_index = table_index;
+	return 0;
+}
+
+
+static unsigned int longhaul_get(unsigned int cpu)
+{
+	if (cpu)
+		return 0;
+	return calc_speed(longhaul_get_cpu_mult());
+}
+
+static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
+					  u32 nesting_level,
+					  void *context, void **return_value)
+{
+	struct acpi_device *d;
+
+	if ( acpi_bus_get_device(obj_handle, &d) ) {
+		return 0;
+	}
+	*return_value = (void *)acpi_driver_data(d);
+	return 1;
+}
+
+/* VIA don't support PM2 reg, but have something similar */
+static int enable_arbiter_disable(void)
+{
+	struct pci_dev *dev;
+	int status = 1;
+	int reg;
+	u8 pci_cmd;
+
+	/* Find PLE133 host bridge */
+	reg = 0x78;
+	dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0,
+			     NULL);
+	/* Find PM133/VT8605 host bridge */
+	if (dev == NULL)
+		dev = pci_get_device(PCI_VENDOR_ID_VIA,
+				     PCI_DEVICE_ID_VIA_8605_0, NULL);
+	/* Find CLE266 host bridge */
+	if (dev == NULL) {
+		reg = 0x76;
+		dev = pci_get_device(PCI_VENDOR_ID_VIA,
+				     PCI_DEVICE_ID_VIA_862X_0, NULL);
+		/* Find CN400 V-Link host bridge */
+		if (dev == NULL)
+			dev = pci_get_device(PCI_VENDOR_ID_VIA, 0x7259, NULL);
+	}
+	if (dev != NULL) {
+		/* Enable access to port 0x22 */
+		pci_read_config_byte(dev, reg, &pci_cmd);
+		if (!(pci_cmd & 1<<7)) {
+			pci_cmd |= 1<<7;
+			pci_write_config_byte(dev, reg, pci_cmd);
+			pci_read_config_byte(dev, reg, &pci_cmd);
+			if (!(pci_cmd & 1<<7)) {
+				printk(KERN_ERR PFX
+					"Can't enable access to port 0x22.\n");
+				status = 0;
+			}
+		}
+		pci_dev_put(dev);
+		return status;
+	}
+	return 0;
+}
+
+static int longhaul_setup_southbridge(void)
+{
+	struct pci_dev *dev;
+	u8 pci_cmd;
+
+	/* Find VT8235 southbridge */
+	dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL);
+	if (dev == NULL)
+	/* Find VT8237 southbridge */
+		dev = pci_get_device(PCI_VENDOR_ID_VIA,
+				     PCI_DEVICE_ID_VIA_8237, NULL);
+	if (dev != NULL) {
+		/* Set transition time to max */
+		pci_read_config_byte(dev, 0xec, &pci_cmd);
+		pci_cmd &= ~(1 << 2);
+		pci_write_config_byte(dev, 0xec, pci_cmd);
+		pci_read_config_byte(dev, 0xe4, &pci_cmd);
+		pci_cmd &= ~(1 << 7);
+		pci_write_config_byte(dev, 0xe4, pci_cmd);
+		pci_read_config_byte(dev, 0xe5, &pci_cmd);
+		pci_cmd |= 1 << 7;
+		pci_write_config_byte(dev, 0xe5, pci_cmd);
+		/* Get address of ACPI registers block*/
+		pci_read_config_byte(dev, 0x81, &pci_cmd);
+		if (pci_cmd & 1 << 7) {
+			pci_read_config_dword(dev, 0x88, &acpi_regs_addr);
+			acpi_regs_addr &= 0xff00;
+			printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", acpi_regs_addr);
+		}
+
+		pci_dev_put(dev);
+		return 1;
+	}
+	return 0;
+}
+
+static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+	char *cpuname=NULL;
+	int ret;
+	u32 lo, hi;
+
+	/* Check what we have on this motherboard */
+	switch (c->x86_model) {
+	case 6:
+		cpu_model = CPU_SAMUEL;
+		cpuname = "C3 'Samuel' [C5A]";
+		longhaul_version = TYPE_LONGHAUL_V1;
+		memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio));
+		memcpy (eblcr_table, samuel1_eblcr, sizeof(samuel1_eblcr));
+		break;
+
+	case 7:
+		switch (c->x86_mask) {
+		case 0:
+			longhaul_version = TYPE_LONGHAUL_V1;
+			cpu_model = CPU_SAMUEL2;
+			cpuname = "C3 'Samuel 2' [C5B]";
+			/* Note, this is not a typo, early Samuel2's had
+			 * Samuel1 ratios. */
+			memcpy(clock_ratio, samuel1_clock_ratio,
+				sizeof(samuel1_clock_ratio));
+			memcpy(eblcr_table, samuel2_eblcr,
+				sizeof(samuel2_eblcr));
+			break;
+		case 1 ... 15:
+			longhaul_version = TYPE_LONGHAUL_V1;
+			if (c->x86_mask < 8) {
+				cpu_model = CPU_SAMUEL2;
+				cpuname = "C3 'Samuel 2' [C5B]";
+			} else {
+				cpu_model = CPU_EZRA;
+				cpuname = "C3 'Ezra' [C5C]";
+			}
+			memcpy(clock_ratio, ezra_clock_ratio,
+				sizeof(ezra_clock_ratio));
+			memcpy(eblcr_table, ezra_eblcr,
+				sizeof(ezra_eblcr));
+			break;
+		}
+		break;
+
+	case 8:
+		cpu_model = CPU_EZRA_T;
+		cpuname = "C3 'Ezra-T' [C5M]";
+		longhaul_version = TYPE_POWERSAVER;
+		numscales=32;
+		memcpy (clock_ratio, ezrat_clock_ratio, sizeof(ezrat_clock_ratio));
+		memcpy (eblcr_table, ezrat_eblcr, sizeof(ezrat_eblcr));
+		break;
+
+	case 9:
+		longhaul_version = TYPE_POWERSAVER;
+		numscales = 32;
+		memcpy(clock_ratio,
+		       nehemiah_clock_ratio,
+		       sizeof(nehemiah_clock_ratio));
+		memcpy(eblcr_table, nehemiah_eblcr, sizeof(nehemiah_eblcr));
+		switch (c->x86_mask) {
+		case 0 ... 1:
+			cpu_model = CPU_NEHEMIAH;
+			cpuname = "C3 'Nehemiah A' [C5XLOE]";
+			break;
+		case 2 ... 4:
+			cpu_model = CPU_NEHEMIAH;
+			cpuname = "C3 'Nehemiah B' [C5XLOH]";
+			break;
+		case 5 ... 15:
+			cpu_model = CPU_NEHEMIAH_C;
+			cpuname = "C3 'Nehemiah C' [C5P]";
+			break;
+		}
+		break;
+
+	default:
+		cpuname = "Unknown";
+		break;
+	}
+	/* Check Longhaul ver. 2 */
+	if (longhaul_version == TYPE_LONGHAUL_V2) {
+		rdmsr(MSR_VIA_LONGHAUL, lo, hi);
+		if (lo == 0 && hi == 0)
+			/* Looks like MSR isn't present */
+			longhaul_version = TYPE_LONGHAUL_V1;
+	}
+
+	printk (KERN_INFO PFX "VIA %s CPU detected.  ", cpuname);
+	switch (longhaul_version) {
+	case TYPE_LONGHAUL_V1:
+	case TYPE_LONGHAUL_V2:
+		printk ("Longhaul v%d supported.\n", longhaul_version);
+		break;
+	case TYPE_POWERSAVER:
+		printk ("Powersaver supported.\n");
+		break;
+	};
+
+	/* Doesn't hurt */
+	longhaul_setup_southbridge();
+
+	/* Find ACPI data for processor */
+	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
+				ACPI_UINT32_MAX, &longhaul_walk_callback,
+				NULL, (void *)&pr);
+
+	/* Check ACPI support for C3 state */
+	if (pr != NULL && longhaul_version == TYPE_POWERSAVER) {
+		cx = &pr->power.states[ACPI_STATE_C3];
+		if (cx->address > 0 && cx->latency <= 1000)
+			longhaul_flags |= USE_ACPI_C3;
+	}
+	/* Disable if it isn't working */
+	if (disable_acpi_c3)
+		longhaul_flags &= ~USE_ACPI_C3;
+	/* Check if northbridge is friendly */
+	if (enable_arbiter_disable())
+		longhaul_flags |= USE_NORTHBRIDGE;
+
+	/* Check ACPI support for bus master arbiter disable */
+	if (!(longhaul_flags & USE_ACPI_C3
+	     || longhaul_flags & USE_NORTHBRIDGE)
+	    && ((pr == NULL) || !(pr->flags.bm_control))) {
+		printk(KERN_ERR PFX
+			"No ACPI support. Unsupported northbridge.\n");
+		return -ENODEV;
+	}
+
+	if (longhaul_flags & USE_NORTHBRIDGE)
+		printk(KERN_INFO PFX "Using northbridge support.\n");
+	if (longhaul_flags & USE_ACPI_C3)
+		printk(KERN_INFO PFX "Using ACPI support.\n");
+
+	ret = longhaul_get_ranges();
+	if (ret != 0)
+		return ret;
+
+	if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0))
+		longhaul_setup_voltagescaling();
+
+	policy->cpuinfo.transition_latency = 200000;	/* nsec */
+	policy->cur = calc_speed(longhaul_get_cpu_mult());
+
+	ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table);
+	if (ret)
+		return ret;
+
+	cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu);
+
+	return 0;
+}
+
+static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+static struct freq_attr* longhaul_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver longhaul_driver = {
+	.verify	= longhaul_verify,
+	.target	= longhaul_target,
+	.get	= longhaul_get,
+	.init	= longhaul_cpu_init,
+	.exit	= __devexit_p(longhaul_cpu_exit),
+	.name	= "longhaul",
+	.owner	= THIS_MODULE,
+	.attr	= longhaul_attr,
+};
+
+
+static int __init longhaul_init(void)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+
+	if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6)
+		return -ENODEV;
+
+#ifdef CONFIG_SMP
+	if (num_online_cpus() > 1) {
+		printk(KERN_ERR PFX "More than 1 CPU detected, longhaul disabled.\n");
+		return -ENODEV;
+	}
+#endif
+#ifdef CONFIG_X86_IO_APIC
+	if (cpu_has_apic) {
+		printk(KERN_ERR PFX "APIC detected. Longhaul is currently broken in this configuration.\n");
+		return -ENODEV;
+	}
+#endif
+	switch (c->x86_model) {
+	case 6 ... 9:
+		return cpufreq_register_driver(&longhaul_driver);
+	case 10:
+		printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n");
+	default:
+		;;
+	}
+
+	return -ENODEV;
+}
+
+
+static void __exit longhaul_exit(void)
+{
+	int i;
+
+	for (i=0; i < numscales; i++) {
+		if (clock_ratio[i] == maxmult) {
+			longhaul_setstate(i);
+			break;
+		}
+	}
+
+	cpufreq_unregister_driver(&longhaul_driver);
+	kfree(longhaul_table);
+}
+
+/* Even if BIOS is exporting ACPI C3 state, and it is used
+ * with success when CPU is idle, this state doesn't
+ * trigger frequency transition in some cases. */
+module_param (disable_acpi_c3, int, 0644);
+MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support");
+/* Change CPU voltage with frequency. Very usefull to save
+ * power, but most VIA C3 processors aren't supporting it. */
+module_param (scale_voltage, int, 0644);
+MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
+/* Force revision key to 0 for processors which doesn't
+ * support voltage scaling, but are introducing itself as
+ * such. */
+module_param(revid_errata, int, 0644);
+MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
+
+MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
+MODULE_LICENSE ("GPL");
+
+late_initcall(longhaul_init);
+module_exit(longhaul_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h
new file mode 100644
index 00000000000..4fcc320997d
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h
@@ -0,0 +1,353 @@
+/*
+ *  longhaul.h
+ *  (C) 2003 Dave Jones.
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  VIA-specific information
+ */
+
+union msr_bcr2 {
+	struct {
+		unsigned Reseved:19,	// 18:0
+		ESOFTBF:1,		// 19
+		Reserved2:3,		// 22:20
+		CLOCKMUL:4,		// 26:23
+		Reserved3:5;		// 31:27
+	} bits;
+	unsigned long val;
+};
+
+union msr_longhaul {
+	struct {
+		unsigned RevisionID:4,	// 3:0
+		RevisionKey:4,		// 7:4
+		EnableSoftBusRatio:1,	// 8
+		EnableSoftVID:1,	// 9
+		EnableSoftBSEL:1,	// 10
+		Reserved:3,		// 11:13
+		SoftBusRatio4:1,	// 14
+		VRMRev:1,		// 15
+		SoftBusRatio:4,		// 19:16
+		SoftVID:5,		// 24:20
+		Reserved2:3,		// 27:25
+		SoftBSEL:2,		// 29:28
+		Reserved3:2,		// 31:30
+		MaxMHzBR:4,		// 35:32
+		MaximumVID:5,		// 40:36
+		MaxMHzFSB:2,		// 42:41
+		MaxMHzBR4:1,		// 43
+		Reserved4:4,		// 47:44
+		MinMHzBR:4,		// 51:48
+		MinimumVID:5,		// 56:52
+		MinMHzFSB:2,		// 58:57
+		MinMHzBR4:1,		// 59
+		Reserved5:4;		// 63:60
+	} bits;
+	unsigned long long val;
+};
+
+/*
+ * Clock ratio tables. Div/Mod by 10 to get ratio.
+ * The eblcr ones specify the ratio read from the CPU.
+ * The clock_ratio ones specify what to write to the CPU.
+ */
+
+/*
+ * VIA C3 Samuel 1  & Samuel 2 (stepping 0)
+ */
+static const int __initdata samuel1_clock_ratio[16] = {
+	-1, /* 0000 -> RESERVED */
+	30, /* 0001 ->  3.0x */
+	40, /* 0010 ->  4.0x */
+	-1, /* 0011 -> RESERVED */
+	-1, /* 0100 -> RESERVED */
+	35, /* 0101 ->  3.5x */
+	45, /* 0110 ->  4.5x */
+	55, /* 0111 ->  5.5x */
+	60, /* 1000 ->  6.0x */
+	70, /* 1001 ->  7.0x */
+	80, /* 1010 ->  8.0x */
+	50, /* 1011 ->  5.0x */
+	65, /* 1100 ->  6.5x */
+	75, /* 1101 ->  7.5x */
+	-1, /* 1110 -> RESERVED */
+	-1, /* 1111 -> RESERVED */
+};
+
+static const int __initdata samuel1_eblcr[16] = {
+	50, /* 0000 -> RESERVED */
+	30, /* 0001 ->  3.0x */
+	40, /* 0010 ->  4.0x */
+	-1, /* 0011 -> RESERVED */
+	55, /* 0100 ->  5.5x */
+	35, /* 0101 ->  3.5x */
+	45, /* 0110 ->  4.5x */
+	-1, /* 0111 -> RESERVED */
+	-1, /* 1000 -> RESERVED */
+	70, /* 1001 ->  7.0x */
+	80, /* 1010 ->  8.0x */
+	60, /* 1011 ->  6.0x */
+	-1, /* 1100 -> RESERVED */
+	75, /* 1101 ->  7.5x */
+	-1, /* 1110 -> RESERVED */
+	65, /* 1111 ->  6.5x */
+};
+
+/*
+ * VIA C3 Samuel2 Stepping 1->15
+ */
+static const int __initdata samuel2_eblcr[16] = {
+	50,  /* 0000 ->  5.0x */
+	30,  /* 0001 ->  3.0x */
+	40,  /* 0010 ->  4.0x */
+	100, /* 0011 -> 10.0x */
+	55,  /* 0100 ->  5.5x */
+	35,  /* 0101 ->  3.5x */
+	45,  /* 0110 ->  4.5x */
+	110, /* 0111 -> 11.0x */
+	90,  /* 1000 ->  9.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	60,  /* 1011 ->  6.0x */
+	120, /* 1100 -> 12.0x */
+	75,  /* 1101 ->  7.5x */
+	130, /* 1110 -> 13.0x */
+	65,  /* 1111 ->  6.5x */
+};
+
+/*
+ * VIA C3 Ezra
+ */
+static const int __initdata ezra_clock_ratio[16] = {
+	100, /* 0000 -> 10.0x */
+	30,  /* 0001 ->  3.0x */
+	40,  /* 0010 ->  4.0x */
+	90,  /* 0011 ->  9.0x */
+	95,  /* 0100 ->  9.5x */
+	35,  /* 0101 ->  3.5x */
+	45,  /* 0110 ->  4.5x */
+	55,  /* 0111 ->  5.5x */
+	60,  /* 1000 ->  6.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	50,  /* 1011 ->  5.0x */
+	65,  /* 1100 ->  6.5x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	120, /* 1111 -> 12.0x */
+};
+
+static const int __initdata ezra_eblcr[16] = {
+	50,  /* 0000 ->  5.0x */
+	30,  /* 0001 ->  3.0x */
+	40,  /* 0010 ->  4.0x */
+	100, /* 0011 -> 10.0x */
+	55,  /* 0100 ->  5.5x */
+	35,  /* 0101 ->  3.5x */
+	45,  /* 0110 ->  4.5x */
+	95,  /* 0111 ->  9.5x */
+	90,  /* 1000 ->  9.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	60,  /* 1011 ->  6.0x */
+	120, /* 1100 -> 12.0x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	65,  /* 1111 ->  6.5x */
+};
+
+/*
+ * VIA C3 (Ezra-T) [C5M].
+ */
+static const int __initdata ezrat_clock_ratio[32] = {
+	100, /* 0000 -> 10.0x */
+	30,  /* 0001 ->  3.0x */
+	40,  /* 0010 ->  4.0x */
+	90,  /* 0011 ->  9.0x */
+	95,  /* 0100 ->  9.5x */
+	35,  /* 0101 ->  3.5x */
+	45,  /* 0110 ->  4.5x */
+	55,  /* 0111 ->  5.5x */
+	60,  /* 1000 ->  6.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	50,  /* 1011 ->  5.0x */
+	65,  /* 1100 ->  6.5x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	120, /* 1111 ->  12.0x */
+
+	-1,  /* 0000 -> RESERVED (10.0x) */
+	110, /* 0001 -> 11.0x */
+	-1, /* 0010 -> 12.0x */
+	-1,  /* 0011 -> RESERVED (9.0x)*/
+	105, /* 0100 -> 10.5x */
+	115, /* 0101 -> 11.5x */
+	125, /* 0110 -> 12.5x */
+	135, /* 0111 -> 13.5x */
+	140, /* 1000 -> 14.0x */
+	150, /* 1001 -> 15.0x */
+	160, /* 1010 -> 16.0x */
+	130, /* 1011 -> 13.0x */
+	145, /* 1100 -> 14.5x */
+	155, /* 1101 -> 15.5x */
+	-1,  /* 1110 -> RESERVED (13.0x) */
+	-1,  /* 1111 -> RESERVED (12.0x) */
+};
+
+static const int __initdata ezrat_eblcr[32] = {
+	50,  /* 0000 ->  5.0x */
+	30,  /* 0001 ->  3.0x */
+	40,  /* 0010 ->  4.0x */
+	100, /* 0011 -> 10.0x */
+	55,  /* 0100 ->  5.5x */
+	35,  /* 0101 ->  3.5x */
+	45,  /* 0110 ->  4.5x */
+	95,  /* 0111 ->  9.5x */
+	90,  /* 1000 ->  9.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	60,  /* 1011 ->  6.0x */
+	120, /* 1100 -> 12.0x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	65,  /* 1111 ->  6.5x */
+
+	-1,  /* 0000 -> RESERVED (9.0x) */
+	110, /* 0001 -> 11.0x */
+	120, /* 0010 -> 12.0x */
+	-1,  /* 0011 -> RESERVED (10.0x)*/
+	135, /* 0100 -> 13.5x */
+	115, /* 0101 -> 11.5x */
+	125, /* 0110 -> 12.5x */
+	105, /* 0111 -> 10.5x */
+	130, /* 1000 -> 13.0x */
+	150, /* 1001 -> 15.0x */
+	160, /* 1010 -> 16.0x */
+	140, /* 1011 -> 14.0x */
+	-1,  /* 1100 -> RESERVED (12.0x) */
+	155, /* 1101 -> 15.5x */
+	-1,  /* 1110 -> RESERVED (13.0x) */
+	145, /* 1111 -> 14.5x */
+};
+
+/*
+ * VIA C3 Nehemiah */
+
+static const int __initdata  nehemiah_clock_ratio[32] = {
+	100, /* 0000 -> 10.0x */
+	-1, /* 0001 -> 16.0x */
+	40,  /* 0010 ->  4.0x */
+	90,  /* 0011 ->  9.0x */
+	95,  /* 0100 ->  9.5x */
+	-1,  /* 0101 ->  RESERVED */
+	45,  /* 0110 ->  4.5x */
+	55,  /* 0111 ->  5.5x */
+	60,  /* 1000 ->  6.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	50,  /* 1011 ->  5.0x */
+	65,  /* 1100 ->  6.5x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	120, /* 1111 -> 12.0x */
+	-1, /* 0000 -> 10.0x */
+	110, /* 0001 -> 11.0x */
+	-1, /* 0010 -> 12.0x */
+	-1,  /* 0011 ->  9.0x */
+	105, /* 0100 -> 10.5x */
+	115, /* 0101 -> 11.5x */
+	125, /* 0110 -> 12.5x */
+	135, /* 0111 -> 13.5x */
+	140, /* 1000 -> 14.0x */
+	150, /* 1001 -> 15.0x */
+	160, /* 1010 -> 16.0x */
+	130, /* 1011 -> 13.0x */
+	145, /* 1100 -> 14.5x */
+	155, /* 1101 -> 15.5x */
+	-1,  /* 1110 -> RESERVED (13.0x) */
+	-1, /* 1111 -> 12.0x */
+};
+
+static const int __initdata nehemiah_eblcr[32] = {
+	50,  /* 0000 ->  5.0x */
+	160, /* 0001 -> 16.0x */
+	40,  /* 0010 ->  4.0x */
+	100, /* 0011 -> 10.0x */
+	55,  /* 0100 ->  5.5x */
+	-1,  /* 0101 ->  RESERVED */
+	45,  /* 0110 ->  4.5x */
+	95,  /* 0111 ->  9.5x */
+	90,  /* 1000 ->  9.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	60,  /* 1011 ->  6.0x */
+	120, /* 1100 -> 12.0x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	65,  /* 1111 ->  6.5x */
+	90,  /* 0000 ->  9.0x */
+	110, /* 0001 -> 11.0x */
+	120, /* 0010 -> 12.0x */
+	100, /* 0011 -> 10.0x */
+	135, /* 0100 -> 13.5x */
+	115, /* 0101 -> 11.5x */
+	125, /* 0110 -> 12.5x */
+	105, /* 0111 -> 10.5x */
+	130, /* 1000 -> 13.0x */
+	150, /* 1001 -> 15.0x */
+	160, /* 1010 -> 16.0x */
+	140, /* 1011 -> 14.0x */
+	120, /* 1100 -> 12.0x */
+	155, /* 1101 -> 15.5x */
+	-1,  /* 1110 -> RESERVED (13.0x) */
+	145 /* 1111 -> 14.5x */
+};
+
+/*
+ * Voltage scales. Div/Mod by 1000 to get actual voltage.
+ * Which scale to use depends on the VRM type in use.
+ */
+
+struct mV_pos {
+	unsigned short mV;
+	unsigned short pos;
+};
+
+static const struct mV_pos __initdata vrm85_mV[32] = {
+	{1250, 8},	{1200, 6},	{1150, 4},	{1100, 2},
+	{1050, 0},	{1800, 30},	{1750, 28},	{1700, 26},
+	{1650, 24},	{1600, 22},	{1550, 20},	{1500, 18},
+	{1450, 16},	{1400, 14},	{1350, 12},	{1300, 10},
+	{1275, 9},	{1225, 7},	{1175, 5},	{1125, 3},
+	{1075, 1},	{1825, 31},	{1775, 29},	{1725, 27},
+	{1675, 25},	{1625, 23},	{1575, 21},	{1525, 19},
+	{1475, 17},	{1425, 15},	{1375, 13},	{1325, 11}
+};
+
+static const unsigned char __initdata mV_vrm85[32] = {
+	0x04,	0x14,	0x03,	0x13,	0x02,	0x12,	0x01,	0x11,
+	0x00,	0x10,	0x0f,	0x1f,	0x0e,	0x1e,	0x0d,	0x1d,
+	0x0c,	0x1c,	0x0b,	0x1b,	0x0a,	0x1a,	0x09,	0x19,
+	0x08,	0x18,	0x07,	0x17,	0x06,	0x16,	0x05,	0x15
+};
+
+static const struct mV_pos __initdata mobilevrm_mV[32] = {
+	{1750, 31},	{1700, 30},	{1650, 29},	{1600, 28},
+	{1550, 27},	{1500, 26},	{1450, 25},	{1400, 24},
+	{1350, 23},	{1300, 22},	{1250, 21},	{1200, 20},
+	{1150, 19},	{1100, 18},	{1050, 17},	{1000, 16},
+	{975, 15},	{950, 14},	{925, 13},	{900, 12},
+	{875, 11},	{850, 10},	{825, 9},	{800, 8},
+	{775, 7},	{750, 6},	{725, 5},	{700, 4},
+	{675, 3},	{650, 2},	{625, 1},	{600, 0}
+};
+
+static const unsigned char __initdata mV_mobilevrm[32] = {
+	0x1f,	0x1e,	0x1d,	0x1c,	0x1b,	0x1a,	0x19,	0x18,
+	0x17,	0x16,	0x15,	0x14,	0x13,	0x12,	0x11,	0x10,
+	0x0f,	0x0e,	0x0d,	0x0c,	0x0b,	0x0a,	0x09,	0x08,
+	0x07,	0x06,	0x05,	0x04,	0x03,	0x02,	0x01,	0x00
+};
+
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
new file mode 100644
index 00000000000..b2689514295
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -0,0 +1,325 @@
+/*
+ * (C) 2002 - 2003  Dominik Brodowski <linux@brodo.de>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/cpufreq.h>
+
+#include <asm/msr.h>
+#include <asm/processor.h>
+#include <asm/timex.h>
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longrun", msg)
+
+static struct cpufreq_driver	longrun_driver;
+
+/**
+ * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz
+ * values into per cent values. In TMTA microcode, the following is valid:
+ * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
+ */
+static unsigned int longrun_low_freq, longrun_high_freq;
+
+
+/**
+ * longrun_get_policy - get the current LongRun policy
+ * @policy: struct cpufreq_policy where current policy is written into
+ *
+ * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS
+ * and MSR_TMTA_LONGRUN_CTRL
+ */
+static void __init longrun_get_policy(struct cpufreq_policy *policy)
+{
+	u32 msr_lo, msr_hi;
+
+	rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
+	dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi);
+	if (msr_lo & 0x01)
+		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
+	else
+		policy->policy = CPUFREQ_POLICY_POWERSAVE;
+
+	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+	dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi);
+	msr_lo &= 0x0000007F;
+	msr_hi &= 0x0000007F;
+
+	if ( longrun_high_freq <= longrun_low_freq ) {
+		/* Assume degenerate Longrun table */
+		policy->min = policy->max = longrun_high_freq;
+	} else {
+		policy->min = longrun_low_freq + msr_lo *
+			((longrun_high_freq - longrun_low_freq) / 100);
+		policy->max = longrun_low_freq + msr_hi *
+			((longrun_high_freq - longrun_low_freq) / 100);
+	}
+	policy->cpu = 0;
+}
+
+
+/**
+ * longrun_set_policy - sets a new CPUFreq policy
+ * @policy: new policy
+ *
+ * Sets a new CPUFreq policy on LongRun-capable processors. This function
+ * has to be called with cpufreq_driver locked.
+ */
+static int longrun_set_policy(struct cpufreq_policy *policy)
+{
+	u32 msr_lo, msr_hi;
+	u32 pctg_lo, pctg_hi;
+
+	if (!policy)
+		return -EINVAL;
+
+	if ( longrun_high_freq <= longrun_low_freq ) {
+		/* Assume degenerate Longrun table */
+		pctg_lo = pctg_hi = 100;
+	} else {
+		pctg_lo = (policy->min - longrun_low_freq) /
+			((longrun_high_freq - longrun_low_freq) / 100);
+		pctg_hi = (policy->max - longrun_low_freq) /
+			((longrun_high_freq - longrun_low_freq) / 100);
+	}
+
+	if (pctg_hi > 100)
+		pctg_hi = 100;
+	if (pctg_lo > pctg_hi)
+		pctg_lo = pctg_hi;
+
+	/* performance or economy mode */
+	rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
+	msr_lo &= 0xFFFFFFFE;
+	switch (policy->policy) {
+	case CPUFREQ_POLICY_PERFORMANCE:
+		msr_lo |= 0x00000001;
+		break;
+	case CPUFREQ_POLICY_POWERSAVE:
+		break;
+	}
+	wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
+
+	/* lower and upper boundary */
+	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+	msr_lo &= 0xFFFFFF80;
+	msr_hi &= 0xFFFFFF80;
+	msr_lo |= pctg_lo;
+	msr_hi |= pctg_hi;
+	wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+
+	return 0;
+}
+
+
+/**
+ * longrun_verify_poliy - verifies a new CPUFreq policy
+ * @policy: the policy to verify
+ *
+ * Validates a new CPUFreq policy. This function has to be called with
+ * cpufreq_driver locked.
+ */
+static int longrun_verify_policy(struct cpufreq_policy *policy)
+{
+	if (!policy)
+		return -EINVAL;
+
+	policy->cpu = 0;
+	cpufreq_verify_within_limits(policy,
+		policy->cpuinfo.min_freq,
+		policy->cpuinfo.max_freq);
+
+	if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
+	    (policy->policy != CPUFREQ_POLICY_PERFORMANCE))
+		return -EINVAL;
+
+	return 0;
+}
+
+static unsigned int longrun_get(unsigned int cpu)
+{
+	u32 eax, ebx, ecx, edx;
+
+	if (cpu)
+		return 0;
+
+	cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
+	dprintk("cpuid eax is %u\n", eax);
+
+	return (eax * 1000);
+}
+
+/**
+ * longrun_determine_freqs - determines the lowest and highest possible core frequency
+ * @low_freq: an int to put the lowest frequency into
+ * @high_freq: an int to put the highest frequency into
+ *
+ * Determines the lowest and highest possible core frequencies on this CPU.
+ * This is necessary to calculate the performance percentage according to
+ * TMTA rules:
+ * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq)
+ */
+static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
+						   unsigned int *high_freq)
+{
+	u32 msr_lo, msr_hi;
+	u32 save_lo, save_hi;
+	u32 eax, ebx, ecx, edx;
+	u32 try_hi;
+	struct cpuinfo_x86 *c = cpu_data;
+
+	if (!low_freq || !high_freq)
+		return -EINVAL;
+
+	if (cpu_has(c, X86_FEATURE_LRTI)) {
+		/* if the LongRun Table Interface is present, the
+		 * detection is a bit easier:
+		 * For minimum frequency, read out the maximum
+		 * level (msr_hi), write that into "currently
+		 * selected level", and read out the frequency.
+		 * For maximum frequency, read out level zero.
+		 */
+		/* minimum */
+		rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi);
+		wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi);
+		rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
+		*low_freq = msr_lo * 1000; /* to kHz */
+
+		/* maximum */
+		wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi);
+		rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
+		*high_freq = msr_lo * 1000; /* to kHz */
+
+		dprintk("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq);
+
+		if (*low_freq > *high_freq)
+			*low_freq = *high_freq;
+		return 0;
+	}
+
+	/* set the upper border to the value determined during TSC init */
+	*high_freq = (cpu_khz / 1000);
+	*high_freq = *high_freq * 1000;
+	dprintk("high frequency is %u kHz\n", *high_freq);
+
+	/* get current borders */
+	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+	save_lo = msr_lo & 0x0000007F;
+	save_hi = msr_hi & 0x0000007F;
+
+	/* if current perf_pctg is larger than 90%, we need to decrease the
+	 * upper limit to make the calculation more accurate.
+	 */
+	cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
+	/* try decreasing in 10% steps, some processors react only
+	 * on some barrier values */
+	for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -=10) {
+		/* set to 0 to try_hi perf_pctg */
+		msr_lo &= 0xFFFFFF80;
+		msr_hi &= 0xFFFFFF80;
+		msr_hi |= try_hi;
+		wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+
+		/* read out current core MHz and current perf_pctg */
+		cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
+
+		/* restore values */
+		wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi);
+	}
+	dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax);
+
+	/* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
+	 * eqals
+	 * low_freq * ( 1 - perf_pctg) = (cur_freq - high_freq * perf_pctg)
+	 *
+	 * high_freq * perf_pctg is stored tempoarily into "ebx".
+	 */
+	ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */
+
+	if ((ecx > 95) || (ecx == 0) || (eax < ebx))
+		return -EIO;
+
+	edx = (eax - ebx) / (100 - ecx);
+	*low_freq = edx * 1000; /* back to kHz */
+
+	dprintk("low frequency is %u kHz\n", *low_freq);
+
+	if (*low_freq > *high_freq)
+		*low_freq = *high_freq;
+
+	return 0;
+}
+
+
+static int __init longrun_cpu_init(struct cpufreq_policy *policy)
+{
+	int result = 0;
+
+	/* capability check */
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	/* detect low and high frequency */
+	result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq);
+	if (result)
+		return result;
+
+	/* cpuinfo and default policy values */
+	policy->cpuinfo.min_freq = longrun_low_freq;
+	policy->cpuinfo.max_freq = longrun_high_freq;
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	longrun_get_policy(policy);
+
+	return 0;
+}
+
+
+static struct cpufreq_driver longrun_driver = {
+	.flags		= CPUFREQ_CONST_LOOPS,
+	.verify		= longrun_verify_policy,
+	.setpolicy	= longrun_set_policy,
+	.get		= longrun_get,
+	.init		= longrun_cpu_init,
+	.name		= "longrun",
+	.owner		= THIS_MODULE,
+};
+
+
+/**
+ * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver
+ *
+ * Initializes the LongRun support.
+ */
+static int __init longrun_init(void)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+
+	if (c->x86_vendor != X86_VENDOR_TRANSMETA ||
+	    !cpu_has(c, X86_FEATURE_LONGRUN))
+		return -ENODEV;
+
+	return cpufreq_register_driver(&longrun_driver);
+}
+
+
+/**
+ * longrun_exit - unregisters LongRun support
+ */
+static void __exit longrun_exit(void)
+{
+	cpufreq_unregister_driver(&longrun_driver);
+}
+
+
+MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION ("LongRun driver for Transmeta Crusoe and Efficeon processors.");
+MODULE_LICENSE ("GPL");
+
+module_init(longrun_init);
+module_exit(longrun_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
new file mode 100644
index 00000000000..8eb414b906d
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -0,0 +1,315 @@
+/*
+ *	Pentium 4/Xeon CPU on demand clock modulation/speed scaling
+ *	(C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *	(C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
+ *	(C) 2002 Arjan van de Ven <arjanv@redhat.com>
+ *	(C) 2002 Tora T. Engstad
+ *	All Rights Reserved
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *      The author(s) of this software shall not be held liable for damages
+ *      of any nature resulting due to the use of this software. This
+ *      software is provided AS-IS with no warranties.
+ *
+ *	Date		Errata			Description
+ *	20020525	N44, O17	12.5% or 25% DC causes lockup
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+#include <linux/cpumask.h>
+
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/timex.h>
+
+#include "speedstep-lib.h"
+
+#define PFX	"p4-clockmod: "
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg)
+
+/*
+ * Duty Cycle (3bits), note DC_DISABLE is not specified in
+ * intel docs i just use it to mean disable
+ */
+enum {
+	DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT,
+	DC_64PT, DC_75PT, DC_88PT, DC_DISABLE
+};
+
+#define DC_ENTRIES	8
+
+
+static int has_N44_O17_errata[NR_CPUS];
+static unsigned int stock_freq;
+static struct cpufreq_driver p4clockmod_driver;
+static unsigned int cpufreq_p4_get(unsigned int cpu);
+
+static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
+{
+	u32 l, h;
+
+	if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV))
+		return -EINVAL;
+
+	rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h);
+
+	if (l & 0x01)
+		dprintk("CPU#%d currently thermal throttled\n", cpu);
+
+	if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT))
+		newstate = DC_38PT;
+
+	rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
+	if (newstate == DC_DISABLE) {
+		dprintk("CPU#%d disabling modulation\n", cpu);
+		wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h);
+	} else {
+		dprintk("CPU#%d setting duty cycle to %d%%\n",
+			cpu, ((125 * newstate) / 10));
+		/* bits 63 - 5	: reserved
+		 * bit  4	: enable/disable
+		 * bits 3-1	: duty cycle
+		 * bit  0	: reserved
+		 */
+		l = (l & ~14);
+		l = l | (1<<4) | ((newstate & 0x7)<<1);
+		wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l, h);
+	}
+
+	return 0;
+}
+
+
+static struct cpufreq_frequency_table p4clockmod_table[] = {
+	{DC_RESV, CPUFREQ_ENTRY_INVALID},
+	{DC_DFLT, 0},
+	{DC_25PT, 0},
+	{DC_38PT, 0},
+	{DC_50PT, 0},
+	{DC_64PT, 0},
+	{DC_75PT, 0},
+	{DC_88PT, 0},
+	{DC_DISABLE, 0},
+	{DC_RESV, CPUFREQ_TABLE_END},
+};
+
+
+static int cpufreq_p4_target(struct cpufreq_policy *policy,
+			     unsigned int target_freq,
+			     unsigned int relation)
+{
+	unsigned int    newstate = DC_RESV;
+	struct cpufreq_freqs freqs;
+	int i;
+
+	if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate))
+		return -EINVAL;
+
+	freqs.old = cpufreq_p4_get(policy->cpu);
+	freqs.new = stock_freq * p4clockmod_table[newstate].index / 8;
+
+	if (freqs.new == freqs.old)
+		return 0;
+
+	/* notifiers */
+	for_each_cpu_mask(i, policy->cpus) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	}
+
+	/* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
+	 * Developer's Manual, Volume 3
+	 */
+	for_each_cpu_mask(i, policy->cpus)
+		cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
+
+	/* notifiers */
+	for_each_cpu_mask(i, policy->cpus) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	}
+
+	return 0;
+}
+
+
+static int cpufreq_p4_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]);
+}
+
+
+static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
+{
+	if (c->x86 == 0x06) {
+		if (cpu_has(c, X86_FEATURE_EST))
+			printk(KERN_WARNING PFX "Warning: EST-capable CPU detected. "
+			       "The acpi-cpufreq module offers voltage scaling"
+			       " in addition of frequency scaling. You should use "
+			       "that instead of p4-clockmod, if possible.\n");
+		switch (c->x86_model) {
+		case 0x0E: /* Core */
+		case 0x0F: /* Core Duo */
+			p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
+			return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE);
+		case 0x0D: /* Pentium M (Dothan) */
+			p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
+			/* fall through */
+		case 0x09: /* Pentium M (Banias) */
+			return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM);
+		}
+	}
+
+	if (c->x86 != 0xF) {
+		printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@lists.linux.org.uk>\n");
+		return 0;
+	}
+
+	/* on P-4s, the TSC runs with constant frequency independent whether
+	 * throttling is active or not. */
+	p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
+
+	if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) {
+		printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
+		       "The speedstep-ich or acpi cpufreq modules offer "
+		       "voltage scaling in addition of frequency scaling. "
+		       "You should use either one instead of p4-clockmod, "
+		       "if possible.\n");
+		return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M);
+	}
+
+	return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D);
+}
+
+
+
+static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
+	int cpuid = 0;
+	unsigned int i;
+
+#ifdef CONFIG_SMP
+	policy->cpus = cpu_sibling_map[policy->cpu];
+#endif
+
+	/* Errata workaround */
+	cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask;
+	switch (cpuid) {
+	case 0x0f07:
+	case 0x0f0a:
+	case 0x0f11:
+	case 0x0f12:
+		has_N44_O17_errata[policy->cpu] = 1;
+		dprintk("has errata -- disabling low frequencies\n");
+	}
+
+	/* get max frequency */
+	stock_freq = cpufreq_p4_get_frequency(c);
+	if (!stock_freq)
+		return -EINVAL;
+
+	/* table init */
+	for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
+		if ((i<2) && (has_N44_O17_errata[policy->cpu]))
+			p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+		else
+			p4clockmod_table[i].frequency = (stock_freq * i)/8;
+	}
+	cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu);
+
+	/* cpuinfo and default policy values */
+	policy->cpuinfo.transition_latency = 1000000; /* assumed */
+	policy->cur = stock_freq;
+
+	return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]);
+}
+
+
+static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+static unsigned int cpufreq_p4_get(unsigned int cpu)
+{
+	u32 l, h;
+
+	rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
+
+	if (l & 0x10) {
+		l = l >> 1;
+		l &= 0x7;
+	} else
+		l = DC_DISABLE;
+
+	if (l != DC_DISABLE)
+		return (stock_freq * l / 8);
+
+	return stock_freq;
+}
+
+static struct freq_attr* p4clockmod_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver p4clockmod_driver = {
+	.verify		= cpufreq_p4_verify,
+	.target		= cpufreq_p4_target,
+	.init		= cpufreq_p4_cpu_init,
+	.exit		= cpufreq_p4_cpu_exit,
+	.get		= cpufreq_p4_get,
+	.name		= "p4-clockmod",
+	.owner		= THIS_MODULE,
+	.attr		= p4clockmod_attr,
+};
+
+
+static int __init cpufreq_p4_init(void)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+	int ret;
+
+	/*
+	 * THERM_CONTROL is architectural for IA32 now, so
+	 * we can rely on the capability checks
+	 */
+	if (c->x86_vendor != X86_VENDOR_INTEL)
+		return -ENODEV;
+
+	if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) ||
+		!test_bit(X86_FEATURE_ACC, c->x86_capability))
+		return -ENODEV;
+
+	ret = cpufreq_register_driver(&p4clockmod_driver);
+	if (!ret)
+		printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n");
+
+	return (ret);
+}
+
+
+static void __exit cpufreq_p4_exit(void)
+{
+	cpufreq_unregister_driver(&p4clockmod_driver);
+}
+
+
+MODULE_AUTHOR ("Zwane Mwaikambo <zwane@commfireservices.com>");
+MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
+MODULE_LICENSE ("GPL");
+
+late_initcall(cpufreq_p4_init);
+module_exit(cpufreq_p4_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
new file mode 100644
index 00000000000..6d028533931
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -0,0 +1,255 @@
+/*
+ *  This file was based upon code in Powertweak Linux (http://powertweak.sf.net)
+ *  (C) 2000-2003  Dave Jones, Arjan van de Ven, Janne P�nk�l�, Dominik Brodowski.
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+
+#include <asm/msr.h>
+#include <asm/timex.h>
+#include <asm/io.h>
+
+
+#define POWERNOW_IOPORT 0xfff0         /* it doesn't matter where, as long
+					  as it is unused */
+
+static unsigned int                     busfreq;   /* FSB, in 10 kHz */
+static unsigned int                     max_multiplier;
+
+
+/* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */
+static struct cpufreq_frequency_table clock_ratio[] = {
+	{45,  /* 000 -> 4.5x */ 0},
+	{50,  /* 001 -> 5.0x */ 0},
+	{40,  /* 010 -> 4.0x */ 0},
+	{55,  /* 011 -> 5.5x */ 0},
+	{20,  /* 100 -> 2.0x */ 0},
+	{30,  /* 101 -> 3.0x */ 0},
+	{60,  /* 110 -> 6.0x */ 0},
+	{35,  /* 111 -> 3.5x */ 0},
+	{0, CPUFREQ_TABLE_END}
+};
+
+
+/**
+ * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier
+ *
+ *   Returns the current setting of the frequency multiplier. Core clock
+ * speed is frequency of the Front-Side Bus multiplied with this value.
+ */
+static int powernow_k6_get_cpu_multiplier(void)
+{
+	u64             invalue = 0;
+	u32             msrval;
+
+	msrval = POWERNOW_IOPORT + 0x1;
+	wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
+	invalue=inl(POWERNOW_IOPORT + 0x8);
+	msrval = POWERNOW_IOPORT + 0x0;
+	wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
+
+	return clock_ratio[(invalue >> 5)&7].index;
+}
+
+
+/**
+ * powernow_k6_set_state - set the PowerNow! multiplier
+ * @best_i: clock_ratio[best_i] is the target multiplier
+ *
+ *   Tries to change the PowerNow! multiplier
+ */
+static void powernow_k6_set_state (unsigned int best_i)
+{
+	unsigned long           outvalue=0, invalue=0;
+	unsigned long           msrval;
+	struct cpufreq_freqs    freqs;
+
+	if (clock_ratio[best_i].index > max_multiplier) {
+		printk(KERN_ERR "cpufreq: invalid target frequency\n");
+		return;
+	}
+
+	freqs.old = busfreq * powernow_k6_get_cpu_multiplier();
+	freqs.new = busfreq * clock_ratio[best_i].index;
+	freqs.cpu = 0; /* powernow-k6.c is UP only driver */
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	/* we now need to transform best_i to the BVC format, see AMD#23446 */
+
+	outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5);
+
+	msrval = POWERNOW_IOPORT + 0x1;
+	wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
+	invalue=inl(POWERNOW_IOPORT + 0x8);
+	invalue = invalue & 0xf;
+	outvalue = outvalue | invalue;
+	outl(outvalue ,(POWERNOW_IOPORT + 0x8));
+	msrval = POWERNOW_IOPORT + 0x0;
+	wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	return;
+}
+
+
+/**
+ * powernow_k6_verify - verifies a new CPUfreq policy
+ * @policy: new policy
+ *
+ * Policy must be within lowest and highest possible CPU Frequency,
+ * and at least one possible state must be within min and max.
+ */
+static int powernow_k6_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &clock_ratio[0]);
+}
+
+
+/**
+ * powernow_k6_setpolicy - sets a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: the target frequency
+ * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ *
+ * sets a new CPUFreq policy
+ */
+static int powernow_k6_target (struct cpufreq_policy *policy,
+			       unsigned int target_freq,
+			       unsigned int relation)
+{
+	unsigned int    newstate = 0;
+
+	if (cpufreq_frequency_table_target(policy, &clock_ratio[0], target_freq, relation, &newstate))
+		return -EINVAL;
+
+	powernow_k6_set_state(newstate);
+
+	return 0;
+}
+
+
+static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int i;
+	int result;
+
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	/* get frequencies */
+	max_multiplier = powernow_k6_get_cpu_multiplier();
+	busfreq = cpu_khz / max_multiplier;
+
+	/* table init */
+	for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
+		if (clock_ratio[i].index > max_multiplier)
+			clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID;
+		else
+			clock_ratio[i].frequency = busfreq * clock_ratio[i].index;
+	}
+
+	/* cpuinfo and default policy values */
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	policy->cur = busfreq * max_multiplier;
+
+	result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio);
+	if (result)
+		return (result);
+
+	cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu);
+
+	return 0;
+}
+
+
+static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
+{
+	unsigned int i;
+	for (i=0; i<8; i++) {
+		if (i==max_multiplier)
+			powernow_k6_set_state(i);
+	}
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+static unsigned int powernow_k6_get(unsigned int cpu)
+{
+	return busfreq * powernow_k6_get_cpu_multiplier();
+}
+
+static struct freq_attr* powernow_k6_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver powernow_k6_driver = {
+	.verify		= powernow_k6_verify,
+	.target		= powernow_k6_target,
+	.init		= powernow_k6_cpu_init,
+	.exit		= powernow_k6_cpu_exit,
+	.get		= powernow_k6_get,
+	.name		= "powernow-k6",
+	.owner		= THIS_MODULE,
+	.attr		= powernow_k6_attr,
+};
+
+
+/**
+ * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver
+ *
+ *   Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported
+ * devices, -EINVAL or -ENOMEM on problems during initiatization, and zero
+ * on success.
+ */
+static int __init powernow_k6_init(void)
+{
+	struct cpuinfo_x86      *c = cpu_data;
+
+	if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) ||
+		((c->x86_model != 12) && (c->x86_model != 13)))
+		return -ENODEV;
+
+	if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
+		printk("cpufreq: PowerNow IOPORT region already used.\n");
+		return -EIO;
+	}
+
+	if (cpufreq_register_driver(&powernow_k6_driver)) {
+		release_region (POWERNOW_IOPORT, 16);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+
+/**
+ * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support
+ *
+ *   Unregisters AMD K6-2+ / K6-3+ PowerNow! support.
+ */
+static void __exit powernow_k6_exit(void)
+{
+	cpufreq_unregister_driver(&powernow_k6_driver);
+	release_region (POWERNOW_IOPORT, 16);
+}
+
+
+MODULE_AUTHOR ("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
+MODULE_LICENSE ("GPL");
+
+module_init(powernow_k6_init);
+module_exit(powernow_k6_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
new file mode 100644
index 00000000000..7decd6a50ff
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -0,0 +1,701 @@
+/*
+ *  AMD K7 Powernow driver.
+ *  (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs.
+ *  (C) 2003-2004 Dave Jones <davej@redhat.com>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *  Based upon datasheets & sample CPUs kindly provided by AMD.
+ *
+ * Errata 5: Processor may fail to execute a FID/VID change in presence of interrupt.
+ * - We cli/sti on stepping A0 CPUs around the FID/VID transition.
+ * Errata 15: Processors with half frequency multipliers may hang upon wakeup from disconnect.
+ * - We disable half multipliers if ACPI is used on A0 stepping CPUs.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/dmi.h>
+
+#include <asm/msr.h>
+#include <asm/timer.h>
+#include <asm/timex.h>
+#include <asm/io.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+#include <linux/acpi.h>
+#include <acpi/processor.h>
+#endif
+
+#include "powernow-k7.h"
+
+#define PFX "powernow: "
+
+
+struct psb_s {
+	u8 signature[10];
+	u8 tableversion;
+	u8 flags;
+	u16 settlingtime;
+	u8 reserved1;
+	u8 numpst;
+};
+
+struct pst_s {
+	u32 cpuid;
+	u8 fsbspeed;
+	u8 maxfid;
+	u8 startvid;
+	u8 numpstates;
+};
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+union powernow_acpi_control_t {
+	struct {
+		unsigned long fid:5,
+		vid:5,
+		sgtc:20,
+		res1:2;
+	} bits;
+	unsigned long val;
+};
+#endif
+
+#ifdef CONFIG_CPU_FREQ_DEBUG
+/* divide by 1000 to get VCore voltage in V. */
+static const int mobile_vid_table[32] = {
+    2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
+    1600, 1550, 1500, 1450, 1400, 1350, 1300, 0,
+    1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
+    1075, 1050, 1025, 1000, 975, 950, 925, 0,
+};
+#endif
+
+/* divide by 10 to get FID. */
+static const int fid_codes[32] = {
+    110, 115, 120, 125, 50, 55, 60, 65,
+    70, 75, 80, 85, 90, 95, 100, 105,
+    30, 190, 40, 200, 130, 135, 140, 210,
+    150, 225, 160, 165, 170, 180, -1, -1,
+};
+
+/* This parameter is used in order to force ACPI instead of legacy method for
+ * configuration purpose.
+ */
+
+static int acpi_force;
+
+static struct cpufreq_frequency_table *powernow_table;
+
+static unsigned int can_scale_bus;
+static unsigned int can_scale_vid;
+static unsigned int minimum_speed=-1;
+static unsigned int maximum_speed;
+static unsigned int number_scales;
+static unsigned int fsb;
+static unsigned int latency;
+static char have_a0;
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k7", msg)
+
+static int check_fsb(unsigned int fsbspeed)
+{
+	int delta;
+	unsigned int f = fsb / 1000;
+
+	delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed;
+	return (delta < 5);
+}
+
+static int check_powernow(void)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+	unsigned int maxei, eax, ebx, ecx, edx;
+
+	if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) {
+#ifdef MODULE
+		printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n");
+#endif
+		return 0;
+	}
+
+	/* Get maximum capabilities */
+	maxei = cpuid_eax (0x80000000);
+	if (maxei < 0x80000007) {	/* Any powernow info ? */
+#ifdef MODULE
+		printk (KERN_INFO PFX "No powernow capabilities detected\n");
+#endif
+		return 0;
+	}
+
+	if ((c->x86_model == 6) && (c->x86_mask == 0)) {
+		printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n");
+		have_a0 = 1;
+	}
+
+	cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
+
+	/* Check we can actually do something before we say anything.*/
+	if (!(edx & (1 << 1 | 1 << 2)))
+		return 0;
+
+	printk (KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
+
+	if (edx & 1 << 1) {
+		printk ("frequency");
+		can_scale_bus=1;
+	}
+
+	if ((edx & (1 << 1 | 1 << 2)) == 0x6)
+		printk (" and ");
+
+	if (edx & 1 << 2) {
+		printk ("voltage");
+		can_scale_vid=1;
+	}
+
+	printk (".\n");
+	return 1;
+}
+
+
+static int get_ranges (unsigned char *pst)
+{
+	unsigned int j;
+	unsigned int speed;
+	u8 fid, vid;
+
+	powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL);
+	if (!powernow_table)
+		return -ENOMEM;
+
+	for (j=0 ; j < number_scales; j++) {
+		fid = *pst++;
+
+		powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10;
+		powernow_table[j].index = fid; /* lower 8 bits */
+
+		speed = powernow_table[j].frequency;
+
+		if ((fid_codes[fid] % 10)==5) {
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+			if (have_a0 == 1)
+				powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID;
+#endif
+		}
+
+		if (speed < minimum_speed)
+			minimum_speed = speed;
+		if (speed > maximum_speed)
+			maximum_speed = speed;
+
+		vid = *pst++;
+		powernow_table[j].index |= (vid << 8); /* upper 8 bits */
+
+		dprintk ("   FID: 0x%x (%d.%dx [%dMHz])  "
+			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
+			 fid_codes[fid] % 10, speed/1000, vid,
+			 mobile_vid_table[vid]/1000,
+			 mobile_vid_table[vid]%1000);
+	}
+	powernow_table[number_scales].frequency = CPUFREQ_TABLE_END;
+	powernow_table[number_scales].index = 0;
+
+	return 0;
+}
+
+
+static void change_FID(int fid)
+{
+	union msr_fidvidctl fidvidctl;
+
+	rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
+	if (fidvidctl.bits.FID != fid) {
+		fidvidctl.bits.SGTC = latency;
+		fidvidctl.bits.FID = fid;
+		fidvidctl.bits.VIDC = 0;
+		fidvidctl.bits.FIDC = 1;
+		wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
+	}
+}
+
+
+static void change_VID(int vid)
+{
+	union msr_fidvidctl fidvidctl;
+
+	rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
+	if (fidvidctl.bits.VID != vid) {
+		fidvidctl.bits.SGTC = latency;
+		fidvidctl.bits.VID = vid;
+		fidvidctl.bits.FIDC = 0;
+		fidvidctl.bits.VIDC = 1;
+		wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
+	}
+}
+
+
+static void change_speed (unsigned int index)
+{
+	u8 fid, vid;
+	struct cpufreq_freqs freqs;
+	union msr_fidvidstatus fidvidstatus;
+	int cfid;
+
+	/* fid are the lower 8 bits of the index we stored into
+	 * the cpufreq frequency table in powernow_decode_bios,
+	 * vid are the upper 8 bits.
+	 */
+
+	fid = powernow_table[index].index & 0xFF;
+	vid = (powernow_table[index].index & 0xFF00) >> 8;
+
+	freqs.cpu = 0;
+
+	rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+	cfid = fidvidstatus.bits.CFID;
+	freqs.old = fsb * fid_codes[cfid] / 10;
+
+	freqs.new = powernow_table[index].frequency;
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	/* Now do the magic poking into the MSRs.  */
+
+	if (have_a0 == 1)	/* A0 errata 5 */
+		local_irq_disable();
+
+	if (freqs.old > freqs.new) {
+		/* Going down, so change FID first */
+		change_FID(fid);
+		change_VID(vid);
+	} else {
+		/* Going up, so change VID first */
+		change_VID(vid);
+		change_FID(fid);
+	}
+
+
+	if (have_a0 == 1)
+		local_irq_enable();
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+}
+
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+
+static struct acpi_processor_performance *acpi_processor_perf;
+
+static int powernow_acpi_init(void)
+{
+	int i;
+	int retval = 0;
+	union powernow_acpi_control_t pc;
+
+	if (acpi_processor_perf != NULL && powernow_table != NULL) {
+		retval = -EINVAL;
+		goto err0;
+	}
+
+	acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance),
+				      GFP_KERNEL);
+	if (!acpi_processor_perf) {
+		retval = -ENOMEM;
+		goto err0;
+	}
+
+	if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
+		retval = -EIO;
+		goto err1;
+	}
+
+	if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) {
+		retval = -ENODEV;
+		goto err2;
+	}
+
+	if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) {
+		retval = -ENODEV;
+		goto err2;
+	}
+
+	number_scales = acpi_processor_perf->state_count;
+
+	if (number_scales < 2) {
+		retval = -ENODEV;
+		goto err2;
+	}
+
+	powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL);
+	if (!powernow_table) {
+		retval = -ENOMEM;
+		goto err2;
+	}
+
+	pc.val = (unsigned long) acpi_processor_perf->states[0].control;
+	for (i = 0; i < number_scales; i++) {
+		u8 fid, vid;
+		struct acpi_processor_px *state =
+			&acpi_processor_perf->states[i];
+		unsigned int speed, speed_mhz;
+
+		pc.val = (unsigned long) state->control;
+		dprintk ("acpi:  P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
+			 i,
+			 (u32) state->core_frequency,
+			 (u32) state->power,
+			 (u32) state->transition_latency,
+			 (u32) state->control,
+			 pc.bits.sgtc);
+
+		vid = pc.bits.vid;
+		fid = pc.bits.fid;
+
+		powernow_table[i].frequency = fsb * fid_codes[fid] / 10;
+		powernow_table[i].index = fid; /* lower 8 bits */
+		powernow_table[i].index |= (vid << 8); /* upper 8 bits */
+
+		speed = powernow_table[i].frequency;
+		speed_mhz = speed / 1000;
+
+		/* processor_perflib will multiply the MHz value by 1000 to
+		 * get a KHz value (e.g. 1266000). However, powernow-k7 works
+		 * with true KHz values (e.g. 1266768). To ensure that all
+		 * powernow frequencies are available, we must ensure that
+		 * ACPI doesn't restrict them, so we round up the MHz value
+		 * to ensure that perflib's computed KHz value is greater than
+		 * or equal to powernow's KHz value.
+		 */
+		if (speed % 1000 > 0)
+			speed_mhz++;
+
+		if ((fid_codes[fid] % 10)==5) {
+			if (have_a0 == 1)
+				powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+		}
+
+		dprintk ("   FID: 0x%x (%d.%dx [%dMHz])  "
+			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
+			 fid_codes[fid] % 10, speed_mhz, vid,
+			 mobile_vid_table[vid]/1000,
+			 mobile_vid_table[vid]%1000);
+
+		if (state->core_frequency != speed_mhz) {
+			state->core_frequency = speed_mhz;
+			dprintk("   Corrected ACPI frequency to %d\n",
+				speed_mhz);
+		}
+
+		if (latency < pc.bits.sgtc)
+			latency = pc.bits.sgtc;
+
+		if (speed < minimum_speed)
+			minimum_speed = speed;
+		if (speed > maximum_speed)
+			maximum_speed = speed;
+	}
+
+	powernow_table[i].frequency = CPUFREQ_TABLE_END;
+	powernow_table[i].index = 0;
+
+	/* notify BIOS that we exist */
+	acpi_processor_notify_smm(THIS_MODULE);
+
+	return 0;
+
+err2:
+	acpi_processor_unregister_performance(acpi_processor_perf, 0);
+err1:
+	kfree(acpi_processor_perf);
+err0:
+	printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n");
+	acpi_processor_perf = NULL;
+	return retval;
+}
+#else
+static int powernow_acpi_init(void)
+{
+	printk(KERN_INFO PFX "no support for ACPI processor found."
+	       "  Please recompile your kernel with ACPI processor\n");
+	return -EINVAL;
+}
+#endif
+
+static int powernow_decode_bios (int maxfid, int startvid)
+{
+	struct psb_s *psb;
+	struct pst_s *pst;
+	unsigned int i, j;
+	unsigned char *p;
+	unsigned int etuple;
+	unsigned int ret;
+
+	etuple = cpuid_eax(0x80000001);
+
+	for (i=0xC0000; i < 0xffff0 ; i+=16) {
+
+		p = phys_to_virt(i);
+
+		if (memcmp(p, "AMDK7PNOW!",  10) == 0){
+			dprintk ("Found PSB header at %p\n", p);
+			psb = (struct psb_s *) p;
+			dprintk ("Table version: 0x%x\n", psb->tableversion);
+			if (psb->tableversion != 0x12) {
+				printk (KERN_INFO PFX "Sorry, only v1.2 tables supported right now\n");
+				return -ENODEV;
+			}
+
+			dprintk ("Flags: 0x%x\n", psb->flags);
+			if ((psb->flags & 1)==0) {
+				dprintk ("Mobile voltage regulator\n");
+			} else {
+				dprintk ("Desktop voltage regulator\n");
+			}
+
+			latency = psb->settlingtime;
+			if (latency < 100) {
+				printk (KERN_INFO PFX "BIOS set settling time to %d microseconds."
+						"Should be at least 100. Correcting.\n", latency);
+				latency = 100;
+			}
+			dprintk ("Settling Time: %d microseconds.\n", psb->settlingtime);
+			dprintk ("Has %d PST tables. (Only dumping ones relevant to this CPU).\n", psb->numpst);
+
+			p += sizeof (struct psb_s);
+
+			pst = (struct pst_s *) p;
+
+			for (j=0; j<psb->numpst; j++) {
+				pst = (struct pst_s *) p;
+				number_scales = pst->numpstates;
+
+				if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) &&
+				    (maxfid==pst->maxfid) && (startvid==pst->startvid))
+				{
+					dprintk ("PST:%d (@%p)\n", j, pst);
+					dprintk (" cpuid: 0x%x  fsb: %d  maxFID: 0x%x  startvid: 0x%x\n",
+						 pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
+
+					ret = get_ranges ((char *) pst + sizeof (struct pst_s));
+					return ret;
+				} else {
+					unsigned int k;
+					p = (char *) pst + sizeof (struct pst_s);
+					for (k=0; k<number_scales; k++)
+						p+=2;
+				}
+			}
+			printk (KERN_INFO PFX "No PST tables match this cpuid (0x%x)\n", etuple);
+			printk (KERN_INFO PFX "This is indicative of a broken BIOS.\n");
+
+			return -EINVAL;
+		}
+		p++;
+	}
+
+	return -ENODEV;
+}
+
+
+static int powernow_target (struct cpufreq_policy *policy,
+			    unsigned int target_freq,
+			    unsigned int relation)
+{
+	unsigned int newstate;
+
+	if (cpufreq_frequency_table_target(policy, powernow_table, target_freq, relation, &newstate))
+		return -EINVAL;
+
+	change_speed(newstate);
+
+	return 0;
+}
+
+
+static int powernow_verify (struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, powernow_table);
+}
+
+/*
+ * We use the fact that the bus frequency is somehow
+ * a multiple of 100000/3 khz, then we compute sgtc according
+ * to this multiple.
+ * That way, we match more how AMD thinks all of that work.
+ * We will then get the same kind of behaviour already tested under
+ * the "well-known" other OS.
+ */
+static int __init fixup_sgtc(void)
+{
+	unsigned int sgtc;
+	unsigned int m;
+
+	m = fsb / 3333;
+	if ((m % 10) >= 5)
+		m += 5;
+
+	m /= 10;
+
+	sgtc = 100 * m * latency;
+	sgtc = sgtc / 3;
+	if (sgtc > 0xfffff) {
+		printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc);
+		sgtc = 0xfffff;
+	}
+	return sgtc;
+}
+
+static unsigned int powernow_get(unsigned int cpu)
+{
+	union msr_fidvidstatus fidvidstatus;
+	unsigned int cfid;
+
+	if (cpu)
+		return 0;
+	rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+	cfid = fidvidstatus.bits.CFID;
+
+	return (fsb * fid_codes[cfid] / 10);
+}
+
+
+static int __init acer_cpufreq_pst(struct dmi_system_id *d)
+{
+	printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident);
+	printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n");
+	printk(KERN_WARNING "cpufreq scaling has been disabled as a result of this.\n");
+	return 0;
+}
+
+/*
+ * Some Athlon laptops have really fucked PST tables.
+ * A BIOS update is all that can save them.
+ * Mention this, and disable cpufreq.
+ */
+static struct dmi_system_id __initdata powernow_dmi_table[] = {
+	{
+		.callback = acer_cpufreq_pst,
+		.ident = "Acer Aspire",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"),
+			DMI_MATCH(DMI_BIOS_VERSION, "3A71"),
+		},
+	},
+	{ }
+};
+
+static int __init powernow_cpu_init (struct cpufreq_policy *policy)
+{
+	union msr_fidvidstatus fidvidstatus;
+	int result;
+
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+
+	recalibrate_cpu_khz();
+
+	fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID];
+	if (!fsb) {
+		printk(KERN_WARNING PFX "can not determine bus frequency\n");
+		return -EINVAL;
+	}
+	dprintk("FSB: %3dMHz\n", fsb/1000);
+
+	if (dmi_check_system(powernow_dmi_table) || acpi_force) {
+		printk (KERN_INFO PFX "PSB/PST known to be broken.  Trying ACPI instead\n");
+		result = powernow_acpi_init();
+	} else {
+		result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID);
+		if (result) {
+			printk (KERN_INFO PFX "Trying ACPI perflib\n");
+			maximum_speed = 0;
+			minimum_speed = -1;
+			latency = 0;
+			result = powernow_acpi_init();
+			if (result) {
+				printk (KERN_INFO PFX "ACPI and legacy methods failed\n");
+				printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.html\n");
+			}
+		} else {
+			/* SGTC use the bus clock as timer */
+			latency = fixup_sgtc();
+			printk(KERN_INFO PFX "SGTC: %d\n", latency);
+		}
+	}
+
+	if (result)
+		return result;
+
+	printk (KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
+				minimum_speed/1000, maximum_speed/1000);
+
+	policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency);
+
+	policy->cur = powernow_get(0);
+
+	cpufreq_frequency_table_get_attr(powernow_table, policy->cpu);
+
+	return cpufreq_frequency_table_cpuinfo(policy, powernow_table);
+}
+
+static int powernow_cpu_exit (struct cpufreq_policy *policy) {
+	cpufreq_frequency_table_put_attr(policy->cpu);
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+	if (acpi_processor_perf) {
+		acpi_processor_unregister_performance(acpi_processor_perf, 0);
+		kfree(acpi_processor_perf);
+	}
+#endif
+
+	kfree(powernow_table);
+	return 0;
+}
+
+static struct freq_attr* powernow_table_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver powernow_driver = {
+	.verify	= powernow_verify,
+	.target	= powernow_target,
+	.get	= powernow_get,
+	.init	= powernow_cpu_init,
+	.exit	= powernow_cpu_exit,
+	.name	= "powernow-k7",
+	.owner	= THIS_MODULE,
+	.attr	= powernow_table_attr,
+};
+
+static int __init powernow_init (void)
+{
+	if (check_powernow()==0)
+		return -ENODEV;
+	return cpufreq_register_driver(&powernow_driver);
+}
+
+
+static void __exit powernow_exit (void)
+{
+	cpufreq_unregister_driver(&powernow_driver);
+}
+
+module_param(acpi_force,  int, 0444);
+MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
+
+MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors.");
+MODULE_LICENSE ("GPL");
+
+late_initcall(powernow_init);
+module_exit(powernow_exit);
+
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
new file mode 100644
index 00000000000..f8a63b3664e
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
@@ -0,0 +1,44 @@
+/*
+ *  $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $
+ *  (C) 2003 Dave Jones.
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  AMD-specific information
+ *
+ */
+
+union msr_fidvidctl {
+	struct {
+		unsigned FID:5,			// 4:0
+		reserved1:3,	// 7:5
+		VID:5,			// 12:8
+		reserved2:3,	// 15:13
+		FIDC:1,			// 16
+		VIDC:1,			// 17
+		reserved3:2,	// 19:18
+		FIDCHGRATIO:1,	// 20
+		reserved4:11,	// 31-21
+		SGTC:20,		// 32:51
+		reserved5:12;	// 63:52
+	} bits;
+	unsigned long long val;
+};
+
+union msr_fidvidstatus {
+	struct {
+		unsigned CFID:5,			// 4:0
+		reserved1:3,	// 7:5
+		SFID:5,			// 12:8
+		reserved2:3,	// 15:13
+		MFID:5,			// 20:16
+		reserved3:11,	// 31:21
+		CVID:5,			// 36:32
+		reserved4:3,	// 39:37
+		SVID:5,			// 44:40
+		reserved5:3,	// 47:45
+		MVID:5,			// 52:48
+		reserved6:11;	// 63:53
+	} bits;
+	unsigned long long val;
+};
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
new file mode 100644
index 00000000000..b273b69cfdd
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -0,0 +1,1360 @@
+/*
+ *   (c) 2003-2006 Advanced Micro Devices, Inc.
+ *  Your use of this code is subject to the terms and conditions of the
+ *  GNU general public license version 2. See "COPYING" or
+ *  http://www.gnu.org/licenses/gpl.html
+ *
+ *  Support : mark.langsdorf@amd.com
+ *
+ *  Based on the powernow-k7.c module written by Dave Jones.
+ *  (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs
+ *  (C) 2004 Dominik Brodowski <linux@brodo.de>
+ *  (C) 2004 Pavel Machek <pavel@suse.cz>
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *  Based upon datasheets & sample CPUs kindly provided by AMD.
+ *
+ *  Valuable input gratefully received from Dave Jones, Pavel Machek,
+ *  Dominik Brodowski, Jacob Shin, and others.
+ *  Originally developed by Paul Devriendt.
+ *  Processor information obtained from Chapter 9 (Power and Thermal Management)
+ *  of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
+ *  Opteron Processors" available for download from www.amd.com
+ *
+ *  Tables for specific CPUs can be inferred from
+ *     http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/cpumask.h>
+#include <linux/sched.h>	/* for current / set_cpus_allowed() */
+
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/delay.h>
+
+#ifdef CONFIG_X86_POWERNOW_K8_ACPI
+#include <linux/acpi.h>
+#include <linux/mutex.h>
+#include <acpi/processor.h>
+#endif
+
+#define PFX "powernow-k8: "
+#define BFX PFX "BIOS error: "
+#define VERSION "version 2.00.00"
+#include "powernow-k8.h"
+
+/* serialize freq changes  */
+static DEFINE_MUTEX(fidvid_mutex);
+
+static struct powernow_k8_data *powernow_data[NR_CPUS];
+
+static int cpu_family = CPU_OPTERON;
+
+#ifndef CONFIG_SMP
+static cpumask_t cpu_core_map[1];
+#endif
+
+/* Return a frequency in MHz, given an input fid */
+static u32 find_freq_from_fid(u32 fid)
+{
+	return 800 + (fid * 100);
+}
+
+
+/* Return a frequency in KHz, given an input fid */
+static u32 find_khz_freq_from_fid(u32 fid)
+{
+	return 1000 * find_freq_from_fid(fid);
+}
+
+/* Return a frequency in MHz, given an input fid and did */
+static u32 find_freq_from_fiddid(u32 fid, u32 did)
+{
+	if (current_cpu_data.x86 == 0x10)
+		return 100 * (fid + 0x10) >> did;
+	else
+		return 100 * (fid + 0x8) >> did;
+}
+
+static u32 find_khz_freq_from_fiddid(u32 fid, u32 did)
+{
+	return 1000 * find_freq_from_fiddid(fid, did);
+}
+
+static u32 find_fid_from_pstate(u32 pstate)
+{
+	u32 hi, lo;
+	rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi);
+	return lo & HW_PSTATE_FID_MASK;
+}
+
+static u32 find_did_from_pstate(u32 pstate)
+{
+	u32 hi, lo;
+	rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi);
+	return (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
+}
+
+/* Return the vco fid for an input fid
+ *
+ * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
+ * only from corresponding high fids. This returns "high" fid corresponding to
+ * "low" one.
+ */
+static u32 convert_fid_to_vco_fid(u32 fid)
+{
+	if (fid < HI_FID_TABLE_BOTTOM)
+		return 8 + (2 * fid);
+	else
+		return fid;
+}
+
+/*
+ * Return 1 if the pending bit is set. Unless we just instructed the processor
+ * to transition to a new state, seeing this bit set is really bad news.
+ */
+static int pending_bit_stuck(void)
+{
+	u32 lo, hi;
+
+	if (cpu_family == CPU_HW_PSTATE)
+		return 0;
+
+	rdmsr(MSR_FIDVID_STATUS, lo, hi);
+	return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
+}
+
+/*
+ * Update the global current fid / vid values from the status msr.
+ * Returns 1 on error.
+ */
+static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
+{
+	u32 lo, hi;
+	u32 i = 0;
+
+	if (cpu_family == CPU_HW_PSTATE) {
+		rdmsr(MSR_PSTATE_STATUS, lo, hi);
+		i = lo & HW_PSTATE_MASK;
+		rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi);
+		data->currfid = lo & HW_PSTATE_FID_MASK;
+		data->currdid = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
+		return 0;
+	}
+	do {
+		if (i++ > 10000) {
+			dprintk("detected change pending stuck\n");
+			return 1;
+		}
+		rdmsr(MSR_FIDVID_STATUS, lo, hi);
+	} while (lo & MSR_S_LO_CHANGE_PENDING);
+
+	data->currvid = hi & MSR_S_HI_CURRENT_VID;
+	data->currfid = lo & MSR_S_LO_CURRENT_FID;
+
+	return 0;
+}
+
+/* the isochronous relief time */
+static void count_off_irt(struct powernow_k8_data *data)
+{
+	udelay((1 << data->irt) * 10);
+	return;
+}
+
+/* the voltage stabalization time */
+static void count_off_vst(struct powernow_k8_data *data)
+{
+	udelay(data->vstable * VST_UNITS_20US);
+	return;
+}
+
+/* need to init the control msr to a safe value (for each cpu) */
+static void fidvid_msr_init(void)
+{
+	u32 lo, hi;
+	u8 fid, vid;
+
+	rdmsr(MSR_FIDVID_STATUS, lo, hi);
+	vid = hi & MSR_S_HI_CURRENT_VID;
+	fid = lo & MSR_S_LO_CURRENT_FID;
+	lo = fid | (vid << MSR_C_LO_VID_SHIFT);
+	hi = MSR_C_HI_STP_GNT_BENIGN;
+	dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
+	wrmsr(MSR_FIDVID_CTL, lo, hi);
+}
+
+
+/* write the new fid value along with the other control fields to the msr */
+static int write_new_fid(struct powernow_k8_data *data, u32 fid)
+{
+	u32 lo;
+	u32 savevid = data->currvid;
+	u32 i = 0;
+
+	if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) {
+		printk(KERN_ERR PFX "internal error - overflow on fid write\n");
+		return 1;
+	}
+
+	lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID;
+
+	dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
+		fid, lo, data->plllock * PLL_LOCK_CONVERSION);
+
+	do {
+		wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
+		if (i++ > 100) {
+			printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n");
+			return 1;
+		}
+	} while (query_current_values_with_pending_wait(data));
+
+	count_off_irt(data);
+
+	if (savevid != data->currvid) {
+		printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n",
+		       savevid, data->currvid);
+		return 1;
+	}
+
+	if (fid != data->currfid) {
+		printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
+		        data->currfid);
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Write a new vid to the hardware */
+static int write_new_vid(struct powernow_k8_data *data, u32 vid)
+{
+	u32 lo;
+	u32 savefid = data->currfid;
+	int i = 0;
+
+	if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) {
+		printk(KERN_ERR PFX "internal error - overflow on vid write\n");
+		return 1;
+	}
+
+	lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID;
+
+	dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
+		vid, lo, STOP_GRANT_5NS);
+
+	do {
+		wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
+		if (i++ > 100) {
+			printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
+			return 1;
+		}
+	} while (query_current_values_with_pending_wait(data));
+
+	if (savefid != data->currfid) {
+		printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n",
+		       savefid, data->currfid);
+		return 1;
+	}
+
+	if (vid != data->currvid) {
+		printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid,
+				data->currvid);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Reduce the vid by the max of step or reqvid.
+ * Decreasing vid codes represent increasing voltages:
+ * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
+ */
+static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step)
+{
+	if ((data->currvid - reqvid) > step)
+		reqvid = data->currvid - step;
+
+	if (write_new_vid(data, reqvid))
+		return 1;
+
+	count_off_vst(data);
+
+	return 0;
+}
+
+/* Change hardware pstate by single MSR write */
+static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
+{
+	wrmsr(MSR_PSTATE_CTRL, pstate, 0);
+	data->currfid = find_fid_from_pstate(pstate);
+	return 0;
+}
+
+/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
+static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid)
+{
+	if (core_voltage_pre_transition(data, reqvid))
+		return 1;
+
+	if (core_frequency_transition(data, reqfid))
+		return 1;
+
+	if (core_voltage_post_transition(data, reqvid))
+		return 1;
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
+		printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n",
+				smp_processor_id(),
+				reqfid, reqvid, data->currfid, data->currvid);
+		return 1;
+	}
+
+	dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
+		smp_processor_id(), data->currfid, data->currvid);
+
+	return 0;
+}
+
+/* Phase 1 - core voltage transition ... setup voltage */
+static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid)
+{
+	u32 rvosteps = data->rvo;
+	u32 savefid = data->currfid;
+	u32 maxvid, lo;
+
+	dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n",
+		smp_processor_id(),
+		data->currfid, data->currvid, reqvid, data->rvo);
+
+	rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
+	maxvid = 0x1f & (maxvid >> 16);
+	dprintk("ph1 maxvid=0x%x\n", maxvid);
+	if (reqvid < maxvid) /* lower numbers are higher voltages */
+		reqvid = maxvid;
+
+	while (data->currvid > reqvid) {
+		dprintk("ph1: curr 0x%x, req vid 0x%x\n",
+			data->currvid, reqvid);
+		if (decrease_vid_code_by_step(data, reqvid, data->vidmvs))
+			return 1;
+	}
+
+	while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) {
+		if (data->currvid == maxvid) {
+			rvosteps = 0;
+		} else {
+			dprintk("ph1: changing vid for rvo, req 0x%x\n",
+				data->currvid - 1);
+			if (decrease_vid_code_by_step(data, data->currvid - 1, 1))
+				return 1;
+			rvosteps--;
+		}
+	}
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	if (savefid != data->currfid) {
+		printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid);
+		return 1;
+	}
+
+	dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n",
+		data->currfid, data->currvid);
+
+	return 0;
+}
+
+/* Phase 2 - core frequency transition */
+static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
+{
+	u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid;
+
+	if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
+		printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n",
+			reqfid, data->currfid);
+		return 1;
+	}
+
+	if (data->currfid == reqfid) {
+		printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid);
+		return 0;
+	}
+
+	dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n",
+		smp_processor_id(),
+		data->currfid, data->currvid, reqfid);
+
+	vcoreqfid = convert_fid_to_vco_fid(reqfid);
+	vcocurrfid = convert_fid_to_vco_fid(data->currfid);
+	vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
+	    : vcoreqfid - vcocurrfid;
+
+	while (vcofiddiff > 2) {
+		(data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2);
+
+		if (reqfid > data->currfid) {
+			if (data->currfid > LO_FID_TABLE_TOP) {
+				if (write_new_fid(data, data->currfid + fid_interval)) {
+					return 1;
+				}
+			} else {
+				if (write_new_fid
+				    (data, 2 + convert_fid_to_vco_fid(data->currfid))) {
+					return 1;
+				}
+			}
+		} else {
+			if (write_new_fid(data, data->currfid - fid_interval))
+				return 1;
+		}
+
+		vcocurrfid = convert_fid_to_vco_fid(data->currfid);
+		vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
+		    : vcoreqfid - vcocurrfid;
+	}
+
+	if (write_new_fid(data, reqfid))
+		return 1;
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	if (data->currfid != reqfid) {
+		printk(KERN_ERR PFX
+			"ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n",
+			data->currfid, reqfid);
+		return 1;
+	}
+
+	if (savevid != data->currvid) {
+		printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n",
+			savevid, data->currvid);
+		return 1;
+	}
+
+	dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n",
+		data->currfid, data->currvid);
+
+	return 0;
+}
+
+/* Phase 3 - core voltage transition flow ... jump to the final vid. */
+static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid)
+{
+	u32 savefid = data->currfid;
+	u32 savereqvid = reqvid;
+
+	dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
+		smp_processor_id(),
+		data->currfid, data->currvid);
+
+	if (reqvid != data->currvid) {
+		if (write_new_vid(data, reqvid))
+			return 1;
+
+		if (savefid != data->currfid) {
+			printk(KERN_ERR PFX
+			       "ph3: bad fid change, save 0x%x, curr 0x%x\n",
+			       savefid, data->currfid);
+			return 1;
+		}
+
+		if (data->currvid != reqvid) {
+			printk(KERN_ERR PFX
+			       "ph3: failed vid transition\n, req 0x%x, curr 0x%x",
+			       reqvid, data->currvid);
+			return 1;
+		}
+	}
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	if (savereqvid != data->currvid) {
+		dprintk("ph3 failed, currvid 0x%x\n", data->currvid);
+		return 1;
+	}
+
+	if (savefid != data->currfid) {
+		dprintk("ph3 failed, currfid changed 0x%x\n",
+			data->currfid);
+		return 1;
+	}
+
+	dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n",
+		data->currfid, data->currvid);
+
+	return 0;
+}
+
+static int check_supported_cpu(unsigned int cpu)
+{
+	cpumask_t oldmask = CPU_MASK_ALL;
+	u32 eax, ebx, ecx, edx;
+	unsigned int rc = 0;
+
+	oldmask = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+
+	if (smp_processor_id() != cpu) {
+		printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
+		goto out;
+	}
+
+	if (current_cpu_data.x86_vendor != X86_VENDOR_AMD)
+		goto out;
+
+	eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+	if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
+	    ((eax & CPUID_XFAM) < CPUID_XFAM_10H))
+		goto out;
+
+	if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
+		if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
+		    ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
+			printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
+			goto out;
+		}
+
+		eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
+		if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
+			printk(KERN_INFO PFX
+			       "No frequency change capabilities detected\n");
+			goto out;
+		}
+
+		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
+		if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) {
+			printk(KERN_INFO PFX "Power state transitions not supported\n");
+			goto out;
+		}
+	} else { /* must be a HW Pstate capable processor */
+		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
+		if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
+			cpu_family = CPU_HW_PSTATE;
+		else
+			goto out;
+	}
+
+	rc = 1;
+
+out:
+	set_cpus_allowed(current, oldmask);
+	return rc;
+}
+
+static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
+{
+	unsigned int j;
+	u8 lastfid = 0xff;
+
+	for (j = 0; j < data->numps; j++) {
+		if (pst[j].vid > LEAST_VID) {
+			printk(KERN_ERR PFX "vid %d invalid : 0x%x\n", j, pst[j].vid);
+			return -EINVAL;
+		}
+		if (pst[j].vid < data->rvo) {	/* vid + rvo >= 0 */
+			printk(KERN_ERR BFX "0 vid exceeded with pstate %d\n", j);
+			return -ENODEV;
+		}
+		if (pst[j].vid < maxvid + data->rvo) {	/* vid + rvo >= maxvid */
+			printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j);
+			return -ENODEV;
+		}
+		if (pst[j].fid > MAX_FID) {
+			printk(KERN_ERR BFX "maxfid exceeded with pstate %d\n", j);
+			return -ENODEV;
+		}
+		if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) {
+			/* Only first fid is allowed to be in "low" range */
+			printk(KERN_ERR BFX "two low fids - %d : 0x%x\n", j, pst[j].fid);
+			return -EINVAL;
+		}
+		if (pst[j].fid < lastfid)
+			lastfid = pst[j].fid;
+	}
+	if (lastfid & 1) {
+		printk(KERN_ERR BFX "lastfid invalid\n");
+		return -EINVAL;
+	}
+	if (lastfid > LO_FID_TABLE_TOP)
+		printk(KERN_INFO BFX  "first fid not from lo freq table\n");
+
+	return 0;
+}
+
+static void print_basics(struct powernow_k8_data *data)
+{
+	int j;
+	for (j = 0; j < data->numps; j++) {
+		if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) {
+			if (cpu_family == CPU_HW_PSTATE) {
+				printk(KERN_INFO PFX "   %d : fid 0x%x did 0x%x (%d MHz)\n",
+					j,
+					(data->powernow_table[j].index & 0xff00) >> 8,
+					(data->powernow_table[j].index & 0xff0000) >> 16,
+					data->powernow_table[j].frequency/1000);
+			} else {
+				printk(KERN_INFO PFX "   %d : fid 0x%x (%d MHz), vid 0x%x\n",
+					j,
+					data->powernow_table[j].index & 0xff,
+					data->powernow_table[j].frequency/1000,
+					data->powernow_table[j].index >> 8);
+			}
+		}
+	}
+	if (data->batps)
+		printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps);
+}
+
+static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
+{
+	struct cpufreq_frequency_table *powernow_table;
+	unsigned int j;
+
+	if (data->batps) {    /* use ACPI support to get full speed on mains power */
+		printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range\n", data->batps);
+		data->numps = data->batps;
+	}
+
+	for ( j=1; j<data->numps; j++ ) {
+		if (pst[j-1].fid >= pst[j].fid) {
+			printk(KERN_ERR PFX "PST out of sequence\n");
+			return -EINVAL;
+		}
+	}
+
+	if (data->numps < 2) {
+		printk(KERN_ERR PFX "no p states to transition\n");
+		return -ENODEV;
+	}
+
+	if (check_pst_table(data, pst, maxvid))
+		return -EINVAL;
+
+	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
+		* (data->numps + 1)), GFP_KERNEL);
+	if (!powernow_table) {
+		printk(KERN_ERR PFX "powernow_table memory alloc failure\n");
+		return -ENOMEM;
+	}
+
+	for (j = 0; j < data->numps; j++) {
+		powernow_table[j].index = pst[j].fid; /* lower 8 bits */
+		powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
+		powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid);
+	}
+	powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
+	powernow_table[data->numps].index = 0;
+
+	if (query_current_values_with_pending_wait(data)) {
+		kfree(powernow_table);
+		return -EIO;
+	}
+
+	dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
+	data->powernow_table = powernow_table;
+	if (first_cpu(cpu_core_map[data->cpu]) == data->cpu)
+		print_basics(data);
+
+	for (j = 0; j < data->numps; j++)
+		if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid))
+			return 0;
+
+	dprintk("currfid/vid do not match PST, ignoring\n");
+	return 0;
+}
+
+/* Find and validate the PSB/PST table in BIOS. */
+static int find_psb_table(struct powernow_k8_data *data)
+{
+	struct psb_s *psb;
+	unsigned int i;
+	u32 mvs;
+	u8 maxvid;
+	u32 cpst = 0;
+	u32 thiscpuid;
+
+	for (i = 0xc0000; i < 0xffff0; i += 0x10) {
+		/* Scan BIOS looking for the signature. */
+		/* It can not be at ffff0 - it is too big. */
+
+		psb = phys_to_virt(i);
+		if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0)
+			continue;
+
+		dprintk("found PSB header at 0x%p\n", psb);
+
+		dprintk("table vers: 0x%x\n", psb->tableversion);
+		if (psb->tableversion != PSB_VERSION_1_4) {
+			printk(KERN_ERR BFX "PSB table is not v1.4\n");
+			return -ENODEV;
+		}
+
+		dprintk("flags: 0x%x\n", psb->flags1);
+		if (psb->flags1) {
+			printk(KERN_ERR BFX "unknown flags\n");
+			return -ENODEV;
+		}
+
+		data->vstable = psb->vstable;
+		dprintk("voltage stabilization time: %d(*20us)\n", data->vstable);
+
+		dprintk("flags2: 0x%x\n", psb->flags2);
+		data->rvo = psb->flags2 & 3;
+		data->irt = ((psb->flags2) >> 2) & 3;
+		mvs = ((psb->flags2) >> 4) & 3;
+		data->vidmvs = 1 << mvs;
+		data->batps = ((psb->flags2) >> 6) & 3;
+
+		dprintk("ramp voltage offset: %d\n", data->rvo);
+		dprintk("isochronous relief time: %d\n", data->irt);
+		dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);
+
+		dprintk("numpst: 0x%x\n", psb->num_tables);
+		cpst = psb->num_tables;
+		if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0) ){
+			thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+			if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0) ) {
+				cpst = 1;
+			}
+		}
+		if (cpst != 1) {
+			printk(KERN_ERR BFX "numpst must be 1\n");
+			return -ENODEV;
+		}
+
+		data->plllock = psb->plllocktime;
+		dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
+		dprintk("maxfid: 0x%x\n", psb->maxfid);
+		dprintk("maxvid: 0x%x\n", psb->maxvid);
+		maxvid = psb->maxvid;
+
+		data->numps = psb->numps;
+		dprintk("numpstates: 0x%x\n", data->numps);
+		return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid);
+	}
+	/*
+	 * If you see this message, complain to BIOS manufacturer. If
+	 * he tells you "we do not support Linux" or some similar
+	 * nonsense, remember that Windows 2000 uses the same legacy
+	 * mechanism that the old Linux PSB driver uses. Tell them it
+	 * is broken with Windows 2000.
+	 *
+	 * The reference to the AMD documentation is chapter 9 in the
+	 * BIOS and Kernel Developer's Guide, which is available on
+	 * www.amd.com
+	 */
+	printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n");
+	return -ENODEV;
+}
+
+#ifdef CONFIG_X86_POWERNOW_K8_ACPI
+static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
+{
+	if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
+		return;
+
+	data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
+	data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
+	data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+	data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
+	data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
+	data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
+}
+
+static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
+{
+	struct cpufreq_frequency_table *powernow_table;
+	int ret_val;
+
+	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
+		dprintk("register performance failed: bad ACPI data\n");
+		return -EIO;
+	}
+
+	/* verify the data contained in the ACPI structures */
+	if (data->acpi_data.state_count <= 1) {
+		dprintk("No ACPI P-States\n");
+		goto err_out;
+	}
+
+	if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+		(data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+		dprintk("Invalid control/status registers (%x - %x)\n",
+			data->acpi_data.control_register.space_id,
+			data->acpi_data.status_register.space_id);
+		goto err_out;
+	}
+
+	/* fill in data->powernow_table */
+	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
+		* (data->acpi_data.state_count + 1)), GFP_KERNEL);
+	if (!powernow_table) {
+		dprintk("powernow_table memory alloc failure\n");
+		goto err_out;
+	}
+
+	if (cpu_family == CPU_HW_PSTATE)
+		ret_val = fill_powernow_table_pstate(data, powernow_table);
+	else
+		ret_val = fill_powernow_table_fidvid(data, powernow_table);
+	if (ret_val)
+		goto err_out_mem;
+
+	powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
+	powernow_table[data->acpi_data.state_count].index = 0;
+	data->powernow_table = powernow_table;
+
+	/* fill in data */
+	data->numps = data->acpi_data.state_count;
+	if (first_cpu(cpu_core_map[data->cpu]) == data->cpu)
+		print_basics(data);
+	powernow_k8_acpi_pst_values(data, 0);
+
+	/* notify BIOS that we exist */
+	acpi_processor_notify_smm(THIS_MODULE);
+
+	return 0;
+
+err_out_mem:
+	kfree(powernow_table);
+
+err_out:
+	acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+
+	/* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
+	data->acpi_data.state_count = 0;
+
+	return -ENODEV;
+}
+
+static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
+{
+	int i;
+
+	for (i = 0; i < data->acpi_data.state_count; i++) {
+		u32 index;
+		u32 hi = 0, lo = 0;
+		u32 fid;
+		u32 did;
+
+		index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
+		if (index > MAX_HW_PSTATE) {
+			printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
+			printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
+		}
+		rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
+		if (!(hi & HW_PSTATE_VALID_MASK)) {
+			dprintk("invalid pstate %d, ignoring\n", index);
+			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+			continue;
+		}
+
+		fid = lo & HW_PSTATE_FID_MASK;
+		did = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
+
+		dprintk("   %d : fid 0x%x, did 0x%x\n", index, fid, did);
+
+		powernow_table[i].index = index | (fid << HW_FID_INDEX_SHIFT) | (did << HW_DID_INDEX_SHIFT);
+
+		powernow_table[i].frequency = find_khz_freq_from_fiddid(fid, did);
+
+		if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
+			printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
+				powernow_table[i].frequency,
+				(unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
+			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+			continue;
+		}
+	}
+	return 0;
+}
+
+static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
+{
+	int i;
+	int cntlofreq = 0;
+	for (i = 0; i < data->acpi_data.state_count; i++) {
+		u32 fid;
+		u32 vid;
+
+		if (data->exttype) {
+			fid = data->acpi_data.states[i].status & EXT_FID_MASK;
+			vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK;
+		} else {
+			fid = data->acpi_data.states[i].control & FID_MASK;
+			vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
+		}
+
+		dprintk("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
+
+		powernow_table[i].index = fid; /* lower 8 bits */
+		powernow_table[i].index |= (vid << 8); /* upper 8 bits */
+		powernow_table[i].frequency = find_khz_freq_from_fid(fid);
+
+		/* verify frequency is OK */
+		if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) ||
+			(powernow_table[i].frequency < (MIN_FREQ * 1000))) {
+			dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency);
+			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+			continue;
+		}
+
+		/* verify voltage is OK - BIOSs are using "off" to indicate invalid */
+		if (vid == VID_OFF) {
+			dprintk("invalid vid %u, ignoring\n", vid);
+			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+			continue;
+		}
+
+		/* verify only 1 entry from the lo frequency table */
+		if (fid < HI_FID_TABLE_BOTTOM) {
+			if (cntlofreq) {
+				/* if both entries are the same, ignore this one ... */
+				if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) ||
+				    (powernow_table[i].index != powernow_table[cntlofreq].index)) {
+					printk(KERN_ERR PFX "Too many lo freq table entries\n");
+					return 1;
+				}
+
+				dprintk("double low frequency table entry, ignoring it.\n");
+				powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+				continue;
+			} else
+				cntlofreq = i;
+		}
+
+		if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
+			printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
+				powernow_table[i].frequency,
+				(unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
+			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+			continue;
+		}
+	}
+	return 0;
+}
+
+static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
+{
+	if (data->acpi_data.state_count)
+		acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+}
+
+#else
+static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
+static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
+static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
+#endif /* CONFIG_X86_POWERNOW_K8_ACPI */
+
+/* Take a frequency, and issue the fid/vid transition command */
+static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index)
+{
+	u32 fid = 0;
+	u32 vid = 0;
+	int res, i;
+	struct cpufreq_freqs freqs;
+
+	dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
+
+	/* fid/vid correctness check for k8 */
+	/* fid are the lower 8 bits of the index we stored into
+	 * the cpufreq frequency table in find_psb_table, vid
+	 * are the upper 8 bits.
+	 */
+	fid = data->powernow_table[index].index & 0xFF;
+	vid = (data->powernow_table[index].index & 0xFF00) >> 8;
+
+	dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	if ((data->currvid == vid) && (data->currfid == fid)) {
+		dprintk("target matches current values (fid 0x%x, vid 0x%x)\n",
+			fid, vid);
+		return 0;
+	}
+
+	if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
+		printk(KERN_ERR PFX
+		       "ignoring illegal change in lo freq table-%x to 0x%x\n",
+		       data->currfid, fid);
+		return 1;
+	}
+
+	dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n",
+		smp_processor_id(), fid, vid);
+	freqs.old = find_khz_freq_from_fid(data->currfid);
+	freqs.new = find_khz_freq_from_fid(fid);
+
+	for_each_cpu_mask(i, *(data->available_cores)) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	}
+
+	res = transition_fid_vid(data, fid, vid);
+	freqs.new = find_khz_freq_from_fid(data->currfid);
+
+	for_each_cpu_mask(i, *(data->available_cores)) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	}
+	return res;
+}
+
+/* Take a frequency, and issue the hardware pstate transition command */
+static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index)
+{
+	u32 fid = 0;
+	u32 did = 0;
+	u32 pstate = 0;
+	int res, i;
+	struct cpufreq_freqs freqs;
+
+	dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
+
+	/* get fid did for hardware pstate transition */
+	pstate = index & HW_PSTATE_MASK;
+	if (pstate > MAX_HW_PSTATE)
+		return 0;
+	fid = (index & HW_FID_INDEX_MASK) >> HW_FID_INDEX_SHIFT;
+	did = (index & HW_DID_INDEX_MASK) >> HW_DID_INDEX_SHIFT;
+	freqs.old = find_khz_freq_from_fiddid(data->currfid, data->currdid);
+	freqs.new = find_khz_freq_from_fiddid(fid, did);
+
+	for_each_cpu_mask(i, *(data->available_cores)) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	}
+
+	res = transition_pstate(data, pstate);
+	data->currfid = find_fid_from_pstate(pstate);
+	data->currdid = find_did_from_pstate(pstate);
+	freqs.new = find_khz_freq_from_fiddid(data->currfid, data->currdid);
+
+	for_each_cpu_mask(i, *(data->available_cores)) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	}
+	return res;
+}
+
+/* Driver entry point to switch to the target frequency */
+static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
+{
+	cpumask_t oldmask = CPU_MASK_ALL;
+	struct powernow_k8_data *data = powernow_data[pol->cpu];
+	u32 checkfid;
+	u32 checkvid;
+	unsigned int newstate;
+	int ret = -EIO;
+
+	if (!data)
+		return -EINVAL;
+
+	checkfid = data->currfid;
+	checkvid = data->currvid;
+
+	/* only run on specific CPU from here on */
+	oldmask = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
+
+	if (smp_processor_id() != pol->cpu) {
+		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
+		goto err_out;
+	}
+
+	if (pending_bit_stuck()) {
+		printk(KERN_ERR PFX "failing targ, change pending bit set\n");
+		goto err_out;
+	}
+
+	dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
+		pol->cpu, targfreq, pol->min, pol->max, relation);
+
+	if (query_current_values_with_pending_wait(data))
+		goto err_out;
+
+	if (cpu_family == CPU_HW_PSTATE)
+		dprintk("targ: curr fid 0x%x, did 0x%x\n",
+			data->currfid, data->currdid);
+	else {
+		dprintk("targ: curr fid 0x%x, vid 0x%x\n",
+		data->currfid, data->currvid);
+
+		if ((checkvid != data->currvid) || (checkfid != data->currfid)) {
+			printk(KERN_INFO PFX
+				"error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n",
+				checkfid, data->currfid, checkvid, data->currvid);
+		}
+	}
+
+	if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate))
+		goto err_out;
+
+	mutex_lock(&fidvid_mutex);
+
+	powernow_k8_acpi_pst_values(data, newstate);
+
+	if (cpu_family == CPU_HW_PSTATE)
+		ret = transition_frequency_pstate(data, newstate);
+	else
+		ret = transition_frequency_fidvid(data, newstate);
+	if (ret) {
+		printk(KERN_ERR PFX "transition frequency failed\n");
+		ret = 1;
+		mutex_unlock(&fidvid_mutex);
+		goto err_out;
+	}
+	mutex_unlock(&fidvid_mutex);
+
+	if (cpu_family == CPU_HW_PSTATE)
+		pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid);
+	else
+		pol->cur = find_khz_freq_from_fid(data->currfid);
+	ret = 0;
+
+err_out:
+	set_cpus_allowed(current, oldmask);
+	return ret;
+}
+
+/* Driver entry point to verify the policy and range of frequencies */
+static int powernowk8_verify(struct cpufreq_policy *pol)
+{
+	struct powernow_k8_data *data = powernow_data[pol->cpu];
+
+	if (!data)
+		return -EINVAL;
+
+	return cpufreq_frequency_table_verify(pol, data->powernow_table);
+}
+
+/* per CPU init entry point to the driver */
+static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
+{
+	struct powernow_k8_data *data;
+	cpumask_t oldmask = CPU_MASK_ALL;
+	int rc;
+
+	if (!cpu_online(pol->cpu))
+		return -ENODEV;
+
+	if (!check_supported_cpu(pol->cpu))
+		return -ENODEV;
+
+	data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
+	if (!data) {
+		printk(KERN_ERR PFX "unable to alloc powernow_k8_data");
+		return -ENOMEM;
+	}
+
+	data->cpu = pol->cpu;
+
+	if (powernow_k8_cpu_init_acpi(data)) {
+		/*
+		 * Use the PSB BIOS structure. This is only availabe on
+		 * an UP version, and is deprecated by AMD.
+		 */
+		if (num_online_cpus() != 1) {
+			printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n");
+			kfree(data);
+			return -ENODEV;
+		}
+		if (pol->cpu != 0) {
+			printk(KERN_ERR PFX "No _PSS objects for CPU other than CPU0\n");
+			kfree(data);
+			return -ENODEV;
+		}
+		rc = find_psb_table(data);
+		if (rc) {
+			kfree(data);
+			return -ENODEV;
+		}
+	}
+
+	/* only run on specific CPU from here on */
+	oldmask = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
+
+	if (smp_processor_id() != pol->cpu) {
+		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
+		goto err_out;
+	}
+
+	if (pending_bit_stuck()) {
+		printk(KERN_ERR PFX "failing init, change pending bit set\n");
+		goto err_out;
+	}
+
+	if (query_current_values_with_pending_wait(data))
+		goto err_out;
+
+	if (cpu_family == CPU_OPTERON)
+		fidvid_msr_init();
+
+	/* run on any CPU again */
+	set_cpus_allowed(current, oldmask);
+
+	if (cpu_family == CPU_HW_PSTATE)
+		pol->cpus = cpumask_of_cpu(pol->cpu);
+	else
+		pol->cpus = cpu_core_map[pol->cpu];
+	data->available_cores = &(pol->cpus);
+
+	/* Take a crude guess here.
+	 * That guess was in microseconds, so multiply with 1000 */
+	pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US)
+	    + (3 * (1 << data->irt) * 10)) * 1000;
+
+	if (cpu_family == CPU_HW_PSTATE)
+		pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid);
+	else
+		pol->cur = find_khz_freq_from_fid(data->currfid);
+	dprintk("policy current frequency %d kHz\n", pol->cur);
+
+	/* min/max the cpu is capable of */
+	if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) {
+		printk(KERN_ERR PFX "invalid powernow_table\n");
+		powernow_k8_cpu_exit_acpi(data);
+		kfree(data->powernow_table);
+		kfree(data);
+		return -EINVAL;
+	}
+
+	cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
+
+	if (cpu_family == CPU_HW_PSTATE)
+		dprintk("cpu_init done, current fid 0x%x, did 0x%x\n",
+			data->currfid, data->currdid);
+	else
+		dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
+			data->currfid, data->currvid);
+
+	powernow_data[pol->cpu] = data;
+
+	return 0;
+
+err_out:
+	set_cpus_allowed(current, oldmask);
+	powernow_k8_cpu_exit_acpi(data);
+
+	kfree(data);
+	return -ENODEV;
+}
+
+static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol)
+{
+	struct powernow_k8_data *data = powernow_data[pol->cpu];
+
+	if (!data)
+		return -EINVAL;
+
+	powernow_k8_cpu_exit_acpi(data);
+
+	cpufreq_frequency_table_put_attr(pol->cpu);
+
+	kfree(data->powernow_table);
+	kfree(data);
+
+	return 0;
+}
+
+static unsigned int powernowk8_get (unsigned int cpu)
+{
+	struct powernow_k8_data *data;
+	cpumask_t oldmask = current->cpus_allowed;
+	unsigned int khz = 0;
+
+	data = powernow_data[first_cpu(cpu_core_map[cpu])];
+
+	if (!data)
+		return -EINVAL;
+
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	if (smp_processor_id() != cpu) {
+		printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu);
+		set_cpus_allowed(current, oldmask);
+		return 0;
+	}
+
+	if (query_current_values_with_pending_wait(data))
+		goto out;
+
+	if (cpu_family == CPU_HW_PSTATE)
+		khz = find_khz_freq_from_fiddid(data->currfid, data->currdid);
+	else
+		khz = find_khz_freq_from_fid(data->currfid);
+
+
+out:
+	set_cpus_allowed(current, oldmask);
+	return khz;
+}
+
+static struct freq_attr* powernow_k8_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver cpufreq_amd64_driver = {
+	.verify = powernowk8_verify,
+	.target = powernowk8_target,
+	.init = powernowk8_cpu_init,
+	.exit = __devexit_p(powernowk8_cpu_exit),
+	.get = powernowk8_get,
+	.name = "powernow-k8",
+	.owner = THIS_MODULE,
+	.attr = powernow_k8_attr,
+};
+
+/* driver entry point for init */
+static int __cpuinit powernowk8_init(void)
+{
+	unsigned int i, supported_cpus = 0;
+
+	for_each_online_cpu(i) {
+		if (check_supported_cpu(i))
+			supported_cpus++;
+	}
+
+	if (supported_cpus == num_online_cpus()) {
+		printk(KERN_INFO PFX "Found %d %s "
+			"processors (%d cpu cores) (" VERSION ")\n",
+			num_online_nodes(),
+			boot_cpu_data.x86_model_id, supported_cpus);
+		return cpufreq_register_driver(&cpufreq_amd64_driver);
+	}
+
+	return -ENODEV;
+}
+
+/* driver entry point for term */
+static void __exit powernowk8_exit(void)
+{
+	dprintk("exit\n");
+
+	cpufreq_unregister_driver(&cpufreq_amd64_driver);
+}
+
+MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>");
+MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
+MODULE_LICENSE("GPL");
+
+late_initcall(powernowk8_init);
+module_exit(powernowk8_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
new file mode 100644
index 00000000000..b06c812208c
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -0,0 +1,232 @@
+/*
+ *  (c) 2003-2006 Advanced Micro Devices, Inc.
+ *  Your use of this code is subject to the terms and conditions of the
+ *  GNU general public license version 2. See "COPYING" or
+ *  http://www.gnu.org/licenses/gpl.html
+ */
+
+struct powernow_k8_data {
+	unsigned int cpu;
+
+	u32 numps;  /* number of p-states */
+	u32 batps;  /* number of p-states supported on battery */
+
+	/* these values are constant when the PSB is used to determine
+	 * vid/fid pairings, but are modified during the ->target() call
+	 * when ACPI is used */
+	u32 rvo;     /* ramp voltage offset */
+	u32 irt;     /* isochronous relief time */
+	u32 vidmvs;  /* usable value calculated from mvs */
+	u32 vstable; /* voltage stabilization time, units 20 us */
+	u32 plllock; /* pll lock time, units 1 us */
+        u32 exttype; /* extended interface = 1 */
+
+	/* keep track of the current fid / vid or did */
+	u32 currvid, currfid, currdid;
+
+	/* the powernow_table includes all frequency and vid/fid pairings:
+	 * fid are the lower 8 bits of the index, vid are the upper 8 bits.
+	 * frequency is in kHz */
+	struct cpufreq_frequency_table  *powernow_table;
+
+#ifdef CONFIG_X86_POWERNOW_K8_ACPI
+	/* the acpi table needs to be kept. it's only available if ACPI was
+	 * used to determine valid frequency/vid/fid states */
+	struct acpi_processor_performance acpi_data;
+#endif
+	/* we need to keep track of associated cores, but let cpufreq
+	 * handle hotplug events - so just point at cpufreq pol->cpus
+	 * structure */
+	cpumask_t *available_cores;
+};
+
+
+/* processor's cpuid instruction support */
+#define CPUID_PROCESSOR_SIGNATURE	1	/* function 1 */
+#define CPUID_XFAM			0x0ff00000	/* extended family */
+#define CPUID_XFAM_K8			0
+#define CPUID_XMOD			0x000f0000	/* extended model */
+#define CPUID_XMOD_REV_MASK		0x00080000
+#define CPUID_XFAM_10H			0x00100000	/* family 0x10 */
+#define CPUID_USE_XFAM_XMOD		0x00000f00
+#define CPUID_GET_MAX_CAPABILITIES	0x80000000
+#define CPUID_FREQ_VOLT_CAPABILITIES	0x80000007
+#define P_STATE_TRANSITION_CAPABLE	6
+
+/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For     */
+/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and   */
+/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */
+/* the register number is placed in ecx, and the data is returned in edx:eax. */
+
+#define MSR_FIDVID_CTL      0xc0010041
+#define MSR_FIDVID_STATUS   0xc0010042
+
+/* Field definitions within the FID VID Low Control MSR : */
+#define MSR_C_LO_INIT_FID_VID     0x00010000
+#define MSR_C_LO_NEW_VID          0x00003f00
+#define MSR_C_LO_NEW_FID          0x0000003f
+#define MSR_C_LO_VID_SHIFT        8
+
+/* Field definitions within the FID VID High Control MSR : */
+#define MSR_C_HI_STP_GNT_TO	  0x000fffff
+
+/* Field definitions within the FID VID Low Status MSR : */
+#define MSR_S_LO_CHANGE_PENDING   0x80000000   /* cleared when completed */
+#define MSR_S_LO_MAX_RAMP_VID     0x3f000000
+#define MSR_S_LO_MAX_FID          0x003f0000
+#define MSR_S_LO_START_FID        0x00003f00
+#define MSR_S_LO_CURRENT_FID      0x0000003f
+
+/* Field definitions within the FID VID High Status MSR : */
+#define MSR_S_HI_MIN_WORKING_VID  0x3f000000
+#define MSR_S_HI_MAX_WORKING_VID  0x003f0000
+#define MSR_S_HI_START_VID        0x00003f00
+#define MSR_S_HI_CURRENT_VID      0x0000003f
+#define MSR_C_HI_STP_GNT_BENIGN	  0x00000001
+
+
+/* Hardware Pstate _PSS and MSR definitions */
+#define USE_HW_PSTATE		0x00000080
+#define HW_PSTATE_FID_MASK 	0x0000003f
+#define HW_PSTATE_DID_MASK 	0x000001c0
+#define HW_PSTATE_DID_SHIFT 	6
+#define HW_PSTATE_MASK 		0x00000007
+#define HW_PSTATE_VALID_MASK 	0x80000000
+#define HW_FID_INDEX_SHIFT	8
+#define HW_FID_INDEX_MASK	0x0000ff00
+#define HW_DID_INDEX_SHIFT	16
+#define HW_DID_INDEX_MASK	0x00ff0000
+#define HW_WATTS_MASK		0xff
+#define HW_PWR_DVR_MASK		0x300
+#define HW_PWR_DVR_SHIFT	8
+#define HW_PWR_MAX_MULT		3
+#define MAX_HW_PSTATE		8	/* hw pstate supports up to 8 */
+#define MSR_PSTATE_DEF_BASE 	0xc0010064 /* base of Pstate MSRs */
+#define MSR_PSTATE_STATUS 	0xc0010063 /* Pstate Status MSR */
+#define MSR_PSTATE_CTRL 	0xc0010062 /* Pstate control MSR */
+
+/* define the two driver architectures */
+#define CPU_OPTERON 0
+#define CPU_HW_PSTATE 1
+
+
+/*
+ * There are restrictions frequencies have to follow:
+ * - only 1 entry in the low fid table ( <=1.4GHz )
+ * - lowest entry in the high fid table must be >= 2 * the entry in the
+ *   low fid table
+ * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry
+ *   in the low fid table
+ * - the parts can only step at <= 200 MHz intervals, odd fid values are
+ *   supported in revision G and later revisions.
+ * - lowest frequency must be >= interprocessor hypertransport link speed
+ *   (only applies to MP systems obviously)
+ */
+
+/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */
+#define LO_FID_TABLE_TOP     7	/* fid values marking the boundary    */
+#define HI_FID_TABLE_BOTTOM  8	/* between the low and high tables    */
+
+#define LO_VCOFREQ_TABLE_TOP    1400	/* corresponding vco frequency values */
+#define HI_VCOFREQ_TABLE_BOTTOM 1600
+
+#define MIN_FREQ_RESOLUTION  200 /* fids jump by 2 matching freq jumps by 200 */
+
+#define MAX_FID 0x2a	/* Spec only gives FID values as far as 5 GHz */
+#define LEAST_VID 0x3e	/* Lowest (numerically highest) useful vid value */
+
+#define MIN_FREQ 800	/* Min and max freqs, per spec */
+#define MAX_FREQ 5000
+
+#define INVALID_FID_MASK 0xffffffc0  /* not a valid fid if these bits are set */
+#define INVALID_VID_MASK 0xffffffc0  /* not a valid vid if these bits are set */
+
+#define VID_OFF 0x3f
+
+#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */
+
+#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */
+
+#define MAXIMUM_VID_STEPS 1  /* Current cpus only allow a single step of 25mV */
+#define VST_UNITS_20US 20   /* Voltage Stabalization Time is in units of 20us */
+
+/*
+ * Most values of interest are enocoded in a single field of the _PSS
+ * entries: the "control" value.
+ */
+
+#define IRT_SHIFT      30
+#define RVO_SHIFT      28
+#define EXT_TYPE_SHIFT 27
+#define PLL_L_SHIFT    20
+#define MVS_SHIFT      18
+#define VST_SHIFT      11
+#define VID_SHIFT       6
+#define IRT_MASK        3
+#define RVO_MASK        3
+#define EXT_TYPE_MASK   1
+#define PLL_L_MASK   0x7f
+#define MVS_MASK        3
+#define VST_MASK     0x7f
+#define VID_MASK     0x1f
+#define FID_MASK     0x1f
+#define EXT_VID_MASK 0x3f
+#define EXT_FID_MASK 0x3f
+
+
+/*
+ * Version 1.4 of the PSB table. This table is constructed by BIOS and is
+ * to tell the OS's power management driver which VIDs and FIDs are
+ * supported by this particular processor.
+ * If the data in the PSB / PST is wrong, then this driver will program the
+ * wrong values into hardware, which is very likely to lead to a crash.
+ */
+
+#define PSB_ID_STRING      "AMDK7PNOW!"
+#define PSB_ID_STRING_LEN  10
+
+#define PSB_VERSION_1_4  0x14
+
+struct psb_s {
+	u8 signature[10];
+	u8 tableversion;
+	u8 flags1;
+	u16 vstable;
+	u8 flags2;
+	u8 num_tables;
+	u32 cpuid;
+	u8 plllocktime;
+	u8 maxfid;
+	u8 maxvid;
+	u8 numps;
+};
+
+/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */
+struct pst_s {
+	u8 fid;
+	u8 vid;
+};
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg)
+
+static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid);
+static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid);
+static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
+
+static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
+
+#ifdef CONFIG_X86_POWERNOW_K8_ACPI
+static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
+static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
+#endif
+
+#ifdef CONFIG_SMP
+static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
+{
+}
+#else
+static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
+{
+	cpu_set(0, cpu_sharedcore_mask[0]);
+}
+#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
new file mode 100644
index 00000000000..d9f3e90a7ae
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
@@ -0,0 +1,190 @@
+/*
+ *	sc520_freq.c: cpufreq driver for the AMD Elan sc520
+ *
+ *	Copyright (C) 2005 Sean Young <sean@mess.org>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	Based on elanfreq.c
+ *
+ *	2005-03-30: - initial revision
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/delay.h>
+#include <linux/cpufreq.h>
+
+#include <asm/msr.h>
+#include <asm/timex.h>
+#include <asm/io.h>
+
+#define MMCR_BASE	0xfffef000	/* The default base address */
+#define OFFS_CPUCTL	0x2   /* CPU Control Register */
+
+static __u8 __iomem *cpuctl;
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "sc520_freq", msg)
+
+static struct cpufreq_frequency_table sc520_freq_table[] = {
+	{0x01,	100000},
+	{0x02,	133000},
+	{0,	CPUFREQ_TABLE_END},
+};
+
+static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
+{
+	u8 clockspeed_reg = *cpuctl;
+
+	switch (clockspeed_reg & 0x03) {
+	default:
+		printk(KERN_ERR "sc520_freq: error: cpuctl register has unexpected value %02x\n", clockspeed_reg);
+	case 0x01:
+		return 100000;
+	case 0x02:
+		return 133000;
+	}
+}
+
+static void sc520_freq_set_cpu_state (unsigned int state)
+{
+
+	struct cpufreq_freqs	freqs;
+	u8 clockspeed_reg;
+
+	freqs.old = sc520_freq_get_cpu_frequency(0);
+	freqs.new = sc520_freq_table[state].frequency;
+	freqs.cpu = 0; /* AMD Elan is UP */
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	dprintk("attempting to set frequency to %i kHz\n",
+			sc520_freq_table[state].frequency);
+
+	local_irq_disable();
+
+	clockspeed_reg = *cpuctl & ~0x03;
+	*cpuctl = clockspeed_reg | sc520_freq_table[state].index;
+
+	local_irq_enable();
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+};
+
+static int sc520_freq_verify (struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]);
+}
+
+static int sc520_freq_target (struct cpufreq_policy *policy,
+			    unsigned int target_freq,
+			    unsigned int relation)
+{
+	unsigned int newstate = 0;
+
+	if (cpufreq_frequency_table_target(policy, sc520_freq_table, target_freq, relation, &newstate))
+		return -EINVAL;
+
+	sc520_freq_set_cpu_state(newstate);
+
+	return 0;
+}
+
+
+/*
+ *	Module init and exit code
+ */
+
+static int sc520_freq_cpu_init(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+	int result;
+
+	/* capability check */
+	if (c->x86_vendor != X86_VENDOR_AMD ||
+	    c->x86 != 4 || c->x86_model != 9)
+		return -ENODEV;
+
+	/* cpuinfo and default policy values */
+	policy->cpuinfo.transition_latency = 1000000; /* 1ms */
+	policy->cur = sc520_freq_get_cpu_frequency(0);
+
+	result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table);
+	if (result)
+		return (result);
+
+	cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu);
+
+	return 0;
+}
+
+
+static int sc520_freq_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+
+static struct freq_attr* sc520_freq_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+
+static struct cpufreq_driver sc520_freq_driver = {
+	.get	= sc520_freq_get_cpu_frequency,
+	.verify	= sc520_freq_verify,
+	.target	= sc520_freq_target,
+	.init	= sc520_freq_cpu_init,
+	.exit	= sc520_freq_cpu_exit,
+	.name	= "sc520_freq",
+	.owner	= THIS_MODULE,
+	.attr	= sc520_freq_attr,
+};
+
+
+static int __init sc520_freq_init(void)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+	int err;
+
+	/* Test if we have the right hardware */
+	if(c->x86_vendor != X86_VENDOR_AMD ||
+				c->x86 != 4 || c->x86_model != 9) {
+		dprintk("no Elan SC520 processor found!\n");
+		return -ENODEV;
+	}
+	cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1);
+	if(!cpuctl) {
+		printk(KERN_ERR "sc520_freq: error: failed to remap memory\n");
+		return -ENOMEM;
+	}
+
+	err = cpufreq_register_driver(&sc520_freq_driver);
+	if (err)
+		iounmap(cpuctl);
+
+	return err;
+}
+
+
+static void __exit sc520_freq_exit(void)
+{
+	cpufreq_unregister_driver(&sc520_freq_driver);
+	iounmap(cpuctl);
+}
+
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sean Young <sean@mess.org>");
+MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU");
+
+module_init(sc520_freq_init);
+module_exit(sc520_freq_exit);
+
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
new file mode 100644
index 00000000000..811d4743854
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -0,0 +1,633 @@
+/*
+ * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium
+ * M (part of the Centrino chipset).
+ *
+ * Since the original Pentium M, most new Intel CPUs support Enhanced
+ * SpeedStep.
+ *
+ * Despite the "SpeedStep" in the name, this is almost entirely unlike
+ * traditional SpeedStep.
+ *
+ * Modelled on speedstep.c
+ *
+ * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/sched.h>	/* current */
+#include <linux/delay.h>
+#include <linux/compiler.h>
+
+#include <asm/msr.h>
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
+
+#define PFX		"speedstep-centrino: "
+#define MAINTAINER	"cpufreq@lists.linux.org.uk"
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
+
+#define INTEL_MSR_RANGE	(0xffff)
+
+struct cpu_id
+{
+	__u8	x86;            /* CPU family */
+	__u8	x86_model;	/* model */
+	__u8	x86_mask;	/* stepping */
+};
+
+enum {
+	CPU_BANIAS,
+	CPU_DOTHAN_A1,
+	CPU_DOTHAN_A2,
+	CPU_DOTHAN_B0,
+	CPU_MP4HT_D0,
+	CPU_MP4HT_E0,
+};
+
+static const struct cpu_id cpu_ids[] = {
+	[CPU_BANIAS]	= { 6,  9, 5 },
+	[CPU_DOTHAN_A1]	= { 6, 13, 1 },
+	[CPU_DOTHAN_A2]	= { 6, 13, 2 },
+	[CPU_DOTHAN_B0]	= { 6, 13, 6 },
+	[CPU_MP4HT_D0]	= {15,  3, 4 },
+	[CPU_MP4HT_E0]	= {15,  4, 1 },
+};
+#define N_IDS	ARRAY_SIZE(cpu_ids)
+
+struct cpu_model
+{
+	const struct cpu_id *cpu_id;
+	const char	*model_name;
+	unsigned	max_freq; /* max clock in kHz */
+
+	struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */
+};
+static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x);
+
+/* Operating points for current CPU */
+static struct cpu_model *centrino_model[NR_CPUS];
+static const struct cpu_id *centrino_cpu[NR_CPUS];
+
+static struct cpufreq_driver centrino_driver;
+
+#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE
+
+/* Computes the correct form for IA32_PERF_CTL MSR for a particular
+   frequency/voltage operating point; frequency in MHz, volts in mV.
+   This is stored as "index" in the structure. */
+#define OP(mhz, mv)							\
+	{								\
+		.frequency = (mhz) * 1000,				\
+		.index = (((mhz)/100) << 8) | ((mv - 700) / 16)		\
+	}
+
+/*
+ * These voltage tables were derived from the Intel Pentium M
+ * datasheet, document 25261202.pdf, Table 5.  I have verified they
+ * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium
+ * M.
+ */
+
+/* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */
+static struct cpufreq_frequency_table banias_900[] =
+{
+	OP(600,  844),
+	OP(800,  988),
+	OP(900, 1004),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */
+static struct cpufreq_frequency_table banias_1000[] =
+{
+	OP(600,   844),
+	OP(800,   972),
+	OP(900,   988),
+	OP(1000, 1004),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */
+static struct cpufreq_frequency_table banias_1100[] =
+{
+	OP( 600,  956),
+	OP( 800, 1020),
+	OP( 900, 1100),
+	OP(1000, 1164),
+	OP(1100, 1180),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+
+/* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */
+static struct cpufreq_frequency_table banias_1200[] =
+{
+	OP( 600,  956),
+	OP( 800, 1004),
+	OP( 900, 1020),
+	OP(1000, 1100),
+	OP(1100, 1164),
+	OP(1200, 1180),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.30GHz (Banias) */
+static struct cpufreq_frequency_table banias_1300[] =
+{
+	OP( 600,  956),
+	OP( 800, 1260),
+	OP(1000, 1292),
+	OP(1200, 1356),
+	OP(1300, 1388),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.40GHz (Banias) */
+static struct cpufreq_frequency_table banias_1400[] =
+{
+	OP( 600,  956),
+	OP( 800, 1180),
+	OP(1000, 1308),
+	OP(1200, 1436),
+	OP(1400, 1484),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.50GHz (Banias) */
+static struct cpufreq_frequency_table banias_1500[] =
+{
+	OP( 600,  956),
+	OP( 800, 1116),
+	OP(1000, 1228),
+	OP(1200, 1356),
+	OP(1400, 1452),
+	OP(1500, 1484),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.60GHz (Banias) */
+static struct cpufreq_frequency_table banias_1600[] =
+{
+	OP( 600,  956),
+	OP( 800, 1036),
+	OP(1000, 1164),
+	OP(1200, 1276),
+	OP(1400, 1420),
+	OP(1600, 1484),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.70GHz (Banias) */
+static struct cpufreq_frequency_table banias_1700[] =
+{
+	OP( 600,  956),
+	OP( 800, 1004),
+	OP(1000, 1116),
+	OP(1200, 1228),
+	OP(1400, 1308),
+	OP(1700, 1484),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+#undef OP
+
+#define _BANIAS(cpuid, max, name)	\
+{	.cpu_id		= cpuid,	\
+	.model_name	= "Intel(R) Pentium(R) M processor " name "MHz", \
+	.max_freq	= (max)*1000,	\
+	.op_points	= banias_##max,	\
+}
+#define BANIAS(max)	_BANIAS(&cpu_ids[CPU_BANIAS], max, #max)
+
+/* CPU models, their operating frequency range, and freq/voltage
+   operating points */
+static struct cpu_model models[] =
+{
+	_BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"),
+	BANIAS(1000),
+	BANIAS(1100),
+	BANIAS(1200),
+	BANIAS(1300),
+	BANIAS(1400),
+	BANIAS(1500),
+	BANIAS(1600),
+	BANIAS(1700),
+
+	/* NULL model_name is a wildcard */
+	{ &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL },
+	{ &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL },
+	{ &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL },
+	{ &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL },
+	{ &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL },
+
+	{ NULL, }
+};
+#undef _BANIAS
+#undef BANIAS
+
+static int centrino_cpu_init_table(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu];
+	struct cpu_model *model;
+
+	for(model = models; model->cpu_id != NULL; model++)
+		if (centrino_verify_cpu_id(cpu, model->cpu_id) &&
+		    (model->model_name == NULL ||
+		     strcmp(cpu->x86_model_id, model->model_name) == 0))
+			break;
+
+	if (model->cpu_id == NULL) {
+		/* No match at all */
+		dprintk("no support for CPU model \"%s\": "
+		       "send /proc/cpuinfo to " MAINTAINER "\n",
+		       cpu->x86_model_id);
+		return -ENOENT;
+	}
+
+	if (model->op_points == NULL) {
+		/* Matched a non-match */
+		dprintk("no table support for CPU model \"%s\"\n",
+		       cpu->x86_model_id);
+		dprintk("try using the acpi-cpufreq driver\n");
+		return -ENOENT;
+	}
+
+	centrino_model[policy->cpu] = model;
+
+	dprintk("found \"%s\": max frequency: %dkHz\n",
+	       model->model_name, model->max_freq);
+
+	return 0;
+}
+
+#else
+static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; }
+#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */
+
+static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x)
+{
+	if ((c->x86 == x->x86) &&
+	    (c->x86_model == x->x86_model) &&
+	    (c->x86_mask == x->x86_mask))
+		return 1;
+	return 0;
+}
+
+/* To be called only after centrino_model is initialized */
+static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe)
+{
+	int i;
+
+	/*
+	 * Extract clock in kHz from PERF_CTL value
+	 * for centrino, as some DSDTs are buggy.
+	 * Ideally, this can be done using the acpi_data structure.
+	 */
+	if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) ||
+	    (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) ||
+	    (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) {
+		msr = (msr >> 8) & 0xff;
+		return msr * 100000;
+	}
+
+	if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points))
+		return 0;
+
+	msr &= 0xffff;
+	for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) {
+		if (msr == centrino_model[cpu]->op_points[i].index)
+			return centrino_model[cpu]->op_points[i].frequency;
+	}
+	if (failsafe)
+		return centrino_model[cpu]->op_points[i-1].frequency;
+	else
+		return 0;
+}
+
+/* Return the current CPU frequency in kHz */
+static unsigned int get_cur_freq(unsigned int cpu)
+{
+	unsigned l, h;
+	unsigned clock_freq;
+	cpumask_t saved_mask;
+
+	saved_mask = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	if (smp_processor_id() != cpu)
+		return 0;
+
+	rdmsr(MSR_IA32_PERF_STATUS, l, h);
+	clock_freq = extract_clock(l, cpu, 0);
+
+	if (unlikely(clock_freq == 0)) {
+		/*
+		 * On some CPUs, we can see transient MSR values (which are
+		 * not present in _PSS), while CPU is doing some automatic
+		 * P-state transition (like TM2). Get the last freq set 
+		 * in PERF_CTL.
+		 */
+		rdmsr(MSR_IA32_PERF_CTL, l, h);
+		clock_freq = extract_clock(l, cpu, 1);
+	}
+
+	set_cpus_allowed(current, saved_mask);
+	return clock_freq;
+}
+
+
+static int centrino_cpu_init(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu];
+	unsigned freq;
+	unsigned l, h;
+	int ret;
+	int i;
+
+	/* Only Intel makes Enhanced Speedstep-capable CPUs */
+	if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST))
+		return -ENODEV;
+
+	if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC))
+		centrino_driver.flags |= CPUFREQ_CONST_LOOPS;
+
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	for (i = 0; i < N_IDS; i++)
+		if (centrino_verify_cpu_id(cpu, &cpu_ids[i]))
+			break;
+
+	if (i != N_IDS)
+		centrino_cpu[policy->cpu] = &cpu_ids[i];
+
+	if (!centrino_cpu[policy->cpu]) {
+		dprintk("found unsupported CPU with "
+		"Enhanced SpeedStep: send /proc/cpuinfo to "
+		MAINTAINER "\n");
+		return -ENODEV;
+	}
+
+	if (centrino_cpu_init_table(policy)) {
+		return -ENODEV;
+	}
+
+	/* Check to see if Enhanced SpeedStep is enabled, and try to
+	   enable it if not. */
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+
+	if (!(l & (1<<16))) {
+		l |= (1<<16);
+		dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
+		wrmsr(MSR_IA32_MISC_ENABLE, l, h);
+
+		/* check to see if it stuck */
+		rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+		if (!(l & (1<<16))) {
+			printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n");
+			return -ENODEV;
+		}
+	}
+
+	freq = get_cur_freq(policy->cpu);
+
+	policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */
+	policy->cur = freq;
+
+	dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur);
+
+	ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points);
+	if (ret)
+		return (ret);
+
+	cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu);
+
+	return 0;
+}
+
+static int centrino_cpu_exit(struct cpufreq_policy *policy)
+{
+	unsigned int cpu = policy->cpu;
+
+	if (!centrino_model[cpu])
+		return -ENODEV;
+
+	cpufreq_frequency_table_put_attr(cpu);
+
+	centrino_model[cpu] = NULL;
+
+	return 0;
+}
+
+/**
+ * centrino_verify - verifies a new CPUFreq policy
+ * @policy: new policy
+ *
+ * Limit must be within this model's frequency range at least one
+ * border included.
+ */
+static int centrino_verify (struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points);
+}
+
+/**
+ * centrino_setpolicy - set a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: the target frequency
+ * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ *
+ * Sets a new CPUFreq policy.
+ */
+static int centrino_target (struct cpufreq_policy *policy,
+			    unsigned int target_freq,
+			    unsigned int relation)
+{
+	unsigned int    newstate = 0;
+	unsigned int	msr, oldmsr = 0, h = 0, cpu = policy->cpu;
+	struct cpufreq_freqs	freqs;
+	cpumask_t		online_policy_cpus;
+	cpumask_t		saved_mask;
+	cpumask_t		set_mask;
+	cpumask_t		covered_cpus;
+	int			retval = 0;
+	unsigned int		j, k, first_cpu, tmp;
+
+	if (unlikely(centrino_model[cpu] == NULL))
+		return -ENODEV;
+
+	if (unlikely(cpufreq_frequency_table_target(policy,
+			centrino_model[cpu]->op_points,
+			target_freq,
+			relation,
+			&newstate))) {
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_HOTPLUG_CPU
+	/* cpufreq holds the hotplug lock, so we are safe from here on */
+	cpus_and(online_policy_cpus, cpu_online_map, policy->cpus);
+#else
+	online_policy_cpus = policy->cpus;
+#endif
+
+	saved_mask = current->cpus_allowed;
+	first_cpu = 1;
+	cpus_clear(covered_cpus);
+	for_each_cpu_mask(j, online_policy_cpus) {
+		/*
+		 * Support for SMP systems.
+		 * Make sure we are running on CPU that wants to change freq
+		 */
+		cpus_clear(set_mask);
+		if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+			cpus_or(set_mask, set_mask, online_policy_cpus);
+		else
+			cpu_set(j, set_mask);
+
+		set_cpus_allowed(current, set_mask);
+		preempt_disable();
+		if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) {
+			dprintk("couldn't limit to CPUs in this domain\n");
+			retval = -EAGAIN;
+			if (first_cpu) {
+				/* We haven't started the transition yet. */
+				goto migrate_end;
+			}
+			preempt_enable();
+			break;
+		}
+
+		msr = centrino_model[cpu]->op_points[newstate].index;
+
+		if (first_cpu) {
+			rdmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+			if (msr == (oldmsr & 0xffff)) {
+				dprintk("no change needed - msr was and needs "
+					"to be %x\n", oldmsr);
+				retval = 0;
+				goto migrate_end;
+			}
+
+			freqs.old = extract_clock(oldmsr, cpu, 0);
+			freqs.new = extract_clock(msr, cpu, 0);
+
+			dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
+				target_freq, freqs.old, freqs.new, msr);
+
+			for_each_cpu_mask(k, online_policy_cpus) {
+				freqs.cpu = k;
+				cpufreq_notify_transition(&freqs,
+					CPUFREQ_PRECHANGE);
+			}
+
+			first_cpu = 0;
+			/* all but 16 LSB are reserved, treat them with care */
+			oldmsr &= ~0xffff;
+			msr &= 0xffff;
+			oldmsr |= msr;
+		}
+
+		wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+		if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
+			preempt_enable();
+			break;
+		}
+
+		cpu_set(j, covered_cpus);
+		preempt_enable();
+	}
+
+	for_each_cpu_mask(k, online_policy_cpus) {
+		freqs.cpu = k;
+		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	}
+
+	if (unlikely(retval)) {
+		/*
+		 * We have failed halfway through the frequency change.
+		 * We have sent callbacks to policy->cpus and
+		 * MSRs have already been written on coverd_cpus.
+		 * Best effort undo..
+		 */
+
+		if (!cpus_empty(covered_cpus)) {
+			for_each_cpu_mask(j, covered_cpus) {
+				set_cpus_allowed(current, cpumask_of_cpu(j));
+				wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+			}
+		}
+
+		tmp = freqs.new;
+		freqs.new = freqs.old;
+		freqs.old = tmp;
+		for_each_cpu_mask(j, online_policy_cpus) {
+			freqs.cpu = j;
+			cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+			cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+		}
+	}
+	set_cpus_allowed(current, saved_mask);
+	return 0;
+
+migrate_end:
+	preempt_enable();
+	set_cpus_allowed(current, saved_mask);
+	return 0;
+}
+
+static struct freq_attr* centrino_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver centrino_driver = {
+	.name		= "centrino", /* should be speedstep-centrino,
+					 but there's a 16 char limit */
+	.init		= centrino_cpu_init,
+	.exit		= centrino_cpu_exit,
+	.verify		= centrino_verify,
+	.target		= centrino_target,
+	.get		= get_cur_freq,
+	.attr           = centrino_attr,
+	.owner		= THIS_MODULE,
+};
+
+
+/**
+ * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver
+ *
+ * Initializes the Enhanced SpeedStep support. Returns -ENODEV on
+ * unsupported devices, -ENOENT if there's no voltage table for this
+ * particular CPU model, -EINVAL on problems during initiatization,
+ * and zero on success.
+ *
+ * This is quite picky.  Not only does the CPU have to advertise the
+ * "est" flag in the cpuid capability flags, we look for a specific
+ * CPU model and stepping, and we need to have the exact model name in
+ * our voltage tables.  That is, be paranoid about not releasing
+ * someone's valuable magic smoke.
+ */
+static int __init centrino_init(void)
+{
+	struct cpuinfo_x86 *cpu = cpu_data;
+
+	if (!cpu_has(cpu, X86_FEATURE_EST))
+		return -ENODEV;
+
+	return cpufreq_register_driver(&centrino_driver);
+}
+
+static void __exit centrino_exit(void)
+{
+	cpufreq_unregister_driver(&centrino_driver);
+}
+
+MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>");
+MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors.");
+MODULE_LICENSE ("GPL");
+
+late_initcall(centrino_init);
+module_exit(centrino_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
new file mode 100644
index 00000000000..36685e8f7be
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -0,0 +1,439 @@
+/*
+ * (C) 2001  Dave Jones, Arjan van de ven.
+ * (C) 2002 - 2003  Dominik Brodowski <linux@brodo.de>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *  Based upon reverse engineered information, and on Intel documentation
+ *  for chipsets ICH2-M and ICH3-M.
+ *
+ *  Many thanks to Ducrot Bruno for finding and fixing the last
+ *  "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler
+ *  for extensive testing.
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+
+/*********************************************************************
+ *                        SPEEDSTEP - DEFINITIONS                    *
+ *********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#include "speedstep-lib.h"
+
+
+/* speedstep_chipset:
+ *   It is necessary to know which chipset is used. As accesses to
+ * this device occur at various places in this module, we need a
+ * static struct pci_dev * pointing to that device.
+ */
+static struct pci_dev *speedstep_chipset_dev;
+
+
+/* speedstep_processor
+ */
+static unsigned int speedstep_processor = 0;
+
+static u32 pmbase;
+
+/*
+ *   There are only two frequency states for each processor. Values
+ * are in kHz for the time being.
+ */
+static struct cpufreq_frequency_table speedstep_freqs[] = {
+	{SPEEDSTEP_HIGH,	0},
+	{SPEEDSTEP_LOW,		0},
+	{0,			CPUFREQ_TABLE_END},
+};
+
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg)
+
+
+/**
+ * speedstep_find_register - read the PMBASE address
+ *
+ * Returns: -ENODEV if no register could be found
+ */
+static int speedstep_find_register (void)
+{
+	if (!speedstep_chipset_dev)
+		return -ENODEV;
+
+	/* get PMBASE */
+	pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase);
+	if (!(pmbase & 0x01)) {
+		printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
+		return -ENODEV;
+	}
+
+	pmbase &= 0xFFFFFFFE;
+	if (!pmbase) {
+		printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
+		return -ENODEV;
+	}
+
+	dprintk("pmbase is 0x%x\n", pmbase);
+	return 0;
+}
+
+/**
+ * speedstep_set_state - set the SpeedStep state
+ * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
+ *
+ *   Tries to change the SpeedStep state.
+ */
+static void speedstep_set_state (unsigned int state)
+{
+	u8 pm2_blk;
+	u8 value;
+	unsigned long flags;
+
+	if (state > 0x1)
+		return;
+
+	/* Disable IRQs */
+	local_irq_save(flags);
+
+	/* read state */
+	value = inb(pmbase + 0x50);
+
+	dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
+
+	/* write new state */
+	value &= 0xFE;
+	value |= state;
+
+	dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase);
+
+	/* Disable bus master arbitration */
+	pm2_blk = inb(pmbase + 0x20);
+	pm2_blk |= 0x01;
+	outb(pm2_blk, (pmbase + 0x20));
+
+	/* Actual transition */
+	outb(value, (pmbase + 0x50));
+
+	/* Restore bus master arbitration */
+	pm2_blk &= 0xfe;
+	outb(pm2_blk, (pmbase + 0x20));
+
+	/* check if transition was successful */
+	value = inb(pmbase + 0x50);
+
+	/* Enable IRQs */
+	local_irq_restore(flags);
+
+	dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
+
+	if (state == (value & 0x1)) {
+		dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000));
+	} else {
+		printk (KERN_ERR "cpufreq: change failed - I/O error\n");
+	}
+
+	return;
+}
+
+
+/**
+ * speedstep_activate - activate SpeedStep control in the chipset
+ *
+ *   Tries to activate the SpeedStep status and control registers.
+ * Returns -EINVAL on an unsupported chipset, and zero on success.
+ */
+static int speedstep_activate (void)
+{
+	u16 value = 0;
+
+	if (!speedstep_chipset_dev)
+		return -EINVAL;
+
+	pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value);
+	if (!(value & 0x08)) {
+		value |= 0x08;
+		dprintk("activating SpeedStep (TM) registers\n");
+		pci_write_config_word(speedstep_chipset_dev, 0x00A0, value);
+	}
+
+	return 0;
+}
+
+
+/**
+ * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic
+ *
+ *   Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to
+ * the LPC bridge / PM module which contains all power-management
+ * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected
+ * chipset, or zero on failure.
+ */
+static unsigned int speedstep_detect_chipset (void)
+{
+	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+			      PCI_DEVICE_ID_INTEL_82801DB_12,
+			      PCI_ANY_ID,
+			      PCI_ANY_ID,
+			      NULL);
+	if (speedstep_chipset_dev)
+		return 4; /* 4-M */
+
+	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+			      PCI_DEVICE_ID_INTEL_82801CA_12,
+			      PCI_ANY_ID,
+			      PCI_ANY_ID,
+			      NULL);
+	if (speedstep_chipset_dev)
+		return 3; /* 3-M */
+
+
+	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+			      PCI_DEVICE_ID_INTEL_82801BA_10,
+			      PCI_ANY_ID,
+			      PCI_ANY_ID,
+			      NULL);
+	if (speedstep_chipset_dev) {
+		/* speedstep.c causes lockups on Dell Inspirons 8000 and
+		 * 8100 which use a pretty old revision of the 82815
+		 * host brige. Abort on these systems.
+		 */
+		static struct pci_dev *hostbridge;
+
+		hostbridge  = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+			      PCI_DEVICE_ID_INTEL_82815_MC,
+			      PCI_ANY_ID,
+			      PCI_ANY_ID,
+			      NULL);
+
+		if (!hostbridge)
+			return 2; /* 2-M */
+
+		if (hostbridge->revision < 5) {
+			dprintk("hostbridge does not support speedstep\n");
+			speedstep_chipset_dev = NULL;
+			pci_dev_put(hostbridge);
+			return 0;
+		}
+
+		pci_dev_put(hostbridge);
+		return 2; /* 2-M */
+	}
+
+	return 0;
+}
+
+static unsigned int _speedstep_get(cpumask_t cpus)
+{
+	unsigned int speed;
+	cpumask_t cpus_allowed;
+
+	cpus_allowed = current->cpus_allowed;
+	set_cpus_allowed(current, cpus);
+	speed = speedstep_get_processor_frequency(speedstep_processor);
+	set_cpus_allowed(current, cpus_allowed);
+	dprintk("detected %u kHz as current frequency\n", speed);
+	return speed;
+}
+
+static unsigned int speedstep_get(unsigned int cpu)
+{
+	return _speedstep_get(cpumask_of_cpu(cpu));
+}
+
+/**
+ * speedstep_target - set a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: the target frequency
+ * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ *
+ * Sets a new CPUFreq policy.
+ */
+static int speedstep_target (struct cpufreq_policy *policy,
+			     unsigned int target_freq,
+			     unsigned int relation)
+{
+	unsigned int newstate = 0;
+	struct cpufreq_freqs freqs;
+	cpumask_t cpus_allowed;
+	int i;
+
+	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
+		return -EINVAL;
+
+	freqs.old = _speedstep_get(policy->cpus);
+	freqs.new = speedstep_freqs[newstate].frequency;
+	freqs.cpu = policy->cpu;
+
+	dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new);
+
+	/* no transition necessary */
+	if (freqs.old == freqs.new)
+		return 0;
+
+	cpus_allowed = current->cpus_allowed;
+
+	for_each_cpu_mask(i, policy->cpus) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	}
+
+	/* switch to physical CPU where state is to be changed */
+	set_cpus_allowed(current, policy->cpus);
+
+	speedstep_set_state(newstate);
+
+	/* allow to be run on all CPUs */
+	set_cpus_allowed(current, cpus_allowed);
+
+	for_each_cpu_mask(i, policy->cpus) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	}
+
+	return 0;
+}
+
+
+/**
+ * speedstep_verify - verifies a new CPUFreq policy
+ * @policy: new policy
+ *
+ * Limit must be within speedstep_low_freq and speedstep_high_freq, with
+ * at least one border included.
+ */
+static int speedstep_verify (struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
+}
+
+
+static int speedstep_cpu_init(struct cpufreq_policy *policy)
+{
+	int result = 0;
+	unsigned int speed;
+	cpumask_t cpus_allowed;
+
+	/* only run on CPU to be set, or on its sibling */
+#ifdef CONFIG_SMP
+	policy->cpus = cpu_sibling_map[policy->cpu];
+#endif
+
+	cpus_allowed = current->cpus_allowed;
+	set_cpus_allowed(current, policy->cpus);
+
+	/* detect low and high frequency and transition latency */
+	result = speedstep_get_freqs(speedstep_processor,
+				     &speedstep_freqs[SPEEDSTEP_LOW].frequency,
+				     &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
+				     &policy->cpuinfo.transition_latency,
+				     &speedstep_set_state);
+	set_cpus_allowed(current, cpus_allowed);
+	if (result)
+		return result;
+
+	/* get current speed setting */
+	speed = _speedstep_get(policy->cpus);
+	if (!speed)
+		return -EIO;
+
+	dprintk("currently at %s speed setting - %i MHz\n",
+		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high",
+		(speed / 1000));
+
+	/* cpuinfo and default policy values */
+	policy->cur = speed;
+
+	result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
+	if (result)
+		return (result);
+
+        cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
+
+	return 0;
+}
+
+
+static int speedstep_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+static struct freq_attr* speedstep_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+
+static struct cpufreq_driver speedstep_driver = {
+	.name	= "speedstep-ich",
+	.verify	= speedstep_verify,
+	.target	= speedstep_target,
+	.init	= speedstep_cpu_init,
+	.exit	= speedstep_cpu_exit,
+	.get	= speedstep_get,
+	.owner	= THIS_MODULE,
+	.attr	= speedstep_attr,
+};
+
+
+/**
+ * speedstep_init - initializes the SpeedStep CPUFreq driver
+ *
+ *   Initializes the SpeedStep support. Returns -ENODEV on unsupported
+ * devices, -EINVAL on problems during initiatization, and zero on
+ * success.
+ */
+static int __init speedstep_init(void)
+{
+	/* detect processor */
+	speedstep_processor = speedstep_detect_processor();
+	if (!speedstep_processor) {
+		dprintk("Intel(R) SpeedStep(TM) capable processor not found\n");
+		return -ENODEV;
+	}
+
+	/* detect chipset */
+	if (!speedstep_detect_chipset()) {
+		dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n");
+		return -ENODEV;
+	}
+
+	/* activate speedstep support */
+	if (speedstep_activate()) {
+		pci_dev_put(speedstep_chipset_dev);
+		return -EINVAL;
+	}
+
+	if (speedstep_find_register())
+		return -ENODEV;
+
+	return cpufreq_register_driver(&speedstep_driver);
+}
+
+
+/**
+ * speedstep_exit - unregisters SpeedStep support
+ *
+ *   Unregisters SpeedStep support.
+ */
+static void __exit speedstep_exit(void)
+{
+	pci_dev_put(speedstep_chipset_dev);
+	cpufreq_unregister_driver(&speedstep_driver);
+}
+
+
+MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges.");
+MODULE_LICENSE ("GPL");
+
+module_init(speedstep_init);
+module_exit(speedstep_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
new file mode 100644
index 00000000000..b1acc8ce316
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -0,0 +1,444 @@
+/*
+ * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  Library for common functions for Intel SpeedStep v.1 and v.2 support
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+
+#include <asm/msr.h>
+#include "speedstep-lib.h"
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-lib", msg)
+
+#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
+static int relaxed_check = 0;
+#else
+#define relaxed_check 0
+#endif
+
+/*********************************************************************
+ *                   GET PROCESSOR CORE SPEED IN KHZ                 *
+ *********************************************************************/
+
+static unsigned int pentium3_get_frequency (unsigned int processor)
+{
+        /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
+	struct {
+		unsigned int ratio;	/* Frequency Multiplier (x10) */
+		u8 bitmap;		/* power on configuration bits
+					[27, 25:22] (in MSR 0x2a) */
+	} msr_decode_mult [] = {
+		{ 30, 0x01 },
+		{ 35, 0x05 },
+		{ 40, 0x02 },
+		{ 45, 0x06 },
+		{ 50, 0x00 },
+		{ 55, 0x04 },
+		{ 60, 0x0b },
+		{ 65, 0x0f },
+		{ 70, 0x09 },
+		{ 75, 0x0d },
+		{ 80, 0x0a },
+		{ 85, 0x26 },
+		{ 90, 0x20 },
+		{ 100, 0x2b },
+		{ 0, 0xff }     /* error or unknown value */
+	};
+
+	/* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */
+	struct {
+		unsigned int value;	/* Front Side Bus speed in MHz */
+		u8 bitmap;		/* power on configuration bits [18: 19]
+					(in MSR 0x2a) */
+	} msr_decode_fsb [] = {
+		{  66, 0x0 },
+		{ 100, 0x2 },
+		{ 133, 0x1 },
+		{   0, 0xff}
+	};
+
+	u32 msr_lo, msr_tmp;
+	int i = 0, j = 0;
+
+	/* read MSR 0x2a - we only need the low 32 bits */
+	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
+	dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
+	msr_tmp = msr_lo;
+
+	/* decode the FSB */
+	msr_tmp &= 0x00c0000;
+	msr_tmp >>= 18;
+	while (msr_tmp != msr_decode_fsb[i].bitmap) {
+		if (msr_decode_fsb[i].bitmap == 0xff)
+			return 0;
+		i++;
+	}
+
+	/* decode the multiplier */
+	if (processor == SPEEDSTEP_PROCESSOR_PIII_C_EARLY) {
+		dprintk("workaround for early PIIIs\n");
+		msr_lo &= 0x03c00000;
+	} else
+		msr_lo &= 0x0bc00000;
+	msr_lo >>= 22;
+	while (msr_lo != msr_decode_mult[j].bitmap) {
+		if (msr_decode_mult[j].bitmap == 0xff)
+			return 0;
+		j++;
+	}
+
+	dprintk("speed is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
+
+	return (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100);
+}
+
+
+static unsigned int pentiumM_get_frequency(void)
+{
+	u32 msr_lo, msr_tmp;
+
+	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
+	dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
+
+	/* see table B-2 of 24547212.pdf */
+	if (msr_lo & 0x00040000) {
+		printk(KERN_DEBUG "speedstep-lib: PM - invalid FSB: 0x%x 0x%x\n", msr_lo, msr_tmp);
+		return 0;
+	}
+
+	msr_tmp = (msr_lo >> 22) & 0x1f;
+	dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000));
+
+	return (msr_tmp * 100 * 1000);
+}
+
+static unsigned int pentium_core_get_frequency(void)
+{
+	u32 fsb = 0;
+	u32 msr_lo, msr_tmp;
+
+	rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp);
+	/* see table B-2 of 25366920.pdf */
+	switch (msr_lo & 0x07) {
+	case 5:
+		fsb = 100000;
+		break;
+	case 1:
+		fsb = 133333;
+		break;
+	case 3:
+		fsb = 166667;
+		break;
+	default:
+		printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value");
+	}
+
+	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
+	dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
+
+	msr_tmp = (msr_lo >> 22) & 0x1f;
+	dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * fsb));
+
+	return (msr_tmp * fsb);
+}
+
+
+static unsigned int pentium4_get_frequency(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+	u32 msr_lo, msr_hi, mult;
+	unsigned int fsb = 0;
+
+	rdmsr(0x2c, msr_lo, msr_hi);
+
+	dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi);
+
+	/* decode the FSB: see IA-32 Intel (C) Architecture Software
+	 * Developer's Manual, Volume 3: System Prgramming Guide,
+	 * revision #12 in Table B-1: MSRs in the Pentium 4 and
+	 * Intel Xeon Processors, on page B-4 and B-5.
+	 */
+	if (c->x86_model < 2)
+		fsb = 100 * 1000;
+	else {
+		u8 fsb_code = (msr_lo >> 16) & 0x7;
+		switch (fsb_code) {
+		case 0:
+			fsb = 100 * 1000;
+			break;
+		case 1:
+			fsb = 13333 * 10;
+			break;
+		case 2:
+			fsb = 200 * 1000;
+			break;
+		}
+	}
+
+	if (!fsb)
+		printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to <linux@brodo.de>\n");
+
+	/* Multiplier. */
+	if (c->x86_model < 2)
+		mult = msr_lo >> 27;
+	else
+		mult = msr_lo >> 24;
+
+	dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult));
+
+	return (fsb * mult);
+}
+
+
+unsigned int speedstep_get_processor_frequency(unsigned int processor)
+{
+	switch (processor) {
+	case SPEEDSTEP_PROCESSOR_PCORE:
+		return pentium_core_get_frequency();
+	case SPEEDSTEP_PROCESSOR_PM:
+		return pentiumM_get_frequency();
+	case SPEEDSTEP_PROCESSOR_P4D:
+	case SPEEDSTEP_PROCESSOR_P4M:
+		return pentium4_get_frequency();
+	case SPEEDSTEP_PROCESSOR_PIII_T:
+	case SPEEDSTEP_PROCESSOR_PIII_C:
+	case SPEEDSTEP_PROCESSOR_PIII_C_EARLY:
+		return pentium3_get_frequency(processor);
+	default:
+		return 0;
+	};
+	return 0;
+}
+EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency);
+
+
+/*********************************************************************
+ *                 DETECT SPEEDSTEP-CAPABLE PROCESSOR                *
+ *********************************************************************/
+
+unsigned int speedstep_detect_processor (void)
+{
+	struct cpuinfo_x86 *c = cpu_data;
+	u32 ebx, msr_lo, msr_hi;
+
+	dprintk("x86: %x, model: %x\n", c->x86, c->x86_model);
+
+	if ((c->x86_vendor != X86_VENDOR_INTEL) ||
+	    ((c->x86 != 6) && (c->x86 != 0xF)))
+		return 0;
+
+	if (c->x86 == 0xF) {
+		/* Intel Mobile Pentium 4-M
+		 * or Intel Mobile Pentium 4 with 533 MHz FSB */
+		if (c->x86_model != 2)
+			return 0;
+
+		ebx = cpuid_ebx(0x00000001);
+		ebx &= 0x000000FF;
+
+		dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
+
+		switch (c->x86_mask) {
+		case 4:
+			/*
+			 * B-stepping [M-P4-M]
+			 * sample has ebx = 0x0f, production has 0x0e.
+			 */
+			if ((ebx == 0x0e) || (ebx == 0x0f))
+				return SPEEDSTEP_PROCESSOR_P4M;
+			break;
+		case 7:
+			/*
+			 * C-stepping [M-P4-M]
+			 * needs to have ebx=0x0e, else it's a celeron:
+			 * cf. 25130917.pdf / page 7, footnote 5 even
+			 * though 25072120.pdf / page 7 doesn't say
+			 * samples are only of B-stepping...
+			 */
+			if (ebx == 0x0e)
+				return SPEEDSTEP_PROCESSOR_P4M;
+			break;
+		case 9:
+			/*
+			 * D-stepping [M-P4-M or M-P4/533]
+			 *
+			 * this is totally strange: CPUID 0x0F29 is
+			 * used by M-P4-M, M-P4/533 and(!) Celeron CPUs.
+			 * The latter need to be sorted out as they don't
+			 * support speedstep.
+			 * Celerons with CPUID 0x0F29 may have either
+			 * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything
+			 * specific.
+			 * M-P4-Ms may have either ebx=0xe or 0xf [see above]
+			 * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf]
+			 * also, M-P4M HTs have ebx=0x8, too
+			 * For now, they are distinguished by the model_id string
+			 */
+			if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL))
+				return SPEEDSTEP_PROCESSOR_P4M;
+			break;
+		default:
+			break;
+		}
+		return 0;
+	}
+
+	switch (c->x86_model) {
+	case 0x0B: /* Intel PIII [Tualatin] */
+		/* cpuid_ebx(1) is 0x04 for desktop PIII, 0x06 for mobile PIII-M */
+		ebx = cpuid_ebx(0x00000001);
+		dprintk("ebx is %x\n", ebx);
+
+		ebx &= 0x000000FF;
+
+		if (ebx != 0x06)
+			return 0;
+
+		/* So far all PIII-M processors support SpeedStep. See
+		 * Intel's 24540640.pdf of June 2003
+		 */
+		return SPEEDSTEP_PROCESSOR_PIII_T;
+
+	case 0x08: /* Intel PIII [Coppermine] */
+
+		/* all mobile PIII Coppermines have FSB 100 MHz
+		 * ==> sort out a few desktop PIIIs. */
+		rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi);
+		dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi);
+		msr_lo &= 0x00c0000;
+		if (msr_lo != 0x0080000)
+			return 0;
+
+		/*
+		 * If the processor is a mobile version,
+		 * platform ID has bit 50 set
+		 * it has SpeedStep technology if either
+		 * bit 56 or 57 is set
+		 */
+		rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi);
+		dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi);
+		if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
+			if (c->x86_mask == 0x01) {
+				dprintk("early PIII version\n");
+				return SPEEDSTEP_PROCESSOR_PIII_C_EARLY;
+			} else
+				return SPEEDSTEP_PROCESSOR_PIII_C;
+		}
+
+	default:
+		return 0;
+	}
+}
+EXPORT_SYMBOL_GPL(speedstep_detect_processor);
+
+
+/*********************************************************************
+ *                     DETECT SPEEDSTEP SPEEDS                       *
+ *********************************************************************/
+
+unsigned int speedstep_get_freqs(unsigned int processor,
+				  unsigned int *low_speed,
+				  unsigned int *high_speed,
+				  unsigned int *transition_latency,
+				  void (*set_state) (unsigned int state))
+{
+	unsigned int prev_speed;
+	unsigned int ret = 0;
+	unsigned long flags;
+	struct timeval tv1, tv2;
+
+	if ((!processor) || (!low_speed) || (!high_speed) || (!set_state))
+		return -EINVAL;
+
+	dprintk("trying to determine both speeds\n");
+
+	/* get current speed */
+	prev_speed = speedstep_get_processor_frequency(processor);
+	if (!prev_speed)
+		return -EIO;
+
+	dprintk("previous speed is %u\n", prev_speed);
+
+	local_irq_save(flags);
+
+	/* switch to low state */
+	set_state(SPEEDSTEP_LOW);
+	*low_speed = speedstep_get_processor_frequency(processor);
+	if (!*low_speed) {
+		ret = -EIO;
+		goto out;
+	}
+
+	dprintk("low speed is %u\n", *low_speed);
+
+	/* start latency measurement */
+	if (transition_latency)
+		do_gettimeofday(&tv1);
+
+	/* switch to high state */
+	set_state(SPEEDSTEP_HIGH);
+
+	/* end latency measurement */
+	if (transition_latency)
+		do_gettimeofday(&tv2);
+
+	*high_speed = speedstep_get_processor_frequency(processor);
+	if (!*high_speed) {
+		ret = -EIO;
+		goto out;
+	}
+
+	dprintk("high speed is %u\n", *high_speed);
+
+	if (*low_speed == *high_speed) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	/* switch to previous state, if necessary */
+	if (*high_speed != prev_speed)
+		set_state(SPEEDSTEP_LOW);
+
+	if (transition_latency) {
+		*transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC +
+			tv2.tv_usec - tv1.tv_usec;
+		dprintk("transition latency is %u uSec\n", *transition_latency);
+
+		/* convert uSec to nSec and add 20% for safety reasons */
+		*transition_latency *= 1200;
+
+		/* check if the latency measurement is too high or too low
+		 * and set it to a safe value (500uSec) in that case
+		 */
+		if (*transition_latency > 10000000 || *transition_latency < 50000) {
+			printk (KERN_WARNING "speedstep: frequency transition measured seems out of "
+					"range (%u nSec), falling back to a safe one of %u nSec.\n",
+					*transition_latency, 500000);
+			*transition_latency = 500000;
+		}
+	}
+
+out:
+	local_irq_restore(flags);
+	return (ret);
+}
+EXPORT_SYMBOL_GPL(speedstep_get_freqs);
+
+#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
+module_param(relaxed_check, int, 0444);
+MODULE_PARM_DESC(relaxed_check, "Don't do all checks for speedstep capability.");
+#endif
+
+MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION ("Library for Intel SpeedStep 1 or 2 cpufreq drivers.");
+MODULE_LICENSE ("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
new file mode 100644
index 00000000000..b11bcc608ca
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
@@ -0,0 +1,49 @@
+/*
+ * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  Library for common functions for Intel SpeedStep v.1 and v.2 support
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+
+
+/* processors */
+
+#define SPEEDSTEP_PROCESSOR_PIII_C_EARLY	0x00000001  /* Coppermine core */
+#define SPEEDSTEP_PROCESSOR_PIII_C		0x00000002  /* Coppermine core */
+#define SPEEDSTEP_PROCESSOR_PIII_T		0x00000003  /* Tualatin core */
+#define SPEEDSTEP_PROCESSOR_P4M			0x00000004  /* P4-M  */
+
+/* the following processors are not speedstep-capable and are not auto-detected
+ * in speedstep_detect_processor(). However, their speed can be detected using
+ * the speedstep_get_processor_frequency() call. */
+#define SPEEDSTEP_PROCESSOR_PM			0xFFFFFF03  /* Pentium M  */
+#define SPEEDSTEP_PROCESSOR_P4D			0xFFFFFF04  /* desktop P4  */
+#define SPEEDSTEP_PROCESSOR_PCORE		0xFFFFFF05  /* Core */
+
+/* speedstep states -- only two of them */
+
+#define SPEEDSTEP_HIGH	0x00000000
+#define SPEEDSTEP_LOW	0x00000001
+
+
+/* detect a speedstep-capable processor */
+extern unsigned int speedstep_detect_processor (void);
+
+/* detect the current speed (in khz) of the processor */
+extern unsigned int speedstep_get_processor_frequency(unsigned int processor);
+
+
+/* detect the low and high speeds of the processor. The callback
+ * set_state"'s first argument is either SPEEDSTEP_HIGH or
+ * SPEEDSTEP_LOW; the second argument is zero so that no
+ * cpufreq_notify_transition calls are initiated.
+ */
+extern unsigned int speedstep_get_freqs(unsigned int processor,
+	unsigned int *low_speed,
+	unsigned int *high_speed,
+	unsigned int *transition_latency,
+	void (*set_state) (unsigned int state));
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
new file mode 100644
index 00000000000..f2b5a621d27
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -0,0 +1,423 @@
+/*
+ * Intel SpeedStep SMI driver.
+ *
+ * (C) 2003  Hiroshi Miura <miura@da-cha.org>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ */
+
+
+/*********************************************************************
+ *                        SPEEDSTEP - DEFINITIONS                    *
+ *********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <asm/ist.h>
+#include <asm/io.h>
+
+#include "speedstep-lib.h"
+
+/* speedstep system management interface port/command.
+ *
+ * These parameters are got from IST-SMI BIOS call.
+ * If user gives it, these are used.
+ *
+ */
+static int smi_port = 0;
+static int smi_cmd = 0;
+static unsigned int smi_sig = 0;
+
+/* info about the processor */
+static unsigned int speedstep_processor = 0;
+
+/*
+ * There are only two frequency states for each processor. Values
+ * are in kHz for the time being.
+ */
+static struct cpufreq_frequency_table speedstep_freqs[] = {
+	{SPEEDSTEP_HIGH,	0},
+	{SPEEDSTEP_LOW,		0},
+	{0,			CPUFREQ_TABLE_END},
+};
+
+#define GET_SPEEDSTEP_OWNER 0
+#define GET_SPEEDSTEP_STATE 1
+#define SET_SPEEDSTEP_STATE 2
+#define GET_SPEEDSTEP_FREQS 4
+
+/* how often shall the SMI call be tried if it failed, e.g. because
+ * of DMA activity going on? */
+#define SMI_TRIES 5
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-smi", msg)
+
+/**
+ * speedstep_smi_ownership
+ */
+static int speedstep_smi_ownership (void)
+{
+	u32 command, result, magic;
+	u32 function = GET_SPEEDSTEP_OWNER;
+	unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation";
+
+	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+	magic = virt_to_phys(magic_data);
+
+	dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port);
+
+	__asm__ __volatile__(
+		"out %%al, (%%dx)\n"
+		: "=D" (result)
+		: "a" (command), "b" (function), "c" (0), "d" (smi_port),
+			"D" (0), "S" (magic)
+		: "memory"
+	);
+
+	dprintk("result is %x\n", result);
+
+	return result;
+}
+
+/**
+ * speedstep_smi_get_freqs - get SpeedStep preferred & current freq.
+ * @low: the low frequency value is placed here
+ * @high: the high frequency value is placed here
+ *
+ * Only available on later SpeedStep-enabled systems, returns false results or
+ * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing
+ * shows that the latter occurs if !(ist_info.event & 0xFFFF).
+ */
+static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
+{
+	u32 command, result = 0, edi, high_mhz, low_mhz;
+	u32 state=0;
+	u32 function = GET_SPEEDSTEP_FREQS;
+
+	if (!(ist_info.event & 0xFFFF)) {
+		dprintk("bug #1422 -- can't read freqs from BIOS\n");
+		return -ENODEV;
+	}
+
+	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+
+	dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port);
+
+	__asm__ __volatile__("movl $0, %%edi\n"
+		"out %%al, (%%dx)\n"
+		: "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi)
+		: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
+	);
+
+	dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz);
+
+	/* abort if results are obviously incorrect... */
+	if ((high_mhz + low_mhz) < 600)
+		return -EINVAL;
+
+	*high = high_mhz * 1000;
+	*low  = low_mhz  * 1000;
+
+	return result;
+}
+
+/**
+ * speedstep_get_state - set the SpeedStep state
+ * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
+ *
+ */
+static int speedstep_get_state (void)
+{
+	u32 function=GET_SPEEDSTEP_STATE;
+	u32 result, state, edi, command;
+
+	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+
+	dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port);
+
+	__asm__ __volatile__("movl $0, %%edi\n"
+		"out %%al, (%%dx)\n"
+		: "=a" (result), "=b" (state), "=D" (edi)
+		: "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0)
+	);
+
+	dprintk("state is %x, result is %x\n", state, result);
+
+	return (state & 1);
+}
+
+
+/**
+ * speedstep_set_state - set the SpeedStep state
+ * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
+ *
+ */
+static void speedstep_set_state (unsigned int state)
+{
+	unsigned int result = 0, command, new_state;
+	unsigned long flags;
+	unsigned int function=SET_SPEEDSTEP_STATE;
+	unsigned int retry = 0;
+
+	if (state > 0x1)
+		return;
+
+	/* Disable IRQs */
+	local_irq_save(flags);
+
+	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+
+	dprintk("trying to set frequency to state %u with command %x at port %x\n", state, command, smi_port);
+
+	do {
+		if (retry) {
+			dprintk("retry %u, previous result %u, waiting...\n", retry, result);
+			mdelay(retry * 50);
+		}
+		retry++;
+		__asm__ __volatile__(
+			"movl $0, %%edi\n"
+			"out %%al, (%%dx)\n"
+			: "=b" (new_state), "=D" (result)
+			: "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
+			);
+	} while ((new_state != state) && (retry <= SMI_TRIES));
+
+	/* enable IRQs */
+	local_irq_restore(flags);
+
+	if (new_state == state) {
+		dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result);
+	} else {
+		printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result);
+	}
+
+	return;
+}
+
+
+/**
+ * speedstep_target - set a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: new freq
+ * @relation:
+ *
+ * Sets a new CPUFreq policy/freq.
+ */
+static int speedstep_target (struct cpufreq_policy *policy,
+			unsigned int target_freq, unsigned int relation)
+{
+	unsigned int newstate = 0;
+	struct cpufreq_freqs freqs;
+
+	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
+		return -EINVAL;
+
+	freqs.old = speedstep_freqs[speedstep_get_state()].frequency;
+	freqs.new = speedstep_freqs[newstate].frequency;
+	freqs.cpu = 0; /* speedstep.c is UP only driver */
+
+	if (freqs.old == freqs.new)
+		return 0;
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	speedstep_set_state(newstate);
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	return 0;
+}
+
+
+/**
+ * speedstep_verify - verifies a new CPUFreq policy
+ * @policy: new policy
+ *
+ * Limit must be within speedstep_low_freq and speedstep_high_freq, with
+ * at least one border included.
+ */
+static int speedstep_verify (struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
+}
+
+
+static int speedstep_cpu_init(struct cpufreq_policy *policy)
+{
+	int result;
+	unsigned int speed,state;
+
+	/* capability check */
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	result = speedstep_smi_ownership();
+	if (result) {
+		dprintk("fails in aquiring ownership of a SMI interface.\n");
+		return -EINVAL;
+	}
+
+	/* detect low and high frequency */
+	result = speedstep_smi_get_freqs(&speedstep_freqs[SPEEDSTEP_LOW].frequency,
+				&speedstep_freqs[SPEEDSTEP_HIGH].frequency);
+	if (result) {
+		/* fall back to speedstep_lib.c dection mechanism: try both states out */
+		dprintk("could not detect low and high frequencies by SMI call.\n");
+		result = speedstep_get_freqs(speedstep_processor,
+				&speedstep_freqs[SPEEDSTEP_LOW].frequency,
+				&speedstep_freqs[SPEEDSTEP_HIGH].frequency,
+				NULL,
+				&speedstep_set_state);
+
+		if (result) {
+			dprintk("could not detect two different speeds -- aborting.\n");
+			return result;
+		} else
+			dprintk("workaround worked.\n");
+	}
+
+	/* get current speed setting */
+	state = speedstep_get_state();
+	speed = speedstep_freqs[state].frequency;
+
+	dprintk("currently at %s speed setting - %i MHz\n",
+		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high",
+		(speed / 1000));
+
+	/* cpuinfo and default policy values */
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	policy->cur = speed;
+
+	result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
+	if (result)
+		return (result);
+
+	cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
+
+	return 0;
+}
+
+static int speedstep_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+static unsigned int speedstep_get(unsigned int cpu)
+{
+	if (cpu)
+		return -ENODEV;
+	return speedstep_get_processor_frequency(speedstep_processor);
+}
+
+
+static int speedstep_resume(struct cpufreq_policy *policy)
+{
+	int result = speedstep_smi_ownership();
+
+	if (result)
+		dprintk("fails in re-aquiring ownership of a SMI interface.\n");
+
+	return result;
+}
+
+static struct freq_attr* speedstep_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver speedstep_driver = {
+	.name		= "speedstep-smi",
+	.verify		= speedstep_verify,
+	.target		= speedstep_target,
+	.init		= speedstep_cpu_init,
+	.exit		= speedstep_cpu_exit,
+	.get		= speedstep_get,
+	.resume		= speedstep_resume,
+	.owner		= THIS_MODULE,
+	.attr		= speedstep_attr,
+};
+
+/**
+ * speedstep_init - initializes the SpeedStep CPUFreq driver
+ *
+ *   Initializes the SpeedStep support. Returns -ENODEV on unsupported
+ * BIOS, -EINVAL on problems during initiatization, and zero on
+ * success.
+ */
+static int __init speedstep_init(void)
+{
+	speedstep_processor = speedstep_detect_processor();
+
+	switch (speedstep_processor) {
+	case SPEEDSTEP_PROCESSOR_PIII_T:
+	case SPEEDSTEP_PROCESSOR_PIII_C:
+	case SPEEDSTEP_PROCESSOR_PIII_C_EARLY:
+		break;
+	default:
+		speedstep_processor = 0;
+	}
+
+	if (!speedstep_processor) {
+		dprintk ("No supported Intel CPU detected.\n");
+		return -ENODEV;
+	}
+
+	dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n",
+		ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level);
+
+	/* Error if no IST-SMI BIOS or no PARM
+		 sig= 'ISGE' aka 'Intel Speedstep Gate E' */
+	if ((ist_info.signature !=  0x47534943) && (
+	    (smi_port == 0) || (smi_cmd == 0)))
+		return -ENODEV;
+
+	if (smi_sig == 1)
+		smi_sig = 0x47534943;
+	else
+		smi_sig = ist_info.signature;
+
+	/* setup smi_port from MODLULE_PARM or BIOS */
+	if ((smi_port > 0xff) || (smi_port < 0))
+		return -EINVAL;
+	else if (smi_port == 0)
+		smi_port = ist_info.command & 0xff;
+
+	if ((smi_cmd > 0xff) || (smi_cmd < 0))
+		return -EINVAL;
+	else if (smi_cmd == 0)
+		smi_cmd = (ist_info.command >> 16) & 0xff;
+
+	return cpufreq_register_driver(&speedstep_driver);
+}
+
+
+/**
+ * speedstep_exit - unregisters SpeedStep support
+ *
+ *   Unregisters SpeedStep support.
+ */
+static void __exit speedstep_exit(void)
+{
+	cpufreq_unregister_driver(&speedstep_driver);
+}
+
+module_param(smi_port,  int, 0444);
+module_param(smi_cmd,   int, 0444);
+module_param(smi_sig,  uint, 0444);
+
+MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value -- Intel's default setting is 0xb2");
+MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value -- Intel's default setting is 0x82");
+MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the SMI interface.");
+
+MODULE_AUTHOR ("Hiroshi Miura");
+MODULE_DESCRIPTION ("Speedstep driver for IST applet SMI interface.");
+MODULE_LICENSE ("GPL");
+
+module_init(speedstep_init);
+module_exit(speedstep_exit);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
new file mode 100644
index 00000000000..122d2d75aa9
--- /dev/null
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -0,0 +1,463 @@
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <asm/dma.h>
+#include <asm/io.h>
+#include <asm/processor-cyrix.h>
+#include <asm/timer.h>
+#include <asm/pci-direct.h>
+#include <asm/tsc.h>
+
+#include "cpu.h"
+
+/*
+ * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
+ */
+static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
+{
+	unsigned char ccr2, ccr3;
+	unsigned long flags;
+	
+	/* we test for DEVID by checking whether CCR3 is writable */
+	local_irq_save(flags);
+	ccr3 = getCx86(CX86_CCR3);
+	setCx86(CX86_CCR3, ccr3 ^ 0x80);
+	getCx86(0xc0);   /* dummy to change bus */
+
+	if (getCx86(CX86_CCR3) == ccr3) {       /* no DEVID regs. */
+		ccr2 = getCx86(CX86_CCR2);
+		setCx86(CX86_CCR2, ccr2 ^ 0x04);
+		getCx86(0xc0);  /* dummy */
+
+		if (getCx86(CX86_CCR2) == ccr2) /* old Cx486SLC/DLC */
+			*dir0 = 0xfd;
+		else {                          /* Cx486S A step */
+			setCx86(CX86_CCR2, ccr2);
+			*dir0 = 0xfe;
+		}
+	}
+	else {
+		setCx86(CX86_CCR3, ccr3);  /* restore CCR3 */
+
+		/* read DIR0 and DIR1 CPU registers */
+		*dir0 = getCx86(CX86_DIR0);
+		*dir1 = getCx86(CX86_DIR1);
+	}
+	local_irq_restore(flags);
+}
+
+/*
+ * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in
+ * order to identify the Cyrix CPU model after we're out of setup.c
+ *
+ * Actually since bugs.h doesn't even reference this perhaps someone should
+ * fix the documentation ???
+ */
+static unsigned char Cx86_dir0_msb __cpuinitdata = 0;
+
+static char Cx86_model[][9] __cpuinitdata = {
+	"Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
+	"M II ", "Unknown"
+};
+static char Cx486_name[][5] __cpuinitdata = {
+	"SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
+	"SRx2", "DRx2"
+};
+static char Cx486S_name[][4] __cpuinitdata = {
+	"S", "S2", "Se", "S2e"
+};
+static char Cx486D_name[][4] __cpuinitdata = {
+	"DX", "DX2", "?", "?", "?", "DX4"
+};
+static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock";
+static char cyrix_model_mult1[] __cpuinitdata = "12??43";
+static char cyrix_model_mult2[] __cpuinitdata = "12233445";
+
+/*
+ * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
+ * BIOSes for compatibility with DOS games.  This makes the udelay loop
+ * work correctly, and improves performance.
+ *
+ * FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP
+ */
+
+extern void calibrate_delay(void) __init;
+
+static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c)
+{
+	unsigned long flags;
+	
+	if (Cx86_dir0_msb == 3) {
+		unsigned char ccr3, ccr5;
+
+		local_irq_save(flags);
+		ccr3 = getCx86(CX86_CCR3);
+		setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */
+		ccr5 = getCx86(CX86_CCR5);
+		if (ccr5 & 2)
+			setCx86(CX86_CCR5, ccr5 & 0xfd);  /* reset SLOP */
+		setCx86(CX86_CCR3, ccr3);                 /* disable MAPEN */
+		local_irq_restore(flags);
+
+		if (ccr5 & 2) { /* possible wrong calibration done */
+			printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n");
+			calibrate_delay();
+			c->loops_per_jiffy = loops_per_jiffy;
+		}
+	}
+}
+
+
+static void __cpuinit set_cx86_reorder(void)
+{
+	u8 ccr3;
+
+	printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
+	ccr3 = getCx86(CX86_CCR3);
+	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN� */
+
+	/* Load/Store Serialize to mem access disable (=reorder it)� */
+	setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80);
+	/* set load/store serialize from 1GB to 4GB */
+	ccr3 |= 0xe0;
+	setCx86(CX86_CCR3, ccr3);
+}
+
+static void __cpuinit set_cx86_memwb(void)
+{
+	u32 cr0;
+
+	printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
+
+	/* CCR2 bit 2: unlock NW bit */
+	setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
+	/* set 'Not Write-through' */
+	cr0 = 0x20000000;
+	write_cr0(read_cr0() | cr0);
+	/* CCR2 bit 2: lock NW bit and set WT1 */
+	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
+}
+
+static void __cpuinit set_cx86_inc(void)
+{
+	unsigned char ccr3;
+
+	printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n");
+
+	ccr3 = getCx86(CX86_CCR3);
+	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN� */
+	/* PCR1 -- Performance Control */
+	/* Incrementor on, whatever that is */
+	setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02);
+	/* PCR0 -- Performance Control */
+	/* Incrementor Margin 10 */
+	setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); 
+	setCx86(CX86_CCR3, ccr3);	/* disable MAPEN */
+}
+
+/*
+ *	Configure later MediaGX and/or Geode processor.
+ */
+
+static void __cpuinit geode_configure(void)
+{
+	unsigned long flags;
+	u8 ccr3;
+	local_irq_save(flags);
+
+	/* Suspend on halt power saving and enable #SUSP pin */
+	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
+
+	ccr3 = getCx86(CX86_CCR3);
+	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);	/* enable MAPEN */
+	
+
+	/* FPU fast, DTE cache, Mem bypass */
+	setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38);
+	setCx86(CX86_CCR3, ccr3);			/* disable MAPEN */
+	
+	set_cx86_memwb();
+	set_cx86_reorder();	
+	set_cx86_inc();
+	
+	local_irq_restore(flags);
+}
+
+
+static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
+{
+	unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
+	char *buf = c->x86_model_id;
+	const char *p = NULL;
+
+	/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+	   3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+	clear_bit(0*32+31, c->x86_capability);
+
+	/* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */
+	if ( test_bit(1*32+24, c->x86_capability) ) {
+		clear_bit(1*32+24, c->x86_capability);
+		set_bit(X86_FEATURE_CXMMX, c->x86_capability);
+	}
+
+	do_cyrix_devid(&dir0, &dir1);
+
+	check_cx686_slop(c);
+
+	Cx86_dir0_msb = dir0_msn = dir0 >> 4; /* identifies CPU "family"   */
+	dir0_lsn = dir0 & 0xf;                /* model or clock multiplier */
+
+	/* common case step number/rev -- exceptions handled below */
+	c->x86_model = (dir1 >> 4) + 1;
+	c->x86_mask = dir1 & 0xf;
+
+	/* Now cook; the original recipe is by Channing Corn, from Cyrix.
+	 * We do the same thing for each generation: we work out
+	 * the model, multiplier and stepping.  Black magic included,
+	 * to make the silicon step/rev numbers match the printed ones.
+	 */
+	 
+	switch (dir0_msn) {
+		unsigned char tmp;
+
+	case 0: /* Cx486SLC/DLC/SRx/DRx */
+		p = Cx486_name[dir0_lsn & 7];
+		break;
+
+	case 1: /* Cx486S/DX/DX2/DX4 */
+		p = (dir0_lsn & 8) ? Cx486D_name[dir0_lsn & 5]
+			: Cx486S_name[dir0_lsn & 3];
+		break;
+
+	case 2: /* 5x86 */
+		Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5];
+		p = Cx86_cb+2;
+		break;
+
+	case 3: /* 6x86/6x86L */
+		Cx86_cb[1] = ' ';
+		Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5];
+		if (dir1 > 0x21) { /* 686L */
+			Cx86_cb[0] = 'L';
+			p = Cx86_cb;
+			(c->x86_model)++;
+		} else             /* 686 */
+			p = Cx86_cb+1;
+		/* Emulate MTRRs using Cyrix's ARRs. */
+		set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability);
+		/* 6x86's contain this bug */
+		c->coma_bug = 1;
+		break;
+
+	case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
+#ifdef CONFIG_PCI
+	{
+		u32 vendor, device;
+		/* It isn't really a PCI quirk directly, but the cure is the
+		   same. The MediaGX has deep magic SMM stuff that handles the
+		   SB emulation. It thows away the fifo on disable_dma() which
+		   is wrong and ruins the audio. 
+
+		   Bug2: VSA1 has a wrap bug so that using maximum sized DMA 
+		   causes bad things. According to NatSemi VSA2 has another
+		   bug to do with 'hlt'. I've not seen any boards using VSA2
+		   and X doesn't seem to support it either so who cares 8).
+		   VSA1 we work around however.
+		*/
+
+		printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
+		isa_dma_bridge_buggy = 2;
+
+		/* We do this before the PCI layer is running. However we
+		   are safe here as we know the bridge must be a Cyrix
+		   companion and must be present */
+		vendor = read_pci_config_16(0, 0, 0x12, PCI_VENDOR_ID);
+		device = read_pci_config_16(0, 0, 0x12, PCI_DEVICE_ID);
+
+		/*
+		 *  The 5510/5520 companion chips have a funky PIT.
+		 */  
+		if (vendor == PCI_VENDOR_ID_CYRIX &&
+	 (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520))
+			mark_tsc_unstable("cyrix 5510/5520 detected");
+	}
+#endif
+		c->x86_cache_size=16;	/* Yep 16K integrated cache thats it */
+
+		/* GXm supports extended cpuid levels 'ala' AMD */
+		if (c->cpuid_level == 2) {
+			/* Enable cxMMX extensions (GX1 Datasheet 54) */
+			setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1);
+			
+			/*
+			 * GXm : 0x30 ... 0x5f GXm  datasheet 51
+			 * GXlv: 0x6x          GXlv datasheet 54
+			 *  ?  : 0x7x
+			 * GX1 : 0x8x          GX1  datasheet 56
+			 */
+			if((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <=dir1 && dir1 <= 0x8f))
+				geode_configure();
+			get_model_name(c);  /* get CPU marketing name */
+			return;
+		}
+		else {  /* MediaGX */
+			Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4';
+			p = Cx86_cb+2;
+			c->x86_model = (dir1 & 0x20) ? 1 : 2;
+		}
+		break;
+
+        case 5: /* 6x86MX/M II */
+		if (dir1 > 7)
+		{
+			dir0_msn++;  /* M II */
+			/* Enable MMX extensions (App note 108) */
+			setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
+		}
+		else
+		{
+			c->coma_bug = 1;      /* 6x86MX, it has the bug. */
+		}
+		tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0;
+		Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7];
+		p = Cx86_cb+tmp;
+        	if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20))
+			(c->x86_model)++;
+		/* Emulate MTRRs using Cyrix's ARRs. */
+		set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability);
+		break;
+
+	case 0xf:  /* Cyrix 486 without DEVID registers */
+		switch (dir0_lsn) {
+		case 0xd:  /* either a 486SLC or DLC w/o DEVID */
+			dir0_msn = 0;
+			p = Cx486_name[(c->hard_math) ? 1 : 0];
+			break;
+
+		case 0xe:  /* a 486S A step */
+			dir0_msn = 0;
+			p = Cx486S_name[0];
+			break;
+		}
+		break;
+
+	default:  /* unknown (shouldn't happen, we know everyone ;-) */
+		dir0_msn = 7;
+		break;
+	}
+	strcpy(buf, Cx86_model[dir0_msn & 7]);
+	if (p) strcat(buf, p);
+	return;
+}
+
+/*
+ * Handle National Semiconductor branded processors
+ */
+static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
+{
+	/* There may be GX1 processors in the wild that are branded
+	 * NSC and not Cyrix.
+	 *
+	 * This function only handles the GX processor, and kicks every
+	 * thing else to the Cyrix init function above - that should
+	 * cover any processors that might have been branded differently
+	 * after NSC acquired Cyrix.
+	 *
+	 * If this breaks your GX1 horribly, please e-mail
+	 * info-linux@ldcmail.amd.com to tell us.
+	 */
+
+	/* Handle the GX (Formally known as the GX2) */
+
+	if (c->x86 == 5 && c->x86_model == 5)
+		display_cacheinfo(c);
+	else
+		init_cyrix(c);
+}
+
+/*
+ * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected
+ * by the fact that they preserve the flags across the division of 5/2.
+ * PII and PPro exhibit this behavior too, but they have cpuid available.
+ */
+ 
+/*
+ * Perform the Cyrix 5/2 test. A Cyrix won't change
+ * the flags, while other 486 chips will.
+ */
+static inline int test_cyrix_52div(void)
+{
+	unsigned int test;
+
+	__asm__ __volatile__(
+	     "sahf\n\t"		/* clear flags (%eax = 0x0005) */
+	     "div %b2\n\t"	/* divide 5 by 2 */
+	     "lahf"		/* store flags into %ah */
+	     : "=a" (test)
+	     : "0" (5), "q" (2)
+	     : "cc");
+
+	/* AH is 0x02 on Cyrix after the divide.. */
+	return (unsigned char) (test >> 8) == 0x02;
+}
+
+static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c)
+{
+	/* Detect Cyrix with disabled CPUID */
+	if ( c->x86 == 4 && test_cyrix_52div() ) {
+		unsigned char dir0, dir1;
+		
+		strcpy(c->x86_vendor_id, "CyrixInstead");
+	        c->x86_vendor = X86_VENDOR_CYRIX;
+	        
+	        /* Actually enable cpuid on the older cyrix */
+	    
+	    	/* Retrieve CPU revisions */
+	    	
+		do_cyrix_devid(&dir0, &dir1);
+
+		dir0>>=4;		
+		
+		/* Check it is an affected model */
+		
+   	        if (dir0 == 5 || dir0 == 3)
+   	        {
+			unsigned char ccr3;
+			unsigned long flags;
+			printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
+			local_irq_save(flags);
+			ccr3 = getCx86(CX86_CCR3);
+			setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);       /* enable MAPEN  */
+			setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80);  /* enable cpuid  */
+			setCx86(CX86_CCR3, ccr3);                       /* disable MAPEN */
+			local_irq_restore(flags);
+		}
+	}
+}
+
+static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
+	.c_vendor	= "Cyrix",
+	.c_ident 	= { "CyrixInstead" },
+	.c_init		= init_cyrix,
+	.c_identify	= cyrix_identify,
+};
+
+int __init cyrix_init_cpu(void)
+{
+	cpu_devs[X86_VENDOR_CYRIX] = &cyrix_cpu_dev;
+	return 0;
+}
+
+static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
+	.c_vendor	= "NSC",
+	.c_ident 	= { "Geode by NSC" },
+	.c_init		= init_nsc,
+};
+
+int __init nsc_init_cpu(void)
+{
+	cpu_devs[X86_VENDOR_NSC] = &nsc_cpu_dev;
+	return 0;
+}
+
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
new file mode 100644
index 00000000000..dc4e08147b1
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel.c
@@ -0,0 +1,333 @@
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+#include <linux/string.h>
+#include <linux/bitops.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <linux/module.h>
+
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+
+#include "cpu.h"
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <mach_apic.h>
+#endif
+
+extern int trap_init_f00f_bug(void);
+
+#ifdef CONFIG_X86_INTEL_USERCOPY
+/*
+ * Alignment at which movsl is preferred for bulk memory copies.
+ */
+struct movsl_mask movsl_mask __read_mostly;
+#endif
+
+void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c)
+{
+	if (c->x86_vendor != X86_VENDOR_INTEL)
+		return;
+	/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
+	if (c->x86 == 15 && c->x86_cache_alignment == 64)
+		c->x86_cache_alignment = 128;
+}
+
+/*
+ *	Early probe support logic for ppro memory erratum #50
+ *
+ *	This is called before we do cpu ident work
+ */
+ 
+int __cpuinit ppro_with_ram_bug(void)
+{
+	/* Uses data from early_cpu_detect now */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+	    boot_cpu_data.x86 == 6 &&
+	    boot_cpu_data.x86_model == 1 &&
+	    boot_cpu_data.x86_mask < 8) {
+		printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n");
+		return 1;
+	}
+	return 0;
+}
+	
+
+/*
+ * P4 Xeon errata 037 workaround.
+ * Hardware prefetcher may cause stale data to be loaded into the cache.
+ */
+static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
+{
+	unsigned long lo, hi;
+
+	if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
+		rdmsr (MSR_IA32_MISC_ENABLE, lo, hi);
+		if ((lo & (1<<9)) == 0) {
+			printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
+			printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
+			lo |= (1<<9);	/* Disable hw prefetching */
+			wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
+		}
+	}
+}
+
+
+/*
+ * find out the number of processor cores on the die
+ */
+static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	if (c->cpuid_level < 4)
+		return 1;
+
+	/* Intel has a non-standard dependency on %ecx for this CPUID level. */
+	cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
+	if (eax & 0x1f)
+		return ((eax >> 26) + 1);
+	else
+		return 1;
+}
+
+static void __cpuinit init_intel(struct cpuinfo_x86 *c)
+{
+	unsigned int l2 = 0;
+	char *p = NULL;
+
+#ifdef CONFIG_X86_F00F_BUG
+	/*
+	 * All current models of Pentium and Pentium with MMX technology CPUs
+	 * have the F0 0F bug, which lets nonprivileged users lock up the system.
+	 * Note that the workaround only should be initialized once...
+	 */
+	c->f00f_bug = 0;
+	if (!paravirt_enabled() && c->x86 == 5) {
+		static int f00f_workaround_enabled = 0;
+
+		c->f00f_bug = 1;
+		if ( !f00f_workaround_enabled ) {
+			trap_init_f00f_bug();
+			printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
+			f00f_workaround_enabled = 1;
+		}
+	}
+#endif
+
+	select_idle_routine(c);
+	l2 = init_intel_cacheinfo(c);
+	if (c->cpuid_level > 9 ) {
+		unsigned eax = cpuid_eax(10);
+		/* Check for version and the number of counters */
+		if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
+			set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
+	}
+
+	/* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
+	if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
+		clear_bit(X86_FEATURE_SEP, c->x86_capability);
+
+	/* Names for the Pentium II/Celeron processors 
+	   detectable only by also checking the cache size.
+	   Dixon is NOT a Celeron. */
+	if (c->x86 == 6) {
+		switch (c->x86_model) {
+		case 5:
+			if (c->x86_mask == 0) {
+				if (l2 == 0)
+					p = "Celeron (Covington)";
+				else if (l2 == 256)
+					p = "Mobile Pentium II (Dixon)";
+			}
+			break;
+			
+		case 6:
+			if (l2 == 128)
+				p = "Celeron (Mendocino)";
+			else if (c->x86_mask == 0 || c->x86_mask == 5)
+				p = "Celeron-A";
+			break;
+			
+		case 8:
+			if (l2 == 128)
+				p = "Celeron (Coppermine)";
+			break;
+		}
+	}
+
+	if ( p )
+		strcpy(c->x86_model_id, p);
+	
+	c->x86_max_cores = num_cpu_cores(c);
+
+	detect_ht(c);
+
+	/* Work around errata */
+	Intel_errata_workarounds(c);
+
+#ifdef CONFIG_X86_INTEL_USERCOPY
+	/*
+	 * Set up the preferred alignment for movsl bulk memory moves
+	 */
+	switch (c->x86) {
+	case 4:		/* 486: untested */
+		break;
+	case 5:		/* Old Pentia: untested */
+		break;
+	case 6:		/* PII/PIII only like movsl with 8-byte alignment */
+		movsl_mask.mask = 7;
+		break;
+	case 15:	/* P4 is OK down to 8-byte alignment */
+		movsl_mask.mask = 7;
+		break;
+	}
+#endif
+
+	if (c->x86 == 15) {
+		set_bit(X86_FEATURE_P4, c->x86_capability);
+		set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability);
+	}
+	if (c->x86 == 6) 
+		set_bit(X86_FEATURE_P3, c->x86_capability);
+	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+		(c->x86 == 0x6 && c->x86_model >= 0x0e))
+		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+
+	if (cpu_has_ds) {
+		unsigned int l1;
+		rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
+		if (!(l1 & (1<<11)))
+			set_bit(X86_FEATURE_BTS, c->x86_capability);
+		if (!(l1 & (1<<12)))
+			set_bit(X86_FEATURE_PEBS, c->x86_capability);
+	}
+}
+
+static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+{
+	/* Intel PIII Tualatin. This comes in two flavours.
+	 * One has 256kb of cache, the other 512. We have no way
+	 * to determine which, so we use a boottime override
+	 * for the 512kb model, and assume 256 otherwise.
+	 */
+	if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0))
+		size = 256;
+	return size;
+}
+
+static struct cpu_dev intel_cpu_dev __cpuinitdata = {
+	.c_vendor	= "Intel",
+	.c_ident 	= { "GenuineIntel" },
+	.c_models = {
+		{ .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = 
+		  { 
+			  [0] = "486 DX-25/33", 
+			  [1] = "486 DX-50", 
+			  [2] = "486 SX", 
+			  [3] = "486 DX/2", 
+			  [4] = "486 SL", 
+			  [5] = "486 SX/2", 
+			  [7] = "486 DX/2-WB", 
+			  [8] = "486 DX/4", 
+			  [9] = "486 DX/4-WB"
+		  }
+		},
+		{ .vendor = X86_VENDOR_INTEL, .family = 5, .model_names =
+		  { 
+			  [0] = "Pentium 60/66 A-step", 
+			  [1] = "Pentium 60/66", 
+			  [2] = "Pentium 75 - 200",
+			  [3] = "OverDrive PODP5V83", 
+			  [4] = "Pentium MMX",
+			  [7] = "Mobile Pentium 75 - 200", 
+			  [8] = "Mobile Pentium MMX"
+		  }
+		},
+		{ .vendor = X86_VENDOR_INTEL, .family = 6, .model_names =
+		  { 
+			  [0] = "Pentium Pro A-step",
+			  [1] = "Pentium Pro", 
+			  [3] = "Pentium II (Klamath)", 
+			  [4] = "Pentium II (Deschutes)", 
+			  [5] = "Pentium II (Deschutes)", 
+			  [6] = "Mobile Pentium II",
+			  [7] = "Pentium III (Katmai)", 
+			  [8] = "Pentium III (Coppermine)", 
+			  [10] = "Pentium III (Cascades)",
+			  [11] = "Pentium III (Tualatin)",
+		  }
+		},
+		{ .vendor = X86_VENDOR_INTEL, .family = 15, .model_names =
+		  {
+			  [0] = "Pentium 4 (Unknown)",
+			  [1] = "Pentium 4 (Willamette)",
+			  [2] = "Pentium 4 (Northwood)",
+			  [4] = "Pentium 4 (Foster)",
+			  [5] = "Pentium 4 (Foster)",
+		  }
+		},
+	},
+	.c_init		= init_intel,
+	.c_size_cache	= intel_size_cache,
+};
+
+__init int intel_cpu_init(void)
+{
+	cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev;
+	return 0;
+}
+
+#ifndef CONFIG_X86_CMPXCHG
+unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
+{
+	u8 prev;
+	unsigned long flags;
+
+	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u8 *)ptr;
+	if (prev == old)
+		*(u8 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u8);
+
+unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
+{
+	u16 prev;
+	unsigned long flags;
+
+	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u16 *)ptr;
+	if (prev == old)
+		*(u16 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u16);
+
+unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
+{
+	u32 prev;
+	unsigned long flags;
+
+	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u32 *)ptr;
+	if (prev == old)
+		*(u32 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u32);
+#endif
+
+// arch_initcall(intel_cpu_init);
+
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
new file mode 100644
index 00000000000..db6c25aa577
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -0,0 +1,806 @@
+/*
+ *      Routines to indentify caches on Intel CPU.
+ *
+ *      Changes:
+ *      Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
+ *		Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
+ *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/compiler.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+
+#include <asm/processor.h>
+#include <asm/smp.h>
+
+#define LVL_1_INST	1
+#define LVL_1_DATA	2
+#define LVL_2		3
+#define LVL_3		4
+#define LVL_TRACE	5
+
+struct _cache_table
+{
+	unsigned char descriptor;
+	char cache_type;
+	short size;
+};
+
+/* all the cache descriptor types we care about (no TLB or trace cache entries) */
+static struct _cache_table cache_table[] __cpuinitdata =
+{
+	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
+	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x23, LVL_3,      1024 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x25, LVL_3,      2048 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x29, LVL_3,      4096 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
+	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
+	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
+	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
+	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
+	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x44, LVL_2,      1024 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x45, LVL_2,      2048 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x46, LVL_3,      4096 },	/* 4-way set assoc, 64 byte line size */
+	{ 0x47, LVL_3,      8192 },	/* 8-way set assoc, 64 byte line size */
+	{ 0x49, LVL_3,      4096 },	/* 16-way set assoc, 64 byte line size */
+	{ 0x4a, LVL_3,      6144 },	/* 12-way set assoc, 64 byte line size */
+	{ 0x4b, LVL_3,      8192 },	/* 16-way set assoc, 64 byte line size */
+	{ 0x4c, LVL_3,     12288 },	/* 12-way set assoc, 64 byte line size */
+	{ 0x4d, LVL_3,     16384 },	/* 16-way set assoc, 64 byte line size */
+	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
+	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
+	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
+	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
+	{ 0x78, LVL_2,    1024 },	/* 4-way set assoc, 64 byte line size */
+	{ 0x79, LVL_2,     128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x7a, LVL_2,     256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x7b, LVL_2,     512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x7c, LVL_2,    1024 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x7d, LVL_2,    2048 },	/* 8-way set assoc, 64 byte line size */
+	{ 0x7f, LVL_2,     512 },	/* 2-way set assoc, 64 byte line size */
+	{ 0x82, LVL_2,     256 },	/* 8-way set assoc, 32 byte line size */
+	{ 0x83, LVL_2,     512 },	/* 8-way set assoc, 32 byte line size */
+	{ 0x84, LVL_2,    1024 },	/* 8-way set assoc, 32 byte line size */
+	{ 0x85, LVL_2,    2048 },	/* 8-way set assoc, 32 byte line size */
+	{ 0x86, LVL_2,     512 },	/* 4-way set assoc, 64 byte line size */
+	{ 0x87, LVL_2,    1024 },	/* 8-way set assoc, 64 byte line size */
+	{ 0x00, 0, 0}
+};
+
+
+enum _cache_type
+{
+	CACHE_TYPE_NULL	= 0,
+	CACHE_TYPE_DATA = 1,
+	CACHE_TYPE_INST = 2,
+	CACHE_TYPE_UNIFIED = 3
+};
+
+union _cpuid4_leaf_eax {
+	struct {
+		enum _cache_type	type:5;
+		unsigned int		level:3;
+		unsigned int		is_self_initializing:1;
+		unsigned int		is_fully_associative:1;
+		unsigned int		reserved:4;
+		unsigned int		num_threads_sharing:12;
+		unsigned int		num_cores_on_die:6;
+	} split;
+	u32 full;
+};
+
+union _cpuid4_leaf_ebx {
+	struct {
+		unsigned int		coherency_line_size:12;
+		unsigned int		physical_line_partition:10;
+		unsigned int		ways_of_associativity:10;
+	} split;
+	u32 full;
+};
+
+union _cpuid4_leaf_ecx {
+	struct {
+		unsigned int		number_of_sets:32;
+	} split;
+	u32 full;
+};
+
+struct _cpuid4_info {
+	union _cpuid4_leaf_eax eax;
+	union _cpuid4_leaf_ebx ebx;
+	union _cpuid4_leaf_ecx ecx;
+	unsigned long size;
+	cpumask_t shared_cpu_map;
+};
+
+unsigned short			num_cache_leaves;
+
+/* AMD doesn't have CPUID4. Emulate it here to report the same
+   information to the user.  This makes some assumptions about the machine:
+   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
+
+   In theory the TLBs could be reported as fake type (they are in "dummy").
+   Maybe later */
+union l1_cache {
+	struct {
+		unsigned line_size : 8;
+		unsigned lines_per_tag : 8;
+		unsigned assoc : 8;
+		unsigned size_in_kb : 8;
+	};
+	unsigned val;
+};
+
+union l2_cache {
+	struct {
+		unsigned line_size : 8;
+		unsigned lines_per_tag : 4;
+		unsigned assoc : 4;
+		unsigned size_in_kb : 16;
+	};
+	unsigned val;
+};
+
+union l3_cache {
+	struct {
+		unsigned line_size : 8;
+		unsigned lines_per_tag : 4;
+		unsigned assoc : 4;
+		unsigned res : 2;
+		unsigned size_encoded : 14;
+	};
+	unsigned val;
+};
+
+static const unsigned short assocs[] = {
+	[1] = 1, [2] = 2, [4] = 4, [6] = 8,
+	[8] = 16, [0xa] = 32, [0xb] = 48,
+	[0xc] = 64,
+	[0xf] = 0xffff // ??
+};
+
+static const unsigned char levels[] = { 1, 1, 2, 3 };
+static const unsigned char types[] = { 1, 2, 3, 3 };
+
+static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
+		       union _cpuid4_leaf_ebx *ebx,
+		       union _cpuid4_leaf_ecx *ecx)
+{
+	unsigned dummy;
+	unsigned line_size, lines_per_tag, assoc, size_in_kb;
+	union l1_cache l1i, l1d;
+	union l2_cache l2;
+	union l3_cache l3;
+	union l1_cache *l1 = &l1d;
+
+	eax->full = 0;
+	ebx->full = 0;
+	ecx->full = 0;
+
+	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
+	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
+
+	switch (leaf) {
+	case 1:
+		l1 = &l1i;
+	case 0:
+		if (!l1->val)
+			return;
+		assoc = l1->assoc;
+		line_size = l1->line_size;
+		lines_per_tag = l1->lines_per_tag;
+		size_in_kb = l1->size_in_kb;
+		break;
+	case 2:
+		if (!l2.val)
+			return;
+		assoc = l2.assoc;
+		line_size = l2.line_size;
+		lines_per_tag = l2.lines_per_tag;
+		/* cpu_data has errata corrections for K7 applied */
+		size_in_kb = current_cpu_data.x86_cache_size;
+		break;
+	case 3:
+		if (!l3.val)
+			return;
+		assoc = l3.assoc;
+		line_size = l3.line_size;
+		lines_per_tag = l3.lines_per_tag;
+		size_in_kb = l3.size_encoded * 512;
+		break;
+	default:
+		return;
+	}
+
+	eax->split.is_self_initializing = 1;
+	eax->split.type = types[leaf];
+	eax->split.level = levels[leaf];
+	if (leaf == 3)
+		eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1;
+	else
+		eax->split.num_threads_sharing = 0;
+	eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
+
+
+	if (assoc == 0xf)
+		eax->split.is_fully_associative = 1;
+	ebx->split.coherency_line_size = line_size - 1;
+	ebx->split.ways_of_associativity = assocs[assoc] - 1;
+	ebx->split.physical_line_partition = lines_per_tag - 1;
+	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
+		(ebx->split.ways_of_associativity + 1) - 1;
+}
+
+static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+{
+	union _cpuid4_leaf_eax 	eax;
+	union _cpuid4_leaf_ebx 	ebx;
+	union _cpuid4_leaf_ecx 	ecx;
+	unsigned		edx;
+
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		amd_cpuid4(index, &eax, &ebx, &ecx);
+	else
+		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full,  &edx);
+	if (eax.split.type == CACHE_TYPE_NULL)
+		return -EIO; /* better error ? */
+
+	this_leaf->eax = eax;
+	this_leaf->ebx = ebx;
+	this_leaf->ecx = ecx;
+	this_leaf->size = (ecx.split.number_of_sets + 1) *
+		(ebx.split.coherency_line_size + 1) *
+		(ebx.split.physical_line_partition + 1) *
+		(ebx.split.ways_of_associativity + 1);
+	return 0;
+}
+
+static int __cpuinit find_num_cache_leaves(void)
+{
+	unsigned int		eax, ebx, ecx, edx;
+	union _cpuid4_leaf_eax	cache_eax;
+	int 			i = -1;
+
+	do {
+		++i;
+		/* Do cpuid(4) loop to find out num_cache_leaves */
+		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
+		cache_eax.full = eax;
+	} while (cache_eax.split.type != CACHE_TYPE_NULL);
+	return i;
+}
+
+unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
+{
+	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
+	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
+	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
+	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
+#ifdef CONFIG_X86_HT
+	unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data);
+#endif
+
+	if (c->cpuid_level > 3) {
+		static int is_initialized;
+
+		if (is_initialized == 0) {
+			/* Init num_cache_leaves from boot CPU */
+			num_cache_leaves = find_num_cache_leaves();
+			is_initialized++;
+		}
+
+		/*
+		 * Whenever possible use cpuid(4), deterministic cache
+		 * parameters cpuid leaf to find the cache details
+		 */
+		for (i = 0; i < num_cache_leaves; i++) {
+			struct _cpuid4_info this_leaf;
+
+			int retval;
+
+			retval = cpuid4_cache_lookup(i, &this_leaf);
+			if (retval >= 0) {
+				switch(this_leaf.eax.split.level) {
+				    case 1:
+					if (this_leaf.eax.split.type ==
+							CACHE_TYPE_DATA)
+						new_l1d = this_leaf.size/1024;
+					else if (this_leaf.eax.split.type ==
+							CACHE_TYPE_INST)
+						new_l1i = this_leaf.size/1024;
+					break;
+				    case 2:
+					new_l2 = this_leaf.size/1024;
+					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+					index_msb = get_count_order(num_threads_sharing);
+					l2_id = c->apicid >> index_msb;
+					break;
+				    case 3:
+					new_l3 = this_leaf.size/1024;
+					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+					index_msb = get_count_order(num_threads_sharing);
+					l3_id = c->apicid >> index_msb;
+					break;
+				    default:
+					break;
+				}
+			}
+		}
+	}
+	/*
+	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
+	 * trace cache
+	 */
+	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
+		/* supports eax=2  call */
+		int i, j, n;
+		int regs[4];
+		unsigned char *dp = (unsigned char *)regs;
+		int only_trace = 0;
+
+		if (num_cache_leaves != 0 && c->x86 == 15)
+			only_trace = 1;
+
+		/* Number of times to iterate */
+		n = cpuid_eax(2) & 0xFF;
+
+		for ( i = 0 ; i < n ; i++ ) {
+			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+
+			/* If bit 31 is set, this is an unknown format */
+			for ( j = 0 ; j < 3 ; j++ ) {
+				if ( regs[j] < 0 ) regs[j] = 0;
+			}
+
+			/* Byte 0 is level count, not a descriptor */
+			for ( j = 1 ; j < 16 ; j++ ) {
+				unsigned char des = dp[j];
+				unsigned char k = 0;
+
+				/* look up this descriptor in the table */
+				while (cache_table[k].descriptor != 0)
+				{
+					if (cache_table[k].descriptor == des) {
+						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
+							break;
+						switch (cache_table[k].cache_type) {
+						case LVL_1_INST:
+							l1i += cache_table[k].size;
+							break;
+						case LVL_1_DATA:
+							l1d += cache_table[k].size;
+							break;
+						case LVL_2:
+							l2 += cache_table[k].size;
+							break;
+						case LVL_3:
+							l3 += cache_table[k].size;
+							break;
+						case LVL_TRACE:
+							trace += cache_table[k].size;
+							break;
+						}
+
+						break;
+					}
+
+					k++;
+				}
+			}
+		}
+	}
+
+	if (new_l1d)
+		l1d = new_l1d;
+
+	if (new_l1i)
+		l1i = new_l1i;
+
+	if (new_l2) {
+		l2 = new_l2;
+#ifdef CONFIG_X86_HT
+		cpu_llc_id[cpu] = l2_id;
+#endif
+	}
+
+	if (new_l3) {
+		l3 = new_l3;
+#ifdef CONFIG_X86_HT
+		cpu_llc_id[cpu] = l3_id;
+#endif
+	}
+
+	if (trace)
+		printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
+	else if ( l1i )
+		printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
+
+	if (l1d)
+		printk(", L1 D cache: %dK\n", l1d);
+	else
+		printk("\n");
+
+	if (l2)
+		printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
+
+	if (l3)
+		printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
+
+	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
+
+	return l2;
+}
+
+/* pointer to _cpuid4_info array (for each cache leaf) */
+static struct _cpuid4_info *cpuid4_info[NR_CPUS];
+#define CPUID4_INFO_IDX(x,y)    (&((cpuid4_info[x])[y]))
+
+#ifdef CONFIG_SMP
+static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+{
+	struct _cpuid4_info	*this_leaf, *sibling_leaf;
+	unsigned long num_threads_sharing;
+	int index_msb, i;
+	struct cpuinfo_x86 *c = cpu_data;
+
+	this_leaf = CPUID4_INFO_IDX(cpu, index);
+	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
+
+	if (num_threads_sharing == 1)
+		cpu_set(cpu, this_leaf->shared_cpu_map);
+	else {
+		index_msb = get_count_order(num_threads_sharing);
+
+		for_each_online_cpu(i) {
+			if (c[i].apicid >> index_msb ==
+			    c[cpu].apicid >> index_msb) {
+				cpu_set(i, this_leaf->shared_cpu_map);
+				if (i != cpu && cpuid4_info[i])  {
+					sibling_leaf = CPUID4_INFO_IDX(i, index);
+					cpu_set(cpu, sibling_leaf->shared_cpu_map);
+				}
+			}
+		}
+	}
+}
+static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
+{
+	struct _cpuid4_info	*this_leaf, *sibling_leaf;
+	int sibling;
+
+	this_leaf = CPUID4_INFO_IDX(cpu, index);
+	for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) {
+		sibling_leaf = CPUID4_INFO_IDX(sibling, index);	
+		cpu_clear(cpu, sibling_leaf->shared_cpu_map);
+	}
+}
+#else
+static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
+static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {}
+#endif
+
+static void free_cache_attributes(unsigned int cpu)
+{
+	kfree(cpuid4_info[cpu]);
+	cpuid4_info[cpu] = NULL;
+}
+
+static int __cpuinit detect_cache_attributes(unsigned int cpu)
+{
+	struct _cpuid4_info	*this_leaf;
+	unsigned long 		j;
+	int 			retval;
+	cpumask_t		oldmask;
+
+	if (num_cache_leaves == 0)
+		return -ENOENT;
+
+	cpuid4_info[cpu] = kzalloc(
+	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
+	if (cpuid4_info[cpu] == NULL)
+		return -ENOMEM;
+
+	oldmask = current->cpus_allowed;
+	retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	if (retval)
+		goto out;
+
+	/* Do cpuid and store the results */
+	retval = 0;
+	for (j = 0; j < num_cache_leaves; j++) {
+		this_leaf = CPUID4_INFO_IDX(cpu, j);
+		retval = cpuid4_cache_lookup(j, this_leaf);
+		if (unlikely(retval < 0))
+			break;
+		cache_shared_cpu_map_setup(cpu, j);
+	}
+	set_cpus_allowed(current, oldmask);
+
+out:
+	if (retval)
+		free_cache_attributes(cpu);
+	return retval;
+}
+
+#ifdef CONFIG_SYSFS
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
+
+/* pointer to kobject for cpuX/cache */
+static struct kobject * cache_kobject[NR_CPUS];
+
+struct _index_kobject {
+	struct kobject kobj;
+	unsigned int cpu;
+	unsigned short index;
+};
+
+/* pointer to array of kobjects for cpuX/cache/indexY */
+static struct _index_kobject *index_kobject[NR_CPUS];
+#define INDEX_KOBJECT_PTR(x,y)    (&((index_kobject[x])[y]))
+
+#define show_one_plus(file_name, object, val)				\
+static ssize_t show_##file_name						\
+			(struct _cpuid4_info *this_leaf, char *buf)	\
+{									\
+	return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \
+}
+
+show_one_plus(level, eax.split.level, 0);
+show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
+show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
+show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
+show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
+
+static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
+{
+	return sprintf (buf, "%luK\n", this_leaf->size / 1024);
+}
+
+static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
+{
+	char mask_str[NR_CPUS];
+	cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
+	return sprintf(buf, "%s\n", mask_str);
+}
+
+static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
+	switch(this_leaf->eax.split.type) {
+	    case CACHE_TYPE_DATA:
+		return sprintf(buf, "Data\n");
+		break;
+	    case CACHE_TYPE_INST:
+		return sprintf(buf, "Instruction\n");
+		break;
+	    case CACHE_TYPE_UNIFIED:
+		return sprintf(buf, "Unified\n");
+		break;
+	    default:
+		return sprintf(buf, "Unknown\n");
+		break;
+	}
+}
+
+struct _cache_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct _cpuid4_info *, char *);
+	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
+};
+
+#define define_one_ro(_name) \
+static struct _cache_attr _name = \
+	__ATTR(_name, 0444, show_##_name, NULL)
+
+define_one_ro(level);
+define_one_ro(type);
+define_one_ro(coherency_line_size);
+define_one_ro(physical_line_partition);
+define_one_ro(ways_of_associativity);
+define_one_ro(number_of_sets);
+define_one_ro(size);
+define_one_ro(shared_cpu_map);
+
+static struct attribute * default_attrs[] = {
+	&type.attr,
+	&level.attr,
+	&coherency_line_size.attr,
+	&physical_line_partition.attr,
+	&ways_of_associativity.attr,
+	&number_of_sets.attr,
+	&size.attr,
+	&shared_cpu_map.attr,
+	NULL
+};
+
+#define to_object(k) container_of(k, struct _index_kobject, kobj)
+#define to_attr(a) container_of(a, struct _cache_attr, attr)
+
+static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
+{
+	struct _cache_attr *fattr = to_attr(attr);
+	struct _index_kobject *this_leaf = to_object(kobj);
+	ssize_t ret;
+
+	ret = fattr->show ?
+		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
+			buf) :
+	       	0;
+	return ret;
+}
+
+static ssize_t store(struct kobject * kobj, struct attribute * attr,
+		     const char * buf, size_t count)
+{
+	return 0;
+}
+
+static struct sysfs_ops sysfs_ops = {
+	.show   = show,
+	.store  = store,
+};
+
+static struct kobj_type ktype_cache = {
+	.sysfs_ops	= &sysfs_ops,
+	.default_attrs	= default_attrs,
+};
+
+static struct kobj_type ktype_percpu_entry = {
+	.sysfs_ops	= &sysfs_ops,
+};
+
+static void cpuid4_cache_sysfs_exit(unsigned int cpu)
+{
+	kfree(cache_kobject[cpu]);
+	kfree(index_kobject[cpu]);
+	cache_kobject[cpu] = NULL;
+	index_kobject[cpu] = NULL;
+	free_cache_attributes(cpu);
+}
+
+static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
+{
+
+	if (num_cache_leaves == 0)
+		return -ENOENT;
+
+	detect_cache_attributes(cpu);
+	if (cpuid4_info[cpu] == NULL)
+		return -ENOENT;
+
+	/* Allocate all required memory */
+	cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+	if (unlikely(cache_kobject[cpu] == NULL))
+		goto err_out;
+
+	index_kobject[cpu] = kzalloc(
+	    sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
+	if (unlikely(index_kobject[cpu] == NULL))
+		goto err_out;
+
+	return 0;
+
+err_out:
+	cpuid4_cache_sysfs_exit(cpu);
+	return -ENOMEM;
+}
+
+/* Add/Remove cache interface for CPU device */
+static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
+{
+	unsigned int cpu = sys_dev->id;
+	unsigned long i, j;
+	struct _index_kobject *this_object;
+	int retval = 0;
+
+	retval = cpuid4_cache_sysfs_init(cpu);
+	if (unlikely(retval < 0))
+		return retval;
+
+	cache_kobject[cpu]->parent = &sys_dev->kobj;
+	kobject_set_name(cache_kobject[cpu], "%s", "cache");
+	cache_kobject[cpu]->ktype = &ktype_percpu_entry;
+	retval = kobject_register(cache_kobject[cpu]);
+
+	for (i = 0; i < num_cache_leaves; i++) {
+		this_object = INDEX_KOBJECT_PTR(cpu,i);
+		this_object->cpu = cpu;
+		this_object->index = i;
+		this_object->kobj.parent = cache_kobject[cpu];
+		kobject_set_name(&(this_object->kobj), "index%1lu", i);
+		this_object->kobj.ktype = &ktype_cache;
+		retval = kobject_register(&(this_object->kobj));
+		if (unlikely(retval)) {
+			for (j = 0; j < i; j++) {
+				kobject_unregister(
+					&(INDEX_KOBJECT_PTR(cpu,j)->kobj));
+			}
+			kobject_unregister(cache_kobject[cpu]);
+			cpuid4_cache_sysfs_exit(cpu);
+			break;
+		}
+	}
+	return retval;
+}
+
+static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
+{
+	unsigned int cpu = sys_dev->id;
+	unsigned long i;
+
+	if (cpuid4_info[cpu] == NULL)
+		return;
+	for (i = 0; i < num_cache_leaves; i++) {
+		cache_remove_shared_cpu_map(cpu, i);
+		kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
+	}
+	kobject_unregister(cache_kobject[cpu]);
+	cpuid4_cache_sysfs_exit(cpu);
+	return;
+}
+
+static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
+					unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct sys_device *sys_dev;
+
+	sys_dev = get_cpu_sysdev(cpu);
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		cache_add_dev(sys_dev);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		cache_remove_dev(sys_dev);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier =
+{
+    .notifier_call = cacheinfo_cpu_callback,
+};
+
+static int __cpuinit cache_sysfs_init(void)
+{
+	int i;
+
+	if (num_cache_leaves == 0)
+		return 0;
+
+	register_hotcpu_notifier(&cacheinfo_cpu_notifier);
+
+	for_each_online_cpu(i) {
+		cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE,
+			(void *)(long)i);
+	}
+
+	return 0;
+}
+
+device_initcall(cache_sysfs_init);
+
+#endif
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
new file mode 100644
index 00000000000..f1ebe1c1c17
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -0,0 +1,2 @@
+obj-y	=	mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o
+obj-$(CONFIG_X86_MCE_NONFATAL)	+=	non-fatal.o
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
new file mode 100644
index 00000000000..eef63e3630c
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -0,0 +1,102 @@
+/*
+ * Athlon/Hammer specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk>
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+/* Machine Check Handler For AMD Athlon/Duron */
+static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
+{
+	int recover=1;
+	u32 alow, ahigh, high, low;
+	u32 mcgstl, mcgsth;
+	int i;
+
+	rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+	if (mcgstl & (1<<0))	/* Recoverable ? */
+		recover=0;
+
+	printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+		smp_processor_id(), mcgsth, mcgstl);
+
+	for (i=1; i<nr_mce_banks; i++) {
+		rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+		if (high&(1<<31)) {
+			if (high & (1<<29))
+				recover |= 1;
+			if (high & (1<<25))
+				recover |= 2;
+			printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+			high &= ~(1<<31);
+			if (high & (1<<27)) {
+				rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+				printk ("[%08x%08x]", ahigh, alow);
+			}
+			if (high & (1<<26)) {
+				rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+				printk (" at %08x%08x", ahigh, alow);
+			}
+			printk ("\n");
+			/* Clear it */
+			wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+			/* Serialize */
+			wmb();
+			add_taint(TAINT_MACHINE_CHECK);
+		}
+	}
+
+	if (recover&2)
+		panic ("CPU context corrupt");
+	if (recover&1)
+		panic ("Unable to continue");
+	printk (KERN_EMERG "Attempting to continue.\n");
+	mcgstl &= ~(1<<2);
+	wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+
+
+/* AMD K7 machine check is Intel like */
+void amd_mcheck_init(struct cpuinfo_x86 *c)
+{
+	u32 l, h;
+	int i;
+
+	if (!cpu_has(c, X86_FEATURE_MCE))
+		return;
+
+	machine_check_vector = k7_machine_check;
+	wmb();
+
+	printk (KERN_INFO "Intel machine check architecture supported.\n");
+	rdmsr (MSR_IA32_MCG_CAP, l, h);
+	if (l & (1<<8))	/* Control register present ? */
+		wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+	nr_mce_banks = l & 0xff;
+
+	/* Clear status for MC index 0 separately, we don't touch CTL,
+	 * as some K7 Athlons cause spurious MCEs when its enabled. */
+	if (boot_cpu_data.x86 == 6) {
+		wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
+		i = 1;
+	} else
+		i = 0;
+	for (; i<nr_mce_banks; i++) {
+		wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+		wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+	}
+
+	set_in_cr4 (X86_CR4_MCE);
+	printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+		smp_processor_id());
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
new file mode 100644
index 00000000000..34c781eddee
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -0,0 +1,90 @@
+/*
+ * mce.c - x86 Machine Check Exception Reporting
+ * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk>
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/mce.h>
+
+#include "mce.h"
+
+int mce_disabled = 0;
+int nr_mce_banks;
+
+EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
+
+/* Handle unconfigured int18 (should never happen) */
+static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code)
+{	
+	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
+}
+
+/* Call the installed machine check handler for this CPU setup. */
+void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
+
+/* This has to be run for each processor */
+void mcheck_init(struct cpuinfo_x86 *c)
+{
+	if (mce_disabled==1)
+		return;
+
+	switch (c->x86_vendor) {
+		case X86_VENDOR_AMD:
+			amd_mcheck_init(c);
+			break;
+
+		case X86_VENDOR_INTEL:
+			if (c->x86==5)
+				intel_p5_mcheck_init(c);
+			if (c->x86==6)
+				intel_p6_mcheck_init(c);
+			if (c->x86==15)
+				intel_p4_mcheck_init(c);
+			break;
+
+		case X86_VENDOR_CENTAUR:
+			if (c->x86==5)
+				winchip_mcheck_init(c);
+			break;
+
+		default:
+			break;
+	}
+}
+
+static unsigned long old_cr4 __initdata;
+
+void __init stop_mce(void)
+{
+	old_cr4 = read_cr4();
+	clear_in_cr4(X86_CR4_MCE);
+}
+
+void __init restart_mce(void)
+{
+	if (old_cr4 & X86_CR4_MCE)
+		set_in_cr4(X86_CR4_MCE);
+}
+
+static int __init mcheck_disable(char *str)
+{
+	mce_disabled = 1;
+	return 1;
+}
+
+static int __init mcheck_enable(char *str)
+{
+	mce_disabled = -1;
+	return 1;
+}
+
+__setup("nomce", mcheck_disable);
+__setup("mce", mcheck_enable);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h
new file mode 100644
index 00000000000..81fb6e2d35f
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce.h
@@ -0,0 +1,14 @@
+#include <linux/init.h>
+#include <asm/mce.h>
+
+void amd_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
+void winchip_mcheck_init(struct cpuinfo_x86 *c);
+
+/* Call the installed machine check handler for this CPU setup. */
+extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code);
+
+extern int nr_mce_banks;
+
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
new file mode 100644
index 00000000000..bf39409b383
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -0,0 +1,91 @@
+/*
+ * Non Fatal Machine Check Exception Reporting
+ *
+ * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk>
+ *
+ * This file contains routines to check for non-fatal MCEs every 15s
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+static int firstbank;
+
+#define MCE_RATE	15*HZ	/* timer rate is 15s */
+
+static void mce_checkregs (void *info)
+{
+	u32 low, high;
+	int i;
+
+	for (i=firstbank; i<nr_mce_banks; i++) {
+		rdmsr (MSR_IA32_MC0_STATUS+i*4, low, high);
+
+		if (high & (1<<31)) {
+			printk(KERN_INFO "MCE: The hardware reports a non "
+				"fatal, correctable incident occurred on "
+				"CPU %d.\n",
+				smp_processor_id());
+			printk (KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
+
+			/* Scrub the error so we don't pick it up in MCE_RATE seconds time. */
+			wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+
+			/* Serialize */
+			wmb();
+			add_taint(TAINT_MACHINE_CHECK);
+		}
+	}
+}
+
+static void mce_work_fn(struct work_struct *work);
+static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
+
+static void mce_work_fn(struct work_struct *work)
+{ 
+	on_each_cpu(mce_checkregs, NULL, 1, 1);
+	schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
+} 
+
+static int __init init_nonfatal_mce_checker(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	/* Check for MCE support */
+	if (!cpu_has(c, X86_FEATURE_MCE))
+		return -ENODEV;
+
+	/* Check for PPro style MCA */
+	if (!cpu_has(c, X86_FEATURE_MCA))
+		return -ENODEV;
+
+	/* Some Athlons misbehave when we frob bank 0 */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+		boot_cpu_data.x86 == 6)
+			firstbank = 1;
+	else
+			firstbank = 0;
+
+	/*
+	 * Check for non-fatal errors every MCE_RATE s
+	 */
+	schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
+	printk(KERN_INFO "Machine check exception polling timer started.\n");
+	return 0;
+}
+module_init(init_nonfatal_mce_checker);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
new file mode 100644
index 00000000000..1509edfb231
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -0,0 +1,253 @@
+/*
+ * P4 specific Machine Check Exception Reporting
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+
+#include <asm/therm_throt.h>
+
+#include "mce.h"
+
+/* as supported by the P4/Xeon family */
+struct intel_mce_extended_msrs {
+	u32 eax;
+	u32 ebx;
+	u32 ecx;
+	u32 edx;
+	u32 esi;
+	u32 edi;
+	u32 ebp;
+	u32 esp;
+	u32 eflags;
+	u32 eip;
+	/* u32 *reserved[]; */
+};
+
+static int mce_num_extended_msrs = 0;
+
+
+#ifdef CONFIG_X86_MCE_P4THERMAL
+static void unexpected_thermal_interrupt(struct pt_regs *regs)
+{	
+	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
+			smp_processor_id());
+	add_taint(TAINT_MACHINE_CHECK);
+}
+
+/* P4/Xeon Thermal transition interrupt handler */
+static void intel_thermal_interrupt(struct pt_regs *regs)
+{
+	__u64 msr_val;
+
+	ack_APIC_irq();
+
+	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
+	therm_throt_process(msr_val & 0x1);
+}
+
+/* Thermal interrupt handler for this CPU setup */
+static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
+
+fastcall void smp_thermal_interrupt(struct pt_regs *regs)
+{
+	irq_enter();
+	vendor_thermal_interrupt(regs);
+	irq_exit();
+}
+
+/* P4/Xeon Thermal regulation detect and init */
+static void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+	u32 l, h;
+	unsigned int cpu = smp_processor_id();
+
+	/* Thermal monitoring */
+	if (!cpu_has(c, X86_FEATURE_ACPI))
+		return;	/* -ENODEV */
+
+	/* Clock modulation */
+	if (!cpu_has(c, X86_FEATURE_ACC))
+		return;	/* -ENODEV */
+
+	/* first check if its enabled already, in which case there might
+	 * be some SMM goo which handles it, so we can't even put a handler
+	 * since it might be delivered via SMI already -zwanem.
+	 */
+	rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+	h = apic_read(APIC_LVTTHMR);
+	if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
+		printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
+				cpu);
+		return; /* -EBUSY */
+	}
+
+	/* check whether a vector already exists, temporarily masked? */	
+	if (h & APIC_VECTOR_MASK) {
+		printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
+				"installed\n",
+			cpu, (h & APIC_VECTOR_MASK));
+		return; /* -EBUSY */
+	}
+
+	/* The temperature transition interrupt handler setup */
+	h = THERMAL_APIC_VECTOR;		/* our delivery vector */
+	h |= (APIC_DM_FIXED | APIC_LVT_MASKED);	/* we'll mask till we're ready */
+	apic_write_around(APIC_LVTTHMR, h);
+
+	rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
+	wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
+
+	/* ok we're good to go... */
+	vendor_thermal_interrupt = intel_thermal_interrupt;
+	
+	rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+	wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
+
+	l = apic_read (APIC_LVTTHMR);
+	apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+	printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
+
+	/* enable thermal throttle processing */
+	atomic_set(&therm_throt_en, 1);
+	return;
+}
+#endif /* CONFIG_X86_MCE_P4THERMAL */
+
+
+/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
+static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
+{
+	u32 h;
+
+	rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
+	rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
+	rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
+	rdmsr (MSR_IA32_MCG_EDX, r->edx, h);
+	rdmsr (MSR_IA32_MCG_ESI, r->esi, h);
+	rdmsr (MSR_IA32_MCG_EDI, r->edi, h);
+	rdmsr (MSR_IA32_MCG_EBP, r->ebp, h);
+	rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
+	rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
+	rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
+}
+
+static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
+{
+	int recover=1;
+	u32 alow, ahigh, high, low;
+	u32 mcgstl, mcgsth;
+	int i;
+
+	rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+	if (mcgstl & (1<<0))	/* Recoverable ? */
+		recover=0;
+
+	printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+		smp_processor_id(), mcgsth, mcgstl);
+
+	if (mce_num_extended_msrs > 0) {
+		struct intel_mce_extended_msrs dbg;
+		intel_get_extended_msrs(&dbg);
+		printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
+			smp_processor_id(), dbg.eip, dbg.eflags);
+		printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
+			dbg.eax, dbg.ebx, dbg.ecx, dbg.edx);
+		printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
+			dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
+	}
+
+	for (i=0; i<nr_mce_banks; i++) {
+		rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+		if (high & (1<<31)) {
+			if (high & (1<<29))
+				recover |= 1;
+			if (high & (1<<25))
+				recover |= 2;
+			printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+			high &= ~(1<<31);
+			if (high & (1<<27)) {
+				rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+				printk ("[%08x%08x]", ahigh, alow);
+			}
+			if (high & (1<<26)) {
+				rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+				printk (" at %08x%08x", ahigh, alow);
+			}
+			printk ("\n");
+		}
+	}
+
+	if (recover & 2)
+		panic ("CPU context corrupt");
+	if (recover & 1)
+		panic ("Unable to continue");
+
+	printk(KERN_EMERG "Attempting to continue.\n");
+	/* 
+	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not 
+	 * recoverable/continuable.This will allow BIOS to look at the MSRs
+	 * for errors if the OS could not log the error.
+	 */
+	for (i=0; i<nr_mce_banks; i++) {
+		u32 msr;
+		msr = MSR_IA32_MC0_STATUS+i*4;
+		rdmsr (msr, low, high);
+		if (high&(1<<31)) {
+			/* Clear it */
+			wrmsr(msr, 0UL, 0UL);
+			/* Serialize */
+			wmb();
+			add_taint(TAINT_MACHINE_CHECK);
+		}
+	}
+	mcgstl &= ~(1<<2);
+	wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+
+
+void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
+{
+	u32 l, h;
+	int i;
+	
+	machine_check_vector = intel_machine_check;
+	wmb();
+
+	printk (KERN_INFO "Intel machine check architecture supported.\n");
+	rdmsr (MSR_IA32_MCG_CAP, l, h);
+	if (l & (1<<8))	/* Control register present ? */
+		wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+	nr_mce_banks = l & 0xff;
+
+	for (i=0; i<nr_mce_banks; i++) {
+		wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+		wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+	}
+
+	set_in_cr4 (X86_CR4_MCE);
+	printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+		smp_processor_id());
+
+	/* Check for P4/Xeon extended MCE MSRs */
+	rdmsr (MSR_IA32_MCG_CAP, l, h);
+	if (l & (1<<9))	{/* MCG_EXT_P */
+		mce_num_extended_msrs = (l >> 16) & 0xff;
+		printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
+				" available\n",
+			smp_processor_id(), mce_num_extended_msrs);
+
+#ifdef CONFIG_X86_MCE_P4THERMAL
+		/* Check for P4/Xeon Thermal monitor */
+		intel_init_thermal(c);
+#endif
+	}
+}
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
new file mode 100644
index 00000000000..94bc43d950c
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -0,0 +1,53 @@
+/*
+ * P5 specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Alan Cox <alan@redhat.com>
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+/* Machine check handler for Pentium class Intel */
+static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code)
+{
+	u32 loaddr, hi, lotype;
+	rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
+	rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
+	printk(KERN_EMERG "CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype);
+	if(lotype&(1<<5))
+		printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id());
+	add_taint(TAINT_MACHINE_CHECK);
+}
+
+/* Set up machine check reporting for processors with Intel style MCE */
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
+{
+	u32 l, h;
+	
+	/*Check for MCE support */
+	if( !cpu_has(c, X86_FEATURE_MCE) )
+		return;	
+
+	/* Default P5 to off as its often misconnected */
+	if(mce_disabled != -1)
+		return;
+	machine_check_vector = pentium_machine_check;
+	wmb();
+
+	/* Read registers before enabling */
+	rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
+	rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
+	printk(KERN_INFO "Intel old style machine check architecture supported.\n");
+
+ 	/* Enable MCE */
+	set_in_cr4(X86_CR4_MCE);
+	printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
+}
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
new file mode 100644
index 00000000000..deeae42ce19
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -0,0 +1,119 @@
+/*
+ * P6 specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Alan Cox <alan@redhat.com>
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+/* Machine Check Handler For PII/PIII */
+static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
+{
+	int recover=1;
+	u32 alow, ahigh, high, low;
+	u32 mcgstl, mcgsth;
+	int i;
+
+	rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+	if (mcgstl & (1<<0))	/* Recoverable ? */
+		recover=0;
+
+	printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+		smp_processor_id(), mcgsth, mcgstl);
+
+	for (i=0; i<nr_mce_banks; i++) {
+		rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+		if (high & (1<<31)) {
+			if (high & (1<<29))
+				recover |= 1;
+			if (high & (1<<25))
+				recover |= 2;
+			printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+			high &= ~(1<<31);
+			if (high & (1<<27)) {
+				rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+				printk ("[%08x%08x]", ahigh, alow);
+			}
+			if (high & (1<<26)) {
+				rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+				printk (" at %08x%08x", ahigh, alow);
+			}
+			printk ("\n");
+		}
+	}
+
+	if (recover & 2)
+		panic ("CPU context corrupt");
+	if (recover & 1)
+		panic ("Unable to continue");
+
+	printk (KERN_EMERG "Attempting to continue.\n");
+	/* 
+	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not 
+	 * recoverable/continuable.This will allow BIOS to look at the MSRs
+	 * for errors if the OS could not log the error.
+	 */
+	for (i=0; i<nr_mce_banks; i++) {
+		unsigned int msr;
+		msr = MSR_IA32_MC0_STATUS+i*4;
+		rdmsr (msr,low, high);
+		if (high & (1<<31)) {
+			/* Clear it */
+			wrmsr (msr, 0UL, 0UL);
+			/* Serialize */
+			wmb();
+			add_taint(TAINT_MACHINE_CHECK);
+		}
+	}
+	mcgstl &= ~(1<<2);
+	wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+
+/* Set up machine check reporting for processors with Intel style MCE */
+void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
+{
+	u32 l, h;
+	int i;
+	
+	/* Check for MCE support */
+	if (!cpu_has(c, X86_FEATURE_MCE))
+		return;
+
+	/* Check for PPro style MCA */
+ 	if (!cpu_has(c, X86_FEATURE_MCA))
+		return;
+
+	/* Ok machine check is available */
+	machine_check_vector = intel_machine_check;
+	wmb();
+
+	printk (KERN_INFO "Intel machine check architecture supported.\n");
+	rdmsr (MSR_IA32_MCG_CAP, l, h);
+	if (l & (1<<8))	/* Control register present ? */
+		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+	nr_mce_banks = l & 0xff;
+
+	/*
+	 * Following the example in IA-32 SDM Vol 3:
+	 * - MC0_CTL should not be written
+	 * - Status registers on all banks should be cleared on reset
+	 */
+	for (i=1; i<nr_mce_banks; i++)
+		wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+
+	for (i=0; i<nr_mce_banks; i++)
+		wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+
+	set_in_cr4 (X86_CR4_MCE);
+	printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+		smp_processor_id());
+}
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
new file mode 100644
index 00000000000..1203dc5ab87
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -0,0 +1,186 @@
+/*
+ * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c
+ *
+ * Thermal throttle event support code (such as syslog messaging and rate
+ * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
+ * This allows consistent reporting of CPU thermal throttle events.
+ *
+ * Maintains a counter in /sys that keeps track of the number of thermal
+ * events, such that the user knows how bad the thermal problem might be
+ * (since the logging to syslog and mcelog is rate limited).
+ *
+ * Author: Dmitriy Zavin (dmitriyz@google.com)
+ *
+ * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
+ *          Inspired by Ross Biro's and Al Borchers' counter code.
+ */
+
+#include <linux/percpu.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <asm/cpu.h>
+#include <linux/notifier.h>
+#include <linux/jiffies.h>
+#include <asm/therm_throt.h>
+
+/* How long to wait between reporting thermal events */
+#define CHECK_INTERVAL              (300 * HZ)
+
+static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
+static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
+atomic_t therm_throt_en = ATOMIC_INIT(0);
+
+#ifdef CONFIG_SYSFS
+#define define_therm_throt_sysdev_one_ro(_name)                              \
+        static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
+
+#define define_therm_throt_sysdev_show_func(name)                            \
+static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,        \
+                                              char *buf)                     \
+{                                                                            \
+	unsigned int cpu = dev->id;                                          \
+	ssize_t ret;                                                         \
+                                                                             \
+	preempt_disable();              /* CPU hotplug */                    \
+	if (cpu_online(cpu))                                                 \
+		ret = sprintf(buf, "%lu\n",                                  \
+			      per_cpu(thermal_throttle_##name, cpu));        \
+	else                                                                 \
+		ret = 0;                                                     \
+	preempt_enable();                                                    \
+                                                                             \
+	return ret;                                                          \
+}
+
+define_therm_throt_sysdev_show_func(count);
+define_therm_throt_sysdev_one_ro(count);
+
+static struct attribute *thermal_throttle_attrs[] = {
+	&attr_count.attr,
+	NULL
+};
+
+static struct attribute_group thermal_throttle_attr_group = {
+	.attrs = thermal_throttle_attrs,
+	.name = "thermal_throttle"
+};
+#endif /* CONFIG_SYSFS */
+
+/***
+ * therm_throt_process - Process thermal throttling event from interrupt
+ * @curr: Whether the condition is current or not (boolean), since the
+ *        thermal interrupt normally gets called both when the thermal
+ *        event begins and once the event has ended.
+ *
+ * This function is called by the thermal interrupt after the
+ * IRQ has been acknowledged.
+ *
+ * It will take care of rate limiting and printing messages to the syslog.
+ *
+ * Returns: 0 : Event should NOT be further logged, i.e. still in
+ *              "timeout" from previous log message.
+ *          1 : Event should be logged further, and a message has been
+ *              printed to the syslog.
+ */
+int therm_throt_process(int curr)
+{
+	unsigned int cpu = smp_processor_id();
+	__u64 tmp_jiffs = get_jiffies_64();
+
+	if (curr)
+		__get_cpu_var(thermal_throttle_count)++;
+
+	if (time_before64(tmp_jiffs, __get_cpu_var(next_check)))
+		return 0;
+
+	__get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
+
+	/* if we just entered the thermal event */
+	if (curr) {
+		printk(KERN_CRIT "CPU%d: Temperature above threshold, "
+		       "cpu clock throttled (total events = %lu)\n", cpu,
+		       __get_cpu_var(thermal_throttle_count));
+
+		add_taint(TAINT_MACHINE_CHECK);
+	} else {
+		printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu);
+	}
+
+	return 1;
+}
+
+#ifdef CONFIG_SYSFS
+/* Add/Remove thermal_throttle interface for CPU device */
+static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
+{
+	return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+}
+
+static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
+{
+	return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+}
+
+/* Mutex protecting device creation against CPU hotplug */
+static DEFINE_MUTEX(therm_cpu_lock);
+
+/* Get notified when a cpu comes on/off. Be hotplug friendly. */
+static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
+						   unsigned long action,
+						   void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct sys_device *sys_dev;
+	int err;
+
+	sys_dev = get_cpu_sysdev(cpu);
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		mutex_lock(&therm_cpu_lock);
+		err = thermal_throttle_add_dev(sys_dev);
+		mutex_unlock(&therm_cpu_lock);
+		WARN_ON(err);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		mutex_lock(&therm_cpu_lock);
+		thermal_throttle_remove_dev(sys_dev);
+		mutex_unlock(&therm_cpu_lock);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block thermal_throttle_cpu_notifier =
+{
+	.notifier_call = thermal_throttle_cpu_callback,
+};
+
+static __init int thermal_throttle_init_device(void)
+{
+	unsigned int cpu = 0;
+	int err;
+
+	if (!atomic_read(&therm_throt_en))
+		return 0;
+
+	register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
+
+#ifdef CONFIG_HOTPLUG_CPU
+	mutex_lock(&therm_cpu_lock);
+#endif
+	/* connect live CPUs to sysfs */
+	for_each_online_cpu(cpu) {
+		err = thermal_throttle_add_dev(get_cpu_sysdev(cpu));
+		WARN_ON(err);
+	}
+#ifdef CONFIG_HOTPLUG_CPU
+	mutex_unlock(&therm_cpu_lock);
+#endif
+
+	return 0;
+}
+
+device_initcall(thermal_throttle_init_device);
+#endif /* CONFIG_SYSFS */
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
new file mode 100644
index 00000000000..9e424b6c293
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -0,0 +1,36 @@
+/*
+ * IDT Winchip specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Alan Cox <alan@redhat.com>
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+/* Machine check handler for WinChip C6 */
+static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code)
+{
+	printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
+	add_taint(TAINT_MACHINE_CHECK);
+}
+
+/* Set up machine check reporting on the Winchip C6 series */
+void winchip_mcheck_init(struct cpuinfo_x86 *c)
+{
+	u32 lo, hi;
+	machine_check_vector = winchip_machine_check;
+	wmb();
+	rdmsr(MSR_IDT_FCR1, lo, hi);
+	lo|= (1<<2);	/* Enable EIERRINT (int 18 MCE) */
+	lo&= ~(1<<4);	/* Enable MCE */
+	wrmsr(MSR_IDT_FCR1, lo, hi);
+	set_in_cr4(X86_CR4_MCE);
+	printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n");
+}
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
new file mode 100644
index 00000000000..191fc053364
--- /dev/null
+++ b/arch/x86/kernel/cpu/mtrr/Makefile
@@ -0,0 +1,3 @@
+obj-y		:= main.o if.o generic.o state.o
+obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
+
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c
new file mode 100644
index 00000000000..0949cdbf848
--- /dev/null
+++ b/arch/x86/kernel/cpu/mtrr/amd.c
@@ -0,0 +1,121 @@
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+
+#include "mtrr.h"
+
+static void
+amd_get_mtrr(unsigned int reg, unsigned long *base,
+	     unsigned long *size, mtrr_type * type)
+{
+	unsigned long low, high;
+
+	rdmsr(MSR_K6_UWCCR, low, high);
+	/*  Upper dword is region 1, lower is region 0  */
+	if (reg == 1)
+		low = high;
+	/*  The base masks off on the right alignment  */
+	*base = (low & 0xFFFE0000) >> PAGE_SHIFT;
+	*type = 0;
+	if (low & 1)
+		*type = MTRR_TYPE_UNCACHABLE;
+	if (low & 2)
+		*type = MTRR_TYPE_WRCOMB;
+	if (!(low & 3)) {
+		*size = 0;
+		return;
+	}
+	/*
+	 *  This needs a little explaining. The size is stored as an
+	 *  inverted mask of bits of 128K granularity 15 bits long offset
+	 *  2 bits
+	 *
+	 *  So to get a size we do invert the mask and add 1 to the lowest
+	 *  mask bit (4 as its 2 bits in). This gives us a size we then shift
+	 *  to turn into 128K blocks
+	 *
+	 *  eg              111 1111 1111 1100      is 512K
+	 *
+	 *  invert          000 0000 0000 0011
+	 *  +1              000 0000 0000 0100
+	 *  *128K   ...
+	 */
+	low = (~low) & 0x1FFFC;
+	*size = (low + 4) << (15 - PAGE_SHIFT);
+	return;
+}
+
+static void amd_set_mtrr(unsigned int reg, unsigned long base,
+			 unsigned long size, mtrr_type type)
+/*  [SUMMARY] Set variable MTRR register on the local CPU.
+    <reg> The register to set.
+    <base> The base address of the region.
+    <size> The size of the region. If this is 0 the region is disabled.
+    <type> The type of the region.
+    <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
+    be done externally.
+    [RETURNS] Nothing.
+*/
+{
+	u32 regs[2];
+
+	/*
+	 *  Low is MTRR0 , High MTRR 1
+	 */
+	rdmsr(MSR_K6_UWCCR, regs[0], regs[1]);
+	/*
+	 *  Blank to disable
+	 */
+	if (size == 0)
+		regs[reg] = 0;
+	else
+		/* Set the register to the base, the type (off by one) and an
+		   inverted bitmask of the size The size is the only odd
+		   bit. We are fed say 512K We invert this and we get 111 1111
+		   1111 1011 but if you subtract one and invert you get the   
+		   desired 111 1111 1111 1100 mask
+
+		   But ~(x - 1) == ~x + 1 == -x. Two's complement rocks!  */
+		regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC)
+		    | (base << PAGE_SHIFT) | (type + 1);
+
+	/*
+	 *  The writeback rule is quite specific. See the manual. Its
+	 *  disable local interrupts, write back the cache, set the mtrr
+	 */
+	wbinvd();
+	wrmsr(MSR_K6_UWCCR, regs[0], regs[1]);
+}
+
+static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
+{
+	/* Apply the K6 block alignment and size rules
+	   In order
+	   o Uncached or gathering only
+	   o 128K or bigger block
+	   o Power of 2 block
+	   o base suitably aligned to the power
+	*/
+	if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT))
+	    || (size & ~(size - 1)) - size || (base & (size - 1)))
+		return -EINVAL;
+	return 0;
+}
+
+static struct mtrr_ops amd_mtrr_ops = {
+	.vendor            = X86_VENDOR_AMD,
+	.set               = amd_set_mtrr,
+	.get               = amd_get_mtrr,
+	.get_free_region   = generic_get_free_region,
+	.validate_add_page = amd_validate_add_page,
+	.have_wrcomb       = positive_have_wrcomb,
+};
+
+int __init amd_init_mtrr(void)
+{
+	set_mtrr_ops(&amd_mtrr_ops);
+	return 0;
+}
+
+//arch_initcall(amd_mtrr_init);
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
new file mode 100644
index 00000000000..cb9aa3a7a7a
--- /dev/null
+++ b/arch/x86/kernel/cpu/mtrr/centaur.c
@@ -0,0 +1,224 @@
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+#include "mtrr.h"
+
+static struct {
+	unsigned long high;
+	unsigned long low;
+} centaur_mcr[8];
+
+static u8 centaur_mcr_reserved;
+static u8 centaur_mcr_type;	/* 0 for winchip, 1 for winchip2 */
+
+/*
+ *	Report boot time MCR setups 
+ */
+
+static int
+centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg)
+/*  [SUMMARY] Get a free MTRR.
+    <base> The starting (base) address of the region.
+    <size> The size (in bytes) of the region.
+    [RETURNS] The index of the region on success, else -1 on error.
+*/
+{
+	int i, max;
+	mtrr_type ltype;
+	unsigned long lbase, lsize;
+
+	max = num_var_ranges;
+	if (replace_reg >= 0 && replace_reg < max)
+		return replace_reg;
+	for (i = 0; i < max; ++i) {
+		if (centaur_mcr_reserved & (1 << i))
+			continue;
+		mtrr_if->get(i, &lbase, &lsize, &ltype);
+		if (lsize == 0)
+			return i;
+	}
+	return -ENOSPC;
+}
+
+void
+mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
+{
+	centaur_mcr[mcr].low = lo;
+	centaur_mcr[mcr].high = hi;
+}
+
+static void
+centaur_get_mcr(unsigned int reg, unsigned long *base,
+		unsigned long *size, mtrr_type * type)
+{
+	*base = centaur_mcr[reg].high >> PAGE_SHIFT;
+	*size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT;
+	*type = MTRR_TYPE_WRCOMB;	/*  If it is there, it is write-combining  */
+	if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2))
+		*type = MTRR_TYPE_UNCACHABLE;
+	if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25)
+		*type = MTRR_TYPE_WRBACK;
+	if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31)
+		*type = MTRR_TYPE_WRBACK;
+
+}
+
+static void centaur_set_mcr(unsigned int reg, unsigned long base,
+			    unsigned long size, mtrr_type type)
+{
+	unsigned long low, high;
+
+	if (size == 0) {
+		/*  Disable  */
+		high = low = 0;
+	} else {
+		high = base << PAGE_SHIFT;
+		if (centaur_mcr_type == 0)
+			low = -size << PAGE_SHIFT | 0x1f;	/* only support write-combining... */
+		else {
+			if (type == MTRR_TYPE_UNCACHABLE)
+				low = -size << PAGE_SHIFT | 0x02;	/* NC */
+			else
+				low = -size << PAGE_SHIFT | 0x09;	/* WWO,WC */
+		}
+	}
+	centaur_mcr[reg].high = high;
+	centaur_mcr[reg].low = low;
+	wrmsr(MSR_IDT_MCR0 + reg, low, high);
+}
+
+#if 0
+/*
+ *	Initialise the later (saner) Winchip MCR variant. In this version
+ *	the BIOS can pass us the registers it has used (but not their values)
+ *	and the control register is read/write
+ */
+
+static void __init
+centaur_mcr1_init(void)
+{
+	unsigned i;
+	u32 lo, hi;
+
+	/* Unfortunately, MCR's are read-only, so there is no way to
+	 * find out what the bios might have done.
+	 */
+
+	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+	if (((lo >> 17) & 7) == 1) {	/* Type 1 Winchip2 MCR */
+		lo &= ~0x1C0;	/* clear key */
+		lo |= 0x040;	/* set key to 1 */
+		wrmsr(MSR_IDT_MCR_CTRL, lo, hi);	/* unlock MCR */
+	}
+
+	centaur_mcr_type = 1;
+
+	/*
+	 *  Clear any unconfigured MCR's.
+	 */
+
+	for (i = 0; i < 8; ++i) {
+		if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) {
+			if (!(lo & (1 << (9 + i))))
+				wrmsr(MSR_IDT_MCR0 + i, 0, 0);
+			else
+				/*
+				 *      If the BIOS set up an MCR we cannot see it
+				 *      but we don't wish to obliterate it
+				 */
+				centaur_mcr_reserved |= (1 << i);
+		}
+	}
+	/*  
+	 *  Throw the main write-combining switch... 
+	 *  However if OOSTORE is enabled then people have already done far
+	 *  cleverer things and we should behave. 
+	 */
+
+	lo |= 15;		/* Write combine enables */
+	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+}
+
+/*
+ *	Initialise the original winchip with read only MCR registers
+ *	no used bitmask for the BIOS to pass on and write only control
+ */
+
+static void __init
+centaur_mcr0_init(void)
+{
+	unsigned i;
+
+	/* Unfortunately, MCR's are read-only, so there is no way to
+	 * find out what the bios might have done.
+	 */
+
+	/* Clear any unconfigured MCR's.
+	 * This way we are sure that the centaur_mcr array contains the actual
+	 * values. The disadvantage is that any BIOS tweaks are thus undone.
+	 *
+	 */
+	for (i = 0; i < 8; ++i) {
+		if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0)
+			wrmsr(MSR_IDT_MCR0 + i, 0, 0);
+	}
+
+	wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);	/* Write only */
+}
+
+/*
+ *	Initialise Winchip series MCR registers
+ */
+
+static void __init
+centaur_mcr_init(void)
+{
+	struct set_mtrr_context ctxt;
+
+	set_mtrr_prepare_save(&ctxt);
+	set_mtrr_cache_disable(&ctxt);
+
+	if (boot_cpu_data.x86_model == 4)
+		centaur_mcr0_init();
+	else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9)
+		centaur_mcr1_init();
+
+	set_mtrr_done(&ctxt);
+}
+#endif
+
+static int centaur_validate_add_page(unsigned long base, 
+				     unsigned long size, unsigned int type)
+{
+	/*
+	 *  FIXME: Winchip2 supports uncached
+	 */
+	if (type != MTRR_TYPE_WRCOMB && 
+	    (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
+		printk(KERN_WARNING
+		       "mtrr: only write-combining%s supported\n",
+		       centaur_mcr_type ? " and uncacheable are"
+		       : " is");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static struct mtrr_ops centaur_mtrr_ops = {
+	.vendor            = X86_VENDOR_CENTAUR,
+//	.init              = centaur_mcr_init,
+	.set               = centaur_set_mcr,
+	.get               = centaur_get_mcr,
+	.get_free_region   = centaur_get_free_region,
+	.validate_add_page = centaur_validate_add_page,
+	.have_wrcomb       = positive_have_wrcomb,
+};
+
+int __init centaur_init_mtrr(void)
+{
+	set_mtrr_ops(&centaur_mtrr_ops);
+	return 0;
+}
+
+//arch_initcall(centaur_init_mtrr);
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
new file mode 100644
index 00000000000..2287d4863a8
--- /dev/null
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -0,0 +1,380 @@
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/processor-cyrix.h>
+#include "mtrr.h"
+
+int arr3_protected;
+
+static void
+cyrix_get_arr(unsigned int reg, unsigned long *base,
+	      unsigned long *size, mtrr_type * type)
+{
+	unsigned long flags;
+	unsigned char arr, ccr3, rcr, shift;
+
+	arr = CX86_ARR_BASE + (reg << 1) + reg;	/* avoid multiplication by 3 */
+
+	/* Save flags and disable interrupts */
+	local_irq_save(flags);
+
+	ccr3 = getCx86(CX86_CCR3);
+	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);	/* enable MAPEN */
+	((unsigned char *) base)[3] = getCx86(arr);
+	((unsigned char *) base)[2] = getCx86(arr + 1);
+	((unsigned char *) base)[1] = getCx86(arr + 2);
+	rcr = getCx86(CX86_RCR_BASE + reg);
+	setCx86(CX86_CCR3, ccr3);	/* disable MAPEN */
+
+	/* Enable interrupts if it was enabled previously */
+	local_irq_restore(flags);
+	shift = ((unsigned char *) base)[1] & 0x0f;
+	*base >>= PAGE_SHIFT;
+
+	/* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7
+	 * Note: shift==0xf means 4G, this is unsupported.
+	 */
+	if (shift)
+		*size = (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1);
+	else
+		*size = 0;
+
+	/* Bit 0 is Cache Enable on ARR7, Cache Disable on ARR0-ARR6 */
+	if (reg < 7) {
+		switch (rcr) {
+		case 1:
+			*type = MTRR_TYPE_UNCACHABLE;
+			break;
+		case 8:
+			*type = MTRR_TYPE_WRBACK;
+			break;
+		case 9:
+			*type = MTRR_TYPE_WRCOMB;
+			break;
+		case 24:
+		default:
+			*type = MTRR_TYPE_WRTHROUGH;
+			break;
+		}
+	} else {
+		switch (rcr) {
+		case 0:
+			*type = MTRR_TYPE_UNCACHABLE;
+			break;
+		case 8:
+			*type = MTRR_TYPE_WRCOMB;
+			break;
+		case 9:
+			*type = MTRR_TYPE_WRBACK;
+			break;
+		case 25:
+		default:
+			*type = MTRR_TYPE_WRTHROUGH;
+			break;
+		}
+	}
+}
+
+static int
+cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
+/*  [SUMMARY] Get a free ARR.
+    <base> The starting (base) address of the region.
+    <size> The size (in bytes) of the region.
+    [RETURNS] The index of the region on success, else -1 on error.
+*/
+{
+	int i;
+	mtrr_type ltype;
+	unsigned long lbase, lsize;
+
+	switch (replace_reg) {
+	case 7:
+		if (size < 0x40)
+			break;
+	case 6:
+	case 5:
+	case 4:
+		return replace_reg;
+	case 3:
+		if (arr3_protected)
+			break;
+	case 2:
+	case 1:
+	case 0:
+		return replace_reg;
+	}
+	/* If we are to set up a region >32M then look at ARR7 immediately */
+	if (size > 0x2000) {
+		cyrix_get_arr(7, &lbase, &lsize, &ltype);
+		if (lsize == 0)
+			return 7;
+		/*  Else try ARR0-ARR6 first  */
+	} else {
+		for (i = 0; i < 7; i++) {
+			cyrix_get_arr(i, &lbase, &lsize, &ltype);
+			if ((i == 3) && arr3_protected)
+				continue;
+			if (lsize == 0)
+				return i;
+		}
+		/* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */
+		cyrix_get_arr(i, &lbase, &lsize, &ltype);
+		if ((lsize == 0) && (size >= 0x40))
+			return i;
+	}
+	return -ENOSPC;
+}
+
+static u32 cr4 = 0;
+static u32 ccr3;
+
+static void prepare_set(void)
+{
+	u32 cr0;
+
+	/*  Save value of CR4 and clear Page Global Enable (bit 7)  */
+	if ( cpu_has_pge ) {
+		cr4 = read_cr4();
+		write_cr4(cr4 & ~X86_CR4_PGE);
+	}
+
+	/*  Disable and flush caches. Note that wbinvd flushes the TLBs as
+	    a side-effect  */
+	cr0 = read_cr0() | 0x40000000;
+	wbinvd();
+	write_cr0(cr0);
+	wbinvd();
+
+	/* Cyrix ARRs - everything else were excluded at the top */
+	ccr3 = getCx86(CX86_CCR3);
+
+	/* Cyrix ARRs - everything else were excluded at the top */
+	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
+
+}
+
+static void post_set(void)
+{
+	/*  Flush caches and TLBs  */
+	wbinvd();
+
+	/* Cyrix ARRs - everything else was excluded at the top */
+	setCx86(CX86_CCR3, ccr3);
+		
+	/*  Enable caches  */
+	write_cr0(read_cr0() & 0xbfffffff);
+
+	/*  Restore value of CR4  */
+	if ( cpu_has_pge )
+		write_cr4(cr4);
+}
+
+static void cyrix_set_arr(unsigned int reg, unsigned long base,
+			  unsigned long size, mtrr_type type)
+{
+	unsigned char arr, arr_type, arr_size;
+
+	arr = CX86_ARR_BASE + (reg << 1) + reg;	/* avoid multiplication by 3 */
+
+	/* count down from 32M (ARR0-ARR6) or from 2G (ARR7) */
+	if (reg >= 7)
+		size >>= 6;
+
+	size &= 0x7fff;		/* make sure arr_size <= 14 */
+	for (arr_size = 0; size; arr_size++, size >>= 1) ;
+
+	if (reg < 7) {
+		switch (type) {
+		case MTRR_TYPE_UNCACHABLE:
+			arr_type = 1;
+			break;
+		case MTRR_TYPE_WRCOMB:
+			arr_type = 9;
+			break;
+		case MTRR_TYPE_WRTHROUGH:
+			arr_type = 24;
+			break;
+		default:
+			arr_type = 8;
+			break;
+		}
+	} else {
+		switch (type) {
+		case MTRR_TYPE_UNCACHABLE:
+			arr_type = 0;
+			break;
+		case MTRR_TYPE_WRCOMB:
+			arr_type = 8;
+			break;
+		case MTRR_TYPE_WRTHROUGH:
+			arr_type = 25;
+			break;
+		default:
+			arr_type = 9;
+			break;
+		}
+	}
+
+	prepare_set();
+
+	base <<= PAGE_SHIFT;
+	setCx86(arr, ((unsigned char *) &base)[3]);
+	setCx86(arr + 1, ((unsigned char *) &base)[2]);
+	setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size);
+	setCx86(CX86_RCR_BASE + reg, arr_type);
+
+	post_set();
+}
+
+typedef struct {
+	unsigned long base;
+	unsigned long size;
+	mtrr_type type;
+} arr_state_t;
+
+static arr_state_t arr_state[8] = {
+	{0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL},
+	{0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}
+};
+
+static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 };
+
+static void cyrix_set_all(void)
+{
+	int i;
+
+	prepare_set();
+
+	/* the CCRs are not contiguous */
+	for (i = 0; i < 4; i++)
+		setCx86(CX86_CCR0 + i, ccr_state[i]);
+	for (; i < 7; i++)
+		setCx86(CX86_CCR4 + i, ccr_state[i]);
+	for (i = 0; i < 8; i++)
+		cyrix_set_arr(i, arr_state[i].base, 
+			      arr_state[i].size, arr_state[i].type);
+
+	post_set();
+}
+
+#if 0
+/*
+ * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection
+ * with the SMM (System Management Mode) mode. So we need the following:
+ * Check whether SMI_LOCK (CCR3 bit 0) is set
+ *   if it is set, write a warning message: ARR3 cannot be changed!
+ *     (it cannot be changed until the next processor reset)
+ *   if it is reset, then we can change it, set all the needed bits:
+ *   - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset)
+ *   - disable access to SMM memory (CCR1 bit 2 reset)
+ *   - disable SMM mode (CCR1 bit 1 reset)
+ *   - disable write protection of ARR3 (CCR6 bit 1 reset)
+ *   - (maybe) disable ARR3
+ * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set)
+ */
+static void __init
+cyrix_arr_init(void)
+{
+	struct set_mtrr_context ctxt;
+	unsigned char ccr[7];
+	int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 };
+#ifdef CONFIG_SMP
+	int i;
+#endif
+
+	/* flush cache and enable MAPEN */
+	set_mtrr_prepare_save(&ctxt);
+	set_mtrr_cache_disable(&ctxt);
+
+	/* Save all CCRs locally */
+	ccr[0] = getCx86(CX86_CCR0);
+	ccr[1] = getCx86(CX86_CCR1);
+	ccr[2] = getCx86(CX86_CCR2);
+	ccr[3] = ctxt.ccr3;
+	ccr[4] = getCx86(CX86_CCR4);
+	ccr[5] = getCx86(CX86_CCR5);
+	ccr[6] = getCx86(CX86_CCR6);
+
+	if (ccr[3] & 1) {
+		ccrc[3] = 1;
+		arr3_protected = 1;
+	} else {
+		/* Disable SMM mode (bit 1), access to SMM memory (bit 2) and
+		 * access to SMM memory through ARR3 (bit 7).
+		 */
+		if (ccr[1] & 0x80) {
+			ccr[1] &= 0x7f;
+			ccrc[1] |= 0x80;
+		}
+		if (ccr[1] & 0x04) {
+			ccr[1] &= 0xfb;
+			ccrc[1] |= 0x04;
+		}
+		if (ccr[1] & 0x02) {
+			ccr[1] &= 0xfd;
+			ccrc[1] |= 0x02;
+		}
+		arr3_protected = 0;
+		if (ccr[6] & 0x02) {
+			ccr[6] &= 0xfd;
+			ccrc[6] = 1;	/* Disable write protection of ARR3 */
+			setCx86(CX86_CCR6, ccr[6]);
+		}
+		/* Disable ARR3. This is safe now that we disabled SMM. */
+		/* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */
+	}
+	/* If we changed CCR1 in memory, change it in the processor, too. */
+	if (ccrc[1])
+		setCx86(CX86_CCR1, ccr[1]);
+
+	/* Enable ARR usage by the processor */
+	if (!(ccr[5] & 0x20)) {
+		ccr[5] |= 0x20;
+		ccrc[5] = 1;
+		setCx86(CX86_CCR5, ccr[5]);
+	}
+#ifdef CONFIG_SMP
+	for (i = 0; i < 7; i++)
+		ccr_state[i] = ccr[i];
+	for (i = 0; i < 8; i++)
+		cyrix_get_arr(i,
+			      &arr_state[i].base, &arr_state[i].size,
+			      &arr_state[i].type);
+#endif
+
+	set_mtrr_done(&ctxt);	/* flush cache and disable MAPEN */
+
+	if (ccrc[5])
+		printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n");
+	if (ccrc[3])
+		printk(KERN_INFO "mtrr: ARR3 cannot be changed\n");
+/*
+    if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n");
+    if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n");
+    if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n");
+*/
+	if (ccrc[6])
+		printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n");
+}
+#endif
+
+static struct mtrr_ops cyrix_mtrr_ops = {
+	.vendor            = X86_VENDOR_CYRIX,
+//	.init              = cyrix_arr_init,
+	.set_all	   = cyrix_set_all,
+	.set               = cyrix_set_arr,
+	.get               = cyrix_get_arr,
+	.get_free_region   = cyrix_get_free_region,
+	.validate_add_page = generic_validate_add_page,
+	.have_wrcomb       = positive_have_wrcomb,
+};
+
+int __init cyrix_init_mtrr(void)
+{
+	set_mtrr_ops(&cyrix_mtrr_ops);
+	return 0;
+}
+
+//arch_initcall(cyrix_init_mtrr);
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
new file mode 100644
index 00000000000..56f64e34829
--- /dev/null
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -0,0 +1,509 @@
+/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
+   because MTRRs can span upto 40 bits (36bits on most modern x86) */ 
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <asm/io.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+#include <asm/system.h>
+#include <asm/cpufeature.h>
+#include <asm/tlbflush.h>
+#include "mtrr.h"
+
+struct mtrr_state {
+	struct mtrr_var_range *var_ranges;
+	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
+	unsigned char enabled;
+	unsigned char have_fixed;
+	mtrr_type def_type;
+};
+
+struct fixed_range_block {
+	int base_msr; /* start address of an MTRR block */
+	int ranges;   /* number of MTRRs in this block  */
+};
+
+static struct fixed_range_block fixed_range_blocks[] = {
+	{ MTRRfix64K_00000_MSR, 1 }, /* one  64k MTRR  */
+	{ MTRRfix16K_80000_MSR, 2 }, /* two  16k MTRRs */
+	{ MTRRfix4K_C0000_MSR,  8 }, /* eight 4k MTRRs */
+	{}
+};
+
+static unsigned long smp_changes_mask;
+static struct mtrr_state mtrr_state = {};
+
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "mtrr."
+
+static int mtrr_show;
+module_param_named(show, mtrr_show, bool, 0);
+
+/*  Get the MSR pair relating to a var range  */
+static void
+get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
+{
+	rdmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
+	rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
+}
+
+static void
+get_fixed_ranges(mtrr_type * frs)
+{
+	unsigned int *p = (unsigned int *) frs;
+	int i;
+
+	rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
+
+	for (i = 0; i < 2; i++)
+		rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]);
+	for (i = 0; i < 8; i++)
+		rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
+}
+
+void mtrr_save_fixed_ranges(void *info)
+{
+	if (cpu_has_mtrr)
+		get_fixed_ranges(mtrr_state.fixed_ranges);
+}
+
+static void print_fixed(unsigned base, unsigned step, const mtrr_type*types)
+{
+	unsigned i;
+
+	for (i = 0; i < 8; ++i, ++types, base += step)
+		printk(KERN_INFO "MTRR %05X-%05X %s\n",
+			base, base + step - 1, mtrr_attrib_to_str(*types));
+}
+
+/*  Grab all of the MTRR state for this CPU into *state  */
+void __init get_mtrr_state(void)
+{
+	unsigned int i;
+	struct mtrr_var_range *vrs;
+	unsigned lo, dummy;
+
+	if (!mtrr_state.var_ranges) {
+		mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range), 
+						GFP_KERNEL);
+		if (!mtrr_state.var_ranges)
+			return;
+	} 
+	vrs = mtrr_state.var_ranges;
+
+	rdmsr(MTRRcap_MSR, lo, dummy);
+	mtrr_state.have_fixed = (lo >> 8) & 1;
+
+	for (i = 0; i < num_var_ranges; i++)
+		get_mtrr_var_range(i, &vrs[i]);
+	if (mtrr_state.have_fixed)
+		get_fixed_ranges(mtrr_state.fixed_ranges);
+
+	rdmsr(MTRRdefType_MSR, lo, dummy);
+	mtrr_state.def_type = (lo & 0xff);
+	mtrr_state.enabled = (lo & 0xc00) >> 10;
+
+	if (mtrr_show) {
+		int high_width;
+
+		printk(KERN_INFO "MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type));
+		if (mtrr_state.have_fixed) {
+			printk(KERN_INFO "MTRR fixed ranges %sabled:\n",
+			       mtrr_state.enabled & 1 ? "en" : "dis");
+			print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
+			for (i = 0; i < 2; ++i)
+				print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8);
+			for (i = 0; i < 8; ++i)
+				print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8);
+		}
+		printk(KERN_INFO "MTRR variable ranges %sabled:\n",
+		       mtrr_state.enabled & 2 ? "en" : "dis");
+		high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4;
+		for (i = 0; i < num_var_ranges; ++i) {
+			if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
+				printk(KERN_INFO "MTRR %u base %0*X%05X000 mask %0*X%05X000 %s\n",
+				       i,
+				       high_width,
+				       mtrr_state.var_ranges[i].base_hi,
+				       mtrr_state.var_ranges[i].base_lo >> 12,
+				       high_width,
+				       mtrr_state.var_ranges[i].mask_hi,
+				       mtrr_state.var_ranges[i].mask_lo >> 12,
+				       mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
+			else
+				printk(KERN_INFO "MTRR %u disabled\n", i);
+		}
+	}
+}
+
+/*  Some BIOS's are fucked and don't set all MTRRs the same!  */
+void __init mtrr_state_warn(void)
+{
+	unsigned long mask = smp_changes_mask;
+
+	if (!mask)
+		return;
+	if (mask & MTRR_CHANGE_MASK_FIXED)
+		printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n");
+	if (mask & MTRR_CHANGE_MASK_VARIABLE)
+		printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n");
+	if (mask & MTRR_CHANGE_MASK_DEFTYPE)
+		printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n");
+	printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
+	printk(KERN_INFO "mtrr: corrected configuration.\n");
+}
+
+/* Doesn't attempt to pass an error out to MTRR users
+   because it's quite complicated in some cases and probably not
+   worth it because the best error handling is to ignore it. */
+void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
+{
+	if (wrmsr_safe(msr, a, b) < 0)
+		printk(KERN_ERR
+			"MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
+			smp_processor_id(), msr, a, b);
+}
+
+/**
+ * Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs
+ * see AMD publication no. 24593, chapter 3.2.1 for more information
+ */
+static inline void k8_enable_fixed_iorrs(void)
+{
+	unsigned lo, hi;
+
+	rdmsr(MSR_K8_SYSCFG, lo, hi);
+	mtrr_wrmsr(MSR_K8_SYSCFG, lo
+				| K8_MTRRFIXRANGE_DRAM_ENABLE
+				| K8_MTRRFIXRANGE_DRAM_MODIFY, hi);
+}
+
+/**
+ * Checks and updates an fixed-range MTRR if it differs from the value it
+ * should have. If K8 extenstions are wanted, update the K8 SYSCFG MSR also.
+ * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information
+ * \param msr MSR address of the MTTR which should be checked and updated
+ * \param changed pointer which indicates whether the MTRR needed to be changed
+ * \param msrwords pointer to the MSR values which the MSR should have
+ */
+static void set_fixed_range(int msr, int * changed, unsigned int * msrwords)
+{
+	unsigned lo, hi;
+
+	rdmsr(msr, lo, hi);
+
+	if (lo != msrwords[0] || hi != msrwords[1]) {
+		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+		    boot_cpu_data.x86 == 15 &&
+		    ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
+			k8_enable_fixed_iorrs();
+		mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
+		*changed = TRUE;
+	}
+}
+
+int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
+/*  [SUMMARY] Get a free MTRR.
+    <base> The starting (base) address of the region.
+    <size> The size (in bytes) of the region.
+    [RETURNS] The index of the region on success, else -1 on error.
+*/
+{
+	int i, max;
+	mtrr_type ltype;
+	unsigned long lbase, lsize;
+
+	max = num_var_ranges;
+	if (replace_reg >= 0 && replace_reg < max)
+		return replace_reg;
+	for (i = 0; i < max; ++i) {
+		mtrr_if->get(i, &lbase, &lsize, &ltype);
+		if (lsize == 0)
+			return i;
+	}
+	return -ENOSPC;
+}
+
+static void generic_get_mtrr(unsigned int reg, unsigned long *base,
+			     unsigned long *size, mtrr_type *type)
+{
+	unsigned int mask_lo, mask_hi, base_lo, base_hi;
+
+	rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
+	if ((mask_lo & 0x800) == 0) {
+		/*  Invalid (i.e. free) range  */
+		*base = 0;
+		*size = 0;
+		*type = 0;
+		return;
+	}
+
+	rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
+
+	/* Work out the shifted address mask. */
+	mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT)
+	    | mask_lo >> PAGE_SHIFT;
+
+	/* This works correctly if size is a power of two, i.e. a
+	   contiguous range. */
+	*size = -mask_lo;
+	*base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
+	*type = base_lo & 0xff;
+}
+
+/**
+ * Checks and updates the fixed-range MTRRs if they differ from the saved set
+ * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges()
+ */
+static int set_fixed_ranges(mtrr_type * frs)
+{
+	unsigned long long *saved = (unsigned long long *) frs;
+	int changed = FALSE;
+	int block=-1, range;
+
+	while (fixed_range_blocks[++block].ranges)
+	    for (range=0; range < fixed_range_blocks[block].ranges; range++)
+		set_fixed_range(fixed_range_blocks[block].base_msr + range,
+		    &changed, (unsigned int *) saved++);
+
+	return changed;
+}
+
+/*  Set the MSR pair relating to a var range. Returns TRUE if
+    changes are made  */
+static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
+{
+	unsigned int lo, hi;
+	int changed = FALSE;
+
+	rdmsr(MTRRphysBase_MSR(index), lo, hi);
+	if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL)
+	    || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
+		(hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
+		mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
+		changed = TRUE;
+	}
+
+	rdmsr(MTRRphysMask_MSR(index), lo, hi);
+
+	if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL)
+	    || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
+		(hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
+		mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
+		changed = TRUE;
+	}
+	return changed;
+}
+
+static u32 deftype_lo, deftype_hi;
+
+static unsigned long set_mtrr_state(void)
+/*  [SUMMARY] Set the MTRR state for this CPU.
+    <state> The MTRR state information to read.
+    <ctxt> Some relevant CPU context.
+    [NOTE] The CPU must already be in a safe state for MTRR changes.
+    [RETURNS] 0 if no changes made, else a mask indication what was changed.
+*/
+{
+	unsigned int i;
+	unsigned long change_mask = 0;
+
+	for (i = 0; i < num_var_ranges; i++)
+		if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i]))
+			change_mask |= MTRR_CHANGE_MASK_VARIABLE;
+
+	if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges))
+		change_mask |= MTRR_CHANGE_MASK_FIXED;
+
+	/*  Set_mtrr_restore restores the old value of MTRRdefType,
+	   so to set it we fiddle with the saved value  */
+	if ((deftype_lo & 0xff) != mtrr_state.def_type
+	    || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
+		deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10);
+		change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
+	}
+
+	return change_mask;
+}
+
+
+static unsigned long cr4 = 0;
+static DEFINE_SPINLOCK(set_atomicity_lock);
+
+/*
+ * Since we are disabling the cache don't allow any interrupts - they
+ * would run extremely slow and would only increase the pain.  The caller must
+ * ensure that local interrupts are disabled and are reenabled after post_set()
+ * has been called.
+ */
+
+static void prepare_set(void) __acquires(set_atomicity_lock)
+{
+	unsigned long cr0;
+
+	/*  Note that this is not ideal, since the cache is only flushed/disabled
+	   for this CPU while the MTRRs are changed, but changing this requires
+	   more invasive changes to the way the kernel boots  */
+
+	spin_lock(&set_atomicity_lock);
+
+	/*  Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
+	cr0 = read_cr0() | 0x40000000;	/* set CD flag */
+	write_cr0(cr0);
+	wbinvd();
+
+	/*  Save value of CR4 and clear Page Global Enable (bit 7)  */
+	if ( cpu_has_pge ) {
+		cr4 = read_cr4();
+		write_cr4(cr4 & ~X86_CR4_PGE);
+	}
+
+	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
+	__flush_tlb();
+
+	/*  Save MTRR state */
+	rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
+
+	/*  Disable MTRRs, and set the default type to uncached  */
+	mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi);
+}
+
+static void post_set(void) __releases(set_atomicity_lock)
+{
+	/*  Flush TLBs (no need to flush caches - they are disabled)  */
+	__flush_tlb();
+
+	/* Intel (P6) standard MTRRs */
+	mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
+		
+	/*  Enable caches  */
+	write_cr0(read_cr0() & 0xbfffffff);
+
+	/*  Restore value of CR4  */
+	if ( cpu_has_pge )
+		write_cr4(cr4);
+	spin_unlock(&set_atomicity_lock);
+}
+
+static void generic_set_all(void)
+{
+	unsigned long mask, count;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	prepare_set();
+
+	/* Actually set the state */
+	mask = set_mtrr_state();
+
+	post_set();
+	local_irq_restore(flags);
+
+	/*  Use the atomic bitops to update the global mask  */
+	for (count = 0; count < sizeof mask * 8; ++count) {
+		if (mask & 0x01)
+			set_bit(count, &smp_changes_mask);
+		mask >>= 1;
+	}
+	
+}
+
+static void generic_set_mtrr(unsigned int reg, unsigned long base,
+			     unsigned long size, mtrr_type type)
+/*  [SUMMARY] Set variable MTRR register on the local CPU.
+    <reg> The register to set.
+    <base> The base address of the region.
+    <size> The size of the region. If this is 0 the region is disabled.
+    <type> The type of the region.
+    <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
+    be done externally.
+    [RETURNS] Nothing.
+*/
+{
+	unsigned long flags;
+	struct mtrr_var_range *vr;
+
+	vr = &mtrr_state.var_ranges[reg];
+
+	local_irq_save(flags);
+	prepare_set();
+
+	if (size == 0) {
+		/* The invalid bit is kept in the mask, so we simply clear the
+		   relevant mask register to disable a range. */
+		mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0);
+		memset(vr, 0, sizeof(struct mtrr_var_range));
+	} else {
+		vr->base_lo = base << PAGE_SHIFT | type;
+		vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
+		vr->mask_lo = -size << PAGE_SHIFT | 0x800;
+		vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);
+
+		mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi);
+		mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi);
+	}
+
+	post_set();
+	local_irq_restore(flags);
+}
+
+int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
+{
+	unsigned long lbase, last;
+
+	/*  For Intel PPro stepping <= 7, must be 4 MiB aligned 
+	    and not touch 0x70000000->0x7003FFFF */
+	if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
+	    boot_cpu_data.x86_model == 1 &&
+	    boot_cpu_data.x86_mask <= 7) {
+		if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
+			printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
+			return -EINVAL;
+		}
+		if (!(base + size < 0x70000 || base > 0x7003F) &&
+		    (type == MTRR_TYPE_WRCOMB
+		     || type == MTRR_TYPE_WRBACK)) {
+			printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
+			return -EINVAL;
+		}
+	}
+
+	/*  Check upper bits of base and last are equal and lower bits are 0
+	    for base and 1 for last  */
+	last = base + size - 1;
+	for (lbase = base; !(lbase & 1) && (last & 1);
+	     lbase = lbase >> 1, last = last >> 1) ;
+	if (lbase != last) {
+		printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n",
+		       base, size);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+
+static int generic_have_wrcomb(void)
+{
+	unsigned long config, dummy;
+	rdmsr(MTRRcap_MSR, config, dummy);
+	return (config & (1 << 10));
+}
+
+int positive_have_wrcomb(void)
+{
+	return 1;
+}
+
+/* generic structure...
+ */
+struct mtrr_ops generic_mtrr_ops = {
+	.use_intel_if      = 1,
+	.set_all	   = generic_set_all,
+	.get               = generic_get_mtrr,
+	.get_free_region   = generic_get_free_region,
+	.set               = generic_set_mtrr,
+	.validate_add_page = generic_validate_add_page,
+	.have_wrcomb       = generic_have_wrcomb,
+};
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
new file mode 100644
index 00000000000..c7d8f175674
--- /dev/null
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -0,0 +1,439 @@
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/capability.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+
+#define LINE_SIZE 80
+
+#include <asm/mtrr.h>
+#include "mtrr.h"
+
+/* RED-PEN: this is accessed without any locking */
+extern unsigned int *usage_table;
+
+
+#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
+
+static const char *const mtrr_strings[MTRR_NUM_TYPES] =
+{
+    "uncachable",               /* 0 */
+    "write-combining",          /* 1 */
+    "?",                        /* 2 */
+    "?",                        /* 3 */
+    "write-through",            /* 4 */
+    "write-protect",            /* 5 */
+    "write-back",               /* 6 */
+};
+
+const char *mtrr_attrib_to_str(int x)
+{
+	return (x <= 6) ? mtrr_strings[x] : "?";
+}
+
+#ifdef CONFIG_PROC_FS
+
+static int
+mtrr_file_add(unsigned long base, unsigned long size,
+	      unsigned int type, char increment, struct file *file, int page)
+{
+	int reg, max;
+	unsigned int *fcount = FILE_FCOUNT(file); 
+
+	max = num_var_ranges;
+	if (fcount == NULL) {
+		fcount = kzalloc(max * sizeof *fcount, GFP_KERNEL);
+		if (!fcount)
+			return -ENOMEM;
+		FILE_FCOUNT(file) = fcount;
+	}
+	if (!page) {
+		if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)))
+			return -EINVAL;
+		base >>= PAGE_SHIFT;
+		size >>= PAGE_SHIFT;
+	}
+	reg = mtrr_add_page(base, size, type, 1);
+	if (reg >= 0)
+		++fcount[reg];
+	return reg;
+}
+
+static int
+mtrr_file_del(unsigned long base, unsigned long size,
+	      struct file *file, int page)
+{
+	int reg;
+	unsigned int *fcount = FILE_FCOUNT(file);
+
+	if (!page) {
+		if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)))
+			return -EINVAL;
+		base >>= PAGE_SHIFT;
+		size >>= PAGE_SHIFT;
+	}
+	reg = mtrr_del_page(-1, base, size);
+	if (reg < 0)
+		return reg;
+	if (fcount == NULL)
+		return reg;
+	if (fcount[reg] < 1)
+		return -EINVAL;
+	--fcount[reg];
+	return reg;
+}
+
+/* RED-PEN: seq_file can seek now. this is ignored. */
+static ssize_t
+mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
+/*  Format of control line:
+    "base=%Lx size=%Lx type=%s"     OR:
+    "disable=%d"
+*/
+{
+	int i, err;
+	unsigned long reg;
+	unsigned long long base, size;
+	char *ptr;
+	char line[LINE_SIZE];
+	size_t linelen;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (!len)
+		return -EINVAL;
+	memset(line, 0, LINE_SIZE);
+	if (len > LINE_SIZE)
+		len = LINE_SIZE;
+	if (copy_from_user(line, buf, len - 1))
+		return -EFAULT;
+	linelen = strlen(line);
+	ptr = line + linelen - 1;
+	if (linelen && *ptr == '\n')
+		*ptr = '\0';
+	if (!strncmp(line, "disable=", 8)) {
+		reg = simple_strtoul(line + 8, &ptr, 0);
+		err = mtrr_del_page(reg, 0, 0);
+		if (err < 0)
+			return err;
+		return len;
+	}
+	if (strncmp(line, "base=", 5))
+		return -EINVAL;
+	base = simple_strtoull(line + 5, &ptr, 0);
+	for (; isspace(*ptr); ++ptr) ;
+	if (strncmp(ptr, "size=", 5))
+		return -EINVAL;
+	size = simple_strtoull(ptr + 5, &ptr, 0);
+	if ((base & 0xfff) || (size & 0xfff))
+		return -EINVAL;
+	for (; isspace(*ptr); ++ptr) ;
+	if (strncmp(ptr, "type=", 5))
+		return -EINVAL;
+	ptr += 5;
+	for (; isspace(*ptr); ++ptr) ;
+	for (i = 0; i < MTRR_NUM_TYPES; ++i) {
+		if (strcmp(ptr, mtrr_strings[i]))
+			continue;
+		base >>= PAGE_SHIFT;
+		size >>= PAGE_SHIFT;
+		err =
+		    mtrr_add_page((unsigned long) base, (unsigned long) size, i,
+				  1);
+		if (err < 0)
+			return err;
+		return len;
+	}
+	return -EINVAL;
+}
+
+static long
+mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
+{
+	int err = 0;
+	mtrr_type type;
+	unsigned long size;
+	struct mtrr_sentry sentry;
+	struct mtrr_gentry gentry;
+	void __user *arg = (void __user *) __arg;
+
+	switch (cmd) {
+	case MTRRIOC_ADD_ENTRY:
+	case MTRRIOC_SET_ENTRY:
+	case MTRRIOC_DEL_ENTRY:
+	case MTRRIOC_KILL_ENTRY:
+	case MTRRIOC_ADD_PAGE_ENTRY:
+	case MTRRIOC_SET_PAGE_ENTRY:
+	case MTRRIOC_DEL_PAGE_ENTRY:
+	case MTRRIOC_KILL_PAGE_ENTRY:
+		if (copy_from_user(&sentry, arg, sizeof sentry))
+			return -EFAULT;
+		break;
+	case MTRRIOC_GET_ENTRY:
+	case MTRRIOC_GET_PAGE_ENTRY:
+		if (copy_from_user(&gentry, arg, sizeof gentry))
+			return -EFAULT;
+		break;
+#ifdef CONFIG_COMPAT
+	case MTRRIOC32_ADD_ENTRY:
+	case MTRRIOC32_SET_ENTRY:
+	case MTRRIOC32_DEL_ENTRY:
+	case MTRRIOC32_KILL_ENTRY:
+	case MTRRIOC32_ADD_PAGE_ENTRY:
+	case MTRRIOC32_SET_PAGE_ENTRY:
+	case MTRRIOC32_DEL_PAGE_ENTRY:
+	case MTRRIOC32_KILL_PAGE_ENTRY: {
+		struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg;
+		err = get_user(sentry.base, &s32->base);
+		err |= get_user(sentry.size, &s32->size);
+		err |= get_user(sentry.type, &s32->type);
+		if (err)
+			return err;
+		break;
+	}
+	case MTRRIOC32_GET_ENTRY:
+	case MTRRIOC32_GET_PAGE_ENTRY: {
+		struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg;
+		err = get_user(gentry.regnum, &g32->regnum);
+		err |= get_user(gentry.base, &g32->base);
+		err |= get_user(gentry.size, &g32->size);
+		err |= get_user(gentry.type, &g32->type);
+		if (err)
+			return err;
+		break;
+	}
+#endif
+	}
+
+	switch (cmd) {
+	default:
+		return -ENOTTY;
+	case MTRRIOC_ADD_ENTRY:
+#ifdef CONFIG_COMPAT
+	case MTRRIOC32_ADD_ENTRY:
+#endif
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		err =
+		    mtrr_file_add(sentry.base, sentry.size, sentry.type, 1,
+				  file, 0);
+		break;
+	case MTRRIOC_SET_ENTRY:
+#ifdef CONFIG_COMPAT
+	case MTRRIOC32_SET_ENTRY:
+#endif
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		err = mtrr_add(sentry.base, sentry.size, sentry.type, 0);
+		break;
+	case MTRRIOC_DEL_ENTRY:
+#ifdef CONFIG_COMPAT
+	case MTRRIOC32_DEL_ENTRY:
+#endif
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		err = mtrr_file_del(sentry.base, sentry.size, file, 0);
+		break;
+	case MTRRIOC_KILL_ENTRY:
+#ifdef CONFIG_COMPAT
+	case MTRRIOC32_KILL_ENTRY:
+#endif
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		err = mtrr_del(-1, sentry.base, sentry.size);
+		break;
+	case MTRRIOC_GET_ENTRY:
+#ifdef CONFIG_COMPAT
+	case MTRRIOC32_GET_ENTRY:
+#endif
+		if (gentry.regnum >= num_var_ranges)
+			return -EINVAL;
+		mtrr_if->get(gentry.regnum, &gentry.base, &size, &type);
+
+		/* Hide entries that go above 4GB */
+		if (gentry.base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))
+		    || size >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)))
+			gentry.base = gentry.size = gentry.type = 0;
+		else {
+			gentry.base <<= PAGE_SHIFT;
+			gentry.size = size << PAGE_SHIFT;
+			gentry.type = type;
+		}
+
+		break;
+	case MTRRIOC_ADD_PAGE_ENTRY:
+#ifdef CONFIG_COMPAT
+	case MTRRIOC32_ADD_PAGE_ENTRY:
+#endif
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		err =
+		    mtrr_file_add(sentry.base, sentry.size, sentry.type, 1,
+				  file, 1);
+		break;
+	case MTRRIOC_SET_PAGE_ENTRY:
+#ifdef CONFIG_COMPAT
+	case MTRRIOC32_SET_PAGE_ENTRY:
+#endif
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0);
+		break;
+	case MTRRIOC_DEL_PAGE_ENTRY:
+#ifdef CONFIG_COMPAT
+	case MTRRIOC32_DEL_PAGE_ENTRY:
+#endif
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		err = mtrr_file_del(sentry.base, sentry.size, file, 1);
+		break;
+	case MTRRIOC_KILL_PAGE_ENTRY:
+#ifdef CONFIG_COMPAT
+	case MTRRIOC32_KILL_PAGE_ENTRY:
+#endif
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		err = mtrr_del_page(-1, sentry.base, sentry.size);
+		break;
+	case MTRRIOC_GET_PAGE_ENTRY:
+#ifdef CONFIG_COMPAT
+	case MTRRIOC32_GET_PAGE_ENTRY:
+#endif
+		if (gentry.regnum >= num_var_ranges)
+			return -EINVAL;
+		mtrr_if->get(gentry.regnum, &gentry.base, &size, &type);
+		/* Hide entries that would overflow */
+		if (size != (__typeof__(gentry.size))size)
+			gentry.base = gentry.size = gentry.type = 0;
+		else {
+			gentry.size = size;
+			gentry.type = type;
+		}
+		break;
+	}
+
+	if (err)
+		return err;
+
+	switch(cmd) {
+	case MTRRIOC_GET_ENTRY:
+	case MTRRIOC_GET_PAGE_ENTRY:
+		if (copy_to_user(arg, &gentry, sizeof gentry))
+			err = -EFAULT;
+		break;
+#ifdef CONFIG_COMPAT
+	case MTRRIOC32_GET_ENTRY:
+	case MTRRIOC32_GET_PAGE_ENTRY: {
+		struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg;
+		err = put_user(gentry.base, &g32->base);
+		err |= put_user(gentry.size, &g32->size);
+		err |= put_user(gentry.regnum, &g32->regnum);
+		err |= put_user(gentry.type, &g32->type);
+		break;
+	}
+#endif
+	}
+	return err;
+}
+
+static int
+mtrr_close(struct inode *ino, struct file *file)
+{
+	int i, max;
+	unsigned int *fcount = FILE_FCOUNT(file);
+
+	if (fcount != NULL) {
+		max = num_var_ranges;
+		for (i = 0; i < max; ++i) {
+			while (fcount[i] > 0) {
+				mtrr_del(i, 0, 0);
+				--fcount[i];
+			}
+		}
+		kfree(fcount);
+		FILE_FCOUNT(file) = NULL;
+	}
+	return single_release(ino, file);
+}
+
+static int mtrr_seq_show(struct seq_file *seq, void *offset);
+
+static int mtrr_open(struct inode *inode, struct file *file)
+{
+	if (!mtrr_if) 
+		return -EIO;
+	if (!mtrr_if->get) 
+		return -ENXIO; 
+	return single_open(file, mtrr_seq_show, NULL);
+}
+
+static const struct file_operations mtrr_fops = {
+	.owner   = THIS_MODULE,
+	.open	 = mtrr_open, 
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.write   = mtrr_write,
+	.unlocked_ioctl = mtrr_ioctl,
+	.compat_ioctl = mtrr_ioctl,
+	.release = mtrr_close,
+};
+
+
+static struct proc_dir_entry *proc_root_mtrr;
+
+
+static int mtrr_seq_show(struct seq_file *seq, void *offset)
+{
+	char factor;
+	int i, max, len;
+	mtrr_type type;
+	unsigned long base, size;
+
+	len = 0;
+	max = num_var_ranges;
+	for (i = 0; i < max; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (size == 0)
+			usage_table[i] = 0;
+		else {
+			if (size < (0x100000 >> PAGE_SHIFT)) {
+				/* less than 1MB */
+				factor = 'K';
+				size <<= PAGE_SHIFT - 10;
+			} else {
+				factor = 'M';
+				size >>= 20 - PAGE_SHIFT;
+			}
+			/* RED-PEN: base can be > 32bit */ 
+			len += seq_printf(seq, 
+				   "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n",
+			     i, base, base >> (20 - PAGE_SHIFT), size, factor,
+			     mtrr_attrib_to_str(type), usage_table[i]);
+		}
+	}
+	return 0;
+}
+
+static int __init mtrr_if_init(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	if ((!cpu_has(c, X86_FEATURE_MTRR)) &&
+	    (!cpu_has(c, X86_FEATURE_K6_MTRR)) &&
+	    (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) &&
+	    (!cpu_has(c, X86_FEATURE_CENTAUR_MCR)))
+		return -ENODEV;
+
+	proc_root_mtrr =
+	    create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root);
+	if (proc_root_mtrr) {
+		proc_root_mtrr->owner = THIS_MODULE;
+		proc_root_mtrr->proc_fops = &mtrr_fops;
+	}
+	return 0;
+}
+
+arch_initcall(mtrr_if_init);
+#endif			/*  CONFIG_PROC_FS  */
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
new file mode 100644
index 00000000000..c48b6fea5ab
--- /dev/null
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -0,0 +1,768 @@
+/*  Generic MTRR (Memory Type Range Register) driver.
+
+    Copyright (C) 1997-2000  Richard Gooch
+    Copyright (c) 2002	     Patrick Mochel
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Library General Public
+    License as published by the Free Software Foundation; either
+    version 2 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Library General Public License for more details.
+
+    You should have received a copy of the GNU Library General Public
+    License along with this library; if not, write to the Free
+    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+    Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
+    The postal address is:
+      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+
+    Source: "Pentium Pro Family Developer's Manual, Volume 3:
+    Operating System Writer's Guide" (Intel document number 242692),
+    section 11.11.7
+
+    This was cleaned and made readable by Patrick Mochel <mochel@osdl.org> 
+    on 6-7 March 2002. 
+    Source: Intel Architecture Software Developers Manual, Volume 3: 
+    System Programming Guide; Section 9.11. (1997 edition - PPro).
+*/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/cpu.h>
+#include <linux/mutex.h>
+
+#include <asm/mtrr.h>
+
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include "mtrr.h"
+
+u32 num_var_ranges = 0;
+
+unsigned int *usage_table;
+static DEFINE_MUTEX(mtrr_mutex);
+
+u64 size_or_mask, size_and_mask;
+
+static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
+
+struct mtrr_ops * mtrr_if = NULL;
+
+static void set_mtrr(unsigned int reg, unsigned long base,
+		     unsigned long size, mtrr_type type);
+
+#ifndef CONFIG_X86_64
+extern int arr3_protected;
+#else
+#define arr3_protected 0
+#endif
+
+void set_mtrr_ops(struct mtrr_ops * ops)
+{
+	if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
+		mtrr_ops[ops->vendor] = ops;
+}
+
+/*  Returns non-zero if we have the write-combining memory type  */
+static int have_wrcomb(void)
+{
+	struct pci_dev *dev;
+	u8 rev;
+	
+	if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) {
+		/* ServerWorks LE chipsets < rev 6 have problems with write-combining
+		   Don't allow it and leave room for other chipsets to be tagged */
+		if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
+		    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) {
+			pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
+			if (rev <= 5) {
+				printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
+				pci_dev_put(dev);
+				return 0;
+			}
+		}
+		/* Intel 450NX errata # 23. Non ascending cacheline evictions to
+		   write combining memory may resulting in data corruption */
+		if (dev->vendor == PCI_VENDOR_ID_INTEL &&
+		    dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
+			printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
+			pci_dev_put(dev);
+			return 0;
+		}
+		pci_dev_put(dev);
+	}		
+	return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0);
+}
+
+/*  This function returns the number of variable MTRRs  */
+static void __init set_num_var_ranges(void)
+{
+	unsigned long config = 0, dummy;
+
+	if (use_intel()) {
+		rdmsr(MTRRcap_MSR, config, dummy);
+	} else if (is_cpu(AMD))
+		config = 2;
+	else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
+		config = 8;
+	num_var_ranges = config & 0xff;
+}
+
+static void __init init_table(void)
+{
+	int i, max;
+
+	max = num_var_ranges;
+	if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
+	    == NULL) {
+		printk(KERN_ERR "mtrr: could not allocate\n");
+		return;
+	}
+	for (i = 0; i < max; i++)
+		usage_table[i] = 1;
+}
+
+struct set_mtrr_data {
+	atomic_t	count;
+	atomic_t	gate;
+	unsigned long	smp_base;
+	unsigned long	smp_size;
+	unsigned int	smp_reg;
+	mtrr_type	smp_type;
+};
+
+#ifdef CONFIG_SMP
+
+static void ipi_handler(void *info)
+/*  [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
+    [RETURNS] Nothing.
+*/
+{
+	struct set_mtrr_data *data = info;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	atomic_dec(&data->count);
+	while(!atomic_read(&data->gate))
+		cpu_relax();
+
+	/*  The master has cleared me to execute  */
+	if (data->smp_reg != ~0U) 
+		mtrr_if->set(data->smp_reg, data->smp_base, 
+			     data->smp_size, data->smp_type);
+	else
+		mtrr_if->set_all();
+
+	atomic_dec(&data->count);
+	while(atomic_read(&data->gate))
+		cpu_relax();
+
+	atomic_dec(&data->count);
+	local_irq_restore(flags);
+}
+
+#endif
+
+static inline int types_compatible(mtrr_type type1, mtrr_type type2) {
+	return type1 == MTRR_TYPE_UNCACHABLE ||
+	       type2 == MTRR_TYPE_UNCACHABLE ||
+	       (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
+	       (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
+}
+
+/**
+ * set_mtrr - update mtrrs on all processors
+ * @reg:	mtrr in question
+ * @base:	mtrr base
+ * @size:	mtrr size
+ * @type:	mtrr type
+ *
+ * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
+ * 
+ * 1. Send IPI to do the following:
+ * 2. Disable Interrupts
+ * 3. Wait for all procs to do so 
+ * 4. Enter no-fill cache mode
+ * 5. Flush caches
+ * 6. Clear PGE bit
+ * 7. Flush all TLBs
+ * 8. Disable all range registers
+ * 9. Update the MTRRs
+ * 10. Enable all range registers
+ * 11. Flush all TLBs and caches again
+ * 12. Enter normal cache mode and reenable caching
+ * 13. Set PGE 
+ * 14. Wait for buddies to catch up
+ * 15. Enable interrupts.
+ * 
+ * What does that mean for us? Well, first we set data.count to the number
+ * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait
+ * until it hits 0 and proceed. We set the data.gate flag and reset data.count.
+ * Meanwhile, they are waiting for that flag to be set. Once it's set, each 
+ * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it 
+ * differently, so we call mtrr_if->set() callback and let them take care of it.
+ * When they're done, they again decrement data->count and wait for data.gate to 
+ * be reset. 
+ * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag.
+ * Everyone then enables interrupts and we all continue on.
+ *
+ * Note that the mechanism is the same for UP systems, too; all the SMP stuff
+ * becomes nops.
+ */
+static void set_mtrr(unsigned int reg, unsigned long base,
+		     unsigned long size, mtrr_type type)
+{
+	struct set_mtrr_data data;
+	unsigned long flags;
+
+	data.smp_reg = reg;
+	data.smp_base = base;
+	data.smp_size = size;
+	data.smp_type = type;
+	atomic_set(&data.count, num_booting_cpus() - 1);
+	/* make sure data.count is visible before unleashing other CPUs */
+	smp_wmb();
+	atomic_set(&data.gate,0);
+
+	/*  Start the ball rolling on other CPUs  */
+	if (smp_call_function(ipi_handler, &data, 1, 0) != 0)
+		panic("mtrr: timed out waiting for other CPUs\n");
+
+	local_irq_save(flags);
+
+	while(atomic_read(&data.count))
+		cpu_relax();
+
+	/* ok, reset count and toggle gate */
+	atomic_set(&data.count, num_booting_cpus() - 1);
+	smp_wmb();
+	atomic_set(&data.gate,1);
+
+	/* do our MTRR business */
+
+	/* HACK!
+	 * We use this same function to initialize the mtrrs on boot.
+	 * The state of the boot cpu's mtrrs has been saved, and we want
+	 * to replicate across all the APs. 
+	 * If we're doing that @reg is set to something special...
+	 */
+	if (reg != ~0U) 
+		mtrr_if->set(reg,base,size,type);
+
+	/* wait for the others */
+	while(atomic_read(&data.count))
+		cpu_relax();
+
+	atomic_set(&data.count, num_booting_cpus() - 1);
+	smp_wmb();
+	atomic_set(&data.gate,0);
+
+	/*
+	 * Wait here for everyone to have seen the gate change
+	 * So we're the last ones to touch 'data'
+	 */
+	while(atomic_read(&data.count))
+		cpu_relax();
+
+	local_irq_restore(flags);
+}
+
+/**
+ *	mtrr_add_page - Add a memory type region
+ *	@base: Physical base address of region in pages (in units of 4 kB!)
+ *	@size: Physical size of region in pages (4 kB)
+ *	@type: Type of MTRR desired
+ *	@increment: If this is true do usage counting on the region
+ *
+ *	Memory type region registers control the caching on newer Intel and
+ *	non Intel processors. This function allows drivers to request an
+ *	MTRR is added. The details and hardware specifics of each processor's
+ *	implementation are hidden from the caller, but nevertheless the 
+ *	caller should expect to need to provide a power of two size on an
+ *	equivalent power of two boundary.
+ *
+ *	If the region cannot be added either because all regions are in use
+ *	or the CPU cannot support it a negative value is returned. On success
+ *	the register number for this entry is returned, but should be treated
+ *	as a cookie only.
+ *
+ *	On a multiprocessor machine the changes are made to all processors.
+ *	This is required on x86 by the Intel processors.
+ *
+ *	The available types are
+ *
+ *	%MTRR_TYPE_UNCACHABLE	-	No caching
+ *
+ *	%MTRR_TYPE_WRBACK	-	Write data back in bursts whenever
+ *
+ *	%MTRR_TYPE_WRCOMB	-	Write data back soon but allow bursts
+ *
+ *	%MTRR_TYPE_WRTHROUGH	-	Cache reads but not writes
+ *
+ *	BUGS: Needs a quiet flag for the cases where drivers do not mind
+ *	failures and do not wish system log messages to be sent.
+ */
+
+int mtrr_add_page(unsigned long base, unsigned long size, 
+		  unsigned int type, char increment)
+{
+	int i, replace, error;
+	mtrr_type ltype;
+	unsigned long lbase, lsize;
+
+	if (!mtrr_if)
+		return -ENXIO;
+		
+	if ((error = mtrr_if->validate_add_page(base,size,type)))
+		return error;
+
+	if (type >= MTRR_NUM_TYPES) {
+		printk(KERN_WARNING "mtrr: type: %u invalid\n", type);
+		return -EINVAL;
+	}
+
+	/*  If the type is WC, check that this processor supports it  */
+	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
+		printk(KERN_WARNING
+		       "mtrr: your processor doesn't support write-combining\n");
+		return -ENOSYS;
+	}
+
+	if (!size) {
+		printk(KERN_WARNING "mtrr: zero sized request\n");
+		return -EINVAL;
+	}
+
+	if (base & size_or_mask || size & size_or_mask) {
+		printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n");
+		return -EINVAL;
+	}
+
+	error = -EINVAL;
+	replace = -1;
+
+	/* No CPU hotplug when we change MTRR entries */
+	lock_cpu_hotplug();
+	/*  Search for existing MTRR  */
+	mutex_lock(&mtrr_mutex);
+	for (i = 0; i < num_var_ranges; ++i) {
+		mtrr_if->get(i, &lbase, &lsize, &ltype);
+		if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase)
+			continue;
+		/*  At this point we know there is some kind of overlap/enclosure  */
+		if (base < lbase || base + size - 1 > lbase + lsize - 1) {
+			if (base <= lbase && base + size - 1 >= lbase + lsize - 1) {
+				/*  New region encloses an existing region  */
+				if (type == ltype) {
+					replace = replace == -1 ? i : -2;
+					continue;
+				}
+				else if (types_compatible(type, ltype))
+					continue;
+			}
+			printk(KERN_WARNING
+			       "mtrr: 0x%lx000,0x%lx000 overlaps existing"
+			       " 0x%lx000,0x%lx000\n", base, size, lbase,
+			       lsize);
+			goto out;
+		}
+		/*  New region is enclosed by an existing region  */
+		if (ltype != type) {
+			if (types_compatible(type, ltype))
+				continue;
+			printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
+			     base, size, mtrr_attrib_to_str(ltype),
+			     mtrr_attrib_to_str(type));
+			goto out;
+		}
+		if (increment)
+			++usage_table[i];
+		error = i;
+		goto out;
+	}
+	/*  Search for an empty MTRR  */
+	i = mtrr_if->get_free_region(base, size, replace);
+	if (i >= 0) {
+		set_mtrr(i, base, size, type);
+		if (likely(replace < 0))
+			usage_table[i] = 1;
+		else {
+			usage_table[i] = usage_table[replace] + !!increment;
+			if (unlikely(replace != i)) {
+				set_mtrr(replace, 0, 0, 0);
+				usage_table[replace] = 0;
+			}
+		}
+	} else
+		printk(KERN_INFO "mtrr: no more MTRRs available\n");
+	error = i;
+ out:
+	mutex_unlock(&mtrr_mutex);
+	unlock_cpu_hotplug();
+	return error;
+}
+
+static int mtrr_check(unsigned long base, unsigned long size)
+{
+	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
+		printk(KERN_WARNING
+			"mtrr: size and base must be multiples of 4 kiB\n");
+		printk(KERN_DEBUG
+			"mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
+		dump_stack();
+		return -1;
+	}
+	return 0;
+}
+
+/**
+ *	mtrr_add - Add a memory type region
+ *	@base: Physical base address of region
+ *	@size: Physical size of region
+ *	@type: Type of MTRR desired
+ *	@increment: If this is true do usage counting on the region
+ *
+ *	Memory type region registers control the caching on newer Intel and
+ *	non Intel processors. This function allows drivers to request an
+ *	MTRR is added. The details and hardware specifics of each processor's
+ *	implementation are hidden from the caller, but nevertheless the 
+ *	caller should expect to need to provide a power of two size on an
+ *	equivalent power of two boundary.
+ *
+ *	If the region cannot be added either because all regions are in use
+ *	or the CPU cannot support it a negative value is returned. On success
+ *	the register number for this entry is returned, but should be treated
+ *	as a cookie only.
+ *
+ *	On a multiprocessor machine the changes are made to all processors.
+ *	This is required on x86 by the Intel processors.
+ *
+ *	The available types are
+ *
+ *	%MTRR_TYPE_UNCACHABLE	-	No caching
+ *
+ *	%MTRR_TYPE_WRBACK	-	Write data back in bursts whenever
+ *
+ *	%MTRR_TYPE_WRCOMB	-	Write data back soon but allow bursts
+ *
+ *	%MTRR_TYPE_WRTHROUGH	-	Cache reads but not writes
+ *
+ *	BUGS: Needs a quiet flag for the cases where drivers do not mind
+ *	failures and do not wish system log messages to be sent.
+ */
+
+int
+mtrr_add(unsigned long base, unsigned long size, unsigned int type,
+	 char increment)
+{
+	if (mtrr_check(base, size))
+		return -EINVAL;
+	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
+			     increment);
+}
+
+/**
+ *	mtrr_del_page - delete a memory type region
+ *	@reg: Register returned by mtrr_add
+ *	@base: Physical base address
+ *	@size: Size of region
+ *
+ *	If register is supplied then base and size are ignored. This is
+ *	how drivers should call it.
+ *
+ *	Releases an MTRR region. If the usage count drops to zero the 
+ *	register is freed and the region returns to default state.
+ *	On success the register is returned, on failure a negative error
+ *	code.
+ */
+
+int mtrr_del_page(int reg, unsigned long base, unsigned long size)
+{
+	int i, max;
+	mtrr_type ltype;
+	unsigned long lbase, lsize;
+	int error = -EINVAL;
+
+	if (!mtrr_if)
+		return -ENXIO;
+
+	max = num_var_ranges;
+	/* No CPU hotplug when we change MTRR entries */
+	lock_cpu_hotplug();
+	mutex_lock(&mtrr_mutex);
+	if (reg < 0) {
+		/*  Search for existing MTRR  */
+		for (i = 0; i < max; ++i) {
+			mtrr_if->get(i, &lbase, &lsize, &ltype);
+			if (lbase == base && lsize == size) {
+				reg = i;
+				break;
+			}
+		}
+		if (reg < 0) {
+			printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base,
+			       size);
+			goto out;
+		}
+	}
+	if (reg >= max) {
+		printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
+		goto out;
+	}
+	if (is_cpu(CYRIX) && !use_intel()) {
+		if ((reg == 3) && arr3_protected) {
+			printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n");
+			goto out;
+		}
+	}
+	mtrr_if->get(reg, &lbase, &lsize, &ltype);
+	if (lsize < 1) {
+		printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
+		goto out;
+	}
+	if (usage_table[reg] < 1) {
+		printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
+		goto out;
+	}
+	if (--usage_table[reg] < 1)
+		set_mtrr(reg, 0, 0, 0);
+	error = reg;
+ out:
+	mutex_unlock(&mtrr_mutex);
+	unlock_cpu_hotplug();
+	return error;
+}
+/**
+ *	mtrr_del - delete a memory type region
+ *	@reg: Register returned by mtrr_add
+ *	@base: Physical base address
+ *	@size: Size of region
+ *
+ *	If register is supplied then base and size are ignored. This is
+ *	how drivers should call it.
+ *
+ *	Releases an MTRR region. If the usage count drops to zero the 
+ *	register is freed and the region returns to default state.
+ *	On success the register is returned, on failure a negative error
+ *	code.
+ */
+
+int
+mtrr_del(int reg, unsigned long base, unsigned long size)
+{
+	if (mtrr_check(base, size))
+		return -EINVAL;
+	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
+}
+
+EXPORT_SYMBOL(mtrr_add);
+EXPORT_SYMBOL(mtrr_del);
+
+/* HACK ALERT!
+ * These should be called implicitly, but we can't yet until all the initcall
+ * stuff is done...
+ */
+extern void amd_init_mtrr(void);
+extern void cyrix_init_mtrr(void);
+extern void centaur_init_mtrr(void);
+
+static void __init init_ifs(void)
+{
+#ifndef CONFIG_X86_64
+	amd_init_mtrr();
+	cyrix_init_mtrr();
+	centaur_init_mtrr();
+#endif
+}
+
+/* The suspend/resume methods are only for CPU without MTRR. CPU using generic
+ * MTRR driver doesn't require this
+ */
+struct mtrr_value {
+	mtrr_type	ltype;
+	unsigned long	lbase;
+	unsigned long	lsize;
+};
+
+static struct mtrr_value * mtrr_state;
+
+static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
+{
+	int i;
+	int size = num_var_ranges * sizeof(struct mtrr_value);
+
+	mtrr_state = kzalloc(size,GFP_ATOMIC);
+	if (!mtrr_state)
+		return -ENOMEM;
+
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i,
+			     &mtrr_state[i].lbase,
+			     &mtrr_state[i].lsize,
+			     &mtrr_state[i].ltype);
+	}
+	return 0;
+}
+
+static int mtrr_restore(struct sys_device * sysdev)
+{
+	int i;
+
+	for (i = 0; i < num_var_ranges; i++) {
+		if (mtrr_state[i].lsize) 
+			set_mtrr(i,
+				 mtrr_state[i].lbase,
+				 mtrr_state[i].lsize,
+				 mtrr_state[i].ltype);
+	}
+	kfree(mtrr_state);
+	return 0;
+}
+
+
+
+static struct sysdev_driver mtrr_sysdev_driver = {
+	.suspend	= mtrr_save,
+	.resume		= mtrr_restore,
+};
+
+
+/**
+ * mtrr_bp_init - initialize mtrrs on the boot CPU
+ *
+ * This needs to be called early; before any of the other CPUs are 
+ * initialized (i.e. before smp_init()).
+ * 
+ */
+void __init mtrr_bp_init(void)
+{
+	init_ifs();
+
+	if (cpu_has_mtrr) {
+		mtrr_if = &generic_mtrr_ops;
+		size_or_mask = 0xff000000;	/* 36 bits */
+		size_and_mask = 0x00f00000;
+
+		/* This is an AMD specific MSR, but we assume(hope?) that
+		   Intel will implement it to when they extend the address
+		   bus of the Xeon. */
+		if (cpuid_eax(0x80000000) >= 0x80000008) {
+			u32 phys_addr;
+			phys_addr = cpuid_eax(0x80000008) & 0xff;
+			/* CPUID workaround for Intel 0F33/0F34 CPU */
+			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+			    boot_cpu_data.x86 == 0xF &&
+			    boot_cpu_data.x86_model == 0x3 &&
+			    (boot_cpu_data.x86_mask == 0x3 ||
+			     boot_cpu_data.x86_mask == 0x4))
+				phys_addr = 36;
+
+			size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1);
+			size_and_mask = ~size_or_mask & 0xfffff00000ULL;
+		} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
+			   boot_cpu_data.x86 == 6) {
+			/* VIA C* family have Intel style MTRRs, but
+			   don't support PAE */
+			size_or_mask = 0xfff00000;	/* 32 bits */
+			size_and_mask = 0;
+		}
+	} else {
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_AMD:
+			if (cpu_has_k6_mtrr) {
+				/* Pre-Athlon (K6) AMD CPU MTRRs */
+				mtrr_if = mtrr_ops[X86_VENDOR_AMD];
+				size_or_mask = 0xfff00000;	/* 32 bits */
+				size_and_mask = 0;
+			}
+			break;
+		case X86_VENDOR_CENTAUR:
+			if (cpu_has_centaur_mcr) {
+				mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
+				size_or_mask = 0xfff00000;	/* 32 bits */
+				size_and_mask = 0;
+			}
+			break;
+		case X86_VENDOR_CYRIX:
+			if (cpu_has_cyrix_arr) {
+				mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
+				size_or_mask = 0xfff00000;	/* 32 bits */
+				size_and_mask = 0;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (mtrr_if) {
+		set_num_var_ranges();
+		init_table();
+		if (use_intel())
+			get_mtrr_state();
+	}
+}
+
+void mtrr_ap_init(void)
+{
+	unsigned long flags;
+
+	if (!mtrr_if || !use_intel())
+		return;
+	/*
+	 * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed,
+	 * but this routine will be called in cpu boot time, holding the lock
+	 * breaks it. This routine is called in two cases: 1.very earily time
+	 * of software resume, when there absolutely isn't mtrr entry changes;
+	 * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to
+	 * prevent mtrr entry changes
+	 */
+	local_irq_save(flags);
+
+	mtrr_if->set_all();
+
+	local_irq_restore(flags);
+}
+
+/**
+ * Save current fixed-range MTRR state of the BSP
+ */
+void mtrr_save_state(void)
+{
+	int cpu = get_cpu();
+
+	if (cpu == 0)
+		mtrr_save_fixed_ranges(NULL);
+	else
+		smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1);
+	put_cpu();
+}
+
+static int __init mtrr_init_finialize(void)
+{
+	if (!mtrr_if)
+		return 0;
+	if (use_intel())
+		mtrr_state_warn();
+	else {
+		/* The CPUs haven't MTRR and seemes not support SMP. They have
+		 * specific drivers, we use a tricky method to support
+		 * suspend/resume for them.
+		 * TBD: is there any system with such CPU which supports
+		 * suspend/resume?  if no, we should remove the code.
+		 */
+		sysdev_driver_register(&cpu_sysdev_class,
+			&mtrr_sysdev_driver);
+	}
+	return 0;
+}
+subsys_initcall(mtrr_init_finialize);
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
new file mode 100644
index 00000000000..289dfe6030e
--- /dev/null
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -0,0 +1,98 @@
+/*
+ * local mtrr defines.
+ */
+
+#ifndef TRUE
+#define TRUE  1
+#define FALSE 0
+#endif
+
+#define MTRRcap_MSR     0x0fe
+#define MTRRdefType_MSR 0x2ff
+
+#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
+#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
+
+#define NUM_FIXED_RANGES 88
+#define MTRRfix64K_00000_MSR 0x250
+#define MTRRfix16K_80000_MSR 0x258
+#define MTRRfix16K_A0000_MSR 0x259
+#define MTRRfix4K_C0000_MSR 0x268
+#define MTRRfix4K_C8000_MSR 0x269
+#define MTRRfix4K_D0000_MSR 0x26a
+#define MTRRfix4K_D8000_MSR 0x26b
+#define MTRRfix4K_E0000_MSR 0x26c
+#define MTRRfix4K_E8000_MSR 0x26d
+#define MTRRfix4K_F0000_MSR 0x26e
+#define MTRRfix4K_F8000_MSR 0x26f
+
+#define MTRR_CHANGE_MASK_FIXED     0x01
+#define MTRR_CHANGE_MASK_VARIABLE  0x02
+#define MTRR_CHANGE_MASK_DEFTYPE   0x04
+
+/* In the Intel processor's MTRR interface, the MTRR type is always held in
+   an 8 bit field: */
+typedef u8 mtrr_type;
+
+struct mtrr_ops {
+	u32	vendor;
+	u32	use_intel_if;
+//	void	(*init)(void);
+	void	(*set)(unsigned int reg, unsigned long base,
+		       unsigned long size, mtrr_type type);
+	void	(*set_all)(void);
+
+	void	(*get)(unsigned int reg, unsigned long *base,
+		       unsigned long *size, mtrr_type * type);
+	int	(*get_free_region)(unsigned long base, unsigned long size,
+				   int replace_reg);
+	int	(*validate_add_page)(unsigned long base, unsigned long size,
+				     unsigned int type);
+	int	(*have_wrcomb)(void);
+};
+
+extern int generic_get_free_region(unsigned long base, unsigned long size,
+				   int replace_reg);
+extern int generic_validate_add_page(unsigned long base, unsigned long size,
+				     unsigned int type);
+
+extern struct mtrr_ops generic_mtrr_ops;
+
+extern int positive_have_wrcomb(void);
+
+/* library functions for processor-specific routines */
+struct set_mtrr_context {
+	unsigned long flags;
+	unsigned long cr4val;
+	u32 deftype_lo;
+	u32 deftype_hi;
+	u32 ccr3;
+};
+
+struct mtrr_var_range {
+	u32 base_lo;
+	u32 base_hi;
+	u32 mask_lo;
+	u32 mask_hi;
+};
+
+void set_mtrr_done(struct set_mtrr_context *ctxt);
+void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
+void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
+
+void get_mtrr_state(void);
+
+extern void set_mtrr_ops(struct mtrr_ops * ops);
+
+extern u64 size_or_mask, size_and_mask;
+extern struct mtrr_ops * mtrr_if;
+
+#define is_cpu(vnd)	(mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
+#define use_intel()	(mtrr_if && mtrr_if->use_intel_if == 1)
+
+extern unsigned int num_var_ranges;
+
+void mtrr_state_warn(void);
+const char *mtrr_attrib_to_str(int x);
+void mtrr_wrmsr(unsigned, unsigned, unsigned);
+
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c
new file mode 100644
index 00000000000..49e20c2afcd
--- /dev/null
+++ b/arch/x86/kernel/cpu/mtrr/state.c
@@ -0,0 +1,79 @@
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <asm/io.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+#include <asm/processor-cyrix.h>
+#include "mtrr.h"
+
+
+/*  Put the processor into a state where MTRRs can be safely set  */
+void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
+{
+	unsigned int cr0;
+
+	/*  Disable interrupts locally  */
+	local_irq_save(ctxt->flags);
+
+	if (use_intel() || is_cpu(CYRIX)) {
+
+		/*  Save value of CR4 and clear Page Global Enable (bit 7)  */
+		if ( cpu_has_pge ) {
+			ctxt->cr4val = read_cr4();
+			write_cr4(ctxt->cr4val & ~X86_CR4_PGE);
+		}
+
+		/*  Disable and flush caches. Note that wbinvd flushes the TLBs as
+		    a side-effect  */
+		cr0 = read_cr0() | 0x40000000;
+		wbinvd();
+		write_cr0(cr0);
+		wbinvd();
+
+		if (use_intel())
+			/*  Save MTRR state */
+			rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+		else
+			/* Cyrix ARRs - everything else were excluded at the top */
+			ctxt->ccr3 = getCx86(CX86_CCR3);
+	}
+}
+
+void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
+{
+	if (use_intel()) 
+		/*  Disable MTRRs, and set the default type to uncached  */
+		mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL,
+		      ctxt->deftype_hi);
+	else if (is_cpu(CYRIX))
+		/* Cyrix ARRs - everything else were excluded at the top */
+		setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
+}
+
+/*  Restore the processor after a set_mtrr_prepare  */
+void set_mtrr_done(struct set_mtrr_context *ctxt)
+{
+	if (use_intel() || is_cpu(CYRIX)) {
+
+		/*  Flush caches and TLBs  */
+		wbinvd();
+
+		/*  Restore MTRRdefType  */
+		if (use_intel())
+			/* Intel (P6) standard MTRRs */
+			mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+		else
+			/* Cyrix ARRs - everything else was excluded at the top */
+			setCx86(CX86_CCR3, ctxt->ccr3);
+		
+		/*  Enable caches  */
+		write_cr0(read_cr0() & 0xbfffffff);
+
+		/*  Restore value of CR4  */
+		if ( cpu_has_pge )
+			write_cr4(ctxt->cr4val);
+	}
+	/*  Re-enable interrupts locally (if enabled previously)  */
+	local_irq_restore(ctxt->flags);
+}
+
diff --git a/arch/x86/kernel/cpu/nexgen.c b/arch/x86/kernel/cpu/nexgen.c
new file mode 100644
index 00000000000..961fbe1a748
--- /dev/null
+++ b/arch/x86/kernel/cpu/nexgen.c
@@ -0,0 +1,60 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <asm/processor.h>
+
+#include "cpu.h"
+
+/*
+ *	Detect a NexGen CPU running without BIOS hypercode new enough
+ *	to have CPUID. (Thanks to Herbert Oppmann)
+ */
+ 
+static int __cpuinit deep_magic_nexgen_probe(void)
+{
+	int ret;
+	
+	__asm__ __volatile__ (
+		"	movw	$0x5555, %%ax\n"
+		"	xorw	%%dx,%%dx\n"
+		"	movw	$2, %%cx\n"
+		"	divw	%%cx\n"
+		"	movl	$0, %%eax\n"
+		"	jnz	1f\n"
+		"	movl	$1, %%eax\n"
+		"1:\n" 
+		: "=a" (ret) : : "cx", "dx" );
+	return  ret;
+}
+
+static void __cpuinit init_nexgen(struct cpuinfo_x86 * c)
+{
+	c->x86_cache_size = 256; /* A few had 1 MB... */
+}
+
+static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c)
+{
+	/* Detect NexGen with old hypercode */
+	if ( deep_magic_nexgen_probe() ) {
+		strcpy(c->x86_vendor_id, "NexGenDriven");
+	}
+}
+
+static struct cpu_dev nexgen_cpu_dev __cpuinitdata = {
+	.c_vendor	= "Nexgen",
+	.c_ident	= { "NexGenDriven" },
+	.c_models = {
+			{ .vendor = X86_VENDOR_NEXGEN,
+			  .family = 5,
+			  .model_names = { [1] = "Nx586" }
+			},
+	},
+	.c_init		= init_nexgen,
+	.c_identify	= nexgen_identify,
+};
+
+int __init nexgen_init_cpu(void)
+{
+	cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev;
+	return 0;
+}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
new file mode 100644
index 00000000000..93fecd4b03d
--- /dev/null
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -0,0 +1,713 @@
+/* local apic based NMI watchdog for various CPUs.
+   This file also handles reservation of performance counters for coordination
+   with other users (like oprofile).
+
+   Note that these events normally don't tick when the CPU idles. This means
+   the frequency varies with CPU load.
+
+   Original code for K7/P6 written by Keith Owens */
+
+#include <linux/percpu.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/smp.h>
+#include <linux/nmi.h>
+#include <asm/apic.h>
+#include <asm/intel_arch_perfmon.h>
+
+struct nmi_watchdog_ctlblk {
+	unsigned int cccr_msr;
+	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
+	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
+};
+
+/* Interface defining a CPU specific perfctr watchdog */
+struct wd_ops {
+	int (*reserve)(void);
+	void (*unreserve)(void);
+	int (*setup)(unsigned nmi_hz);
+	void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
+	void (*stop)(void);
+	unsigned perfctr;
+	unsigned evntsel;
+	u64 checkbit;
+};
+
+static struct wd_ops *wd_ops;
+
+/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
+ * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for now)
+ */
+#define NMI_MAX_COUNTER_BITS 66
+
+/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
+ * evtsel_nmi_owner tracks the ownership of the event selection
+ * - different performance counters/ event selection may be reserved for
+ *   different subsystems this reservation system just tries to coordinate
+ *   things a little
+ */
+static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
+static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
+
+static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
+
+/* converts an msr to an appropriate reservation bit */
+static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
+{
+	/* returns the bit offset of the performance counter register */
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		return (msr - MSR_K7_PERFCTR0);
+	case X86_VENDOR_INTEL:
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+			return (msr - MSR_ARCH_PERFMON_PERFCTR0);
+
+		switch (boot_cpu_data.x86) {
+		case 6:
+			return (msr - MSR_P6_PERFCTR0);
+		case 15:
+			return (msr - MSR_P4_BPU_PERFCTR0);
+		}
+	}
+	return 0;
+}
+
+/* converts an msr to an appropriate reservation bit */
+/* returns the bit offset of the event selection register */
+static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
+{
+	/* returns the bit offset of the event selection register */
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		return (msr - MSR_K7_EVNTSEL0);
+	case X86_VENDOR_INTEL:
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
+
+		switch (boot_cpu_data.x86) {
+		case 6:
+			return (msr - MSR_P6_EVNTSEL0);
+		case 15:
+			return (msr - MSR_P4_BSU_ESCR0);
+		}
+	}
+	return 0;
+
+}
+
+/* checks for a bit availability (hack for oprofile) */
+int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
+{
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	return (!test_bit(counter, perfctr_nmi_owner));
+}
+
+/* checks the an msr for availability */
+int avail_to_resrv_perfctr_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_perfctr_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	return (!test_bit(counter, perfctr_nmi_owner));
+}
+
+int reserve_perfctr_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_perfctr_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	if (!test_and_set_bit(counter, perfctr_nmi_owner))
+		return 1;
+	return 0;
+}
+
+void release_perfctr_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_perfctr_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	clear_bit(counter, perfctr_nmi_owner);
+}
+
+int reserve_evntsel_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_evntsel_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	if (!test_and_set_bit(counter, evntsel_nmi_owner))
+		return 1;
+	return 0;
+}
+
+void release_evntsel_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_evntsel_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	clear_bit(counter, evntsel_nmi_owner);
+}
+
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
+EXPORT_SYMBOL(reserve_perfctr_nmi);
+EXPORT_SYMBOL(release_perfctr_nmi);
+EXPORT_SYMBOL(reserve_evntsel_nmi);
+EXPORT_SYMBOL(release_evntsel_nmi);
+
+void disable_lapic_nmi_watchdog(void)
+{
+	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
+
+	if (atomic_read(&nmi_active) <= 0)
+		return;
+
+	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
+	wd_ops->unreserve();
+
+	BUG_ON(atomic_read(&nmi_active) != 0);
+}
+
+void enable_lapic_nmi_watchdog(void)
+{
+	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
+
+	/* are we already enabled */
+	if (atomic_read(&nmi_active) != 0)
+		return;
+
+	/* are we lapic aware */
+	if (!wd_ops)
+		return;
+	if (!wd_ops->reserve()) {
+		printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
+		return;
+	}
+
+	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+	touch_nmi_watchdog();
+}
+
+/*
+ * Activate the NMI watchdog via the local APIC.
+ */
+
+static unsigned int adjust_for_32bit_ctr(unsigned int hz)
+{
+	u64 counter_val;
+	unsigned int retval = hz;
+
+	/*
+	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
+	 * are writable, with higher bits sign extending from bit 31.
+	 * So, we can only program the counter with 31 bit values and
+	 * 32nd bit should be 1, for 33.. to be 1.
+	 * Find the appropriate nmi_hz
+	 */
+	counter_val = (u64)cpu_khz * 1000;
+	do_div(counter_val, retval);
+ 	if (counter_val > 0x7fffffffULL) {
+		u64 count = (u64)cpu_khz * 1000;
+		do_div(count, 0x7fffffffUL);
+		retval = count + 1;
+	}
+	return retval;
+}
+
+static void
+write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz)
+{
+	u64 count = (u64)cpu_khz * 1000;
+
+	do_div(count, nmi_hz);
+	if(descr)
+		Dprintk("setting %s to -0x%08Lx\n", descr, count);
+	wrmsrl(perfctr_msr, 0 - count);
+}
+
+static void write_watchdog_counter32(unsigned int perfctr_msr,
+		const char *descr, unsigned nmi_hz)
+{
+	u64 count = (u64)cpu_khz * 1000;
+
+	do_div(count, nmi_hz);
+	if(descr)
+		Dprintk("setting %s to -0x%08Lx\n", descr, count);
+	wrmsr(perfctr_msr, (u32)(-count), 0);
+}
+
+/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface
+   nicely stable so there is not much variety */
+
+#define K7_EVNTSEL_ENABLE	(1 << 22)
+#define K7_EVNTSEL_INT		(1 << 20)
+#define K7_EVNTSEL_OS		(1 << 17)
+#define K7_EVNTSEL_USR		(1 << 16)
+#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
+#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+
+static int setup_k7_watchdog(unsigned nmi_hz)
+{
+	unsigned int perfctr_msr, evntsel_msr;
+	unsigned int evntsel;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	perfctr_msr = wd_ops->perfctr;
+	evntsel_msr = wd_ops->evntsel;
+
+	wrmsrl(perfctr_msr, 0UL);
+
+	evntsel = K7_EVNTSEL_INT
+		| K7_EVNTSEL_OS
+		| K7_EVNTSEL_USR
+		| K7_NMI_EVENT;
+
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	evntsel |= K7_EVNTSEL_ENABLE;
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	return 1;
+}
+
+static void single_msr_stop_watchdog(void)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	wrmsr(wd->evntsel_msr, 0, 0);
+}
+
+static int single_msr_reserve(void)
+{
+	if (!reserve_perfctr_nmi(wd_ops->perfctr))
+		return 0;
+
+	if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
+		release_perfctr_nmi(wd_ops->perfctr);
+		return 0;
+	}
+	return 1;
+}
+
+static void single_msr_unreserve(void)
+{
+	release_evntsel_nmi(wd_ops->evntsel);
+	release_perfctr_nmi(wd_ops->perfctr);
+}
+
+static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+{
+	/* start the cycle over again */
+	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
+}
+
+static struct wd_ops k7_wd_ops = {
+	.reserve = single_msr_reserve,
+	.unreserve = single_msr_unreserve,
+	.setup = setup_k7_watchdog,
+	.rearm = single_msr_rearm,
+	.stop = single_msr_stop_watchdog,
+	.perfctr = MSR_K7_PERFCTR0,
+	.evntsel = MSR_K7_EVNTSEL0,
+	.checkbit = 1ULL<<47,
+};
+
+/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */
+
+#define P6_EVNTSEL0_ENABLE	(1 << 22)
+#define P6_EVNTSEL_INT		(1 << 20)
+#define P6_EVNTSEL_OS		(1 << 17)
+#define P6_EVNTSEL_USR		(1 << 16)
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
+#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
+
+static int setup_p6_watchdog(unsigned nmi_hz)
+{
+	unsigned int perfctr_msr, evntsel_msr;
+	unsigned int evntsel;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	perfctr_msr = wd_ops->perfctr;
+	evntsel_msr = wd_ops->evntsel;
+
+	/* KVM doesn't implement this MSR */
+	if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
+		return 0;
+
+	evntsel = P6_EVNTSEL_INT
+		| P6_EVNTSEL_OS
+		| P6_EVNTSEL_USR
+		| P6_NMI_EVENT;
+
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
+	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	evntsel |= P6_EVNTSEL0_ENABLE;
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	return 1;
+}
+
+static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+{
+	/* P6 based Pentium M need to re-unmask
+	 * the apic vector but it doesn't hurt
+	 * other P6 variant.
+	 * ArchPerfom/Core Duo also needs this */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	/* P6/ARCH_PERFMON has 32 bit counter write */
+	write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz);
+}
+
+static struct wd_ops p6_wd_ops = {
+	.reserve = single_msr_reserve,
+	.unreserve = single_msr_unreserve,
+	.setup = setup_p6_watchdog,
+	.rearm = p6_rearm,
+	.stop = single_msr_stop_watchdog,
+	.perfctr = MSR_P6_PERFCTR0,
+	.evntsel = MSR_P6_EVNTSEL0,
+	.checkbit = 1ULL<<39,
+};
+
+/* Intel P4 performance counters. By far the most complicated of all. */
+
+#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
+#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
+#define P4_ESCR_OS		(1<<3)
+#define P4_ESCR_USR		(1<<2)
+#define P4_CCCR_OVF_PMI0	(1<<26)
+#define P4_CCCR_OVF_PMI1	(1<<27)
+#define P4_CCCR_THRESHOLD(N)	((N)<<20)
+#define P4_CCCR_COMPLEMENT	(1<<19)
+#define P4_CCCR_COMPARE		(1<<18)
+#define P4_CCCR_REQUIRED	(3<<16)
+#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
+#define P4_CCCR_ENABLE		(1<<12)
+#define P4_CCCR_OVF 		(1<<31)
+
+/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
+   CRU_ESCR0 (with any non-null event selector) through a complemented
+   max threshold. [IA32-Vol3, Section 14.9.9] */
+
+static int setup_p4_watchdog(unsigned nmi_hz)
+{
+	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
+	unsigned int evntsel, cccr_val;
+	unsigned int misc_enable, dummy;
+	unsigned int ht_num;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
+	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
+		return 0;
+
+#ifdef CONFIG_SMP
+	/* detect which hyperthread we are on */
+	if (smp_num_siblings == 2) {
+		unsigned int ebx, apicid;
+
+        	ebx = cpuid_ebx(1);
+	        apicid = (ebx >> 24) & 0xff;
+        	ht_num = apicid & 1;
+	} else
+#endif
+		ht_num = 0;
+
+	/* performance counters are shared resources
+	 * assign each hyperthread its own set
+	 * (re-use the ESCR0 register, seems safe
+	 * and keeps the cccr_val the same)
+	 */
+	if (!ht_num) {
+		/* logical cpu 0 */
+		perfctr_msr = MSR_P4_IQ_PERFCTR0;
+		evntsel_msr = MSR_P4_CRU_ESCR0;
+		cccr_msr = MSR_P4_IQ_CCCR0;
+		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
+	} else {
+		/* logical cpu 1 */
+		perfctr_msr = MSR_P4_IQ_PERFCTR1;
+		evntsel_msr = MSR_P4_CRU_ESCR0;
+		cccr_msr = MSR_P4_IQ_CCCR1;
+		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
+	}
+
+	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
+	 	| P4_ESCR_OS
+		| P4_ESCR_USR;
+
+	cccr_val |= P4_CCCR_THRESHOLD(15)
+		 | P4_CCCR_COMPLEMENT
+		 | P4_CCCR_COMPARE
+		 | P4_CCCR_REQUIRED;
+
+	wrmsr(evntsel_msr, evntsel, 0);
+	wrmsr(cccr_msr, cccr_val, 0);
+	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	cccr_val |= P4_CCCR_ENABLE;
+	wrmsr(cccr_msr, cccr_val, 0);
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = cccr_msr;
+	return 1;
+}
+
+static void stop_p4_watchdog(void)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+	wrmsr(wd->cccr_msr, 0, 0);
+	wrmsr(wd->evntsel_msr, 0, 0);
+}
+
+static int p4_reserve(void)
+{
+	if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
+		return 0;
+#ifdef CONFIG_SMP
+	if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
+		goto fail1;
+#endif
+	if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
+		goto fail2;
+	/* RED-PEN why is ESCR1 not reserved here? */
+	return 1;
+ fail2:
+#ifdef CONFIG_SMP
+	if (smp_num_siblings > 1)
+		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
+ fail1:
+#endif
+	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
+	return 0;
+}
+
+static void p4_unreserve(void)
+{
+#ifdef CONFIG_SMP
+	if (smp_num_siblings > 1)
+		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
+#endif
+	release_evntsel_nmi(MSR_P4_CRU_ESCR0);
+	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
+}
+
+static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+{
+	unsigned dummy;
+	/*
+ 	 * P4 quirks:
+	 * - An overflown perfctr will assert its interrupt
+	 *   until the OVF flag in its CCCR is cleared.
+	 * - LVTPC is masked on interrupt and must be
+	 *   unmasked by the LVTPC handler.
+	 */
+	rdmsrl(wd->cccr_msr, dummy);
+	dummy &= ~P4_CCCR_OVF;
+	wrmsrl(wd->cccr_msr, dummy);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	/* start the cycle over again */
+	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
+}
+
+static struct wd_ops p4_wd_ops = {
+	.reserve = p4_reserve,
+	.unreserve = p4_unreserve,
+	.setup = setup_p4_watchdog,
+	.rearm = p4_rearm,
+	.stop = stop_p4_watchdog,
+	/* RED-PEN this is wrong for the other sibling */
+	.perfctr = MSR_P4_BPU_PERFCTR0,
+	.evntsel = MSR_P4_BSU_ESCR0,
+	.checkbit = 1ULL<<39,
+};
+
+/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully
+   all future Intel CPUs. */
+
+#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
+static int setup_intel_arch_watchdog(unsigned nmi_hz)
+{
+	unsigned int ebx;
+	union cpuid10_eax eax;
+	unsigned int unused;
+	unsigned int perfctr_msr, evntsel_msr;
+	unsigned int evntsel;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	/*
+	 * Check whether the Architectural PerfMon supports
+	 * Unhalted Core Cycles Event or not.
+	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
+	 */
+	cpuid(10, &(eax.full), &ebx, &unused, &unused);
+	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+		return 0;
+
+	perfctr_msr = wd_ops->perfctr;
+	evntsel_msr = wd_ops->evntsel;
+
+	wrmsrl(perfctr_msr, 0UL);
+
+	evntsel = ARCH_PERFMON_EVENTSEL_INT
+		| ARCH_PERFMON_EVENTSEL_OS
+		| ARCH_PERFMON_EVENTSEL_USR
+		| ARCH_PERFMON_NMI_EVENT_SEL
+		| ARCH_PERFMON_NMI_EVENT_UMASK;
+
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
+	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	wd_ops->checkbit = 1ULL << (eax.split.bit_width - 1);
+	return 1;
+}
+
+static struct wd_ops intel_arch_wd_ops = {
+	.reserve = single_msr_reserve,
+	.unreserve = single_msr_unreserve,
+	.setup = setup_intel_arch_watchdog,
+	.rearm = p6_rearm,
+	.stop = single_msr_stop_watchdog,
+	.perfctr = MSR_ARCH_PERFMON_PERFCTR1,
+	.evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
+};
+
+static struct wd_ops coreduo_wd_ops = {
+	.reserve = single_msr_reserve,
+	.unreserve = single_msr_unreserve,
+	.setup = setup_intel_arch_watchdog,
+	.rearm = p6_rearm,
+	.stop = single_msr_stop_watchdog,
+	.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+	.evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
+};
+
+static void probe_nmi_watchdog(void)
+{
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
+		    boot_cpu_data.x86 != 16)
+			return;
+		wd_ops = &k7_wd_ops;
+		break;
+	case X86_VENDOR_INTEL:
+		/* Work around Core Duo (Yonah) errata AE49 where perfctr1
+		   doesn't have a working enable bit. */
+		if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
+			wd_ops = &coreduo_wd_ops;
+			break;
+		}
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+			wd_ops = &intel_arch_wd_ops;
+			break;
+		}
+		switch (boot_cpu_data.x86) {
+		case 6:
+			if (boot_cpu_data.x86_model > 0xd)
+				return;
+
+			wd_ops = &p6_wd_ops;
+			break;
+		case 15:
+			if (boot_cpu_data.x86_model > 0x4)
+				return;
+
+			wd_ops = &p4_wd_ops;
+			break;
+		default:
+			return;
+		}
+		break;
+	}
+}
+
+/* Interface to nmi.c */
+
+int lapic_watchdog_init(unsigned nmi_hz)
+{
+	if (!wd_ops) {
+		probe_nmi_watchdog();
+		if (!wd_ops)
+			return -1;
+
+		if (!wd_ops->reserve()) {
+			printk(KERN_ERR
+				"NMI watchdog: cannot reserve perfctrs\n");
+			return -1;
+		}
+	}
+
+	if (!(wd_ops->setup(nmi_hz))) {
+		printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
+		       raw_smp_processor_id());
+		return -1;
+	}
+
+	return 0;
+}
+
+void lapic_watchdog_stop(void)
+{
+	if (wd_ops)
+		wd_ops->stop();
+}
+
+unsigned lapic_adjust_nmi_hz(unsigned hz)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+	if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
+	    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
+		hz = adjust_for_32bit_ctr(hz);
+	return hz;
+}
+
+int lapic_wd_event(unsigned nmi_hz)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+	u64 ctr;
+	rdmsrl(wd->perfctr_msr, ctr);
+	if (ctr & wd_ops->checkbit) { /* perfctr still running? */
+		return 0;
+	}
+	wd_ops->rearm(wd, nmi_hz);
+	return 1;
+}
+
+int lapic_watchdog_ok(void)
+{
+	return wd_ops != NULL;
+}
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
new file mode 100644
index 00000000000..1e31b6caffb
--- /dev/null
+++ b/arch/x86/kernel/cpu/proc.c
@@ -0,0 +1,192 @@
+#include <linux/smp.h>
+#include <linux/timex.h>
+#include <linux/string.h>
+#include <asm/semaphore.h>
+#include <linux/seq_file.h>
+#include <linux/cpufreq.h>
+
+/*
+ *	Get CPU information for use by the procfs.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+	/* 
+	 * These flag bits must match the definitions in <asm/cpufeature.h>.
+	 * NULL means this bit is undefined or reserved; either way it doesn't
+	 * have meaning as far as Linux is concerned.  Note that it's important
+	 * to realize there is a difference between this table and CPUID -- if
+	 * applications want to get the raw CPUID data, they should access
+	 * /dev/cpu/<cpu_nr>/cpuid instead.
+	 */
+	static const char * const x86_cap_flags[] = {
+		/* Intel-defined */
+	        "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+	        "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
+	        "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
+	        "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
+
+		/* AMD-defined */
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
+		NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
+		"3dnowext", "3dnow",
+
+		/* Transmeta-defined */
+		"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+		/* Other (Linux-defined) */
+		"cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
+		NULL, NULL, NULL, NULL,
+		"constant_tsc", "up", NULL, "arch_perfmon",
+		"pebs", "bts", NULL, "sync_rdtsc",
+		"rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+		/* Intel-defined (#2) */
+		"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
+		"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
+		NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt",
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+		/* VIA/Cyrix/Centaur-defined */
+		NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
+		"ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+		/* AMD-defined (#2) */
+		"lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy",
+		"altmovcr8", "abm", "sse4a",
+		"misalignsse", "3dnowprefetch",
+		"osvw", "ibs", NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+		/* Auxiliary (Linux-defined) */
+		"ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	};
+	static const char * const x86_power_flags[] = {
+		"ts",	/* temperature sensor */
+		"fid",  /* frequency id control */
+		"vid",  /* voltage id control */
+		"ttp",  /* thermal trip */
+		"tm",
+		"stc",
+		"100mhzsteps",
+		"hwpstate",
+		"",	/* constant_tsc - moved to flags */
+		/* nothing */
+	};
+	struct cpuinfo_x86 *c = v;
+	int i, n = c - cpu_data;
+	int fpu_exception;
+
+#ifdef CONFIG_SMP
+	if (!cpu_online(n))
+		return 0;
+#endif
+	seq_printf(m, "processor\t: %d\n"
+		"vendor_id\t: %s\n"
+		"cpu family\t: %d\n"
+		"model\t\t: %d\n"
+		"model name\t: %s\n",
+		n,
+		c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
+		c->x86,
+		c->x86_model,
+		c->x86_model_id[0] ? c->x86_model_id : "unknown");
+
+	if (c->x86_mask || c->cpuid_level >= 0)
+		seq_printf(m, "stepping\t: %d\n", c->x86_mask);
+	else
+		seq_printf(m, "stepping\t: unknown\n");
+
+	if ( cpu_has(c, X86_FEATURE_TSC) ) {
+		unsigned int freq = cpufreq_quick_get(n);
+		if (!freq)
+			freq = cpu_khz;
+		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
+			freq / 1000, (freq % 1000));
+	}
+
+	/* Cache size */
+	if (c->x86_cache_size >= 0)
+		seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
+#ifdef CONFIG_X86_HT
+	if (c->x86_max_cores * smp_num_siblings > 1) {
+		seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
+		seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n]));
+		seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
+		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
+	}
+#endif
+	
+	/* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */
+	fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu);
+	seq_printf(m, "fdiv_bug\t: %s\n"
+			"hlt_bug\t\t: %s\n"
+			"f00f_bug\t: %s\n"
+			"coma_bug\t: %s\n"
+			"fpu\t\t: %s\n"
+			"fpu_exception\t: %s\n"
+			"cpuid level\t: %d\n"
+			"wp\t\t: %s\n"
+			"flags\t\t:",
+		     c->fdiv_bug ? "yes" : "no",
+		     c->hlt_works_ok ? "no" : "yes",
+		     c->f00f_bug ? "yes" : "no",
+		     c->coma_bug ? "yes" : "no",
+		     c->hard_math ? "yes" : "no",
+		     fpu_exception ? "yes" : "no",
+		     c->cpuid_level,
+		     c->wp_works_ok ? "yes" : "no");
+
+	for ( i = 0 ; i < 32*NCAPINTS ; i++ )
+		if ( test_bit(i, c->x86_capability) &&
+		     x86_cap_flags[i] != NULL )
+			seq_printf(m, " %s", x86_cap_flags[i]);
+
+	for (i = 0; i < 32; i++)
+		if (c->x86_power & (1 << i)) {
+			if (i < ARRAY_SIZE(x86_power_flags) &&
+			    x86_power_flags[i])
+				seq_printf(m, "%s%s",
+					   x86_power_flags[i][0]?" ":"",
+					   x86_power_flags[i]);
+			else
+				seq_printf(m, " [%d]", i);
+		}
+
+	seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
+		     c->loops_per_jiffy/(500000/HZ),
+		     (c->loops_per_jiffy/(5000/HZ)) % 100);
+	seq_printf(m, "clflush size\t: %u\n\n", c->x86_clflush_size);
+
+	return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+}
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return c_start(m, pos);
+}
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= show_cpuinfo,
+};
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
new file mode 100644
index 00000000000..200fb3f9ebf
--- /dev/null
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -0,0 +1,116 @@
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include "cpu.h"
+
+static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
+{
+	unsigned int cap_mask, uk, max, dummy;
+	unsigned int cms_rev1, cms_rev2;
+	unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev;
+	char cpu_info[65];
+
+	get_model_name(c);	/* Same as AMD/Cyrix */
+	display_cacheinfo(c);
+
+	/* Print CMS and CPU revision */
+	max = cpuid_eax(0x80860000);
+	cpu_rev = 0;
+	if ( max >= 0x80860001 ) {
+		cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); 
+		if (cpu_rev != 0x02000000) {
+			printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
+				(cpu_rev >> 24) & 0xff,
+				(cpu_rev >> 16) & 0xff,
+				(cpu_rev >> 8) & 0xff,
+				cpu_rev & 0xff,
+				cpu_freq);
+		}
+	}
+	if ( max >= 0x80860002 ) {
+		cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy);
+		if (cpu_rev == 0x02000000) {
+			printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n",
+				new_cpu_rev, cpu_freq);
+		}
+		printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
+		       (cms_rev1 >> 24) & 0xff,
+		       (cms_rev1 >> 16) & 0xff,
+		       (cms_rev1 >> 8) & 0xff,
+		       cms_rev1 & 0xff,
+		       cms_rev2);
+	}
+	if ( max >= 0x80860006 ) {
+		cpuid(0x80860003,
+		      (void *)&cpu_info[0],
+		      (void *)&cpu_info[4],
+		      (void *)&cpu_info[8],
+		      (void *)&cpu_info[12]);
+		cpuid(0x80860004,
+		      (void *)&cpu_info[16],
+		      (void *)&cpu_info[20],
+		      (void *)&cpu_info[24],
+		      (void *)&cpu_info[28]);
+		cpuid(0x80860005,
+		      (void *)&cpu_info[32],
+		      (void *)&cpu_info[36],
+		      (void *)&cpu_info[40],
+		      (void *)&cpu_info[44]);
+		cpuid(0x80860006,
+		      (void *)&cpu_info[48],
+		      (void *)&cpu_info[52],
+		      (void *)&cpu_info[56],
+		      (void *)&cpu_info[60]);
+		cpu_info[64] = '\0';
+		printk(KERN_INFO "CPU: %s\n", cpu_info);
+	}
+
+	/* Unhide possibly hidden capability flags */
+	rdmsr(0x80860004, cap_mask, uk);
+	wrmsr(0x80860004, ~0, uk);
+	c->x86_capability[0] = cpuid_edx(0x00000001);
+	wrmsr(0x80860004, cap_mask, uk);
+
+	/* All Transmeta CPUs have a constant TSC */
+	set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+	
+	/* If we can run i686 user-space code, call us an i686 */
+#define USER686 ((1 << X86_FEATURE_TSC)|\
+		 (1 << X86_FEATURE_CX8)|\
+		 (1 << X86_FEATURE_CMOV))
+        if (c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686)
+		c->x86 = 6;
+
+#ifdef CONFIG_SYSCTL
+	/* randomize_va_space slows us down enormously;
+	   it probably triggers retranslation of x86->native bytecode */
+	randomize_va_space = 0;
+#endif
+}
+
+static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c)
+{
+	u32 xlvl;
+
+	/* Transmeta-defined flags: level 0x80860001 */
+	xlvl = cpuid_eax(0x80860000);
+	if ( (xlvl & 0xffff0000) == 0x80860000 ) {
+		if (  xlvl >= 0x80860001 )
+			c->x86_capability[2] = cpuid_edx(0x80860001);
+	}
+}
+
+static struct cpu_dev transmeta_cpu_dev __cpuinitdata = {
+	.c_vendor	= "Transmeta",
+	.c_ident	= { "GenuineTMx86", "TransmetaCPU" },
+	.c_init		= init_transmeta,
+	.c_identify	= transmeta_identify,
+};
+
+int __init transmeta_init_cpu(void)
+{
+	cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev;
+	return 0;
+}
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c
new file mode 100644
index 00000000000..a7a4e75bdcd
--- /dev/null
+++ b/arch/x86/kernel/cpu/umc.c
@@ -0,0 +1,26 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <asm/processor.h>
+#include "cpu.h"
+
+/* UMC chips appear to be only either 386 or 486, so no special init takes place.
+ */
+
+static struct cpu_dev umc_cpu_dev __cpuinitdata = {
+	.c_vendor	= "UMC",
+	.c_ident 	= { "UMC UMC UMC" },
+	.c_models = {
+		{ .vendor = X86_VENDOR_UMC, .family = 4, .model_names =
+		  { 
+			  [1] = "U5D", 
+			  [2] = "U5S", 
+		  }
+		},
+	},
+};
+
+int __init umc_init_cpu(void)
+{
+	cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev;
+	return 0;
+}