39 files changed, 3063 insertions, 2450 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 1d2cb383410..c202b62f367 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -19,8 +19,6 @@ obj-y			+= vmware.o hypervisor.o sched.o
 obj-$(CONFIG_X86_32)	+= bugs.o cmpxchg.o
 obj-$(CONFIG_X86_64)	+= bugs_64.o
 
-obj-$(CONFIG_X86_CPU_DEBUG)		+= cpu_debug.o
-
 obj-$(CONFIG_CPU_SUP_INTEL)		+= intel.o
 obj-$(CONFIG_CPU_SUP_AMD)		+= amd.o
 obj-$(CONFIG_CPU_SUP_CYRIX_32)		+= cyrix.o
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 468489b57aa..97ad79cdf68 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -32,6 +32,10 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 	static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
 		{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
 		{ X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 },
+		{ X86_FEATURE_NPT,   CR_EDX, 0, 0x8000000a },
+		{ X86_FEATURE_LBRV,  CR_EDX, 1, 0x8000000a },
+		{ X86_FEATURE_SVML,  CR_EDX, 2, 0x8000000a },
+		{ X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a },
 		{ 0, 0, 0, 0 }
 	};
 
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
deleted file mode 100644
index b368cd86299..00000000000
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ /dev/null
@@ -1,688 +0,0 @@
-/*
- * CPU x86 architecture debug code
- *
- * Copyright(C) 2009 Jaswinder Singh Rajput
- *
- * For licencing details see kernel-base/COPYING
- */
-
-#include <linux/interrupt.h>
-#include <linux/compiler.h>
-#include <linux/seq_file.h>
-#include <linux/debugfs.h>
-#include <linux/kprobes.h>
-#include <linux/uaccess.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/percpu.h>
-#include <linux/signal.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-
-#include <asm/cpu_debug.h>
-#include <asm/paravirt.h>
-#include <asm/system.h>
-#include <asm/traps.h>
-#include <asm/apic.h>
-#include <asm/desc.h>
-
-static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpud_arr);
-static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], cpud_priv_arr);
-static DEFINE_PER_CPU(int, cpud_priv_count);
-
-static DEFINE_MUTEX(cpu_debug_lock);
-
-static struct dentry *cpu_debugfs_dir;
-
-static struct cpu_debug_base cpu_base[] = {
-	{ "mc",		CPU_MC,		0	},
-	{ "monitor",	CPU_MONITOR,	0	},
-	{ "time",	CPU_TIME,	0	},
-	{ "pmc",	CPU_PMC,	1	},
-	{ "platform",	CPU_PLATFORM,	0	},
-	{ "apic",	CPU_APIC,	0	},
-	{ "poweron",	CPU_POWERON,	0	},
-	{ "control",	CPU_CONTROL,	0	},
-	{ "features",	CPU_FEATURES,	0	},
-	{ "lastbranch",	CPU_LBRANCH,	0	},
-	{ "bios",	CPU_BIOS,	0	},
-	{ "freq",	CPU_FREQ,	0	},
-	{ "mtrr",	CPU_MTRR,	0	},
-	{ "perf",	CPU_PERF,	0	},
-	{ "cache",	CPU_CACHE,	0	},
-	{ "sysenter",	CPU_SYSENTER,	0	},
-	{ "therm",	CPU_THERM,	0	},
-	{ "misc",	CPU_MISC,	0	},
-	{ "debug",	CPU_DEBUG,	0	},
-	{ "pat",	CPU_PAT,	0	},
-	{ "vmx",	CPU_VMX,	0	},
-	{ "call",	CPU_CALL,	0	},
-	{ "base",	CPU_BASE,	0	},
-	{ "ver",	CPU_VER,	0	},
-	{ "conf",	CPU_CONF,	0	},
-	{ "smm",	CPU_SMM,	0	},
-	{ "svm",	CPU_SVM,	0	},
-	{ "osvm",	CPU_OSVM,	0	},
-	{ "tss",	CPU_TSS,	0	},
-	{ "cr",		CPU_CR,		0	},
-	{ "dt",		CPU_DT,		0	},
-	{ "registers",	CPU_REG_ALL,	0	},
-};
-
-static struct cpu_file_base cpu_file[] = {
-	{ "index",	CPU_REG_ALL,	0	},
-	{ "value",	CPU_REG_ALL,	1	},
-};
-
-/* CPU Registers Range */
-static struct cpu_debug_range cpu_reg_range[] = {
-	{ 0x00000000, 0x00000001, CPU_MC,	},
-	{ 0x00000006, 0x00000007, CPU_MONITOR,	},
-	{ 0x00000010, 0x00000010, CPU_TIME,	},
-	{ 0x00000011, 0x00000013, CPU_PMC,	},
-	{ 0x00000017, 0x00000017, CPU_PLATFORM,	},
-	{ 0x0000001B, 0x0000001B, CPU_APIC,	},
-	{ 0x0000002A, 0x0000002B, CPU_POWERON,	},
-	{ 0x0000002C, 0x0000002C, CPU_FREQ,	},
-	{ 0x0000003A, 0x0000003A, CPU_CONTROL,	},
-	{ 0x00000040, 0x00000047, CPU_LBRANCH,	},
-	{ 0x00000060, 0x00000067, CPU_LBRANCH,	},
-	{ 0x00000079, 0x00000079, CPU_BIOS,	},
-	{ 0x00000088, 0x0000008A, CPU_CACHE,	},
-	{ 0x0000008B, 0x0000008B, CPU_BIOS,	},
-	{ 0x0000009B, 0x0000009B, CPU_MONITOR,	},
-	{ 0x000000C1, 0x000000C4, CPU_PMC,	},
-	{ 0x000000CD, 0x000000CD, CPU_FREQ,	},
-	{ 0x000000E7, 0x000000E8, CPU_PERF,	},
-	{ 0x000000FE, 0x000000FE, CPU_MTRR,	},
-
-	{ 0x00000116, 0x0000011E, CPU_CACHE,	},
-	{ 0x00000174, 0x00000176, CPU_SYSENTER,	},
-	{ 0x00000179, 0x0000017B, CPU_MC,	},
-	{ 0x00000186, 0x00000189, CPU_PMC,	},
-	{ 0x00000198, 0x00000199, CPU_PERF,	},
-	{ 0x0000019A, 0x0000019A, CPU_TIME,	},
-	{ 0x0000019B, 0x0000019D, CPU_THERM,	},
-	{ 0x000001A0, 0x000001A0, CPU_MISC,	},
-	{ 0x000001C9, 0x000001C9, CPU_LBRANCH,	},
-	{ 0x000001D7, 0x000001D8, CPU_LBRANCH,	},
-	{ 0x000001D9, 0x000001D9, CPU_DEBUG,	},
-	{ 0x000001DA, 0x000001E0, CPU_LBRANCH,	},
-
-	{ 0x00000200, 0x0000020F, CPU_MTRR,	},
-	{ 0x00000250, 0x00000250, CPU_MTRR,	},
-	{ 0x00000258, 0x00000259, CPU_MTRR,	},
-	{ 0x00000268, 0x0000026F, CPU_MTRR,	},
-	{ 0x00000277, 0x00000277, CPU_PAT,	},
-	{ 0x000002FF, 0x000002FF, CPU_MTRR,	},
-
-	{ 0x00000300, 0x00000311, CPU_PMC,	},
-	{ 0x00000345, 0x00000345, CPU_PMC,	},
-	{ 0x00000360, 0x00000371, CPU_PMC,	},
-	{ 0x0000038D, 0x00000390, CPU_PMC,	},
-	{ 0x000003A0, 0x000003BE, CPU_PMC,	},
-	{ 0x000003C0, 0x000003CD, CPU_PMC,	},
-	{ 0x000003E0, 0x000003E1, CPU_PMC,	},
-	{ 0x000003F0, 0x000003F2, CPU_PMC,	},
-
-	{ 0x00000400, 0x00000417, CPU_MC,	},
-	{ 0x00000480, 0x0000048B, CPU_VMX,	},
-
-	{ 0x00000600, 0x00000600, CPU_DEBUG,	},
-	{ 0x00000680, 0x0000068F, CPU_LBRANCH,	},
-	{ 0x000006C0, 0x000006CF, CPU_LBRANCH,	},
-
-	{ 0x000107CC, 0x000107D3, CPU_PMC,	},
-
-	{ 0xC0000080, 0xC0000080, CPU_FEATURES,	},
-	{ 0xC0000081, 0xC0000084, CPU_CALL,	},
-	{ 0xC0000100, 0xC0000102, CPU_BASE,	},
-	{ 0xC0000103, 0xC0000103, CPU_TIME,	},
-
-	{ 0xC0010000, 0xC0010007, CPU_PMC,	},
-	{ 0xC0010010, 0xC0010010, CPU_CONF,	},
-	{ 0xC0010015, 0xC0010015, CPU_CONF,	},
-	{ 0xC0010016, 0xC001001A, CPU_MTRR,	},
-	{ 0xC001001D, 0xC001001D, CPU_MTRR,	},
-	{ 0xC001001F, 0xC001001F, CPU_CONF,	},
-	{ 0xC0010030, 0xC0010035, CPU_BIOS,	},
-	{ 0xC0010044, 0xC0010048, CPU_MC,	},
-	{ 0xC0010050, 0xC0010056, CPU_SMM,	},
-	{ 0xC0010058, 0xC0010058, CPU_CONF,	},
-	{ 0xC0010060, 0xC0010060, CPU_CACHE,	},
-	{ 0xC0010061, 0xC0010068, CPU_SMM,	},
-	{ 0xC0010069, 0xC001006B, CPU_SMM,	},
-	{ 0xC0010070, 0xC0010071, CPU_SMM,	},
-	{ 0xC0010111, 0xC0010113, CPU_SMM,	},
-	{ 0xC0010114, 0xC0010118, CPU_SVM,	},
-	{ 0xC0010140, 0xC0010141, CPU_OSVM,	},
-	{ 0xC0011022, 0xC0011023, CPU_CONF,	},
-};
-
-static int is_typeflag_valid(unsigned cpu, unsigned flag)
-{
-	int i;
-
-	/* Standard Registers should be always valid */
-	if (flag >= CPU_TSS)
-		return 1;
-
-	for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
-		if (cpu_reg_range[i].flag == flag)
-			return 1;
-	}
-
-	/* Invalid */
-	return 0;
-}
-
-static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
-			      int index, unsigned flag)
-{
-	if (cpu_reg_range[index].flag == flag) {
-		*min = cpu_reg_range[index].min;
-		*max = cpu_reg_range[index].max;
-	} else
-		*max = 0;
-
-	return *max;
-}
-
-/* This function can also be called with seq = NULL for printk */
-static void print_cpu_data(struct seq_file *seq, unsigned type,
-			   u32 low, u32 high)
-{
-	struct cpu_private *priv;
-	u64 val = high;
-
-	if (seq) {
-		priv = seq->private;
-		if (priv->file) {
-			val = (val << 32) | low;
-			seq_printf(seq, "0x%llx\n", val);
-		} else
-			seq_printf(seq, " %08x: %08x_%08x\n",
-				   type, high, low);
-	} else
-		printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low);
-}
-
-/* This function can also be called with seq = NULL for printk */
-static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
-{
-	unsigned msr, msr_min, msr_max;
-	struct cpu_private *priv;
-	u32 low, high;
-	int i;
-
-	if (seq) {
-		priv = seq->private;
-		if (priv->file) {
-			if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg,
-					       &low, &high))
-				print_cpu_data(seq, priv->reg, low, high);
-			return;
-		}
-	}
-
-	for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
-		if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
-			continue;
-
-		for (msr = msr_min; msr <= msr_max; msr++) {
-			if (rdmsr_safe_on_cpu(cpu, msr, &low, &high))
-				continue;
-			print_cpu_data(seq, msr, low, high);
-		}
-	}
-}
-
-static void print_tss(void *arg)
-{
-	struct pt_regs *regs = task_pt_regs(current);
-	struct seq_file *seq = arg;
-	unsigned int seg;
-
-	seq_printf(seq, " RAX\t: %016lx\n", regs->ax);
-	seq_printf(seq, " RBX\t: %016lx\n", regs->bx);
-	seq_printf(seq, " RCX\t: %016lx\n", regs->cx);
-	seq_printf(seq, " RDX\t: %016lx\n", regs->dx);
-
-	seq_printf(seq, " RSI\t: %016lx\n", regs->si);
-	seq_printf(seq, " RDI\t: %016lx\n", regs->di);
-	seq_printf(seq, " RBP\t: %016lx\n", regs->bp);
-	seq_printf(seq, " ESP\t: %016lx\n", regs->sp);
-
-#ifdef CONFIG_X86_64
-	seq_printf(seq, " R08\t: %016lx\n", regs->r8);
-	seq_printf(seq, " R09\t: %016lx\n", regs->r9);
-	seq_printf(seq, " R10\t: %016lx\n", regs->r10);
-	seq_printf(seq, " R11\t: %016lx\n", regs->r11);
-	seq_printf(seq, " R12\t: %016lx\n", regs->r12);
-	seq_printf(seq, " R13\t: %016lx\n", regs->r13);
-	seq_printf(seq, " R14\t: %016lx\n", regs->r14);
-	seq_printf(seq, " R15\t: %016lx\n", regs->r15);
-#endif
-
-	asm("movl %%cs,%0" : "=r" (seg));
-	seq_printf(seq, " CS\t:             %04x\n", seg);
-	asm("movl %%ds,%0" : "=r" (seg));
-	seq_printf(seq, " DS\t:             %04x\n", seg);
-	seq_printf(seq, " SS\t:             %04lx\n", regs->ss & 0xffff);
-	asm("movl %%es,%0" : "=r" (seg));
-	seq_printf(seq, " ES\t:             %04x\n", seg);
-	asm("movl %%fs,%0" : "=r" (seg));
-	seq_printf(seq, " FS\t:             %04x\n", seg);
-	asm("movl %%gs,%0" : "=r" (seg));
-	seq_printf(seq, " GS\t:             %04x\n", seg);
-
-	seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags);
-
-	seq_printf(seq, " EIP\t: %016lx\n", regs->ip);
-}
-
-static void print_cr(void *arg)
-{
-	struct seq_file *seq = arg;
-
-	seq_printf(seq, " cr0\t: %016lx\n", read_cr0());
-	seq_printf(seq, " cr2\t: %016lx\n", read_cr2());
-	seq_printf(seq, " cr3\t: %016lx\n", read_cr3());
-	seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe());
-#ifdef CONFIG_X86_64
-	seq_printf(seq, " cr8\t: %016lx\n", read_cr8());
-#endif
-}
-
-static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt)
-{
-	seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size));
-}
-
-static void print_dt(void *seq)
-{
-	struct desc_ptr dt;
-	unsigned long ldt;
-
-	/* IDT */
-	store_idt((struct desc_ptr *)&dt);
-	print_desc_ptr("IDT", seq, dt);
-
-	/* GDT */
-	store_gdt((struct desc_ptr *)&dt);
-	print_desc_ptr("GDT", seq, dt);
-
-	/* LDT */
-	store_ldt(ldt);
-	seq_printf(seq, " LDT\t: %016lx\n", ldt);
-
-	/* TR */
-	store_tr(ldt);
-	seq_printf(seq, " TR\t: %016lx\n", ldt);
-}
-
-static void print_dr(void *arg)
-{
-	struct seq_file *seq = arg;
-	unsigned long dr;
-	int i;
-
-	for (i = 0; i < 8; i++) {
-		/* Ignore db4, db5 */
-		if ((i == 4) || (i == 5))
-			continue;
-		get_debugreg(dr, i);
-		seq_printf(seq, " dr%d\t: %016lx\n", i, dr);
-	}
-
-	seq_printf(seq, "\n MSR\t:\n");
-}
-
-static void print_apic(void *arg)
-{
-	struct seq_file *seq = arg;
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	seq_printf(seq, " LAPIC\t:\n");
-	seq_printf(seq, " ID\t\t: %08x\n",  apic_read(APIC_ID) >> 24);
-	seq_printf(seq, " LVR\t\t: %08x\n",  apic_read(APIC_LVR));
-	seq_printf(seq, " TASKPRI\t: %08x\n",  apic_read(APIC_TASKPRI));
-	seq_printf(seq, " ARBPRI\t\t: %08x\n",  apic_read(APIC_ARBPRI));
-	seq_printf(seq, " PROCPRI\t: %08x\n",  apic_read(APIC_PROCPRI));
-	seq_printf(seq, " LDR\t\t: %08x\n",  apic_read(APIC_LDR));
-	seq_printf(seq, " DFR\t\t: %08x\n",  apic_read(APIC_DFR));
-	seq_printf(seq, " SPIV\t\t: %08x\n",  apic_read(APIC_SPIV));
-	seq_printf(seq, " ISR\t\t: %08x\n",  apic_read(APIC_ISR));
-	seq_printf(seq, " ESR\t\t: %08x\n",  apic_read(APIC_ESR));
-	seq_printf(seq, " ICR\t\t: %08x\n",  apic_read(APIC_ICR));
-	seq_printf(seq, " ICR2\t\t: %08x\n",  apic_read(APIC_ICR2));
-	seq_printf(seq, " LVTT\t\t: %08x\n",  apic_read(APIC_LVTT));
-	seq_printf(seq, " LVTTHMR\t: %08x\n",  apic_read(APIC_LVTTHMR));
-	seq_printf(seq, " LVTPC\t\t: %08x\n",  apic_read(APIC_LVTPC));
-	seq_printf(seq, " LVT0\t\t: %08x\n",  apic_read(APIC_LVT0));
-	seq_printf(seq, " LVT1\t\t: %08x\n",  apic_read(APIC_LVT1));
-	seq_printf(seq, " LVTERR\t\t: %08x\n",  apic_read(APIC_LVTERR));
-	seq_printf(seq, " TMICT\t\t: %08x\n",  apic_read(APIC_TMICT));
-	seq_printf(seq, " TMCCT\t\t: %08x\n",  apic_read(APIC_TMCCT));
-	seq_printf(seq, " TDCR\t\t: %08x\n",  apic_read(APIC_TDCR));
-	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
-		unsigned int i, v, maxeilvt;
-
-		v = apic_read(APIC_EFEAT);
-		maxeilvt = (v >> 16) & 0xff;
-		seq_printf(seq, " EFEAT\t\t: %08x\n", v);
-		seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL));
-
-		for (i = 0; i < maxeilvt; i++) {
-			v = apic_read(APIC_EILVTn(i));
-			seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v);
-		}
-	}
-#endif /* CONFIG_X86_LOCAL_APIC */
-	seq_printf(seq, "\n MSR\t:\n");
-}
-
-static int cpu_seq_show(struct seq_file *seq, void *v)
-{
-	struct cpu_private *priv = seq->private;
-
-	if (priv == NULL)
-		return -EINVAL;
-
-	switch (cpu_base[priv->type].flag) {
-	case CPU_TSS:
-		smp_call_function_single(priv->cpu, print_tss, seq, 1);
-		break;
-	case CPU_CR:
-		smp_call_function_single(priv->cpu, print_cr, seq, 1);
-		break;
-	case CPU_DT:
-		smp_call_function_single(priv->cpu, print_dt, seq, 1);
-		break;
-	case CPU_DEBUG:
-		if (priv->file == CPU_INDEX_BIT)
-			smp_call_function_single(priv->cpu, print_dr, seq, 1);
-		print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
-		break;
-	case CPU_APIC:
-		if (priv->file == CPU_INDEX_BIT)
-			smp_call_function_single(priv->cpu, print_apic, seq, 1);
-		print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
-		break;
-
-	default:
-		print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
-		break;
-	}
-	seq_printf(seq, "\n");
-
-	return 0;
-}
-
-static void *cpu_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	if (*pos == 0) /* One time is enough ;-) */
-		return seq;
-
-	return NULL;
-}
-
-static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	(*pos)++;
-
-	return cpu_seq_start(seq, pos);
-}
-
-static void cpu_seq_stop(struct seq_file *seq, void *v)
-{
-}
-
-static const struct seq_operations cpu_seq_ops = {
-	.start		= cpu_seq_start,
-	.next		= cpu_seq_next,
-	.stop		= cpu_seq_stop,
-	.show		= cpu_seq_show,
-};
-
-static int cpu_seq_open(struct inode *inode, struct file *file)
-{
-	struct cpu_private *priv = inode->i_private;
-	struct seq_file *seq;
-	int err;
-
-	err = seq_open(file, &cpu_seq_ops);
-	if (!err) {
-		seq = file->private_data;
-		seq->private = priv;
-	}
-
-	return err;
-}
-
-static int write_msr(struct cpu_private *priv, u64 val)
-{
-	u32 low, high;
-
-	high = (val >> 32) & 0xffffffff;
-	low = val & 0xffffffff;
-
-	if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high))
-		return 0;
-
-	return -EPERM;
-}
-
-static int write_cpu_register(struct cpu_private *priv, const char *buf)
-{
-	int ret = -EPERM;
-	u64 val;
-
-	ret = strict_strtoull(buf, 0, &val);
-	if (ret < 0)
-		return ret;
-
-	/* Supporting only MSRs */
-	if (priv->type < CPU_TSS_BIT)
-		return write_msr(priv, val);
-
-	return ret;
-}
-
-static ssize_t cpu_write(struct file *file, const char __user *ubuf,
-			     size_t count, loff_t *off)
-{
-	struct seq_file *seq = file->private_data;
-	struct cpu_private *priv = seq->private;
-	char buf[19];
-
-	if ((priv == NULL) || (count >= sizeof(buf)))
-		return -EINVAL;
-
-	if (copy_from_user(&buf, ubuf, count))
-		return -EFAULT;
-
-	buf[count] = 0;
-
-	if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write))
-		if (!write_cpu_register(priv, buf))
-			return count;
-
-	return -EACCES;
-}
-
-static const struct file_operations cpu_fops = {
-	.owner		= THIS_MODULE,
-	.open		= cpu_seq_open,
-	.read		= seq_read,
-	.write		= cpu_write,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
-			   unsigned file, struct dentry *dentry)
-{
-	struct cpu_private *priv = NULL;
-
-	/* Already intialized */
-	if (file == CPU_INDEX_BIT)
-		if (per_cpu(cpud_arr[type].init, cpu))
-			return 0;
-
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (priv == NULL)
-		return -ENOMEM;
-
-	priv->cpu = cpu;
-	priv->type = type;
-	priv->reg = reg;
-	priv->file = file;
-	mutex_lock(&cpu_debug_lock);
-	per_cpu(cpud_priv_arr[type], cpu) = priv;
-	per_cpu(cpud_priv_count, cpu)++;
-	mutex_unlock(&cpu_debug_lock);
-
-	if (file)
-		debugfs_create_file(cpu_file[file].name, S_IRUGO,
-				    dentry, (void *)priv, &cpu_fops);
-	else {
-		debugfs_create_file(cpu_base[type].name, S_IRUGO,
-				    per_cpu(cpud_arr[type].dentry, cpu),
-				    (void *)priv, &cpu_fops);
-		mutex_lock(&cpu_debug_lock);
-		per_cpu(cpud_arr[type].init, cpu) = 1;
-		mutex_unlock(&cpu_debug_lock);
-	}
-
-	return 0;
-}
-
-static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg,
-			     struct dentry *dentry)
-{
-	unsigned file;
-	int err = 0;
-
-	for (file = 0; file <  ARRAY_SIZE(cpu_file); file++) {
-		err = cpu_create_file(cpu, type, reg, file, dentry);
-		if (err)
-			return err;
-	}
-
-	return err;
-}
-
-static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry)
-{
-	struct dentry *cpu_dentry = NULL;
-	unsigned reg, reg_min, reg_max;
-	int i, err = 0;
-	char reg_dir[12];
-	u32 low, high;
-
-	for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
-		if (!get_cpu_range(cpu, &reg_min, &reg_max, i,
-				   cpu_base[type].flag))
-			continue;
-
-		for (reg = reg_min; reg <= reg_max; reg++) {
-			if (rdmsr_safe_on_cpu(cpu, reg, &low, &high))
-				continue;
-
-			sprintf(reg_dir, "0x%x", reg);
-			cpu_dentry = debugfs_create_dir(reg_dir, dentry);
-			err = cpu_init_regfiles(cpu, type, reg, cpu_dentry);
-			if (err)
-				return err;
-		}
-	}
-
-	return err;
-}
-
-static int cpu_init_allreg(unsigned cpu, struct dentry *dentry)
-{
-	struct dentry *cpu_dentry = NULL;
-	unsigned type;
-	int err = 0;
-
-	for (type = 0; type <  ARRAY_SIZE(cpu_base) - 1; type++) {
-		if (!is_typeflag_valid(cpu, cpu_base[type].flag))
-			continue;
-		cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry);
-		per_cpu(cpud_arr[type].dentry, cpu) = cpu_dentry;
-
-		if (type < CPU_TSS_BIT)
-			err = cpu_init_msr(cpu, type, cpu_dentry);
-		else
-			err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT,
-					      cpu_dentry);
-		if (err)
-			return err;
-	}
-
-	return err;
-}
-
-static int cpu_init_cpu(void)
-{
-	struct dentry *cpu_dentry = NULL;
-	struct cpuinfo_x86 *cpui;
-	char cpu_dir[12];
-	unsigned cpu;
-	int err = 0;
-
-	for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
-		cpui = &cpu_data(cpu);
-		if (!cpu_has(cpui, X86_FEATURE_MSR))
-			continue;
-
-		sprintf(cpu_dir, "cpu%d", cpu);
-		cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir);
-		err = cpu_init_allreg(cpu, cpu_dentry);
-
-		pr_info("cpu%d(%d) debug files %d\n",
-			cpu, nr_cpu_ids, per_cpu(cpud_priv_count, cpu));
-		if (per_cpu(cpud_priv_count, cpu) > MAX_CPU_FILES) {
-			pr_err("Register files count %d exceeds limit %d\n",
-				per_cpu(cpud_priv_count, cpu), MAX_CPU_FILES);
-			per_cpu(cpud_priv_count, cpu) = MAX_CPU_FILES;
-			err = -ENFILE;
-		}
-		if (err)
-			return err;
-	}
-
-	return err;
-}
-
-static int __init cpu_debug_init(void)
-{
-	cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir);
-
-	return cpu_init_cpu();
-}
-
-static void __exit cpu_debug_exit(void)
-{
-	int i, cpu;
-
-	if (cpu_debugfs_dir)
-		debugfs_remove_recursive(cpu_debugfs_dir);
-
-	for (cpu = 0; cpu <  nr_cpu_ids; cpu++)
-		for (i = 0; i < per_cpu(cpud_priv_count, cpu); i++)
-			kfree(per_cpu(cpud_priv_arr[i], cpu));
-}
-
-module_init(cpu_debug_init);
-module_exit(cpu_debug_exit);
-
-MODULE_AUTHOR("Jaswinder Singh Rajput");
-MODULE_DESCRIPTION("CPU Debug module");
-MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index f138c6c389b..870e6cc6ad2 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -10,6 +10,20 @@ if CPU_FREQ
 
 comment "CPUFreq processor drivers"
 
+config X86_PCC_CPUFREQ
+	tristate "Processor Clocking Control interface driver"
+	depends on ACPI && ACPI_PROCESSOR
+	help
+	  This driver adds support for the PCC interface.
+
+	  For details, take a look at:
+	  <file:Documentation/cpu-freq/pcc-cpufreq.txt>.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called pcc-cpufreq.
+
+	  If in doubt, say N.
+
 config X86_ACPI_CPUFREQ
 	tristate "ACPI Processor P-States driver"
 	select CPU_FREQ_TABLE
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
index 509296df294..1840c0a5170 100644
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ b/arch/x86/kernel/cpu/cpufreq/Makefile
@@ -4,6 +4,7 @@
 
 obj-$(CONFIG_X86_POWERNOW_K8)		+= powernow-k8.o
 obj-$(CONFIG_X86_ACPI_CPUFREQ)		+= acpi-cpufreq.o
+obj-$(CONFIG_X86_PCC_CPUFREQ)		+= pcc-cpufreq.o
 obj-$(CONFIG_X86_POWERNOW_K6)		+= powernow-k6.o
 obj-$(CONFIG_X86_POWERNOW_K7)		+= powernow-k7.o
 obj-$(CONFIG_X86_LONGHAUL)		+= longhaul.o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 1b1920fa7c8..459168083b7 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
 #include <linux/cpufreq.h>
 #include <linux/compiler.h>
 #include <linux/dmi.h>
+#include <linux/slab.h>
 #include <trace/events/power.h>
 
 #include <linux/acpi.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 006b278b0d5..c587db472a7 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -20,7 +20,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
-#include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/cpufreq.h>
 
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
index ac27ec2264d..16e3483be9e 100644
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
@@ -80,6 +80,7 @@
 #include <linux/cpufreq.h>
 #include <linux/pci.h>
 #include <linux/errno.h>
+#include <linux/slab.h>
 
 #include <asm/processor-cyrix.h>
 
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index da5f70fcb76..e7b559d74c5 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -9,7 +9,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/cpufreq.h>
 #include <linux/timex.h>
 
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 86961519372..7b8a8ba67b0 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -25,7 +25,6 @@
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/cpufreq.h>
-#include <linux/slab.h>
 #include <linux/cpumask.h>
 #include <linux/timex.h>
 
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
new file mode 100644
index 00000000000..ce7cde713e7
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -0,0 +1,621 @@
+/*
+ *  pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface
+ *
+ *  Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
+ *  Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
+ *	Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON
+ *  INFRINGEMENT. See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/sched.h>
+#include <linux/cpufreq.h>
+#include <linux/compiler.h>
+#include <linux/slab.h>
+
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+
+#include <acpi/processor.h>
+
+#define PCC_VERSION 	"1.00.00"
+#define POLL_LOOPS 	300
+
+#define CMD_COMPLETE 	0x1
+#define CMD_GET_FREQ 	0x0
+#define CMD_SET_FREQ 	0x1
+
+#define BUF_SZ		4
+
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER,	\
+					     "pcc-cpufreq", msg)
+
+struct pcc_register_resource {
+	u8 descriptor;
+	u16 length;
+	u8 space_id;
+	u8 bit_width;
+	u8 bit_offset;
+	u8 access_size;
+	u64 address;
+} __attribute__ ((packed));
+
+struct pcc_memory_resource {
+	u8 descriptor;
+	u16 length;
+	u8 space_id;
+	u8 resource_usage;
+	u8 type_specific;
+	u64 granularity;
+	u64 minimum;
+	u64 maximum;
+	u64 translation_offset;
+	u64 address_length;
+} __attribute__ ((packed));
+
+static struct cpufreq_driver pcc_cpufreq_driver;
+
+struct pcc_header {
+	u32 signature;
+	u16 length;
+	u8 major;
+	u8 minor;
+	u32 features;
+	u16 command;
+	u16 status;
+	u32 latency;
+	u32 minimum_time;
+	u32 maximum_time;
+	u32 nominal;
+	u32 throttled_frequency;
+	u32 minimum_frequency;
+};
+
+static void __iomem *pcch_virt_addr;
+static struct pcc_header __iomem *pcch_hdr;
+
+static DEFINE_SPINLOCK(pcc_lock);
+
+static struct acpi_generic_address doorbell;
+
+static u64 doorbell_preserve;
+static u64 doorbell_write;
+
+static u8 OSC_UUID[16] = {0x63, 0x9B, 0x2C, 0x9F, 0x70, 0x91, 0x49, 0x1f,
+			  0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46};
+
+struct pcc_cpu {
+	u32 input_offset;
+	u32 output_offset;
+};
+
+static struct pcc_cpu *pcc_cpu_info;
+
+static int pcc_cpufreq_verify(struct cpufreq_policy *policy)
+{
+	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+				     policy->cpuinfo.max_freq);
+	return 0;
+}
+
+static inline void pcc_cmd(void)
+{
+	u64 doorbell_value;
+	int i;
+
+	acpi_read(&doorbell_value, &doorbell);
+	acpi_write((doorbell_value & doorbell_preserve) | doorbell_write,
+		   &doorbell);
+
+	for (i = 0; i < POLL_LOOPS; i++) {
+		if (ioread16(&pcch_hdr->status) & CMD_COMPLETE)
+			break;
+	}
+}
+
+static inline void pcc_clear_mapping(void)
+{
+	if (pcch_virt_addr)
+		iounmap(pcch_virt_addr);
+	pcch_virt_addr = NULL;
+}
+
+static unsigned int pcc_get_freq(unsigned int cpu)
+{
+	struct pcc_cpu *pcc_cpu_data;
+	unsigned int curr_freq;
+	unsigned int freq_limit;
+	u16 status;
+	u32 input_buffer;
+	u32 output_buffer;
+
+	spin_lock(&pcc_lock);
+
+	dprintk("get: get_freq for CPU %d\n", cpu);
+	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
+
+	input_buffer = 0x1;
+	iowrite32(input_buffer,
+			(pcch_virt_addr + pcc_cpu_data->input_offset));
+	iowrite16(CMD_GET_FREQ, &pcch_hdr->command);
+
+	pcc_cmd();
+
+	output_buffer =
+		ioread32(pcch_virt_addr + pcc_cpu_data->output_offset);
+
+	/* Clear the input buffer - we are done with the current command */
+	memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
+
+	status = ioread16(&pcch_hdr->status);
+	if (status != CMD_COMPLETE) {
+		dprintk("get: FAILED: for CPU %d, status is %d\n",
+			cpu, status);
+		goto cmd_incomplete;
+	}
+	iowrite16(0, &pcch_hdr->status);
+	curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff))
+			/ 100) * 1000);
+
+	dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is "
+		"0x%x, contains a value of: 0x%x. Speed is: %d MHz\n",
+		cpu, (pcch_virt_addr + pcc_cpu_data->output_offset),
+		output_buffer, curr_freq);
+
+	freq_limit = (output_buffer >> 8) & 0xff;
+	if (freq_limit != 0xff) {
+		dprintk("get: frequency for cpu %d is being temporarily"
+			" capped at %d\n", cpu, curr_freq);
+	}
+
+	spin_unlock(&pcc_lock);
+	return curr_freq;
+
+cmd_incomplete:
+	iowrite16(0, &pcch_hdr->status);
+	spin_unlock(&pcc_lock);
+	return -EINVAL;
+}
+
+static int pcc_cpufreq_target(struct cpufreq_policy *policy,
+			      unsigned int target_freq,
+			      unsigned int relation)
+{
+	struct pcc_cpu *pcc_cpu_data;
+	struct cpufreq_freqs freqs;
+	u16 status;
+	u32 input_buffer;
+	int cpu;
+
+	spin_lock(&pcc_lock);
+	cpu = policy->cpu;
+	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
+
+	dprintk("target: CPU %d should go to target freq: %d "
+		"(virtual) input_offset is 0x%x\n",
+		cpu, target_freq,
+		(pcch_virt_addr + pcc_cpu_data->input_offset));
+
+	freqs.new = target_freq;
+	freqs.cpu = cpu;
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	input_buffer = 0x1 | (((target_freq * 100)
+			       / (ioread32(&pcch_hdr->nominal) * 1000)) << 8);
+	iowrite32(input_buffer,
+			(pcch_virt_addr + pcc_cpu_data->input_offset));
+	iowrite16(CMD_SET_FREQ, &pcch_hdr->command);
+
+	pcc_cmd();
+
+	/* Clear the input buffer - we are done with the current command */
+	memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
+
+	status = ioread16(&pcch_hdr->status);
+	if (status != CMD_COMPLETE) {
+		dprintk("target: FAILED for cpu %d, with status: 0x%x\n",
+			cpu, status);
+		goto cmd_incomplete;
+	}
+	iowrite16(0, &pcch_hdr->status);
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	dprintk("target: was SUCCESSFUL for cpu %d\n", cpu);
+	spin_unlock(&pcc_lock);
+
+	return 0;
+
+cmd_incomplete:
+	iowrite16(0, &pcch_hdr->status);
+	spin_unlock(&pcc_lock);
+	return -EINVAL;
+}
+
+static int pcc_get_offset(int cpu)
+{
+	acpi_status status;
+	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object *pccp, *offset;
+	struct pcc_cpu *pcc_cpu_data;
+	struct acpi_processor *pr;
+	int ret = 0;
+
+	pr = per_cpu(processors, cpu);
+	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
+
+	status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	pccp = buffer.pointer;
+	if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) {
+		ret = -ENODEV;
+		goto out_free;
+	};
+
+	offset = &(pccp->package.elements[0]);
+	if (!offset || offset->type != ACPI_TYPE_INTEGER) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	pcc_cpu_data->input_offset = offset->integer.value;
+
+	offset = &(pccp->package.elements[1]);
+	if (!offset || offset->type != ACPI_TYPE_INTEGER) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	pcc_cpu_data->output_offset = offset->integer.value;
+
+	memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
+	memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ);
+
+	dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data "
+		"input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n",
+		cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset);
+out_free:
+	kfree(buffer.pointer);
+	return ret;
+}
+
+static int __init pcc_cpufreq_do_osc(acpi_handle *handle)
+{
+	acpi_status status;
+	struct acpi_object_list input;
+	struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object in_params[4];
+	union acpi_object *out_obj;
+	u32 capabilities[2];
+	u32 errors;
+	u32 supported;
+	int ret = 0;
+
+	input.count = 4;
+	input.pointer = in_params;
+	input.count = 4;
+	input.pointer = in_params;
+	in_params[0].type               = ACPI_TYPE_BUFFER;
+	in_params[0].buffer.length      = 16;
+	in_params[0].buffer.pointer     = OSC_UUID;
+	in_params[1].type               = ACPI_TYPE_INTEGER;
+	in_params[1].integer.value      = 1;
+	in_params[2].type               = ACPI_TYPE_INTEGER;
+	in_params[2].integer.value      = 2;
+	in_params[3].type               = ACPI_TYPE_BUFFER;
+	in_params[3].buffer.length      = 8;
+	in_params[3].buffer.pointer     = (u8 *)&capabilities;
+
+	capabilities[0] = OSC_QUERY_ENABLE;
+	capabilities[1] = 0x1;
+
+	status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	if (!output.length)
+		return -ENODEV;
+
+	out_obj = output.pointer;
+	if (out_obj->type != ACPI_TYPE_BUFFER) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
+	if (errors) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	supported = *((u32 *)(out_obj->buffer.pointer + 4));
+	if (!(supported & 0x1)) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	kfree(output.pointer);
+	capabilities[0] = 0x0;
+	capabilities[1] = 0x1;
+
+	status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	if (!output.length)
+		return -ENODEV;
+
+	out_obj = output.pointer;
+	if (out_obj->type != ACPI_TYPE_BUFFER) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
+	if (errors) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	supported = *((u32 *)(out_obj->buffer.pointer + 4));
+	if (!(supported & 0x1)) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+out_free:
+	kfree(output.pointer);
+	return ret;
+}
+
+static int __init pcc_cpufreq_probe(void)
+{
+	acpi_status status;
+	struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
+	struct pcc_memory_resource *mem_resource;
+	struct pcc_register_resource *reg_resource;
+	union acpi_object *out_obj, *member;
+	acpi_handle handle, osc_handle;
+	int ret = 0;
+
+	status = acpi_get_handle(NULL, "\\_SB", &handle);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	status = acpi_get_handle(handle, "_OSC", &osc_handle);
+	if (ACPI_SUCCESS(status)) {
+		ret = pcc_cpufreq_do_osc(&osc_handle);
+		if (ret)
+			dprintk("probe: _OSC evaluation did not succeed\n");
+		/* Firmware's use of _OSC is optional */
+		ret = 0;
+	}
+
+	status = acpi_evaluate_object(handle, "PCCH", NULL, &output);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	out_obj = output.pointer;
+	if (out_obj->type != ACPI_TYPE_PACKAGE) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	member = &out_obj->package.elements[0];
+	if (member->type != ACPI_TYPE_BUFFER) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	mem_resource = (struct pcc_memory_resource *)member->buffer.pointer;
+
+	dprintk("probe: mem_resource descriptor: 0x%x,"
+		" length: %d, space_id: %d, resource_usage: %d,"
+		" type_specific: %d, granularity: 0x%llx,"
+		" minimum: 0x%llx, maximum: 0x%llx,"
+		" translation_offset: 0x%llx, address_length: 0x%llx\n",
+		mem_resource->descriptor, mem_resource->length,
+		mem_resource->space_id, mem_resource->resource_usage,
+		mem_resource->type_specific, mem_resource->granularity,
+		mem_resource->minimum, mem_resource->maximum,
+		mem_resource->translation_offset,
+		mem_resource->address_length);
+
+	if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	pcch_virt_addr = ioremap_nocache(mem_resource->minimum,
+					mem_resource->address_length);
+	if (pcch_virt_addr == NULL) {
+		dprintk("probe: could not map shared mem region\n");
+		goto out_free;
+	}
+	pcch_hdr = pcch_virt_addr;
+
+	dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr);
+	dprintk("probe: PCCH header is at physical address: 0x%llx,"
+		" signature: 0x%x, length: %d bytes, major: %d, minor: %d,"
+		" supported features: 0x%x, command field: 0x%x,"
+		" status field: 0x%x, nominal latency: %d us\n",
+		mem_resource->minimum, ioread32(&pcch_hdr->signature),
+		ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major),
+		ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features),
+		ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status),
+		ioread32(&pcch_hdr->latency));
+
+	dprintk("probe: min time between commands: %d us,"
+		" max time between commands: %d us,"
+		" nominal CPU frequency: %d MHz,"
+		" minimum CPU frequency: %d MHz,"
+		" minimum CPU frequency without throttling: %d MHz\n",
+		ioread32(&pcch_hdr->minimum_time),
+		ioread32(&pcch_hdr->maximum_time),
+		ioread32(&pcch_hdr->nominal),
+		ioread32(&pcch_hdr->throttled_frequency),
+		ioread32(&pcch_hdr->minimum_frequency));
+
+	member = &out_obj->package.elements[1];
+	if (member->type != ACPI_TYPE_BUFFER) {
+		ret = -ENODEV;
+		goto pcch_free;
+	}
+
+	reg_resource = (struct pcc_register_resource *)member->buffer.pointer;
+
+	doorbell.space_id = reg_resource->space_id;
+	doorbell.bit_width = reg_resource->bit_width;
+	doorbell.bit_offset = reg_resource->bit_offset;
+	doorbell.access_width = 64;
+	doorbell.address = reg_resource->address;
+
+	dprintk("probe: doorbell: space_id is %d, bit_width is %d, "
+		"bit_offset is %d, access_width is %d, address is 0x%llx\n",
+		doorbell.space_id, doorbell.bit_width, doorbell.bit_offset,
+		doorbell.access_width, reg_resource->address);
+
+	member = &out_obj->package.elements[2];
+	if (member->type != ACPI_TYPE_INTEGER) {
+		ret = -ENODEV;
+		goto pcch_free;
+	}
+
+	doorbell_preserve = member->integer.value;
+
+	member = &out_obj->package.elements[3];
+	if (member->type != ACPI_TYPE_INTEGER) {
+		ret = -ENODEV;
+		goto pcch_free;
+	}
+
+	doorbell_write = member->integer.value;
+
+	dprintk("probe: doorbell_preserve: 0x%llx,"
+		" doorbell_write: 0x%llx\n",
+		doorbell_preserve, doorbell_write);
+
+	pcc_cpu_info = alloc_percpu(struct pcc_cpu);
+	if (!pcc_cpu_info) {
+		ret = -ENOMEM;
+		goto pcch_free;
+	}
+
+	printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency"
+	       " limits: %d MHz, %d MHz\n", PCC_VERSION,
+	       ioread32(&pcch_hdr->minimum_frequency),
+	       ioread32(&pcch_hdr->nominal));
+	kfree(output.pointer);
+	return ret;
+pcch_free:
+	pcc_clear_mapping();
+out_free:
+	kfree(output.pointer);
+	return ret;
+}
+
+static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int cpu = policy->cpu;
+	unsigned int result = 0;
+
+	if (!pcch_virt_addr) {
+		result = -1;
+		goto pcch_null;
+	}
+
+	result = pcc_get_offset(cpu);
+	if (result) {
+		dprintk("init: PCCP evaluation failed\n");
+		goto free;
+	}
+
+	policy->max = policy->cpuinfo.max_freq =
+		ioread32(&pcch_hdr->nominal) * 1000;
+	policy->min = policy->cpuinfo.min_freq =
+		ioread32(&pcch_hdr->minimum_frequency) * 1000;
+	policy->cur = pcc_get_freq(cpu);
+
+	dprintk("init: policy->max is %d, policy->min is %d\n",
+		policy->max, policy->min);
+
+	return 0;
+free:
+	pcc_clear_mapping();
+	free_percpu(pcc_cpu_info);
+pcch_null:
+	return result;
+}
+
+static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+	return 0;
+}
+
+static struct cpufreq_driver pcc_cpufreq_driver = {
+	.flags = CPUFREQ_CONST_LOOPS,
+	.get = pcc_get_freq,
+	.verify = pcc_cpufreq_verify,
+	.target = pcc_cpufreq_target,
+	.init = pcc_cpufreq_cpu_init,
+	.exit = pcc_cpufreq_cpu_exit,
+	.name = "pcc-cpufreq",
+	.owner = THIS_MODULE,
+};
+
+static int __init pcc_cpufreq_init(void)
+{
+	int ret;
+
+	if (acpi_disabled)
+		return 0;
+
+	ret = pcc_cpufreq_probe();
+	if (ret) {
+		dprintk("pcc_cpufreq_init: PCCH evaluation failed\n");
+		return ret;
+	}
+
+	ret = cpufreq_register_driver(&pcc_cpufreq_driver);
+
+	return ret;
+}
+
+static void __exit pcc_cpufreq_exit(void)
+{
+	cpufreq_unregister_driver(&pcc_cpufreq_driver);
+
+	pcc_clear_mapping();
+
+	free_percpu(pcc_cpu_info);
+}
+
+MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar");
+MODULE_VERSION(PCC_VERSION);
+MODULE_DESCRIPTION("Processor Clocking Control interface driver");
+MODULE_LICENSE("GPL");
+
+late_initcall(pcc_cpufreq_init);
+module_exit(pcc_cpufreq_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index cb01dac267d..b3379d6a5c5 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -13,7 +13,6 @@
 #include <linux/init.h>
 #include <linux/cpufreq.h>
 #include <linux/ioport.h>
-#include <linux/slab.h>
 #include <linux/timex.h>
 #include <linux/io.h>
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index f125e5c551c..d360b56e982 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -806,7 +806,7 @@ static int find_psb_table(struct powernow_k8_data *data)
 static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
 		unsigned int index)
 {
-	acpi_integer control;
+	u64 control;
 
 	if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
 		return;
@@ -824,7 +824,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 {
 	struct cpufreq_frequency_table *powernow_table;
 	int ret_val = -ENODEV;
-	acpi_integer control, status;
+	u64 control, status;
 
 	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
 		dprintk("register performance failed: bad ACPI data\n");
@@ -948,7 +948,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
 		u32 fid;
 		u32 vid;
 		u32 freq, index;
-		acpi_integer status, control;
+		u64 status, control;
 
 		if (data->exttype) {
 			status =  data->acpi_data.states[i].status;
@@ -1356,6 +1356,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
 
 	kfree(data->powernow_table);
 	kfree(data);
+	per_cpu(powernow_data, pol->cpu) = NULL;
 
 	return 0;
 }
@@ -1375,7 +1376,7 @@ static unsigned int powernowk8_get(unsigned int cpu)
 	int err;
 
 	if (!data)
-		return -EINVAL;
+		return 0;
 
 	smp_call_function_single(cpu, query_values_on_cpu, &err, true);
 	if (err)
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 8d672ef162c..9b1ff37de46 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -20,6 +20,7 @@
 #include <linux/sched.h>	/* current */
 #include <linux/delay.h>
 #include <linux/compiler.h>
+#include <linux/gfp.h>
 
 #include <asm/msr.h>
 #include <asm/processor.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 2ce8e0b5cc5..561758e9518 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -23,7 +23,6 @@
 #include <linux/init.h>
 #include <linux/cpufreq.h>
 #include <linux/pci.h>
-#include <linux/slab.h>
 #include <linux/sched.h>
 
 #include "speedstep-lib.h"
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index ad0083abfa2..a94ec6be69f 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -13,7 +13,6 @@
 #include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/cpufreq.h>
-#include <linux/slab.h>
 
 #include <asm/msr.h>
 #include <asm/tsc.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index 04d73c114e4..8abd869baab 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -17,7 +17,6 @@
 #include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/cpufreq.h>
-#include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <asm/ist.h>
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 879666f4d87..7e1cca13af3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -70,7 +70,8 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 	if (c->x86_power & (1 << 8)) {
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
-		sched_clock_stable = 1;
+		if (!check_tsc_unstable())
+			sched_clock_stable = 1;
 	}
 
 	/*
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index c2b722d5a72..b3eeb66c0a5 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -18,6 +18,7 @@
 #include <asm/processor.h>
 #include <linux/smp.h>
 #include <asm/k8.h>
+#include <asm/smp.h>
 
 #define LVL_1_INST	1
 #define LVL_1_DATA	2
@@ -152,7 +153,8 @@ struct _cpuid4_info {
 	union _cpuid4_leaf_ebx ebx;
 	union _cpuid4_leaf_ecx ecx;
 	unsigned long size;
-	unsigned long can_disable;
+	bool can_disable;
+	unsigned int l3_indices;
 	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
 };
 
@@ -162,7 +164,8 @@ struct _cpuid4_info_regs {
 	union _cpuid4_leaf_ebx ebx;
 	union _cpuid4_leaf_ecx ecx;
 	unsigned long size;
-	unsigned long can_disable;
+	bool can_disable;
+	unsigned int l3_indices;
 };
 
 unsigned short			num_cache_leaves;
@@ -292,6 +295,36 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 		(ebx->split.ways_of_associativity + 1) - 1;
 }
 
+struct _cache_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct _cpuid4_info *, char *);
+	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
+};
+
+#ifdef CONFIG_CPU_SUP_AMD
+static unsigned int __cpuinit amd_calc_l3_indices(void)
+{
+	/*
+	 * We're called over smp_call_function_single() and therefore
+	 * are on the correct cpu.
+	 */
+	int cpu = smp_processor_id();
+	int node = cpu_to_node(cpu);
+	struct pci_dev *dev = node_to_k8_nb_misc(node);
+	unsigned int sc0, sc1, sc2, sc3;
+	u32 val = 0;
+
+	pci_read_config_dword(dev, 0x1C4, &val);
+
+	/* calculate subcache sizes */
+	sc0 = !(val & BIT(0));
+	sc1 = !(val & BIT(4));
+	sc2 = !(val & BIT(8))  + !(val & BIT(9));
+	sc3 = !(val & BIT(12)) + !(val & BIT(13));
+
+	return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
+}
+
 static void __cpuinit
 amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
 {
@@ -301,12 +334,103 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
 	if (boot_cpu_data.x86 == 0x11)
 		return;
 
-	/* see erratum #382 */
-	if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8))
+	/* see errata #382 and #388 */
+	if ((boot_cpu_data.x86 == 0x10) &&
+	    ((boot_cpu_data.x86_model < 0x8) ||
+	     (boot_cpu_data.x86_mask  < 0x1)))
 		return;
 
-	this_leaf->can_disable = 1;
+	this_leaf->can_disable = true;
+	this_leaf->l3_indices  = amd_calc_l3_indices();
+}
+
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
+				  unsigned int index)
+{
+	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+	int node = amd_get_nb_id(cpu);
+	struct pci_dev *dev = node_to_k8_nb_misc(node);
+	unsigned int reg = 0;
+
+	if (!this_leaf->can_disable)
+		return -EINVAL;
+
+	if (!dev)
+		return -EINVAL;
+
+	pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
+	return sprintf(buf, "0x%08x\n", reg);
+}
+
+#define SHOW_CACHE_DISABLE(index)					\
+static ssize_t								\
+show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf)	\
+{									\
+	return show_cache_disable(this_leaf, buf, index);		\
+}
+SHOW_CACHE_DISABLE(0)
+SHOW_CACHE_DISABLE(1)
+
+static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
+	const char *buf, size_t count, unsigned int index)
+{
+	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+	int node = amd_get_nb_id(cpu);
+	struct pci_dev *dev = node_to_k8_nb_misc(node);
+	unsigned long val = 0;
+
+#define SUBCACHE_MASK	(3UL << 20)
+#define SUBCACHE_INDEX	0xfff
+
+	if (!this_leaf->can_disable)
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (strict_strtoul(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	/* do not allow writes outside of allowed bits */
+	if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
+	    ((val & SUBCACHE_INDEX) > this_leaf->l3_indices))
+		return -EINVAL;
+
+	val |= BIT(30);
+	pci_write_config_dword(dev, 0x1BC + index * 4, val);
+	/*
+	 * We need to WBINVD on a core on the node containing the L3 cache which
+	 * indices we disable therefore a simple wbinvd() is not sufficient.
+	 */
+	wbinvd_on_cpu(cpu);
+	pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31));
+	return count;
+}
+
+#define STORE_CACHE_DISABLE(index)					\
+static ssize_t								\
+store_cache_disable_##index(struct _cpuid4_info *this_leaf,		\
+			    const char *buf, size_t count)		\
+{									\
+	return store_cache_disable(this_leaf, buf, count, index);	\
 }
+STORE_CACHE_DISABLE(0)
+STORE_CACHE_DISABLE(1)
+
+static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
+		show_cache_disable_0, store_cache_disable_0);
+static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
+		show_cache_disable_1, store_cache_disable_1);
+
+#else	/* CONFIG_CPU_SUP_AMD */
+static void __cpuinit
+amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
+{
+};
+#endif /* CONFIG_CPU_SUP_AMD */
 
 static int
 __cpuinit cpuid4_cache_lookup_regs(int index,
@@ -713,82 +837,6 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
 #define to_object(k)	container_of(k, struct _index_kobject, kobj)
 #define to_attr(a)	container_of(a, struct _cache_attr, attr)
 
-static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
-				  unsigned int index)
-{
-	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
-	int node = cpu_to_node(cpu);
-	struct pci_dev *dev = node_to_k8_nb_misc(node);
-	unsigned int reg = 0;
-
-	if (!this_leaf->can_disable)
-		return -EINVAL;
-
-	if (!dev)
-		return -EINVAL;
-
-	pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
-	return sprintf(buf, "%x\n", reg);
-}
-
-#define SHOW_CACHE_DISABLE(index)					\
-static ssize_t								\
-show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf)  	\
-{									\
-	return show_cache_disable(this_leaf, buf, index);		\
-}
-SHOW_CACHE_DISABLE(0)
-SHOW_CACHE_DISABLE(1)
-
-static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
-	const char *buf, size_t count, unsigned int index)
-{
-	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
-	int node = cpu_to_node(cpu);
-	struct pci_dev *dev = node_to_k8_nb_misc(node);
-	unsigned long val = 0;
-	unsigned int scrubber = 0;
-
-	if (!this_leaf->can_disable)
-		return -EINVAL;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	if (!dev)
-		return -EINVAL;
-
-	if (strict_strtoul(buf, 10, &val) < 0)
-		return -EINVAL;
-
-	val |= 0xc0000000;
-
-	pci_read_config_dword(dev, 0x58, &scrubber);
-	scrubber &= ~0x1f000000;
-	pci_write_config_dword(dev, 0x58, scrubber);
-
-	pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
-	wbinvd();
-	pci_write_config_dword(dev, 0x1BC + index * 4, val);
-	return count;
-}
-
-#define STORE_CACHE_DISABLE(index)					\
-static ssize_t								\
-store_cache_disable_##index(struct _cpuid4_info *this_leaf,	     	\
-			    const char *buf, size_t count)		\
-{									\
-	return store_cache_disable(this_leaf, buf, count, index);	\
-}
-STORE_CACHE_DISABLE(0)
-STORE_CACHE_DISABLE(1)
-
-struct _cache_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct _cpuid4_info *, char *);
-	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
-};
-
 #define define_one_ro(_name) \
 static struct _cache_attr _name = \
 	__ATTR(_name, 0444, show_##_name, NULL)
@@ -803,23 +851,28 @@ define_one_ro(size);
 define_one_ro(shared_cpu_map);
 define_one_ro(shared_cpu_list);
 
-static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
-		show_cache_disable_0, store_cache_disable_0);
-static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
-		show_cache_disable_1, store_cache_disable_1);
+#define DEFAULT_SYSFS_CACHE_ATTRS	\
+	&type.attr,			\
+	&level.attr,			\
+	&coherency_line_size.attr,	\
+	&physical_line_partition.attr,	\
+	&ways_of_associativity.attr,	\
+	&number_of_sets.attr,		\
+	&size.attr,			\
+	&shared_cpu_map.attr,		\
+	&shared_cpu_list.attr
 
 static struct attribute *default_attrs[] = {
-	&type.attr,
-	&level.attr,
-	&coherency_line_size.attr,
-	&physical_line_partition.attr,
-	&ways_of_associativity.attr,
-	&number_of_sets.attr,
-	&size.attr,
-	&shared_cpu_map.attr,
-	&shared_cpu_list.attr,
+	DEFAULT_SYSFS_CACHE_ATTRS,
+	NULL
+};
+
+static struct attribute *default_l3_attrs[] = {
+	DEFAULT_SYSFS_CACHE_ATTRS,
+#ifdef CONFIG_CPU_SUP_AMD
 	&cache_disable_0.attr,
 	&cache_disable_1.attr,
+#endif
 	NULL
 };
 
@@ -850,7 +903,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
-static struct sysfs_ops sysfs_ops = {
+static const struct sysfs_ops sysfs_ops = {
 	.show   = show,
 	.store  = store,
 };
@@ -910,6 +963,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 	unsigned int cpu = sys_dev->id;
 	unsigned long i, j;
 	struct _index_kobject *this_object;
+	struct _cpuid4_info   *this_leaf;
 	int retval;
 
 	retval = cpuid4_cache_sysfs_init(cpu);
@@ -928,6 +982,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 		this_object = INDEX_KOBJECT_PTR(cpu, i);
 		this_object->cpu = cpu;
 		this_object->index = i;
+
+		this_leaf = CPUID4_INFO_IDX(cpu, i);
+
+		if (this_leaf->can_disable)
+			ktype_cache.default_attrs = default_l3_attrs;
+		else
+			ktype_cache.default_attrs = default_attrs;
+
 		retval = kobject_init_and_add(&(this_object->kobj),
 					      &ktype_cache,
 					      per_cpu(ici_cache_kobject, cpu),
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 73734baa50f..e7dbde7bfed 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -22,6 +22,7 @@
 #include <linux/kdebug.h>
 #include <linux/cpu.h>
 #include <linux/sched.h>
+#include <linux/gfp.h>
 #include <asm/mce.h>
 #include <asm/apic.h>
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a8aacd4b513..7a355ddcc64 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -26,6 +26,7 @@
 #include <linux/sched.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
 #include <linux/poll.h>
@@ -46,6 +47,13 @@
 
 #include "mce-internal.h"
 
+static DEFINE_MUTEX(mce_read_mutex);
+
+#define rcu_dereference_check_mce(p) \
+	rcu_dereference_check((p), \
+			      rcu_read_lock_sched_held() || \
+			      lockdep_is_held(&mce_read_mutex))
+
 #define CREATE_TRACE_POINTS
 #include <trace/events/mce.h>
 
@@ -158,7 +166,7 @@ void mce_log(struct mce *mce)
 	mce->finished = 0;
 	wmb();
 	for (;;) {
-		entry = rcu_dereference(mcelog.next);
+		entry = rcu_dereference_check_mce(mcelog.next);
 		for (;;) {
 			/*
 			 * When the buffer fills up discard new entries.
@@ -531,7 +539,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 	struct mce m;
 	int i;
 
-	__get_cpu_var(mce_poll_count)++;
+	percpu_inc(mce_poll_count);
 
 	mce_setup(&m);
 
@@ -926,7 +934,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
 	atomic_inc(&mce_entry);
 
-	__get_cpu_var(mce_exception_count)++;
+	percpu_inc(mce_exception_count);
 
 	if (notify_die(DIE_NMI, "machine check", regs, error_code,
 			   18, SIGKILL) == NOTIFY_STOP)
@@ -1485,8 +1493,6 @@ static void collect_tscs(void *data)
 	rdtscll(cpu_tsc[smp_processor_id()]);
 }
 
-static DEFINE_MUTEX(mce_read_mutex);
-
 static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 			loff_t *off)
 {
@@ -1500,7 +1506,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 		return -ENOMEM;
 
 	mutex_lock(&mce_read_mutex);
-	next = rcu_dereference(mcelog.next);
+	next = rcu_dereference_check_mce(mcelog.next);
 
 	/* Only supports full reads right now */
 	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
@@ -1565,7 +1571,7 @@ timeout:
 static unsigned int mce_poll(struct file *file, poll_table *wait)
 {
 	poll_wait(file, &mce_wait, wait);
-	if (rcu_dereference(mcelog.next))
+	if (rcu_dereference_check_mce(mcelog.next))
 		return POLLIN | POLLRDNORM;
 	return 0;
 }
@@ -2044,6 +2050,7 @@ static __init void mce_init_banks(void)
 		struct mce_bank *b = &mce_banks[i];
 		struct sysdev_attribute *a = &b->attr;
 
+		sysfs_attr_init(&a->attr);
 		a->attr.name	= b->attrname;
 		snprintf(b->attrname, ATTR_LEN, "bank%d", i);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 83a3d1f4efc..224392d8fe8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -21,6 +21,7 @@
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/sysfs.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
@@ -388,7 +389,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
-static struct sysfs_ops threshold_ops = {
+static const struct sysfs_ops threshold_ops = {
 	.show			= show,
 	.store			= store,
 };
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 7c785634af2..62b48e40920 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -5,6 +5,7 @@
  * Author: Andi Kleen
  */
 
+#include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
@@ -95,7 +96,7 @@ static void cmci_discover(int banks, int boot)
 
 		/* Already owned by someone else? */
 		if (val & CMCI_EN) {
-			if (test_and_clear_bit(i, owned) || boot)
+			if (test_and_clear_bit(i, owned) && !boot)
 				print_update("SHD", &hdr, i);
 			__clear_bit(i, __get_cpu_var(mce_poll_banks));
 			continue;
@@ -107,7 +108,7 @@ static void cmci_discover(int banks, int boot)
 
 		/* Did the enable bit stick? -- the bank supports CMCI */
 		if (val & CMCI_EN) {
-			if (!test_and_set_bit(i, owned) || boot)
+			if (!test_and_set_bit(i, owned) && !boot)
 				print_update("CMCI", &hdr, i);
 			__clear_bit(i, __get_cpu_var(mce_poll_banks));
 		} else {
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
index f4361b56f8e..ad9e5ed8118 100644
--- a/arch/x86/kernel/cpu/mtrr/Makefile
+++ b/arch/x86/kernel/cpu/mtrr/Makefile
@@ -1,3 +1,3 @@
-obj-y		:= main.o if.o generic.o state.o cleanup.o
+obj-y		:= main.o if.o generic.o cleanup.o
 obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
 
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c
index 33af14110df..92ba9cd31c9 100644
--- a/arch/x86/kernel/cpu/mtrr/amd.c
+++ b/arch/x86/kernel/cpu/mtrr/amd.c
@@ -108,7 +108,7 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
 	return 0;
 }
 
-static struct mtrr_ops amd_mtrr_ops = {
+static const struct mtrr_ops amd_mtrr_ops = {
 	.vendor            = X86_VENDOR_AMD,
 	.set               = amd_set_mtrr,
 	.get               = amd_get_mtrr,
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
index de89f14eff3..316fe3e60a9 100644
--- a/arch/x86/kernel/cpu/mtrr/centaur.c
+++ b/arch/x86/kernel/cpu/mtrr/centaur.c
@@ -110,7 +110,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t
 	return 0;
 }
 
-static struct mtrr_ops centaur_mtrr_ops = {
+static const struct mtrr_ops centaur_mtrr_ops = {
 	.vendor            = X86_VENDOR_CENTAUR,
 	.set               = centaur_set_mcr,
 	.get               = centaur_get_mcr,
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 09b1698e046..06130b52f01 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -22,10 +22,10 @@
 #include <linux/pci.h>
 #include <linux/smp.h>
 #include <linux/cpu.h>
-#include <linux/sort.h>
 #include <linux/mutex.h>
 #include <linux/uaccess.h>
 #include <linux/kvm_para.h>
+#include <linux/range.h>
 
 #include <asm/processor.h>
 #include <asm/e820.h>
@@ -34,11 +34,6 @@
 
 #include "mtrr.h"
 
-struct res_range {
-	unsigned long	start;
-	unsigned long	end;
-};
-
 struct var_mtrr_range_state {
 	unsigned long	base_pfn;
 	unsigned long	size_pfn;
@@ -56,7 +51,7 @@ struct var_mtrr_state {
 /* Should be related to MTRR_VAR_RANGES nums */
 #define RANGE_NUM				256
 
-static struct res_range __initdata		range[RANGE_NUM];
+static struct range __initdata		range[RANGE_NUM];
 static int __initdata				nr_range;
 
 static struct var_mtrr_range_state __initdata	range_state[RANGE_NUM];
@@ -64,152 +59,11 @@ static struct var_mtrr_range_state __initdata	range_state[RANGE_NUM];
 static int __initdata debug_print;
 #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
 
-
-static int __init
-add_range(struct res_range *range, int nr_range,
-	  unsigned long start, unsigned long end)
-{
-	/* Out of slots: */
-	if (nr_range >= RANGE_NUM)
-		return nr_range;
-
-	range[nr_range].start = start;
-	range[nr_range].end = end;
-
-	nr_range++;
-
-	return nr_range;
-}
-
-static int __init
-add_range_with_merge(struct res_range *range, int nr_range,
-		     unsigned long start, unsigned long end)
-{
-	int i;
-
-	/* Try to merge it with old one: */
-	for (i = 0; i < nr_range; i++) {
-		unsigned long final_start, final_end;
-		unsigned long common_start, common_end;
-
-		if (!range[i].end)
-			continue;
-
-		common_start = max(range[i].start, start);
-		common_end = min(range[i].end, end);
-		if (common_start > common_end + 1)
-			continue;
-
-		final_start = min(range[i].start, start);
-		final_end = max(range[i].end, end);
-
-		range[i].start = final_start;
-		range[i].end =  final_end;
-		return nr_range;
-	}
-
-	/* Need to add it: */
-	return add_range(range, nr_range, start, end);
-}
-
-static void __init
-subtract_range(struct res_range *range, unsigned long start, unsigned long end)
-{
-	int i, j;
-
-	for (j = 0; j < RANGE_NUM; j++) {
-		if (!range[j].end)
-			continue;
-
-		if (start <= range[j].start && end >= range[j].end) {
-			range[j].start = 0;
-			range[j].end = 0;
-			continue;
-		}
-
-		if (start <= range[j].start && end < range[j].end &&
-		    range[j].start < end + 1) {
-			range[j].start = end + 1;
-			continue;
-		}
-
-
-		if (start > range[j].start && end >= range[j].end &&
-		    range[j].end > start - 1) {
-			range[j].end = start - 1;
-			continue;
-		}
-
-		if (start > range[j].start && end < range[j].end) {
-			/* Find the new spare: */
-			for (i = 0; i < RANGE_NUM; i++) {
-				if (range[i].end == 0)
-					break;
-			}
-			if (i < RANGE_NUM) {
-				range[i].end = range[j].end;
-				range[i].start = end + 1;
-			} else {
-				printk(KERN_ERR "run of slot in ranges\n");
-			}
-			range[j].end = start - 1;
-			continue;
-		}
-	}
-}
-
-static int __init cmp_range(const void *x1, const void *x2)
-{
-	const struct res_range *r1 = x1;
-	const struct res_range *r2 = x2;
-	long start1, start2;
-
-	start1 = r1->start;
-	start2 = r2->start;
-
-	return start1 - start2;
-}
-
-static int __init clean_sort_range(struct res_range *range, int az)
-{
-	int i, j, k = az - 1, nr_range = 0;
-
-	for (i = 0; i < k; i++) {
-		if (range[i].end)
-			continue;
-		for (j = k; j > i; j--) {
-			if (range[j].end) {
-				k = j;
-				break;
-			}
-		}
-		if (j == i)
-			break;
-		range[i].start = range[k].start;
-		range[i].end   = range[k].end;
-		range[k].start = 0;
-		range[k].end   = 0;
-		k--;
-	}
-	/* count it */
-	for (i = 0; i < az; i++) {
-		if (!range[i].end) {
-			nr_range = i;
-			break;
-		}
-	}
-
-	/* sort them */
-	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
-
-	return nr_range;
-}
-
 #define BIOS_BUG_MSG KERN_WARNING \
 	"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
 
 static int __init
-x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
+x86_get_mtrr_mem_range(struct range *range, int nr_range,
 		       unsigned long extra_remove_base,
 		       unsigned long extra_remove_size)
 {
@@ -223,14 +77,14 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
 			continue;
 		base = range_state[i].base_pfn;
 		size = range_state[i].size_pfn;
-		nr_range = add_range_with_merge(range, nr_range, base,
-						base + size - 1);
+		nr_range = add_range_with_merge(range, RANGE_NUM, nr_range,
+						base, base + size);
 	}
 	if (debug_print) {
 		printk(KERN_DEBUG "After WB checking\n");
 		for (i = 0; i < nr_range; i++)
-			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
-				 range[i].start, range[i].end + 1);
+			printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+				 range[i].start, range[i].end);
 	}
 
 	/* Take out UC ranges: */
@@ -252,19 +106,19 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
 			size -= (1<<(20-PAGE_SHIFT)) - base;
 			base = 1<<(20-PAGE_SHIFT);
 		}
-		subtract_range(range, base, base + size - 1);
+		subtract_range(range, RANGE_NUM, base, base + size);
 	}
 	if (extra_remove_size)
-		subtract_range(range, extra_remove_base,
-				 extra_remove_base + extra_remove_size  - 1);
+		subtract_range(range, RANGE_NUM, extra_remove_base,
+				 extra_remove_base + extra_remove_size);
 
 	if  (debug_print) {
 		printk(KERN_DEBUG "After UC checking\n");
 		for (i = 0; i < RANGE_NUM; i++) {
 			if (!range[i].end)
 				continue;
-			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
-				 range[i].start, range[i].end + 1);
+			printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+				 range[i].start, range[i].end);
 		}
 	}
 
@@ -273,26 +127,22 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
 	if  (debug_print) {
 		printk(KERN_DEBUG "After sorting\n");
 		for (i = 0; i < nr_range; i++)
-			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
-				 range[i].start, range[i].end + 1);
+			printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+				 range[i].start, range[i].end);
 	}
 
-	/* clear those is not used */
-	for (i = nr_range; i < RANGE_NUM; i++)
-		memset(&range[i], 0, sizeof(range[i]));
-
 	return nr_range;
 }
 
 #ifdef CONFIG_MTRR_SANITIZER
 
-static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
+static unsigned long __init sum_ranges(struct range *range, int nr_range)
 {
 	unsigned long sum = 0;
 	int i;
 
 	for (i = 0; i < nr_range; i++)
-		sum += range[i].end + 1 - range[i].start;
+		sum += range[i].end - range[i].start;
 
 	return sum;
 }
@@ -621,7 +471,7 @@ static int __init parse_mtrr_spare_reg(char *arg)
 early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
 
 static int __init
-x86_setup_var_mtrrs(struct res_range *range, int nr_range,
+x86_setup_var_mtrrs(struct range *range, int nr_range,
 		    u64 chunk_size, u64 gran_size)
 {
 	struct var_mtrr_state var_state;
@@ -639,7 +489,7 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
 	/* Write the range: */
 	for (i = 0; i < nr_range; i++) {
 		set_var_mtrr_range(&var_state, range[i].start,
-				   range[i].end - range[i].start + 1);
+				   range[i].end - range[i].start);
 	}
 
 	/* Write the last range: */
@@ -742,7 +592,7 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
 		      unsigned long x_remove_base,
 		      unsigned long x_remove_size, int i)
 {
-	static struct res_range range_new[RANGE_NUM];
+	static struct range range_new[RANGE_NUM];
 	unsigned long range_sums_new;
 	static int nr_range_new;
 	int num_reg;
@@ -869,10 +719,10 @@ int __init mtrr_cleanup(unsigned address_bits)
 	 * [0, 1M) should always be covered by var mtrr with WB
 	 * and fixed mtrrs should take effect before var mtrr for it:
 	 */
-	nr_range = add_range_with_merge(range, nr_range, 0,
-					(1ULL<<(20 - PAGE_SHIFT)) - 1);
+	nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0,
+					1ULL<<(20 - PAGE_SHIFT));
 	/* Sort the ranges: */
-	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+	sort_range(range, nr_range);
 
 	range_sums = sum_ranges(range, nr_range);
 	printk(KERN_INFO "total RAM covered: %ldM\n",
@@ -1089,9 +939,9 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 	nr_range = 0;
 	if (mtrr_tom2) {
 		range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
-		range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
-		if (highest_pfn < range[nr_range].end + 1)
-			highest_pfn = range[nr_range].end + 1;
+		range[nr_range].end = mtrr_tom2 >> PAGE_SHIFT;
+		if (highest_pfn < range[nr_range].end)
+			highest_pfn = range[nr_range].end;
 		nr_range++;
 	}
 	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
@@ -1103,15 +953,15 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 
 	/* Check the holes: */
 	for (i = 0; i < nr_range - 1; i++) {
-		if (range[i].end + 1 < range[i+1].start)
-			total_trim_size += real_trim_memory(range[i].end + 1,
+		if (range[i].end < range[i+1].start)
+			total_trim_size += real_trim_memory(range[i].end,
 							    range[i+1].start);
 	}
 
 	/* Check the top: */
 	i = nr_range - 1;
-	if (range[i].end + 1 < end_pfn)
-		total_trim_size += real_trim_memory(range[i].end + 1,
+	if (range[i].end < end_pfn)
+		total_trim_size += real_trim_memory(range[i].end,
 							 end_pfn);
 
 	if (total_trim_size) {
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 228d982ce09..68a3343e579 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -265,7 +265,7 @@ static void cyrix_set_all(void)
 	post_set();
 }
 
-static struct mtrr_ops cyrix_mtrr_ops = {
+static const struct mtrr_ops cyrix_mtrr_ops = {
 	.vendor            = X86_VENDOR_CYRIX,
 	.set_all	   = cyrix_set_all,
 	.set               = cyrix_set_arr,
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 55da0c5f68d..fd31a441c61 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -6,7 +6,6 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/io.h>
 #include <linux/mm.h>
 
@@ -464,7 +463,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 		tmp |= ~((1<<(hi - 1)) - 1);
 
 		if (tmp != mask_lo) {
-			WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n");
+			printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
 			mask_lo = tmp;
 		}
 	}
@@ -570,7 +569,7 @@ static unsigned long set_mtrr_state(void)
 
 
 static unsigned long cr4;
-static DEFINE_SPINLOCK(set_atomicity_lock);
+static DEFINE_RAW_SPINLOCK(set_atomicity_lock);
 
 /*
  * Since we are disabling the cache don't allow any interrupts,
@@ -590,7 +589,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 	 * changes to the way the kernel boots
 	 */
 
-	spin_lock(&set_atomicity_lock);
+	raw_spin_lock(&set_atomicity_lock);
 
 	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
 	cr0 = read_cr0() | X86_CR0_CD;
@@ -627,7 +626,7 @@ static void post_set(void) __releases(set_atomicity_lock)
 	/* Restore value of CR4 */
 	if (cpu_has_pge)
 		write_cr4(cr4);
-	spin_unlock(&set_atomicity_lock);
+	raw_spin_unlock(&set_atomicity_lock);
 }
 
 static void generic_set_all(void)
@@ -752,7 +751,7 @@ int positive_have_wrcomb(void)
 /*
  * Generic structure...
  */
-struct mtrr_ops generic_mtrr_ops = {
+const struct mtrr_ops generic_mtrr_ops = {
 	.use_intel_if		= 1,
 	.set_all		= generic_set_all,
 	.get			= generic_get_mtrr,
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index e006e56f699..79289632cb2 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -5,6 +5,7 @@
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/string.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 
 #define LINE_SIZE 80
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 84e83de5457..79556bd9b60 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -60,14 +60,14 @@ static DEFINE_MUTEX(mtrr_mutex);
 u64 size_or_mask, size_and_mask;
 static bool mtrr_aps_delayed_init;
 
-static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
+static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
 
-struct mtrr_ops *mtrr_if;
+const struct mtrr_ops *mtrr_if;
 
 static void set_mtrr(unsigned int reg, unsigned long base,
 		     unsigned long size, mtrr_type type);
 
-void set_mtrr_ops(struct mtrr_ops *ops)
+void set_mtrr_ops(const struct mtrr_ops *ops)
 {
 	if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
 		mtrr_ops[ops->vendor] = ops;
@@ -145,6 +145,7 @@ struct set_mtrr_data {
 
 /**
  * ipi_handler - Synchronisation handler. Executed by "other" CPUs.
+ * @info: pointer to mtrr configuration data
  *
  * Returns nothing.
  */
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index a501dee9a87..df5e41f31a2 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -32,7 +32,7 @@ extern int generic_get_free_region(unsigned long base, unsigned long size,
 extern int generic_validate_add_page(unsigned long base, unsigned long size,
 				     unsigned int type);
 
-extern struct mtrr_ops generic_mtrr_ops;
+extern const struct mtrr_ops generic_mtrr_ops;
 
 extern int positive_have_wrcomb(void);
 
@@ -53,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index,
 		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
 void get_mtrr_state(void);
 
-extern void set_mtrr_ops(struct mtrr_ops *ops);
+extern void set_mtrr_ops(const struct mtrr_ops *ops);
 
 extern u64 size_or_mask, size_and_mask;
-extern struct mtrr_ops *mtrr_if;
+extern const struct mtrr_ops *mtrr_if;
 
 #define is_cpu(vnd)	(mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
 #define use_intel()	(mtrr_if && mtrr_if->use_intel_if == 1)
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c
deleted file mode 100644
index dfc80b4e6b0..00000000000
--- a/arch/x86/kernel/cpu/mtrr/state.c
+++ /dev/null
@@ -1,94 +0,0 @@
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/mm.h>
-
-#include <asm/processor-cyrix.h>
-#include <asm/processor-flags.h>
-#include <asm/mtrr.h>
-#include <asm/msr.h>
-
-#include "mtrr.h"
-
-/* Put the processor into a state where MTRRs can be safely set */
-void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
-{
-	unsigned int cr0;
-
-	/* Disable interrupts locally */
-	local_irq_save(ctxt->flags);
-
-	if (use_intel() || is_cpu(CYRIX)) {
-
-		/* Save value of CR4 and clear Page Global Enable (bit 7) */
-		if (cpu_has_pge) {
-			ctxt->cr4val = read_cr4();
-			write_cr4(ctxt->cr4val & ~X86_CR4_PGE);
-		}
-
-		/*
-		 * Disable and flush caches. Note that wbinvd flushes the TLBs
-		 * as a side-effect
-		 */
-		cr0 = read_cr0() | X86_CR0_CD;
-		wbinvd();
-		write_cr0(cr0);
-		wbinvd();
-
-		if (use_intel()) {
-			/* Save MTRR state */
-			rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
-		} else {
-			/*
-			 * Cyrix ARRs -
-			 * everything else were excluded at the top
-			 */
-			ctxt->ccr3 = getCx86(CX86_CCR3);
-		}
-	}
-}
-
-void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
-{
-	if (use_intel()) {
-		/* Disable MTRRs, and set the default type to uncached */
-		mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL,
-		      ctxt->deftype_hi);
-	} else {
-		if (is_cpu(CYRIX)) {
-			/* Cyrix ARRs - everything else were excluded at the top */
-			setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
-		}
-	}
-}
-
-/* Restore the processor after a set_mtrr_prepare */
-void set_mtrr_done(struct set_mtrr_context *ctxt)
-{
-	if (use_intel() || is_cpu(CYRIX)) {
-
-		/* Flush caches and TLBs */
-		wbinvd();
-
-		/* Restore MTRRdefType */
-		if (use_intel()) {
-			/* Intel (P6) standard MTRRs */
-			mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo,
-				   ctxt->deftype_hi);
-		} else {
-			/*
-			 * Cyrix ARRs -
-			 * everything else was excluded at the top
-			 */
-			setCx86(CX86_CCR3, ctxt->ccr3);
-		}
-
-		/* Enable caches */
-		write_cr0(read_cr0() & 0xbfffffff);
-
-		/* Restore value of CR4 */
-		if (cpu_has_pge)
-			write_cr4(ctxt->cr4val);
-	}
-	/* Re-enable interrupts locally (if enabled previously) */
-	local_irq_restore(ctxt->flags);
-}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c223b7e895d..db5bdc8addf 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -7,6 +7,7 @@
  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
  *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ *  Copyright (C) 2009 Google, Inc., Stephane Eranian
  *
  *  For licencing details see kernel-base/COPYING
  */
@@ -20,12 +21,15 @@
 #include <linux/kdebug.h>
 #include <linux/sched.h>
 #include <linux/uaccess.h>
+#include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/cpu.h>
+#include <linux/bitops.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
 #include <asm/nmi.h>
+#include <asm/compat.h>
 
 static u64 perf_event_mask __read_mostly;
 
@@ -68,26 +72,59 @@ struct debug_store {
 	u64	pebs_event_reset[MAX_PEBS_EVENTS];
 };
 
+struct event_constraint {
+	union {
+		unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+		u64		idxmsk64;
+	};
+	u64	code;
+	u64	cmask;
+	int	weight;
+};
+
+struct amd_nb {
+	int nb_id;  /* NorthBridge id */
+	int refcnt; /* reference count */
+	struct perf_event *owners[X86_PMC_IDX_MAX];
+	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
+};
+
 struct cpu_hw_events {
-	struct perf_event	*events[X86_PMC_IDX_MAX];
-	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		interrupts;
 	int			enabled;
 	struct debug_store	*ds;
-};
 
-struct event_constraint {
-	unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	int		code;
+	int			n_events;
+	int			n_added;
+	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
+	u64			tags[X86_PMC_IDX_MAX];
+	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+	struct amd_nb		*amd_nb;
 };
 
-#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
-#define EVENT_CONSTRAINT_END  { .code = 0, .idxmsk[0] = 0 }
+#define __EVENT_CONSTRAINT(c, n, m, w) {\
+	{ .idxmsk64 = (n) },		\
+	.code = (c),			\
+	.cmask = (m),			\
+	.weight = (w),			\
+}
+
+#define EVENT_CONSTRAINT(c, n, m)	\
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
 
-#define for_each_event_constraint(e, c) \
-	for ((e) = (c); (e)->idxmsk[0]; (e)++)
+#define INTEL_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
 
+#define FIXED_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
+
+#define EVENT_CONSTRAINT_END		\
+	EVENT_CONSTRAINT(0, 0, 0)
+
+#define for_each_event_constraint(e, c)	\
+	for ((e) = (c); (e)->cmask; (e)++)
 
 /*
  * struct x86_pmu - generic x86 pmu
@@ -98,8 +135,8 @@ struct x86_pmu {
 	int		(*handle_irq)(struct pt_regs *);
 	void		(*disable_all)(void);
 	void		(*enable_all)(void);
-	void		(*enable)(struct hw_perf_event *, int);
-	void		(*disable)(struct hw_perf_event *, int);
+	void		(*enable)(struct perf_event *);
+	void		(*disable)(struct perf_event *);
 	unsigned	eventsel;
 	unsigned	perfctr;
 	u64		(*event_map)(int);
@@ -114,121 +151,28 @@ struct x86_pmu {
 	u64		intel_ctrl;
 	void		(*enable_bts)(u64 config);
 	void		(*disable_bts)(void);
-	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
-					 struct hw_perf_event *hwc);
-};
 
-static struct x86_pmu x86_pmu __read_mostly;
+	struct event_constraint *
+			(*get_event_constraints)(struct cpu_hw_events *cpuc,
+						 struct perf_event *event);
 
-static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
-	.enabled = 1,
-};
+	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
+						 struct perf_event *event);
+	struct event_constraint *event_constraints;
 
-static const struct event_constraint *event_constraints;
-
-/*
- * Not sure about some of these
- */
-static const u64 p6_perfmon_event_map[] =
-{
-  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0079,
-  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0f2e,
-  [PERF_COUNT_HW_CACHE_MISSES]		= 0x012e,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
-  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
-  [PERF_COUNT_HW_BUS_CYCLES]		= 0x0062,
-};
-
-static u64 p6_pmu_event_map(int hw_event)
-{
-	return p6_perfmon_event_map[hw_event];
-}
-
-/*
- * Event setting that is specified not to count anything.
- * We use this to effectively disable a counter.
- *
- * L2_RQSTS with 0 MESI unit mask.
- */
-#define P6_NOP_EVENT			0x0000002EULL
-
-static u64 p6_pmu_raw_event(u64 hw_event)
-{
-#define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
-#define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
-#define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
-#define P6_EVNTSEL_INV_MASK		0x00800000ULL
-#define P6_EVNTSEL_REG_MASK		0xFF000000ULL
-
-#define P6_EVNTSEL_MASK			\
-	(P6_EVNTSEL_EVENT_MASK |	\
-	 P6_EVNTSEL_UNIT_MASK  |	\
-	 P6_EVNTSEL_EDGE_MASK  |	\
-	 P6_EVNTSEL_INV_MASK   |	\
-	 P6_EVNTSEL_REG_MASK)
-
-	return hw_event & P6_EVNTSEL_MASK;
-}
-
-static const struct event_constraint intel_p6_event_constraints[] =
-{
-	EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
-	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
-	EVENT_CONSTRAINT(0x11, 0x1),	/* FP_ASSIST */
-	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
-	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
-	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
-	EVENT_CONSTRAINT_END
-};
-
-/*
- * Intel PerfMon v3. Used on Core2 and later.
- */
-static const u64 intel_perfmon_event_map[] =
-{
-  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
-  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
-  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
-  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
-  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
+	int		(*cpu_prepare)(int cpu);
+	void		(*cpu_starting)(int cpu);
+	void		(*cpu_dying)(int cpu);
+	void		(*cpu_dead)(int cpu);
 };
 
-static const struct event_constraint intel_core_event_constraints[] =
-{
-	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
-	EVENT_CONSTRAINT(0x11, 0x2),	/* FP_ASSIST */
-	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
-	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
-	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
-	EVENT_CONSTRAINT(0x18, 0x1),	/* IDLE_DURING_DIV */
-	EVENT_CONSTRAINT(0x19, 0x2),	/* DELAYED_BYPASS */
-	EVENT_CONSTRAINT(0xa1, 0x1),	/* RS_UOPS_DISPATCH_CYCLES */
-	EVENT_CONSTRAINT(0xcb, 0x1),	/* MEM_LOAD_RETIRED */
-	EVENT_CONSTRAINT_END
-};
+static struct x86_pmu x86_pmu __read_mostly;
 
-static const struct event_constraint intel_nehalem_event_constraints[] =
-{
-	EVENT_CONSTRAINT(0x40, 0x3),	/* L1D_CACHE_LD */
-	EVENT_CONSTRAINT(0x41, 0x3),	/* L1D_CACHE_ST */
-	EVENT_CONSTRAINT(0x42, 0x3),	/* L1D_CACHE_LOCK */
-	EVENT_CONSTRAINT(0x43, 0x3),	/* L1D_ALL_REF */
-	EVENT_CONSTRAINT(0x4e, 0x3),	/* L1D_PREFETCH */
-	EVENT_CONSTRAINT(0x4c, 0x3),	/* LOAD_HIT_PRE */
-	EVENT_CONSTRAINT(0x51, 0x3),	/* L1D */
-	EVENT_CONSTRAINT(0x52, 0x3),	/* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
-	EVENT_CONSTRAINT(0x53, 0x3),	/* L1D_CACHE_LOCK_FB_HIT */
-	EVENT_CONSTRAINT(0xc5, 0x3),	/* CACHE_LOCK_CYCLES */
-	EVENT_CONSTRAINT_END
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
+	.enabled = 1,
 };
 
-static u64 intel_pmu_event_map(int hw_event)
-{
-	return intel_perfmon_event_map[hw_event];
-}
+static int x86_perf_event_set_period(struct perf_event *event);
 
 /*
  * Generalized hw caching related hw_event table, filled
@@ -245,435 +189,18 @@ static u64 __read_mostly hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX];
 
-static __initconst u64 nehalem_hw_cache_event_ids
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
-		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
-		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
-		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
-	},
- },
- [ C(L1I ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
-		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0x0,
-	},
- },
- [ C(LL  ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
-		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
-		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
-		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
-	},
- },
- [ C(DTLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
-		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
-		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0x0,
-	},
- },
- [ C(ITLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
-		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
- [ C(BPU ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
-		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
-};
-
-static __initconst u64 core2_hw_cache_event_ids
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
-		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
-		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(L1I ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
-		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(LL  ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
-		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
-		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(DTLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
-		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
-		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(ITLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
-		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
- [ C(BPU ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
-		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
-};
-
-static __initconst u64 atom_hw_cache_event_ids
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
-		[ C(RESULT_MISS)   ] = 0,
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
-		[ C(RESULT_MISS)   ] = 0,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(L1I ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
-		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(LL  ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
-		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
-		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(DTLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
-		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
-		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(ITLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
-		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
- [ C(BPU ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
-		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
-};
-
-static u64 intel_pmu_raw_event(u64 hw_event)
-{
-#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
-#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
-#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
-#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
-#define CORE_EVNTSEL_REG_MASK		0xFF000000ULL
-
-#define CORE_EVNTSEL_MASK		\
-	(CORE_EVNTSEL_EVENT_MASK |	\
-	 CORE_EVNTSEL_UNIT_MASK  |	\
-	 CORE_EVNTSEL_EDGE_MASK  |	\
-	 CORE_EVNTSEL_INV_MASK  |	\
-	 CORE_EVNTSEL_REG_MASK)
-
-	return hw_event & CORE_EVNTSEL_MASK;
-}
-
-static __initconst u64 amd_hw_cache_event_ids
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
-		[ C(RESULT_MISS)   ] = 0,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
-		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
-	},
- },
- [ C(L1I ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
-		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(LL  ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
-		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
-		[ C(RESULT_MISS)   ] = 0,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(DTLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-		[ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DLTB Miss   */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(ITLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes        */
-		[ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
- [ C(BPU ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
-		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
-};
-
-/*
- * AMD Performance Monitor K7 and later.
- */
-static const u64 amd_perfmon_event_map[] =
-{
-  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
-  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
-  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
-  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
-};
-
-static u64 amd_pmu_event_map(int hw_event)
-{
-	return amd_perfmon_event_map[hw_event];
-}
-
-static u64 amd_pmu_raw_event(u64 hw_event)
-{
-#define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
-#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
-#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
-#define K7_EVNTSEL_INV_MASK	0x000800000ULL
-#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL
-
-#define K7_EVNTSEL_MASK			\
-	(K7_EVNTSEL_EVENT_MASK |	\
-	 K7_EVNTSEL_UNIT_MASK  |	\
-	 K7_EVNTSEL_EDGE_MASK  |	\
-	 K7_EVNTSEL_INV_MASK   |	\
-	 K7_EVNTSEL_REG_MASK)
-
-	return hw_event & K7_EVNTSEL_MASK;
-}
-
 /*
  * Propagate event elapsed time into the generic event.
  * Can only be executed on the CPU where the event is active.
  * Returns the delta events processed.
  */
 static u64
-x86_perf_event_update(struct perf_event *event,
-			struct hw_perf_event *hwc, int idx)
+x86_perf_event_update(struct perf_event *event)
 {
+	struct hw_perf_event *hwc = &event->hw;
 	int shift = 64 - x86_pmu.event_bits;
 	u64 prev_raw_count, new_raw_count;
+	int idx = hwc->idx;
 	s64 delta;
 
 	if (idx == X86_PMC_IDX_FIXED_BTS)
@@ -773,7 +300,7 @@ static inline bool bts_available(void)
 	return x86_pmu.enable_bts != NULL;
 }
 
-static inline void init_debug_store_on_cpu(int cpu)
+static void init_debug_store_on_cpu(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 
@@ -785,7 +312,7 @@ static inline void init_debug_store_on_cpu(int cpu)
 		     (u32)((u64)(unsigned long)ds >> 32));
 }
 
-static inline void fini_debug_store_on_cpu(int cpu)
+static void fini_debug_store_on_cpu(int cpu)
 {
 	if (!per_cpu(cpu_hw_events, cpu).ds)
 		return;
@@ -914,42 +441,6 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
 	return 0;
 }
 
-static void intel_pmu_enable_bts(u64 config)
-{
-	unsigned long debugctlmsr;
-
-	debugctlmsr = get_debugctlmsr();
-
-	debugctlmsr |= X86_DEBUGCTL_TR;
-	debugctlmsr |= X86_DEBUGCTL_BTS;
-	debugctlmsr |= X86_DEBUGCTL_BTINT;
-
-	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
-		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
-
-	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
-		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
-
-	update_debugctlmsr(debugctlmsr);
-}
-
-static void intel_pmu_disable_bts(void)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	unsigned long debugctlmsr;
-
-	if (!cpuc->ds)
-		return;
-
-	debugctlmsr = get_debugctlmsr();
-
-	debugctlmsr &=
-		~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
-		  X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
-
-	update_debugctlmsr(debugctlmsr);
-}
-
 /*
  * Setup the hardware configuration for a given attr_type
  */
@@ -988,6 +479,8 @@ static int __hw_perf_event_init(struct perf_event *event)
 	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
 
 	hwc->idx = -1;
+	hwc->last_cpu = -1;
+	hwc->last_tag = ~0ULL;
 
 	/*
 	 * Count user and OS events unless requested not to.
@@ -1017,6 +510,9 @@ static int __hw_perf_event_init(struct perf_event *event)
 	 */
 	if (attr->type == PERF_TYPE_RAW) {
 		hwc->config |= x86_pmu.raw_event(attr->config);
+		if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
+		    perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+			return -EACCES;
 		return 0;
 	}
 
@@ -1056,216 +552,314 @@ static int __hw_perf_event_init(struct perf_event *event)
 	return 0;
 }
 
-static void p6_pmu_disable_all(void)
+static void x86_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	u64 val;
-
-	if (!cpuc->enabled)
-		return;
+	int idx;
 
-	cpuc->enabled = 0;
-	barrier();
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+		u64 val;
 
-	/* p6 only has one enable register */
-	rdmsrl(MSR_P6_EVNTSEL0, val);
-	val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
-	wrmsrl(MSR_P6_EVNTSEL0, val);
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+		rdmsrl(x86_pmu.eventsel + idx, val);
+		if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
+			continue;
+		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+		wrmsrl(x86_pmu.eventsel + idx, val);
+	}
 }
 
-static void intel_pmu_disable_all(void)
+void hw_perf_disable(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
+	if (!x86_pmu_initialized())
+		return;
+
 	if (!cpuc->enabled)
 		return;
 
+	cpuc->n_added = 0;
 	cpuc->enabled = 0;
 	barrier();
 
-	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
-	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
-		intel_pmu_disable_bts();
+	x86_pmu.disable_all();
 }
 
-static void amd_pmu_disable_all(void)
+static void x86_pmu_enable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	if (!cpuc->enabled)
-		return;
-
-	cpuc->enabled = 0;
-	/*
-	 * ensure we write the disable before we start disabling the
-	 * events proper, so that amd_pmu_enable_event() does the
-	 * right thing.
-	 */
-	barrier();
-
 	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+		struct perf_event *event = cpuc->events[idx];
 		u64 val;
 
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
-		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
-		if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
-			continue;
-		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
-		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
+
+		val = event->hw.config;
+		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+		wrmsrl(x86_pmu.eventsel + idx, val);
 	}
 }
 
-void hw_perf_disable(void)
+static const struct pmu pmu;
+
+static inline int is_x86_event(struct perf_event *event)
 {
-	if (!x86_pmu_initialized())
-		return;
-	return x86_pmu.disable_all();
+	return event->pmu == &pmu;
 }
 
-static void p6_pmu_enable_all(void)
+static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	unsigned long val;
+	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
+	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	int i, j, w, wmax, num = 0;
+	struct hw_perf_event *hwc;
 
-	if (cpuc->enabled)
-		return;
+	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
-	cpuc->enabled = 1;
-	barrier();
+	for (i = 0; i < n; i++) {
+		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
+		constraints[i] = c;
+	}
 
-	/* p6 only has one enable register */
-	rdmsrl(MSR_P6_EVNTSEL0, val);
-	val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-	wrmsrl(MSR_P6_EVNTSEL0, val);
-}
+	/*
+	 * fastpath, try to reuse previous register
+	 */
+	for (i = 0; i < n; i++) {
+		hwc = &cpuc->event_list[i]->hw;
+		c = constraints[i];
 
-static void intel_pmu_enable_all(void)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+		/* never assigned */
+		if (hwc->idx == -1)
+			break;
 
-	if (cpuc->enabled)
-		return;
+		/* constraint still honored */
+		if (!test_bit(hwc->idx, c->idxmsk))
+			break;
 
-	cpuc->enabled = 1;
-	barrier();
+		/* not already used */
+		if (test_bit(hwc->idx, used_mask))
+			break;
 
-	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+		__set_bit(hwc->idx, used_mask);
+		if (assign)
+			assign[i] = hwc->idx;
+	}
+	if (i == n)
+		goto done;
 
-	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
-		struct perf_event *event =
-			cpuc->events[X86_PMC_IDX_FIXED_BTS];
+	/*
+	 * begin slow path
+	 */
 
-		if (WARN_ON_ONCE(!event))
-			return;
+	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
-		intel_pmu_enable_bts(event->hw.config);
-	}
-}
+	/*
+	 * weight = number of possible counters
+	 *
+	 * 1    = most constrained, only works on one counter
+	 * wmax = least constrained, works on any counter
+	 *
+	 * assign events to counters starting with most
+	 * constrained events.
+	 */
+	wmax = x86_pmu.num_events;
 
-static void amd_pmu_enable_all(void)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	int idx;
+	/*
+	 * when fixed event counters are present,
+	 * wmax is incremented by 1 to account
+	 * for one more choice
+	 */
+	if (x86_pmu.num_events_fixed)
+		wmax++;
 
-	if (cpuc->enabled)
-		return;
+	for (w = 1, num = n; num && w <= wmax; w++) {
+		/* for each event */
+		for (i = 0; num && i < n; i++) {
+			c = constraints[i];
+			hwc = &cpuc->event_list[i]->hw;
 
-	cpuc->enabled = 1;
-	barrier();
+			if (c->weight != w)
+				continue;
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
-		struct perf_event *event = cpuc->events[idx];
-		u64 val;
+			for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
+				if (!test_bit(j, used_mask))
+					break;
+			}
 
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
+			if (j == X86_PMC_IDX_MAX)
+				break;
 
-		val = event->hw.config;
-		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
+			__set_bit(j, used_mask);
+
+			if (assign)
+				assign[i] = j;
+			num--;
+		}
 	}
+done:
+	/*
+	 * scheduling failed or is just a simulation,
+	 * free resources if necessary
+	 */
+	if (!assign || num) {
+		for (i = 0; i < n; i++) {
+			if (x86_pmu.put_event_constraints)
+				x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
+		}
+	}
+	return num ? -ENOSPC : 0;
 }
 
-void hw_perf_enable(void)
+/*
+ * dogrp: true if must collect siblings events (group)
+ * returns total number of events and error code
+ */
+static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
 {
-	if (!x86_pmu_initialized())
-		return;
-	x86_pmu.enable_all();
-}
+	struct perf_event *event;
+	int n, max_count;
 
-static inline u64 intel_pmu_get_status(void)
-{
-	u64 status;
+	max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
 
-	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+	/* current number of events already accepted */
+	n = cpuc->n_events;
 
-	return status;
-}
+	if (is_x86_event(leader)) {
+		if (n >= max_count)
+			return -ENOSPC;
+		cpuc->event_list[n] = leader;
+		n++;
+	}
+	if (!dogrp)
+		return n;
 
-static inline void intel_pmu_ack_status(u64 ack)
-{
-	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
-}
+	list_for_each_entry(event, &leader->sibling_list, group_entry) {
+		if (!is_x86_event(event) ||
+		    event->state <= PERF_EVENT_STATE_OFF)
+			continue;
 
-static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
-{
-	(void)checking_wrmsrl(hwc->config_base + idx,
-			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
-}
+		if (n >= max_count)
+			return -ENOSPC;
 
-static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
-{
-	(void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
+		cpuc->event_list[n] = event;
+		n++;
+	}
+	return n;
 }
 
-static inline void
-intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
+static inline void x86_assign_hw_event(struct perf_event *event,
+				struct cpu_hw_events *cpuc, int i)
 {
-	int idx = __idx - X86_PMC_IDX_FIXED;
-	u64 ctrl_val, mask;
+	struct hw_perf_event *hwc = &event->hw;
 
-	mask = 0xfULL << (idx * 4);
+	hwc->idx = cpuc->assign[i];
+	hwc->last_cpu = smp_processor_id();
+	hwc->last_tag = ++cpuc->tags[i];
 
-	rdmsrl(hwc->config_base, ctrl_val);
-	ctrl_val &= ~mask;
-	(void)checking_wrmsrl(hwc->config_base, ctrl_val);
+	if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
+		hwc->config_base = 0;
+		hwc->event_base	= 0;
+	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
+		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+		/*
+		 * We set it so that event_base + idx in wrmsr/rdmsr maps to
+		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
+		 */
+		hwc->event_base =
+			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+	} else {
+		hwc->config_base = x86_pmu.eventsel;
+		hwc->event_base  = x86_pmu.perfctr;
+	}
 }
 
-static inline void
-p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+static inline int match_prev_assignment(struct hw_perf_event *hwc,
+					struct cpu_hw_events *cpuc,
+					int i)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	u64 val = P6_NOP_EVENT;
-
-	if (cpuc->enabled)
-		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-
-	(void)checking_wrmsrl(hwc->config_base + idx, val);
+	return hwc->idx == cpuc->assign[i] &&
+		hwc->last_cpu == smp_processor_id() &&
+		hwc->last_tag == cpuc->tags[i];
 }
 
-static inline void
-intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+static int x86_pmu_start(struct perf_event *event);
+static void x86_pmu_stop(struct perf_event *event);
+
+void hw_perf_enable(void)
 {
-	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
-		intel_pmu_disable_bts();
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
+	int i;
+
+	if (!x86_pmu_initialized())
 		return;
-	}
 
-	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
-		intel_pmu_disable_fixed(hwc, idx);
+	if (cpuc->enabled)
 		return;
+
+	if (cpuc->n_added) {
+		int n_running = cpuc->n_events - cpuc->n_added;
+		/*
+		 * apply assignment obtained either from
+		 * hw_perf_group_sched_in() or x86_pmu_enable()
+		 *
+		 * step1: save events moving to new counters
+		 * step2: reprogram moved events into new counters
+		 */
+		for (i = 0; i < n_running; i++) {
+			event = cpuc->event_list[i];
+			hwc = &event->hw;
+
+			/*
+			 * we can avoid reprogramming counter if:
+			 * - assigned same counter as last time
+			 * - running on same CPU as last time
+			 * - no other event has used the counter since
+			 */
+			if (hwc->idx == -1 ||
+			    match_prev_assignment(hwc, cpuc, i))
+				continue;
+
+			x86_pmu_stop(event);
+		}
+
+		for (i = 0; i < cpuc->n_events; i++) {
+			event = cpuc->event_list[i];
+			hwc = &event->hw;
+
+			if (!match_prev_assignment(hwc, cpuc, i))
+				x86_assign_hw_event(event, cpuc, i);
+			else if (i < n_running)
+				continue;
+
+			x86_pmu_start(event);
+		}
+		cpuc->n_added = 0;
+		perf_events_lapic_init();
 	}
 
-	x86_pmu_disable_event(hwc, idx);
+	cpuc->enabled = 1;
+	barrier();
+
+	x86_pmu.enable_all();
 }
 
-static inline void
-amd_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
 {
-	x86_pmu_disable_event(hwc, idx);
+	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
+			      hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
+}
+
+static inline void x86_pmu_disable_event(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	(void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config);
 }
 
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -1275,12 +869,12 @@ static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
  * To be called with the event disabled in hw:
  */
 static int
-x86_perf_event_set_period(struct perf_event *event,
-			     struct hw_perf_event *hwc, int idx)
+x86_perf_event_set_period(struct perf_event *event)
 {
+	struct hw_perf_event *hwc = &event->hw;
 	s64 left = atomic64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
-	int err, ret = 0;
+	int err, ret = 0, idx = hwc->idx;
 
 	if (idx == X86_PMC_IDX_FIXED_BTS)
 		return 0;
@@ -1326,212 +920,63 @@ x86_perf_event_set_period(struct perf_event *event,
 	return ret;
 }
 
-static inline void
-intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
-{
-	int idx = __idx - X86_PMC_IDX_FIXED;
-	u64 ctrl_val, bits, mask;
-	int err;
-
-	/*
-	 * Enable IRQ generation (0x8),
-	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
-	 * if requested:
-	 */
-	bits = 0x8ULL;
-	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
-		bits |= 0x2;
-	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
-		bits |= 0x1;
-	bits <<= (idx * 4);
-	mask = 0xfULL << (idx * 4);
-
-	rdmsrl(hwc->config_base, ctrl_val);
-	ctrl_val &= ~mask;
-	ctrl_val |= bits;
-	err = checking_wrmsrl(hwc->config_base, ctrl_val);
-}
-
-static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static void x86_pmu_enable_event(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	u64 val;
-
-	val = hwc->config;
 	if (cpuc->enabled)
-		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-
-	(void)checking_wrmsrl(hwc->config_base + idx, val);
+		__x86_pmu_enable_event(&event->hw);
 }
 
-
-static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
-{
-	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
-		if (!__get_cpu_var(cpu_hw_events).enabled)
-			return;
-
-		intel_pmu_enable_bts(hwc->config);
-		return;
-	}
-
-	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
-		intel_pmu_enable_fixed(hwc, idx);
-		return;
-	}
-
-	x86_pmu_enable_event(hwc, idx);
-}
-
-static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+/*
+ * activate a single event
+ *
+ * The event is added to the group of enabled events
+ * but only if it can be scehduled with existing events.
+ *
+ * Called with PMU disabled. If successful and return value 1,
+ * then guaranteed to call perf_enable() and hw_perf_enable()
+ */
+static int x86_pmu_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc;
+	int assign[X86_PMC_IDX_MAX];
+	int n, n0, ret;
 
-	if (cpuc->enabled)
-		x86_pmu_enable_event(hwc, idx);
-}
-
-static int fixed_mode_idx(struct hw_perf_event *hwc)
-{
-	unsigned int hw_event;
-
-	hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
-
-	if (unlikely((hw_event ==
-		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
-		     (hwc->sample_period == 1)))
-		return X86_PMC_IDX_FIXED_BTS;
+	hwc = &event->hw;
 
-	if (!x86_pmu.num_events_fixed)
-		return -1;
+	n0 = cpuc->n_events;
+	n = collect_events(cpuc, event, false);
+	if (n < 0)
+		return n;
 
+	ret = x86_schedule_events(cpuc, n, assign);
+	if (ret)
+		return ret;
 	/*
-	 * fixed counters do not take all possible filters
+	 * copy new assignment, now we know it is possible
+	 * will be used by hw_perf_enable()
 	 */
-	if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
-		return -1;
-
-	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
-		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
-	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
-		return X86_PMC_IDX_FIXED_CPU_CYCLES;
-	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
-		return X86_PMC_IDX_FIXED_BUS_CYCLES;
-
-	return -1;
-}
-
-/*
- * generic counter allocator: get next free counter
- */
-static int
-gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
-	int idx;
-
-	idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
-	return idx == x86_pmu.num_events ? -1 : idx;
-}
+	memcpy(cpuc->assign, assign, n*sizeof(int));
 
-/*
- * intel-specific counter allocator: check event constraints
- */
-static int
-intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
-	const struct event_constraint *event_constraint;
-	int i, code;
+	cpuc->n_events = n;
+	cpuc->n_added += n - n0;
 
-	if (!event_constraints)
-		goto skip;
-
-	code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
-
-	for_each_event_constraint(event_constraint, event_constraints) {
-		if (code == event_constraint->code) {
-			for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
-				if (!test_and_set_bit(i, cpuc->used_mask))
-					return i;
-			}
-			return -1;
-		}
-	}
-skip:
-	return gen_get_event_idx(cpuc, hwc);
-}
-
-static int
-x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
-	int idx;
-
-	idx = fixed_mode_idx(hwc);
-	if (idx == X86_PMC_IDX_FIXED_BTS) {
-		/* BTS is already occupied. */
-		if (test_and_set_bit(idx, cpuc->used_mask))
-			return -EAGAIN;
-
-		hwc->config_base	= 0;
-		hwc->event_base		= 0;
-		hwc->idx		= idx;
-	} else if (idx >= 0) {
-		/*
-		 * Try to get the fixed event, if that is already taken
-		 * then try to get a generic event:
-		 */
-		if (test_and_set_bit(idx, cpuc->used_mask))
-			goto try_generic;
-
-		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-		/*
-		 * We set it so that event_base + idx in wrmsr/rdmsr maps to
-		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
-		 */
-		hwc->event_base =
-			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
-		hwc->idx = idx;
-	} else {
-		idx = hwc->idx;
-		/* Try to get the previous generic event again */
-		if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
-try_generic:
-			idx = x86_pmu.get_event_idx(cpuc, hwc);
-			if (idx == -1)
-				return -EAGAIN;
-
-			set_bit(idx, cpuc->used_mask);
-			hwc->idx = idx;
-		}
-		hwc->config_base = x86_pmu.eventsel;
-		hwc->event_base  = x86_pmu.perfctr;
-	}
-
-	return idx;
+	return 0;
 }
 
-/*
- * Find a PMC slot for the freshly enabled / scheduled in event:
- */
-static int x86_pmu_enable(struct perf_event *event)
+static int x86_pmu_start(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct hw_perf_event *hwc = &event->hw;
-	int idx;
-
-	idx = x86_schedule_event(cpuc, hwc);
-	if (idx < 0)
-		return idx;
-
-	perf_events_lapic_init();
+	int idx = event->hw.idx;
 
-	x86_pmu.disable(hwc, idx);
+	if (idx == -1)
+		return -EAGAIN;
 
+	x86_perf_event_set_period(event);
 	cpuc->events[idx] = event;
-	set_bit(idx, cpuc->active_mask);
-
-	x86_perf_event_set_period(event, hwc, idx);
-	x86_pmu.enable(hwc, idx);
-
+	__set_bit(idx, cpuc->active_mask);
+	x86_pmu.enable(event);
 	perf_event_update_userpage(event);
 
 	return 0;
@@ -1539,14 +984,8 @@ static int x86_pmu_enable(struct perf_event *event)
 
 static void x86_pmu_unthrottle(struct perf_event *event)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct hw_perf_event *hwc = &event->hw;
-
-	if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
-				cpuc->events[hwc->idx] != event))
-		return;
-
-	x86_pmu.enable(hwc, hwc->idx);
+	int ret = x86_pmu_start(event);
+	WARN_ON_ONCE(ret);
 }
 
 void perf_event_print_debug(void)
@@ -1576,7 +1015,7 @@ void perf_event_print_debug(void)
 		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
 		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
 	}
-	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);
+	pr_info("CPU#%d: active:       %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
 	for (idx = 0; idx < x86_pmu.num_events; idx++) {
 		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@ -1600,257 +1039,50 @@ void perf_event_print_debug(void)
 	local_irq_restore(flags);
 }
 
-static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
-{
-	struct debug_store *ds = cpuc->ds;
-	struct bts_record {
-		u64	from;
-		u64	to;
-		u64	flags;
-	};
-	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
-	struct bts_record *at, *top;
-	struct perf_output_handle handle;
-	struct perf_event_header header;
-	struct perf_sample_data data;
-	struct pt_regs regs;
-
-	if (!event)
-		return;
-
-	if (!ds)
-		return;
-
-	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
-	top = (struct bts_record *)(unsigned long)ds->bts_index;
-
-	if (top <= at)
-		return;
-
-	ds->bts_index = ds->bts_buffer_base;
-
-
-	data.period	= event->hw.last_period;
-	data.addr	= 0;
-	data.raw	= NULL;
-	regs.ip		= 0;
-
-	/*
-	 * Prepare a generic sample, i.e. fill in the invariant fields.
-	 * We will overwrite the from and to address before we output
-	 * the sample.
-	 */
-	perf_prepare_sample(&header, &data, event, &regs);
-
-	if (perf_output_begin(&handle, event,
-			      header.size * (top - at), 1, 1))
-		return;
-
-	for (; at < top; at++) {
-		data.ip		= at->from;
-		data.addr	= at->to;
-
-		perf_output_sample(&handle, &header, &data, event);
-	}
-
-	perf_output_end(&handle);
-
-	/* There's new data available. */
-	event->hw.interrupts++;
-	event->pending_kill = POLL_IN;
-}
-
-static void x86_pmu_disable(struct perf_event *event)
+static void x86_pmu_stop(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
-	/*
-	 * Must be done before we disable, otherwise the nmi handler
-	 * could reenable again:
-	 */
-	clear_bit(idx, cpuc->active_mask);
-	x86_pmu.disable(hwc, idx);
+	if (!__test_and_clear_bit(idx, cpuc->active_mask))
+		return;
 
-	/*
-	 * Make sure the cleared pointer becomes visible before we
-	 * (potentially) free the event:
-	 */
-	barrier();
+	x86_pmu.disable(event);
 
 	/*
 	 * Drain the remaining delta count out of a event
 	 * that we are disabling:
 	 */
-	x86_perf_event_update(event, hwc, idx);
-
-	/* Drain the remaining BTS records. */
-	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
-		intel_pmu_drain_bts_buffer(cpuc);
+	x86_perf_event_update(event);
 
 	cpuc->events[idx] = NULL;
-	clear_bit(idx, cpuc->used_mask);
-
-	perf_event_update_userpage(event);
-}
-
-/*
- * Save and restart an expired event. Called by NMI contexts,
- * so it has to be careful about preempting normal event ops:
- */
-static int intel_pmu_save_and_restart(struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
-	int ret;
-
-	x86_perf_event_update(event, hwc, idx);
-	ret = x86_perf_event_set_period(event, hwc, idx);
-
-	if (event->state == PERF_EVENT_STATE_ACTIVE)
-		intel_pmu_enable_event(hwc, idx);
-
-	return ret;
-}
-
-static void intel_pmu_reset(void)
-{
-	struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
-	unsigned long flags;
-	int idx;
-
-	if (!x86_pmu.num_events)
-		return;
-
-	local_irq_save(flags);
-
-	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
-
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
-		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
-		checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
-	}
-	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
-		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
-	}
-	if (ds)
-		ds->bts_index = ds->bts_buffer_base;
-
-	local_irq_restore(flags);
-}
-
-static int p6_pmu_handle_irq(struct pt_regs *regs)
-{
-	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
-	struct perf_event *event;
-	struct hw_perf_event *hwc;
-	int idx, handled = 0;
-	u64 val;
-
-	data.addr = 0;
-	data.raw = NULL;
-
-	cpuc = &__get_cpu_var(cpu_hw_events);
-
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
-		event = cpuc->events[idx];
-		hwc = &event->hw;
-
-		val = x86_perf_event_update(event, hwc, idx);
-		if (val & (1ULL << (x86_pmu.event_bits - 1)))
-			continue;
-
-		/*
-		 * event overflow
-		 */
-		handled		= 1;
-		data.period	= event->hw.last_period;
-
-		if (!x86_perf_event_set_period(event, hwc, idx))
-			continue;
-
-		if (perf_event_overflow(event, 1, &data, regs))
-			p6_pmu_disable_event(hwc, idx);
-	}
-
-	if (handled)
-		inc_irq_stat(apic_perf_irqs);
-
-	return handled;
 }
 
-/*
- * This handler is triggered by the local APIC, so the APIC IRQ handling
- * rules apply:
- */
-static int intel_pmu_handle_irq(struct pt_regs *regs)
+static void x86_pmu_disable(struct perf_event *event)
 {
-	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
-	int bit, loops;
-	u64 ack, status;
-
-	data.addr = 0;
-	data.raw = NULL;
-
-	cpuc = &__get_cpu_var(cpu_hw_events);
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int i;
 
-	perf_disable();
-	intel_pmu_drain_bts_buffer(cpuc);
-	status = intel_pmu_get_status();
-	if (!status) {
-		perf_enable();
-		return 0;
-	}
+	x86_pmu_stop(event);
 
-	loops = 0;
-again:
-	if (++loops > 100) {
-		WARN_ONCE(1, "perfevents: irq loop stuck!\n");
-		perf_event_print_debug();
-		intel_pmu_reset();
-		perf_enable();
-		return 1;
-	}
-
-	inc_irq_stat(apic_perf_irqs);
-	ack = status;
-	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
-		struct perf_event *event = cpuc->events[bit];
+	for (i = 0; i < cpuc->n_events; i++) {
+		if (event == cpuc->event_list[i]) {
 
-		clear_bit(bit, (unsigned long *) &status);
-		if (!test_bit(bit, cpuc->active_mask))
-			continue;
+			if (x86_pmu.put_event_constraints)
+				x86_pmu.put_event_constraints(cpuc, event);
 
-		if (!intel_pmu_save_and_restart(event))
-			continue;
+			while (++i < cpuc->n_events)
+				cpuc->event_list[i-1] = cpuc->event_list[i];
 
-		data.period = event->hw.last_period;
-
-		if (perf_event_overflow(event, 1, &data, regs))
-			intel_pmu_disable_event(&event->hw, bit);
+			--cpuc->n_events;
+			break;
+		}
 	}
-
-	intel_pmu_ack_status(ack);
-
-	/*
-	 * Repeat if there is more work to be done:
-	 */
-	status = intel_pmu_get_status();
-	if (status)
-		goto again;
-
-	perf_enable();
-
-	return 1;
+	perf_event_update_userpage(event);
 }
 
-static int amd_pmu_handle_irq(struct pt_regs *regs)
+static int x86_pmu_handle_irq(struct pt_regs *regs)
 {
 	struct perf_sample_data data;
 	struct cpu_hw_events *cpuc;
@@ -1859,8 +1091,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 	int idx, handled = 0;
 	u64 val;
 
-	data.addr = 0;
-	data.raw = NULL;
+	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -1871,7 +1102,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 		event = cpuc->events[idx];
 		hwc = &event->hw;
 
-		val = x86_perf_event_update(event, hwc, idx);
+		val = x86_perf_event_update(event);
 		if (val & (1ULL << (x86_pmu.event_bits - 1)))
 			continue;
 
@@ -1881,11 +1112,11 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 		handled		= 1;
 		data.period	= event->hw.last_period;
 
-		if (!x86_perf_event_set_period(event, hwc, idx))
+		if (!x86_perf_event_set_period(event))
 			continue;
 
 		if (perf_event_overflow(event, 1, &data, regs))
-			amd_pmu_disable_event(hwc, idx);
+			x86_pmu_stop(event);
 	}
 
 	if (handled)
@@ -1968,193 +1199,171 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = {
 	.priority		= 1
 };
 
-static __initconst struct x86_pmu p6_pmu = {
-	.name			= "p6",
-	.handle_irq		= p6_pmu_handle_irq,
-	.disable_all		= p6_pmu_disable_all,
-	.enable_all		= p6_pmu_enable_all,
-	.enable			= p6_pmu_enable_event,
-	.disable		= p6_pmu_disable_event,
-	.eventsel		= MSR_P6_EVNTSEL0,
-	.perfctr		= MSR_P6_PERFCTR0,
-	.event_map		= p6_pmu_event_map,
-	.raw_event		= p6_pmu_raw_event,
-	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
-	.apic			= 1,
-	.max_period		= (1ULL << 31) - 1,
-	.version		= 0,
-	.num_events		= 2,
-	/*
-	 * Events have 40 bits implemented. However they are designed such
-	 * that bits [32-39] are sign extensions of bit 31. As such the
-	 * effective width of a event for P6-like PMU is 32 bits only.
-	 *
-	 * See IA-32 Intel Architecture Software developer manual Vol 3B
-	 */
-	.event_bits		= 32,
-	.event_mask		= (1ULL << 32) - 1,
-	.get_event_idx		= intel_get_event_idx,
-};
+static struct event_constraint unconstrained;
+static struct event_constraint emptyconstraint;
 
-static __initconst struct x86_pmu intel_pmu = {
-	.name			= "Intel",
-	.handle_irq		= intel_pmu_handle_irq,
-	.disable_all		= intel_pmu_disable_all,
-	.enable_all		= intel_pmu_enable_all,
-	.enable			= intel_pmu_enable_event,
-	.disable		= intel_pmu_disable_event,
-	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
-	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
-	.event_map		= intel_pmu_event_map,
-	.raw_event		= intel_pmu_raw_event,
-	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
-	.apic			= 1,
-	/*
-	 * Intel PMCs cannot be accessed sanely above 32 bit width,
-	 * so we install an artificial 1<<31 period regardless of
-	 * the generic event period:
-	 */
-	.max_period		= (1ULL << 31) - 1,
-	.enable_bts		= intel_pmu_enable_bts,
-	.disable_bts		= intel_pmu_disable_bts,
-	.get_event_idx		= intel_get_event_idx,
-};
+static struct event_constraint *
+x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	struct event_constraint *c;
 
-static __initconst struct x86_pmu amd_pmu = {
-	.name			= "AMD",
-	.handle_irq		= amd_pmu_handle_irq,
-	.disable_all		= amd_pmu_disable_all,
-	.enable_all		= amd_pmu_enable_all,
-	.enable			= amd_pmu_enable_event,
-	.disable		= amd_pmu_disable_event,
-	.eventsel		= MSR_K7_EVNTSEL0,
-	.perfctr		= MSR_K7_PERFCTR0,
-	.event_map		= amd_pmu_event_map,
-	.raw_event		= amd_pmu_raw_event,
-	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_events		= 4,
-	.event_bits		= 48,
-	.event_mask		= (1ULL << 48) - 1,
-	.apic			= 1,
-	/* use highest bit to detect overflow */
-	.max_period		= (1ULL << 47) - 1,
-	.get_event_idx		= gen_get_event_idx,
-};
+	if (x86_pmu.event_constraints) {
+		for_each_event_constraint(c, x86_pmu.event_constraints) {
+			if ((event->hw.config & c->cmask) == c->code)
+				return c;
+		}
+	}
+
+	return &unconstrained;
+}
 
-static __init int p6_pmu_init(void)
+static int x86_event_sched_in(struct perf_event *event,
+			  struct perf_cpu_context *cpuctx)
 {
-	switch (boot_cpu_data.x86_model) {
-	case 1:
-	case 3:  /* Pentium Pro */
-	case 5:
-	case 6:  /* Pentium II */
-	case 7:
-	case 8:
-	case 11: /* Pentium III */
-		event_constraints = intel_p6_event_constraints;
-		break;
-	case 9:
-	case 13:
-		/* Pentium M */
-		event_constraints = intel_p6_event_constraints;
-		break;
-	default:
-		pr_cont("unsupported p6 CPU model %d ",
-			boot_cpu_data.x86_model);
-		return -ENODEV;
-	}
+	int ret = 0;
 
-	x86_pmu = p6_pmu;
+	event->state = PERF_EVENT_STATE_ACTIVE;
+	event->oncpu = smp_processor_id();
+	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
 
-	return 0;
+	if (!is_x86_event(event))
+		ret = event->pmu->enable(event);
+
+	if (!ret && !is_software_event(event))
+		cpuctx->active_oncpu++;
+
+	if (!ret && event->attr.exclusive)
+		cpuctx->exclusive = 1;
+
+	return ret;
 }
 
-static __init int intel_pmu_init(void)
+static void x86_event_sched_out(struct perf_event *event,
+			    struct perf_cpu_context *cpuctx)
 {
-	union cpuid10_edx edx;
-	union cpuid10_eax eax;
-	unsigned int unused;
-	unsigned int ebx;
-	int version;
-
-	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-		/* check for P6 processor family */
-	   if (boot_cpu_data.x86 == 6) {
-		return p6_pmu_init();
-	   } else {
-		return -ENODEV;
-	   }
-	}
+	event->state = PERF_EVENT_STATE_INACTIVE;
+	event->oncpu = -1;
 
-	/*
-	 * Check whether the Architectural PerfMon supports
-	 * Branch Misses Retired hw_event or not.
-	 */
-	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
-	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
-		return -ENODEV;
+	if (!is_x86_event(event))
+		event->pmu->disable(event);
 
-	version = eax.split.version_id;
-	if (version < 2)
-		return -ENODEV;
+	event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
 
-	x86_pmu				= intel_pmu;
-	x86_pmu.version			= version;
-	x86_pmu.num_events		= eax.split.num_events;
-	x86_pmu.event_bits		= eax.split.bit_width;
-	x86_pmu.event_mask		= (1ULL << eax.split.bit_width) - 1;
+	if (!is_software_event(event))
+		cpuctx->active_oncpu--;
 
+	if (event->attr.exclusive || !cpuctx->active_oncpu)
+		cpuctx->exclusive = 0;
+}
+
+/*
+ * Called to enable a whole group of events.
+ * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
+ * Assumes the caller has disabled interrupts and has
+ * frozen the PMU with hw_perf_save_disable.
+ *
+ * called with PMU disabled. If successful and return value 1,
+ * then guaranteed to call perf_enable() and hw_perf_enable()
+ */
+int hw_perf_group_sched_in(struct perf_event *leader,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_event_context *ctx)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct perf_event *sub;
+	int assign[X86_PMC_IDX_MAX];
+	int n0, n1, ret;
+
+	/* n0 = total number of events */
+	n0 = collect_events(cpuc, leader, true);
+	if (n0 < 0)
+		return n0;
+
+	ret = x86_schedule_events(cpuc, n0, assign);
+	if (ret)
+		return ret;
+
+	ret = x86_event_sched_in(leader, cpuctx);
+	if (ret)
+		return ret;
+
+	n1 = 1;
+	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+		if (sub->state > PERF_EVENT_STATE_OFF) {
+			ret = x86_event_sched_in(sub, cpuctx);
+			if (ret)
+				goto undo;
+			++n1;
+		}
+	}
 	/*
-	 * Quirk: v2 perfmon does not report fixed-purpose events, so
-	 * assume at least 3 events:
+	 * copy new assignment, now we know it is possible
+	 * will be used by hw_perf_enable()
 	 */
-	x86_pmu.num_events_fixed	= max((int)edx.split.num_events_fixed, 3);
+	memcpy(cpuc->assign, assign, n0*sizeof(int));
+
+	cpuc->n_events  = n0;
+	cpuc->n_added  += n1;
+	ctx->nr_active += n1;
 
 	/*
-	 * Install the hw-cache-events table:
+	 * 1 means successful and events are active
+	 * This is not quite true because we defer
+	 * actual activation until hw_perf_enable() but
+	 * this way we* ensure caller won't try to enable
+	 * individual events
 	 */
-	switch (boot_cpu_data.x86_model) {
-	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
-	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
-	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
-	case 29: /* six-core 45 nm xeon "Dunnington" */
-		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
-
-		pr_cont("Core2 events, ");
-		event_constraints = intel_core_event_constraints;
-		break;
-	default:
-	case 26:
-		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
+	return 1;
+undo:
+	x86_event_sched_out(leader, cpuctx);
+	n0  = 1;
+	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+		if (sub->state == PERF_EVENT_STATE_ACTIVE) {
+			x86_event_sched_out(sub, cpuctx);
+			if (++n0 == n1)
+				break;
+		}
+	}
+	return ret;
+}
+
+#include "perf_event_amd.c"
+#include "perf_event_p6.c"
+#include "perf_event_intel.c"
 
-		event_constraints = intel_nehalem_event_constraints;
-		pr_cont("Nehalem/Corei7 events, ");
+static int __cpuinit
+x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+	int ret = NOTIFY_OK;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		if (x86_pmu.cpu_prepare)
+			ret = x86_pmu.cpu_prepare(cpu);
 		break;
-	case 28:
-		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
 
-		pr_cont("Atom events, ");
+	case CPU_STARTING:
+		if (x86_pmu.cpu_starting)
+			x86_pmu.cpu_starting(cpu);
 		break;
-	}
-	return 0;
-}
 
-static __init int amd_pmu_init(void)
-{
-	/* Performance-monitoring supported from K7 and later: */
-	if (boot_cpu_data.x86 < 6)
-		return -ENODEV;
+	case CPU_DYING:
+		if (x86_pmu.cpu_dying)
+			x86_pmu.cpu_dying(cpu);
+		break;
 
-	x86_pmu = amd_pmu;
+	case CPU_UP_CANCELED:
+	case CPU_DEAD:
+		if (x86_pmu.cpu_dead)
+			x86_pmu.cpu_dead(cpu);
+		break;
 
-	/* Events are common for all AMDs */
-	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
-	       sizeof(hw_cache_event_ids));
+	default:
+		break;
+	}
 
-	return 0;
+	return ret;
 }
 
 static void __init pmu_check_apic(void)
@@ -2169,6 +1378,7 @@ static void __init pmu_check_apic(void)
 
 void __init init_hw_perf_events(void)
 {
+	struct event_constraint *c;
 	int err;
 
 	pr_info("Performance Events: ");
@@ -2213,6 +1423,20 @@ void __init init_hw_perf_events(void)
 	perf_events_lapic_init();
 	register_die_notifier(&perf_event_nmi_notifier);
 
+	unconstrained = (struct event_constraint)
+		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
+				   0, x86_pmu.num_events);
+
+	if (x86_pmu.event_constraints) {
+		for_each_event_constraint(c, x86_pmu.event_constraints) {
+			if (c->cmask != INTEL_ARCH_FIXED_MASK)
+				continue;
+
+			c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1;
+			c->weight += x86_pmu.num_events;
+		}
+	}
+
 	pr_info("... version:                %d\n",     x86_pmu.version);
 	pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
 	pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
@@ -2220,60 +1444,91 @@ void __init init_hw_perf_events(void)
 	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
 	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
 	pr_info("... event mask:             %016Lx\n", perf_event_mask);
+
+	perf_cpu_notifier(x86_pmu_notifier);
 }
 
 static inline void x86_pmu_read(struct perf_event *event)
 {
-	x86_perf_event_update(event, &event->hw, event->hw.idx);
+	x86_perf_event_update(event);
 }
 
 static const struct pmu pmu = {
 	.enable		= x86_pmu_enable,
 	.disable	= x86_pmu_disable,
+	.start		= x86_pmu_start,
+	.stop		= x86_pmu_stop,
 	.read		= x86_pmu_read,
 	.unthrottle	= x86_pmu_unthrottle,
 };
 
-static int
-validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
-{
-	struct hw_perf_event fake_event = event->hw;
-
-	if (event->pmu && event->pmu != &pmu)
-		return 0;
-
-	return x86_schedule_event(cpuc, &fake_event) >= 0;
-}
-
+/*
+ * validate a single event group
+ *
+ * validation include:
+ *	- check events are compatible which each other
+ *	- events do not compete for the same counter
+ *	- number of events <= number of counters
+ *
+ * validation ensures the group can be loaded onto the
+ * PMU if it was the only group available.
+ */
 static int validate_group(struct perf_event *event)
 {
-	struct perf_event *sibling, *leader = event->group_leader;
-	struct cpu_hw_events fake_pmu;
+	struct perf_event *leader = event->group_leader;
+	struct cpu_hw_events *fake_cpuc;
+	int ret, n;
 
-	memset(&fake_pmu, 0, sizeof(fake_pmu));
+	ret = -ENOMEM;
+	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
+	if (!fake_cpuc)
+		goto out;
 
-	if (!validate_event(&fake_pmu, leader))
-		return -ENOSPC;
+	/*
+	 * the event is not yet connected with its
+	 * siblings therefore we must first collect
+	 * existing siblings, then add the new event
+	 * before we can simulate the scheduling
+	 */
+	ret = -ENOSPC;
+	n = collect_events(fake_cpuc, leader, true);
+	if (n < 0)
+		goto out_free;
 
-	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
-		if (!validate_event(&fake_pmu, sibling))
-			return -ENOSPC;
-	}
+	fake_cpuc->n_events = n;
+	n = collect_events(fake_cpuc, event, false);
+	if (n < 0)
+		goto out_free;
 
-	if (!validate_event(&fake_pmu, event))
-		return -ENOSPC;
+	fake_cpuc->n_events = n;
 
-	return 0;
+	ret = x86_schedule_events(fake_cpuc, n, NULL);
+
+out_free:
+	kfree(fake_cpuc);
+out:
+	return ret;
 }
 
 const struct pmu *hw_perf_event_init(struct perf_event *event)
 {
+	const struct pmu *tmp;
 	int err;
 
 	err = __hw_perf_event_init(event);
 	if (!err) {
+		/*
+		 * we temporarily connect event to its pmu
+		 * such that validate_group() can classify
+		 * it as an x86 event using is_x86_event()
+		 */
+		tmp = event->pmu;
+		event->pmu = &pmu;
+
 		if (event->group_leader != event)
 			err = validate_group(event);
+
+		event->pmu = tmp;
 	}
 	if (err) {
 		if (event->destroy)
@@ -2297,7 +1552,6 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
 
 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
-static DEFINE_PER_CPU(int, in_ignored_frame);
 
 
 static void
@@ -2313,10 +1567,6 @@ static void backtrace_warning(void *data, char *msg)
 
 static int backtrace_stack(void *data, char *name)
 {
-	per_cpu(in_ignored_frame, smp_processor_id()) =
-			x86_is_stack_id(NMI_STACK, name) ||
-			x86_is_stack_id(DEBUG_STACK, name);
-
 	return 0;
 }
 
@@ -2324,9 +1574,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 {
 	struct perf_callchain_entry *entry = data;
 
-	if (per_cpu(in_ignored_frame, smp_processor_id()))
-		return;
-
 	if (reliable)
 		callchain_store(entry, addr);
 }
@@ -2347,7 +1594,7 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 	callchain_store(entry, PERF_CONTEXT_KERNEL);
 	callchain_store(entry, regs->ip);
 
-	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
+	dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
 }
 
 /*
@@ -2385,14 +1632,42 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	return len;
 }
 
-static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+#ifdef CONFIG_COMPAT
+static inline int
+perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
-	unsigned long bytes;
+	/* 32-bit process in 64-bit kernel. */
+	struct stack_frame_ia32 frame;
+	const void __user *fp;
+
+	if (!test_thread_flag(TIF_IA32))
+		return 0;
+
+	fp = compat_ptr(regs->bp);
+	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+		unsigned long bytes;
+		frame.next_frame     = 0;
+		frame.return_address = 0;
 
-	bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
+		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
+		if (bytes != sizeof(frame))
+			break;
+
+		if (fp < compat_ptr(regs->sp))
+			break;
 
-	return bytes == sizeof(*frame);
+		callchain_store(entry, frame.return_address);
+		fp = compat_ptr(frame.next_frame);
+	}
+	return 1;
+}
+#else
+static inline int
+perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+    return 0;
 }
+#endif
 
 static void
 perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
@@ -2408,11 +1683,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 	callchain_store(entry, PERF_CONTEXT_USER);
 	callchain_store(entry, regs->ip);
 
+	if (perf_callchain_user32(regs, entry))
+		return;
+
 	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+		unsigned long bytes;
 		frame.next_frame	     = NULL;
 		frame.return_address = 0;
 
-		if (!copy_stack_frame(fp, &frame))
+		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
+		if (bytes != sizeof(frame))
 			break;
 
 		if ((unsigned long)fp < regs->sp)
@@ -2433,9 +1713,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
 
 	is_user = user_mode(regs);
 
-	if (!current || current->pid == 0)
-		return;
-
 	if (is_user && current->state != TASK_RUNNING)
 		return;
 
@@ -2462,7 +1739,14 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 	return entry;
 }
 
-void hw_perf_event_setup_online(int cpu)
+void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
 {
-	init_debug_store_on_cpu(cpu);
+	regs->ip = ip;
+	/*
+	 * perf_arch_fetch_caller_regs adds another call, we need to increment
+	 * the skip level
+	 */
+	regs->bp = rewind_frame_pointer(skip + 1);
+	regs->cs = __KERNEL_CS;
+	local_save_flags(regs->flags);
 }
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
new file mode 100644
index 00000000000..db6f7d4056e
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -0,0 +1,422 @@
+#ifdef CONFIG_CPU_SUP_AMD
+
+static DEFINE_RAW_SPINLOCK(amd_nb_lock);
+
+static __initconst u64 amd_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
+		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
+		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
+		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+		[ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DLTB Miss   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes        */
+		[ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
+		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+/*
+ * AMD Performance Monitor K7 and later.
+ */
+static const u64 amd_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+};
+
+static u64 amd_pmu_event_map(int hw_event)
+{
+	return amd_perfmon_event_map[hw_event];
+}
+
+static u64 amd_pmu_raw_event(u64 hw_event)
+{
+#define K7_EVNTSEL_EVENT_MASK	0xF000000FFULL
+#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
+#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
+#define K7_EVNTSEL_INV_MASK	0x000800000ULL
+#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL
+
+#define K7_EVNTSEL_MASK			\
+	(K7_EVNTSEL_EVENT_MASK |	\
+	 K7_EVNTSEL_UNIT_MASK  |	\
+	 K7_EVNTSEL_EDGE_MASK  |	\
+	 K7_EVNTSEL_INV_MASK   |	\
+	 K7_EVNTSEL_REG_MASK)
+
+	return hw_event & K7_EVNTSEL_MASK;
+}
+
+/*
+ * AMD64 events are detected based on their event codes.
+ */
+static inline int amd_is_nb_event(struct hw_perf_event *hwc)
+{
+	return (hwc->config & 0xe0) == 0xe0;
+}
+
+static inline int amd_has_nb(struct cpu_hw_events *cpuc)
+{
+	struct amd_nb *nb = cpuc->amd_nb;
+
+	return nb && nb->nb_id != -1;
+}
+
+static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
+				      struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct amd_nb *nb = cpuc->amd_nb;
+	int i;
+
+	/*
+	 * only care about NB events
+	 */
+	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
+		return;
+
+	/*
+	 * need to scan whole list because event may not have
+	 * been assigned during scheduling
+	 *
+	 * no race condition possible because event can only
+	 * be removed on one CPU at a time AND PMU is disabled
+	 * when we come here
+	 */
+	for (i = 0; i < x86_pmu.num_events; i++) {
+		if (nb->owners[i] == event) {
+			cmpxchg(nb->owners+i, event, NULL);
+			break;
+		}
+	}
+}
+
+ /*
+  * AMD64 NorthBridge events need special treatment because
+  * counter access needs to be synchronized across all cores
+  * of a package. Refer to BKDG section 3.12
+  *
+  * NB events are events measuring L3 cache, Hypertransport
+  * traffic. They are identified by an event code >= 0xe00.
+  * They measure events on the NorthBride which is shared
+  * by all cores on a package. NB events are counted on a
+  * shared set of counters. When a NB event is programmed
+  * in a counter, the data actually comes from a shared
+  * counter. Thus, access to those counters needs to be
+  * synchronized.
+  *
+  * We implement the synchronization such that no two cores
+  * can be measuring NB events using the same counters. Thus,
+  * we maintain a per-NB allocation table. The available slot
+  * is propagated using the event_constraint structure.
+  *
+  * We provide only one choice for each NB event based on
+  * the fact that only NB events have restrictions. Consequently,
+  * if a counter is available, there is a guarantee the NB event
+  * will be assigned to it. If no slot is available, an empty
+  * constraint is returned and scheduling will eventually fail
+  * for this event.
+  *
+  * Note that all cores attached the same NB compete for the same
+  * counters to host NB events, this is why we use atomic ops. Some
+  * multi-chip CPUs may have more than one NB.
+  *
+  * Given that resources are allocated (cmpxchg), they must be
+  * eventually freed for others to use. This is accomplished by
+  * calling amd_put_event_constraints().
+  *
+  * Non NB events are not impacted by this restriction.
+  */
+static struct event_constraint *
+amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct amd_nb *nb = cpuc->amd_nb;
+	struct perf_event *old = NULL;
+	int max = x86_pmu.num_events;
+	int i, j, k = -1;
+
+	/*
+	 * if not NB event or no NB, then no constraints
+	 */
+	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
+		return &unconstrained;
+
+	/*
+	 * detect if already present, if so reuse
+	 *
+	 * cannot merge with actual allocation
+	 * because of possible holes
+	 *
+	 * event can already be present yet not assigned (in hwc->idx)
+	 * because of successive calls to x86_schedule_events() from
+	 * hw_perf_group_sched_in() without hw_perf_enable()
+	 */
+	for (i = 0; i < max; i++) {
+		/*
+		 * keep track of first free slot
+		 */
+		if (k == -1 && !nb->owners[i])
+			k = i;
+
+		/* already present, reuse */
+		if (nb->owners[i] == event)
+			goto done;
+	}
+	/*
+	 * not present, so grab a new slot
+	 * starting either at:
+	 */
+	if (hwc->idx != -1) {
+		/* previous assignment */
+		i = hwc->idx;
+	} else if (k != -1) {
+		/* start from free slot found */
+		i = k;
+	} else {
+		/*
+		 * event not found, no slot found in
+		 * first pass, try again from the
+		 * beginning
+		 */
+		i = 0;
+	}
+	j = i;
+	do {
+		old = cmpxchg(nb->owners+i, NULL, event);
+		if (!old)
+			break;
+		if (++i == max)
+			i = 0;
+	} while (i != j);
+done:
+	if (!old)
+		return &nb->event_constraints[i];
+
+	return &emptyconstraint;
+}
+
+static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
+{
+	struct amd_nb *nb;
+	int i;
+
+	nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
+	if (!nb)
+		return NULL;
+
+	memset(nb, 0, sizeof(*nb));
+	nb->nb_id = nb_id;
+
+	/*
+	 * initialize all possible NB constraints
+	 */
+	for (i = 0; i < x86_pmu.num_events; i++) {
+		__set_bit(i, nb->event_constraints[i].idxmsk);
+		nb->event_constraints[i].weight = 1;
+	}
+	return nb;
+}
+
+static int amd_pmu_cpu_prepare(int cpu)
+{
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+	WARN_ON_ONCE(cpuc->amd_nb);
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return NOTIFY_OK;
+
+	cpuc->amd_nb = amd_alloc_nb(cpu, -1);
+	if (!cpuc->amd_nb)
+		return NOTIFY_BAD;
+
+	return NOTIFY_OK;
+}
+
+static void amd_pmu_cpu_starting(int cpu)
+{
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	struct amd_nb *nb;
+	int i, nb_id;
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return;
+
+	nb_id = amd_get_nb_id(cpu);
+	WARN_ON_ONCE(nb_id == BAD_APICID);
+
+	raw_spin_lock(&amd_nb_lock);
+
+	for_each_online_cpu(i) {
+		nb = per_cpu(cpu_hw_events, i).amd_nb;
+		if (WARN_ON_ONCE(!nb))
+			continue;
+
+		if (nb->nb_id == nb_id) {
+			kfree(cpuc->amd_nb);
+			cpuc->amd_nb = nb;
+			break;
+		}
+	}
+
+	cpuc->amd_nb->nb_id = nb_id;
+	cpuc->amd_nb->refcnt++;
+
+	raw_spin_unlock(&amd_nb_lock);
+}
+
+static void amd_pmu_cpu_dead(int cpu)
+{
+	struct cpu_hw_events *cpuhw;
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return;
+
+	cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+	raw_spin_lock(&amd_nb_lock);
+
+	if (cpuhw->amd_nb) {
+		struct amd_nb *nb = cpuhw->amd_nb;
+
+		if (nb->nb_id == -1 || --nb->refcnt == 0)
+			kfree(nb);
+
+		cpuhw->amd_nb = NULL;
+	}
+
+	raw_spin_unlock(&amd_nb_lock);
+}
+
+static __initconst struct x86_pmu amd_pmu = {
+	.name			= "AMD",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= x86_pmu_disable_all,
+	.enable_all		= x86_pmu_enable_all,
+	.enable			= x86_pmu_enable_event,
+	.disable		= x86_pmu_disable_event,
+	.eventsel		= MSR_K7_EVNTSEL0,
+	.perfctr		= MSR_K7_PERFCTR0,
+	.event_map		= amd_pmu_event_map,
+	.raw_event		= amd_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
+	.num_events		= 4,
+	.event_bits		= 48,
+	.event_mask		= (1ULL << 48) - 1,
+	.apic			= 1,
+	/* use highest bit to detect overflow */
+	.max_period		= (1ULL << 47) - 1,
+	.get_event_constraints	= amd_get_event_constraints,
+	.put_event_constraints	= amd_put_event_constraints,
+
+	.cpu_prepare		= amd_pmu_cpu_prepare,
+	.cpu_starting		= amd_pmu_cpu_starting,
+	.cpu_dead		= amd_pmu_cpu_dead,
+};
+
+static __init int amd_pmu_init(void)
+{
+	/* Performance-monitoring supported from K7 and later: */
+	if (boot_cpu_data.x86 < 6)
+		return -ENODEV;
+
+	x86_pmu = amd_pmu;
+
+	/* Events are common for all AMDs */
+	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
+	       sizeof(hw_cache_event_ids));
+
+	return 0;
+}
+
+#else /* CONFIG_CPU_SUP_AMD */
+
+static int amd_pmu_init(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
new file mode 100644
index 00000000000..9c794ac8783
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -0,0 +1,980 @@
+#ifdef CONFIG_CPU_SUP_INTEL
+
+/*
+ * Intel PerfMon, used on Core and later.
+ */
+static const u64 intel_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
+};
+
+static struct event_constraint intel_core_event_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+	INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_core2_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+	/*
+	 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
+	 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
+	 * ratio between these counters.
+	 */
+	/* FIXED_EVENT_CONSTRAINT(0x013c, 2),  CPU_CLK_UNHALTED.REF */
+	INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+	INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
+	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+	INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
+	INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_nehalem_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
+	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
+	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
+	INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
+	INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
+	INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
+	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_westmere_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
+	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_gen_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+	EVENT_CONSTRAINT_END
+};
+
+static u64 intel_pmu_event_map(int hw_event)
+{
+	return intel_perfmon_event_map[hw_event];
+}
+
+static __initconst u64 westmere_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES      */
+		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
+		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
+		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
+		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES      */
+		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static __initconst u64 nehalem_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
+		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
+		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
+		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
+		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
+		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
+		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
+		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static __initconst u64 core2_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
+		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
+		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
+		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
+		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
+		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static __initconst u64 atom_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
+		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
+		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static u64 intel_pmu_raw_event(u64 hw_event)
+{
+#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
+#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
+#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
+#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
+#define CORE_EVNTSEL_REG_MASK		0xFF000000ULL
+
+#define CORE_EVNTSEL_MASK		\
+	(INTEL_ARCH_EVTSEL_MASK |	\
+	 INTEL_ARCH_UNIT_MASK   |	\
+	 INTEL_ARCH_EDGE_MASK   |	\
+	 INTEL_ARCH_INV_MASK    |	\
+	 INTEL_ARCH_CNT_MASK)
+
+	return hw_event & CORE_EVNTSEL_MASK;
+}
+
+static void intel_pmu_enable_bts(u64 config)
+{
+	unsigned long debugctlmsr;
+
+	debugctlmsr = get_debugctlmsr();
+
+	debugctlmsr |= X86_DEBUGCTL_TR;
+	debugctlmsr |= X86_DEBUGCTL_BTS;
+	debugctlmsr |= X86_DEBUGCTL_BTINT;
+
+	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
+		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
+
+	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
+		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
+
+	update_debugctlmsr(debugctlmsr);
+}
+
+static void intel_pmu_disable_bts(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	unsigned long debugctlmsr;
+
+	if (!cpuc->ds)
+		return;
+
+	debugctlmsr = get_debugctlmsr();
+
+	debugctlmsr &=
+		~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
+		  X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
+
+	update_debugctlmsr(debugctlmsr);
+}
+
+static void intel_pmu_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+		intel_pmu_disable_bts();
+}
+
+static void intel_pmu_enable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+
+	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
+		struct perf_event *event =
+			cpuc->events[X86_PMC_IDX_FIXED_BTS];
+
+		if (WARN_ON_ONCE(!event))
+			return;
+
+		intel_pmu_enable_bts(event->hw.config);
+	}
+}
+
+static inline u64 intel_pmu_get_status(void)
+{
+	u64 status;
+
+	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+
+	return status;
+}
+
+static inline void intel_pmu_ack_status(u64 ack)
+{
+	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+}
+
+static inline void
+intel_pmu_disable_fixed(struct hw_perf_event *hwc)
+{
+	int idx = hwc->idx - X86_PMC_IDX_FIXED;
+	u64 ctrl_val, mask;
+
+	mask = 0xfULL << (idx * 4);
+
+	rdmsrl(hwc->config_base, ctrl_val);
+	ctrl_val &= ~mask;
+	(void)checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void intel_pmu_drain_bts_buffer(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct debug_store *ds = cpuc->ds;
+	struct bts_record {
+		u64	from;
+		u64	to;
+		u64	flags;
+	};
+	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
+	struct bts_record *at, *top;
+	struct perf_output_handle handle;
+	struct perf_event_header header;
+	struct perf_sample_data data;
+	struct pt_regs regs;
+
+	if (!event)
+		return;
+
+	if (!ds)
+		return;
+
+	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
+	top = (struct bts_record *)(unsigned long)ds->bts_index;
+
+	if (top <= at)
+		return;
+
+	ds->bts_index = ds->bts_buffer_base;
+
+	perf_sample_data_init(&data, 0);
+
+	data.period	= event->hw.last_period;
+	regs.ip		= 0;
+
+	/*
+	 * Prepare a generic sample, i.e. fill in the invariant fields.
+	 * We will overwrite the from and to address before we output
+	 * the sample.
+	 */
+	perf_prepare_sample(&header, &data, event, &regs);
+
+	if (perf_output_begin(&handle, event,
+			      header.size * (top - at), 1, 1))
+		return;
+
+	for (; at < top; at++) {
+		data.ip		= at->from;
+		data.addr	= at->to;
+
+		perf_output_sample(&handle, &header, &data, event);
+	}
+
+	perf_output_end(&handle);
+
+	/* There's new data available. */
+	event->hw.interrupts++;
+	event->pending_kill = POLL_IN;
+}
+
+static inline void
+intel_pmu_disable_event(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
+		intel_pmu_disable_bts();
+		intel_pmu_drain_bts_buffer();
+		return;
+	}
+
+	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+		intel_pmu_disable_fixed(hwc);
+		return;
+	}
+
+	x86_pmu_disable_event(event);
+}
+
+static inline void
+intel_pmu_enable_fixed(struct hw_perf_event *hwc)
+{
+	int idx = hwc->idx - X86_PMC_IDX_FIXED;
+	u64 ctrl_val, bits, mask;
+	int err;
+
+	/*
+	 * Enable IRQ generation (0x8),
+	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+	 * if requested:
+	 */
+	bits = 0x8ULL;
+	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+		bits |= 0x2;
+	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+		bits |= 0x1;
+
+	/*
+	 * ANY bit is supported in v3 and up
+	 */
+	if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
+		bits |= 0x4;
+
+	bits <<= (idx * 4);
+	mask = 0xfULL << (idx * 4);
+
+	rdmsrl(hwc->config_base, ctrl_val);
+	ctrl_val &= ~mask;
+	ctrl_val |= bits;
+	err = checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void intel_pmu_enable_event(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
+		if (!__get_cpu_var(cpu_hw_events).enabled)
+			return;
+
+		intel_pmu_enable_bts(hwc->config);
+		return;
+	}
+
+	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+		intel_pmu_enable_fixed(hwc);
+		return;
+	}
+
+	__x86_pmu_enable_event(hwc);
+}
+
+/*
+ * Save and restart an expired event. Called by NMI contexts,
+ * so it has to be careful about preempting normal event ops:
+ */
+static int intel_pmu_save_and_restart(struct perf_event *event)
+{
+	x86_perf_event_update(event);
+	return x86_perf_event_set_period(event);
+}
+
+static void intel_pmu_reset(void)
+{
+	struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
+	unsigned long flags;
+	int idx;
+
+	if (!x86_pmu.num_events)
+		return;
+
+	local_irq_save(flags);
+
+	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
+
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
+		checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
+	}
+	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
+		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+	}
+	if (ds)
+		ds->bts_index = ds->bts_buffer_base;
+
+	local_irq_restore(flags);
+}
+
+/*
+ * This handler is triggered by the local APIC, so the APIC IRQ handling
+ * rules apply:
+ */
+static int intel_pmu_handle_irq(struct pt_regs *regs)
+{
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	int bit, loops;
+	u64 ack, status;
+
+	perf_sample_data_init(&data, 0);
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+
+	intel_pmu_disable_all();
+	intel_pmu_drain_bts_buffer();
+	status = intel_pmu_get_status();
+	if (!status) {
+		intel_pmu_enable_all();
+		return 0;
+	}
+
+	loops = 0;
+again:
+	if (++loops > 100) {
+		WARN_ONCE(1, "perfevents: irq loop stuck!\n");
+		perf_event_print_debug();
+		intel_pmu_reset();
+		goto done;
+	}
+
+	inc_irq_stat(apic_perf_irqs);
+	ack = status;
+	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+		struct perf_event *event = cpuc->events[bit];
+
+		if (!test_bit(bit, cpuc->active_mask))
+			continue;
+
+		if (!intel_pmu_save_and_restart(event))
+			continue;
+
+		data.period = event->hw.last_period;
+
+		if (perf_event_overflow(event, 1, &data, regs))
+			x86_pmu_stop(event);
+	}
+
+	intel_pmu_ack_status(ack);
+
+	/*
+	 * Repeat if there is more work to be done:
+	 */
+	status = intel_pmu_get_status();
+	if (status)
+		goto again;
+
+done:
+	intel_pmu_enable_all();
+	return 1;
+}
+
+static struct event_constraint bts_constraint =
+	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
+
+static struct event_constraint *
+intel_special_constraints(struct perf_event *event)
+{
+	unsigned int hw_event;
+
+	hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
+
+	if (unlikely((hw_event ==
+		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
+		     (event->hw.sample_period == 1))) {
+
+		return &bts_constraint;
+	}
+	return NULL;
+}
+
+static struct event_constraint *
+intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	struct event_constraint *c;
+
+	c = intel_special_constraints(event);
+	if (c)
+		return c;
+
+	return x86_get_event_constraints(cpuc, event);
+}
+
+static __initconst struct x86_pmu core_pmu = {
+	.name			= "core",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= x86_pmu_disable_all,
+	.enable_all		= x86_pmu_enable_all,
+	.enable			= x86_pmu_enable_event,
+	.disable		= x86_pmu_disable_event,
+	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
+	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.event_map		= intel_pmu_event_map,
+	.raw_event		= intel_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
+	.apic			= 1,
+	/*
+	 * Intel PMCs cannot be accessed sanely above 32 bit width,
+	 * so we install an artificial 1<<31 period regardless of
+	 * the generic event period:
+	 */
+	.max_period		= (1ULL << 31) - 1,
+	.get_event_constraints	= intel_get_event_constraints,
+	.event_constraints	= intel_core_event_constraints,
+};
+
+static __initconst struct x86_pmu intel_pmu = {
+	.name			= "Intel",
+	.handle_irq		= intel_pmu_handle_irq,
+	.disable_all		= intel_pmu_disable_all,
+	.enable_all		= intel_pmu_enable_all,
+	.enable			= intel_pmu_enable_event,
+	.disable		= intel_pmu_disable_event,
+	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
+	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.event_map		= intel_pmu_event_map,
+	.raw_event		= intel_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
+	.apic			= 1,
+	/*
+	 * Intel PMCs cannot be accessed sanely above 32 bit width,
+	 * so we install an artificial 1<<31 period regardless of
+	 * the generic event period:
+	 */
+	.max_period		= (1ULL << 31) - 1,
+	.enable_bts		= intel_pmu_enable_bts,
+	.disable_bts		= intel_pmu_disable_bts,
+	.get_event_constraints	= intel_get_event_constraints,
+
+	.cpu_starting		= init_debug_store_on_cpu,
+	.cpu_dying		= fini_debug_store_on_cpu,
+};
+
+static __init int intel_pmu_init(void)
+{
+	union cpuid10_edx edx;
+	union cpuid10_eax eax;
+	unsigned int unused;
+	unsigned int ebx;
+	int version;
+
+	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+		/* check for P6 processor family */
+	   if (boot_cpu_data.x86 == 6) {
+		return p6_pmu_init();
+	   } else {
+		return -ENODEV;
+	   }
+	}
+
+	/*
+	 * Check whether the Architectural PerfMon supports
+	 * Branch Misses Retired hw_event or not.
+	 */
+	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
+	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
+		return -ENODEV;
+
+	version = eax.split.version_id;
+	if (version < 2)
+		x86_pmu = core_pmu;
+	else
+		x86_pmu = intel_pmu;
+
+	x86_pmu.version			= version;
+	x86_pmu.num_events		= eax.split.num_events;
+	x86_pmu.event_bits		= eax.split.bit_width;
+	x86_pmu.event_mask		= (1ULL << eax.split.bit_width) - 1;
+
+	/*
+	 * Quirk: v2 perfmon does not report fixed-purpose events, so
+	 * assume at least 3 events:
+	 */
+	if (version > 1)
+		x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
+
+	/*
+	 * Install the hw-cache-events table:
+	 */
+	switch (boot_cpu_data.x86_model) {
+	case 14: /* 65 nm core solo/duo, "Yonah" */
+		pr_cont("Core events, ");
+		break;
+
+	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
+	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
+	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
+	case 29: /* six-core 45 nm xeon "Dunnington" */
+		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		x86_pmu.event_constraints = intel_core2_event_constraints;
+		pr_cont("Core2 events, ");
+		break;
+
+	case 26: /* 45 nm nehalem, "Bloomfield" */
+	case 30: /* 45 nm nehalem, "Lynnfield" */
+	case 46: /* 45 nm nehalem-ex, "Beckton" */
+		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		x86_pmu.event_constraints = intel_nehalem_event_constraints;
+		pr_cont("Nehalem/Corei7 events, ");
+		break;
+	case 28: /* Atom */
+		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		x86_pmu.event_constraints = intel_gen_event_constraints;
+		pr_cont("Atom events, ");
+		break;
+
+	case 37: /* 32 nm nehalem, "Clarkdale" */
+	case 44: /* 32 nm nehalem, "Gulftown" */
+		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		x86_pmu.event_constraints = intel_westmere_event_constraints;
+		pr_cont("Westmere events, ");
+		break;
+
+	default:
+		/*
+		 * default constraints for v2 and up
+		 */
+		x86_pmu.event_constraints = intel_gen_event_constraints;
+		pr_cont("generic architected perfmon, ");
+	}
+	return 0;
+}
+
+#else /* CONFIG_CPU_SUP_INTEL */
+
+static int intel_pmu_init(void)
+{
+	return 0;
+}
+
+#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
new file mode 100644
index 00000000000..a330485d14d
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -0,0 +1,159 @@
+#ifdef CONFIG_CPU_SUP_INTEL
+
+/*
+ * Not sure about some of these
+ */
+static const u64 p6_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0079,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0f2e,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x012e,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+  [PERF_COUNT_HW_BUS_CYCLES]		= 0x0062,
+};
+
+static u64 p6_pmu_event_map(int hw_event)
+{
+	return p6_perfmon_event_map[hw_event];
+}
+
+/*
+ * Event setting that is specified not to count anything.
+ * We use this to effectively disable a counter.
+ *
+ * L2_RQSTS with 0 MESI unit mask.
+ */
+#define P6_NOP_EVENT			0x0000002EULL
+
+static u64 p6_pmu_raw_event(u64 hw_event)
+{
+#define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
+#define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
+#define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
+#define P6_EVNTSEL_INV_MASK		0x00800000ULL
+#define P6_EVNTSEL_REG_MASK		0xFF000000ULL
+
+#define P6_EVNTSEL_MASK			\
+	(P6_EVNTSEL_EVENT_MASK |	\
+	 P6_EVNTSEL_UNIT_MASK  |	\
+	 P6_EVNTSEL_EDGE_MASK  |	\
+	 P6_EVNTSEL_INV_MASK   |	\
+	 P6_EVNTSEL_REG_MASK)
+
+	return hw_event & P6_EVNTSEL_MASK;
+}
+
+static struct event_constraint p6_event_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
+	INTEL_EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
+	INTEL_EVENT_CONSTRAINT(0x11, 0x1),	/* FP_ASSIST */
+	INTEL_EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
+	INTEL_EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
+	INTEL_EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
+	EVENT_CONSTRAINT_END
+};
+
+static void p6_pmu_disable_all(void)
+{
+	u64 val;
+
+	/* p6 only has one enable register */
+	rdmsrl(MSR_P6_EVNTSEL0, val);
+	val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+	wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
+static void p6_pmu_enable_all(void)
+{
+	unsigned long val;
+
+	/* p6 only has one enable register */
+	rdmsrl(MSR_P6_EVNTSEL0, val);
+	val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+	wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
+static inline void
+p6_pmu_disable_event(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 val = P6_NOP_EVENT;
+
+	if (cpuc->enabled)
+		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+
+	(void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
+}
+
+static void p6_pmu_enable_event(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 val;
+
+	val = hwc->config;
+	if (cpuc->enabled)
+		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+
+	(void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
+}
+
+static __initconst struct x86_pmu p6_pmu = {
+	.name			= "p6",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= p6_pmu_disable_all,
+	.enable_all		= p6_pmu_enable_all,
+	.enable			= p6_pmu_enable_event,
+	.disable		= p6_pmu_disable_event,
+	.eventsel		= MSR_P6_EVNTSEL0,
+	.perfctr		= MSR_P6_PERFCTR0,
+	.event_map		= p6_pmu_event_map,
+	.raw_event		= p6_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
+	.apic			= 1,
+	.max_period		= (1ULL << 31) - 1,
+	.version		= 0,
+	.num_events		= 2,
+	/*
+	 * Events have 40 bits implemented. However they are designed such
+	 * that bits [32-39] are sign extensions of bit 31. As such the
+	 * effective width of a event for P6-like PMU is 32 bits only.
+	 *
+	 * See IA-32 Intel Architecture Software developer manual Vol 3B
+	 */
+	.event_bits		= 32,
+	.event_mask		= (1ULL << 32) - 1,
+	.get_event_constraints	= x86_get_event_constraints,
+	.event_constraints	= p6_event_constraints,
+};
+
+static __init int p6_pmu_init(void)
+{
+	switch (boot_cpu_data.x86_model) {
+	case 1:
+	case 3:  /* Pentium Pro */
+	case 5:
+	case 6:  /* Pentium II */
+	case 7:
+	case 8:
+	case 11: /* Pentium III */
+	case 9:
+	case 13:
+		/* Pentium M */
+		break;
+	default:
+		pr_cont("unsupported p6 CPU model %d ",
+			boot_cpu_data.x86_model);
+		return -ENODEV;
+	}
+
+	x86_pmu = p6_pmu;
+
+	return 0;
+}
+
+#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 898df9719af..fb329e9f849 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -115,17 +115,6 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
 
 	return !test_bit(counter, perfctr_nmi_owner);
 }
-
-/* checks the an msr for availability */
-int avail_to_resrv_perfctr_nmi(unsigned int msr)
-{
-	unsigned int counter;
-
-	counter = nmi_perfctr_msr_to_bit(msr);
-	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
-	return !test_bit(counter, perfctr_nmi_owner);
-}
 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
 
 int reserve_perfctr_nmi(unsigned int msr)
@@ -691,7 +680,7 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
 	cpu_nmi_set_wd_enabled();
 
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+	evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
 	wrmsr(evntsel_msr, evntsel, 0);
 	intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
 	return 1;
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 1cbed97b59c..dfdb4dba232 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -22,6 +22,7 @@
  */
 
 #include <linux/dmi.h>
+#include <linux/module.h>
 #include <asm/div64.h>
 #include <asm/vmware.h>
 #include <asm/x86_init.h>
@@ -101,6 +102,7 @@ int vmware_platform(void)
 
 	return 0;
 }
+EXPORT_SYMBOL(vmware_platform);
 
 /*
  * VMware hypervisor takes care of exporting a reliable TSC to the guest.