Diffstat (limited to 'arch/x86/kernel')
29 files changed, 1914 insertions, 360 deletions
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 95bf99de905..1b8e5a03d94 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -25,10 +25,6 @@ unsigned long acpi_realmode_flags; static char temp_stack[4096]; #endif -asmlinkage void acpi_enter_s3(void) -{ - acpi_enter_sleep_state(3, wake_sleep_flags); -} /** * acpi_suspend_lowlevel - save kernel state * diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h index 5653a5791ec..67f59f8c695 100644 --- a/arch/x86/kernel/acpi/sleep.h +++ b/arch/x86/kernel/acpi/sleep.h @@ -2,7 +2,6 @@ * Variables and functions used by the code in sleep.c */ -#include <linux/linkage.h> #include <asm/realmode.h> extern unsigned long saved_video_mode; @@ -11,7 +10,6 @@ extern long saved_magic; extern int wakeup_pmode_return; extern u8 wake_sleep_flags; -extern asmlinkage void acpi_enter_s3(void); extern unsigned long acpi_copy_wakeup_routine(unsigned long); extern void wakeup_long64(void); diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index 72610839f03..13ab720573e 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -74,7 +74,9 @@ restore_registers: ENTRY(do_suspend_lowlevel) call save_processor_state call save_registers - call acpi_enter_s3 + pushl $3 + call acpi_enter_sleep_state + addl $4, %esp # In case of S3 failure, we'll emerge here. Jump # to ret_point to recover diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index 014d1d28c39..8ea5164cbd0 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -71,7 +71,9 @@ ENTRY(do_suspend_lowlevel) movq %rsi, saved_rsi addq $8, %rsp - call acpi_enter_s3 + movl $3, %edi + xorl %eax, %eax + call acpi_enter_sleep_state /* in case something went wrong, restore the machine status and go on */ jmp resume_point diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 931280ff829..ced4534baed 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -165,7 +165,7 @@ static const unsigned char * const k7_nops[ASM_NOP_MAX+2] = #endif #ifdef P6_NOP1 -static const unsigned char __initconst_or_module p6nops[] = +static const unsigned char p6nops[] = { P6_NOP1, P6_NOP2, @@ -224,7 +224,7 @@ void __init arch_init_ideal_nops(void) ideal_nops = intel_nops; #endif } - + break; default: #ifdef CONFIG_X86_64 ideal_nops = k8_nops; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 98e24131ff3..24deb308232 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -75,8 +75,8 @@ physid_mask_t phys_cpu_present_map; /* * Map cpu index to physical APIC ID */ -DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); -DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); @@ -88,7 +88,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); * used for the mapping. This is where the behaviors of x86_64 and 32 * actually diverge. Let's keep it ugly for now. */ -DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID); /* * Knob to control our willingness to enable the local APIC. 
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 406eee78468..c265593ec2c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1204,7 +1204,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) BUG_ON(!cfg->vector); vector = cfg->vector; - for_each_cpu(cpu, cfg->domain) + for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) per_cpu(vector_irq, cpu)[vector] = -1; cfg->vector = 0; @@ -1212,7 +1212,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) if (likely(!cfg->move_in_progress)) return; - for_each_cpu(cpu, cfg->old_domain) { + for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] != irq) @@ -1356,6 +1356,16 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, if (!IO_APIC_IRQ(irq)) return; + /* + * For legacy irqs, cfg->domain starts with cpu 0. Now that IO-APIC + * can handle this irq and the apic driver is finialized at this point, + * update the cfg->domain. + */ + if (irq < legacy_pic->nr_legacy_irqs && + cpumask_equal(cfg->domain, cpumask_of(0))) + apic->vector_allocation_domain(0, cfg->domain, + apic->target_cpus()); + if (assign_irq_vector(irq, cfg, apic->target_cpus())) return; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index bac4c3804cc..d30a6a9a012 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp) obj-y := intel_cacheinfo.o scattered.o topology.o obj-y += proc.o capflags.o powerflags.o common.o -obj-y += vmware.o hypervisor.o sched.o mshyperv.o +obj-y += vmware.o hypervisor.o mshyperv.o obj-y += rdrand.o obj-y += match.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5bbc082c47a..a5fbc3c5fcc 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -144,6 +144,8 @@ static int __init x86_xsave_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + setup_clear_cpu_cap(X86_FEATURE_AVX); + setup_clear_cpu_cap(X86_FEATURE_AVX2); return 1; } __setup("noxsave", x86_xsave_setup); @@ -452,6 +454,35 @@ void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) c->x86_cache_size = l2size; } +u16 __read_mostly tlb_lli_4k[NR_INFO]; +u16 __read_mostly tlb_lli_2m[NR_INFO]; +u16 __read_mostly tlb_lli_4m[NR_INFO]; +u16 __read_mostly tlb_lld_4k[NR_INFO]; +u16 __read_mostly tlb_lld_2m[NR_INFO]; +u16 __read_mostly tlb_lld_4m[NR_INFO]; + +/* + * tlb_flushall_shift shows the balance point in replacing cr3 write + * with multiple 'invlpg'. It will do this replacement when + * flush_tlb_lines <= active_lines/2^tlb_flushall_shift. + * If tlb_flushall_shift is -1, means the replacement will be disabled. 
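The comment above explains the break-even that tlb_flushall_shift encodes: fall back to a full CR3 reload once a ranged flush would touch more than active_lines/2^shift TLB entries. A minimal user-space sketch of that test, assuming illustrative names (pick_flush_method, act_entries) rather than the kernel's real flush_tlb_range() plumbing:

#include <stdio.h>

/* Illustrative threshold with the same meaning as tlb_flushall_shift. */
static int tlb_flushall_shift = 5;	/* e.g. the SandyBridge value from the table below */

/*
 * Decide between a full TLB flush and a per-page invalidation loop.
 * flush_lines: pages a ranged flush would touch
 * act_entries: active TLB entries for the page size in use
 */
static const char *pick_flush_method(unsigned long flush_lines,
				     unsigned long act_entries)
{
	if (tlb_flushall_shift < 0)
		return "full flush (replacement disabled)";
	if (flush_lines <= (act_entries >> tlb_flushall_shift))
		return "per-page invlpg loop";
	return "full flush (CR3 write)";
}

int main(void)
{
	/* With 512 active entries and shift 5, the break-even point is 16 pages. */
	printf("%s\n", pick_flush_method(8, 512));	/* per-page invlpg loop */
	printf("%s\n", pick_flush_method(64, 512));	/* full flush (CR3 write) */
	return 0;
}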
+ */ +s8 __read_mostly tlb_flushall_shift = -1; + +void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) +{ + if (this_cpu->c_detect_tlb) + this_cpu->c_detect_tlb(c); + + printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ + "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ + "tlb_flushall_shift is 0x%x\n", + tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], + tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], + tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], + tlb_flushall_shift); +} + void __cpuinit detect_ht(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_HT @@ -911,6 +942,8 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif + if (boot_cpu_data.cpuid_level >= 2) + cpu_detect_tlb(&boot_cpu_data); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 8bacc7826fb..4041c24ae7d 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -20,10 +20,19 @@ struct cpu_dev { void (*c_bsp_init)(struct cpuinfo_x86 *); void (*c_init)(struct cpuinfo_x86 *); void (*c_identify)(struct cpuinfo_x86 *); + void (*c_detect_tlb)(struct cpuinfo_x86 *); unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int); int c_x86_vendor; }; +struct _tlb_table { + unsigned char descriptor; + char tlb_type; + unsigned int entries; + /* unsigned int ways; */ + char info[128]; +}; + #define cpu_dev_register(cpu_devX) \ static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \ __attribute__((__section__(".x86_cpu_dev.init"))) = \ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 3e6ff6cbf42..0a4ce2980a5 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -491,6 +491,181 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i } #endif +#define TLB_INST_4K 0x01 +#define TLB_INST_4M 0x02 +#define TLB_INST_2M_4M 0x03 + +#define TLB_INST_ALL 0x05 +#define TLB_INST_1G 0x06 + +#define TLB_DATA_4K 0x11 +#define TLB_DATA_4M 0x12 +#define TLB_DATA_2M_4M 0x13 +#define TLB_DATA_4K_4M 0x14 + +#define TLB_DATA_1G 0x16 + +#define TLB_DATA0_4K 0x21 +#define TLB_DATA0_4M 0x22 +#define TLB_DATA0_2M_4M 0x23 + +#define STLB_4K 0x41 + +static const struct _tlb_table intel_tlb_table[] __cpuinitconst = { + { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, + { 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, full associative" }, + { 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" }, + { 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" }, + { 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" }, + { 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" }, + { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages */" }, + { 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, + { 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, + { 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, + { 0x55, TLB_INST_2M_4M, 7, " TLB_INST 2-MByte or 4-MByte pages, fully associative" }, + { 0x56, TLB_DATA0_4M, 16, " TLB_DATA0 4 MByte pages, 4-way set associative" }, + { 0x57, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, 4-way associative" }, + { 0x59, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, fully associative" }, + { 0x5a, TLB_DATA0_2M_4M, 32, " TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" }, + { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" }, + { 
0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, + { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, + { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" }, + { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" }, + { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" }, + { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, + { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, + { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, + { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, + { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" }, + { 0x00, 0, 0 } +}; + +static void __cpuinit intel_tlb_lookup(const unsigned char desc) +{ + unsigned char k; + if (desc == 0) + return; + + /* look up this descriptor in the table */ + for (k = 0; intel_tlb_table[k].descriptor != desc && \ + intel_tlb_table[k].descriptor != 0; k++) + ; + + if (intel_tlb_table[k].tlb_type == 0) + return; + + switch (intel_tlb_table[k].tlb_type) { + case STLB_4K: + if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) + tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; + if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) + tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; + break; + case TLB_INST_ALL: + if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) + tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; + if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) + tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; + if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) + tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; + break; + case TLB_INST_4K: + if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) + tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; + break; + case TLB_INST_4M: + if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) + tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; + break; + case TLB_INST_2M_4M: + if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) + tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; + if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) + tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; + break; + case TLB_DATA_4K: + case TLB_DATA0_4K: + if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) + tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; + break; + case TLB_DATA_4M: + case TLB_DATA0_4M: + if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) + tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; + break; + case TLB_DATA_2M_4M: + case TLB_DATA0_2M_4M: + if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries) + tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries; + if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) + tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; + break; + case TLB_DATA_4K_4M: + if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) + tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; + if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) + tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; + break; + } +} + +static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c) +{ + if (!cpu_has_invlpg) { + tlb_flushall_shift = -1; + return; + } + switch ((c->x86 << 8) + c->x86_model) { + case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ + case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ + case 0x617: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ + case 
0x61d: /* six-core 45 nm xeon "Dunnington" */ + tlb_flushall_shift = -1; + break; + case 0x61a: /* 45 nm nehalem, "Bloomfield" */ + case 0x61e: /* 45 nm nehalem, "Lynnfield" */ + case 0x625: /* 32 nm nehalem, "Clarkdale" */ + case 0x62c: /* 32 nm nehalem, "Gulftown" */ + case 0x62e: /* 45 nm nehalem-ex, "Beckton" */ + case 0x62f: /* 32 nm Xeon E7 */ + tlb_flushall_shift = 6; + break; + case 0x62a: /* SandyBridge */ + case 0x62d: /* SandyBridge, "Romely-EP" */ + tlb_flushall_shift = 5; + break; + case 0x63a: /* Ivybridge */ + tlb_flushall_shift = 1; + break; + default: + tlb_flushall_shift = 6; + } +} + +static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c) +{ + int i, j, n; + unsigned int regs[4]; + unsigned char *desc = (unsigned char *)regs; + /* Number of times to iterate */ + n = cpuid_eax(2) & 0xFF; + + for (i = 0 ; i < n ; i++) { + cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); + + /* If bit 31 is set, this is an unknown format */ + for (j = 0 ; j < 3 ; j++) + if (regs[j] & (1 << 31)) + regs[j] = 0; + + /* Byte 0 is level count, not a descriptor */ + for (j = 1 ; j < 16 ; j++) + intel_tlb_lookup(desc[j]); + } + intel_tlb_flushall_shift_set(c); +} + static const struct cpu_dev __cpuinitconst intel_cpu_dev = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, @@ -546,6 +721,7 @@ static const struct cpu_dev __cpuinitconst intel_cpu_dev = { }, .c_size_cache = intel_size_cache, #endif + .c_detect_tlb = intel_detect_tlb, .c_early_init = early_init_intel, .c_init = init_intel, .c_x86_vendor = X86_VENDOR_INTEL, diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 413c2ced887..13017626f9a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -55,13 +55,6 @@ static struct severity { #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) #define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV) -#define MCACOD 0xffff -/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ -#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ -#define MCACOD_SCRUBMSK 0xfff0 -#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ -#define MCACOD_DATA 0x0134 /* Data Load */ -#define MCACOD_INSTR 0x0150 /* Instruction Fetch */ MCESEV( NO, "Invalid", diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 5e095f873e3..292d0258311 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -103,6 +103,8 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { static DEFINE_PER_CPU(struct work_struct, mce_work); +static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); + /* * CPU/chipset specific EDAC code can register a notifier call here to print * MCE errors in a human-readable form. @@ -650,14 +652,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll); * Do a quick check if any of the events requires a panic. * This decides if we keep the events around or clear them.
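The case labels in intel_tlb_flushall_shift_set() above are (family << 8) + model keys. A tiny illustration of that encoding, using the well-known family/model numbers behind the comments (SandyBridge is family 6, model 0x2a; IvyBridge is model 0x3a):

#include <stdio.h>

/* Build the switch key used by intel_tlb_flushall_shift_set() above. */
static unsigned int cpu_key(unsigned int family, unsigned int model)
{
	return (family << 8) + model;
}

int main(void)
{
	printf("SandyBridge key: 0x%x\n", cpu_key(6, 0x2a));	/* 0x62a */
	printf("IvyBridge key:   0x%x\n", cpu_key(6, 0x3a));	/* 0x63a */
	return 0;
}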
*/ -static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp) +static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, + struct pt_regs *regs) { int i, ret = 0; for (i = 0; i < banks; i++) { m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); - if (m->status & MCI_STATUS_VAL) + if (m->status & MCI_STATUS_VAL) { __set_bit(i, validp); + if (quirk_no_way_out) + quirk_no_way_out(i, m, regs); + } if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) ret = 1; } @@ -1040,7 +1046,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) *final = m; memset(valid_banks, 0, sizeof(valid_banks)); - no_way_out = mce_no_way_out(&m, &msg, valid_banks); + no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs); barrier(); @@ -1418,6 +1424,34 @@ static void __mcheck_cpu_init_generic(void) } } +/* + * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and + * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM + * Vol 3B Table 15-20). But this confuses both the code that determines + * whether the machine check occurred in kernel or user mode, and also + * the severity assessment code. Pretend that EIPV was set, and take the + * ip/cs values from the pt_regs that mce_gather_info() ignored earlier. + */ +static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs) +{ + if (bank != 0) + return; + if ((m->mcgstatus & (MCG_STATUS_EIPV|MCG_STATUS_RIPV)) != 0) + return; + if ((m->status & (MCI_STATUS_OVER|MCI_STATUS_UC| + MCI_STATUS_EN|MCI_STATUS_MISCV|MCI_STATUS_ADDRV| + MCI_STATUS_PCC|MCI_STATUS_S|MCI_STATUS_AR| + MCACOD)) != + (MCI_STATUS_UC|MCI_STATUS_EN| + MCI_STATUS_MISCV|MCI_STATUS_ADDRV|MCI_STATUS_S| + MCI_STATUS_AR|MCACOD_INSTR)) + return; + + m->mcgstatus |= MCG_STATUS_EIPV; + m->ip = regs->ip; + m->cs = regs->cs; +} + /* Add per CPU specific workarounds here */ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) { @@ -1515,6 +1549,9 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) */ if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0) mce_bootlog = 0; + + if (c->x86 == 6 && c->x86_model == 45) + quirk_no_way_out = quirk_sandybridge_ifu; } if (monarch_timeout < 0) monarch_timeout = 0; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 29557aa06dd..915b876edd1 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -32,6 +32,8 @@ #include <asm/smp.h> #include <asm/alternative.h> #include <asm/timer.h> +#include <asm/desc.h> +#include <asm/ldt.h> #include "perf_event.h" @@ -1738,6 +1740,29 @@ valid_user_frame(const void __user *fp, unsigned long size) return (__range_not_ok(fp, size, TASK_SIZE) == 0); } +static unsigned long get_segment_base(unsigned int segment) +{ + struct desc_struct *desc; + int idx = segment >> 3; + + if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { + if (idx > LDT_ENTRIES) + return 0; + + if (idx > current->active_mm->context.size) + return 0; + + desc = current->active_mm->context.ldt; + } else { + if (idx > GDT_ENTRIES) + return 0; + + desc = __this_cpu_ptr(&gdt_page.gdt[0]); + } + + return get_desc_base(desc + idx); +} + #ifdef CONFIG_COMPAT #include <asm/compat.h> @@ -1746,13 +1771,17 @@ static inline int perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) { /* 32-bit process in 64-bit kernel. 
*/ + unsigned long ss_base, cs_base; struct stack_frame_ia32 frame; const void __user *fp; if (!test_thread_flag(TIF_IA32)) return 0; - fp = compat_ptr(regs->bp); + cs_base = get_segment_base(regs->cs); + ss_base = get_segment_base(regs->ss); + + fp = compat_ptr(ss_base + regs->bp); while (entry->nr < PERF_MAX_STACK_DEPTH) { unsigned long bytes; frame.next_frame = 0; @@ -1765,8 +1794,8 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) if (!valid_user_frame(fp, sizeof(frame))) break; - perf_callchain_store(entry, frame.return_address); - fp = compat_ptr(frame.next_frame); + perf_callchain_store(entry, cs_base + frame.return_address); + fp = compat_ptr(ss_base + frame.next_frame); } return 1; } @@ -1789,6 +1818,12 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) return; } + /* + * We don't know what to do with VM86 stacks.. ignore them for now. + */ + if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM)) + return; + fp = (void __user *)regs->bp; perf_callchain_store(entry, regs->ip); @@ -1816,16 +1851,50 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) } } -unsigned long perf_instruction_pointer(struct pt_regs *regs) +/* + * Deal with code segment offsets for the various execution modes: + * + * VM86 - the good olde 16 bit days, where the linear address is + * 20 bits and we use regs->ip + 0x10 * regs->cs. + * + * IA32 - Where we need to look at GDT/LDT segment descriptor tables + * to figure out what the 32bit base address is. + * + * X32 - has TIF_X32 set, but is running in x86_64 + * + * X86_64 - CS,DS,SS,ES are all zero based. + */ +static unsigned long code_segment_base(struct pt_regs *regs) { - unsigned long ip; + /* + * If we are in VM86 mode, add the segment offset to convert to a + * linear address. + */ + if (regs->flags & X86_VM_MASK) + return 0x10 * regs->cs; + + /* + * For IA32 we look at the GDT/LDT segment base to convert the + * effective IP to a linear address. + */ +#ifdef CONFIG_X86_32 + if (user_mode(regs) && regs->cs != __USER_CS) + return get_segment_base(regs->cs); +#else + if (test_thread_flag(TIF_IA32)) { + if (user_mode(regs) && regs->cs != __USER32_CS) + return get_segment_base(regs->cs); + } +#endif + return 0; +} +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - ip = perf_guest_cbs->get_guest_ip(); - else - ip = instruction_pointer(regs); + return perf_guest_cbs->get_guest_ip(); - return ip; + return regs->ip + code_segment_base(regs); } unsigned long perf_misc_flags(struct pt_regs *regs) @@ -1838,7 +1907,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs) else misc |= PERF_RECORD_MISC_GUEST_KERNEL; } else { - if (!kernel_ip(regs->ip)) + if (user_mode(regs)) misc |= PERF_RECORD_MISC_USER; else misc |= PERF_RECORD_MISC_KERNEL; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index a15df4be151..6605a81ba33 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -374,7 +374,7 @@ struct x86_pmu { /* * Intel DebugStore bits */ - int bts :1, + unsigned int bts :1, bts_active :1, pebs :1, pebs_active :1, @@ -516,6 +516,26 @@ static inline bool kernel_ip(unsigned long ip) #endif } +/* + * Not all PMUs provide the right context information to place the reported IP + * into full context. Specifically segment registers are typically not + * supplied. 
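perf_instruction_pointer() above now returns regs->ip plus a per-mode code segment base, so callers always see a linear address. A small stand-alone sketch of the two easy cases, VM86 (base = cs << 4) and flat 64-bit (base = 0), with made-up register values:

#include <stdio.h>

/* VM86/real mode: the code segment base is the selector scaled by 16. */
static unsigned long linear_ip_vm86(unsigned long cs, unsigned long ip)
{
	return 0x10 * cs + ip;
}

/* 64-bit (and zero-based 32-bit) code: CS base is 0, ip is already linear. */
static unsigned long linear_ip_flat(unsigned long ip)
{
	return ip;
}

int main(void)
{
	/* Illustrative values: cs=0x1234, ip=0x0100 -> linear 0x12440. */
	printf("vm86: 0x%lx\n", linear_ip_vm86(0x1234, 0x0100));
	printf("flat: 0x%lx\n", linear_ip_flat(0x401000));
	return 0;
}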
+ * + * Assuming the address is a linear address (it is for IBS), we fake the CS and + * vm86 mode using the known zero-based code segment and 'fix up' the registers + * to reflect this. + * + * Intel PEBS/LBR appear to typically provide the effective address, nothing + * much we can do about that but pray and treat it like a linear address. + */ +static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip) +{ + regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS; + if (regs->flags & X86_VM_MASK) + regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK); + regs->ip = ip; +} + #ifdef CONFIG_CPU_SUP_AMD int amd_pmu_init(void); diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index da9bcdcd985..7bfb5bec863 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -13,6 +13,8 @@ #include <asm/apic.h> +#include "perf_event.h" + static u32 ibs_caps; #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) @@ -536,7 +538,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { regs.flags &= ~PERF_EFLAGS_EXACT; } else { - instruction_pointer_set(®s, ibs_data.regs[1]); + set_linear_ip(®s, ibs_data.regs[1]); regs.flags |= PERF_EFLAGS_EXACT; } diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 7a8b9d0abca..7f2739e03e7 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -138,6 +138,84 @@ static u64 intel_pmu_event_map(int hw_event) return intel_perfmon_event_map[hw_event]; } +#define SNB_DMND_DATA_RD (1ULL << 0) +#define SNB_DMND_RFO (1ULL << 1) +#define SNB_DMND_IFETCH (1ULL << 2) +#define SNB_DMND_WB (1ULL << 3) +#define SNB_PF_DATA_RD (1ULL << 4) +#define SNB_PF_RFO (1ULL << 5) +#define SNB_PF_IFETCH (1ULL << 6) +#define SNB_LLC_DATA_RD (1ULL << 7) +#define SNB_LLC_RFO (1ULL << 8) +#define SNB_LLC_IFETCH (1ULL << 9) +#define SNB_BUS_LOCKS (1ULL << 10) +#define SNB_STRM_ST (1ULL << 11) +#define SNB_OTHER (1ULL << 15) +#define SNB_RESP_ANY (1ULL << 16) +#define SNB_NO_SUPP (1ULL << 17) +#define SNB_LLC_HITM (1ULL << 18) +#define SNB_LLC_HITE (1ULL << 19) +#define SNB_LLC_HITS (1ULL << 20) +#define SNB_LLC_HITF (1ULL << 21) +#define SNB_LOCAL (1ULL << 22) +#define SNB_REMOTE (0xffULL << 23) +#define SNB_SNP_NONE (1ULL << 31) +#define SNB_SNP_NOT_NEEDED (1ULL << 32) +#define SNB_SNP_MISS (1ULL << 33) +#define SNB_NO_FWD (1ULL << 34) +#define SNB_SNP_FWD (1ULL << 35) +#define SNB_HITM (1ULL << 36) +#define SNB_NON_DRAM (1ULL << 37) + +#define SNB_DMND_READ (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD) +#define SNB_DMND_WRITE (SNB_DMND_RFO|SNB_LLC_RFO) +#define SNB_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) + +#define SNB_SNP_ANY (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \ + SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \ + SNB_HITM) + +#define SNB_DRAM_ANY (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY) +#define SNB_DRAM_REMOTE (SNB_REMOTE|SNB_SNP_ANY) + +#define SNB_L3_ACCESS SNB_RESP_ANY +#define SNB_L3_MISS (SNB_DRAM_ANY|SNB_NON_DRAM) + +static __initconst const u64 snb_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS, + [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS, + [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS, + }, + 
[ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS, + [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY, + [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY, + [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY, + [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE, + }, + }, +}; + static __initconst const u64 snb_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -235,16 +313,16 @@ static __initconst const u64 snb_hw_cache_event_ids }, [ C(NODE) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, }, }, @@ -1444,8 +1522,16 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; + /* + * If PMU counter has PEBS enabled it is not enough to disable counter + * on a guest entry since PEBS memory write can overshoot guest entry + * and corrupt guest memory. Disabling PEBS solves the problem. + */ + arr[1].msr = MSR_IA32_PEBS_ENABLE; + arr[1].host = cpuc->pebs_enabled; + arr[1].guest = 0; - *nr = 1; + *nr = 2; return arr; } @@ -1964,6 +2050,8 @@ __init int intel_pmu_init(void) case 58: /* IvyBridge */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_snb(); diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 629ae0b7ad9..e38d97bf425 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -499,7 +499,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) * We sampled a branch insn, rewind using the LBR stack */ if (ip == to) { - regs->ip = from; + set_linear_ip(regs, from); return 1; } @@ -529,7 +529,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) } while (to < ip); if (to == ip) { - regs->ip = old_to; + set_linear_ip(regs, old_to); return 1; } @@ -569,7 +569,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event, * A possible PERF_SAMPLE_REGS will have to transfer all regs. 
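With the 0x01b7 entries and snb_hw_cache_extra_regs above, the generalized LL/NODE cache events resolve to the offcore-response event plus the request/response bits carried in an extra register. A user-space sketch of requesting the generalized LLC-read-miss event through perf_event_open(); it assumes a kernel that carries this table and enough perf privilege to open the counter:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HW_CACHE;
	/* LL cache, read op, miss result: on SNB this resolves to the
	 * offcore-response event 0x01b7 plus the extra-reg bits above. */
	attr.config = PERF_COUNT_HW_CACHE_LL |
		      (PERF_COUNT_HW_CACHE_OP_READ << 8) |
		      (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
	attr.exclude_kernel = 1;

	fd = perf_event_open(&attr, 0, -1, -1, 0);	/* this thread, any CPU */
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	/* ... run the workload of interest here ... */
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("LLC read misses: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}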
*/ regs = *iregs; - regs.ip = pebs->ip; + regs.flags = pebs->flags; + set_linear_ip(®s, pebs->ip); regs.bp = pebs->bp; regs.sp = pebs->sp; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 19faffc6088..0a5571080e7 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -18,6 +18,7 @@ static struct event_constraint constraint_empty = EVENT_CONSTRAINT(0, 0, 0); DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); @@ -33,10 +34,81 @@ DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31"); -DEFINE_UNCORE_FORMAT_ATTR(filter_brand0, filter_brand0, "config1:0-7"); -DEFINE_UNCORE_FORMAT_ATTR(filter_brand1, filter_brand1, "config1:8-15"); -DEFINE_UNCORE_FORMAT_ATTR(filter_brand2, filter_brand2, "config1:16-23"); -DEFINE_UNCORE_FORMAT_ATTR(filter_brand3, filter_brand3, "config1:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23"); +DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31"); + +static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + u64 count; + + rdmsrl(event->hw.event_base, count); + + return count; +} + +/* + * generic get constraint function for shared match/mask registers. + */ +static struct event_constraint * +uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_extra_reg *er; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + unsigned long flags; + bool ok = false; + + /* + * reg->alloc can be set due to existing state, so for fake box we + * need to ignore this, otherwise we might fail to allocate proper + * fake state for this extra reg constraint. + */ + if (reg1->idx == EXTRA_REG_NONE || + (!uncore_box_is_fake(box) && reg1->alloc)) + return NULL; + + er = &box->shared_regs[reg1->idx]; + raw_spin_lock_irqsave(&er->lock, flags); + if (!atomic_read(&er->ref) || + (er->config1 == reg1->config && er->config2 == reg2->config)) { + atomic_inc(&er->ref); + er->config1 = reg1->config; + er->config2 = reg2->config; + ok = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + if (ok) { + if (!uncore_box_is_fake(box)) + reg1->alloc = 1; + return NULL; + } + + return &constraint_empty; +} + +static void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_extra_reg *er; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + + /* + * Only put constraint if extra reg was actually allocated. Also + * takes care of event which do not use an extra shared reg. + * + * Also, if this is a fake box we shouldn't touch any event state + * (reg->alloc) and we don't care about leaving inconsistent box + * state either since it will be thrown out. 
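uncore_get_constraint()/uncore_put_constraint() above generalize a claim/release protocol for a register shared by several events: take it when it is free or already programmed with the same config, and drop the reference on release. A stripped-down user-space rendering of that idea, assuming C11 atomics and a plain mutex in place of the kernel's raw spinlock:

#include <stdatomic.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* A shared match/mask style register: refcounted, guarded by a lock. */
struct shared_reg {
	pthread_mutex_t lock;
	atomic_int ref;
	uint64_t config;
};

/* Claim succeeds if nobody uses the register or it already holds 'config'. */
static bool shared_reg_get(struct shared_reg *er, uint64_t config)
{
	bool ok = false;

	pthread_mutex_lock(&er->lock);
	if (atomic_load(&er->ref) == 0 || er->config == config) {
		atomic_fetch_add(&er->ref, 1);
		er->config = config;
		ok = true;
	}
	pthread_mutex_unlock(&er->lock);
	return ok;
}

static void shared_reg_put(struct shared_reg *er)
{
	atomic_fetch_sub(&er->ref, 1);
}

int main(void)
{
	struct shared_reg er = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

	printf("%d\n", shared_reg_get(&er, 0x1234));	/* 1: free, claimed   */
	printf("%d\n", shared_reg_get(&er, 0x1234));	/* 1: same config, ok */
	printf("%d\n", shared_reg_get(&er, 0xbeef));	/* 0: conflicting use */
	shared_reg_put(&er);
	shared_reg_put(&er);
	return 0;
}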
+ */ + if (uncore_box_is_fake(box) || !reg1->alloc) + return; + + er = &box->shared_regs[reg1->idx]; + atomic_dec(&er->ref); + reg1->alloc = 0; +} /* Sandy Bridge-EP uncore support */ static struct intel_uncore_type snbep_uncore_cbox; @@ -64,18 +136,15 @@ static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box) pci_write_config_dword(pdev, box_ctl, config); } -static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, - struct perf_event *event) +static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event) { struct pci_dev *pdev = box->pci_dev; struct hw_perf_event *hwc = &event->hw; - pci_write_config_dword(pdev, hwc->config_base, hwc->config | - SNBEP_PMON_CTL_EN); + pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); } -static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, - struct perf_event *event) +static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event) { struct pci_dev *pdev = box->pci_dev; struct hw_perf_event *hwc = &event->hw; @@ -83,8 +152,7 @@ static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, pci_write_config_dword(pdev, hwc->config_base, hwc->config); } -static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, - struct perf_event *event) +static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event) { struct pci_dev *pdev = box->pci_dev; struct hw_perf_event *hwc = &event->hw; @@ -92,14 +160,15 @@ static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); + return count; } static void snbep_uncore_pci_init_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; - pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, - SNBEP_PMON_BOX_CTL_INT); + + pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, SNBEP_PMON_BOX_CTL_INT); } static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box) @@ -112,7 +181,6 @@ static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box) rdmsrl(msr, config); config |= SNBEP_PMON_BOX_CTL_FRZ; wrmsrl(msr, config); - return; } } @@ -126,12 +194,10 @@ static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box) rdmsrl(msr, config); config &= ~SNBEP_PMON_BOX_CTL_FRZ; wrmsrl(msr, config); - return; } } -static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, - struct perf_event *event) +static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; struct hw_perf_event_extra *reg1 = &hwc->extra_reg; @@ -150,68 +216,15 @@ static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box, wrmsrl(hwc->config_base, hwc->config); } -static u64 snbep_uncore_msr_read_counter(struct intel_uncore_box *box, - struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - u64 count; - - rdmsrl(hwc->event_base, count); - return count; -} - static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) { unsigned msr = uncore_msr_box_ctl(box); + if (msr) wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT); } -static struct event_constraint * -snbep_uncore_get_constraint(struct intel_uncore_box *box, - struct perf_event *event) -{ - struct intel_uncore_extra_reg *er; - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - unsigned long flags; - bool ok = false; - 
- if (reg1->idx == EXTRA_REG_NONE || (box->phys_id >= 0 && reg1->alloc)) - return NULL; - - er = &box->shared_regs[reg1->idx]; - raw_spin_lock_irqsave(&er->lock, flags); - if (!atomic_read(&er->ref) || er->config1 == reg1->config) { - atomic_inc(&er->ref); - er->config1 = reg1->config; - ok = true; - } - raw_spin_unlock_irqrestore(&er->lock, flags); - - if (ok) { - if (box->phys_id >= 0) - reg1->alloc = 1; - return NULL; - } - return &constraint_empty; -} - -static void snbep_uncore_put_constraint(struct intel_uncore_box *box, - struct perf_event *event) -{ - struct intel_uncore_extra_reg *er; - struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; - - if (box->phys_id < 0 || !reg1->alloc) - return; - - er = &box->shared_regs[reg1->idx]; - atomic_dec(&er->ref); - reg1->alloc = 0; -} - -static int snbep_uncore_hw_config(struct intel_uncore_box *box, - struct perf_event *event) +static int snbep_uncore_hw_config(struct intel_uncore_box *box, struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; struct hw_perf_event_extra *reg1 = &hwc->extra_reg; @@ -221,14 +234,16 @@ static int snbep_uncore_hw_config(struct intel_uncore_box *box, SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; reg1->config = event->attr.config1 & SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK; - } else if (box->pmu->type == &snbep_uncore_pcu) { - reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; - reg1->config = event->attr.config1 & - SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK; } else { - return 0; + if (box->pmu->type == &snbep_uncore_pcu) { + reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; + reg1->config = event->attr.config1 & SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK; + } else { + return 0; + } } reg1->idx = 0; + return 0; } @@ -272,10 +287,19 @@ static struct attribute *snbep_uncore_pcu_formats_attr[] = { &format_attr_thresh5.attr, &format_attr_occ_invert.attr, &format_attr_occ_edge.attr, - &format_attr_filter_brand0.attr, - &format_attr_filter_brand1.attr, - &format_attr_filter_brand2.attr, - &format_attr_filter_brand3.attr, + &format_attr_filter_band0.attr, + &format_attr_filter_band1.attr, + &format_attr_filter_band2.attr, + &format_attr_filter_band3.attr, + NULL, +}; + +static struct attribute *snbep_uncore_qpi_formats_attr[] = { + &format_attr_event_ext.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, NULL, }; @@ -314,15 +338,20 @@ static struct attribute_group snbep_uncore_pcu_format_group = { .attrs = snbep_uncore_pcu_formats_attr, }; +static struct attribute_group snbep_uncore_qpi_format_group = { + .name = "format", + .attrs = snbep_uncore_qpi_formats_attr, +}; + static struct intel_uncore_ops snbep_uncore_msr_ops = { .init_box = snbep_uncore_msr_init_box, .disable_box = snbep_uncore_msr_disable_box, .enable_box = snbep_uncore_msr_enable_box, .disable_event = snbep_uncore_msr_disable_event, .enable_event = snbep_uncore_msr_enable_event, - .read_counter = snbep_uncore_msr_read_counter, - .get_constraint = snbep_uncore_get_constraint, - .put_constraint = snbep_uncore_put_constraint, + .read_counter = uncore_msr_read_counter, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, .hw_config = snbep_uncore_hw_config, }; @@ -485,8 +514,13 @@ static struct intel_uncore_type snbep_uncore_qpi = { .num_counters = 4, .num_boxes = 2, .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &snbep_uncore_pci_ops, .event_descs 
= snbep_uncore_qpi_events, - SNBEP_UNCORE_PCI_COMMON_INIT(), + .format_group = &snbep_uncore_qpi_format_group, }; @@ -603,10 +637,8 @@ static void snbep_pci2phy_map_init(void) } /* end of Sandy Bridge-EP uncore support */ - /* Sandy Bridge uncore support */ -static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, - struct perf_event *event) +static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -616,20 +648,11 @@ static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, wrmsrl(hwc->config_base, SNB_UNC_CTL_EN); } -static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, - struct perf_event *event) +static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) { wrmsrl(event->hw.config_base, 0); } -static u64 snb_uncore_msr_read_counter(struct intel_uncore_box *box, - struct perf_event *event) -{ - u64 count; - rdmsrl(event->hw.event_base, count); - return count; -} - static void snb_uncore_msr_init_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) { @@ -648,15 +671,15 @@ static struct attribute *snb_uncore_formats_attr[] = { }; static struct attribute_group snb_uncore_format_group = { - .name = "format", - .attrs = snb_uncore_formats_attr, + .name = "format", + .attrs = snb_uncore_formats_attr, }; static struct intel_uncore_ops snb_uncore_msr_ops = { .init_box = snb_uncore_msr_init_box, .disable_event = snb_uncore_msr_disable_event, .enable_event = snb_uncore_msr_enable_event, - .read_counter = snb_uncore_msr_read_counter, + .read_counter = uncore_msr_read_counter, }; static struct event_constraint snb_uncore_cbox_constraints[] = { @@ -697,12 +720,10 @@ static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box) static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box) { - wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, - NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); + wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); } -static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, - struct perf_event *event) +static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -744,7 +765,7 @@ static struct intel_uncore_ops nhm_uncore_msr_ops = { .enable_box = nhm_uncore_msr_enable_box, .disable_event = snb_uncore_msr_disable_event, .enable_event = nhm_uncore_msr_enable_event, - .read_counter = snb_uncore_msr_read_counter, + .read_counter = uncore_msr_read_counter, }; static struct intel_uncore_type nhm_uncore = { @@ -769,8 +790,1049 @@ static struct intel_uncore_type *nhm_msr_uncores[] = { }; /* end of Nehalem uncore support */ -static void uncore_assign_hw_event(struct intel_uncore_box *box, - struct perf_event *event, int idx) +/* Nehalem-EX uncore support */ +#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \ + ((1ULL << (n)) - 1))) + +DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5"); +DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7"); +DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63"); +DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63"); + +static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box) +{ + wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL); +} + +static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + u64 config; + + if (msr) { + rdmsrl(msr, 
config); + config &= ~((1ULL << uncore_num_counters(box)) - 1); + /* WBox has a fixed counter */ + if (uncore_msr_fixed_ctl(box)) + config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN; + wrmsrl(msr, config); + } +} + +static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + u64 config; + + if (msr) { + rdmsrl(msr, config); + config |= (1ULL << uncore_num_counters(box)) - 1; + /* WBox has a fixed counter */ + if (uncore_msr_fixed_ctl(box)) + config |= NHMEX_W_PMON_GLOBAL_FIXED_EN; + wrmsrl(msr, config); + } +} + +static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + wrmsrl(event->hw.config_base, 0); +} + +static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->idx >= UNCORE_PMC_IDX_FIXED) + wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0); + else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0) + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); + else + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); +} + +#define NHMEX_UNCORE_OPS_COMMON_INIT() \ + .init_box = nhmex_uncore_msr_init_box, \ + .disable_box = nhmex_uncore_msr_disable_box, \ + .enable_box = nhmex_uncore_msr_enable_box, \ + .disable_event = nhmex_uncore_msr_disable_event, \ + .read_counter = uncore_msr_read_counter + +static struct intel_uncore_ops nhmex_uncore_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_uncore_msr_enable_event, +}; + +static struct attribute *nhmex_uncore_ubox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_edge.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_ubox_format_group = { + .name = "format", + .attrs = nhmex_uncore_ubox_formats_attr, +}; + +static struct intel_uncore_type nhmex_uncore_ubox = { + .name = "ubox", + .num_counters = 1, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_U_MSR_PMON_EV_SEL, + .perf_ctr = NHMEX_U_MSR_PMON_CTR, + .event_mask = NHMEX_U_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_U_MSR_PMON_GLOBAL_CTL, + .ops = &nhmex_uncore_ops, + .format_group = &nhmex_uncore_ubox_format_group +}; + +static struct attribute *nhmex_uncore_cbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_cbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_cbox_formats_attr, +}; + +/* msr offset for each instance of cbox */ +static unsigned nhmex_cbox_msr_offsets[] = { + 0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0, +}; + +static struct intel_uncore_type nhmex_uncore_cbox = { + .name = "cbox", + .num_counters = 6, + .num_boxes = 10, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_C0_MSR_PMON_EV_SEL0, + .perf_ctr = NHMEX_C0_MSR_PMON_CTR0, + .event_mask = NHMEX_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_C0_MSR_PMON_GLOBAL_CTL, + .msr_offsets = nhmex_cbox_msr_offsets, + .pair_ctr_ctl = 1, + .ops = &nhmex_uncore_ops, + .format_group = &nhmex_uncore_cbox_format_group +}; + +static struct uncore_event_desc nhmex_uncore_wbox_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type nhmex_uncore_wbox = { + .name = "wbox", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_W_MSR_PMON_CNT0, + .perf_ctr = NHMEX_W_MSR_PMON_EVT_SEL0, + 
.fixed_ctr = NHMEX_W_MSR_PMON_FIXED_CTR, + .fixed_ctl = NHMEX_W_MSR_PMON_FIXED_CTL, + .event_mask = NHMEX_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_W_MSR_GLOBAL_CTL, + .pair_ctr_ctl = 1, + .event_descs = nhmex_uncore_wbox_events, + .ops = &nhmex_uncore_ops, + .format_group = &nhmex_uncore_cbox_format_group +}; + +static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + int ctr, ev_sel; + + ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >> + NHMEX_B_PMON_CTR_SHIFT; + ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >> + NHMEX_B_PMON_CTL_EV_SEL_SHIFT; + + /* events that do not use the match/mask registers */ + if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) || + (ctr == 2 && ev_sel != 0x4) || ctr == 3) + return 0; + + if (box->pmu->pmu_idx == 0) + reg1->reg = NHMEX_B0_MSR_MATCH; + else + reg1->reg = NHMEX_B1_MSR_MATCH; + reg1->idx = 0; + reg1->config = event->attr.config1; + reg2->config = event->attr.config2; + return 0; +} + +static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + wrmsrl(reg1->reg, reg1->config); + wrmsrl(reg1->reg + 1, reg2->config); + } + wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | + (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK)); +} + +/* + * The Bbox has 4 counters, but each counter monitors different events. + * Use bits 6-7 in the event config to select counter. + */ +static struct event_constraint nhmex_uncore_bbox_constraints[] = { + EVENT_CONSTRAINT(0 , 1, 0xc0), + EVENT_CONSTRAINT(0x40, 2, 0xc0), + EVENT_CONSTRAINT(0x80, 4, 0xc0), + EVENT_CONSTRAINT(0xc0, 8, 0xc0), + EVENT_CONSTRAINT_END, +}; + +static struct attribute *nhmex_uncore_bbox_formats_attr[] = { + &format_attr_event5.attr, + &format_attr_counter.attr, + &format_attr_match.attr, + &format_attr_mask.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_bbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_bbox_formats_attr, +}; + +static struct intel_uncore_ops nhmex_uncore_bbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_bbox_msr_enable_event, + .hw_config = nhmex_bbox_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_bbox = { + .name = "bbox", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_B0_MSR_PMON_CTL0, + .perf_ctr = NHMEX_B0_MSR_PMON_CTR0, + .event_mask = NHMEX_B_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_B0_MSR_PMON_GLOBAL_CTL, + .msr_offset = NHMEX_B_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 1, + .constraints = nhmex_uncore_bbox_constraints, + .ops = &nhmex_uncore_bbox_ops, + .format_group = &nhmex_uncore_bbox_format_group +}; + +static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + /* only TO_R_PROG_EV event uses the match/mask register */ + if ((hwc->config & NHMEX_PMON_CTL_EV_SEL_MASK) != + NHMEX_S_EVENT_TO_R_PROG_EV) + return 0; + + if (box->pmu->pmu_idx == 0) + reg1->reg = 
NHMEX_S0_MSR_MM_CFG; + else + reg1->reg = NHMEX_S1_MSR_MM_CFG; + reg1->idx = 0; + reg1->config = event->attr.config1; + reg2->config = event->attr.config2; + return 0; +} + +static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + + if (reg1->idx != EXTRA_REG_NONE) { + wrmsrl(reg1->reg, 0); + wrmsrl(reg1->reg + 1, reg1->config); + wrmsrl(reg1->reg + 2, reg2->config); + wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN); + } + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); +} + +static struct attribute *nhmex_uncore_sbox_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_match.attr, + &format_attr_mask.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_sbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_sbox_formats_attr, +}; + +static struct intel_uncore_ops nhmex_uncore_sbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_sbox_msr_enable_event, + .hw_config = nhmex_sbox_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_sbox = { + .name = "sbox", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_S0_MSR_PMON_CTL0, + .perf_ctr = NHMEX_S0_MSR_PMON_CTR0, + .event_mask = NHMEX_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_S0_MSR_PMON_GLOBAL_CTL, + .msr_offset = NHMEX_S_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 1, + .ops = &nhmex_uncore_sbox_ops, + .format_group = &nhmex_uncore_sbox_format_group +}; + +enum { + EXTRA_REG_NHMEX_M_FILTER, + EXTRA_REG_NHMEX_M_DSP, + EXTRA_REG_NHMEX_M_ISS, + EXTRA_REG_NHMEX_M_MAP, + EXTRA_REG_NHMEX_M_MSC_THR, + EXTRA_REG_NHMEX_M_PGT, + EXTRA_REG_NHMEX_M_PLD, + EXTRA_REG_NHMEX_M_ZDP_CTL_FVC, +}; + +static struct extra_reg nhmex_uncore_mbox_extra_regs[] = { + MBOX_INC_SEL_EXTAR_REG(0x0, DSP), + MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR), + MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR), + MBOX_INC_SEL_EXTAR_REG(0x9, ISS), + /* event 0xa uses two extra registers */ + MBOX_INC_SEL_EXTAR_REG(0xa, ISS), + MBOX_INC_SEL_EXTAR_REG(0xa, PLD), + MBOX_INC_SEL_EXTAR_REG(0xb, PLD), + /* events 0xd ~ 0x10 use the same extra register */ + MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC), + MBOX_INC_SEL_EXTAR_REG(0x16, PGT), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT), + MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP), + EVENT_EXTRA_END +}; + +/* Nehalem-EX or Westmere-EX ? */ +bool uncore_nhmex; + +static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config) +{ + struct intel_uncore_extra_reg *er; + unsigned long flags; + bool ret = false; + u64 mask; + + if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { + er = &box->shared_regs[idx]; + raw_spin_lock_irqsave(&er->lock, flags); + if (!atomic_read(&er->ref) || er->config == config) { + atomic_inc(&er->ref); + er->config = config; + ret = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + return ret; + } + /* + * The ZDP_CTL_FVC MSR has 4 fields which are used to control + * events 0xd ~ 0x10. 
Besides these 4 fields, there are additional + * fields which are shared. + */ + idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + if (WARN_ON_ONCE(idx >= 4)) + return false; + + /* mask of the shared fields */ + if (uncore_nhmex) + mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK; + else + mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK; + er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; + + raw_spin_lock_irqsave(&er->lock, flags); + /* add mask of the non-shared field if it's in use */ + if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) { + if (uncore_nhmex) + mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + else + mask |= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + } + + if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) { + atomic_add(1 << (idx * 8), &er->ref); + if (uncore_nhmex) + mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK | + NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + else + mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK | + WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + er->config &= ~mask; + er->config |= (config & mask); + ret = true; + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + return ret; +} + +static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx) +{ + struct intel_uncore_extra_reg *er; + + if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { + er = &box->shared_regs[idx]; + atomic_dec(&er->ref); + return; + } + + idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; + atomic_sub(1 << (idx * 8), &er->ref); +} + +u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + int idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8); + u64 config = reg1->config; + + /* get the non-shared control bits and shift them */ + idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + if (uncore_nhmex) + config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + else + config &= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); + if (new_idx > orig_idx) { + idx = new_idx - orig_idx; + config <<= 3 * idx; + } else { + idx = orig_idx - new_idx; + config >>= 3 * idx; + } + + /* add the shared control bits back */ + if (uncore_nhmex) + config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; + else + config |= WSMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; + config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; + if (modify) { + /* adjust the main event selector */ + if (new_idx > orig_idx) + hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT; + else + hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT; + reg1->config = config; + reg1->idx = ~0xff | new_idx; + } + return config; +} + +static struct event_constraint * +nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + int i, idx[2], alloc = 0; + u64 config1 = reg1->config; + + idx[0] = __BITS_VALUE(reg1->idx, 0, 8); + idx[1] = __BITS_VALUE(reg1->idx, 1, 8); +again: + for (i = 0; i < 2; i++) { + if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i))) + idx[i] = 0xff; + + if (idx[i] == 0xff) + continue; + + if (!nhmex_mbox_get_shared_reg(box, idx[i], + __BITS_VALUE(config1, i, 32))) + goto fail; + alloc |= (0x1 << i); + } + + /* for the match/mask registers */ + if (reg2->idx != EXTRA_REG_NONE && + (uncore_box_is_fake(box) || !reg2->alloc) && + !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config)) + goto fail; + + /* + * If it's a fake box -- as per 
validate_{group,event}() we + * shouldn't touch event state and we can avoid doing so + * since both will only call get_event_constraints() once + * on each event, this avoids the need for reg->alloc. + */ + if (!uncore_box_is_fake(box)) { + if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) + nhmex_mbox_alter_er(event, idx[0], true); + reg1->alloc |= alloc; + if (reg2->idx != EXTRA_REG_NONE) + reg2->alloc = 1; + } + return NULL; +fail: + if (idx[0] != 0xff && !(alloc & 0x1) && + idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { + /* + * events 0xd ~ 0x10 are functional identical, but are + * controlled by different fields in the ZDP_CTL_FVC + * register. If we failed to take one field, try the + * rest 3 choices. + */ + BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff); + idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + idx[0] = (idx[0] + 1) % 4; + idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; + if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) { + config1 = nhmex_mbox_alter_er(event, idx[0], false); + goto again; + } + } + + if (alloc & 0x1) + nhmex_mbox_put_shared_reg(box, idx[0]); + if (alloc & 0x2) + nhmex_mbox_put_shared_reg(box, idx[1]); + return &constraint_empty; +} + +static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + + if (uncore_box_is_fake(box)) + return; + + if (reg1->alloc & 0x1) + nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8)); + if (reg1->alloc & 0x2) + nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8)); + reg1->alloc = 0; + + if (reg2->alloc) { + nhmex_mbox_put_shared_reg(box, reg2->idx); + reg2->alloc = 0; + } +} + +static int nhmex_mbox_extra_reg_idx(struct extra_reg *er) +{ + if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) + return er->idx; + return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd; +} + +static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_type *type = box->pmu->type; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + struct extra_reg *er; + unsigned msr; + int reg_idx = 0; + /* + * The mbox events may require 2 extra MSRs at the most. But only + * the lower 32 bits in these MSRs are significant, so we can use + * config1 to pass two MSRs' config. + */ + for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) { + if (er->event != (event->hw.config & er->config_mask)) + continue; + if (event->attr.config1 & ~er->valid_mask) + return -EINVAL; + + msr = er->msr + type->msr_offset * box->pmu->pmu_idx; + if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff)) + return -EINVAL; + + /* always use the 32~63 bits to pass the PLD config */ + if (er->idx == EXTRA_REG_NHMEX_M_PLD) + reg_idx = 1; + else if (WARN_ON_ONCE(reg_idx > 0)) + return -EINVAL; + + reg1->idx &= ~(0xff << (reg_idx * 8)); + reg1->reg &= ~(0xffff << (reg_idx * 16)); + reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8); + reg1->reg |= msr << (reg_idx * 16); + reg1->config = event->attr.config1; + reg_idx++; + } + /* + * The mbox only provides ability to perform address matching + * for the PLD events. 
+ */ + if (reg_idx == 2) { + reg2->idx = EXTRA_REG_NHMEX_M_FILTER; + if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN) + reg2->config = event->attr.config2; + else + reg2->config = ~0ULL; + if (box->pmu->pmu_idx == 0) + reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG; + else + reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG; + } + return 0; +} + +static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx) +{ + struct intel_uncore_extra_reg *er; + unsigned long flags; + u64 config; + + if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) + return box->shared_regs[idx].config; + + er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; + raw_spin_lock_irqsave(&er->lock, flags); + config = er->config; + raw_spin_unlock_irqrestore(&er->lock, flags); + return config; +} + +static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + int idx; + + idx = __BITS_VALUE(reg1->idx, 0, 8); + if (idx != 0xff) + wrmsrl(__BITS_VALUE(reg1->reg, 0, 16), + nhmex_mbox_shared_reg_config(box, idx)); + idx = __BITS_VALUE(reg1->idx, 1, 8); + if (idx != 0xff) + wrmsrl(__BITS_VALUE(reg1->reg, 1, 16), + nhmex_mbox_shared_reg_config(box, idx)); + + if (reg2->idx != EXTRA_REG_NONE) { + wrmsrl(reg2->reg, 0); + if (reg2->config != ~0ULL) { + wrmsrl(reg2->reg + 1, + reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK); + wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK & + (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT)); + wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN); + } + } + + wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); +} + +DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3"); +DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode, "config:4-5"); +DEFINE_UNCORE_FORMAT_ATTR(wrap_mode, wrap_mode, "config:6"); +DEFINE_UNCORE_FORMAT_ATTR(flag_mode, flag_mode, "config:7"); +DEFINE_UNCORE_FORMAT_ATTR(inc_sel, inc_sel, "config:9-13"); +DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel, set_flag_sel, "config:19-21"); +DEFINE_UNCORE_FORMAT_ATTR(filter_cfg_en, filter_cfg_en, "config2:63"); +DEFINE_UNCORE_FORMAT_ATTR(filter_match, filter_match, "config2:0-33"); +DEFINE_UNCORE_FORMAT_ATTR(filter_mask, filter_mask, "config2:34-61"); +DEFINE_UNCORE_FORMAT_ATTR(dsp, dsp, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(thr, thr, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(fvc, fvc, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(pgt, pgt, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(map, map, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(iss, iss, "config1:0-31"); +DEFINE_UNCORE_FORMAT_ATTR(pld, pld, "config1:32-63"); + +static struct attribute *nhmex_uncore_mbox_formats_attr[] = { + &format_attr_count_mode.attr, + &format_attr_storage_mode.attr, + &format_attr_wrap_mode.attr, + &format_attr_flag_mode.attr, + &format_attr_inc_sel.attr, + &format_attr_set_flag_sel.attr, + &format_attr_filter_cfg_en.attr, + &format_attr_filter_match.attr, + &format_attr_filter_mask.attr, + &format_attr_dsp.attr, + &format_attr_thr.attr, + &format_attr_fvc.attr, + &format_attr_pgt.attr, + &format_attr_map.attr, + &format_attr_iss.attr, + &format_attr_pld.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_mbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_mbox_formats_attr, +}; + +static struct uncore_event_desc nhmex_uncore_mbox_events[] = { + INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"), + INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, 
"inc_sel=0xd,fvc=0x2820"), + { /* end: all zeroes */ }, +}; + +static struct uncore_event_desc wsmex_uncore_mbox_events[] = { + INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x5000"), + INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x5040"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops nhmex_uncore_mbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_mbox_msr_enable_event, + .hw_config = nhmex_mbox_hw_config, + .get_constraint = nhmex_mbox_get_constraint, + .put_constraint = nhmex_mbox_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_mbox = { + .name = "mbox", + .num_counters = 6, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_M0_MSR_PMU_CTL0, + .perf_ctr = NHMEX_M0_MSR_PMU_CNT0, + .event_mask = NHMEX_M_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_M0_MSR_GLOBAL_CTL, + .msr_offset = NHMEX_M_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 8, + .event_descs = nhmex_uncore_mbox_events, + .ops = &nhmex_uncore_mbox_ops, + .format_group = &nhmex_uncore_mbox_format_group, +}; + +void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + int port; + + /* adjust the main event selector and extra register index */ + if (reg1->idx % 2) { + reg1->idx--; + hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT; + } else { + reg1->idx++; + hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT; + } + + /* adjust extra register config */ + port = reg1->idx / 6 + box->pmu->pmu_idx * 4; + switch (reg1->idx % 6) { + case 2: + /* shift the 8~15 bits to the 0~7 bits */ + reg1->config >>= 8; + break; + case 3: + /* shift the 0~7 bits to the 8~15 bits */ + reg1->config <<= 8; + break; + }; +} + +/* + * Each rbox has 4 event set which monitor PQI port 0~3 or 4~7. + * An event set consists of 6 events, the 3rd and 4th events in + * an event set use the same extra register. So an event set uses + * 5 extra registers. + */ +static struct event_constraint * +nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + struct intel_uncore_extra_reg *er; + unsigned long flags; + int idx, er_idx; + u64 config1; + bool ok = false; + + if (!uncore_box_is_fake(box) && reg1->alloc) + return NULL; + + idx = reg1->idx % 6; + config1 = reg1->config; +again: + er_idx = idx; + /* the 3rd and 4th events use the same extra register */ + if (er_idx > 2) + er_idx--; + er_idx += (reg1->idx / 6) * 5; + + er = &box->shared_regs[er_idx]; + raw_spin_lock_irqsave(&er->lock, flags); + if (idx < 2) { + if (!atomic_read(&er->ref) || er->config == reg1->config) { + atomic_inc(&er->ref); + er->config = reg1->config; + ok = true; + } + } else if (idx == 2 || idx == 3) { + /* + * these two events use different fields in a extra register, + * the 0~7 bits and the 8~15 bits respectively. 
+ */ + u64 mask = 0xff << ((idx - 2) * 8); + if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) || + !((er->config ^ config1) & mask)) { + atomic_add(1 << ((idx - 2) * 8), &er->ref); + er->config &= ~mask; + er->config |= config1 & mask; + ok = true; + } + } else { + if (!atomic_read(&er->ref) || + (er->config == (hwc->config >> 32) && + er->config1 == reg1->config && + er->config2 == reg2->config)) { + atomic_inc(&er->ref); + er->config = (hwc->config >> 32); + er->config1 = reg1->config; + er->config2 = reg2->config; + ok = true; + } + } + raw_spin_unlock_irqrestore(&er->lock, flags); + + if (!ok) { + /* + * The Rbox events are always in pairs. The paired + * events are functional identical, but use different + * extra registers. If we failed to take an extra + * register, try the alternative. + */ + if (idx % 2) + idx--; + else + idx++; + if (idx != reg1->idx % 6) { + if (idx == 2) + config1 >>= 8; + else if (idx == 3) + config1 <<= 8; + goto again; + } + } else { + if (!uncore_box_is_fake(box)) { + if (idx != reg1->idx % 6) + nhmex_rbox_alter_er(box, event); + reg1->alloc = 1; + } + return NULL; + } + return &constraint_empty; +} + +static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) +{ + struct intel_uncore_extra_reg *er; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + int idx, er_idx; + + if (uncore_box_is_fake(box) || !reg1->alloc) + return; + + idx = reg1->idx % 6; + er_idx = idx; + if (er_idx > 2) + er_idx--; + er_idx += (reg1->idx / 6) * 5; + + er = &box->shared_regs[er_idx]; + if (idx == 2 || idx == 3) + atomic_sub(1 << ((idx - 2) * 8), &er->ref); + else + atomic_dec(&er->ref); + + reg1->alloc = 0; +} + +static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; + int idx; + + idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >> + NHMEX_R_PMON_CTL_EV_SEL_SHIFT; + if (idx >= 0x18) + return -EINVAL; + + reg1->idx = idx; + reg1->config = event->attr.config1; + + switch (idx % 6) { + case 4: + case 5: + hwc->config |= event->attr.config & (~0ULL << 32); + reg2->config = event->attr.config2; + break; + }; + return 0; +} + +static u64 nhmex_rbox_shared_reg_config(struct intel_uncore_box *box, int idx) +{ + struct intel_uncore_extra_reg *er; + unsigned long flags; + u64 config; + + er = &box->shared_regs[idx]; + + raw_spin_lock_irqsave(&er->lock, flags); + config = er->config; + raw_spin_unlock_irqrestore(&er->lock, flags); + + return config; +} + +static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + struct hw_perf_event_extra *reg2 = &hwc->branch_reg; + int idx, port; + + idx = reg1->idx; + port = idx / 6 + box->pmu->pmu_idx * 4; + + switch (idx % 6) { + case 0: + wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config); + break; + case 1: + wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config); + break; + case 2: + case 3: + wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port), + nhmex_rbox_shared_reg_config(box, 2 + (idx / 6) * 5)); + break; + case 4: + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port), + hwc->config >> 32); + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config); + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config); + break; + case 5: + 
wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port), + hwc->config >> 32); + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config); + wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config); + break; + }; + + wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | + (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK)); +} + +DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config:32-63"); +DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config1:0-63"); +DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63"); +DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15"); +DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31"); + +static struct attribute *nhmex_uncore_rbox_formats_attr[] = { + &format_attr_event5.attr, + &format_attr_xbr_mm_cfg.attr, + &format_attr_xbr_match.attr, + &format_attr_xbr_mask.attr, + &format_attr_qlx_cfg.attr, + &format_attr_iperf_cfg.attr, + NULL, +}; + +static struct attribute_group nhmex_uncore_rbox_format_group = { + .name = "format", + .attrs = nhmex_uncore_rbox_formats_attr, +}; + +static struct uncore_event_desc nhmex_uncore_rbox_events[] = { + INTEL_UNCORE_EVENT_DESC(qpi0_flit_send, "event=0x0,iperf_cfg=0x80000000"), + INTEL_UNCORE_EVENT_DESC(qpi1_filt_send, "event=0x6,iperf_cfg=0x80000000"), + INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt, "event=0x0,iperf_cfg=0x40000000"), + INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt, "event=0x6,iperf_cfg=0x40000000"), + INTEL_UNCORE_EVENT_DESC(qpi0_date_response, "event=0x0,iperf_cfg=0xc4"), + INTEL_UNCORE_EVENT_DESC(qpi1_date_response, "event=0x6,iperf_cfg=0xc4"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops nhmex_uncore_rbox_ops = { + NHMEX_UNCORE_OPS_COMMON_INIT(), + .enable_event = nhmex_rbox_msr_enable_event, + .hw_config = nhmex_rbox_hw_config, + .get_constraint = nhmex_rbox_get_constraint, + .put_constraint = nhmex_rbox_put_constraint, +}; + +static struct intel_uncore_type nhmex_uncore_rbox = { + .name = "rbox", + .num_counters = 8, + .num_boxes = 2, + .perf_ctr_bits = 48, + .event_ctl = NHMEX_R_MSR_PMON_CTL0, + .perf_ctr = NHMEX_R_MSR_PMON_CNT0, + .event_mask = NHMEX_R_PMON_RAW_EVENT_MASK, + .box_ctl = NHMEX_R_MSR_GLOBAL_CTL, + .msr_offset = NHMEX_R_MSR_OFFSET, + .pair_ctr_ctl = 1, + .num_shared_regs = 20, + .event_descs = nhmex_uncore_rbox_events, + .ops = &nhmex_uncore_rbox_ops, + .format_group = &nhmex_uncore_rbox_format_group +}; + +static struct intel_uncore_type *nhmex_msr_uncores[] = { + &nhmex_uncore_ubox, + &nhmex_uncore_cbox, + &nhmex_uncore_bbox, + &nhmex_uncore_sbox, + &nhmex_uncore_mbox, + &nhmex_uncore_rbox, + &nhmex_uncore_wbox, + NULL, +}; +/* end of Nehalem-EX uncore support */ + +static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx) { struct hw_perf_event *hwc = &event->hw; @@ -787,8 +1849,7 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box, hwc->event_base = uncore_perf_ctr(box, hwc->idx); } -static void uncore_perf_event_update(struct intel_uncore_box *box, - struct perf_event *event) +static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event) { u64 prev_count, new_count, delta; int shift; @@ -858,14 +1919,12 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) box->hrtimer.function = uncore_pmu_hrtimer; } -struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, - int cpu) +struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu) { struct intel_uncore_box *box; int i, size; - size = sizeof(*box) + 
type->num_shared_regs * - sizeof(struct intel_uncore_extra_reg); + size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg); box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu)); if (!box) @@ -915,12 +1974,11 @@ static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) * perf core schedules event on the basis of cpu, uncore events are * collected by one of the cpus inside a physical package. */ - return uncore_pmu_to_box(uncore_event_to_pmu(event), - smp_processor_id()); + return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); } -static int uncore_collect_events(struct intel_uncore_box *box, - struct perf_event *leader, bool dogrp) +static int +uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp) { struct perf_event *event; int n, max_count; @@ -952,8 +2010,7 @@ static int uncore_collect_events(struct intel_uncore_box *box, } static struct event_constraint * -uncore_get_event_constraint(struct intel_uncore_box *box, - struct perf_event *event) +uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event) { struct intel_uncore_type *type = box->pmu->type; struct event_constraint *c; @@ -977,15 +2034,13 @@ uncore_get_event_constraint(struct intel_uncore_box *box, return &type->unconstrainted; } -static void uncore_put_event_constraint(struct intel_uncore_box *box, - struct perf_event *event) +static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event) { if (box->pmu->type->ops->put_constraint) box->pmu->type->ops->put_constraint(box, event); } -static int uncore_assign_events(struct intel_uncore_box *box, - int assign[], int n) +static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) { unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX]; @@ -1256,6 +2311,7 @@ int uncore_pmu_event_init(struct perf_event *event) event->hw.idx = -1; event->hw.last_tag = ~0ULL; event->hw.extra_reg.idx = EXTRA_REG_NONE; + event->hw.branch_reg.idx = EXTRA_REG_NONE; if (event->attr.config == UNCORE_FIXED_EVENT) { /* no fixed counter */ @@ -1326,7 +2382,7 @@ static void __init uncore_type_exit(struct intel_uncore_type *type) type->attr_groups[1] = NULL; } -static void uncore_types_exit(struct intel_uncore_type **types) +static void __init uncore_types_exit(struct intel_uncore_type **types) { int i; for (i = 0; types[i]; i++) @@ -1407,8 +2463,7 @@ static bool pcidrv_registered; /* * add a pci uncore device */ -static int __devinit uncore_pci_add(struct intel_uncore_type *type, - struct pci_dev *pdev) +static int __devinit uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev) { struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; @@ -1485,6 +2540,7 @@ static int __devinit uncore_pci_probe(struct pci_dev *pdev, struct intel_uncore_type *type; type = (struct intel_uncore_type *)id->driver_data; + return uncore_pci_add(type, pdev); } @@ -1612,8 +2668,8 @@ static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id) return 0; } -static void __cpuinit uncore_change_context(struct intel_uncore_type **uncores, - int old_cpu, int new_cpu) +static void __cpuinit +uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu) { struct intel_uncore_type *type; struct intel_uncore_pmu *pmu; @@ -1694,8 +2750,8 @@ static void __cpuinit uncore_event_init_cpu(int cpu) uncore_change_context(pci_uncores, -1, cpu); } -static 
int __cpuinit uncore_cpu_notifier(struct notifier_block *self, - unsigned long action, void *hcpu) +static int + __cpuinit uncore_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (long)hcpu; @@ -1732,12 +2788,12 @@ static int __cpuinit uncore_cpu_notifier(struct notifier_block *self, } static struct notifier_block uncore_cpu_nb __cpuinitdata = { - .notifier_call = uncore_cpu_notifier, + .notifier_call = uncore_cpu_notifier, /* * to migrate uncore events, our notifier should be executed * before perf core's notifier. */ - .priority = CPU_PRI_PERF + 1, + .priority = CPU_PRI_PERF + 1, }; static void __init uncore_cpu_setup(void *dummy) @@ -1767,6 +2823,15 @@ static int __init uncore_cpu_init(void) snbep_uncore_cbox.num_boxes = max_cores; msr_uncores = snbep_msr_uncores; break; + case 46: /* Nehalem-EX */ + uncore_nhmex = true; + case 47: /* Westmere-EX aka. Xeon E7 */ + if (!uncore_nhmex) + nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events; + if (nhmex_uncore_cbox.num_boxes > max_cores) + nhmex_uncore_cbox.num_boxes = max_cores; + msr_uncores = nhmex_msr_uncores; + break; default: return 0; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index b13e9ea81de..5b81c1856aa 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -5,9 +5,7 @@ #include "perf_event.h" #define UNCORE_PMU_NAME_LEN 32 -#define UNCORE_BOX_HASH_SIZE 8 - -#define UNCORE_PMU_HRTIMER_INTERVAL (60 * NSEC_PER_SEC) +#define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC) #define UNCORE_FIXED_EVENT 0xff #define UNCORE_PMC_IDX_MAX_GENERIC 8 @@ -115,6 +113,10 @@ SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) +#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK \ + (SNBEP_PMON_RAW_EVENT_MASK | \ + SNBEP_PMON_CTL_EV_SEL_EXT) + /* SNB-EP pci control register */ #define SNBEP_PCI_PMON_BOX_CTL 0xf4 #define SNBEP_PCI_PMON_CTL0 0xd8 @@ -158,6 +160,188 @@ #define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc #define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd +/* NHM-EX event control */ +#define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff +#define NHMEX_PMON_CTL_UMASK_MASK 0x0000ff00 +#define NHMEX_PMON_CTL_EN_BIT0 (1 << 0) +#define NHMEX_PMON_CTL_EDGE_DET (1 << 18) +#define NHMEX_PMON_CTL_PMI_EN (1 << 20) +#define NHMEX_PMON_CTL_EN_BIT22 (1 << 22) +#define NHMEX_PMON_CTL_INVERT (1 << 23) +#define NHMEX_PMON_CTL_TRESH_MASK 0xff000000 +#define NHMEX_PMON_RAW_EVENT_MASK (NHMEX_PMON_CTL_EV_SEL_MASK | \ + NHMEX_PMON_CTL_UMASK_MASK | \ + NHMEX_PMON_CTL_EDGE_DET | \ + NHMEX_PMON_CTL_INVERT | \ + NHMEX_PMON_CTL_TRESH_MASK) + +/* NHM-EX Ubox */ +#define NHMEX_U_MSR_PMON_GLOBAL_CTL 0xc00 +#define NHMEX_U_MSR_PMON_CTR 0xc11 +#define NHMEX_U_MSR_PMON_EV_SEL 0xc10 + +#define NHMEX_U_PMON_GLOBAL_EN (1 << 0) +#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL 0x0000001e +#define NHMEX_U_PMON_GLOBAL_EN_ALL (1 << 28) +#define NHMEX_U_PMON_GLOBAL_RST_ALL (1 << 29) +#define NHMEX_U_PMON_GLOBAL_FRZ_ALL (1 << 31) + +#define NHMEX_U_PMON_RAW_EVENT_MASK \ + (NHMEX_PMON_CTL_EV_SEL_MASK | \ + NHMEX_PMON_CTL_EDGE_DET) + +/* NHM-EX Cbox */ +#define NHMEX_C0_MSR_PMON_GLOBAL_CTL 0xd00 +#define NHMEX_C0_MSR_PMON_CTR0 0xd11 +#define NHMEX_C0_MSR_PMON_EV_SEL0 0xd10 +#define NHMEX_C_MSR_OFFSET 0x20 + +/* NHM-EX Bbox */ +#define NHMEX_B0_MSR_PMON_GLOBAL_CTL 0xc20 +#define NHMEX_B0_MSR_PMON_CTR0 0xc31 +#define NHMEX_B0_MSR_PMON_CTL0 0xc30 +#define NHMEX_B_MSR_OFFSET 0x40 +#define NHMEX_B0_MSR_MATCH 0xe45 +#define 
NHMEX_B0_MSR_MASK 0xe46 +#define NHMEX_B1_MSR_MATCH 0xe4d +#define NHMEX_B1_MSR_MASK 0xe4e + +#define NHMEX_B_PMON_CTL_EN (1 << 0) +#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT 1 +#define NHMEX_B_PMON_CTL_EV_SEL_MASK \ + (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT) +#define NHMEX_B_PMON_CTR_SHIFT 6 +#define NHMEX_B_PMON_CTR_MASK \ + (0x3 << NHMEX_B_PMON_CTR_SHIFT) +#define NHMEX_B_PMON_RAW_EVENT_MASK \ + (NHMEX_B_PMON_CTL_EV_SEL_MASK | \ + NHMEX_B_PMON_CTR_MASK) + +/* NHM-EX Sbox */ +#define NHMEX_S0_MSR_PMON_GLOBAL_CTL 0xc40 +#define NHMEX_S0_MSR_PMON_CTR0 0xc51 +#define NHMEX_S0_MSR_PMON_CTL0 0xc50 +#define NHMEX_S_MSR_OFFSET 0x80 +#define NHMEX_S0_MSR_MM_CFG 0xe48 +#define NHMEX_S0_MSR_MATCH 0xe49 +#define NHMEX_S0_MSR_MASK 0xe4a +#define NHMEX_S1_MSR_MM_CFG 0xe58 +#define NHMEX_S1_MSR_MATCH 0xe59 +#define NHMEX_S1_MSR_MASK 0xe5a + +#define NHMEX_S_PMON_MM_CFG_EN (0x1ULL << 63) +#define NHMEX_S_EVENT_TO_R_PROG_EV 0 + +/* NHM-EX Mbox */ +#define NHMEX_M0_MSR_GLOBAL_CTL 0xca0 +#define NHMEX_M0_MSR_PMU_DSP 0xca5 +#define NHMEX_M0_MSR_PMU_ISS 0xca6 +#define NHMEX_M0_MSR_PMU_MAP 0xca7 +#define NHMEX_M0_MSR_PMU_MSC_THR 0xca8 +#define NHMEX_M0_MSR_PMU_PGT 0xca9 +#define NHMEX_M0_MSR_PMU_PLD 0xcaa +#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC 0xcab +#define NHMEX_M0_MSR_PMU_CTL0 0xcb0 +#define NHMEX_M0_MSR_PMU_CNT0 0xcb1 +#define NHMEX_M_MSR_OFFSET 0x40 +#define NHMEX_M0_MSR_PMU_MM_CFG 0xe54 +#define NHMEX_M1_MSR_PMU_MM_CFG 0xe5c + +#define NHMEX_M_PMON_MM_CFG_EN (1ULL << 63) +#define NHMEX_M_PMON_ADDR_MATCH_MASK 0x3ffffffffULL +#define NHMEX_M_PMON_ADDR_MASK_MASK 0x7ffffffULL +#define NHMEX_M_PMON_ADDR_MASK_SHIFT 34 + +#define NHMEX_M_PMON_CTL_EN (1 << 0) +#define NHMEX_M_PMON_CTL_PMI_EN (1 << 1) +#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT 2 +#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK \ + (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT) +#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT 4 +#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK \ + (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT) +#define NHMEX_M_PMON_CTL_WRAP_MODE (1 << 6) +#define NHMEX_M_PMON_CTL_FLAG_MODE (1 << 7) +#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT 9 +#define NHMEX_M_PMON_CTL_INC_SEL_MASK \ + (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT) +#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT 19 +#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK \ + (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) +#define NHMEX_M_PMON_RAW_EVENT_MASK \ + (NHMEX_M_PMON_CTL_COUNT_MODE_MASK | \ + NHMEX_M_PMON_CTL_STORAGE_MODE_MASK | \ + NHMEX_M_PMON_CTL_WRAP_MODE | \ + NHMEX_M_PMON_CTL_FLAG_MODE | \ + NHMEX_M_PMON_CTL_INC_SEL_MASK | \ + NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK) + +#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23)) +#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (11 + 3 * (n))) + +#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24)) +#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (12 + 3 * (n))) + +/* + * use the 9~13 bits to select event If the 7th bit is not set, + * otherwise use the 19~21 bits to select event. 
+ */ +#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT) +#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \ + NHMEX_M_PMON_CTL_FLAG_MODE) +#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \ + NHMEX_M_PMON_CTL_FLAG_MODE) +#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \ + NHMEX_M_PMON_CTL_FLAG_MODE) +#define MBOX_INC_SEL_EXTAR_REG(c, r) \ + EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \ + MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r) +#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \ + EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \ + MBOX_SET_FLAG_SEL_MASK, \ + (u64)-1, NHMEX_M_##r) + +/* NHM-EX Rbox */ +#define NHMEX_R_MSR_GLOBAL_CTL 0xe00 +#define NHMEX_R_MSR_PMON_CTL0 0xe10 +#define NHMEX_R_MSR_PMON_CNT0 0xe11 +#define NHMEX_R_MSR_OFFSET 0x20 + +#define NHMEX_R_MSR_PORTN_QLX_CFG(n) \ + ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4)) +#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n) (0xe04 + (n)) +#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n) (0xe24 + (n)) +#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n) \ + (((n) < 4 ? 0 : 0x10) + (n) * 4) +#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) \ + (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n)) +#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1) +#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2) +#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) \ + (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n)) +#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1) +#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n) \ + (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2) + +#define NHMEX_R_PMON_CTL_EN (1 << 0) +#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT 1 +#define NHMEX_R_PMON_CTL_EV_SEL_MASK \ + (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT) +#define NHMEX_R_PMON_CTL_PMI_EN (1 << 6) +#define NHMEX_R_PMON_RAW_EVENT_MASK NHMEX_R_PMON_CTL_EV_SEL_MASK + +/* NHM-EX Wbox */ +#define NHMEX_W_MSR_GLOBAL_CTL 0xc80 +#define NHMEX_W_MSR_PMON_CNT0 0xc90 +#define NHMEX_W_MSR_PMON_EVT_SEL0 0xc91 +#define NHMEX_W_MSR_PMON_FIXED_CTR 0x394 +#define NHMEX_W_MSR_PMON_FIXED_CTL 0x395 + +#define NHMEX_W_PMON_GLOBAL_FIXED_EN (1ULL << 31) + struct intel_uncore_ops; struct intel_uncore_pmu; struct intel_uncore_box; @@ -178,6 +362,8 @@ struct intel_uncore_type { unsigned msr_offset; unsigned num_shared_regs:8; unsigned single_fixed:1; + unsigned pair_ctr_ctl:1; + unsigned *msr_offsets; struct event_constraint unconstrainted; struct event_constraint *constraints; struct intel_uncore_pmu *pmus; @@ -213,7 +399,7 @@ struct intel_uncore_pmu { struct intel_uncore_extra_reg { raw_spinlock_t lock; - u64 config1; + u64 config, config1, config2; atomic_t ref; }; @@ -295,43 +481,47 @@ unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx) return idx * 8 + box->pmu->type->perf_ctr; } -static inline -unsigned uncore_msr_box_ctl(struct intel_uncore_box *box) +static inline unsigned uncore_msr_box_offset(struct intel_uncore_box *box) +{ + struct intel_uncore_pmu *pmu = box->pmu; + return pmu->type->msr_offsets ? 
+ pmu->type->msr_offsets[pmu->pmu_idx] : + pmu->type->msr_offset * pmu->pmu_idx; +} + +static inline unsigned uncore_msr_box_ctl(struct intel_uncore_box *box) { if (!box->pmu->type->box_ctl) return 0; - return box->pmu->type->box_ctl + - box->pmu->type->msr_offset * box->pmu->pmu_idx; + return box->pmu->type->box_ctl + uncore_msr_box_offset(box); } -static inline -unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box) +static inline unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box) { if (!box->pmu->type->fixed_ctl) return 0; - return box->pmu->type->fixed_ctl + - box->pmu->type->msr_offset * box->pmu->pmu_idx; + return box->pmu->type->fixed_ctl + uncore_msr_box_offset(box); } -static inline -unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box) +static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box) { - return box->pmu->type->fixed_ctr + - box->pmu->type->msr_offset * box->pmu->pmu_idx; + return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box); } static inline unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx) { - return idx + box->pmu->type->event_ctl + - box->pmu->type->msr_offset * box->pmu->pmu_idx; + return box->pmu->type->event_ctl + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + + uncore_msr_box_offset(box); } static inline unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx) { - return idx + box->pmu->type->perf_ctr + - box->pmu->type->msr_offset * box->pmu->pmu_idx; + return box->pmu->type->perf_ctr + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + + uncore_msr_box_offset(box); } static inline @@ -422,3 +612,8 @@ static inline void uncore_box_init(struct intel_uncore_box *box) box->pmu->type->ops->init_box(box); } } + +static inline bool uncore_box_is_fake(struct intel_uncore_box *box) +{ + return (box->phys_id < 0); +} diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c deleted file mode 100644 index a640ae5ad20..00000000000 --- a/arch/x86/kernel/cpu/sched.c +++ /dev/null @@ -1,55 +0,0 @@ -#include <linux/sched.h> -#include <linux/math64.h> -#include <linux/percpu.h> -#include <linux/irqflags.h> - -#include <asm/cpufeature.h> -#include <asm/processor.h> - -#ifdef CONFIG_SMP - -static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched); - -static unsigned long scale_aperfmperf(void) -{ - struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched); - unsigned long ratio, flags; - - local_irq_save(flags); - get_aperfmperf(&val); - local_irq_restore(flags); - - ratio = calc_aperfmperf_ratio(old, &val); - *old = val; - - return ratio; -} - -unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) -{ - /* - * do aperf/mperf on the cpu level because it includes things - * like turbo mode, which are relevant to full cores. 
- */ - if (boot_cpu_has(X86_FEATURE_APERFMPERF)) - return scale_aperfmperf(); - - /* - * maybe have something cpufreq here - */ - - return default_scale_freq_power(sd, cpu); -} - -unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu) -{ - /* - * aperf/mperf already includes the smt gain - */ - if (boot_cpu_has(X86_FEATURE_APERFMPERF)) - return SCHED_LOAD_SCALE; - - return default_scale_smt_power(sd, cpu); -} - -#endif diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 41857970517..ed858e9e9a7 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -944,7 +944,7 @@ void __init e820_reserve_resources(void) for (i = 0; i < e820_saved.nr_map; i++) { struct e820entry *entry = &e820_saved.map[i]; firmware_map_add_early(entry->addr, - entry->addr + entry->size - 1, + entry->addr + entry->size, e820_type_to_string(entry->type)); } } diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 111f6bbd8b3..69babd8c834 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1048,24 +1048,6 @@ apicinterrupt LOCAL_TIMER_VECTOR \ apicinterrupt X86_PLATFORM_IPI_VECTOR \ x86_platform_ipi smp_x86_platform_ipi -#ifdef CONFIG_SMP - ALIGN - INTR_FRAME -.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ - 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 -.if NUM_INVALIDATE_TLB_VECTORS > \idx -ENTRY(invalidate_interrupt\idx) - pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx) - jmp .Lcommon_invalidate_interrupt0 - CFI_ADJUST_CFA_OFFSET -8 -END(invalidate_interrupt\idx) -.endif -.endr - CFI_ENDPROC -apicinterrupt INVALIDATE_TLB_VECTOR_START, \ - invalidate_interrupt0, smp_invalidate_interrupt -#endif - apicinterrupt THRESHOLD_APIC_VECTOR \ threshold_interrupt smp_threshold_interrupt apicinterrupt THERMAL_APIC_VECTOR \ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 1f5f1d5d2a0..d44f7829968 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -270,7 +270,7 @@ void fixup_irqs(void) if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { break_affinity = 1; - affinity = cpu_all_mask; + affinity = cpu_online_mask; } chip = irq_data_get_irq_chip(data); @@ -328,6 +328,7 @@ void fixup_irqs(void) chip->irq_retrigger(data); raw_spin_unlock(&desc->lock); } + __this_cpu_write(vector_irq[vector], -1); } } #endif diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 252981afd6c..6e03b0d6913 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -171,79 +171,6 @@ static void __init smp_intr_init(void) */ alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - /* IPIs for invalidation */ -#define ALLOC_INVTLB_VEC(NR) \ - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \ - invalidate_interrupt##NR) - - switch (NUM_INVALIDATE_TLB_VECTORS) { - default: - ALLOC_INVTLB_VEC(31); - case 31: - ALLOC_INVTLB_VEC(30); - case 30: - ALLOC_INVTLB_VEC(29); - case 29: - ALLOC_INVTLB_VEC(28); - case 28: - ALLOC_INVTLB_VEC(27); - case 27: - ALLOC_INVTLB_VEC(26); - case 26: - ALLOC_INVTLB_VEC(25); - case 25: - ALLOC_INVTLB_VEC(24); - case 24: - ALLOC_INVTLB_VEC(23); - case 23: - ALLOC_INVTLB_VEC(22); - case 22: - ALLOC_INVTLB_VEC(21); - case 21: - ALLOC_INVTLB_VEC(20); - case 20: - ALLOC_INVTLB_VEC(19); - case 19: - ALLOC_INVTLB_VEC(18); - case 18: - ALLOC_INVTLB_VEC(17); - case 17: - ALLOC_INVTLB_VEC(16); - case 16: - ALLOC_INVTLB_VEC(15); - case 15: - ALLOC_INVTLB_VEC(14); - case 14: - ALLOC_INVTLB_VEC(13); - case 13: - ALLOC_INVTLB_VEC(12); - case 12: - ALLOC_INVTLB_VEC(11); - case 11: - 
ALLOC_INVTLB_VEC(10); - case 10: - ALLOC_INVTLB_VEC(9); - case 9: - ALLOC_INVTLB_VEC(8); - case 8: - ALLOC_INVTLB_VEC(7); - case 7: - ALLOC_INVTLB_VEC(6); - case 6: - ALLOC_INVTLB_VEC(5); - case 5: - ALLOC_INVTLB_VEC(4); - case 4: - ALLOC_INVTLB_VEC(3); - case 3: - ALLOC_INVTLB_VEC(2); - case 2: - ALLOC_INVTLB_VEC(1); - case 1: - ALLOC_INVTLB_VEC(0); - break; - } - /* IPI for generic function call */ alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 1d5d31ea686..dc1404bf8e4 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -107,7 +107,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) { struct setup_data_node *node; struct setup_data *data; - int error = -ENOMEM; + int error; struct dentry *d; struct page *pg; u64 pa_data; @@ -121,8 +121,10 @@ static int __init create_setup_data_nodes(struct dentry *parent) while (pa_data) { node = kmalloc(sizeof(*node), GFP_KERNEL); - if (!node) + if (!node) { + error = -ENOMEM; goto err_dir; + } pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT); if (PageHighMem(pg)) { diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 8a2ce8fd41c..82746f942cd 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -143,11 +143,12 @@ static int get_matching_microcode(int cpu, const u8 *ucode_ptr, unsigned int *current_size) { struct microcode_header_amd *mc_hdr; - unsigned int actual_size; + unsigned int actual_size, patch_size; u16 equiv_cpu_id; /* size of the current patch we're staring at */ - *current_size = *(u32 *)(ucode_ptr + 4) + SECTION_HDR_SIZE; + patch_size = *(u32 *)(ucode_ptr + 4); + *current_size = patch_size + SECTION_HDR_SIZE; equiv_cpu_id = find_equiv_id(); if (!equiv_cpu_id) @@ -174,7 +175,7 @@ static int get_matching_microcode(int cpu, const u8 *ucode_ptr, /* * now that the header looks sane, verify its size */ - actual_size = verify_ucode_size(cpu, *current_size, leftover_size); + actual_size = verify_ucode_size(cpu, patch_size, leftover_size); if (!actual_size) return 0; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 5a98aa27218..5cdff035774 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -21,7 +21,7 @@ #include <asm/cpu.h> #include <asm/stackprotector.h> -DEFINE_PER_CPU(int, cpu_number); +DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c1a310fb830..7c5a8c314c0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -106,17 +106,17 @@ int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ -DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; +DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; /* representing HT siblings of each logical CPU */ -DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); /* representing HT and core siblings of each logical CPU */ -DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); -DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); /* Per CPU bogomips and other parameters */ DEFINE_PER_CPU_SHARED_ALIGNED(struct 
cpuinfo_x86, cpu_info);