summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/cstate.c2
-rw-r--r--arch/x86/kernel/alternative.c71
-rw-r--r--arch/x86/kernel/amd_iommu.c6
-rw-r--r--arch/x86/kernel/amd_iommu_init.c163
-rw-r--r--arch/x86/kernel/aperture_64.c18
-rw-r--r--arch/x86/kernel/apic/io_apic.c11
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/cpu.h1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c18
-rw-r--r--arch/x86/kernel/cpu/intel.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c9
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c3
-rw-r--r--arch/x86/kernel/cpu/perf_event.c292
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c8
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c13
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c298
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/entry_64.S6
-rw-r--r--arch/x86/kernel/ftrace.c63
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/hw_breakpoint.c40
-rw-r--r--arch/x86/kernel/irq.c8
-rw-r--r--arch/x86/kernel/irq_work.c30
-rw-r--r--arch/x86/kernel/irqinit.c6
-rw-r--r--arch/x86/kernel/jump_label.c50
-rw-r--r--arch/x86/kernel/kprobes.c14
-rw-r--r--arch/x86/kernel/module.c6
-rw-r--r--arch/x86/kernel/pci-gart_64.c2
-rw-r--r--arch/x86/kernel/setup.c6
-rw-r--r--arch/x86/kernel/tsc.c8
32 files changed, 811 insertions, 356 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 25dc82d3e76..81f37703379 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -11,6 +11,8 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
+CFLAGS_REMOVE_pvclock.o = -pg
+CFLAGS_REMOVE_kvmclock.o = -pg
CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
endif
@@ -32,7 +34,8 @@ GCOV_PROFILE_paravirt.o := n
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o
-obj-y += setup.o x86_init.o i8259.o irqinit.o
+obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
+obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index fb7a5f052e2..fb16f17e59b 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -61,7 +61,7 @@ struct cstate_entry {
unsigned int ecx;
} states[ACPI_PROCESSOR_MAX_POWER];
};
-static struct cstate_entry *cpu_cstate_entry; /* per CPU ptr */
+static struct cstate_entry __percpu *cpu_cstate_entry; /* per CPU ptr */
static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index f65ab8b014c..a36bb90aef5 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
-static void *text_poke_early(void *addr, const void *opcode, size_t len);
+void *text_poke_early(void *addr, const void *opcode, size_t len);
/* Replace instructions with better alternatives for this CPU type.
This runs before SMP is initialized to avoid SMP problems with
@@ -522,7 +522,7 @@ void __init alternative_instructions(void)
* instructions. And on the local CPU you need to be protected again NMI or MCE
* handlers seeing an inconsistent instruction while you patch.
*/
-static void *__init_or_module text_poke_early(void *addr, const void *opcode,
+void *__init_or_module text_poke_early(void *addr, const void *opcode,
size_t len)
{
unsigned long flags;
@@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
tpp.len = len;
atomic_set(&stop_machine_first, 1);
wrote_text = 0;
- stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+ /* Use __stop_machine() because the caller already got online_cpus. */
+ __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
return addr;
}
+#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
+
+unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
+
+void __init arch_init_ideal_nop5(void)
+{
+ extern const unsigned char ftrace_test_p6nop[];
+ extern const unsigned char ftrace_test_nop5[];
+ extern const unsigned char ftrace_test_jmp[];
+ int faulted = 0;
+
+ /*
+ * There is no good nop for all x86 archs.
+ * We will default to using the P6_NOP5, but first we
+ * will test to make sure that the nop will actually
+ * work on this CPU. If it faults, we will then
+ * go to a lesser efficient 5 byte nop. If that fails
+ * we then just use a jmp as our nop. This isn't the most
+ * efficient nop, but we can not use a multi part nop
+ * since we would then risk being preempted in the middle
+ * of that nop, and if we enabled tracing then, it might
+ * cause a system crash.
+ *
+ * TODO: check the cpuid to determine the best nop.
+ */
+ asm volatile (
+ "ftrace_test_jmp:"
+ "jmp ftrace_test_p6nop\n"
+ "nop\n"
+ "nop\n"
+ "nop\n" /* 2 byte jmp + 3 bytes */
+ "ftrace_test_p6nop:"
+ P6_NOP5
+ "jmp 1f\n"
+ "ftrace_test_nop5:"
+ ".byte 0x66,0x66,0x66,0x66,0x90\n"
+ "1:"
+ ".section .fixup, \"ax\"\n"
+ "2: movl $1, %0\n"
+ " jmp ftrace_test_nop5\n"
+ "3: movl $2, %0\n"
+ " jmp 1b\n"
+ ".previous\n"
+ _ASM_EXTABLE(ftrace_test_p6nop, 2b)
+ _ASM_EXTABLE(ftrace_test_nop5, 3b)
+ : "=r"(faulted) : "0" (faulted));
+
+ switch (faulted) {
+ case 0:
+ pr_info("converting mcount calls to 0f 1f 44 00 00\n");
+ memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
+ break;
+ case 1:
+ pr_info("converting mcount calls to 66 66 66 66 90\n");
+ memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
+ break;
+ case 2:
+ pr_info("converting mcount calls to jmp . + 5\n");
+ memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
+ break;
+ }
+
+}
+#endif
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index fa044e1e30a..d2fdb0826df 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
* Leo Duran <leo.duran@amd.com>
*
@@ -1953,6 +1953,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
size_t size,
int dir)
{
+ dma_addr_t flush_addr;
dma_addr_t i, start;
unsigned int pages;
@@ -1960,6 +1961,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
(dma_addr + size > dma_dom->aperture_size))
return;
+ flush_addr = dma_addr;
pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
dma_addr &= PAGE_MASK;
start = dma_addr;
@@ -1974,7 +1976,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
dma_ops_free_addresses(dma_dom, dma_addr, pages);
if (amd_iommu_unmap_flush || dma_dom->need_flush) {
- iommu_flush_pages(&dma_dom->domain, dma_addr, size);
+ iommu_flush_pages(&dma_dom->domain, flush_addr, size);
dma_dom->need_flush = false;
}
}
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 3cc63e2b8dd..3cb482e123d 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
* Author: Joerg Roedel <joerg.roedel@amd.com>
* Leo Duran <leo.duran@amd.com>
*
@@ -194,6 +194,39 @@ static inline unsigned long tbl_size(int entry_size)
return 1UL << shift;
}
+/* Access to l1 and l2 indexed register spaces */
+
+static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
+{
+ u32 val;
+
+ pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
+ pci_read_config_dword(iommu->dev, 0xfc, &val);
+ return val;
+}
+
+static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
+{
+ pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
+ pci_write_config_dword(iommu->dev, 0xfc, val);
+ pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
+}
+
+static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
+{
+ u32 val;
+
+ pci_write_config_dword(iommu->dev, 0xf0, address);
+ pci_read_config_dword(iommu->dev, 0xf4, &val);
+ return val;
+}
+
+static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
+{
+ pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
+ pci_write_config_dword(iommu->dev, 0xf4, val);
+}
+
/****************************************************************************
*
* AMD IOMMU MMIO register space handling functions
@@ -619,6 +652,7 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
{
int cap_ptr = iommu->cap_ptr;
u32 range, misc;
+ int i, j;
pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
&iommu->cap);
@@ -632,6 +666,30 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
iommu->last_device = calc_devid(MMIO_GET_BUS(range),
MMIO_GET_LD(range));
iommu->evt_msi_num = MMIO_MSI_NUM(misc);
+
+ if (!is_rd890_iommu(iommu->dev))
+ return;
+
+ /*
+ * Some rd890 systems may not be fully reconfigured by the BIOS, so
+ * it's necessary for us to store this information so it can be
+ * reprogrammed on resume
+ */
+
+ pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
+ &iommu->stored_addr_lo);
+ pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
+ &iommu->stored_addr_hi);
+
+ /* Low bit locks writes to configuration space */
+ iommu->stored_addr_lo &= ~1;
+
+ for (i = 0; i < 6; i++)
+ for (j = 0; j < 0x12; j++)
+ iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
+
+ for (i = 0; i < 0x83; i++)
+ iommu->stored_l2[i] = iommu_read_l2(iommu, i);
}
/*
@@ -649,29 +707,9 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
struct ivhd_entry *e;
/*
- * First set the recommended feature enable bits from ACPI
- * into the IOMMU control registers
- */
- h->flags & IVHD_FLAG_HT_TUN_EN_MASK ?
- iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
- iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
-
- h->flags & IVHD_FLAG_PASSPW_EN_MASK ?
- iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
- iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
-
- h->flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
- iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
- iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
-
- h->flags & IVHD_FLAG_ISOC_EN_MASK ?
- iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
- iommu_feature_disable(iommu, CONTROL_ISOC_EN);
-
- /*
- * make IOMMU memory accesses cache coherent
+ * First save the recommended feature enable bits from ACPI
*/
- iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
+ iommu->acpi_flags = h->flags;
/*
* Done. Now parse the device entries
@@ -1116,6 +1154,79 @@ static void init_device_table(void)
}
}
+static void iommu_init_flags(struct amd_iommu *iommu)
+{
+ iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
+ iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
+ iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
+
+ iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
+ iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
+ iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
+
+ iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
+ iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
+ iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
+
+ iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
+ iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
+ iommu_feature_disable(iommu, CONTROL_ISOC_EN);
+
+ /*
+ * make IOMMU memory accesses cache coherent
+ */
+ iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
+}
+
+static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
+{
+ int i, j;
+ u32 ioc_feature_control;
+ struct pci_dev *pdev = NULL;
+
+ /* RD890 BIOSes may not have completely reconfigured the iommu */
+ if (!is_rd890_iommu(iommu->dev))
+ return;
+
+ /*
+ * First, we need to ensure that the iommu is enabled. This is
+ * controlled by a register in the northbridge
+ */
+ pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0));
+
+ if (!pdev)
+ return;
+
+ /* Select Northbridge indirect register 0x75 and enable writing */
+ pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
+ pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
+
+ /* Enable the iommu */
+ if (!(ioc_feature_control & 0x1))
+ pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
+
+ pci_dev_put(pdev);
+
+ /* Restore the iommu BAR */
+ pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
+ iommu->stored_addr_lo);
+ pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
+ iommu->stored_addr_hi);
+
+ /* Restore the l1 indirect regs for each of the 6 l1s */
+ for (i = 0; i < 6; i++)
+ for (j = 0; j < 0x12; j++)
+ iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
+
+ /* Restore the l2 indirect regs */
+ for (i = 0; i < 0x83; i++)
+ iommu_write_l2(iommu, i, iommu->stored_l2[i]);
+
+ /* Lock PCI setup registers */
+ pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
+ iommu->stored_addr_lo | 1);
+}
+
/*
* This function finally enables all IOMMUs found in the system after
* they have been initialized
@@ -1126,6 +1237,7 @@ static void enable_iommus(void)
for_each_iommu(iommu) {
iommu_disable(iommu);
+ iommu_init_flags(iommu);
iommu_set_device_table(iommu);
iommu_enable_command_buffer(iommu);
iommu_enable_event_buffer(iommu);
@@ -1150,6 +1262,11 @@ static void disable_iommus(void)
static int amd_iommu_resume(struct sys_device *dev)
{
+ struct amd_iommu *iommu;
+
+ for_each_iommu(iommu)
+ iommu_apply_resume_quirks(iommu);
+
/* re-load the hardware */
enable_iommus();
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index e91e042a5c8..377f5db3b8b 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -307,7 +307,7 @@ void __init early_gart_iommu_check(void)
continue;
ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
- aper_enabled = ctl & AMD64_GARTEN;
+ aper_enabled = ctl & GARTEN;
aper_order = (ctl >> 1) & 7;
aper_size = (32 * 1024 * 1024) << aper_order;
aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
@@ -362,7 +362,7 @@ void __init early_gart_iommu_check(void)
continue;
ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
- ctl &= ~AMD64_GARTEN;
+ ctl &= ~GARTEN;
write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
}
}
@@ -505,8 +505,13 @@ out:
/* Fix up the north bridges */
for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
- int bus;
- int dev_base, dev_limit;
+ int bus, dev_base, dev_limit;
+
+ /*
+ * Don't enable translation yet but enable GART IO and CPU
+ * accesses and set DISTLBWALKPRB since GART table memory is UC.
+ */
+ u32 ctl = DISTLBWALKPRB | aper_order << 1;
bus = bus_dev_ranges[i].bus;
dev_base = bus_dev_ranges[i].dev_base;
@@ -515,10 +520,7 @@ out:
if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
continue;
- /* Don't enable translation yet. That is done later.
- Assume this BIOS didn't initialise the GART so
- just overwrite all previous bits */
- write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1);
+ write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25);
}
}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index f1efebaf551..5c5b8f3dddb 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -306,14 +306,19 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc,
old_cfg = old_desc->chip_data;
- memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+ cfg->vector = old_cfg->vector;
+ cfg->move_in_progress = old_cfg->move_in_progress;
+ cpumask_copy(cfg->domain, old_cfg->domain);
+ cpumask_copy(cfg->old_domain, old_cfg->old_domain);
init_copy_irq_2_pin(old_cfg, cfg, node);
}
-static void free_irq_cfg(struct irq_cfg *old_cfg)
+static void free_irq_cfg(struct irq_cfg *cfg)
{
- kfree(old_cfg);
+ free_cpumask_var(cfg->domain);
+ free_cpumask_var(cfg->old_domain);
+ kfree(cfg);
}
void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 490dac63c2d..f2f9ac7da25 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -545,7 +545,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
}
}
-static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
+void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 tfms, xlvl;
u32 ebx;
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 3624e8a0f71..f668bb1f7d4 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -33,5 +33,6 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
*const __x86_cpu_dev_end[];
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+extern void get_cpu_cap(struct cpuinfo_x86 *c);
#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index 994230d4dc4..4f6f679f279 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -368,16 +368,22 @@ static int __init pcc_cpufreq_do_osc(acpi_handle *handle)
return -ENODEV;
out_obj = output.pointer;
- if (out_obj->type != ACPI_TYPE_BUFFER)
- return -ENODEV;
+ if (out_obj->type != ACPI_TYPE_BUFFER) {
+ ret = -ENODEV;
+ goto out_free;
+ }
errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
- if (errors)
- return -ENODEV;
+ if (errors) {
+ ret = -ENODEV;
+ goto out_free;
+ }
supported = *((u32 *)(out_obj->buffer.pointer + 4));
- if (!(supported & 0x1))
- return -ENODEV;
+ if (!(supported & 0x1)) {
+ ret = -ENODEV;
+ goto out_free;
+ }
out_free:
kfree(output.pointer);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 85f69cdeae1..b4389441efb 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -39,6 +39,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID;
wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
c->cpuid_level = cpuid_eax(0);
+ get_cpu_cap(c);
}
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5e975298fa8..39aaee5c1ab 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -141,6 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
address = (low & MASK_BLKPTR_LO) >> 21;
if (!address)
break;
+
address += MCG_XBLK_ADDR;
} else
++address;
@@ -148,12 +149,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
if (rdmsr_safe(address, &low, &high))
break;
- if (!(high & MASK_VALID_HI)) {
- if (block)
- continue;
- else
- break;
- }
+ if (!(high & MASK_VALID_HI))
+ continue;
if (!(high & MASK_CNTP_HI) ||
(high & MASK_LOCKED_HI))
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index d9368eeda30..169d8804a9f 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -216,7 +216,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
err = sysfs_add_file_to_group(&sys_dev->kobj,
&attr_core_power_limit_count.attr,
thermal_attr_group.name);
- if (cpu_has(c, X86_FEATURE_PTS))
+ if (cpu_has(c, X86_FEATURE_PTS)) {
err = sysfs_add_file_to_group(&sys_dev->kobj,
&attr_package_throttle_count.attr,
thermal_attr_group.name);
@@ -224,6 +224,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
err = sysfs_add_file_to_group(&sys_dev->kobj,
&attr_package_power_limit_count.attr,
thermal_attr_group.name);
+ }
return err;
}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3efdf2870a3..fe73c1844a9 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -102,6 +102,7 @@ struct cpu_hw_events {
*/
struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
int enabled;
int n_events;
@@ -530,7 +531,7 @@ static int x86_pmu_hw_config(struct perf_event *event)
/*
* Setup the hardware configuration for a given attr_type
*/
-static int __hw_perf_event_init(struct perf_event *event)
+static int __x86_pmu_event_init(struct perf_event *event)
{
int err;
@@ -583,7 +584,7 @@ static void x86_pmu_disable_all(void)
}
}
-void hw_perf_disable(void)
+static void x86_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -618,7 +619,7 @@ static void x86_pmu_enable_all(int added)
}
}
-static const struct pmu pmu;
+static struct pmu pmu;
static inline int is_x86_event(struct perf_event *event)
{
@@ -800,10 +801,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
hwc->last_tag == cpuc->tags[i];
}
-static int x86_pmu_start(struct perf_event *event);
-static void x86_pmu_stop(struct perf_event *event);
+static void x86_pmu_start(struct perf_event *event, int flags);
+static void x86_pmu_stop(struct perf_event *event, int flags);
-void hw_perf_enable(void)
+static void x86_pmu_enable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct perf_event *event;
@@ -839,7 +840,14 @@ void hw_perf_enable(void)
match_prev_assignment(hwc, cpuc, i))
continue;
- x86_pmu_stop(event);
+ /*
+ * Ensure we don't accidentally enable a stopped
+ * counter simply because we rescheduled.
+ */
+ if (hwc->state & PERF_HES_STOPPED)
+ hwc->state |= PERF_HES_ARCH;
+
+ x86_pmu_stop(event, PERF_EF_UPDATE);
}
for (i = 0; i < cpuc->n_events; i++) {
@@ -851,7 +859,10 @@ void hw_perf_enable(void)
else if (i < n_running)
continue;
- x86_pmu_start(event);
+ if (hwc->state & PERF_HES_ARCH)
+ continue;
+
+ x86_pmu_start(event, PERF_EF_RELOAD);
}
cpuc->n_added = 0;
perf_events_lapic_init();
@@ -952,15 +963,12 @@ static void x86_pmu_enable_event(struct perf_event *event)
}
/*
- * activate a single event
+ * Add a single event to the PMU.
*
* The event is added to the group of enabled events
* but only if it can be scehduled with existing events.
- *
- * Called with PMU disabled. If successful and return value 1,
- * then guaranteed to call perf_enable() and hw_perf_enable()
*/
-static int x86_pmu_enable(struct perf_event *event)
+static int x86_pmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc;
@@ -969,57 +977,67 @@ static int x86_pmu_enable(struct perf_event *event)
hwc = &event->hw;
+ perf_pmu_disable(event->pmu);
n0 = cpuc->n_events;
- n = collect_events(cpuc, event, false);
- if (n < 0)
- return n;
+ ret = n = collect_events(cpuc, event, false);
+ if (ret < 0)
+ goto out;
+
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+ if (!(flags & PERF_EF_START))
+ hwc->state |= PERF_HES_ARCH;
/*
* If group events scheduling transaction was started,
* skip the schedulability test here, it will be peformed
- * at commit time(->commit_txn) as a whole
+ * at commit time (->commit_txn) as a whole
*/
if (cpuc->group_flag & PERF_EVENT_TXN)
- goto out;
+ goto done_collect;
ret = x86_pmu.schedule_events(cpuc, n, assign);
if (ret)
- return ret;
+ goto out;
/*
* copy new assignment, now we know it is possible
* will be used by hw_perf_enable()
*/
memcpy(cpuc->assign, assign, n*sizeof(int));
-out:
+done_collect:
cpuc->n_events = n;
cpuc->n_added += n - n0;
cpuc->n_txn += n - n0;
- return 0;
+ ret = 0;
+out:
+ perf_pmu_enable(event->pmu);
+ return ret;
}
-static int x86_pmu_start(struct perf_event *event)
+static void x86_pmu_start(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx = event->hw.idx;
- if (idx == -1)
- return -EAGAIN;
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ return;
+
+ if (WARN_ON_ONCE(idx == -1))
+ return;
+
+ if (flags & PERF_EF_RELOAD) {
+ WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+ x86_perf_event_set_period(event);
+ }
+
+ event->hw.state = 0;
- x86_perf_event_set_period(event);
cpuc->events[idx] = event;
__set_bit(idx, cpuc->active_mask);
+ __set_bit(idx, cpuc->running);
x86_pmu.enable(event);
perf_event_update_userpage(event);
-
- return 0;
-}
-
-static void x86_pmu_unthrottle(struct perf_event *event)
-{
- int ret = x86_pmu_start(event);
- WARN_ON_ONCE(ret);
}
void perf_event_print_debug(void)
@@ -1076,27 +1094,29 @@ void perf_event_print_debug(void)
local_irq_restore(flags);
}
-static void x86_pmu_stop(struct perf_event *event)
+static void x86_pmu_stop(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
- if (!__test_and_clear_bit(idx, cpuc->active_mask))
- return;
-
- x86_pmu.disable(event);
-
- /*
- * Drain the remaining delta count out of a event
- * that we are disabling:
- */
- x86_perf_event_update(event);
+ if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
+ x86_pmu.disable(event);
+ cpuc->events[hwc->idx] = NULL;
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+ }
- cpuc->events[idx] = NULL;
+ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ /*
+ * Drain the remaining delta count out of a event
+ * that we are disabling:
+ */
+ x86_perf_event_update(event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
}
-static void x86_pmu_disable(struct perf_event *event)
+static void x86_pmu_del(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int i;
@@ -1109,7 +1129,7 @@ static void x86_pmu_disable(struct perf_event *event)
if (cpuc->group_flag & PERF_EVENT_TXN)
return;
- x86_pmu_stop(event);
+ x86_pmu_stop(event, PERF_EF_UPDATE);
for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event_list[i]) {
@@ -1132,7 +1152,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct perf_event *event;
- struct hw_perf_event *hwc;
int idx, handled = 0;
u64 val;
@@ -1141,11 +1160,18 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- if (!test_bit(idx, cpuc->active_mask))
+ if (!test_bit(idx, cpuc->active_mask)) {
+ /*
+ * Though we deactivated the counter some cpus
+ * might still deliver spurious interrupts still
+ * in flight. Catch them:
+ */
+ if (__test_and_clear_bit(idx, cpuc->running))
+ handled++;
continue;
+ }
event = cpuc->events[idx];
- hwc = &event->hw;
val = x86_perf_event_update(event);
if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
@@ -1161,7 +1187,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
continue;
if (perf_event_overflow(event, 1, &data, regs))
- x86_pmu_stop(event);
+ x86_pmu_stop(event, 0);
}
if (handled)
@@ -1170,25 +1196,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
return handled;
}
-void smp_perf_pending_interrupt(struct pt_regs *regs)
-{
- irq_enter();
- ack_APIC_irq();
- inc_irq_stat(apic_pending_irqs);
- perf_event_do_pending();
- irq_exit();
-}
-
-void set_perf_event_pending(void)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
- if (!x86_pmu.apic || !x86_pmu_initialized())
- return;
-
- apic->send_IPI_self(LOCAL_PENDING_VECTOR);
-#endif
-}
-
void perf_events_lapic_init(void)
{
if (!x86_pmu.apic || !x86_pmu_initialized())
@@ -1378,7 +1385,6 @@ void __init init_hw_perf_events(void)
x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
}
x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
- perf_max_events = x86_pmu.num_counters;
if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
@@ -1414,6 +1420,7 @@ void __init init_hw_perf_events(void)
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
+ perf_pmu_register(&pmu);
perf_cpu_notifier(x86_pmu_notifier);
}
@@ -1427,10 +1434,11 @@ static inline void x86_pmu_read(struct perf_event *event)
* Set the flag to make pmu::enable() not perform the
* schedulability test, it will be performed at commit time
*/
-static void x86_pmu_start_txn(const struct pmu *pmu)
+static void x86_pmu_start_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ perf_pmu_disable(pmu);
cpuc->group_flag |= PERF_EVENT_TXN;
cpuc->n_txn = 0;
}
@@ -1440,7 +1448,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
* Clear the flag and pmu::enable() will perform the
* schedulability test.
*/
-static void x86_pmu_cancel_txn(const struct pmu *pmu)
+static void x86_pmu_cancel_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1450,6 +1458,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
*/
cpuc->n_added -= cpuc->n_txn;
cpuc->n_events -= cpuc->n_txn;
+ perf_pmu_enable(pmu);
}
/*
@@ -1457,7 +1466,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
* Perform the group schedulability test as a whole
* Return 0 if success
*/
-static int x86_pmu_commit_txn(const struct pmu *pmu)
+static int x86_pmu_commit_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int assign[X86_PMC_IDX_MAX];
@@ -1479,22 +1488,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
memcpy(cpuc->assign, assign, n*sizeof(int));
cpuc->group_flag &= ~PERF_EVENT_TXN;
-
+ perf_pmu_enable(pmu);
return 0;
}
-static const struct pmu pmu = {
- .enable = x86_pmu_enable,
- .disable = x86_pmu_disable,
- .start = x86_pmu_start,
- .stop = x86_pmu_stop,
- .read = x86_pmu_read,
- .unthrottle = x86_pmu_unthrottle,
- .start_txn = x86_pmu_start_txn,
- .cancel_txn = x86_pmu_cancel_txn,
- .commit_txn = x86_pmu_commit_txn,
-};
-
/*
* validate that we can schedule this event
*/
@@ -1569,12 +1566,22 @@ out:
return ret;
}
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+int x86_pmu_event_init(struct perf_event *event)
{
- const struct pmu *tmp;
+ struct pmu *tmp;
int err;
- err = __hw_perf_event_init(event);
+ switch (event->attr.type) {
+ case PERF_TYPE_RAW:
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_HW_CACHE:
+ break;
+
+ default:
+ return -ENOENT;
+ }
+
+ err = __x86_pmu_event_init(event);
if (!err) {
/*
* we temporarily connect event to its pmu
@@ -1594,26 +1601,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
if (err) {
if (event->destroy)
event->destroy(event);
- return ERR_PTR(err);
}
- return &pmu;
+ return err;
}
-/*
- * callchain support
- */
+static struct pmu pmu = {
+ .pmu_enable = x86_pmu_enable,
+ .pmu_disable = x86_pmu_disable,
-static inline
-void callchain_store(struct perf_callchain_entry *entry, u64 ip)
-{
- if (entry->nr < PERF_MAX_STACK_DEPTH)
- entry->ip[entry->nr++] = ip;
-}
+ .event_init = x86_pmu_event_init,
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
+ .add = x86_pmu_add,
+ .del = x86_pmu_del,
+ .start = x86_pmu_start,
+ .stop = x86_pmu_stop,
+ .read = x86_pmu_read,
+ .start_txn = x86_pmu_start_txn,
+ .cancel_txn = x86_pmu_cancel_txn,
+ .commit_txn = x86_pmu_commit_txn,
+};
+
+/*
+ * callchain support
+ */
static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
@@ -1635,7 +1647,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
{
struct perf_callchain_entry *entry = data;
- callchain_store(entry, addr);
+ perf_callchain_store(entry, addr);
}
static const struct stacktrace_ops backtrace_ops = {
@@ -1646,11 +1658,15 @@ static const struct stacktrace_ops backtrace_ops = {
.walk_stack = print_context_stack_bp,
};
-static void
-perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
- callchain_store(entry, PERF_CONTEXT_KERNEL);
- callchain_store(entry, regs->ip);
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ /* TODO: We don't support guest os callchain now */
+ return;
+ }
+
+ perf_callchain_store(entry, regs->ip);
dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
}
@@ -1679,7 +1695,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
if (fp < compat_ptr(regs->sp))
break;
- callchain_store(entry, frame.return_address);
+ perf_callchain_store(entry, frame.return_address);
fp = compat_ptr(frame.next_frame);
}
return 1;
@@ -1692,19 +1708,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
}
#endif
-static void
-perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
struct stack_frame frame;
const void __user *fp;
- if (!user_mode(regs))
- regs = task_pt_regs(current);
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ /* TODO: We don't support guest os callchain now */
+ return;
+ }
fp = (void __user *)regs->bp;
- callchain_store(entry, PERF_CONTEXT_USER);
- callchain_store(entry, regs->ip);
+ perf_callchain_store(entry, regs->ip);
if (perf_callchain_user32(regs, entry))
return;
@@ -1721,52 +1738,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
if ((unsigned long)fp < regs->sp)
break;
- callchain_store(entry, frame.return_address);
+ perf_callchain_store(entry, frame.return_address);
fp = frame.next_frame;
}
}
-static void
-perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
- int is_user;
-
- if (!regs)
- return;
-
- is_user = user_mode(regs);
-
- if (is_user && current->state != TASK_RUNNING)
- return;
-
- if (!is_user)
- perf_callchain_kernel(regs, entry);
-
- if (current->mm)
- perf_callchain_user(regs, entry);
-}
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
- struct perf_callchain_entry *entry;
-
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
- /* TODO: We don't support guest os callchain now */
- return NULL;
- }
-
- if (in_nmi())
- entry = &__get_cpu_var(pmc_nmi_entry);
- else
- entry = &__get_cpu_var(pmc_irq_entry);
-
- entry->nr = 0;
-
- perf_do_callchain(regs, entry);
-
- return entry;
-}
-
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
unsigned long ip;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index c2897b7b4a3..46d58448c3a 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
- [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
+ [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
@@ -66,7 +66,7 @@ static __initconst const u64 amd_hw_cache_event_ids
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */
- [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
+ [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index ee05c90012d..c8f5c088cad 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
struct cpu_hw_events *cpuc;
int bit, loops;
u64 status;
- int handled = 0;
+ int handled;
perf_sample_data_init(&data, 0);
cpuc = &__get_cpu_var(cpu_hw_events);
intel_pmu_disable_all();
- intel_pmu_drain_bts_buffer();
+ handled = intel_pmu_drain_bts_buffer();
status = intel_pmu_get_status();
if (!status) {
intel_pmu_enable_all(0);
- return 0;
+ return handled;
}
loops = 0;
@@ -763,7 +763,7 @@ again:
data.period = event->hw.last_period;
if (perf_event_overflow(event, 1, &data, regs))
- x86_pmu_stop(event);
+ x86_pmu_stop(event, 0);
}
/*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 18018d1311c..4977f9c400e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void)
update_debugctlmsr(debugctlmsr);
}
-static void intel_pmu_drain_bts_buffer(void)
+static int intel_pmu_drain_bts_buffer(void)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct debug_store *ds = cpuc->ds;
@@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void)
struct pt_regs regs;
if (!event)
- return;
+ return 0;
if (!ds)
- return;
+ return 0;
at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
top = (struct bts_record *)(unsigned long)ds->bts_index;
if (top <= at)
- return;
+ return 0;
ds->bts_index = ds->bts_buffer_base;
@@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void)
perf_prepare_sample(&header, &data, event, &regs);
if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
- return;
+ return 1;
for (; at < top; at++) {
data.ip = at->from;
@@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void)
/* There's new data available. */
event->hw.interrupts++;
event->pending_kill = POLL_IN;
+ return 1;
}
/*
@@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
regs.flags &= ~PERF_EFLAGS_EXACT;
if (perf_event_overflow(event, 1, &data, &regs))
- x86_pmu_stop(event);
+ x86_pmu_stop(event, 0);
}
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index b560db3305b..81400b93e69 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -18,6 +18,8 @@
struct p4_event_bind {
unsigned int opcode; /* Event code and ESCR selector */
unsigned int escr_msr[2]; /* ESCR MSR for this event */
+ unsigned int escr_emask; /* valid ESCR EventMask bits */
+ unsigned int shared; /* event is shared across threads */
char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */
};
@@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = {
[P4_EVENT_TC_DELIVER_MODE] = {
.opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
.escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
+ .shared = 1,
.cntr = { {4, 5, -1}, {6, 7, -1} },
},
[P4_EVENT_BPU_FETCH_REQUEST] = {
.opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
.escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
.cntr = { {0, -1, -1}, {2, -1, -1} },
},
[P4_EVENT_ITLB_REFERENCE] = {
.opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
.escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
.cntr = { {0, -1, -1}, {2, -1, -1} },
},
[P4_EVENT_MEMORY_CANCEL] = {
.opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
.escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
.cntr = { {8, 9, -1}, {10, 11, -1} },
},
[P4_EVENT_MEMORY_COMPLETE] = {
.opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
.escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
.cntr = { {8, 9, -1}, {10, 11, -1} },
},
[P4_EVENT_LOAD_PORT_REPLAY] = {
.opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
.escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
.cntr = { {8, 9, -1}, {10, 11, -1} },
},
[P4_EVENT_STORE_PORT_REPLAY] = {
.opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
.escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
.cntr = { {8, 9, -1}, {10, 11, -1} },
},
[P4_EVENT_MOB_LOAD_REPLAY] = {
.opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
.escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
.cntr = { {0, -1, -1}, {2, -1, -1} },
},
[P4_EVENT_PAGE_WALK_TYPE] = {
.opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
.escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
+ .shared = 1,
.cntr = { {0, -1, -1}, {2, -1, -1} },
},
[P4_EVENT_BSQ_CACHE_REFERENCE] = {
.opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
.cntr = { {0, -1, -1}, {2, -1, -1} },
},
[P4_EVENT_IOQ_ALLOCATION] = {
.opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
.cntr = { {0, -1, -1}, {2, -1, -1} },
},
[P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */
.opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
.escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
.cntr = { {2, -1, -1}, {3, -1, -1} },
},
[P4_EVENT_FSB_DATA_ACTIVITY] = {
.opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
+ .shared = 1,
.cntr = { {0, -1, -1}, {2, -1, -1} },
},
[P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */
.opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
.cntr = { {0, -1, -1}, {1, -1, -1} },
},
[P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */
.opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
.escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
.cntr = { {2, -1, -1}, {3, -1, -1} },
},
[P4_EVENT_SSE_INPUT_ASSIST] = {
.opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
+ .shared = 1,
.cntr = { {8, 9, -1}, {10, 11, -1} },
},
[P4_EVENT_PACKED_SP_UOP] = {
.opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
+ .shared = 1,
.cntr = { {8, 9, -1}, {10, 11, -1} },
},
[P4_EVENT_PACKED_DP_UOP] = {
.opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
+ .shared = 1,
.cntr = { {8, 9, -1}, {10, 11, -1} },
},
[P4_EVENT_SCALAR_SP_UOP] = {
.opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
+ .shared = 1,
.cntr = { {8, 9, -1}, {10, 11, -1} },
},
[P4_EVENT_SCALAR_DP_UOP] = {
.opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
+ .shared = 1,
.cntr = { {8, 9, -1}, {10, 11, -1} },
},
[P4_EVENT_64BIT_MMX_UOP] = {
.opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
+ .shared = 1,
.cntr = { {8, 9, -1}, {10, 11, -1} },
},
[P4_EVENT_128BIT_MMX_UOP] = {
.opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
+ .shared = 1,
.cntr = { {8, 9, -1}, {10, 11, -1} },
},
[P4_EVENT_X87_FP_UOP] = {
.opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
+ .shared = 1,
.cntr = { {8, 9, -1}, {10, 11, -1} },
},
[P4_EVENT_TC_MISC] = {
.opcode = P4_OPCODE(P4_EVENT_TC_MISC),
.escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
.cntr = { {4, 5, -1}, {6, 7, -1} },
},
[P4_EVENT_GLOBAL_POWER_EVENTS] = {
.opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
.cntr = { {0, -1, -1}, {2, -1, -1} },
},
[P4_EVENT_TC_MS_XFER] = {
.opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
.escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
.cntr = { {4, 5, -1}, {6, 7, -1} },
},
[P4_EVENT_UOP_QUEUE_WRITES] = {
.opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
.escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
.cntr = { {4, 5, -1}, {6, 7, -1} },
},
[P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
.opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
.escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
.cntr = { {4, 5, -1}, {6, 7, -1} },
},
[P4_EVENT_RETIRED_BRANCH_TYPE] = {
.opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
.escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
.cntr = { {4, 5, -1}, {6, 7, -1} },
},
[P4_EVENT_RESOURCE_STALL] = {
.opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
.escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
.cntr = { {12, 13, 16}, {14, 15, 17} },
},
[P4_EVENT_WC_BUFFER] = {
.opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
.escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
+ .shared = 1,
.cntr = { {8, 9, -1}, {10, 11, -1} },
},
[P4_EVENT_B2B_CYCLES] = {
.opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+ .escr_emask = 0,
.cntr = { {0, -1, -1}, {2, -1, -1} },
},
[P4_EVENT_BNR] = {
.opcode = P4_OPCODE(P4_EVENT_BNR),
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+ .escr_emask = 0,
.cntr = { {0, -1, -1}, {2, -1, -1} },
},
[P4_EVENT_SNOOP] = {
.opcode = P4_OPCODE(P4_EVENT_SNOOP),
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+ .escr_emask = 0,
.cntr = { {0, -1, -1}, {2, -1, -1} },
},
[P4_EVENT_RESPONSE] = {
.opcode = P4_OPCODE(P4_EVENT_RESPONSE),
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+ .escr_emask = 0,
.cntr = { {0, -1, -1}, {2, -1, -1} },
},
[P4_EVENT_FRONT_END_EVENT] = {
.opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
.cntr = { {12, 13, 16}, {14, 15, 17} },
},
[P4_EVENT_EXECUTION_EVENT] = {
.opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
.cntr = { {12, 13, 16}, {14, 15, 17} },
},
[P4_EVENT_REPLAY_EVENT] = {
.opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
.cntr = { {12, 13, 16}, {14, 15, 17} },
},
[P4_EVENT_INSTR_RETIRED] = {
.opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
.cntr = { {12, 13, 16}, {14, 15, 17} },
},
[P4_EVENT_UOPS_RETIRED] = {
.opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
.cntr = { {12, 13, 16}, {14, 15, 17} },
},
[P4_EVENT_UOP_TYPE] = {
.opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
.escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
.cntr = { {12, 13, 16}, {14, 15, 17} },
},
[P4_EVENT_BRANCH_RETIRED] = {
.opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
.cntr = { {12, 13, 16}, {14, 15, 17} },
},
[P4_EVENT_MISPRED_BRANCH_RETIRED] = {
.opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
.cntr = { {12, 13, 16}, {14, 15, 17} },
},
[P4_EVENT_X87_ASSIST] = {
.opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
.cntr = { {12, 13, 16}, {14, 15, 17} },
},
[P4_EVENT_MACHINE_CLEAR] = {
.opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
.cntr = { {12, 13, 16}, {14, 15, 17} },
},
[P4_EVENT_INSTR_COMPLETED] = {
.opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+ .escr_emask =
+ P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
.cntr = { {12, 13, 16}, {14, 15, 17} },
},
};
@@ -428,29 +634,73 @@ static u64 p4_pmu_event_map(int hw_event)
return config;
}
+/* check cpu model specifics */
+static bool p4_event_match_cpu_model(unsigned int event_idx)
+{
+ /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */
+ if (event_idx == P4_EVENT_INSTR_COMPLETED) {
+ if (boot_cpu_data.x86_model != 3 &&
+ boot_cpu_data.x86_model != 4 &&
+ boot_cpu_data.x86_model != 6)
+ return false;
+ }
+
+ /*
+ * For info
+ * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2
+ */
+
+ return true;
+}
+
static int p4_validate_raw_event(struct perf_event *event)
{
- unsigned int v;
+ unsigned int v, emask;
- /* user data may have out-of-bound event index */
+ /* User data may have out-of-bound event index */
v = p4_config_unpack_event(event->attr.config);
- if (v >= ARRAY_SIZE(p4_event_bind_map)) {
- pr_warning("P4 PMU: Unknown event code: %d\n", v);
+ if (v >= ARRAY_SIZE(p4_event_bind_map))
+ return -EINVAL;
+
+ /* It may be unsupported: */
+ if (!p4_event_match_cpu_model(v))
return -EINVAL;
+
+ /*
+ * NOTE: P4_CCCR_THREAD_ANY has not the same meaning as
+ * in Architectural Performance Monitoring, it means not
+ * on _which_ logical cpu to count but rather _when_, ie it
+ * depends on logical cpu state -- count event if one cpu active,
+ * none, both or any, so we just allow user to pass any value
+ * desired.
+ *
+ * In turn we always set Tx_OS/Tx_USR bits bound to logical
+ * cpu without their propagation to another cpu
+ */
+
+ /*
+ * if an event is shared accross the logical threads
+ * the user needs special permissions to be able to use it
+ */
+ if (p4_event_bind_map[v].shared) {
+ if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+ return -EACCES;
}
+ /* ESCR EventMask bits may be invalid */
+ emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
+ if (emask & ~p4_event_bind_map[v].escr_emask)
+ return -EINVAL;
+
/*
- * it may have some screwed PEBS bits
+ * it may have some invalid PEBS bits
*/
- if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
- pr_warning("P4 PMU: PEBS are not supported yet\n");
+ if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
return -EINVAL;
- }
+
v = p4_config_unpack_metric(event->attr.config);
- if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
- pr_warning("P4 PMU: Unknown metric code: %d\n", v);
+ if (v >= ARRAY_SIZE(p4_pebs_bind_map))
return -EINVAL;
- }
return 0;
}
@@ -478,27 +728,21 @@ static int p4_hw_config(struct perf_event *event)
if (event->attr.type == PERF_TYPE_RAW) {
+ /*
+ * Clear bits we reserve to be managed by kernel itself
+ * and never allowed from a user space
+ */
+ event->attr.config &= P4_CONFIG_MASK;
+
rc = p4_validate_raw_event(event);
if (rc)
goto out;
/*
- * We don't control raw events so it's up to the caller
- * to pass sane values (and we don't count the thread number
- * on HT machine but allow HT-compatible specifics to be
- * passed on)
- *
* Note that for RAW events we allow user to use P4_CCCR_RESERVED
* bits since we keep additional info here (for cache events and etc)
- *
- * XXX: HT wide things should check perf_paranoid_cpu() &&
- * CAP_SYS_ADMIN
*/
- event->hw.config |= event->attr.config &
- (p4_config_pack_escr(P4_ESCR_MASK_HT) |
- p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
-
- event->hw.config &= ~P4_CCCR_FORCE_OVF;
+ event->hw.config |= event->attr.config;
}
rc = x86_setup_perfctr(event);
@@ -660,8 +904,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
int overflow;
- if (!test_bit(idx, cpuc->active_mask))
+ if (!test_bit(idx, cpuc->active_mask)) {
+ /* catch in-flight IRQs */
+ if (__test_and_clear_bit(idx, cpuc->running))
+ handled++;
continue;
+ }
event = cpuc->events[idx];
hwc = &event->hw;
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 2c77931473f..c7f64e6f537 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -31,6 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
const struct cpuid_bit *cb;
static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
+ { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 },
{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 },
{ X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 },
{ X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 },
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 17be5ec7cbb..c375c79065f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1023,9 +1023,9 @@ apicinterrupt ERROR_APIC_VECTOR \
apicinterrupt SPURIOUS_APIC_VECTOR \
spurious_interrupt smp_spurious_interrupt
-#ifdef CONFIG_PERF_EVENTS
-apicinterrupt LOCAL_PENDING_VECTOR \
- perf_pending_interrupt smp_perf_pending_interrupt
+#ifdef CONFIG_IRQ_WORK
+apicinterrupt IRQ_WORK_VECTOR \
+ irq_work_interrupt smp_irq_work_interrupt
#endif
/*
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index cd37469b54e..3afb33f14d2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
return mod_code_status;
}
-
-
-
-static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
-
static unsigned char *ftrace_nop_replace(void)
{
- return ftrace_nop;
+ return ideal_nop5;
}
static int
@@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
int __init ftrace_dyn_arch_init(void *data)
{
- extern const unsigned char ftrace_test_p6nop[];
- extern const unsigned char ftrace_test_nop5[];
- extern const unsigned char ftrace_test_jmp[];
- int faulted = 0;
-
- /*
- * There is no good nop for all x86 archs.
- * We will default to using the P6_NOP5, but first we
- * will test to make sure that the nop will actually
- * work on this CPU. If it faults, we will then
- * go to a lesser efficient 5 byte nop. If that fails
- * we then just use a jmp as our nop. This isn't the most
- * efficient nop, but we can not use a multi part nop
- * since we would then risk being preempted in the middle
- * of that nop, and if we enabled tracing then, it might
- * cause a system crash.
- *
- * TODO: check the cpuid to determine the best nop.
- */
- asm volatile (
- "ftrace_test_jmp:"
- "jmp ftrace_test_p6nop\n"
- "nop\n"
- "nop\n"
- "nop\n" /* 2 byte jmp + 3 bytes */
- "ftrace_test_p6nop:"
- P6_NOP5
- "jmp 1f\n"
- "ftrace_test_nop5:"
- ".byte 0x66,0x66,0x66,0x66,0x90\n"
- "1:"
- ".section .fixup, \"ax\"\n"
- "2: movl $1, %0\n"
- " jmp ftrace_test_nop5\n"
- "3: movl $2, %0\n"
- " jmp 1b\n"
- ".previous\n"
- _ASM_EXTABLE(ftrace_test_p6nop, 2b)
- _ASM_EXTABLE(ftrace_test_nop5, 3b)
- : "=r"(faulted) : "0" (faulted));
-
- switch (faulted) {
- case 0:
- pr_info("converting mcount calls to 0f 1f 44 00 00\n");
- memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
- break;
- case 1:
- pr_info("converting mcount calls to 66 66 66 66 90\n");
- memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
- break;
- case 2:
- pr_info("converting mcount calls to jmp . + 5\n");
- memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
- break;
- }
-
/* The return code is retured via data */
*(unsigned long *)data = 0;
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 410fdb3f193..7494999141b 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -506,7 +506,7 @@ static int hpet_assign_irq(struct hpet_dev *dev)
{
unsigned int irq;
- irq = create_irq();
+ irq = create_irq_nr(0, -1);
if (!irq)
return -EINVAL;
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index a474ec37c32..ff15c9dcc25 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -206,11 +206,27 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp)
int arch_bp_generic_fields(int x86_len, int x86_type,
int *gen_len, int *gen_type)
{
- /* Len */
- switch (x86_len) {
- case X86_BREAKPOINT_LEN_X:
+ /* Type */
+ switch (x86_type) {
+ case X86_BREAKPOINT_EXECUTE:
+ if (x86_len != X86_BREAKPOINT_LEN_X)
+ return -EINVAL;
+
+ *gen_type = HW_BREAKPOINT_X;
*gen_len = sizeof(long);
+ return 0;
+ case X86_BREAKPOINT_WRITE:
+ *gen_type = HW_BREAKPOINT_W;
break;
+ case X86_BREAKPOINT_RW:
+ *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Len */
+ switch (x86_len) {
case X86_BREAKPOINT_LEN_1:
*gen_len = HW_BREAKPOINT_LEN_1;
break;
@@ -229,21 +245,6 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
return -EINVAL;
}
- /* Type */
- switch (x86_type) {
- case X86_BREAKPOINT_EXECUTE:
- *gen_type = HW_BREAKPOINT_X;
- break;
- case X86_BREAKPOINT_WRITE:
- *gen_type = HW_BREAKPOINT_W;
- break;
- case X86_BREAKPOINT_RW:
- *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
- break;
- default:
- return -EINVAL;
- }
-
return 0;
}
@@ -316,9 +317,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
ret = -EINVAL;
switch (info->len) {
- case X86_BREAKPOINT_LEN_X:
- align = sizeof(long) -1;
- break;
case X86_BREAKPOINT_LEN_1:
align = 0;
break;
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 91fd0c70a18..44edb03fc9e 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -67,10 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
seq_printf(p, " Performance monitoring interrupts\n");
- seq_printf(p, "%*s: ", prec, "PND");
+ seq_printf(p, "%*s: ", prec, "IWI");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
- seq_printf(p, " Performance pending work\n");
+ seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
+ seq_printf(p, " IRQ work interrupts\n");
#endif
if (x86_platform_ipi_callback) {
seq_printf(p, "%*s: ", prec, "PLT");
@@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->apic_timer_irqs;
sum += irq_stats(cpu)->irq_spurious_count;
sum += irq_stats(cpu)->apic_perf_irqs;
- sum += irq_stats(cpu)->apic_pending_irqs;
+ sum += irq_stats(cpu)->apic_irq_work_irqs;
#endif
if (x86_platform_ipi_callback)
sum += irq_stats(cpu)->x86_platform_ipis;
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
new file mode 100644
index 00000000000..ca8f703a1e7
--- /dev/null
+++ b/arch/x86/kernel/irq_work.c
@@ -0,0 +1,30 @@
+/*
+ * x86 specific code for irq_work
+ *
+ * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/irq_work.h>
+#include <linux/hardirq.h>
+#include <asm/apic.h>
+
+void smp_irq_work_interrupt(struct pt_regs *regs)
+{
+ irq_enter();
+ ack_APIC_irq();
+ inc_irq_stat(apic_irq_work_irqs);
+ irq_work_run();
+ irq_exit();
+}
+
+void arch_irq_work_raise(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ if (!cpu_has_apic)
+ return;
+
+ apic->send_IPI_self(IRQ_WORK_VECTOR);
+ apic_wait_icr_idle();
+#endif
+}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 990ae7cfc57..713969b9266 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -224,9 +224,9 @@ static void __init apic_intr_init(void)
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
- /* Performance monitoring interrupts: */
-# ifdef CONFIG_PERF_EVENTS
- alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
+ /* IRQ work interrupts: */
+# ifdef CONFIG_IRQ_WORK
+ alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt);
# endif
#endif
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
new file mode 100644
index 00000000000..961b6b30ba9
--- /dev/null
+++ b/arch/x86/kernel/jump_label.c
@@ -0,0 +1,50 @@
+/*
+ * jump label x86 support
+ *
+ * Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
+ *
+ */
+#include <linux/jump_label.h>
+#include <linux/memory.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/cpu.h>
+#include <asm/kprobes.h>
+#include <asm/alternative.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+union jump_code_union {
+ char code[JUMP_LABEL_NOP_SIZE];
+ struct {
+ char jump;
+ int offset;
+ } __attribute__((packed));
+};
+
+void arch_jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ union jump_code_union code;
+
+ if (type == JUMP_LABEL_ENABLE) {
+ code.jump = 0xe9;
+ code.offset = entry->target -
+ (entry->code + JUMP_LABEL_NOP_SIZE);
+ } else
+ memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE);
+ get_online_cpus();
+ mutex_lock(&text_mutex);
+ text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
+ mutex_unlock(&text_mutex);
+ put_online_cpus();
+}
+
+void arch_jump_label_text_poke_early(jump_label_t addr)
+{
+ text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE);
+}
+
+#endif
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 770ebfb349e..1cbd54c0df9 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -230,9 +230,6 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
return 0;
}
-/* Dummy buffers for kallsyms_lookup */
-static char __dummy_buf[KSYM_NAME_LEN];
-
/* Check if paddr is at an instruction boundary */
static int __kprobes can_probe(unsigned long paddr)
{
@@ -241,7 +238,7 @@ static int __kprobes can_probe(unsigned long paddr)
struct insn insn;
kprobe_opcode_t buf[MAX_INSN_SIZE];
- if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
+ if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
return 0;
/* Decode instructions */
@@ -1129,7 +1126,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
*(unsigned long *)addr = val;
}
-void __kprobes kprobes_optinsn_template_holder(void)
+static void __used __kprobes kprobes_optinsn_template_holder(void)
{
asm volatile (
".global optprobe_template_entry\n"
@@ -1221,7 +1218,8 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
}
/* Check whether the address range is reserved */
if (ftrace_text_reserved(src, src + len - 1) ||
- alternatives_text_reserved(src, src + len - 1))
+ alternatives_text_reserved(src, src + len - 1) ||
+ jump_label_text_reserved(src, src + len - 1))
return -EBUSY;
return len;
@@ -1269,11 +1267,9 @@ static int __kprobes can_optimize(unsigned long paddr)
unsigned long addr, size = 0, offset = 0;
struct insn insn;
kprobe_opcode_t buf[MAX_INSN_SIZE];
- /* Dummy buffers for lookup_symbol_attrs */
- static char __dummy_buf[KSYM_NAME_LEN];
/* Lookup symbol including addr */
- if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf))
+ if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
return 0;
/* Check there is enough space for a relative jump. */
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index e0bc186d750..8f295609173 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -239,11 +239,13 @@ int module_finalize(const Elf_Ehdr *hdr,
apply_paravirt(pseg, pseg + para->sh_size);
}
- return module_bug_finalize(hdr, sechdrs, me);
+ /* make jump label nops */
+ jump_label_apply_nops(me);
+
+ return 0;
}
void module_arch_cleanup(struct module *mod)
{
alternatives_smp_module_del(mod);
- module_bug_cleanup(mod);
}
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 67e5665ce42..c562207b1b3 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -607,7 +607,7 @@ static void gart_fixup_northbridges(struct sys_device *dev)
* Don't enable translations just yet. That is the next
* step. Restore the pre-suspend aperture settings.
*/
- pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1);
+ gart_set_size_and_enable(dev, aperture_order);
pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25);
}
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 77eea0362a4..df770ad99b3 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -112,6 +112,7 @@
#include <asm/numa_64.h>
#endif
#include <asm/mce.h>
+#include <asm/alternative.h>
/*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -726,6 +727,7 @@ void __init setup_arch(char **cmdline_p)
{
int acpi = 0;
int k8 = 0;
+ unsigned long flags;
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
@@ -1071,6 +1073,10 @@ void __init setup_arch(char **cmdline_p)
x86_init.oem.banner();
mcheck_init();
+
+ local_irq_save(flags);
+ arch_init_ideal_nop5();
+ local_irq_restore(flags);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 4496315eb22..0c40d8b7241 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -104,10 +104,14 @@ int __init notsc_setup(char *str)
__setup("notsc", notsc_setup);
+static int no_sched_irq_time;
+
static int __init tsc_setup(char *str)
{
if (!strcmp(str, "reliable"))
tsc_clocksource_reliable = 1;
+ if (!strncmp(str, "noirqtime", 9))
+ no_sched_irq_time = 1;
return 1;
}
@@ -801,6 +805,7 @@ void mark_tsc_unstable(char *reason)
if (!tsc_unstable) {
tsc_unstable = 1;
sched_clock_stable = 0;
+ disable_sched_clock_irqtime();
printk(KERN_INFO "Marking TSC unstable due to %s\n", reason);
/* Change only the rating, when not registered */
if (clocksource_tsc.mult)
@@ -929,6 +934,9 @@ void __init tsc_init(void)
/* now allow native_sched_clock() to use rdtsc */
tsc_disabled = 0;
+ if (!no_sched_irq_time)
+ enable_sched_clock_irqtime();
+
lpj = ((u64)tsc_khz * 1000);
do_div(lpj, HZ);
lpj_fine = lpj;