summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile3
-rw-r--r--arch/x86/kernel/acpi/boot.c3
-rw-r--r--arch/x86/kernel/acpi/sleep.c24
-rw-r--r--arch/x86/kernel/amd_iommu.c54
-rw-r--r--arch/x86/kernel/amd_iommu_init.c12
-rw-r--r--arch/x86/kernel/aperture_64.c11
-rw-r--r--arch/x86/kernel/apic/Makefile2
-rw-r--r--arch/x86/kernel/apic/apic.c38
-rw-r--r--arch/x86/kernel/apic/apic_noop.c200
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c13
-rw-r--r--arch/x86/kernel/apic/es7000_32.c28
-rw-r--r--arch/x86/kernel/apic/io_apic.c417
-rw-r--r--arch/x86/kernel/apic/nmi.c19
-rw-r--r--arch/x86/kernel/apic/numaq_32.c18
-rw-r--r--arch/x86/kernel/apic/probe_32.c2
-rw-r--r--arch/x86/kernel/apic/summit_32.c10
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c27
-rw-r--r--arch/x86/kernel/apm_32.c14
-rw-r--r--arch/x86/kernel/bios_uv.c8
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c15
-rw-r--r--arch/x86/kernel/cpu/amd.c4
-rw-r--r--arch/x86/kernel/cpu/centaur.c2
-rw-r--r--arch/x86/kernel/cpu/common.c52
-rw-r--r--arch/x86/kernel/cpu/cpu.h2
-rw-r--r--arch/x86/kernel/cpu/cpu_debug.c30
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c45
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longhaul.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k6.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k7.c19
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c32
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-ich.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-lib.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-lib.h24
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-smi.c2
-rw-r--r--arch/x86/kernel/cpu/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/intel.c8
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c88
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c112
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c45
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c51
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c11
-rw-r--r--arch/x86/kernel/cpu/perf_event.c238
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c2
-rw-r--r--arch/x86/kernel/cpu/transmeta.c2
-rw-r--r--arch/x86/kernel/cpuid.c17
-rw-r--r--arch/x86/kernel/ds.c4
-rw-r--r--arch/x86/kernel/dumpstack.c15
-rw-r--r--arch/x86/kernel/dumpstack_32.c9
-rw-r--r--arch/x86/kernel/dumpstack_64.c79
-rw-r--r--arch/x86/kernel/e820.c11
-rw-r--r--arch/x86/kernel/entry_32.S31
-rw-r--r--arch/x86/kernel/entry_64.S35
-rw-r--r--arch/x86/kernel/ftrace.c101
-rw-r--r--arch/x86/kernel/geode_32.c196
-rw-r--r--arch/x86/kernel/head32.c2
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/head_32.S18
-rw-r--r--arch/x86/kernel/head_64.S7
-rw-r--r--arch/x86/kernel/hpet.c77
-rw-r--r--arch/x86/kernel/hw_breakpoint.c554
-rw-r--r--arch/x86/kernel/irq.c126
-rw-r--r--arch/x86/kernel/irq_32.c45
-rw-r--r--arch/x86/kernel/irq_64.c58
-rw-r--r--arch/x86/kernel/irqinit.c4
-rw-r--r--arch/x86/kernel/kgdb.c21
-rw-r--r--arch/x86/kernel/kprobes.c261
-rw-r--r--arch/x86/kernel/machine_kexec_32.c8
-rw-r--r--arch/x86/kernel/machine_kexec_64.c2
-rw-r--r--arch/x86/kernel/mfgpt_32.c410
-rw-r--r--arch/x86/kernel/microcode_amd.c97
-rw-r--r--arch/x86/kernel/microcode_core.c36
-rw-r--r--arch/x86/kernel/microcode_intel.c47
-rw-r--r--arch/x86/kernel/mpparse.c47
-rw-r--r--arch/x86/kernel/msr.c16
-rw-r--r--arch/x86/kernel/olpc.c4
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c4
-rw-r--r--arch/x86/kernel/pci-calgary_64.c6
-rw-r--r--arch/x86/kernel/pci-dma.c5
-rw-r--r--arch/x86/kernel/pci-gart_64.c9
-rw-r--r--arch/x86/kernel/process.c44
-rw-r--r--arch/x86/kernel/process_32.c24
-rw-r--r--arch/x86/kernel/process_64.c58
-rw-r--r--arch/x86/kernel/ptrace.c464
-rw-r--r--arch/x86/kernel/quirks.c9
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/reboot_fixups_32.c3
-rw-r--r--arch/x86/kernel/setup.c116
-rw-r--r--arch/x86/kernel/setup_percpu.c13
-rw-r--r--arch/x86/kernel/signal.c12
-rw-r--r--arch/x86/kernel/smpboot.c58
-rw-r--r--arch/x86/kernel/sys_i386_32.c27
-rw-r--r--arch/x86/kernel/sys_x86_64.c17
-rw-r--r--arch/x86/kernel/syscall_table_32.S3
-rw-r--r--arch/x86/kernel/tlb_uv.c4
-rw-r--r--arch/x86/kernel/trampoline.c20
-rw-r--r--arch/x86/kernel/traps.c73
-rw-r--r--arch/x86/kernel/tsc_sync.c23
-rw-r--r--arch/x86/kernel/uv_irq.c239
-rw-r--r--arch/x86/kernel/uv_time.c80
-rw-r--r--arch/x86/kernel/visws_quirks.c10
-rw-r--r--arch/x86/kernel/vmiclock_32.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S38
-rw-r--r--arch/x86/kernel/vsyscall_64.c7
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c5
-rw-r--r--arch/x86/kernel/x86_init.c2
106 files changed, 3178 insertions, 2247 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d8e5d0cdd67..d87f09bc5a5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
-obj-y += alternative.o i8253.o pci-nommu.o
+obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
obj-y += tsc.o io_delay.o rtc.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
@@ -89,7 +89,6 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_HPET_TIMER) += hpet.o
obj-$(CONFIG_K8_NB) += k8.o
-obj-$(CONFIG_MGEODE_LX) += geode_32.o mfgpt_32.o
obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 67e929b8987..fb1035cd9a6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -624,6 +624,7 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
}
hpet_address = hpet_tbl->address.address;
+ hpet_blockid = hpet_tbl->sequence;
/*
* Some broken BIOSes advertise HPET at 0x0. We really do not
@@ -1122,7 +1123,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
if (!acpi_sci_override_gsi)
acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0);
- /* Fill in identity legacy mapings where no override */
+ /* Fill in identity legacy mappings where no override */
mp_config_acpi_legacy_irqs();
count =
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index ca93638ba43..82e508677b9 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -78,12 +78,9 @@ int acpi_save_state_mem(void)
#ifndef CONFIG_64BIT
store_gdt((struct desc_ptr *)&header->pmode_gdt);
- header->pmode_efer_low = nx_enabled;
- if (header->pmode_efer_low & 1) {
- /* This is strange, why not save efer, always? */
- rdmsr(MSR_EFER, header->pmode_efer_low,
- header->pmode_efer_high);
- }
+ if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low,
+ &header->pmode_efer_high))
+ header->pmode_efer_low = header->pmode_efer_high = 0;
#endif /* !CONFIG_64BIT */
header->pmode_cr0 = read_cr0();
@@ -119,29 +116,32 @@ void acpi_restore_state_mem(void)
/**
- * acpi_reserve_bootmem - do _very_ early ACPI initialisation
+ * acpi_reserve_wakeup_memory - do _very_ early ACPI initialisation
*
* We allocate a page from the first 1MB of memory for the wakeup
* routine for when we come back from a sleep state. The
* runtime allocator allows specification of <16MB pages, but not
* <1MB pages.
*/
-void __init acpi_reserve_bootmem(void)
+void __init acpi_reserve_wakeup_memory(void)
{
+ unsigned long mem;
+
if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) {
printk(KERN_ERR
"ACPI: Wakeup code way too big, S3 disabled.\n");
return;
}
- acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE);
+ mem = find_e820_area(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE);
- if (!acpi_realmode) {
+ if (mem == -1L) {
printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
return;
}
-
- acpi_wakeup_address = virt_to_phys((void *)acpi_realmode);
+ acpi_realmode = (unsigned long) phys_to_virt(mem);
+ acpi_wakeup_address = mem;
+ reserve_early(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP");
}
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 32fb09102a1..23824fef789 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -19,7 +19,7 @@
#include <linux/pci.h>
#include <linux/gfp.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
@@ -166,6 +166,43 @@ static void iommu_uninit_device(struct device *dev)
{
kfree(dev->archdata.iommu);
}
+
+void __init amd_iommu_uninit_devices(void)
+{
+ struct pci_dev *pdev = NULL;
+
+ for_each_pci_dev(pdev) {
+
+ if (!check_device(&pdev->dev))
+ continue;
+
+ iommu_uninit_device(&pdev->dev);
+ }
+}
+
+int __init amd_iommu_init_devices(void)
+{
+ struct pci_dev *pdev = NULL;
+ int ret = 0;
+
+ for_each_pci_dev(pdev) {
+
+ if (!check_device(&pdev->dev))
+ continue;
+
+ ret = iommu_init_device(&pdev->dev);
+ if (ret)
+ goto out_free;
+ }
+
+ return 0;
+
+out_free:
+
+ amd_iommu_uninit_devices();
+
+ return ret;
+}
#ifdef CONFIG_AMD_IOMMU_STATS
/*
@@ -1125,7 +1162,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
- iommu_area_free(range->bitmap, address, pages);
+ bitmap_clear(range->bitmap, address, pages);
}
@@ -1587,6 +1624,11 @@ static struct notifier_block device_nb = {
.notifier_call = device_change_notifier,
};
+void amd_iommu_init_notifier(void)
+{
+ bus_register_notifier(&pci_bus_type, &device_nb);
+}
+
/*****************************************************************************
*
* The next functions belong to the dma_ops mapping/unmapping code.
@@ -1783,7 +1825,7 @@ retry:
goto out;
/*
- * aperture was sucessfully enlarged by 128 MB, try
+ * aperture was successfully enlarged by 128 MB, try
* allocation again
*/
goto retry;
@@ -2145,8 +2187,6 @@ static void prealloc_protection_domains(void)
if (!check_device(&dev->dev))
continue;
- iommu_init_device(&dev->dev);
-
/* Is there already any domain for it? */
if (domain_for_device(&dev->dev))
continue;
@@ -2215,8 +2255,6 @@ int __init amd_iommu_init_dma_ops(void)
register_iommu(&amd_iommu_ops);
- bus_register_notifier(&pci_bus_type, &device_nb);
-
amd_iommu_stats_init();
return 0;
@@ -2490,7 +2528,7 @@ int __init amd_iommu_init_passthrough(void)
struct pci_dev *dev = NULL;
u16 devid;
- /* allocate passthroug domain */
+ /* allocate passthrough domain */
pt_domain = protection_domain_alloc();
if (!pt_domain)
return -ENOMEM;
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 7ffc3996523..1dca9c34eae 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1274,6 +1274,10 @@ static int __init amd_iommu_init(void)
if (ret)
goto free;
+ ret = amd_iommu_init_devices();
+ if (ret)
+ goto free;
+
if (iommu_pass_through)
ret = amd_iommu_init_passthrough();
else
@@ -1281,6 +1285,8 @@ static int __init amd_iommu_init(void)
if (ret)
goto free;
+ amd_iommu_init_notifier();
+
enable_iommus();
if (iommu_pass_through)
@@ -1296,6 +1302,9 @@ out:
return ret;
free:
+
+ amd_iommu_uninit_devices();
+
free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
get_order(MAX_DOMAIN_ID/8));
@@ -1336,6 +1345,9 @@ void __init amd_iommu_detect(void)
iommu_detected = 1;
amd_iommu_detected = 1;
x86_init.iommu.iommu_init = amd_iommu_init;
+
+ /* Make sure ACS will be enabled */
+ pci_request_acs();
}
}
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index e0dfb6856aa..3704997e8b2 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -280,7 +280,8 @@ void __init early_gart_iommu_check(void)
* or BIOS forget to put that in reserved.
* try to update e820 to make that region as reserved.
*/
- int i, fix, slot;
+ u32 agp_aper_base = 0, agp_aper_order = 0;
+ int i, fix, slot, valid_agp = 0;
u32 ctl;
u32 aper_size = 0, aper_order = 0, last_aper_order = 0;
u64 aper_base = 0, last_aper_base = 0;
@@ -290,6 +291,8 @@ void __init early_gart_iommu_check(void)
return;
/* This is mostly duplicate of iommu_hole_init */
+ agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp);
+
fix = 0;
for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
int bus;
@@ -342,10 +345,10 @@ void __init early_gart_iommu_check(void)
}
}
- if (!fix)
+ if (valid_agp)
return;
- /* different nodes have different setting, disable them all at first*/
+ /* disable them all at first */
for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
int bus;
int dev_base, dev_limit;
@@ -458,8 +461,6 @@ out:
if (aper_alloc) {
/* Got the aperture from the AGP bridge */
- } else if (!valid_agp) {
- /* Do nothing */
} else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
force_iommu ||
valid_agp ||
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index da7b7b9f8bd..565c1bfc507 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -2,7 +2,7 @@
# Makefile for local APIC drivers and for the IO-APIC code
#
-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o
+obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_SMP) += ipi.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 894aa97f071..aa57c079c98 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -241,28 +241,13 @@ static int modern_apic(void)
}
/*
- * bare function to substitute write operation
- * and it's _that_ fast :)
- */
-static void native_apic_write_dummy(u32 reg, u32 v)
-{
- WARN_ON_ONCE((cpu_has_apic || !disable_apic));
-}
-
-static u32 native_apic_read_dummy(u32 reg)
-{
- WARN_ON_ONCE((cpu_has_apic && !disable_apic));
- return 0;
-}
-
-/*
- * right after this call apic->write/read doesn't do anything
- * note that there is no restore operation it works one way
+ * right after this call apic become NOOP driven
+ * so apic->write/read doesn't do anything
*/
void apic_disable(void)
{
- apic->read = native_apic_read_dummy;
- apic->write = native_apic_write_dummy;
+ pr_info("APIC: switched to apic NOOP\n");
+ apic = &apic_noop;
}
void native_apic_wait_icr_idle(void)
@@ -459,7 +444,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
v = apic_read(APIC_LVTT);
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
apic_write(APIC_LVTT, v);
- apic_write(APIC_TMICT, 0xffffffff);
+ apic_write(APIC_TMICT, 0);
break;
case CLOCK_EVT_MODE_RESUME:
/* Nothing to do here */
@@ -662,7 +647,7 @@ static int __init calibrate_APIC_clock(void)
calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
- apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult);
+ apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
calibration_result);
@@ -1356,7 +1341,7 @@ void enable_x2apic(void)
rdmsr(MSR_IA32_APICBASE, msr, msr2);
if (!(msr & X2APIC_ENABLE)) {
- pr_info("Enabling x2apic\n");
+ printk_once(KERN_INFO "Enabling x2apic\n");
wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
}
}
@@ -1392,14 +1377,11 @@ void __init enable_IR_x2apic(void)
unsigned long flags;
struct IO_APIC_route_entry **ioapic_entries = NULL;
int ret, x2apic_enabled = 0;
- int dmar_table_init_ret = 0;
+ int dmar_table_init_ret;
-#ifdef CONFIG_INTR_REMAP
dmar_table_init_ret = dmar_table_init();
- if (dmar_table_init_ret)
- pr_debug("dmar_table_init() failed with %d:\n",
- dmar_table_init_ret);
-#endif
+ if (dmar_table_init_ret && !x2apic_supported())
+ return;
ioapic_entries = alloc_ioapic_entries();
if (!ioapic_entries) {
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
new file mode 100644
index 00000000000..e31b9ffe25f
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -0,0 +1,200 @@
+/*
+ * NOOP APIC driver.
+ *
+ * Does almost nothing and should be substituted by a real apic driver via
+ * probe routine.
+ *
+ * Though in case if apic is disabled (for some reason) we try
+ * to not uglify the caller's code and allow to call (some) apic routines
+ * like self-ipi, etc...
+ */
+
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/apicdef.h>
+#include <asm/apic.h>
+#include <asm/setup.h>
+
+#include <linux/smp.h>
+#include <asm/ipi.h>
+
+#include <linux/interrupt.h>
+#include <asm/acpi.h>
+#include <asm/e820.h>
+
+static void noop_init_apic_ldr(void) { }
+static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { }
+static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { }
+static void noop_send_IPI_allbutself(int vector) { }
+static void noop_send_IPI_all(int vector) { }
+static void noop_send_IPI_self(int vector) { }
+static void noop_apic_wait_icr_idle(void) { }
+static void noop_apic_icr_write(u32 low, u32 id) { }
+
+static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip)
+{
+ return -1;
+}
+
+static u32 noop_safe_apic_wait_icr_idle(void)
+{
+ return 0;
+}
+
+static u64 noop_apic_icr_read(void)
+{
+ return 0;
+}
+
+static int noop_cpu_to_logical_apicid(int cpu)
+{
+ return 0;
+}
+
+static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+ return 0;
+}
+
+static unsigned int noop_get_apic_id(unsigned long x)
+{
+ return 0;
+}
+
+static int noop_probe(void)
+{
+ /*
+ * NOOP apic should not ever be
+ * enabled via probe routine
+ */
+ return 0;
+}
+
+static int noop_apic_id_registered(void)
+{
+ /*
+ * if we would be really "pedantic"
+ * we should pass read_apic_id() here
+ * but since NOOP suppose APIC ID = 0
+ * lets save a few cycles
+ */
+ return physid_isset(0, phys_cpu_present_map);
+}
+
+static const struct cpumask *noop_target_cpus(void)
+{
+ /* only BSP here */
+ return cpumask_of(0);
+}
+
+static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid)
+{
+ return physid_isset(apicid, *map);
+}
+
+static unsigned long noop_check_apicid_present(int bit)
+{
+ return physid_isset(bit, phys_cpu_present_map);
+}
+
+static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ if (cpu != 0)
+ pr_warning("APIC: Vector allocated for non-BSP cpu\n");
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
+}
+
+int noop_apicid_to_node(int logical_apicid)
+{
+ /* we're always on node 0 */
+ return 0;
+}
+
+static u32 noop_apic_read(u32 reg)
+{
+ WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+ return 0;
+}
+
+static void noop_apic_write(u32 reg, u32 v)
+{
+ WARN_ON_ONCE(cpu_has_apic && !disable_apic);
+}
+
+struct apic apic_noop = {
+ .name = "noop",
+ .probe = noop_probe,
+ .acpi_madt_oem_check = NULL,
+
+ .apic_id_registered = noop_apic_id_registered,
+
+ .irq_delivery_mode = dest_LowestPrio,
+ /* logical delivery broadcast to all CPUs: */
+ .irq_dest_mode = 1,
+
+ .target_cpus = noop_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = APIC_DEST_LOGICAL,
+ .check_apicid_used = noop_check_apicid_used,
+ .check_apicid_present = noop_check_apicid_present,
+
+ .vector_allocation_domain = noop_vector_allocation_domain,
+ .init_apic_ldr = noop_init_apic_ldr,
+
+ .ioapic_phys_id_map = default_ioapic_phys_id_map,
+ .setup_apic_routing = NULL,
+ .multi_timer_check = NULL,
+ .apicid_to_node = noop_apicid_to_node,
+
+ .cpu_to_logical_apicid = noop_cpu_to_logical_apicid,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = physid_set_mask_of_physid,
+
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+
+ .phys_pkg_id = noop_phys_pkg_id,
+
+ .mps_oem_check = NULL,
+
+ .get_apic_id = noop_get_apic_id,
+ .set_apic_id = NULL,
+ .apic_id_mask = 0x0F << 24,
+
+ .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = noop_send_IPI_mask,
+ .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself,
+ .send_IPI_allbutself = noop_send_IPI_allbutself,
+ .send_IPI_all = noop_send_IPI_all,
+ .send_IPI_self = noop_send_IPI_self,
+
+ .wakeup_secondary_cpu = noop_wakeup_secondary_cpu,
+
+ /* should be safe */
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+
+ .wait_for_init_deassert = NULL,
+
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = NULL,
+
+ .read = noop_apic_read,
+ .write = noop_apic_write,
+ .icr_read = noop_apic_icr_read,
+ .icr_write = noop_apic_icr_write,
+ .wait_icr_idle = noop_apic_wait_icr_idle,
+ .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle,
+};
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 77a06413b6b..38dcecfa581 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -35,7 +35,7 @@ static const struct cpumask *bigsmp_target_cpus(void)
#endif
}
-static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
{
return 0;
}
@@ -93,11 +93,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu)
return BAD_APICID;
}
-static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid)
-{
- return physid_mask_of_physid(phys_apicid);
-}
-
/* Mapping from cpu number to logical apicid */
static inline int bigsmp_cpu_to_logical_apicid(int cpu)
{
@@ -106,10 +101,10 @@ static inline int bigsmp_cpu_to_logical_apicid(int cpu)
return cpu_physical_id(cpu);
}
-static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map)
+static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
- return physids_promote(0xFFL);
+ physids_promote(0xFFL, retmap);
}
static int bigsmp_check_phys_apicid_present(int phys_apicid)
@@ -230,7 +225,7 @@ struct apic apic_bigsmp = {
.apicid_to_node = bigsmp_apicid_to_node,
.cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid,
.cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
- .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present,
+ .apicid_to_cpu_present = physid_set_mask_of_physid,
.setup_portio_remap = NULL,
.check_phys_apicid_present = bigsmp_check_phys_apicid_present,
.enable_apic_mode = NULL,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 89174f847b4..dd2b5f26464 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -27,6 +27,9 @@
*
* http://www.unisys.com
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/notifier.h>
#include <linux/spinlock.h>
#include <linux/cpumask.h>
@@ -223,9 +226,9 @@ static int parse_unisys_oem(char *oemptr)
mip_addr = val;
mip = (struct mip_reg *)val;
mip_reg = __va(mip);
- pr_debug("es7000_mipcfg: host_reg = 0x%lx \n",
+ pr_debug("host_reg = 0x%lx\n",
(unsigned long)host_reg);
- pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n",
+ pr_debug("mip_reg = 0x%lx\n",
(unsigned long)mip_reg);
success++;
break;
@@ -401,7 +404,7 @@ static void es7000_enable_apic_mode(void)
if (!es7000_plat)
return;
- printk(KERN_INFO "ES7000: Enabling APIC mode.\n");
+ pr_info("Enabling APIC mode.\n");
memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
es7000_mip_reg.off_0x00 = MIP_SW_APIC;
es7000_mip_reg.off_0x38 = MIP_VALID;
@@ -466,11 +469,11 @@ static const struct cpumask *es7000_target_cpus(void)
return cpumask_of(smp_processor_id());
}
-static unsigned long
-es7000_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid)
{
return 0;
}
+
static unsigned long es7000_check_apicid_present(int bit)
{
return physid_isset(bit, phys_cpu_present_map);
@@ -514,8 +517,7 @@ static void es7000_setup_apic_routing(void)
{
int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
- printk(KERN_INFO
- "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
+ pr_info("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
(apic_version[apic] == 0x14) ?
"Physical Cluster" : "Logical Cluster",
nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
@@ -539,14 +541,10 @@ static int es7000_cpu_present_to_apicid(int mps_cpu)
static int cpu_id;
-static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid)
+static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
{
- physid_mask_t mask;
-
- mask = physid_mask_of_physid(cpu_id);
+ physid_set_mask_of_physid(cpu_id, retmap);
++cpu_id;
-
- return mask;
}
/* Mapping from cpu number to logical apicid */
@@ -561,10 +559,10 @@ static int es7000_cpu_to_logical_apicid(int cpu)
#endif
}
-static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map)
+static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
- return physids_promote(0xff);
+ physids_promote(0xFFL, retmap);
}
static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index dc69f28489f..11a5851f1f5 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -60,8 +60,6 @@
#include <asm/irq_remapping.h>
#include <asm/hpet.h>
#include <asm/hw_irq.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_irq.h>
#include <asm/apic.h>
@@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
return pin;
}
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * Most irqs are mapped 1:1 with pins.
- */
-struct irq_cfg {
- struct irq_pin_list *irq_2_pin;
- cpumask_var_t domain;
- cpumask_var_t old_domain;
- unsigned move_cleanup_count;
- u8 vector;
- u8 move_in_progress : 1;
-};
-
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
#ifdef CONFIG_SPARSE_IRQ
static struct irq_cfg irq_cfgx[] = {
@@ -209,7 +193,7 @@ int __init arch_early_irq_init(void)
}
#ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
{
struct irq_cfg *cfg = NULL;
struct irq_desc *desc;
@@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
/* end for move_irq_desc */
#else
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
{
return irq < nr_irqs ? irq_cfgx + irq : NULL;
}
@@ -555,23 +539,41 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
add_pin_to_irq_node(cfg, node, newapic, newpin);
}
+static void __io_apic_modify_irq(struct irq_pin_list *entry,
+ int mask_and, int mask_or,
+ void (*final)(struct irq_pin_list *entry))
+{
+ unsigned int reg, pin;
+
+ pin = entry->pin;
+ reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+ reg &= mask_and;
+ reg |= mask_or;
+ io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+ if (final)
+ final(entry);
+}
+
static void io_apic_modify_irq(struct irq_cfg *cfg,
int mask_and, int mask_or,
void (*final)(struct irq_pin_list *entry))
{
- int pin;
struct irq_pin_list *entry;
- for_each_irq_pin(entry, cfg->irq_2_pin) {
- unsigned int reg;
- pin = entry->pin;
- reg = io_apic_read(entry->apic, 0x10 + pin * 2);
- reg &= mask_and;
- reg |= mask_or;
- io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
- if (final)
- final(entry);
- }
+ for_each_irq_pin(entry, cfg->irq_2_pin)
+ __io_apic_modify_irq(entry, mask_and, mask_or, final);
+}
+
+static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry)
+{
+ __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+ IO_APIC_REDIR_MASKED, NULL);
+}
+
+static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
+{
+ __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED,
+ IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
}
static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
@@ -595,18 +597,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
}
-static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
-{
- io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
- IO_APIC_REDIR_MASKED, NULL);
-}
-
-static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
-{
- io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
- IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
-}
-
static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
{
struct irq_cfg *cfg = desc->chip_data;
@@ -1177,7 +1167,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
int cpu, err;
cpumask_var_t tmp_mask;
- if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+ if (cfg->move_in_progress)
return -EBUSY;
if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
@@ -1237,8 +1227,7 @@ next:
return err;
}
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
{
int err;
unsigned long flags;
@@ -1599,9 +1588,6 @@ __apicdebuginit(void) print_IO_APIC(void)
struct irq_desc *desc;
unsigned int irq;
- if (apic_verbosity == APIC_QUIET)
- return;
-
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
for (i = 0; i < nr_ioapics; i++)
printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
@@ -1708,9 +1694,6 @@ __apicdebuginit(void) print_APIC_field(int base)
{
int i;
- if (apic_verbosity == APIC_QUIET)
- return;
-
printk(KERN_DEBUG);
for (i = 0; i < 8; i++)
@@ -1724,9 +1707,6 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
unsigned int i, v, ver, maxlvt;
u64 icr;
- if (apic_verbosity == APIC_QUIET)
- return;
-
printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
smp_processor_id(), hard_smp_processor_id());
v = apic_read(APIC_ID);
@@ -1824,13 +1804,19 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
printk("\n");
}
-__apicdebuginit(void) print_all_local_APICs(void)
+__apicdebuginit(void) print_local_APICs(int maxcpu)
{
int cpu;
+ if (!maxcpu)
+ return;
+
preempt_disable();
- for_each_online_cpu(cpu)
+ for_each_online_cpu(cpu) {
+ if (cpu >= maxcpu)
+ break;
smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+ }
preempt_enable();
}
@@ -1839,7 +1825,7 @@ __apicdebuginit(void) print_PIC(void)
unsigned int v;
unsigned long flags;
- if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs)
+ if (!nr_legacy_irqs)
return;
printk(KERN_DEBUG "\nprinting PIC contents\n");
@@ -1866,21 +1852,41 @@ __apicdebuginit(void) print_PIC(void)
printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
}
-__apicdebuginit(int) print_all_ICs(void)
+static int __initdata show_lapic = 1;
+static __init int setup_show_lapic(char *arg)
{
+ int num = -1;
+
+ if (strcmp(arg, "all") == 0) {
+ show_lapic = CONFIG_NR_CPUS;
+ } else {
+ get_option(&arg, &num);
+ if (num >= 0)
+ show_lapic = num;
+ }
+
+ return 1;
+}
+__setup("show_lapic=", setup_show_lapic);
+
+__apicdebuginit(int) print_ICs(void)
+{
+ if (apic_verbosity == APIC_QUIET)
+ return 0;
+
print_PIC();
/* don't print out if apic is not there */
if (!cpu_has_apic && !apic_from_smp_config())
return 0;
- print_all_local_APICs();
+ print_local_APICs(show_lapic);
print_IO_APIC();
return 0;
}
-fs_initcall(print_all_ICs);
+fs_initcall(print_ICs);
/* Where if anywhere is the i8259 connect in external int mode */
@@ -2031,7 +2037,7 @@ void __init setup_ioapic_ids_from_mpc(void)
* This is broken; anything with a real cpu count has to
* circumvent this idiocy regardless.
*/
- phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
+ apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
/*
* Set the IOAPIC ID to the value stored in the MPC table.
@@ -2058,7 +2064,7 @@ void __init setup_ioapic_ids_from_mpc(void)
* system must have a unique ID or we get lots of nice
* 'stuck on smp_invalidate_needed IPI wait' messages.
*/
- if (apic->check_apicid_used(phys_id_present_map,
+ if (apic->check_apicid_used(&phys_id_present_map,
mp_ioapics[apic_id].apicid)) {
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
apic_id, mp_ioapics[apic_id].apicid);
@@ -2073,7 +2079,7 @@ void __init setup_ioapic_ids_from_mpc(void)
mp_ioapics[apic_id].apicid = i;
} else {
physid_mask_t tmp;
- tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid);
+ apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp);
apic_printk(APIC_VERBOSE, "Setting %d in the "
"phys_id_present_map\n",
mp_ioapics[apic_id].apicid);
@@ -2228,20 +2234,16 @@ static int ioapic_retrigger_irq(unsigned int irq)
*/
#ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
+void send_cleanup_vector(struct irq_cfg *cfg)
{
cpumask_var_t cleanup_mask;
if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
unsigned int i;
- cfg->move_cleanup_count = 0;
- for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
- cfg->move_cleanup_count++;
for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
} else {
cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
- cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
free_cpumask_var(cleanup_mask);
}
@@ -2272,15 +2274,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
}
}
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
/*
* Either sets desc->affinity to a valid value, and returns
* ->cpu_mask_to_apicid of that, or returns BAD_APICID and
* leaves desc->affinity untouched.
*/
-static unsigned int
+unsigned int
set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
{
struct irq_cfg *cfg;
@@ -2432,9 +2431,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
continue;
cfg = irq_cfg(irq);
- spin_lock(&desc->lock);
- if (!cfg->move_cleanup_count)
- goto unlock;
+ raw_spin_lock(&desc->lock);
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
goto unlock;
@@ -2452,29 +2449,40 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
goto unlock;
}
__get_cpu_var(vector_irq)[vector] = -1;
- cfg->move_cleanup_count--;
unlock:
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
}
irq_exit();
}
-static void irq_complete_move(struct irq_desc **descp)
+static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
{
struct irq_desc *desc = *descp;
struct irq_cfg *cfg = desc->chip_data;
- unsigned vector, me;
+ unsigned me;
if (likely(!cfg->move_in_progress))
return;
- vector = ~get_irq_regs()->orig_ax;
me = smp_processor_id();
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
send_cleanup_vector(cfg);
}
+
+static void irq_complete_move(struct irq_desc **descp)
+{
+ __irq_complete_move(descp, ~get_irq_regs()->orig_ax);
+}
+
+void irq_force_complete_move(int irq)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg = desc->chip_data;
+
+ __irq_complete_move(&desc, cfg->vector);
+}
#else
static inline void irq_complete_move(struct irq_desc **descp) {}
#endif
@@ -2490,6 +2498,59 @@ static void ack_apic_edge(unsigned int irq)
atomic_t irq_mis_count;
+/*
+ * IO-APIC versions below 0x20 don't support EOI register.
+ * For the record, here is the information about various versions:
+ * 0Xh 82489DX
+ * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
+ * 2Xh I/O(x)APIC which is PCI 2.2 Compliant
+ * 30h-FFh Reserved
+ *
+ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
+ * version as 0x2. This is an error with documentation and these ICH chips
+ * use io-apic's of version 0x20.
+ *
+ * For IO-APIC's with EOI register, we use that to do an explicit EOI.
+ * Otherwise, we simulate the EOI message manually by changing the trigger
+ * mode to edge and then back to level, with RTE being masked during this.
+*/
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+ struct irq_pin_list *entry;
+
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
+ if (mp_ioapics[entry->apic].apicver >= 0x20) {
+ /*
+ * Intr-remapping uses pin number as the virtual vector
+ * in the RTE. Actual vector is programmed in
+ * intr-remapping table entry. Hence for the io-apic
+ * EOI we use the pin number.
+ */
+ if (irq_remapped(irq))
+ io_apic_eoi(entry->apic, entry->pin);
+ else
+ io_apic_eoi(entry->apic, cfg->vector);
+ } else {
+ __mask_and_edge_IO_APIC_irq(entry);
+ __unmask_and_level_IO_APIC_irq(entry);
+ }
+ }
+}
+
+static void eoi_ioapic_irq(struct irq_desc *desc)
+{
+ struct irq_cfg *cfg;
+ unsigned long flags;
+ unsigned int irq;
+
+ irq = desc->irq;
+ cfg = desc->chip_data;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __eoi_ioapic_irq(irq, cfg);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
static void ack_apic_level(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -2525,6 +2586,19 @@ static void ack_apic_level(unsigned int irq)
* level-triggered interrupt. We mask the source for the time of the
* operation to prevent an edge-triggered interrupt escaping meanwhile.
* The idea is from Manfred Spraul. --macro
+ *
+ * Also in the case when cpu goes offline, fixup_irqs() will forward
+ * any unhandled interrupt on the offlined cpu to the new cpu
+ * destination that is handling the corresponding interrupt. This
+ * interrupt forwarding is done via IPI's. Hence, in this case also
+ * level-triggered io-apic interrupt will be seen as an edge
+ * interrupt in the IRR. And we can't rely on the cpu's EOI
+ * to be broadcasted to the IO-APIC's which will clear the remoteIRR
+ * corresponding to the level-triggered interrupt. Hence on IO-APIC's
+ * supporting EOI register, we do an explicit EOI to clear the
+ * remote IRR and on IO-APIC's which don't have an EOI register,
+ * we use the above logic (mask+edge followed by unmask+level) from
+ * Manfred Spraul to clear the remote IRR.
*/
cfg = desc->chip_data;
i = cfg->vector;
@@ -2536,6 +2610,19 @@ static void ack_apic_level(unsigned int irq)
*/
ack_APIC_irq();
+ /*
+ * Tail end of clearing remote IRR bit (either by delivering the EOI
+ * message via io-apic EOI register write or simulating it using
+ * mask+edge followed by unnask+level logic) manually when the
+ * level triggered interrupt is seen as the edge triggered interrupt
+ * at the cpu.
+ */
+ if (!(v & (1 << (i & 0x1f)))) {
+ atomic_inc(&irq_mis_count);
+
+ eoi_ioapic_irq(desc);
+ }
+
/* Now we can move and renable the irq */
if (unlikely(do_unmask_irq)) {
/* Only migrate the irq if the ack has been received.
@@ -2569,41 +2656,9 @@ static void ack_apic_level(unsigned int irq)
move_masked_irq(irq);
unmask_IO_APIC_irq_desc(desc);
}
-
- /* Tail end of version 0x11 I/O APIC bug workaround */
- if (!(v & (1 << (i & 0x1f)))) {
- atomic_inc(&irq_mis_count);
- spin_lock(&ioapic_lock);
- __mask_and_edge_IO_APIC_irq(cfg);
- __unmask_and_level_IO_APIC_irq(cfg);
- spin_unlock(&ioapic_lock);
- }
}
#ifdef CONFIG_INTR_REMAP
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
- struct irq_pin_list *entry;
-
- for_each_irq_pin(entry, cfg->irq_2_pin)
- io_apic_eoi(entry->apic, entry->pin);
-}
-
-static void
-eoi_ioapic_irq(struct irq_desc *desc)
-{
- struct irq_cfg *cfg;
- unsigned long flags;
- unsigned int irq;
-
- irq = desc->irq;
- cfg = desc->chip_data;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __eoi_ioapic_irq(irq, cfg);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
static void ir_ack_apic_edge(unsigned int irq)
{
ack_APIC_irq();
@@ -3157,6 +3212,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
continue;
desc_new = move_irq_desc(desc_new, node);
+ cfg_new = desc_new->chip_data;
if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
irq = new;
@@ -3211,7 +3267,8 @@ void destroy_irq(unsigned int irq)
* MSI message composition
*/
#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
+ struct msi_msg *msg, u8 hpet_id)
{
struct irq_cfg *cfg;
int err;
@@ -3245,7 +3302,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
irte.dest_id = IRTE_DEST(dest);
/* Set source-id of interrupt request */
- set_msi_sid(&irte, pdev);
+ if (pdev)
+ set_msi_sid(&irte, pdev);
+ else
+ set_hpet_sid(&irte, hpet_id);
modify_irte(irq, &irte);
@@ -3410,7 +3470,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
int ret;
struct msi_msg msg;
- ret = msi_compose_msg(dev, irq, &msg);
+ ret = msi_compose_msg(dev, irq, &msg, -1);
if (ret < 0)
return ret;
@@ -3543,7 +3603,7 @@ int arch_setup_dmar_msi(unsigned int irq)
int ret;
struct msi_msg msg;
- ret = msi_compose_msg(NULL, irq, &msg);
+ ret = msi_compose_msg(NULL, irq, &msg, -1);
if (ret < 0)
return ret;
dmar_msi_write(irq, &msg);
@@ -3583,6 +3643,19 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
#endif /* CONFIG_SMP */
+static struct irq_chip ir_hpet_msi_type = {
+ .name = "IR-HPET_MSI",
+ .unmask = hpet_msi_unmask,
+ .mask = hpet_msi_mask,
+#ifdef CONFIG_INTR_REMAP
+ .ack = ir_ack_apic_edge,
+#ifdef CONFIG_SMP
+ .set_affinity = ir_set_msi_irq_affinity,
+#endif
+#endif
+ .retrigger = ioapic_retrigger_irq,
+};
+
static struct irq_chip hpet_msi_type = {
.name = "HPET_MSI",
.unmask = hpet_msi_unmask,
@@ -3594,20 +3667,36 @@ static struct irq_chip hpet_msi_type = {
.retrigger = ioapic_retrigger_irq,
};
-int arch_setup_hpet_msi(unsigned int irq)
+int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
{
int ret;
struct msi_msg msg;
struct irq_desc *desc = irq_to_desc(irq);
- ret = msi_compose_msg(NULL, irq, &msg);
+ if (intr_remapping_enabled) {
+ struct intel_iommu *iommu = map_hpet_to_ir(id);
+ int index;
+
+ if (!iommu)
+ return -1;
+
+ index = alloc_irte(iommu, irq, 1);
+ if (index < 0)
+ return -1;
+ }
+
+ ret = msi_compose_msg(NULL, irq, &msg, id);
if (ret < 0)
return ret;
hpet_msi_write(irq, &msg);
desc->status |= IRQ_MOVE_PCNTXT;
- set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
- "edge");
+ if (irq_remapped(irq))
+ set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
+ handle_edge_irq, "edge");
+ else
+ set_irq_chip_and_handler_name(irq, &hpet_msi_type,
+ handle_edge_irq, "edge");
return 0;
}
@@ -3708,75 +3797,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
}
#endif /* CONFIG_HT_IRQ */
-#ifdef CONFIG_X86_UV
-/*
- * Re-target the irq to the specified CPU and enable the specified MMR located
- * on the specified blade to allow the sending of MSIs to the specified CPU.
- */
-int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
- unsigned long mmr_offset)
-{
- const struct cpumask *eligible_cpu = cpumask_of(cpu);
- struct irq_cfg *cfg;
- int mmr_pnode;
- unsigned long mmr_value;
- struct uv_IO_APIC_route_entry *entry;
- unsigned long flags;
- int err;
-
- BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
- cfg = irq_cfg(irq);
-
- err = assign_irq_vector(irq, cfg, eligible_cpu);
- if (err != 0)
- return err;
-
- spin_lock_irqsave(&vector_lock, flags);
- set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
- irq_name);
- spin_unlock_irqrestore(&vector_lock, flags);
-
- mmr_value = 0;
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- entry->vector = cfg->vector;
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->polarity = 0;
- entry->trigger = 0;
- entry->mask = 0;
- entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
-
- mmr_pnode = uv_blade_to_pnode(mmr_blade);
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
-
- return irq;
-}
-
-/*
- * Disable the specified MMR located on the specified blade so that MSIs are
- * longer allowed to be sent.
- */
-void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
-{
- unsigned long mmr_value;
- struct uv_IO_APIC_route_entry *entry;
- int mmr_pnode;
-
- BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
- mmr_value = 0;
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- entry->mask = 1;
-
- mmr_pnode = uv_blade_to_pnode(mmr_blade);
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-}
-#endif /* CONFIG_X86_64 */
-
int __init io_apic_get_redir_entries (int ioapic)
{
union IO_APIC_reg_01 reg_01;
@@ -3944,7 +3964,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
*/
if (physids_empty(apic_id_map))
- apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
+ apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(ioapic, 0);
@@ -3960,10 +3980,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
* Every APIC in a system must have a unique ID or we get lots of nice
* 'stuck on smp_invalidate_needed IPI wait' messages.
*/
- if (apic->check_apicid_used(apic_id_map, apic_id)) {
+ if (apic->check_apicid_used(&apic_id_map, apic_id)) {
for (i = 0; i < get_physical_broadcast(); i++) {
- if (!apic->check_apicid_used(apic_id_map, i))
+ if (!apic->check_apicid_used(&apic_id_map, i))
break;
}
@@ -3976,7 +3996,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
apic_id = i;
}
- tmp = apic->apicid_to_cpu_present(apic_id);
+ apic->apicid_to_cpu_present(apic_id, &tmp);
physids_or(apic_id_map, apic_id_map, tmp);
if (reg_00.bits.ID != apic_id) {
@@ -4106,7 +4126,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
for (i = 0; i < nr_ioapics; i++) {
res[i].name = mem;
res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- sprintf(mem, "IOAPIC %u", i);
+ snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
mem += IOAPIC_RESOURCE_NAME_SIZE;
}
@@ -4140,18 +4160,17 @@ void __init ioapic_init_mappings(void)
#ifdef CONFIG_X86_32
fake_ioapic_page:
#endif
- ioapic_phys = (unsigned long)
- alloc_bootmem_pages(PAGE_SIZE);
+ ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
ioapic_phys = __pa(ioapic_phys);
}
set_fixmap_nocache(idx, ioapic_phys);
- apic_printk(APIC_VERBOSE,
- "mapped IOAPIC to %08lx (%08lx)\n",
- __fix_to_virt(idx), ioapic_phys);
+ apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
+ __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
+ ioapic_phys);
idx++;
ioapic_res->start = ioapic_phys;
- ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+ ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
ioapic_res++;
}
}
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index 7ff61d6a188..0159a69396c 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -39,7 +39,8 @@
int unknown_nmi_panic;
int nmi_watchdog_enabled;
-static cpumask_t backtrace_mask __read_mostly;
+/* For reliability, we're prepared to waste bits here. */
+static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
/* nmi_active:
* >0: the lapic NMI watchdog is active, but can be disabled
@@ -360,7 +361,7 @@ void stop_apic_nmi_watchdog(void *unused)
*/
static DEFINE_PER_CPU(unsigned, last_irq_sum);
-static DEFINE_PER_CPU(local_t, alert_counter);
+static DEFINE_PER_CPU(long, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);
void touch_nmi_watchdog(void)
@@ -414,7 +415,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
}
/* We can be called before check_nmi_watchdog, hence NULL check. */
- if (cpumask_test_cpu(cpu, &backtrace_mask)) {
+ if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
static DEFINE_SPINLOCK(lock); /* Serialise the printks */
spin_lock(&lock);
@@ -422,7 +423,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
show_regs(regs);
dump_stack();
spin_unlock(&lock);
- cpumask_clear_cpu(cpu, &backtrace_mask);
+ cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
rc = 1;
}
@@ -437,8 +438,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
* Ayiee, looks like this CPU is stuck ...
* wait a few IRQs (5 seconds) before doing the oops ...
*/
- local_inc(&__get_cpu_var(alert_counter));
- if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
+ __this_cpu_inc(per_cpu_var(alert_counter));
+ if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz)
/*
* die_nmi will return ONLY if NOTIFY_STOP happens..
*/
@@ -446,7 +447,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
regs, panic_on_timeout);
} else {
__get_cpu_var(last_irq_sum) = sum;
- local_set(&__get_cpu_var(alert_counter), 0);
+ __this_cpu_write(per_cpu_var(alert_counter), 0);
}
/* see if the nmi watchdog went off */
@@ -558,14 +559,14 @@ void arch_trigger_all_cpu_backtrace(void)
{
int i;
- cpumask_copy(&backtrace_mask, cpu_online_mask);
+ cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
printk(KERN_INFO "sending NMI to all CPUs:\n");
apic->send_IPI_all(NMI_VECTOR);
/* Wait for up to 10 seconds for all CPUs to do the backtrace */
for (i = 0; i < 10 * 1000; i++) {
- if (cpumask_empty(&backtrace_mask))
+ if (cpumask_empty(to_cpumask(backtrace_mask)))
break;
mdelay(1);
}
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index efa00e2b850..98c4665f251 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -264,11 +264,6 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc)
static __init void early_check_numaq(void)
{
/*
- * Find possible boot-time SMP configuration:
- */
- early_find_smp_config();
-
- /*
* get boot-time SMP configuration:
*/
if (smp_found_config)
@@ -334,10 +329,9 @@ static inline const struct cpumask *numaq_target_cpus(void)
return cpu_all_mask;
}
-static inline unsigned long
-numaq_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid)
{
- return physid_isset(apicid, bitmap);
+ return physid_isset(apicid, *map);
}
static inline unsigned long numaq_check_apicid_present(int bit)
@@ -371,10 +365,10 @@ static inline int numaq_multi_timer_check(int apic, int irq)
return apic != 0 && irq == 0;
}
-static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map)
+static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* We don't have a good way to do this yet - hack */
- return physids_promote(0xFUL);
+ return physids_promote(0xFUL, retmap);
}
static inline int numaq_cpu_to_logical_apicid(int cpu)
@@ -402,12 +396,12 @@ static inline int numaq_apicid_to_node(int logical_apicid)
return logical_apicid >> 4;
}
-static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid)
+static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
{
int node = numaq_apicid_to_node(logical_apicid);
int cpu = __ffs(logical_apicid & 0xf);
- return physid_mask_of_physid(cpu + 4*node);
+ physid_set_mask_of_physid(cpu + 4*node, retmap);
}
/* Where the IO area was mapped on multiquad, always 0 otherwise */
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 0c0182cc947..1a6559f6768 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -108,7 +108,7 @@ struct apic apic_default = {
.apicid_to_node = default_apicid_to_node,
.cpu_to_logical_apicid = default_cpu_to_logical_apicid,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
- .apicid_to_cpu_present = default_apicid_to_cpu_present,
+ .apicid_to_cpu_present = physid_set_mask_of_physid,
.setup_portio_remap = NULL,
.check_phys_apicid_present = default_check_phys_apicid_present,
.enable_apic_mode = NULL,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 645ecc4ff0b..9b419263d90 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -183,7 +183,7 @@ static const struct cpumask *summit_target_cpus(void)
return cpumask_of(0);
}
-static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid)
{
return 0;
}
@@ -261,15 +261,15 @@ static int summit_cpu_present_to_apicid(int mps_cpu)
return BAD_APICID;
}
-static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map)
+static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
- return physids_promote(0x0F);
+ physids_promote(0x0FL, retmap);
}
-static physid_mask_t summit_apicid_to_cpu_present(int apicid)
+static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap)
{
- return physid_mask_of_physid(0);
+ physid_set_mask_of_physid(0, retmap);
}
static int summit_check_phys_apicid_present(int physical_apicid)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 326c25477d3..b684bb303cb 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -30,10 +30,22 @@
#include <asm/apic.h>
#include <asm/ipi.h>
#include <asm/smp.h>
+#include <asm/x86_init.h>
DEFINE_PER_CPU(int, x2apic_extra_bits);
static enum uv_system_type uv_system_type;
+static u64 gru_start_paddr, gru_end_paddr;
+
+static inline bool is_GRU_range(u64 start, u64 end)
+{
+ return start >= gru_start_paddr && end <= gru_end_paddr;
+}
+
+static bool uv_is_untracked_pat_range(u64 start, u64 end)
+{
+ return is_ISA_range(start, end) || is_GRU_range(start, end);
+}
static int early_get_nodeid(void)
{
@@ -49,6 +61,7 @@ static int early_get_nodeid(void)
static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
if (!strcmp(oem_id, "SGI")) {
+ x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
if (!strcmp(oem_table_id, "UVL"))
uv_system_type = UV_LEGACY_APIC;
else if (!strcmp(oem_table_id, "UVX"))
@@ -385,8 +398,12 @@ static __init void map_gru_high(int max_pnode)
int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
- if (gru.s.enable)
+ if (gru.s.enable) {
map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
+ gru_start_paddr = ((u64)gru.s.base << shift);
+ gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
+
+ }
}
static __init void map_mmr_high(int max_pnode)
@@ -409,6 +426,12 @@ static __init void map_mmioh_high(int max_pnode)
map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc);
}
+static __init void map_low_mmrs(void)
+{
+ init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
+ init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
+}
+
static __init void uv_rtc_init(void)
{
long status;
@@ -550,6 +573,8 @@ void __init uv_system_init(void)
unsigned long mmr_base, present, paddr;
unsigned short pnode_mask;
+ map_low_mmrs();
+
m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
m_val = m_n_config.s.m_skt;
n_val = m_n_config.s.n_skt;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 151ace69a5a..b5b6b23bce5 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -204,7 +204,6 @@
#include <linux/module.h>
#include <linux/poll.h>
-#include <linux/smp_lock.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/timer.h>
@@ -403,6 +402,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
static struct apm_user *user_list;
static DEFINE_SPINLOCK(user_list_lock);
+static DEFINE_MUTEX(apm_mutex);
/*
* Set up a segment that references the real mode segment 0x40
@@ -1531,7 +1531,7 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg)
return -EPERM;
switch (cmd) {
case APM_IOC_STANDBY:
- lock_kernel();
+ mutex_lock(&apm_mutex);
if (as->standbys_read > 0) {
as->standbys_read--;
as->standbys_pending--;
@@ -1540,10 +1540,10 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg)
queue_event(APM_USER_STANDBY, as);
if (standbys_pending <= 0)
standby();
- unlock_kernel();
+ mutex_unlock(&apm_mutex);
break;
case APM_IOC_SUSPEND:
- lock_kernel();
+ mutex_lock(&apm_mutex);
if (as->suspends_read > 0) {
as->suspends_read--;
as->suspends_pending--;
@@ -1552,13 +1552,14 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg)
queue_event(APM_USER_SUSPEND, as);
if (suspends_pending <= 0) {
ret = suspend(1);
+ mutex_unlock(&apm_mutex);
} else {
as->suspend_wait = 1;
+ mutex_unlock(&apm_mutex);
wait_event_interruptible(apm_suspend_waitqueue,
as->suspend_wait == 0);
ret = as->suspend_result;
}
- unlock_kernel();
return ret;
default:
return -ENOTTY;
@@ -1608,12 +1609,10 @@ static int do_open(struct inode *inode, struct file *filp)
{
struct apm_user *as;
- lock_kernel();
as = kmalloc(sizeof(*as), GFP_KERNEL);
if (as == NULL) {
printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
sizeof(*as));
- unlock_kernel();
return -ENOMEM;
}
as->magic = APM_BIOS_MAGIC;
@@ -1635,7 +1634,6 @@ static int do_open(struct inode *inode, struct file *filp)
user_list = as;
spin_unlock(&user_list_lock);
filp->private_data = as;
- unlock_kernel();
return 0;
}
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 63a88e1f987..b0206a211b0 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -101,21 +101,17 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
}
int
-uv_bios_mq_watchlist_alloc(int blade, unsigned long addr, unsigned int mq_size,
+uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size,
unsigned long *intr_mmr_offset)
{
- union uv_watchlist_u size_blade;
u64 watchlist;
s64 ret;
- size_blade.size = mq_size;
- size_blade.blade = blade;
-
/*
* bios returns watchlist number or negative error number.
*/
ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr,
- size_blade.val, (u64)intr_mmr_offset,
+ mq_size, (u64)intr_mmr_offset,
(u64)&watchlist, 0);
if (ret < BIOS_STATUS_SUCCESS)
return ret;
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 68537e957a9..1d2cb383410 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -5,6 +5,7 @@
# Don't trace early stages of a secondary CPU boot
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_common.o = -pg
+CFLAGS_REMOVE_perf_event.o = -pg
endif
# Make sure load_percpu_segment has no stackprotector
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index c965e521271..468489b57aa 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -74,6 +74,7 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
unsigned int eax, ebx, ecx, edx, sub_index;
unsigned int ht_mask_width, core_plus_mask_width;
unsigned int core_select_mask, core_level_siblings;
+ static bool printed;
if (c->cpuid_level < 0xb)
return;
@@ -127,12 +128,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
c->x86_max_cores = (core_level_siblings / smp_num_siblings);
-
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
- c->phys_proc_id);
- if (c->x86_max_cores > 1)
- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
- c->cpu_core_id);
+ if (!printed) {
+ printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
+ c->phys_proc_id);
+ if (c->x86_max_cores > 1)
+ printk(KERN_INFO "CPU: Processor Core ID: %d\n",
+ c->cpu_core_id);
+ printed = 1;
+ }
return;
#endif
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c910a716a71..8dc3ea145c9 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -375,8 +375,6 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
node = nearby_node(apicid);
}
numa_set_node(cpu, node);
-
- printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
#endif
}
@@ -535,7 +533,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
}
}
- display_cacheinfo(c);
+ cpu_detect_cache_sizes(c);
/* Multi core CPU? */
if (c->extended_cpuid_level >= 0x80000008) {
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index c95e831bb09..e58d978e075 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -294,7 +294,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
}
- display_cacheinfo(c);
+ cpu_detect_cache_sizes(c);
}
enum {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cc25c2b4a56..4868e4a951e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -61,7 +61,7 @@ void __init setup_cpu_local_masks(void)
static void __cpuinit default_init(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
- display_cacheinfo(c);
+ cpu_detect_cache_sizes(c);
#else
/* Not much we can do here... */
/* Check if at least it has cpuid */
@@ -383,7 +383,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
}
}
-void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
+void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ebx, ecx, edx, l2size;
@@ -391,8 +391,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
if (n >= 0x80000005) {
cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
- printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
- edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
c->x86_cache_size = (ecx>>24) + (edx>>24);
#ifdef CONFIG_X86_64
/* On K8 L1 TLB is inclusive, so don't count it */
@@ -422,9 +420,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
#endif
c->x86_cache_size = l2size;
-
- printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
- l2size, ecx & 0xFF);
}
void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -432,6 +427,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_HT
u32 eax, ebx, ecx, edx;
int index_msb, core_bits;
+ static bool printed;
if (!cpu_has(c, X86_FEATURE_HT))
return;
@@ -447,7 +443,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
smp_num_siblings = (ebx & 0xff0000) >> 16;
if (smp_num_siblings == 1) {
- printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
+ printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n");
goto out;
}
@@ -474,11 +470,12 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
((1 << core_bits) - 1);
out:
- if ((c->x86_max_cores * smp_num_siblings) > 1) {
+ if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
c->phys_proc_id);
printk(KERN_INFO "CPU: Processor Core ID: %d\n",
c->cpu_core_id);
+ printed = 1;
}
#endif
}
@@ -659,24 +656,31 @@ void __init early_cpu_init(void)
const struct cpu_dev *const *cdev;
int count = 0;
+#ifdef PROCESSOR_SELECT
printk(KERN_INFO "KERNEL supported cpus:\n");
+#endif
+
for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
const struct cpu_dev *cpudev = *cdev;
- unsigned int j;
if (count >= X86_VENDOR_NUM)
break;
cpu_devs[count] = cpudev;
count++;
- for (j = 0; j < 2; j++) {
- if (!cpudev->c_ident[j])
- continue;
- printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
- cpudev->c_ident[j]);
+#ifdef PROCESSOR_SELECT
+ {
+ unsigned int j;
+
+ for (j = 0; j < 2; j++) {
+ if (!cpudev->c_ident[j])
+ continue;
+ printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
+ cpudev->c_ident[j]);
+ }
}
+#endif
}
-
early_identify_cpu(&boot_cpu_data);
}
@@ -837,10 +841,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
}
-#ifdef CONFIG_X86_MCE
/* Init Machine Check Exception if available. */
- mcheck_init(c);
-#endif
+ mcheck_cpu_init(c);
select_idle_routine(c);
@@ -1093,7 +1095,7 @@ static void clear_all_debug_regs(void)
void __cpuinit cpu_init(void)
{
- struct orig_ist *orig_ist;
+ struct orig_ist *oist;
struct task_struct *me;
struct tss_struct *t;
unsigned long v;
@@ -1102,7 +1104,7 @@ void __cpuinit cpu_init(void)
cpu = stack_smp_processor_id();
t = &per_cpu(init_tss, cpu);
- orig_ist = &per_cpu(orig_ist, cpu);
+ oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
if (cpu != 0 && percpu_read(node_number) == 0 &&
@@ -1115,7 +1117,7 @@ void __cpuinit cpu_init(void)
if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
panic("CPU#%d already initialized!\n", cpu);
- printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+ pr_debug("Initializing CPU#%d\n", cpu);
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
@@ -1136,19 +1138,19 @@ void __cpuinit cpu_init(void)
wrmsrl(MSR_KERNEL_GS_BASE, 0);
barrier();
- check_efer();
+ x86_configure_nx();
if (cpu != 0)
enable_x2apic();
/*
* set up and load the per-CPU TSS
*/
- if (!orig_ist->ist[0]) {
+ if (!oist->ist[0]) {
char *estacks = per_cpu(exception_stacks, cpu);
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
estacks += exception_stack_sizes[v];
- orig_ist->ist[v] = t->x86_tss.ist[v] =
+ oist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks;
}
}
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 6de9a908e40..3624e8a0f71 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -32,6 +32,6 @@ struct cpu_dev {
extern const struct cpu_dev *const __x86_cpu_dev_start[],
*const __x86_cpu_dev_end[];
-extern void display_cacheinfo(struct cpuinfo_x86 *c);
+extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
#endif
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
index dca325c0399..b368cd86299 100644
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -30,9 +30,9 @@
#include <asm/apic.h>
#include <asm/desc.h>
-static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr);
-static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr);
-static DEFINE_PER_CPU(int, cpu_priv_count);
+static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpud_arr);
+static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], cpud_priv_arr);
+static DEFINE_PER_CPU(int, cpud_priv_count);
static DEFINE_MUTEX(cpu_debug_lock);
@@ -531,7 +531,7 @@ static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
/* Already intialized */
if (file == CPU_INDEX_BIT)
- if (per_cpu(cpu_arr[type].init, cpu))
+ if (per_cpu(cpud_arr[type].init, cpu))
return 0;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
@@ -543,8 +543,8 @@ static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
priv->reg = reg;
priv->file = file;
mutex_lock(&cpu_debug_lock);
- per_cpu(priv_arr[type], cpu) = priv;
- per_cpu(cpu_priv_count, cpu)++;
+ per_cpu(cpud_priv_arr[type], cpu) = priv;
+ per_cpu(cpud_priv_count, cpu)++;
mutex_unlock(&cpu_debug_lock);
if (file)
@@ -552,10 +552,10 @@ static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
dentry, (void *)priv, &cpu_fops);
else {
debugfs_create_file(cpu_base[type].name, S_IRUGO,
- per_cpu(cpu_arr[type].dentry, cpu),
+ per_cpu(cpud_arr[type].dentry, cpu),
(void *)priv, &cpu_fops);
mutex_lock(&cpu_debug_lock);
- per_cpu(cpu_arr[type].init, cpu) = 1;
+ per_cpu(cpud_arr[type].init, cpu) = 1;
mutex_unlock(&cpu_debug_lock);
}
@@ -615,7 +615,7 @@ static int cpu_init_allreg(unsigned cpu, struct dentry *dentry)
if (!is_typeflag_valid(cpu, cpu_base[type].flag))
continue;
cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry);
- per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry;
+ per_cpu(cpud_arr[type].dentry, cpu) = cpu_dentry;
if (type < CPU_TSS_BIT)
err = cpu_init_msr(cpu, type, cpu_dentry);
@@ -647,11 +647,11 @@ static int cpu_init_cpu(void)
err = cpu_init_allreg(cpu, cpu_dentry);
pr_info("cpu%d(%d) debug files %d\n",
- cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu));
- if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) {
+ cpu, nr_cpu_ids, per_cpu(cpud_priv_count, cpu));
+ if (per_cpu(cpud_priv_count, cpu) > MAX_CPU_FILES) {
pr_err("Register files count %d exceeds limit %d\n",
- per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES);
- per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES;
+ per_cpu(cpud_priv_count, cpu), MAX_CPU_FILES);
+ per_cpu(cpud_priv_count, cpu) = MAX_CPU_FILES;
err = -ENFILE;
}
if (err)
@@ -676,8 +676,8 @@ static void __exit cpu_debug_exit(void)
debugfs_remove_recursive(cpu_debugfs_dir);
for (cpu = 0; cpu < nr_cpu_ids; cpu++)
- for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++)
- kfree(per_cpu(priv_arr[i], cpu));
+ for (i = 0; i < per_cpu(cpud_priv_count, cpu); i++)
+ kfree(per_cpu(cpud_priv_arr[i], cpu));
}
module_init(cpu_debug_init);
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8b581d3905c..f28decf8dde 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -68,9 +68,9 @@ struct acpi_cpufreq_data {
unsigned int cpu_feature;
};
-static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
+static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);
-static DEFINE_PER_CPU(struct aperfmperf, old_perf);
+static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance *acpi_perf_data;
@@ -214,14 +214,14 @@ static u32 get_cur_val(const struct cpumask *mask)
if (unlikely(cpumask_empty(mask)))
return 0;
- switch (per_cpu(drv_data, cpumask_first(mask))->cpu_feature) {
+ switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
case SYSTEM_INTEL_MSR_CAPABLE:
cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
break;
case SYSTEM_IO_CAPABLE:
cmd.type = SYSTEM_IO_CAPABLE;
- perf = per_cpu(drv_data, cpumask_first(mask))->acpi_data;
+ perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
cmd.addr.io.port = perf->control_register.address;
cmd.addr.io.bit_width = perf->control_register.bit_width;
break;
@@ -268,8 +268,8 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
return 0;
- ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf);
- per_cpu(old_perf, cpu) = perf;
+ ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
+ per_cpu(acfreq_old_perf, cpu) = perf;
retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
@@ -278,7 +278,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
- struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
unsigned int freq;
unsigned int cached_freq;
@@ -322,7 +322,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
static int acpi_cpufreq_target(struct cpufreq_policy *policy,
unsigned int target_freq, unsigned int relation)
{
- struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
struct acpi_processor_performance *perf;
struct cpufreq_freqs freqs;
struct drv_cmd cmd;
@@ -416,7 +416,7 @@ out:
static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
{
- struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
dprintk("acpi_cpufreq_verify\n");
@@ -574,7 +574,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
return -ENOMEM;
data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
- per_cpu(drv_data, cpu) = data;
+ per_cpu(acfreq_data, cpu) = data;
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
@@ -725,20 +725,20 @@ err_unreg:
acpi_processor_unregister_performance(perf, cpu);
err_free:
kfree(data);
- per_cpu(drv_data, cpu) = NULL;
+ per_cpu(acfreq_data, cpu) = NULL;
return result;
}
static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
- struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
dprintk("acpi_cpufreq_cpu_exit\n");
if (data) {
cpufreq_frequency_table_put_attr(policy->cpu);
- per_cpu(drv_data, policy->cpu) = NULL;
+ per_cpu(acfreq_data, policy->cpu) = NULL;
acpi_processor_unregister_performance(data->acpi_data,
policy->cpu);
kfree(data);
@@ -749,7 +749,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
{
- struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
dprintk("acpi_cpufreq_resume\n");
@@ -764,14 +764,15 @@ static struct freq_attr *acpi_cpufreq_attr[] = {
};
static struct cpufreq_driver acpi_cpufreq_driver = {
- .verify = acpi_cpufreq_verify,
- .target = acpi_cpufreq_target,
- .init = acpi_cpufreq_cpu_init,
- .exit = acpi_cpufreq_cpu_exit,
- .resume = acpi_cpufreq_resume,
- .name = "acpi-cpufreq",
- .owner = THIS_MODULE,
- .attr = acpi_cpufreq_attr,
+ .verify = acpi_cpufreq_verify,
+ .target = acpi_cpufreq_target,
+ .bios_limit = acpi_processor_get_bios_limit,
+ .init = acpi_cpufreq_cpu_init,
+ .exit = acpi_cpufreq_cpu_exit,
+ .resume = acpi_cpufreq_resume,
+ .name = "acpi-cpufreq",
+ .owner = THIS_MODULE,
+ .attr = acpi_cpufreq_attr,
};
static int __init acpi_cpufreq_init(void)
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index cabd2fa3fc9..7e7eea4f826 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -885,7 +885,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
/* Find ACPI data for processor */
acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
- ACPI_UINT32_MAX, &longhaul_walk_callback,
+ ACPI_UINT32_MAX, &longhaul_walk_callback, NULL,
NULL, (void *)&pr);
/* Check ACPI support for C3 state */
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index f10dea409f4..cb01dac267d 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -164,7 +164,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
}
/* cpuinfo and default policy values */
- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+ policy->cpuinfo.transition_latency = 200000;
policy->cur = busfreq * max_multiplier;
result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index d47c775eb0a..9a97116f89e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -714,14 +714,17 @@ static struct freq_attr *powernow_table_attr[] = {
};
static struct cpufreq_driver powernow_driver = {
- .verify = powernow_verify,
- .target = powernow_target,
- .get = powernow_get,
- .init = powernow_cpu_init,
- .exit = powernow_cpu_exit,
- .name = "powernow-k7",
- .owner = THIS_MODULE,
- .attr = powernow_table_attr,
+ .verify = powernow_verify,
+ .target = powernow_target,
+ .get = powernow_get,
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+ .bios_limit = acpi_processor_get_bios_limit,
+#endif
+ .init = powernow_cpu_init,
+ .exit = powernow_cpu_exit,
+ .name = "powernow-k7",
+ .owner = THIS_MODULE,
+ .attr = powernow_table_attr,
};
static int __init powernow_init(void)
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 3f12dabeab5..a9df9441a9a 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1118,7 +1118,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data,
static int powernowk8_target(struct cpufreq_policy *pol,
unsigned targfreq, unsigned relation)
{
- cpumask_t oldmask;
+ cpumask_var_t oldmask;
struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
u32 checkfid;
u32 checkvid;
@@ -1131,9 +1131,13 @@ static int powernowk8_target(struct cpufreq_policy *pol,
checkfid = data->currfid;
checkvid = data->currvid;
- /* only run on specific CPU from here on */
- oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
+ /* only run on specific CPU from here on. */
+ /* This is poor form: use a workqueue or smp_call_function_single */
+ if (!alloc_cpumask_var(&oldmask, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_copy(oldmask, tsk_cpumask(current));
+ set_cpus_allowed_ptr(current, cpumask_of(pol->cpu));
if (smp_processor_id() != pol->cpu) {
printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1193,7 +1197,8 @@ static int powernowk8_target(struct cpufreq_policy *pol,
ret = 0;
err_out:
- set_cpus_allowed_ptr(current, &oldmask);
+ set_cpus_allowed_ptr(current, oldmask);
+ free_cpumask_var(oldmask);
return ret;
}
@@ -1393,14 +1398,15 @@ static struct freq_attr *powernow_k8_attr[] = {
};
static struct cpufreq_driver cpufreq_amd64_driver = {
- .verify = powernowk8_verify,
- .target = powernowk8_target,
- .init = powernowk8_cpu_init,
- .exit = __devexit_p(powernowk8_cpu_exit),
- .get = powernowk8_get,
- .name = "powernow-k8",
- .owner = THIS_MODULE,
- .attr = powernow_k8_attr,
+ .verify = powernowk8_verify,
+ .target = powernowk8_target,
+ .bios_limit = acpi_processor_get_bios_limit,
+ .init = powernowk8_cpu_init,
+ .exit = __devexit_p(powernowk8_cpu_exit),
+ .get = powernowk8_get,
+ .name = "powernow-k8",
+ .owner = THIS_MODULE,
+ .attr = powernow_k8_attr,
};
/* driver entry point for init */
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 3ae5a7a3a50..2ce8e0b5cc5 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -39,7 +39,7 @@ static struct pci_dev *speedstep_chipset_dev;
/* speedstep_processor
*/
-static unsigned int speedstep_processor;
+static enum speedstep_processor speedstep_processor;
static u32 pmbase;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index f4c290b8482..ad0083abfa2 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -34,7 +34,7 @@ static int relaxed_check;
* GET PROCESSOR CORE SPEED IN KHZ *
*********************************************************************/
-static unsigned int pentium3_get_frequency(unsigned int processor)
+static unsigned int pentium3_get_frequency(enum speedstep_processor processor)
{
/* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
struct {
@@ -227,7 +227,7 @@ static unsigned int pentium4_get_frequency(void)
/* Warning: may get called from smp_call_function_single. */
-unsigned int speedstep_get_frequency(unsigned int processor)
+unsigned int speedstep_get_frequency(enum speedstep_processor processor)
{
switch (processor) {
case SPEEDSTEP_CPU_PCORE:
@@ -380,7 +380,7 @@ EXPORT_SYMBOL_GPL(speedstep_detect_processor);
* DETECT SPEEDSTEP SPEEDS *
*********************************************************************/
-unsigned int speedstep_get_freqs(unsigned int processor,
+unsigned int speedstep_get_freqs(enum speedstep_processor processor,
unsigned int *low_speed,
unsigned int *high_speed,
unsigned int *transition_latency,
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
index 2b6c04e5a30..70d9cea1219 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
@@ -11,18 +11,18 @@
/* processors */
-
-#define SPEEDSTEP_CPU_PIII_C_EARLY 0x00000001 /* Coppermine core */
-#define SPEEDSTEP_CPU_PIII_C 0x00000002 /* Coppermine core */
-#define SPEEDSTEP_CPU_PIII_T 0x00000003 /* Tualatin core */
-#define SPEEDSTEP_CPU_P4M 0x00000004 /* P4-M */
-
+enum speedstep_processor {
+ SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001, /* Coppermine core */
+ SPEEDSTEP_CPU_PIII_C = 0x00000002, /* Coppermine core */
+ SPEEDSTEP_CPU_PIII_T = 0x00000003, /* Tualatin core */
+ SPEEDSTEP_CPU_P4M = 0x00000004, /* P4-M */
/* the following processors are not speedstep-capable and are not auto-detected
* in speedstep_detect_processor(). However, their speed can be detected using
* the speedstep_get_frequency() call. */
-#define SPEEDSTEP_CPU_PM 0xFFFFFF03 /* Pentium M */
-#define SPEEDSTEP_CPU_P4D 0xFFFFFF04 /* desktop P4 */
-#define SPEEDSTEP_CPU_PCORE 0xFFFFFF05 /* Core */
+ SPEEDSTEP_CPU_PM = 0xFFFFFF03, /* Pentium M */
+ SPEEDSTEP_CPU_P4D = 0xFFFFFF04, /* desktop P4 */
+ SPEEDSTEP_CPU_PCORE = 0xFFFFFF05, /* Core */
+};
/* speedstep states -- only two of them */
@@ -31,10 +31,10 @@
/* detect a speedstep-capable processor */
-extern unsigned int speedstep_detect_processor (void);
+extern enum speedstep_processor speedstep_detect_processor(void);
/* detect the current speed (in khz) of the processor */
-extern unsigned int speedstep_get_frequency(unsigned int processor);
+extern unsigned int speedstep_get_frequency(enum speedstep_processor processor);
/* detect the low and high speeds of the processor. The callback
@@ -42,7 +42,7 @@ extern unsigned int speedstep_get_frequency(unsigned int processor);
* SPEEDSTEP_LOW; the second argument is zero so that no
* cpufreq_notify_transition calls are initiated.
*/
-extern unsigned int speedstep_get_freqs(unsigned int processor,
+extern unsigned int speedstep_get_freqs(enum speedstep_processor processor,
unsigned int *low_speed,
unsigned int *high_speed,
unsigned int *transition_latency,
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index befea088e4f..04d73c114e4 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -35,7 +35,7 @@ static int smi_cmd;
static unsigned int smi_sig;
/* info about the processor */
-static unsigned int speedstep_processor;
+static enum speedstep_processor speedstep_processor;
/*
* There are only two frequency states for each processor. Values
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 19807b89f05..4fbd384fb64 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -373,7 +373,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
/* Handle the GX (Formally known as the GX2) */
if (c->x86 == 5 && c->x86_model == 5)
- display_cacheinfo(c);
+ cpu_detect_cache_sizes(c);
else
init_cyrix(c);
}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 40e1835b35e..9c31e8b09d2 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -263,11 +263,13 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
/* Don't do the funky fallback heuristics the AMD version employs
for now. */
node = apicid_to_node[apicid];
- if (node == NUMA_NO_NODE || !node_online(node))
+ if (node == NUMA_NO_NODE)
node = first_node(node_online_map);
+ else if (!node_online(node)) {
+ /* reuse the value from init_cpu_to_node() */
+ node = cpu_to_node(cpu);
+ }
numa_set_node(cpu, node);
-
- printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
#endif
}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 804c40e2bc3..fc6c8ef92dc 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -94,7 +94,7 @@ static const struct _cache_table __cpuinitconst cache_table[] =
{ 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */
{ 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */
{ 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */
- { 0xd7, LVL_3, 2038 }, /* 8-way set assoc, 64 byte line size */
+ { 0xd7, LVL_3, 2048 }, /* 8-way set assoc, 64 byte line size */
{ 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */
{ 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */
{ 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */
@@ -102,6 +102,9 @@ static const struct _cache_table __cpuinitconst cache_table[] =
{ 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */
{ 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */
{ 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */
+ { 0xea, LVL_3, 12288 }, /* 24-way set assoc, 64 byte line size */
+ { 0xeb, LVL_3, 18432 }, /* 24-way set assoc, 64 byte line size */
+ { 0xec, LVL_3, 24576 }, /* 24-way set assoc, 64 byte line size */
{ 0x00, 0, 0}
};
@@ -488,22 +491,6 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
#endif
}
- if (trace)
- printk(KERN_INFO "CPU: Trace cache: %dK uops", trace);
- else if (l1i)
- printk(KERN_INFO "CPU: L1 I cache: %dK", l1i);
-
- if (l1d)
- printk(KERN_CONT ", L1 D cache: %dK\n", l1d);
- else
- printk(KERN_CONT "\n");
-
- if (l2)
- printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
-
- if (l3)
- printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
-
c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
return l2;
@@ -512,26 +499,27 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
#ifdef CONFIG_SYSFS
/* pointer to _cpuid4_info array (for each cache leaf) */
-static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
-#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
+static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
+#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
#ifdef CONFIG_SMP
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
struct _cpuid4_info *this_leaf, *sibling_leaf;
unsigned long num_threads_sharing;
- int index_msb, i;
+ int index_msb, i, sibling;
struct cpuinfo_x86 *c = &cpu_data(cpu);
if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
- struct cpuinfo_x86 *d;
- for_each_online_cpu(i) {
- if (!per_cpu(cpuid4_info, i))
+ for_each_cpu(i, c->llc_shared_map) {
+ if (!per_cpu(ici_cpuid4_info, i))
continue;
- d = &cpu_data(i);
this_leaf = CPUID4_INFO_IDX(i, index);
- cpumask_copy(to_cpumask(this_leaf->shared_cpu_map),
- d->llc_shared_map);
+ for_each_cpu(sibling, c->llc_shared_map) {
+ if (!cpu_online(sibling))
+ continue;
+ set_bit(sibling, this_leaf->shared_cpu_map);
+ }
}
return;
}
@@ -548,7 +536,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
c->apicid >> index_msb) {
cpumask_set_cpu(i,
to_cpumask(this_leaf->shared_cpu_map));
- if (i != cpu && per_cpu(cpuid4_info, i)) {
+ if (i != cpu && per_cpu(ici_cpuid4_info, i)) {
sibling_leaf =
CPUID4_INFO_IDX(i, index);
cpumask_set_cpu(cpu, to_cpumask(
@@ -587,8 +575,8 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
for (i = 0; i < num_cache_leaves; i++)
cache_remove_shared_cpu_map(cpu, i);
- kfree(per_cpu(cpuid4_info, cpu));
- per_cpu(cpuid4_info, cpu) = NULL;
+ kfree(per_cpu(ici_cpuid4_info, cpu));
+ per_cpu(ici_cpuid4_info, cpu) = NULL;
}
static int
@@ -627,15 +615,15 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
if (num_cache_leaves == 0)
return -ENOENT;
- per_cpu(cpuid4_info, cpu) = kzalloc(
+ per_cpu(ici_cpuid4_info, cpu) = kzalloc(
sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
- if (per_cpu(cpuid4_info, cpu) == NULL)
+ if (per_cpu(ici_cpuid4_info, cpu) == NULL)
return -ENOMEM;
smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
if (retval) {
- kfree(per_cpu(cpuid4_info, cpu));
- per_cpu(cpuid4_info, cpu) = NULL;
+ kfree(per_cpu(ici_cpuid4_info, cpu));
+ per_cpu(ici_cpuid4_info, cpu) = NULL;
}
return retval;
@@ -647,7 +635,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
/* pointer to kobject for cpuX/cache */
-static DEFINE_PER_CPU(struct kobject *, cache_kobject);
+static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
struct _index_kobject {
struct kobject kobj;
@@ -656,8 +644,8 @@ struct _index_kobject {
};
/* pointer to array of kobjects for cpuX/cache/indexY */
-static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
-#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))
+static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
+#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y]))
#define show_one_plus(file_name, object, val) \
static ssize_t show_##file_name \
@@ -876,10 +864,10 @@ static struct kobj_type ktype_percpu_entry = {
static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
{
- kfree(per_cpu(cache_kobject, cpu));
- kfree(per_cpu(index_kobject, cpu));
- per_cpu(cache_kobject, cpu) = NULL;
- per_cpu(index_kobject, cpu) = NULL;
+ kfree(per_cpu(ici_cache_kobject, cpu));
+ kfree(per_cpu(ici_index_kobject, cpu));
+ per_cpu(ici_cache_kobject, cpu) = NULL;
+ per_cpu(ici_index_kobject, cpu) = NULL;
free_cache_attributes(cpu);
}
@@ -895,14 +883,14 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
return err;
/* Allocate all required memory */
- per_cpu(cache_kobject, cpu) =
+ per_cpu(ici_cache_kobject, cpu) =
kzalloc(sizeof(struct kobject), GFP_KERNEL);
- if (unlikely(per_cpu(cache_kobject, cpu) == NULL))
+ if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
goto err_out;
- per_cpu(index_kobject, cpu) = kzalloc(
+ per_cpu(ici_index_kobject, cpu) = kzalloc(
sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
- if (unlikely(per_cpu(index_kobject, cpu) == NULL))
+ if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
goto err_out;
return 0;
@@ -926,7 +914,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
if (unlikely(retval < 0))
return retval;
- retval = kobject_init_and_add(per_cpu(cache_kobject, cpu),
+ retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
&ktype_percpu_entry,
&sys_dev->kobj, "%s", "cache");
if (retval < 0) {
@@ -940,12 +928,12 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
this_object->index = i;
retval = kobject_init_and_add(&(this_object->kobj),
&ktype_cache,
- per_cpu(cache_kobject, cpu),
+ per_cpu(ici_cache_kobject, cpu),
"index%1lu", i);
if (unlikely(retval)) {
for (j = 0; j < i; j++)
kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
- kobject_put(per_cpu(cache_kobject, cpu));
+ kobject_put(per_cpu(ici_cache_kobject, cpu));
cpuid4_cache_sysfs_exit(cpu);
return retval;
}
@@ -953,7 +941,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
}
cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
- kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
+ kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
return 0;
}
@@ -962,7 +950,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
unsigned int cpu = sys_dev->id;
unsigned long i;
- if (per_cpu(cpuid4_info, cpu) == NULL)
+ if (per_cpu(ici_cpuid4_info, cpu) == NULL)
return;
if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
return;
@@ -970,7 +958,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
for (i = 0; i < num_cache_leaves; i++)
kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
- kobject_put(per_cpu(cache_kobject, cpu));
+ kobject_put(per_cpu(ici_cache_kobject, cpu));
cpuid4_cache_sysfs_exit(cpu);
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 721a77ca811..a8aacd4b513 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -46,6 +46,9 @@
#include "mce-internal.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/mce.h>
+
int mce_disabled __read_mostly;
#define MISC_MCELOG_MINOR 227
@@ -85,18 +88,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
-static void default_decode_mce(struct mce *m)
+/*
+ * CPU/chipset specific EDAC code can register a notifier call here to print
+ * MCE errors in a human-readable form.
+ */
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
+
+static int default_decode_mce(struct notifier_block *nb, unsigned long val,
+ void *data)
{
pr_emerg("No human readable MCE decoding support on this CPU type.\n");
pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
+
+ return NOTIFY_STOP;
}
-/*
- * CPU/chipset specific EDAC code can register a callback here to print
- * MCE errors in a human-readable form:
- */
-void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
-EXPORT_SYMBOL(x86_mce_decode_callback);
+static struct notifier_block mce_dec_nb = {
+ .notifier_call = default_decode_mce,
+ .priority = -1,
+};
/* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -141,6 +152,9 @@ void mce_log(struct mce *mce)
{
unsigned next, entry;
+ /* Emit the trace record: */
+ trace_mce_record(mce);
+
mce->finished = 0;
wmb();
for (;;) {
@@ -204,9 +218,9 @@ static void print_mce(struct mce *m)
/*
* Print out human-readable details about the MCE error,
- * (if the CPU has an implementation for that):
+ * (if the CPU has an implementation for that)
*/
- x86_mce_decode_callback(m);
+ atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
}
static void print_mce_head(void)
@@ -1122,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */
static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);
-static void mcheck_timer(unsigned long data)
+static void mce_start_timer(unsigned long data)
{
struct timer_list *t = &per_cpu(mce_timer, data);
int *n;
@@ -1187,7 +1201,7 @@ int mce_notify_irq(void)
}
EXPORT_SYMBOL_GPL(mce_notify_irq);
-static int mce_banks_init(void)
+static int __cpuinit __mcheck_cpu_mce_banks_init(void)
{
int i;
@@ -1206,7 +1220,7 @@ static int mce_banks_init(void)
/*
* Initialize Machine Checks for a CPU.
*/
-static int __cpuinit mce_cap_init(void)
+static int __cpuinit __mcheck_cpu_cap_init(void)
{
unsigned b;
u64 cap;
@@ -1228,7 +1242,7 @@ static int __cpuinit mce_cap_init(void)
WARN_ON(banks != 0 && b != banks);
banks = b;
if (!mce_banks) {
- int err = mce_banks_init();
+ int err = __mcheck_cpu_mce_banks_init();
if (err)
return err;
@@ -1244,7 +1258,7 @@ static int __cpuinit mce_cap_init(void)
return 0;
}
-static void mce_init(void)
+static void __mcheck_cpu_init_generic(void)
{
mce_banks_t all_banks;
u64 cap;
@@ -1273,7 +1287,7 @@ static void mce_init(void)
}
/* Add per CPU specific workarounds here */
-static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
+static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@@ -1341,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
return 0;
}
-static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
+static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
if (c->x86 != 5)
return;
@@ -1355,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
}
}
-static void mce_cpu_features(struct cpuinfo_x86 *c)
+static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
{
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
@@ -1369,18 +1383,19 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
}
}
-static void mce_init_timer(void)
+static void __mcheck_cpu_init_timer(void)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
int *n = &__get_cpu_var(mce_next_interval);
+ setup_timer(t, mce_start_timer, smp_processor_id());
+
if (mce_ignore_ce)
return;
*n = check_interval * HZ;
if (!*n)
return;
- setup_timer(t, mcheck_timer, smp_processor_id());
t->expires = round_jiffies(jiffies + *n);
add_timer_on(t, smp_processor_id());
}
@@ -1400,27 +1415,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
*/
-void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
+void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
{
if (mce_disabled)
return;
- mce_ancient_init(c);
+ __mcheck_cpu_ancient_init(c);
if (!mce_available(c))
return;
- if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) {
+ if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
mce_disabled = 1;
return;
}
machine_check_vector = do_machine_check;
- mce_init();
- mce_cpu_features(c);
- mce_init_timer();
+ __mcheck_cpu_init_generic();
+ __mcheck_cpu_init_vendor(c);
+ __mcheck_cpu_init_timer();
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
+
}
/*
@@ -1640,6 +1656,15 @@ static int __init mcheck_enable(char *str)
}
__setup("mce", mcheck_enable);
+int __init mcheck_init(void)
+{
+ atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
+
+ mcheck_intel_therm_init();
+
+ return 0;
+}
+
/*
* Sysfs support
*/
@@ -1648,7 +1673,7 @@ __setup("mce", mcheck_enable);
* Disable machine checks on suspend and shutdown. We can't really handle
* them later.
*/
-static int mce_disable(void)
+static int mce_disable_error_reporting(void)
{
int i;
@@ -1663,12 +1688,12 @@ static int mce_disable(void)
static int mce_suspend(struct sys_device *dev, pm_message_t state)
{
- return mce_disable();
+ return mce_disable_error_reporting();
}
static int mce_shutdown(struct sys_device *dev)
{
- return mce_disable();
+ return mce_disable_error_reporting();
}
/*
@@ -1678,8 +1703,8 @@ static int mce_shutdown(struct sys_device *dev)
*/
static int mce_resume(struct sys_device *dev)
{
- mce_init();
- mce_cpu_features(&current_cpu_data);
+ __mcheck_cpu_init_generic();
+ __mcheck_cpu_init_vendor(&current_cpu_data);
return 0;
}
@@ -1689,8 +1714,8 @@ static void mce_cpu_restart(void *data)
del_timer_sync(&__get_cpu_var(mce_timer));
if (!mce_available(&current_cpu_data))
return;
- mce_init();
- mce_init_timer();
+ __mcheck_cpu_init_generic();
+ __mcheck_cpu_init_timer();
}
/* Reinit MCEs after user configuration changes */
@@ -1716,7 +1741,7 @@ static void mce_enable_ce(void *all)
cmci_reenable();
cmci_recheck();
if (all)
- mce_init_timer();
+ __mcheck_cpu_init_timer();
}
static struct sysdev_class mce_sysclass = {
@@ -1904,7 +1929,7 @@ error2:
sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr);
error:
while (--i >= 0)
- sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
+ sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
sysdev_unregister(&per_cpu(mce_dev, cpu));
@@ -1929,13 +1954,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
}
/* Make sure there are no machine checks on offlined CPUs. */
-static void mce_disable_cpu(void *h)
+static void __cpuinit mce_disable_cpu(void *h)
{
unsigned long action = *(unsigned long *)h;
int i;
if (!mce_available(&current_cpu_data))
return;
+
if (!(action & CPU_TASKS_FROZEN))
cmci_clear();
for (i = 0; i < banks; i++) {
@@ -1946,7 +1972,7 @@ static void mce_disable_cpu(void *h)
}
}
-static void mce_reenable_cpu(void *h)
+static void __cpuinit mce_reenable_cpu(void *h)
{
unsigned long action = *(unsigned long *)h;
int i;
@@ -1991,9 +2017,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- t->expires = round_jiffies(jiffies +
+ if (!mce_ignore_ce && check_interval) {
+ t->expires = round_jiffies(jiffies +
__get_cpu_var(mce_next_interval));
- add_timer_on(t, cpu);
+ add_timer_on(t, cpu);
+ }
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
break;
case CPU_POST_DEAD:
@@ -2025,7 +2053,7 @@ static __init void mce_init_banks(void)
}
}
-static __init int mce_init_device(void)
+static __init int mcheck_init_device(void)
{
int err;
int i = 0;
@@ -2053,7 +2081,7 @@ static __init int mce_init_device(void)
return err;
}
-device_initcall(mce_init_device);
+device_initcall(mcheck_init_device);
/*
* Old style boot options parsing. Only for compatibility.
@@ -2101,7 +2129,7 @@ static int fake_panic_set(void *data, u64 val)
DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
fake_panic_set, "%llu\n");
-static int __init mce_debugfs_init(void)
+static int __init mcheck_debugfs_init(void)
{
struct dentry *dmce, *ffake_panic;
@@ -2115,5 +2143,5 @@ static int __init mce_debugfs_init(void)
return 0;
}
-late_initcall(mce_debugfs_init);
+late_initcall(mcheck_debugfs_init);
#endif
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index b3a1dba7533..81c499eceb2 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state);
static atomic_t therm_throt_en = ATOMIC_INIT(0);
+static u32 lvtthmr_init __read_mostly;
+
#ifdef CONFIG_SYSFS
#define define_therm_throt_sysdev_one_ro(_name) \
static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
@@ -254,14 +256,34 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
ack_APIC_irq();
}
+/* Thermal monitoring depends on APIC, ACPI and clock modulation */
+static int intel_thermal_supported(struct cpuinfo_x86 *c)
+{
+ if (!cpu_has_apic)
+ return 0;
+ if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
+ return 0;
+ return 1;
+}
+
+void __init mcheck_intel_therm_init(void)
+{
+ /*
+ * This function is only called on boot CPU. Save the init thermal
+ * LVT value on BSP and use that value to restore APs' thermal LVT
+ * entry BIOS programmed later
+ */
+ if (intel_thermal_supported(&boot_cpu_data))
+ lvtthmr_init = apic_read(APIC_LVTTHMR);
+}
+
void intel_init_thermal(struct cpuinfo_x86 *c)
{
unsigned int cpu = smp_processor_id();
int tm2 = 0;
u32 l, h;
- /* Thermal monitoring depends on ACPI and clock modulation*/
- if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
+ if (!intel_thermal_supported(c))
return;
/*
@@ -270,7 +292,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
* since it might be delivered via SMI already:
*/
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- h = apic_read(APIC_LVTTHMR);
+
+ /*
+ * The initial value of thermal LVT entries on all APs always reads
+ * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
+ * sequence to them and LVT registers are reset to 0s except for
+ * the mask bits which are set to 1s when APs receive INIT IPI.
+ * Always restore the value that BIOS has programmed on AP based on
+ * BSP's info we saved since BIOS is always setting the same value
+ * for all threads/cores
+ */
+ apic_write(APIC_LVTTHMR, lvtthmr_init);
+
+ h = lvtthmr_init;
+
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
printk(KERN_DEBUG
"CPU%d: Thermal monitoring handled by SMI\n", cpu);
@@ -312,8 +347,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
- printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
- cpu, tm2 ? "TM2" : "TM1");
+ printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
+ tm2 ? "TM2" : "TM1");
/* enable thermal throttle processing */
atomic_set(&therm_throt_en, 1);
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 73c86db5acb..09b1698e046 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -170,6 +170,41 @@ static int __init cmp_range(const void *x1, const void *x2)
return start1 - start2;
}
+static int __init clean_sort_range(struct res_range *range, int az)
+{
+ int i, j, k = az - 1, nr_range = 0;
+
+ for (i = 0; i < k; i++) {
+ if (range[i].end)
+ continue;
+ for (j = k; j > i; j--) {
+ if (range[j].end) {
+ k = j;
+ break;
+ }
+ }
+ if (j == i)
+ break;
+ range[i].start = range[k].start;
+ range[i].end = range[k].end;
+ range[k].start = 0;
+ range[k].end = 0;
+ k--;
+ }
+ /* count it */
+ for (i = 0; i < az; i++) {
+ if (!range[i].end) {
+ nr_range = i;
+ break;
+ }
+ }
+
+ /* sort them */
+ sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+
+ return nr_range;
+}
+
#define BIOS_BUG_MSG KERN_WARNING \
"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
@@ -223,22 +258,18 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
subtract_range(range, extra_remove_base,
extra_remove_base + extra_remove_size - 1);
- /* get new range num */
- nr_range = 0;
- for (i = 0; i < RANGE_NUM; i++) {
- if (!range[i].end)
- continue;
- nr_range++;
- }
if (debug_print) {
printk(KERN_DEBUG "After UC checking\n");
- for (i = 0; i < nr_range; i++)
+ for (i = 0; i < RANGE_NUM; i++) {
+ if (!range[i].end)
+ continue;
printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
range[i].start, range[i].end + 1);
+ }
}
/* sort the ranges */
- sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+ nr_range = clean_sort_range(range, RANGE_NUM);
if (debug_print) {
printk(KERN_DEBUG "After sorting\n");
for (i = 0; i < nr_range; i++)
@@ -689,8 +720,6 @@ static int __init mtrr_need_cleanup(void)
continue;
if (!size)
type = MTRR_NUM_TYPES;
- if (type == MTRR_TYPE_WRPROT)
- type = MTRR_TYPE_UNCACHABLE;
num[type]++;
}
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 3c1b12d461d..e006e56f699 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -4,6 +4,7 @@
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/ctype.h>
+#include <linux/string.h>
#include <linux/init.h>
#define LINE_SIZE 80
@@ -133,8 +134,7 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
return -EINVAL;
base = simple_strtoull(line + 5, &ptr, 0);
- while (isspace(*ptr))
- ptr++;
+ ptr = skip_spaces(ptr);
if (strncmp(ptr, "size=", 5))
return -EINVAL;
@@ -142,14 +142,11 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
size = simple_strtoull(ptr + 5, &ptr, 0);
if ((base & 0xfff) || (size & 0xfff))
return -EINVAL;
- while (isspace(*ptr))
- ptr++;
+ ptr = skip_spaces(ptr);
if (strncmp(ptr, "type=", 5))
return -EINVAL;
- ptr += 5;
- while (isspace(*ptr))
- ptr++;
+ ptr = skip_spaces(ptr + 5);
for (i = 0; i < MTRR_NUM_TYPES; ++i) {
if (strcmp(ptr, mtrr_strings[i]))
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b5801c31184..45506d5dd8d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -77,6 +77,18 @@ struct cpu_hw_events {
struct debug_store *ds;
};
+struct event_constraint {
+ unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ int code;
+};
+
+#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
+#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 }
+
+#define for_each_event_constraint(e, c) \
+ for ((e) = (c); (e)->idxmsk[0]; (e)++)
+
+
/*
* struct x86_pmu - generic x86 pmu
*/
@@ -102,6 +114,8 @@ struct x86_pmu {
u64 intel_ctrl;
void (*enable_bts)(u64 config);
void (*disable_bts)(void);
+ int (*get_event_idx)(struct cpu_hw_events *cpuc,
+ struct hw_perf_event *hwc);
};
static struct x86_pmu x86_pmu __read_mostly;
@@ -110,6 +124,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
.enabled = 1,
};
+static const struct event_constraint *event_constraints;
+
/*
* Not sure about some of these
*/
@@ -155,6 +171,16 @@ static u64 p6_pmu_raw_event(u64 hw_event)
return hw_event & P6_EVNTSEL_MASK;
}
+static const struct event_constraint intel_p6_event_constraints[] =
+{
+ EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
+ EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+ EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
+ EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+ EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+ EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+ EVENT_CONSTRAINT_END
+};
/*
* Intel PerfMon v3. Used on Core2 and later.
@@ -170,6 +196,35 @@ static const u64 intel_perfmon_event_map[] =
[PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
};
+static const struct event_constraint intel_core_event_constraints[] =
+{
+ EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+ EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+ EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+ EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+ EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+ EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
+ EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+ EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
+ EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
+ EVENT_CONSTRAINT_END
+};
+
+static const struct event_constraint intel_nehalem_event_constraints[] =
+{
+ EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
+ EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
+ EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
+ EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
+ EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
+ EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */
+ EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+ EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
+ EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */
+ EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */
+ EVENT_CONSTRAINT_END
+};
+
static u64 intel_pmu_event_map(int hw_event)
{
return intel_perfmon_event_map[hw_event];
@@ -190,7 +245,7 @@ static u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
-static const u64 nehalem_hw_cache_event_ids
+static __initconst u64 nehalem_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -281,7 +336,7 @@ static const u64 nehalem_hw_cache_event_ids
},
};
-static const u64 core2_hw_cache_event_ids
+static __initconst u64 core2_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -372,7 +427,7 @@ static const u64 core2_hw_cache_event_ids
},
};
-static const u64 atom_hw_cache_event_ids
+static __initconst u64 atom_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -469,7 +524,7 @@ static u64 intel_pmu_raw_event(u64 hw_event)
#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
-#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
+#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
#define CORE_EVNTSEL_MASK \
(CORE_EVNTSEL_EVENT_MASK | \
@@ -481,7 +536,7 @@ static u64 intel_pmu_raw_event(u64 hw_event)
return hw_event & CORE_EVNTSEL_MASK;
}
-static const u64 amd_hw_cache_event_ids
+static __initconst u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -932,6 +987,8 @@ static int __hw_perf_event_init(struct perf_event *event)
*/
hwc->config = ARCH_PERFMON_EVENTSEL_INT;
+ hwc->idx = -1;
+
/*
* Count user and OS events unless requested not to.
*/
@@ -1229,7 +1286,7 @@ x86_perf_event_set_period(struct perf_event *event,
return 0;
/*
- * If we are way outside a reasoable range then just skip forward:
+ * If we are way outside a reasonable range then just skip forward:
*/
if (unlikely(left <= -period)) {
left = period;
@@ -1334,8 +1391,7 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
x86_pmu_enable_event(hwc, idx);
}
-static int
-fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
+static int fixed_mode_idx(struct hw_perf_event *hwc)
{
unsigned int hw_event;
@@ -1349,6 +1405,12 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
if (!x86_pmu.num_events_fixed)
return -1;
+ /*
+ * fixed counters do not take all possible filters
+ */
+ if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
+ return -1;
+
if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
return X86_PMC_IDX_FIXED_INSTRUCTIONS;
if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
@@ -1360,22 +1422,57 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
}
/*
- * Find a PMC slot for the freshly enabled / scheduled in event:
+ * generic counter allocator: get next free counter
*/
-static int x86_pmu_enable(struct perf_event *event)
+static int
+gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
+{
+ int idx;
+
+ idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
+ return idx == x86_pmu.num_events ? -1 : idx;
+}
+
+/*
+ * intel-specific counter allocator: check event constraints
+ */
+static int
+intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
+{
+ const struct event_constraint *event_constraint;
+ int i, code;
+
+ if (!event_constraints)
+ goto skip;
+
+ code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
+
+ for_each_event_constraint(event_constraint, event_constraints) {
+ if (code == event_constraint->code) {
+ for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
+ if (!test_and_set_bit(i, cpuc->used_mask))
+ return i;
+ }
+ return -1;
+ }
+ }
+skip:
+ return gen_get_event_idx(cpuc, hwc);
+}
+
+static int
+x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
int idx;
- idx = fixed_mode_idx(event, hwc);
+ idx = fixed_mode_idx(hwc);
if (idx == X86_PMC_IDX_FIXED_BTS) {
/* BTS is already occupied. */
if (test_and_set_bit(idx, cpuc->used_mask))
return -EAGAIN;
hwc->config_base = 0;
- hwc->event_base = 0;
+ hwc->event_base = 0;
hwc->idx = idx;
} else if (idx >= 0) {
/*
@@ -1396,20 +1493,35 @@ static int x86_pmu_enable(struct perf_event *event)
} else {
idx = hwc->idx;
/* Try to get the previous generic event again */
- if (test_and_set_bit(idx, cpuc->used_mask)) {
+ if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
try_generic:
- idx = find_first_zero_bit(cpuc->used_mask,
- x86_pmu.num_events);
- if (idx == x86_pmu.num_events)
+ idx = x86_pmu.get_event_idx(cpuc, hwc);
+ if (idx == -1)
return -EAGAIN;
set_bit(idx, cpuc->used_mask);
hwc->idx = idx;
}
- hwc->config_base = x86_pmu.eventsel;
- hwc->event_base = x86_pmu.perfctr;
+ hwc->config_base = x86_pmu.eventsel;
+ hwc->event_base = x86_pmu.perfctr;
}
+ return idx;
+}
+
+/*
+ * Find a PMC slot for the freshly enabled / scheduled in event:
+ */
+static int x86_pmu_enable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx;
+
+ idx = x86_schedule_event(cpuc, hwc);
+ if (idx < 0)
+ return idx;
+
perf_events_lapic_init();
x86_pmu.disable(hwc, idx);
@@ -1520,6 +1632,7 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
data.period = event->hw.last_period;
data.addr = 0;
+ data.raw = NULL;
regs.ip = 0;
/*
@@ -1637,6 +1750,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
u64 val;
data.addr = 0;
+ data.raw = NULL;
cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1682,6 +1796,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
u64 ack, status;
data.addr = 0;
+ data.raw = NULL;
cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1745,6 +1860,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
u64 val;
data.addr = 0;
+ data.raw = NULL;
cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1852,7 +1968,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = {
.priority = 1
};
-static struct x86_pmu p6_pmu = {
+static __initconst struct x86_pmu p6_pmu = {
.name = "p6",
.handle_irq = p6_pmu_handle_irq,
.disable_all = p6_pmu_disable_all,
@@ -1877,9 +1993,10 @@ static struct x86_pmu p6_pmu = {
*/
.event_bits = 32,
.event_mask = (1ULL << 32) - 1,
+ .get_event_idx = intel_get_event_idx,
};
-static struct x86_pmu intel_pmu = {
+static __initconst struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
.disable_all = intel_pmu_disable_all,
@@ -1900,9 +2017,10 @@ static struct x86_pmu intel_pmu = {
.max_period = (1ULL << 31) - 1,
.enable_bts = intel_pmu_enable_bts,
.disable_bts = intel_pmu_disable_bts,
+ .get_event_idx = intel_get_event_idx,
};
-static struct x86_pmu amd_pmu = {
+static __initconst struct x86_pmu amd_pmu = {
.name = "AMD",
.handle_irq = amd_pmu_handle_irq,
.disable_all = amd_pmu_disable_all,
@@ -1920,9 +2038,10 @@ static struct x86_pmu amd_pmu = {
.apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
+ .get_event_idx = gen_get_event_idx,
};
-static int p6_pmu_init(void)
+static __init int p6_pmu_init(void)
{
switch (boot_cpu_data.x86_model) {
case 1:
@@ -1932,10 +2051,12 @@ static int p6_pmu_init(void)
case 7:
case 8:
case 11: /* Pentium III */
+ event_constraints = intel_p6_event_constraints;
break;
case 9:
case 13:
/* Pentium M */
+ event_constraints = intel_p6_event_constraints;
break;
default:
pr_cont("unsupported p6 CPU model %d ",
@@ -1945,16 +2066,10 @@ static int p6_pmu_init(void)
x86_pmu = p6_pmu;
- if (!cpu_has_apic) {
- pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
- pr_info("no hardware sampling interrupt available.\n");
- x86_pmu.apic = 0;
- }
-
return 0;
}
-static int intel_pmu_init(void)
+static __init int intel_pmu_init(void)
{
union cpuid10_edx edx;
union cpuid10_eax eax;
@@ -2007,12 +2122,14 @@ static int intel_pmu_init(void)
sizeof(hw_cache_event_ids));
pr_cont("Core2 events, ");
+ event_constraints = intel_core_event_constraints;
break;
default:
case 26:
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ event_constraints = intel_nehalem_event_constraints;
pr_cont("Nehalem/Corei7 events, ");
break;
case 28:
@@ -2025,7 +2142,7 @@ static int intel_pmu_init(void)
return 0;
}
-static int amd_pmu_init(void)
+static __init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
if (boot_cpu_data.x86 < 6)
@@ -2040,6 +2157,16 @@ static int amd_pmu_init(void)
return 0;
}
+static void __init pmu_check_apic(void)
+{
+ if (cpu_has_apic)
+ return;
+
+ x86_pmu.apic = 0;
+ pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
+ pr_info("no hardware sampling interrupt available.\n");
+}
+
void __init init_hw_perf_events(void)
{
int err;
@@ -2061,6 +2188,8 @@ void __init init_hw_perf_events(void)
return;
}
+ pmu_check_apic();
+
pr_cont("%s PMU driver.\n", x86_pmu.name);
if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
@@ -2105,11 +2234,47 @@ static const struct pmu pmu = {
.unthrottle = x86_pmu_unthrottle,
};
+static int
+validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ struct hw_perf_event fake_event = event->hw;
+
+ if (event->pmu && event->pmu != &pmu)
+ return 0;
+
+ return x86_schedule_event(cpuc, &fake_event) >= 0;
+}
+
+static int validate_group(struct perf_event *event)
+{
+ struct perf_event *sibling, *leader = event->group_leader;
+ struct cpu_hw_events fake_pmu;
+
+ memset(&fake_pmu, 0, sizeof(fake_pmu));
+
+ if (!validate_event(&fake_pmu, leader))
+ return -ENOSPC;
+
+ list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+ if (!validate_event(&fake_pmu, sibling))
+ return -ENOSPC;
+ }
+
+ if (!validate_event(&fake_pmu, event))
+ return -ENOSPC;
+
+ return 0;
+}
+
const struct pmu *hw_perf_event_init(struct perf_event *event)
{
int err;
err = __hw_perf_event_init(event);
+ if (!err) {
+ if (event->group_leader != event)
+ err = validate_group(event);
+ }
if (err) {
if (event->destroy)
event->destroy(event);
@@ -2132,7 +2297,7 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
-static DEFINE_PER_CPU(int, in_nmi_frame);
+static DEFINE_PER_CPU(int, in_ignored_frame);
static void
@@ -2148,8 +2313,9 @@ static void backtrace_warning(void *data, char *msg)
static int backtrace_stack(void *data, char *name)
{
- per_cpu(in_nmi_frame, smp_processor_id()) =
- x86_is_stack_id(NMI_STACK, name);
+ per_cpu(in_ignored_frame, smp_processor_id()) =
+ x86_is_stack_id(NMI_STACK, name) ||
+ x86_is_stack_id(DEBUG_STACK, name);
return 0;
}
@@ -2158,7 +2324,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
{
struct perf_callchain_entry *entry = data;
- if (per_cpu(in_nmi_frame, smp_processor_id()))
+ if (per_cpu(in_ignored_frame, smp_processor_id()))
return;
if (reliable)
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index fab786f60ed..898df9719af 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -712,7 +712,7 @@ static void probe_nmi_watchdog(void)
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
- boot_cpu_data.x86 != 16)
+ boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17)
return;
wd_ops = &k7_wd_ops;
break;
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index bb62b3e5caa..28000743bbb 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -26,7 +26,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
early_init_transmeta(c);
- display_cacheinfo(c);
+ cpu_detect_cache_sizes(c);
/* Print CMS and CPU revision */
max = cpuid_eax(0x80860000);
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 6a52d4b36a3..7ef24a79699 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -116,21 +116,16 @@ static int cpuid_open(struct inode *inode, struct file *file)
{
unsigned int cpu;
struct cpuinfo_x86 *c;
- int ret = 0;
-
- lock_kernel();
cpu = iminor(file->f_path.dentry->d_inode);
- if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
- ret = -ENXIO; /* No such CPU */
- goto out;
- }
+ if (cpu >= nr_cpu_ids || !cpu_online(cpu))
+ return -ENXIO; /* No such CPU */
+
c = &cpu_data(cpu);
if (c->cpuid_level < 0)
- ret = -EIO; /* CPUID not supported */
-out:
- unlock_kernel();
- return ret;
+ return -EIO; /* CPUID not supported */
+
+ return 0;
}
/*
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index ef42a038f1a..1c47390dd0e 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -265,13 +265,13 @@ struct ds_context {
int cpu;
};
-static DEFINE_PER_CPU(struct ds_context *, cpu_context);
+static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context);
static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
{
struct ds_context **p_context =
- (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu));
+ (task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu));
struct ds_context *context = NULL;
struct ds_context *new_context = NULL;
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 2d8a371d433..0a0aa1cec8f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -188,7 +188,7 @@ void dump_stack(void)
}
EXPORT_SYMBOL(dump_stack);
-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;
@@ -207,11 +207,11 @@ unsigned __kprobes long oops_begin(void)
/* racy, but better than risking deadlock. */
raw_local_irq_save(flags);
cpu = smp_processor_id();
- if (!__raw_spin_trylock(&die_lock)) {
+ if (!arch_spin_trylock(&die_lock)) {
if (cpu == die_owner)
/* nested oops. should stop eventually */;
else
- __raw_spin_lock(&die_lock);
+ arch_spin_lock(&die_lock);
}
die_nest_count++;
die_owner = cpu;
@@ -231,7 +231,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
die_nest_count--;
if (!die_nest_count)
/* Nest count reaches zero, release the lock. */
- __raw_spin_unlock(&die_lock);
+ arch_spin_unlock(&die_lock);
raw_local_irq_restore(flags);
oops_exit();
@@ -268,11 +268,12 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
show_registers(regs);
#ifdef CONFIG_X86_32
- sp = (unsigned long) (&regs->sp);
- savesegment(ss, ss);
- if (user_mode(regs)) {
+ if (user_mode_vm(regs)) {
sp = regs->sp;
ss = regs->ss & 0xffff;
+ } else {
+ sp = kernel_stack_pointer(regs);
+ savesegment(ss, ss);
}
printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
print_symbol("%s", regs->ip);
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index f7dd2a7c3bf..e0ed4c7abb6 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -10,9 +10,9 @@
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/kexec.h>
+#include <linux/sysfs.h>
#include <linux/bug.h>
#include <linux/nmi.h>
-#include <linux/sysfs.h>
#include <asm/stacktrace.h>
@@ -35,6 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (!stack) {
unsigned long dummy;
+
stack = &dummy;
if (task && task != current)
stack = (unsigned long *)task->thread.sp;
@@ -57,8 +58,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
context = (struct thread_info *)
((unsigned long)stack & (~(THREAD_SIZE - 1)));
- bp = print_context_stack(context, stack, bp, ops,
- data, NULL, &graph);
+ bp = print_context_stack(context, stack, bp, ops, data, NULL, &graph);
stack = (unsigned long *)context->previous_esp;
if (!stack)
@@ -72,7 +72,7 @@ EXPORT_SYMBOL(dump_trace);
void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp, char *log_lvl)
+ unsigned long *sp, unsigned long bp, char *log_lvl)
{
unsigned long *stack;
int i;
@@ -156,4 +156,3 @@ int is_valid_bugaddr(unsigned long ip)
return ud2 == 0x0b0f;
}
-
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index a071e6be177..b13af53883a 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -10,26 +10,28 @@
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/kexec.h>
+#include <linux/sysfs.h>
#include <linux/bug.h>
#include <linux/nmi.h>
-#include <linux/sysfs.h>
#include <asm/stacktrace.h>
#include "dumpstack.h"
+#define N_EXCEPTION_STACKS_END \
+ (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
static char x86_stack_ids[][8] = {
- [DEBUG_STACK - 1] = "#DB",
- [NMI_STACK - 1] = "NMI",
- [DOUBLEFAULT_STACK - 1] = "#DF",
- [STACKFAULT_STACK - 1] = "#SS",
- [MCE_STACK - 1] = "#MC",
+ [ DEBUG_STACK-1 ] = "#DB",
+ [ NMI_STACK-1 ] = "NMI",
+ [ DOUBLEFAULT_STACK-1 ] = "#DF",
+ [ STACKFAULT_STACK-1 ] = "#SS",
+ [ MCE_STACK-1 ] = "#MC",
#if DEBUG_STKSZ > EXCEPTION_STKSZ
- [N_EXCEPTION_STACKS ...
- N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
+ [ N_EXCEPTION_STACKS ...
+ N_EXCEPTION_STACKS_END ] = "#DB[?]"
#endif
- };
+};
int x86_is_stack_id(int id, char *name)
{
@@ -37,7 +39,7 @@ int x86_is_stack_id(int id, char *name)
}
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
- unsigned *usedp, char **idp)
+ unsigned *usedp, char **idp)
{
unsigned k;
@@ -101,6 +103,35 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
return NULL;
}
+static inline int
+in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
+ unsigned long *irq_stack_end)
+{
+ return (stack >= irq_stack && stack < irq_stack_end);
+}
+
+/*
+ * We are returning from the irq stack and go to the previous one.
+ * If the previous stack is also in the irq stack, then bp in the first
+ * frame of the irq stack points to the previous, interrupted one.
+ * Otherwise we have another level of indirection: We first save
+ * the bp of the previous stack, then we switch the stack to the irq one
+ * and save a new bp that links to the previous one.
+ * (See save_args())
+ */
+static inline unsigned long
+fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
+ unsigned long *irq_stack, unsigned long *irq_stack_end)
+{
+#ifdef CONFIG_FRAME_POINTER
+ struct stack_frame *frame = (struct stack_frame *)bp;
+
+ if (!in_irq_stack(stack, irq_stack, irq_stack_end))
+ return (unsigned long)frame->next_frame;
+#endif
+ return bp;
+}
+
/*
* x86-64 can have up to three kernel stacks:
* process stack
@@ -173,7 +204,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
irq_stack = irq_stack_end -
(IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
- if (stack >= irq_stack && stack < irq_stack_end) {
+ if (in_irq_stack(stack, irq_stack, irq_stack_end)) {
if (ops->stack(data, "IRQ") < 0)
break;
bp = print_context_stack(tinfo, stack, bp,
@@ -184,6 +215,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
* pointer (index -1 to end) in the IRQ stack:
*/
stack = (unsigned long *) (irq_stack_end[-1]);
+ bp = fixup_bp_irq_link(bp, stack, irq_stack,
+ irq_stack_end);
irq_stack_end = NULL;
ops->stack(data, "EOI");
continue;
@@ -202,21 +235,24 @@ EXPORT_SYMBOL(dump_trace);
void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp, char *log_lvl)
+ unsigned long *sp, unsigned long bp, char *log_lvl)
{
+ unsigned long *irq_stack_end;
+ unsigned long *irq_stack;
unsigned long *stack;
+ int cpu;
int i;
- const int cpu = smp_processor_id();
- unsigned long *irq_stack_end =
- (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
- unsigned long *irq_stack =
- (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
+
+ preempt_disable();
+ cpu = smp_processor_id();
+
+ irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
+ irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
/*
- * debugging aid: "show_stack(NULL, NULL);" prints the
- * back trace for this cpu.
+ * Debugging aid: "show_stack(NULL, NULL);" prints the
+ * back trace for this cpu:
*/
-
if (sp == NULL) {
if (task)
sp = (unsigned long *)task->thread.sp;
@@ -240,6 +276,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
printk(" %016lx", *stack++);
touch_nmi_watchdog();
}
+ preempt_enable();
+
printk("\n");
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
@@ -303,4 +341,3 @@ int is_valid_bugaddr(unsigned long ip)
return ud2 == 0x0b0f;
}
-
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d17d482a04f..f50447d961c 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -732,7 +732,16 @@ struct early_res {
char overlap_ok;
};
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
- { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
+ { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
+#ifdef CONFIG_X86_32
+ /*
+ * But first pinch a few for the stack/trampoline stuff
+ * FIXME: Don't need the extra page at 4K, but need to fix
+ * trampoline before removing it. (see the GDT stuff)
+ */
+ { PAGE_SIZE, PAGE_SIZE, "EX TRAMPOLINE", 1 },
+#endif
+
{}
};
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c097e7d607c..50b9c220e12 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -334,6 +334,10 @@ ENTRY(ret_from_fork)
END(ret_from_fork)
/*
+ * Interrupt exit functions should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
+/*
* Return to user mode is not as complex as all this looks,
* but we want the default path for a system call return to
* go as quickly as possible which is why some of this is
@@ -383,6 +387,10 @@ need_resched:
END(resume_kernel)
#endif
CFI_ENDPROC
+/*
+ * End of kprobes section
+ */
+ .popsection
/* SYSENTER_RETURN points to after the "sysenter" instruction in
the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
@@ -513,6 +521,10 @@ sysexit_audit:
PTGS_TO_GS_EX
ENDPROC(ia32_sysenter_target)
+/*
+ * syscall stub including irq exit should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
# system call handler stub
ENTRY(system_call)
RING0_INT_FRAME # can't unwind into user space anyway
@@ -705,6 +717,10 @@ syscall_badsys:
jmp resume_userspace
END(syscall_badsys)
CFI_ENDPROC
+/*
+ * End of kprobes section
+ */
+ .popsection
/*
* System calls that need a pt_regs pointer.
@@ -814,6 +830,10 @@ common_interrupt:
ENDPROC(common_interrupt)
CFI_ENDPROC
+/*
+ * Irq entries should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
#define BUILD_INTERRUPT3(name, nr, fn) \
ENTRY(name) \
RING0_INT_FRAME; \
@@ -980,6 +1000,10 @@ ENTRY(spurious_interrupt_bug)
jmp error_code
CFI_ENDPROC
END(spurious_interrupt_bug)
+/*
+ * End of kprobes section
+ */
+ .popsection
ENTRY(kernel_thread_helper)
pushl $0 # fake return address for unwinder
@@ -1185,17 +1209,14 @@ END(ftrace_graph_caller)
.globl return_to_handler
return_to_handler:
- pushl $0
pushl %eax
- pushl %ecx
pushl %edx
movl %ebp, %eax
call ftrace_return_to_handler
- movl %eax, 0xc(%esp)
+ movl %eax, %ecx
popl %edx
- popl %ecx
popl %eax
- ret
+ jmp *%ecx
#endif
.section .rodata,"a"
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b5c061f8f35..673f693fb45 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -155,11 +155,11 @@ GLOBAL(return_to_handler)
call ftrace_return_to_handler
- movq %rax, 16(%rsp)
+ movq %rax, %rdi
movq 8(%rsp), %rdx
movq (%rsp), %rax
- addq $16, %rsp
- retq
+ addq $24, %rsp
+ jmp *%rdi
#endif
@@ -803,6 +803,10 @@ END(interrupt)
call \func
.endm
+/*
+ * Interrupt entry/exit should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
/*
* The interrupt stubs push (~vector+0x80) onto the stack and
* then jump to common_interrupt.
@@ -941,6 +945,10 @@ ENTRY(retint_kernel)
CFI_ENDPROC
END(common_interrupt)
+/*
+ * End of kprobes section
+ */
+ .popsection
/*
* APIC interrupts.
@@ -969,8 +977,8 @@ apicinterrupt UV_BAU_MESSAGE \
#endif
apicinterrupt LOCAL_TIMER_VECTOR \
apic_timer_interrupt smp_apic_timer_interrupt
-apicinterrupt GENERIC_INTERRUPT_VECTOR \
- generic_interrupt smp_generic_interrupt
+apicinterrupt X86_PLATFORM_IPI_VECTOR \
+ x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_SMP
apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
@@ -1068,10 +1076,10 @@ ENTRY(\sym)
TRACE_IRQS_OFF
movq %rsp,%rdi /* pt_regs pointer */
xorl %esi,%esi /* no error code */
- PER_CPU(init_tss, %rbp)
- subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
+ PER_CPU(init_tss, %r12)
+ subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12)
call \do_sym
- addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
+ addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12)
jmp paranoid_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
END(\sym)
@@ -1491,12 +1499,17 @@ error_kernelspace:
leaq irq_return(%rip),%rcx
cmpq %rcx,RIP+8(%rsp)
je error_swapgs
- movl %ecx,%ecx /* zero extend */
- cmpq %rcx,RIP+8(%rsp)
- je error_swapgs
+ movl %ecx,%eax /* zero extend */
+ cmpq %rax,RIP+8(%rsp)
+ je bstep_iret
cmpq $gs_change,RIP+8(%rsp)
je error_swapgs
jmp error_sti
+
+bstep_iret:
+ /* Fix truncated RIP */
+ movq %rcx,RIP+8(%rsp)
+ jmp error_swapgs
END(error_entry)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 9dbb527e165..30968924543 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -9,6 +9,8 @@
* the dangers of modifying code on the run.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
@@ -187,9 +189,26 @@ static void wait_for_nmi(void)
nmi_wait_count++;
}
+static inline int
+within(unsigned long addr, unsigned long start, unsigned long end)
+{
+ return addr >= start && addr < end;
+}
+
static int
do_ftrace_mod_code(unsigned long ip, void *new_code)
{
+ /*
+ * On x86_64, kernel text mappings are mapped read-only with
+ * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
+ * of the kernel text mapping to modify the kernel text.
+ *
+ * For 32bit kernels, these mappings are same and we can use
+ * kernel identity mapping to modify code.
+ */
+ if (within(ip, (unsigned long)_text, (unsigned long)_etext))
+ ip = (unsigned long)__va(__pa(ip));
+
mod_code_ip = (void *)ip;
mod_code_newcode = new_code;
@@ -336,15 +355,15 @@ int __init ftrace_dyn_arch_init(void *data)
switch (faulted) {
case 0:
- pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
+ pr_info("converting mcount calls to 0f 1f 44 00 00\n");
memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
break;
case 1:
- pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
+ pr_info("converting mcount calls to 66 66 66 66 90\n");
memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
break;
case 2:
- pr_info("ftrace: converting mcount calls to jmp . + 5\n");
+ pr_info("converting mcount calls to jmp . + 5\n");
memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
break;
}
@@ -468,82 +487,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
#ifdef CONFIG_FTRACE_SYSCALLS
-extern unsigned long __start_syscalls_metadata[];
-extern unsigned long __stop_syscalls_metadata[];
extern unsigned long *sys_call_table;
-static struct syscall_metadata **syscalls_metadata;
-
-static struct syscall_metadata *find_syscall_meta(unsigned long *syscall)
-{
- struct syscall_metadata *start;
- struct syscall_metadata *stop;
- char str[KSYM_SYMBOL_LEN];
-
-
- start = (struct syscall_metadata *)__start_syscalls_metadata;
- stop = (struct syscall_metadata *)__stop_syscalls_metadata;
- kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str);
-
- for ( ; start < stop; start++) {
- if (start->name && !strcmp(start->name, str))
- return start;
- }
- return NULL;
-}
-
-struct syscall_metadata *syscall_nr_to_meta(int nr)
-{
- if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
- return NULL;
-
- return syscalls_metadata[nr];
-}
-
-int syscall_name_to_nr(char *name)
-{
- int i;
-
- if (!syscalls_metadata)
- return -1;
-
- for (i = 0; i < NR_syscalls; i++) {
- if (syscalls_metadata[i]) {
- if (!strcmp(syscalls_metadata[i]->name, name))
- return i;
- }
- }
- return -1;
-}
-
-void set_syscall_enter_id(int num, int id)
-{
- syscalls_metadata[num]->enter_id = id;
-}
-
-void set_syscall_exit_id(int num, int id)
+unsigned long __init arch_syscall_addr(int nr)
{
- syscalls_metadata[num]->exit_id = id;
-}
-
-static int __init arch_init_ftrace_syscalls(void)
-{
- int i;
- struct syscall_metadata *meta;
- unsigned long **psys_syscall_table = &sys_call_table;
-
- syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
- NR_syscalls, GFP_KERNEL);
- if (!syscalls_metadata) {
- WARN_ON(1);
- return -ENOMEM;
- }
-
- for (i = 0; i < NR_syscalls; i++) {
- meta = find_syscall_meta(psys_syscall_table[i]);
- syscalls_metadata[i] = meta;
- }
- return 0;
+ return (unsigned long)(&sys_call_table)[nr];
}
-arch_initcall(arch_init_ftrace_syscalls);
#endif
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c
deleted file mode 100644
index 9b08e852fd1..00000000000
--- a/arch/x86/kernel/geode_32.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * AMD Geode southbridge support code
- * Copyright (C) 2006, Advanced Micro Devices, Inc.
- * Copyright (C) 2007, Andres Salomon <dilinger@debian.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/ioport.h>
-#include <linux/io.h>
-#include <asm/msr.h>
-#include <asm/geode.h>
-
-static struct {
- char *name;
- u32 msr;
- int size;
- u32 base;
-} lbars[] = {
- { "geode-pms", MSR_LBAR_PMS, LBAR_PMS_SIZE, 0 },
- { "geode-acpi", MSR_LBAR_ACPI, LBAR_ACPI_SIZE, 0 },
- { "geode-gpio", MSR_LBAR_GPIO, LBAR_GPIO_SIZE, 0 },
- { "geode-mfgpt", MSR_LBAR_MFGPT, LBAR_MFGPT_SIZE, 0 }
-};
-
-static void __init init_lbars(void)
-{
- u32 lo, hi;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(lbars); i++) {
- rdmsr(lbars[i].msr, lo, hi);
- if (hi & 0x01)
- lbars[i].base = lo & 0x0000ffff;
-
- if (lbars[i].base == 0)
- printk(KERN_ERR "geode: Couldn't initialize '%s'\n",
- lbars[i].name);
- }
-}
-
-int geode_get_dev_base(unsigned int dev)
-{
- BUG_ON(dev >= ARRAY_SIZE(lbars));
- return lbars[dev].base;
-}
-EXPORT_SYMBOL_GPL(geode_get_dev_base);
-
-/* === GPIO API === */
-
-void geode_gpio_set(u32 gpio, unsigned int reg)
-{
- u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
-
- if (!base)
- return;
-
- /* low bank register */
- if (gpio & 0xFFFF)
- outl(gpio & 0xFFFF, base + reg);
- /* high bank register */
- gpio >>= 16;
- if (gpio)
- outl(gpio, base + 0x80 + reg);
-}
-EXPORT_SYMBOL_GPL(geode_gpio_set);
-
-void geode_gpio_clear(u32 gpio, unsigned int reg)
-{
- u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
-
- if (!base)
- return;
-
- /* low bank register */
- if (gpio & 0xFFFF)
- outl((gpio & 0xFFFF) << 16, base + reg);
- /* high bank register */
- gpio &= (0xFFFF << 16);
- if (gpio)
- outl(gpio, base + 0x80 + reg);
-}
-EXPORT_SYMBOL_GPL(geode_gpio_clear);
-
-int geode_gpio_isset(u32 gpio, unsigned int reg)
-{
- u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
- u32 val;
-
- if (!base)
- return 0;
-
- /* low bank register */
- if (gpio & 0xFFFF) {
- val = inl(base + reg) & (gpio & 0xFFFF);
- if ((gpio & 0xFFFF) == val)
- return 1;
- }
- /* high bank register */
- gpio >>= 16;
- if (gpio) {
- val = inl(base + 0x80 + reg) & gpio;
- if (gpio == val)
- return 1;
- }
- return 0;
-}
-EXPORT_SYMBOL_GPL(geode_gpio_isset);
-
-void geode_gpio_set_irq(unsigned int group, unsigned int irq)
-{
- u32 lo, hi;
-
- if (group > 7 || irq > 15)
- return;
-
- rdmsr(MSR_PIC_ZSEL_HIGH, lo, hi);
-
- lo &= ~(0xF << (group * 4));
- lo |= (irq & 0xF) << (group * 4);
-
- wrmsr(MSR_PIC_ZSEL_HIGH, lo, hi);
-}
-EXPORT_SYMBOL_GPL(geode_gpio_set_irq);
-
-void geode_gpio_setup_event(unsigned int gpio, int pair, int pme)
-{
- u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
- u32 offset, shift, val;
-
- if (gpio >= 24)
- offset = GPIO_MAP_W;
- else if (gpio >= 16)
- offset = GPIO_MAP_Z;
- else if (gpio >= 8)
- offset = GPIO_MAP_Y;
- else
- offset = GPIO_MAP_X;
-
- shift = (gpio % 8) * 4;
-
- val = inl(base + offset);
-
- /* Clear whatever was there before */
- val &= ~(0xF << shift);
-
- /* And set the new value */
-
- val |= ((pair & 7) << shift);
-
- /* Set the PME bit if this is a PME event */
-
- if (pme)
- val |= (1 << (shift + 3));
-
- outl(val, base + offset);
-}
-EXPORT_SYMBOL_GPL(geode_gpio_setup_event);
-
-int geode_has_vsa2(void)
-{
- static int has_vsa2 = -1;
-
- if (has_vsa2 == -1) {
- u16 val;
-
- /*
- * The VSA has virtual registers that we can query for a
- * signature.
- */
- outw(VSA_VR_UNLOCK, VSA_VRC_INDEX);
- outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX);
-
- val = inw(VSA_VRC_DATA);
- has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG);
- }
-
- return has_vsa2;
-}
-EXPORT_SYMBOL_GPL(geode_has_vsa2);
-
-static int __init geode_southbridge_init(void)
-{
- if (!is_geode())
- return -ENODEV;
-
- init_lbars();
- (void) mfgpt_timer_setup();
- return 0;
-}
-
-postcore_initcall(geode_southbridge_init);
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 4f8e2507e8f..5051b94c906 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -29,8 +29,6 @@ static void __init i386_default_early_setup(void)
void __init i386_start_kernel(void)
{
- reserve_trampoline_memory();
-
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 0b06cd778fd..b5a9896ca1e 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -98,8 +98,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
{
copy_bootdata(__va(real_mode_data));
- reserve_trampoline_memory();
-
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 050c278481b..7fd318bac59 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -18,6 +18,8 @@
#include <asm/asm-offsets.h>
#include <asm/setup.h>
#include <asm/processor-flags.h>
+#include <asm/msr-index.h>
+#include <asm/cpufeature.h>
#include <asm/percpu.h>
/* Physical address */
@@ -297,25 +299,27 @@ ENTRY(startup_32_smp)
orl %edx,%eax
movl %eax,%cr4
- btl $5, %eax # check if PAE is enabled
- jnc 6f
+ testb $X86_CR4_PAE, %al # check if PAE is enabled
+ jz 6f
/* Check if extended functions are implemented */
movl $0x80000000, %eax
cpuid
- cmpl $0x80000000, %eax
- jbe 6f
+ /* Value must be in the range 0x80000001 to 0x8000ffff */
+ subl $0x80000001, %eax
+ cmpl $(0x8000ffff-0x80000001), %eax
+ ja 6f
mov $0x80000001, %eax
cpuid
/* Execute Disable bit supported? */
- btl $20, %edx
+ btl $(X86_FEATURE_NX & 31), %edx
jnc 6f
/* Setup EFER (Extended Feature Enable Register) */
- movl $0xc0000080, %ecx
+ movl $MSR_EFER, %ecx
rdmsr
- btsl $11, %eax
+ btsl $_EFER_NX, %eax
/* Make changes effective */
wrmsr
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 780cd928fcd..2d8b5035371 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -212,8 +212,8 @@ ENTRY(secondary_startup_64)
*/
lgdt early_gdt_descr(%rip)
- /* set up data segments. actually 0 would do too */
- movl $__KERNEL_DS,%eax
+ /* set up data segments */
+ xorl %eax,%eax
movl %eax,%ds
movl %eax,%ss
movl %eax,%es
@@ -262,11 +262,11 @@ ENTRY(secondary_startup_64)
.quad x86_64_start_kernel
ENTRY(initial_gs)
.quad INIT_PER_CPU_VAR(irq_stack_union)
- __FINITDATA
ENTRY(stack_start)
.quad init_thread_union+THREAD_SIZE-8
.word 0
+ __FINITDATA
bad_address:
jmp bad_address
@@ -340,6 +340,7 @@ ENTRY(name)
i = i + 1 ; \
.endr
+ .data
/*
* This default setting generates an ident mapping at address 0x100000
* and a mapping for the kernel that precisely maps virtual address
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index dedc2bddf7a..ba6e6588460 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -33,6 +33,7 @@
* HPET address is set in acpi/boot.c, when an ACPI entry exists
*/
unsigned long hpet_address;
+u8 hpet_blockid; /* OS timer block num */
#ifdef CONFIG_PCI_MSI
static unsigned long hpet_num_timers;
#endif
@@ -47,12 +48,12 @@ struct hpet_dev {
char name[10];
};
-unsigned long hpet_readl(unsigned long a)
+inline unsigned int hpet_readl(unsigned int a)
{
return readl(hpet_virt_address + a);
}
-static inline void hpet_writel(unsigned long d, unsigned long a)
+static inline void hpet_writel(unsigned int d, unsigned int a)
{
writel(d, hpet_virt_address + a);
}
@@ -167,7 +168,7 @@ do { \
static void hpet_reserve_msi_timers(struct hpet_data *hd);
-static void hpet_reserve_platform_timers(unsigned long id)
+static void hpet_reserve_platform_timers(unsigned int id)
{
struct hpet __iomem *hpet = hpet_virt_address;
struct hpet_timer __iomem *timer = &hpet->hpet_timers[2];
@@ -205,7 +206,7 @@ static void hpet_reserve_platform_timers(unsigned long id)
}
#else
-static void hpet_reserve_platform_timers(unsigned long id) { }
+static void hpet_reserve_platform_timers(unsigned int id) { }
#endif
/*
@@ -246,7 +247,7 @@ static void hpet_reset_counter(void)
static void hpet_start_counter(void)
{
- unsigned long cfg = hpet_readl(HPET_CFG);
+ unsigned int cfg = hpet_readl(HPET_CFG);
cfg |= HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
}
@@ -271,7 +272,7 @@ static void hpet_resume_counter(void)
static void hpet_enable_legacy_int(void)
{
- unsigned long cfg = hpet_readl(HPET_CFG);
+ unsigned int cfg = hpet_readl(HPET_CFG);
cfg |= HPET_CFG_LEGACY;
hpet_writel(cfg, HPET_CFG);
@@ -314,7 +315,7 @@ static int hpet_setup_msi_irq(unsigned int irq);
static void hpet_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt, int timer)
{
- unsigned long cfg, cmp, now;
+ unsigned int cfg, cmp, now;
uint64_t delta;
switch (mode) {
@@ -323,7 +324,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
delta >>= evt->shift;
now = hpet_readl(HPET_COUNTER);
- cmp = now + (unsigned long) delta;
+ cmp = now + (unsigned int) delta;
cfg = hpet_readl(HPET_Tn_CFG(timer));
/* Make sure we use edge triggered interrupts */
cfg &= ~HPET_TN_LEVEL;
@@ -339,7 +340,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
* (See AMD-8111 HyperTransport I/O Hub Data Sheet,
* Publication # 24674)
*/
- hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
+ hpet_writel((unsigned int) delta, HPET_Tn_CMP(timer));
hpet_start_counter();
hpet_print_config();
break;
@@ -383,13 +384,24 @@ static int hpet_next_event(unsigned long delta,
hpet_writel(cnt, HPET_Tn_CMP(timer));
/*
- * We need to read back the CMP register to make sure that
- * what we wrote hit the chip before we compare it to the
- * counter.
+ * We need to read back the CMP register on certain HPET
+ * implementations (ATI chipsets) which seem to delay the
+ * transfer of the compare register into the internal compare
+ * logic. With small deltas this might actually be too late as
+ * the counter could already be higher than the compare value
+ * at that point and we would wait for the next hpet interrupt
+ * forever. We found out that reading the CMP register back
+ * forces the transfer so we can rely on the comparison with
+ * the counter register below. If the read back from the
+ * compare register does not match the value we programmed
+ * then we might have a real hardware problem. We can not do
+ * much about it here, but at least alert the user/admin with
+ * a prominent warning.
*/
- WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt);
+ WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt,
+ KERN_WARNING "hpet: compare register read back failed.\n");
- return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
+ return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
}
static void hpet_legacy_set_mode(enum clock_event_mode mode,
@@ -415,7 +427,7 @@ static struct hpet_dev *hpet_devs;
void hpet_msi_unmask(unsigned int irq)
{
struct hpet_dev *hdev = get_irq_data(irq);
- unsigned long cfg;
+ unsigned int cfg;
/* unmask it */
cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
@@ -425,7 +437,7 @@ void hpet_msi_unmask(unsigned int irq)
void hpet_msi_mask(unsigned int irq)
{
- unsigned long cfg;
+ unsigned int cfg;
struct hpet_dev *hdev = get_irq_data(irq);
/* mask it */
@@ -467,7 +479,7 @@ static int hpet_msi_next_event(unsigned long delta,
static int hpet_setup_msi_irq(unsigned int irq)
{
- if (arch_setup_hpet_msi(irq)) {
+ if (arch_setup_hpet_msi(irq, hpet_blockid)) {
destroy_irq(irq);
return -EINVAL;
}
@@ -584,6 +596,8 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
unsigned int num_timers_used = 0;
int i;
+ if (boot_cpu_has(X86_FEATURE_ARAT))
+ return;
id = hpet_readl(HPET_ID);
num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
@@ -598,7 +612,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
struct hpet_dev *hdev = &hpet_devs[num_timers_used];
- unsigned long cfg = hpet_readl(HPET_Tn_CFG(i));
+ unsigned int cfg = hpet_readl(HPET_Tn_CFG(i));
/* Only consider HPET timer with MSI support */
if (!(cfg & HPET_TN_FSB_CAP))
@@ -813,7 +827,7 @@ static int hpet_clocksource_register(void)
*/
int __init hpet_enable(void)
{
- unsigned long id;
+ unsigned int id;
int i;
if (!is_hpet_capable())
@@ -872,10 +886,8 @@ int __init hpet_enable(void)
if (id & HPET_ID_LEGSUP) {
hpet_legacy_clockevent_register();
- hpet_msi_capability_lookup(2);
return 1;
}
- hpet_msi_capability_lookup(0);
return 0;
out_nohpet:
@@ -908,9 +920,17 @@ static __init int hpet_late_init(void)
if (!hpet_virt_address)
return -ENODEV;
+ if (hpet_readl(HPET_ID) & HPET_ID_LEGSUP)
+ hpet_msi_capability_lookup(2);
+ else
+ hpet_msi_capability_lookup(0);
+
hpet_reserve_platform_timers(hpet_readl(HPET_ID));
hpet_print_config();
+ if (boot_cpu_has(X86_FEATURE_ARAT))
+ return 0;
+
for_each_online_cpu(cpu) {
hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
}
@@ -925,7 +945,7 @@ fs_initcall(hpet_late_init);
void hpet_disable(void)
{
if (is_hpet_capable()) {
- unsigned long cfg = hpet_readl(HPET_CFG);
+ unsigned int cfg = hpet_readl(HPET_CFG);
if (hpet_legacy_int_enabled) {
cfg &= ~HPET_CFG_LEGACY;
@@ -965,8 +985,8 @@ static int hpet_prev_update_sec;
static struct rtc_time hpet_alarm_time;
static unsigned long hpet_pie_count;
static u32 hpet_t1_cmp;
-static unsigned long hpet_default_delta;
-static unsigned long hpet_pie_delta;
+static u32 hpet_default_delta;
+static u32 hpet_pie_delta;
static unsigned long hpet_pie_limit;
static rtc_irq_handler irq_handler;
@@ -1017,7 +1037,8 @@ EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler);
*/
int hpet_rtc_timer_init(void)
{
- unsigned long cfg, cnt, delta, flags;
+ unsigned int cfg, cnt, delta;
+ unsigned long flags;
if (!is_hpet_enabled())
return 0;
@@ -1027,7 +1048,7 @@ int hpet_rtc_timer_init(void)
clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC;
clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT;
- hpet_default_delta = (unsigned long) clc;
+ hpet_default_delta = clc;
}
if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit)
@@ -1113,7 +1134,7 @@ int hpet_set_periodic_freq(unsigned long freq)
clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC;
do_div(clc, freq);
clc >>= hpet_clockevent.shift;
- hpet_pie_delta = (unsigned long) clc;
+ hpet_pie_delta = clc;
}
return 1;
}
@@ -1127,7 +1148,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq);
static void hpet_rtc_timer_reinit(void)
{
- unsigned long cfg, delta;
+ unsigned int cfg, delta;
int lost_ints = -1;
if (unlikely(!hpet_rtc_flags)) {
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
new file mode 100644
index 00000000000..05d5fec64a9
--- /dev/null
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -0,0 +1,554 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2007 Alan Stern
+ * Copyright (C) 2009 IBM Corporation
+ * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Authors: Alan Stern <stern@rowland.harvard.edu>
+ * K.Prasad <prasad@linux.vnet.ibm.com>
+ * Frederic Weisbecker <fweisbec@gmail.com>
+ */
+
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/irqflags.h>
+#include <linux/notifier.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/percpu.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/hw_breakpoint.h>
+#include <asm/processor.h>
+#include <asm/debugreg.h>
+
+/* Per cpu debug control register value */
+DEFINE_PER_CPU(unsigned long, cpu_dr7);
+EXPORT_PER_CPU_SYMBOL(cpu_dr7);
+
+/* Per cpu debug address registers values */
+static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);
+
+/*
+ * Stores the breakpoints currently in use on each breakpoint address
+ * register for each cpus
+ */
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
+
+
+static inline unsigned long
+__encode_dr7(int drnum, unsigned int len, unsigned int type)
+{
+ unsigned long bp_info;
+
+ bp_info = (len | type) & 0xf;
+ bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
+ bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));
+
+ return bp_info;
+}
+
+/*
+ * Encode the length, type, Exact, and Enable bits for a particular breakpoint
+ * as stored in debug register 7.
+ */
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
+{
+ return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN;
+}
+
+/*
+ * Decode the length and type bits for a particular breakpoint as
+ * stored in debug register 7. Return the "enabled" status.
+ */
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
+{
+ int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
+
+ *len = (bp_info & 0xc) | 0x40;
+ *type = (bp_info & 0x3) | 0x80;
+
+ return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
+}
+
+/*
+ * Install a perf counter breakpoint.
+ *
+ * We seek a free debug address register and use it for this
+ * breakpoint. Eventually we enable it in the debug control register.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ unsigned long *dr7;
+ int i;
+
+ for (i = 0; i < HBP_NUM; i++) {
+ struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+ if (!*slot) {
+ *slot = bp;
+ break;
+ }
+ }
+
+ if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+ return -EBUSY;
+
+ set_debugreg(info->address, i);
+ __get_cpu_var(cpu_debugreg[i]) = info->address;
+
+ dr7 = &__get_cpu_var(cpu_dr7);
+ *dr7 |= encode_dr7(i, info->len, info->type);
+
+ set_debugreg(*dr7, 7);
+
+ return 0;
+}
+
+/*
+ * Uninstall the breakpoint contained in the given counter.
+ *
+ * First we search the debug address register it uses and then we disable
+ * it.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ unsigned long *dr7;
+ int i;
+
+ for (i = 0; i < HBP_NUM; i++) {
+ struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+ if (*slot == bp) {
+ *slot = NULL;
+ break;
+ }
+ }
+
+ if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+ return;
+
+ dr7 = &__get_cpu_var(cpu_dr7);
+ *dr7 &= ~__encode_dr7(i, info->len, info->type);
+
+ set_debugreg(*dr7, 7);
+}
+
+static int get_hbp_len(u8 hbp_len)
+{
+ unsigned int len_in_bytes = 0;
+
+ switch (hbp_len) {
+ case X86_BREAKPOINT_LEN_1:
+ len_in_bytes = 1;
+ break;
+ case X86_BREAKPOINT_LEN_2:
+ len_in_bytes = 2;
+ break;
+ case X86_BREAKPOINT_LEN_4:
+ len_in_bytes = 4;
+ break;
+#ifdef CONFIG_X86_64
+ case X86_BREAKPOINT_LEN_8:
+ len_in_bytes = 8;
+ break;
+#endif
+ }
+ return len_in_bytes;
+}
+
+/*
+ * Check for virtual address in user space.
+ */
+int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
+{
+ unsigned int len;
+
+ len = get_hbp_len(hbp_len);
+
+ return (va <= TASK_SIZE - len);
+}
+
+/*
+ * Check for virtual address in kernel space.
+ */
+static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
+{
+ unsigned int len;
+
+ len = get_hbp_len(hbp_len);
+
+ return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
+}
+
+/*
+ * Store a breakpoint's encoded address, length, and type.
+ */
+static int arch_store_info(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ /*
+ * For kernel-addresses, either the address or symbol name can be
+ * specified.
+ */
+ if (info->name)
+ info->address = (unsigned long)
+ kallsyms_lookup_name(info->name);
+ if (info->address)
+ return 0;
+
+ return -EINVAL;
+}
+
+int arch_bp_generic_fields(int x86_len, int x86_type,
+ int *gen_len, int *gen_type)
+{
+ /* Len */
+ switch (x86_len) {
+ case X86_BREAKPOINT_LEN_1:
+ *gen_len = HW_BREAKPOINT_LEN_1;
+ break;
+ case X86_BREAKPOINT_LEN_2:
+ *gen_len = HW_BREAKPOINT_LEN_2;
+ break;
+ case X86_BREAKPOINT_LEN_4:
+ *gen_len = HW_BREAKPOINT_LEN_4;
+ break;
+#ifdef CONFIG_X86_64
+ case X86_BREAKPOINT_LEN_8:
+ *gen_len = HW_BREAKPOINT_LEN_8;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+
+ /* Type */
+ switch (x86_type) {
+ case X86_BREAKPOINT_EXECUTE:
+ *gen_type = HW_BREAKPOINT_X;
+ break;
+ case X86_BREAKPOINT_WRITE:
+ *gen_type = HW_BREAKPOINT_W;
+ break;
+ case X86_BREAKPOINT_RW:
+ *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+
+static int arch_build_bp_info(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+ info->address = bp->attr.bp_addr;
+
+ /* Len */
+ switch (bp->attr.bp_len) {
+ case HW_BREAKPOINT_LEN_1:
+ info->len = X86_BREAKPOINT_LEN_1;
+ break;
+ case HW_BREAKPOINT_LEN_2:
+ info->len = X86_BREAKPOINT_LEN_2;
+ break;
+ case HW_BREAKPOINT_LEN_4:
+ info->len = X86_BREAKPOINT_LEN_4;
+ break;
+#ifdef CONFIG_X86_64
+ case HW_BREAKPOINT_LEN_8:
+ info->len = X86_BREAKPOINT_LEN_8;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+
+ /* Type */
+ switch (bp->attr.bp_type) {
+ case HW_BREAKPOINT_W:
+ info->type = X86_BREAKPOINT_WRITE;
+ break;
+ case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+ info->type = X86_BREAKPOINT_RW;
+ break;
+ case HW_BREAKPOINT_X:
+ info->type = X86_BREAKPOINT_EXECUTE;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+/*
+ * Validate the arch-specific HW Breakpoint register settings
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp,
+ struct task_struct *tsk)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ unsigned int align;
+ int ret;
+
+
+ ret = arch_build_bp_info(bp);
+ if (ret)
+ return ret;
+
+ ret = -EINVAL;
+
+ if (info->type == X86_BREAKPOINT_EXECUTE)
+ /*
+ * Ptrace-refactoring code
+ * For now, we'll allow instruction breakpoint only for user-space
+ * addresses
+ */
+ if ((!arch_check_va_in_userspace(info->address, info->len)) &&
+ info->len != X86_BREAKPOINT_EXECUTE)
+ return ret;
+
+ switch (info->len) {
+ case X86_BREAKPOINT_LEN_1:
+ align = 0;
+ break;
+ case X86_BREAKPOINT_LEN_2:
+ align = 1;
+ break;
+ case X86_BREAKPOINT_LEN_4:
+ align = 3;
+ break;
+#ifdef CONFIG_X86_64
+ case X86_BREAKPOINT_LEN_8:
+ align = 7;
+ break;
+#endif
+ default:
+ return ret;
+ }
+
+ ret = arch_store_info(bp);
+
+ if (ret < 0)
+ return ret;
+ /*
+ * Check that the low-order bits of the address are appropriate
+ * for the alignment implied by len.
+ */
+ if (info->address & align)
+ return -EINVAL;
+
+ /* Check that the virtual address is in the proper range */
+ if (tsk) {
+ if (!arch_check_va_in_userspace(info->address, info->len))
+ return -EFAULT;
+ } else {
+ if (!arch_check_va_in_kernelspace(info->address, info->len))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/*
+ * Dump the debug register contents to the user.
+ * We can't dump our per cpu values because it
+ * may contain cpu wide breakpoint, something that
+ * doesn't belong to the current task.
+ *
+ * TODO: include non-ptrace user breakpoints (perf)
+ */
+void aout_dump_debugregs(struct user *dump)
+{
+ int i;
+ int dr7 = 0;
+ struct perf_event *bp;
+ struct arch_hw_breakpoint *info;
+ struct thread_struct *thread = &current->thread;
+
+ for (i = 0; i < HBP_NUM; i++) {
+ bp = thread->ptrace_bps[i];
+
+ if (bp && !bp->attr.disabled) {
+ dump->u_debugreg[i] = bp->attr.bp_addr;
+ info = counter_arch_bp(bp);
+ dr7 |= encode_dr7(i, info->len, info->type);
+ } else {
+ dump->u_debugreg[i] = 0;
+ }
+ }
+
+ dump->u_debugreg[4] = 0;
+ dump->u_debugreg[5] = 0;
+ dump->u_debugreg[6] = current->thread.debugreg6;
+
+ dump->u_debugreg[7] = dr7;
+}
+EXPORT_SYMBOL_GPL(aout_dump_debugregs);
+
+/*
+ * Release the user breakpoints used by ptrace
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+ int i;
+ struct thread_struct *t = &tsk->thread;
+
+ for (i = 0; i < HBP_NUM; i++) {
+ unregister_hw_breakpoint(t->ptrace_bps[i]);
+ t->ptrace_bps[i] = NULL;
+ }
+}
+
+void hw_breakpoint_restore(void)
+{
+ set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
+ set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
+ set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
+ set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
+ set_debugreg(current->thread.debugreg6, 6);
+ set_debugreg(__get_cpu_var(cpu_dr7), 7);
+}
+EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
+
+/*
+ * Handle debug exception notifications.
+ *
+ * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
+ *
+ * NOTIFY_DONE returned if one of the following conditions is true.
+ * i) When the causative address is from user-space and the exception
+ * is a valid one, i.e. not triggered as a result of lazy debug register
+ * switching
+ * ii) When there are more bits than trap<n> set in DR6 register (such
+ * as BD, BS or BT) indicating that more than one debug condition is
+ * met and requires some more action in do_debug().
+ *
+ * NOTIFY_STOP returned for all other cases
+ *
+ */
+static int __kprobes hw_breakpoint_handler(struct die_args *args)
+{
+ int i, cpu, rc = NOTIFY_STOP;
+ struct perf_event *bp;
+ unsigned long dr7, dr6;
+ unsigned long *dr6_p;
+
+ /* The DR6 value is pointed by args->err */
+ dr6_p = (unsigned long *)ERR_PTR(args->err);
+ dr6 = *dr6_p;
+
+ /* Do an early return if no trap bits are set in DR6 */
+ if ((dr6 & DR_TRAP_BITS) == 0)
+ return NOTIFY_DONE;
+
+ get_debugreg(dr7, 7);
+ /* Disable breakpoints during exception handling */
+ set_debugreg(0UL, 7);
+ /*
+ * Assert that local interrupts are disabled
+ * Reset the DRn bits in the virtualized register value.
+ * The ptrace trigger routine will add in whatever is needed.
+ */
+ current->thread.debugreg6 &= ~DR_TRAP_BITS;
+ cpu = get_cpu();
+
+ /* Handle all the breakpoints that were triggered */
+ for (i = 0; i < HBP_NUM; ++i) {
+ if (likely(!(dr6 & (DR_TRAP0 << i))))
+ continue;
+
+ /*
+ * The counter may be concurrently released but that can only
+ * occur from a call_rcu() path. We can then safely fetch
+ * the breakpoint, use its callback, touch its counter
+ * while we are in an rcu_read_lock() path.
+ */
+ rcu_read_lock();
+
+ bp = per_cpu(bp_per_reg[i], cpu);
+ if (bp)
+ rc = NOTIFY_DONE;
+ /*
+ * Reset the 'i'th TRAP bit in dr6 to denote completion of
+ * exception handling
+ */
+ (*dr6_p) &= ~(DR_TRAP0 << i);
+ /*
+ * bp can be NULL due to lazy debug register switching
+ * or due to concurrent perf counter removing.
+ */
+ if (!bp) {
+ rcu_read_unlock();
+ break;
+ }
+
+ perf_bp_event(bp, args->regs);
+
+ rcu_read_unlock();
+ }
+ if (dr6 & (~DR_TRAP_BITS))
+ rc = NOTIFY_DONE;
+
+ set_debugreg(dr7, 7);
+ put_cpu();
+
+ return rc;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_exceptions_notify(
+ struct notifier_block *unused, unsigned long val, void *data)
+{
+ if (val != DIE_DEBUG)
+ return NOTIFY_DONE;
+
+ return hw_breakpoint_handler(data);
+}
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+ /* TODO */
+}
+
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
+{
+ /* TODO */
+}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 04bbd527856..91fd0c70a18 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -18,7 +18,7 @@
atomic_t irq_err_count;
/* Function pointer for generic interrupt vector handling */
-void (*generic_interrupt_extension)(void) = NULL;
+void (*x86_platform_ipi_callback)(void) = NULL;
/*
* 'what should we do if we get a hw irq event on an illegal vector'.
@@ -72,10 +72,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
seq_printf(p, " Performance pending work\n");
#endif
- if (generic_interrupt_extension) {
+ if (x86_platform_ipi_callback) {
seq_printf(p, "%*s: ", prec, "PLT");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->generic_irqs);
+ seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
seq_printf(p, " Platform interrupts\n");
}
#ifdef CONFIG_SMP
@@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
seq_printf(p, " TLB shootdowns\n");
#endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
seq_printf(p, "%*s: ", prec, "TRM");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
seq_printf(p, " Thermal event interrupts\n");
-# ifdef CONFIG_X86_MCE_THRESHOLD
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
seq_printf(p, "%*s: ", prec, "THR");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
seq_printf(p, " Threshold APIC interrupts\n");
-# endif
#endif
#ifdef CONFIG_X86_MCE
seq_printf(p, "%*s: ", prec, "MCE");
@@ -149,7 +149,7 @@ int show_interrupts(struct seq_file *p, void *v)
if (!desc)
return 0;
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
for_each_online_cpu(j)
any_count |= kstat_irqs_cpu(i, j);
action = desc->action;
@@ -170,7 +170,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
out:
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
@@ -187,18 +187,18 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->apic_perf_irqs;
sum += irq_stats(cpu)->apic_pending_irqs;
#endif
- if (generic_interrupt_extension)
- sum += irq_stats(cpu)->generic_irqs;
+ if (x86_platform_ipi_callback)
+ sum += irq_stats(cpu)->x86_platform_ipis;
#ifdef CONFIG_SMP
sum += irq_stats(cpu)->irq_resched_count;
sum += irq_stats(cpu)->irq_call_count;
sum += irq_stats(cpu)->irq_tlb_count;
#endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
sum += irq_stats(cpu)->irq_thermal_count;
-# ifdef CONFIG_X86_MCE_THRESHOLD
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
sum += irq_stats(cpu)->irq_threshold_count;
-# endif
#endif
#ifdef CONFIG_X86_MCE
sum += per_cpu(mce_exception_count, cpu);
@@ -251,9 +251,9 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
}
/*
- * Handler for GENERIC_INTERRUPT_VECTOR.
+ * Handler for X86_PLATFORM_IPI_VECTOR.
*/
-void smp_generic_interrupt(struct pt_regs *regs)
+void smp_x86_platform_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -263,10 +263,10 @@ void smp_generic_interrupt(struct pt_regs *regs)
irq_enter();
- inc_irq_stat(generic_irqs);
+ inc_irq_stat(x86_platform_ipis);
- if (generic_interrupt_extension)
- generic_interrupt_extension();
+ if (x86_platform_ipi_callback)
+ x86_platform_ipi_callback();
irq_exit();
@@ -274,3 +274,93 @@ void smp_generic_interrupt(struct pt_regs *regs)
}
EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
+
+#ifdef CONFIG_HOTPLUG_CPU
+/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
+void fixup_irqs(void)
+{
+ unsigned int irq, vector;
+ static int warned;
+ struct irq_desc *desc;
+
+ for_each_irq_desc(irq, desc) {
+ int break_affinity = 0;
+ int set_affinity = 1;
+ const struct cpumask *affinity;
+
+ if (!desc)
+ continue;
+ if (irq == 2)
+ continue;
+
+ /* interrupt's are disabled at this point */
+ raw_spin_lock(&desc->lock);
+
+ affinity = desc->affinity;
+ if (!irq_has_action(irq) ||
+ cpumask_equal(affinity, cpu_online_mask)) {
+ raw_spin_unlock(&desc->lock);
+ continue;
+ }
+
+ /*
+ * Complete the irq move. This cpu is going down and for
+ * non intr-remapping case, we can't wait till this interrupt
+ * arrives at this cpu before completing the irq move.
+ */
+ irq_force_complete_move(irq);
+
+ if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+ break_affinity = 1;
+ affinity = cpu_all_mask;
+ }
+
+ if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask)
+ desc->chip->mask(irq);
+
+ if (desc->chip->set_affinity)
+ desc->chip->set_affinity(irq, affinity);
+ else if (!(warned++))
+ set_affinity = 0;
+
+ if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask)
+ desc->chip->unmask(irq);
+
+ raw_spin_unlock(&desc->lock);
+
+ if (break_affinity && set_affinity)
+ printk("Broke affinity for irq %i\n", irq);
+ else if (!set_affinity)
+ printk("Cannot set affinity for irq %i\n", irq);
+ }
+
+ /*
+ * We can remove mdelay() and then send spuriuous interrupts to
+ * new cpu targets for all the irqs that were handled previously by
+ * this cpu. While it works, I have seen spurious interrupt messages
+ * (nothing wrong but still...).
+ *
+ * So for now, retain mdelay(1) and check the IRR and then send those
+ * interrupts to new targets as this cpu is already offlined...
+ */
+ mdelay(1);
+
+ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+ unsigned int irr;
+
+ if (__get_cpu_var(vector_irq)[vector] < 0)
+ continue;
+
+ irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+ if (irr & (1 << (vector % 32))) {
+ irq = __get_cpu_var(vector_irq)[vector];
+
+ desc = irq_to_desc(irq);
+ raw_spin_lock(&desc->lock);
+ if (desc->chip->retrigger)
+ desc->chip->retrigger(irq);
+ raw_spin_unlock(&desc->lock);
+ }
+ }
+}
+#endif
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 7d35d0fe232..10709f29d16 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -211,48 +211,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
return true;
}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
-void fixup_irqs(void)
-{
- unsigned int irq;
- struct irq_desc *desc;
-
- for_each_irq_desc(irq, desc) {
- const struct cpumask *affinity;
-
- if (!desc)
- continue;
- if (irq == 2)
- continue;
-
- affinity = desc->affinity;
- if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
- printk("Breaking affinity for irq %i\n", irq);
- affinity = cpu_all_mask;
- }
- if (desc->chip->set_affinity)
- desc->chip->set_affinity(irq, affinity);
- else if (desc->action)
- printk_once("Cannot set affinity for irq %i\n", irq);
- }
-
-#if 0
- barrier();
- /* Ingo Molnar says: "after the IO-APIC masks have been redirected
- [note the nop - the interrupt-enable boundary on x86 is two
- instructions from sti] - to flush out pending hardirqs and
- IPIs. After this point nothing is supposed to reach this CPU." */
- __asm__ __volatile__("sti; nop; cli");
- barrier();
-#else
- /* That doesn't seem sufficient. Give it 1ms. */
- local_irq_enable();
- mdelay(1);
- local_irq_disable();
-#endif
-}
-#endif
-
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 977d8b43a0d..acf8fbf8fbd 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -62,64 +62,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
return true;
}
-#ifdef CONFIG_HOTPLUG_CPU
-/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
-void fixup_irqs(void)
-{
- unsigned int irq;
- static int warned;
- struct irq_desc *desc;
-
- for_each_irq_desc(irq, desc) {
- int break_affinity = 0;
- int set_affinity = 1;
- const struct cpumask *affinity;
-
- if (!desc)
- continue;
- if (irq == 2)
- continue;
-
- /* interrupt's are disabled at this point */
- spin_lock(&desc->lock);
-
- affinity = desc->affinity;
- if (!irq_has_action(irq) ||
- cpumask_equal(affinity, cpu_online_mask)) {
- spin_unlock(&desc->lock);
- continue;
- }
-
- if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
- break_affinity = 1;
- affinity = cpu_all_mask;
- }
-
- if (desc->chip->mask)
- desc->chip->mask(irq);
-
- if (desc->chip->set_affinity)
- desc->chip->set_affinity(irq, affinity);
- else if (!(warned++))
- set_affinity = 0;
-
- if (desc->chip->unmask)
- desc->chip->unmask(irq);
-
- spin_unlock(&desc->lock);
-
- if (break_affinity && set_affinity)
- printk("Broke affinity for irq %i\n", irq);
- else if (!set_affinity)
- printk("Cannot set affinity for irq %i\n", irq);
- }
-
- /* That doesn't seem sufficient. Give it 1ms. */
- local_irq_enable();
- mdelay(1);
- local_irq_disable();
-}
-#endif
extern void call_softirq(void);
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 40f30773fb2..d5932226614 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -200,8 +200,8 @@ static void __init apic_intr_init(void)
/* self generated IPI for local APIC timer */
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
- /* generic IPI for platform specific use */
- alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt);
+ /* IPI for X86 platform specific use */
+ alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 8d82a77a3f3..dd74fe7273b 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -43,6 +43,7 @@
#include <linux/smp.h>
#include <linux/nmi.h>
+#include <asm/debugreg.h>
#include <asm/apicdef.h>
#include <asm/system.h>
@@ -85,10 +86,15 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs[GDB_DS] = regs->ds;
gdb_regs[GDB_ES] = regs->es;
gdb_regs[GDB_CS] = regs->cs;
- gdb_regs[GDB_SS] = __KERNEL_DS;
gdb_regs[GDB_FS] = 0xFFFF;
gdb_regs[GDB_GS] = 0xFFFF;
- gdb_regs[GDB_SP] = (int)&regs->sp;
+ if (user_mode_vm(regs)) {
+ gdb_regs[GDB_SS] = regs->ss;
+ gdb_regs[GDB_SP] = regs->sp;
+ } else {
+ gdb_regs[GDB_SS] = __KERNEL_DS;
+ gdb_regs[GDB_SP] = kernel_stack_pointer(regs);
+ }
#else
gdb_regs[GDB_R8] = regs->r8;
gdb_regs[GDB_R9] = regs->r9;
@@ -101,7 +107,7 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs32[GDB_PS] = regs->flags;
gdb_regs32[GDB_CS] = regs->cs;
gdb_regs32[GDB_SS] = regs->ss;
- gdb_regs[GDB_SP] = regs->sp;
+ gdb_regs[GDB_SP] = kernel_stack_pointer(regs);
#endif
}
@@ -220,8 +226,7 @@ static void kgdb_correct_hw_break(void)
dr7 |= ((breakinfo[breakno].len << 2) |
breakinfo[breakno].type) <<
((breakno << 2) + 16);
- if (breakno >= 0 && breakno <= 3)
- set_debugreg(breakinfo[breakno].addr, breakno);
+ set_debugreg(breakinfo[breakno].addr, breakno);
} else {
if ((dr7 & breakbit) && !breakinfo[breakno].enabled) {
@@ -395,7 +400,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
/* set the trace bit if we're stepping */
if (remcomInBuffer[0] == 's') {
linux_regs->flags |= X86_EFLAGS_TF;
- kgdb_single_step = 1;
atomic_set(&kgdb_cpu_doing_single_step,
raw_smp_processor_id());
}
@@ -434,6 +438,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
"resuming...\n");
kgdb_arch_handle_exception(args->trapnr, args->signr,
args->err, "c", "", regs);
+ /*
+ * Reset the BS bit in dr6 (pointed by args->err) to
+ * denote completion of processing
+ */
+ (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
return NOTIFY_STOP;
}
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7b5169d2b00..5b8c7505b3b 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -48,31 +48,22 @@
#include <linux/preempt.h>
#include <linux/module.h>
#include <linux/kdebug.h>
+#include <linux/kallsyms.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/alternative.h>
+#include <asm/insn.h>
+#include <asm/debugreg.h>
void jprobe_return_end(void);
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
-#ifdef CONFIG_X86_64
-#define stack_addr(regs) ((unsigned long *)regs->sp)
-#else
-/*
- * "&regs->sp" looks wrong, but it's correct for x86_32. x86_32 CPUs
- * don't save the ss and esp registers if the CPU is already in kernel
- * mode when it traps. So for kprobes, regs->sp and regs->ss are not
- * the [nonexistent] saved stack pointer and ss register, but rather
- * the top 8 bytes of the pre-int3 stack. So &regs->sp happens to
- * point to the top of the pre-int3 stack.
- */
-#define stack_addr(regs) ((unsigned long *)&regs->sp)
-#endif
+#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs))
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
@@ -106,50 +97,6 @@ static const u32 twobyte_is_boostable[256 / 32] = {
/* ----------------------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
};
-static const u32 onebyte_has_modrm[256 / 32] = {
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* ----------------------------------------------- */
- W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
- W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
- W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
- W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
- W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
- W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
- W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
- W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
- W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
- W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
- W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
- W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
- W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
- W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
- W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
- W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */
- /* ----------------------------------------------- */
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
-};
-static const u32 twobyte_has_modrm[256 / 32] = {
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* ----------------------------------------------- */
- W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
- W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
- W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
- W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
- W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
- W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
- W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
- W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
- W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
- W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
- W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
- W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
- W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
- W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
- W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
- W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */
- /* ----------------------------------------------- */
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
-};
#undef W
struct kretprobe_blackpoint kretprobe_blacklist[] = {
@@ -244,6 +191,75 @@ retry:
}
}
+/* Recover the probed instruction at addr for further analysis. */
+static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+{
+ struct kprobe *kp;
+ kp = get_kprobe((void *)addr);
+ if (!kp)
+ return -EINVAL;
+
+ /*
+ * Basically, kp->ainsn.insn has an original instruction.
+ * However, RIP-relative instruction can not do single-stepping
+ * at different place, fix_riprel() tweaks the displacement of
+ * that instruction. In that case, we can't recover the instruction
+ * from the kp->ainsn.insn.
+ *
+ * On the other hand, kp->opcode has a copy of the first byte of
+ * the probed instruction, which is overwritten by int3. And
+ * the instruction at kp->addr is not modified by kprobes except
+ * for the first byte, we can recover the original instruction
+ * from it and kp->opcode.
+ */
+ memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ buf[0] = kp->opcode;
+ return 0;
+}
+
+/* Dummy buffers for kallsyms_lookup */
+static char __dummy_buf[KSYM_NAME_LEN];
+
+/* Check if paddr is at an instruction boundary */
+static int __kprobes can_probe(unsigned long paddr)
+{
+ int ret;
+ unsigned long addr, offset = 0;
+ struct insn insn;
+ kprobe_opcode_t buf[MAX_INSN_SIZE];
+
+ if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
+ return 0;
+
+ /* Decode instructions */
+ addr = paddr - offset;
+ while (addr < paddr) {
+ kernel_insn_init(&insn, (void *)addr);
+ insn_get_opcode(&insn);
+
+ /*
+ * Check if the instruction has been modified by another
+ * kprobe, in which case we replace the breakpoint by the
+ * original instruction in our buffer.
+ */
+ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
+ ret = recover_probed_instruction(buf, addr);
+ if (ret)
+ /*
+ * Another debugging subsystem might insert
+ * this breakpoint. In that case, we can't
+ * recover it.
+ */
+ return 0;
+ kernel_insn_init(&insn, buf);
+ }
+ insn_get_length(&insn);
+ addr += insn.length;
+ }
+
+ return (addr == paddr);
+}
+
/*
* Returns non-zero if opcode modifies the interrupt flag.
*/
@@ -277,68 +293,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
static void __kprobes fix_riprel(struct kprobe *p)
{
#ifdef CONFIG_X86_64
- u8 *insn = p->ainsn.insn;
- s64 disp;
- int need_modrm;
-
- /* Skip legacy instruction prefixes. */
- while (1) {
- switch (*insn) {
- case 0x66:
- case 0x67:
- case 0x2e:
- case 0x3e:
- case 0x26:
- case 0x64:
- case 0x65:
- case 0x36:
- case 0xf0:
- case 0xf3:
- case 0xf2:
- ++insn;
- continue;
- }
- break;
- }
+ struct insn insn;
+ kernel_insn_init(&insn, p->ainsn.insn);
- /* Skip REX instruction prefix. */
- if (is_REX_prefix(insn))
- ++insn;
-
- if (*insn == 0x0f) {
- /* Two-byte opcode. */
- ++insn;
- need_modrm = test_bit(*insn,
- (unsigned long *)twobyte_has_modrm);
- } else
- /* One-byte opcode. */
- need_modrm = test_bit(*insn,
- (unsigned long *)onebyte_has_modrm);
-
- if (need_modrm) {
- u8 modrm = *++insn;
- if ((modrm & 0xc7) == 0x05) {
- /* %rip+disp32 addressing mode */
- /* Displacement follows ModRM byte. */
- ++insn;
- /*
- * The copied instruction uses the %rip-relative
- * addressing mode. Adjust the displacement for the
- * difference between the original location of this
- * instruction and the location of the copy that will
- * actually be run. The tricky bit here is making sure
- * that the sign extension happens correctly in this
- * calculation, since we need a signed 32-bit result to
- * be sign-extended to 64 bits when it's added to the
- * %rip value and yield the same 64-bit result that the
- * sign-extension of the original signed 32-bit
- * displacement would have given.
- */
- disp = (u8 *) p->addr + *((s32 *) insn) -
- (u8 *) p->ainsn.insn;
- BUG_ON((s64) (s32) disp != disp); /* Sanity check. */
- *(s32 *)insn = (s32) disp;
- }
+ if (insn_rip_relative(&insn)) {
+ s64 newdisp;
+ u8 *disp;
+ insn_get_displacement(&insn);
+ /*
+ * The copied instruction uses the %rip-relative addressing
+ * mode. Adjust the displacement for the difference between
+ * the original location of this instruction and the location
+ * of the copy that will actually be run. The tricky bit here
+ * is making sure that the sign extension happens correctly in
+ * this calculation, since we need a signed 32-bit result to
+ * be sign-extended to 64 bits when it's added to the %rip
+ * value and yield the same 64-bit result that the sign-
+ * extension of the original signed 32-bit displacement would
+ * have given.
+ */
+ newdisp = (u8 *) p->addr + (s64) insn.displacement.value -
+ (u8 *) p->ainsn.insn;
+ BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
+ disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn);
+ *(s32 *) disp = (s32) newdisp;
}
#endif
}
@@ -359,6 +337,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
+ if (!can_probe((unsigned long)p->addr))
+ return -EILSEQ;
/* insn: must be on special executable page on x86. */
p->ainsn.insn = get_insn_slot();
if (!p->ainsn.insn)
@@ -472,17 +452,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
{
switch (kcb->kprobe_status) {
case KPROBE_HIT_SSDONE:
-#ifdef CONFIG_X86_64
- /* TODO: Provide re-entrancy from post_kprobes_handler() and
- * avoid exception stack corruption while single-stepping on
- * the instruction of the new probe.
- */
- arch_disarm_kprobe(p);
- regs->ip = (unsigned long)p->addr;
- reset_current_kprobe();
- preempt_enable_no_resched();
- break;
-#endif
case KPROBE_HIT_ACTIVE:
save_previous_kprobe(kcb);
set_current_kprobe(p, regs, kcb);
@@ -491,18 +460,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
kcb->kprobe_status = KPROBE_REENTER;
break;
case KPROBE_HIT_SS:
- if (p == kprobe_running()) {
- regs->flags &= ~X86_EFLAGS_TF;
- regs->flags |= kcb->kprobe_saved_flags;
- return 0;
- } else {
- /* A probe has been hit in the codepath leading up
- * to, or just after, single-stepping of a probed
- * instruction. This entire codepath should strictly
- * reside in .kprobes.text section. Raise a warning
- * to highlight this peculiar case.
- */
- }
+ /* A probe has been hit in the codepath leading up to, or just
+ * after, single-stepping of a probed instruction. This entire
+ * codepath should strictly reside in .kprobes.text section.
+ * Raise a BUG or we'll continue in an endless reentering loop
+ * and eventually a stack overflow.
+ */
+ printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
+ p->addr);
+ dump_kprobe(p);
+ BUG();
default:
/* impossible cases */
WARN_ON(1);
@@ -514,7 +481,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
/*
* Interrupts are disabled on entry as trap3 is an interrupt gate and they
- * remain disabled thorough out this function.
+ * remain disabled throughout this function.
*/
static int __kprobes kprobe_handler(struct pt_regs *regs)
{
@@ -851,7 +818,7 @@ no_change:
/*
* Interrupts are disabled on entry as trap1 is an interrupt gate and they
- * remain disabled thoroughout this function.
+ * remain disabled throughout this function.
*/
static int __kprobes post_kprobe_handler(struct pt_regs *regs)
{
@@ -967,8 +934,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
ret = NOTIFY_STOP;
break;
case DIE_DEBUG:
- if (post_kprobe_handler(args->regs))
+ if (post_kprobe_handler(args->regs)) {
+ /*
+ * Reset the BS bit in dr6 (pointed by args->err) to
+ * denote completion of processing
+ */
+ (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
ret = NOTIFY_STOP;
+ }
break;
case DIE_GPF:
/*
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index c1c429d0013..a3fa43ba5d3 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -25,6 +25,7 @@
#include <asm/desc.h>
#include <asm/system.h>
#include <asm/cacheflush.h>
+#include <asm/debugreg.h>
static void set_idt(void *newidt, __u16 limit)
{
@@ -157,8 +158,7 @@ int machine_kexec_prepare(struct kimage *image)
{
int error;
- if (nx_enabled)
- set_pages_x(image->control_code_page, 1);
+ set_pages_x(image->control_code_page, 1);
error = machine_kexec_alloc_page_tables(image);
if (error)
return error;
@@ -172,8 +172,7 @@ int machine_kexec_prepare(struct kimage *image)
*/
void machine_kexec_cleanup(struct kimage *image)
{
- if (nx_enabled)
- set_pages_nx(image->control_code_page, 1);
+ set_pages_nx(image->control_code_page, 1);
machine_kexec_free_page_tables(image);
}
@@ -202,6 +201,7 @@ void machine_kexec(struct kimage *image)
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ hw_breakpoint_disable();
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 84c3bf209e9..4a8bb82248a 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
+#include <asm/debugreg.h>
static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
unsigned long addr)
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ hw_breakpoint_disable();
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
deleted file mode 100644
index 2a62d843f01..00000000000
--- a/arch/x86/kernel/mfgpt_32.c
+++ /dev/null
@@ -1,410 +0,0 @@
-/*
- * Driver/API for AMD Geode Multi-Function General Purpose Timers (MFGPT)
- *
- * Copyright (C) 2006, Advanced Micro Devices, Inc.
- * Copyright (C) 2007, Andres Salomon <dilinger@debian.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- *
- * The MFGPTs are documented in AMD Geode CS5536 Companion Device Data Book.
- */
-
-/*
- * We are using the 32.768kHz input clock - it's the only one that has the
- * ranges we find desirable. The following table lists the suitable
- * divisors and the associated Hz, minimum interval and the maximum interval:
- *
- * Divisor Hz Min Delta (s) Max Delta (s)
- * 1 32768 .00048828125 2.000
- * 2 16384 .0009765625 4.000
- * 4 8192 .001953125 8.000
- * 8 4096 .00390625 16.000
- * 16 2048 .0078125 32.000
- * 32 1024 .015625 64.000
- * 64 512 .03125 128.000
- * 128 256 .0625 256.000
- * 256 128 .125 512.000
- */
-
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <asm/geode.h>
-
-#define MFGPT_DEFAULT_IRQ 7
-
-static struct mfgpt_timer_t {
- unsigned int avail:1;
-} mfgpt_timers[MFGPT_MAX_TIMERS];
-
-/* Selected from the table above */
-
-#define MFGPT_DIVISOR 16
-#define MFGPT_SCALE 4 /* divisor = 2^(scale) */
-#define MFGPT_HZ (32768 / MFGPT_DIVISOR)
-#define MFGPT_PERIODIC (MFGPT_HZ / HZ)
-
-/* Allow for disabling of MFGPTs */
-static int disable;
-static int __init mfgpt_disable(char *s)
-{
- disable = 1;
- return 1;
-}
-__setup("nomfgpt", mfgpt_disable);
-
-/* Reset the MFGPT timers. This is required by some broken BIOSes which already
- * do the same and leave the system in an unstable state. TinyBIOS 0.98 is
- * affected at least (0.99 is OK with MFGPT workaround left to off).
- */
-static int __init mfgpt_fix(char *s)
-{
- u32 val, dummy;
-
- /* The following udocumented bit resets the MFGPT timers */
- val = 0xFF; dummy = 0;
- wrmsr(MSR_MFGPT_SETUP, val, dummy);
- return 1;
-}
-__setup("mfgptfix", mfgpt_fix);
-
-/*
- * Check whether any MFGPTs are available for the kernel to use. In most
- * cases, firmware that uses AMD's VSA code will claim all timers during
- * bootup; we certainly don't want to take them if they're already in use.
- * In other cases (such as with VSAless OpenFirmware), the system firmware
- * leaves timers available for us to use.
- */
-
-
-static int timers = -1;
-
-static void geode_mfgpt_detect(void)
-{
- int i;
- u16 val;
-
- timers = 0;
-
- if (disable) {
- printk(KERN_INFO "geode-mfgpt: MFGPT support is disabled\n");
- goto done;
- }
-
- if (!geode_get_dev_base(GEODE_DEV_MFGPT)) {
- printk(KERN_INFO "geode-mfgpt: MFGPT LBAR is not set up\n");
- goto done;
- }
-
- for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
- val = geode_mfgpt_read(i, MFGPT_REG_SETUP);
- if (!(val & MFGPT_SETUP_SETUP)) {
- mfgpt_timers[i].avail = 1;
- timers++;
- }
- }
-
-done:
- printk(KERN_INFO "geode-mfgpt: %d MFGPT timers available.\n", timers);
-}
-
-int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
-{
- u32 msr, mask, value, dummy;
- int shift = (cmp == MFGPT_CMP1) ? 0 : 8;
-
- if (timer < 0 || timer >= MFGPT_MAX_TIMERS)
- return -EIO;
-
- /*
- * The register maps for these are described in sections 6.17.1.x of
- * the AMD Geode CS5536 Companion Device Data Book.
- */
- switch (event) {
- case MFGPT_EVENT_RESET:
- /*
- * XXX: According to the docs, we cannot reset timers above
- * 6; that is, resets for 7 and 8 will be ignored. Is this
- * a problem? -dilinger
- */
- msr = MSR_MFGPT_NR;
- mask = 1 << (timer + 24);
- break;
-
- case MFGPT_EVENT_NMI:
- msr = MSR_MFGPT_NR;
- mask = 1 << (timer + shift);
- break;
-
- case MFGPT_EVENT_IRQ:
- msr = MSR_MFGPT_IRQ;
- mask = 1 << (timer + shift);
- break;
-
- default:
- return -EIO;
- }
-
- rdmsr(msr, value, dummy);
-
- if (enable)
- value |= mask;
- else
- value &= ~mask;
-
- wrmsr(msr, value, dummy);
- return 0;
-}
-EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event);
-
-int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable)
-{
- u32 zsel, lpc, dummy;
- int shift;
-
- if (timer < 0 || timer >= MFGPT_MAX_TIMERS)
- return -EIO;
-
- /*
- * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA
- * is using the same CMP of the timer's Siamese twin, the IRQ is set to
- * 2, and we mustn't use nor change it.
- * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the
- * IRQ of the 1st. This can only happen if forcing an IRQ, calling this
- * with *irq==0 is safe. Currently there _are_ no 2 drivers.
- */
- rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
- shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4;
- if (((zsel >> shift) & 0xF) == 2)
- return -EIO;
-
- /* Choose IRQ: if none supplied, keep IRQ already set or use default */
- if (!*irq)
- *irq = (zsel >> shift) & 0xF;
- if (!*irq)
- *irq = MFGPT_DEFAULT_IRQ;
-
- /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */
- if (*irq < 1 || *irq == 2 || *irq > 15)
- return -EIO;
- rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy);
- if (lpc & (1 << *irq))
- return -EIO;
-
- /* All chosen and checked - go for it */
- if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
- return -EIO;
- if (enable) {
- zsel = (zsel & ~(0xF << shift)) | (*irq << shift);
- wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
- }
-
- return 0;
-}
-
-static int mfgpt_get(int timer)
-{
- mfgpt_timers[timer].avail = 0;
- printk(KERN_INFO "geode-mfgpt: Registered timer %d\n", timer);
- return timer;
-}
-
-int geode_mfgpt_alloc_timer(int timer, int domain)
-{
- int i;
-
- if (timers == -1) {
- /* timers haven't been detected yet */
- geode_mfgpt_detect();
- }
-
- if (!timers)
- return -1;
-
- if (timer >= MFGPT_MAX_TIMERS)
- return -1;
-
- if (timer < 0) {
- /* Try to find an available timer */
- for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
- if (mfgpt_timers[i].avail)
- return mfgpt_get(i);
-
- if (i == 5 && domain == MFGPT_DOMAIN_WORKING)
- break;
- }
- } else {
- /* If they requested a specific timer, try to honor that */
- if (mfgpt_timers[timer].avail)
- return mfgpt_get(timer);
- }
-
- /* No timers available - too bad */
- return -1;
-}
-EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer);
-
-
-#ifdef CONFIG_GEODE_MFGPT_TIMER
-
-/*
- * The MFPGT timers on the CS5536 provide us with suitable timers to use
- * as clock event sources - not as good as a HPET or APIC, but certainly
- * better than the PIT. This isn't a general purpose MFGPT driver, but
- * a simplified one designed specifically to act as a clock event source.
- * For full details about the MFGPT, please consult the CS5536 data sheet.
- */
-
-#include <linux/clocksource.h>
-#include <linux/clockchips.h>
-
-static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN;
-static u16 mfgpt_event_clock;
-
-static int irq;
-static int __init mfgpt_setup(char *str)
-{
- get_option(&str, &irq);
- return 1;
-}
-__setup("mfgpt_irq=", mfgpt_setup);
-
-static void mfgpt_disable_timer(u16 clock)
-{
- /* avoid races by clearing CMP1 and CMP2 unconditionally */
- geode_mfgpt_write(clock, MFGPT_REG_SETUP, (u16) ~MFGPT_SETUP_CNTEN |
- MFGPT_SETUP_CMP1 | MFGPT_SETUP_CMP2);
-}
-
-static int mfgpt_next_event(unsigned long, struct clock_event_device *);
-static void mfgpt_set_mode(enum clock_event_mode, struct clock_event_device *);
-
-static struct clock_event_device mfgpt_clockevent = {
- .name = "mfgpt-timer",
- .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
- .set_mode = mfgpt_set_mode,
- .set_next_event = mfgpt_next_event,
- .rating = 250,
- .cpumask = cpu_all_mask,
- .shift = 32
-};
-
-static void mfgpt_start_timer(u16 delta)
-{
- geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_CMP2, (u16) delta);
- geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0);
-
- geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP,
- MFGPT_SETUP_CNTEN | MFGPT_SETUP_CMP2);
-}
-
-static void mfgpt_set_mode(enum clock_event_mode mode,
- struct clock_event_device *evt)
-{
- mfgpt_disable_timer(mfgpt_event_clock);
-
- if (mode == CLOCK_EVT_MODE_PERIODIC)
- mfgpt_start_timer(MFGPT_PERIODIC);
-
- mfgpt_tick_mode = mode;
-}
-
-static int mfgpt_next_event(unsigned long delta, struct clock_event_device *evt)
-{
- mfgpt_start_timer(delta);
- return 0;
-}
-
-static irqreturn_t mfgpt_tick(int irq, void *dev_id)
-{
- u16 val = geode_mfgpt_read(mfgpt_event_clock, MFGPT_REG_SETUP);
-
- /* See if the interrupt was for us */
- if (!(val & (MFGPT_SETUP_SETUP | MFGPT_SETUP_CMP2 | MFGPT_SETUP_CMP1)))
- return IRQ_NONE;
-
- /* Turn off the clock (and clear the event) */
- mfgpt_disable_timer(mfgpt_event_clock);
-
- if (mfgpt_tick_mode == CLOCK_EVT_MODE_SHUTDOWN)
- return IRQ_HANDLED;
-
- /* Clear the counter */
- geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0);
-
- /* Restart the clock in periodic mode */
-
- if (mfgpt_tick_mode == CLOCK_EVT_MODE_PERIODIC) {
- geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP,
- MFGPT_SETUP_CNTEN | MFGPT_SETUP_CMP2);
- }
-
- mfgpt_clockevent.event_handler(&mfgpt_clockevent);
- return IRQ_HANDLED;
-}
-
-static struct irqaction mfgptirq = {
- .handler = mfgpt_tick,
- .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER,
- .name = "mfgpt-timer"
-};
-
-int __init mfgpt_timer_setup(void)
-{
- int timer, ret;
- u16 val;
-
- timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING);
- if (timer < 0) {
- printk(KERN_ERR
- "mfgpt-timer: Could not allocate a MFPGT timer\n");
- return -ENODEV;
- }
-
- mfgpt_event_clock = timer;
-
- /* Set up the IRQ on the MFGPT side */
- if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) {
- printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq);
- return -EIO;
- }
-
- /* And register it with the kernel */
- ret = setup_irq(irq, &mfgptirq);
-
- if (ret) {
- printk(KERN_ERR
- "mfgpt-timer: Unable to set up the interrupt.\n");
- goto err;
- }
-
- /* Set the clock scale and enable the event mode for CMP2 */
- val = MFGPT_SCALE | (3 << 8);
-
- geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val);
-
- /* Set up the clock event */
- mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC,
- mfgpt_clockevent.shift);
- mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF,
- &mfgpt_clockevent);
- mfgpt_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFE,
- &mfgpt_clockevent);
-
- printk(KERN_INFO
- "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n",
- timer, irq);
- clockevents_register_device(&mfgpt_clockevent);
-
- return 0;
-
-err:
- geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq);
- printk(KERN_ERR
- "mfgpt-timer: Unable to set up the MFGPT clock source\n");
- return -EIO;
-}
-
-#endif
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index f4c538b681c..37542b67c57 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -13,6 +13,9 @@
* Licensed under the terms of the GNU General Public
* License version 2. See file COPYING for details.
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/firmware.h>
#include <linux/pci_ids.h>
#include <linux/uaccess.h>
@@ -33,6 +36,9 @@ MODULE_LICENSE("GPL v2");
#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
#define UCODE_UCODE_TYPE 0x00000001
+const struct firmware *firmware;
+static int supported_cpu;
+
struct equiv_cpu_entry {
u32 installed_cpu;
u32 fixed_errata_mask;
@@ -71,17 +77,14 @@ static struct equiv_cpu_entry *equiv_cpu_table;
static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
{
- struct cpuinfo_x86 *c = &cpu_data(cpu);
u32 dummy;
- memset(csig, 0, sizeof(*csig));
- if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
- printk(KERN_WARNING "microcode: CPU%d: AMD CPU family 0x%x not "
- "supported\n", cpu, c->x86);
+ if (!supported_cpu)
return -1;
- }
+
+ memset(csig, 0, sizeof(*csig));
rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
- printk(KERN_INFO "microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev);
+ pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev);
return 0;
}
@@ -103,23 +106,16 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
i++;
}
- if (!equiv_cpu_id) {
- printk(KERN_WARNING "microcode: CPU%d: cpu revision "
- "not listed in equivalent cpu table\n", cpu);
+ if (!equiv_cpu_id)
return 0;
- }
- if (mc_header->processor_rev_id != equiv_cpu_id) {
- printk(KERN_ERR "microcode: CPU%d: patch mismatch "
- "(processor_rev_id: %x, equiv_cpu_id: %x)\n",
- cpu, mc_header->processor_rev_id, equiv_cpu_id);
+ if (mc_header->processor_rev_id != equiv_cpu_id)
return 0;
- }
/* ucode might be chipset specific -- currently we don't support this */
if (mc_header->nb_dev_id || mc_header->sb_dev_id) {
- printk(KERN_ERR "microcode: CPU%d: loading of chipset "
- "specific code not yet supported\n", cpu);
+ pr_err("CPU%d: loading of chipset specific code not yet supported\n",
+ cpu);
return 0;
}
@@ -148,14 +144,12 @@ static int apply_microcode_amd(int cpu)
/* check current patch id and patch's id for match */
if (rev != mc_amd->hdr.patch_id) {
- printk(KERN_ERR "microcode: CPU%d: update failed "
- "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id);
+ pr_err("CPU%d: update failed (for patch_level=0x%x)\n",
+ cpu, mc_amd->hdr.patch_id);
return -1;
}
- printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n",
- cpu, rev);
-
+ pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev);
uci->cpu_sig.rev = rev;
return 0;
@@ -178,18 +172,14 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
return NULL;
if (section_hdr[0] != UCODE_UCODE_TYPE) {
- printk(KERN_ERR "microcode: error: invalid type field in "
- "container file section header\n");
+ pr_err("error: invalid type field in container file section header\n");
return NULL;
}
total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
- printk(KERN_DEBUG "microcode: size %u, total_size %u\n",
- size, total_size);
-
if (total_size > size || total_size > UCODE_MAX_SIZE) {
- printk(KERN_ERR "microcode: error: size mismatch\n");
+ pr_err("error: size mismatch\n");
return NULL;
}
@@ -218,15 +208,13 @@ static int install_equiv_cpu_table(const u8 *buf)
size = buf_pos[2];
if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
- printk(KERN_ERR "microcode: error: invalid type field in "
- "container file section header\n");
+ pr_err("error: invalid type field in container file section header\n");
return 0;
}
equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size);
if (!equiv_cpu_table) {
- printk(KERN_ERR "microcode: failed to allocate "
- "equivalent CPU table\n");
+ pr_err("failed to allocate equivalent CPU table\n");
return 0;
}
@@ -259,8 +247,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
offset = install_equiv_cpu_table(ucode_ptr);
if (!offset) {
- printk(KERN_ERR "microcode: failed to create "
- "equivalent cpu table\n");
+ pr_err("failed to create equivalent cpu table\n");
return UCODE_ERROR;
}
@@ -291,8 +278,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
if (!leftover) {
vfree(uci->mc);
uci->mc = new_mc;
- pr_debug("microcode: CPU%d found a matching microcode "
- "update with version 0x%x (current=0x%x)\n",
+ pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
cpu, new_rev, uci->cpu_sig.rev);
} else {
vfree(new_mc);
@@ -308,33 +294,26 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
static enum ucode_state request_microcode_fw(int cpu, struct device *device)
{
- const char *fw_name = "amd-ucode/microcode_amd.bin";
- const struct firmware *firmware;
enum ucode_state ret;
- if (request_firmware(&firmware, fw_name, device)) {
- printk(KERN_ERR "microcode: failed to load file %s\n", fw_name);
+ if (firmware == NULL)
return UCODE_NFOUND;
- }
if (*(u32 *)firmware->data != UCODE_MAGIC) {
- printk(KERN_ERR "microcode: invalid UCODE_MAGIC (0x%08x)\n",
+ pr_err("invalid UCODE_MAGIC (0x%08x)\n",
*(u32 *)firmware->data);
return UCODE_ERROR;
}
ret = generic_load_microcode(cpu, firmware->data, firmware->size);
- release_firmware(firmware);
-
return ret;
}
static enum ucode_state
request_microcode_user(int cpu, const void __user *buf, size_t size)
{
- printk(KERN_INFO "microcode: AMD microcode update via "
- "/dev/cpu/microcode not supported\n");
+ pr_info("AMD microcode update via /dev/cpu/microcode not supported\n");
return UCODE_ERROR;
}
@@ -346,7 +325,31 @@ static void microcode_fini_cpu_amd(int cpu)
uci->mc = NULL;
}
+void init_microcode_amd(struct device *device)
+{
+ const char *fw_name = "amd-ucode/microcode_amd.bin";
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ WARN_ON(c->x86_vendor != X86_VENDOR_AMD);
+
+ if (c->x86 < 0x10) {
+ pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
+ return;
+ }
+ supported_cpu = 1;
+
+ if (request_firmware(&firmware, fw_name, device))
+ pr_err("failed to load file %s\n", fw_name);
+}
+
+void fini_microcode_amd(void)
+{
+ release_firmware(firmware);
+}
+
static struct microcode_ops microcode_amd_ops = {
+ .init = init_microcode_amd,
+ .fini = fini_microcode_amd,
.request_microcode_user = request_microcode_user,
.request_microcode_fw = request_microcode_fw,
.collect_cpu_info = collect_cpu_info_amd,
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 378e9a8f1bf..844c02c65fc 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -70,10 +70,12 @@
* Fix sigmatch() macro to handle old CPUs with pf == 0.
* Thanks to Stuart Swales for pointing out this bug.
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/platform_device.h>
#include <linux/miscdevice.h>
#include <linux/capability.h>
-#include <linux/smp_lock.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
@@ -201,7 +203,6 @@ static int do_microcode_update(const void __user *buf, size_t size)
static int microcode_open(struct inode *unused1, struct file *unused2)
{
- cycle_kernel_lock();
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}
@@ -211,7 +212,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
ssize_t ret = -EINVAL;
if ((len >> PAGE_SHIFT) > totalram_pages) {
- pr_err("microcode: too much data (max %ld pages)\n", totalram_pages);
+ pr_err("too much data (max %ld pages)\n", totalram_pages);
return ret;
}
@@ -246,7 +247,7 @@ static int __init microcode_dev_init(void)
error = misc_register(&microcode_dev);
if (error) {
- pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR);
+ pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR);
return error;
}
@@ -361,7 +362,7 @@ static enum ucode_state microcode_resume_cpu(int cpu)
if (!uci->mc)
return UCODE_NFOUND;
- pr_debug("microcode: CPU%d updated upon resume\n", cpu);
+ pr_debug("CPU%d updated upon resume\n", cpu);
apply_microcode_on_target(cpu);
return UCODE_OK;
@@ -381,7 +382,7 @@ static enum ucode_state microcode_init_cpu(int cpu)
ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
if (ustate == UCODE_OK) {
- pr_debug("microcode: CPU%d updated upon init\n", cpu);
+ pr_debug("CPU%d updated upon init\n", cpu);
apply_microcode_on_target(cpu);
}
@@ -393,7 +394,7 @@ static enum ucode_state microcode_update_cpu(int cpu)
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
enum ucode_state ustate;
- if (uci->valid)
+ if (uci->valid && uci->mc)
ustate = microcode_resume_cpu(cpu);
else
ustate = microcode_init_cpu(cpu);
@@ -408,7 +409,7 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
if (!cpu_online(cpu))
return 0;
- pr_debug("microcode: CPU%d added\n", cpu);
+ pr_debug("CPU%d added\n", cpu);
err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
if (err)
@@ -427,7 +428,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
if (!cpu_online(cpu))
return 0;
- pr_debug("microcode: CPU%d removed\n", cpu);
+ pr_debug("CPU%d removed\n", cpu);
microcode_fini_cpu(cpu);
sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
return 0;
@@ -475,15 +476,15 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
microcode_update_cpu(cpu);
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- pr_debug("microcode: CPU%d added\n", cpu);
+ pr_debug("CPU%d added\n", cpu);
if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
- pr_err("microcode: Failed to create group for CPU%d\n", cpu);
+ pr_err("Failed to create group for CPU%d\n", cpu);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
/* Suspend is in progress, only remove the interface */
sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
- pr_debug("microcode: CPU%d removed\n", cpu);
+ pr_debug("CPU%d removed\n", cpu);
break;
case CPU_DEAD:
case CPU_UP_CANCELED_FROZEN:
@@ -509,7 +510,7 @@ static int __init microcode_init(void)
microcode_ops = init_amd_microcode();
if (!microcode_ops) {
- pr_err("microcode: no support for this CPU vendor\n");
+ pr_err("no support for this CPU vendor\n");
return -ENODEV;
}
@@ -520,6 +521,9 @@ static int __init microcode_init(void)
return PTR_ERR(microcode_pdev);
}
+ if (microcode_ops->init)
+ microcode_ops->init(&microcode_pdev->dev);
+
get_online_cpus();
mutex_lock(&microcode_mutex);
@@ -540,8 +544,7 @@ static int __init microcode_init(void)
register_hotcpu_notifier(&mc_cpu_notifier);
pr_info("Microcode Update Driver: v" MICROCODE_VERSION
- " <tigran@aivazian.fsnet.co.uk>,"
- " Peter Oruba\n");
+ " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n");
return 0;
}
@@ -563,6 +566,9 @@ static void __exit microcode_exit(void)
platform_device_unregister(microcode_pdev);
+ if (microcode_ops->fini)
+ microcode_ops->fini();
+
microcode_ops = NULL;
pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 0d334ddd0a9..ebd193e476c 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -70,6 +70,9 @@
* Fix sigmatch() macro to handle old CPUs with pf == 0.
* Thanks to Stuart Swales for pointing out this bug.
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/firmware.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
@@ -146,8 +149,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
cpu_has(c, X86_FEATURE_IA64)) {
- printk(KERN_ERR "microcode: CPU%d not a capable Intel "
- "processor\n", cpu_num);
+ pr_err("CPU%d not a capable Intel processor\n", cpu_num);
return -1;
}
@@ -165,8 +167,8 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
- printk(KERN_INFO "microcode: CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
- cpu_num, csig->sig, csig->pf, csig->rev);
+ pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
+ cpu_num, csig->sig, csig->pf, csig->rev);
return 0;
}
@@ -194,28 +196,24 @@ static int microcode_sanity_check(void *mc)
data_size = get_datasize(mc_header);
if (data_size + MC_HEADER_SIZE > total_size) {
- printk(KERN_ERR "microcode: error! "
- "Bad data size in microcode data file\n");
+ pr_err("error! Bad data size in microcode data file\n");
return -EINVAL;
}
if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
- printk(KERN_ERR "microcode: error! "
- "Unknown microcode update format\n");
+ pr_err("error! Unknown microcode update format\n");
return -EINVAL;
}
ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
if (ext_table_size) {
if ((ext_table_size < EXT_HEADER_SIZE)
|| ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
- printk(KERN_ERR "microcode: error! "
- "Small exttable size in microcode data file\n");
+ pr_err("error! Small exttable size in microcode data file\n");
return -EINVAL;
}
ext_header = mc + MC_HEADER_SIZE + data_size;
if (ext_table_size != exttable_size(ext_header)) {
- printk(KERN_ERR "microcode: error! "
- "Bad exttable size in microcode data file\n");
+ pr_err("error! Bad exttable size in microcode data file\n");
return -EFAULT;
}
ext_sigcount = ext_header->count;
@@ -230,8 +228,7 @@ static int microcode_sanity_check(void *mc)
while (i--)
ext_table_sum += ext_tablep[i];
if (ext_table_sum) {
- printk(KERN_WARNING "microcode: aborting, "
- "bad extended signature table checksum\n");
+ pr_warning("aborting, bad extended signature table checksum\n");
return -EINVAL;
}
}
@@ -242,7 +239,7 @@ static int microcode_sanity_check(void *mc)
while (i--)
orig_sum += ((int *)mc)[i];
if (orig_sum) {
- printk(KERN_ERR "microcode: aborting, bad checksum\n");
+ pr_err("aborting, bad checksum\n");
return -EINVAL;
}
if (!ext_table_size)
@@ -255,7 +252,7 @@ static int microcode_sanity_check(void *mc)
- (mc_header->sig + mc_header->pf + mc_header->cksum)
+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
if (sum) {
- printk(KERN_ERR "microcode: aborting, bad checksum\n");
+ pr_err("aborting, bad checksum\n");
return -EINVAL;
}
}
@@ -327,13 +324,11 @@ static int apply_microcode(int cpu)
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
if (val[1] != mc_intel->hdr.rev) {
- printk(KERN_ERR "microcode: CPU%d update "
- "to revision 0x%x failed\n",
- cpu_num, mc_intel->hdr.rev);
+ pr_err("CPU%d update to revision 0x%x failed\n",
+ cpu_num, mc_intel->hdr.rev);
return -1;
}
- printk(KERN_INFO "microcode: CPU%d updated to revision "
- "0x%x, date = %04x-%02x-%02x \n",
+ pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x \n",
cpu_num, val[1],
mc_intel->hdr.date & 0xffff,
mc_intel->hdr.date >> 24,
@@ -362,8 +357,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
mc_size = get_totalsize(&mc_header);
if (!mc_size || mc_size > leftover) {
- printk(KERN_ERR "microcode: error!"
- "Bad data in microcode data file\n");
+ pr_err("error! Bad data in microcode data file\n");
break;
}
@@ -405,9 +399,8 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
vfree(uci->mc);
uci->mc = (struct microcode_intel *)new_mc;
- pr_debug("microcode: CPU%d found a matching microcode update with"
- " version 0x%x (current=0x%x)\n",
- cpu, new_rev, uci->cpu_sig.rev);
+ pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
+ cpu, new_rev, uci->cpu_sig.rev);
out:
return state;
}
@@ -429,7 +422,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device)
c->x86, c->x86_model, c->x86_mask);
if (request_firmware(&firmware, name, device)) {
- pr_debug("microcode: data file %s load failed\n", name);
+ pr_debug("data file %s load failed\n", name);
return UCODE_NFOUND;
}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 5be95ef4ffe..40b54ceb68b 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -667,36 +667,18 @@ void __init default_get_smp_config(unsigned int early)
*/
}
-static void __init smp_reserve_bootmem(struct mpf_intel *mpf)
+static void __init smp_reserve_memory(struct mpf_intel *mpf)
{
unsigned long size = get_mpc_size(mpf->physptr);
-#ifdef CONFIG_X86_32
- /*
- * We cannot access to MPC table to compute table size yet,
- * as only few megabytes from the bottom is mapped now.
- * PC-9800's MPC table places on the very last of physical
- * memory; so that simply reserving PAGE_SIZE from mpf->physptr
- * yields BUG() in reserve_bootmem.
- * also need to make sure physptr is below than max_low_pfn
- * we don't need reserve the area above max_low_pfn
- */
- unsigned long end = max_low_pfn * PAGE_SIZE;
- if (mpf->physptr < end) {
- if (mpf->physptr + size > end)
- size = end - mpf->physptr;
- reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
- }
-#else
- reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
-#endif
+ reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc");
}
-static int __init smp_scan_config(unsigned long base, unsigned long length,
- unsigned reserve)
+static int __init smp_scan_config(unsigned long base, unsigned long length)
{
unsigned int *bp = phys_to_virt(base);
struct mpf_intel *mpf;
+ unsigned long mem;
apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
bp, length);
@@ -717,12 +699,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
printk(KERN_INFO "found SMP MP-table at [%p] %llx\n",
mpf, (u64)virt_to_phys(mpf));
- if (!reserve)
- return 1;
- reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf),
- BOOTMEM_DEFAULT);
+ mem = virt_to_phys(mpf);
+ reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf");
if (mpf->physptr)
- smp_reserve_bootmem(mpf);
+ smp_reserve_memory(mpf);
return 1;
}
@@ -732,7 +712,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
return 0;
}
-void __init default_find_smp_config(unsigned int reserve)
+void __init default_find_smp_config(void)
{
unsigned int address;
@@ -744,9 +724,9 @@ void __init default_find_smp_config(unsigned int reserve)
* 2) Scan the top 1K of base RAM
* 3) Scan the 64K of bios
*/
- if (smp_scan_config(0x0, 0x400, reserve) ||
- smp_scan_config(639 * 0x400, 0x400, reserve) ||
- smp_scan_config(0xF0000, 0x10000, reserve))
+ if (smp_scan_config(0x0, 0x400) ||
+ smp_scan_config(639 * 0x400, 0x400) ||
+ smp_scan_config(0xF0000, 0x10000))
return;
/*
* If it is an SMP machine we should know now, unless the
@@ -767,7 +747,7 @@ void __init default_find_smp_config(unsigned int reserve)
address = get_bios_ebda();
if (address)
- smp_scan_config(address, 0x400, reserve);
+ smp_scan_config(address, 0x400);
}
#ifdef CONFIG_X86_IO_APIC
@@ -965,9 +945,6 @@ void __init early_reserve_e820_mpc_new(void)
{
if (enable_update_mptable && alloc_mptable) {
u64 startt = 0;
-#ifdef CONFIG_X86_TRAMPOLINE
- startt = TRAMPOLINE_BASE;
-#endif
mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
}
}
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 6a3cefc7dda..553449951b8 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -174,21 +174,17 @@ static int msr_open(struct inode *inode, struct file *file)
{
unsigned int cpu = iminor(file->f_path.dentry->d_inode);
struct cpuinfo_x86 *c = &cpu_data(cpu);
- int ret = 0;
- lock_kernel();
cpu = iminor(file->f_path.dentry->d_inode);
- if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
- ret = -ENXIO; /* No such CPU */
- goto out;
- }
+ if (cpu >= nr_cpu_ids || !cpu_online(cpu))
+ return -ENXIO; /* No such CPU */
+
c = &cpu_data(cpu);
if (!cpu_has(c, X86_FEATURE_MSR))
- ret = -EIO; /* MSR not supported */
-out:
- unlock_kernel();
- return ret;
+ return -EIO; /* MSR not supported */
+
+ return 0;
}
/*
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 4006c522adc..9d1d263f786 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -212,7 +212,7 @@ static int __init olpc_init(void)
unsigned char *romsig;
/* The ioremap check is dangerous; limit what we run it on */
- if (!is_geode() || geode_has_vsa2())
+ if (!is_geode() || cs5535_has_vsa2())
return 0;
spin_lock_init(&ec_lock);
@@ -244,7 +244,7 @@ static int __init olpc_init(void)
(unsigned char *) &olpc_platform_info.ecver, 1);
/* check to see if the VSA exists */
- if (geode_has_vsa2())
+ if (cs5535_has_vsa2())
olpc_platform_info.flags |= OLPC_F_VSA;
printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n",
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 3a7c5a44082..676b8c77a97 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -8,9 +8,9 @@
#include <asm/paravirt.h>
static inline void
-default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
+default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
{
- __raw_spin_lock(lock);
+ arch_spin_lock(lock);
}
struct pv_lock_ops pv_lock_ops = {
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index c563e4c8ff3..2bbde607814 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -31,7 +31,7 @@
#include <linux/string.h>
#include <linux/crash_dump.h>
#include <linux/dma-mapping.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
#include <linux/pci_ids.h>
#include <linux/pci.h>
#include <linux/delay.h>
@@ -212,7 +212,7 @@ static void iommu_range_reserve(struct iommu_table *tbl,
spin_lock_irqsave(&tbl->it_lock, flags);
- iommu_area_reserve(tbl->it_map, index, npages);
+ bitmap_set(tbl->it_map, index, npages);
spin_unlock_irqrestore(&tbl->it_lock, flags);
}
@@ -303,7 +303,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
spin_lock_irqsave(&tbl->it_lock, flags);
- iommu_area_free(tbl->it_map, entry, npages);
+ bitmap_clear(tbl->it_map, entry, npages);
spin_unlock_irqrestore(&tbl->it_lock, flags);
}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index afcc58b69c7..fcc2f2bfa39 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -120,11 +120,14 @@ static void __init dma32_free_bootmem(void)
void __init pci_iommu_alloc(void)
{
+ int use_swiotlb;
+
+ use_swiotlb = pci_swiotlb_init();
#ifdef CONFIG_X86_64
/* free the range so iommu could get some range less than 4G */
dma32_free_bootmem();
#endif
- if (pci_swiotlb_init())
+ if (use_swiotlb)
return;
gart_iommu_hole_init();
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index e6a0d402f17..34de53b46f8 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -23,7 +23,7 @@
#include <linux/module.h>
#include <linux/topology.h>
#include <linux/interrupt.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
#include <linux/kdebug.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
@@ -126,7 +126,7 @@ static void free_iommu(unsigned long offset, int size)
unsigned long flags;
spin_lock_irqsave(&iommu_bitmap_lock, flags);
- iommu_area_free(iommu_gart_bitmap, offset, size);
+ bitmap_clear(iommu_gart_bitmap, offset, size);
if (offset >= next_bit)
next_bit = offset + size;
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
@@ -710,7 +710,8 @@ static void gart_iommu_shutdown(void)
struct pci_dev *dev;
int i;
- if (no_agp)
+ /* don't shutdown it if there is AGP installed */
+ if (!no_agp)
return;
for (i = 0; i < num_k8_northbridges; i++) {
@@ -791,7 +792,7 @@ int __init gart_iommu_init(void)
* Out of IOMMU space handling.
* Reserve some invalid pages at the beginning of the GART.
*/
- iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
+ bitmap_set(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
iommu_size >> 20);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5284cd2b577..7a7bd4e3ec4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -9,7 +9,11 @@
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
+#include <linux/user-return-notifier.h>
+#include <linux/dmi.h>
+#include <linux/utsname.h>
#include <trace/events/power.h>
+#include <linux/hw_breakpoint.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
@@ -17,6 +21,7 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -87,6 +92,25 @@ void exit_thread(void)
}
}
+void show_regs_common(void)
+{
+ const char *board, *product;
+
+ board = dmi_get_system_info(DMI_BOARD_NAME);
+ if (!board)
+ board = "";
+ product = dmi_get_system_info(DMI_PRODUCT_NAME);
+ if (!product)
+ product = "";
+
+ printk("\n");
+ printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n",
+ current->pid, current->comm, print_tainted(),
+ init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version, board, product);
+}
+
void flush_thread(void)
{
struct task_struct *tsk = current;
@@ -103,14 +127,7 @@ void flush_thread(void)
}
#endif
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
-
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
+ flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
@@ -192,16 +209,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- set_debugreg(next->debugreg0, 0);
- set_debugreg(next->debugreg1, 1);
- set_debugreg(next->debugreg2, 2);
- set_debugreg(next->debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg6, 6);
- set_debugreg(next->debugreg7, 7);
- }
-
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */
@@ -224,6 +231,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
*/
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
+ propagate_user_return_notify(prev_p, next_p);
}
int sys_fork(struct pt_regs *regs)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4cf79567cda..120b88797a7 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -23,7 +23,6 @@
#include <linux/vmalloc.h>
#include <linux/user.h>
#include <linux/interrupt.h>
-#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/init.h>
@@ -35,7 +34,6 @@
#include <linux/tick.h>
#include <linux/percpu.h>
#include <linux/prctl.h>
-#include <linux/dmi.h>
#include <linux/ftrace.h>
#include <linux/uaccess.h>
#include <linux/io.h>
@@ -58,6 +56,7 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -127,28 +126,18 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned long d0, d1, d2, d3, d6, d7;
unsigned long sp;
unsigned short ss, gs;
- const char *board;
if (user_mode_vm(regs)) {
sp = regs->sp;
ss = regs->ss & 0xffff;
gs = get_user_gs(regs);
} else {
- sp = (unsigned long) (&regs->sp);
+ sp = kernel_stack_pointer(regs);
savesegment(ss, ss);
savesegment(gs, gs);
}
- printk("\n");
-
- board = dmi_get_system_info(DMI_PRODUCT_NAME);
- if (!board)
- board = "";
- printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
- task_pid_nr(current), current->comm,
- print_tainted(), init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version, board);
+ show_regs_common();
printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
(u16)regs->cs, regs->ip, regs->flags,
@@ -187,7 +176,7 @@ void __show_regs(struct pt_regs *regs, int all)
void show_regs(struct pt_regs *regs)
{
- __show_regs(regs, 1);
+ show_registers(regs);
show_trace(NULL, regs, &regs->sp, regs->bp);
}
@@ -259,7 +248,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
task_user_gs(p) = get_user_gs(regs);
+ p->thread.io_bitmap_ptr = NULL;
tsk = current;
+ err = -ENOMEM;
+
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
IO_BITMAP_BYTES, GFP_KERNEL);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eb62cbcaa49..e5ab0cd0ef3 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -26,7 +26,6 @@
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
-#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
@@ -38,7 +37,6 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
-#include <linux/dmi.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -52,6 +50,7 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
asmlinkage extern void ret_from_fork(void);
@@ -162,18 +161,8 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned long d0, d1, d2, d3, d6, d7;
unsigned int fsindex, gsindex;
unsigned int ds, cs, es;
- const char *board;
-
- printk("\n");
- print_modules();
- board = dmi_get_system_info(DMI_PRODUCT_NAME);
- if (!board)
- board = "";
- printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
- current->pid, current->comm, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version, board);
+
+ show_regs_common();
printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
printk_address(regs->ip, 1);
printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
@@ -226,8 +215,7 @@ void __show_regs(struct pt_regs *regs, int all)
void show_regs(struct pt_regs *regs)
{
- printk(KERN_INFO "CPU %d:", smp_processor_id());
- __show_regs(regs, 1);
+ show_registers(regs);
show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}
@@ -297,12 +285,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
+ p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
savesegment(fs, p->thread.fsindex);
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ err = -ENOMEM;
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
@@ -341,29 +333,46 @@ out:
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+
return err;
}
-void
-start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
+static void
+start_thread_common(struct pt_regs *regs, unsigned long new_ip,
+ unsigned long new_sp,
+ unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
loadsegment(fs, 0);
- loadsegment(es, 0);
- loadsegment(ds, 0);
+ loadsegment(es, _ds);
+ loadsegment(ds, _ds);
load_gs_index(0);
regs->ip = new_ip;
regs->sp = new_sp;
percpu_write(old_rsp, new_sp);
- regs->cs = __USER_CS;
- regs->ss = __USER_DS;
- regs->flags = 0x200;
+ regs->cs = _cs;
+ regs->ss = _ss;
+ regs->flags = X86_EFLAGS_IF;
set_fs(USER_DS);
/*
* Free the old FP and other extended state
*/
free_thread_xstate(current);
}
-EXPORT_SYMBOL_GPL(start_thread);
+
+void
+start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
+{
+ start_thread_common(regs, new_ip, new_sp,
+ __USER_CS, __USER_DS, 0);
+}
+
+#ifdef CONFIG_IA32_EMULATION
+void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
+{
+ start_thread_common(regs, new_ip, new_sp,
+ __USER32_CS, __USER32_DS, __USER32_DS);
+}
+#endif
/*
* switch_to(x,y) should switch tasks from x to y.
@@ -495,6 +504,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
if (preload_fpu)
__math_state_restore();
+
return prev_p;
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 7b058a2dc66..2779321046b 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,8 @@
#include <linux/seccomp.h>
#include <linux/signal.h>
#include <linux/workqueue.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -34,6 +36,7 @@
#include <asm/prctl.h>
#include <asm/proto.h>
#include <asm/ds.h>
+#include <asm/hw_breakpoint.h>
#include "tls.h"
@@ -49,6 +52,118 @@ enum x86_regset {
REGSET_IOPERM32,
};
+struct pt_regs_offset {
+ const char *name;
+ int offset;
+};
+
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+#ifdef CONFIG_X86_64
+ REG_OFFSET_NAME(r15),
+ REG_OFFSET_NAME(r14),
+ REG_OFFSET_NAME(r13),
+ REG_OFFSET_NAME(r12),
+ REG_OFFSET_NAME(r11),
+ REG_OFFSET_NAME(r10),
+ REG_OFFSET_NAME(r9),
+ REG_OFFSET_NAME(r8),
+#endif
+ REG_OFFSET_NAME(bx),
+ REG_OFFSET_NAME(cx),
+ REG_OFFSET_NAME(dx),
+ REG_OFFSET_NAME(si),
+ REG_OFFSET_NAME(di),
+ REG_OFFSET_NAME(bp),
+ REG_OFFSET_NAME(ax),
+#ifdef CONFIG_X86_32
+ REG_OFFSET_NAME(ds),
+ REG_OFFSET_NAME(es),
+ REG_OFFSET_NAME(fs),
+ REG_OFFSET_NAME(gs),
+#endif
+ REG_OFFSET_NAME(orig_ax),
+ REG_OFFSET_NAME(ip),
+ REG_OFFSET_NAME(cs),
+ REG_OFFSET_NAME(flags),
+ REG_OFFSET_NAME(sp),
+ REG_OFFSET_NAME(ss),
+ REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name: the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL;
+ */
+int regs_query_register_offset(const char *name)
+{
+ const struct pt_regs_offset *roff;
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->offset;
+ return -EINVAL;
+}
+
+/**
+ * regs_query_register_name() - query register name from its offset
+ * @offset: the offset of a register in struct pt_regs.
+ *
+ * regs_query_register_name() returns the name of a register from its
+ * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+ const struct pt_regs_offset *roff;
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (roff->offset == offset)
+ return roff->name;
+ return NULL;
+}
+
+static const int arg_offs_table[] = {
+#ifdef CONFIG_X86_32
+ [0] = offsetof(struct pt_regs, ax),
+ [1] = offsetof(struct pt_regs, dx),
+ [2] = offsetof(struct pt_regs, cx)
+#else /* CONFIG_X86_64 */
+ [0] = offsetof(struct pt_regs, di),
+ [1] = offsetof(struct pt_regs, si),
+ [2] = offsetof(struct pt_regs, dx),
+ [3] = offsetof(struct pt_regs, cx),
+ [4] = offsetof(struct pt_regs, r8),
+ [5] = offsetof(struct pt_regs, r9)
+#endif
+};
+
+/**
+ * regs_get_argument_nth() - get Nth argument at function call
+ * @regs: pt_regs which contains registers at function entry.
+ * @n: argument number.
+ *
+ * regs_get_argument_nth() returns @n th argument of a function call.
+ * Since usually the kernel stack will be changed right after function entry,
+ * you must use this at function entry. If the @n th entry is NOT in the
+ * kernel stack or pt_regs, this returns 0.
+ */
+unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
+{
+ if (n < ARRAY_SIZE(arg_offs_table))
+ return *(unsigned long *)((char *)regs + arg_offs_table[n]);
+ else {
+ /*
+ * The typical case: arg n is on the stack.
+ * (Note: stack[0] = return address, so skip it)
+ */
+ n -= ARRAY_SIZE(arg_offs_table);
+ return regs_get_kernel_stack_nth(regs, 1 + n);
+ }
+}
+
/*
* does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
@@ -137,11 +252,6 @@ static int set_segment_reg(struct task_struct *task,
return 0;
}
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
- return TASK_SIZE - 3;
-}
-
#else /* CONFIG_X86_64 */
#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
@@ -266,15 +376,6 @@ static int set_segment_reg(struct task_struct *task,
return 0;
}
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
-#ifdef CONFIG_IA32_EMULATION
- if (test_tsk_thread_flag(task, TIF_IA32))
- return IA32_PAGE_OFFSET - 3;
-#endif
- return TASK_SIZE_MAX - 7;
-}
-
#endif /* CONFIG_X86_32 */
static unsigned long get_flags(struct task_struct *task)
@@ -454,99 +555,237 @@ static int genregs_set(struct task_struct *target,
return ret;
}
+static void ptrace_triggered(struct perf_event *bp, int nmi,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ int i;
+ struct thread_struct *thread = &(current->thread);
+
+ /*
+ * Store in the virtual DR6 register the fact that the breakpoint
+ * was hit so the thread's debugger will see it.
+ */
+ for (i = 0; i < HBP_NUM; i++) {
+ if (thread->ptrace_bps[i] == bp)
+ break;
+ }
+
+ thread->debugreg6 |= (DR_TRAP0 << i);
+}
+
/*
- * This function is trivial and will be inlined by the compiler.
- * Having it separates the implementation details of debug
- * registers from the interface details of ptrace.
+ * Walk through every ptrace breakpoints for this thread and
+ * build the dr7 value on top of their attributes.
+ *
*/
-static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
+static unsigned long ptrace_get_dr7(struct perf_event *bp[])
{
- switch (n) {
- case 0: return child->thread.debugreg0;
- case 1: return child->thread.debugreg1;
- case 2: return child->thread.debugreg2;
- case 3: return child->thread.debugreg3;
- case 6: return child->thread.debugreg6;
- case 7: return child->thread.debugreg7;
+ int i;
+ int dr7 = 0;
+ struct arch_hw_breakpoint *info;
+
+ for (i = 0; i < HBP_NUM; i++) {
+ if (bp[i] && !bp[i]->attr.disabled) {
+ info = counter_arch_bp(bp[i]);
+ dr7 |= encode_dr7(i, info->len, info->type);
+ }
}
- return 0;
+
+ return dr7;
}
-static int ptrace_set_debugreg(struct task_struct *child,
- int n, unsigned long data)
+static int
+ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
+ struct task_struct *tsk, int disabled)
{
- int i;
+ int err;
+ int gen_len, gen_type;
+ struct perf_event_attr attr;
- if (unlikely(n == 4 || n == 5))
- return -EIO;
+ /*
+ * We shoud have at least an inactive breakpoint at this
+ * slot. It means the user is writing dr7 without having
+ * written the address register first
+ */
+ if (!bp)
+ return -EINVAL;
- if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
- return -EIO;
+ err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
+ if (err)
+ return err;
- switch (n) {
- case 0: child->thread.debugreg0 = data; break;
- case 1: child->thread.debugreg1 = data; break;
- case 2: child->thread.debugreg2 = data; break;
- case 3: child->thread.debugreg3 = data; break;
+ attr = bp->attr;
+ attr.bp_len = gen_len;
+ attr.bp_type = gen_type;
+ attr.disabled = disabled;
- case 6:
- if ((data & ~0xffffffffUL) != 0)
- return -EIO;
- child->thread.debugreg6 = data;
- break;
+ return modify_user_hw_breakpoint(bp, &attr);
+}
- case 7:
+/*
+ * Handle ptrace writes to debug register 7.
+ */
+static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ unsigned long old_dr7;
+ int i, orig_ret = 0, rc = 0;
+ int enabled, second_pass = 0;
+ unsigned len, type;
+ struct perf_event *bp;
+
+ data &= ~DR_CONTROL_RESERVED;
+ old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
+restore:
+ /*
+ * Loop through all the hardware breakpoints, making the
+ * appropriate changes to each.
+ */
+ for (i = 0; i < HBP_NUM; i++) {
+ enabled = decode_dr7(data, i, &len, &type);
+ bp = thread->ptrace_bps[i];
+
+ if (!enabled) {
+ if (bp) {
+ /*
+ * Don't unregister the breakpoints right-away,
+ * unless all register_user_hw_breakpoint()
+ * requests have succeeded. This prevents
+ * any window of opportunity for debug
+ * register grabbing by other users.
+ */
+ if (!second_pass)
+ continue;
+
+ rc = ptrace_modify_breakpoint(bp, len, type,
+ tsk, 1);
+ if (rc)
+ break;
+ }
+ continue;
+ }
+
+ rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
+ if (rc)
+ break;
+ }
+ /*
+ * Make a second pass to free the remaining unused breakpoints
+ * or to restore the original breakpoints if an error occurred.
+ */
+ if (!second_pass) {
+ second_pass = 1;
+ if (rc < 0) {
+ orig_ret = rc;
+ data = old_dr7;
+ }
+ goto restore;
+ }
+ return ((orig_ret < 0) ? orig_ret : rc);
+}
+
+/*
+ * Handle PTRACE_PEEKUSR calls for the debug register area.
+ */
+static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ unsigned long val = 0;
+
+ if (n < HBP_NUM) {
+ struct perf_event *bp;
+ bp = thread->ptrace_bps[n];
+ if (!bp)
+ return 0;
+ val = bp->hw.info.address;
+ } else if (n == 6) {
+ val = thread->debugreg6;
+ } else if (n == 7) {
+ val = ptrace_get_dr7(thread->ptrace_bps);
+ }
+ return val;
+}
+
+static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
+ unsigned long addr)
+{
+ struct perf_event *bp;
+ struct thread_struct *t = &tsk->thread;
+ struct perf_event_attr attr;
+
+ if (!t->ptrace_bps[nr]) {
+ hw_breakpoint_init(&attr);
/*
- * Sanity-check data. Take one half-byte at once with
- * check = (val >> (16 + 4*i)) & 0xf. It contains the
- * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
- * 2 and 3 are LENi. Given a list of invalid values,
- * we do mask |= 1 << invalid_value, so that
- * (mask >> check) & 1 is a correct test for invalid
- * values.
- *
- * R/Wi contains the type of the breakpoint /
- * watchpoint, LENi contains the length of the watched
- * data in the watchpoint case.
- *
- * The invalid values are:
- * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
- * - R/Wi == 0x10 (break on I/O reads or writes), so
- * mask |= 0x4444.
- * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
- * 0x1110.
- *
- * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
- *
- * See the Intel Manual "System Programming Guide",
- * 15.2.4
- *
- * Note that LENi == 0x10 is defined on x86_64 in long
- * mode (i.e. even for 32-bit userspace software, but
- * 64-bit kernel), so the x86_64 mask value is 0x5454.
- * See the AMD manual no. 24593 (AMD64 System Programming)
+ * Put stub len and type to register (reserve) an inactive but
+ * correct bp
*/
-#ifdef CONFIG_X86_32
-#define DR7_MASK 0x5f54
-#else
-#define DR7_MASK 0x5554
-#endif
- data &= ~DR_CONTROL_RESERVED;
- for (i = 0; i < 4; i++)
- if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
- return -EIO;
- child->thread.debugreg7 = data;
- if (data)
- set_tsk_thread_flag(child, TIF_DEBUG);
- else
- clear_tsk_thread_flag(child, TIF_DEBUG);
- break;
+ attr.bp_addr = addr;
+ attr.bp_len = HW_BREAKPOINT_LEN_1;
+ attr.bp_type = HW_BREAKPOINT_W;
+ attr.disabled = 1;
+
+ bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
+
+ /*
+ * CHECKME: the previous code returned -EIO if the addr wasn't
+ * a valid task virtual addr. The new one will return -EINVAL in
+ * this case.
+ * -EINVAL may be what we want for in-kernel breakpoints users,
+ * but -EIO looks better for ptrace, since we refuse a register
+ * writing for the user. And anyway this is the previous
+ * behaviour.
+ */
+ if (IS_ERR(bp))
+ return PTR_ERR(bp);
+
+ t->ptrace_bps[nr] = bp;
+ } else {
+ int err;
+
+ bp = t->ptrace_bps[nr];
+
+ attr = bp->attr;
+ attr.bp_addr = addr;
+ err = modify_user_hw_breakpoint(bp, &attr);
+ if (err)
+ return err;
}
+
return 0;
}
/*
+ * Handle PTRACE_POKEUSR calls for the debug register area.
+ */
+int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ int rc = 0;
+
+ /* There are no DR4 or DR5 registers */
+ if (n == 4 || n == 5)
+ return -EIO;
+
+ if (n == 6) {
+ thread->debugreg6 = val;
+ goto ret_path;
+ }
+ if (n < HBP_NUM) {
+ rc = ptrace_set_breakpoint_addr(tsk, n, val);
+ if (rc)
+ return rc;
+ }
+ /* All that's left is DR7 */
+ if (n == 7)
+ rc = ptrace_write_dr7(tsk, val);
+
+ret_path:
+ return rc;
+}
+
+/*
* These access the current or another (stopped) task's io permission
* bitmap for debugging or core dump.
*/
@@ -1437,21 +1676,33 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
#endif
}
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
- int error_code, int si_code)
+static void fill_sigtrap_info(struct task_struct *tsk,
+ struct pt_regs *regs,
+ int error_code, int si_code,
+ struct siginfo *info)
{
- struct siginfo info;
-
tsk->thread.trap_no = 1;
tsk->thread.error_code = error_code;
- memset(&info, 0, sizeof(info));
- info.si_signo = SIGTRAP;
- info.si_code = si_code;
+ memset(info, 0, sizeof(*info));
+ info->si_signo = SIGTRAP;
+ info->si_code = si_code;
+ info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL;
+}
- /* User-mode ip? */
- info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL;
+void user_single_step_siginfo(struct task_struct *tsk,
+ struct pt_regs *regs,
+ struct siginfo *info)
+{
+ fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info);
+}
+void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
+ int error_code, int si_code)
+{
+ struct siginfo info;
+
+ fill_sigtrap_info(tsk, regs, error_code, si_code, &info);
/* Send us the fake SIGTRAP */
force_sig_info(SIGTRAP, &info, tsk);
}
@@ -1516,29 +1767,22 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
asmregparm void syscall_trace_leave(struct pt_regs *regs)
{
+ bool step;
+
if (unlikely(current->audit_context))
audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_exit(regs, regs->ax);
- if (test_thread_flag(TIF_SYSCALL_TRACE))
- tracehook_report_syscall_exit(regs, 0);
-
/*
* If TIF_SYSCALL_EMU is set, we only get here because of
* TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
* We already reported this syscall instruction in
- * syscall_trace_enter(), so don't do any more now.
- */
- if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
- return;
-
- /*
- * If we are single-stepping, synthesize a trap to follow the
- * system call instruction.
+ * syscall_trace_enter().
*/
- if (test_thread_flag(TIF_SINGLESTEP) &&
- tracehook_consider_fatal_signal(current, SIGTRAP))
- send_sigtrap(current, regs, 0, TRAP_BRKPT);
+ step = unlikely(test_thread_flag(TIF_SINGLESTEP)) &&
+ !test_thread_flag(TIF_SYSCALL_EMU);
+ if (step || test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall_exit(regs, step);
}
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 6c3b2c6fd77..18093d7498f 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -499,6 +499,7 @@ static void __init quirk_amd_nb_node(struct pci_dev *dev)
{
struct pci_dev *nb_ht;
unsigned int devfn;
+ u32 node;
u32 val;
devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0);
@@ -507,7 +508,13 @@ static void __init quirk_amd_nb_node(struct pci_dev *dev)
return;
pci_read_config_dword(nb_ht, 0x60, &val);
- set_dev_node(&dev->dev, val & 7);
+ node = val & 7;
+ /*
+ * Some hardware may return an invalid node ID,
+ * so check it first:
+ */
+ if (node_online(node))
+ set_dev_node(&dev->dev, node);
pci_dev_put(nb_ht);
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2b97fc5b124..1545bc0c984 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -259,6 +259,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"),
},
},
+ { /* Handle problems with rebooting on ASUS P4S800 */
+ .callback = set_bios_reboot,
+ .ident = "ASUS P4S800",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
+ },
+ },
{ }
};
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index 61a837743fe..fda313ebbb0 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -12,7 +12,7 @@
#include <linux/interrupt.h>
#include <asm/reboot_fixups.h>
#include <asm/msr.h>
-#include <asm/geode.h>
+#include <linux/cs5535.h>
static void cs5530a_warm_reset(struct pci_dev *dev)
{
@@ -80,6 +80,7 @@ void mach_reboot_fixups(void)
continue;
cur->reboot_fixup(dev);
+ pci_dev_put(dev);
}
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2a34f9c5be2..f7b8b9894b2 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -73,6 +73,7 @@
#include <asm/mtrr.h>
#include <asm/apic.h>
+#include <asm/trampoline.h>
#include <asm/e820.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
@@ -106,9 +107,11 @@
#include <asm/percpu.h>
#include <asm/topology.h>
#include <asm/apicdef.h>
+#include <asm/k8.h>
#ifdef CONFIG_X86_64
#include <asm/numa_64.h>
#endif
+#include <asm/mce.h>
/*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -247,7 +250,7 @@ EXPORT_SYMBOL(edd);
* from boot_params into a safe place.
*
*/
-static inline void copy_edd(void)
+static inline void __init copy_edd(void)
{
memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
sizeof(edd.mbr_signature));
@@ -256,7 +259,7 @@ static inline void copy_edd(void)
edd.edd_info_nr = boot_params.eddbuf_entries;
}
#else
-static inline void copy_edd(void)
+static inline void __init copy_edd(void)
{
}
#endif
@@ -486,42 +489,11 @@ static void __init reserve_early_setup_data(void)
#ifdef CONFIG_KEXEC
-/**
- * Reserve @size bytes of crashkernel memory at any suitable offset.
- *
- * @size: Size of the crashkernel memory to reserve.
- * Returns the base address on success, and -1ULL on failure.
- */
-static
-unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
-{
- const unsigned long long alignment = 16<<20; /* 16M */
- unsigned long long start = 0LL;
-
- while (1) {
- int ret;
-
- start = find_e820_area(start, ULONG_MAX, size, alignment);
- if (start == -1ULL)
- return start;
-
- /* try to reserve it */
- ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE);
- if (ret >= 0)
- return start;
-
- start += alignment;
- }
-}
-
static inline unsigned long long get_total_mem(void)
{
unsigned long long total;
- total = max_low_pfn - min_low_pfn;
-#ifdef CONFIG_HIGHMEM
- total += highend_pfn - highstart_pfn;
-#endif
+ total = max_pfn - min_low_pfn;
return total << PAGE_SHIFT;
}
@@ -541,21 +513,25 @@ static void __init reserve_crashkernel(void)
/* 0 means: find the address automatically */
if (crash_base <= 0) {
- crash_base = find_and_reserve_crashkernel(crash_size);
+ const unsigned long long alignment = 16<<20; /* 16M */
+
+ crash_base = find_e820_area(alignment, ULONG_MAX, crash_size,
+ alignment);
if (crash_base == -1ULL) {
- pr_info("crashkernel reservation failed. "
- "No suitable area found.\n");
+ pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
}
} else {
- ret = reserve_bootmem_generic(crash_base, crash_size,
- BOOTMEM_EXCLUSIVE);
- if (ret < 0) {
- pr_info("crashkernel reservation failed - "
- "memory is in use\n");
+ unsigned long long start;
+
+ start = find_e820_area(crash_base, ULONG_MAX, crash_size,
+ 1<<20);
+ if (start != crash_base) {
+ pr_info("crashkernel reservation failed - memory is in use.\n");
return;
}
}
+ reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL");
printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
"for crashkernel (System RAM: %ldMB)\n",
@@ -698,6 +674,9 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
void __init setup_arch(char **cmdline_p)
{
+ int acpi = 0;
+ int k8 = 0;
+
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
visws_early_detect();
@@ -790,21 +769,18 @@ void __init setup_arch(char **cmdline_p)
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
-#ifdef CONFIG_X86_64
/*
- * Must call this twice: Once just to detect whether hardware doesn't
- * support NX (so that the early EHCI debug console setup can safely
- * call set_fixmap(), and then again after parsing early parameters to
- * honor the respective command line option.
+ * x86_configure_nx() is called before parse_early_param() to detect
+ * whether hardware doesn't support NX (so that the early EHCI debug
+ * console setup can safely call set_fixmap()). It may then be called
+ * again from within noexec_setup() during parsing early parameters
+ * to honor the respective command line option.
*/
- check_efer();
-#endif
+ x86_configure_nx();
parse_early_param();
-#ifdef CONFIG_X86_64
- check_efer();
-#endif
+ x86_report_nx();
/* Must be before kernel pagetables are setup */
vmi_activate();
@@ -900,6 +876,20 @@ void __init setup_arch(char **cmdline_p)
reserve_brk();
+ /*
+ * Find and reserve possible boot-time SMP configuration:
+ */
+ find_smp_config();
+
+ reserve_trampoline_memory();
+
+#ifdef CONFIG_ACPI_SLEEP
+ /*
+ * Reserve low memory region for sleep support.
+ * even before init_memory_mapping
+ */
+ acpi_reserve_wakeup_memory();
+#endif
init_gbpages();
/* max_pfn_mapped is updated here */
@@ -926,6 +916,8 @@ void __init setup_arch(char **cmdline_p)
reserve_initrd();
+ reserve_crashkernel();
+
vsmp_init();
io_delay_init();
@@ -941,23 +933,15 @@ void __init setup_arch(char **cmdline_p)
/*
* Parse SRAT to discover nodes.
*/
- acpi_numa_init();
+ acpi = acpi_numa_init();
#endif
- initmem_init(0, max_pfn);
-
-#ifdef CONFIG_ACPI_SLEEP
- /*
- * Reserve low memory region for sleep support.
- */
- acpi_reserve_bootmem();
+#ifdef CONFIG_K8_NUMA
+ if (!acpi)
+ k8 = !k8_numa_init(0, max_pfn);
#endif
- /*
- * Find and reserve possible boot-time SMP configuration:
- */
- find_smp_config();
- reserve_crashkernel();
+ initmem_init(0, max_pfn, acpi, k8);
#ifdef CONFIG_X86_64
/*
@@ -1031,6 +1015,8 @@ void __init setup_arch(char **cmdline_p)
#endif
#endif
x86_init.oem.banner();
+
+ mcheck_init();
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index d559af913e1..35abcb8b00e 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -20,9 +22,9 @@
#include <asm/stackprotector.h>
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-# define DBG(x...) printk(KERN_DEBUG x)
+# define DBG(fmt, ...) pr_dbg(fmt, ##__VA_ARGS__)
#else
-# define DBG(x...)
+# define DBG(fmt, ...) do { if (0) pr_dbg(fmt, ##__VA_ARGS__); } while (0)
#endif
DEFINE_PER_CPU(int, cpu_number);
@@ -116,8 +118,8 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
} else {
ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node),
size, align, goal);
- pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
- "%016lx\n", cpu, size, node, __pa(ptr));
+ pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
+ cpu, size, node, __pa(ptr));
}
return ptr;
#else
@@ -198,8 +200,7 @@ void __init setup_per_cpu_areas(void)
pcpu_cpu_distance,
pcpu_fc_alloc, pcpu_fc_free);
if (rc < 0)
- pr_warning("PERCPU: %s allocator failed (%d), "
- "falling back to page size\n",
+ pr_warning("%s allocator failed (%d), falling back to page size\n",
pcpu_fc_names[pcpu_chosen_fc], rc);
}
if (rc < 0)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 6a44a76055a..74fe6d86dc5 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -19,6 +19,7 @@
#include <linux/stddef.h>
#include <linux/personality.h>
#include <linux/uaccess.h>
+#include <linux/user-return-notifier.h>
#include <asm/processor.h>
#include <asm/ucontext.h>
@@ -799,15 +800,6 @@ static void do_signal(struct pt_regs *regs)
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
- /*
- * Re-enable any watchpoints before delivering the
- * signal to user space. The processor register will
- * have been cleared if the watchpoint triggered
- * inside the kernel.
- */
- if (current->thread.debugreg7)
- set_debugreg(current->thread.debugreg7, 7);
-
/* Whee! Actually deliver the signal. */
if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
/*
@@ -872,6 +864,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (current->replacement_session_keyring)
key_replace_session_keyring();
}
+ if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
+ fire_user_return_notifiers();
#ifdef CONFIG_X86_32
clear_thread_flag(TIF_IRET);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 565ebc65920..678d0b8c26f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -671,6 +671,26 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
complete(&c_idle->done);
}
+/* reduce the number of lines printed when booting a large cpu count system */
+static void __cpuinit announce_cpu(int cpu, int apicid)
+{
+ static int current_node = -1;
+ int node = cpu_to_node(cpu);
+
+ if (system_state == SYSTEM_BOOTING) {
+ if (node != current_node) {
+ if (current_node > (-1))
+ pr_cont(" Ok.\n");
+ current_node = node;
+ pr_info("Booting Node %3d, Processors ", node);
+ }
+ pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : "");
+ return;
+ } else
+ pr_info("Booting Node %d Processor %d APIC 0x%x\n",
+ node, cpu, apicid);
+}
+
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -687,7 +707,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
};
- INIT_WORK(&c_idle.work, do_fork_idle);
+ INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle);
alternatives_smp_switch(1);
@@ -713,6 +733,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
if (IS_ERR(c_idle.idle)) {
printk("failed fork for CPU %d\n", cpu);
+ destroy_work_on_stack(&c_idle.work);
return PTR_ERR(c_idle.idle);
}
@@ -736,9 +757,8 @@ do_rest:
/* start_ip had better be page-aligned! */
start_ip = setup_trampoline();
- /* So we see what's up */
- printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n",
- cpu, apicid, start_ip);
+ /* So we see what's up */
+ announce_cpu(cpu, apicid);
/*
* This grunge runs the startup process for
@@ -787,21 +807,17 @@ do_rest:
udelay(100);
}
- if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
- /* number CPUs logically, starting from 1 (BSP is 0) */
- pr_debug("OK.\n");
- printk(KERN_INFO "CPU%d: ", cpu);
- print_cpu_info(&cpu_data(cpu));
- pr_debug("CPU has booted.\n");
- } else {
+ if (cpumask_test_cpu(cpu, cpu_callin_mask))
+ pr_debug("CPU%d: has booted.\n", cpu);
+ else {
boot_error = 1;
if (*((volatile unsigned char *)trampoline_base)
== 0xA5)
/* trampoline started but...? */
- printk(KERN_ERR "Stuck ??\n");
+ pr_err("CPU%d: Stuck ??\n", cpu);
else
/* trampoline code not run */
- printk(KERN_ERR "Not responding.\n");
+ pr_err("CPU%d: Not responding.\n", cpu);
if (apic->inquire_remote_apic)
apic->inquire_remote_apic(apicid);
}
@@ -831,6 +847,7 @@ do_rest:
smpboot_restore_warm_reset_vector();
}
+ destroy_work_on_stack(&c_idle.work);
return boot_error;
}
@@ -1250,16 +1267,7 @@ static void __ref remove_cpu_from_maps(int cpu)
void cpu_disable_common(void)
{
int cpu = smp_processor_id();
- /*
- * HACK:
- * Allow any queued timer interrupts to get serviced
- * This is only a temporary solution until we cleanup
- * fixup_irqs as we do for IA64.
- */
- local_irq_enable();
- mdelay(1);
- local_irq_disable();
remove_siblinginfo(cpu);
/* It's now safe to remove this processor from the online map */
@@ -1300,14 +1308,16 @@ void native_cpu_die(unsigned int cpu)
for (i = 0; i < 10; i++) {
/* They ack this in play_dead by setting CPU_DEAD */
if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
- printk(KERN_INFO "CPU %d is now offline\n", cpu);
+ if (system_state == SYSTEM_RUNNING)
+ pr_info("CPU %u is now offline\n", cpu);
+
if (1 == num_online_cpus())
alternatives_smp_switch(0);
return;
}
msleep(100);
}
- printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+ pr_err("CPU %u didn't die...\n", cpu);
}
void play_dead_common(void)
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 1884a8d12bf..dee1ff7cba5 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -24,31 +24,6 @@
#include <asm/syscalls.h>
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
-{
- int error = -EBADF;
- struct file *file = NULL;
- struct mm_struct *mm = current->mm;
-
- flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
- if (!(flags & MAP_ANONYMOUS)) {
- file = fget(fd);
- if (!file)
- goto out;
- }
-
- down_write(&mm->mmap_sem);
- error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
- up_write(&mm->mmap_sem);
-
- if (file)
- fput(file);
-out:
- return error;
-}
-
/*
* Perform the select(nd, in, out, ex, tv) and mmap() system
* calls. Linux/i386 didn't use to be able to handle more than
@@ -77,7 +52,7 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
if (a.offset & ~PAGE_MASK)
goto out;
- err = sys_mmap2(a.addr, a.len, a.prot, a.flags,
+ err = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags,
a.fd, a.offset >> PAGE_SHIFT);
out:
return err;
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 45e00eb09c3..8aa2057efd1 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -23,26 +23,11 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, fd, unsigned long, off)
{
long error;
- struct file *file;
-
error = -EINVAL;
if (off & ~PAGE_MASK)
goto out;
- error = -EBADF;
- file = NULL;
- flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
- if (!(flags & MAP_ANONYMOUS)) {
- file = fget(fd);
- if (!file)
- goto out;
- }
- down_write(&current->mm->mmap_sem);
- error = do_mmap_pgoff(file, addr, len, prot, flags, off >> PAGE_SHIFT);
- up_write(&current->mm->mmap_sem);
-
- if (file)
- fput(file);
+ error = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
out:
return error;
}
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 0157cd26d7c..15228b5d3eb 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -191,7 +191,7 @@ ENTRY(sys_call_table)
.long sys_ni_syscall /* reserved for streams2 */
.long ptregs_vfork /* 190 */
.long sys_getrlimit
- .long sys_mmap2
+ .long sys_mmap_pgoff
.long sys_truncate64
.long sys_ftruncate64
.long sys_stat64 /* 195 */
@@ -336,3 +336,4 @@ ENTRY(sys_call_table)
.long sys_pwritev
.long sys_rt_tgsigqueueinfo /* 335 */
.long sys_perf_event_open
+ .long sys_recvmmsg
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 1740c85e24b..364d015efeb 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -817,10 +817,8 @@ static int __init uv_init_blade(int blade)
*/
apicid = blade_to_first_apicid(blade);
pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
- if ((pa & 0xff) != UV_BAU_MESSAGE) {
- uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
+ uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
((apicid << 32) | UV_BAU_MESSAGE));
- }
return 0;
}
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index cd022121cab..c652ef62742 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -12,21 +12,19 @@
#endif
/* ready for x86_64 and x86 */
-unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE);
+unsigned char *__trampinitdata trampoline_base;
void __init reserve_trampoline_memory(void)
{
-#ifdef CONFIG_X86_32
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- */
- reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE");
-#endif
+ unsigned long mem;
+
/* Has to be in very low memory so we can execute real-mode AP code. */
- reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE,
- "TRAMPOLINE");
+ mem = find_e820_area(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE);
+ if (mem == -1L)
+ panic("Cannot allocate trampoline\n");
+
+ trampoline_base = __va(mem);
+ reserve_early(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE");
}
/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7e37dcee0cc..33399176512 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk = current;
- unsigned long condition;
+ unsigned long dr6;
int si_code;
- get_debugreg(condition, 6);
+ get_debugreg(dr6, 6);
/* Catch kmemcheck conditions first of all! */
- if (condition & DR_STEP && kmemcheck_trap(regs))
+ if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
return;
+ /* DR6 may or may not be cleared by the CPU */
+ set_debugreg(0, 6);
/*
* The processor cleared BTF, so don't mark that we need it set.
*/
clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
tsk->thread.debugctlmsr = 0;
- if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
- SIGTRAP) == NOTIFY_STOP)
+ /* Store the virtualized DR6 value */
+ tsk->thread.debugreg6 = dr6;
+
+ if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
+ SIGTRAP) == NOTIFY_STOP)
return;
/* It's safe to allow irq's after DR6 has been saved */
preempt_conditional_sti(regs);
- /* Mask out spurious debug traps due to lazy DR7 setting */
- if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
- if (!tsk->thread.debugreg7)
- goto clear_dr7;
+ if (regs->flags & X86_VM_MASK) {
+ handle_vm86_trap((struct kernel_vm86_regs *) regs,
+ error_code, 1);
+ return;
}
-#ifdef CONFIG_X86_32
- if (regs->flags & X86_VM_MASK)
- goto debug_vm86;
-#endif
-
- /* Save debug status register where ptrace can see it */
- tsk->thread.debugreg6 = condition;
-
/*
- * Single-stepping through TF: make sure we ignore any events in
- * kernel space (but re-enable TF when returning to user mode).
+ * Single-stepping through system calls: ignore any exceptions in
+ * kernel space, but re-enable TF when returning to user mode.
+ *
+ * We already checked v86 mode above, so we can check for kernel mode
+ * by just checking the CPL of CS.
*/
- if (condition & DR_STEP) {
- if (!user_mode(regs))
- goto clear_TF_reenable;
+ if ((dr6 & DR_STEP) && !user_mode(regs)) {
+ tsk->thread.debugreg6 &= ~DR_STEP;
+ set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+ regs->flags &= ~X86_EFLAGS_TF;
}
-
- si_code = get_si_code(condition);
- /* Ok, finally something we can handle */
- send_sigtrap(tsk, regs, error_code, si_code);
-
- /*
- * Disable additional traps. They'll be re-enabled when
- * the signal is delivered.
- */
-clear_dr7:
- set_debugreg(0, 7);
+ si_code = get_si_code(tsk->thread.debugreg6);
+ if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
+ send_sigtrap(tsk, regs, error_code, si_code);
preempt_conditional_cli(regs);
- return;
-#ifdef CONFIG_X86_32
-debug_vm86:
- /* reenable preemption: handle_vm86_trap() might sleep */
- dec_preempt_count();
- handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
- conditional_cli(regs);
- return;
-#endif
-
-clear_TF_reenable:
- set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
- regs->flags &= ~X86_EFLAGS_TF;
- preempt_conditional_cli(regs);
return;
}
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index f37930954d1..0aa5fed8b9e 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -33,7 +33,7 @@ static __cpuinitdata atomic_t stop_count;
* we want to have the fastest, inlined, non-debug version
* of a critical section, to be able to prove TSC time-warps:
*/
-static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static __cpuinitdata arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static __cpuinitdata cycles_t last_tsc;
static __cpuinitdata cycles_t max_warp;
@@ -62,13 +62,13 @@ static __cpuinit void check_tsc_warp(void)
* previous TSC that was measured (possibly on
* another CPU) and update the previous TSC timestamp.
*/
- __raw_spin_lock(&sync_lock);
+ arch_spin_lock(&sync_lock);
prev = last_tsc;
rdtsc_barrier();
now = get_cycles();
rdtsc_barrier();
last_tsc = now;
- __raw_spin_unlock(&sync_lock);
+ arch_spin_unlock(&sync_lock);
/*
* Be nice every now and then (and also check whether
@@ -87,10 +87,10 @@ static __cpuinit void check_tsc_warp(void)
* we saw a time-warp of the TSC going backwards:
*/
if (unlikely(prev > now)) {
- __raw_spin_lock(&sync_lock);
+ arch_spin_lock(&sync_lock);
max_warp = max(max_warp, prev - now);
nr_warps++;
- __raw_spin_unlock(&sync_lock);
+ arch_spin_unlock(&sync_lock);
}
}
WARN(!(now-start),
@@ -114,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu)
return;
if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
- printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n");
+ if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
+ pr_info(
+ "Skipped synchronization checks as TSC is reliable.\n");
return;
}
- pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:",
- smp_processor_id(), cpu);
-
/*
* Reset it - in case this is a second bootup:
*/
@@ -142,12 +141,14 @@ void __cpuinit check_tsc_sync_source(int cpu)
cpu_relax();
if (nr_warps) {
- printk("\n");
+ pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
+ smp_processor_id(), cpu);
pr_warning("Measured %Ld cycles TSC warp between CPUs, "
"turning off TSC clock.\n", max_warp);
mark_tsc_unstable("check_tsc_sync_source failed");
} else {
- printk(" passed.\n");
+ pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
+ smp_processor_id(), cpu);
}
/*
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index aeef529917e..61d805df4c9 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -9,10 +9,25 @@
*/
#include <linux/module.h>
+#include <linux/rbtree.h>
#include <linux/irq.h>
#include <asm/apic.h>
#include <asm/uv/uv_irq.h>
+#include <asm/uv/uv_hub.h>
+
+/* MMR offset and pnode of hub sourcing interrupts for a given irq */
+struct uv_irq_2_mmr_pnode{
+ struct rb_node list;
+ unsigned long offset;
+ int pnode;
+ int irq;
+};
+
+static spinlock_t uv_irq_lock;
+static struct rb_root uv_irq_root;
+
+static int uv_set_irq_affinity(unsigned int, const struct cpumask *);
static void uv_noop(unsigned int irq)
{
@@ -39,25 +54,214 @@ struct irq_chip uv_irq_chip = {
.unmask = uv_noop,
.eoi = uv_ack_apic,
.end = uv_noop,
+ .set_affinity = uv_set_irq_affinity,
};
/*
+ * Add offset and pnode information of the hub sourcing interrupts to the
+ * rb tree for a specific irq.
+ */
+static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
+{
+ struct rb_node **link = &uv_irq_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct uv_irq_2_mmr_pnode *n;
+ struct uv_irq_2_mmr_pnode *e;
+ unsigned long irqflags;
+
+ n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
+ uv_blade_to_memory_nid(blade));
+ if (!n)
+ return -ENOMEM;
+
+ n->irq = irq;
+ n->offset = offset;
+ n->pnode = uv_blade_to_pnode(blade);
+ spin_lock_irqsave(&uv_irq_lock, irqflags);
+ /* Find the right place in the rbtree: */
+ while (*link) {
+ parent = *link;
+ e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);
+
+ if (unlikely(irq == e->irq)) {
+ /* irq entry exists */
+ e->pnode = uv_blade_to_pnode(blade);
+ e->offset = offset;
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ kfree(n);
+ return 0;
+ }
+
+ if (irq < e->irq)
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
+ }
+
+ /* Insert the node into the rbtree. */
+ rb_link_node(&n->list, parent, link);
+ rb_insert_color(&n->list, &uv_irq_root);
+
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ return 0;
+}
+
+/* Retrieve offset and pnode information from the rb tree for a specific irq */
+int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
+{
+ struct uv_irq_2_mmr_pnode *e;
+ struct rb_node *n;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&uv_irq_lock, irqflags);
+ n = uv_irq_root.rb_node;
+ while (n) {
+ e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
+
+ if (e->irq == irq) {
+ *offset = e->offset;
+ *pnode = e->pnode;
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ return 0;
+ }
+
+ if (irq < e->irq)
+ n = n->rb_left;
+ else
+ n = n->rb_right;
+ }
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ return -1;
+}
+
+/*
+ * Re-target the irq to the specified CPU and enable the specified MMR located
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
+ */
+static int
+arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
+ unsigned long mmr_offset, int restrict)
+{
+ const struct cpumask *eligible_cpu = cpumask_of(cpu);
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg;
+ int mmr_pnode;
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+ int err;
+
+ BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
+ sizeof(unsigned long));
+
+ cfg = irq_cfg(irq);
+
+ err = assign_irq_vector(irq, cfg, eligible_cpu);
+ if (err != 0)
+ return err;
+
+ if (restrict == UV_AFFINITY_CPU)
+ desc->status |= IRQ_NO_BALANCING;
+ else
+ desc->status |= IRQ_MOVE_PCNTXT;
+
+ set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+ irq_name);
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+ entry->vector = cfg->vector;
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
+
+ mmr_pnode = uv_blade_to_pnode(mmr_blade);
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
+
+ return irq;
+}
+
+/*
+ * Disable the specified MMR located on the specified blade so that MSIs are
+ * longer allowed to be sent.
+ */
+static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
+{
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+
+ BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
+ sizeof(unsigned long));
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+ entry->mask = 1;
+
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+}
+
+static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg = desc->chip_data;
+ unsigned int dest;
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+ unsigned long mmr_offset;
+ unsigned mmr_pnode;
+
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
+ return -1;
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+
+ entry->vector = cfg->vector;
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = dest;
+
+ /* Get previously stored MMR and pnode of hub sourcing interrupts */
+ if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
+ return -1;
+
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
+
+ return 0;
+}
+
+/*
* Set up a mapping of an available irq and vector, and enable the specified
* MMR that defines the MSI that is to be sent to the specified CPU when an
* interrupt is raised.
*/
int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
- unsigned long mmr_offset)
+ unsigned long mmr_offset, int restrict)
{
- int irq;
- int ret;
+ int irq, ret;
+
+ irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade));
- irq = create_irq();
if (irq <= 0)
return -EBUSY;
- ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset);
- if (ret != irq)
+ ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
+ restrict);
+ if (ret == irq)
+ uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
+ else
destroy_irq(irq);
return ret;
@@ -71,9 +275,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq);
*
* Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
*/
-void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset)
+void uv_teardown_irq(unsigned int irq)
{
- arch_disable_uv_irq(mmr_blade, mmr_offset);
+ struct uv_irq_2_mmr_pnode *e;
+ struct rb_node *n;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&uv_irq_lock, irqflags);
+ n = uv_irq_root.rb_node;
+ while (n) {
+ e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
+ if (e->irq == irq) {
+ arch_disable_uv_irq(e->pnode, e->offset);
+ rb_erase(n, &uv_irq_root);
+ kfree(e);
+ break;
+ }
+ if (irq < e->irq)
+ n = n->rb_left;
+ else
+ n = n->rb_right;
+ }
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
destroy_irq(irq);
}
EXPORT_SYMBOL_GPL(uv_teardown_irq);
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c
index 583f11d5c48..3c84aa001c1 100644
--- a/arch/x86/kernel/uv_time.c
+++ b/arch/x86/kernel/uv_time.c
@@ -74,7 +74,7 @@ struct uv_rtc_timer_head {
*/
static struct uv_rtc_timer_head **blade_info __read_mostly;
-static int uv_rtc_enable;
+static int uv_rtc_evt_enable;
/*
* Hardware interface routines
@@ -90,7 +90,7 @@ static void uv_rtc_send_IPI(int cpu)
pnode = uv_apicid_to_pnode(apicid);
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(apicid << UVH_IPI_INT_APIC_ID_SHFT) |
- (GENERIC_INTERRUPT_VECTOR << UVH_IPI_INT_VECTOR_SHFT);
+ (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT);
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
}
@@ -115,7 +115,7 @@ static int uv_setup_intr(int cpu, u64 expires)
uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
UVH_EVENT_OCCURRED0_RTC1_MASK);
- val = (GENERIC_INTERRUPT_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
+ val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
/* Set configuration */
@@ -123,7 +123,10 @@ static int uv_setup_intr(int cpu, u64 expires)
/* Initialize comparator value */
uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires);
- return (expires < uv_read_rtc(NULL) && !uv_intr_pending(pnode));
+ if (uv_read_rtc(NULL) <= expires)
+ return 0;
+
+ return !uv_intr_pending(pnode);
}
/*
@@ -223,6 +226,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires)
next_cpu = head->next_cpu;
*t = expires;
+
/* Will this one be next to go off? */
if (next_cpu < 0 || bcpu == next_cpu ||
expires < head->cpu[next_cpu].expires) {
@@ -231,7 +235,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires)
*t = ULLONG_MAX;
uv_rtc_find_next_timer(head, pnode);
spin_unlock_irqrestore(&head->lock, flags);
- return 1;
+ return -ETIME;
}
}
@@ -244,7 +248,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires)
*
* Returns 1 if this timer was pending.
*/
-static int uv_rtc_unset_timer(int cpu)
+static int uv_rtc_unset_timer(int cpu, int force)
{
int pnode = uv_cpu_to_pnode(cpu);
int bid = uv_cpu_to_blade_id(cpu);
@@ -256,14 +260,15 @@ static int uv_rtc_unset_timer(int cpu)
spin_lock_irqsave(&head->lock, flags);
- if (head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t)
+ if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
rc = 1;
- *t = ULLONG_MAX;
-
- /* Was the hardware setup for this timer? */
- if (head->next_cpu == bcpu)
- uv_rtc_find_next_timer(head, pnode);
+ if (rc) {
+ *t = ULLONG_MAX;
+ /* Was the hardware setup for this timer? */
+ if (head->next_cpu == bcpu)
+ uv_rtc_find_next_timer(head, pnode);
+ }
spin_unlock_irqrestore(&head->lock, flags);
@@ -310,32 +315,32 @@ static void uv_rtc_timer_setup(enum clock_event_mode mode,
break;
case CLOCK_EVT_MODE_UNUSED:
case CLOCK_EVT_MODE_SHUTDOWN:
- uv_rtc_unset_timer(ced_cpu);
+ uv_rtc_unset_timer(ced_cpu, 1);
break;
}
}
static void uv_rtc_interrupt(void)
{
- struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
int cpu = smp_processor_id();
+ struct clock_event_device *ced = &per_cpu(cpu_ced, cpu);
if (!ced || !ced->event_handler)
return;
- if (uv_rtc_unset_timer(cpu) != 1)
+ if (uv_rtc_unset_timer(cpu, 0) != 1)
return;
ced->event_handler(ced);
}
-static int __init uv_enable_rtc(char *str)
+static int __init uv_enable_evt_rtc(char *str)
{
- uv_rtc_enable = 1;
+ uv_rtc_evt_enable = 1;
return 1;
}
-__setup("uvrtc", uv_enable_rtc);
+__setup("uvrtcevt", uv_enable_evt_rtc);
static __init void uv_rtc_register_clockevents(struct work_struct *dummy)
{
@@ -350,27 +355,32 @@ static __init int uv_rtc_setup_clock(void)
{
int rc;
- if (!uv_rtc_enable || !is_uv_system() || generic_interrupt_extension)
+ if (!is_uv_system())
return -ENODEV;
- generic_interrupt_extension = uv_rtc_interrupt;
-
clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
clocksource_uv.shift);
+ /* If single blade, prefer tsc */
+ if (uv_num_possible_blades() == 1)
+ clocksource_uv.rating = 250;
+
rc = clocksource_register(&clocksource_uv);
- if (rc) {
- generic_interrupt_extension = NULL;
+ if (rc)
+ printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc);
+ else
+ printk(KERN_INFO "UV RTC clocksource registered freq %lu MHz\n",
+ sn_rtc_cycles_per_second/(unsigned long)1E6);
+
+ if (rc || !uv_rtc_evt_enable || x86_platform_ipi_callback)
return rc;
- }
/* Setup and register clockevents */
rc = uv_rtc_allocate_timers();
- if (rc) {
- clocksource_unregister(&clocksource_uv);
- generic_interrupt_extension = NULL;
- return rc;
- }
+ if (rc)
+ goto error;
+
+ x86_platform_ipi_callback = uv_rtc_interrupt;
clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second,
NSEC_PER_SEC, clock_event_device_uv.shift);
@@ -383,11 +393,19 @@ static __init int uv_rtc_setup_clock(void)
rc = schedule_on_each_cpu(uv_rtc_register_clockevents);
if (rc) {
- clocksource_unregister(&clocksource_uv);
- generic_interrupt_extension = NULL;
+ x86_platform_ipi_callback = NULL;
uv_rtc_deallocate_timers();
+ goto error;
}
+ printk(KERN_INFO "UV RTC clockevents registered\n");
+
+ return 0;
+
+error:
+ clocksource_unregister(&clocksource_uv);
+ printk(KERN_INFO "UV RTC clockevents failed rc %d\n", rc);
+
return rc;
}
arch_initcall(uv_rtc_setup_clock);
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index f068553a1b1..34a279a7471 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -183,7 +183,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
return;
}
- apic_cpus = apic->apicid_to_cpu_present(m->apicid);
+ apic->apicid_to_cpu_present(m->apicid, &apic_cpus);
physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
/*
* Validate version
@@ -197,7 +197,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
apic_version[m->apicid] = ver;
}
-static void __init visws_find_smp_config(unsigned int reserve)
+static void __init visws_find_smp_config(void)
{
struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS);
unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
@@ -486,7 +486,7 @@ static void end_cobalt_irq(unsigned int irq)
}
static struct irq_chip cobalt_irq_type = {
- .typename = "Cobalt-APIC",
+ .name = "Cobalt-APIC",
.startup = startup_cobalt_irq,
.shutdown = disable_cobalt_irq,
.enable = enable_cobalt_irq,
@@ -523,7 +523,7 @@ static void end_piix4_master_irq(unsigned int irq)
}
static struct irq_chip piix4_master_irq_type = {
- .typename = "PIIX4-master",
+ .name = "PIIX4-master",
.startup = startup_piix4_master_irq,
.ack = ack_cobalt_irq,
.end = end_piix4_master_irq,
@@ -531,7 +531,7 @@ static struct irq_chip piix4_master_irq_type = {
static struct irq_chip piix4_virtual_irq_type = {
- .typename = "PIIX4-virtual",
+ .name = "PIIX4-virtual",
.shutdown = disable_8259A_irq,
.enable = enable_8259A_irq,
.disable = disable_8259A_irq,
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 611b9e2360d..74c92bb194d 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void)
evt->min_delta_ns = clockevent_delta2ns(1, evt);
evt->cpumask = cpumask_of(cpu);
- printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
+ printk(KERN_WARNING "vmi: registering clock event %s. mult=%u shift=%u\n",
evt->name, evt->mult, evt->shift);
clockevents_register_device(evt);
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3c68fe2d46c..f3f2104408d 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -41,6 +41,32 @@ ENTRY(phys_startup_64)
jiffies_64 = jiffies;
#endif
+#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+/*
+ * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
+ * we retain large page mappings for boundaries spanning kernel text, rodata
+ * and data sections.
+ *
+ * However, kernel identity mappings will have different RWX permissions
+ * to the pages mapping to text and to the pages padding (which are freed) the
+ * text section. Hence kernel identity mappings will be broken to smaller
+ * pages. For 64-bit, kernel text and kernel identity mappings are different,
+ * so we can enable protection checks that come with CONFIG_DEBUG_RODATA,
+ * as well as retain 2MB large page mappings for kernel text.
+ */
+#define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE);
+
+#define X64_ALIGN_DEBUG_RODATA_END \
+ . = ALIGN(HPAGE_SIZE); \
+ __end_rodata_hpage_align = .;
+
+#else
+
+#define X64_ALIGN_DEBUG_RODATA_BEGIN
+#define X64_ALIGN_DEBUG_RODATA_END
+
+#endif
+
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
data PT_LOAD FLAGS(7); /* RWE */
@@ -90,7 +116,9 @@ SECTIONS
EXCEPTION_TABLE(16) :text = 0x9090
+ X64_ALIGN_DEBUG_RODATA_BEGIN
RO_DATA(PAGE_SIZE)
+ X64_ALIGN_DEBUG_RODATA_END
/* Data */
.data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -107,13 +135,13 @@ SECTIONS
PAGE_ALIGNED_DATA(PAGE_SIZE)
- CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES)
+ CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
DATA_DATA
CONSTRUCTORS
/* rarely changed data like cpu maps */
- READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES)
+ READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES)
/* End of data section */
_edata = .;
@@ -137,12 +165,12 @@ SECTIONS
*(.vsyscall_0)
} :user
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ . = ALIGN(L1_CACHE_BYTES);
.vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
*(.vsyscall_fn)
}
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ . = ALIGN(L1_CACHE_BYTES);
.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
*(.vsyscall_gtod_data)
}
@@ -166,7 +194,7 @@ SECTIONS
}
vgetcpu_mode = VVIRT(.vgetcpu_mode);
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ . = ALIGN(L1_CACHE_BYTES);
.jiffies : AT(VLOAD(.jiffies)) {
*(.jiffies)
}
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 8cb4974ff59..9055e5872ff 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -73,7 +73,8 @@ void update_vsyscall_tz(void)
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
+ u32 mult)
{
unsigned long flags;
@@ -82,7 +83,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
vsyscall_gtod_data.clock.vread = clock->vread;
vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
vsyscall_gtod_data.clock.mask = clock->mask;
- vsyscall_gtod_data.clock.mult = clock->mult;
+ vsyscall_gtod_data.clock.mult = mult;
vsyscall_gtod_data.clock.shift = clock->shift;
vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
@@ -237,7 +238,7 @@ static ctl_table kernel_table2[] = {
};
static ctl_table kernel_root_table2[] = {
- { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
+ { .procname = "kernel", .mode = 0555,
.child = kernel_table2 },
{}
};
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 3909e3ba5ce..a1029769b6f 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -30,9 +30,8 @@ EXPORT_SYMBOL(__put_user_8);
EXPORT_SYMBOL(copy_user_generic);
EXPORT_SYMBOL(__copy_user_nocache);
-EXPORT_SYMBOL(copy_from_user);
-EXPORT_SYMBOL(copy_to_user);
-EXPORT_SYMBOL(__copy_from_user_inatomic);
+EXPORT_SYMBOL(_copy_from_user);
+EXPORT_SYMBOL(_copy_to_user);
EXPORT_SYMBOL(copy_page);
EXPORT_SYMBOL(clear_page);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index d11c5ff7c65..ccd179dec36 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -13,6 +13,7 @@
#include <asm/e820.h>
#include <asm/time.h>
#include <asm/irq.h>
+#include <asm/pat.h>
#include <asm/tsc.h>
#include <asm/iommu.h>
@@ -80,4 +81,5 @@ struct x86_platform_ops x86_platform = {
.get_wallclock = mach_get_cmos_time,
.set_wallclock = mach_set_rtc_mmss,
.iommu_shutdown = iommu_shutdown_noop,
+ .is_untracked_pat_range = is_ISA_range,
};