summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile4
-rw-r--r--arch/x86/kernel/acpi/boot.c9
-rw-r--r--arch/x86/kernel/apic_32.c41
-rw-r--r--arch/x86/kernel/apic_64.c237
-rw-r--r--arch/x86/kernel/apm_32.c1
-rw-r--r--arch/x86/kernel/asm-offsets_64.c2
-rw-r--r--arch/x86/kernel/bios_uv.c10
-rw-r--r--arch/x86/kernel/cpu/Makefile32
-rw-r--r--arch/x86/kernel/cpu/cmpxchg.c72
-rw-r--r--arch/x86/kernel/cpu/common.c42
-rw-r--r--arch/x86/kernel/cpu/common_64.c2
-rw-r--r--arch/x86/kernel/cpu/cyrix.c16
-rw-r--r--arch/x86/kernel/cpu/feature_names.c84
-rw-r--r--arch/x86/kernel/cpu/intel.c71
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c169
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.pl32
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c2
-rw-r--r--arch/x86/kernel/cpu/powerflags.c20
-rw-r--r--arch/x86/kernel/cpuid.c1
-rw-r--r--arch/x86/kernel/crash_dump_64.c7
-rw-r--r--arch/x86/kernel/genapic_64.c88
-rw-r--r--arch/x86/kernel/genapic_flat_64.c62
-rw-r--r--arch/x86/kernel/genx2apic_cluster.c164
-rw-r--r--arch/x86/kernel/genx2apic_phys.c159
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c69
-rw-r--r--arch/x86/kernel/i387.c154
-rw-r--r--arch/x86/kernel/i8259.c24
-rw-r--r--arch/x86/kernel/io_apic_32.c10
-rw-r--r--arch/x86/kernel/io_apic_64.c608
-rw-r--r--arch/x86/kernel/ioport.c1
-rw-r--r--arch/x86/kernel/ipi.c3
-rw-r--r--arch/x86/kernel/irq_32.c2
-rw-r--r--arch/x86/kernel/irq_64.c2
-rw-r--r--arch/x86/kernel/ldt.c1
-rw-r--r--arch/x86/kernel/mpparse.c2
-rw-r--r--arch/x86/kernel/numaq_32.c7
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c2
-rw-r--r--arch/x86/kernel/pci-dma.c2
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c132
-rw-r--r--arch/x86/kernel/ptrace.c36
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/sigframe.h19
-rw-r--r--arch/x86/kernel/signal_32.c191
-rw-r--r--arch/x86/kernel/signal_64.c308
-rw-r--r--arch/x86/kernel/smpboot.c47
-rw-r--r--arch/x86/kernel/summit_32.c2
-rw-r--r--arch/x86/kernel/sys_i386_32.c2
-rw-r--r--arch/x86/kernel/sys_x86_64.c44
-rw-r--r--arch/x86/kernel/syscall_64.c4
-rw-r--r--arch/x86/kernel/time_32.c1
-rw-r--r--arch/x86/kernel/tls.c1
-rw-r--r--arch/x86/kernel/traps_32.c1
-rw-r--r--arch/x86/kernel/traps_64.c67
-rw-r--r--arch/x86/kernel/visws_quirks.c16
-rw-r--r--arch/x86/kernel/vm86_32.c1
-rw-r--r--arch/x86/kernel/vmi_32.c4
-rw-r--r--arch/x86/kernel/xsave.c316
59 files changed, 2584 insertions, 828 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3db651fc8ec..d6ea91abaeb 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -38,7 +38,7 @@ obj-y += tsc.o io_delay.o rtc.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-y += process.o
-obj-y += i387.o
+obj-y += i387.o xsave.o
obj-y += ptrace.o
obj-y += ds.o
obj-$(CONFIG_X86_32) += tls.o
@@ -104,6 +104,8 @@ obj-$(CONFIG_OLPC) += olpc.o
ifeq ($(CONFIG_X86_64),y)
obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
obj-y += bios_uv.o
+ obj-y += genx2apic_cluster.o
+ obj-y += genx2apic_phys.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index bfd10fd211c..267e684f33a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -58,7 +58,6 @@ EXPORT_SYMBOL(acpi_disabled);
#ifdef CONFIG_X86_64
#include <asm/proto.h>
-#include <asm/genapic.h>
#else /* X86 */
@@ -97,8 +96,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
#warning ACPI uses CMPXCHG, i486 and later hardware
#endif
-static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
-
/* --------------------------------------------------------------------------
Boot-time Configuration
-------------------------------------------------------------------------- */
@@ -160,6 +157,8 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
struct acpi_mcfg_allocation *pci_mmcfg_config;
int pci_mmcfg_config_num;
+static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
+
static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
{
if (!strcmp(mcfg->header.oem_id, "SGI"))
@@ -775,7 +774,7 @@ static void __init acpi_register_lapic_address(unsigned long address)
set_fixmap_nocache(FIX_APIC_BASE, address);
if (boot_cpu_physical_apicid == -1U) {
- boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+ boot_cpu_physical_apicid = read_apic_id();
#ifdef CONFIG_X86_32
apic_version[boot_cpu_physical_apicid] =
GET_APIC_VERSION(apic_read(APIC_LVR));
@@ -1351,7 +1350,9 @@ static void __init acpi_process_madt(void)
acpi_ioapic = 1;
smp_found_config = 1;
+#ifdef CONFIG_X86_32
setup_apic_routing();
+#endif
}
}
if (error == -EINVAL) {
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index f88bd0d982b..44cae65e32e 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -145,13 +145,18 @@ static int modern_apic(void)
return lapic_get_version() >= 0x14;
}
-void apic_wait_icr_idle(void)
+/*
+ * Paravirt kernels also might be using these below ops. So we still
+ * use generic apic_read()/apic_write(), which might be pointing to different
+ * ops in PARAVIRT case.
+ */
+void xapic_wait_icr_idle(void)
{
while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
cpu_relax();
}
-u32 safe_apic_wait_icr_idle(void)
+u32 safe_xapic_wait_icr_idle(void)
{
u32 send_status;
int timeout;
@@ -167,6 +172,34 @@ u32 safe_apic_wait_icr_idle(void)
return send_status;
}
+void xapic_icr_write(u32 low, u32 id)
+{
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
+ apic_write(APIC_ICR, low);
+}
+
+u64 xapic_icr_read(void)
+{
+ u32 icr1, icr2;
+
+ icr2 = apic_read(APIC_ICR2);
+ icr1 = apic_read(APIC_ICR);
+
+ return icr1 | ((u64)icr2 << 32);
+}
+
+static struct apic_ops xapic_ops = {
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = xapic_icr_read,
+ .icr_write = xapic_icr_write,
+ .wait_icr_idle = xapic_wait_icr_idle,
+ .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+};
+
+struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+EXPORT_SYMBOL_GPL(apic_ops);
+
/**
* enable_NMI_through_LVT0 - enable NMI through local vector table 0
*/
@@ -1205,7 +1238,7 @@ void __init init_apic_mappings(void)
* default configuration (or the MP table is broken).
*/
if (boot_cpu_physical_apicid == -1U)
- boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+ boot_cpu_physical_apicid = read_apic_id();
}
@@ -1242,7 +1275,7 @@ int __init APIC_init_uniprocessor(void)
* might be zero if read from MP tables. Get it from LAPIC.
*/
#ifdef CONFIG_CRASH_DUMP
- boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+ boot_cpu_physical_apicid = read_apic_id();
#endif
physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 446c062e831..b6256587f99 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -27,6 +27,7 @@
#include <linux/clockchips.h>
#include <linux/acpi_pmtmr.h>
#include <linux/module.h>
+#include <linux/dmar.h>
#include <asm/atomic.h>
#include <asm/smp.h>
@@ -39,6 +40,7 @@
#include <asm/proto.h>
#include <asm/timex.h>
#include <asm/apic.h>
+#include <asm/i8259.h>
#include <mach_ipi.h>
#include <mach_apic.h>
@@ -46,6 +48,11 @@
static int disable_apic_timer __cpuinitdata;
static int apic_calibrate_pmtmr __initdata;
int disable_apic;
+int disable_x2apic;
+int x2apic;
+
+/* x2apic enabled before OS handover */
+int x2apic_preenabled;
/* Local APIC timer works in C2 */
int local_apic_timer_c2_ok;
@@ -118,13 +125,13 @@ static int modern_apic(void)
return lapic_get_version() >= 0x14;
}
-void apic_wait_icr_idle(void)
+void xapic_wait_icr_idle(void)
{
while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
cpu_relax();
}
-u32 safe_apic_wait_icr_idle(void)
+u32 safe_xapic_wait_icr_idle(void)
{
u32 send_status;
int timeout;
@@ -140,6 +147,69 @@ u32 safe_apic_wait_icr_idle(void)
return send_status;
}
+void xapic_icr_write(u32 low, u32 id)
+{
+ apic_write(APIC_ICR2, id << 24);
+ apic_write(APIC_ICR, low);
+}
+
+u64 xapic_icr_read(void)
+{
+ u32 icr1, icr2;
+
+ icr2 = apic_read(APIC_ICR2);
+ icr1 = apic_read(APIC_ICR);
+
+ return (icr1 | ((u64)icr2 << 32));
+}
+
+static struct apic_ops xapic_ops = {
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = xapic_icr_read,
+ .icr_write = xapic_icr_write,
+ .wait_icr_idle = xapic_wait_icr_idle,
+ .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+};
+
+struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+
+EXPORT_SYMBOL_GPL(apic_ops);
+
+static void x2apic_wait_icr_idle(void)
+{
+ /* no need to wait for icr idle in x2apic */
+ return;
+}
+
+static u32 safe_x2apic_wait_icr_idle(void)
+{
+ /* no need to wait for icr idle in x2apic */
+ return 0;
+}
+
+void x2apic_icr_write(u32 low, u32 id)
+{
+ wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+}
+
+u64 x2apic_icr_read(void)
+{
+ unsigned long val;
+
+ rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+ return val;
+}
+
+static struct apic_ops x2apic_ops = {
+ .read = native_apic_msr_read,
+ .write = native_apic_msr_write,
+ .icr_read = x2apic_icr_read,
+ .icr_write = x2apic_icr_write,
+ .wait_icr_idle = x2apic_wait_icr_idle,
+ .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
+};
+
/**
* enable_NMI_through_LVT0 - enable NMI through local vector table 0
*/
@@ -629,10 +699,10 @@ int __init verify_local_APIC(void)
/*
* The ID register is read/write in a real APIC.
*/
- reg0 = read_apic_id();
+ reg0 = apic_read(APIC_ID);
apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
- reg1 = read_apic_id();
+ reg1 = apic_read(APIC_ID);
apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
apic_write(APIC_ID, reg0);
if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -833,6 +903,125 @@ void __cpuinit end_local_APIC_setup(void)
apic_pm_activate();
}
+void check_x2apic(void)
+{
+ int msr, msr2;
+
+ rdmsr(MSR_IA32_APICBASE, msr, msr2);
+
+ if (msr & X2APIC_ENABLE) {
+ printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+ x2apic_preenabled = x2apic = 1;
+ apic_ops = &x2apic_ops;
+ }
+}
+
+void enable_x2apic(void)
+{
+ int msr, msr2;
+
+ rdmsr(MSR_IA32_APICBASE, msr, msr2);
+ if (!(msr & X2APIC_ENABLE)) {
+ printk("Enabling x2apic\n");
+ wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+ }
+}
+
+void enable_IR_x2apic(void)
+{
+#ifdef CONFIG_INTR_REMAP
+ int ret;
+ unsigned long flags;
+
+ if (!cpu_has_x2apic)
+ return;
+
+ if (!x2apic_preenabled && disable_x2apic) {
+ printk(KERN_INFO
+ "Skipped enabling x2apic and Interrupt-remapping "
+ "because of nox2apic\n");
+ return;
+ }
+
+ if (x2apic_preenabled && disable_x2apic)
+ panic("Bios already enabled x2apic, can't enforce nox2apic");
+
+ if (!x2apic_preenabled && skip_ioapic_setup) {
+ printk(KERN_INFO
+ "Skipped enabling x2apic and Interrupt-remapping "
+ "because of skipping io-apic setup\n");
+ return;
+ }
+
+ ret = dmar_table_init();
+ if (ret) {
+ printk(KERN_INFO
+ "dmar_table_init() failed with %d:\n", ret);
+
+ if (x2apic_preenabled)
+ panic("x2apic enabled by bios. But IR enabling failed");
+ else
+ printk(KERN_INFO
+ "Not enabling x2apic,Intr-remapping\n");
+ return;
+ }
+
+ local_irq_save(flags);
+ mask_8259A();
+ save_mask_IO_APIC_setup();
+
+ ret = enable_intr_remapping(1);
+
+ if (ret && x2apic_preenabled) {
+ local_irq_restore(flags);
+ panic("x2apic enabled by bios. But IR enabling failed");
+ }
+
+ if (ret)
+ goto end;
+
+ if (!x2apic) {
+ x2apic = 1;
+ apic_ops = &x2apic_ops;
+ enable_x2apic();
+ }
+end:
+ if (ret)
+ /*
+ * IR enabling failed
+ */
+ restore_IO_APIC_setup();
+ else
+ reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+
+ unmask_8259A();
+ local_irq_restore(flags);
+
+ if (!ret) {
+ if (!x2apic_preenabled)
+ printk(KERN_INFO
+ "Enabled x2apic and interrupt-remapping\n");
+ else
+ printk(KERN_INFO
+ "Enabled Interrupt-remapping\n");
+ } else
+ printk(KERN_ERR
+ "Failed to enable Interrupt-remapping and x2apic\n");
+#else
+ if (!cpu_has_x2apic)
+ return;
+
+ if (x2apic_preenabled)
+ panic("x2apic enabled prior OS handover,"
+ " enable CONFIG_INTR_REMAP");
+
+ printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+ " and x2apic\n");
+#endif
+
+ return;
+}
+
/*
* Detect and enable local APICs on non-SMP boards.
* Original code written by Keir Fraser.
@@ -872,7 +1061,7 @@ void __init early_init_lapic_mapping(void)
* Fetch the APIC ID of the BSP in case we have a
* default configuration (or the MP table is broken).
*/
- boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+ boot_cpu_physical_apicid = read_apic_id();
}
/**
@@ -880,6 +1069,11 @@ void __init early_init_lapic_mapping(void)
*/
void __init init_apic_mappings(void)
{
+ if (x2apic) {
+ boot_cpu_physical_apicid = read_apic_id();
+ return;
+ }
+
/*
* If no local APIC can be found then set up a fake all
* zeroes page to simulate the local APIC and another
@@ -899,7 +1093,7 @@ void __init init_apic_mappings(void)
* Fetch the APIC ID of the BSP in case we have a
* default configuration (or the MP table is broken).
*/
- boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+ boot_cpu_physical_apicid = read_apic_id();
}
/*
@@ -918,6 +1112,9 @@ int __init APIC_init_uniprocessor(void)
return -1;
}
+ enable_IR_x2apic();
+ setup_apic_routing();
+
verify_local_APIC();
connect_bsp_APIC();
@@ -1093,6 +1290,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
cpu_set(cpu, cpu_present_map);
}
+int hard_smp_processor_id(void)
+{
+ return read_apic_id();
+}
+
/*
* Power management
*/
@@ -1129,7 +1331,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
maxlvt = lapic_get_maxlvt();
- apic_pm_state.apic_id = read_apic_id();
+ apic_pm_state.apic_id = apic_read(APIC_ID);
apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
apic_pm_state.apic_ldr = apic_read(APIC_LDR);
apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1164,10 +1366,14 @@ static int lapic_resume(struct sys_device *dev)
maxlvt = lapic_get_maxlvt();
local_irq_save(flags);
- rdmsr(MSR_IA32_APICBASE, l, h);
- l &= ~MSR_IA32_APICBASE_BASE;
- l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
- wrmsr(MSR_IA32_APICBASE, l, h);
+ if (!x2apic) {
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ l &= ~MSR_IA32_APICBASE_BASE;
+ l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
+ wrmsr(MSR_IA32_APICBASE, l, h);
+ } else
+ enable_x2apic();
+
apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
apic_write(APIC_ID, apic_pm_state.apic_id);
apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -1307,6 +1513,15 @@ __cpuinit int apic_is_clustered_box(void)
return (clusters > 2);
}
+static __init int setup_nox2apic(char *str)
+{
+ disable_x2apic = 1;
+ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
+ return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+
+
/*
* APIC command line parameters
*/
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 9ee24e6bc4b..b93d069aea7 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -228,7 +228,6 @@
#include <linux/suspend.h>
#include <linux/kthread.h>
#include <linux/jiffies.h>
-#include <linux/smp_lock.h>
#include <asm/system.h>
#include <asm/uaccess.h>
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index aa89387006f..505543a75a5 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -22,7 +22,7 @@
#define __NO_STUBS 1
#undef __SYSCALL
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
#define __SYSCALL(nr, sym) [nr] = 1,
static char syscalls[] = {
#include <asm/unistd.h>
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index c639bd55391..fdd585f9c53 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -25,11 +25,11 @@ x86_bios_strerror(long status)
{
const char *str;
switch (status) {
- case 0: str = "Call completed without error"; break;
- case -1: str = "Not implemented"; break;
- case -2: str = "Invalid argument"; break;
- case -3: str = "Call completed with error"; break;
- default: str = "Unknown BIOS status code"; break;
+ case 0: str = "Call completed without error"; break;
+ case -1: str = "Not implemented"; break;
+ case -2: str = "Invalid argument"; break;
+ case -3: str = "Call completed with error"; break;
+ default: str = "Unknown BIOS status code"; break;
}
return str;
}
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index ee76eaad300..3ede19a4e0b 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -3,22 +3,32 @@
#
obj-y := intel_cacheinfo.o addon_cpuid_features.o
-obj-y += proc.o feature_names.o
+obj-y += proc.o capflags.o powerflags.o
-obj-$(CONFIG_X86_32) += common.o bugs.o
+obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o
obj-$(CONFIG_X86_64) += common_64.o bugs_64.o
-obj-$(CONFIG_X86_32) += amd.o
-obj-$(CONFIG_X86_64) += amd_64.o
-obj-$(CONFIG_X86_32) += cyrix.o
-obj-$(CONFIG_X86_32) += centaur.o
-obj-$(CONFIG_X86_64) += centaur_64.o
-obj-$(CONFIG_X86_32) += transmeta.o
-obj-$(CONFIG_X86_32) += intel.o
-obj-$(CONFIG_X86_64) += intel_64.o
-obj-$(CONFIG_X86_32) += umc.o
+
+obj-$(CONFIG_CPU_SUP_AMD_32) += amd.o
+obj-$(CONFIG_CPU_SUP_AMD_64) += amd_64.o
+obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
+obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o
+obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o
+obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
+obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o
+obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o
+obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
+
+quiet_cmd_mkcapflags = MKCAP $@
+ cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
+
+cpufeature = $(src)/../../../../include/asm-x86/cpufeature.h
+
+targets += capflags.c
+$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE
+ $(call if_changed,mkcapflags)
diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c
new file mode 100644
index 00000000000..2056ccf572c
--- /dev/null
+++ b/arch/x86/kernel/cpu/cmpxchg.c
@@ -0,0 +1,72 @@
+/*
+ * cmpxchg*() fallbacks for CPU not supporting these instructions
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+
+#ifndef CONFIG_X86_CMPXCHG
+unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
+{
+ u8 prev;
+ unsigned long flags;
+
+ /* Poor man's cmpxchg for 386. Unsuitable for SMP */
+ local_irq_save(flags);
+ prev = *(u8 *)ptr;
+ if (prev == old)
+ *(u8 *)ptr = new;
+ local_irq_restore(flags);
+ return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u8);
+
+unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
+{
+ u16 prev;
+ unsigned long flags;
+
+ /* Poor man's cmpxchg for 386. Unsuitable for SMP */
+ local_irq_save(flags);
+ prev = *(u16 *)ptr;
+ if (prev == old)
+ *(u16 *)ptr = new;
+ local_irq_restore(flags);
+ return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u16);
+
+unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
+{
+ u32 prev;
+ unsigned long flags;
+
+ /* Poor man's cmpxchg for 386. Unsuitable for SMP */
+ local_irq_save(flags);
+ prev = *(u32 *)ptr;
+ if (prev == old)
+ *(u32 *)ptr = new;
+ local_irq_restore(flags);
+ return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u32);
+#endif
+
+#ifndef CONFIG_X86_CMPXCHG64
+unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
+{
+ u64 prev;
+ unsigned long flags;
+
+ /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
+ local_irq_save(flags);
+ prev = *(u64 *)ptr;
+ if (prev == old)
+ *(u64 *)ptr = new;
+ local_irq_restore(flags);
+ return prev;
+}
+EXPORT_SYMBOL(cmpxchg_486_u64);
+#endif
+
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4e456bd955b..8260d930eab 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -355,6 +355,35 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
clear_cpu_cap(c, X86_FEATURE_NOPL);
}
+/*
+ * The NOPL instruction is supposed to exist on all CPUs with
+ * family >= 6, unfortunately, that's not true in practice because
+ * of early VIA chips and (more importantly) broken virtualizers that
+ * are not easy to detect. Hence, probe for it based on first
+ * principles.
+ */
+static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
+{
+ const u32 nopl_signature = 0x888c53b1; /* Random number */
+ u32 has_nopl = nopl_signature;
+
+ clear_cpu_cap(c, X86_FEATURE_NOPL);
+ if (c->x86 >= 6) {
+ asm volatile("\n"
+ "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
+ "2:\n"
+ " .section .fixup,\"ax\"\n"
+ "3: xor %0,%0\n"
+ " jmp 2b\n"
+ " .previous\n"
+ _ASM_EXTABLE(1b,3b)
+ : "+a" (has_nopl));
+
+ if (has_nopl == nopl_signature)
+ set_cpu_cap(c, X86_FEATURE_NOPL);
+ }
+}
+
static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
{
u32 tfms, xlvl;
@@ -723,9 +752,20 @@ void __cpuinit cpu_init(void)
/*
* Force FPU initialization:
*/
- current_thread_info()->status = 0;
+ if (cpu_has_xsave)
+ current_thread_info()->status = TS_XSAVE;
+ else
+ current_thread_info()->status = 0;
clear_used_math();
mxcsr_feature_mask_init();
+
+ /*
+ * Boot processor to setup the FP and extended state context info.
+ */
+ if (!smp_processor_id())
+ init_thread_xstate();
+
+ xsave_init();
}
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index a11f5d4477c..35d11efdf1f 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -636,6 +636,8 @@ void __cpuinit cpu_init(void)
barrier();
check_efer();
+ if (cpu != 0 && x2apic)
+ enable_x2apic();
/*
* set up and load the per-CPU TSS
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 898a5a2002e..13f8fa16b81 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -121,7 +121,7 @@ static void __cpuinit set_cx86_reorder(void)
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
/* Load/Store Serialize to mem access disable (=reorder it) */
- setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80);
+ setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80);
/* set load/store serialize from 1GB to 4GB */
ccr3 |= 0xe0;
setCx86(CX86_CCR3, ccr3);
@@ -132,11 +132,11 @@ static void __cpuinit set_cx86_memwb(void)
printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
/* CCR2 bit 2: unlock NW bit */
- setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
+ setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
/* set 'Not Write-through' */
write_cr0(read_cr0() | X86_CR0_NW);
/* CCR2 bit 2: lock NW bit and set WT1 */
- setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14);
+ setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14);
}
/*
@@ -150,14 +150,14 @@ static void __cpuinit geode_configure(void)
local_irq_save(flags);
/* Suspend on halt power saving and enable #SUSP pin */
- setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
+ setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88);
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
/* FPU fast, DTE cache, Mem bypass */
- setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38);
+ setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38);
setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
set_cx86_memwb();
@@ -291,7 +291,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
/* GXm supports extended cpuid levels 'ala' AMD */
if (c->cpuid_level == 2) {
/* Enable cxMMX extensions (GX1 Datasheet 54) */
- setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1);
+ setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1);
/*
* GXm : 0x30 ... 0x5f GXm datasheet 51
@@ -314,7 +314,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
if (dir1 > 7) {
dir0_msn++; /* M II */
/* Enable MMX extensions (App note 108) */
- setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
+ setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1);
} else {
c->coma_bug = 1; /* 6x86MX, it has the bug. */
}
@@ -429,7 +429,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
- setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */
+ setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */
setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
local_irq_restore(flags);
}
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
deleted file mode 100644
index c9017799497..00000000000
--- a/arch/x86/kernel/cpu/feature_names.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Strings for the various x86 capability flags.
- *
- * This file must not contain any executable code.
- */
-
-#include <asm/cpufeature.h>
-
-/*
- * These flag bits must match the definitions in <asm/cpufeature.h>.
- * NULL means this bit is undefined or reserved; either way it doesn't
- * have meaning as far as Linux is concerned. Note that it's important
- * to realize there is a difference between this table and CPUID -- if
- * applications want to get the raw CPUID data, they should access
- * /dev/cpu/<cpu_nr>/cpuid instead.
- */
-const char * const x86_cap_flags[NCAPINTS*32] = {
- /* Intel-defined */
- "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
- "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
- "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
- "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
- /* AMD-defined */
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
- NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
- "3dnowext", "3dnow",
-
- /* Transmeta-defined */
- "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Other (Linux-defined) */
- "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
- NULL, NULL, NULL, NULL,
- "constant_tsc", "up", NULL, "arch_perfmon",
- "pebs", "bts", NULL, NULL,
- "rep_good", NULL, NULL, NULL,
- "nopl", NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Intel-defined (#2) */
- "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
- "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
- NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* VIA/Cyrix/Centaur-defined */
- NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
- "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* AMD-defined (#2) */
- "lahf_lm", "cmp_legacy", "svm", "extapic",
- "cr8_legacy", "abm", "sse4a", "misalignsse",
- "3dnowprefetch", "osvw", "ibs", "sse5",
- "skinit", "wdt", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Auxiliary (Linux-defined) */
- "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-};
-
-const char *const x86_power_flags[32] = {
- "ts", /* temperature sensor */
- "fid", /* frequency id control */
- "vid", /* voltage id control */
- "ttp", /* thermal trip */
- "tm",
- "stc",
- "100mhzsteps",
- "hwpstate",
- "", /* tsc invariant mapped to constant_tsc */
- /* nothing */
-};
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b75f2569b8f..77618c717d7 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -23,13 +23,6 @@
#include <mach_apic.h>
#endif
-#ifdef CONFIG_X86_INTEL_USERCOPY
-/*
- * Alignment at which movsl is preferred for bulk memory copies.
- */
-struct movsl_mask movsl_mask __read_mostly;
-#endif
-
static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
{
/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
@@ -314,69 +307,5 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = {
cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev);
-#ifndef CONFIG_X86_CMPXCHG
-unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
-{
- u8 prev;
- unsigned long flags;
-
- /* Poor man's cmpxchg for 386. Unsuitable for SMP */
- local_irq_save(flags);
- prev = *(u8 *)ptr;
- if (prev == old)
- *(u8 *)ptr = new;
- local_irq_restore(flags);
- return prev;
-}
-EXPORT_SYMBOL(cmpxchg_386_u8);
-
-unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
-{
- u16 prev;
- unsigned long flags;
-
- /* Poor man's cmpxchg for 386. Unsuitable for SMP */
- local_irq_save(flags);
- prev = *(u16 *)ptr;
- if (prev == old)
- *(u16 *)ptr = new;
- local_irq_restore(flags);
- return prev;
-}
-EXPORT_SYMBOL(cmpxchg_386_u16);
-
-unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
-{
- u32 prev;
- unsigned long flags;
-
- /* Poor man's cmpxchg for 386. Unsuitable for SMP */
- local_irq_save(flags);
- prev = *(u32 *)ptr;
- if (prev == old)
- *(u32 *)ptr = new;
- local_irq_restore(flags);
- return prev;
-}
-EXPORT_SYMBOL(cmpxchg_386_u32);
-#endif
-
-#ifndef CONFIG_X86_CMPXCHG64
-unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
-{
- u64 prev;
- unsigned long flags;
-
- /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
- local_irq_save(flags);
- prev = *(u64 *)ptr;
- if (prev == old)
- *(u64 *)ptr = new;
- local_irq_restore(flags);
- return prev;
-}
-EXPORT_SYMBOL(cmpxchg_486_u64);
-#endif
-
/* arch_initcall(intel_cpu_init); */
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 6b0a10b002f..3f46afbb1cf 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -1,8 +1,8 @@
/*
- * Routines to indentify caches on Intel CPU.
+ * Routines to indentify caches on Intel CPU.
*
- * Changes:
- * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
+ * Changes:
+ * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
* Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
* Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
*/
@@ -13,6 +13,7 @@
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/sched.h>
+#include <linux/pci.h>
#include <asm/processor.h>
#include <asm/smp.h>
@@ -130,9 +131,18 @@ struct _cpuid4_info {
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned long size;
+ unsigned long can_disable;
cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
};
+#ifdef CONFIG_PCI
+static struct pci_device_id k8_nb_id[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
+ {}
+};
+#endif
+
unsigned short num_cache_leaves;
/* AMD doesn't have CPUID4. Emulate it here to report the same
@@ -182,9 +192,10 @@ static unsigned short assocs[] __cpuinitdata = {
static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 };
static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 };
-static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
- union _cpuid4_leaf_ebx *ebx,
- union _cpuid4_leaf_ecx *ecx)
+static void __cpuinit
+amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
+ union _cpuid4_leaf_ebx *ebx,
+ union _cpuid4_leaf_ecx *ecx)
{
unsigned dummy;
unsigned line_size, lines_per_tag, assoc, size_in_kb;
@@ -251,27 +262,40 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
(ebx->split.ways_of_associativity + 1) - 1;
}
-static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+static void __cpuinit
+amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
+{
+ if (index < 3)
+ return;
+ this_leaf->can_disable = 1;
+}
+
+static int
+__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
union _cpuid4_leaf_eax eax;
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned edx;
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
amd_cpuid4(index, &eax, &ebx, &ecx);
- else
- cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ if (boot_cpu_data.x86 >= 0x10)
+ amd_check_l3_disable(index, this_leaf);
+ } else {
+ cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ }
+
if (eax.split.type == CACHE_TYPE_NULL)
return -EIO; /* better error ? */
this_leaf->eax = eax;
this_leaf->ebx = ebx;
this_leaf->ecx = ecx;
- this_leaf->size = (ecx.split.number_of_sets + 1) *
- (ebx.split.coherency_line_size + 1) *
- (ebx.split.physical_line_partition + 1) *
- (ebx.split.ways_of_associativity + 1);
+ this_leaf->size = (ecx.split.number_of_sets + 1) *
+ (ebx.split.coherency_line_size + 1) *
+ (ebx.split.physical_line_partition + 1) *
+ (ebx.split.ways_of_associativity + 1);
return 0;
}
@@ -453,7 +477,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
/* pointer to _cpuid4_info array (for each cache leaf) */
static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
-#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
+#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
#ifdef CONFIG_SMP
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
@@ -490,7 +514,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
this_leaf = CPUID4_INFO_IDX(cpu, index);
for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) {
- sibling_leaf = CPUID4_INFO_IDX(sibling, index);
+ sibling_leaf = CPUID4_INFO_IDX(sibling, index);
cpu_clear(cpu, sibling_leaf->shared_cpu_map);
}
}
@@ -572,7 +596,7 @@ struct _index_kobject {
/* pointer to array of kobjects for cpuX/cache/indexY */
static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
-#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))
+#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))
#define show_one_plus(file_name, object, val) \
static ssize_t show_##file_name \
@@ -637,6 +661,99 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
}
}
+#define to_object(k) container_of(k, struct _index_kobject, kobj)
+#define to_attr(a) container_of(a, struct _cache_attr, attr)
+
+#ifdef CONFIG_PCI
+static struct pci_dev *get_k8_northbridge(int node)
+{
+ struct pci_dev *dev = NULL;
+ int i;
+
+ for (i = 0; i <= node; i++) {
+ do {
+ dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
+ if (!dev)
+ break;
+ } while (!pci_match_id(&k8_nb_id[0], dev));
+ if (!dev)
+ break;
+ }
+ return dev;
+}
+#else
+static struct pci_dev *get_k8_northbridge(int node)
+{
+ return NULL;
+}
+#endif
+
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
+{
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = NULL;
+ ssize_t ret = 0;
+ int i;
+
+ if (!this_leaf->can_disable)
+ return sprintf(buf, "Feature not enabled\n");
+
+ dev = get_k8_northbridge(node);
+ if (!dev) {
+ printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < 2; i++) {
+ unsigned int reg;
+
+ pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
+
+ ret += sprintf(buf, "%sEntry: %d\n", buf, i);
+ ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n",
+ buf,
+ reg & 0x80000000 ? "Disabled" : "Allowed",
+ reg & 0x40000000 ? "Disabled" : "Allowed");
+ ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n",
+ buf, (reg & 0x30000) >> 16, reg & 0xfff);
+ }
+ return ret;
+}
+
+static ssize_t
+store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
+ size_t count)
+{
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = NULL;
+ unsigned int ret, index, val;
+
+ if (!this_leaf->can_disable)
+ return 0;
+
+ if (strlen(buf) > 15)
+ return -EINVAL;
+
+ ret = sscanf(buf, "%x %x", &index, &val);
+ if (ret != 2)
+ return -EINVAL;
+ if (index > 1)
+ return -EINVAL;
+
+ val |= 0xc0000000;
+ dev = get_k8_northbridge(node);
+ if (!dev) {
+ printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
+ return -EINVAL;
+ }
+
+ pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
+ wbinvd();
+ pci_write_config_dword(dev, 0x1BC + index * 4, val);
+
+ return 1;
+}
+
struct _cache_attr {
struct attribute attr;
ssize_t (*show)(struct _cpuid4_info *, char *);
@@ -657,6 +774,8 @@ define_one_ro(size);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);
+static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable);
+
static struct attribute * default_attrs[] = {
&type.attr,
&level.attr,
@@ -667,12 +786,10 @@ static struct attribute * default_attrs[] = {
&size.attr,
&shared_cpu_map.attr,
&shared_cpu_list.attr,
+ &cache_disable.attr,
NULL
};
-#define to_object(k) container_of(k, struct _index_kobject, kobj)
-#define to_attr(a) container_of(a, struct _cache_attr, attr)
-
static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
{
struct _cache_attr *fattr = to_attr(attr);
@@ -682,14 +799,22 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
ret = fattr->show ?
fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
buf) :
- 0;
+ 0;
return ret;
}
static ssize_t store(struct kobject * kobj, struct attribute * attr,
const char * buf, size_t count)
{
- return 0;
+ struct _cache_attr *fattr = to_attr(attr);
+ struct _index_kobject *this_leaf = to_object(kobj);
+ ssize_t ret;
+
+ ret = fattr->store ?
+ fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
+ buf, count) :
+ 0;
+ return ret;
}
static struct sysfs_ops sysfs_ops = {
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl
new file mode 100644
index 00000000000..dfea390e160
--- /dev/null
+++ b/arch/x86/kernel/cpu/mkcapflags.pl
@@ -0,0 +1,32 @@
+#!/usr/bin/perl
+#
+# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h
+#
+
+($in, $out) = @ARGV;
+
+open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n";
+open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
+
+print OUT "#include <asm/cpufeature.h>\n\n";
+print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
+
+while (defined($line = <IN>)) {
+ if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) {
+ $macro = $1;
+ $feature = $2;
+ $tail = $3;
+ if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) {
+ $feature = $1;
+ }
+
+ if ($feature ne '') {
+ printf OUT "\t%-32s = \"%s\",\n",
+ "[$macro]", "\L$feature";
+ }
+ }
+}
+print OUT "};\n";
+
+close(IN);
+close(OUT);
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index b117d7f8a56..58ac5d3d436 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -729,7 +729,7 @@ struct var_mtrr_range_state {
mtrr_type type;
};
-struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
static int __initdata debug_print;
static int __init
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
new file mode 100644
index 00000000000..5abbea297e0
--- /dev/null
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -0,0 +1,20 @@
+/*
+ * Strings for the various x86 power flags
+ *
+ * This file must not contain any executable code.
+ */
+
+#include <asm/cpufeature.h>
+
+const char *const x86_power_flags[32] = {
+ "ts", /* temperature sensor */
+ "fid", /* frequency id control */
+ "vid", /* voltage id control */
+ "ttp", /* thermal trip */
+ "tm",
+ "stc",
+ "100mhzsteps",
+ "hwpstate",
+ "", /* tsc invariant mapped to constant_tsc */
+ /* nothing */
+};
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 8e9cd6a8ec1..6a44d646599 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -36,7 +36,6 @@
#include <linux/smp_lock.h>
#include <linux/major.h>
#include <linux/fs.h>
-#include <linux/smp_lock.h>
#include <linux/device.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index 15e6c6bc4a4..d3e524c8452 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -7,9 +7,8 @@
#include <linux/errno.h>
#include <linux/crash_dump.h>
-
-#include <asm/uaccess.h>
-#include <asm/io.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
/**
* copy_oldmem_page - copy one page from "oldmem"
@@ -25,7 +24,7 @@
* in the current kernel. We stitch up a pte, similar to kmap_atomic.
*/
ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
- size_t csize, unsigned long offset, int userbuf)
+ size_t csize, unsigned long offset, int userbuf)
{
void *vaddr;
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index eaff0bbb144..6c9bfc9e1e9 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -16,87 +16,63 @@
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/hardirq.h>
+#include <linux/dmar.h>
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
-#ifdef CONFIG_ACPI
-#include <acpi/acpi_bus.h>
-#endif
-
-DEFINE_PER_CPU(int, x2apic_extra_bits);
+extern struct genapic apic_flat;
+extern struct genapic apic_physflat;
+extern struct genapic apic_x2xpic_uv_x;
+extern struct genapic apic_x2apic_phys;
+extern struct genapic apic_x2apic_cluster;
struct genapic __read_mostly *genapic = &apic_flat;
-static enum uv_system_type uv_system_type;
+static struct genapic *apic_probe[] __initdata = {
+ &apic_x2apic_uv_x,
+ &apic_x2apic_phys,
+ &apic_x2apic_cluster,
+ &apic_physflat,
+ NULL,
+};
/*
* Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
*/
void __init setup_apic_routing(void)
{
- if (uv_system_type == UV_NON_UNIQUE_APIC)
- genapic = &apic_x2apic_uv_x;
- else
-#ifdef CONFIG_ACPI
- /*
- * Quirk: some x86_64 machines can only use physical APIC mode
- * regardless of how many processors are present (x86_64 ES7000
- * is an example).
- */
- if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
- genapic = &apic_physflat;
- else
-#endif
-
- if (max_physical_apicid < 8)
- genapic = &apic_flat;
- else
- genapic = &apic_physflat;
+ if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) {
+ if (!intr_remapping_enabled)
+ genapic = &apic_flat;
+ }
- printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
+ if (genapic == &apic_flat) {
+ if (max_physical_apicid >= 8)
+ genapic = &apic_physflat;
+ printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
+ }
}
/* Same for both flat and physical. */
-void send_IPI_self(int vector)
+void apic_send_IPI_self(int vector)
{
__send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
}
int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
- if (!strcmp(oem_id, "SGI")) {
- if (!strcmp(oem_table_id, "UVL"))
- uv_system_type = UV_LEGACY_APIC;
- else if (!strcmp(oem_table_id, "UVX"))
- uv_system_type = UV_X2APIC;
- else if (!strcmp(oem_table_id, "UVH"))
- uv_system_type = UV_NON_UNIQUE_APIC;
+ int i;
+
+ for (i = 0; apic_probe[i]; ++i) {
+ if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
+ genapic = apic_probe[i];
+ printk(KERN_INFO "Setting APIC routing to %s.\n",
+ genapic->name);
+ return 1;
+ }
}
return 0;
}
-
-unsigned int read_apic_id(void)
-{
- unsigned int id;
-
- WARN_ON(preemptible() && num_online_cpus() > 1);
- id = apic_read(APIC_ID);
- if (uv_system_type >= UV_X2APIC)
- id |= __get_cpu_var(x2apic_extra_bits);
- return id;
-}
-
-enum uv_system_type get_uv_system_type(void)
-{
- return uv_system_type;
-}
-
-int is_uv_system(void)
-{
- return uv_system_type != UV_NONE;
-}
-EXPORT_SYMBOL_GPL(is_uv_system);
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 786548a62d3..9eca5ba7a6b 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -15,9 +15,20 @@
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/init.h>
+#include <linux/hardirq.h>
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
+#include <mach_apicdef.h>
+
+#ifdef CONFIG_ACPI
+#include <acpi/acpi_bus.h>
+#endif
+
+static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return 1;
+}
static cpumask_t flat_target_cpus(void)
{
@@ -95,9 +106,33 @@ static void flat_send_IPI_all(int vector)
__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
}
+static unsigned int get_apic_id(unsigned long x)
+{
+ unsigned int id;
+
+ id = (((x)>>24) & 0xFFu);
+ return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+ unsigned long x;
+
+ x = ((id & 0xFFu)<<24);
+ return x;
+}
+
+static unsigned int read_xapic_id(void)
+{
+ unsigned int id;
+
+ id = get_apic_id(apic_read(APIC_ID));
+ return id;
+}
+
static int flat_apic_id_registered(void)
{
- return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map);
+ return physid_isset(read_xapic_id(), phys_cpu_present_map);
}
static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
@@ -112,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb)
struct genapic apic_flat = {
.name = "flat",
+ .acpi_madt_oem_check = flat_acpi_madt_oem_check,
.int_delivery_mode = dest_LowestPrio,
.int_dest_mode = (APIC_DEST_LOGICAL != 0),
.target_cpus = flat_target_cpus,
@@ -121,8 +157,12 @@ struct genapic apic_flat = {
.send_IPI_all = flat_send_IPI_all,
.send_IPI_allbutself = flat_send_IPI_allbutself,
.send_IPI_mask = flat_send_IPI_mask,
+ .send_IPI_self = apic_send_IPI_self,
.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
.phys_pkg_id = phys_pkg_id,
+ .get_apic_id = get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = (0xFFu<<24),
};
/*
@@ -130,6 +170,21 @@ struct genapic apic_flat = {
* We cannot use logical delivery in this case because the mask
* overflows, so use physical mode.
*/
+static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+#ifdef CONFIG_ACPI
+ /*
+ * Quirk: some x86_64 machines can only use physical APIC mode
+ * regardless of how many processors are present (x86_64 ES7000
+ * is an example).
+ */
+ if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
+ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
+ return 1;
+#endif
+
+ return 0;
+}
static cpumask_t physflat_target_cpus(void)
{
@@ -176,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
struct genapic apic_physflat = {
.name = "physical flat",
+ .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
.int_delivery_mode = dest_Fixed,
.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
.target_cpus = physflat_target_cpus,
@@ -185,6 +241,10 @@ struct genapic apic_physflat = {
.send_IPI_all = physflat_send_IPI_all,
.send_IPI_allbutself = physflat_send_IPI_allbutself,
.send_IPI_mask = physflat_send_IPI_mask,
+ .send_IPI_self = apic_send_IPI_self,
.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
.phys_pkg_id = phys_pkg_id,
+ .get_apic_id = get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = (0xFFu<<24),
};
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
new file mode 100644
index 00000000000..fed9f68efd6
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -0,0 +1,164 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm/genapic.h>
+
+DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+
+static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if (cpu_has_x2apic)
+ return 1;
+
+ return 0;
+}
+
+/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
+
+static cpumask_t x2apic_target_cpus(void)
+{
+ return cpumask_of_cpu(0);
+}
+
+/*
+ * for now each logical cpu is in its own vector allocation domain.
+ */
+static cpumask_t x2apic_vector_allocation_domain(int cpu)
+{
+ cpumask_t domain = CPU_MASK_NONE;
+ cpu_set(cpu, domain);
+ return domain;
+}
+
+static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+ unsigned int dest)
+{
+ unsigned long cfg;
+
+ cfg = __prepare_ICR(0, vector, dest);
+
+ /*
+ * send the IPI.
+ */
+ x2apic_icr_write(cfg, apicid);
+}
+
+/*
+ * for now, we send the IPI's one by one in the cpumask.
+ * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
+ * at once. We have 16 cpu's in a cluster. This will minimize IPI register
+ * writes.
+ */
+static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+{
+ unsigned long flags;
+ unsigned long query_cpu;
+
+ local_irq_save(flags);
+ for_each_cpu_mask(query_cpu, mask) {
+ __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, APIC_DEST_LOGICAL);
+ }
+ local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+ cpumask_t mask = cpu_online_map;
+
+ cpu_clear(smp_processor_id(), mask);
+
+ if (!cpus_empty(mask))
+ x2apic_send_IPI_mask(mask, vector);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+ x2apic_send_IPI_mask(cpu_online_map, vector);
+}
+
+static int x2apic_apic_id_registered(void)
+{
+ return 1;
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = first_cpu(cpumask);
+ if ((unsigned)cpu < NR_CPUS)
+ return per_cpu(x86_cpu_to_logical_apicid, cpu);
+ else
+ return BAD_APICID;
+}
+
+static unsigned int get_apic_id(unsigned long x)
+{
+ unsigned int id;
+
+ id = x;
+ return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+ unsigned long x;
+
+ x = id;
+ return x;
+}
+
+static unsigned int x2apic_read_id(void)
+{
+ return apic_read(APIC_ID);
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+ return x2apic_read_id() >> index_msb;
+}
+
+static void x2apic_send_IPI_self(int vector)
+{
+ apic_write(APIC_SELF_IPI, vector);
+}
+
+static void init_x2apic_ldr(void)
+{
+ int cpu = smp_processor_id();
+
+ per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
+ return;
+}
+
+struct genapic apic_x2apic_cluster = {
+ .name = "cluster x2apic",
+ .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
+ .int_delivery_mode = dest_LowestPrio,
+ .int_dest_mode = (APIC_DEST_LOGICAL != 0),
+ .target_cpus = x2apic_target_cpus,
+ .vector_allocation_domain = x2apic_vector_allocation_domain,
+ .apic_id_registered = x2apic_apic_id_registered,
+ .init_apic_ldr = init_x2apic_ldr,
+ .send_IPI_all = x2apic_send_IPI_all,
+ .send_IPI_allbutself = x2apic_send_IPI_allbutself,
+ .send_IPI_mask = x2apic_send_IPI_mask,
+ .send_IPI_self = x2apic_send_IPI_self,
+ .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+ .phys_pkg_id = phys_pkg_id,
+ .get_apic_id = get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = (0xFFFFFFFFu),
+};
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
new file mode 100644
index 00000000000..958d537b4cc
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -0,0 +1,159 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm/genapic.h>
+
+static int x2apic_phys;
+
+static int set_x2apic_phys_mode(char *arg)
+{
+ x2apic_phys = 1;
+ return 0;
+}
+early_param("x2apic_phys", set_x2apic_phys_mode);
+
+static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if (cpu_has_x2apic && x2apic_phys)
+ return 1;
+
+ return 0;
+}
+
+/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
+
+static cpumask_t x2apic_target_cpus(void)
+{
+ return cpumask_of_cpu(0);
+}
+
+static cpumask_t x2apic_vector_allocation_domain(int cpu)
+{
+ cpumask_t domain = CPU_MASK_NONE;
+ cpu_set(cpu, domain);
+ return domain;
+}
+
+static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+ unsigned int dest)
+{
+ unsigned long cfg;
+
+ cfg = __prepare_ICR(0, vector, dest);
+
+ /*
+ * send the IPI.
+ */
+ x2apic_icr_write(cfg, apicid);
+}
+
+static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+{
+ unsigned long flags;
+ unsigned long query_cpu;
+
+ local_irq_save(flags);
+ for_each_cpu_mask(query_cpu, mask) {
+ __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
+ vector, APIC_DEST_PHYSICAL);
+ }
+ local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+ cpumask_t mask = cpu_online_map;
+
+ cpu_clear(smp_processor_id(), mask);
+
+ if (!cpus_empty(mask))
+ x2apic_send_IPI_mask(mask, vector);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+ x2apic_send_IPI_mask(cpu_online_map, vector);
+}
+
+static int x2apic_apic_id_registered(void)
+{
+ return 1;
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = first_cpu(cpumask);
+ if ((unsigned)cpu < NR_CPUS)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ else
+ return BAD_APICID;
+}
+
+static unsigned int get_apic_id(unsigned long x)
+{
+ unsigned int id;
+
+ id = x;
+ return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+ unsigned long x;
+
+ x = id;
+ return x;
+}
+
+static unsigned int x2apic_read_id(void)
+{
+ return apic_read(APIC_ID);
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+ return x2apic_read_id() >> index_msb;
+}
+
+void x2apic_send_IPI_self(int vector)
+{
+ apic_write(APIC_SELF_IPI, vector);
+}
+
+void init_x2apic_ldr(void)
+{
+ return;
+}
+
+struct genapic apic_x2apic_phys = {
+ .name = "physical x2apic",
+ .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
+ .int_delivery_mode = dest_Fixed,
+ .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
+ .target_cpus = x2apic_target_cpus,
+ .vector_allocation_domain = x2apic_vector_allocation_domain,
+ .apic_id_registered = x2apic_apic_id_registered,
+ .init_apic_ldr = init_x2apic_ldr,
+ .send_IPI_all = x2apic_send_IPI_all,
+ .send_IPI_allbutself = x2apic_send_IPI_allbutself,
+ .send_IPI_mask = x2apic_send_IPI_mask,
+ .send_IPI_self = x2apic_send_IPI_self,
+ .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+ .phys_pkg_id = phys_pkg_id,
+ .get_apic_id = get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = (0xFFFFFFFFu),
+};
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index bfa837cb16b..99269beabc7 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -12,12 +12,12 @@
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/string.h>
-#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/bootmem.h>
#include <linux/module.h>
+#include <linux/hardirq.h>
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
@@ -26,6 +26,35 @@
#include <asm/uv/uv_hub.h>
#include <asm/uv/bios.h>
+DEFINE_PER_CPU(int, x2apic_extra_bits);
+
+static enum uv_system_type uv_system_type;
+
+static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if (!strcmp(oem_id, "SGI")) {
+ if (!strcmp(oem_table_id, "UVL"))
+ uv_system_type = UV_LEGACY_APIC;
+ else if (!strcmp(oem_table_id, "UVX"))
+ uv_system_type = UV_X2APIC;
+ else if (!strcmp(oem_table_id, "UVH")) {
+ uv_system_type = UV_NON_UNIQUE_APIC;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+enum uv_system_type get_uv_system_type(void)
+{
+ return uv_system_type;
+}
+
+int is_uv_system(void)
+{
+ return uv_system_type != UV_NONE;
+}
+
DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
@@ -123,6 +152,10 @@ static int uv_apic_id_registered(void)
return 1;
}
+static void uv_init_apic_ldr(void)
+{
+}
+
static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
{
int cpu;
@@ -138,9 +171,34 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
return BAD_APICID;
}
+static unsigned int get_apic_id(unsigned long x)
+{
+ unsigned int id;
+
+ WARN_ON(preemptible() && num_online_cpus() > 1);
+ id = x | __get_cpu_var(x2apic_extra_bits);
+
+ return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+ unsigned long x;
+
+ /* maskout x2apic_extra_bits ? */
+ x = id;
+ return x;
+}
+
+static unsigned int uv_read_apic_id(void)
+{
+
+ return get_apic_id(apic_read(APIC_ID));
+}
+
static unsigned int phys_pkg_id(int index_msb)
{
- return GET_APIC_ID(read_apic_id()) >> index_msb;
+ return uv_read_apic_id() >> index_msb;
}
#ifdef ZZZ /* Needs x2apic patch */
@@ -152,17 +210,22 @@ static void uv_send_IPI_self(int vector)
struct genapic apic_x2apic_uv_x = {
.name = "UV large system",
+ .acpi_madt_oem_check = uv_acpi_madt_oem_check,
.int_delivery_mode = dest_Fixed,
.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
.target_cpus = uv_target_cpus,
.vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
.apic_id_registered = uv_apic_id_registered,
+ .init_apic_ldr = uv_init_apic_ldr,
.send_IPI_all = uv_send_IPI_all,
.send_IPI_allbutself = uv_send_IPI_allbutself,
.send_IPI_mask = uv_send_IPI_mask,
/* ZZZ.send_IPI_self = uv_send_IPI_self, */
.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
.phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */
+ .get_apic_id = get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = (0xFFFFFFFFu),
};
static __cpuinit void set_x2apic_extra_bits(int pnode)
@@ -401,3 +464,5 @@ void __cpuinit uv_cpu_init(void)
if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
set_x2apic_extra_bits(uv_hub_info->pnode);
}
+
+
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index eb9ddd8efb8..45723f1fe19 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -21,9 +21,12 @@
# include <asm/sigcontext32.h>
# include <asm/user32.h>
#else
-# define save_i387_ia32 save_i387
-# define restore_i387_ia32 restore_i387
+# define save_i387_xstate_ia32 save_i387_xstate
+# define restore_i387_xstate_ia32 restore_i387_xstate
# define _fpstate_ia32 _fpstate
+# define _xstate_ia32 _xstate
+# define sig_xstate_ia32_size sig_xstate_size
+# define fx_sw_reserved_ia32 fx_sw_reserved
# define user_i387_ia32_struct user_i387_struct
# define user32_fxsr_struct user_fxsr_struct
#endif
@@ -36,6 +39,7 @@
static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
unsigned int xstate_size;
+unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
static struct i387_fxsave_struct fx_scratch __cpuinitdata;
void __cpuinit mxcsr_feature_mask_init(void)
@@ -61,6 +65,11 @@ void __init init_thread_xstate(void)
return;
}
+ if (cpu_has_xsave) {
+ xsave_cntxt_init();
+ return;
+ }
+
if (cpu_has_fxsr)
xstate_size = sizeof(struct i387_fxsave_struct);
#ifdef CONFIG_X86_32
@@ -83,9 +92,19 @@ void __cpuinit fpu_init(void)
write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */
+ /*
+ * Boot processor to setup the FP and extended state context info.
+ */
+ if (!smp_processor_id())
+ init_thread_xstate();
+ xsave_init();
+
mxcsr_feature_mask_init();
/* clean state in init */
- current_thread_info()->status = 0;
+ if (cpu_has_xsave)
+ current_thread_info()->status = TS_XSAVE;
+ else
+ current_thread_info()->status = 0;
clear_used_math();
}
#endif /* CONFIG_X86_64 */
@@ -195,6 +214,13 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
*/
target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
+ /*
+ * update the header bits in the xsave header, indicating the
+ * presence of FP and SSE state.
+ */
+ if (cpu_has_xsave)
+ target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
+
return ret;
}
@@ -395,6 +421,12 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
if (!ret)
convert_to_fxsr(target, &env);
+ /*
+ * update the header bit in the xsave header, indicating the
+ * presence of FP.
+ */
+ if (cpu_has_xsave)
+ target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
return ret;
}
@@ -407,7 +439,6 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
struct task_struct *tsk = current;
struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
- unlazy_fpu(tsk);
fp->status = fp->swd;
if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
return -1;
@@ -421,8 +452,6 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
struct user_i387_ia32_struct env;
int err = 0;
- unlazy_fpu(tsk);
-
convert_from_fxsr(&env, tsk);
if (__copy_to_user(buf, &env, sizeof(env)))
return -1;
@@ -432,16 +461,40 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
if (err)
return -1;
- if (__copy_to_user(&buf->_fxsr_env[0], fx,
- sizeof(struct i387_fxsave_struct)))
+ if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
return -1;
return 1;
}
-int save_i387_ia32(struct _fpstate_ia32 __user *buf)
+static int save_i387_xsave(void __user *buf)
+{
+ struct _fpstate_ia32 __user *fx = buf;
+ int err = 0;
+
+ if (save_i387_fxsave(fx) < 0)
+ return -1;
+
+ err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
+ sizeof(struct _fpx_sw_bytes));
+ err |= __put_user(FP_XSTATE_MAGIC2,
+ (__u32 __user *) (buf + sig_xstate_ia32_size
+ - FP_XSTATE_MAGIC2_SIZE));
+ if (err)
+ return -1;
+
+ return 1;
+}
+
+int save_i387_xstate_ia32(void __user *buf)
{
+ struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
+ struct task_struct *tsk = current;
+
if (!used_math())
return 0;
+
+ if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
+ return -EACCES;
/*
* This will cause a "finit" to be triggered by the next
* attempted FPU operation by the 'current' process.
@@ -451,13 +504,17 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf)
if (!HAVE_HWFP) {
return fpregs_soft_get(current, NULL,
0, sizeof(struct user_i387_ia32_struct),
- NULL, buf) ? -1 : 1;
+ NULL, fp) ? -1 : 1;
}
+ unlazy_fpu(tsk);
+
+ if (cpu_has_xsave)
+ return save_i387_xsave(fp);
if (cpu_has_fxsr)
- return save_i387_fxsave(buf);
+ return save_i387_fxsave(fp);
else
- return save_i387_fsave(buf);
+ return save_i387_fsave(fp);
}
static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
@@ -468,14 +525,15 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
sizeof(struct i387_fsave_struct));
}
-static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
+static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
+ unsigned int size)
{
struct task_struct *tsk = current;
struct user_i387_ia32_struct env;
int err;
err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
- sizeof(struct i387_fxsave_struct));
+ size);
/* mxcsr reserved bits must be masked to zero for security reasons */
tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
if (err || __copy_from_user(&env, buf, sizeof(env)))
@@ -485,14 +543,69 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
return 0;
}
-int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
+static int restore_i387_xsave(void __user *buf)
+{
+ struct _fpx_sw_bytes fx_sw_user;
+ struct _fpstate_ia32 __user *fx_user =
+ ((struct _fpstate_ia32 __user *) buf);
+ struct i387_fxsave_struct __user *fx =
+ (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
+ struct xsave_hdr_struct *xsave_hdr =
+ &current->thread.xstate->xsave.xsave_hdr;
+ u64 mask;
+ int err;
+
+ if (check_for_xstate(fx, buf, &fx_sw_user))
+ goto fx_only;
+
+ mask = fx_sw_user.xstate_bv;
+
+ err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);
+
+ xsave_hdr->xstate_bv &= pcntxt_mask;
+ /*
+ * These bits must be zero.
+ */
+ xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
+
+ /*
+ * Init the state that is not present in the memory layout
+ * and enabled by the OS.
+ */
+ mask = ~(pcntxt_mask & ~mask);
+ xsave_hdr->xstate_bv &= mask;
+
+ return err;
+fx_only:
+ /*
+ * Couldn't find the extended state information in the memory
+ * layout. Restore the FP/SSE and init the other extended state
+ * enabled by the OS.
+ */
+ xsave_hdr->xstate_bv = XSTATE_FPSSE;
+ return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
+}
+
+int restore_i387_xstate_ia32(void __user *buf)
{
int err;
struct task_struct *tsk = current;
+ struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
if (HAVE_HWFP)
clear_fpu(tsk);
+ if (!buf) {
+ if (used_math()) {
+ clear_fpu(tsk);
+ clear_used_math();
+ }
+
+ return 0;
+ } else
+ if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
+ return -EACCES;
+
if (!used_math()) {
err = init_fpu(tsk);
if (err)
@@ -500,14 +613,17 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
}
if (HAVE_HWFP) {
- if (cpu_has_fxsr)
- err = restore_i387_fxsave(buf);
+ if (cpu_has_xsave)
+ err = restore_i387_xsave(buf);
+ else if (cpu_has_fxsr)
+ err = restore_i387_fxsave(fp, sizeof(struct
+ i387_fxsave_struct));
else
- err = restore_i387_fsave(buf);
+ err = restore_i387_fsave(fp);
} else {
err = fpregs_soft_set(current, NULL,
0, sizeof(struct user_i387_ia32_struct),
- NULL, buf) != 0;
+ NULL, fp) != 0;
}
set_used_math();
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index dc92b49d920..4b8a53d841f 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void)
device_initcall(i8259A_init_sysfs);
+void mask_8259A(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+
+ outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
+ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
+
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void unmask_8259A(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+
+ outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
+ outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
+
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
void init_8259A(int auto_eoi)
{
unsigned long flags;
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 09cddb57bec..26ea3ea3fb3 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -46,6 +46,7 @@
#include <asm/nmi.h>
#include <asm/msidef.h>
#include <asm/hypertransport.h>
+#include <asm/setup.h>
#include <mach_apic.h>
#include <mach_apicdef.h>
@@ -1490,7 +1491,7 @@ void /*__init*/ print_local_APIC(void *dummy)
smp_processor_id(), hard_smp_processor_id());
v = apic_read(APIC_ID);
printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
- GET_APIC_ID(read_apic_id()));
+ GET_APIC_ID(v));
v = apic_read(APIC_LVR);
printk(KERN_INFO "... APIC VERSION: %08x\n", v);
ver = GET_APIC_VERSION(v);
@@ -1698,8 +1699,7 @@ void disable_IO_APIC(void)
entry.dest_mode = 0; /* Physical */
entry.delivery_mode = dest_ExtINT; /* ExtInt */
entry.vector = 0;
- entry.dest.physical.physical_dest =
- GET_APIC_ID(read_apic_id());
+ entry.dest.physical.physical_dest = read_apic_id();
/*
* Add it to the IO-APIC irq-routing table:
@@ -1725,10 +1725,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
unsigned char old_id;
unsigned long flags;
-#ifdef CONFIG_X86_NUMAQ
- if (found_numaq)
+ if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
return;
-#endif
/*
* Don't check I/O APIC IDs for xAPIC systems. They have
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 61a83b70c18..e63282e7886 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -37,6 +37,7 @@
#include <acpi/acpi_bus.h>
#endif
#include <linux/bootmem.h>
+#include <linux/dmar.h>
#include <asm/idle.h>
#include <asm/io.h>
@@ -49,6 +50,7 @@
#include <asm/nmi.h>
#include <asm/msidef.h>
#include <asm/hypertransport.h>
+#include <asm/irq_remapping.h>
#include <mach_ipi.h>
#include <mach_apic.h>
@@ -108,6 +110,9 @@ static DEFINE_SPINLOCK(vector_lock);
*/
int nr_ioapic_registers[MAX_IO_APICS];
+/* I/O APIC RTE contents at the OS boot up */
+struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
/* I/O APIC entries */
struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
int nr_ioapics;
@@ -303,7 +308,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
pin = entry->pin;
if (pin == -1)
break;
- io_apic_write(apic, 0x11 + pin*2, dest);
+ /*
+ * With interrupt-remapping, destination information comes
+ * from interrupt-remapping table entry.
+ */
+ if (!irq_remapped(irq))
+ io_apic_write(apic, 0x11 + pin*2, dest);
reg = io_apic_read(apic, 0x10 + pin*2);
reg &= ~IO_APIC_REDIR_VECTOR_MASK;
reg |= vector;
@@ -440,6 +450,69 @@ static void clear_IO_APIC (void)
clear_IO_APIC_pin(apic, pin);
}
+/*
+ * Saves and masks all the unmasked IO-APIC RTE's
+ */
+int save_mask_IO_APIC_setup(void)
+{
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+ int apic, pin;
+
+ /*
+ * The number of IO-APIC IRQ registers (== #pins):
+ */
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(apic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+ }
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ early_ioapic_entries[apic] =
+ kzalloc(sizeof(struct IO_APIC_route_entry) *
+ nr_ioapic_registers[apic], GFP_KERNEL);
+ if (!early_ioapic_entries[apic])
+ return -ENOMEM;
+ }
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ struct IO_APIC_route_entry entry;
+
+ entry = early_ioapic_entries[apic][pin] =
+ ioapic_read_entry(apic, pin);
+ if (!entry.mask) {
+ entry.mask = 1;
+ ioapic_write_entry(apic, pin, entry);
+ }
+ }
+ return 0;
+}
+
+void restore_IO_APIC_setup(void)
+{
+ int apic, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+ ioapic_write_entry(apic, pin,
+ early_ioapic_entries[apic][pin]);
+}
+
+void reinit_intr_remapped_IO_APIC(int intr_remapping)
+{
+ /*
+ * for now plain restore of previous settings.
+ * TBD: In the case of OS enabling interrupt-remapping,
+ * IO-APIC RTE's need to be setup to point to interrupt-remapping
+ * table entries. for now, do a plain restore, and wait for
+ * the setup_IO_APIC_irqs() to do proper initialization.
+ */
+ restore_IO_APIC_setup();
+}
+
int skip_ioapic_setup;
int ioapic_force;
@@ -839,18 +912,98 @@ void __setup_vector_irq(int cpu)
}
static struct irq_chip ioapic_chip;
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip;
+#endif
static void ioapic_register_intr(int irq, unsigned long trigger)
{
- if (trigger) {
+ if (trigger)
irq_desc[irq].status |= IRQ_LEVEL;
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
- handle_fasteoi_irq, "fasteoi");
- } else {
+ else
irq_desc[irq].status &= ~IRQ_LEVEL;
+
+#ifdef CONFIG_INTR_REMAP
+ if (irq_remapped(irq)) {
+ irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
+ if (trigger)
+ set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+ handle_fasteoi_irq,
+ "fasteoi");
+ else
+ set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+ handle_edge_irq, "edge");
+ return;
+ }
+#endif
+ if (trigger)
+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
+ handle_fasteoi_irq,
+ "fasteoi");
+ else
set_irq_chip_and_handler_name(irq, &ioapic_chip,
handle_edge_irq, "edge");
+}
+
+static int setup_ioapic_entry(int apic, int irq,
+ struct IO_APIC_route_entry *entry,
+ unsigned int destination, int trigger,
+ int polarity, int vector)
+{
+ /*
+ * add it to the IO-APIC irq-routing table:
+ */
+ memset(entry,0,sizeof(*entry));
+
+#ifdef CONFIG_INTR_REMAP
+ if (intr_remapping_enabled) {
+ struct intel_iommu *iommu = map_ioapic_to_ir(apic);
+ struct irte irte;
+ struct IR_IO_APIC_route_entry *ir_entry =
+ (struct IR_IO_APIC_route_entry *) entry;
+ int index;
+
+ if (!iommu)
+ panic("No mapping iommu for ioapic %d\n", apic);
+
+ index = alloc_irte(iommu, irq, 1);
+ if (index < 0)
+ panic("Failed to allocate IRTE for ioapic %d\n", apic);
+
+ memset(&irte, 0, sizeof(irte));
+
+ irte.present = 1;
+ irte.dst_mode = INT_DEST_MODE;
+ irte.trigger_mode = trigger;
+ irte.dlvry_mode = INT_DELIVERY_MODE;
+ irte.vector = vector;
+ irte.dest_id = IRTE_DEST(destination);
+
+ modify_irte(irq, &irte);
+
+ ir_entry->index2 = (index >> 15) & 0x1;
+ ir_entry->zero = 0;
+ ir_entry->format = 1;
+ ir_entry->index = (index & 0x7fff);
+ } else
+#endif
+ {
+ entry->delivery_mode = INT_DELIVERY_MODE;
+ entry->dest_mode = INT_DEST_MODE;
+ entry->dest = destination;
}
+
+ entry->mask = 0; /* enable IRQ */
+ entry->trigger = trigger;
+ entry->polarity = polarity;
+ entry->vector = vector;
+
+ /* Mask level triggered irqs.
+ * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+ */
+ if (trigger)
+ entry->mask = 1;
+ return 0;
}
static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
@@ -875,24 +1028,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
irq, trigger, polarity);
- /*
- * add it to the IO-APIC irq-routing table:
- */
- memset(&entry,0,sizeof(entry));
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.dest_mode = INT_DEST_MODE;
- entry.dest = cpu_mask_to_apicid(mask);
- entry.mask = 0; /* enable IRQ */
- entry.trigger = trigger;
- entry.polarity = polarity;
- entry.vector = cfg->vector;
-
- /* Mask level triggered irqs.
- * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
- */
- if (trigger)
- entry.mask = 1;
+ if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+ cpu_mask_to_apicid(mask), trigger, polarity,
+ cfg->vector)) {
+ printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
+ mp_ioapics[apic].mp_apicid, pin);
+ __clear_irq_vector(irq);
+ return;
+ }
ioapic_register_intr(irq, trigger);
if (irq < 16)
@@ -944,6 +1088,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
{
struct IO_APIC_route_entry entry;
+ if (intr_remapping_enabled)
+ return;
+
memset(&entry, 0, sizeof(entry));
/*
@@ -1090,6 +1237,7 @@ static __apicdebuginit void print_APIC_bitfield (int base)
void __apicdebuginit print_local_APIC(void * dummy)
{
unsigned int v, ver, maxlvt;
+ unsigned long icr;
if (apic_verbosity == APIC_QUIET)
return;
@@ -1097,7 +1245,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
smp_processor_id(), hard_smp_processor_id());
v = apic_read(APIC_ID);
- printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
+ printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
v = apic_read(APIC_LVR);
printk(KERN_INFO "... APIC VERSION: %08x\n", v);
ver = GET_APIC_VERSION(v);
@@ -1133,10 +1281,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
v = apic_read(APIC_ESR);
printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
- v = apic_read(APIC_ICR);
- printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
- v = apic_read(APIC_ICR2);
- printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+ icr = apic_icr_read();
+ printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
+ printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
v = apic_read(APIC_LVTT);
printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1291,7 +1438,7 @@ void disable_IO_APIC(void)
entry.dest_mode = 0; /* Physical */
entry.delivery_mode = dest_ExtINT; /* ExtInt */
entry.vector = 0;
- entry.dest = GET_APIC_ID(read_apic_id());
+ entry.dest = read_apic_id();
/*
* Add it to the IO-APIC irq-routing table:
@@ -1397,6 +1544,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
*/
#ifdef CONFIG_SMP
+
+#ifdef CONFIG_INTR_REMAP
+static void ir_irq_migration(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
+
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For edge triggered, irq migration is a simple atomic update(of vector
+ * and cpu destination) of IRTE and flush the hardware cache.
+ *
+ * For level triggered, we need to modify the io-apic RTE aswell with the update
+ * vector information, along with modifying IRTE with vector and destination.
+ * So irq migration for level triggered is little bit more complex compared to
+ * edge triggered migration. But the good news is, we use the same algorithm
+ * for level triggered migration as we have today, only difference being,
+ * we now initiate the irq migration from process context instead of the
+ * interrupt context.
+ *
+ * In future, when we do a directed EOI (combined with cpu EOI broadcast
+ * suppression) to the IO-APIC, level triggered irq migration will also be
+ * as simple as edge triggered migration and we can do the irq migration
+ * with a simple atomic update to IO-APIC RTE.
+ */
+static void migrate_ioapic_irq(int irq, cpumask_t mask)
+{
+ struct irq_cfg *cfg = irq_cfg + irq;
+ struct irq_desc *desc = irq_desc + irq;
+ cpumask_t tmp, cleanup_mask;
+ struct irte irte;
+ int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+ unsigned int dest;
+ unsigned long flags;
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+ return;
+
+ if (get_irte(irq, &irte))
+ return;
+
+ if (assign_irq_vector(irq, mask))
+ return;
+
+ cpus_and(tmp, cfg->domain, mask);
+ dest = cpu_mask_to_apicid(tmp);
+
+ if (modify_ioapic_rte) {
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __target_IO_APIC_irq(irq, dest, cfg->vector);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+
+ irte.vector = cfg->vector;
+ irte.dest_id = IRTE_DEST(dest);
+
+ /*
+ * Modified the IRTE and flushes the Interrupt entry cache.
+ */
+ modify_irte(irq, &irte);
+
+ if (cfg->move_in_progress) {
+ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+ cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ cfg->move_in_progress = 0;
+ }
+
+ irq_desc[irq].affinity = mask;
+}
+
+static int migrate_irq_remapped_level(int irq)
+{
+ int ret = -1;
+
+ mask_IO_APIC_irq(irq);
+
+ if (io_apic_level_ack_pending(irq)) {
+ /*
+ * Interrupt in progress. Migrating irq now will change the
+ * vector information in the IO-APIC RTE and that will confuse
+ * the EOI broadcast performed by cpu.
+ * So, delay the irq migration to the next instance.
+ */
+ schedule_delayed_work(&ir_migration_work, 1);
+ goto unmask;
+ }
+
+ /* everthing is clear. we have right of way */
+ migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
+
+ ret = 0;
+ irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
+ cpus_clear(irq_desc[irq].pending_mask);
+
+unmask:
+ unmask_IO_APIC_irq(irq);
+ return ret;
+}
+
+static void ir_irq_migration(struct work_struct *work)
+{
+ int irq;
+
+ for (irq = 0; irq < NR_IRQS; irq++) {
+ struct irq_desc *desc = irq_desc + irq;
+ if (desc->status & IRQ_MOVE_PENDING) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ if (!desc->chip->set_affinity ||
+ !(desc->status & IRQ_MOVE_PENDING)) {
+ desc->status &= ~IRQ_MOVE_PENDING;
+ spin_unlock_irqrestore(&desc->lock, flags);
+ continue;
+ }
+
+ desc->chip->set_affinity(irq,
+ irq_desc[irq].pending_mask);
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+ }
+}
+
+/*
+ * Migrates the IRQ destination in the process context.
+ */
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+ if (irq_desc[irq].status & IRQ_LEVEL) {
+ irq_desc[irq].status |= IRQ_MOVE_PENDING;
+ irq_desc[irq].pending_mask = mask;
+ migrate_irq_remapped_level(irq);
+ return;
+ }
+
+ migrate_ioapic_irq(irq, mask);
+}
+#endif
+
asmlinkage void smp_irq_move_cleanup_interrupt(void)
{
unsigned vector, me;
@@ -1453,6 +1741,17 @@ static void irq_complete_move(unsigned int irq)
#else
static inline void irq_complete_move(unsigned int irq) {}
#endif
+#ifdef CONFIG_INTR_REMAP
+static void ack_x2apic_level(unsigned int irq)
+{
+ ack_x2APIC_irq();
+}
+
+static void ack_x2apic_edge(unsigned int irq)
+{
+ ack_x2APIC_irq();
+}
+#endif
static void ack_apic_edge(unsigned int irq)
{
@@ -1527,6 +1826,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
.retrigger = ioapic_retrigger_irq,
};
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip __read_mostly = {
+ .name = "IR-IO-APIC",
+ .startup = startup_ioapic_irq,
+ .mask = mask_IO_APIC_irq,
+ .unmask = unmask_IO_APIC_irq,
+ .ack = ack_x2apic_edge,
+ .eoi = ack_x2apic_level,
+#ifdef CONFIG_SMP
+ .set_affinity = set_ir_ioapic_affinity_irq,
+#endif
+ .retrigger = ioapic_retrigger_irq,
+};
+#endif
+
static inline void init_IO_APIC_traps(void)
{
int irq;
@@ -1712,6 +2026,8 @@ static inline void __init check_timer(void)
* 8259A.
*/
if (pin1 == -1) {
+ if (intr_remapping_enabled)
+ panic("BIOS bug: timer not connected to IO-APIC");
pin1 = pin2;
apic1 = apic2;
no_pin1 = 1;
@@ -1738,6 +2054,8 @@ static inline void __init check_timer(void)
clear_IO_APIC_pin(0, pin1);
goto out;
}
+ if (intr_remapping_enabled)
+ panic("timer doesn't work through Interrupt-remapped IO-APIC");
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -1977,6 +2295,9 @@ void destroy_irq(unsigned int irq)
dynamic_irq_cleanup(irq);
+#ifdef CONFIG_INTR_REMAP
+ free_irte(irq);
+#endif
spin_lock_irqsave(&vector_lock, flags);
__clear_irq_vector(irq);
spin_unlock_irqrestore(&vector_lock, flags);
@@ -1995,11 +2316,42 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
tmp = TARGET_CPUS;
err = assign_irq_vector(irq, tmp);
- if (!err) {
- cpus_and(tmp, cfg->domain, tmp);
- dest = cpu_mask_to_apicid(tmp);
+ if (err)
+ return err;
+
+ cpus_and(tmp, cfg->domain, tmp);
+ dest = cpu_mask_to_apicid(tmp);
+
+#ifdef CONFIG_INTR_REMAP
+ if (irq_remapped(irq)) {
+ struct irte irte;
+ int ir_index;
+ u16 sub_handle;
+
+ ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+ BUG_ON(ir_index == -1);
+
+ memset (&irte, 0, sizeof(irte));
+
+ irte.present = 1;
+ irte.dst_mode = INT_DEST_MODE;
+ irte.trigger_mode = 0; /* edge */
+ irte.dlvry_mode = INT_DELIVERY_MODE;
+ irte.vector = cfg->vector;
+ irte.dest_id = IRTE_DEST(dest);
+
+ modify_irte(irq, &irte);
msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->data = sub_handle;
+ msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+ MSI_ADDR_IR_SHV |
+ MSI_ADDR_IR_INDEX1(ir_index) |
+ MSI_ADDR_IR_INDEX2(ir_index);
+ } else
+#endif
+ {
+ msg->address_hi = MSI_ADDR_BASE_HI;
msg->address_lo =
MSI_ADDR_BASE_LO |
((INT_DEST_MODE == 0) ?
@@ -2049,6 +2401,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
write_msi_msg(irq, &msg);
irq_desc[irq].affinity = mask;
}
+
+#ifdef CONFIG_INTR_REMAP
+/*
+ * Migrate the MSI irq to another cpumask. This migration is
+ * done in the process context using interrupt-remapping hardware.
+ */
+static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+ struct irq_cfg *cfg = irq_cfg + irq;
+ unsigned int dest;
+ cpumask_t tmp, cleanup_mask;
+ struct irte irte;
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+ return;
+
+ if (get_irte(irq, &irte))
+ return;
+
+ if (assign_irq_vector(irq, mask))
+ return;
+
+ cpus_and(tmp, cfg->domain, mask);
+ dest = cpu_mask_to_apicid(tmp);
+
+ irte.vector = cfg->vector;
+ irte.dest_id = IRTE_DEST(dest);
+
+ /*
+ * atomically update the IRTE with the new destination and vector.
+ */
+ modify_irte(irq, &irte);
+
+ /*
+ * After this point, all the interrupts will start arriving
+ * at the new destination. So, time to cleanup the previous
+ * vector allocation.
+ */
+ if (cfg->move_in_progress) {
+ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+ cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ cfg->move_in_progress = 0;
+ }
+
+ irq_desc[irq].affinity = mask;
+}
+#endif
#endif /* CONFIG_SMP */
/*
@@ -2066,26 +2467,157 @@ static struct irq_chip msi_chip = {
.retrigger = ioapic_retrigger_irq,
};
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip msi_ir_chip = {
+ .name = "IR-PCI-MSI",
+ .unmask = unmask_msi_irq,
+ .mask = mask_msi_irq,
+ .ack = ack_x2apic_edge,
+#ifdef CONFIG_SMP
+ .set_affinity = ir_set_msi_irq_affinity,
+#endif
+ .retrigger = ioapic_retrigger_irq,
+};
+
+/*
+ * Map the PCI dev to the corresponding remapping hardware unit
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
+ * in it.
+ */
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
+{
+ struct intel_iommu *iommu;
+ int index;
+
+ iommu = map_dev_to_ir(dev);
+ if (!iommu) {
+ printk(KERN_ERR
+ "Unable to map PCI %s to iommu\n", pci_name(dev));
+ return -ENOENT;
+ }
+
+ index = alloc_irte(iommu, irq, nvec);
+ if (index < 0) {
+ printk(KERN_ERR
+ "Unable to allocate %d IRTE for PCI %s\n", nvec,
+ pci_name(dev));
+ return -ENOSPC;
+ }
+ return index;
+}
+#endif
+
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
{
+ int ret;
struct msi_msg msg;
+
+ ret = msi_compose_msg(dev, irq, &msg);
+ if (ret < 0)
+ return ret;
+
+ set_irq_msi(irq, desc);
+ write_msi_msg(irq, &msg);
+
+#ifdef CONFIG_INTR_REMAP
+ if (irq_remapped(irq)) {
+ struct irq_desc *desc = irq_desc + irq;
+ /*
+ * irq migration in process context
+ */
+ desc->status |= IRQ_MOVE_PCNTXT;
+ set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
+ } else
+#endif
+ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+
+ return 0;
+}
+
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
int irq, ret;
+
irq = create_irq();
if (irq < 0)
return irq;
- ret = msi_compose_msg(dev, irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+ if (!intr_remapping_enabled)
+ goto no_ir;
+
+ ret = msi_alloc_irte(dev, irq, 1);
+ if (ret < 0)
+ goto error;
+no_ir:
+#endif
+ ret = setup_msi_irq(dev, desc, irq);
if (ret < 0) {
destroy_irq(irq);
return ret;
}
+ return 0;
- set_irq_msi(irq, desc);
- write_msi_msg(irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+error:
+ destroy_irq(irq);
+ return ret;
+#endif
+}
- set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+ int irq, ret, sub_handle;
+ struct msi_desc *desc;
+#ifdef CONFIG_INTR_REMAP
+ struct intel_iommu *iommu = 0;
+ int index = 0;
+#endif
+ sub_handle = 0;
+ list_for_each_entry(desc, &dev->msi_list, list) {
+ irq = create_irq();
+ if (irq < 0)
+ return irq;
+#ifdef CONFIG_INTR_REMAP
+ if (!intr_remapping_enabled)
+ goto no_ir;
+
+ if (!sub_handle) {
+ /*
+ * allocate the consecutive block of IRTE's
+ * for 'nvec'
+ */
+ index = msi_alloc_irte(dev, irq, nvec);
+ if (index < 0) {
+ ret = index;
+ goto error;
+ }
+ } else {
+ iommu = map_dev_to_ir(dev);
+ if (!iommu) {
+ ret = -ENOENT;
+ goto error;
+ }
+ /*
+ * setup the mapping between the irq and the IRTE
+ * base index, the sub_handle pointing to the
+ * appropriate interrupt remap table entry.
+ */
+ set_irte_irq(irq, iommu, index, sub_handle);
+ }
+no_ir:
+#endif
+ ret = setup_msi_irq(dev, desc, irq);
+ if (ret < 0)
+ goto error;
+ sub_handle++;
+ }
return 0;
+
+error:
+ destroy_irq(irq);
+ return ret;
}
void arch_teardown_msi_irq(unsigned int irq)
@@ -2333,6 +2865,10 @@ void __init setup_ioapic_dest(void)
setup_IO_APIC_irq(ioapic, pin, irq,
irq_trigger(irq_entry),
irq_polarity(irq_entry));
+#ifdef CONFIG_INTR_REMAP
+ else if (intr_remapping_enabled)
+ set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
else
set_ioapic_affinity_irq(irq, TARGET_CPUS);
}
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 50e5e4a31c8..19191430274 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/thread_info.h>
#include <linux/syscalls.h>
+#include <asm/syscalls.h>
/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
static void set_bitmap(unsigned long *bitmap, unsigned int base,
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index 3f7537b669d..f1c688e46f3 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -20,6 +20,8 @@
#ifdef CONFIG_X86_32
#include <mach_apic.h>
+#include <mach_ipi.h>
+
/*
* the following functions deal with sending IPIs between CPUs.
*
@@ -147,7 +149,6 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
}
/* must come after the send_IPI functions above for inlining */
-#include <mach_ipi.h>
static int convert_apicid_to_cpu(int apic_id)
{
int i;
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 1cf8c1fcc08..b71e02d42f4 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -325,7 +325,7 @@ skip:
for_each_online_cpu(j)
seq_printf(p, "%10u ",
per_cpu(irq_stat,j).irq_call_count);
- seq_printf(p, " function call interrupts\n");
+ seq_printf(p, " Function call interrupts\n");
seq_printf(p, "TLB: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ",
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1f78b238d8d..f065fe9071b 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -129,7 +129,7 @@ skip:
seq_printf(p, "CAL: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
- seq_printf(p, " function call interrupts\n");
+ seq_printf(p, " Function call interrupts\n");
seq_printf(p, "TLB: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b68e21f06f4..0ed5f939b90 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -18,6 +18,7 @@
#include <asm/ldt.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
+#include <asm/syscalls.h>
#ifdef CONFIG_SMP
static void flush_ldt(void *current_mm)
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index b3fb430725c..f98f4e1dba0 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -397,7 +397,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
generic_bigsmp_probe();
#endif
+#ifdef CONFIG_X86_32
setup_apic_routing();
+#endif
if (!num_processors)
printk(KERN_ERR "MPTABLE: no processors registered!\n");
return num_processors;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index eecc8c18f01..4caff39078e 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -229,6 +229,12 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
}
}
+static int __init numaq_setup_ioapic_ids(void)
+{
+ /* so can skip it */
+ return 1;
+}
+
static struct x86_quirks numaq_x86_quirks __initdata = {
.arch_pre_time_init = numaq_pre_time_init,
.arch_time_init = NULL,
@@ -243,6 +249,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
.mpc_oem_bus_info = mpc_oem_bus_info,
.mpc_oem_pci_bus = mpc_oem_pci_bus,
.smp_read_mpc_oem = smp_read_mpc_oem,
+ .setup_ioapic_ids = numaq_setup_ioapic_ids,
};
void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 300da17e61c..4090cd6f843 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -373,8 +373,6 @@ struct pv_cpu_ops pv_cpu_ops = {
struct pv_apic_ops pv_apic_ops = {
#ifdef CONFIG_X86_LOCAL_APIC
- .apic_write = native_apic_write,
- .apic_read = native_apic_read,
.setup_boot_clock = setup_boot_APIC_clock,
.setup_secondary_clock = setup_secondary_APIC_clock,
.startup_ipi_hook = paravirt_nop,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 58262218781..9fe644f4861 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -23,7 +23,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
start = start_##ops##_##x; \
end = end_##ops##_##x; \
goto patch_site
- switch(type) {
+ switch (type) {
PATCH_SITE(pv_irq_ops, irq_disable);
PATCH_SITE(pv_irq_ops, irq_enable);
PATCH_SITE(pv_irq_ops, restore_fl);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 87d4d6964ec..f704cb51ff8 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -82,7 +82,7 @@ void __init dma32_reserve_bootmem(void)
* using 512M as goal
*/
align = 64ULL<<20;
- size = round_up(dma32_bootmem_size, align);
+ size = roundup(dma32_bootmem_size, align);
dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
512ULL<<20);
if (dma32_bootmem_ptr)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3b7a1ddcc0b..2c9abc95e02 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,6 +55,8 @@
#include <asm/tlbflush.h>
#include <asm/cpu.h>
#include <asm/kdebug.h>
+#include <asm/syscalls.h>
+#include <asm/smp.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 71553b664e2..7502508a766 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -37,11 +37,11 @@
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
-#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
-#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
@@ -51,6 +51,7 @@
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
+#include <asm/syscalls.h>
asmlinkage extern void ret_from_fork(void);
@@ -88,7 +89,7 @@ void exit_idle(void)
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);
-#include <asm/nmi.h>
+#include <linux/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
@@ -151,7 +152,7 @@ void cpu_idle(void)
}
/* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs * regs)
+void __show_regs(struct pt_regs *regs)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
unsigned long d0, d1, d2, d3, d6, d7;
@@ -160,59 +161,61 @@ void __show_regs(struct pt_regs * regs)
printk("\n");
print_modules();
- printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+ printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
current->pid, current->comm, print_tainted(),
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
- printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
+ printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
printk_address(regs->ip, 1);
- printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
- regs->flags);
- printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
+ printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
+ regs->sp, regs->flags);
+ printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
regs->ax, regs->bx, regs->cx);
- printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
+ printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
regs->dx, regs->si, regs->di);
- printk("RBP: %016lx R08: %016lx R09: %016lx\n",
+ printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
regs->bp, regs->r8, regs->r9);
- printk("R10: %016lx R11: %016lx R12: %016lx\n",
- regs->r10, regs->r11, regs->r12);
- printk("R13: %016lx R14: %016lx R15: %016lx\n",
- regs->r13, regs->r14, regs->r15);
-
- asm("movl %%ds,%0" : "=r" (ds));
- asm("movl %%cs,%0" : "=r" (cs));
- asm("movl %%es,%0" : "=r" (es));
+ printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
+ regs->r10, regs->r11, regs->r12);
+ printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
+ regs->r13, regs->r14, regs->r15);
+
+ asm("movl %%ds,%0" : "=r" (ds));
+ asm("movl %%cs,%0" : "=r" (cs));
+ asm("movl %%es,%0" : "=r" (es));
asm("movl %%fs,%0" : "=r" (fsindex));
asm("movl %%gs,%0" : "=r" (gsindex));
rdmsrl(MSR_FS_BASE, fs);
- rdmsrl(MSR_GS_BASE, gs);
- rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
+ rdmsrl(MSR_GS_BASE, gs);
+ rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
cr0 = read_cr0();
cr2 = read_cr2();
cr3 = read_cr3();
cr4 = read_cr4();
- printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
- fs,fsindex,gs,gsindex,shadowgs);
- printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
- printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
+ printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
+ fs, fsindex, gs, gsindex, shadowgs);
+ printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
+ es, cr0);
+ printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
+ cr4);
get_debugreg(d0, 0);
get_debugreg(d1, 1);
get_debugreg(d2, 2);
- printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
+ printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
get_debugreg(d3, 3);
get_debugreg(d6, 6);
get_debugreg(d7, 7);
- printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
+ printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}
void show_regs(struct pt_regs *regs)
{
- printk("CPU %d:", smp_processor_id());
+ printk(KERN_INFO "CPU %d:", smp_processor_id());
__show_regs(regs);
show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}
@@ -313,10 +316,10 @@ void prepare_to_copy(struct task_struct *tsk)
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
unsigned long unused,
- struct task_struct * p, struct pt_regs * regs)
+ struct task_struct *p, struct pt_regs *regs)
{
int err;
- struct pt_regs * childregs;
+ struct pt_regs *childregs;
struct task_struct *me = current;
childregs = ((struct pt_regs *)
@@ -361,10 +364,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
if (test_thread_flag(TIF_IA32))
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
- else
-#endif
- err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
- if (err)
+ else
+#endif
+ err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
+ if (err)
goto out;
}
err = 0;
@@ -543,7 +546,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
unsigned fsindex, gsindex;
/* we're going to use this soon, after a few expensive things */
- if (next_p->fpu_counter>5)
+ if (next_p->fpu_counter > 5)
prefetch(next->xstate);
/*
@@ -551,13 +554,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
load_sp0(tss, next);
- /*
+ /*
* Switch DS and ES.
* This won't pick up thread selector changes, but I guess that is ok.
*/
savesegment(es, prev->es);
if (unlikely(next->es | prev->es))
- loadsegment(es, next->es);
+ loadsegment(es, next->es);
savesegment(ds, prev->ds);
if (unlikely(next->ds | prev->ds))
@@ -583,7 +586,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
arch_leave_lazy_cpu_mode();
- /*
+ /*
* Switch FS and GS.
*
* Segment register != 0 always requires a reload. Also
@@ -592,13 +595,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
if (unlikely(fsindex | next->fsindex | prev->fs)) {
loadsegment(fs, next->fsindex);
- /*
+ /*
* Check if the user used a selector != 0; if yes
* clear 64bit base, since overloaded base is always
* mapped to the Null selector
*/
if (fsindex)
- prev->fs = 0;
+ prev->fs = 0;
}
/* when next process has a 64bit base use it */
if (next->fs)
@@ -608,7 +611,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (unlikely(gsindex | next->gsindex | prev->gs)) {
load_gs_index(next->gsindex);
if (gsindex)
- prev->gs = 0;
+ prev->gs = 0;
}
if (next->gs)
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
@@ -617,12 +620,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* Must be after DS reload */
unlazy_fpu(prev_p);
- /*
+ /*
* Switch the PDA and FPU contexts.
*/
prev->usersp = read_pda(oldrsp);
write_pda(oldrsp, next->usersp);
- write_pda(pcurrent, next_p);
+ write_pda(pcurrent, next_p);
write_pda(kernelstack,
(unsigned long)task_stack_page(next_p) +
@@ -663,7 +666,7 @@ long sys_execve(char __user *name, char __user * __user *argv,
char __user * __user *envp, struct pt_regs *regs)
{
long error;
- char * filename;
+ char *filename;
filename = getname(name);
error = PTR_ERR(filename);
@@ -721,55 +724,55 @@ asmlinkage long sys_vfork(struct pt_regs *regs)
unsigned long get_wchan(struct task_struct *p)
{
unsigned long stack;
- u64 fp,ip;
+ u64 fp, ip;
int count = 0;
- if (!p || p == current || p->state==TASK_RUNNING)
- return 0;
+ if (!p || p == current || p->state == TASK_RUNNING)
+ return 0;
stack = (unsigned long)task_stack_page(p);
if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
return 0;
fp = *(u64 *)(p->thread.sp);
- do {
+ do {
if (fp < (unsigned long)stack ||
fp > (unsigned long)stack+THREAD_SIZE)
- return 0;
+ return 0;
ip = *(u64 *)(fp+8);
if (!in_sched_functions(ip))
return ip;
- fp = *(u64 *)fp;
- } while (count++ < 16);
+ fp = *(u64 *)fp;
+ } while (count++ < 16);
return 0;
}
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
-{
- int ret = 0;
+{
+ int ret = 0;
int doit = task == current;
int cpu;
- switch (code) {
+ switch (code) {
case ARCH_SET_GS:
if (addr >= TASK_SIZE_OF(task))
- return -EPERM;
+ return -EPERM;
cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
+ /* handle small bases via the GDT because that's faster to
switch. */
- if (addr <= 0xffffffff) {
- set_32bit_tls(task, GS_TLS, addr);
- if (doit) {
+ if (addr <= 0xffffffff) {
+ set_32bit_tls(task, GS_TLS, addr);
+ if (doit) {
load_TLS(&task->thread, cpu);
- load_gs_index(GS_TLS_SEL);
+ load_gs_index(GS_TLS_SEL);
}
- task->thread.gsindex = GS_TLS_SEL;
+ task->thread.gsindex = GS_TLS_SEL;
task->thread.gs = 0;
- } else {
+ } else {
task->thread.gsindex = 0;
task->thread.gs = addr;
if (doit) {
load_gs_index(0);
ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
- }
+ }
}
put_cpu();
break;
@@ -823,8 +826,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
rdmsrl(MSR_KERNEL_GS_BASE, base);
else
base = task->thread.gs;
- }
- else
+ } else
base = task->thread.gs;
ret = put_user(base, (unsigned long __user *)addr);
break;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e37dccce85d..9e43a48ad6e 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -14,6 +14,7 @@
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/regset.h>
+#include <linux/tracehook.h>
#include <linux/user.h>
#include <linux/elf.h>
#include <linux/security.h>
@@ -69,7 +70,7 @@ static inline bool invalid_selector(u16 value)
#define FLAG_MASK FLAG_MASK_32
-static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
+static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
regno >>= 2;
@@ -1375,30 +1376,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
force_sig_info(SIGTRAP, &info, tsk);
}
-static void syscall_trace(struct pt_regs *regs)
-{
- if (!(current->ptrace & PT_PTRACED))
- return;
-
-#if 0
- printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
- current->comm,
- regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
- current_thread_info()->flags, current->ptrace);
-#endif
-
- ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
- ? 0x80 : 0));
- /*
- * this isn't the same as continuing with a signal, but it will do
- * for normal use. strace only continues with a signal if the
- * stopping signal is not SIGTRAP. -brl
- */
- if (current->exit_code) {
- send_sig(current->exit_code, current, 1);
- current->exit_code = 0;
- }
-}
#ifdef CONFIG_X86_32
# define IS_IA32 1
@@ -1432,8 +1409,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
ret = -1L;
- if (ret || test_thread_flag(TIF_SYSCALL_TRACE))
- syscall_trace(regs);
+ if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
+ tracehook_report_syscall_entry(regs))
+ ret = -1L;
if (unlikely(current->audit_context)) {
if (IS_IA32)
@@ -1459,7 +1437,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
if (test_thread_flag(TIF_SYSCALL_TRACE))
- syscall_trace(regs);
+ tracehook_report_syscall_exit(regs, 0);
/*
* If TIF_SYSCALL_EMU is set, we only get here because of
@@ -1475,6 +1453,6 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
* system call instruction.
*/
if (test_thread_flag(TIF_SINGLESTEP) &&
- (current->ptrace & PT_PTRACED))
+ tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL))
send_sigtrap(current, regs, 0);
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9838f2539df..c6b9330c1bf 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -742,6 +742,8 @@ void __init setup_arch(char **cmdline_p)
#else
num_physpages = max_pfn;
+ if (cpu_has_x2apic)
+ check_x2apic();
/* How many end-of-memory variables you have, grandma! */
/* need this before calling reserve_initrd */
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
index 72bbb519d2d..cc673aa55ce 100644
--- a/arch/x86/kernel/sigframe.h
+++ b/arch/x86/kernel/sigframe.h
@@ -3,9 +3,18 @@ struct sigframe {
char __user *pretcode;
int sig;
struct sigcontext sc;
- struct _fpstate fpstate;
+ /*
+ * fpstate is unused. fpstate is moved/allocated after
+ * retcode[] below. This movement allows to have the FP state and the
+ * future state extensions (xsave) stay together.
+ * And at the same time retaining the unused fpstate, prevents changing
+ * the offset of extramask[] in the sigframe and thus prevent any
+ * legacy application accessing/modifying it.
+ */
+ struct _fpstate fpstate_unused;
unsigned long extramask[_NSIG_WORDS-1];
char retcode[8];
+ /* fp state follows here */
};
struct rt_sigframe {
@@ -15,13 +24,19 @@ struct rt_sigframe {
void __user *puc;
struct siginfo info;
struct ucontext uc;
- struct _fpstate fpstate;
char retcode[8];
+ /* fp state follows here */
};
#else
struct rt_sigframe {
char __user *pretcode;
struct ucontext uc;
struct siginfo info;
+ /* fp state follows here */
};
+
+int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs);
+int ia32_setup_frame(int sig, struct k_sigaction *ka,
+ sigset_t *set, struct pt_regs *regs);
#endif
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 6fb5bcdd893..da3cf3270f8 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -17,6 +17,7 @@
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/wait.h>
+#include <linux/tracehook.h>
#include <linux/elf.h>
#include <linux/smp.h>
#include <linux/mm.h>
@@ -26,6 +27,8 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/vdso.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
#include "sigframe.h"
@@ -159,28 +162,14 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
}
{
- struct _fpstate __user *buf;
+ void __user *buf;
err |= __get_user(buf, &sc->fpstate);
- if (buf) {
- if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
- goto badframe;
- err |= restore_i387(buf);
- } else {
- struct task_struct *me = current;
-
- if (used_math()) {
- clear_fpu(me);
- clear_used_math();
- }
- }
+ err |= restore_i387_xstate(buf);
}
err |= __get_user(*pax, &sc->ax);
return err;
-
-badframe:
- return 1;
}
asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
@@ -226,9 +215,8 @@ badframe:
return 0;
}
-asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+static long do_rt_sigreturn(struct pt_regs *regs)
{
- struct pt_regs *regs = (struct pt_regs *)&__unused;
struct rt_sigframe __user *frame;
unsigned long ax;
sigset_t set;
@@ -254,15 +242,22 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused)
return ax;
badframe:
- force_sig(SIGSEGV, current);
+ signal_fault(regs, frame, "rt_sigreturn");
return 0;
}
+asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+{
+ struct pt_regs *regs = (struct pt_regs *)&__unused;
+
+ return do_rt_sigreturn(regs);
+}
+
/*
* Set up a signal frame.
*/
static int
-setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
+setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask)
{
int tmp, err = 0;
@@ -289,7 +284,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
err |= __put_user(regs->sp, &sc->sp_at_signal);
err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
- tmp = save_i387(fpstate);
+ tmp = save_i387_xstate(fpstate);
if (tmp < 0)
err = 1;
else
@@ -306,7 +301,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
* Determine which stack to use..
*/
static inline void __user *
-get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
+get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
+ void **fpstate)
{
unsigned long sp;
@@ -332,6 +328,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
sp = (unsigned long) ka->sa.sa_restorer;
}
+ if (used_math()) {
+ sp = sp - sig_xstate_size;
+ *fpstate = (struct _fpstate *) sp;
+ }
+
sp -= frame_size;
/*
* Align the stack pointer according to the i386 ABI,
@@ -343,38 +344,29 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
}
static int
-setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
- struct pt_regs *regs)
+__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
+ struct pt_regs *regs)
{
struct sigframe __user *frame;
void __user *restorer;
int err = 0;
- int usig;
+ void __user *fpstate = NULL;
- frame = get_sigframe(ka, regs, sizeof(*frame));
+ frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
- goto give_sigsegv;
-
- usig = current_thread_info()->exec_domain
- && current_thread_info()->exec_domain->signal_invmap
- && sig < 32
- ? current_thread_info()->exec_domain->signal_invmap[sig]
- : sig;
+ return -EFAULT;
- err = __put_user(usig, &frame->sig);
- if (err)
- goto give_sigsegv;
+ if (__put_user(sig, &frame->sig))
+ return -EFAULT;
- err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]);
- if (err)
- goto give_sigsegv;
+ if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
+ return -EFAULT;
if (_NSIG_WORDS > 1) {
- err = __copy_to_user(&frame->extramask, &set->sig[1],
- sizeof(frame->extramask));
- if (err)
- goto give_sigsegv;
+ if (__copy_to_user(&frame->extramask, &set->sig[1],
+ sizeof(frame->extramask)))
+ return -EFAULT;
}
if (current->mm->context.vdso)
@@ -399,7 +391,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
if (err)
- goto give_sigsegv;
+ return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long)frame;
@@ -414,50 +406,43 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
regs->cs = __USER_CS;
return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
}
-static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs *regs)
+static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
void __user *restorer;
int err = 0;
- int usig;
+ void __user *fpstate = NULL;
- frame = get_sigframe(ka, regs, sizeof(*frame));
+ frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
- goto give_sigsegv;
+ return -EFAULT;
- usig = current_thread_info()->exec_domain
- && current_thread_info()->exec_domain->signal_invmap
- && sig < 32
- ? current_thread_info()->exec_domain->signal_invmap[sig]
- : sig;
-
- err |= __put_user(usig, &frame->sig);
+ err |= __put_user(sig, &frame->sig);
err |= __put_user(&frame->info, &frame->pinfo);
err |= __put_user(&frame->uc, &frame->puc);
err |= copy_siginfo_to_user(&frame->info, info);
if (err)
- goto give_sigsegv;
+ return -EFAULT;
/* Create the ucontext. */
- err |= __put_user(0, &frame->uc.uc_flags);
+ if (cpu_has_xsave)
+ err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+ else
+ err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
err |= __put_user(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
- goto give_sigsegv;
+ return -EFAULT;
/* Set up to return from userspace. */
restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -477,12 +462,12 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
if (err)
- goto give_sigsegv;
+ return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long)frame;
regs->ip = (unsigned long)ka->sa.sa_handler;
- regs->ax = (unsigned long)usig;
+ regs->ax = (unsigned long)sig;
regs->dx = (unsigned long)&frame->info;
regs->cx = (unsigned long)&frame->uc;
@@ -492,25 +477,48 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->cs = __USER_CS;
return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
}
/*
* OK, we're invoking a handler:
*/
static int
+setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
+{
+ int ret;
+ int usig;
+
+ usig = current_thread_info()->exec_domain
+ && current_thread_info()->exec_domain->signal_invmap
+ && sig < 32
+ ? current_thread_info()->exec_domain->signal_invmap[sig]
+ : sig;
+
+ /* Set up the stack frame */
+ if (ka->sa.sa_flags & SA_SIGINFO)
+ ret = __setup_rt_frame(usig, ka, info, set, regs);
+ else
+ ret = __setup_frame(usig, ka, set, regs);
+
+ if (ret) {
+ force_sigsegv(sig, current);
+ return -EFAULT;
+ }
+
+ return ret;
+}
+
+static int
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
sigset_t *oldset, struct pt_regs *regs)
{
int ret;
/* Are we from a system call? */
- if ((long)regs->orig_ax >= 0) {
+ if (syscall_get_nr(current, regs) >= 0) {
/* If so, check system call restarting.. */
- switch (regs->ax) {
+ switch (syscall_get_error(current, regs)) {
case -ERESTART_RESTARTBLOCK:
case -ERESTARTNOHAND:
regs->ax = -EINTR;
@@ -537,11 +545,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
regs->flags &= ~X86_EFLAGS_TF;
- /* Set up the stack frame */
- if (ka->sa.sa_flags & SA_SIGINFO)
- ret = setup_rt_frame(sig, ka, info, oldset, regs);
- else
- ret = setup_frame(sig, ka, oldset, regs);
+ ret = setup_rt_frame(sig, ka, info, oldset, regs);
if (ret)
return ret;
@@ -558,8 +562,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
* handler too.
*/
regs->flags &= ~X86_EFLAGS_TF;
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
@@ -568,9 +570,13 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
+ tracehook_signal_handler(sig, info, ka, regs,
+ test_thread_flag(TIF_SINGLESTEP));
+
return 0;
}
+#define NR_restart_syscall __NR_restart_syscall
/*
* Note that 'init' is a special process: it doesn't get signals it doesn't
* want to handle. Thus you cannot kill init even with a SIGKILL even by
@@ -623,9 +629,9 @@ static void do_signal(struct pt_regs *regs)
}
/* Did we come from a system call? */
- if ((long)regs->orig_ax >= 0) {
+ if (syscall_get_nr(current, regs) >= 0) {
/* Restart the system call - no handlers present */
- switch (regs->ax) {
+ switch (syscall_get_error(current, regs)) {
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
@@ -634,7 +640,7 @@ static void do_signal(struct pt_regs *regs)
break;
case -ERESTART_RESTARTBLOCK:
- regs->ax = __NR_restart_syscall;
+ regs->ax = NR_restart_syscall;
regs->ip -= 2;
break;
}
@@ -661,5 +667,26 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
+ if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ }
+
clear_thread_flag(TIF_IRET);
}
+
+void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
+{
+ struct task_struct *me = current;
+
+ if (show_unhandled_signals && printk_ratelimit()) {
+ printk(KERN_INFO
+ "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
+ me->comm, me->pid, where, frame,
+ regs->ip, regs->sp, regs->orig_ax);
+ print_vma_addr(" in ", regs->ip);
+ printk(KERN_CONT "\n");
+ }
+
+ force_sig(SIGSEGV, me);
+}
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index ca316b5b742..bf77d4789a2 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -15,17 +15,21 @@
#include <linux/errno.h>
#include <linux/wait.h>
#include <linux/ptrace.h>
+#include <linux/tracehook.h>
#include <linux/unistd.h>
#include <linux/stddef.h>
#include <linux/personality.h>
#include <linux/compiler.h>
+#include <linux/uaccess.h>
+
#include <asm/processor.h>
#include <asm/ucontext.h>
-#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/proto.h>
#include <asm/ia32_unistd.h>
#include <asm/mce.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
#include "sigframe.h"
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -41,11 +45,6 @@
# define FIX_EFLAGS __FIX_EFLAGS
#endif
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs * regs);
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
- sigset_t *set, struct pt_regs * regs);
-
asmlinkage long
sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
struct pt_regs *regs)
@@ -54,69 +53,6 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
}
/*
- * Signal frame handlers.
- */
-
-static inline int save_i387(struct _fpstate __user *buf)
-{
- struct task_struct *tsk = current;
- int err = 0;
-
- BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
- sizeof(tsk->thread.xstate->fxsave));
-
- if ((unsigned long)buf % 16)
- printk("save_i387: bad fpstate %p\n", buf);
-
- if (!used_math())
- return 0;
- clear_used_math(); /* trigger finit */
- if (task_thread_info(tsk)->status & TS_USEDFPU) {
- err = save_i387_checking((struct i387_fxsave_struct __user *)
- buf);
- if (err)
- return err;
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
- stts();
- } else {
- if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
- sizeof(struct i387_fxsave_struct)))
- return -1;
- }
- return 1;
-}
-
-/*
- * This restores directly out of user space. Exceptions are handled.
- */
-static inline int restore_i387(struct _fpstate __user *buf)
-{
- struct task_struct *tsk = current;
- int err;
-
- if (!used_math()) {
- err = init_fpu(tsk);
- if (err)
- return err;
- }
-
- if (!(task_thread_info(current)->status & TS_USEDFPU)) {
- clts();
- task_thread_info(current)->status |= TS_USEDFPU;
- }
- err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
- if (unlikely(err)) {
- /*
- * Encountered an error while doing the restore from the
- * user buffer, clear the fpu state.
- */
- clear_fpu(tsk);
- clear_used_math();
- }
- return err;
-}
-
-/*
* Do a signal return; undo the signal stack.
*/
static int
@@ -128,7 +64,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
-#define COPY(x) err |= __get_user(regs->x, &sc->x)
+#define COPY(x) (err |= __get_user(regs->x, &sc->x))
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
@@ -158,34 +94,21 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
}
{
- struct _fpstate __user * buf;
- err |= __get_user(buf, &sc->fpstate);
+ void __user *buf;
- if (buf) {
- if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
- goto badframe;
- err |= restore_i387(buf);
- } else {
- struct task_struct *me = current;
- if (used_math()) {
- clear_fpu(me);
- clear_used_math();
- }
- }
+ err |= __get_user(buf, &sc->fpstate);
+ err |= restore_i387_xstate(buf);
}
err |= __get_user(*pax, &sc->ax);
return err;
-
-badframe:
- return 1;
}
-asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+static long do_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
- sigset_t set;
unsigned long ax;
+ sigset_t set;
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
@@ -198,7 +121,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
current->blocked = set;
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
-
+
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
@@ -208,16 +131,22 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
return ax;
badframe:
- signal_fault(regs,frame,"sigreturn");
+ signal_fault(regs, frame, "rt_sigreturn");
return 0;
-}
+}
+
+asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+{
+ return do_rt_sigreturn(regs);
+}
/*
* Set up a signal frame.
*/
static inline int
-setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me)
+setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
+ unsigned long mask, struct task_struct *me)
{
int err = 0;
@@ -269,41 +198,40 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
sp = current->sas_ss_sp + current->sas_ss_size;
}
- return (void __user *)round_down(sp - size, 16);
+ return (void __user *)round_down(sp - size, 64);
}
-static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs * regs)
+static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
- struct _fpstate __user *fp = NULL;
+ void __user *fp = NULL;
int err = 0;
struct task_struct *me = current;
if (used_math()) {
- fp = get_stack(ka, regs, sizeof(struct _fpstate));
+ fp = get_stack(ka, regs, sig_xstate_size);
frame = (void __user *)round_down(
(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
- if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
- goto give_sigsegv;
-
- if (save_i387(fp) < 0)
- err |= -1;
+ if (save_i387_xstate(fp) < 0)
+ return -EFAULT;
} else
frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
- goto give_sigsegv;
+ return -EFAULT;
- if (ka->sa.sa_flags & SA_SIGINFO) {
- err |= copy_siginfo_to_user(&frame->info, info);
- if (err)
- goto give_sigsegv;
+ if (ka->sa.sa_flags & SA_SIGINFO) {
+ if (copy_siginfo_to_user(&frame->info, info))
+ return -EFAULT;
}
-
+
/* Create the ucontext. */
- err |= __put_user(0, &frame->uc.uc_flags);
+ if (cpu_has_xsave)
+ err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+ else
+ err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
err |= __put_user(sas_ss_flags(regs->sp),
@@ -311,9 +239,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
- if (sizeof(*set) == 16) {
+ if (sizeof(*set) == 16) {
__put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
- __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
+ __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
} else
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
@@ -324,15 +252,15 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
} else {
/* could use a vstub here */
- goto give_sigsegv;
+ return -EFAULT;
}
if (err)
- goto give_sigsegv;
+ return -EFAULT;
/* Set up registers for signal handler */
regs->di = sig;
- /* In case the signal handler was declared without prototypes */
+ /* In case the signal handler was declared without prototypes */
regs->ax = 0;
/* This also works for non SA_SIGINFO handlers because they expect the
@@ -348,44 +276,34 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->cs = __USER_CS;
return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
}
/*
- * Return -1L or the syscall number that @regs is executing.
+ * OK, we're invoking a handler
*/
-static long current_syscall(struct pt_regs *regs)
+static int
+setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
{
- /*
- * We always sign-extend a -1 value being set here,
- * so this is always either -1L or a syscall number.
- */
- return regs->orig_ax;
-}
+ int ret;
-/*
- * Return a value that is -EFOO if the system call in @regs->orig_ax
- * returned an error. This only works for @regs from @current.
- */
-static long current_syscall_ret(struct pt_regs *regs)
-{
#ifdef CONFIG_IA32_EMULATION
- if (test_thread_flag(TIF_IA32))
- /*
- * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
- * and will match correctly in comparisons.
- */
- return (int) regs->ax;
+ if (test_thread_flag(TIF_IA32)) {
+ if (ka->sa.sa_flags & SA_SIGINFO)
+ ret = ia32_setup_rt_frame(sig, ka, info, set, regs);
+ else
+ ret = ia32_setup_frame(sig, ka, set, regs);
+ } else
#endif
- return regs->ax;
-}
+ ret = __setup_rt_frame(sig, ka, info, set, regs);
-/*
- * OK, we're invoking a handler
- */
+ if (ret) {
+ force_sigsegv(sig, current);
+ return -EFAULT;
+ }
+
+ return ret;
+}
static int
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
@@ -394,9 +312,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
int ret;
/* Are we from a system call? */
- if (current_syscall(regs) >= 0) {
+ if (syscall_get_nr(current, regs) >= 0) {
/* If so, check system call restarting.. */
- switch (current_syscall_ret(regs)) {
+ switch (syscall_get_error(current, regs)) {
case -ERESTART_RESTARTBLOCK:
case -ERESTARTNOHAND:
regs->ax = -EINTR;
@@ -423,50 +341,46 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
regs->flags &= ~X86_EFLAGS_TF;
-#ifdef CONFIG_IA32_EMULATION
- if (test_thread_flag(TIF_IA32)) {
- if (ka->sa.sa_flags & SA_SIGINFO)
- ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs);
- else
- ret = ia32_setup_frame(sig, ka, oldset, regs);
- } else
-#endif
ret = setup_rt_frame(sig, ka, info, oldset, regs);
- if (ret == 0) {
- /*
- * This has nothing to do with segment registers,
- * despite the name. This magic affects uaccess.h
- * macros' behavior. Reset it to the normal setting.
- */
- set_fs(USER_DS);
+ if (ret)
+ return ret;
- /*
- * Clear the direction flag as per the ABI for function entry.
- */
- regs->flags &= ~X86_EFLAGS_DF;
+ /*
+ * This has nothing to do with segment registers,
+ * despite the name. This magic affects uaccess.h
+ * macros' behavior. Reset it to the normal setting.
+ */
+ set_fs(USER_DS);
- /*
- * Clear TF when entering the signal handler, but
- * notify any tracer that was single-stepping it.
- * The tracer may want to single-step inside the
- * handler too.
- */
- regs->flags &= ~X86_EFLAGS_TF;
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-
- spin_lock_irq(&current->sighand->siglock);
- sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
- if (!(ka->sa.sa_flags & SA_NODEFER))
- sigaddset(&current->blocked,sig);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
- }
+ /*
+ * Clear the direction flag as per the ABI for function entry.
+ */
+ regs->flags &= ~X86_EFLAGS_DF;
- return ret;
+ /*
+ * Clear TF when entering the signal handler, but
+ * notify any tracer that was single-stepping it.
+ * The tracer may want to single-step inside the
+ * handler too.
+ */
+ regs->flags &= ~X86_EFLAGS_TF;
+
+ spin_lock_irq(&current->sighand->siglock);
+ sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+ if (!(ka->sa.sa_flags & SA_NODEFER))
+ sigaddset(&current->blocked, sig);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ tracehook_signal_handler(sig, info, ka, regs,
+ test_thread_flag(TIF_SINGLESTEP));
+
+ return 0;
}
+#define NR_restart_syscall \
+ test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
/*
* Note that 'init' is a special process: it doesn't get signals it doesn't
* want to handle. Thus you cannot kill init even with a SIGKILL even by
@@ -518,9 +432,9 @@ static void do_signal(struct pt_regs *regs)
}
/* Did we come from a system call? */
- if (current_syscall(regs) >= 0) {
+ if (syscall_get_nr(current, regs) >= 0) {
/* Restart the system call - no handlers present */
- switch (current_syscall_ret(regs)) {
+ switch (syscall_get_error(current, regs)) {
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
@@ -528,9 +442,7 @@ static void do_signal(struct pt_regs *regs)
regs->ip -= 2;
break;
case -ERESTART_RESTARTBLOCK:
- regs->ax = test_thread_flag(TIF_IA32) ?
- __NR_ia32_restart_syscall :
- __NR_restart_syscall;
+ regs->ax = NR_restart_syscall;
regs->ip -= 2;
break;
}
@@ -558,17 +470,25 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
+
+ if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ }
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
-{
- struct task_struct *me = current;
+{
+ struct task_struct *me = current;
+
if (show_unhandled_signals && printk_ratelimit()) {
- printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
- me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax);
+ printk(KERN_INFO
+ "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
+ me->comm, me->pid, where, frame,
+ regs->ip, regs->sp, regs->orig_ax);
print_vma_addr(" in ", regs->ip);
- printk("\n");
+ printk(KERN_CONT "\n");
}
- force_sig(SIGSEGV, me);
-}
+ force_sig(SIGSEGV, me);
+}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7985c5b3f91..2ff0bbcd5bd 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -88,7 +88,7 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
#else
-struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
+static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
#define get_idle_for_cpu(x) (idle_thread_array[(x)])
#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
#endif
@@ -123,13 +123,12 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
static atomic_t init_deasserted;
-static int boot_cpu_logical_apicid;
/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;
/* Set if we find a B stepping CPU */
-int __cpuinitdata smp_b_stepping;
+static int __cpuinitdata smp_b_stepping;
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
@@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu)
#endif
#ifdef CONFIG_X86_32
+static int boot_cpu_logical_apicid;
+
u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = BAD_APICID };
@@ -210,7 +211,7 @@ static void __cpuinit smp_callin(void)
/*
* (This works even if the APIC is not enabled.)
*/
- phys_id = GET_APIC_ID(read_apic_id());
+ phys_id = read_apic_id();
cpuid = smp_processor_id();
if (cpu_isset(cpuid, cpu_callin_map)) {
panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@ -550,8 +551,7 @@ static inline void __inquire_remote_apic(int apicid)
printk(KERN_CONT
"a previous APIC delivery may have failed\n");
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
- apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
+ apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
timeout = 0;
do {
@@ -583,11 +583,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
int maxlvt;
/* Target chip */
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
-
/* Boot on the stack */
/* Kick the second */
- apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+ apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -640,13 +638,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
/*
* Turn INIT on target chip
*/
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
/*
* Send IPI
*/
- apic_write(APIC_ICR,
- APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);
+ apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
+ phys_apicid);
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -656,10 +652,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
pr_debug("Deasserting INIT.\n");
/* Target chip */
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
/* Send IPI */
- apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+ apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -702,11 +696,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
*/
/* Target chip */
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
/* Boot on the stack */
/* Kick the second */
- apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12));
+ apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
+ phys_apicid);
/*
* Give the other CPU some time to accept the IPI.
@@ -1175,10 +1168,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
* Setup boot CPU information
*/
smp_store_cpu_info(0); /* Final full version of the data */
+#ifdef CONFIG_X86_32
boot_cpu_logical_apicid = logical_smp_processor_id();
+#endif
current_thread_info()->cpu = 0; /* needed? */
set_cpu_sibling_map(0);
+#ifdef CONFIG_X86_64
+ enable_IR_x2apic();
+ setup_apic_routing();
+#endif
+
if (smp_sanity_check(max_cpus) < 0) {
printk(KERN_INFO "SMP disabled\n");
disable_smp();
@@ -1186,9 +1186,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
}
preempt_disable();
- if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) {
+ if (read_apic_id() != boot_cpu_physical_apicid) {
panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
- GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid);
+ read_apic_id(), boot_cpu_physical_apicid);
/* Or can we switch back to PIC here? */
}
preempt_enable();
@@ -1313,16 +1313,13 @@ __init void prefill_possible_map(void)
if (!num_processors)
num_processors = 1;
-#ifdef CONFIG_HOTPLUG_CPU
if (additional_cpus == -1) {
if (disabled_cpus > 0)
additional_cpus = disabled_cpus;
else
additional_cpus = 0;
}
-#else
- additional_cpus = 0;
-#endif
+
possible = num_processors + additional_cpus;
if (possible > NR_CPUS)
possible = NR_CPUS;
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index d67ce5f044b..7b987852e87 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -30,7 +30,7 @@
#include <linux/init.h>
#include <asm/io.h>
#include <asm/bios_ebda.h>
-#include <asm/mach-summit/mach_mpparse.h>
+#include <asm/summit/mpparse.h>
static struct rio_table_hdr *rio_table_hdr __initdata;
static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 7066cb855a6..1884a8d12bf 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -22,6 +22,8 @@
#include <linux/uaccess.h>
#include <linux/unistd.h>
+#include <asm/syscalls.h>
+
asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff)
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 3b360ef3381..6bc211accf0 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -13,15 +13,17 @@
#include <linux/utsname.h>
#include <linux/personality.h>
#include <linux/random.h>
+#include <linux/uaccess.h>
-#include <asm/uaccess.h>
#include <asm/ia32.h>
+#include <asm/syscalls.h>
-asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long off)
+asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long off)
{
long error;
- struct file * file;
+ struct file *file;
error = -EINVAL;
if (off & ~PAGE_MASK)
@@ -56,9 +58,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
unmapped base down for this case. This can give
conflicts with the heap, but we assume that glibc
malloc knows how to fall back to mmap. Give it 1GB
- of playground for now. -AK */
- *begin = 0x40000000;
- *end = 0x80000000;
+ of playground for now. -AK */
+ *begin = 0x40000000;
+ *end = 0x80000000;
if (current->flags & PF_RANDOMIZE) {
new_begin = randomize_range(*begin, *begin + 0x02000000, 0);
if (new_begin)
@@ -66,9 +68,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
}
} else {
*begin = TASK_UNMAPPED_BASE;
- *end = TASK_SIZE;
+ *end = TASK_SIZE;
}
-}
+}
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
@@ -78,11 +80,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct vm_area_struct *vma;
unsigned long start_addr;
unsigned long begin, end;
-
+
if (flags & MAP_FIXED)
return addr;
- find_start_end(flags, &begin, &end);
+ find_start_end(flags, &begin, &end);
if (len > end)
return -ENOMEM;
@@ -96,12 +98,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
}
if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32))
&& len <= mm->cached_hole_size) {
- mm->cached_hole_size = 0;
+ mm->cached_hole_size = 0;
mm->free_area_cache = begin;
}
addr = mm->free_area_cache;
- if (addr < begin)
- addr = begin;
+ if (addr < begin)
+ addr = begin;
start_addr = addr;
full_search:
@@ -127,7 +129,7 @@ full_search:
return addr;
}
if (addr + mm->cached_hole_size < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
+ mm->cached_hole_size = vma->vm_start - addr;
addr = vma->vm_end;
}
@@ -177,7 +179,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
vma = find_vma(mm, addr-len);
if (!vma || addr <= vma->vm_start)
/* remember the address as a hint for next time */
- return (mm->free_area_cache = addr-len);
+ return mm->free_area_cache = addr-len;
}
if (mm->mmap_base < len)
@@ -194,7 +196,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
vma = find_vma(mm, addr);
if (!vma || addr+len <= vma->vm_start)
/* remember the address as a hint for next time */
- return (mm->free_area_cache = addr);
+ return mm->free_area_cache = addr;
/* remember the largest hole we saw so far */
if (addr + mm->cached_hole_size < vma->vm_start)
@@ -224,13 +226,13 @@ bottomup:
}
-asmlinkage long sys_uname(struct new_utsname __user * name)
+asmlinkage long sys_uname(struct new_utsname __user *name)
{
int err;
down_read(&uts_sem);
- err = copy_to_user(name, utsname(), sizeof (*name));
+ err = copy_to_user(name, utsname(), sizeof(*name));
up_read(&uts_sem);
- if (personality(current->personality) == PER_LINUX32)
- err |= copy_to_user(&name->machine, "i686", 5);
+ if (personality(current->personality) == PER_LINUX32)
+ err |= copy_to_user(&name->machine, "i686", 5);
return err ? -EFAULT : 0;
}
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index 170d43c1748..3d1be4f0fac 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -8,12 +8,12 @@
#define __NO_STUBS
#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
#include <asm/unistd_64.h>
#undef __SYSCALL
#define __SYSCALL(nr, sym) [nr] = sym,
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
typedef void (*sys_call_ptr_t)(void);
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index ffe3c664afc..bbecf8b6bf9 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -36,6 +36,7 @@
#include <asm/arch_hooks.h>
#include <asm/hpet.h>
#include <asm/time.h>
+#include <asm/timer.h>
#include "do_timer.h"
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index ab6bf375a30..6bb7b8579e7 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -10,6 +10,7 @@
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/proto.h>
+#include <asm/syscalls.h>
#include "tls.h"
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 03df8e45e5a..da5a5964fcc 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1228,7 +1228,6 @@ void __init trap_init(void)
set_bit(SYSCALL_VECTOR, used_vectors);
- init_thread_xstate();
/*
* Should be a barrier for any external CPU state:
*/
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 513caaca711..56d6f114778 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -32,6 +32,8 @@
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/io.h>
#if defined(CONFIG_EDAC)
#include <linux/edac.h>
@@ -45,9 +47,6 @@
#include <asm/unwind.h>
#include <asm/desc.h>
#include <asm/i387.h>
-#include <asm/nmi.h>
-#include <asm/smp.h>
-#include <asm/io.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>
#include <asm/pda.h>
@@ -85,7 +84,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
void printk_address(unsigned long address, int reliable)
{
- printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address);
+ printk(" [<%016lx>] %s%pS\n",
+ address, reliable ? "" : "? ", (void *) address);
}
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
@@ -98,7 +98,8 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
[STACKFAULT_STACK - 1] = "#SS",
[MCE_STACK - 1] = "#MC",
#if DEBUG_STKSZ > EXCEPTION_STKSZ
- [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
+ [N_EXCEPTION_STACKS ...
+ N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
#endif
};
unsigned k;
@@ -163,7 +164,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
}
/*
- * x86-64 can have up to three kernel stacks:
+ * x86-64 can have up to three kernel stacks:
* process stack
* interrupt stack
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
@@ -219,7 +220,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
const struct stacktrace_ops *ops, void *data)
{
const unsigned cpu = get_cpu();
- unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
+ unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
unsigned used = 0;
struct thread_info *tinfo;
@@ -237,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (!bp) {
if (task == current) {
/* Grab bp right from our regs */
- asm("movq %%rbp, %0" : "=r" (bp) :);
+ asm("movq %%rbp, %0" : "=r" (bp) : );
} else {
/* bp is the last reg pushed by switch_to */
bp = *(unsigned long *) task->thread.sp;
@@ -357,11 +358,15 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack;
int i;
const int cpu = smp_processor_id();
- unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
- unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+ unsigned long *irqstack_end =
+ (unsigned long *) (cpu_pda(cpu)->irqstackptr);
+ unsigned long *irqstack =
+ (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
- // debugging aid: "show_stack(NULL, NULL);" prints the
- // back trace for this cpu.
+ /*
+ * debugging aid: "show_stack(NULL, NULL);" prints the
+ * back trace for this cpu.
+ */
if (sp == NULL) {
if (task)
@@ -404,7 +409,7 @@ void dump_stack(void)
#ifdef CONFIG_FRAME_POINTER
if (!bp)
- asm("movq %%rbp, %0" : "=r" (bp):);
+ asm("movq %%rbp, %0" : "=r" (bp) : );
#endif
printk("Pid: %d, comm: %.20s %s %s %.*s\n",
@@ -414,7 +419,6 @@ void dump_stack(void)
init_utsname()->version);
show_trace(NULL, NULL, &stack, bp);
}
-
EXPORT_SYMBOL(dump_stack);
void show_registers(struct pt_regs *regs)
@@ -493,7 +497,7 @@ unsigned __kprobes long oops_begin(void)
raw_local_irq_save(flags);
cpu = smp_processor_id();
if (!__raw_spin_trylock(&die_lock)) {
- if (cpu == die_owner)
+ if (cpu == die_owner)
/* nested oops. should stop eventually */;
else
__raw_spin_lock(&die_lock);
@@ -638,7 +642,7 @@ kernel_trap:
}
#define DO_ERROR(trapnr, signr, str, name) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+asmlinkage void do_##name(struct pt_regs *regs, long error_code) \
{ \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
== NOTIFY_STOP) \
@@ -648,7 +652,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
}
#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+asmlinkage void do_##name(struct pt_regs *regs, long error_code) \
{ \
siginfo_t info; \
info.si_signo = signr; \
@@ -683,7 +687,7 @@ asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
preempt_conditional_cli(regs);
}
-asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
+asmlinkage void do_double_fault(struct pt_regs *regs, long error_code)
{
static const char str[] = "double fault";
struct task_struct *tsk = current;
@@ -778,9 +782,10 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
}
static notrace __kprobes void
-unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
- if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+ if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
+ NOTIFY_STOP)
return;
printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
reason);
@@ -882,7 +887,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
else if (user_mode(eregs))
regs = task_pt_regs(current);
/* Exception from kernel and interrupts are enabled. Move to
- kernel process stack. */
+ kernel process stack. */
else if (eregs->flags & X86_EFLAGS_IF)
regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
if (eregs != regs)
@@ -891,7 +896,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
}
/* runs on IST stack. */
-asmlinkage void __kprobes do_debug(struct pt_regs * regs,
+asmlinkage void __kprobes do_debug(struct pt_regs *regs,
unsigned long error_code)
{
struct task_struct *tsk = current;
@@ -1035,7 +1040,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs)
asmlinkage void bad_intr(void)
{
- printk("bad interrupt");
+ printk("bad interrupt");
}
asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
@@ -1047,7 +1052,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
conditional_sti(regs);
if (!user_mode(regs) &&
- kernel_math_error(regs, "kernel simd math error", 19))
+ kernel_math_error(regs, "kernel simd math error", 19))
return;
/*
@@ -1092,7 +1097,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
force_sig_info(SIGFPE, &info, task);
}
-asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs)
+asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs)
{
}
@@ -1134,7 +1139,7 @@ asmlinkage void math_state_restore(void)
/*
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/
- if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) {
+ if (unlikely(restore_fpu_checking(me))) {
stts();
force_sig(SIGSEGV, me);
return;
@@ -1149,8 +1154,10 @@ void __init trap_init(void)
set_intr_gate(0, &divide_error);
set_intr_gate_ist(1, &debug, DEBUG_STACK);
set_intr_gate_ist(2, &nmi, NMI_STACK);
- set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */
- set_system_gate(4, &overflow); /* int4 can be called from all */
+ /* int3 can be called from all */
+ set_system_gate_ist(3, &int3, DEBUG_STACK);
+ /* int4 can be called from all */
+ set_system_gate(4, &overflow);
set_intr_gate(5, &bounds);
set_intr_gate(6, &invalid_op);
set_intr_gate(7, &device_not_available);
@@ -1173,10 +1180,6 @@ void __init trap_init(void)
set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
#endif
/*
- * initialize the per thread extended state:
- */
- init_thread_xstate();
- /*
* Should be a barrier for any external CPU state:
*/
cpu_init();
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 594ef47f0a6..61a97e616f7 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -25,45 +25,31 @@
#include <asm/visws/cobalt.h>
#include <asm/visws/piix4.h>
#include <asm/arch_hooks.h>
+#include <asm/io_apic.h>
#include <asm/fixmap.h>
#include <asm/reboot.h>
#include <asm/setup.h>
#include <asm/e820.h>
-#include <asm/smp.h>
#include <asm/io.h>
#include <mach_ipi.h>
#include "mach_apic.h"
-#include <linux/init.h>
-#include <linux/smp.h>
-
#include <linux/kernel_stat.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <asm/io.h>
-#include <asm/apic.h>
#include <asm/i8259.h>
#include <asm/irq_vectors.h>
-#include <asm/visws/cobalt.h>
#include <asm/visws/lithium.h>
-#include <asm/visws/piix4.h>
#include <linux/sched.h>
#include <linux/kernel.h>
-#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
extern int no_broadcast;
-#include <asm/io.h>
#include <asm/apic.h>
-#include <asm/arch_hooks.h>
-#include <asm/visws/cobalt.h>
-#include <asm/visws/lithium.h>
char visws_board_type = -1;
char visws_board_rev = -1;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 38f566fa27d..4eeb5cf9720 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -46,6 +46,7 @@
#include <asm/io.h>
#include <asm/tlbflush.h>
#include <asm/irq.h>
+#include <asm/syscalls.h>
/*
* Known problems:
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 6ca515d6db5..61531d5c950 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -905,8 +905,8 @@ static inline int __init activate_vmi(void)
#endif
#ifdef CONFIG_X86_LOCAL_APIC
- para_fill(pv_apic_ops.apic_read, APICRead);
- para_fill(pv_apic_ops.apic_write, APICWrite);
+ para_fill(apic_ops->read, APICRead);
+ para_fill(apic_ops->write, APICWrite);
#endif
/*
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
new file mode 100644
index 00000000000..07713d64deb
--- /dev/null
+++ b/arch/x86/kernel/xsave.c
@@ -0,0 +1,316 @@
+/*
+ * xsave/xrstor support.
+ *
+ * Author: Suresh Siddha <suresh.b.siddha@intel.com>
+ */
+#include <linux/bootmem.h>
+#include <linux/compat.h>
+#include <asm/i387.h>
+#ifdef CONFIG_IA32_EMULATION
+#include <asm/sigcontext32.h>
+#endif
+#include <asm/xcr.h>
+
+/*
+ * Supported feature mask by the CPU and the kernel.
+ */
+u64 pcntxt_mask;
+
+struct _fpx_sw_bytes fx_sw_reserved;
+#ifdef CONFIG_IA32_EMULATION
+struct _fpx_sw_bytes fx_sw_reserved_ia32;
+#endif
+
+/*
+ * Check for the presence of extended state information in the
+ * user fpstate pointer in the sigcontext.
+ */
+int check_for_xstate(struct i387_fxsave_struct __user *buf,
+ void __user *fpstate,
+ struct _fpx_sw_bytes *fx_sw_user)
+{
+ int min_xstate_size = sizeof(struct i387_fxsave_struct) +
+ sizeof(struct xsave_hdr_struct);
+ unsigned int magic2;
+ int err;
+
+ err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0],
+ sizeof(struct _fpx_sw_bytes));
+
+ if (err)
+ return err;
+
+ /*
+ * First Magic check failed.
+ */
+ if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1)
+ return -1;
+
+ /*
+ * Check for error scenarios.
+ */
+ if (fx_sw_user->xstate_size < min_xstate_size ||
+ fx_sw_user->xstate_size > xstate_size ||
+ fx_sw_user->xstate_size > fx_sw_user->extended_size)
+ return -1;
+
+ err = __get_user(magic2, (__u32 *) (((void *)fpstate) +
+ fx_sw_user->extended_size -
+ FP_XSTATE_MAGIC2_SIZE));
+ /*
+ * Check for the presence of second magic word at the end of memory
+ * layout. This detects the case where the user just copied the legacy
+ * fpstate layout with out copying the extended state information
+ * in the memory layout.
+ */
+ if (err || magic2 != FP_XSTATE_MAGIC2)
+ return -1;
+
+ return 0;
+}
+
+#ifdef CONFIG_X86_64
+/*
+ * Signal frame handlers.
+ */
+
+int save_i387_xstate(void __user *buf)
+{
+ struct task_struct *tsk = current;
+ int err = 0;
+
+ if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size))
+ return -EACCES;
+
+ BUG_ON(sig_xstate_size < xstate_size);
+
+ if ((unsigned long)buf % 64)
+ printk("save_i387_xstate: bad fpstate %p\n", buf);
+
+ if (!used_math())
+ return 0;
+ clear_used_math(); /* trigger finit */
+ if (task_thread_info(tsk)->status & TS_USEDFPU) {
+ /*
+ * Start with clearing the user buffer. This will present a
+ * clean context for the bytes not touched by the fxsave/xsave.
+ */
+ __clear_user(buf, sig_xstate_size);
+
+ if (task_thread_info(tsk)->status & TS_XSAVE)
+ err = xsave_user(buf);
+ else
+ err = fxsave_user(buf);
+
+ if (err)
+ return err;
+ task_thread_info(tsk)->status &= ~TS_USEDFPU;
+ stts();
+ } else {
+ if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
+ xstate_size))
+ return -1;
+ }
+
+ if (task_thread_info(tsk)->status & TS_XSAVE) {
+ struct _fpstate __user *fx = buf;
+
+ err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
+ sizeof(struct _fpx_sw_bytes));
+
+ err |= __put_user(FP_XSTATE_MAGIC2,
+ (__u32 __user *) (buf + sig_xstate_size
+ - FP_XSTATE_MAGIC2_SIZE));
+ }
+
+ return 1;
+}
+
+/*
+ * Restore the extended state if present. Otherwise, restore the FP/SSE
+ * state.
+ */
+int restore_user_xstate(void __user *buf)
+{
+ struct _fpx_sw_bytes fx_sw_user;
+ u64 mask;
+ int err;
+
+ if (((unsigned long)buf % 64) ||
+ check_for_xstate(buf, buf, &fx_sw_user))
+ goto fx_only;
+
+ mask = fx_sw_user.xstate_bv;
+
+ /*
+ * restore the state passed by the user.
+ */
+ err = xrestore_user(buf, mask);
+ if (err)
+ return err;
+
+ /*
+ * init the state skipped by the user.
+ */
+ mask = pcntxt_mask & ~mask;
+
+ xrstor_state(init_xstate_buf, mask);
+
+ return 0;
+
+fx_only:
+ /*
+ * couldn't find the extended state information in the
+ * memory layout. Restore just the FP/SSE and init all
+ * the other extended state.
+ */
+ xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE);
+ return fxrstor_checking((__force struct i387_fxsave_struct *)buf);
+}
+
+/*
+ * This restores directly out of user space. Exceptions are handled.
+ */
+int restore_i387_xstate(void __user *buf)
+{
+ struct task_struct *tsk = current;
+ int err = 0;
+
+ if (!buf) {
+ if (used_math())
+ goto clear;
+ return 0;
+ } else
+ if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
+ return -EACCES;
+
+ if (!used_math()) {
+ err = init_fpu(tsk);
+ if (err)
+ return err;
+ }
+
+ if (!(task_thread_info(current)->status & TS_USEDFPU)) {
+ clts();
+ task_thread_info(current)->status |= TS_USEDFPU;
+ }
+ if (task_thread_info(tsk)->status & TS_XSAVE)
+ err = restore_user_xstate(buf);
+ else
+ err = fxrstor_checking((__force struct i387_fxsave_struct *)
+ buf);
+ if (unlikely(err)) {
+ /*
+ * Encountered an error while doing the restore from the
+ * user buffer, clear the fpu state.
+ */
+clear:
+ clear_fpu(tsk);
+ clear_used_math();
+ }
+ return err;
+}
+#endif
+
+/*
+ * Prepare the SW reserved portion of the fxsave memory layout, indicating
+ * the presence of the extended state information in the memory layout
+ * pointed by the fpstate pointer in the sigcontext.
+ * This will be saved when ever the FP and extended state context is
+ * saved on the user stack during the signal handler delivery to the user.
+ */
+void prepare_fx_sw_frame(void)
+{
+ int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) +
+ FP_XSTATE_MAGIC2_SIZE;
+
+ sig_xstate_size = sizeof(struct _fpstate) + size_extended;
+
+#ifdef CONFIG_IA32_EMULATION
+ sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended;
+#endif
+
+ memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved));
+
+ fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
+ fx_sw_reserved.extended_size = sig_xstate_size;
+ fx_sw_reserved.xstate_bv = pcntxt_mask;
+ fx_sw_reserved.xstate_size = xstate_size;
+#ifdef CONFIG_IA32_EMULATION
+ memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved,
+ sizeof(struct _fpx_sw_bytes));
+ fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size;
+#endif
+}
+
+/*
+ * Represents init state for the supported extended state.
+ */
+struct xsave_struct *init_xstate_buf;
+
+#ifdef CONFIG_X86_64
+unsigned int sig_xstate_size = sizeof(struct _fpstate);
+#endif
+
+/*
+ * Enable the extended processor state save/restore feature
+ */
+void __cpuinit xsave_init(void)
+{
+ if (!cpu_has_xsave)
+ return;
+
+ set_in_cr4(X86_CR4_OSXSAVE);
+
+ /*
+ * Enable all the features that the HW is capable of
+ * and the Linux kernel is aware of.
+ */
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
+}
+
+/*
+ * setup the xstate image representing the init state
+ */
+void setup_xstate_init(void)
+{
+ init_xstate_buf = alloc_bootmem(xstate_size);
+ init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
+}
+
+/*
+ * Enable and initialize the xsave feature.
+ */
+void __init xsave_cntxt_init(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+ pcntxt_mask = eax + ((u64)edx << 32);
+
+ if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
+ printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n",
+ pcntxt_mask);
+ BUG();
+ }
+
+ /*
+ * for now OS knows only about FP/SSE
+ */
+ pcntxt_mask = pcntxt_mask & XCNTXT_MASK;
+ xsave_init();
+
+ /*
+ * Recompute the context size for enabled features
+ */
+ cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+ xstate_size = ebx;
+
+ prepare_fx_sw_frame();
+
+ setup_xstate_init();
+
+ printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, "
+ "cntxt size 0x%x\n",
+ pcntxt_mask, xstate_size);
+}