summaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel
diff options
context:
space:
mode:
authorDave Kleikamp <shaggy@austin.ibm.com>2005-07-13 08:57:38 -0500
committerDave Kleikamp <shaggy@austin.ibm.com>2005-07-13 08:57:38 -0500
commitf7f24758ac98a506770bc5910d33567610fa3403 (patch)
treeff7fad3d01bf9dc2e2e54b908f9fca4891e1ee72 /arch/i386/kernel
parentb38a3ab3d1bb0dc3288f73903d4dc4672b5cd2d0 (diff)
parentc32511e2718618f0b53479eb36e07439aa363a74 (diff)
Merge with /home/shaggy/git/linus-clean/
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--arch/i386/kernel/Makefile1
-rw-r--r--arch/i386/kernel/acpi/Makefile4
-rw-r--r--arch/i386/kernel/acpi/boot.c277
-rw-r--r--arch/i386/kernel/acpi/cstate.c103
-rw-r--r--arch/i386/kernel/acpi/sleep.c27
-rw-r--r--arch/i386/kernel/acpi/wakeup.S5
-rw-r--r--arch/i386/kernel/apic.c82
-rw-r--r--arch/i386/kernel/apm.c13
-rw-r--r--arch/i386/kernel/cpu/common.c50
-rw-r--r--arch/i386/kernel/cpu/cpufreq/gx-suspmod.c2
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k7.c4
-rw-r--r--arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c2
-rw-r--r--arch/i386/kernel/cpu/intel.c14
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c4
-rw-r--r--arch/i386/kernel/cpu/mcheck/k7.c2
-rw-r--r--arch/i386/kernel/cpu/mcheck/mce.c4
-rw-r--r--arch/i386/kernel/cpu/mcheck/p4.c4
-rw-r--r--arch/i386/kernel/cpu/mcheck/p5.c2
-rw-r--r--arch/i386/kernel/cpu/mcheck/p6.c2
-rw-r--r--arch/i386/kernel/cpu/mcheck/winchip.c2
-rw-r--r--arch/i386/kernel/cpu/mtrr/generic.c23
-rw-r--r--arch/i386/kernel/cpu/mtrr/main.c76
-rw-r--r--arch/i386/kernel/cpu/mtrr/mtrr.h1
-rw-r--r--arch/i386/kernel/crash.c223
-rw-r--r--arch/i386/kernel/dmi_scan.c391
-rw-r--r--arch/i386/kernel/efi.c4
-rw-r--r--arch/i386/kernel/head.S6
-rw-r--r--arch/i386/kernel/i8259.c12
-rw-r--r--arch/i386/kernel/io_apic.c45
-rw-r--r--arch/i386/kernel/irq.c72
-rw-r--r--arch/i386/kernel/kprobes.c133
-rw-r--r--arch/i386/kernel/machine_kexec.c226
-rw-r--r--arch/i386/kernel/mpparse.c2
-rw-r--r--arch/i386/kernel/process.c68
-rw-r--r--arch/i386/kernel/reboot.c82
-rw-r--r--arch/i386/kernel/relocate_kernel.S120
-rw-r--r--arch/i386/kernel/setup.c69
-rw-r--r--arch/i386/kernel/signal.c4
-rw-r--r--arch/i386/kernel/smp.c34
-rw-r--r--arch/i386/kernel/smpboot.c359
-rw-r--r--arch/i386/kernel/syscall_table.S7
-rw-r--r--arch/i386/kernel/sysenter.c12
-rw-r--r--arch/i386/kernel/time.c7
-rw-r--r--arch/i386/kernel/time_hpet.c2
-rw-r--r--arch/i386/kernel/timers/common.c2
-rw-r--r--arch/i386/kernel/timers/timer_cyclone.c4
-rw-r--r--arch/i386/kernel/timers/timer_hpet.c4
-rw-r--r--arch/i386/kernel/timers/timer_pit.c4
-rw-r--r--arch/i386/kernel/timers/timer_tsc.c5
-rw-r--r--arch/i386/kernel/traps.c51
-rw-r--r--arch/i386/kernel/vmlinux.lds.S62
51 files changed, 1989 insertions, 725 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 51ecd512603..4cc83b322b3 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_X86_NUMAQ) += numaq.o
obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o
obj-$(CONFIG_KPROBES) += kprobes.o
diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile
index ee75cb286cf..5e291a20c03 100644
--- a/arch/i386/kernel/acpi/Makefile
+++ b/arch/i386/kernel/acpi/Makefile
@@ -2,3 +2,7 @@ obj-$(CONFIG_ACPI_BOOT) := boot.o
obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o
obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o
+ifneq ($(CONFIG_ACPI_PROCESSOR),)
+obj-y += cstate.o
+endif
+
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index 848bb97af7c..b7808a89d94 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -29,6 +29,7 @@
#include <linux/efi.h>
#include <linux/irq.h>
#include <linux/module.h>
+#include <linux/dmi.h>
#include <asm/pgtable.h>
#include <asm/io_apic.h>
@@ -158,9 +159,15 @@ char *__acpi_map_table(unsigned long phys, unsigned long size)
#endif
#ifdef CONFIG_PCI_MMCONFIG
-static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
+struct acpi_table_mcfg_config *pci_mmcfg_config;
+int pci_mmcfg_config_num;
+
+int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
{
struct acpi_table_mcfg *mcfg;
+ unsigned long i;
+ int config_size;
if (!phys_addr || !size)
return -EINVAL;
@@ -171,18 +178,38 @@ static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
return -ENODEV;
}
- if (mcfg->base_reserved) {
- printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
+ /* how many config structures do we have */
+ pci_mmcfg_config_num = 0;
+ i = size - sizeof(struct acpi_table_mcfg);
+ while (i >= sizeof(struct acpi_table_mcfg_config)) {
+ ++pci_mmcfg_config_num;
+ i -= sizeof(struct acpi_table_mcfg_config);
+ };
+ if (pci_mmcfg_config_num == 0) {
+ printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
return -ENODEV;
}
- pci_mmcfg_base_addr = mcfg->base_address;
+ config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
+ pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
+ if (!pci_mmcfg_config) {
+ printk(KERN_WARNING PREFIX
+ "No memory for MCFG config tables\n");
+ return -ENOMEM;
+ }
+
+ memcpy(pci_mmcfg_config, &mcfg->config, config_size);
+ for (i = 0; i < pci_mmcfg_config_num; ++i) {
+ if (mcfg->config[i].base_reserved) {
+ printk(KERN_ERR PREFIX
+ "MMCONFIG not in low 4GB of memory\n");
+ return -ENODEV;
+ }
+ }
return 0;
}
-#else
-#define acpi_parse_mcfg NULL
-#endif /* !CONFIG_PCI_MMCONFIG */
+#endif /* CONFIG_PCI_MMCONFIG */
#ifdef CONFIG_X86_LOCAL_APIC
static int __init
@@ -506,6 +533,22 @@ acpi_unmap_lsapic(int cpu)
EXPORT_SYMBOL(acpi_unmap_lsapic);
#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+int
+acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
+{
+ /* TBD */
+ return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_register_ioapic);
+
+int
+acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
+{
+ /* TBD */
+ return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_unregister_ioapic);
+
static unsigned long __init
acpi_scan_rsdp (
unsigned long start,
@@ -815,6 +858,219 @@ acpi_process_madt(void)
return;
}
+extern int acpi_force;
+
+#ifdef __i386__
+
+#ifdef CONFIG_ACPI_PCI
+static int __init disable_acpi_irq(struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
+ d->ident);
+ acpi_noirq_set();
+ }
+ return 0;
+}
+
+static int __init disable_acpi_pci(struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
+ d->ident);
+ acpi_disable_pci();
+ }
+ return 0;
+}
+#endif
+
+static int __init dmi_disable_acpi(struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: acpi off\n",d->ident);
+ disable_acpi();
+ } else {
+ printk(KERN_NOTICE
+ "Warning: DMI blacklist says broken, but acpi forced\n");
+ }
+ return 0;
+}
+
+/*
+ * Limit ACPI to CPU enumeration for HT
+ */
+static int __init force_acpi_ht(struct dmi_system_id *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident);
+ disable_acpi();
+ acpi_ht = 1;
+ } else {
+ printk(KERN_NOTICE
+ "Warning: acpi=force overrules DMI blacklist: acpi=ht\n");
+ }
+ return 0;
+}
+
+/*
+ * If your system is blacklisted here, but you find that acpi=force
+ * works for you, please contact acpi-devel@sourceforge.net
+ */
+static struct dmi_system_id __initdata acpi_dmi_table[] = {
+ /*
+ * Boxes that need ACPI disabled
+ */
+ {
+ .callback = dmi_disable_acpi,
+ .ident = "IBM Thinkpad",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "2629H1G"),
+ },
+ },
+
+ /*
+ * Boxes that need acpi=ht
+ */
+ {
+ .callback = force_acpi_ht,
+ .ident = "FSC Primergy T850",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "DELL GX240",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
+ DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "HP VISUALIZE NT Workstation",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "Compaq Workstation W8000",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "ASUS P4B266",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "P4B266"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "ASUS P2B-DS",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "ASUS CUR-DLS",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "ABIT i440BX-W83977",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
+ DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "IBM Bladecenter",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "IBM eServer xSeries 360",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "IBM eserver xSeries 330",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
+ },
+ },
+ {
+ .callback = force_acpi_ht,
+ .ident = "IBM eserver xSeries 440",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
+ },
+ },
+
+#ifdef CONFIG_ACPI_PCI
+ /*
+ * Boxes that need ACPI PCI IRQ routing disabled
+ */
+ {
+ .callback = disable_acpi_irq,
+ .ident = "ASUS A7V",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
+ DMI_MATCH(DMI_BOARD_NAME, "<A7V>"),
+ /* newer BIOS, Revision 1011, does work */
+ DMI_MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"),
+ },
+ },
+
+ /*
+ * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
+ */
+ { /* _BBN 0 bug */
+ .callback = disable_acpi_pci,
+ .ident = "ASUS PR-DLS",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"),
+ DMI_MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"),
+ DMI_MATCH(DMI_BIOS_DATE, "03/21/2003")
+ },
+ },
+ {
+ .callback = disable_acpi_pci,
+ .ident = "Acer TravelMate 36x Laptop",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
+ },
+ },
+#endif
+ { }
+};
+
+#endif /* __i386__ */
+
/*
* acpi_boot_table_init() and acpi_boot_init()
* called from setup_arch(), always.
@@ -843,6 +1099,10 @@ acpi_boot_table_init(void)
{
int error;
+#ifdef __i386__
+ dmi_check_system(acpi_dmi_table);
+#endif
+
/*
* If acpi_disabled, bail out
* One exception: acpi=ht continues far enough to enumerate LAPICs
@@ -870,8 +1130,6 @@ acpi_boot_table_init(void)
*/
error = acpi_blacklisted();
if (error) {
- extern int acpi_force;
-
if (acpi_force) {
printk(KERN_WARNING PREFIX "acpi=force override\n");
} else {
@@ -907,7 +1165,6 @@ int __init acpi_boot_init(void)
acpi_process_madt();
acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
- acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
return 0;
}
diff --git a/arch/i386/kernel/acpi/cstate.c b/arch/i386/kernel/acpi/cstate.c
new file mode 100644
index 00000000000..4c3036ba65d
--- /dev/null
+++ b/arch/i386/kernel/acpi/cstate.c
@@ -0,0 +1,103 @@
+/*
+ * arch/i386/kernel/acpi/cstate.c
+ *
+ * Copyright (C) 2005 Intel Corporation
+ * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ * - Added _PDC for SMP C-states on Intel CPUs
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+
+#include <acpi/processor.h>
+#include <asm/acpi.h>
+
+static void acpi_processor_power_init_intel_pdc(struct acpi_processor_power
+ *pow)
+{
+ struct acpi_object_list *obj_list;
+ union acpi_object *obj;
+ u32 *buf;
+
+ /* allocate and initialize pdc. It will be used later. */
+ obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL);
+ if (!obj_list) {
+ printk(KERN_ERR "Memory allocation error\n");
+ return;
+ }
+
+ obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL);
+ if (!obj) {
+ printk(KERN_ERR "Memory allocation error\n");
+ kfree(obj_list);
+ return;
+ }
+
+ buf = kmalloc(12, GFP_KERNEL);
+ if (!buf) {
+ printk(KERN_ERR "Memory allocation error\n");
+ kfree(obj);
+ kfree(obj_list);
+ return;
+ }
+
+ buf[0] = ACPI_PDC_REVISION_ID;
+ buf[1] = 1;
+ buf[2] = ACPI_PDC_C_CAPABILITY_SMP;
+
+ obj->type = ACPI_TYPE_BUFFER;
+ obj->buffer.length = 12;
+ obj->buffer.pointer = (u8 *) buf;
+ obj_list->count = 1;
+ obj_list->pointer = obj;
+ pow->pdc = obj_list;
+
+ return;
+}
+
+/* Initialize _PDC data based on the CPU vendor */
+void acpi_processor_power_init_pdc(struct acpi_processor_power *pow,
+ unsigned int cpu)
+{
+ struct cpuinfo_x86 *c = cpu_data + cpu;
+
+ pow->pdc = NULL;
+ if (c->x86_vendor == X86_VENDOR_INTEL)
+ acpi_processor_power_init_intel_pdc(pow);
+
+ return;
+}
+
+EXPORT_SYMBOL(acpi_processor_power_init_pdc);
+
+/*
+ * Initialize bm_flags based on the CPU cache properties
+ * On SMP it depends on cache configuration
+ * - When cache is not shared among all CPUs, we flush cache
+ * before entering C3.
+ * - When cache is shared among all CPUs, we use bm_check
+ * mechanism as in UP case
+ *
+ * This routine is called only after all the CPUs are online
+ */
+void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
+ unsigned int cpu)
+{
+ struct cpuinfo_x86 *c = cpu_data + cpu;
+
+ flags->bm_check = 0;
+ if (num_online_cpus() == 1)
+ flags->bm_check = 1;
+ else if (c->x86_vendor == X86_VENDOR_INTEL) {
+ /*
+ * Today all CPUs that support C3 share cache.
+ * TBD: This needs to look at cache shared map, once
+ * multi-core detection patch makes to the base.
+ */
+ flags->bm_check = 1;
+ }
+}
+
+EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c
index 28bb0514bb6..c1af93032ff 100644
--- a/arch/i386/kernel/acpi/sleep.c
+++ b/arch/i386/kernel/acpi/sleep.c
@@ -7,6 +7,7 @@
#include <linux/acpi.h>
#include <linux/bootmem.h>
+#include <linux/dmi.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>
@@ -91,3 +92,29 @@ static int __init acpi_sleep_setup(char *str)
__setup("acpi_sleep=", acpi_sleep_setup);
+
+
+static __init int reset_videomode_after_s3(struct dmi_system_id *d)
+{
+ acpi_video_flags |= 2;
+ return 0;
+}
+
+static __initdata struct dmi_system_id acpisleep_dmi_table[] = {
+ { /* Reset video mode after returning from ACPI S3 sleep */
+ .callback = reset_videomode_after_s3,
+ .ident = "Toshiba Satellite 4030cdt",
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
+ },
+ },
+ { }
+};
+
+static int __init acpisleep_dmi_init(void)
+{
+ dmi_check_system(acpisleep_dmi_table);
+ return 0;
+}
+
+core_initcall(acpisleep_dmi_init);
diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S
index 39d32484f6f..44d886c745e 100644
--- a/arch/i386/kernel/acpi/wakeup.S
+++ b/arch/i386/kernel/acpi/wakeup.S
@@ -74,8 +74,9 @@ wakeup_code:
movw %ax,%fs
movw $0x0e00 + 'i', %fs:(0x12)
- # need a gdt
- lgdt real_save_gdt - wakeup_code
+ # need a gdt -- use lgdtl to force 32-bit operands, in case
+ # the GDT is located past 16 megabytes.
+ lgdtl real_save_gdt - wakeup_code
movl real_save_cr0 - wakeup_code, %eax
movl %eax, %cr0
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 8d993fa7175..bd1dbf3bd22 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -26,6 +26,7 @@
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>
#include <linux/sysdev.h>
+#include <linux/cpu.h>
#include <asm/atomic.h>
#include <asm/smp.h>
@@ -34,12 +35,18 @@
#include <asm/desc.h>
#include <asm/arch_hooks.h>
#include <asm/hpet.h>
+#include <asm/i8253.h>
#include <mach_apic.h>
#include "io_ports.h"
/*
+ * Knob to control our willingness to enable the local APIC.
+ */
+int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+
+/*
* Debug level
*/
int apic_verbosity;
@@ -205,7 +212,7 @@ void __init connect_bsp_APIC(void)
enable_apic_mode();
}
-void disconnect_bsp_APIC(void)
+void disconnect_bsp_APIC(int virt_wire_setup)
{
if (pic_mode) {
/*
@@ -219,6 +226,42 @@ void disconnect_bsp_APIC(void)
outb(0x70, 0x22);
outb(0x00, 0x23);
}
+ else {
+ /* Go back to Virtual Wire compatibility mode */
+ unsigned long value;
+
+ /* For the spurious interrupt use vector F, and enable it */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_VECTOR_MASK;
+ value |= APIC_SPIV_APIC_ENABLED;
+ value |= 0xf;
+ apic_write_around(APIC_SPIV, value);
+
+ if (!virt_wire_setup) {
+ /* For LVT0 make it edge triggered, active high, external and enabled */
+ value = apic_read(APIC_LVT0);
+ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
+ apic_write_around(APIC_LVT0, value);
+ }
+ else {
+ /* Disable LVT0 */
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
+ }
+
+ /* For LVT1 make it edge triggered, active high, nmi and enabled */
+ value = apic_read(APIC_LVT1);
+ value &= ~(
+ APIC_MODE_MASK | APIC_SEND_PENDING |
+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
+ apic_write_around(APIC_LVT1, value);
+ }
}
void disable_local_APIC(void)
@@ -363,7 +406,7 @@ void __init init_bsp_APIC(void)
apic_write_around(APIC_LVT1, value);
}
-void __init setup_local_APIC (void)
+void __devinit setup_local_APIC(void)
{
unsigned long oldvalue, value, ver, maxlvt;
@@ -634,7 +677,7 @@ static struct sys_device device_lapic = {
.cls = &lapic_sysclass,
};
-static void __init apic_pm_activate(void)
+static void __devinit apic_pm_activate(void)
{
apic_pm_state.active = 1;
}
@@ -665,26 +708,6 @@ static void apic_pm_activate(void) { }
* Original code written by Keir Fraser.
*/
-/*
- * Knob to control our willingness to enable the local APIC.
- */
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
-
-static int __init lapic_disable(char *str)
-{
- enable_local_apic = -1;
- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
- return 0;
-}
-__setup("nolapic", lapic_disable);
-
-static int __init lapic_enable(char *str)
-{
- enable_local_apic = 1;
- return 0;
-}
-__setup("lapic", lapic_enable);
-
static int __init apic_set_verbosity(char *str)
{
if (strcmp("debug", str) == 0)
@@ -855,9 +878,8 @@ fake_ioapic_page:
* but we do not accept timer interrupts yet. We only allow the BP
* to calibrate.
*/
-static unsigned int __init get_8254_timer_count(void)
+static unsigned int __devinit get_8254_timer_count(void)
{
- extern spinlock_t i8253_lock;
unsigned long flags;
unsigned int count;
@@ -874,7 +896,7 @@ static unsigned int __init get_8254_timer_count(void)
}
/* next tick in 8254 can be caught by catching timer wraparound */
-static void __init wait_8254_wraparound(void)
+static void __devinit wait_8254_wraparound(void)
{
unsigned int curr_count, prev_count;
@@ -894,7 +916,7 @@ static void __init wait_8254_wraparound(void)
* Default initialization for 8254 timers. If we use other timers like HPET,
* we override this later
*/
-void (*wait_timer_tick)(void) __initdata = wait_8254_wraparound;
+void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound;
/*
* This function sets up the local APIC timer, with a timeout of
@@ -930,7 +952,7 @@ static void __setup_APIC_LVTT(unsigned int clocks)
apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
}
-static void __init setup_APIC_timer(unsigned int clocks)
+static void __devinit setup_APIC_timer(unsigned int clocks)
{
unsigned long flags;
@@ -1043,12 +1065,12 @@ void __init setup_boot_APIC_clock(void)
local_irq_enable();
}
-void __init setup_secondary_APIC_clock(void)
+void __devinit setup_secondary_APIC_clock(void)
{
setup_APIC_timer(calibration_result);
}
-void __init disable_APIC_timer(void)
+void __devinit disable_APIC_timer(void)
{
if (using_apic_timer) {
unsigned long v;
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 0ff65abcd56..064211d5f41 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -228,10 +228,10 @@
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/desc.h>
+#include <asm/i8253.h>
#include "io_ports.h"
-extern spinlock_t i8253_lock;
extern unsigned long get_cmos_time(void);
extern void machine_real_restart(unsigned char *, int);
@@ -346,10 +346,10 @@ extern int (*console_blank_hook)(int);
struct apm_user {
int magic;
struct apm_user * next;
- int suser: 1;
- int writer: 1;
- int reader: 1;
- int suspend_wait: 1;
+ unsigned int suser: 1;
+ unsigned int writer: 1;
+ unsigned int reader: 1;
+ unsigned int suspend_wait: 1;
int suspend_result;
int suspends_pending;
int standbys_pending;
@@ -1168,8 +1168,7 @@ static void get_time_diff(void)
static void reinit_timer(void)
{
#ifdef INIT_TIMER_AFTER_SUSPEND
- unsigned long flags;
- extern spinlock_t i8253_lock;
+ unsigned long flags;
spin_lock_irqsave(&i8253_lock, flags);
/* set the clock to 100 Hz */
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index b9954248d0a..4553ffd94b1 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -24,9 +24,9 @@ EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
-static int cachesize_override __initdata = -1;
-static int disable_x86_fxsr __initdata = 0;
-static int disable_x86_serial_nr __initdata = 1;
+static int cachesize_override __devinitdata = -1;
+static int disable_x86_fxsr __devinitdata = 0;
+static int disable_x86_serial_nr __devinitdata = 1;
struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
@@ -59,7 +59,7 @@ static int __init cachesize_setup(char *str)
}
__setup("cachesize=", cachesize_setup);
-int __init get_model_name(struct cpuinfo_x86 *c)
+int __devinit get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;
char *p, *q;
@@ -89,7 +89,7 @@ int __init get_model_name(struct cpuinfo_x86 *c)
}
-void __init display_cacheinfo(struct cpuinfo_x86 *c)
+void __devinit display_cacheinfo(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ecx, edx, l2size;
@@ -130,7 +130,7 @@ void __init display_cacheinfo(struct cpuinfo_x86 *c)
/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
/* Look up CPU names by table lookup. */
-static char __init *table_lookup_model(struct cpuinfo_x86 *c)
+static char __devinit *table_lookup_model(struct cpuinfo_x86 *c)
{
struct cpu_model_info *info;
@@ -151,7 +151,7 @@ static char __init *table_lookup_model(struct cpuinfo_x86 *c)
}
-void __init get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
{
char *v = c->x86_vendor_id;
int i;
@@ -202,7 +202,7 @@ static inline int flag_is_changeable_p(u32 flag)
/* Probe for the CPUID instruction */
-static int __init have_cpuid_p(void)
+static int __devinit have_cpuid_p(void)
{
return flag_is_changeable_p(X86_EFLAGS_ID);
}
@@ -249,7 +249,7 @@ static void __init early_cpu_detect(void)
#endif
}
-void __init generic_identify(struct cpuinfo_x86 * c)
+void __devinit generic_identify(struct cpuinfo_x86 * c)
{
u32 tfms, xlvl;
int junk;
@@ -296,7 +296,7 @@ void __init generic_identify(struct cpuinfo_x86 * c)
}
}
-static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
/* Disable processor serial number */
@@ -324,7 +324,7 @@ __setup("serialnumber", x86_serial_nr_setup);
/*
* This does the hard work of actually picking apart the CPU stuff...
*/
-void __init identify_cpu(struct cpuinfo_x86 *c)
+void __devinit identify_cpu(struct cpuinfo_x86 *c)
{
int i;
@@ -432,10 +432,18 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_MCE
mcheck_init(c);
#endif
+ if (c == &boot_cpu_data)
+ sysenter_setup();
+ enable_sep_cpu();
+
+ if (c == &boot_cpu_data)
+ mtrr_bp_init();
+ else
+ mtrr_ap_init();
}
#ifdef CONFIG_X86_HT
-void __init detect_ht(struct cpuinfo_x86 *c)
+void __devinit detect_ht(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
int index_msb, tmp;
@@ -490,7 +498,7 @@ void __init detect_ht(struct cpuinfo_x86 *c)
}
#endif
-void __init print_cpu_info(struct cpuinfo_x86 *c)
+void __devinit print_cpu_info(struct cpuinfo_x86 *c)
{
char *vendor = NULL;
@@ -513,7 +521,7 @@ void __init print_cpu_info(struct cpuinfo_x86 *c)
printk("\n");
}
-cpumask_t cpu_initialized __initdata = CPU_MASK_NONE;
+cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE;
/* This is hacky. :)
* We're emulating future behavior.
@@ -560,7 +568,7 @@ void __init early_cpu_init(void)
* and IDT. We reload them nevertheless, this function acts as a
* 'CPU state barrier', nothing should get across.
*/
-void __init cpu_init (void)
+void __devinit cpu_init(void)
{
int cpu = smp_processor_id();
struct tss_struct * t = &per_cpu(init_tss, cpu);
@@ -648,3 +656,15 @@ void __init cpu_init (void)
clear_used_math();
mxcsr_feature_mask_init();
}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void __devinit cpu_uninit(void)
+{
+ int cpu = raw_smp_processor_id();
+ cpu_clear(cpu, cpu_initialized);
+
+ /* lazy TLB state */
+ per_cpu(cpu_tlbstate, cpu).state = 0;
+ per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
+}
+#endif
diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
index 1a49adb1f4a..e86ea486c31 100644
--- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
@@ -190,7 +190,7 @@ static __init struct pci_dev *gx_detect_chipset(void)
/* detect which companion chip is used */
while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) {
- if ((pci_match_device (gx_chipset_tbl, gx_pci)) != NULL) {
+ if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) {
return gx_pci;
}
}
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
index 5c530064eb7..73a5dc5b26b 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
@@ -648,9 +648,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) {
}
#endif
- if (powernow_table)
- kfree(powernow_table);
-
+ kfree(powernow_table);
return 0;
}
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
index 7dcbf70fc16..327a55d4d1c 100644
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -375,7 +375,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
arg0.buffer.pointer = (u8 *) arg0_buf;
arg0_buf[0] = ACPI_PDC_REVISION_ID;
arg0_buf[1] = 1;
- arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_MSR;
+ arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP_MSR;
p.pdc = &arg_list;
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 121aa2176e6..a2c33c1a46c 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -25,10 +25,10 @@ extern int trap_init_f00f_bug(void);
/*
* Alignment at which movsl is preferred for bulk memory copies.
*/
-struct movsl_mask movsl_mask;
+struct movsl_mask movsl_mask __read_mostly;
#endif
-void __init early_intel_workaround(struct cpuinfo_x86 *c)
+void __devinit early_intel_workaround(struct cpuinfo_x86 *c)
{
if (c->x86_vendor != X86_VENDOR_INTEL)
return;
@@ -43,7 +43,7 @@ void __init early_intel_workaround(struct cpuinfo_x86 *c)
* This is called before we do cpu ident work
*/
-int __init ppro_with_ram_bug(void)
+int __devinit ppro_with_ram_bug(void)
{
/* Uses data from early_cpu_detect now */
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -61,7 +61,7 @@ int __init ppro_with_ram_bug(void)
* P4 Xeon errata 037 workaround.
* Hardware prefetcher may cause stale data to be loaded into the cache.
*/
-static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c)
+static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
{
unsigned long lo, hi;
@@ -80,7 +80,7 @@ static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c)
/*
* find out the number of processor cores on the die
*/
-static int __init num_cpu_cores(struct cpuinfo_x86 *c)
+static int __devinit num_cpu_cores(struct cpuinfo_x86 *c)
{
unsigned int eax;
@@ -98,7 +98,7 @@ static int __init num_cpu_cores(struct cpuinfo_x86 *c)
return 1;
}
-static void __init init_intel(struct cpuinfo_x86 *c)
+static void __devinit init_intel(struct cpuinfo_x86 *c)
{
unsigned int l2 = 0;
char *p = NULL;
@@ -204,7 +204,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
return size;
}
-static struct cpu_dev intel_cpu_dev __initdata = {
+static struct cpu_dev intel_cpu_dev __devinitdata = {
.c_vendor = "Intel",
.c_ident = { "GenuineIntel" },
.c_models = {
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index a710dc4eb20..1d768b26326 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -28,7 +28,7 @@ struct _cache_table
};
/* all the cache descriptor types we care about (no TLB or trace cache entries) */
-static struct _cache_table cache_table[] __initdata =
+static struct _cache_table cache_table[] __devinitdata =
{
{ 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
{ 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
@@ -160,7 +160,7 @@ static int __init find_num_cache_leaves(void)
return retval;
}
-unsigned int __init init_intel_cacheinfo(struct cpuinfo_x86 *c)
+unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c
index 8df52e86c4d..c4abe765739 100644
--- a/arch/i386/kernel/cpu/mcheck/k7.c
+++ b/arch/i386/kernel/cpu/mcheck/k7.c
@@ -69,7 +69,7 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
/* AMD K7 machine check is Intel like */
-void __init amd_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit amd_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c
index bf6d1aefafc..2cf25d2ba0f 100644
--- a/arch/i386/kernel/cpu/mcheck/mce.c
+++ b/arch/i386/kernel/cpu/mcheck/mce.c
@@ -16,7 +16,7 @@
#include "mce.h"
-int mce_disabled __initdata = 0;
+int mce_disabled __devinitdata = 0;
int nr_mce_banks;
EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
@@ -31,7 +31,7 @@ static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_
void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
/* This has to be run for each processor */
-void __init mcheck_init(struct cpuinfo_x86 *c)
+void __devinit mcheck_init(struct cpuinfo_x86 *c)
{
if (mce_disabled==1)
return;
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index 8b16ceb929b..0abccb6fdf9 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -78,7 +78,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs)
}
/* P4/Xeon Thermal regulation detect and init */
-static void __init intel_init_thermal(struct cpuinfo_x86 *c)
+static void __devinit intel_init_thermal(struct cpuinfo_x86 *c)
{
u32 l, h;
unsigned int cpu = smp_processor_id();
@@ -232,7 +232,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
}
-void __init intel_p4_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit intel_p4_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c
index c45a1b485c8..ec0614cd292 100644
--- a/arch/i386/kernel/cpu/mcheck/p5.c
+++ b/arch/i386/kernel/cpu/mcheck/p5.c
@@ -29,7 +29,7 @@ static fastcall void pentium_machine_check(struct pt_regs * regs, long error_cod
}
/* Set up machine check reporting for processors with Intel style MCE */
-void __init intel_p5_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit intel_p5_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c
index 46640f8c249..f01b73f947e 100644
--- a/arch/i386/kernel/cpu/mcheck/p6.c
+++ b/arch/i386/kernel/cpu/mcheck/p6.c
@@ -80,7 +80,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
}
/* Set up machine check reporting for processors with Intel style MCE */
-void __init intel_p6_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c
index 753fa7acb98..7bae68fa168 100644
--- a/arch/i386/kernel/cpu/mcheck/winchip.c
+++ b/arch/i386/kernel/cpu/mcheck/winchip.c
@@ -23,7 +23,7 @@ static fastcall void winchip_machine_check(struct pt_regs * regs, long error_cod
}
/* Set up machine check reporting on the Winchip C6 series */
-void __init winchip_mcheck_init(struct cpuinfo_x86 *c)
+void __devinit winchip_mcheck_init(struct cpuinfo_x86 *c)
{
u32 lo, hi;
machine_check_vector = winchip_machine_check;
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index f468a979e9a..169ac8e0db6 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -67,14 +67,6 @@ void __init get_mtrr_state(void)
mtrr_state.enabled = (lo & 0xc00) >> 10;
}
-/* Free resources associated with a struct mtrr_state */
-void __init finalize_mtrr_state(void)
-{
- if (mtrr_state.var_ranges)
- kfree(mtrr_state.var_ranges);
- mtrr_state.var_ranges = NULL;
-}
-
/* Some BIOS's are fucked and don't set all MTRRs the same! */
void __init mtrr_state_warn(void)
{
@@ -335,6 +327,9 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
*/
{
unsigned long flags;
+ struct mtrr_var_range *vr;
+
+ vr = &mtrr_state.var_ranges[reg];
local_irq_save(flags);
prepare_set();
@@ -343,11 +338,15 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
/* The invalid bit is kept in the mask, so we simply clear the
relevant mask register to disable a range. */
mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0);
+ memset(vr, 0, sizeof(struct mtrr_var_range));
} else {
- mtrr_wrmsr(MTRRphysBase_MSR(reg), base << PAGE_SHIFT | type,
- (base & size_and_mask) >> (32 - PAGE_SHIFT));
- mtrr_wrmsr(MTRRphysMask_MSR(reg), -size << PAGE_SHIFT | 0x800,
- (-size & size_and_mask) >> (32 - PAGE_SHIFT));
+ vr->base_lo = base << PAGE_SHIFT | type;
+ vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
+ vr->mask_lo = -size << PAGE_SHIFT | 0x800;
+ vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);
+
+ mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi);
+ mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi);
}
post_set();
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index d66b09e0c82..764cac64e21 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -332,6 +332,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
error = -EINVAL;
+ /* No CPU hotplug when we change MTRR entries */
+ lock_cpu_hotplug();
/* Search for existing MTRR */
down(&main_lock);
for (i = 0; i < num_var_ranges; ++i) {
@@ -372,6 +374,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
error = i;
out:
up(&main_lock);
+ unlock_cpu_hotplug();
return error;
}
@@ -461,6 +464,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
return -ENXIO;
max = num_var_ranges;
+ /* No CPU hotplug when we change MTRR entries */
+ lock_cpu_hotplug();
down(&main_lock);
if (reg < 0) {
/* Search for existing MTRR */
@@ -501,6 +506,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
error = reg;
out:
up(&main_lock);
+ unlock_cpu_hotplug();
return error;
}
/**
@@ -544,21 +550,9 @@ static void __init init_ifs(void)
centaur_init_mtrr();
}
-static void __init init_other_cpus(void)
-{
- if (use_intel())
- get_mtrr_state();
-
- /* bring up the other processors */
- set_mtrr(~0U,0,0,0);
-
- if (use_intel()) {
- finalize_mtrr_state();
- mtrr_state_warn();
- }
-}
-
-
+/* The suspend/resume methods are only for CPU without MTRR. CPU using generic
+ * MTRR driver doesn't require this
+ */
struct mtrr_value {
mtrr_type ltype;
unsigned long lbase;
@@ -611,13 +605,13 @@ static struct sysdev_driver mtrr_sysdev_driver = {
/**
- * mtrr_init - initialize mtrrs on the boot CPU
+ * mtrr_bp_init - initialize mtrrs on the boot CPU
*
* This needs to be called early; before any of the other CPUs are
* initialized (i.e. before smp_init()).
*
*/
-static int __init mtrr_init(void)
+void __init mtrr_bp_init(void)
{
init_ifs();
@@ -674,12 +668,48 @@ static int __init mtrr_init(void)
if (mtrr_if) {
set_num_var_ranges();
init_table();
- init_other_cpus();
-
- return sysdev_driver_register(&cpu_sysdev_class,
- &mtrr_sysdev_driver);
+ if (use_intel())
+ get_mtrr_state();
}
- return -ENXIO;
}
-subsys_initcall(mtrr_init);
+void mtrr_ap_init(void)
+{
+ unsigned long flags;
+
+ if (!mtrr_if || !use_intel())
+ return;
+ /*
+ * Ideally we should hold main_lock here to avoid mtrr entries changed,
+ * but this routine will be called in cpu boot time, holding the lock
+ * breaks it. This routine is called in two cases: 1.very earily time
+ * of software resume, when there absolutely isn't mtrr entry changes;
+ * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to
+ * prevent mtrr entry changes
+ */
+ local_irq_save(flags);
+
+ mtrr_if->set_all();
+
+ local_irq_restore(flags);
+}
+
+static int __init mtrr_init_finialize(void)
+{
+ if (!mtrr_if)
+ return 0;
+ if (use_intel())
+ mtrr_state_warn();
+ else {
+ /* The CPUs haven't MTRR and seemes not support SMP. They have
+ * specific drivers, we use a tricky method to support
+ * suspend/resume for them.
+ * TBD: is there any system with such CPU which supports
+ * suspend/resume? if no, we should remove the code.
+ */
+ sysdev_driver_register(&cpu_sysdev_class,
+ &mtrr_sysdev_driver);
+ }
+ return 0;
+}
+subsys_initcall(mtrr_init_finialize);
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
index de135124559..99c9f268204 100644
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -91,7 +91,6 @@ extern struct mtrr_ops * mtrr_if;
extern unsigned int num_var_ranges;
-void finalize_mtrr_state(void);
void mtrr_state_warn(void);
char *mtrr_attrib_to_str(int x);
void mtrr_wrmsr(unsigned, unsigned, unsigned);
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
new file mode 100644
index 00000000000..e5fab12f792
--- /dev/null
+++ b/arch/i386/kernel/crash.c
@@ -0,0 +1,223 @@
+/*
+ * Architecture specific (i386) functions for kexec based crash dumps.
+ *
+ * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
+ *
+ * Copyright (C) IBM Corporation, 2004. All rights reserved.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/irq.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+
+#include <asm/processor.h>
+#include <asm/hardirq.h>
+#include <asm/nmi.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <mach_ipi.h>
+
+
+note_buf_t crash_notes[NR_CPUS];
+/* This keeps a track of which one is crashing cpu. */
+static int crashing_cpu;
+
+static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
+ size_t data_len)
+{
+ struct elf_note note;
+
+ note.n_namesz = strlen(name) + 1;
+ note.n_descsz = data_len;
+ note.n_type = type;
+ memcpy(buf, &note, sizeof(note));
+ buf += (sizeof(note) +3)/4;
+ memcpy(buf, name, note.n_namesz);
+ buf += (note.n_namesz + 3)/4;
+ memcpy(buf, data, note.n_descsz);
+ buf += (note.n_descsz + 3)/4;
+
+ return buf;
+}
+
+static void final_note(u32 *buf)
+{
+ struct elf_note note;
+
+ note.n_namesz = 0;
+ note.n_descsz = 0;
+ note.n_type = 0;
+ memcpy(buf, &note, sizeof(note));
+}
+
+static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
+{
+ struct elf_prstatus prstatus;
+ u32 *buf;
+
+ if ((cpu < 0) || (cpu >= NR_CPUS))
+ return;
+
+ /* Using ELF notes here is opportunistic.
+ * I need a well defined structure format
+ * for the data I pass, and I need tags
+ * on the data to indicate what information I have
+ * squirrelled away. ELF notes happen to provide
+ * all of that that no need to invent something new.
+ */
+ buf = &crash_notes[cpu][0];
+ memset(&prstatus, 0, sizeof(prstatus));
+ prstatus.pr_pid = current->pid;
+ elf_core_copy_regs(&prstatus.pr_reg, regs);
+ buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
+ sizeof(prstatus));
+ final_note(buf);
+}
+
+static void crash_get_current_regs(struct pt_regs *regs)
+{
+ __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx));
+ __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx));
+ __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx));
+ __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi));
+ __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi));
+ __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp));
+ __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax));
+ __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp));
+ __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss));
+ __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs));
+ __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds));
+ __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes));
+ __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags));
+
+ regs->eip = (unsigned long)current_text_addr();
+}
+
+/* CPU does not save ss and esp on stack if execution is already
+ * running in kernel mode at the time of NMI occurrence. This code
+ * fixes it.
+ */
+static void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs)
+{
+ memcpy(newregs, oldregs, sizeof(*newregs));
+ newregs->esp = (unsigned long)&(oldregs->esp);
+ __asm__ __volatile__("xorl %eax, %eax;");
+ __asm__ __volatile__ ("movw %%ss, %%ax;" :"=a"(newregs->xss));
+}
+
+/* We may have saved_regs from where the error came from
+ * or it is NULL if via a direct panic().
+ */
+static void crash_save_self(struct pt_regs *saved_regs)
+{
+ struct pt_regs regs;
+ int cpu;
+
+ cpu = smp_processor_id();
+ if (saved_regs)
+ crash_setup_regs(&regs, saved_regs);
+ else
+ crash_get_current_regs(&regs);
+ crash_save_this_cpu(&regs, cpu);
+}
+
+#ifdef CONFIG_SMP
+static atomic_t waiting_for_crash_ipi;
+
+static int crash_nmi_callback(struct pt_regs *regs, int cpu)
+{
+ struct pt_regs fixed_regs;
+
+ /* Don't do anything if this handler is invoked on crashing cpu.
+ * Otherwise, system will completely hang. Crashing cpu can get
+ * an NMI if system was initially booted with nmi_watchdog parameter.
+ */
+ if (cpu == crashing_cpu)
+ return 1;
+ local_irq_disable();
+
+ if (!user_mode(regs)) {
+ crash_setup_regs(&fixed_regs, regs);
+ regs = &fixed_regs;
+ }
+ crash_save_this_cpu(regs, cpu);
+ disable_local_APIC();
+ atomic_dec(&waiting_for_crash_ipi);
+ /* Assume hlt works */
+ __asm__("hlt");
+ for(;;);
+
+ return 1;
+}
+
+/*
+ * By using the NMI code instead of a vector we just sneak thru the
+ * word generator coming out with just what we want. AND it does
+ * not matter if clustered_apic_mode is set or not.
+ */
+static void smp_send_nmi_allbutself(void)
+{
+ send_IPI_allbutself(APIC_DM_NMI);
+}
+
+static void nmi_shootdown_cpus(void)
+{
+ unsigned long msecs;
+
+ atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+ /* Would it be better to replace the trap vector here? */
+ set_nmi_callback(crash_nmi_callback);
+ /* Ensure the new callback function is set before sending
+ * out the NMI
+ */
+ wmb();
+
+ smp_send_nmi_allbutself();
+
+ msecs = 1000; /* Wait at most a second for the other cpus to stop */
+ while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
+ mdelay(1);
+ msecs--;
+ }
+
+ /* Leave the nmi callback set */
+ disable_local_APIC();
+}
+#else
+static void nmi_shootdown_cpus(void)
+{
+ /* There are no cpus to shootdown */
+}
+#endif
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ /* This function is only called after the system
+ * has paniced or is otherwise in a critical state.
+ * The minimum amount of code to allow a kexec'd kernel
+ * to run successfully needs to happen here.
+ *
+ * In practice this means shooting down the other cpus in
+ * an SMP system.
+ */
+ /* The kernel is broken so disable interrupts */
+ local_irq_disable();
+
+ /* Make a note of crashing cpu. Will be used in NMI callback.*/
+ crashing_cpu = smp_processor_id();
+ nmi_shootdown_cpus();
+ lapic_shutdown();
+#if defined(CONFIG_X86_IO_APIC)
+ disable_IO_APIC();
+#endif
+ crash_save_self(regs);
+}
diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c
index 6ed7e28f306..a3cdf894302 100644
--- a/arch/i386/kernel/dmi_scan.c
+++ b/arch/i386/kernel/dmi_scan.c
@@ -1,22 +1,15 @@
#include <linux/types.h>
-#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/acpi.h>
-#include <asm/io.h>
-#include <linux/pm.h>
-#include <asm/system.h>
#include <linux/dmi.h>
#include <linux/bootmem.h>
-struct dmi_header
-{
- u8 type;
- u8 length;
- u16 handle;
+struct dmi_header {
+ u8 type;
+ u8 length;
+ u16 handle;
};
#undef DMI_DEBUG
@@ -29,15 +22,13 @@ struct dmi_header
static char * __init dmi_string(struct dmi_header *dm, u8 s)
{
- u8 *bp=(u8 *)dm;
- bp+=dm->length;
- if(!s)
+ u8 *bp = ((u8 *) dm) + dm->length;
+
+ if (!s)
return "";
s--;
- while(s>0 && *bp)
- {
- bp+=strlen(bp);
- bp++;
+ while (s > 0 && *bp) {
+ bp += strlen(bp) + 1;
s--;
}
return bp;
@@ -47,16 +38,14 @@ static char * __init dmi_string(struct dmi_header *dm, u8 s)
* We have to be cautious here. We have seen BIOSes with DMI pointers
* pointing to completely the wrong place for example
*/
-
-static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dmi_header *))
+static int __init dmi_table(u32 base, int len, int num,
+ void (*decode)(struct dmi_header *))
{
- u8 *buf;
- struct dmi_header *dm;
- u8 *data;
- int i=0;
+ u8 *buf, *data;
+ int i = 0;
buf = bt_ioremap(base, len);
- if(buf==NULL)
+ if (buf == NULL)
return -1;
data = buf;
@@ -65,36 +54,34 @@ static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dm
* Stop when we see all the items the table claimed to have
* OR we run off the end of the table (also happens)
*/
-
- while(i<num && data-buf+sizeof(struct dmi_header)<=len)
- {
- dm=(struct dmi_header *)data;
+ while ((i < num) && (data - buf + sizeof(struct dmi_header)) <= len) {
+ struct dmi_header *dm = (struct dmi_header *)data;
/*
* We want to know the total length (formated area and strings)
* before decoding to make sure we won't run off the table in
* dmi_decode or dmi_string
*/
- data+=dm->length;
- while(data-buf<len-1 && (data[0] || data[1]))
+ data += dm->length;
+ while ((data - buf < len - 1) && (data[0] || data[1]))
data++;
- if(data-buf<len-1)
+ if (data - buf < len - 1)
decode(dm);
- data+=2;
+ data += 2;
i++;
}
bt_iounmap(buf, len);
return 0;
}
-
-inline static int __init dmi_checksum(u8 *buf)
+static int __init dmi_checksum(u8 *buf)
{
- u8 sum=0;
+ u8 sum = 0;
int a;
- for(a=0; a<15; a++)
- sum+=buf[a];
- return (sum==0);
+ for (a = 0; a < 15; a++)
+ sum += buf[a];
+
+ return sum == 0;
}
static int __init dmi_iterate(void (*decode)(struct dmi_header *))
@@ -110,28 +97,30 @@ static int __init dmi_iterate(void (*decode)(struct dmi_header *))
p = ioremap(0xF0000, 0x10000);
if (p == NULL)
return -1;
+
for (q = p; q < p + 0x10000; q += 16) {
memcpy_fromio(buf, q, 15);
- if(memcmp(buf, "_DMI_", 5)==0 && dmi_checksum(buf))
- {
- u16 num=buf[13]<<8|buf[12];
- u16 len=buf[7]<<8|buf[6];
- u32 base=buf[11]<<24|buf[10]<<16|buf[9]<<8|buf[8];
+ if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) {
+ u16 num = (buf[13] << 8) | buf[12];
+ u16 len = (buf[7] << 8) | buf[6];
+ u32 base = (buf[11] << 24) | (buf[10] << 16) |
+ (buf[9] << 8) | buf[8];
/*
* DMI version 0.0 means that the real version is taken from
* the SMBIOS version, which we don't know at this point.
*/
- if(buf[14]!=0)
+ if (buf[14] != 0)
printk(KERN_INFO "DMI %d.%d present.\n",
- buf[14]>>4, buf[14]&0x0F);
+ buf[14] >> 4, buf[14] & 0xF);
else
printk(KERN_INFO "DMI present.\n");
+
dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n",
num, len));
- dmi_printk((KERN_INFO "DMI table at 0x%08X.\n",
- base));
- if(dmi_table(base,len, num, decode)==0)
+ dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", base));
+
+ if (dmi_table(base,len, num, decode) == 0)
return 0;
}
}
@@ -143,16 +132,17 @@ static char *dmi_ident[DMI_STRING_MAX];
/*
* Save a DMI string
*/
-
static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
{
char *d = (char*)dm;
char *p = dmi_string(dm, d[string]);
- if(p==NULL || *p == 0)
+
+ if (p == NULL || *p == 0)
return;
if (dmi_ident[slot])
return;
- dmi_ident[slot] = alloc_bootmem(strlen(p)+1);
+
+ dmi_ident[slot] = alloc_bootmem(strlen(p) + 1);
if(dmi_ident[slot])
strcpy(dmi_ident[slot], p);
else
@@ -160,281 +150,47 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
}
/*
- * Ugly compatibility crap.
- */
-#define dmi_blacklist dmi_system_id
-#define NO_MATCH { DMI_NONE, NULL}
-#define MATCH DMI_MATCH
-
-/*
- * Toshiba keyboard likes to repeat keys when they are not repeated.
- */
-
-static __init int broken_toshiba_keyboard(struct dmi_blacklist *d)
-{
- printk(KERN_WARNING "Toshiba with broken keyboard detected. If your keyboard sometimes generates 3 keypresses instead of one, see http://davyd.ucc.asn.au/projects/toshiba/README\n");
- return 0;
-}
-
-
-#ifdef CONFIG_ACPI_SLEEP
-static __init int reset_videomode_after_s3(struct dmi_blacklist *d)
-{
- /* See acpi_wakeup.S */
- extern long acpi_video_flags;
- acpi_video_flags |= 2;
- return 0;
-}
-#endif
-
-
-#ifdef CONFIG_ACPI_BOOT
-extern int acpi_force;
-
-static __init __attribute__((unused)) int dmi_disable_acpi(struct dmi_blacklist *d)
-{
- if (!acpi_force) {
- printk(KERN_NOTICE "%s detected: acpi off\n",d->ident);
- disable_acpi();
- } else {
- printk(KERN_NOTICE
- "Warning: DMI blacklist says broken, but acpi forced\n");
- }
- return 0;
-}
-
-/*
- * Limit ACPI to CPU enumeration for HT
- */
-static __init __attribute__((unused)) int force_acpi_ht(struct dmi_blacklist *d)
-{
- if (!acpi_force) {
- printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident);
- disable_acpi();
- acpi_ht = 1;
- } else {
- printk(KERN_NOTICE
- "Warning: acpi=force overrules DMI blacklist: acpi=ht\n");
- }
- return 0;
-}
-#endif
-
-#ifdef CONFIG_ACPI_PCI
-static __init int disable_acpi_irq(struct dmi_blacklist *d)
-{
- if (!acpi_force) {
- printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
- d->ident);
- acpi_noirq_set();
- }
- return 0;
-}
-static __init int disable_acpi_pci(struct dmi_blacklist *d)
-{
- if (!acpi_force) {
- printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
- d->ident);
- acpi_disable_pci();
- }
- return 0;
-}
-#endif
-
-/*
- * Process the DMI blacklists
- */
-
-
-/*
- * This will be expanded over time to force things like the APM
- * interrupt mask settings according to the laptop
- */
-
-static __initdata struct dmi_blacklist dmi_blacklist[]={
-
- { broken_toshiba_keyboard, "Toshiba Satellite 4030cdt", { /* Keyboard generates spurious repeats */
- MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
- NO_MATCH, NO_MATCH, NO_MATCH
- } },
-#ifdef CONFIG_ACPI_SLEEP
- { reset_videomode_after_s3, "Toshiba Satellite 4030cdt", { /* Reset video mode after returning from ACPI S3 sleep */
- MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
- NO_MATCH, NO_MATCH, NO_MATCH
- } },
-#endif
-
-#ifdef CONFIG_ACPI_BOOT
- /*
- * If your system is blacklisted here, but you find that acpi=force
- * works for you, please contact acpi-devel@sourceforge.net
- */
-
- /*
- * Boxes that need ACPI disabled
- */
-
- { dmi_disable_acpi, "IBM Thinkpad", {
- MATCH(DMI_BOARD_VENDOR, "IBM"),
- MATCH(DMI_BOARD_NAME, "2629H1G"),
- NO_MATCH, NO_MATCH }},
-
- /*
- * Boxes that need acpi=ht
- */
-
- { force_acpi_ht, "FSC Primergy T850", {
- MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
- MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "DELL GX240", {
- MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
- MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "HP VISUALIZE NT Workstation", {
- MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
- MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "Compaq Workstation W8000", {
- MATCH(DMI_SYS_VENDOR, "Compaq"),
- MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "ASUS P4B266", {
- MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- MATCH(DMI_BOARD_NAME, "P4B266"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "ASUS P2B-DS", {
- MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- MATCH(DMI_BOARD_NAME, "P2B-DS"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "ASUS CUR-DLS", {
- MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- MATCH(DMI_BOARD_NAME, "CUR-DLS"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "ABIT i440BX-W83977", {
- MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
- MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "IBM Bladecenter", {
- MATCH(DMI_BOARD_VENDOR, "IBM"),
- MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "IBM eServer xSeries 360", {
- MATCH(DMI_BOARD_VENDOR, "IBM"),
- MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "IBM eserver xSeries 330", {
- MATCH(DMI_BOARD_VENDOR, "IBM"),
- MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
- NO_MATCH, NO_MATCH }},
-
- { force_acpi_ht, "IBM eserver xSeries 440", {
- MATCH(DMI_BOARD_VENDOR, "IBM"),
- MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
- NO_MATCH, NO_MATCH }},
-
-#endif // CONFIG_ACPI_BOOT
-
-#ifdef CONFIG_ACPI_PCI
- /*
- * Boxes that need ACPI PCI IRQ routing disabled
- */
-
- { disable_acpi_irq, "ASUS A7V", {
- MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
- MATCH(DMI_BOARD_NAME, "<A7V>"),
- /* newer BIOS, Revision 1011, does work */
- MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"),
- NO_MATCH }},
-
- /*
- * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
- */
- { disable_acpi_pci, "ASUS PR-DLS", { /* _BBN 0 bug */
- MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- MATCH(DMI_BOARD_NAME, "PR-DLS"),
- MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"),
- MATCH(DMI_BIOS_DATE, "03/21/2003") }},
-
- { disable_acpi_pci, "Acer TravelMate 36x Laptop", {
- MATCH(DMI_SYS_VENDOR, "Acer"),
- MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
- NO_MATCH, NO_MATCH
- } },
-
-#endif
-
- { NULL, }
-};
-
-/*
* Process a DMI table entry. Right now all we care about are the BIOS
* and machine entries. For 2.5 we should pull the smbus controller info
* out of here.
*/
-
static void __init dmi_decode(struct dmi_header *dm)
{
-#ifdef DMI_DEBUG
- u8 *data = (u8 *)dm;
-#endif
+ u8 *data __attribute__((__unused__)) = (u8 *)dm;
- switch(dm->type)
- {
- case 0:
- dmi_printk(("BIOS Vendor: %s\n",
- dmi_string(dm, data[4])));
- dmi_save_ident(dm, DMI_BIOS_VENDOR, 4);
- dmi_printk(("BIOS Version: %s\n",
- dmi_string(dm, data[5])));
- dmi_save_ident(dm, DMI_BIOS_VERSION, 5);
- dmi_printk(("BIOS Release: %s\n",
- dmi_string(dm, data[8])));
- dmi_save_ident(dm, DMI_BIOS_DATE, 8);
- break;
- case 1:
- dmi_printk(("System Vendor: %s\n",
- dmi_string(dm, data[4])));
- dmi_save_ident(dm, DMI_SYS_VENDOR, 4);
- dmi_printk(("Product Name: %s\n",
- dmi_string(dm, data[5])));
- dmi_save_ident(dm, DMI_PRODUCT_NAME, 5);
- dmi_printk(("Version: %s\n",
- dmi_string(dm, data[6])));
- dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
- dmi_printk(("Serial Number: %s\n",
- dmi_string(dm, data[7])));
- break;
- case 2:
- dmi_printk(("Board Vendor: %s\n",
- dmi_string(dm, data[4])));
- dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
- dmi_printk(("Board Name: %s\n",
- dmi_string(dm, data[5])));
- dmi_save_ident(dm, DMI_BOARD_NAME, 5);
- dmi_printk(("Board Version: %s\n",
- dmi_string(dm, data[6])));
- dmi_save_ident(dm, DMI_BOARD_VERSION, 6);
- break;
+ switch(dm->type) {
+ case 0:
+ dmi_printk(("BIOS Vendor: %s\n", dmi_string(dm, data[4])));
+ dmi_save_ident(dm, DMI_BIOS_VENDOR, 4);
+ dmi_printk(("BIOS Version: %s\n", dmi_string(dm, data[5])));
+ dmi_save_ident(dm, DMI_BIOS_VERSION, 5);
+ dmi_printk(("BIOS Release: %s\n", dmi_string(dm, data[8])));
+ dmi_save_ident(dm, DMI_BIOS_DATE, 8);
+ break;
+ case 1:
+ dmi_printk(("System Vendor: %s\n", dmi_string(dm, data[4])));
+ dmi_save_ident(dm, DMI_SYS_VENDOR, 4);
+ dmi_printk(("Product Name: %s\n", dmi_string(dm, data[5])));
+ dmi_save_ident(dm, DMI_PRODUCT_NAME, 5);
+ dmi_printk(("Version: %s\n", dmi_string(dm, data[6])));
+ dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
+ dmi_printk(("Serial Number: %s\n", dmi_string(dm, data[7])));
+ dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7);
+ break;
+ case 2:
+ dmi_printk(("Board Vendor: %s\n", dmi_string(dm, data[4])));
+ dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
+ dmi_printk(("Board Name: %s\n", dmi_string(dm, data[5])));
+ dmi_save_ident(dm, DMI_BOARD_NAME, 5);
+ dmi_printk(("Board Version: %s\n", dmi_string(dm, data[6])));
+ dmi_save_ident(dm, DMI_BOARD_VERSION, 6);
+ break;
}
}
void __init dmi_scan_machine(void)
{
- int err = dmi_iterate(dmi_decode);
- if(err == 0)
- dmi_check_system(dmi_blacklist);
- else
+ if (dmi_iterate(dmi_decode))
printk(KERN_INFO "DMI not present.\n");
}
@@ -470,7 +226,6 @@ fail: d++;
return count;
}
-
EXPORT_SYMBOL(dmi_check_system);
/**
@@ -480,8 +235,8 @@ EXPORT_SYMBOL(dmi_check_system);
* Returns one DMI data value, can be used to perform
* complex DMI data checks.
*/
-char * dmi_get_system_info(int field)
+char *dmi_get_system_info(int field)
{
return dmi_ident[field];
}
-
+EXPORT_SYMBOL(dmi_get_system_info);
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index f732f427b41..385883ea8c1 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -30,6 +30,7 @@
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/efi.h>
+#include <linux/kexec.h>
#include <asm/setup.h>
#include <asm/io.h>
@@ -598,6 +599,9 @@ efi_initialize_iomem_resources(struct resource *code_resource,
if (md->type == EFI_CONVENTIONAL_MEMORY) {
request_resource(res, code_resource);
request_resource(res, data_resource);
+#ifdef CONFIG_KEXEC
+ request_resource(res, &crashk_res);
+#endif
}
}
}
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index e966fc8c44c..4477bb10709 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -299,7 +299,6 @@ is386: movl $2,%ecx # set MP
movl %eax,%cr0
call check_x87
- incb ready
lgdt cpu_gdt_descr
lidt idt_descr
ljmp $(__KERNEL_CS),$1f
@@ -316,8 +315,9 @@ is386: movl $2,%ecx # set MP
lldt %ax
cld # gcc2 wants the direction flag cleared at all times
#ifdef CONFIG_SMP
- movb ready, %cl
- cmpb $1,%cl
+ movb ready, %cl
+ movb $1, ready
+ cmpb $0,%cl
je 1f # the first CPU calls start_kernel
# all other CPUs call initialize_secondary
call initialize_secondary
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
index 2c4813b47e5..178f4e9bac9 100644
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -268,10 +268,22 @@ static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
return 0;
}
+static int i8259A_shutdown(struct sys_device *dev)
+{
+ /* Put the i8259A into a quiescent state that
+ * the kernel initialization code can get it
+ * out of.
+ */
+ outb(0xff, 0x21); /* mask all of 8259A-1 */
+ outb(0xff, 0xA1); /* mask all of 8259A-1 */
+ return 0;
+}
+
static struct sysdev_class i8259_sysdev_class = {
set_kset_name("i8259"),
.suspend = i8259A_suspend,
.resume = i8259A_resume,
+ .shutdown = i8259A_shutdown,
};
static struct sys_device device_i8259A = {
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 08540bc4ba3..6578f40bd50 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -37,6 +37,7 @@
#include <asm/smp.h>
#include <asm/desc.h>
#include <asm/timer.h>
+#include <asm/i8259.h>
#include <mach_apic.h>
@@ -573,12 +574,14 @@ static int balanced_irq(void *unused)
for ( ; ; ) {
set_current_state(TASK_INTERRUPTIBLE);
time_remaining = schedule_timeout(time_remaining);
- try_to_freeze(PF_FREEZE);
+ try_to_freeze();
if (time_after(jiffies,
prev_balance_time+balanced_irq_interval)) {
+ preempt_disable();
do_irq_balance();
prev_balance_time = jiffies;
time_remaining = balanced_irq_interval;
+ preempt_enable();
}
}
return 0;
@@ -630,10 +633,8 @@ static int __init balanced_irq_init(void)
printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
failed:
for (i = 0; i < NR_CPUS; i++) {
- if(irq_cpu_data[i].irq_delta)
- kfree(irq_cpu_data[i].irq_delta);
- if(irq_cpu_data[i].last_irq)
- kfree(irq_cpu_data[i].last_irq);
+ kfree(irq_cpu_data[i].irq_delta);
+ kfree(irq_cpu_data[i].last_irq);
}
return 0;
}
@@ -1566,7 +1567,6 @@ void print_all_local_APICs (void)
void /*__init*/ print_PIC(void)
{
- extern spinlock_t i8259A_lock;
unsigned int v;
unsigned long flags;
@@ -1634,12 +1634,43 @@ static void __init enable_IO_APIC(void)
*/
void disable_IO_APIC(void)
{
+ int pin;
/*
* Clear the IO-APIC before rebooting:
*/
clear_IO_APIC();
- disconnect_bsp_APIC();
+ /*
+ * If the i82559 is routed through an IOAPIC
+ * Put that IOAPIC in virtual wire mode
+ * so legacy interrups can be delivered.
+ */
+ pin = find_isa_irq_pin(0, mp_ExtINT);
+ if (pin != -1) {
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ memset(&entry, 0, sizeof(entry));
+ entry.mask = 0; /* Enabled */
+ entry.trigger = 0; /* Edge */
+ entry.irr = 0;
+ entry.polarity = 0; /* High */
+ entry.delivery_status = 0;
+ entry.dest_mode = 0; /* Physical */
+ entry.delivery_mode = 7; /* ExtInt */
+ entry.vector = 0;
+ entry.dest.physical.physical_dest = 0;
+
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+ disconnect_bsp_APIC(pin != -1);
}
/*
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 73945a3c53c..ce66dcc26d9 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -15,6 +15,9 @@
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
EXPORT_PER_CPU_SYMBOL(irq_stat);
@@ -153,6 +156,11 @@ void irq_ctx_init(int cpu)
cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
}
+void irq_ctx_exit(int cpu)
+{
+ hardirq_ctx[cpu] = NULL;
+}
+
extern asmlinkage void __do_softirq(void);
asmlinkage void do_softirq(void)
@@ -210,9 +218,8 @@ int show_interrupts(struct seq_file *p, void *v)
if (i == 0) {
seq_printf(p, " ");
- for (j=0; j<NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "CPU%d ",j);
+ for_each_cpu(j)
+ seq_printf(p, "CPU%d ",j);
seq_putc(p, '\n');
}
@@ -225,9 +232,8 @@ int show_interrupts(struct seq_file *p, void *v)
#ifndef CONFIG_SMP
seq_printf(p, "%10u ", kstat_irqs(i));
#else
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ for_each_cpu(j)
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
seq_printf(p, " %14s", irq_desc[i].handler->typename);
seq_printf(p, " %s", action->name);
@@ -240,16 +246,14 @@ skip:
spin_unlock_irqrestore(&irq_desc[i].lock, flags);
} else if (i == NR_IRQS) {
seq_printf(p, "NMI: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ", nmi_count(j));
+ for_each_cpu(j)
+ seq_printf(p, "%10u ", nmi_count(j));
seq_putc(p, '\n');
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ",
- per_cpu(irq_stat,j).apic_timer_irqs);
+ for_each_cpu(j)
+ seq_printf(p, "%10u ",
+ per_cpu(irq_stat,j).apic_timer_irqs);
seq_putc(p, '\n');
#endif
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
@@ -259,3 +263,45 @@ skip:
}
return 0;
}
+
+#ifdef CONFIG_HOTPLUG_CPU
+#include <mach_apic.h>
+
+void fixup_irqs(cpumask_t map)
+{
+ unsigned int irq;
+ static int warned;
+
+ for (irq = 0; irq < NR_IRQS; irq++) {
+ cpumask_t mask;
+ if (irq == 2)
+ continue;
+
+ cpus_and(mask, irq_affinity[irq], map);
+ if (any_online_cpu(mask) == NR_CPUS) {
+ printk("Breaking affinity for irq %i\n", irq);
+ mask = map;
+ }
+ if (irq_desc[irq].handler->set_affinity)
+ irq_desc[irq].handler->set_affinity(irq, mask);
+ else if (irq_desc[irq].action && !(warned++))
+ printk("Cannot set affinity for irq %i\n", irq);
+ }
+
+#if 0
+ barrier();
+ /* Ingo Molnar says: "after the IO-APIC masks have been redirected
+ [note the nop - the interrupt-enable boundary on x86 is two
+ instructions from sti] - to flush out pending hardirqs and
+ IPIs. After this point nothing is supposed to reach this CPU." */
+ __asm__ __volatile__("sti; nop; cli");
+ barrier();
+#else
+ /* That doesn't seem sufficient. Give it 1ms. */
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+#endif
+}
+#endif
+
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index 3762f6b35ab..a6d8c45961d 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -127,48 +127,23 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
regs->eip = (unsigned long)&p->ainsn.insn;
}
-struct task_struct *arch_get_kprobe_task(void *ptr)
-{
- return ((struct thread_info *) (((unsigned long) ptr) &
- (~(THREAD_SIZE -1))))->task;
-}
-
void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
{
unsigned long *sara = (unsigned long *)&regs->esp;
- struct kretprobe_instance *ri;
- static void *orig_ret_addr;
+ struct kretprobe_instance *ri;
+
+ if ((ri = get_free_rp_inst(rp)) != NULL) {
+ ri->rp = rp;
+ ri->task = current;
+ ri->ret_addr = (kprobe_opcode_t *) *sara;
- /*
- * Save the return address when the return probe hits
- * the first time, and use it to populate the (krprobe
- * instance)->ret_addr for subsequent return probes at
- * the same addrress since stack address would have
- * the kretprobe_trampoline by then.
- */
- if (((void*) *sara) != kretprobe_trampoline)
- orig_ret_addr = (void*) *sara;
-
- if ((ri = get_free_rp_inst(rp)) != NULL) {
- ri->rp = rp;
- ri->stack_addr = sara;
- ri->ret_addr = orig_ret_addr;
- add_rp_inst(ri);
/* Replace the return addr with trampoline addr */
*sara = (unsigned long) &kretprobe_trampoline;
- } else {
- rp->nmissed++;
- }
-}
-void arch_kprobe_flush_task(struct task_struct *tk)
-{
- struct kretprobe_instance *ri;
- while ((ri = get_rp_inst_tsk(tk)) != NULL) {
- *((unsigned long *)(ri->stack_addr)) =
- (unsigned long) ri->ret_addr;
- recycle_rp_inst(ri);
- }
+ add_rp_inst(ri);
+ } else {
+ rp->nmissed++;
+ }
}
/*
@@ -286,36 +261,59 @@ no_kprobe:
*/
int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
- struct task_struct *tsk;
- struct kretprobe_instance *ri;
- struct hlist_head *head;
- struct hlist_node *node;
- unsigned long *sara = ((unsigned long *) &regs->esp) - 1;
-
- tsk = arch_get_kprobe_task(sara);
- head = kretprobe_inst_table_head(tsk);
-
- hlist_for_each_entry(ri, node, head, hlist) {
- if (ri->stack_addr == sara && ri->rp) {
- if (ri->rp->handler)
- ri->rp->handler(ri, regs);
- }
- }
- return 0;
-}
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head;
+ struct hlist_node *node, *tmp;
+ unsigned long orig_ret_address = 0;
+ unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
-void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs,
- unsigned long flags)
-{
- struct kretprobe_instance *ri;
- /* RA already popped */
- unsigned long *sara = ((unsigned long *)&regs->esp) - 1;
+ head = kretprobe_inst_table_head(current);
- while ((ri = get_rp_inst(sara))) {
- regs->eip = (unsigned long)ri->ret_addr;
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because an multiple functions in the call path
+ * have a return probe installed on them, and/or more then one return
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ if (ri->rp && ri->rp->handler)
+ ri->rp->handler(ri, regs);
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
recycle_rp_inst(ri);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
}
- regs->eflags &= ~TF_MASK;
+
+ BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+ regs->eip = orig_ret_address;
+
+ unlock_kprobes();
+ preempt_enable_no_resched();
+
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we have handled unlocking
+ * and re-enabling preemption.
+ */
+ return 1;
}
/*
@@ -403,8 +401,7 @@ static inline int post_kprobe_handler(struct pt_regs *regs)
current_kprobe->post_handler(current_kprobe, regs, 0);
}
- if (current_kprobe->post_handler != trampoline_post_handler)
- resume_execution(current_kprobe, regs);
+ resume_execution(current_kprobe, regs);
regs->eflags |= kprobe_saved_eflags;
/*Restore back the original saved kprobes variables and continue. */
@@ -534,3 +531,13 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
}
return 0;
}
+
+static struct kprobe trampoline_p = {
+ .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+ .pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+ return register_kprobe(&trampoline_p);
+}
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
new file mode 100644
index 00000000000..52ed18d8b51
--- /dev/null
+++ b/arch/i386/kernel/machine_kexec.c
@@ -0,0 +1,226 @@
+/*
+ * machine_kexec.c - handle transition of Linux booting another kernel
+ * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/delay.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/cpufeature.h>
+
+static inline unsigned long read_cr3(void)
+{
+ unsigned long cr3;
+ asm volatile("movl %%cr3,%0": "=r"(cr3));
+ return cr3;
+}
+
+#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+
+#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L2_ATTR (_PAGE_PRESENT)
+
+#define LEVEL0_SIZE (1UL << 12UL)
+
+#ifndef CONFIG_X86_PAE
+#define LEVEL1_SIZE (1UL << 22UL)
+static u32 pgtable_level1[1024] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+ unsigned long level1_index, level2_index;
+ u32 *pgtable_level2;
+
+ /* Find the current page table */
+ pgtable_level2 = __va(read_cr3());
+
+ /* Find the indexes of the physical address to identity map */
+ level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+ level2_index = address / LEVEL1_SIZE;
+
+ /* Identity map the page table entry */
+ pgtable_level1[level1_index] = address | L0_ATTR;
+ pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+
+ /* Flush the tlb so the new mapping takes effect.
+ * Global tlb entries are not flushed but that is not an issue.
+ */
+ load_cr3(pgtable_level2);
+}
+
+#else
+#define LEVEL1_SIZE (1UL << 21UL)
+#define LEVEL2_SIZE (1UL << 30UL)
+static u64 pgtable_level1[512] PAGE_ALIGNED;
+static u64 pgtable_level2[512] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+ unsigned long level1_index, level2_index, level3_index;
+ u64 *pgtable_level3;
+
+ /* Find the current page table */
+ pgtable_level3 = __va(read_cr3());
+
+ /* Find the indexes of the physical address to identity map */
+ level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+ level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
+ level3_index = address / LEVEL2_SIZE;
+
+ /* Identity map the page table entry */
+ pgtable_level1[level1_index] = address | L0_ATTR;
+ pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+ set_64bit(&pgtable_level3[level3_index],
+ __pa(pgtable_level2) | L2_ATTR);
+
+ /* Flush the tlb so the new mapping takes effect.
+ * Global tlb entries are not flushed but that is not an issue.
+ */
+ load_cr3(pgtable_level3);
+}
+#endif
+
+
+static void set_idt(void *newidt, __u16 limit)
+{
+ unsigned char curidt[6];
+
+ /* ia32 supports unaliged loads & stores */
+ (*(__u16 *)(curidt)) = limit;
+ (*(__u32 *)(curidt +2)) = (unsigned long)(newidt);
+
+ __asm__ __volatile__ (
+ "lidt %0\n"
+ : "=m" (curidt)
+ );
+};
+
+
+static void set_gdt(void *newgdt, __u16 limit)
+{
+ unsigned char curgdt[6];
+
+ /* ia32 supports unaligned loads & stores */
+ (*(__u16 *)(curgdt)) = limit;
+ (*(__u32 *)(curgdt +2)) = (unsigned long)(newgdt);
+
+ __asm__ __volatile__ (
+ "lgdt %0\n"
+ : "=m" (curgdt)
+ );
+};
+
+static void load_segments(void)
+{
+#define __STR(X) #X
+#define STR(X) __STR(X)
+
+ __asm__ __volatile__ (
+ "\tljmp $"STR(__KERNEL_CS)",$1f\n"
+ "\t1:\n"
+ "\tmovl $"STR(__KERNEL_DS)",%eax\n"
+ "\tmovl %eax,%ds\n"
+ "\tmovl %eax,%es\n"
+ "\tmovl %eax,%fs\n"
+ "\tmovl %eax,%gs\n"
+ "\tmovl %eax,%ss\n"
+ );
+#undef STR
+#undef __STR
+}
+
+typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
+ unsigned long indirection_page,
+ unsigned long reboot_code_buffer,
+ unsigned long start_address,
+ unsigned int has_pae) ATTRIB_NORET;
+
+const extern unsigned char relocate_new_kernel[];
+extern void relocate_new_kernel_end(void);
+const extern unsigned int relocate_new_kernel_size;
+
+/*
+ * A architecture hook called to validate the
+ * proposed image and prepare the control pages
+ * as needed. The pages for KEXEC_CONTROL_CODE_SIZE
+ * have been allocated, but the segments have yet
+ * been copied into the kernel.
+ *
+ * Do what every setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ *
+ * Currently nothing.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+ return 0;
+}
+
+/*
+ * Undo anything leftover by machine_kexec_prepare
+ * when an image is freed.
+ */
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+NORET_TYPE void machine_kexec(struct kimage *image)
+{
+ unsigned long page_list;
+ unsigned long reboot_code_buffer;
+
+ relocate_new_kernel_t rnk;
+
+ /* Interrupts aren't acceptable while we reboot */
+ local_irq_disable();
+
+ /* Compute some offsets */
+ reboot_code_buffer = page_to_pfn(image->control_code_page)
+ << PAGE_SHIFT;
+ page_list = image->head;
+
+ /* Set up an identity mapping for the reboot_code_buffer */
+ identity_map_page(reboot_code_buffer);
+
+ /* copy it out */
+ memcpy((void *)reboot_code_buffer, relocate_new_kernel,
+ relocate_new_kernel_size);
+
+ /* The segment registers are funny things, they are
+ * automatically loaded from a table, in memory wherever you
+ * set them to a specific selector, but this table is never
+ * accessed again you set the segment to a different selector.
+ *
+ * The more common model is are caches where the behide
+ * the scenes work is done, but is also dropped at arbitrary
+ * times.
+ *
+ * I take advantage of this here by force loading the
+ * segments, before I zap the gdt with an invalid value.
+ */
+ load_segments();
+ /* The gdt & idt are now invalid.
+ * If you want to load them you must set up your own idt & gdt.
+ */
+ set_gdt(phys_to_virt(0),0);
+ set_idt(phys_to_virt(0),0);
+
+ /* now call it */
+ rnk = (relocate_new_kernel_t) reboot_code_buffer;
+ (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
+}
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
index 383a11600d2..af917f609c7 100644
--- a/arch/i386/kernel/mpparse.c
+++ b/arch/i386/kernel/mpparse.c
@@ -67,7 +67,6 @@ unsigned long mp_lapic_addr;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
-unsigned int boot_cpu_logical_apicid = -1U;
/* Internal processor count */
static unsigned int __initdata num_processors;
@@ -180,7 +179,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
Dprintk(" Bootup CPU\n");
boot_cpu_physical_apicid = m->mpc_apicid;
- boot_cpu_logical_apicid = apicid;
}
if (num_processors >= NR_CPUS) {
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index aea2ce1145d..ba243a4cc11 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -13,6 +13,7 @@
#include <stdarg.h>
+#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
@@ -55,6 +56,9 @@
#include <linux/irq.h>
#include <linux/err.h>
+#include <asm/tlbflush.h>
+#include <asm/cpu.h>
+
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
static int hlt_counter;
@@ -143,14 +147,42 @@ static void poll_idle (void)
}
}
+#ifdef CONFIG_HOTPLUG_CPU
+#include <asm/nmi.h>
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+ /* This must be done before dead CPU ack */
+ cpu_exit_clear();
+ wbinvd();
+ mb();
+ /* Ack it */
+ __get_cpu_var(cpu_state) = CPU_DEAD;
+
+ /*
+ * With physical CPU hotplug, we should halt the cpu
+ */
+ local_irq_disable();
+ while (1)
+ __asm__ __volatile__("hlt":::"memory");
+}
+#else
+static inline void play_dead(void)
+{
+ BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
/*
* The idle thread. There's no useful work to be
* done, so just try to conserve power and have a
* low exit latency (ie sit in a loop waiting for
* somebody to say that they'd like to reschedule)
*/
-void cpu_idle (void)
+void cpu_idle(void)
{
+ int cpu = raw_smp_processor_id();
+
/* endless idle loop with no priority at all */
while (1) {
while (!need_resched()) {
@@ -165,6 +197,9 @@ void cpu_idle (void)
if (!idle)
idle = default_idle;
+ if (cpu_is_offline(cpu))
+ play_dead();
+
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
idle();
}
@@ -223,7 +258,7 @@ static void mwait_idle(void)
}
}
-void __init select_idle_routine(const struct cpuinfo_x86 *c)
+void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_MWAIT)) {
printk("monitor/mwait feature present.\n");
@@ -582,6 +617,33 @@ handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
}
/*
+ * This function selects if the context switch from prev to next
+ * has to tweak the TSC disable bit in the cr4.
+ */
+static inline void disable_tsc(struct task_struct *prev_p,
+ struct task_struct *next_p)
+{
+ struct thread_info *prev, *next;
+
+ /*
+ * gcc should eliminate the ->thread_info dereference if
+ * has_secure_computing returns 0 at compile time (SECCOMP=n).
+ */
+ prev = prev_p->thread_info;
+ next = next_p->thread_info;
+
+ if (has_secure_computing(prev) || has_secure_computing(next)) {
+ /* slow path here */
+ if (has_secure_computing(prev) &&
+ !has_secure_computing(next)) {
+ write_cr4(read_cr4() & ~X86_CR4_TSD);
+ } else if (!has_secure_computing(prev) &&
+ has_secure_computing(next))
+ write_cr4(read_cr4() | X86_CR4_TSD);
+ }
+}
+
+/*
* switch_to(x,yn) should switch tasks from x to y.
*
* We fsave/fwait so that an exception goes off at the right time
@@ -660,6 +722,8 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
handle_io_bitmap(next, tss);
+ disable_tsc(prev_p, next_p);
+
return prev_p;
}
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index db912209a8d..b3e58484996 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -26,7 +26,6 @@ static int reboot_mode;
static int reboot_thru_bios;
#ifdef CONFIG_SMP
-int reboot_smp = 0;
static int reboot_cpu = -1;
/* shamelessly grabbed from lib/vsprintf.c for readability */
#define is_digit(c) ((c) >= '0' && (c) <= '9')
@@ -49,7 +48,6 @@ static int __init reboot_setup(char *str)
break;
#ifdef CONFIG_SMP
case 's': /* "smp" reboot by executing reset on BSP or other CPU*/
- reboot_smp = 1;
if (is_digit(*(str+1))) {
reboot_cpu = (int) (*(str+1) - '0');
if (is_digit(*(str+2)))
@@ -88,33 +86,9 @@ static int __init set_bios_reboot(struct dmi_system_id *d)
return 0;
}
-/*
- * Some machines require the "reboot=s" commandline option, this quirk makes that automatic.
- */
-static int __init set_smp_reboot(struct dmi_system_id *d)
-{
-#ifdef CONFIG_SMP
- if (!reboot_smp) {
- reboot_smp = 1;
- printk(KERN_INFO "%s series board detected. Selecting SMP-method for reboots.\n", d->ident);
- }
-#endif
- return 0;
-}
-
-/*
- * Some machines require the "reboot=b,s" commandline option, this quirk makes that automatic.
- */
-static int __init set_smp_bios_reboot(struct dmi_system_id *d)
-{
- set_smp_reboot(d);
- set_bios_reboot(d);
- return 0;
-}
-
static struct dmi_system_id __initdata reboot_dmi_table[] = {
{ /* Handle problems with rebooting on Dell 1300's */
- .callback = set_smp_bios_reboot,
+ .callback = set_bios_reboot,
.ident = "Dell PowerEdge 1300",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
@@ -301,41 +275,32 @@ void machine_real_restart(unsigned char *code, int length)
EXPORT_SYMBOL(machine_real_restart);
#endif
-void machine_restart(char * __unused)
+void machine_shutdown(void)
{
#ifdef CONFIG_SMP
- int cpuid;
-
- cpuid = GET_APIC_ID(apic_read(APIC_ID));
-
- if (reboot_smp) {
-
- /* check to see if reboot_cpu is valid
- if its not, default to the BSP */
- if ((reboot_cpu == -1) ||
- (reboot_cpu > (NR_CPUS -1)) ||
- !physid_isset(cpuid, phys_cpu_present_map))
- reboot_cpu = boot_cpu_physical_apicid;
-
- reboot_smp = 0; /* use this as a flag to only go through this once*/
- /* re-run this function on the other CPUs
- it will fall though this section since we have
- cleared reboot_smp, and do the reboot if it is the
- correct CPU, otherwise it halts. */
- if (reboot_cpu != cpuid)
- smp_call_function((void *)machine_restart , NULL, 1, 0);
+ int reboot_cpu_id;
+
+ /* The boot cpu is always logical cpu 0 */
+ reboot_cpu_id = 0;
+
+ /* See if there has been given a command line override */
+ if ((reboot_cpu_id != -1) && (reboot_cpu < NR_CPUS) &&
+ cpu_isset(reboot_cpu, cpu_online_map)) {
+ reboot_cpu_id = reboot_cpu;
}
- /* if reboot_cpu is still -1, then we want a tradional reboot,
- and if we are not running on the reboot_cpu,, halt */
- if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
- for (;;)
- __asm__ __volatile__ ("hlt");
+ /* Make certain the cpu I'm rebooting on is online */
+ if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
+ reboot_cpu_id = smp_processor_id();
}
- /*
- * Stop all CPUs and turn off local APICs and the IO-APIC, so
- * other OSs see a clean IRQ state.
+
+ /* Make certain I only run on the appropriate processor */
+ set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
+
+ /* O.K. Now that I'm on the appropriate processor, stop
+ * all of the others, and disable their local APICs.
*/
+
smp_send_stop();
#endif /* CONFIG_SMP */
@@ -344,6 +309,11 @@ void machine_restart(char * __unused)
#ifdef CONFIG_X86_IO_APIC
disable_IO_APIC();
#endif
+}
+
+void machine_restart(char * __unused)
+{
+ machine_shutdown();
if (!reboot_thru_bios) {
if (efi_enabled) {
diff --git a/arch/i386/kernel/relocate_kernel.S b/arch/i386/kernel/relocate_kernel.S
new file mode 100644
index 00000000000..d312616effa
--- /dev/null
+++ b/arch/i386/kernel/relocate_kernel.S
@@ -0,0 +1,120 @@
+/*
+ * relocate_kernel.S - put the kernel image in place to boot
+ * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/linkage.h>
+
+ /*
+ * Must be relocatable PIC code callable as a C function, that once
+ * it starts can not use the previous processes stack.
+ */
+ .globl relocate_new_kernel
+relocate_new_kernel:
+ /* read the arguments and say goodbye to the stack */
+ movl 4(%esp), %ebx /* page_list */
+ movl 8(%esp), %ebp /* reboot_code_buffer */
+ movl 12(%esp), %edx /* start address */
+ movl 16(%esp), %ecx /* cpu_has_pae */
+
+ /* zero out flags, and disable interrupts */
+ pushl $0
+ popfl
+
+ /* set a new stack at the bottom of our page... */
+ lea 4096(%ebp), %esp
+
+ /* store the parameters back on the stack */
+ pushl %edx /* store the start address */
+
+ /* Set cr0 to a known state:
+ * 31 0 == Paging disabled
+ * 18 0 == Alignment check disabled
+ * 16 0 == Write protect disabled
+ * 3 0 == No task switch
+ * 2 0 == Don't do FP software emulation.
+ * 0 1 == Proctected mode enabled
+ */
+ movl %cr0, %eax
+ andl $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax
+ orl $(1<<0), %eax
+ movl %eax, %cr0
+
+ /* clear cr4 if applicable */
+ testl %ecx, %ecx
+ jz 1f
+ /* Set cr4 to a known state:
+ * Setting everything to zero seems safe.
+ */
+ movl %cr4, %eax
+ andl $0, %eax
+ movl %eax, %cr4
+
+ jmp 1f
+1:
+
+ /* Flush the TLB (needed?) */
+ xorl %eax, %eax
+ movl %eax, %cr3
+
+ /* Do the copies */
+ movl %ebx, %ecx
+ jmp 1f
+
+0: /* top, read another word from the indirection page */
+ movl (%ebx), %ecx
+ addl $4, %ebx
+1:
+ testl $0x1, %ecx /* is it a destination page */
+ jz 2f
+ movl %ecx, %edi
+ andl $0xfffff000, %edi
+ jmp 0b
+2:
+ testl $0x2, %ecx /* is it an indirection page */
+ jz 2f
+ movl %ecx, %ebx
+ andl $0xfffff000, %ebx
+ jmp 0b
+2:
+ testl $0x4, %ecx /* is it the done indicator */
+ jz 2f
+ jmp 3f
+2:
+ testl $0x8, %ecx /* is it the source indicator */
+ jz 0b /* Ignore it otherwise */
+ movl %ecx, %esi /* For every source page do a copy */
+ andl $0xfffff000, %esi
+
+ movl $1024, %ecx
+ rep ; movsl
+ jmp 0b
+
+3:
+
+ /* To be certain of avoiding problems with self-modifying code
+ * I need to execute a serializing instruction here.
+ * So I flush the TLB, it's handy, and not processor dependent.
+ */
+ xorl %eax, %eax
+ movl %eax, %cr3
+
+ /* set all of the registers to known values */
+ /* leave %esp alone */
+
+ xorl %eax, %eax
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ xorl %edx, %edx
+ xorl %esi, %esi
+ xorl %edi, %edi
+ xorl %ebp, %ebp
+ ret
+relocate_new_kernel_end:
+
+ .globl relocate_new_kernel_size
+relocate_new_kernel_size:
+ .long relocate_new_kernel_end - relocate_new_kernel
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 30406fd0b64..7306353c520 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -43,7 +43,12 @@
#include <linux/init.h>
#include <linux/edd.h>
#include <linux/nodemask.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+
#include <video/edid.h>
+
+#include <asm/apic.h>
#include <asm/e820.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
@@ -55,12 +60,15 @@
#include "setup_arch_pre.h"
#include <bios_ebda.h>
+/* Forward Declaration. */
+void __init find_max_pfn(void);
+
/* This value is set up by the early boot code to point to the value
immediately after the boot time page tables. It contains a *physical*
address, and must not be in the .bss segment! */
unsigned long init_pg_tables_end __initdata = ~0UL;
-int disable_pse __initdata = 0;
+int disable_pse __devinitdata = 0;
/*
* Machine setup..
@@ -732,6 +740,15 @@ static void __init parse_cmdline_early (char ** cmdline_p)
if (to != command_line)
to--;
if (!memcmp(from+7, "exactmap", 8)) {
+#ifdef CONFIG_CRASH_DUMP
+ /* If we are doing a crash dump, we
+ * still need to know the real mem
+ * size before original memory map is
+ * reset.
+ */
+ find_max_pfn();
+ saved_max_pfn = max_pfn;
+#endif
from += 8+7;
e820.nr_map = 0;
userdef = 1;
@@ -835,6 +852,44 @@ static void __init parse_cmdline_early (char ** cmdline_p)
#endif /* CONFIG_X86_LOCAL_APIC */
#endif /* CONFIG_ACPI_BOOT */
+#ifdef CONFIG_X86_LOCAL_APIC
+ /* enable local APIC */
+ else if (!memcmp(from, "lapic", 5))
+ lapic_enable();
+
+ /* disable local APIC */
+ else if (!memcmp(from, "nolapic", 6))
+ lapic_disable();
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_KEXEC
+ /* crashkernel=size@addr specifies the location to reserve for
+ * a crash kernel. By reserving this memory we guarantee
+ * that linux never set's it up as a DMA target.
+ * Useful for holding code to do something appropriate
+ * after a kernel panic.
+ */
+ else if (!memcmp(from, "crashkernel=", 12)) {
+ unsigned long size, base;
+ size = memparse(from+12, &from);
+ if (*from == '@') {
+ base = memparse(from+1, &from);
+ /* FIXME: Do I want a sanity check
+ * to validate the memory range?
+ */
+ crashk_res.start = base;
+ crashk_res.end = base + size - 1;
+ }
+ }
+#endif
+#ifdef CONFIG_CRASH_DUMP
+ /* elfcorehdr= specifies the location of elf core header
+ * stored by the crashed kernel.
+ */
+ else if (!memcmp(from, "elfcorehdr=", 11))
+ elfcorehdr_addr = memparse(from+11, &from);
+#endif
+
/*
* highmem=size forces highmem to be exactly 'size' bytes.
* This works even on boxes that have no highmem otherwise.
@@ -1113,8 +1168,8 @@ void __init setup_bootmem_allocator(void)
* the (very unlikely) case of us accidentally initializing the
* bootmem allocator with an invalid RAM area.
*/
- reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
- bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
+ reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
+ bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
@@ -1170,6 +1225,11 @@ void __init setup_bootmem_allocator(void)
}
}
#endif
+#ifdef CONFIG_KEXEC
+ if (crashk_res.start != crashk_res.end)
+ reserve_bootmem(crashk_res.start,
+ crashk_res.end - crashk_res.start + 1);
+#endif
}
/*
@@ -1223,6 +1283,9 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat
*/
request_resource(res, code_resource);
request_resource(res, data_resource);
+#ifdef CONFIG_KEXEC
+ request_resource(res, &crashk_res);
+#endif
}
}
}
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index b9b8f4e20fa..89ef7adc63a 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -608,10 +608,8 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
if (!user_mode(regs))
return 1;
- if (current->flags & PF_FREEZE) {
- refrigerator(0);
+ if (try_to_freeze())
goto no_signal;
- }
if (!oldset)
oldset = &current->blocked;
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 68be7d0c723..cec4bde6716 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -19,6 +19,7 @@
#include <linux/mc146818rtc.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
+#include <linux/cpu.h>
#include <linux/module.h>
#include <asm/mtrr.h>
@@ -164,7 +165,7 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
unsigned long flags;
local_irq_save(flags);
-
+ WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
/*
* Wait for idle.
*/
@@ -346,21 +347,21 @@ out:
static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
unsigned long va)
{
- cpumask_t tmp;
/*
* A couple of (to be removed) sanity checks:
*
- * - we do not send IPIs to not-yet booted CPUs.
* - current CPU must not be in mask
* - mask must exist :)
*/
BUG_ON(cpus_empty(cpumask));
-
- cpus_and(tmp, cpumask, cpu_online_map);
- BUG_ON(!cpus_equal(cpumask, tmp));
BUG_ON(cpu_isset(smp_processor_id(), cpumask));
BUG_ON(!mm);
+ /* If a CPU which we ran on has gone down, OK. */
+ cpus_and(cpumask, cpumask, cpu_online_map);
+ if (cpus_empty(cpumask))
+ return;
+
/*
* i'm not happy about this global shared spinlock in the
* MM hot path, but we'll see how contended it is.
@@ -476,6 +477,7 @@ void flush_tlb_all(void)
*/
void smp_send_reschedule(int cpu)
{
+ WARN_ON(cpu_is_offline(cpu));
send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
}
@@ -493,6 +495,16 @@ struct call_data_struct {
int wait;
};
+void lock_ipi_call_lock(void)
+{
+ spin_lock_irq(&call_lock);
+}
+
+void unlock_ipi_call_lock(void)
+{
+ spin_unlock_irq(&call_lock);
+}
+
static struct call_data_struct * call_data;
/*
@@ -516,10 +528,15 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
*/
{
struct call_data_struct data;
- int cpus = num_online_cpus()-1;
+ int cpus;
- if (!cpus)
+ /* Holding any lock stops cpus from going down. */
+ spin_lock(&call_lock);
+ cpus = num_online_cpus() - 1;
+ if (!cpus) {
+ spin_unlock(&call_lock);
return 0;
+ }
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
@@ -531,7 +548,6 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
if (wait)
atomic_set(&data.finished, 0);
- spin_lock(&call_lock);
call_data = &data;
mb();
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index c20d96d5c15..8ac8e9fd561 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -44,6 +44,9 @@
#include <linux/smp_lock.h>
#include <linux/irq.h>
#include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
#include <linux/delay.h>
#include <linux/mc146818rtc.h>
@@ -56,20 +59,30 @@
#include <smpboot_hooks.h>
/* Set if we find a B stepping CPU */
-static int __initdata smp_b_stepping;
+static int __devinitdata smp_b_stepping;
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
#ifdef CONFIG_X86_HT
EXPORT_SYMBOL(smp_num_siblings);
#endif
-int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
+
+/* Package ID of each logical CPU */
+int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
EXPORT_SYMBOL(phys_proc_id);
-int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */
+
+/* Core ID of each logical CPU */
+int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
EXPORT_SYMBOL(cpu_core_id);
+cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(cpu_sibling_map);
+
+cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(cpu_core_map);
+
/* bitmap of online cpus */
-cpumask_t cpu_online_map;
+cpumask_t cpu_online_map __read_mostly;
EXPORT_SYMBOL(cpu_online_map);
cpumask_t cpu_callin_map;
@@ -77,11 +90,17 @@ cpumask_t cpu_callout_map;
EXPORT_SYMBOL(cpu_callout_map);
static cpumask_t smp_commenced_mask;
+/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there
+ * is no way to resync one AP against BP. TBD: for prescott and above, we
+ * should use IA64's algorithm
+ */
+static int __devinitdata tsc_sync_disabled;
+
/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_data);
-u8 x86_cpu_to_apicid[NR_CPUS] =
+u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = 0xff };
EXPORT_SYMBOL(x86_cpu_to_apicid);
@@ -96,13 +115,16 @@ static int trampoline_exec;
static void map_cpu_to_logical_apicid(void);
+/* State of each CPU. */
+DEFINE_PER_CPU(int, cpu_state) = { 0 };
+
/*
* Currently trivial. Write the real->protected mode
* bootstrap into the page concerned. The caller
* has made sure it's suitably aligned.
*/
-static unsigned long __init setup_trampoline(void)
+static unsigned long __devinit setup_trampoline(void)
{
memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
return virt_to_phys(trampoline_base);
@@ -132,7 +154,7 @@ void __init smp_alloc_memory(void)
* a given CPU
*/
-static void __init smp_store_cpu_info(int id)
+static void __devinit smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = cpu_data + id;
@@ -326,7 +348,7 @@ extern void calibrate_delay(void);
static atomic_t init_deasserted;
-static void __init smp_callin(void)
+static void __devinit smp_callin(void)
{
int cpuid, phys_id;
unsigned long timeout;
@@ -411,16 +433,48 @@ static void __init smp_callin(void)
/*
* Synchronize the TSC with the BP
*/
- if (cpu_has_tsc && cpu_khz)
+ if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
synchronize_tsc_ap();
}
static int cpucount;
+static inline void
+set_cpu_sibling_map(int cpu)
+{
+ int i;
+
+ if (smp_num_siblings > 1) {
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+ if (cpu_core_id[cpu] == cpu_core_id[i]) {
+ cpu_set(i, cpu_sibling_map[cpu]);
+ cpu_set(cpu, cpu_sibling_map[i]);
+ }
+ }
+ } else {
+ cpu_set(cpu, cpu_sibling_map[cpu]);
+ }
+
+ if (current_cpu_data.x86_num_cores > 1) {
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+ if (phys_proc_id[cpu] == phys_proc_id[i]) {
+ cpu_set(i, cpu_core_map[cpu]);
+ cpu_set(cpu, cpu_core_map[i]);
+ }
+ }
+ } else {
+ cpu_core_map[cpu] = cpu_sibling_map[cpu];
+ }
+}
+
/*
* Activate a secondary processor.
*/
-static void __init start_secondary(void *unused)
+static void __devinit start_secondary(void *unused)
{
/*
* Dont put anything before smp_callin(), SMP
@@ -443,7 +497,23 @@ static void __init start_secondary(void *unused)
* the local TLBs too.
*/
local_flush_tlb();
+
+ /* This must be done before setting cpu_online_map */
+ set_cpu_sibling_map(raw_smp_processor_id());
+ wmb();
+
+ /*
+ * We need to hold call_lock, so there is no inconsistency
+ * between the time smp_call_function() determines number of
+ * IPI receipients, and the time when the determination is made
+ * for which cpus receive the IPI. Holding this
+ * lock helps us to not include this cpu in a currently in progress
+ * smp_call_function().
+ */
+ lock_ipi_call_lock();
cpu_set(smp_processor_id(), cpu_online_map);
+ unlock_ipi_call_lock();
+ per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
/* We can take interrupts now: we're officially "up". */
local_irq_enable();
@@ -458,7 +528,7 @@ static void __init start_secondary(void *unused)
* from the task structure
* This function must not return.
*/
-void __init initialize_secondary(void)
+void __devinit initialize_secondary(void)
{
/*
* We don't actually need to load the full TSS,
@@ -480,10 +550,10 @@ extern struct {
#ifdef CONFIG_NUMA
/* which logical CPUs are on which nodes */
-cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
+cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly =
{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
/* which node each logical CPU is on */
-int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
+int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
EXPORT_SYMBOL(cpu_2_node);
/* set up a mapping between cpu and node. */
@@ -511,7 +581,7 @@ static inline void unmap_cpu_to_node(int cpu)
#endif /* CONFIG_NUMA */
-u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
static void map_cpu_to_logical_apicid(void)
{
@@ -572,7 +642,7 @@ static inline void __inquire_remote_apic(int apicid)
* INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
* won't ... remember to clear down the APIC, etc later.
*/
-static int __init
+static int __devinit
wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
{
unsigned long send_status = 0, accept_status = 0;
@@ -618,7 +688,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
#endif /* WAKE_SECONDARY_VIA_NMI */
#ifdef WAKE_SECONDARY_VIA_INIT
-static int __init
+static int __devinit
wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
{
unsigned long send_status = 0, accept_status = 0;
@@ -753,8 +823,43 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
#endif /* WAKE_SECONDARY_VIA_INIT */
extern cpumask_t cpu_initialized;
+static inline int alloc_cpu_id(void)
+{
+ cpumask_t tmp_map;
+ int cpu;
+ cpus_complement(tmp_map, cpu_present_map);
+ cpu = first_cpu(tmp_map);
+ if (cpu >= NR_CPUS)
+ return -ENODEV;
+ return cpu;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS];
+static inline struct task_struct * alloc_idle_task(int cpu)
+{
+ struct task_struct *idle;
+
+ if ((idle = cpu_idle_tasks[cpu]) != NULL) {
+ /* initialize thread_struct. we really want to avoid destroy
+ * idle tread
+ */
+ idle->thread.esp = (unsigned long)(((struct pt_regs *)
+ (THREAD_SIZE + (unsigned long) idle->thread_info)) - 1);
+ init_idle(idle, cpu);
+ return idle;
+ }
+ idle = fork_idle(cpu);
+
+ if (!IS_ERR(idle))
+ cpu_idle_tasks[cpu] = idle;
+ return idle;
+}
+#else
+#define alloc_idle_task(cpu) fork_idle(cpu)
+#endif
-static int __init do_boot_cpu(int apicid)
+static int __devinit do_boot_cpu(int apicid, int cpu)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -763,16 +868,17 @@ static int __init do_boot_cpu(int apicid)
{
struct task_struct *idle;
unsigned long boot_error;
- int timeout, cpu;
+ int timeout;
unsigned long start_eip;
unsigned short nmi_high = 0, nmi_low = 0;
- cpu = ++cpucount;
+ ++cpucount;
+
/*
* We can't use kernel_thread since we must avoid to
* reschedule the child.
*/
- idle = fork_idle(cpu);
+ idle = alloc_idle_task(cpu);
if (IS_ERR(idle))
panic("failed fork for CPU %d", cpu);
idle->thread.eip = (unsigned long) start_secondary;
@@ -839,13 +945,16 @@ static int __init do_boot_cpu(int apicid)
inquire_remote_apic(apicid);
}
}
- x86_cpu_to_apicid[cpu] = apicid;
+
if (boot_error) {
/* Try to put things back the way they were before ... */
unmap_cpu_to_logical_apicid(cpu);
cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
cpucount--;
+ } else {
+ x86_cpu_to_apicid[cpu] = apicid;
+ cpu_set(cpu, cpu_present_map);
}
/* mark "stuck" area as not stuck */
@@ -854,6 +963,75 @@ static int __init do_boot_cpu(int apicid)
return boot_error;
}
+#ifdef CONFIG_HOTPLUG_CPU
+void cpu_exit_clear(void)
+{
+ int cpu = raw_smp_processor_id();
+
+ idle_task_exit();
+
+ cpucount --;
+ cpu_uninit();
+ irq_ctx_exit(cpu);
+
+ cpu_clear(cpu, cpu_callout_map);
+ cpu_clear(cpu, cpu_callin_map);
+ cpu_clear(cpu, cpu_present_map);
+
+ cpu_clear(cpu, smp_commenced_mask);
+ unmap_cpu_to_logical_apicid(cpu);
+}
+
+struct warm_boot_cpu_info {
+ struct completion *complete;
+ int apicid;
+ int cpu;
+};
+
+static void __devinit do_warm_boot_cpu(void *p)
+{
+ struct warm_boot_cpu_info *info = p;
+ do_boot_cpu(info->apicid, info->cpu);
+ complete(info->complete);
+}
+
+int __devinit smp_prepare_cpu(int cpu)
+{
+ DECLARE_COMPLETION(done);
+ struct warm_boot_cpu_info info;
+ struct work_struct task;
+ int apicid, ret;
+
+ lock_cpu_hotplug();
+ apicid = x86_cpu_to_apicid[cpu];
+ if (apicid == BAD_APICID) {
+ ret = -ENODEV;
+ goto exit;
+ }
+
+ info.complete = &done;
+ info.apicid = apicid;
+ info.cpu = cpu;
+ INIT_WORK(&task, do_warm_boot_cpu, &info);
+
+ tsc_sync_disabled = 1;
+
+ /* init low mem mapping */
+ memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+ sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS);
+ flush_tlb_all();
+ schedule_work(&task);
+ wait_for_completion(&done);
+
+ tsc_sync_disabled = 0;
+ zap_low_mappings();
+ ret = 0;
+exit:
+ unlock_cpu_hotplug();
+ return ret;
+}
+#endif
+
static void smp_tune_scheduling (void)
{
unsigned long cachesize; /* kB */
@@ -895,13 +1073,6 @@ void *xquad_portio;
EXPORT_SYMBOL(xquad_portio);
#endif
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
-#ifdef CONFIG_X86_HT
-EXPORT_SYMBOL(cpu_sibling_map);
-#endif
-cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_core_map);
-
static void __init smp_boot_cpus(unsigned int max_cpus)
{
int apicid, cpu, bit, kicked;
@@ -1013,7 +1184,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
if (max_cpus <= cpucount+1)
continue;
- if (do_boot_cpu(apicid))
+ if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
printk("CPU #%d not responding - cannot use it.\n",
apicid);
else
@@ -1065,44 +1236,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
cpus_clear(cpu_core_map[cpu]);
}
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- struct cpuinfo_x86 *c = cpu_data + cpu;
- int siblings = 0;
- int i;
- if (!cpu_isset(cpu, cpu_callout_map))
- continue;
-
- if (smp_num_siblings > 1) {
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- if (cpu_core_id[cpu] == cpu_core_id[i]) {
- siblings++;
- cpu_set(i, cpu_sibling_map[cpu]);
- }
- }
- } else {
- siblings++;
- cpu_set(cpu, cpu_sibling_map[cpu]);
- }
-
- if (siblings != smp_num_siblings) {
- printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
- smp_num_siblings = siblings;
- }
-
- if (c->x86_num_cores > 1) {
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- if (phys_proc_id[cpu] == phys_proc_id[i]) {
- cpu_set(i, cpu_core_map[cpu]);
- }
- }
- } else {
- cpu_core_map[cpu] = cpu_sibling_map[cpu];
- }
- }
+ cpu_set(0, cpu_sibling_map[0]);
+ cpu_set(0, cpu_core_map[0]);
smpboot_setup_io_apic();
@@ -1119,6 +1254,9 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
+ smp_commenced_mask = cpumask_of_cpu(0);
+ cpu_callin_map = cpumask_of_cpu(0);
+ mb();
smp_boot_cpus(max_cpus);
}
@@ -1126,23 +1264,98 @@ void __devinit smp_prepare_boot_cpu(void)
{
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), cpu_callout_map);
+ cpu_set(smp_processor_id(), cpu_present_map);
+ per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
}
-int __devinit __cpu_up(unsigned int cpu)
+#ifdef CONFIG_HOTPLUG_CPU
+static void
+remove_siblinginfo(int cpu)
{
- /* This only works at boot for x86. See "rewrite" above. */
- if (cpu_isset(cpu, smp_commenced_mask)) {
- local_irq_enable();
- return -ENOSYS;
+ int sibling;
+
+ for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
+ cpu_clear(cpu, cpu_sibling_map[sibling]);
+ for_each_cpu_mask(sibling, cpu_core_map[cpu])
+ cpu_clear(cpu, cpu_core_map[sibling]);
+ cpus_clear(cpu_sibling_map[cpu]);
+ cpus_clear(cpu_core_map[cpu]);
+ phys_proc_id[cpu] = BAD_APICID;
+ cpu_core_id[cpu] = BAD_APICID;
+}
+
+int __cpu_disable(void)
+{
+ cpumask_t map = cpu_online_map;
+ int cpu = smp_processor_id();
+
+ /*
+ * Perhaps use cpufreq to drop frequency, but that could go
+ * into generic code.
+ *
+ * We won't take down the boot processor on i386 due to some
+ * interrupts only being able to be serviced by the BSP.
+ * Especially so if we're not using an IOAPIC -zwane
+ */
+ if (cpu == 0)
+ return -EBUSY;
+
+ /* We enable the timer again on the exit path of the death loop */
+ disable_APIC_timer();
+ /* Allow any queued timer interrupts to get serviced */
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+
+ remove_siblinginfo(cpu);
+
+ cpu_clear(cpu, map);
+ fixup_irqs(map);
+ /* It's now safe to remove this processor from the online map */
+ cpu_clear(cpu, cpu_online_map);
+ return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+ /* We don't do anything here: idle task is faking death itself. */
+ unsigned int i;
+
+ for (i = 0; i < 10; i++) {
+ /* They ack this in play_dead by setting CPU_DEAD */
+ if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
+ printk ("CPU %d is now offline\n", cpu);
+ return;
+ }
+ current->state = TASK_UNINTERRUPTIBLE;
+ schedule_timeout(HZ/10);
}
+ printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+}
+#else /* ... !CONFIG_HOTPLUG_CPU */
+int __cpu_disable(void)
+{
+ return -ENOSYS;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+ /* We said "no" in __cpu_disable */
+ BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+int __devinit __cpu_up(unsigned int cpu)
+{
/* In case one didn't come up */
if (!cpu_isset(cpu, cpu_callin_map)) {
+ printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
local_irq_enable();
return -EIO;
}
local_irq_enable();
+ per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* Unleash the CPU! */
cpu_set(cpu, smp_commenced_mask);
while (!cpu_isset(cpu, cpu_online_map))
@@ -1156,10 +1369,12 @@ void __init smp_cpus_done(unsigned int max_cpus)
setup_ioapic_dest();
#endif
zap_low_mappings();
+#ifndef CONFIG_HOTPLUG_CPU
/*
* Disable executability of the SMP trampoline:
*/
set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+#endif
}
void __init smp_intr_init(void)
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index d408afaf649..468500a7e89 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -283,9 +283,14 @@ ENTRY(sys_call_table)
.long sys_mq_timedreceive /* 280 */
.long sys_mq_notify
.long sys_mq_getsetattr
- .long sys_ni_syscall /* reserved for kexec */
+ .long sys_kexec_load
.long sys_waitid
.long sys_ni_syscall /* 285 */ /* available */
.long sys_add_key
.long sys_request_key
.long sys_keyctl
+ .long sys_ioprio_set
+ .long sys_ioprio_get /* 290 */
+ .long sys_inotify_init
+ .long sys_inotify_add_watch
+ .long sys_inotify_rm_watch
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 960d8bd137d..0bada1870bd 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -21,11 +21,16 @@
extern asmlinkage void sysenter_entry(void);
-void enable_sep_cpu(void *info)
+void enable_sep_cpu(void)
{
int cpu = get_cpu();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ if (!boot_cpu_has(X86_FEATURE_SEP)) {
+ put_cpu();
+ return;
+ }
+
tss->ss1 = __KERNEL_CS;
tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
@@ -41,7 +46,7 @@ void enable_sep_cpu(void *info)
extern const char vsyscall_int80_start, vsyscall_int80_end;
extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
-static int __init sysenter_setup(void)
+int __init sysenter_setup(void)
{
void *page = (void *)get_zeroed_page(GFP_ATOMIC);
@@ -58,8 +63,5 @@ static int __init sysenter_setup(void)
&vsyscall_sysenter_start,
&vsyscall_sysenter_end - &vsyscall_sysenter_start);
- on_each_cpu(enable_sep_cpu, NULL, 1, 1);
return 0;
}
-
-__initcall(sysenter_setup);
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index e68d9fdb075..0ee9dee8af0 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -68,7 +68,8 @@
#include "io_ports.h"
-extern spinlock_t i8259A_lock;
+#include <asm/i8259.h>
+
int pit_latch_buggy; /* extern */
#include "do_timer.h"
@@ -85,10 +86,12 @@ extern unsigned long wall_jiffies;
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
+#include <asm/i8253.h>
+
DEFINE_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock);
-struct timer_opts *cur_timer = &timer_none;
+struct timer_opts *cur_timer __read_mostly = &timer_none;
/*
* This is a special lock that is owned by the CPU and holds the index
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c
index 10a0cbb88e7..658c0629ba6 100644
--- a/arch/i386/kernel/time_hpet.c
+++ b/arch/i386/kernel/time_hpet.c
@@ -50,7 +50,7 @@ static void hpet_writel(unsigned long d, unsigned long a)
* comparator value and continue. Next tick can be caught by checking
* for a change in the comparator value. Used in apic.c.
*/
-static void __init wait_hpet_tick(void)
+static void __devinit wait_hpet_tick(void)
{
unsigned int start_cmp_val, end_cmp_val;
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c
index 37353bd3180..8163fe0cf1f 100644
--- a/arch/i386/kernel/timers/common.c
+++ b/arch/i386/kernel/timers/common.c
@@ -86,7 +86,7 @@ bad_ctc:
#define CALIBRATE_CNT_HPET (5 * hpet_tick)
#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
-unsigned long __init calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
+unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
{
unsigned long tsc_startlow, tsc_starthigh;
unsigned long tsc_endlow, tsc_endhigh;
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
index f6f1206a11b..13892a65c94 100644
--- a/arch/i386/kernel/timers/timer_cyclone.c
+++ b/arch/i386/kernel/timers/timer_cyclone.c
@@ -17,9 +17,9 @@
#include <asm/io.h>
#include <asm/pgtable.h>
#include <asm/fixmap.h>
-#include "io_ports.h"
+#include <asm/i8253.h>
-extern spinlock_t i8253_lock;
+#include "io_ports.h"
/* Number of usecs that the last interrupt was delayed */
static int delay_at_last_interrupt;
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
index d766e0963ac..ef8dac5dd33 100644
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ b/arch/i386/kernel/timers/timer_hpet.c
@@ -18,7 +18,7 @@
#include "mach_timer.h"
#include <asm/hpet.h>
-static unsigned long hpet_usec_quotient; /* convert hpet clks to usec */
+static unsigned long __read_mostly hpet_usec_quotient; /* convert hpet clks to usec */
static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */
static unsigned long hpet_last; /* hpet counter value at last tick*/
static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
@@ -180,7 +180,7 @@ static int __init init_hpet(char* override)
/************************************************************/
/* tsc timer_opts struct */
-static struct timer_opts timer_hpet = {
+static struct timer_opts timer_hpet __read_mostly = {
.name = "hpet",
.mark_offset = mark_offset_hpet,
.get_offset = get_offset_hpet,
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
index 967d5453cd0..06de036a820 100644
--- a/arch/i386/kernel/timers/timer_pit.c
+++ b/arch/i386/kernel/timers/timer_pit.c
@@ -15,9 +15,8 @@
#include <asm/smp.h>
#include <asm/io.h>
#include <asm/arch_hooks.h>
+#include <asm/i8253.h>
-extern spinlock_t i8259A_lock;
-extern spinlock_t i8253_lock;
#include "do_timer.h"
#include "io_ports.h"
@@ -166,7 +165,6 @@ struct init_timer_opts __initdata timer_pit_init = {
void setup_pit_timer(void)
{
- extern spinlock_t i8253_lock;
unsigned long flags;
spin_lock_irqsave(&i8253_lock, flags);
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
index 54c36b18202..8f4e4d5bc56 100644
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -24,6 +24,7 @@
#include "mach_timer.h"
#include <asm/hpet.h>
+#include <asm/i8253.h>
#ifdef CONFIG_HPET_TIMER
static unsigned long hpet_usec_quotient;
@@ -33,9 +34,7 @@ static struct timer_opts timer_tsc;
static inline void cpufreq_delayed_get(void);
-int tsc_disable __initdata = 0;
-
-extern spinlock_t i8253_lock;
+int tsc_disable __devinitdata = 0;
static int use_tsc;
/* Number of usecs that the last interrupt was delayed */
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index e4d4e2162c7..a61f33d06ea 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -27,6 +27,7 @@
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/kprobes.h>
+#include <linux/kexec.h>
#ifdef CONFIG_EISA
#include <linux/ioport.h>
@@ -234,22 +235,22 @@ void show_registers(struct pt_regs *regs)
* time of the fault..
*/
if (in_kernel) {
- u8 *eip;
+ u8 __user *eip;
printk("\nStack: ");
show_stack(NULL, (unsigned long*)esp);
printk("Code: ");
- eip = (u8 *)regs->eip - 43;
+ eip = (u8 __user *)regs->eip - 43;
for (i = 0; i < 64; i++, eip++) {
unsigned char c;
- if (eip < (u8 *)PAGE_OFFSET || __get_user(c, eip)) {
+ if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
printk(" Bad EIP value.");
break;
}
- if (eip == (u8 *)regs->eip)
+ if (eip == (u8 __user *)regs->eip)
printk("<%02x> ", c);
else
printk("%02x ", c);
@@ -273,13 +274,13 @@ static void handle_BUG(struct pt_regs *regs)
if (eip < PAGE_OFFSET)
goto no_bug;
- if (__get_user(ud2, (unsigned short *)eip))
+ if (__get_user(ud2, (unsigned short __user *)eip))
goto no_bug;
if (ud2 != 0x0b0f)
goto no_bug;
- if (__get_user(line, (unsigned short *)(eip + 2)))
+ if (__get_user(line, (unsigned short __user *)(eip + 2)))
goto bug;
- if (__get_user(file, (char **)(eip + 4)) ||
+ if (__get_user(file, (char * __user *)(eip + 4)) ||
(unsigned long)file < PAGE_OFFSET || __get_user(c, file))
file = "<bad filename>";
@@ -294,6 +295,9 @@ bug:
printk("Kernel BUG\n");
}
+/* This is gone through when something in the kernel
+ * has done something bad and is about to be terminated.
+*/
void die(const char * str, struct pt_regs * regs, long err)
{
static struct {
@@ -341,6 +345,10 @@ void die(const char * str, struct pt_regs * regs, long err)
bust_spinlocks(0);
die.lock_owner = -1;
spin_unlock_irq(&die.lock);
+
+ if (kexec_should_crash(current))
+ crash_kexec(regs);
+
if (in_interrupt())
panic("Fatal exception in interrupt");
@@ -361,6 +369,10 @@ static inline void die_if_kernel(const char * str, struct pt_regs * regs, long e
static void do_trap(int trapnr, int signr, char *str, int vm86,
struct pt_regs * regs, long error_code, siginfo_t *info)
{
+ struct task_struct *tsk = current;
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = trapnr;
+
if (regs->eflags & VM_MASK) {
if (vm86)
goto vm86_trap;
@@ -371,9 +383,6 @@ static void do_trap(int trapnr, int signr, char *str, int vm86,
goto kernel_trap;
trap_signal: {
- struct task_struct *tsk = current;
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = trapnr;
if (info)
force_sig_info(signr, info, tsk);
else
@@ -486,6 +495,9 @@ fastcall void do_general_protection(struct pt_regs * regs, long error_code)
}
put_cpu();
+ current->thread.error_code = error_code;
+ current->thread.trap_no = 13;
+
if (regs->eflags & VM_MASK)
goto gp_in_vm86;
@@ -570,6 +582,15 @@ void die_nmi (struct pt_regs *regs, const char *msg)
console_silent();
spin_unlock(&nmi_print_lock);
bust_spinlocks(0);
+
+ /* If we are in kernel we are probably nested up pretty bad
+ * and might aswell get out now while we still can.
+ */
+ if (!user_mode(regs)) {
+ current->thread.trap_no = 2;
+ crash_kexec(regs);
+ }
+
do_exit(SIGSEGV);
}
@@ -625,6 +646,14 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
nmi_enter();
cpu = smp_processor_id();
+
+#ifdef CONFIG_HOTPLUG_CPU
+ if (!cpu_online(cpu)) {
+ nmi_exit();
+ return;
+ }
+#endif
+
++nmi_count(cpu);
if (!nmi_callback(regs, cpu))
@@ -872,9 +901,9 @@ fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
error_code);
return;
}
- die_if_kernel("cache flush denied", regs, error_code);
current->thread.trap_no = 19;
current->thread.error_code = error_code;
+ die_if_kernel("cache flush denied", regs, error_code);
force_sig(SIGSEGV, current);
}
}
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index e0512cc8bea..761972f8cb6 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -2,20 +2,23 @@
* Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
*/
+#define LOAD_OFFSET __PAGE_OFFSET
+
#include <asm-generic/vmlinux.lds.h>
#include <asm/thread_info.h>
#include <asm/page.h>
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
-ENTRY(startup_32)
+ENTRY(phys_startup_32)
jiffies = jiffies_64;
SECTIONS
{
- . = __PAGE_OFFSET + 0x100000;
+ . = __KERNEL_START;
+ phys_startup_32 = startup_32 - LOAD_OFFSET;
/* read-only */
_text = .; /* Text and read-only data */
- .text : {
+ .text : AT(ADDR(.text) - LOAD_OFFSET) {
*(.text)
SCHED_TEXT
LOCK_TEXT
@@ -27,49 +30,58 @@ SECTIONS
. = ALIGN(16); /* Exception table */
__start___ex_table = .;
- __ex_table : { *(__ex_table) }
+ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
__stop___ex_table = .;
RODATA
/* writeable */
- .data : { /* Data */
+ .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
*(.data)
CONSTRUCTORS
}
. = ALIGN(4096);
__nosave_begin = .;
- .data_nosave : { *(.data.nosave) }
+ .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
. = ALIGN(4096);
__nosave_end = .;
. = ALIGN(4096);
- .data.page_aligned : { *(.data.idt) }
+ .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
+ *(.data.idt)
+ }
. = ALIGN(32);
- .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+ .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
+ *(.data.cacheline_aligned)
+ }
+ /* rarely changed data like cpu maps */
+ . = ALIGN(32);
+ .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
_edata = .; /* End of data section */
. = ALIGN(THREAD_SIZE); /* init_task */
- .data.init_task : { *(.data.init_task) }
+ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
+ *(.data.init_task)
+ }
/* will be freed after init */
. = ALIGN(4096); /* Init code and data */
__init_begin = .;
- .init.text : {
+ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
_sinittext = .;
*(.init.text)
_einittext = .;
}
- .init.data : { *(.init.data) }
+ .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) }
. = ALIGN(16);
__setup_start = .;
- .init.setup : { *(.init.setup) }
+ .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) }
__setup_end = .;
__initcall_start = .;
- .initcall.init : {
+ .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
*(.initcall1.init)
*(.initcall2.init)
*(.initcall3.init)
@@ -80,33 +92,41 @@ SECTIONS
}
__initcall_end = .;
__con_initcall_start = .;
- .con_initcall.init : { *(.con_initcall.init) }
+ .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+ *(.con_initcall.init)
+ }
__con_initcall_end = .;
SECURITY_INIT
. = ALIGN(4);
__alt_instructions = .;
- .altinstructions : { *(.altinstructions) }
+ .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+ *(.altinstructions)
+ }
__alt_instructions_end = .;
- .altinstr_replacement : { *(.altinstr_replacement) }
+ .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
+ *(.altinstr_replacement)
+ }
/* .exit.text is discard at runtime, not link time, to deal with references
from .altinstructions and .eh_frame */
- .exit.text : { *(.exit.text) }
- .exit.data : { *(.exit.data) }
+ .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
+ .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
. = ALIGN(4096);
__initramfs_start = .;
- .init.ramfs : { *(.init.ramfs) }
+ .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
__initramfs_end = .;
. = ALIGN(32);
__per_cpu_start = .;
- .data.percpu : { *(.data.percpu) }
+ .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
__per_cpu_end = .;
. = ALIGN(4096);
__init_end = .;
/* freed after init ends here */
__bss_start = .; /* BSS */
- .bss : {
+ .bss.page_aligned : AT(ADDR(.bss.page_aligned) - LOAD_OFFSET) {
*(.bss.page_aligned)
+ }
+ .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
*(.bss)
}
. = ALIGN(4);