summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile12
-rw-r--r--arch/x86/kernel/acpi/boot.c57
-rw-r--r--arch/x86/kernel/acpi/processor.c6
-rw-r--r--arch/x86/kernel/acpi/sleep.c32
-rw-r--r--arch/x86/kernel/alternative.c22
-rw-r--r--arch/x86/kernel/apic_32.c29
-rw-r--r--arch/x86/kernel/apm_32.c12
-rw-r--r--arch/x86/kernel/asm-offsets_64.c2
-rw-r--r--arch/x86/kernel/cpu/amd_64.c11
-rw-r--r--arch/x86/kernel/cpu/centaur_64.c12
-rw-r--r--arch/x86/kernel/cpu/common_64.c17
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/intel_64.c12
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c10
-rw-r--r--arch/x86/kernel/cpu/mcheck/non-fatal.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c4
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c4
-rw-r--r--arch/x86/kernel/cpuid.c27
-rw-r--r--arch/x86/kernel/e820.c161
-rw-r--r--arch/x86/kernel/early-quirks.c26
-rw-r--r--arch/x86/kernel/early_printk.c2
-rw-r--r--arch/x86/kernel/efi.c2
-rw-r--r--arch/x86/kernel/entry_32.S73
-rw-r--r--arch/x86/kernel/entry_64.S139
-rw-r--r--arch/x86/kernel/ftrace.c141
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c75
-rw-r--r--arch/x86/kernel/hpet.c20
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c9
-rw-r--r--arch/x86/kernel/io_apic_32.c42
-rw-r--r--arch/x86/kernel/io_apic_64.c61
-rw-r--r--arch/x86/kernel/irqinit_64.c4
-rw-r--r--arch/x86/kernel/ldt.c2
-rw-r--r--arch/x86/kernel/machine_kexec_32.c4
-rw-r--r--arch/x86/kernel/machine_kexec_64.c4
-rw-r--r--arch/x86/kernel/microcode.c6
-rw-r--r--arch/x86/kernel/mpparse.c20
-rw-r--r--arch/x86/kernel/msr.c16
-rw-r--r--arch/x86/kernel/nmi.c9
-rw-r--r--arch/x86/kernel/numaq_32.c7
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/pci-gart_64.c8
-rw-r--r--arch/x86/kernel/process.c30
-rw-r--r--arch/x86/kernel/process_32.c3
-rw-r--r--arch/x86/kernel/process_64.c3
-rw-r--r--arch/x86/kernel/quirks.c2
-rw-r--r--arch/x86/kernel/setup.c73
-rw-r--r--arch/x86/kernel/setup_percpu.c8
-rw-r--r--arch/x86/kernel/smp.c158
-rw-r--r--arch/x86/kernel/smpboot.c8
-rw-r--r--arch/x86/kernel/smpcommon.c56
-rw-r--r--arch/x86/kernel/stacktrace.c2
-rw-r--r--arch/x86/kernel/time_32.c3
-rw-r--r--arch/x86/kernel/time_64.c18
-rw-r--r--arch/x86/kernel/tlb_32.c2
-rw-r--r--arch/x86/kernel/tlb_64.c2
-rw-r--r--arch/x86/kernel/traps_32.c177
-rw-r--r--arch/x86/kernel/traps_64.c512
-rw-r--r--arch/x86/kernel/tsc.c535
-rw-r--r--arch/x86/kernel/tsc_32.c455
-rw-r--r--arch/x86/kernel/tsc_64.c357
-rw-r--r--arch/x86/kernel/visws_quirks.c709
-rw-r--r--arch/x86/kernel/vmi_32.c2
-rw-r--r--arch/x86/kernel/vmiclock_32.c4
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S8
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S10
-rw-r--r--arch/x86/kernel/vsyscall_64.c7
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c11
67 files changed, 2581 insertions, 1682 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 54829e2b516..da140611bb5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,6 +6,12 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu
CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
+ifdef CONFIG_FTRACE
+# Do not profile debug utilities
+CFLAGS_REMOVE_tsc.o = -pg
+CFLAGS_REMOVE_rtc.o = -pg
+endif
+
#
# vsyscalls (which work on the user stack) should have
# no stack-protector checks:
@@ -13,12 +19,13 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
CFLAGS_hpet.o := $(nostackp)
-CFLAGS_tsc_64.o := $(nostackp)
+CFLAGS_tsc.o := $(nostackp)
obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
obj-y += traps_$(BITS).o irq_$(BITS).o
obj-y += time_$(BITS).o ioport.o ldt.o
obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
+obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
@@ -26,7 +33,7 @@ obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
obj-y += alternative.o i8253.o pci-nommu.o
-obj-y += tsc_$(BITS).o io_delay.o rtc.o
+obj-y += tsc.o io_delay.o rtc.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-y += process.o
@@ -56,6 +63,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 5c0107602b6..f489d7a9be9 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -37,6 +37,7 @@
#include <asm/pgtable.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
+#include <asm/genapic.h>
#include <asm/io.h>
#include <asm/mpspec.h>
#include <asm/smp.h>
@@ -83,8 +84,6 @@ int acpi_lapic;
int acpi_ioapic;
int acpi_strict;
-static int disable_irq0_through_ioapic __initdata;
-
u8 acpi_sci_flags __initdata;
int acpi_sci_override_gsi __initdata;
int acpi_skip_timer_override __initdata;
@@ -108,21 +107,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
*/
enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
-#ifdef CONFIG_X86_64
-
-/* rely on all ACPI tables being in the direct mapping */
-char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
-{
- if (!phys_addr || !size)
- return NULL;
-
- if (phys_addr+size <= (max_pfn_mapped << PAGE_SHIFT) + PAGE_SIZE)
- return __va(phys_addr);
-
- return NULL;
-}
-
-#else
/*
* Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
@@ -141,11 +125,15 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
unsigned long base, offset, mapped_size;
int idx;
- if (phys + size < 8 * 1024 * 1024)
+ if (!phys || !size)
+ return NULL;
+
+ if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT))
return __va(phys);
offset = phys & (PAGE_SIZE - 1);
mapped_size = PAGE_SIZE - offset;
+ clear_fixmap(FIX_ACPI_END);
set_fixmap(FIX_ACPI_END, phys);
base = fix_to_virt(FIX_ACPI_END);
@@ -157,13 +145,13 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
if (--idx < FIX_ACPI_BEGIN)
return NULL; /* cannot handle this */
phys += PAGE_SIZE;
+ clear_fixmap(idx);
set_fixmap(idx, phys);
mapped_size += PAGE_SIZE;
}
return ((unsigned char *)base + offset);
}
-#endif
#ifdef CONFIG_PCI_MMCONFIG
/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
@@ -992,10 +980,6 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
int pin;
struct mp_config_intsrc mp_irq;
- /* Skip the 8254 timer interrupt (IRQ 0) if requested. */
- if (bus_irq == 0 && disable_irq0_through_ioapic)
- return;
-
/*
* Convert 'gsi' to 'ioapic.pin'.
*/
@@ -1062,10 +1046,6 @@ void __init mp_config_acpi_legacy_irqs(void)
for (i = 0; i < 16; i++) {
int idx;
- /* Skip the 8254 timer interrupt (IRQ 0) if requested. */
- if (i == 0 && disable_irq0_through_ioapic)
- continue;
-
for (idx = 0; idx < mp_irq_entries; idx++) {
struct mp_config_intsrc *irq = mp_irqs + idx;
@@ -1373,8 +1353,6 @@ static void __init acpi_process_madt(void)
return;
}
-#ifdef __i386__
-
static int __init disable_acpi_irq(const struct dmi_system_id *d)
{
if (!acpi_force) {
@@ -1425,13 +1403,12 @@ static int __init force_acpi_ht(const struct dmi_system_id *d)
}
/*
- * Don't register any I/O APIC entries for the 8254 timer IRQ.
+ * Force ignoring BIOS IRQ0 pin2 override
*/
-static int __init
-dmi_disable_irq0_through_ioapic(const struct dmi_system_id *d)
+static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
{
- pr_notice("%s detected: disabling IRQ 0 through I/O APIC\n", d->ident);
- disable_irq0_through_ioapic = 1;
+ pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident);
+ acpi_skip_timer_override = 1;
return 0;
}
@@ -1609,11 +1586,11 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
* is enabled. This input is incorrectly designated the
* ISA IRQ 0 via an interrupt source override even though
* it is wired to the output of the master 8259A and INTIN0
- * is not connected at all. Abandon any attempts to route
- * IRQ 0 through the I/O APIC therefore.
+ * is not connected at all. Force ignoring BIOS IRQ0 pin2
+ * override in that cases.
*/
{
- .callback = dmi_disable_irq0_through_ioapic,
+ .callback = dmi_ignore_irq0_timer_override,
.ident = "HP NX6125 laptop",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
@@ -1621,7 +1598,7 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
},
},
{
- .callback = dmi_disable_irq0_through_ioapic,
+ .callback = dmi_ignore_irq0_timer_override,
.ident = "HP NX6325 laptop",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
@@ -1631,8 +1608,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
{}
};
-#endif /* __i386__ */
-
/*
* acpi_boot_table_init() and acpi_boot_init()
* called from setup_arch(), always.
@@ -1660,9 +1635,7 @@ int __init acpi_boot_table_init(void)
{
int error;
-#ifdef __i386__
dmi_check_system(acpi_dmi_table);
-#endif
/*
* If acpi_disabled, bail out
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index de2d2e4ebad..7c074eec39f 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -56,6 +56,12 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_ACPI))
buf[2] |= ACPI_PDC_T_FFH;
+ /*
+ * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
+ */
+ if (!cpu_has(c, X86_FEATURE_MWAIT))
+ buf[2] &= ~(ACPI_PDC_C_C2C3_FFH);
+
obj->type = ACPI_TYPE_BUFFER;
obj->buffer.length = 12;
obj->buffer.pointer = (u8 *) buf;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index e6a4b564cca..868de3d5c39 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -23,6 +23,15 @@ static unsigned long acpi_realmode;
static char temp_stack[10240];
#endif
+/* XXX: this macro should move to asm-x86/segment.h and be shared with the
+ boot code... */
+#define GDT_ENTRY(flags, base, limit) \
+ (((u64)(base & 0xff000000) << 32) | \
+ ((u64)flags << 40) | \
+ ((u64)(limit & 0x00ff0000) << 32) | \
+ ((u64)(base & 0x00ffffff) << 16) | \
+ ((u64)(limit & 0x0000ffff)))
+
/**
* acpi_save_state_mem - save kernel state
*
@@ -51,18 +60,27 @@ int acpi_save_state_mem(void)
header->video_mode = saved_video_mode;
header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
+
+ /*
+ * Set up the wakeup GDT. We set these up as Big Real Mode,
+ * that is, with limits set to 4 GB. At least the Lenovo
+ * Thinkpad X61 is known to need this for the video BIOS
+ * initialization quirk to work; this is likely to also
+ * be the case for other laptops or integrated video devices.
+ */
+
/* GDT[0]: GDT self-pointer */
header->wakeup_gdt[0] =
(u64)(sizeof(header->wakeup_gdt) - 1) +
((u64)(acpi_wakeup_address +
((char *)&header->wakeup_gdt - (char *)acpi_realmode))
<< 16);
- /* GDT[1]: real-mode-like code segment */
- header->wakeup_gdt[1] = (0x009bULL << 40) +
- ((u64)acpi_wakeup_address << 16) + 0xffff;
- /* GDT[2]: real-mode-like data segment */
- header->wakeup_gdt[2] = (0x0093ULL << 40) +
- ((u64)acpi_wakeup_address << 16) + 0xffff;
+ /* GDT[1]: big real mode-like code segment */
+ header->wakeup_gdt[1] =
+ GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
+ /* GDT[2]: big real mode-like data segment */
+ header->wakeup_gdt[2] =
+ GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff);
#ifndef CONFIG_64BIT
store_gdt((struct desc_ptr *)&header->pmode_gdt);
@@ -140,6 +158,8 @@ static int __init acpi_sleep_setup(char *str)
acpi_realmode_flags |= 2;
if (strncmp(str, "s3_beep", 7) == 0)
acpi_realmode_flags |= 4;
+ if (strncmp(str, "old_ordering", 12) == 0)
+ acpi_old_suspend_ordering();
str = strchr(str, ',');
if (str != NULL)
str += strspn(str, ", \t");
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 65c7857a90d..2763cb37b55 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,6 +1,6 @@
#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/kprobes.h>
#include <linux/mm.h>
@@ -143,7 +143,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
#ifdef CONFIG_X86_64
extern char __vsyscall_0;
-static inline const unsigned char*const * find_nop_table(void)
+const unsigned char *const *find_nop_table(void)
{
return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
@@ -162,7 +162,7 @@ static const struct nop {
{ -1, NULL }
};
-static const unsigned char*const * find_nop_table(void)
+const unsigned char *const *find_nop_table(void)
{
const unsigned char *const *noptable = intel_nops;
int i;
@@ -279,7 +279,7 @@ struct smp_alt_module {
struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
-static DEFINE_SPINLOCK(smp_alt);
+static DEFINE_MUTEX(smp_alt);
static int smp_mode = 1; /* protected by smp_alt */
void alternatives_smp_module_add(struct module *mod, char *name,
@@ -312,12 +312,12 @@ void alternatives_smp_module_add(struct module *mod, char *name,
__func__, smp->locks, smp->locks_end,
smp->text, smp->text_end, smp->name);
- spin_lock(&smp_alt);
+ mutex_lock(&smp_alt);
list_add_tail(&smp->next, &smp_alt_modules);
if (boot_cpu_has(X86_FEATURE_UP))
alternatives_smp_unlock(smp->locks, smp->locks_end,
smp->text, smp->text_end);
- spin_unlock(&smp_alt);
+ mutex_unlock(&smp_alt);
}
void alternatives_smp_module_del(struct module *mod)
@@ -327,17 +327,17 @@ void alternatives_smp_module_del(struct module *mod)
if (smp_alt_once || noreplace_smp)
return;
- spin_lock(&smp_alt);
+ mutex_lock(&smp_alt);
list_for_each_entry(item, &smp_alt_modules, next) {
if (mod != item->mod)
continue;
list_del(&item->next);
- spin_unlock(&smp_alt);
+ mutex_unlock(&smp_alt);
DPRINTK("%s: %s\n", __func__, item->name);
kfree(item);
return;
}
- spin_unlock(&smp_alt);
+ mutex_unlock(&smp_alt);
}
void alternatives_smp_switch(int smp)
@@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp)
return;
BUG_ON(!smp && (num_online_cpus() > 1));
- spin_lock(&smp_alt);
+ mutex_lock(&smp_alt);
/*
* Avoid unnecessary switches because it forces JIT based VMs to
@@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp)
mod->text, mod->text_end);
}
smp_mode = smp;
- spin_unlock(&smp_alt);
+ mutex_unlock(&smp_alt);
}
#endif
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 6dea8306d8c..a437d027f20 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -82,6 +82,11 @@ int pic_mode;
/* Have we found an MP table */
int smp_found_config;
+static struct resource lapic_resource = {
+ .name = "Local APIC",
+ .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
+};
+
static unsigned int calibration_result;
static int lapic_next_event(unsigned long delta,
@@ -969,7 +974,7 @@ void __cpuinit setup_local_APIC(void)
* Double-check whether this APIC is really registered.
*/
if (!apic_id_registered())
- BUG();
+ WARN_ON_ONCE(1);
/*
* Intel recommends to set DFR, LDR and TPR before enabling
@@ -1335,6 +1340,10 @@ void __init smp_intr_init(void)
/* IPI for generic function call */
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+
+ /* IPI for single call function */
+ set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+ call_function_single_interrupt);
}
#endif
@@ -1720,3 +1729,21 @@ static int __init apic_set_verbosity(char *str)
}
__setup("apic=", apic_set_verbosity);
+static int __init lapic_insert_resource(void)
+{
+ if (!apic_phys)
+ return -1;
+
+ /* Put local APIC into the resource map. */
+ lapic_resource.start = apic_phys;
+ lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+ insert_resource(&iomem_resource, &lapic_resource);
+
+ return 0;
+}
+
+/*
+ * need call insert after e820_reserve_resources()
+ * that is using request_resource
+ */
+late_initcall(lapic_insert_resource);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 00e6d137095..bf9b441331e 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -204,6 +204,7 @@
#include <linux/module.h>
#include <linux/poll.h>
+#include <linux/smp_lock.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/timer.h>
@@ -1212,9 +1213,9 @@ static int suspend(int vetoable)
if (err != APM_SUCCESS)
apm_error("suspend", err);
err = (err == APM_SUCCESS) ? 0 : -EIO;
- device_power_up();
+ device_power_up(PMSG_RESUME);
local_irq_enable();
- device_resume();
+ device_resume(PMSG_RESUME);
queue_event(APM_NORMAL_RESUME, NULL);
spin_lock(&user_list_lock);
for (as = user_list; as != NULL; as = as->next) {
@@ -1239,7 +1240,7 @@ static void standby(void)
apm_error("standby", err);
local_irq_disable();
- device_power_up();
+ device_power_up(PMSG_RESUME);
local_irq_enable();
}
@@ -1325,7 +1326,7 @@ static void check_events(void)
ignore_bounce = 1;
if ((event != APM_NORMAL_RESUME)
|| (ignore_normal_resume == 0)) {
- device_resume();
+ device_resume(PMSG_RESUME);
queue_event(event, NULL);
}
ignore_normal_resume = 0;
@@ -1549,10 +1550,12 @@ static int do_open(struct inode *inode, struct file *filp)
{
struct apm_user *as;
+ lock_kernel();
as = kmalloc(sizeof(*as), GFP_KERNEL);
if (as == NULL) {
printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
sizeof(*as));
+ unlock_kernel();
return -ENOMEM;
}
as->magic = APM_BIOS_MAGIC;
@@ -1574,6 +1577,7 @@ static int do_open(struct inode *inode, struct file *filp)
user_list = as;
spin_unlock(&user_list_lock);
filp->private_data = as;
+ unlock_kernel();
return 0;
}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 3295e7c08fe..bacf5deeec2 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -34,7 +34,7 @@ int main(void)
ENTRY(pid);
BLANK();
#undef ENTRY
-#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct thread_info, entry))
+#define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry))
ENTRY(flags);
ENTRY(addr_limit);
ENTRY(preempt_count);
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
index 958526d6a74..7c36fb8a28d 100644
--- a/arch/x86/kernel/cpu/amd_64.c
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -199,10 +199,15 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
* Don't do it for gbpages because there seems very little
* benefit in doing so.
*/
- if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
- (tseg >> PMD_SHIFT) <
- (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
+ if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
+ printk(KERN_DEBUG "tseg: %010llx\n", tseg);
+ if ((tseg>>PMD_SHIFT) <
+ (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
+ ((tseg>>PMD_SHIFT) <
+ (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
+ (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
set_memory_4k((unsigned long)__va(tseg), 1);
+ }
}
}
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
index 13526fd5cce..1d181c40e2e 100644
--- a/arch/x86/kernel/cpu/centaur_64.c
+++ b/arch/x86/kernel/cpu/centaur_64.c
@@ -10,20 +10,12 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
{
if (c->x86 == 0x6 && c->x86_model >= 0xf)
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
+ set_cpu_cap(c, X86_FEATURE_SYSENTER32);
}
static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
{
- /* Cache sizes */
- unsigned n;
-
- n = c->extended_cpuid_level;
- if (n >= 0x80000008) {
- unsigned eax = cpuid_eax(0x80000008);
- c->x86_virt_bits = (eax >> 8) & 0xff;
- c->x86_phys_bits = eax & 0xff;
- }
-
if (c->x86 == 0x6 && c->x86_model >= 0xf) {
c->x86_cache_alignment = c->x86_clflush_size * 2;
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index 75185023529..7b8cc72feb4 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -98,7 +98,7 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c)
void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
{
- unsigned int n, dummy, eax, ebx, ecx, edx;
+ unsigned int n, dummy, ebx, ecx, edx;
n = c->extended_cpuid_level;
@@ -121,11 +121,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
c->x86_cache_size, ecx & 0xFF);
}
- if (n >= 0x80000008) {
- cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
- c->x86_virt_bits = (eax >> 8) & 0xff;
- c->x86_phys_bits = eax & 0xff;
- }
}
void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -314,6 +309,16 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
if (c->extended_cpuid_level >= 0x80000007)
c->x86_power = cpuid_edx(0x80000007);
+ if (c->extended_cpuid_level >= 0x80000008) {
+ u32 eax = cpuid_eax(0x80000008);
+
+ c->x86_virt_bits = (eax >> 8) & 0xff;
+ c->x86_phys_bits = eax & 0xff;
+ }
+
+ /* Assume all 64-bit CPUs support 32-bit syscall */
+ set_cpu_cap(c, X86_FEATURE_SYSCALL32);
+
if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
cpu_devs[c->x86_vendor]->c_early_init)
cpu_devs[c->x86_vendor]->c_early_init(c);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fe9224c51d3..70609efdf1d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -226,6 +226,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
if (cpu_has_bts)
ds_init_intel(c);
+
+#ifdef CONFIG_X86_NUMAQ
+ numaq_tsc_disable();
+#endif
}
static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c
index fcb1cc9d75c..1019c58d39f 100644
--- a/arch/x86/kernel/cpu/intel_64.c
+++ b/arch/x86/kernel/cpu/intel_64.c
@@ -12,6 +12,8 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
(c->x86 == 0x6 && c->x86_model >= 0x0e))
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
+ set_cpu_cap(c, X86_FEATURE_SYSENTER32);
}
/*
@@ -52,9 +54,6 @@ static void __cpuinit srat_detect_node(void)
static void __cpuinit init_intel(struct cpuinfo_x86 *c)
{
- /* Cache sizes */
- unsigned n;
-
init_intel_cacheinfo(c);
if (c->cpuid_level > 9) {
unsigned eax = cpuid_eax(10);
@@ -76,13 +75,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
if (cpu_has_bts)
ds_init_intel(c);
- n = c->extended_cpuid_level;
- if (n >= 0x80000008) {
- unsigned eax = cpuid_eax(0x80000008);
- c->x86_virt_bits = (eax >> 8) & 0xff;
- c->x86_phys_bits = eax & 0xff;
- }
-
if (c->x86 == 15)
c->x86_cache_alignment = c->x86_clflush_size * 2;
if (c->x86 == 6)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 501ca1cea27..c4a7ec31394 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/smp_lock.h>
#include <linux/string.h>
#include <linux/rcupdate.h>
#include <linux/kallsyms.h>
@@ -363,7 +364,7 @@ static void mcheck_check_cpu(void *info)
static void mcheck_timer(struct work_struct *work)
{
- on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
+ on_each_cpu(mcheck_check_cpu, NULL, 1);
/*
* Alert userspace if needed. If we logged an MCE, reduce the
@@ -532,10 +533,12 @@ static int open_exclu; /* already open exclusive? */
static int mce_open(struct inode *inode, struct file *file)
{
+ lock_kernel();
spin_lock(&mce_state_lock);
if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
spin_unlock(&mce_state_lock);
+ unlock_kernel();
return -EBUSY;
}
@@ -544,6 +547,7 @@ static int mce_open(struct inode *inode, struct file *file)
open_count++;
spin_unlock(&mce_state_lock);
+ unlock_kernel();
return nonseekable_open(inode, file);
}
@@ -617,7 +621,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
* Collect entries that were still getting written before the
* synchronize.
*/
- on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
+ on_each_cpu(collect_tscs, cpu_tsc, 1);
for (i = next; i < MCE_LOG_LEN; i++) {
if (mcelog.entry[i].finished &&
mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
@@ -742,7 +746,7 @@ static void mce_restart(void)
if (next_interval)
cancel_delayed_work(&mcheck_work);
/* Timer race is harmless here */
- on_each_cpu(mce_init, NULL, 1, 1);
+ on_each_cpu(mce_init, NULL, 1);
next_interval = check_interval * HZ;
if (next_interval)
schedule_delayed_work(&mcheck_work,
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index 00ccb6c14ec..cc1fccdd31e 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -59,7 +59,7 @@ static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
static void mce_work_fn(struct work_struct *work)
{
- on_each_cpu(mce_checkregs, NULL, 1, 1);
+ on_each_cpu(mce_checkregs, NULL, 1);
schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
}
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 105afe12beb..6f23969c8fa 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -223,7 +223,7 @@ static void set_mtrr(unsigned int reg, unsigned long base,
atomic_set(&data.gate,0);
/* Start the ball rolling on other CPUs */
- if (smp_call_function(ipi_handler, &data, 1, 0) != 0)
+ if (smp_call_function(ipi_handler, &data, 0) != 0)
panic("mtrr: timed out waiting for other CPUs\n");
local_irq_save(flags);
@@ -1682,7 +1682,7 @@ void mtrr_ap_init(void)
*/
void mtrr_save_state(void)
{
- smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1);
+ smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
}
static int __init mtrr_init_finialize(void)
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 2e9bef6e3aa..6d4bdc02388 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -189,7 +189,7 @@ void disable_lapic_nmi_watchdog(void)
if (atomic_read(&nmi_active) <= 0)
return;
- on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
+ on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
if (wd_ops)
wd_ops->unreserve();
@@ -213,7 +213,7 @@ void enable_lapic_nmi_watchdog(void)
return;
}
- on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+ on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
touch_nmi_watchdog();
}
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index daff52a6224..2de5fa2bbf7 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -33,6 +33,7 @@
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/smp.h>
+#include <linux/smp_lock.h>
#include <linux/major.h>
#include <linux/fs.h>
#include <linux/smp_lock.h>
@@ -95,7 +96,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
for (; count; count -= 16) {
cmd.eax = pos;
cmd.ecx = pos >> 32;
- smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
+ smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1);
if (copy_to_user(tmp, &cmd, 16))
return -EFAULT;
tmp += 16;
@@ -107,15 +108,23 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
static int cpuid_open(struct inode *inode, struct file *file)
{
- unsigned int cpu = iminor(file->f_path.dentry->d_inode);
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-
- if (cpu >= NR_CPUS || !cpu_online(cpu))
- return -ENXIO; /* No such CPU */
+ unsigned int cpu;
+ struct cpuinfo_x86 *c;
+ int ret = 0;
+
+ lock_kernel();
+
+ cpu = iminor(file->f_path.dentry->d_inode);
+ if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+ ret = -ENXIO; /* No such CPU */
+ goto out;
+ }
+ c = &cpu_data(cpu);
if (c->cpuid_level < 0)
- return -EIO; /* CPUID not supported */
-
- return 0;
+ ret = -EIO; /* CPUID not supported */
+out:
+ unlock_kernel();
+ return ret;
}
/*
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d0335853ff5..28c29180b38 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -19,6 +19,7 @@
#include <linux/mm.h>
#include <linux/pfn.h>
#include <linux/suspend.h>
+#include <linux/firmware-map.h>
#include <asm/pgtable.h>
#include <asm/page.h>
@@ -27,7 +28,22 @@
#include <asm/setup.h>
#include <asm/trampoline.h>
+/*
+ * The e820 map is the map that gets modified e.g. with command line parameters
+ * and that is also registered with modifications in the kernel resource tree
+ * with the iomem_resource as parent.
+ *
+ * The e820_saved is directly saved after the BIOS-provided memory map is
+ * copied. It doesn't get modified afterwards. It's registered for the
+ * /sys/firmware/memmap interface.
+ *
+ * That memory map is not modified and is used as base for kexec. The kexec'd
+ * kernel should get the same memory map as the firmware provides. Then the
+ * user can e.g. boot the original kernel with mem=1G while still booting the
+ * next kernel with full memory.
+ */
struct e820map e820;
+struct e820map e820_saved;
/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0xaeedbabe;
@@ -398,8 +414,9 @@ static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
return __append_e820_map(biosmap, nr_map);
}
-u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
- unsigned new_type)
+static u64 __init e820_update_range_map(struct e820map *e820x, u64 start,
+ u64 size, unsigned old_type,
+ unsigned new_type)
{
int i;
u64 real_updated_size = 0;
@@ -410,7 +427,7 @@ u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
size = ULLONG_MAX - start;
for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
+ struct e820entry *ei = &e820x->map[i];
u64 final_start, final_end;
if (ei->type != old_type)
continue;
@@ -438,6 +455,19 @@ u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
return real_updated_size;
}
+u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
+ unsigned new_type)
+{
+ return e820_update_range_map(&e820, start, size, old_type, new_type);
+}
+
+static u64 __init e820_update_range_saved(u64 start, u64 size,
+ unsigned old_type, unsigned new_type)
+{
+ return e820_update_range_map(&e820_saved, start, size, old_type,
+ new_type);
+}
+
/* make e820 not cover the range */
u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
int checktype)
@@ -487,6 +517,15 @@ void __init update_e820(void)
printk(KERN_INFO "modified physical RAM map:\n");
e820_print_map("modified");
}
+static void __init update_e820_saved(void)
+{
+ int nr_map;
+
+ nr_map = e820_saved.nr_map;
+ if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
+ return;
+ e820_saved.nr_map = nr_map;
+}
#define MAX_GAP_END 0x100000000ull
/*
* Search for a gap in the e820 memory space from start_addr to end_addr.
@@ -991,8 +1030,10 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
addr = round_down(start + size - sizet, align);
e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
+ e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
printk(KERN_INFO "update e820 for early_reserve_e820\n");
update_e820();
+ update_e820_saved();
return addr;
}
@@ -1008,30 +1049,51 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
#endif
/*
- * Last pfn which the user wants to use.
- */
-unsigned long __initdata end_user_pfn = MAX_ARCH_PFN;
-
-/*
* Find the highest page frame number we have available
*/
-unsigned long __init e820_end_of_ram(void)
+static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
{
- unsigned long last_pfn;
+ int i;
+ unsigned long last_pfn = 0;
unsigned long max_arch_pfn = MAX_ARCH_PFN;
- last_pfn = find_max_pfn_with_active_regions();
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ unsigned long start_pfn;
+ unsigned long end_pfn;
+
+ if (ei->type != type)
+ continue;
+
+ start_pfn = ei->addr >> PAGE_SHIFT;
+ end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
+
+ if (start_pfn >= limit_pfn)
+ continue;
+ if (end_pfn > limit_pfn) {
+ last_pfn = limit_pfn;
+ break;
+ }
+ if (end_pfn > last_pfn)
+ last_pfn = end_pfn;
+ }
if (last_pfn > max_arch_pfn)
last_pfn = max_arch_pfn;
- if (last_pfn > end_user_pfn)
- last_pfn = end_user_pfn;
printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
last_pfn, max_arch_pfn);
return last_pfn;
}
+unsigned long __init e820_end_of_ram_pfn(void)
+{
+ return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
+}
+unsigned long __init e820_end_of_low_ram_pfn(void)
+{
+ return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
+}
/*
* Finds an active region in the address range from start_pfn to last_pfn and
* returns its range in ei_startpfn and ei_endpfn for the e820 entry.
@@ -1062,12 +1124,6 @@ int __init e820_find_active_region(const struct e820entry *ei,
if (*ei_endpfn > last_pfn)
*ei_endpfn = last_pfn;
- /* Obey end_user_pfn to save on memmap */
- if (*ei_startpfn >= end_user_pfn)
- return 0;
- if (*ei_endpfn > end_user_pfn)
- *ei_endpfn = end_user_pfn;
-
return 1;
}
@@ -1113,6 +1169,8 @@ static void early_panic(char *msg)
panic(msg);
}
+static int userdef __initdata;
+
/* "mem=nopentium" disables the 4MB page tables. */
static int __init parse_memopt(char *p)
{
@@ -1128,22 +1186,22 @@ static int __init parse_memopt(char *p)
}
#endif
+ userdef = 1;
mem_size = memparse(p, &p);
- end_user_pfn = mem_size>>PAGE_SHIFT;
- e820_update_range(mem_size, ULLONG_MAX - mem_size,
- E820_RAM, E820_RESERVED);
+ e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
return 0;
}
early_param("mem", parse_memopt);
-static int userdef __initdata;
-
static int __init parse_memmap_opt(char *p)
{
char *oldp;
u64 start_at, mem_size;
+ if (!p)
+ return -EINVAL;
+
if (!strcmp(p, "exactmap")) {
#ifdef CONFIG_CRASH_DUMP
/*
@@ -1151,9 +1209,7 @@ static int __init parse_memmap_opt(char *p)
* the real mem size before original memory map is
* reset.
*/
- e820_register_active_regions(0, 0, -1UL);
- saved_max_pfn = e820_end_of_ram();
- remove_all_active_ranges();
+ saved_max_pfn = e820_end_of_ram_pfn();
#endif
e820.nr_map = 0;
userdef = 1;
@@ -1175,11 +1231,9 @@ static int __init parse_memmap_opt(char *p)
} else if (*p == '$') {
start_at = memparse(p+1, &p);
e820_add_region(start_at, mem_size, E820_RESERVED);
- } else {
- end_user_pfn = (mem_size >> PAGE_SHIFT);
- e820_update_range(mem_size, ULLONG_MAX - mem_size,
- E820_RAM, E820_RESERVED);
- }
+ } else
+ e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
+
return *p == '\0' ? 0 : -EINVAL;
}
early_param("memmap", parse_memmap_opt);
@@ -1198,6 +1252,17 @@ void __init finish_e820_parsing(void)
}
}
+static inline const char *e820_type_to_string(int e820_type)
+{
+ switch (e820_type) {
+ case E820_RESERVED_KERN:
+ case E820_RAM: return "System RAM";
+ case E820_ACPI: return "ACPI Tables";
+ case E820_NVS: return "ACPI Non-volatile Storage";
+ default: return "reserved";
+ }
+}
+
/*
* Mark e820 reserved areas as busy for the resource manager.
*/
@@ -1209,13 +1274,6 @@ void __init e820_reserve_resources(void)
res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
for (i = 0; i < e820.nr_map; i++) {
- switch (e820.map[i].type) {
- case E820_RESERVED_KERN:
- case E820_RAM: res->name = "System RAM"; break;
- case E820_ACPI: res->name = "ACPI Tables"; break;
- case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
- default: res->name = "reserved";
- }
end = e820.map[i].addr + e820.map[i].size - 1;
#ifndef CONFIG_RESOURCES_64BIT
if (end > 0x100000000ULL) {
@@ -1223,6 +1281,7 @@ void __init e820_reserve_resources(void)
continue;
}
#endif
+ res->name = e820_type_to_string(e820.map[i].type);
res->start = e820.map[i].addr;
res->end = end;
@@ -1230,8 +1289,20 @@ void __init e820_reserve_resources(void)
insert_resource(&iomem_resource, res);
res++;
}
+
+ for (i = 0; i < e820_saved.nr_map; i++) {
+ struct e820entry *entry = &e820_saved.map[i];
+ firmware_map_add_early(entry->addr,
+ entry->addr + entry->size - 1,
+ e820_type_to_string(entry->type));
+ }
}
+/*
+ * Non-standard memory setup can be specified via this quirk:
+ */
+char * (*arch_memory_setup_quirk)(void);
+
char *__init default_machine_specific_memory_setup(void)
{
char *who = "BIOS-e820";
@@ -1272,6 +1343,12 @@ char *__init default_machine_specific_memory_setup(void)
char *__init __attribute__((weak)) machine_specific_memory_setup(void)
{
+ if (arch_memory_setup_quirk) {
+ char *who = arch_memory_setup_quirk();
+
+ if (who)
+ return who;
+ }
return default_machine_specific_memory_setup();
}
@@ -1283,8 +1360,12 @@ char * __init __attribute__((weak)) memory_setup(void)
void __init setup_memory_map(void)
{
+ char *who;
+
+ who = memory_setup();
+ memcpy(&e820_saved, &e820, sizeof(struct e820map));
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
- e820_print_map(memory_setup());
+ e820_print_map(who);
}
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index a4665f37cfc..a0e11c0cc87 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -120,7 +120,18 @@ static struct chipset early_qrk[] __initdata = {
{}
};
-static void __init check_dev_quirk(int num, int slot, int func)
+/**
+ * check_dev_quirk - apply early quirks to a given PCI device
+ * @num: bus number
+ * @slot: slot number
+ * @func: PCI function
+ *
+ * Check the vendor & device ID against the early quirks table.
+ *
+ * If the device is single function, let early_quirks() know so we don't
+ * poke at this device again.
+ */
+static int __init check_dev_quirk(int num, int slot, int func)
{
u16 class;
u16 vendor;
@@ -131,7 +142,7 @@ static void __init check_dev_quirk(int num, int slot, int func)
class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE);
if (class == 0xffff)
- return;
+ return -1; /* no class, treat as single function */
vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID);
@@ -154,7 +165,9 @@ static void __init check_dev_quirk(int num, int slot, int func)
type = read_pci_config_byte(num, slot, func,
PCI_HEADER_TYPE);
if (!(type & 0x80))
- return;
+ return -1;
+
+ return 0;
}
void __init early_quirks(void)
@@ -167,6 +180,9 @@ void __init early_quirks(void)
/* Poor man's PCI discovery */
for (num = 0; num < 32; num++)
for (slot = 0; slot < 32; slot++)
- for (func = 0; func < 8; func++)
- check_dev_quirk(num, slot, func);
+ for (func = 0; func < 8; func++) {
+ /* Only probe function 0 on single fn devices */
+ if (check_dev_quirk(num, slot, func))
+ break;
+ }
}
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 643fd861b72..ff9e7350da5 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -196,7 +196,7 @@ static struct console simnow_console = {
static struct console *early_console = &early_vga_console;
static int early_console_initialized;
-void early_printk(const char *fmt, ...)
+asmlinkage void early_printk(const char *fmt, ...)
{
char buf[512];
int n;
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 94382faeadb..06cc8d4254b 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -473,7 +473,7 @@ void __init efi_enter_virtual_mode(void)
size = md->num_pages << EFI_PAGE_SHIFT;
end = md->phys_addr + size;
- if (PFN_UP(end) <= max_pfn_mapped)
+ if (PFN_UP(end) <= max_low_pfn_mapped)
va = __va(md->phys_addr);
else
va = efi_ioremap(md->phys_addr, size);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 53393c306e1..6bc07f0f120 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -51,6 +51,7 @@
#include <asm/percpu.h>
#include <asm/dwarf2.h>
#include <asm/processor-flags.h>
+#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
/*
@@ -1024,6 +1025,7 @@ ENTRY(xen_sysenter_target)
RING0_INT_FRAME
addl $5*4, %esp /* remove xen-provided frame */
jmp sysenter_past_esp
+ CFI_ENDPROC
ENTRY(xen_hypervisor_callback)
CFI_STARTPROC
@@ -1110,6 +1112,77 @@ ENDPROC(xen_failsafe_callback)
#endif /* CONFIG_XEN */
+#ifdef CONFIG_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+ENTRY(mcount)
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %eax
+ subl $MCOUNT_INSN_SIZE, %eax
+
+.globl mcount_call
+mcount_call:
+ call ftrace_stub
+
+ popl %edx
+ popl %ecx
+ popl %eax
+
+ ret
+END(mcount)
+
+ENTRY(ftrace_caller)
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %eax
+ movl 0x4(%ebp), %edx
+ subl $MCOUNT_INSN_SIZE, %eax
+
+.globl ftrace_call
+ftrace_call:
+ call ftrace_stub
+
+ popl %edx
+ popl %ecx
+ popl %eax
+
+.globl ftrace_stub
+ftrace_stub:
+ ret
+END(ftrace_caller)
+
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+
+ENTRY(mcount)
+ cmpl $ftrace_stub, ftrace_trace_function
+ jnz trace
+.globl ftrace_stub
+ftrace_stub:
+ ret
+
+ /* taken from glibc */
+trace:
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %eax
+ movl 0x4(%ebp), %edx
+ subl $MCOUNT_INSN_SIZE, %eax
+
+ call *ftrace_trace_function
+
+ popl %edx
+ popl %ecx
+ popl %eax
+
+ jmp ftrace_stub
+END(mcount)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FTRACE */
+
.section .rodata,"a"
#include "syscall_table_32.S"
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 07d69f26233..ae63e584c34 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -51,9 +51,115 @@
#include <asm/page.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
+#include <asm/ftrace.h>
.code64
+#ifdef CONFIG_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+ENTRY(mcount)
+
+ subq $0x38, %rsp
+ movq %rax, (%rsp)
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rsi, 24(%rsp)
+ movq %rdi, 32(%rsp)
+ movq %r8, 40(%rsp)
+ movq %r9, 48(%rsp)
+
+ movq 0x38(%rsp), %rdi
+ subq $MCOUNT_INSN_SIZE, %rdi
+
+.globl mcount_call
+mcount_call:
+ call ftrace_stub
+
+ movq 48(%rsp), %r9
+ movq 40(%rsp), %r8
+ movq 32(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 16(%rsp), %rdx
+ movq 8(%rsp), %rcx
+ movq (%rsp), %rax
+ addq $0x38, %rsp
+
+ retq
+END(mcount)
+
+ENTRY(ftrace_caller)
+
+ /* taken from glibc */
+ subq $0x38, %rsp
+ movq %rax, (%rsp)
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rsi, 24(%rsp)
+ movq %rdi, 32(%rsp)
+ movq %r8, 40(%rsp)
+ movq %r9, 48(%rsp)
+
+ movq 0x38(%rsp), %rdi
+ movq 8(%rbp), %rsi
+ subq $MCOUNT_INSN_SIZE, %rdi
+
+.globl ftrace_call
+ftrace_call:
+ call ftrace_stub
+
+ movq 48(%rsp), %r9
+ movq 40(%rsp), %r8
+ movq 32(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 16(%rsp), %rdx
+ movq 8(%rsp), %rcx
+ movq (%rsp), %rax
+ addq $0x38, %rsp
+
+.globl ftrace_stub
+ftrace_stub:
+ retq
+END(ftrace_caller)
+
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+ENTRY(mcount)
+ cmpq $ftrace_stub, ftrace_trace_function
+ jnz trace
+.globl ftrace_stub
+ftrace_stub:
+ retq
+
+trace:
+ /* taken from glibc */
+ subq $0x38, %rsp
+ movq %rax, (%rsp)
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rsi, 24(%rsp)
+ movq %rdi, 32(%rsp)
+ movq %r8, 40(%rsp)
+ movq %r9, 48(%rsp)
+
+ movq 0x38(%rsp), %rdi
+ movq 8(%rbp), %rsi
+ subq $MCOUNT_INSN_SIZE, %rdi
+
+ call *ftrace_trace_function
+
+ movq 48(%rsp), %r9
+ movq 40(%rsp), %r8
+ movq 32(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 16(%rsp), %rdx
+ movq 8(%rsp), %rcx
+ movq (%rsp), %rax
+ addq $0x38, %rsp
+
+ jmp ftrace_stub
+END(mcount)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FTRACE */
+
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
@@ -168,13 +274,13 @@ ENTRY(ret_from_fork)
CFI_ADJUST_CFA_OFFSET -4
call schedule_tail
GET_THREAD_INFO(%rcx)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
jnz rff_trace
rff_action:
RESTORE_REST
testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
je int_ret_from_sys_call
- testl $_TIF_IA32,threadinfo_flags(%rcx)
+ testl $_TIF_IA32,TI_flags(%rcx)
jnz int_ret_from_sys_call
RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
jmp ret_from_sys_call
@@ -243,7 +349,8 @@ ENTRY(system_call_after_swapgs)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
+ TI_flags(%rcx)
jnz tracesys
cmpq $__NR_syscall_max,%rax
ja badsys
@@ -262,7 +369,7 @@ sysret_check:
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- movl threadinfo_flags(%rcx),%edx
+ movl TI_flags(%rcx),%edx
andl %edi,%edx
jnz sysret_careful
CFI_REMEMBER_STATE
@@ -305,7 +412,7 @@ sysret_signal:
leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
xorl %esi,%esi # oldset -> arg2
call ptregscall_common
-1: movl $_TIF_NEED_RESCHED,%edi
+1: movl $_TIF_WORK_MASK,%edi
/* Use IRET because user could have changed frame. This
works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
DISABLE_INTERRUPTS(CLBR_NONE)
@@ -347,10 +454,10 @@ int_ret_from_sys_call:
int_with_check:
LOCKDEP_SYS_EXIT_IRQ
GET_THREAD_INFO(%rcx)
- movl threadinfo_flags(%rcx),%edx
+ movl TI_flags(%rcx),%edx
andl %edi,%edx
jnz int_careful
- andl $~TS_COMPAT,threadinfo_status(%rcx)
+ andl $~TS_COMPAT,TI_status(%rcx)
jmp retint_swapgs
/* Either reschedule or signal or syscall exit tracking needed. */
@@ -393,7 +500,7 @@ int_signal:
movq %rsp,%rdi # &ptregs -> arg1
xorl %esi,%esi # oldset -> arg2
call do_notify_resume
-1: movl $_TIF_NEED_RESCHED,%edi
+1: movl $_TIF_WORK_MASK,%edi
int_restore_rest:
RESTORE_REST
DISABLE_INTERRUPTS(CLBR_NONE)
@@ -558,7 +665,7 @@ retint_with_reschedule:
movl $_TIF_WORK_MASK,%edi
retint_check:
LOCKDEP_SYS_EXIT_IRQ
- movl threadinfo_flags(%rcx),%edx
+ movl TI_flags(%rcx),%edx
andl %edi,%edx
CFI_REMEMBER_STATE
jnz retint_careful
@@ -646,17 +753,16 @@ retint_signal:
RESTORE_REST
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- movl $_TIF_NEED_RESCHED,%edi
GET_THREAD_INFO(%rcx)
- jmp retint_check
+ jmp retint_with_reschedule
#ifdef CONFIG_PREEMPT
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
ENTRY(retint_kernel)
- cmpl $0,threadinfo_preempt_count(%rcx)
+ cmpl $0,TI_preempt_count(%rcx)
jnz retint_restore_args
- bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
+ bt $TIF_NEED_RESCHED,TI_flags(%rcx)
jnc retint_restore_args
bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
jnc retint_restore_args
@@ -710,6 +816,9 @@ END(invalidate_interrupt\num)
ENTRY(call_function_interrupt)
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
+ENTRY(call_function_single_interrupt)
+ apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
+END(call_function_single_interrupt)
ENTRY(irq_move_cleanup_interrupt)
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
@@ -819,7 +928,7 @@ paranoid_restore\trace:
jmp irq_return
paranoid_userspace\trace:
GET_THREAD_INFO(%rcx)
- movl threadinfo_flags(%rcx),%ebx
+ movl TI_flags(%rcx),%ebx
andl $_TIF_WORK_MASK,%ebx
jz paranoid_swapgs\trace
movq %rsp,%rdi /* &pt_regs */
@@ -917,7 +1026,7 @@ error_exit:
testl %eax,%eax
jne retint_kernel
LOCKDEP_SYS_EXIT_IRQ
- movl threadinfo_flags(%rcx),%edx
+ movl TI_flags(%rcx),%edx
movl $_TIF_WORK_MASK,%edi
andl %edi,%edx
jnz retint_careful
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
new file mode 100644
index 00000000000..ab115cd15fd
--- /dev/null
+++ b/arch/x86/kernel/ftrace.c
@@ -0,0 +1,141 @@
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes to Ingo Molnar, for suggesting the idea.
+ * Mathieu Desnoyers, for suggesting postponing the modifications.
+ * Arjan van de Ven, for keeping me straight, and explaining to me
+ * the dangers of modifying code on the run.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/alternative.h>
+#include <asm/ftrace.h>
+
+
+/* Long is fine, even if it is only 4 bytes ;-) */
+static long *ftrace_nop;
+
+union ftrace_code_union {
+ char code[MCOUNT_INSN_SIZE];
+ struct {
+ char e8;
+ int offset;
+ } __attribute__((packed));
+};
+
+
+static int notrace ftrace_calc_offset(long ip, long addr)
+{
+ return (int)(addr - ip);
+}
+
+notrace unsigned char *ftrace_nop_replace(void)
+{
+ return (char *)ftrace_nop;
+}
+
+notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+ static union ftrace_code_union calc;
+
+ calc.e8 = 0xe8;
+ calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
+
+ /*
+ * No locking needed, this must be called via kstop_machine
+ * which in essence is like running on a uniprocessor machine.
+ */
+ return calc.code;
+}
+
+notrace int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+ unsigned char *new_code)
+{
+ unsigned replaced;
+ unsigned old = *(unsigned *)old_code; /* 4 bytes */
+ unsigned new = *(unsigned *)new_code; /* 4 bytes */
+ unsigned char newch = new_code[4];
+ int faulted = 0;
+
+ /*
+ * Note: Due to modules and __init, code can
+ * disappear and change, we need to protect against faulting
+ * as well as code changing.
+ *
+ * No real locking needed, this code is run through
+ * kstop_machine.
+ */
+ asm volatile (
+ "1: lock\n"
+ " cmpxchg %3, (%2)\n"
+ " jnz 2f\n"
+ " movb %b4, 4(%2)\n"
+ "2:\n"
+ ".section .fixup, \"ax\"\n"
+ "3: movl $1, %0\n"
+ " jmp 2b\n"
+ ".previous\n"
+ _ASM_EXTABLE(1b, 3b)
+ : "=r"(faulted), "=a"(replaced)
+ : "r"(ip), "r"(new), "c"(newch),
+ "0"(faulted), "a"(old)
+ : "memory");
+ sync_core();
+
+ if (replaced != old && replaced != new)
+ faulted = 2;
+
+ return faulted;
+}
+
+notrace int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long ip = (unsigned long)(&ftrace_call);
+ unsigned char old[MCOUNT_INSN_SIZE], *new;
+ int ret;
+
+ memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(ip, (unsigned long)func);
+ ret = ftrace_modify_code(ip, old, new);
+
+ return ret;
+}
+
+notrace int ftrace_mcount_set(unsigned long *data)
+{
+ unsigned long ip = (long)(&mcount_call);
+ unsigned long *addr = data;
+ unsigned char old[MCOUNT_INSN_SIZE], *new;
+
+ /*
+ * Replace the mcount stub with a pointer to the
+ * ip recorder function.
+ */
+ memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(ip, *addr);
+ *addr = ftrace_modify_code(ip, old, new);
+
+ return 0;
+}
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+ const unsigned char *const *noptable = find_nop_table();
+
+ /* This is running in kstop_machine */
+
+ ftrace_mcount_set(data);
+
+ ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE];
+
+ return 0;
+}
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 45e84acca8a..711f11c30b0 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -8,6 +8,7 @@
* Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
*/
+#include <linux/kernel.h>
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/string.h>
@@ -20,6 +21,7 @@
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
+#include <asm/pgtable.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
@@ -208,14 +210,79 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
BUG();
}
+static __init void map_low_mmrs(void)
+{
+ init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
+ init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
+}
+
+enum map_type {map_wb, map_uc};
+
+static void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
+{
+ unsigned long bytes, paddr;
+
+ paddr = base << shift;
+ bytes = (1UL << shift);
+ printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
+ paddr + bytes);
+ if (map_type == map_uc)
+ init_extra_mapping_uc(paddr, bytes);
+ else
+ init_extra_mapping_wb(paddr, bytes);
+
+}
+static __init void map_gru_high(int max_pnode)
+{
+ union uvh_rh_gam_gru_overlay_config_mmr_u gru;
+ int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+ gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
+ if (gru.s.enable)
+ map_high("GRU", gru.s.base, shift, map_wb);
+}
+
+static __init void map_config_high(int max_pnode)
+{
+ union uvh_rh_gam_cfg_overlay_config_mmr_u cfg;
+ int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+ cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR);
+ if (cfg.s.enable)
+ map_high("CONFIG", cfg.s.base, shift, map_uc);
+}
+
+static __init void map_mmr_high(int max_pnode)
+{
+ union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
+ int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+ mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
+ if (mmr.s.enable)
+ map_high("MMR", mmr.s.base, shift, map_uc);
+}
+
+static __init void map_mmioh_high(int max_pnode)
+{
+ union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
+ int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+ mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
+ if (mmioh.s.enable)
+ map_high("MMIOH", mmioh.s.base, shift, map_uc);
+}
+
static __init void uv_system_init(void)
{
union uvh_si_addr_map_config_u m_n_config;
union uvh_node_id_u node_id;
unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
+ int max_pnode = 0;
unsigned long mmr_base, present;
+ map_low_mmrs();
+
m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
m_val = m_n_config.s.m_skt;
n_val = m_n_config.s.n_skt;
@@ -281,12 +348,18 @@ static __init void uv_system_init(void)
uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
uv_node_to_blade[nid] = blade;
uv_cpu_to_blade[cpu] = blade;
+ max_pnode = max(pnode, max_pnode);
- printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, pnode %d, nid %d, "
+ printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, "
"lcpu %d, blade %d\n",
cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid,
lcpu, blade);
}
+
+ map_gru_high(max_pnode);
+ map_mmr_high(max_pnode);
+ map_config_high(max_pnode);
+ map_mmioh_high(max_pnode);
}
/*
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ea230ec6905..0ea6a19bfdf 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -36,26 +36,15 @@ static inline void hpet_writel(unsigned long d, unsigned long a)
}
#ifdef CONFIG_X86_64
-
#include <asm/pgtable.h>
-
-static inline void hpet_set_mapping(void)
-{
- set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
- __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
- hpet_virt_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
-}
-
-static inline void hpet_clear_mapping(void)
-{
- hpet_virt_address = NULL;
-}
-
-#else
+#endif
static inline void hpet_set_mapping(void)
{
hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+#ifdef CONFIG_X86_64
+ __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
+#endif
}
static inline void hpet_clear_mapping(void)
@@ -63,7 +52,6 @@ static inline void hpet_clear_mapping(void)
iounmap(hpet_virt_address);
hpet_virt_address = NULL;
}
-#endif
/*
* HPET command line enable / disable
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index deb43785e92..dd7ebee446a 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -1,7 +1,14 @@
#include <linux/module.h>
+
#include <asm/checksum.h>
-#include <asm/desc.h>
#include <asm/pgtable.h>
+#include <asm/desc.h>
+#include <asm/ftrace.h>
+
+#ifdef CONFIG_FTRACE
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(mcount);
+#endif
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 337ec3438a8..558abf4c796 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1569,7 +1569,7 @@ void /*__init*/ print_local_APIC(void *dummy)
void print_all_local_APICs(void)
{
- on_each_cpu(print_local_APIC, NULL, 1, 1);
+ on_each_cpu(print_local_APIC, NULL, 1);
}
void /*__init*/ print_PIC(void)
@@ -2020,7 +2020,7 @@ static inline void init_IO_APIC_traps(void)
* The local APIC irq-chip implementation:
*/
-static void ack_apic(unsigned int irq)
+static void ack_lapic_irq(unsigned int irq)
{
ack_APIC_irq();
}
@@ -2045,9 +2045,17 @@ static struct irq_chip lapic_chip __read_mostly = {
.name = "local-APIC",
.mask = mask_lapic_irq,
.unmask = unmask_lapic_irq,
- .eoi = ack_apic,
+ .ack = ack_lapic_irq,
};
+static void lapic_register_intr(int irq, int vector)
+{
+ irq_desc[irq].status &= ~IRQ_LEVEL;
+ set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
+ "edge");
+ set_intr_gate(vector, interrupt[irq]);
+}
+
static void __init setup_nmi(void)
{
/*
@@ -2247,8 +2255,7 @@ static inline void __init check_timer(void)
printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
- set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
- "fasteoi");
+ lapic_register_intr(0, vector);
apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
enable_8259A_irq(0);
@@ -2280,11 +2287,21 @@ out:
}
/*
- *
- * IRQ's that are handled by the PIC in the MPS IOAPIC case.
- * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
- * Linux doesn't really care, as it's not actually used
- * for any interrupt handling anyway.
+ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
+ * to devices. However there may be an I/O APIC pin available for
+ * this interrupt regardless. The pin may be left unconnected, but
+ * typically it will be reused as an ExtINT cascade interrupt for
+ * the master 8259A. In the MPS case such a pin will normally be
+ * reported as an ExtINT interrupt in the MP table. With ACPI
+ * there is no provision for ExtINT interrupts, and in the absence
+ * of an override it would be treated as an ordinary ISA I/O APIC
+ * interrupt, that is edge-triggered and unmasked by default. We
+ * used to do this, but it caused problems on some systems because
+ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
+ * the same ExtINT cascade interrupt to drive the local APIC of the
+ * bootstrap processor. Therefore we refrain from routing IRQ2 to
+ * the I/O APIC in all cases now. No actual device should request
+ * it anyway. --macro
*/
#define PIC_IRQS (1 << PIC_CASCADE_IR)
@@ -2298,10 +2315,7 @@ void __init setup_IO_APIC(void)
enable_IO_APIC();
- if (acpi_ioapic)
- io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
- else
- io_apic_irqs = ~PIC_IRQS;
+ io_apic_irqs = ~PIC_IRQS;
printk("ENABLING IO-APIC IRQs\n");
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 2b4c40bc12c..6510cde36b3 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1160,7 +1160,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
void print_all_local_APICs (void)
{
- on_each_cpu(print_local_APIC, NULL, 1, 1);
+ on_each_cpu(print_local_APIC, NULL, 1);
}
void __apicdebuginit print_PIC(void)
@@ -1554,7 +1554,7 @@ static inline void init_IO_APIC_traps(void)
}
}
-static void enable_lapic_irq (unsigned int irq)
+static void unmask_lapic_irq(unsigned int irq)
{
unsigned long v;
@@ -1562,7 +1562,7 @@ static void enable_lapic_irq (unsigned int irq)
apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
}
-static void disable_lapic_irq (unsigned int irq)
+static void mask_lapic_irq(unsigned int irq)
{
unsigned long v;
@@ -1575,19 +1575,20 @@ static void ack_lapic_irq (unsigned int irq)
ack_APIC_irq();
}
-static void end_lapic_irq (unsigned int i) { /* nothing */ }
-
-static struct hw_interrupt_type lapic_irq_type __read_mostly = {
- .name = "local-APIC",
- .typename = "local-APIC-edge",
- .startup = NULL, /* startup_irq() not used for IRQ0 */
- .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
- .enable = enable_lapic_irq,
- .disable = disable_lapic_irq,
- .ack = ack_lapic_irq,
- .end = end_lapic_irq,
+static struct irq_chip lapic_chip __read_mostly = {
+ .name = "local-APIC",
+ .mask = mask_lapic_irq,
+ .unmask = unmask_lapic_irq,
+ .ack = ack_lapic_irq,
};
+static void lapic_register_intr(int irq)
+{
+ irq_desc[irq].status &= ~IRQ_LEVEL;
+ set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
+ "edge");
+}
+
static void __init setup_nmi(void)
{
/*
@@ -1714,11 +1715,6 @@ static inline void __init check_timer(void)
apic2 = apic1;
}
- replace_pin_at_irq(0, 0, 0, apic1, pin1);
- apic1 = 0;
- pin1 = 0;
- setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
-
if (pin1 != -1) {
/*
* Ok, does IRQ0 through the IOAPIC work?
@@ -1779,7 +1775,7 @@ static inline void __init check_timer(void)
apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
- irq_desc[0].chip = &lapic_irq_type;
+ lapic_register_intr(0);
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
enable_8259A_irq(0);
@@ -1817,11 +1813,21 @@ static int __init notimercheck(char *s)
__setup("no_timer_check", notimercheck);
/*
- *
- * IRQs that are handled by the PIC in the MPS IOAPIC case.
- * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
- * Linux doesn't really care, as it's not actually used
- * for any interrupt handling anyway.
+ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
+ * to devices. However there may be an I/O APIC pin available for
+ * this interrupt regardless. The pin may be left unconnected, but
+ * typically it will be reused as an ExtINT cascade interrupt for
+ * the master 8259A. In the MPS case such a pin will normally be
+ * reported as an ExtINT interrupt in the MP table. With ACPI
+ * there is no provision for ExtINT interrupts, and in the absence
+ * of an override it would be treated as an ordinary ISA I/O APIC
+ * interrupt, that is edge-triggered and unmasked by default. We
+ * used to do this, but it caused problems on some systems because
+ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
+ * the same ExtINT cascade interrupt to drive the local APIC of the
+ * bootstrap processor. Therefore we refrain from routing IRQ2 to
+ * the I/O APIC in all cases now. No actual device should request
+ * it anyway. --macro
*/
#define PIC_IRQS (1<<2)
@@ -1832,10 +1838,7 @@ void __init setup_IO_APIC(void)
* calling enable_IO_APIC() is moved to setup_local_APIC for BP
*/
- if (acpi_ioapic)
- io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
- else
- io_apic_irqs = ~PIC_IRQS;
+ io_apic_irqs = ~PIC_IRQS;
apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 31f49e8f46a..0373e88de95 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -199,6 +199,10 @@ void __init native_init_IRQ(void)
/* IPI for generic function call */
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+ /* IPI for generic single function call */
+ alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+ call_function_single_interrupt);
+
/* Low priority IPI to cleanup after moving an irq */
set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
#endif
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 21f2bae98c1..a8449571858 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -68,7 +68,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
load_LDT(pc);
mask = cpumask_of_cpu(smp_processor_id());
if (!cpus_equal(current->mm->cpu_vm_mask, mask))
- smp_call_function(flush_ldt, current->mm, 1, 1);
+ smp_call_function(flush_ldt, current->mm, 1);
preempt_enable();
#else
load_LDT(pc);
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index f4960171bc6..8864230d55a 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -11,6 +11,8 @@
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/numa.h>
+#include <linux/ftrace.h>
+
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
@@ -107,6 +109,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
unsigned long page_list[PAGES_NR];
void *control_page;
+ tracer_disable();
+
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 7830dc4a838..9dd9262693a 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -11,6 +11,8 @@
#include <linux/string.h>
#include <linux/reboot.h>
#include <linux/numa.h>
+#include <linux/ftrace.h>
+
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
@@ -184,6 +186,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
unsigned long page_list[PAGES_NR];
void *control_page;
+ tracer_disable();
+
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 9758fea87c5..56b933119a0 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -76,6 +76,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sched.h>
+#include <linux/smp_lock.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/slab.h>
@@ -423,6 +424,7 @@ out:
static int microcode_open (struct inode *unused1, struct file *unused2)
{
+ cycle_kernel_lock();
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}
@@ -489,7 +491,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
#define microcode_dev_exit() do { } while(0)
#endif
-static long get_next_ucode_from_buffer(void **mc, void *buf,
+static long get_next_ucode_from_buffer(void **mc, const u8 *buf,
unsigned long size, long offset)
{
microcode_header_t *mc_header;
@@ -523,7 +525,7 @@ static int cpu_request_microcode(int cpu)
char name[30];
struct cpuinfo_x86 *c = &cpu_data(cpu);
const struct firmware *firmware;
- void *buf;
+ const u8 *buf;
unsigned long size;
long offset = 0;
int error;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 8b6b1e05c30..3b25e49380c 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -726,12 +726,22 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
static struct intel_mp_floating *mpf_found;
/*
+ * Machine specific quirk for finding the SMP config before other setup
+ * activities destroy the table:
+ */
+int (*mach_get_smp_config_quirk)(unsigned int early);
+
+/*
* Scan the memory blocks for an SMP configuration block.
*/
-static void __init __get_smp_config(unsigned early)
+static void __init __get_smp_config(unsigned int early)
{
struct intel_mp_floating *mpf = mpf_found;
+ if (mach_get_smp_config_quirk) {
+ if (mach_get_smp_config_quirk(early))
+ return;
+ }
if (acpi_lapic && early)
return;
/*
@@ -889,10 +899,16 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
return 0;
}
-static void __init __find_smp_config(unsigned reserve)
+int (*mach_find_smp_config_quirk)(unsigned int reserve);
+
+static void __init __find_smp_config(unsigned int reserve)
{
unsigned int address;
+ if (mach_find_smp_config_quirk) {
+ if (mach_find_smp_config_quirk(reserve))
+ return;
+ }
/*
* FIXME: Linux assumes you have 640K of base ram..
* this continues the error...
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 1f3abe048e9..a153b3905f6 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -117,12 +117,20 @@ static int msr_open(struct inode *inode, struct file *file)
{
unsigned int cpu = iminor(file->f_path.dentry->d_inode);
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ int ret = 0;
- if (cpu >= NR_CPUS || !cpu_online(cpu))
- return -ENXIO; /* No such CPU */
- if (!cpu_has(c, X86_FEATURE_MSR))
- return -EIO; /* MSR not supported */
+ lock_kernel();
+ cpu = iminor(file->f_path.dentry->d_inode);
+ if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+ ret = -ENXIO; /* No such CPU */
+ goto out;
+ }
+ c = &cpu_data(cpu);
+ if (!cpu_has(c, X86_FEATURE_MSR))
+ ret = -EIO; /* MSR not supported */
+out:
+ unlock_kernel();
return 0;
}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 8dfe9db87a9..ec024b3baad 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -130,7 +130,7 @@ int __init check_nmi_watchdog(void)
#ifdef CONFIG_SMP
if (nmi_watchdog == NMI_LOCAL_APIC)
- smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
+ smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
#endif
for_each_possible_cpu(cpu)
@@ -171,6 +171,9 @@ int __init check_nmi_watchdog(void)
error:
if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
disable_8259A_irq(0);
+#ifdef CONFIG_X86_32
+ timer_ack = 0;
+#endif
return -1;
}
@@ -269,7 +272,7 @@ static void __acpi_nmi_enable(void *__unused)
void acpi_nmi_enable(void)
{
if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
- on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
+ on_each_cpu(__acpi_nmi_enable, NULL, 1);
}
static void __acpi_nmi_disable(void *__unused)
@@ -283,7 +286,7 @@ static void __acpi_nmi_disable(void *__unused)
void acpi_nmi_disable(void)
{
if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
- on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
+ on_each_cpu(__acpi_nmi_disable, NULL, 1);
}
void setup_apic_nmi_watchdog(void *unused)
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index f0f1de1c4a1..a23e8233b9a 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -93,12 +93,13 @@ int __init get_memcfg_numaq(void)
return 1;
}
-static int __init numaq_tsc_disable(void)
+void __init numaq_tsc_disable(void)
{
+ if (!found_numaq)
+ return;
+
if (num_online_nodes() > 1) {
printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
setup_clear_cpu_cap(X86_FEATURE_TSC);
}
- return 0;
}
-arch_initcall(numaq_tsc_disable);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e7e5652f65b..e0f571d58c1 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -285,7 +285,7 @@ struct pv_time_ops pv_time_ops = {
.get_wallclock = native_get_wallclock,
.set_wallclock = native_set_wallclock,
.sched_clock = native_sched_clock,
- .get_cpu_khz = native_calculate_cpu_khz,
+ .get_tsc_khz = native_calibrate_tsc,
};
struct pv_irq_ops pv_irq_ops = {
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index d0d18db5d2a..c3fe78406d1 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -630,6 +630,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
struct pci_dev *dev;
void *gatt;
int i, error;
+ unsigned long start_pfn, end_pfn;
printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
aper_size = aper_base = info->aper_size = 0;
@@ -674,6 +675,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
aper_base, aper_size>>10);
+
+ /* need to map that range */
+ end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
+ if (end_pfn > max_low_pfn_mapped) {
+ start_pfn = (aper_base>>PAGE_SHIFT);
+ init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
+ }
return 0;
nommu:
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4061d63aabe..4d629c62f4f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,6 +7,12 @@
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/clockchips.h>
+#include <asm/system.h>
+
+unsigned long idle_halt;
+EXPORT_SYMBOL(idle_halt);
+unsigned long idle_nomwait;
+EXPORT_SYMBOL(idle_nomwait);
struct kmem_cache *task_xstate_cachep;
@@ -132,7 +138,7 @@ void cpu_idle_wait(void)
{
smp_mb();
/* kick all the CPUs so that they exit out of pm_idle */
- smp_call_function(do_nothing, NULL, 0, 1);
+ smp_call_function(do_nothing, NULL, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
@@ -325,7 +331,27 @@ static int __init idle_setup(char *str)
pm_idle = poll_idle;
} else if (!strcmp(str, "mwait"))
force_mwait = 1;
- else
+ else if (!strcmp(str, "halt")) {
+ /*
+ * When the boot option of idle=halt is added, halt is
+ * forced to be used for CPU idle. In such case CPU C2/C3
+ * won't be used again.
+ * To continue to load the CPU idle driver, don't touch
+ * the boot_option_idle_override.
+ */
+ pm_idle = default_idle;
+ idle_halt = 1;
+ return 0;
+ } else if (!strcmp(str, "nomwait")) {
+ /*
+ * If the boot option of "idle=nomwait" is added,
+ * it means that mwait will be disabled for CPU C2/C3
+ * states. In such case it won't touch the variable
+ * of boot_option_idle_override.
+ */
+ idle_nomwait = 1;
+ return 0;
+ } else
return -1;
boot_option_idle_override = 1;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 9a139f6c9df..0c3927accb0 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -142,7 +142,10 @@ void cpu_idle(void)
local_irq_disable();
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
+ /* Don't trace irqs off for idle */
+ stop_critical_timings();
pm_idle();
+ start_critical_timings();
}
tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index db5eb963e4d..a8e53626ac9 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -134,7 +134,10 @@ void cpu_idle(void)
*/
local_irq_disable();
enter_idle();
+ /* Don't trace irqs off for idle */
+ stop_critical_timings();
pm_idle();
+ start_critical_timings();
/* In many cases the interrupt that ended idle
has already called exit_idle. But some idle
loops can be woken up without interrupt. */
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 79bdcd11c66..d1385881810 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -266,6 +266,8 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
hpet_print_force_info();
}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1,
+ old_ich_force_enable_hpet_user);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
old_ich_force_enable_hpet_user);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cfcfbefee0b..531b55b8e81 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -394,11 +394,10 @@ static void __init parse_setup_data(void)
}
}
-static void __init reserve_setup_data(void)
+static void __init e820_reserve_setup_data(void)
{
struct setup_data *data;
u64 pa_data;
- char buf[32];
int found = 0;
if (boot_params.hdr.version < 0x0209)
@@ -406,8 +405,6 @@ static void __init reserve_setup_data(void)
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
data = early_ioremap(pa_data, sizeof(*data));
- sprintf(buf, "setup data %x", data->type);
- reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
e820_update_range(pa_data, sizeof(*data)+data->len,
E820_RAM, E820_RESERVED_KERN);
found = 1;
@@ -418,10 +415,29 @@ static void __init reserve_setup_data(void)
return;
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+ memcpy(&e820_saved, &e820, sizeof(struct e820map));
printk(KERN_INFO "extended physical RAM map:\n");
e820_print_map("reserve setup_data");
}
+static void __init reserve_early_setup_data(void)
+{
+ struct setup_data *data;
+ u64 pa_data;
+ char buf[32];
+
+ if (boot_params.hdr.version < 0x0209)
+ return;
+ pa_data = boot_params.hdr.setup_data;
+ while (pa_data) {
+ data = early_ioremap(pa_data, sizeof(*data));
+ sprintf(buf, "setup data %x", data->type);
+ reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
+ pa_data = data->next;
+ early_iounmap(data, sizeof(*data));
+ }
+}
+
/*
* --------- Crashkernel reservation ------------------------------
*/
@@ -580,6 +596,7 @@ void __init setup_arch(char **cmdline_p)
{
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
+ visws_early_detect();
pre_setup_arch_hook();
early_cpu_init();
#else
@@ -626,6 +643,8 @@ void __init setup_arch(char **cmdline_p)
setup_memory_map();
parse_setup_data();
+ /* update the e820_saved too */
+ e820_reserve_setup_data();
copy_edd();
@@ -656,7 +675,7 @@ void __init setup_arch(char **cmdline_p)
parse_early_param();
/* after early param, so could get panic from serial */
- reserve_setup_data();
+ reserve_early_setup_data();
if (acpi_mps_check()) {
#ifdef CONFIG_X86_LOCAL_APIC
@@ -665,6 +684,11 @@ void __init setup_arch(char **cmdline_p)
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
}
+#ifdef CONFIG_PCI
+ if (pci_early_dump_regs)
+ early_dump_pci_devices();
+#endif
+
finish_e820_parsing();
#ifdef CONFIG_X86_32
@@ -691,22 +715,18 @@ void __init setup_arch(char **cmdline_p)
early_gart_iommu_check();
#endif
- e820_register_active_regions(0, 0, -1UL);
/*
* partially used pages are not usable - thus
* we are rounding upwards:
*/
- max_pfn = e820_end_of_ram();
+ max_pfn = e820_end_of_ram_pfn();
/* preallocate 4k for mptable mpc */
early_reserve_e820_mpc_new();
/* update e820 for memory not covered by WB MTRRs */
mtrr_bp_init();
- if (mtrr_trim_uncached_memory(max_pfn)) {
- remove_all_active_ranges();
- e820_register_active_regions(0, 0, -1UL);
- max_pfn = e820_end_of_ram();
- }
+ if (mtrr_trim_uncached_memory(max_pfn))
+ max_pfn = e820_end_of_ram_pfn();
#ifdef CONFIG_X86_32
/* max_low_pfn get updated here */
@@ -718,12 +738,26 @@ void __init setup_arch(char **cmdline_p)
/* How many end-of-memory variables you have, grandma! */
/* need this before calling reserve_initrd */
- max_low_pfn = max_pfn;
+ if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
+ max_low_pfn = e820_end_of_low_ram_pfn();
+ else
+ max_low_pfn = max_pfn;
+
high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
#endif
/* max_pfn_mapped is updated here */
- max_pfn_mapped = init_memory_mapping(0, (max_low_pfn << PAGE_SHIFT));
+ max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
+ max_pfn_mapped = max_low_pfn_mapped;
+
+#ifdef CONFIG_X86_64
+ if (max_pfn > max_low_pfn) {
+ max_pfn_mapped = init_memory_mapping(1UL<<32,
+ max_pfn<<PAGE_SHIFT);
+ /* can we preseve max_low_pfn ?*/
+ max_low_pfn = max_pfn;
+ }
+#endif
/*
* NOTE: On x86-32, only from this point on, fixmaps are ready for use.
@@ -749,9 +783,6 @@ void __init setup_arch(char **cmdline_p)
*/
acpi_boot_table_init();
- /* Remove active ranges so rediscovery with NUMA-awareness happens */
- remove_all_active_ranges();
-
#ifdef CONFIG_ACPI_NUMA
/*
* Parse SRAT to discover nodes.
@@ -823,6 +854,14 @@ void __init setup_arch(char **cmdline_p)
init_cpu_to_node();
#endif
+#ifdef CONFIG_X86_NUMAQ
+ /*
+ * need to check online nodes num, call it
+ * here before time_init/tsc_init
+ */
+ numaq_tsc_disable();
+#endif
+
init_apic_mappings();
ioapic_init_mappings();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 5fc310f746f..cac68430d31 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -343,23 +343,23 @@ static const cpumask_t cpu_mask_none;
/*
* Returns a pointer to the bitmask of CPUs on Node 'node'.
*/
-cpumask_t *_node_to_cpumask_ptr(int node)
+const cpumask_t *_node_to_cpumask_ptr(int node)
{
if (node_to_cpumask_map == NULL) {
printk(KERN_WARNING
"_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
node);
dump_stack();
- return &cpu_online_map;
+ return (const cpumask_t *)&cpu_online_map;
}
if (node >= nr_node_ids) {
printk(KERN_WARNING
"_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n",
node, nr_node_ids);
dump_stack();
- return (cpumask_t *)&cpu_mask_none;
+ return &cpu_mask_none;
}
- return (cpumask_t *)&node_to_cpumask_map[node];
+ return &node_to_cpumask_map[node];
}
EXPORT_SYMBOL(_node_to_cpumask_ptr);
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 0cb7aadc87c..361b7a4c640 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -121,132 +121,23 @@ static void native_smp_send_reschedule(int cpu)
send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
}
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
-
-void lock_ipi_call_lock(void)
+void native_send_call_func_single_ipi(int cpu)
{
- spin_lock_irq(&call_lock);
-}
-
-void unlock_ipi_call_lock(void)
-{
- spin_unlock_irq(&call_lock);
-}
-
-static struct call_data_struct *call_data;
-
-static void __smp_call_function(void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- struct call_data_struct data;
- int cpus = num_online_cpus() - 1;
-
- if (!cpus)
- return;
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- mb();
-
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
+ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
}
-
-/**
- * smp_call_function_mask(): Run a function on a set of other CPUs.
- * @mask: The set of cpus to run on. Must not include the current cpu.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-static int
-native_smp_call_function_mask(cpumask_t mask,
- void (*func)(void *), void *info,
- int wait)
+void native_send_call_func_ipi(cpumask_t mask)
{
- struct call_data_struct data;
cpumask_t allbutself;
- int cpus;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- /* Holding any lock stops cpus from going down. */
- spin_lock(&call_lock);
allbutself = cpu_online_map;
cpu_clear(smp_processor_id(), allbutself);
- cpus_and(mask, mask, allbutself);
- cpus = cpus_weight(mask);
-
- if (!cpus) {
- spin_unlock(&call_lock);
- return 0;
- }
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- wmb();
-
- /* Send a message to other CPUs */
if (cpus_equal(mask, allbutself) &&
cpus_equal(cpu_online_map, cpu_callout_map))
send_IPI_allbutself(CALL_FUNCTION_VECTOR);
else
send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
- spin_unlock(&call_lock);
-
- return 0;
}
static void stop_this_cpu(void *dummy)
@@ -268,18 +159,13 @@ static void stop_this_cpu(void *dummy)
static void native_smp_send_stop(void)
{
- int nolock;
unsigned long flags;
if (reboot_force)
return;
- /* Don't deadlock on the call lock in panic */
- nolock = !spin_trylock(&call_lock);
+ smp_call_function(stop_this_cpu, NULL, 0);
local_irq_save(flags);
- __smp_call_function(stop_this_cpu, NULL, 0, 0);
- if (!nolock)
- spin_unlock(&call_lock);
disable_local_APIC();
local_irq_restore(flags);
}
@@ -301,33 +187,28 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
void smp_call_function_interrupt(struct pt_regs *regs)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
ack_APIC_irq();
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
irq_enter();
- (*func)(info);
+ generic_smp_call_function_interrupt();
#ifdef CONFIG_X86_32
__get_cpu_var(irq_stat).irq_call_count++;
#else
add_pda(irq_call_count, 1);
#endif
irq_exit();
+}
- if (wait) {
- mb();
- atomic_inc(&call_data->finished);
- }
+void smp_call_function_single_interrupt(struct pt_regs *regs)
+{
+ ack_APIC_irq();
+ irq_enter();
+ generic_smp_call_function_single_interrupt();
+#ifdef CONFIG_X86_32
+ __get_cpu_var(irq_stat).irq_call_count++;
+#else
+ add_pda(irq_call_count, 1);
+#endif
+ irq_exit();
}
struct smp_ops smp_ops = {
@@ -338,7 +219,8 @@ struct smp_ops smp_ops = {
.smp_send_stop = native_smp_send_stop,
.smp_send_reschedule = native_smp_send_reschedule,
- .smp_call_function_mask = native_smp_call_function_mask,
+
+ .send_call_func_ipi = native_send_call_func_ipi,
+ .send_call_func_single_ipi = native_send_call_func_single_ipi,
};
EXPORT_SYMBOL_GPL(smp_ops);
-
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e1200b202ed..687376ab07e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -327,12 +327,12 @@ static void __cpuinit start_secondary(void *unused)
* lock helps us to not include this cpu in a currently in progress
* smp_call_function().
*/
- lock_ipi_call_lock();
+ ipi_call_lock_irq();
#ifdef CONFIG_X86_IO_APIC
setup_vector_irq(smp_processor_id());
#endif
cpu_set(smp_processor_id(), cpu_online_map);
- unlock_ipi_call_lock();
+ ipi_call_unlock_irq();
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
setup_secondary_clock();
@@ -939,9 +939,9 @@ do_rest:
inquire_remote_apic(apicid);
}
}
-
+#ifdef CONFIG_X86_64
restore_state:
-
+#endif
if (boot_error) {
/* Try to put things back the way they were before ... */
numa_remove_cpu(cpu); /* was set by numa_add_cpu */
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 3449064d141..99941b37eca 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -25,59 +25,3 @@ __cpuinit void init_gdt(int cpu)
per_cpu(cpu_number, cpu) = cpu;
}
#endif
-
-/**
- * smp_call_function(): Run a function on all other CPUs.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Unused.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
- int wait)
-{
- return smp_call_function_mask(cpu_online_map, func, info, wait);
-}
-EXPORT_SYMBOL(smp_call_function);
-
-/**
- * smp_call_function_single - Run a function on a specific CPU
- * @cpu: The target CPU. Cannot be the calling CPU.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Unused.
- * @wait: If true, wait until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- */
-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- /* prevent preemption and reschedule on another processor */
- int ret;
- int me = get_cpu();
- if (cpu == me) {
- local_irq_disable();
- func(info);
- local_irq_enable();
- put_cpu();
- return 0;
- }
-
- ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
-
- put_cpu();
- return ret;
-}
-EXPORT_SYMBOL(smp_call_function_single);
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index c28c342c162..a03e7f6d90c 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -74,6 +74,7 @@ void save_stack_trace(struct stack_trace *trace)
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
+EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
@@ -81,3 +82,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 5f29f12da50..059ca6ee59b 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -39,9 +39,6 @@
#include "do_timer.h"
-unsigned int cpu_khz; /* Detected as we calibrate the TSC */
-EXPORT_SYMBOL(cpu_khz);
-
int timer_ack;
unsigned long profile_pc(struct pt_regs *regs)
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 39ae8511a13..e3d49c553af 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -56,7 +56,7 @@ static irqreturn_t timer_event_interrupt(int irq, void *dev_id)
/* calibrate_cpu is used on systems with fixed rate TSCs to determine
* processor frequency */
#define TICK_COUNT 100000000
-unsigned long __init native_calculate_cpu_khz(void)
+unsigned long __init calibrate_cpu(void)
{
int tsc_start, tsc_now;
int i, no_ctr_free;
@@ -116,25 +116,11 @@ void __init hpet_time_init(void)
void __init time_init(void)
{
- tsc_calibrate();
-
- cpu_khz = tsc_khz;
- if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
- (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
- cpu_khz = calculate_cpu_khz();
-
- lpj_fine = ((unsigned long)tsc_khz * 1000)/HZ;
-
- if (unsynchronized_tsc())
- mark_tsc_unstable("TSCs unsynchronized");
-
+ tsc_init();
if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
vgetcpu_mode = VGETCPU_RDTSCP;
else
vgetcpu_mode = VGETCPU_LSL;
- printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- init_tsc_clocksource();
late_time_init = choose_time_init();
}
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index 9bb2363851a..fec1ecedc9b 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -238,6 +238,6 @@ static void do_flush_tlb_all(void *info)
void flush_tlb_all(void)
{
- on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+ on_each_cpu(do_flush_tlb_all, NULL, 1);
}
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 5039d0f097a..dcbf7a1159e 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -275,5 +275,5 @@ static void do_flush_tlb_all(void *info)
void flush_tlb_all(void)
{
- on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+ on_each_cpu(do_flush_tlb_all, NULL, 1);
}
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index d7cc292691f..8a768973c4f 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
*
* Pentium III FXSR, SSE support
* Gareth Hughes <gareth@valinux.com>, May 2000
@@ -60,8 +61,6 @@
#include "mach_traps.h"
-int panic_on_unrecovered_nmi;
-
DECLARE_BITMAP(used_vectors, NR_VECTORS);
EXPORT_SYMBOL_GPL(used_vectors);
@@ -98,19 +97,22 @@ asmlinkage void alignment_check(void);
asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);
+int panic_on_unrecovered_nmi;
int kstack_depth_to_print = 24;
static unsigned int code_bytes = 64;
+static int ignore_nmis;
+static int die_counter;
void printk_address(unsigned long address, int reliable)
{
#ifdef CONFIG_KALLSYMS
- char namebuf[KSYM_NAME_LEN];
unsigned long offset = 0;
unsigned long symsize;
const char *symname;
- char reliab[4] = "";
- char *delim = ":";
char *modname;
+ char *delim = ":";
+ char namebuf[KSYM_NAME_LEN];
+ char reliab[4] = "";
symname = kallsyms_lookup(address, &symsize, &offset,
&modname, namebuf);
@@ -130,22 +132,23 @@ void printk_address(unsigned long address, int reliable)
#endif
}
-static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size)
+static inline int valid_stack_ptr(struct thread_info *tinfo,
+ void *p, unsigned int size)
{
- return p > (void *)tinfo &&
- p <= (void *)tinfo + THREAD_SIZE - size;
+ void *t = tinfo;
+ return p > t && p <= t + THREAD_SIZE - size;
}
/* The form of the top of the frame on the stack */
struct stack_frame {
- struct stack_frame *next_frame;
- unsigned long return_address;
+ struct stack_frame *next_frame;
+ unsigned long return_address;
};
static inline unsigned long
print_context_stack(struct thread_info *tinfo,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data)
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data)
{
struct stack_frame *frame = (struct stack_frame *)bp;
@@ -167,8 +170,6 @@ print_context_stack(struct thread_info *tinfo,
return bp;
}
-#define MSG(msg) ops->warning(data, msg)
-
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data)
@@ -178,7 +179,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (!stack) {
unsigned long dummy;
-
stack = &dummy;
if (task != current)
stack = (unsigned long *)task->thread.sp;
@@ -196,7 +196,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
}
#endif
- while (1) {
+ for (;;) {
struct thread_info *context;
context = (struct thread_info *)
@@ -248,10 +248,10 @@ static void print_trace_address(void *data, unsigned long addr, int reliable)
}
static const struct stacktrace_ops print_trace_ops = {
- .warning = print_trace_warning,
- .warning_symbol = print_trace_warning_symbol,
- .stack = print_trace_stack,
- .address = print_trace_address,
+ .warning = print_trace_warning,
+ .warning_symbol = print_trace_warning_symbol,
+ .stack = print_trace_stack,
+ .address = print_trace_address,
};
static void
@@ -351,15 +351,14 @@ void show_registers(struct pt_regs *regs)
printk(KERN_EMERG "Code: ");
ip = (u8 *)regs->ip - code_prologue;
- if (ip < (u8 *)PAGE_OFFSET ||
- probe_kernel_address(ip, c)) {
+ if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
/* try starting at EIP */
ip = (u8 *)regs->ip;
code_len = code_len - code_prologue + 1;
}
for (i = 0; i < code_len; i++, ip++) {
if (ip < (u8 *)PAGE_OFFSET ||
- probe_kernel_address(ip, c)) {
+ probe_kernel_address(ip, c)) {
printk(" Bad EIP value.");
break;
}
@@ -384,8 +383,6 @@ int is_valid_bugaddr(unsigned long ip)
return ud2 == 0x0b0f;
}
-static int die_counter;
-
int __kprobes __die(const char *str, struct pt_regs *regs, long err)
{
unsigned short ss;
@@ -402,26 +399,22 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
printk("DEBUG_PAGEALLOC");
#endif
printk("\n");
-
if (notify_die(DIE_OOPS, str, regs, err,
- current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) {
-
- show_registers(regs);
- /* Executive summary in case the oops scrolled away */
- sp = (unsigned long) (&regs->sp);
- savesegment(ss, ss);
- if (user_mode(regs)) {
- sp = regs->sp;
- ss = regs->ss & 0xffff;
- }
- printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
- print_symbol("%s", regs->ip);
- printk(" SS:ESP %04x:%08lx\n", ss, sp);
+ current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
+ return 1;
- return 0;
+ show_registers(regs);
+ /* Executive summary in case the oops scrolled away */
+ sp = (unsigned long) (&regs->sp);
+ savesegment(ss, ss);
+ if (user_mode(regs)) {
+ sp = regs->sp;
+ ss = regs->ss & 0xffff;
}
-
- return 1;
+ printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
+ print_symbol("%s", regs->ip);
+ printk(" SS:ESP %04x:%08lx\n", ss, sp);
+ return 0;
}
/*
@@ -546,7 +539,7 @@ void do_##name(struct pt_regs *regs, long error_code) \
{ \
trace_hardirqs_fixup(); \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
+ == NOTIFY_STOP) \
return; \
do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
}
@@ -562,7 +555,7 @@ void do_##name(struct pt_regs *regs, long error_code) \
info.si_code = sicode; \
info.si_addr = (void __user *)siaddr; \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
+ == NOTIFY_STOP) \
return; \
do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
}
@@ -571,7 +564,7 @@ void do_##name(struct pt_regs *regs, long error_code) \
void do_##name(struct pt_regs *regs, long error_code) \
{ \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
+ == NOTIFY_STOP) \
return; \
do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
}
@@ -586,27 +579,29 @@ void do_##name(struct pt_regs *regs, long error_code) \
info.si_addr = (void __user *)siaddr; \
trace_hardirqs_fixup(); \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
+ == NOTIFY_STOP) \
return; \
do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
}
-DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
+DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
#ifndef CONFIG_KPROBES
DO_VM86_ERROR(3, SIGTRAP, "int3", int3)
#endif
DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow)
DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
-DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
+DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
-DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
-DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
+DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
+DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
-void __kprobes do_general_protection(struct pt_regs *regs, long error_code)
+void __kprobes
+do_general_protection(struct pt_regs *regs, long error_code)
{
+ struct task_struct *tsk;
struct thread_struct *thread;
struct tss_struct *tss;
int cpu;
@@ -647,23 +642,24 @@ void __kprobes do_general_protection(struct pt_regs *regs, long error_code)
if (regs->flags & X86_VM_MASK)
goto gp_in_vm86;
+ tsk = current;
if (!user_mode(regs))
goto gp_in_kernel;
- current->thread.error_code = error_code;
- current->thread.trap_no = 13;
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 13;
- if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
- printk_ratelimit()) {
+ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+ printk_ratelimit()) {
printk(KERN_INFO
- "%s[%d] general protection ip:%lx sp:%lx error:%lx",
- current->comm, task_pid_nr(current),
- regs->ip, regs->sp, error_code);
+ "%s[%d] general protection ip:%lx sp:%lx error:%lx",
+ tsk->comm, task_pid_nr(tsk),
+ regs->ip, regs->sp, error_code);
print_vma_addr(" in ", regs->ip);
printk("\n");
}
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV, tsk);
return;
gp_in_vm86:
@@ -672,14 +668,15 @@ gp_in_vm86:
return;
gp_in_kernel:
- if (!fixup_exception(regs)) {
- current->thread.error_code = error_code;
- current->thread.trap_no = 13;
- if (notify_die(DIE_GPF, "general protection fault", regs,
+ if (fixup_exception(regs))
+ return;
+
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 13;
+ if (notify_die(DIE_GPF, "general protection fault", regs,
error_code, 13, SIGSEGV) == NOTIFY_STOP)
- return;
- die("general protection fault", regs, error_code);
- }
+ return;
+ die("general protection fault", regs, error_code);
}
static notrace __kprobes void
@@ -792,14 +789,17 @@ void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
{
unsigned char reason = 0;
+ int cpu;
+
+ cpu = smp_processor_id();
- /* Only the BSP gets external NMIs from the system: */
- if (!smp_processor_id())
+ /* Only the BSP gets external NMIs from the system. */
+ if (!cpu)
reason = get_nmi_reason();
if (!(reason & 0xc0)) {
if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
- == NOTIFY_STOP)
+ == NOTIFY_STOP)
return;
#ifdef CONFIG_X86_LOCAL_APIC
/*
@@ -808,7 +808,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
*/
if (nmi_watchdog_tick(regs, reason))
return;
- if (!do_nmi_callback(regs, smp_processor_id()))
+ if (!do_nmi_callback(regs, cpu))
unknown_nmi_error(reason, regs);
#else
unknown_nmi_error(reason, regs);
@@ -818,6 +818,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
}
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
return;
+
+ /* AK: following checks seem to be broken on modern chipsets. FIXME */
if (reason & 0x80)
mem_parity_error(reason, regs);
if (reason & 0x40)
@@ -829,8 +831,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
reassert_nmi();
}
-static int ignore_nmis;
-
notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code)
{
int cpu;
@@ -915,7 +915,7 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
tsk->thread.debugctlmsr = 0;
if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
- SIGTRAP) == NOTIFY_STOP)
+ SIGTRAP) == NOTIFY_STOP)
return;
/* It's safe to allow irq's after DR6 has been saved */
if (regs->flags & X86_EFLAGS_IF)
@@ -976,9 +976,8 @@ clear_TF_reenable:
void math_error(void __user *ip)
{
struct task_struct *task;
- unsigned short cwd;
- unsigned short swd;
siginfo_t info;
+ unsigned short cwd, swd;
/*
* Save the info for the exception handler and clear the error.
@@ -997,7 +996,7 @@ void math_error(void __user *ip)
* C1 reg you need in case of a stack fault, 0x040 is the stack
* fault bit. We should only be taking one exception at a time,
* so if this combination doesn't produce any single exception,
- * then we have a bad program that isn't syncronizing its FPU usage
+ * then we have a bad program that isn't synchronizing its FPU usage
* and it will suffer the consequences since we won't be able to
* fully reproduce the context of the exception
*/
@@ -1006,7 +1005,7 @@ void math_error(void __user *ip)
switch (swd & ~cwd & 0x3f) {
case 0x000: /* No unmasked exception */
return;
- default: /* Multiple exceptions */
+ default: /* Multiple exceptions */
break;
case 0x001: /* Invalid Op */
/*
@@ -1042,8 +1041,8 @@ void do_coprocessor_error(struct pt_regs *regs, long error_code)
static void simd_math_error(void __user *ip)
{
struct task_struct *task;
- unsigned short mxcsr;
siginfo_t info;
+ unsigned short mxcsr;
/*
* Save the info for the exception handler and clear the error.
@@ -1198,16 +1197,16 @@ void __init trap_init(void)
early_iounmap(p, 4);
#endif
- set_trap_gate(0, &divide_error);
- set_intr_gate(1, &debug);
- set_intr_gate(2, &nmi);
- set_system_intr_gate(3, &int3); /* int3/4 can be called from all */
- set_system_gate(4, &overflow);
- set_trap_gate(5, &bounds);
- set_trap_gate(6, &invalid_op);
- set_trap_gate(7, &device_not_available);
- set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
- set_trap_gate(9, &coprocessor_segment_overrun);
+ set_trap_gate(0, &divide_error);
+ set_intr_gate(1, &debug);
+ set_intr_gate(2, &nmi);
+ set_system_intr_gate(3, &int3); /* int3 can be called from all */
+ set_system_gate(4, &overflow); /* int4 can be called from all */
+ set_trap_gate(5, &bounds);
+ set_trap_gate(6, &invalid_op);
+ set_trap_gate(7, &device_not_available);
+ set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
+ set_trap_gate(9, &coprocessor_segment_overrun);
set_trap_gate(10, &invalid_TSS);
set_trap_gate(11, &segment_not_present);
set_trap_gate(12, &stack_segment);
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 80ba6d37bfe..2696a683778 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -10,49 +10,49 @@
* 'Traps.c' handles hardware traps and faults after we have saved some
* state in 'entry.S'.
*/
-#include <linux/sched.h>
+#include <linux/moduleparam.h>
+#include <linux/interrupt.h>
+#include <linux/kallsyms.h>
+#include <linux/spinlock.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/utsname.h>
+#include <linux/kdebug.h>
#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
#include <linux/string.h>
+#include <linux/unwind.h>
+#include <linux/delay.h>
#include <linux/errno.h>
-#include <linux/ptrace.h>
+#include <linux/kexec.h>
+#include <linux/sched.h>
#include <linux/timer.h>
-#include <linux/mm.h>
#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/kallsyms.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/nmi.h>
-#include <linux/kprobes.h>
-#include <linux/kexec.h>
-#include <linux/unwind.h>
-#include <linux/uaccess.h>
#include <linux/bug.h>
-#include <linux/kdebug.h>
-#include <linux/utsname.h>
-
-#include <mach_traps.h>
+#include <linux/nmi.h>
+#include <linux/mm.h>
#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/atomic.h>
+#include <asm/stacktrace.h>
+#include <asm/processor.h>
#include <asm/debugreg.h>
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/unwind.h>
#include <asm/desc.h>
#include <asm/i387.h>
-#include <asm/processor.h>
-#include <asm/unwind.h>
+#include <asm/nmi.h>
#include <asm/smp.h>
+#include <asm/io.h>
#include <asm/pgalloc.h>
-#include <asm/pda.h>
#include <asm/proto.h>
-#include <asm/nmi.h>
-#include <asm/stacktrace.h>
+#include <asm/pda.h>
+
+#include <mach_traps.h>
asmlinkage void divide_error(void);
asmlinkage void debug(void);
@@ -72,12 +72,14 @@ asmlinkage void page_fault(void);
asmlinkage void coprocessor_error(void);
asmlinkage void simd_coprocessor_error(void);
asmlinkage void alignment_check(void);
-asmlinkage void machine_check(void);
asmlinkage void spurious_interrupt_bug(void);
+asmlinkage void machine_check(void);
int panic_on_unrecovered_nmi;
+int kstack_depth_to_print = 12;
static unsigned int code_bytes = 64;
-static unsigned ignore_nmis;
+static int ignore_nmis;
+static int die_counter;
static inline void conditional_sti(struct pt_regs *regs)
{
@@ -101,34 +103,9 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
dec_preempt_count();
}
-int kstack_depth_to_print = 12;
-
void printk_address(unsigned long address, int reliable)
{
-#ifdef CONFIG_KALLSYMS
- unsigned long offset = 0, symsize;
- const char *symname;
- char *modname;
- char *delim = ":";
- char namebuf[KSYM_NAME_LEN];
- char reliab[4] = "";
-
- symname = kallsyms_lookup(address, &symsize, &offset,
- &modname, namebuf);
- if (!symname) {
- printk(" [<%016lx>]\n", address);
- return;
- }
- if (!reliable)
- strcpy(reliab, "? ");
-
- if (!modname)
- modname = delim = "";
- printk(" [<%016lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
- address, reliab, delim, modname, delim, symname, offset, symsize);
-#else
- printk(" [<%016lx>]\n", address);
-#endif
+ printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address);
}
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
@@ -205,8 +182,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
return NULL;
}
-#define MSG(txt) ops->warning(data, txt)
-
/*
* x86-64 can have up to three kernel stacks:
* process stack
@@ -233,11 +208,11 @@ struct stack_frame {
unsigned long return_address;
};
-
-static inline unsigned long print_context_stack(struct thread_info *tinfo,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data,
- unsigned long *end)
+static inline unsigned long
+print_context_stack(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data,
+ unsigned long *end)
{
struct stack_frame *frame = (struct stack_frame *)bp;
@@ -259,7 +234,7 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
return bp;
}
-void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
+void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data)
{
@@ -268,36 +243,34 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
unsigned used = 0;
struct thread_info *tinfo;
- if (!tsk)
- tsk = current;
- tinfo = task_thread_info(tsk);
+ if (!task)
+ task = current;
if (!stack) {
unsigned long dummy;
stack = &dummy;
- if (tsk && tsk != current)
- stack = (unsigned long *)tsk->thread.sp;
+ if (task && task != current)
+ stack = (unsigned long *)task->thread.sp;
}
#ifdef CONFIG_FRAME_POINTER
if (!bp) {
- if (tsk == current) {
+ if (task == current) {
/* Grab bp right from our regs */
- asm("movq %%rbp, %0" : "=r" (bp):);
+ asm("movq %%rbp, %0" : "=r" (bp) :);
} else {
/* bp is the last reg pushed by switch_to */
- bp = *(unsigned long *) tsk->thread.sp;
+ bp = *(unsigned long *) task->thread.sp;
}
}
#endif
-
-
/*
* Print function call entries in all stacks, starting at the
* current stack address. If the stacks consist of nested
* exceptions
*/
+ tinfo = task_thread_info(task);
for (;;) {
char *id;
unsigned long *estack_end;
@@ -382,18 +355,17 @@ static const struct stacktrace_ops print_trace_ops = {
.address = print_trace_address,
};
-void
-show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack,
- unsigned long bp)
+void show_trace(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp)
{
printk("\nCall Trace:\n");
- dump_trace(tsk, regs, stack, bp, &print_trace_ops, NULL);
+ dump_trace(task, regs, stack, bp, &print_trace_ops, NULL);
printk("\n");
}
static void
-_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp,
- unsigned long bp)
+_show_stack(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *sp, unsigned long bp)
{
unsigned long *stack;
int i;
@@ -405,14 +377,14 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp,
// back trace for this cpu.
if (sp == NULL) {
- if (tsk)
- sp = (unsigned long *)tsk->thread.sp;
+ if (task)
+ sp = (unsigned long *)task->thread.sp;
else
sp = (unsigned long *)&sp;
}
stack = sp;
- for(i=0; i < kstack_depth_to_print; i++) {
+ for (i = 0; i < kstack_depth_to_print; i++) {
if (stack >= irqstack && stack <= irqstack_end) {
if (stack == irqstack_end) {
stack = (unsigned long *) (irqstack_end[-1]);
@@ -427,12 +399,12 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp,
printk(" %016lx", *stack++);
touch_nmi_watchdog();
}
- show_trace(tsk, regs, sp, bp);
+ show_trace(task, regs, sp, bp);
}
-void show_stack(struct task_struct *tsk, unsigned long * sp)
+void show_stack(struct task_struct *task, unsigned long *sp)
{
- _show_stack(tsk, NULL, sp, 0);
+ _show_stack(task, NULL, sp, 0);
}
/*
@@ -440,8 +412,8 @@ void show_stack(struct task_struct *tsk, unsigned long * sp)
*/
void dump_stack(void)
{
- unsigned long dummy;
unsigned long bp = 0;
+ unsigned long stack;
#ifdef CONFIG_FRAME_POINTER
if (!bp)
@@ -453,7 +425,7 @@ void dump_stack(void)
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
- show_trace(NULL, NULL, &dummy, bp);
+ show_trace(NULL, NULL, &stack, bp);
}
EXPORT_SYMBOL(dump_stack);
@@ -464,12 +436,8 @@ void show_registers(struct pt_regs *regs)
unsigned long sp;
const int cpu = smp_processor_id();
struct task_struct *cur = cpu_pda(cpu)->pcurrent;
- u8 *ip;
- unsigned int code_prologue = code_bytes * 43 / 64;
- unsigned int code_len = code_bytes;
sp = regs->sp;
- ip = (u8 *) regs->ip - code_prologue;
printk("CPU %d ", cpu);
__show_regs(regs);
printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
@@ -480,15 +448,21 @@ void show_registers(struct pt_regs *regs)
* time of the fault..
*/
if (!user_mode(regs)) {
+ unsigned int code_prologue = code_bytes * 43 / 64;
+ unsigned int code_len = code_bytes;
unsigned char c;
+ u8 *ip;
+
printk("Stack: ");
_show_stack(NULL, regs, (unsigned long *)sp, regs->bp);
printk("\n");
printk(KERN_EMERG "Code: ");
+
+ ip = (u8 *)regs->ip - code_prologue;
if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
/* try starting at RIP */
- ip = (u8 *) regs->ip;
+ ip = (u8 *)regs->ip;
code_len = code_len - code_prologue + 1;
}
for (i = 0; i < code_len; i++, ip++) {
@@ -504,7 +478,7 @@ void show_registers(struct pt_regs *regs)
}
}
printk("\n");
-}
+}
int is_valid_bugaddr(unsigned long ip)
{
@@ -562,10 +536,9 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
do_exit(signr);
}
-int __kprobes __die(const char * str, struct pt_regs * regs, long err)
+int __kprobes __die(const char *str, struct pt_regs *regs, long err)
{
- static int die_counter;
- printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter);
+ printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter);
#ifdef CONFIG_PREEMPT
printk("PREEMPT ");
#endif
@@ -576,8 +549,10 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err)
printk("DEBUG_PAGEALLOC");
#endif
printk("\n");
- if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
+ if (notify_die(DIE_OOPS, str, regs, err,
+ current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
return 1;
+
show_registers(regs);
add_taint(TAINT_DIE);
/* Executive summary in case the oops scrolled away */
@@ -589,7 +564,7 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err)
return 0;
}
-void die(const char * str, struct pt_regs * regs, long err)
+void die(const char *str, struct pt_regs *regs, long err)
{
unsigned long flags = oops_begin();
@@ -606,8 +581,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
{
unsigned long flags;
- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) ==
- NOTIFY_STOP)
+ if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
return;
flags = oops_begin();
@@ -629,44 +603,44 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
do_exit(SIGBUS);
}
-static void __kprobes do_trap(int trapnr, int signr, char *str,
- struct pt_regs * regs, long error_code,
- siginfo_t *info)
+static void __kprobes
+do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
+ long error_code, siginfo_t *info)
{
struct task_struct *tsk = current;
- if (user_mode(regs)) {
- /*
- * We want error_code and trap_no set for userspace
- * faults and kernelspace faults which result in
- * die(), but not kernelspace faults which are fixed
- * up. die() gives the process no chance to handle
- * the signal and notice the kernel fault information,
- * so that won't result in polluting the information
- * about previously queued, but not yet delivered,
- * faults. See also do_general_protection below.
- */
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = trapnr;
-
- if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
- printk_ratelimit()) {
- printk(KERN_INFO
- "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
- tsk->comm, tsk->pid, str,
- regs->ip, regs->sp, error_code);
- print_vma_addr(" in ", regs->ip);
- printk("\n");
- }
+ if (!user_mode(regs))
+ goto kernel_trap;
- if (info)
- force_sig_info(signr, info, tsk);
- else
- force_sig(signr, tsk);
- return;
+ /*
+ * We want error_code and trap_no set for userspace faults and
+ * kernelspace faults which result in die(), but not
+ * kernelspace faults which are fixed up. die() gives the
+ * process no chance to handle the signal and notice the
+ * kernel fault information, so that won't result in polluting
+ * the information about previously queued, but not yet
+ * delivered, faults. See also do_general_protection below.
+ */
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = trapnr;
+
+ if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
+ printk_ratelimit()) {
+ printk(KERN_INFO
+ "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
+ tsk->comm, tsk->pid, str,
+ regs->ip, regs->sp, error_code);
+ print_vma_addr(" in ", regs->ip);
+ printk("\n");
}
+ if (info)
+ force_sig_info(signr, info, tsk);
+ else
+ force_sig(signr, tsk);
+ return;
+kernel_trap:
if (!fixup_exception(regs)) {
tsk->thread.error_code = error_code;
tsk->thread.trap_no = trapnr;
@@ -676,38 +650,38 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
}
#define DO_ERROR(trapnr, signr, str, name) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
-{ \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+ == NOTIFY_STOP) \
+ return; \
conditional_sti(regs); \
- do_trap(trapnr, signr, str, regs, error_code, NULL); \
+ do_trap(trapnr, signr, str, regs, error_code, NULL); \
}
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
-{ \
- siginfo_t info; \
- info.si_signo = signr; \
- info.si_errno = 0; \
- info.si_code = sicode; \
- info.si_addr = (void __user *)siaddr; \
- trace_hardirqs_fixup(); \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
+#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+ siginfo_t info; \
+ info.si_signo = signr; \
+ info.si_errno = 0; \
+ info.si_code = sicode; \
+ info.si_addr = (void __user *)siaddr; \
+ trace_hardirqs_fixup(); \
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+ == NOTIFY_STOP) \
+ return; \
conditional_sti(regs); \
- do_trap(trapnr, signr, str, regs, error_code, &info); \
+ do_trap(trapnr, signr, str, regs, error_code, &info); \
}
-DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
-DO_ERROR( 4, SIGSEGV, "overflow", overflow)
-DO_ERROR( 5, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
-DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
+DO_ERROR(4, SIGSEGV, "overflow", overflow)
+DO_ERROR(5, SIGSEGV, "bounds", bounds)
+DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
+DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
-DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
+DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
/* Runs on IST stack */
@@ -738,31 +712,34 @@ asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
die(str, regs, error_code);
}
-asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
- long error_code)
+asmlinkage void __kprobes
+do_general_protection(struct pt_regs *regs, long error_code)
{
- struct task_struct *tsk = current;
+ struct task_struct *tsk;
conditional_sti(regs);
- if (user_mode(regs)) {
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 13;
-
- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
- printk_ratelimit()) {
- printk(KERN_INFO
- "%s[%d] general protection ip:%lx sp:%lx error:%lx",
- tsk->comm, tsk->pid,
- regs->ip, regs->sp, error_code);
- print_vma_addr(" in ", regs->ip);
- printk("\n");
- }
+ tsk = current;
+ if (!user_mode(regs))
+ goto gp_in_kernel;
- force_sig(SIGSEGV, tsk);
- return;
- }
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 13;
+ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+ printk_ratelimit()) {
+ printk(KERN_INFO
+ "%s[%d] general protection ip:%lx sp:%lx error:%lx",
+ tsk->comm, tsk->pid,
+ regs->ip, regs->sp, error_code);
+ print_vma_addr(" in ", regs->ip);
+ printk("\n");
+ }
+
+ force_sig(SIGSEGV, tsk);
+ return;
+
+gp_in_kernel:
if (fixup_exception(regs))
return;
@@ -775,14 +752,14 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
}
static notrace __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs * regs)
+mem_parity_error(unsigned char reason, struct pt_regs *regs)
{
printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
reason);
printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
#if defined(CONFIG_EDAC)
- if(edac_handler_set()) {
+ if (edac_handler_set()) {
edac_atomic_assert_error();
return;
}
@@ -799,7 +776,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
}
static notrace __kprobes void
-io_check_error(unsigned char reason, struct pt_regs * regs)
+io_check_error(unsigned char reason, struct pt_regs *regs)
{
printk("NMI: IOCK error (debug interrupt?)\n");
show_registers(regs);
@@ -829,14 +806,14 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
/* Runs on IST stack. This code must keep interrupts off all the time.
Nested NMIs are prevented by the CPU. */
-asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
+asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
{
unsigned char reason = 0;
int cpu;
cpu = smp_processor_id();
- /* Only the BSP gets external NMIs from the system. */
+ /* Only the BSP gets external NMIs from the system. */
if (!cpu)
reason = get_nmi_reason();
@@ -848,18 +825,17 @@ asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
* Ok, so this is none of the documented NMI sources,
* so it must be the NMI watchdog.
*/
- if (nmi_watchdog_tick(regs,reason))
+ if (nmi_watchdog_tick(regs, reason))
return;
- if (!do_nmi_callback(regs,cpu))
+ if (!do_nmi_callback(regs, cpu))
unknown_nmi_error(reason, regs);
return;
}
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
- return;
+ return;
/* AK: following checks seem to be broken on modern chipsets. FIXME */
-
if (reason & 0x80)
mem_parity_error(reason, regs);
if (reason & 0x40)
@@ -870,9 +846,12 @@ asmlinkage notrace __kprobes void
do_nmi(struct pt_regs *regs, long error_code)
{
nmi_enter();
+
add_pda(__nmi_count, 1);
+
if (!ignore_nmis)
default_do_nmi(regs);
+
nmi_exit();
}
@@ -889,13 +868,14 @@ void restart_nmi(void)
}
/* runs on IST stack. */
-asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
+asmlinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
{
trace_hardirqs_fixup();
- if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
+ if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
+ == NOTIFY_STOP)
return;
- }
+
preempt_conditional_sti(regs);
do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
preempt_conditional_cli(regs);
@@ -926,8 +906,8 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
asmlinkage void __kprobes do_debug(struct pt_regs * regs,
unsigned long error_code)
{
- unsigned long condition;
struct task_struct *tsk = current;
+ unsigned long condition;
siginfo_t info;
trace_hardirqs_fixup();
@@ -948,21 +928,19 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
/* Mask out spurious debug traps due to lazy DR7 setting */
if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
- if (!tsk->thread.debugreg7) {
+ if (!tsk->thread.debugreg7)
goto clear_dr7;
- }
}
tsk->thread.debugreg6 = condition;
-
/*
* Single-stepping through TF: make sure we ignore any events in
* kernel space (but re-enable TF when returning to user mode).
*/
if (condition & DR_STEP) {
- if (!user_mode(regs))
- goto clear_TF_reenable;
+ if (!user_mode(regs))
+ goto clear_TF_reenable;
}
/* Ok, finally something we can handle */
@@ -975,7 +953,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
force_sig_info(SIGTRAP, &info, tsk);
clear_dr7:
- set_debugreg(0UL, 7);
+ set_debugreg(0, 7);
preempt_conditional_cli(regs);
return;
@@ -983,6 +961,7 @@ clear_TF_reenable:
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
regs->flags &= ~X86_EFLAGS_TF;
preempt_conditional_cli(regs);
+ return;
}
static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
@@ -1005,7 +984,7 @@ static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
asmlinkage void do_coprocessor_error(struct pt_regs *regs)
{
void __user *ip = (void __user *)(regs->ip);
- struct task_struct * task;
+ struct task_struct *task;
siginfo_t info;
unsigned short cwd, swd;
@@ -1038,30 +1017,30 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs)
cwd = get_fpu_cwd(task);
swd = get_fpu_swd(task);
switch (swd & ~cwd & 0x3f) {
- case 0x000:
- default:
- break;
- case 0x001: /* Invalid Op */
- /*
- * swd & 0x240 == 0x040: Stack Underflow
- * swd & 0x240 == 0x240: Stack Overflow
- * User must clear the SF bit (0x40) if set
- */
- info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
- info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
- info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
- info.si_code = FPE_FLTRES;
- break;
+ case 0x000: /* No unmasked exception */
+ default: /* Multiple exceptions */
+ break;
+ case 0x001: /* Invalid Op */
+ /*
+ * swd & 0x240 == 0x040: Stack Underflow
+ * swd & 0x240 == 0x240: Stack Overflow
+ * User must clear the SF bit (0x40) if set
+ */
+ info.si_code = FPE_FLTINV;
+ break;
+ case 0x002: /* Denormalize */
+ case 0x010: /* Underflow */
+ info.si_code = FPE_FLTUND;
+ break;
+ case 0x004: /* Zero Divide */
+ info.si_code = FPE_FLTDIV;
+ break;
+ case 0x008: /* Overflow */
+ info.si_code = FPE_FLTOVF;
+ break;
+ case 0x020: /* Precision */
+ info.si_code = FPE_FLTRES;
+ break;
}
force_sig_info(SIGFPE, &info, task);
}
@@ -1074,7 +1053,7 @@ asmlinkage void bad_intr(void)
asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
{
void __user *ip = (void __user *)(regs->ip);
- struct task_struct * task;
+ struct task_struct *task;
siginfo_t info;
unsigned short mxcsr;
@@ -1102,25 +1081,25 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
*/
mxcsr = get_fpu_mxcsr(task);
switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
- case 0x000:
- default:
- break;
- case 0x001: /* Invalid Op */
- info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
- info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
- info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
- info.si_code = FPE_FLTRES;
- break;
+ case 0x000:
+ default:
+ break;
+ case 0x001: /* Invalid Op */
+ info.si_code = FPE_FLTINV;
+ break;
+ case 0x002: /* Denormalize */
+ case 0x010: /* Underflow */
+ info.si_code = FPE_FLTUND;
+ break;
+ case 0x004: /* Zero Divide */
+ info.si_code = FPE_FLTDIV;
+ break;
+ case 0x008: /* Overflow */
+ info.si_code = FPE_FLTOVF;
+ break;
+ case 0x020: /* Precision */
+ info.si_code = FPE_FLTRES;
+ break;
}
force_sig_info(SIGFPE, &info, task);
}
@@ -1138,7 +1117,7 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
}
/*
- * 'math_state_restore()' saves the current math information in the
+ * 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task
*
* Careful.. There are problems with IBM-designed IRQ13 behaviour.
@@ -1163,7 +1142,7 @@ asmlinkage void math_state_restore(void)
local_irq_disable();
}
- clts(); /* Allow maths ops (or we recurse) */
+ clts(); /* Allow maths ops (or we recurse) */
restore_fpu_checking(&me->thread.xstate->fxsave);
task_thread_info(me)->status |= TS_USEDFPU;
me->fpu_counter++;
@@ -1172,64 +1151,61 @@ EXPORT_SYMBOL_GPL(math_state_restore);
void __init trap_init(void)
{
- set_intr_gate(0,&divide_error);
- set_intr_gate_ist(1,&debug,DEBUG_STACK);
- set_intr_gate_ist(2,&nmi,NMI_STACK);
- set_system_gate_ist(3,&int3,DEBUG_STACK); /* int3 can be called from all */
- set_system_gate(4,&overflow); /* int4 can be called from all */
- set_intr_gate(5,&bounds);
- set_intr_gate(6,&invalid_op);
- set_intr_gate(7,&device_not_available);
- set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK);
- set_intr_gate(9,&coprocessor_segment_overrun);
- set_intr_gate(10,&invalid_TSS);
- set_intr_gate(11,&segment_not_present);
- set_intr_gate_ist(12,&stack_segment,STACKFAULT_STACK);
- set_intr_gate(13,&general_protection);
- set_intr_gate(14,&page_fault);
- set_intr_gate(15,&spurious_interrupt_bug);
- set_intr_gate(16,&coprocessor_error);
- set_intr_gate(17,&alignment_check);
+ set_intr_gate(0, &divide_error);
+ set_intr_gate_ist(1, &debug, DEBUG_STACK);
+ set_intr_gate_ist(2, &nmi, NMI_STACK);
+ set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */
+ set_system_gate(4, &overflow); /* int4 can be called from all */
+ set_intr_gate(5, &bounds);
+ set_intr_gate(6, &invalid_op);
+ set_intr_gate(7, &device_not_available);
+ set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
+ set_intr_gate(9, &coprocessor_segment_overrun);
+ set_intr_gate(10, &invalid_TSS);
+ set_intr_gate(11, &segment_not_present);
+ set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
+ set_intr_gate(13, &general_protection);
+ set_intr_gate(14, &page_fault);
+ set_intr_gate(15, &spurious_interrupt_bug);
+ set_intr_gate(16, &coprocessor_error);
+ set_intr_gate(17, &alignment_check);
#ifdef CONFIG_X86_MCE
- set_intr_gate_ist(18,&machine_check, MCE_STACK);
+ set_intr_gate_ist(18, &machine_check, MCE_STACK);
#endif
- set_intr_gate(19,&simd_coprocessor_error);
+ set_intr_gate(19, &simd_coprocessor_error);
#ifdef CONFIG_IA32_EMULATION
set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
#endif
-
/*
* initialize the per thread extended state:
*/
- init_thread_xstate();
+ init_thread_xstate();
/*
- * Should be a barrier for any external CPU state.
+ * Should be a barrier for any external CPU state:
*/
cpu_init();
}
-
static int __init oops_setup(char *s)
-{
+{
if (!s)
return -EINVAL;
if (!strcmp(s, "panic"))
panic_on_oops = 1;
return 0;
-}
+}
early_param("oops", oops_setup);
static int __init kstack_setup(char *s)
{
if (!s)
return -EINVAL;
- kstack_depth_to_print = simple_strtoul(s,NULL,0);
+ kstack_depth_to_print = simple_strtoul(s, NULL, 0);
return 0;
}
early_param("kstack", kstack_setup);
-
static int __init code_bytes_setup(char *s)
{
code_bytes = simple_strtoul(s, NULL, 0);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
new file mode 100644
index 00000000000..7603c055390
--- /dev/null
+++ b/arch/x86/kernel/tsc.c
@@ -0,0 +1,535 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/acpi_pmtmr.h>
+#include <linux/cpufreq.h>
+#include <linux/dmi.h>
+#include <linux/delay.h>
+#include <linux/clocksource.h>
+#include <linux/percpu.h>
+
+#include <asm/hpet.h>
+#include <asm/timer.h>
+#include <asm/vgtod.h>
+#include <asm/time.h>
+#include <asm/delay.h>
+
+unsigned int cpu_khz; /* TSC clocks / usec, not used here */
+EXPORT_SYMBOL(cpu_khz);
+unsigned int tsc_khz;
+EXPORT_SYMBOL(tsc_khz);
+
+/*
+ * TSC can be unstable due to cpufreq or due to unsynced TSCs
+ */
+static int tsc_unstable;
+
+/* native_sched_clock() is called before tsc_init(), so
+ we must start with the TSC soft disabled to prevent
+ erroneous rdtsc usage on !cpu_has_tsc processors */
+static int tsc_disabled = -1;
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ */
+u64 native_sched_clock(void)
+{
+ u64 this_offset;
+
+ /*
+ * Fall back to jiffies if there's no TSC available:
+ * ( But note that we still use it if the TSC is marked
+ * unstable. We do this because unlike Time Of Day,
+ * the scheduler clock tolerates small errors and it's
+ * very important for it to be as fast as the platform
+ * can achive it. )
+ */
+ if (unlikely(tsc_disabled)) {
+ /* No locking but a rare wrong value is not a big deal: */
+ return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
+ }
+
+ /* read the Time Stamp Counter: */
+ rdtscll(this_offset);
+
+ /* return the value in ns */
+ return cycles_2_ns(this_offset);
+}
+
+/* We need to define a real function for sched_clock, to override the
+ weak default version */
+#ifdef CONFIG_PARAVIRT
+unsigned long long sched_clock(void)
+{
+ return paravirt_sched_clock();
+}
+#else
+unsigned long long
+sched_clock(void) __attribute__((alias("native_sched_clock")));
+#endif
+
+int check_tsc_unstable(void)
+{
+ return tsc_unstable;
+}
+EXPORT_SYMBOL_GPL(check_tsc_unstable);
+
+#ifdef CONFIG_X86_TSC
+int __init notsc_setup(char *str)
+{
+ printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
+ "cannot disable TSC completely.\n");
+ tsc_disabled = 1;
+ return 1;
+}
+#else
+/*
+ * disable flag for tsc. Takes effect by clearing the TSC cpu flag
+ * in cpu/common.c
+ */
+int __init notsc_setup(char *str)
+{
+ setup_clear_cpu_cap(X86_FEATURE_TSC);
+ return 1;
+}
+#endif
+
+__setup("notsc", notsc_setup);
+
+#define MAX_RETRIES 5
+#define SMI_TRESHOLD 50000
+
+/*
+ * Read TSC and the reference counters. Take care of SMI disturbance
+ */
+static u64 __init tsc_read_refs(u64 *pm, u64 *hpet)
+{
+ u64 t1, t2;
+ int i;
+
+ for (i = 0; i < MAX_RETRIES; i++) {
+ t1 = get_cycles();
+ if (hpet)
+ *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
+ else
+ *pm = acpi_pm_read_early();
+ t2 = get_cycles();
+ if ((t2 - t1) < SMI_TRESHOLD)
+ return t2;
+ }
+ return ULLONG_MAX;
+}
+
+/**
+ * native_calibrate_tsc - calibrate the tsc on boot
+ */
+unsigned long native_calibrate_tsc(void)
+{
+ unsigned long flags;
+ u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2;
+ int hpet = is_hpet_enabled();
+ unsigned int tsc_khz_val = 0;
+
+ local_irq_save(flags);
+
+ tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
+
+ outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+ outb(0xb0, 0x43);
+ outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
+ outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
+ tr1 = get_cycles();
+ while ((inb(0x61) & 0x20) == 0);
+ tr2 = get_cycles();
+
+ tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
+
+ local_irq_restore(flags);
+
+ /*
+ * Preset the result with the raw and inaccurate PIT
+ * calibration value
+ */
+ delta = (tr2 - tr1);
+ do_div(delta, 50);
+ tsc_khz_val = delta;
+
+ /* hpet or pmtimer available ? */
+ if (!hpet && !pm1 && !pm2) {
+ printk(KERN_INFO "TSC calibrated against PIT\n");
+ goto out;
+ }
+
+ /* Check, whether the sampling was disturbed by an SMI */
+ if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) {
+ printk(KERN_WARNING "TSC calibration disturbed by SMI, "
+ "using PIT calibration result\n");
+ goto out;
+ }
+
+ tsc2 = (tsc2 - tsc1) * 1000000LL;
+
+ if (hpet) {
+ printk(KERN_INFO "TSC calibrated against HPET\n");
+ if (hpet2 < hpet1)
+ hpet2 += 0x100000000ULL;
+ hpet2 -= hpet1;
+ tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
+ do_div(tsc1, 1000000);
+ } else {
+ printk(KERN_INFO "TSC calibrated against PM_TIMER\n");
+ if (pm2 < pm1)
+ pm2 += (u64)ACPI_PM_OVRRUN;
+ pm2 -= pm1;
+ tsc1 = pm2 * 1000000000LL;
+ do_div(tsc1, PMTMR_TICKS_PER_SEC);
+ }
+
+ do_div(tsc2, tsc1);
+ tsc_khz_val = tsc2;
+
+out:
+ return tsc_khz_val;
+}
+
+
+#ifdef CONFIG_X86_32
+/* Only called from the Powernow K7 cpu freq driver */
+int recalibrate_cpu_khz(void)
+{
+#ifndef CONFIG_SMP
+ unsigned long cpu_khz_old = cpu_khz;
+
+ if (cpu_has_tsc) {
+ tsc_khz = calibrate_tsc();
+ cpu_khz = tsc_khz;
+ cpu_data(0).loops_per_jiffy =
+ cpufreq_scale(cpu_data(0).loops_per_jiffy,
+ cpu_khz_old, cpu_khz);
+ return 0;
+ } else
+ return -ENODEV;
+#else
+ return -ENODEV;
+#endif
+}
+
+EXPORT_SYMBOL(recalibrate_cpu_khz);
+
+#endif /* CONFIG_X86_32 */
+
+/* Accelerators for sched_clock()
+ * convert from cycles(64bits) => nanoseconds (64bits)
+ * basic equation:
+ * ns = cycles / (freq / ns_per_sec)
+ * ns = cycles * (ns_per_sec / freq)
+ * ns = cycles * (10^9 / (cpu_khz * 10^3))
+ * ns = cycles * (10^6 / cpu_khz)
+ *
+ * Then we use scaling math (suggested by george@mvista.com) to get:
+ * ns = cycles * (10^6 * SC / cpu_khz) / SC
+ * ns = cycles * cyc2ns_scale / SC
+ *
+ * And since SC is a constant power of two, we can convert the div
+ * into a shift.
+ *
+ * We can use khz divisor instead of mhz to keep a better precision, since
+ * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ * (mathieu.desnoyers@polymtl.ca)
+ *
+ * -johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */
+
+DEFINE_PER_CPU(unsigned long, cyc2ns);
+
+static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
+{
+ unsigned long long tsc_now, ns_now;
+ unsigned long flags, *scale;
+
+ local_irq_save(flags);
+ sched_clock_idle_sleep_event();
+
+ scale = &per_cpu(cyc2ns, cpu);
+
+ rdtscll(tsc_now);
+ ns_now = __cycles_2_ns(tsc_now);
+
+ if (cpu_khz)
+ *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
+
+ sched_clock_idle_wakeup_event(0);
+ local_irq_restore(flags);
+}
+
+#ifdef CONFIG_CPU_FREQ
+
+/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
+ * changes.
+ *
+ * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
+ * not that important because current Opteron setups do not support
+ * scaling on SMP anyroads.
+ *
+ * Should fix up last_tsc too. Currently gettimeofday in the
+ * first tick after the change will be slightly wrong.
+ */
+
+static unsigned int ref_freq;
+static unsigned long loops_per_jiffy_ref;
+static unsigned long tsc_khz_ref;
+
+static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ unsigned long *lpj, dummy;
+
+ if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
+ return 0;
+
+ lpj = &dummy;
+ if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+#ifdef CONFIG_SMP
+ lpj = &cpu_data(freq->cpu).loops_per_jiffy;
+#else
+ lpj = &boot_cpu_data.loops_per_jiffy;
+#endif
+
+ if (!ref_freq) {
+ ref_freq = freq->old;
+ loops_per_jiffy_ref = *lpj;
+ tsc_khz_ref = tsc_khz;
+ }
+ if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
+ (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
+ (val == CPUFREQ_RESUMECHANGE)) {
+ *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
+
+ tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+ if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+ mark_tsc_unstable("cpufreq changes");
+ }
+
+ set_cyc2ns_scale(tsc_khz_ref, freq->cpu);
+
+ return 0;
+}
+
+static struct notifier_block time_cpufreq_notifier_block = {
+ .notifier_call = time_cpufreq_notifier
+};
+
+static int __init cpufreq_tsc(void)
+{
+ cpufreq_register_notifier(&time_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ return 0;
+}
+
+core_initcall(cpufreq_tsc);
+
+#endif /* CONFIG_CPU_FREQ */
+
+/* clocksource code */
+
+static struct clocksource clocksource_tsc;
+
+/*
+ * We compare the TSC to the cycle_last value in the clocksource
+ * structure to avoid a nasty time-warp. This can be observed in a
+ * very small window right after one CPU updated cycle_last under
+ * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
+ * is smaller than the cycle_last reference value due to a TSC which
+ * is slighty behind. This delta is nowhere else observable, but in
+ * that case it results in a forward time jump in the range of hours
+ * due to the unsigned delta calculation of the time keeping core
+ * code, which is necessary to support wrapping clocksources like pm
+ * timer.
+ */
+static cycle_t read_tsc(void)
+{
+ cycle_t ret = (cycle_t)get_cycles();
+
+ return ret >= clocksource_tsc.cycle_last ?
+ ret : clocksource_tsc.cycle_last;
+}
+
+#ifdef CONFIG_X86_64
+static cycle_t __vsyscall_fn vread_tsc(void)
+{
+ cycle_t ret = (cycle_t)vget_cycles();
+
+ return ret >= __vsyscall_gtod_data.clock.cycle_last ?
+ ret : __vsyscall_gtod_data.clock.cycle_last;
+}
+#endif
+
+static struct clocksource clocksource_tsc = {
+ .name = "tsc",
+ .rating = 300,
+ .read = read_tsc,
+ .mask = CLOCKSOURCE_MASK(64),
+ .shift = 22,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS |
+ CLOCK_SOURCE_MUST_VERIFY,
+#ifdef CONFIG_X86_64
+ .vread = vread_tsc,
+#endif
+};
+
+void mark_tsc_unstable(char *reason)
+{
+ if (!tsc_unstable) {
+ tsc_unstable = 1;
+ printk("Marking TSC unstable due to %s\n", reason);
+ /* Change only the rating, when not registered */
+ if (clocksource_tsc.mult)
+ clocksource_change_rating(&clocksource_tsc, 0);
+ else
+ clocksource_tsc.rating = 0;
+ }
+}
+
+EXPORT_SYMBOL_GPL(mark_tsc_unstable);
+
+static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d)
+{
+ printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
+ d->ident);
+ tsc_unstable = 1;
+ return 0;
+}
+
+/* List of systems that have known TSC problems */
+static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
+ {
+ .callback = dmi_mark_tsc_unstable,
+ .ident = "IBM Thinkpad 380XD",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
+ },
+ },
+ {}
+};
+
+/*
+ * Geode_LX - the OLPC CPU has a possibly a very reliable TSC
+ */
+#ifdef CONFIG_MGEODE_LX
+/* RTSC counts during suspend */
+#define RTSC_SUSP 0x100
+
+static void __init check_geode_tsc_reliable(void)
+{
+ unsigned long res_low, res_high;
+
+ rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
+ if (res_low & RTSC_SUSP)
+ clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
+}
+#else
+static inline void check_geode_tsc_reliable(void) { }
+#endif
+
+/*
+ * Make an educated guess if the TSC is trustworthy and synchronized
+ * over all CPUs.
+ */
+__cpuinit int unsynchronized_tsc(void)
+{
+ if (!cpu_has_tsc || tsc_unstable)
+ return 1;
+
+#ifdef CONFIG_SMP
+ if (apic_is_clustered_box())
+ return 1;
+#endif
+
+ if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ return 0;
+ /*
+ * Intel systems are normally all synchronized.
+ * Exceptions must mark TSC as unstable:
+ */
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
+ /* assume multi socket systems are not synchronized: */
+ if (num_possible_cpus() > 1)
+ tsc_unstable = 1;
+ }
+
+ return tsc_unstable;
+}
+
+static void __init init_tsc_clocksource(void)
+{
+ clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
+ clocksource_tsc.shift);
+ /* lower the rating if we already know its unstable: */
+ if (check_tsc_unstable()) {
+ clocksource_tsc.rating = 0;
+ clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
+ }
+ clocksource_register(&clocksource_tsc);
+}
+
+void __init tsc_init(void)
+{
+ u64 lpj;
+ int cpu;
+
+ if (!cpu_has_tsc)
+ return;
+
+ tsc_khz = calibrate_tsc();
+ cpu_khz = tsc_khz;
+
+ if (!tsc_khz) {
+ mark_tsc_unstable("could not calculate TSC khz");
+ return;
+ }
+
+#ifdef CONFIG_X86_64
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
+ cpu_khz = calibrate_cpu();
+#endif
+
+ lpj = ((u64)tsc_khz * 1000);
+ do_div(lpj, HZ);
+ lpj_fine = lpj;
+
+ printk("Detected %lu.%03lu MHz processor.\n",
+ (unsigned long)cpu_khz / 1000,
+ (unsigned long)cpu_khz % 1000);
+
+ /*
+ * Secondary CPUs do not run through tsc_init(), so set up
+ * all the scale factors for all CPUs, assuming the same
+ * speed as the bootup CPU. (cpufreq notifiers will fix this
+ * up if their speed diverges)
+ */
+ for_each_possible_cpu(cpu)
+ set_cyc2ns_scale(cpu_khz, cpu);
+
+ if (tsc_disabled > 0)
+ return;
+
+ /* now allow native_sched_clock() to use rdtsc */
+ tsc_disabled = 0;
+
+ use_tsc_delay();
+ /* Check and install the TSC clocksource */
+ dmi_check_system(bad_tsc_dmi_table);
+
+ if (unsynchronized_tsc())
+ mark_tsc_unstable("TSCs unsynchronized");
+
+ check_geode_tsc_reliable();
+ init_tsc_clocksource();
+}
+
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
deleted file mode 100644
index 6240922e497..00000000000
--- a/arch/x86/kernel/tsc_32.c
+++ /dev/null
@@ -1,455 +0,0 @@
-#include <linux/sched.h>
-#include <linux/clocksource.h>
-#include <linux/workqueue.h>
-#include <linux/delay.h>
-#include <linux/cpufreq.h>
-#include <linux/jiffies.h>
-#include <linux/init.h>
-#include <linux/dmi.h>
-#include <linux/percpu.h>
-
-#include <asm/delay.h>
-#include <asm/tsc.h>
-#include <asm/io.h>
-#include <asm/timer.h>
-
-#include "mach_timer.h"
-
-/* native_sched_clock() is called before tsc_init(), so
- we must start with the TSC soft disabled to prevent
- erroneous rdtsc usage on !cpu_has_tsc processors */
-static int tsc_disabled = -1;
-
-/*
- * On some systems the TSC frequency does not
- * change with the cpu frequency. So we need
- * an extra value to store the TSC freq
- */
-unsigned int tsc_khz;
-EXPORT_SYMBOL_GPL(tsc_khz);
-
-#ifdef CONFIG_X86_TSC
-static int __init tsc_setup(char *str)
-{
- printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
- "cannot disable TSC completely.\n");
- tsc_disabled = 1;
- return 1;
-}
-#else
-/*
- * disable flag for tsc. Takes effect by clearing the TSC cpu flag
- * in cpu/common.c
- */
-static int __init tsc_setup(char *str)
-{
- setup_clear_cpu_cap(X86_FEATURE_TSC);
- return 1;
-}
-#endif
-
-__setup("notsc", tsc_setup);
-
-/*
- * code to mark and check if the TSC is unstable
- * due to cpufreq or due to unsynced TSCs
- */
-static int tsc_unstable;
-
-int check_tsc_unstable(void)
-{
- return tsc_unstable;
-}
-EXPORT_SYMBOL_GPL(check_tsc_unstable);
-
-/* Accelerators for sched_clock()
- * convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_khz * 10^3))
- * ns = cycles * (10^6 / cpu_khz)
- *
- * Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^6 * SC / cpu_khz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- *
- * We can use khz divisor instead of mhz to keep a better precision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- * (mathieu.desnoyers@polymtl.ca)
- *
- * -johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-
-DEFINE_PER_CPU(unsigned long, cyc2ns);
-
-static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
-{
- unsigned long long tsc_now, ns_now;
- unsigned long flags, *scale;
-
- local_irq_save(flags);
- sched_clock_idle_sleep_event();
-
- scale = &per_cpu(cyc2ns, cpu);
-
- rdtscll(tsc_now);
- ns_now = __cycles_2_ns(tsc_now);
-
- if (cpu_khz)
- *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
-
- /*
- * Start smoothly with the new frequency:
- */
- sched_clock_idle_wakeup_event(0);
- local_irq_restore(flags);
-}
-
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long native_sched_clock(void)
-{
- unsigned long long this_offset;
-
- /*
- * Fall back to jiffies if there's no TSC available:
- * ( But note that we still use it if the TSC is marked
- * unstable. We do this because unlike Time Of Day,
- * the scheduler clock tolerates small errors and it's
- * very important for it to be as fast as the platform
- * can achive it. )
- */
- if (unlikely(tsc_disabled))
- /* No locking but a rare wrong value is not a big deal: */
- return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
-
- /* read the Time Stamp Counter: */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return cycles_2_ns(this_offset);
-}
-
-/* We need to define a real function for sched_clock, to override the
- weak default version */
-#ifdef CONFIG_PARAVIRT
-unsigned long long sched_clock(void)
-{
- return paravirt_sched_clock();
-}
-#else
-unsigned long long sched_clock(void)
- __attribute__((alias("native_sched_clock")));
-#endif
-
-unsigned long native_calculate_cpu_khz(void)
-{
- unsigned long long start, end;
- unsigned long count;
- u64 delta64 = (u64)ULLONG_MAX;
- int i;
- unsigned long flags;
-
- local_irq_save(flags);
-
- /* run 3 times to ensure the cache is warm and to get an accurate reading */
- for (i = 0; i < 3; i++) {
- mach_prepare_counter();
- rdtscll(start);
- mach_countup(&count);
- rdtscll(end);
-
- /*
- * Error: ECTCNEVERSET
- * The CTC wasn't reliable: we got a hit on the very first read,
- * or the CPU was so fast/slow that the quotient wouldn't fit in
- * 32 bits..
- */
- if (count <= 1)
- continue;
-
- /* cpu freq too slow: */
- if ((end - start) <= CALIBRATE_TIME_MSEC)
- continue;
-
- /*
- * We want the minimum time of all runs in case one of them
- * is inaccurate due to SMI or other delay
- */
- delta64 = min(delta64, (end - start));
- }
-
- /* cpu freq too fast (or every run was bad): */
- if (delta64 > (1ULL<<32))
- goto err;
-
- delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */
- do_div(delta64,CALIBRATE_TIME_MSEC);
-
- local_irq_restore(flags);
- return (unsigned long)delta64;
-err:
- local_irq_restore(flags);
- return 0;
-}
-
-int recalibrate_cpu_khz(void)
-{
-#ifndef CONFIG_SMP
- unsigned long cpu_khz_old = cpu_khz;
-
- if (cpu_has_tsc) {
- cpu_khz = calculate_cpu_khz();
- tsc_khz = cpu_khz;
- cpu_data(0).loops_per_jiffy =
- cpufreq_scale(cpu_data(0).loops_per_jiffy,
- cpu_khz_old, cpu_khz);
- return 0;
- } else
- return -ENODEV;
-#else
- return -ENODEV;
-#endif
-}
-
-EXPORT_SYMBOL(recalibrate_cpu_khz);
-
-#ifdef CONFIG_CPU_FREQ
-
-/*
- * if the CPU frequency is scaled, TSC-based delays will need a different
- * loops_per_jiffy value to function properly.
- */
-static unsigned int ref_freq;
-static unsigned long loops_per_jiffy_ref;
-static unsigned long cpu_khz_ref;
-
-static int
-time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
-{
- struct cpufreq_freqs *freq = data;
-
- if (!ref_freq) {
- if (!freq->old){
- ref_freq = freq->new;
- return 0;
- }
- ref_freq = freq->old;
- loops_per_jiffy_ref = cpu_data(freq->cpu).loops_per_jiffy;
- cpu_khz_ref = cpu_khz;
- }
-
- if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
- (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
- (val == CPUFREQ_RESUMECHANGE)) {
- if (!(freq->flags & CPUFREQ_CONST_LOOPS))
- cpu_data(freq->cpu).loops_per_jiffy =
- cpufreq_scale(loops_per_jiffy_ref,
- ref_freq, freq->new);
-
- if (cpu_khz) {
-
- if (num_online_cpus() == 1)
- cpu_khz = cpufreq_scale(cpu_khz_ref,
- ref_freq, freq->new);
- if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
- tsc_khz = cpu_khz;
- set_cyc2ns_scale(cpu_khz, freq->cpu);
- /*
- * TSC based sched_clock turns
- * to junk w/ cpufreq
- */
- mark_tsc_unstable("cpufreq changes");
- }
- }
- }
-
- return 0;
-}
-
-static struct notifier_block time_cpufreq_notifier_block = {
- .notifier_call = time_cpufreq_notifier
-};
-
-static int __init cpufreq_tsc(void)
-{
- return cpufreq_register_notifier(&time_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
-}
-core_initcall(cpufreq_tsc);
-
-#endif
-
-/* clock source code */
-
-static struct clocksource clocksource_tsc;
-
-/*
- * We compare the TSC to the cycle_last value in the clocksource
- * structure to avoid a nasty time-warp issue. This can be observed in
- * a very small window right after one CPU updated cycle_last under
- * xtime lock and the other CPU reads a TSC value which is smaller
- * than the cycle_last reference value due to a TSC which is slighty
- * behind. This delta is nowhere else observable, but in that case it
- * results in a forward time jump in the range of hours due to the
- * unsigned delta calculation of the time keeping core code, which is
- * necessary to support wrapping clocksources like pm timer.
- */
-static cycle_t read_tsc(void)
-{
- cycle_t ret;
-
- rdtscll(ret);
-
- return ret >= clocksource_tsc.cycle_last ?
- ret : clocksource_tsc.cycle_last;
-}
-
-static struct clocksource clocksource_tsc = {
- .name = "tsc",
- .rating = 300,
- .read = read_tsc,
- .mask = CLOCKSOURCE_MASK(64),
- .mult = 0, /* to be set */
- .shift = 22,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS |
- CLOCK_SOURCE_MUST_VERIFY,
-};
-
-void mark_tsc_unstable(char *reason)
-{
- if (!tsc_unstable) {
- tsc_unstable = 1;
- printk("Marking TSC unstable due to: %s.\n", reason);
- /* Can be called before registration */
- if (clocksource_tsc.mult)
- clocksource_change_rating(&clocksource_tsc, 0);
- else
- clocksource_tsc.rating = 0;
- }
-}
-EXPORT_SYMBOL_GPL(mark_tsc_unstable);
-
-static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d)
-{
- printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
- d->ident);
- tsc_unstable = 1;
- return 0;
-}
-
-/* List of systems that have known TSC problems */
-static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
- {
- .callback = dmi_mark_tsc_unstable,
- .ident = "IBM Thinkpad 380XD",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
- DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
- },
- },
- {}
-};
-
-/*
- * Make an educated guess if the TSC is trustworthy and synchronized
- * over all CPUs.
- */
-__cpuinit int unsynchronized_tsc(void)
-{
- if (!cpu_has_tsc || tsc_unstable)
- return 1;
-
- /* Anything with constant TSC should be synchronized */
- if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
- return 0;
-
- /*
- * Intel systems are normally all synchronized.
- * Exceptions must mark TSC as unstable:
- */
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
- /* assume multi socket systems are not synchronized: */
- if (num_possible_cpus() > 1)
- tsc_unstable = 1;
- }
- return tsc_unstable;
-}
-
-/*
- * Geode_LX - the OLPC CPU has a possibly a very reliable TSC
- */
-#ifdef CONFIG_MGEODE_LX
-/* RTSC counts during suspend */
-#define RTSC_SUSP 0x100
-
-static void __init check_geode_tsc_reliable(void)
-{
- unsigned long res_low, res_high;
-
- rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
- if (res_low & RTSC_SUSP)
- clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
-}
-#else
-static inline void check_geode_tsc_reliable(void) { }
-#endif
-
-
-void __init tsc_init(void)
-{
- int cpu;
- u64 lpj;
-
- if (!cpu_has_tsc || tsc_disabled > 0)
- return;
-
- cpu_khz = calculate_cpu_khz();
- tsc_khz = cpu_khz;
-
- if (!cpu_khz) {
- mark_tsc_unstable("could not calculate TSC khz");
- return;
- }
-
- lpj = ((u64)tsc_khz * 1000);
- do_div(lpj, HZ);
- lpj_fine = lpj;
-
- /* now allow native_sched_clock() to use rdtsc */
- tsc_disabled = 0;
-
- printk("Detected %lu.%03lu MHz processor.\n",
- (unsigned long)cpu_khz / 1000,
- (unsigned long)cpu_khz % 1000);
-
- /*
- * Secondary CPUs do not run through tsc_init(), so set up
- * all the scale factors for all CPUs, assuming the same
- * speed as the bootup CPU. (cpufreq notifiers will fix this
- * up if their speed diverges)
- */
- for_each_possible_cpu(cpu)
- set_cyc2ns_scale(cpu_khz, cpu);
-
- use_tsc_delay();
-
- /* Check and install the TSC clocksource */
- dmi_check_system(bad_tsc_dmi_table);
-
- unsynchronized_tsc();
- check_geode_tsc_reliable();
- clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
- clocksource_tsc.shift);
- /* lower the rating if we already know its unstable: */
- if (check_tsc_unstable()) {
- clocksource_tsc.rating = 0;
- clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
- }
- clocksource_register(&clocksource_tsc);
-}
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
deleted file mode 100644
index 9898fb01edf..00000000000
--- a/arch/x86/kernel/tsc_64.c
+++ /dev/null
@@ -1,357 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/clocksource.h>
-#include <linux/time.h>
-#include <linux/acpi.h>
-#include <linux/cpufreq.h>
-#include <linux/acpi_pmtmr.h>
-
-#include <asm/hpet.h>
-#include <asm/timex.h>
-#include <asm/timer.h>
-#include <asm/vgtod.h>
-
-static int notsc __initdata = 0;
-
-unsigned int cpu_khz; /* TSC clocks / usec, not used here */
-EXPORT_SYMBOL(cpu_khz);
-unsigned int tsc_khz;
-EXPORT_SYMBOL(tsc_khz);
-
-/* Accelerators for sched_clock()
- * convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_khz * 10^3))
- * ns = cycles * (10^6 / cpu_khz)
- *
- * Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^6 * SC / cpu_khz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- *
- * We can use khz divisor instead of mhz to keep a better precision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- * (mathieu.desnoyers@polymtl.ca)
- *
- * -johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-DEFINE_PER_CPU(unsigned long, cyc2ns);
-
-static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
-{
- unsigned long long tsc_now, ns_now;
- unsigned long flags, *scale;
-
- local_irq_save(flags);
- sched_clock_idle_sleep_event();
-
- scale = &per_cpu(cyc2ns, cpu);
-
- rdtscll(tsc_now);
- ns_now = __cycles_2_ns(tsc_now);
-
- if (cpu_khz)
- *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
-
- sched_clock_idle_wakeup_event(0);
- local_irq_restore(flags);
-}
-
-unsigned long long native_sched_clock(void)
-{
- unsigned long a = 0;
-
- /* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
- * which means it is not completely exact and may not be monotonous
- * between CPUs. But the errors should be too small to matter for
- * scheduling purposes.
- */
-
- rdtscll(a);
- return cycles_2_ns(a);
-}
-
-/* We need to define a real function for sched_clock, to override the
- weak default version */
-#ifdef CONFIG_PARAVIRT
-unsigned long long sched_clock(void)
-{
- return paravirt_sched_clock();
-}
-#else
-unsigned long long
-sched_clock(void) __attribute__((alias("native_sched_clock")));
-#endif
-
-
-static int tsc_unstable;
-
-int check_tsc_unstable(void)
-{
- return tsc_unstable;
-}
-EXPORT_SYMBOL_GPL(check_tsc_unstable);
-
-#ifdef CONFIG_CPU_FREQ
-
-/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
- * changes.
- *
- * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
- * not that important because current Opteron setups do not support
- * scaling on SMP anyroads.
- *
- * Should fix up last_tsc too. Currently gettimeofday in the
- * first tick after the change will be slightly wrong.
- */
-
-static unsigned int ref_freq;
-static unsigned long loops_per_jiffy_ref;
-static unsigned long tsc_khz_ref;
-
-static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
- void *data)
-{
- struct cpufreq_freqs *freq = data;
- unsigned long *lpj, dummy;
-
- if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
- return 0;
-
- lpj = &dummy;
- if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-#ifdef CONFIG_SMP
- lpj = &cpu_data(freq->cpu).loops_per_jiffy;
-#else
- lpj = &boot_cpu_data.loops_per_jiffy;
-#endif
-
- if (!ref_freq) {
- ref_freq = freq->old;
- loops_per_jiffy_ref = *lpj;
- tsc_khz_ref = tsc_khz;
- }
- if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
- (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
- (val == CPUFREQ_RESUMECHANGE)) {
- *lpj =
- cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
-
- tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
- if (!(freq->flags & CPUFREQ_CONST_LOOPS))
- mark_tsc_unstable("cpufreq changes");
- }
-
- set_cyc2ns_scale(tsc_khz_ref, freq->cpu);
-
- return 0;
-}
-
-static struct notifier_block time_cpufreq_notifier_block = {
- .notifier_call = time_cpufreq_notifier
-};
-
-static int __init cpufreq_tsc(void)
-{
- cpufreq_register_notifier(&time_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
- return 0;
-}
-
-core_initcall(cpufreq_tsc);
-
-#endif
-
-#define MAX_RETRIES 5
-#define SMI_TRESHOLD 50000
-
-/*
- * Read TSC and the reference counters. Take care of SMI disturbance
- */
-static unsigned long __init tsc_read_refs(unsigned long *pm,
- unsigned long *hpet)
-{
- unsigned long t1, t2;
- int i;
-
- for (i = 0; i < MAX_RETRIES; i++) {
- t1 = get_cycles();
- if (hpet)
- *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
- else
- *pm = acpi_pm_read_early();
- t2 = get_cycles();
- if ((t2 - t1) < SMI_TRESHOLD)
- return t2;
- }
- return ULONG_MAX;
-}
-
-/**
- * tsc_calibrate - calibrate the tsc on boot
- */
-void __init tsc_calibrate(void)
-{
- unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2;
- int hpet = is_hpet_enabled(), cpu;
-
- local_irq_save(flags);
-
- tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
-
- outb((inb(0x61) & ~0x02) | 0x01, 0x61);
-
- outb(0xb0, 0x43);
- outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
- outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
- tr1 = get_cycles();
- while ((inb(0x61) & 0x20) == 0);
- tr2 = get_cycles();
-
- tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
-
- local_irq_restore(flags);
-
- /*
- * Preset the result with the raw and inaccurate PIT
- * calibration value
- */
- tsc_khz = (tr2 - tr1) / 50;
-
- /* hpet or pmtimer available ? */
- if (!hpet && !pm1 && !pm2) {
- printk(KERN_INFO "TSC calibrated against PIT\n");
- goto out;
- }
-
- /* Check, whether the sampling was disturbed by an SMI */
- if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) {
- printk(KERN_WARNING "TSC calibration disturbed by SMI, "
- "using PIT calibration result\n");
- goto out;
- }
-
- tsc2 = (tsc2 - tsc1) * 1000000L;
-
- if (hpet) {
- printk(KERN_INFO "TSC calibrated against HPET\n");
- if (hpet2 < hpet1)
- hpet2 += 0x100000000UL;
- hpet2 -= hpet1;
- tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000;
- } else {
- printk(KERN_INFO "TSC calibrated against PM_TIMER\n");
- if (pm2 < pm1)
- pm2 += ACPI_PM_OVRRUN;
- pm2 -= pm1;
- tsc1 = (pm2 * 1000000000) / PMTMR_TICKS_PER_SEC;
- }
-
- tsc_khz = tsc2 / tsc1;
-
-out:
- for_each_possible_cpu(cpu)
- set_cyc2ns_scale(tsc_khz, cpu);
-}
-
-/*
- * Make an educated guess if the TSC is trustworthy and synchronized
- * over all CPUs.
- */
-__cpuinit int unsynchronized_tsc(void)
-{
- if (tsc_unstable)
- return 1;
-
-#ifdef CONFIG_SMP
- if (apic_is_clustered_box())
- return 1;
-#endif
-
- if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
- return 0;
-
- /* Assume multi socket systems are not synchronized */
- return num_present_cpus() > 1;
-}
-
-int __init notsc_setup(char *s)
-{
- notsc = 1;
- return 1;
-}
-
-__setup("notsc", notsc_setup);
-
-static struct clocksource clocksource_tsc;
-
-/*
- * We compare the TSC to the cycle_last value in the clocksource
- * structure to avoid a nasty time-warp. This can be observed in a
- * very small window right after one CPU updated cycle_last under
- * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
- * is smaller than the cycle_last reference value due to a TSC which
- * is slighty behind. This delta is nowhere else observable, but in
- * that case it results in a forward time jump in the range of hours
- * due to the unsigned delta calculation of the time keeping core
- * code, which is necessary to support wrapping clocksources like pm
- * timer.
- */
-static cycle_t read_tsc(void)
-{
- cycle_t ret = (cycle_t)get_cycles();
-
- return ret >= clocksource_tsc.cycle_last ?
- ret : clocksource_tsc.cycle_last;
-}
-
-static cycle_t __vsyscall_fn vread_tsc(void)
-{
- cycle_t ret = (cycle_t)vget_cycles();
-
- return ret >= __vsyscall_gtod_data.clock.cycle_last ?
- ret : __vsyscall_gtod_data.clock.cycle_last;
-}
-
-static struct clocksource clocksource_tsc = {
- .name = "tsc",
- .rating = 300,
- .read = read_tsc,
- .mask = CLOCKSOURCE_MASK(64),
- .shift = 22,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS |
- CLOCK_SOURCE_MUST_VERIFY,
- .vread = vread_tsc,
-};
-
-void mark_tsc_unstable(char *reason)
-{
- if (!tsc_unstable) {
- tsc_unstable = 1;
- printk("Marking TSC unstable due to %s\n", reason);
- /* Change only the rating, when not registered */
- if (clocksource_tsc.mult)
- clocksource_change_rating(&clocksource_tsc, 0);
- else
- clocksource_tsc.rating = 0;
- }
-}
-EXPORT_SYMBOL_GPL(mark_tsc_unstable);
-
-void __init init_tsc_clocksource(void)
-{
- if (!notsc) {
- clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
- clocksource_tsc.shift);
- if (check_tsc_unstable())
- clocksource_tsc.rating = 0;
-
- clocksource_register(&clocksource_tsc);
- }
-}
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
new file mode 100644
index 00000000000..e94bdb6add1
--- /dev/null
+++ b/arch/x86/kernel/visws_quirks.c
@@ -0,0 +1,709 @@
+/*
+ * SGI Visual Workstation support and quirks, unmaintained.
+ *
+ * Split out from setup.c by davej@suse.de
+ *
+ * Copyright (C) 1999 Bent Hagemark, Ingo Molnar
+ *
+ * SGI Visual Workstation interrupt controller
+ *
+ * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC
+ * which serves as the main interrupt controller in the system. Non-legacy
+ * hardware in the system uses this controller directly. Legacy devices
+ * are connected to the PIIX4 which in turn has its 8259(s) connected to
+ * a of the Cobalt APIC entry.
+ *
+ * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com
+ *
+ * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru>
+ */
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/visws/cobalt.h>
+#include <asm/visws/piix4.h>
+#include <asm/arch_hooks.h>
+#include <asm/fixmap.h>
+#include <asm/reboot.h>
+#include <asm/setup.h>
+#include <asm/e820.h>
+#include <asm/smp.h>
+#include <asm/io.h>
+
+#include <mach_ipi.h>
+
+#include "mach_apic.h"
+
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <linux/kernel_stat.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/i8259.h>
+#include <asm/irq_vectors.h>
+#include <asm/visws/cobalt.h>
+#include <asm/visws/lithium.h>
+#include <asm/visws/piix4.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+extern int no_broadcast;
+
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/arch_hooks.h>
+#include <asm/visws/cobalt.h>
+#include <asm/visws/lithium.h>
+
+char visws_board_type = -1;
+char visws_board_rev = -1;
+
+int is_visws_box(void)
+{
+ return visws_board_type >= 0;
+}
+
+static int __init visws_time_init_quirk(void)
+{
+ printk(KERN_INFO "Starting Cobalt Timer system clock\n");
+
+ /* Set the countdown value */
+ co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
+
+ /* Start the timer */
+ co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
+
+ /* Enable (unmask) the timer interrupt */
+ co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
+
+ /*
+ * Zero return means the generic timer setup code will set up
+ * the standard vector:
+ */
+ return 0;
+}
+
+static int __init visws_pre_intr_init_quirk(void)
+{
+ init_VISWS_APIC_irqs();
+
+ /*
+ * We dont want ISA irqs to be set up by the generic code:
+ */
+ return 1;
+}
+
+/* Quirk for machine specific memory setup. */
+
+#define MB (1024 * 1024)
+
+unsigned long sgivwfb_mem_phys;
+unsigned long sgivwfb_mem_size;
+EXPORT_SYMBOL(sgivwfb_mem_phys);
+EXPORT_SYMBOL(sgivwfb_mem_size);
+
+long long mem_size __initdata = 0;
+
+static char * __init visws_memory_setup_quirk(void)
+{
+ long long gfx_mem_size = 8 * MB;
+
+ mem_size = boot_params.alt_mem_k;
+
+ if (!mem_size) {
+ printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n");
+ mem_size = 128 * MB;
+ }
+
+ /*
+ * this hardcodes the graphics memory to 8 MB
+ * it really should be sized dynamically (or at least
+ * set as a boot param)
+ */
+ if (!sgivwfb_mem_size) {
+ printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n");
+ sgivwfb_mem_size = 8 * MB;
+ }
+
+ /*
+ * Trim to nearest MB
+ */
+ sgivwfb_mem_size &= ~((1 << 20) - 1);
+ sgivwfb_mem_phys = mem_size - gfx_mem_size;
+
+ e820_add_region(0, LOWMEMSIZE(), E820_RAM);
+ e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
+ e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
+
+ return "PROM";
+}
+
+static void visws_machine_emergency_restart(void)
+{
+ /*
+ * Visual Workstations restart after this
+ * register is poked on the PIIX4
+ */
+ outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
+}
+
+static void visws_machine_power_off(void)
+{
+ unsigned short pm_status;
+/* extern unsigned int pci_bus0; */
+
+ while ((pm_status = inw(PMSTS_PORT)) & 0x100)
+ outw(pm_status, PMSTS_PORT);
+
+ outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT);
+
+ mdelay(10);
+
+#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
+ (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
+
+/* outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */
+ outl(PIIX_SPECIAL_STOP, 0xCFC);
+}
+
+static int __init visws_get_smp_config_quirk(unsigned int early)
+{
+ /*
+ * Prevent MP-table parsing by the generic code:
+ */
+ return 1;
+}
+
+extern unsigned int __cpuinitdata maxcpus;
+
+/*
+ * The Visual Workstation is Intel MP compliant in the hardware
+ * sense, but it doesn't have a BIOS(-configuration table).
+ * No problem for Linux.
+ */
+
+static void __init MP_processor_info (struct mpc_config_processor *m)
+{
+ int ver, logical_apicid;
+ physid_mask_t apic_cpus;
+
+ if (!(m->mpc_cpuflag & CPU_ENABLED))
+ return;
+
+ logical_apicid = m->mpc_apicid;
+ printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
+ m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
+ m->mpc_apicid,
+ (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+ (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+ m->mpc_apicver);
+
+ if (m->mpc_cpuflag & CPU_BOOTPROCESSOR)
+ boot_cpu_physical_apicid = m->mpc_apicid;
+
+ ver = m->mpc_apicver;
+ if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) {
+ printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
+ m->mpc_apicid, MAX_APICS);
+ return;
+ }
+
+ apic_cpus = apicid_to_cpu_present(m->mpc_apicid);
+ physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
+ /*
+ * Validate version
+ */
+ if (ver == 0x0) {
+ printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
+ "fixing up to 0x10. (tell your hw vendor)\n",
+ m->mpc_apicid);
+ ver = 0x10;
+ }
+ apic_version[m->mpc_apicid] = ver;
+}
+
+int __init visws_find_smp_config_quirk(unsigned int reserve)
+{
+ struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS);
+ unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
+
+ if (ncpus > CO_CPU_MAX) {
+ printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n",
+ ncpus, mp);
+
+ ncpus = CO_CPU_MAX;
+ }
+
+ if (ncpus > maxcpus)
+ ncpus = maxcpus;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ smp_found_config = 1;
+#endif
+ while (ncpus--)
+ MP_processor_info(mp++);
+
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+ return 1;
+}
+
+extern int visws_trap_init_quirk(void);
+
+void __init visws_early_detect(void)
+{
+ int raw;
+
+ visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG)
+ >> PIIX_GPI_BD_SHIFT;
+
+ if (visws_board_type < 0)
+ return;
+
+ /*
+ * Install special quirks for timer, interrupt and memory setup:
+ */
+ arch_time_init_quirk = visws_time_init_quirk;
+ arch_pre_intr_init_quirk = visws_pre_intr_init_quirk;
+ arch_memory_setup_quirk = visws_memory_setup_quirk;
+
+ /*
+ * Fall back to generic behavior for traps:
+ */
+ arch_intr_init_quirk = NULL;
+ arch_trap_init_quirk = visws_trap_init_quirk;
+
+ /*
+ * Install reboot quirks:
+ */
+ pm_power_off = visws_machine_power_off;
+ machine_ops.emergency_restart = visws_machine_emergency_restart;
+
+ /*
+ * Do not use broadcast IPIs:
+ */
+ no_broadcast = 0;
+
+ /*
+ * Override generic MP-table parsing:
+ */
+ mach_get_smp_config_quirk = visws_get_smp_config_quirk;
+ mach_find_smp_config_quirk = visws_find_smp_config_quirk;
+
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * Turn off IO-APIC detection and initialization:
+ */
+ skip_ioapic_setup = 1;
+#endif
+
+ /*
+ * Get Board rev.
+ * First, we have to initialize the 307 part to allow us access
+ * to the GPIO registers. Let's map them at 0x0fc0 which is right
+ * after the PIIX4 PM section.
+ */
+ outb_p(SIO_DEV_SEL, SIO_INDEX);
+ outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */
+
+ outb_p(SIO_DEV_MSB, SIO_INDEX);
+ outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */
+
+ outb_p(SIO_DEV_LSB, SIO_INDEX);
+ outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */
+
+ outb_p(SIO_DEV_ENB, SIO_INDEX);
+ outb_p(1, SIO_DATA); /* Enable GPIO registers. */
+
+ /*
+ * Now, we have to map the power management section to write
+ * a bit which enables access to the GPIO registers.
+ * What lunatic came up with this shit?
+ */
+ outb_p(SIO_DEV_SEL, SIO_INDEX);
+ outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */
+
+ outb_p(SIO_DEV_MSB, SIO_INDEX);
+ outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */
+
+ outb_p(SIO_DEV_LSB, SIO_INDEX);
+ outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */
+
+ outb_p(SIO_DEV_ENB, SIO_INDEX);
+ outb_p(1, SIO_DATA); /* Enable PM registers. */
+
+ /*
+ * Now, write the PM register which enables the GPIO registers.
+ */
+ outb_p(SIO_PM_FER2, SIO_PM_INDEX);
+ outb_p(SIO_PM_GP_EN, SIO_PM_DATA);
+
+ /*
+ * Now, initialize the GPIO registers.
+ * We want them all to be inputs which is the
+ * power on default, so let's leave them alone.
+ * So, let's just read the board rev!
+ */
+ raw = inb_p(SIO_GP_DATA1);
+ raw &= 0x7f; /* 7 bits of valid board revision ID. */
+
+ if (visws_board_type == VISWS_320) {
+ if (raw < 0x6) {
+ visws_board_rev = 4;
+ } else if (raw < 0xc) {
+ visws_board_rev = 5;
+ } else {
+ visws_board_rev = 6;
+ }
+ } else if (visws_board_type == VISWS_540) {
+ visws_board_rev = 2;
+ } else {
+ visws_board_rev = raw;
+ }
+
+ printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n",
+ (visws_board_type == VISWS_320 ? "320" :
+ (visws_board_type == VISWS_540 ? "540" :
+ "unknown")), visws_board_rev);
+}
+
+#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4)
+#define BCD (LI_INTB | LI_INTC | LI_INTD)
+#define ALLDEVS (A01234 | BCD)
+
+static __init void lithium_init(void)
+{
+ set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS);
+ set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
+
+ if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
+ (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
+ printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A');
+/* panic("This machine is not SGI Visual Workstation 320/540"); */
+ }
+
+ if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
+ (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
+ printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B');
+/* panic("This machine is not SGI Visual Workstation 320/540"); */
+ }
+
+ li_pcia_write16(LI_PCI_INTEN, ALLDEVS);
+ li_pcib_write16(LI_PCI_INTEN, ALLDEVS);
+}
+
+static __init void cobalt_init(void)
+{
+ /*
+ * On normal SMP PC this is used only with SMP, but we have to
+ * use it and set it up here to start the Cobalt clock
+ */
+ set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
+ setup_local_APIC();
+ printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
+ (unsigned int)apic_read(APIC_LVR),
+ (unsigned int)apic_read(APIC_ID));
+
+ set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
+ set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
+ printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n",
+ co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID));
+
+ /* Enable Cobalt APIC being careful to NOT change the ID! */
+ co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE);
+
+ printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n",
+ co_apic_read(CO_APIC_ID));
+}
+
+int __init visws_trap_init_quirk(void)
+{
+ lithium_init();
+ cobalt_init();
+
+ return 1;
+}
+
+/*
+ * IRQ controller / APIC support:
+ */
+
+static DEFINE_SPINLOCK(cobalt_lock);
+
+/*
+ * Set the given Cobalt APIC Redirection Table entry to point
+ * to the given IDT vector/index.
+ */
+static inline void co_apic_set(int entry, int irq)
+{
+ co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR));
+ co_apic_write(CO_APIC_HI(entry), 0);
+}
+
+/*
+ * Cobalt (IO)-APIC functions to handle PCI devices.
+ */
+static inline int co_apic_ide0_hack(void)
+{
+ extern char visws_board_type;
+ extern char visws_board_rev;
+
+ if (visws_board_type == VISWS_320 && visws_board_rev == 5)
+ return 5;
+ return CO_APIC_IDE0;
+}
+
+static int is_co_apic(unsigned int irq)
+{
+ if (IS_CO_APIC(irq))
+ return CO_APIC(irq);
+
+ switch (irq) {
+ case 0: return CO_APIC_CPU;
+ case CO_IRQ_IDE0: return co_apic_ide0_hack();
+ case CO_IRQ_IDE1: return CO_APIC_IDE1;
+ default: return -1;
+ }
+}
+
+
+/*
+ * This is the SGI Cobalt (IO-)APIC:
+ */
+
+static void enable_cobalt_irq(unsigned int irq)
+{
+ co_apic_set(is_co_apic(irq), irq);
+}
+
+static void disable_cobalt_irq(unsigned int irq)
+{
+ int entry = is_co_apic(irq);
+
+ co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
+ co_apic_read(CO_APIC_LO(entry));
+}
+
+/*
+ * "irq" really just serves to identify the device. Here is where we
+ * map this to the Cobalt APIC entry where it's physically wired.
+ * This is called via request_irq -> setup_irq -> irq_desc->startup()
+ */
+static unsigned int startup_cobalt_irq(unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cobalt_lock, flags);
+ if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
+ irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
+ enable_cobalt_irq(irq);
+ spin_unlock_irqrestore(&cobalt_lock, flags);
+ return 0;
+}
+
+static void ack_cobalt_irq(unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cobalt_lock, flags);
+ disable_cobalt_irq(irq);
+ apic_write(APIC_EOI, APIC_EIO_ACK);
+ spin_unlock_irqrestore(&cobalt_lock, flags);
+}
+
+static void end_cobalt_irq(unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cobalt_lock, flags);
+ if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
+ enable_cobalt_irq(irq);
+ spin_unlock_irqrestore(&cobalt_lock, flags);
+}
+
+static struct irq_chip cobalt_irq_type = {
+ .typename = "Cobalt-APIC",
+ .startup = startup_cobalt_irq,
+ .shutdown = disable_cobalt_irq,
+ .enable = enable_cobalt_irq,
+ .disable = disable_cobalt_irq,
+ .ack = ack_cobalt_irq,
+ .end = end_cobalt_irq,
+};
+
+
+/*
+ * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt
+ * -- not the manner expected by the code in i8259.c.
+ *
+ * there is a 'master' physical interrupt source that gets sent to
+ * the CPU. But in the chipset there are various 'virtual' interrupts
+ * waiting to be handled. We represent this to Linux through a 'master'
+ * interrupt controller type, and through a special virtual interrupt-
+ * controller. Device drivers only see the virtual interrupt sources.
+ */
+static unsigned int startup_piix4_master_irq(unsigned int irq)
+{
+ init_8259A(0);
+
+ return startup_cobalt_irq(irq);
+}
+
+static void end_piix4_master_irq(unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cobalt_lock, flags);
+ enable_cobalt_irq(irq);
+ spin_unlock_irqrestore(&cobalt_lock, flags);
+}
+
+static struct irq_chip piix4_master_irq_type = {
+ .typename = "PIIX4-master",
+ .startup = startup_piix4_master_irq,
+ .ack = ack_cobalt_irq,
+ .end = end_piix4_master_irq,
+};
+
+
+static struct irq_chip piix4_virtual_irq_type = {
+ .typename = "PIIX4-virtual",
+ .shutdown = disable_8259A_irq,
+ .enable = enable_8259A_irq,
+ .disable = disable_8259A_irq,
+};
+
+
+/*
+ * PIIX4-8259 master/virtual functions to handle interrupt requests
+ * from legacy devices: floppy, parallel, serial, rtc.
+ *
+ * None of these get Cobalt APIC entries, neither do they have IDT
+ * entries. These interrupts are purely virtual and distributed from
+ * the 'master' interrupt source: CO_IRQ_8259.
+ *
+ * When the 8259 interrupts its handler figures out which of these
+ * devices is interrupting and dispatches to its handler.
+ *
+ * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/
+ * enable_irq gets the right irq. This 'master' irq is never directly
+ * manipulated by any driver.
+ */
+static irqreturn_t piix4_master_intr(int irq, void *dev_id)
+{
+ int realirq;
+ irq_desc_t *desc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+
+ /* Find out what's interrupting in the PIIX4 master 8259 */
+ outb(0x0c, 0x20); /* OCW3 Poll command */
+ realirq = inb(0x20);
+
+ /*
+ * Bit 7 == 0 means invalid/spurious
+ */
+ if (unlikely(!(realirq & 0x80)))
+ goto out_unlock;
+
+ realirq &= 7;
+
+ if (unlikely(realirq == 2)) {
+ outb(0x0c, 0xa0);
+ realirq = inb(0xa0);
+
+ if (unlikely(!(realirq & 0x80)))
+ goto out_unlock;
+
+ realirq = (realirq & 7) + 8;
+ }
+
+ /* mask and ack interrupt */
+ cached_irq_mask |= 1 << realirq;
+ if (unlikely(realirq > 7)) {
+ inb(0xa1);
+ outb(cached_slave_mask, 0xa1);
+ outb(0x60 + (realirq & 7), 0xa0);
+ outb(0x60 + 2, 0x20);
+ } else {
+ inb(0x21);
+ outb(cached_master_mask, 0x21);
+ outb(0x60 + realirq, 0x20);
+ }
+
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+
+ desc = irq_desc + realirq;
+
+ /*
+ * handle this 'virtual interrupt' as a Cobalt one now.
+ */
+ kstat_cpu(smp_processor_id()).irqs[realirq]++;
+
+ if (likely(desc->action != NULL))
+ handle_IRQ_event(realirq, desc->action);
+
+ if (!(desc->status & IRQ_DISABLED))
+ enable_8259A_irq(realirq);
+
+ return IRQ_HANDLED;
+
+out_unlock:
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+ return IRQ_NONE;
+}
+
+static struct irqaction master_action = {
+ .handler = piix4_master_intr,
+ .name = "PIIX4-8259",
+};
+
+static struct irqaction cascade_action = {
+ .handler = no_action,
+ .name = "cascade",
+};
+
+
+void init_VISWS_APIC_irqs(void)
+{
+ int i;
+
+ for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
+ irq_desc[i].status = IRQ_DISABLED;
+ irq_desc[i].action = 0;
+ irq_desc[i].depth = 1;
+
+ if (i == 0) {
+ irq_desc[i].chip = &cobalt_irq_type;
+ }
+ else if (i == CO_IRQ_IDE0) {
+ irq_desc[i].chip = &cobalt_irq_type;
+ }
+ else if (i == CO_IRQ_IDE1) {
+ irq_desc[i].chip = &cobalt_irq_type;
+ }
+ else if (i == CO_IRQ_8259) {
+ irq_desc[i].chip = &piix4_master_irq_type;
+ }
+ else if (i < CO_IRQ_APIC0) {
+ irq_desc[i].chip = &piix4_virtual_irq_type;
+ }
+ else if (IS_CO_APIC(i)) {
+ irq_desc[i].chip = &cobalt_irq_type;
+ }
+ }
+
+ setup_irq(CO_IRQ_8259, &master_action);
+ setup_irq(2, &cascade_action);
+}
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 946bf13b44a..b15346092b7 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -932,7 +932,7 @@ static inline int __init activate_vmi(void)
pv_apic_ops.setup_secondary_clock = vmi_time_ap_init;
#endif
pv_time_ops.sched_clock = vmi_sched_clock;
- pv_time_ops.get_cpu_khz = vmi_cpu_khz;
+ pv_time_ops.get_tsc_khz = vmi_tsc_khz;
/* We have true wallclock functions; disable CMOS clock sync */
no_sync_cmos_clock = 1;
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index ba7d19e102b..6953859fe28 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -69,8 +69,8 @@ unsigned long long vmi_sched_clock(void)
return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE));
}
-/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */
-unsigned long vmi_cpu_khz(void)
+/* paravirt_ops.get_tsc_khz = vmi_tsc_khz */
+unsigned long vmi_tsc_khz(void)
{
unsigned long long khz;
khz = vmi_timer_ops.get_cycle_frequency();
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 2674f579627..cdb2363697d 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -49,16 +49,14 @@ SECTIONS
_etext = .; /* End of text section */
} :text = 0x9090
+ NOTES :text :note
+
. = ALIGN(16); /* Exception table */
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
__start___ex_table = .;
*(__ex_table)
__stop___ex_table = .;
- }
-
- NOTES :text :note
-
- BUG_TABLE :text
+ } :text = 0x9090
RODATA
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fd246e22fe6..63e5c1a22e8 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -19,7 +19,7 @@ PHDRS {
data PT_LOAD FLAGS(7); /* RWE */
user PT_LOAD FLAGS(7); /* RWE */
data.init PT_LOAD FLAGS(7); /* RWE */
- note PT_NOTE FLAGS(4); /* R__ */
+ note PT_NOTE FLAGS(0); /* ___ */
}
SECTIONS
{
@@ -40,16 +40,14 @@ SECTIONS
_etext = .; /* End of text section */
} :text = 0x9090
+ NOTES :text :note
+
. = ALIGN(16); /* Exception table */
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
__start___ex_table = .;
*(__ex_table)
__stop___ex_table = .;
- }
-
- NOTES :text :note
-
- BUG_TABLE :text
+ } :text = 0x9090
RODATA
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index c87cbd84c3e..0b8b6690a86 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -42,7 +42,8 @@
#include <asm/topology.h>
#include <asm/vgtod.h>
-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+#define __vsyscall(nr) \
+ __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
#define __syscall_clobber "r11","cx","memory"
/*
@@ -278,7 +279,7 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
{
long cpu = (long)arg;
if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
- smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
+ smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
return NOTIFY_DONE;
}
@@ -301,7 +302,7 @@ static int __init vsyscall_init(void)
#ifdef CONFIG_SYSCTL
register_sysctl_table(kernel_root_table2);
#endif
- on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
+ on_each_cpu(cpu_vsyscall_init, NULL, 1);
hotcpu_notifier(cpu_vsyscall_notifier, 0);
return 0;
}
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 2f306a82689..b545f371b5f 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -2,13 +2,20 @@
All C exports should go in the respective C files. */
#include <linux/module.h>
-#include <net/checksum.h>
#include <linux/smp.h>
+#include <net/checksum.h>
+
#include <asm/processor.h>
-#include <asm/uaccess.h>
#include <asm/pgtable.h>
+#include <asm/uaccess.h>
#include <asm/desc.h>
+#include <asm/ftrace.h>
+
+#ifdef CONFIG_FTRACE
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(mcount);
+#endif
EXPORT_SYMBOL(kernel_thread);