summaryrefslogtreecommitdiffstats
path: root/arch/i386
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386')
-rw-r--r--arch/i386/Kconfig10
-rw-r--r--arch/i386/Kconfig.debug3
-rw-r--r--arch/i386/kernel/Makefile2
-rw-r--r--arch/i386/kernel/acpi/Makefile4
-rw-r--r--arch/i386/kernel/acpi/boot.c3
-rw-r--r--arch/i386/kernel/acpi/cstate.c103
-rw-r--r--arch/i386/kernel/acpi/wakeup.S5
-rw-r--r--arch/i386/kernel/apic.c4
-rw-r--r--arch/i386/kernel/apm.c7
-rw-r--r--arch/i386/kernel/cpu/common.c16
-rw-r--r--arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c7
-rw-r--r--arch/i386/kernel/cpu/cpufreq/longhaul.c12
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k8.c43
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k8.h32
-rw-r--r--arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c2
-rw-r--r--arch/i386/kernel/cpu/cyrix.c6
-rw-r--r--arch/i386/kernel/cpu/intel.c9
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c29
-rw-r--r--arch/i386/kernel/cpu/mtrr/main.c2
-rw-r--r--arch/i386/kernel/cpu/transmeta.c6
-rw-r--r--arch/i386/kernel/crash.c2
-rw-r--r--arch/i386/kernel/doublefault.c2
-rw-r--r--arch/i386/kernel/efi.c110
-rw-r--r--arch/i386/kernel/entry.S9
-rw-r--r--arch/i386/kernel/head.S48
-rw-r--r--arch/i386/kernel/i387.c11
-rw-r--r--arch/i386/kernel/i8237.c67
-rw-r--r--arch/i386/kernel/ioport.c7
-rw-r--r--arch/i386/kernel/ldt.c7
-rw-r--r--arch/i386/kernel/machine_kexec.c32
-rw-r--r--arch/i386/kernel/microcode.c7
-rw-r--r--arch/i386/kernel/mpparse.c27
-rw-r--r--arch/i386/kernel/msr.c31
-rw-r--r--arch/i386/kernel/nmi.c9
-rw-r--r--arch/i386/kernel/numaq.c9
-rw-r--r--arch/i386/kernel/process.c65
-rw-r--r--arch/i386/kernel/ptrace.c63
-rw-r--r--arch/i386/kernel/reboot.c28
-rw-r--r--arch/i386/kernel/semaphore.c162
-rw-r--r--arch/i386/kernel/setup.c36
-rw-r--r--arch/i386/kernel/signal.c13
-rw-r--r--arch/i386/kernel/smp.c2
-rw-r--r--arch/i386/kernel/smpboot.c7
-rw-r--r--arch/i386/kernel/syscall_table.S5
-rw-r--r--arch/i386/kernel/time.c10
-rw-r--r--arch/i386/kernel/timers/timer_hpet.c19
-rw-r--r--arch/i386/kernel/timers/timer_pit.c27
-rw-r--r--arch/i386/kernel/timers/timer_pm.c9
-rw-r--r--arch/i386/kernel/timers/timer_tsc.c14
-rw-r--r--arch/i386/kernel/traps.c21
-rw-r--r--arch/i386/kernel/vm86.c10
-rw-r--r--arch/i386/kernel/vsyscall-sigreturn.S3
-rw-r--r--arch/i386/mach-es7000/es7000.h5
-rw-r--r--arch/i386/mach-es7000/es7000plat.c45
-rw-r--r--arch/i386/mach-generic/bigsmp.c5
-rw-r--r--arch/i386/mach-generic/probe.c20
-rw-r--r--arch/i386/mach-visws/reboot.c15
-rw-r--r--arch/i386/mach-visws/setup.c2
-rw-r--r--arch/i386/mach-voyager/voyager_basic.c31
-rw-r--r--arch/i386/mach-voyager/voyager_smp.c11
-rw-r--r--arch/i386/math-emu/get_address.c13
-rw-r--r--arch/i386/mm/discontig.c11
-rw-r--r--arch/i386/mm/fault.c37
-rw-r--r--arch/i386/mm/hugetlbpage.c27
-rw-r--r--arch/i386/mm/init.c7
-rw-r--r--arch/i386/mm/pageattr.c7
-rw-r--r--arch/i386/mm/pgtable.c10
-rw-r--r--arch/i386/pci/acpi.c1
-rw-r--r--arch/i386/pci/common.c7
-rw-r--r--arch/i386/pci/i386.c49
-rw-r--r--arch/i386/pci/irq.c24
-rw-r--r--arch/i386/pci/pci.h1
-rw-r--r--arch/i386/pci/visws.c4
-rw-r--r--arch/i386/power/cpu.c43
74 files changed, 868 insertions, 684 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 6c02336fe2e..3b3b017e1c1 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -14,6 +14,10 @@ config X86
486, 586, Pentiums, and various instruction-set-compatible chips by
AMD, Cyrix, and others.
+config SEMAPHORE_SLEEPERS
+ bool
+ default y
+
config MMU
bool
default y
@@ -454,8 +458,9 @@ config HPET_TIMER
Choose N to continue using the legacy 8254 timer.
config HPET_EMULATE_RTC
- bool "Provide RTC interrupt"
+ bool
depends on HPET_TIMER && RTC=y
+ default y
config SMP
bool "Symmetric multi-processing support"
@@ -753,6 +758,7 @@ config NUMA
depends on SMP && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI))
default n if X86_PC
default y if (X86_NUMAQ || X86_SUMMIT)
+ select SPARSEMEM_STATIC
# Need comments to help the hapless user trying to turn on NUMA support
comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support"
@@ -1285,6 +1291,8 @@ source "fs/Kconfig.binfmt"
endmenu
+source "net/Kconfig"
+
source "drivers/Kconfig"
source "fs/Kconfig"
diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
index bfb2064f710..5228c40a6fb 100644
--- a/arch/i386/Kconfig.debug
+++ b/arch/i386/Kconfig.debug
@@ -18,6 +18,9 @@ config EARLY_PRINTK
config DEBUG_STACKOVERFLOW
bool "Check for stack overflows"
depends on DEBUG_KERNEL
+ help
+ This option will cause messages to be printed if free stack space
+ drops below a certain limit.
config KPROBES
bool "Kprobes"
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 4cc83b322b3..64682a0edac 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.lds
obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
- doublefault.o quirks.o
+ doublefault.o quirks.o i8237.o
obj-y += cpu/
obj-y += timers/
diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile
index ee75cb286cf..5e291a20c03 100644
--- a/arch/i386/kernel/acpi/Makefile
+++ b/arch/i386/kernel/acpi/Makefile
@@ -2,3 +2,7 @@ obj-$(CONFIG_ACPI_BOOT) := boot.o
obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o
obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o
+ifneq ($(CONFIG_ACPI_PROCESSOR),)
+obj-y += cstate.o
+endif
+
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index b7808a89d94..34ee500c26e 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -833,6 +833,9 @@ acpi_process_madt(void)
if (!error) {
acpi_lapic = 1;
+#ifdef CONFIG_X86_GENERICARCH
+ generic_bigsmp_probe();
+#endif
/*
* Parse MADT IO-APIC entries
*/
diff --git a/arch/i386/kernel/acpi/cstate.c b/arch/i386/kernel/acpi/cstate.c
new file mode 100644
index 00000000000..4c3036ba65d
--- /dev/null
+++ b/arch/i386/kernel/acpi/cstate.c
@@ -0,0 +1,103 @@
+/*
+ * arch/i386/kernel/acpi/cstate.c
+ *
+ * Copyright (C) 2005 Intel Corporation
+ * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ * - Added _PDC for SMP C-states on Intel CPUs
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+
+#include <acpi/processor.h>
+#include <asm/acpi.h>
+
+static void acpi_processor_power_init_intel_pdc(struct acpi_processor_power
+ *pow)
+{
+ struct acpi_object_list *obj_list;
+ union acpi_object *obj;
+ u32 *buf;
+
+ /* allocate and initialize pdc. It will be used later. */
+ obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL);
+ if (!obj_list) {
+ printk(KERN_ERR "Memory allocation error\n");
+ return;
+ }
+
+ obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL);
+ if (!obj) {
+ printk(KERN_ERR "Memory allocation error\n");
+ kfree(obj_list);
+ return;
+ }
+
+ buf = kmalloc(12, GFP_KERNEL);
+ if (!buf) {
+ printk(KERN_ERR "Memory allocation error\n");
+ kfree(obj);
+ kfree(obj_list);
+ return;
+ }
+
+ buf[0] = ACPI_PDC_REVISION_ID;
+ buf[1] = 1;
+ buf[2] = ACPI_PDC_C_CAPABILITY_SMP;
+
+ obj->type = ACPI_TYPE_BUFFER;
+ obj->buffer.length = 12;
+ obj->buffer.pointer = (u8 *) buf;
+ obj_list->count = 1;
+ obj_list->pointer = obj;
+ pow->pdc = obj_list;
+
+ return;
+}
+
+/* Initialize _PDC data based on the CPU vendor */
+void acpi_processor_power_init_pdc(struct acpi_processor_power *pow,
+ unsigned int cpu)
+{
+ struct cpuinfo_x86 *c = cpu_data + cpu;
+
+ pow->pdc = NULL;
+ if (c->x86_vendor == X86_VENDOR_INTEL)
+ acpi_processor_power_init_intel_pdc(pow);
+
+ return;
+}
+
+EXPORT_SYMBOL(acpi_processor_power_init_pdc);
+
+/*
+ * Initialize bm_flags based on the CPU cache properties
+ * On SMP it depends on cache configuration
+ * - When cache is not shared among all CPUs, we flush cache
+ * before entering C3.
+ * - When cache is shared among all CPUs, we use bm_check
+ * mechanism as in UP case
+ *
+ * This routine is called only after all the CPUs are online
+ */
+void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
+ unsigned int cpu)
+{
+ struct cpuinfo_x86 *c = cpu_data + cpu;
+
+ flags->bm_check = 0;
+ if (num_online_cpus() == 1)
+ flags->bm_check = 1;
+ else if (c->x86_vendor == X86_VENDOR_INTEL) {
+ /*
+ * Today all CPUs that support C3 share cache.
+ * TBD: This needs to look at cache shared map, once
+ * multi-core detection patch makes to the base.
+ */
+ flags->bm_check = 1;
+ }
+}
+
+EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S
index 39d32484f6f..44d886c745e 100644
--- a/arch/i386/kernel/acpi/wakeup.S
+++ b/arch/i386/kernel/acpi/wakeup.S
@@ -74,8 +74,9 @@ wakeup_code:
movw %ax,%fs
movw $0x0e00 + 'i', %fs:(0x12)
- # need a gdt
- lgdt real_save_gdt - wakeup_code
+ # need a gdt -- use lgdtl to force 32-bit operands, in case
+ # the GDT is located past 16 megabytes.
+ lgdtl real_save_gdt - wakeup_code
movl real_save_cr0 - wakeup_code, %eax
movl %eax, %cr0
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index bd1dbf3bd22..a22a866de8f 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -726,15 +726,11 @@ __setup("apic=", apic_set_verbosity);
static int __init detect_init_APIC (void)
{
u32 h, l, features;
- extern void get_cpu_vendor(struct cpuinfo_x86*);
/* Disabled by kernel option? */
if (enable_local_apic < 0)
return -1;
- /* Workaround for us being called before identify_cpu(). */
- get_cpu_vendor(&boot_cpu_data);
-
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 064211d5f41..d7811c4e8b5 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -911,14 +911,7 @@ static void apm_power_off(void)
0xcd, 0x15 /* int $0x15 */
};
- /*
- * This may be called on an SMP machine.
- */
-#ifdef CONFIG_SMP
/* Some bioses don't like being called from CPU != 0 */
- set_cpus_allowed(current, cpumask_of_cpu(0));
- BUG_ON(smp_processor_id() != 0);
-#endif
if (apm_info.realmode_power_off)
{
(void)apm_save_cpus();
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 4553ffd94b1..46ce9b248f5 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -613,8 +613,8 @@ void __devinit cpu_init(void)
memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu),
GDT_ENTRY_TLS_ENTRIES * 8);
- __asm__ __volatile__("lgdt %0" : : "m" (cpu_gdt_descr[cpu]));
- __asm__ __volatile__("lidt %0" : : "m" (idt_descr));
+ load_gdt(&cpu_gdt_descr[cpu]);
+ load_idt(&idt_descr);
/*
* Delete NT
@@ -642,12 +642,12 @@ void __devinit cpu_init(void)
asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
/* Clear all 6 debug registers: */
-
-#define CD(register) set_debugreg(0, register)
-
- CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
-
-#undef CD
+ set_debugreg(0, 0);
+ set_debugreg(0, 1);
+ set_debugreg(0, 2);
+ set_debugreg(0, 3);
+ set_debugreg(0, 6);
+ set_debugreg(0, 7);
/*
* Force FPU initialization:
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index 963e17aa205..60a9e54dd20 100644
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -442,6 +442,13 @@ acpi_cpufreq_cpu_init (
(u32) data->acpi_data.states[i].transition_latency);
cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
+
+ /*
+ * the first call to ->target() should result in us actually
+ * writing something to the appropriate registers.
+ */
+ data->resume = 1;
+
return (result);
err_freqfree:
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index 04e3563da4f..bf02b5026e6 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -64,8 +64,6 @@ static int dont_scale_voltage;
#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
-#define __hlt() __asm__ __volatile__("hlt": : :"memory")
-
/* Clock ratios multiplied by 10 */
static int clock_ratio[32];
static int eblcr_table[32];
@@ -168,11 +166,9 @@ static void do_powersaver(union msr_longhaul *longhaul,
outb(0xFE,0x21); /* TMR0 only */
outb(0xFF,0x80); /* delay */
- local_irq_enable();
-
- __hlt();
+ safe_halt();
wrmsrl(MSR_VIA_LONGHAUL, longhaul->val);
- __hlt();
+ halt();
local_irq_disable();
@@ -251,9 +247,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
bcr2.bits.CLOCKMUL = clock_ratio_index;
local_irq_disable();
wrmsrl (MSR_VIA_BCR2, bcr2.val);
- local_irq_enable();
-
- __hlt();
+ safe_halt();
/* Disable software clock multiplier */
rdmsrl (MSR_VIA_BCR2, bcr2.val);
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
index 10cc096c0ad..ab6e0611303 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
@@ -1,5 +1,5 @@
/*
- * (c) 2003, 2004 Advanced Micro Devices, Inc.
+ * (c) 2003, 2004, 2005 Advanced Micro Devices, Inc.
* Your use of this code is subject to the terms and conditions of the
* GNU general public license version 2. See "COPYING" or
* http://www.gnu.org/licenses/gpl.html
@@ -44,7 +44,7 @@
#define PFX "powernow-k8: "
#define BFX PFX "BIOS error: "
-#define VERSION "version 1.40.2"
+#define VERSION "version 1.50.3"
#include "powernow-k8.h"
/* serialize freq changes */
@@ -110,14 +110,13 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
u32 lo, hi;
u32 i = 0;
- lo = MSR_S_LO_CHANGE_PENDING;
- while (lo & MSR_S_LO_CHANGE_PENDING) {
+ do {
if (i++ > 0x1000000) {
printk(KERN_ERR PFX "detected change pending stuck\n");
return 1;
}
rdmsr(MSR_FIDVID_STATUS, lo, hi);
- }
+ } while (lo & MSR_S_LO_CHANGE_PENDING);
data->currvid = hi & MSR_S_HI_CURRENT_VID;
data->currfid = lo & MSR_S_LO_CURRENT_FID;
@@ -232,7 +231,7 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
/*
* Reduce the vid by the max of step or reqvid.
* Decreasing vid codes represent increasing voltages:
- * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of 0x1f is off.
+ * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
*/
static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step)
{
@@ -467,7 +466,7 @@ static int check_supported_cpu(unsigned int cpu)
eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
((eax & CPUID_XFAM) != CPUID_XFAM_K8) ||
- ((eax & CPUID_XMOD) > CPUID_XMOD_REV_E)) {
+ ((eax & CPUID_XMOD) > CPUID_XMOD_REV_F)) {
printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
goto out;
}
@@ -696,6 +695,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned
data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
+ data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
@@ -735,8 +735,16 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
}
for (i = 0; i < data->acpi_data.state_count; i++) {
- u32 fid = data->acpi_data.states[i].control & FID_MASK;
- u32 vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
+ u32 fid;
+ u32 vid;
+
+ if (data->exttype) {
+ fid = data->acpi_data.states[i].status & FID_MASK;
+ vid = (data->acpi_data.states[i].status >> VID_SHIFT) & VID_MASK;
+ } else {
+ fid = data->acpi_data.states[i].control & FID_MASK;
+ vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
+ }
dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
@@ -753,7 +761,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
}
/* verify voltage is OK - BIOSs are using "off" to indicate invalid */
- if (vid == 0x1f) {
+ if (vid == VID_OFF) {
dprintk("invalid vid %u, ignoring\n", vid);
powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
continue;
@@ -930,15 +938,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
down(&fidvid_sem);
- for_each_cpu_mask(i, cpu_core_map[pol->cpu]) {
- /* make sure the sibling is initialized */
- if (!powernow_data[i]) {
- ret = 0;
- up(&fidvid_sem);
- goto err_out;
- }
- }
-
powernow_k8_acpi_pst_values(data, newstate);
if (transition_frequency(data, newstate)) {
@@ -978,7 +977,7 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
{
struct powernow_k8_data *data;
cpumask_t oldmask = CPU_MASK_ALL;
- int rc;
+ int rc, i;
if (!check_supported_cpu(pol->cpu))
return -ENODEV;
@@ -1064,7 +1063,9 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
printk("cpu_init done, current fid 0x%x, vid 0x%x\n",
data->currfid, data->currvid);
- powernow_data[pol->cpu] = data;
+ for_each_cpu_mask(i, cpu_core_map[pol->cpu]) {
+ powernow_data[i] = data;
+ }
return 0;
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
index 9ed5bf221cb..b1e85bb3639 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
@@ -1,5 +1,5 @@
/*
- * (c) 2003, 2004 Advanced Micro Devices, Inc.
+ * (c) 2003, 2004, 2005 Advanced Micro Devices, Inc.
* Your use of this code is subject to the terms and conditions of the
* GNU general public license version 2. See "COPYING" or
* http://www.gnu.org/licenses/gpl.html
@@ -19,6 +19,7 @@ struct powernow_k8_data {
u32 vidmvs; /* usable value calculated from mvs */
u32 vstable; /* voltage stabilization time, units 20 us */
u32 plllock; /* pll lock time, units 1 us */
+ u32 exttype; /* extended interface = 1 */
/* keep track of the current fid / vid */
u32 currvid, currfid;
@@ -41,7 +42,7 @@ struct powernow_k8_data {
#define CPUID_XFAM 0x0ff00000 /* extended family */
#define CPUID_XFAM_K8 0
#define CPUID_XMOD 0x000f0000 /* extended model */
-#define CPUID_XMOD_REV_E 0x00020000
+#define CPUID_XMOD_REV_F 0x00040000
#define CPUID_USE_XFAM_XMOD 0x00000f00
#define CPUID_GET_MAX_CAPABILITIES 0x80000000
#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007
@@ -57,25 +58,26 @@ struct powernow_k8_data {
/* Field definitions within the FID VID Low Control MSR : */
#define MSR_C_LO_INIT_FID_VID 0x00010000
-#define MSR_C_LO_NEW_VID 0x00001f00
-#define MSR_C_LO_NEW_FID 0x0000002f
+#define MSR_C_LO_NEW_VID 0x00003f00
+#define MSR_C_LO_NEW_FID 0x0000003f
#define MSR_C_LO_VID_SHIFT 8
/* Field definitions within the FID VID High Control MSR : */
-#define MSR_C_HI_STP_GNT_TO 0x000fffff
+#define MSR_C_HI_STP_GNT_TO 0x000fffff
/* Field definitions within the FID VID Low Status MSR : */
-#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */
-#define MSR_S_LO_MAX_RAMP_VID 0x1f000000
+#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */
+#define MSR_S_LO_MAX_RAMP_VID 0x3f000000
#define MSR_S_LO_MAX_FID 0x003f0000
#define MSR_S_LO_START_FID 0x00003f00
#define MSR_S_LO_CURRENT_FID 0x0000003f
/* Field definitions within the FID VID High Status MSR : */
-#define MSR_S_HI_MAX_WORKING_VID 0x001f0000
-#define MSR_S_HI_START_VID 0x00001f00
-#define MSR_S_HI_CURRENT_VID 0x0000001f
-#define MSR_C_HI_STP_GNT_BENIGN 0x00000001
+#define MSR_S_HI_MIN_WORKING_VID 0x3f000000
+#define MSR_S_HI_MAX_WORKING_VID 0x003f0000
+#define MSR_S_HI_START_VID 0x00003f00
+#define MSR_S_HI_CURRENT_VID 0x0000003f
+#define MSR_C_HI_STP_GNT_BENIGN 0x00000001
/*
* There are restrictions frequencies have to follow:
@@ -99,13 +101,15 @@ struct powernow_k8_data {
#define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */
#define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */
-#define LEAST_VID 0x1e /* Lowest (numerically highest) useful vid value */
+#define LEAST_VID 0x3e /* Lowest (numerically highest) useful vid value */
#define MIN_FREQ 800 /* Min and max freqs, per spec */
#define MAX_FREQ 5000
#define INVALID_FID_MASK 0xffffffc1 /* not a valid fid if these bits are set */
-#define INVALID_VID_MASK 0xffffffe0 /* not a valid vid if these bits are set */
+#define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */
+
+#define VID_OFF 0x3f
#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */
@@ -121,12 +125,14 @@ struct powernow_k8_data {
#define IRT_SHIFT 30
#define RVO_SHIFT 28
+#define EXT_TYPE_SHIFT 27
#define PLL_L_SHIFT 20
#define MVS_SHIFT 18
#define VST_SHIFT 11
#define VID_SHIFT 6
#define IRT_MASK 3
#define RVO_MASK 3
+#define EXT_TYPE_MASK 1
#define PLL_L_MASK 0x7f
#define MVS_MASK 3
#define VST_MASK 0x7f
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
index 7dcbf70fc16..327a55d4d1c 100644
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -375,7 +375,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
arg0.buffer.pointer = (u8 *) arg0_buf;
arg0_buf[0] = ACPI_PDC_REVISION_ID;
arg0_buf[1] = 1;
- arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_MSR;
+ arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP_MSR;
p.pdc = &arg_list;
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index ba4b01138c8..ff87cc22b32 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -132,11 +132,7 @@ static void __init set_cx86_memwb(void)
setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
/* set 'Not Write-through' */
cr0 = 0x20000000;
- __asm__("movl %%cr0,%%eax\n\t"
- "orl %0,%%eax\n\t"
- "movl %%eax,%%cr0\n"
- : : "r" (cr0)
- :"ax");
+ write_cr0(read_cr0() | cr0);
/* CCR2 bit 2: lock NW bit and set WT1 */
setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
}
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index a2c33c1a46c..43601de0f63 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -82,16 +82,13 @@ static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
*/
static int __devinit num_cpu_cores(struct cpuinfo_x86 *c)
{
- unsigned int eax;
+ unsigned int eax, ebx, ecx, edx;
if (c->cpuid_level < 4)
return 1;
- __asm__("cpuid"
- : "=a" (eax)
- : "0" (4), "c" (0)
- : "bx", "dx");
-
+ /* Intel has a non-standard dependency on %ecx for this CPUID level. */
+ cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
if (eax & 0x1f)
return ((eax >> 26) + 1);
else
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index 1d768b26326..9e0d5f83cb9 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -128,7 +128,7 @@ static int __devinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le
cpuid_count(4, index, &eax, &ebx, &ecx, &edx);
cache_eax.full = eax;
if (cache_eax.split.type == CACHE_TYPE_NULL)
- return -1;
+ return -EIO; /* better error ? */
this_leaf->eax.full = eax;
this_leaf->ebx.full = ebx;
@@ -305,6 +305,9 @@ static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
struct _cpuid4_info *this_leaf;
unsigned long num_threads_sharing;
+#ifdef CONFIG_X86_HT
+ struct cpuinfo_x86 *c = cpu_data + cpu;
+#endif
this_leaf = CPUID4_INFO_IDX(cpu, index);
num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
@@ -314,10 +317,12 @@ static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
#ifdef CONFIG_X86_HT
else if (num_threads_sharing == smp_num_siblings)
this_leaf->shared_cpu_map = cpu_sibling_map[cpu];
-#endif
+ else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings))
+ this_leaf->shared_cpu_map = cpu_core_map[cpu];
else
- printk(KERN_INFO "Number of CPUs sharing cache didn't match "
+ printk(KERN_DEBUG "Number of CPUs sharing cache didn't match "
"any known set of CPUs\n");
+#endif
}
#else
static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
@@ -334,6 +339,7 @@ static int __devinit detect_cache_attributes(unsigned int cpu)
struct _cpuid4_info *this_leaf;
unsigned long j;
int retval;
+ cpumask_t oldmask;
if (num_cache_leaves == 0)
return -ENOENT;
@@ -345,19 +351,26 @@ static int __devinit detect_cache_attributes(unsigned int cpu)
memset(cpuid4_info[cpu], 0,
sizeof(struct _cpuid4_info) * num_cache_leaves);
+ oldmask = current->cpus_allowed;
+ retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
+ if (retval)
+ goto out;
+
/* Do cpuid and store the results */
+ retval = 0;
for (j = 0; j < num_cache_leaves; j++) {
this_leaf = CPUID4_INFO_IDX(cpu, j);
retval = cpuid4_cache_lookup(j, this_leaf);
if (unlikely(retval < 0))
- goto err_out;
+ break;
cache_shared_cpu_map_setup(cpu, j);
}
- return 0;
+ set_cpus_allowed(current, oldmask);
-err_out:
- free_cache_attributes(cpu);
- return -ENOMEM;
+out:
+ if (retval)
+ free_cache_attributes(cpu);
+ return retval;
}
#ifdef CONFIG_SYSFS
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 764cac64e21..dd4ebd6af7e 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -561,7 +561,7 @@ struct mtrr_value {
static struct mtrr_value * mtrr_state;
-static int mtrr_save(struct sys_device * sysdev, u32 state)
+static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
{
int i;
int size = num_var_ranges * sizeof(struct mtrr_value);
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c
index f57e5ee9494..fc426380366 100644
--- a/arch/i386/kernel/cpu/transmeta.c
+++ b/arch/i386/kernel/cpu/transmeta.c
@@ -76,6 +76,12 @@ static void __init init_transmeta(struct cpuinfo_x86 *c)
#define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV)
if ( c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686 )
c->x86 = 6;
+
+#ifdef CONFIG_SYSCTL
+ /* randomize_va_space slows us down enormously;
+ it probably triggers retranslation of x86->native bytecode */
+ randomize_va_space = 0;
+#endif
}
static void transmeta_identify(struct cpuinfo_x86 * c)
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
index e5fab12f792..913be77bb84 100644
--- a/arch/i386/kernel/crash.c
+++ b/arch/i386/kernel/crash.c
@@ -153,7 +153,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
disable_local_APIC();
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
- __asm__("hlt");
+ halt();
for(;;);
return 1;
diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c
index 789af3e9fb1..5edb1d379ad 100644
--- a/arch/i386/kernel/doublefault.c
+++ b/arch/i386/kernel/doublefault.c
@@ -20,7 +20,7 @@ static void doublefault_fn(void)
struct Xgt_desc_struct gdt_desc = {0, 0};
unsigned long gdt, tss;
- __asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory");
+ store_gdt(&gdt_desc);
gdt = gdt_desc.address;
printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index 385883ea8c1..ecad519fd39 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -79,7 +79,7 @@ static void efi_call_phys_prelog(void)
* directory. If I have PSE, I just need to duplicate one entry in
* page directory.
*/
- __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
+ cr4 = read_cr4();
if (cr4 & X86_CR4_PSE) {
efi_bak_pg_dir_pointer[0].pgd =
@@ -104,8 +104,7 @@ static void efi_call_phys_prelog(void)
local_flush_tlb();
cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address);
- __asm__ __volatile__("lgdt %0":"=m"
- (*(struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0])));
+ load_gdt((struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0]));
}
static void efi_call_phys_epilog(void)
@@ -114,8 +113,8 @@ static void efi_call_phys_epilog(void)
cpu_gdt_descr[0].address =
(unsigned long) __va(cpu_gdt_descr[0].address);
- __asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr));
- __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
+ load_gdt(&cpu_gdt_descr[0]);
+ cr4 = read_cr4();
if (cr4 & X86_CR4_PSE) {
swapper_pg_dir[pgd_index(0)].pgd =
@@ -233,22 +232,23 @@ void __init efi_map_memmap(void)
{
memmap.map = NULL;
- memmap.map = (efi_memory_desc_t *)
- bt_ioremap((unsigned long) memmap.phys_map,
- (memmap.nr_map * sizeof(efi_memory_desc_t)));
-
+ memmap.map = bt_ioremap((unsigned long) memmap.phys_map,
+ (memmap.nr_map * memmap.desc_size));
if (memmap.map == NULL)
printk(KERN_ERR PFX "Could not remap the EFI memmap!\n");
+
+ memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
}
#if EFI_DEBUG
static void __init print_efi_memmap(void)
{
efi_memory_desc_t *md;
+ void *p;
int i;
- for (i = 0; i < memmap.nr_map; i++) {
- md = &memmap.map[i];
+ for (p = memmap.map, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) {
+ md = p;
printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, "
"range=[0x%016llx-0x%016llx) (%lluMB)\n",
i, md->type, md->attribute, md->phys_addr,
@@ -271,10 +271,10 @@ void efi_memmap_walk(efi_freemem_callback_t callback, void *arg)
} prev, curr;
efi_memory_desc_t *md;
unsigned long start, end;
- int i;
+ void *p;
- for (i = 0; i < memmap.nr_map; i++) {
- md = &memmap.map[i];
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
if ((md->num_pages == 0) || (!is_available_memory(md)))
continue;
@@ -325,6 +325,7 @@ void __init efi_init(void)
memmap.phys_map = EFI_MEMMAP;
memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE;
memmap.desc_version = EFI_MEMDESC_VERSION;
+ memmap.desc_size = EFI_MEMDESC_SIZE;
efi.systab = (efi_system_table_t *)
boot_ioremap((unsigned long) efi_phys.systab,
@@ -428,22 +429,30 @@ void __init efi_init(void)
printk(KERN_ERR PFX "Could not map the runtime service table!\n");
/* Map the EFI memory map for use until paging_init() */
-
- memmap.map = (efi_memory_desc_t *)
- boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE);
-
+ memmap.map = boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE);
if (memmap.map == NULL)
printk(KERN_ERR PFX "Could not map the EFI memory map!\n");
- if (EFI_MEMDESC_SIZE != sizeof(efi_memory_desc_t)) {
- printk(KERN_WARNING PFX "Warning! Kernel-defined memdesc doesn't "
- "match the one from EFI!\n");
- }
+ memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+
#if EFI_DEBUG
print_efi_memmap();
#endif
}
+static inline void __init check_range_for_systab(efi_memory_desc_t *md)
+{
+ if (((unsigned long)md->phys_addr <= (unsigned long)efi_phys.systab) &&
+ ((unsigned long)efi_phys.systab < md->phys_addr +
+ ((unsigned long)md->num_pages << EFI_PAGE_SHIFT))) {
+ unsigned long addr;
+
+ addr = md->virt_addr - md->phys_addr +
+ (unsigned long)efi_phys.systab;
+ efi.systab = (efi_system_table_t *)addr;
+ }
+}
+
/*
* This function will switch the EFI runtime services to virtual mode.
* Essentially, look through the EFI memmap and map every region that
@@ -457,43 +466,32 @@ void __init efi_enter_virtual_mode(void)
{
efi_memory_desc_t *md;
efi_status_t status;
- int i;
+ void *p;
efi.systab = NULL;
- for (i = 0; i < memmap.nr_map; i++) {
- md = &memmap.map[i];
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
- if (md->attribute & EFI_MEMORY_RUNTIME) {
- md->virt_addr =
- (unsigned long)ioremap(md->phys_addr,
- md->num_pages << EFI_PAGE_SHIFT);
- if (!(unsigned long)md->virt_addr) {
- printk(KERN_ERR PFX "ioremap of 0x%lX failed\n",
- (unsigned long)md->phys_addr);
- }
+ if (!(md->attribute & EFI_MEMORY_RUNTIME))
+ continue;
- if (((unsigned long)md->phys_addr <=
- (unsigned long)efi_phys.systab) &&
- ((unsigned long)efi_phys.systab <
- md->phys_addr +
- ((unsigned long)md->num_pages <<
- EFI_PAGE_SHIFT))) {
- unsigned long addr;
-
- addr = md->virt_addr - md->phys_addr +
- (unsigned long)efi_phys.systab;
- efi.systab = (efi_system_table_t *)addr;
- }
+ md->virt_addr = (unsigned long)ioremap(md->phys_addr,
+ md->num_pages << EFI_PAGE_SHIFT);
+ if (!(unsigned long)md->virt_addr) {
+ printk(KERN_ERR PFX "ioremap of 0x%lX failed\n",
+ (unsigned long)md->phys_addr);
}
+ /* update the virtual address of the EFI system table */
+ check_range_for_systab(md);
}
if (!efi.systab)
BUG();
status = phys_efi_set_virtual_address_map(
- sizeof(efi_memory_desc_t) * memmap.nr_map,
- sizeof(efi_memory_desc_t),
+ memmap.desc_size * memmap.nr_map,
+ memmap.desc_size,
memmap.desc_version,
memmap.phys_map);
@@ -533,10 +531,10 @@ efi_initialize_iomem_resources(struct resource *code_resource,
{
struct resource *res;
efi_memory_desc_t *md;
- int i;
+ void *p;
- for (i = 0; i < memmap.nr_map; i++) {
- md = &memmap.map[i];
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >
0x100000000ULL)
@@ -613,10 +611,10 @@ efi_initialize_iomem_resources(struct resource *code_resource,
u32 efi_mem_type(unsigned long phys_addr)
{
efi_memory_desc_t *md;
- int i;
+ void *p;
- for (i = 0; i < memmap.nr_map; i++) {
- md = &memmap.map[i];
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
if ((md->phys_addr <= phys_addr) && (phys_addr <
(md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
return md->type;
@@ -627,10 +625,10 @@ u32 efi_mem_type(unsigned long phys_addr)
u64 efi_mem_attributes(unsigned long phys_addr)
{
efi_memory_desc_t *md;
- int i;
+ void *p;
- for (i = 0; i < memmap.nr_map; i++) {
- md = &memmap.map[i];
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
if ((md->phys_addr <= phys_addr) && (phys_addr <
(md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
return md->attribute;
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index a991d4e5edd..abb909793ef 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -203,7 +203,7 @@ sysenter_past_esp:
GET_THREAD_INFO(%ebp)
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
- testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
+ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
jnz syscall_trace_entry
cmpl $(nr_syscalls), %eax
jae syscall_badsys
@@ -226,9 +226,9 @@ ENTRY(system_call)
pushl %eax # save orig_eax
SAVE_ALL
GET_THREAD_INFO(%ebp)
- # system call tracing in operation
+ # system call tracing in operation / emulation
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
- testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
+ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
jnz syscall_trace_entry
cmpl $(nr_syscalls), %eax
jae syscall_badsys
@@ -338,6 +338,9 @@ syscall_trace_entry:
movl %esp, %eax
xorl %edx,%edx
call do_syscall_trace
+ cmpl $0, %eax
+ jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
+ # so must skip actual syscall
movl ORIG_EAX(%esp), %eax
cmpl $(nr_syscalls), %eax
jnae syscall_call
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 4477bb10709..0480ca9e9e5 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -77,6 +77,32 @@ ENTRY(startup_32)
subl %edi,%ecx
shrl $2,%ecx
rep ; stosl
+/*
+ * Copy bootup parameters out of the way.
+ * Note: %esi still has the pointer to the real-mode data.
+ * With the kexec as boot loader, parameter segment might be loaded beyond
+ * kernel image and might not even be addressable by early boot page tables.
+ * (kexec on panic case). Hence copy out the parameters before initializing
+ * page tables.
+ */
+ movl $(boot_params - __PAGE_OFFSET),%edi
+ movl $(PARAM_SIZE/4),%ecx
+ cld
+ rep
+ movsl
+ movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
+ andl %esi,%esi
+ jnz 2f # New command line protocol
+ cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
+ jne 1f
+ movzwl OLD_CL_OFFSET,%esi
+ addl $(OLD_CL_BASE_ADDR),%esi
+2:
+ movl $(saved_command_line - __PAGE_OFFSET),%edi
+ movl $(COMMAND_LINE_SIZE/4),%ecx
+ rep
+ movsl
+1:
/*
* Initialize page tables. This creates a PDE and a set of page
@@ -214,28 +240,6 @@ ENTRY(startup_32_smp)
*/
call setup_idt
-/*
- * Copy bootup parameters out of the way.
- * Note: %esi still has the pointer to the real-mode data.
- */
- movl $boot_params,%edi
- movl $(PARAM_SIZE/4),%ecx
- cld
- rep
- movsl
- movl boot_params+NEW_CL_POINTER,%esi
- andl %esi,%esi
- jnz 2f # New command line protocol
- cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
- jne 1f
- movzwl OLD_CL_OFFSET,%esi
- addl $(OLD_CL_BASE_ADDR),%esi
-2:
- movl $saved_command_line,%edi
- movl $(COMMAND_LINE_SIZE/4),%ecx
- rep
- movsl
-1:
checkCPUtype:
movl $-1,X86_CPUID # -1 for no CPUID initially
diff --git a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c
index b817168d9c6..d75524758da 100644
--- a/arch/i386/kernel/i387.c
+++ b/arch/i386/kernel/i387.c
@@ -82,17 +82,6 @@ void kernel_fpu_begin(void)
}
EXPORT_SYMBOL_GPL(kernel_fpu_begin);
-void restore_fpu( struct task_struct *tsk )
-{
- if ( cpu_has_fxsr ) {
- asm volatile( "fxrstor %0"
- : : "m" (tsk->thread.i387.fxsave) );
- } else {
- asm volatile( "frstor %0"
- : : "m" (tsk->thread.i387.fsave) );
- }
-}
-
/*
* FPU tag word conversions.
*/
diff --git a/arch/i386/kernel/i8237.c b/arch/i386/kernel/i8237.c
new file mode 100644
index 00000000000..c36d1c006c2
--- /dev/null
+++ b/arch/i386/kernel/i8237.c
@@ -0,0 +1,67 @@
+/*
+ * i8237.c: 8237A DMA controller suspend functions.
+ *
+ * Written by Pierre Ossman, 2005.
+ */
+
+#include <linux/init.h>
+#include <linux/sysdev.h>
+
+#include <asm/dma.h>
+
+/*
+ * This module just handles suspend/resume issues with the
+ * 8237A DMA controller (used for ISA and LPC).
+ * Allocation is handled in kernel/dma.c and normal usage is
+ * in asm/dma.h.
+ */
+
+static int i8237A_resume(struct sys_device *dev)
+{
+ unsigned long flags;
+ int i;
+
+ flags = claim_dma_lock();
+
+ dma_outb(DMA1_RESET_REG, 0);
+ dma_outb(DMA2_RESET_REG, 0);
+
+ for (i = 0;i < 8;i++) {
+ set_dma_addr(i, 0x000000);
+ /* DMA count is a bit weird so this is not 0 */
+ set_dma_count(i, 1);
+ }
+
+ /* Enable cascade DMA or channel 0-3 won't work */
+ enable_dma(4);
+
+ release_dma_lock(flags);
+
+ return 0;
+}
+
+static int i8237A_suspend(struct sys_device *dev, pm_message_t state)
+{
+ return 0;
+}
+
+static struct sysdev_class i8237_sysdev_class = {
+ set_kset_name("i8237"),
+ .suspend = i8237A_suspend,
+ .resume = i8237A_resume,
+};
+
+static struct sys_device device_i8237A = {
+ .id = 0,
+ .cls = &i8237_sysdev_class,
+};
+
+static int __init i8237A_init_sysfs(void)
+{
+ int error = sysdev_class_register(&i8237_sysdev_class);
+ if (!error)
+ error = sysdev_register(&device_i8237A);
+ return error;
+}
+
+device_initcall(i8237A_init_sysfs);
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
index 8b25160393c..f2b37654777 100644
--- a/arch/i386/kernel/ioport.c
+++ b/arch/i386/kernel/ioport.c
@@ -132,6 +132,7 @@ asmlinkage long sys_iopl(unsigned long unused)
volatile struct pt_regs * regs = (struct pt_regs *) &unused;
unsigned int level = regs->ebx;
unsigned int old = (regs->eflags >> 12) & 3;
+ struct thread_struct *t = &current->thread;
if (level > 3)
return -EINVAL;
@@ -140,8 +141,8 @@ asmlinkage long sys_iopl(unsigned long unused)
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
}
- regs->eflags = (regs->eflags &~ 0x3000UL) | (level << 12);
- /* Make sure we return the long way (not sysenter) */
- set_thread_flag(TIF_IRET);
+ t->iopl = level << 12;
+ regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl;
+ set_iopl_mask(t->iopl);
return 0;
}
diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c
index bb50afbee92..fe1ffa55587 100644
--- a/arch/i386/kernel/ldt.c
+++ b/arch/i386/kernel/ldt.c
@@ -177,7 +177,7 @@ static int read_default_ldt(void __user * ptr, unsigned long bytecount)
static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
{
struct mm_struct * mm = current->mm;
- __u32 entry_1, entry_2, *lp;
+ __u32 entry_1, entry_2;
int error;
struct user_desc ldt_info;
@@ -205,8 +205,6 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
goto out_unlock;
}
- lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
-
/* Allow LDTs to be cleared by the user. */
if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
if (oldmode || LDT_empty(&ldt_info)) {
@@ -223,8 +221,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
/* Install the new entry ... */
install:
- *lp = entry_1;
- *(lp+1) = entry_2;
+ write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2);
error = 0;
out_unlock:
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
index 52ed18d8b51..a912fed4848 100644
--- a/arch/i386/kernel/machine_kexec.c
+++ b/arch/i386/kernel/machine_kexec.c
@@ -16,13 +16,8 @@
#include <asm/io.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
-
-static inline unsigned long read_cr3(void)
-{
- unsigned long cr3;
- asm volatile("movl %%cr3,%0": "=r"(cr3));
- return cr3;
-}
+#include <asm/desc.h>
+#include <asm/system.h>
#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
@@ -90,34 +85,27 @@ static void identity_map_page(unsigned long address)
}
#endif
-
static void set_idt(void *newidt, __u16 limit)
{
- unsigned char curidt[6];
+ struct Xgt_desc_struct curidt;
/* ia32 supports unaliged loads & stores */
- (*(__u16 *)(curidt)) = limit;
- (*(__u32 *)(curidt +2)) = (unsigned long)(newidt);
+ curidt.size = limit;
+ curidt.address = (unsigned long)newidt;
- __asm__ __volatile__ (
- "lidt %0\n"
- : "=m" (curidt)
- );
+ load_idt(&curidt);
};
static void set_gdt(void *newgdt, __u16 limit)
{
- unsigned char curgdt[6];
+ struct Xgt_desc_struct curgdt;
/* ia32 supports unaligned loads & stores */
- (*(__u16 *)(curgdt)) = limit;
- (*(__u32 *)(curgdt +2)) = (unsigned long)(newgdt);
+ curgdt.size = limit;
+ curgdt.address = (unsigned long)newgdt;
- __asm__ __volatile__ (
- "lgdt %0\n"
- : "=m" (curgdt)
- );
+ load_gdt(&curgdt);
};
static void load_segments(void)
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
index a77c612aad0..165f13158c6 100644
--- a/arch/i386/kernel/microcode.c
+++ b/arch/i386/kernel/microcode.c
@@ -164,7 +164,8 @@ static void collect_cpu_info (void *unused)
}
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
- __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
+ /* see notes above for revision 1.07. Apparent chip bug */
+ serialize_cpu();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev);
pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
@@ -377,7 +378,9 @@ static void do_update_one (void * unused)
(unsigned long) uci->mc->bits >> 16 >> 16);
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
- __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
+ /* see notes above for revision 1.07. Apparent chip bug */
+ serialize_cpu();
+
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
index af917f609c7..5d0b9a8fc43 100644
--- a/arch/i386/kernel/mpparse.c
+++ b/arch/i386/kernel/mpparse.c
@@ -65,6 +65,8 @@ int nr_ioapics;
int pic_mode;
unsigned long mp_lapic_addr;
+unsigned int def_to_bigsmp = 0;
+
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
/* Internal processor count */
@@ -120,7 +122,7 @@ static int MP_valid_apicid(int apicid, int version)
static void __init MP_processor_info (struct mpc_config_processor *m)
{
- int ver, apicid;
+ int ver, apicid, cpu, found_bsp = 0;
physid_mask_t tmp;
if (!(m->mpc_cpuflag & CPU_ENABLED))
@@ -179,6 +181,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
Dprintk(" Bootup CPU\n");
boot_cpu_physical_apicid = m->mpc_apicid;
+ found_bsp = 1;
}
if (num_processors >= NR_CPUS) {
@@ -202,6 +205,11 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
return;
}
+ if (found_bsp)
+ cpu = 0;
+ else
+ cpu = num_processors - 1;
+ cpu_set(cpu, cpu_possible_map);
tmp = apicid_to_cpu_present(apicid);
physids_or(phys_cpu_present_map, phys_cpu_present_map, tmp);
@@ -213,6 +221,13 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
ver = 0x10;
}
apic_version[m->mpc_apicid] = ver;
+ if ((num_processors > 8) &&
+ APIC_XAPIC(ver) &&
+ (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
+ def_to_bigsmp = 1;
+ else
+ def_to_bigsmp = 0;
+
bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
}
@@ -1116,7 +1131,15 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low)
*/
int irq = gsi;
if (gsi < MAX_GSI_NUM) {
- gsi = pci_irq++;
+ if (gsi > 15)
+ gsi = pci_irq++;
+#ifdef CONFIG_ACPI_BUS
+ /*
+ * Don't assign IRQ used by ACPI SCI
+ */
+ if (gsi == acpi_fadt.sci_int)
+ gsi = pci_irq++;
+#endif
gsi_to_irq[irq] = gsi;
} else {
printk(KERN_ERR "GSI %u is too high\n", gsi);
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c
index b2f03c39a6f..03100d6fc5d 100644
--- a/arch/i386/kernel/msr.c
+++ b/arch/i386/kernel/msr.c
@@ -46,23 +46,13 @@
static struct class *msr_class;
-/* Note: "err" is handled in a funny way below. Otherwise one version
- of gcc or another breaks. */
-
static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx)
{
int err;
- asm volatile ("1: wrmsr\n"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl %4,%0\n"
- " jmp 2b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n" " .long 1b,3b\n" ".previous":"=&bDS" (err)
- :"a"(eax), "d"(edx), "c"(reg), "i"(-EIO), "0"(0));
-
+ err = wrmsr_safe(reg, eax, edx);
+ if (err)
+ err = -EIO;
return err;
}
@@ -70,18 +60,9 @@ static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx)
{
int err;
- asm volatile ("1: rdmsr\n"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl %4,%0\n"
- " jmp 2b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b,3b\n"
- ".previous":"=&bDS" (err), "=a"(*eax), "=d"(*edx)
- :"c"(reg), "i"(-EIO), "0"(0));
-
+ err = rdmsr_safe(reg, eax, edx);
+ if (err)
+ err = -EIO;
return err;
}
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index da6c46d667c..8bbdbda07a2 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -195,7 +195,7 @@ static void disable_lapic_nmi_watchdog(void)
wrmsr(MSR_P6_EVNTSEL0, 0, 0);
break;
case 15:
- if (boot_cpu_data.x86_model > 0x3)
+ if (boot_cpu_data.x86_model > 0x4)
break;
wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
@@ -432,7 +432,7 @@ void setup_apic_nmi_watchdog (void)
setup_p6_watchdog();
break;
case 15:
- if (boot_cpu_data.x86_model > 0x3)
+ if (boot_cpu_data.x86_model > 0x4)
return;
if (!setup_p4_watchdog())
@@ -501,8 +501,11 @@ void nmi_watchdog_tick (struct pt_regs * regs)
*/
alert_counter[cpu]++;
if (alert_counter[cpu] == 5*nmi_hz)
+ /*
+ * die_nmi will return ONLY if NOTIFY_STOP happens..
+ */
die_nmi(regs, "NMI Watchdog detected LOCKUP");
- } else {
+
last_irq_sums[cpu] = sum;
alert_counter[cpu] = 0;
}
diff --git a/arch/i386/kernel/numaq.c b/arch/i386/kernel/numaq.c
index e51edf0a656..5f5b075f860 100644
--- a/arch/i386/kernel/numaq.c
+++ b/arch/i386/kernel/numaq.c
@@ -31,6 +31,7 @@
#include <linux/nodemask.h>
#include <asm/numaq.h>
#include <asm/topology.h>
+#include <asm/processor.h>
#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
@@ -77,3 +78,11 @@ int __init get_memcfg_numaq(void)
smp_dump_qct();
return 1;
}
+
+static int __init numaq_dsc_disable(void)
+{
+ printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
+ tsc_disable = 1;
+ return 0;
+}
+core_initcall(numaq_dsc_disable);
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index ba243a4cc11..b45cbf93d43 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -164,7 +164,7 @@ static inline void play_dead(void)
*/
local_irq_disable();
while (1)
- __asm__ __volatile__("hlt":::"memory");
+ halt();
}
#else
static inline void play_dead(void)
@@ -313,16 +313,12 @@ void show_regs(struct pt_regs * regs)
printk(" DS: %04x ES: %04x\n",
0xffff & regs->xds,0xffff & regs->xes);
- __asm__("movl %%cr0, %0": "=r" (cr0));
- __asm__("movl %%cr2, %0": "=r" (cr2));
- __asm__("movl %%cr3, %0": "=r" (cr3));
- /* This could fault if %cr4 does not exist */
- __asm__("1: movl %%cr4, %0 \n"
- "2: \n"
- ".section __ex_table,\"a\" \n"
- ".long 1b,2b \n"
- ".previous \n"
- : "=r" (cr4): "0" (0));
+ cr0 = read_cr0();
+ cr2 = read_cr2();
+ cr3 = read_cr3();
+ if (current_cpu_data.x86 > 4) {
+ cr4 = read_cr4();
+ }
printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
show_trace(NULL, &regs->esp);
}
@@ -682,41 +678,56 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
__unlazy_fpu(prev_p);
/*
- * Reload esp0, LDT and the page table pointer:
+ * Reload esp0.
*/
load_esp0(tss, next);
/*
- * Load the per-thread Thread-Local Storage descriptor.
+ * Save away %fs and %gs. No need to save %es and %ds, as
+ * those are always kernel segments while inside the kernel.
+ * Doing this before setting the new TLS descriptors avoids
+ * the situation where we temporarily have non-reloadable
+ * segments in %fs and %gs. This could be an issue if the
+ * NMI handler ever used %fs or %gs (it does not today), or
+ * if the kernel is running inside of a hypervisor layer.
*/
- load_TLS(next, cpu);
+ savesegment(fs, prev->fs);
+ savesegment(gs, prev->gs);
/*
- * Save away %fs and %gs. No need to save %es and %ds, as
- * those are always kernel segments while inside the kernel.
+ * Load the per-thread Thread-Local Storage descriptor.
*/
- asm volatile("mov %%fs,%0":"=m" (prev->fs));
- asm volatile("mov %%gs,%0":"=m" (prev->gs));
+ load_TLS(next, cpu);
/*
* Restore %fs and %gs if needed.
+ *
+ * Glibc normally makes %fs be zero, and %gs is one of
+ * the TLS segments.
*/
- if (unlikely(prev->fs | prev->gs | next->fs | next->gs)) {
+ if (unlikely(prev->fs | next->fs))
loadsegment(fs, next->fs);
+
+ if (prev->gs | next->gs)
loadsegment(gs, next->gs);
- }
+
+ /*
+ * Restore IOPL if needed.
+ */
+ if (unlikely(prev->iopl != next->iopl))
+ set_iopl_mask(next->iopl);
/*
* Now maybe reload the debug registers
*/
if (unlikely(next->debugreg[7])) {
- set_debugreg(current->thread.debugreg[0], 0);
- set_debugreg(current->thread.debugreg[1], 1);
- set_debugreg(current->thread.debugreg[2], 2);
- set_debugreg(current->thread.debugreg[3], 3);
+ set_debugreg(next->debugreg[0], 0);
+ set_debugreg(next->debugreg[1], 1);
+ set_debugreg(next->debugreg[2], 2);
+ set_debugreg(next->debugreg[3], 3);
/* no 4 and 5 */
- set_debugreg(current->thread.debugreg[6], 6);
- set_debugreg(current->thread.debugreg[7], 7);
+ set_debugreg(next->debugreg[6], 6);
+ set_debugreg(next->debugreg[7], 7);
}
if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
@@ -913,6 +924,8 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
+ memset(&info, 0, sizeof(info));
+
desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
info.entry_number = idx;
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index 0da59b42843..340980203b0 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -271,6 +271,8 @@ static void clear_singlestep(struct task_struct *child)
void ptrace_disable(struct task_struct *child)
{
clear_singlestep(child);
+ clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
}
/*
@@ -509,15 +511,20 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
}
break;
+ case PTRACE_SYSEMU: /* continue and stop at next syscall, which will not be executed */
case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
case PTRACE_CONT: /* restart after signal. */
ret = -EIO;
if (!valid_signal(data))
break;
- if (request == PTRACE_SYSCALL) {
+ if (request == PTRACE_SYSEMU) {
+ set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+ clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+ } else if (request == PTRACE_SYSCALL) {
set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- }
- else {
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+ } else {
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
}
child->exit_code = data;
@@ -542,10 +549,17 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
wake_up_process(child);
break;
+ case PTRACE_SYSEMU_SINGLESTEP: /* Same as SYSEMU, but singlestep if not syscall */
case PTRACE_SINGLESTEP: /* set the trap flag. */
ret = -EIO;
if (!valid_signal(data))
break;
+
+ if (request == PTRACE_SYSEMU_SINGLESTEP)
+ set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+ else
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
set_singlestep(child);
child->exit_code = data;
@@ -678,26 +692,52 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
* - triggered by current->work.syscall_trace
*/
__attribute__((regparm(3)))
-void do_syscall_trace(struct pt_regs *regs, int entryexit)
+int do_syscall_trace(struct pt_regs *regs, int entryexit)
{
+ int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU), ret = 0;
+ /* With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
+ * interception. */
+ int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
+
/* do the secure computing check first */
secure_computing(regs->orig_eax);
- if (unlikely(current->audit_context) && entryexit)
- audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax);
+ if (unlikely(current->audit_context)) {
+ if (entryexit)
+ audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax);
+ /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
+ * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
+ * not used, entry.S will call us only on syscall exit, not
+ * entry; so when TIF_SYSCALL_AUDIT is used we must avoid
+ * calling send_sigtrap() on syscall entry.
+ *
+ * Note that when PTRACE_SYSEMU_SINGLESTEP is used,
+ * is_singlestep is false, despite his name, so we will still do
+ * the correct thing.
+ */
+ else if (is_singlestep)
+ goto out;
+ }
if (!(current->ptrace & PT_PTRACED))
goto out;
+ /* If a process stops on the 1st tracepoint with SYSCALL_TRACE
+ * and then is resumed with SYSEMU_SINGLESTEP, it will come in
+ * here. We have to check this and return */
+ if (is_sysemu && entryexit)
+ return 0;
+
/* Fake a debug trap */
- if (test_thread_flag(TIF_SINGLESTEP))
+ if (is_singlestep)
send_sigtrap(current, regs, 0);
- if (!test_thread_flag(TIF_SYSCALL_TRACE))
+ if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
goto out;
/* the 0x80 provides a way for the tracing parent to distinguish
between a syscall stop and SIGTRAP delivery */
+ /* Note that the debugger could change the result of test_thread_flag!*/
ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));
/*
@@ -709,9 +749,16 @@ void do_syscall_trace(struct pt_regs *regs, int entryexit)
send_sig(current->exit_code, current, 1);
current->exit_code = 0;
}
+ ret = is_sysemu;
out:
if (unlikely(current->audit_context) && !entryexit)
audit_syscall_entry(current, AUDIT_ARCH_I386, regs->orig_eax,
regs->ebx, regs->ecx, regs->edx, regs->esi);
+ if (ret == 0)
+ return 0;
+ regs->orig_eax = -1; /* force skip of syscall restarting */
+ if (unlikely(current->audit_context))
+ audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax);
+ return 1;
}
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index b3e58484996..1cbb9c0f470 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -13,6 +13,7 @@
#include <linux/dmi.h>
#include <asm/uaccess.h>
#include <asm/apic.h>
+#include <asm/desc.h>
#include "mach_reboot.h"
#include <linux/reboot_fixups.h>
@@ -242,13 +243,13 @@ void machine_real_restart(unsigned char *code, int length)
/* Set up the IDT for real mode. */
- __asm__ __volatile__ ("lidt %0" : : "m" (real_mode_idt));
+ load_idt(&real_mode_idt);
/* Set up a GDT from which we can load segment descriptors for real
mode. The GDT is not used in real mode; it is just needed here to
prepare the descriptors. */
- __asm__ __volatile__ ("lgdt %0" : : "m" (real_mode_gdt));
+ load_gdt(&real_mode_gdt);
/* Load the data segment registers, and thus the descriptors ready for
real mode. The base address of each segment is 0x100, 16 times the
@@ -284,7 +285,7 @@ void machine_shutdown(void)
reboot_cpu_id = 0;
/* See if there has been given a command line override */
- if ((reboot_cpu_id != -1) && (reboot_cpu < NR_CPUS) &&
+ if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
cpu_isset(reboot_cpu, cpu_online_map)) {
reboot_cpu_id = reboot_cpu;
}
@@ -311,14 +312,12 @@ void machine_shutdown(void)
#endif
}
-void machine_restart(char * __unused)
+void machine_emergency_restart(void)
{
- machine_shutdown();
-
if (!reboot_thru_bios) {
if (efi_enabled) {
efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL);
- __asm__ __volatile__("lidt %0": :"m" (no_idt));
+ load_idt(&no_idt);
__asm__ __volatile__("int3");
}
/* rebooting needs to touch the page at absolute addr 0 */
@@ -327,7 +326,7 @@ void machine_restart(char * __unused)
mach_reboot_fixups(); /* for board specific fixups */
mach_reboot();
/* That didn't work - force a triple fault.. */
- __asm__ __volatile__("lidt %0": :"m" (no_idt));
+ load_idt(&no_idt);
__asm__ __volatile__("int3");
}
}
@@ -337,23 +336,22 @@ void machine_restart(char * __unused)
machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
}
-EXPORT_SYMBOL(machine_restart);
+void machine_restart(char * __unused)
+{
+ machine_shutdown();
+ machine_emergency_restart();
+}
void machine_halt(void)
{
}
-EXPORT_SYMBOL(machine_halt);
-
void machine_power_off(void)
{
- lapic_shutdown();
+ machine_shutdown();
- if (efi_enabled)
- efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL);
if (pm_power_off)
pm_power_off();
}
-EXPORT_SYMBOL(machine_power_off);
diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c
index 469f496e55c..7455ab64394 100644
--- a/arch/i386/kernel/semaphore.c
+++ b/arch/i386/kernel/semaphore.c
@@ -13,171 +13,9 @@
* rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
*/
#include <linux/config.h>
-#include <linux/sched.h>
-#include <linux/err.h>
-#include <linux/init.h>
#include <asm/semaphore.h>
/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- * - only on a boundary condition do we need to care. When we go
- * from a negative count to a non-negative, we wake people up.
- * - when we go from a non-negative count to a negative do we
- * (a) synchronize with the "sleeper" count and (b) make sure
- * that we're on the wakeup list before we synchronize so that
- * we cannot lose wakeup events.
- */
-
-static fastcall void __attribute_used__ __up(struct semaphore *sem)
-{
- wake_up(&sem->wait);
-}
-
-static fastcall void __attribute_used__ __sched __down(struct semaphore * sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- unsigned long flags;
-
- tsk->state = TASK_UNINTERRUPTIBLE;
- spin_lock_irqsave(&sem->wait.lock, flags);
- add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
- sem->sleepers++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock in
- * the wait_queue_head.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- schedule();
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- tsk->state = TASK_UNINTERRUPTIBLE;
- }
- remove_wait_queue_locked(&sem->wait, &wait);
- wake_up_locked(&sem->wait);
- spin_unlock_irqrestore(&sem->wait.lock, flags);
- tsk->state = TASK_RUNNING;
-}
-
-static fastcall int __attribute_used__ __sched __down_interruptible(struct semaphore * sem)
-{
- int retval = 0;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- unsigned long flags;
-
- tsk->state = TASK_INTERRUPTIBLE;
- spin_lock_irqsave(&sem->wait.lock, flags);
- add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
- sem->sleepers++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * With signals pending, this turns into
- * the trylock failure case - we won't be
- * sleeping, and we* can't get the lock as
- * it has contention. Just correct the count
- * and exit.
- */
- if (signal_pending(current)) {
- retval = -EINTR;
- sem->sleepers = 0;
- atomic_add(sleepers, &sem->count);
- break;
- }
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock in
- * wait_queue_head. The "-1" is because we're
- * still hoping to get the semaphore.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- schedule();
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- tsk->state = TASK_INTERRUPTIBLE;
- }
- remove_wait_queue_locked(&sem->wait, &wait);
- wake_up_locked(&sem->wait);
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- tsk->state = TASK_RUNNING;
- return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for
- * having decremented the count.
- *
- * We could have done the trylock with a
- * single "cmpxchg" without failure cases,
- * but then it wouldn't work on a 386.
- */
-static fastcall int __attribute_used__ __down_trylock(struct semaphore * sem)
-{
- int sleepers;
- unsigned long flags;
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- sleepers = sem->sleepers + 1;
- sem->sleepers = 0;
-
- /*
- * Add "everybody else" and us into it. They aren't
- * playing, because we own the spinlock in the
- * wait_queue_head.
- */
- if (!atomic_add_negative(sleepers, &sem->count)) {
- wake_up_locked(&sem->wait);
- }
-
- spin_unlock_irqrestore(&sem->wait.lock, flags);
- return 1;
-}
-
-
-/*
* The semaphore operations have a special calling sequence that
* allow us to do a simpler in-line version of them. These routines
* need to convert that sequence back into the C sequence when
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 7306353c520..294bcca985a 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -370,12 +370,16 @@ static void __init limit_regions(unsigned long long size)
int i;
if (efi_enabled) {
- for (i = 0; i < memmap.nr_map; i++) {
- current_addr = memmap.map[i].phys_addr +
- (memmap.map[i].num_pages << 12);
- if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
+ efi_memory_desc_t *md;
+ void *p;
+
+ for (p = memmap.map, i = 0; p < memmap.map_end;
+ p += memmap.desc_size, i++) {
+ md = p;
+ current_addr = md->phys_addr + (md->num_pages << 12);
+ if (md->type == EFI_CONVENTIONAL_MEMORY) {
if (current_addr >= size) {
- memmap.map[i].num_pages -=
+ md->num_pages -=
(((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
memmap.nr_map = i + 1;
return;
@@ -1414,7 +1418,7 @@ static struct nop {
This runs before SMP is initialized to avoid SMP problems with
self modifying code. This implies that assymetric systems where
APs have less capabilities than the boot processor are not handled.
- In this case boot with "noreplacement". */
+ Tough. Make sure you disable such features by hand. */
void apply_alternatives(void *start, void *end)
{
struct alt_instr *a;
@@ -1442,24 +1446,12 @@ void apply_alternatives(void *start, void *end)
}
}
-static int no_replacement __initdata = 0;
-
void __init alternative_instructions(void)
{
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
- if (no_replacement)
- return;
apply_alternatives(__alt_instructions, __alt_instructions_end);
}
-static int __init noreplacement_setup(char *s)
-{
- no_replacement = 1;
- return 0;
-}
-
-__setup("noreplacement", noreplacement_setup);
-
static char * __init machine_specific_memory_setup(void);
#ifdef CONFIG_MCA
@@ -1593,8 +1585,14 @@ void __init setup_arch(char **cmdline_p)
*/
acpi_boot_table_init();
acpi_boot_init();
-#endif
+#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
+ if (def_to_bigsmp)
+ printk(KERN_WARNING "More than 8 CPUs detected and "
+ "CONFIG_X86_PC cannot handle it.\nUse "
+ "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
+#endif
+#endif
#ifdef CONFIG_X86_LOCAL_APIC
if (smp_found_config)
get_smp_config();
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index 89ef7adc63a..61eb0c8a6e4 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -278,9 +278,9 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
int tmp, err = 0;
tmp = 0;
- __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
+ savesegment(gs, tmp);
err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
- __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
+ savesegment(fs, tmp);
err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
err |= __put_user(regs->xes, (unsigned int __user *)&sc->es);
@@ -577,10 +577,11 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
else
ret = setup_frame(sig, ka, oldset, regs);
- if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
+ if (ret) {
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
- sigaddset(&current->blocked,sig);
+ if (!(ka->sa.sa_flags & SA_NODEFER))
+ sigaddset(&current->blocked,sig);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
}
@@ -603,7 +604,9 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
* We want the common case to go fast, which
* is why we may in certain cases get here from
* kernel mode. Just return without doing anything
- * if so.
+ * if so. vm86 regs switched out by assembly code
+ * before reaching here, so testing against kernel
+ * CS suffices.
*/
if (!user_mode(regs))
return 1;
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index cec4bde6716..48b55db3680 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -576,7 +576,7 @@ static void stop_this_cpu (void * dummy)
local_irq_disable();
disable_local_APIC();
if (cpu_data[smp_processor_id()].hlt_works_ok)
- for(;;) __asm__("hlt");
+ for(;;) halt();
for (;;);
}
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 8ac8e9fd561..5e4893d2b9f 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -88,6 +88,8 @@ EXPORT_SYMBOL(cpu_online_map);
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
EXPORT_SYMBOL(cpu_callout_map);
+cpumask_t cpu_possible_map;
+EXPORT_SYMBOL(cpu_possible_map);
static cpumask_t smp_commenced_mask;
/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there
@@ -1017,8 +1019,8 @@ int __devinit smp_prepare_cpu(int cpu)
tsc_sync_disabled = 1;
/* init low mem mapping */
- memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
- sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS);
+ clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+ KERNEL_PGD_PTRS);
flush_tlb_all();
schedule_work(&task);
wait_for_completion(&done);
@@ -1265,6 +1267,7 @@ void __devinit smp_prepare_boot_cpu(void)
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), cpu_callout_map);
cpu_set(smp_processor_id(), cpu_present_map);
+ cpu_set(smp_processor_id(), cpu_possible_map);
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
}
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index 3db9a04aec6..9b21a31d4f4 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -251,7 +251,7 @@ ENTRY(sys_call_table)
.long sys_io_submit
.long sys_io_cancel
.long sys_fadvise64 /* 250 */
- .long sys_set_zone_reclaim
+ .long sys_ni_syscall
.long sys_exit_group
.long sys_lookup_dcookie
.long sys_epoll_create
@@ -291,3 +291,6 @@ ENTRY(sys_call_table)
.long sys_keyctl
.long sys_ioprio_set
.long sys_ioprio_get /* 290 */
+ .long sys_inotify_init
+ .long sys_inotify_add_watch
+ .long sys_inotify_rm_watch
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 0ee9dee8af0..6f794a78ee1 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -383,6 +383,7 @@ void notify_arch_cmos_timer(void)
static long clock_cmos_diff, sleep_start;
+static struct timer_opts *last_timer;
static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
/*
@@ -391,6 +392,10 @@ static int timer_suspend(struct sys_device *dev, pm_message_t state)
clock_cmos_diff = -get_cmos_time();
clock_cmos_diff += get_seconds();
sleep_start = get_cmos_time();
+ last_timer = cur_timer;
+ cur_timer = &timer_none;
+ if (last_timer->suspend)
+ last_timer->suspend(state);
return 0;
}
@@ -404,6 +409,7 @@ static int timer_resume(struct sys_device *dev)
if (is_hpet_enabled())
hpet_reenable();
#endif
+ setup_pit_timer();
sec = get_cmos_time() + clock_cmos_diff;
sleep_length = (get_cmos_time() - sleep_start) * HZ;
write_seqlock_irqsave(&xtime_lock, flags);
@@ -412,6 +418,10 @@ static int timer_resume(struct sys_device *dev)
write_sequnlock_irqrestore(&xtime_lock, flags);
jiffies += sleep_length;
wall_jiffies += sleep_length;
+ if (last_timer->resume)
+ last_timer->resume();
+ cur_timer = last_timer;
+ last_timer = NULL;
return 0;
}
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
index ef8dac5dd33..001de97c9e4 100644
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ b/arch/i386/kernel/timers/timer_hpet.c
@@ -136,6 +136,8 @@ static void delay_hpet(unsigned long loops)
} while ((hpet_end - hpet_start) < (loops));
}
+static struct timer_opts timer_hpet;
+
static int __init init_hpet(char* override)
{
unsigned long result, remain;
@@ -163,6 +165,8 @@ static int __init init_hpet(char* override)
}
set_cyc2ns_scale(cpu_khz/1000);
}
+ /* set this only when cpu_has_tsc */
+ timer_hpet.read_timer = read_timer_tsc;
}
/*
@@ -177,6 +181,19 @@ static int __init init_hpet(char* override)
return 0;
}
+static int hpet_resume(void)
+{
+ write_seqlock(&monotonic_lock);
+ /* Assume this is the last mark offset time */
+ rdtsc(last_tsc_low, last_tsc_high);
+
+ if (hpet_use_timer)
+ hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick;
+ else
+ hpet_last = hpet_readl(HPET_COUNTER);
+ write_sequnlock(&monotonic_lock);
+ return 0;
+}
/************************************************************/
/* tsc timer_opts struct */
@@ -186,7 +203,7 @@ static struct timer_opts timer_hpet __read_mostly = {
.get_offset = get_offset_hpet,
.monotonic_clock = monotonic_clock_hpet,
.delay = delay_hpet,
- .read_timer = read_timer_tsc,
+ .resume = hpet_resume,
};
struct init_timer_opts __initdata timer_hpet_init = {
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
index 06de036a820..eddb6403823 100644
--- a/arch/i386/kernel/timers/timer_pit.c
+++ b/arch/i386/kernel/timers/timer_pit.c
@@ -175,30 +175,3 @@ void setup_pit_timer(void)
outb(LATCH >> 8 , PIT_CH0); /* MSB */
spin_unlock_irqrestore(&i8253_lock, flags);
}
-
-static int timer_resume(struct sys_device *dev)
-{
- setup_pit_timer();
- return 0;
-}
-
-static struct sysdev_class timer_sysclass = {
- set_kset_name("timer_pit"),
- .resume = timer_resume,
-};
-
-static struct sys_device device_timer = {
- .id = 0,
- .cls = &timer_sysclass,
-};
-
-static int __init init_timer_sysfs(void)
-{
- int error = sysdev_class_register(&timer_sysclass);
- if (!error)
- error = sysdev_register(&device_timer);
- return error;
-}
-
-device_initcall(init_timer_sysfs);
-
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
index 4ef20e66349..264edaaac31 100644
--- a/arch/i386/kernel/timers/timer_pm.c
+++ b/arch/i386/kernel/timers/timer_pm.c
@@ -186,6 +186,14 @@ static void mark_offset_pmtmr(void)
}
}
+static int pmtmr_resume(void)
+{
+ write_seqlock(&monotonic_lock);
+ /* Assume this is the last mark offset time */
+ offset_tick = read_pmtmr();
+ write_sequnlock(&monotonic_lock);
+ return 0;
+}
static unsigned long long monotonic_clock_pmtmr(void)
{
@@ -247,6 +255,7 @@ static struct timer_opts timer_pmtmr = {
.monotonic_clock = monotonic_clock_pmtmr,
.delay = delay_pmtmr,
.read_timer = read_timer_tsc,
+ .resume = pmtmr_resume,
};
struct init_timer_opts __initdata timer_pmtmr_init = {
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
index 8f4e4d5bc56..6dd470cc9f7 100644
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -543,6 +543,19 @@ static int __init init_tsc(char* override)
return -ENODEV;
}
+static int tsc_resume(void)
+{
+ write_seqlock(&monotonic_lock);
+ /* Assume this is the last mark offset time */
+ rdtsc(last_tsc_low, last_tsc_high);
+#ifdef CONFIG_HPET_TIMER
+ if (is_hpet_enabled() && hpet_use_timer)
+ hpet_last = hpet_readl(HPET_COUNTER);
+#endif
+ write_sequnlock(&monotonic_lock);
+ return 0;
+}
+
#ifndef CONFIG_X86_TSC
/* disable flag for tsc. Takes effect by clearing the TSC cpu flag
* in cpu/common.c */
@@ -573,6 +586,7 @@ static struct timer_opts timer_tsc = {
.monotonic_clock = monotonic_clock_tsc,
.delay = delay_tsc,
.read_timer = read_timer_tsc,
+ .resume = tsc_resume,
};
struct init_timer_opts __initdata timer_tsc_init = {
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index a61f33d06ea..54629bb5893 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -210,7 +210,7 @@ void show_registers(struct pt_regs *regs)
unsigned short ss;
esp = (unsigned long) (&regs->esp);
- ss = __KERNEL_DS;
+ savesegment(ss, ss);
if (user_mode(regs)) {
in_kernel = 0;
esp = regs->esp;
@@ -267,9 +267,6 @@ static void handle_BUG(struct pt_regs *regs)
char c;
unsigned long eip;
- if (user_mode(regs))
- goto no_bug; /* Not in kernel */
-
eip = regs->eip;
if (eip < PAGE_OFFSET)
@@ -568,6 +565,10 @@ static DEFINE_SPINLOCK(nmi_print_lock);
void die_nmi (struct pt_regs *regs, const char *msg)
{
+ if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) ==
+ NOTIFY_STOP)
+ return;
+
spin_lock(&nmi_print_lock);
/*
* We are in trouble anyway, lets at least try
@@ -803,15 +804,17 @@ void math_error(void __user *eip)
*/
cwd = get_fpu_cwd(task);
swd = get_fpu_swd(task);
- switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
+ switch (swd & ~cwd & 0x3f) {
case 0x000:
default:
break;
case 0x001: /* Invalid Op */
- case 0x041: /* Stack Fault */
- case 0x241: /* Stack Fault | Direction */
+ /*
+ * swd & 0x240 == 0x040: Stack Underflow
+ * swd & 0x240 == 0x240: Stack Overflow
+ * User must clear the SF bit (0x40) if set
+ */
info.si_code = FPE_FLTINV;
- /* Should we clear the SF or let user space do it ???? */
break;
case 0x002: /* Denormalize */
case 0x010: /* Underflow */
@@ -1006,7 +1009,7 @@ void __init trap_init_f00f_bug(void)
* it uses the read-only mapped virtual address.
*/
idt_descr.address = fix_to_virt(FIX_F00F_IDT);
- __asm__ __volatile__("lidt %0" : : "m" (idt_descr));
+ load_idt(&idt_descr);
}
#endif
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
index ec0f68ce688..16b48500962 100644
--- a/arch/i386/kernel/vm86.c
+++ b/arch/i386/kernel/vm86.c
@@ -294,8 +294,8 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
*/
info->regs32->eax = 0;
tsk->thread.saved_esp0 = tsk->thread.esp0;
- asm volatile("mov %%fs,%0":"=m" (tsk->thread.saved_fs));
- asm volatile("mov %%gs,%0":"=m" (tsk->thread.saved_gs));
+ savesegment(fs, tsk->thread.saved_fs);
+ savesegment(gs, tsk->thread.saved_gs);
tss = &per_cpu(init_tss, get_cpu());
tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
@@ -542,7 +542,7 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
unsigned char opcode;
unsigned char __user *csp;
unsigned char __user *ssp;
- unsigned short ip, sp;
+ unsigned short ip, sp, orig_flags;
int data32, pref_done;
#define CHECK_IF_IN_TRAP \
@@ -551,8 +551,12 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
#define VM86_FAULT_RETURN do { \
if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \
return_to_32bit(regs, VM86_PICRETURN); \
+ if (orig_flags & TF_MASK) \
+ handle_vm86_trap(regs, 0, 1); \
return; } while (0)
+ orig_flags = *(unsigned short *)&regs->eflags;
+
csp = (unsigned char __user *) (regs->cs << 4);
ssp = (unsigned char __user *) (regs->ss << 4);
sp = SP(regs);
diff --git a/arch/i386/kernel/vsyscall-sigreturn.S b/arch/i386/kernel/vsyscall-sigreturn.S
index c8fcf75b9be..68afa50dd7c 100644
--- a/arch/i386/kernel/vsyscall-sigreturn.S
+++ b/arch/i386/kernel/vsyscall-sigreturn.S
@@ -15,7 +15,7 @@
*/
.text
- .org __kernel_vsyscall+32
+ .org __kernel_vsyscall+32,0x90
.globl __kernel_sigreturn
.type __kernel_sigreturn,@function
__kernel_sigreturn:
@@ -35,6 +35,7 @@ __kernel_rt_sigreturn:
int $0x80
.LEND_rt_sigreturn:
.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
+ .balign 32
.previous
.section .eh_frame,"a",@progbits
diff --git a/arch/i386/mach-es7000/es7000.h b/arch/i386/mach-es7000/es7000.h
index 70691f0c4ce..898ed905e11 100644
--- a/arch/i386/mach-es7000/es7000.h
+++ b/arch/i386/mach-es7000/es7000.h
@@ -104,7 +104,8 @@ struct mip_reg {
#define MIP_SW_APIC 0x1020b
#define MIP_FUNC(VALUE) (VALUE & 0xff)
-extern int parse_unisys_oem (char *oemptr, int oem_entries);
-extern int find_unisys_acpi_oem_table(unsigned long *oem_addr, int *length);
+extern int parse_unisys_oem (char *oemptr);
+extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
+extern void setup_unisys ();
extern int es7000_start_cpu(int cpu, unsigned long eip);
extern void es7000_sw_apic(void);
diff --git a/arch/i386/mach-es7000/es7000plat.c b/arch/i386/mach-es7000/es7000plat.c
index d5936d50047..2000bdca2fc 100644
--- a/arch/i386/mach-es7000/es7000plat.c
+++ b/arch/i386/mach-es7000/es7000plat.c
@@ -75,12 +75,29 @@ es7000_rename_gsi(int ioapic, int gsi)
#endif // (CONFIG_X86_IO_APIC) && (CONFIG_ACPI_INTERPRETER || CONFIG_ACPI_BOOT)
+void __init
+setup_unisys ()
+{
+ /*
+ * Determine the generation of the ES7000 currently running.
+ *
+ * es7000_plat = 1 if the machine is a 5xx ES7000 box
+ * es7000_plat = 2 if the machine is a x86_64 ES7000 box
+ *
+ */
+ if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2))
+ es7000_plat = 2;
+ else
+ es7000_plat = 1;
+ ioapic_renumber_irq = es7000_rename_gsi;
+}
+
/*
* Parse the OEM Table
*/
int __init
-parse_unisys_oem (char *oemptr, int oem_entries)
+parse_unisys_oem (char *oemptr)
{
int i;
int success = 0;
@@ -95,7 +112,7 @@ parse_unisys_oem (char *oemptr, int oem_entries)
tp += 8;
- for (i=0; i <= oem_entries; i++) {
+ for (i=0; i <= 6; i++) {
type = *tp++;
size = *tp++;
tp -= 2;
@@ -130,34 +147,18 @@ parse_unisys_oem (char *oemptr, int oem_entries)
default:
break;
}
- if (i == 6) break;
tp += size;
}
if (success < 2) {
es7000_plat = 0;
- } else {
- printk("\nEnabling ES7000 specific features...\n");
- /*
- * Determine the generation of the ES7000 currently running.
- *
- * es7000_plat = 0 if the machine is NOT a Unisys ES7000 box
- * es7000_plat = 1 if the machine is a 5xx ES7000 box
- * es7000_plat = 2 if the machine is a x86_64 ES7000 box
- *
- */
- if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2))
- es7000_plat = 2;
- else
- es7000_plat = 1;
-
- ioapic_renumber_irq = es7000_rename_gsi;
- }
+ } else
+ setup_unisys();
return es7000_plat;
}
int __init
-find_unisys_acpi_oem_table(unsigned long *oem_addr, int *length)
+find_unisys_acpi_oem_table(unsigned long *oem_addr)
{
struct acpi_table_rsdp *rsdp = NULL;
unsigned long rsdp_phys = 0;
@@ -201,13 +202,11 @@ find_unisys_acpi_oem_table(unsigned long *oem_addr, int *length)
acpi_table_print(header, sdt.entry[i].pa);
t = (struct oem_table *) __acpi_map_table(sdt.entry[i].pa, header->length);
addr = (void *) __acpi_map_table(t->OEMTableAddr, t->OEMTableSize);
- *length = header->length;
*oem_addr = (unsigned long) addr;
return 0;
}
}
}
- Dprintk("ES7000: did not find Unisys ACPI OEM table!\n");
return -1;
}
diff --git a/arch/i386/mach-generic/bigsmp.c b/arch/i386/mach-generic/bigsmp.c
index 25883b44f62..037b2af1a1f 100644
--- a/arch/i386/mach-generic/bigsmp.c
+++ b/arch/i386/mach-generic/bigsmp.c
@@ -47,7 +47,10 @@ static struct dmi_system_id __initdata bigsmp_dmi_table[] = {
static __init int probe_bigsmp(void)
{
- dmi_check_system(bigsmp_dmi_table);
+ if (def_to_bigsmp)
+ dmi_bigsmp = 1;
+ else
+ dmi_check_system(bigsmp_dmi_table);
return dmi_bigsmp;
}
diff --git a/arch/i386/mach-generic/probe.c b/arch/i386/mach-generic/probe.c
index 5497c65a879..cea5b3ce4b5 100644
--- a/arch/i386/mach-generic/probe.c
+++ b/arch/i386/mach-generic/probe.c
@@ -30,6 +30,25 @@ struct genapic *apic_probe[] __initdata = {
NULL,
};
+static int cmdline_apic;
+
+void __init generic_bigsmp_probe(void)
+{
+ /*
+ * This routine is used to switch to bigsmp mode when
+ * - There is no apic= option specified by the user
+ * - generic_apic_probe() has choosen apic_default as the sub_arch
+ * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
+ */
+
+ if (!cmdline_apic && genapic == &apic_default)
+ if (apic_bigsmp.probe()) {
+ genapic = &apic_bigsmp;
+ printk(KERN_INFO "Overriding APIC driver with %s\n",
+ genapic->name);
+ }
+}
+
void __init generic_apic_probe(char *command_line)
{
char *s;
@@ -52,6 +71,7 @@ void __init generic_apic_probe(char *command_line)
if (!changed)
printk(KERN_ERR "Unknown genapic `%s' specified.\n", s);
*p = old;
+ cmdline_apic = changed;
}
for (i = 0; !changed && apic_probe[i]; i++) {
if (apic_probe[i]->probe()) {
diff --git a/arch/i386/mach-visws/reboot.c b/arch/i386/mach-visws/reboot.c
index 3a81e904a7b..5d73e042ed0 100644
--- a/arch/i386/mach-visws/reboot.c
+++ b/arch/i386/mach-visws/reboot.c
@@ -7,13 +7,17 @@
#include "piix4.h"
void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
-void machine_restart(char * __unused)
+void machine_shutdown(void)
{
#ifdef CONFIG_SMP
smp_send_stop();
#endif
+}
+void machine_emergency_restart(void)
+{
/*
* Visual Workstations restart after this
* register is poked on the PIIX4
@@ -21,7 +25,11 @@ void machine_restart(char * __unused)
outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
}
-EXPORT_SYMBOL(machine_restart);
+void machine_restart(char * __unused)
+{
+ machine_shutdown();
+ machine_emergency_restart();
+}
void machine_power_off(void)
{
@@ -42,10 +50,7 @@ void machine_power_off(void)
outl(PIIX_SPECIAL_STOP, 0xCFC);
}
-EXPORT_SYMBOL(machine_power_off);
-
void machine_halt(void)
{
}
-EXPORT_SYMBOL(machine_halt);
diff --git a/arch/i386/mach-visws/setup.c b/arch/i386/mach-visws/setup.c
index 9f6d2d9b1be..26ada6fc0d7 100644
--- a/arch/i386/mach-visws/setup.c
+++ b/arch/i386/mach-visws/setup.c
@@ -14,6 +14,8 @@
#include "cobalt.h"
#include "piix4.h"
+int no_broadcast;
+
char visws_board_type = -1;
char visws_board_rev = -1;
diff --git a/arch/i386/mach-voyager/voyager_basic.c b/arch/i386/mach-voyager/voyager_basic.c
index 3e439ce5e1b..cc69875d979 100644
--- a/arch/i386/mach-voyager/voyager_basic.c
+++ b/arch/i386/mach-voyager/voyager_basic.c
@@ -36,6 +36,7 @@
* Power off function, if any
*/
void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
int voyager_level = 0;
@@ -233,10 +234,9 @@ voyager_power_off(void)
#endif
}
/* and wait for it to happen */
- for(;;) {
- __asm("cli");
- __asm("hlt");
- }
+ local_irq_disable();
+ for(;;)
+ halt();
}
/* copied from process.c */
@@ -251,6 +251,12 @@ kb_wait(void)
}
void
+machine_shutdown(void)
+{
+ /* Architecture specific shutdown needed before a kexec */
+}
+
+void
machine_restart(char *cmd)
{
printk("Voyager Warm Restart\n");
@@ -271,13 +277,17 @@ machine_restart(char *cmd)
outb(basebd | 0x08, VOYAGER_MC_SETUP);
outb(0x02, catbase + 0x21);
}
- for(;;) {
- asm("cli");
- asm("hlt");
- }
+ local_irq_disable();
+ for(;;)
+ halt();
}
-EXPORT_SYMBOL(machine_restart);
+void
+machine_emergency_restart(void)
+{
+ /*for now, just hook this to a warm restart */
+ machine_restart(NULL);
+}
void
mca_nmi_hook(void)
@@ -314,12 +324,9 @@ machine_halt(void)
machine_power_off();
}
-EXPORT_SYMBOL(machine_halt);
-
void machine_power_off(void)
{
if (pm_power_off)
pm_power_off();
}
-EXPORT_SYMBOL(machine_power_off);
diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c
index 8c8527593da..46b0cf4a31e 100644
--- a/arch/i386/mach-voyager/voyager_smp.c
+++ b/arch/i386/mach-voyager/voyager_smp.c
@@ -10,6 +10,7 @@
* the voyager hal to provide the functionality
*/
#include <linux/config.h>
+#include <linux/module.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
@@ -40,6 +41,7 @@ static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned = { [0 ... NR
/* per CPU data structure (for /proc/cpuinfo et al), visible externally
* indexed physically */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_data);
/* physical ID of the CPU used to boot the system */
unsigned char boot_cpu_id;
@@ -72,6 +74,7 @@ static volatile unsigned long smp_invalidate_needed;
/* Bitmask of currently online CPUs - used by setup.c for
/proc/cpuinfo, visible externally but still physical */
cpumask_t cpu_online_map = CPU_MASK_NONE;
+EXPORT_SYMBOL(cpu_online_map);
/* Bitmask of CPUs present in the system - exported by i386_syms.c, used
* by scheduler but indexed physically */
@@ -238,6 +241,9 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
/* This is for the new dynamic CPU boot code */
cpumask_t cpu_callin_map = CPU_MASK_NONE;
cpumask_t cpu_callout_map = CPU_MASK_NONE;
+EXPORT_SYMBOL(cpu_callout_map);
+cpumask_t cpu_possible_map = CPU_MASK_ALL;
+EXPORT_SYMBOL(cpu_possible_map);
/* The per processor IRQ masks (these are usually kept in sync) */
static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned;
@@ -978,6 +984,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
preempt_enable();
}
+EXPORT_SYMBOL(flush_tlb_page);
/* enable the requested IRQs */
static void
@@ -1010,7 +1017,7 @@ smp_stop_cpu_function(void *dummy)
cpu_clear(smp_processor_id(), cpu_online_map);
local_irq_disable();
for(;;)
- __asm__("hlt");
+ halt();
}
static DEFINE_SPINLOCK(call_lock);
@@ -1109,6 +1116,7 @@ smp_call_function (void (*func) (void *info), void *info, int retry,
return 0;
}
+EXPORT_SYMBOL(smp_call_function);
/* Sorry about the name. In an APIC based system, the APICs
* themselves are programmed to send a timer interrupt. This is used
@@ -1904,6 +1912,7 @@ void __devinit smp_prepare_boot_cpu(void)
{
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), cpu_callout_map);
+ cpu_set(smp_processor_id(), cpu_possible_map);
}
int __devinit
diff --git a/arch/i386/math-emu/get_address.c b/arch/i386/math-emu/get_address.c
index 91175738e94..9819b705efa 100644
--- a/arch/i386/math-emu/get_address.c
+++ b/arch/i386/math-emu/get_address.c
@@ -155,7 +155,6 @@ static long pm_address(u_char FPU_modrm, u_char segment,
{
struct desc_struct descriptor;
unsigned long base_address, limit, address, seg_top;
- unsigned short selector;
segment--;
@@ -173,17 +172,11 @@ static long pm_address(u_char FPU_modrm, u_char segment,
/* fs and gs aren't used by the kernel, so they still have their
user-space values. */
case PREFIX_FS_-1:
- /* The cast is needed here to get gcc 2.8.0 to use a 16 bit register
- in the assembler statement. */
-
- __asm__("mov %%fs,%0":"=r" (selector));
- addr->selector = selector;
+ /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */
+ savesegment(fs, addr->selector);
break;
case PREFIX_GS_-1:
- /* The cast is needed here to get gcc 2.8.0 to use a 16 bit register
- in the assembler statement. */
- __asm__("mov %%gs,%0":"=r" (selector));
- addr->selector = selector;
+ savesegment(gs, addr->selector);
break;
default:
addr->selector = PM_REG_(segment);
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index b358f0702a4..6711ce3f691 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -262,6 +262,17 @@ static unsigned long calculate_numa_remap_pages(void)
reserve_pages += size;
printk("Shrinking node %d from %ld pages to %ld pages\n",
nid, node_end_pfn[nid], node_end_pfn[nid] - size);
+
+ if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) {
+ /*
+ * Align node_end_pfn[] and node_remap_start_pfn[] to
+ * pmd boundary. remap_numa_kva will barf otherwise.
+ */
+ printk("Shrinking node %d further by %ld pages for proper alignment\n",
+ nid, node_end_pfn[nid] & (PTRS_PER_PTE-1));
+ size += node_end_pfn[nid] & (PTRS_PER_PTE-1);
+ }
+
node_end_pfn[nid] -= size;
node_remap_start_pfn[nid] = node_end_pfn[nid];
}
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index 8e90339d6ea..411b8500ad1 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -199,6 +199,18 @@ static inline int is_prefetch(struct pt_regs *regs, unsigned long addr,
return 0;
}
+static noinline void force_sig_info_fault(int si_signo, int si_code,
+ unsigned long address, struct task_struct *tsk)
+{
+ siginfo_t info;
+
+ info.si_signo = si_signo;
+ info.si_errno = 0;
+ info.si_code = si_code;
+ info.si_addr = (void __user *)address;
+ force_sig_info(si_signo, &info, tsk);
+}
+
fastcall void do_invalid_op(struct pt_regs *, unsigned long);
/*
@@ -218,11 +230,10 @@ fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code)
struct vm_area_struct * vma;
unsigned long address;
unsigned long page;
- int write;
- siginfo_t info;
+ int write, si_code;
/* get the address */
- __asm__("movl %%cr2,%0":"=r" (address));
+ address = read_cr2();
if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
SIGSEGV) == NOTIFY_STOP)
@@ -233,7 +244,7 @@ fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code)
tsk = current;
- info.si_code = SEGV_MAPERR;
+ si_code = SEGV_MAPERR;
/*
* We fault-in kernel-space virtual memory on-demand. The
@@ -313,7 +324,7 @@ fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code)
* we can handle it..
*/
good_area:
- info.si_code = SEGV_ACCERR;
+ si_code = SEGV_ACCERR;
write = 0;
switch (error_code & 3) {
default: /* 3: write, present */
@@ -387,11 +398,7 @@ bad_area_nosemaphore:
/* Kernel addresses are always protection faults */
tsk->thread.error_code = error_code | (address >= TASK_SIZE);
tsk->thread.trap_no = 14;
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- /* info.si_code has been set above */
- info.si_addr = (void __user *)address;
- force_sig_info(SIGSEGV, &info, tsk);
+ force_sig_info_fault(SIGSEGV, si_code, address, tsk);
return;
}
@@ -446,7 +453,7 @@ no_context:
printk(" at virtual address %08lx\n",address);
printk(KERN_ALERT " printing eip:\n");
printk("%08lx\n", regs->eip);
- asm("movl %%cr3,%0":"=r" (page));
+ page = read_cr3();
page = ((unsigned long *) __va(page))[address >> 22];
printk(KERN_ALERT "*pde = %08lx\n", page);
/*
@@ -500,11 +507,7 @@ do_sigbus:
tsk->thread.cr2 = address;
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGBUS, &info, tsk);
+ force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
return;
vmalloc_fault:
@@ -523,7 +526,7 @@ vmalloc_fault:
pmd_t *pmd, *pmd_k;
pte_t *pte_k;
- asm("movl %%cr3,%0":"=r" (pgd_paddr));
+ pgd_paddr = read_cr3();
pgd = index + (pgd_t *)__va(pgd_paddr);
pgd_k = init_mm.pgd + index;
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
index 3b099f32b94..d524127c9af 100644
--- a/arch/i386/mm/hugetlbpage.c
+++ b/arch/i386/mm/hugetlbpage.c
@@ -22,12 +22,15 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
- pmd_t *pmd = NULL;
+ pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
- pmd = pmd_alloc(mm, pud, addr);
- return (pte_t *) pmd;
+ if (pud)
+ pte = (pte_t *) pmd_alloc(mm, pud, addr);
+ BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
+
+ return pte;
}
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
@@ -37,8 +40,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
pmd_t *pmd = NULL;
pgd = pgd_offset(mm, addr);
- pud = pud_offset(pgd, addr);
- pmd = pmd_offset(pud, addr);
+ if (pgd_present(*pgd)) {
+ pud = pud_offset(pgd, addr);
+ if (pud_present(*pud))
+ pmd = pmd_offset(pud, addr);
+ }
return (pte_t *) pmd;
}
@@ -118,17 +124,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
}
#endif
-void hugetlb_clean_stale_pgtable(pte_t *pte)
-{
- pmd_t *pmd = (pmd_t *) pte;
- struct page *page;
-
- page = pmd_page(*pmd);
- pmd_clear(pmd);
- dec_page_state(nr_page_table_pages);
- page_cache_release(page);
-}
-
/* x86_64 also uses this file */
#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 12216b52e28..9edfc058b89 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -198,9 +198,10 @@ int page_is_ram(unsigned long pagenr)
if (efi_enabled) {
efi_memory_desc_t *md;
+ void *p;
- for (i = 0; i < memmap.nr_map; i++) {
- md = &memmap.map[i];
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
if (!is_available_memory(md))
continue;
addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
@@ -348,7 +349,7 @@ static void __init pagetable_init (void)
* All user-space mappings are explicitly cleared after
* SMP startup.
*/
- pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
+ set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
#endif
}
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index cb3da6baa70..f600fc244f0 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -12,6 +12,7 @@
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
static DEFINE_SPINLOCK(cpa_lock);
static struct list_head df_list = LIST_HEAD_INIT(df_list);
@@ -52,8 +53,8 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot)
addr = address & LARGE_PAGE_MASK;
pbase = (pte_t *)page_address(base);
for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
- pbase[i] = pfn_pte(addr >> PAGE_SHIFT,
- addr == address ? prot : PAGE_KERNEL);
+ set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
+ addr == address ? prot : PAGE_KERNEL));
}
return base;
}
@@ -62,7 +63,7 @@ static void flush_kernel_map(void *dummy)
{
/* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */
if (boot_cpu_data.x86_model >= 4)
- asm volatile("wbinvd":::"memory");
+ wbinvd();
/* Flush all to work around Errata in early athlons regarding
* large page flushing.
*/
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index bd2f7afc7a2..dcdce2c6c53 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -207,19 +207,19 @@ void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
{
unsigned long flags;
- if (PTRS_PER_PMD == 1)
+ if (PTRS_PER_PMD == 1) {
+ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
spin_lock_irqsave(&pgd_lock, flags);
+ }
- memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
- (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
-
+ KERNEL_PGD_PTRS);
if (PTRS_PER_PMD > 1)
return;
pgd_list_add(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
}
/* never called when PTRS_PER_PMD > 1 */
diff --git a/arch/i386/pci/acpi.c b/arch/i386/pci/acpi.c
index 2db65ec45dc..42913f43feb 100644
--- a/arch/i386/pci/acpi.c
+++ b/arch/i386/pci/acpi.c
@@ -30,6 +30,7 @@ static int __init pci_acpi_init(void)
acpi_irq_penalty_init();
pcibios_scanned++;
pcibios_enable_irq = acpi_pci_irq_enable;
+ pcibios_disable_irq = acpi_pci_irq_disable;
if (pci_routeirq) {
/*
diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
index 70bcd53451f..c96bea14b98 100644
--- a/arch/i386/pci/common.c
+++ b/arch/i386/pci/common.c
@@ -165,7 +165,6 @@ static int __init pcibios_init(void)
if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT))
pcibios_sort();
#endif
- pci_assign_unassigned_resources();
return 0;
}
@@ -254,3 +253,9 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
return pcibios_enable_irq(dev);
}
+
+void pcibios_disable_device (struct pci_dev *dev)
+{
+ if (pcibios_disable_irq)
+ pcibios_disable_irq(dev);
+}
diff --git a/arch/i386/pci/i386.c b/arch/i386/pci/i386.c
index 93a364c8215..3cc480998a4 100644
--- a/arch/i386/pci/i386.c
+++ b/arch/i386/pci/i386.c
@@ -170,43 +170,26 @@ static void __init pcibios_allocate_resources(int pass)
static int __init pcibios_assign_resources(void)
{
struct pci_dev *dev = NULL;
- int idx;
- struct resource *r;
-
- for_each_pci_dev(dev) {
- int class = dev->class >> 8;
-
- /* Don't touch classless devices and host bridges */
- if (!class || class == PCI_CLASS_BRIDGE_HOST)
- continue;
-
- for(idx=0; idx<6; idx++) {
- r = &dev->resource[idx];
-
- /*
- * Don't touch IDE controllers and I/O ports of video cards!
- */
- if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) ||
- (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO)))
- continue;
-
- /*
- * We shall assign a new address to this resource, either because
- * the BIOS forgot to do so or because we have decided the old
- * address was unusable for some reason.
- */
- if (!r->start && r->end)
- pci_assign_resource(dev, idx);
- }
+ struct resource *r, *pr;
- if (pci_probe & PCI_ASSIGN_ROMS) {
+ if (!(pci_probe & PCI_ASSIGN_ROMS)) {
+ /* Try to use BIOS settings for ROMs, otherwise let
+ pci_assign_unassigned_resources() allocate the new
+ addresses. */
+ for_each_pci_dev(dev) {
r = &dev->resource[PCI_ROM_RESOURCE];
- r->end -= r->start;
- r->start = 0;
- if (r->end)
- pci_assign_resource(dev, PCI_ROM_RESOURCE);
+ if (!r->flags || !r->start)
+ continue;
+ pr = pci_find_parent_resource(dev, r);
+ if (!pr || request_resource(pr, r) < 0) {
+ r->end -= r->start;
+ r->start = 0;
+ }
}
}
+
+ pci_assign_unassigned_resources();
+
return 0;
}
diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c
index 78ca1ecbb90..86348b68fda 100644
--- a/arch/i386/pci/irq.c
+++ b/arch/i386/pci/irq.c
@@ -56,6 +56,7 @@ struct irq_router_handler {
};
int (*pcibios_enable_irq)(struct pci_dev *dev) = NULL;
+void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL;
/*
* Check passed address for the PCI IRQ Routing Table signature
@@ -550,6 +551,13 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
{
/* FIXME: We should move some of the quirk fixup stuff here */
+
+ if (router->device == PCI_DEVICE_ID_VIA_82C686 &&
+ device == PCI_DEVICE_ID_VIA_82C586_0) {
+ /* Asus k7m bios wrongly reports 82C686A as 586-compatible */
+ device = PCI_DEVICE_ID_VIA_82C686;
+ }
+
switch(device)
{
case PCI_DEVICE_ID_VIA_82C586_0:
@@ -1051,24 +1059,28 @@ static int __init pcibios_irq_init(void)
subsys_initcall(pcibios_irq_init);
-static void pirq_penalize_isa_irq(int irq)
+static void pirq_penalize_isa_irq(int irq, int active)
{
/*
* If any ISAPnP device reports an IRQ in its list of possible
* IRQ's, we try to avoid assigning it to PCI devices.
*/
- if (irq < 16)
- pirq_penalty[irq] += 100;
+ if (irq < 16) {
+ if (active)
+ pirq_penalty[irq] += 1000;
+ else
+ pirq_penalty[irq] += 100;
+ }
}
-void pcibios_penalize_isa_irq(int irq)
+void pcibios_penalize_isa_irq(int irq, int active)
{
#ifdef CONFIG_ACPI_PCI
if (!acpi_noirq)
- acpi_penalize_isa_irq(irq);
+ acpi_penalize_isa_irq(irq, active);
else
#endif
- pirq_penalize_isa_irq(irq);
+ pirq_penalize_isa_irq(irq, active);
}
static int pirq_enable_irq(struct pci_dev *dev)
diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h
index a80f0f55ff5..127d53ad16b 100644
--- a/arch/i386/pci/pci.h
+++ b/arch/i386/pci/pci.h
@@ -73,3 +73,4 @@ extern int pcibios_scanned;
extern spinlock_t pci_config_lock;
extern int (*pcibios_enable_irq)(struct pci_dev *dev);
+extern void (*pcibios_disable_irq)(struct pci_dev *dev);
diff --git a/arch/i386/pci/visws.c b/arch/i386/pci/visws.c
index 6a924878443..6c17433fdf7 100644
--- a/arch/i386/pci/visws.c
+++ b/arch/i386/pci/visws.c
@@ -18,10 +18,12 @@
extern struct pci_raw_ops pci_direct_conf1;
static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
+static void pci_visws_disable_irq(struct pci_dev *dev) { }
int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq;
+void (*pcibios_disable_irq)(struct pci_dev *dev) = &pci_visws_disable_irq;
-void __init pcibios_penalize_isa_irq(int irq) {}
+void __init pcibios_penalize_isa_irq(int irq, int active) {}
unsigned int pci_bus0, pci_bus1;
diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c
index c547c1af6fa..7b0b9ad848e 100644
--- a/arch/i386/power/cpu.c
+++ b/arch/i386/power/cpu.c
@@ -42,25 +42,25 @@ void __save_processor_state(struct saved_context *ctxt)
/*
* descriptor tables
*/
- asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit));
- asm volatile ("sidt %0" : "=m" (ctxt->idt_limit));
- asm volatile ("str %0" : "=m" (ctxt->tr));
+ store_gdt(&ctxt->gdt_limit);
+ store_idt(&ctxt->idt_limit);
+ store_tr(ctxt->tr);
/*
* segment registers
*/
- asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
- asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
- asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
- asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
+ savesegment(es, ctxt->es);
+ savesegment(fs, ctxt->fs);
+ savesegment(gs, ctxt->gs);
+ savesegment(ss, ctxt->ss);
/*
* control registers
*/
- asm volatile ("movl %%cr0, %0" : "=r" (ctxt->cr0));
- asm volatile ("movl %%cr2, %0" : "=r" (ctxt->cr2));
- asm volatile ("movl %%cr3, %0" : "=r" (ctxt->cr3));
- asm volatile ("movl %%cr4, %0" : "=r" (ctxt->cr4));
+ ctxt->cr0 = read_cr0();
+ ctxt->cr2 = read_cr2();
+ ctxt->cr3 = read_cr3();
+ ctxt->cr4 = read_cr4();
}
void save_processor_state(void)
@@ -84,7 +84,6 @@ static void fix_processor_context(void)
struct tss_struct * t = &per_cpu(init_tss, cpu);
set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */
- per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TSS].b &= 0xfffffdff;
load_TR_desc(); /* This does ltr */
load_LDT(&current->active_mm->context); /* This does lldt */
@@ -109,25 +108,25 @@ void __restore_processor_state(struct saved_context *ctxt)
/*
* control registers
*/
- asm volatile ("movl %0, %%cr4" :: "r" (ctxt->cr4));
- asm volatile ("movl %0, %%cr3" :: "r" (ctxt->cr3));
- asm volatile ("movl %0, %%cr2" :: "r" (ctxt->cr2));
- asm volatile ("movl %0, %%cr0" :: "r" (ctxt->cr0));
+ write_cr4(ctxt->cr4);
+ write_cr3(ctxt->cr3);
+ write_cr2(ctxt->cr2);
+ write_cr2(ctxt->cr0);
/*
* now restore the descriptor tables to their proper values
* ltr is done i fix_processor_context().
*/
- asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit));
- asm volatile ("lidt %0" :: "m" (ctxt->idt_limit));
+ load_gdt(&ctxt->gdt_limit);
+ load_idt(&ctxt->idt_limit);
/*
* segment registers
*/
- asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
- asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
- asm volatile ("movw %0, %%gs" :: "r" (ctxt->gs));
- asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
+ loadsegment(es, ctxt->es);
+ loadsegment(fs, ctxt->fs);
+ loadsegment(gs, ctxt->gs);
+ loadsegment(ss, ctxt->ss);
/*
* sysenter MSRs