Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--  arch/i386/kernel/Makefile                          |   1
-rw-r--r--  arch/i386/kernel/acpi/boot.c                       |  44
-rw-r--r--  arch/i386/kernel/acpi/sleep.c                      |  12
-rw-r--r--  arch/i386/kernel/acpi/wakeup.S                     |  37
-rw-r--r--  arch/i386/kernel/alternative.c                     |  69
-rw-r--r--  arch/i386/kernel/apic.c                            |  10
-rw-r--r--  arch/i386/kernel/apm.c                             |   2
-rw-r--r--  arch/i386/kernel/asm-offsets.c                     |  29
-rw-r--r--  arch/i386/kernel/cpu/Makefile                      |   3
-rw-r--r--  arch/i386/kernel/cpu/addon_cpuid_features.c        |  50
-rw-r--r--  arch/i386/kernel/cpu/amd.c                         |  11
-rw-r--r--  arch/i386/kernel/cpu/common.c                      |   4
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/Kconfig               |  31
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c        |  17
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c     |   6
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/gx-suspmod.c          |   6
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/longhaul.c            | 216
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/longhaul.h            |  12
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/powernow-k8.c         |  29
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c  | 276
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/speedstep-ich.c       |   4
-rw-r--r--  arch/i386/kernel/cpu/cyrix.c                       |   2
-rw-r--r--  arch/i386/kernel/cpu/intel_cacheinfo.c             |  79
-rw-r--r--  arch/i386/kernel/cpu/mcheck/mce.c                  |  14
-rw-r--r--  arch/i386/kernel/cpu/mcheck/non-fatal.c            |   4
-rw-r--r--  arch/i386/kernel/cpu/mcheck/therm_throt.c          |   6
-rw-r--r--  arch/i386/kernel/cpu/mtrr/cyrix.c                  |   1
-rw-r--r--  arch/i386/kernel/cpu/mtrr/generic.c                |   2
-rw-r--r--  arch/i386/kernel/cpu/mtrr/main.c                   |   2
-rw-r--r--  arch/i386/kernel/cpu/mtrr/state.c                  |   1
-rw-r--r--  arch/i386/kernel/cpu/perfctr-watchdog.c            |  10
-rw-r--r--  arch/i386/kernel/cpu/proc.c                        |  21
-rw-r--r--  arch/i386/kernel/cpu/rise.c                        |  52
-rw-r--r--  arch/i386/kernel/e820.c                            |  34
-rw-r--r--  arch/i386/kernel/efi.c                             |   2
-rw-r--r--  arch/i386/kernel/entry.S                           |  87
-rw-r--r--  arch/i386/kernel/geode.c                           | 155
-rw-r--r--  arch/i386/kernel/head.S                            |  13
-rw-r--r--  arch/i386/kernel/hpet.c                            |  98
-rw-r--r--  arch/i386/kernel/i8253.c                           |  32
-rw-r--r--  arch/i386/kernel/init_task.c                       |   2
-rw-r--r--  arch/i386/kernel/io_apic.c                         |  27
-rw-r--r--  arch/i386/kernel/irq.c                             |  10
-rw-r--r--  arch/i386/kernel/kprobes.c                         |   9
-rw-r--r--  arch/i386/kernel/nmi.c                             |  10
-rw-r--r--  arch/i386/kernel/paravirt.c                        |  55
-rw-r--r--  arch/i386/kernel/process.c                         |  85
-rw-r--r--  arch/i386/kernel/ptrace.c                          |  39
-rw-r--r--  arch/i386/kernel/reboot.c                          |   9
-rw-r--r--  arch/i386/kernel/setup.c                           |  25
-rw-r--r--  arch/i386/kernel/signal.c                          |   7
-rw-r--r--  arch/i386/kernel/smp.c                             |   5
-rw-r--r--  arch/i386/kernel/smpboot.c                         |   8
-rw-r--r--  arch/i386/kernel/smpcommon.c                       |   8
-rw-r--r--  arch/i386/kernel/syscall_table.S                   |   1
-rw-r--r--  arch/i386/kernel/sysenter.c                        |   4
-rw-r--r--  arch/i386/kernel/time.c                            |  50
-rw-r--r--  arch/i386/kernel/traps.c                           |  53
-rw-r--r--  arch/i386/kernel/tsc.c                             |  27
-rw-r--r--  arch/i386/kernel/verify_cpu.S                      |  94
-rw-r--r--  arch/i386/kernel/vmi.c                             |   4
-rw-r--r--  arch/i386/kernel/vmiclock.c                        |   8
-rw-r--r--  arch/i386/kernel/vmlinux.lds.S                     |   8
-rw-r--r--  arch/i386/kernel/vsyscall-note.S                   |  52
64 files changed, 1167 insertions(+), 917 deletions(-)
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 06da59f6f83..dbe5e87e0d6 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_VM86) += vm86.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_HPET_TIMER) += hpet.o
obj-$(CONFIG_K8_NB) += k8.o
+obj-$(CONFIG_MGEODE_LX) += geode.o
obj-$(CONFIG_VMI) += vmi.o vmiclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index a574cd2c8b6..cacdd883bf2 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -618,6 +618,8 @@ static int __init acpi_parse_sbf(struct acpi_table_header *table)
#ifdef CONFIG_HPET_TIMER
#include <asm/hpet.h>
+static struct __initdata resource *hpet_res;
+
static int __init acpi_parse_hpet(struct acpi_table_header *table)
{
struct acpi_table_hpet *hpet_tbl;
@@ -638,8 +640,42 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
hpet_tbl->id, hpet_address);
+ /*
+ * Allocate and initialize the HPET firmware resource for adding into
+ * the resource tree during the lateinit timeframe.
+ */
+#define HPET_RESOURCE_NAME_SIZE 9
+ hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
+
+ if (!hpet_res)
+ return 0;
+
+ memset(hpet_res, 0, sizeof(*hpet_res));
+ hpet_res->name = (void *)&hpet_res[1];
+ hpet_res->flags = IORESOURCE_MEM;
+ snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u",
+ hpet_tbl->sequence);
+
+ hpet_res->start = hpet_address;
+ hpet_res->end = hpet_address + (1 * 1024) - 1;
+
return 0;
}
+
+/*
+ * hpet_insert_resource inserts the HPET resources used into the resource
+ * tree.
+ */
+static __init int hpet_insert_resource(void)
+{
+ if (!hpet_res)
+ return 1;
+
+ return insert_resource(&iomem_resource, hpet_res);
+}
+
+late_initcall(hpet_insert_resource);
+
#else
#define acpi_parse_hpet NULL
#endif
@@ -950,14 +986,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
},
{
.callback = force_acpi_ht,
- .ident = "DELL GX240",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
- },
- },
- {
- .callback = force_acpi_ht,
.ident = "HP VISUALIZE NT Workstation",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c
index 4ee83577bf6..c42b5ab49de 100644
--- a/arch/i386/kernel/acpi/sleep.c
+++ b/arch/i386/kernel/acpi/sleep.c
@@ -14,7 +14,7 @@
/* address in low memory of the wakeup routine. */
unsigned long acpi_wakeup_address = 0;
-unsigned long acpi_video_flags;
+unsigned long acpi_realmode_flags;
extern char wakeup_start, wakeup_end;
extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
@@ -68,9 +68,11 @@ static int __init acpi_sleep_setup(char *str)
{
while ((str != NULL) && (*str != '\0')) {
if (strncmp(str, "s3_bios", 7) == 0)
- acpi_video_flags = 1;
+ acpi_realmode_flags |= 1;
if (strncmp(str, "s3_mode", 7) == 0)
- acpi_video_flags |= 2;
+ acpi_realmode_flags |= 2;
+ if (strncmp(str, "s3_beep", 7) == 0)
+ acpi_realmode_flags |= 4;
str = strchr(str, ',');
if (str != NULL)
str += strspn(str, ", \t");
@@ -80,9 +82,11 @@ static int __init acpi_sleep_setup(char *str)
__setup("acpi_sleep=", acpi_sleep_setup);
+/* Ouch, we want to delete this. We already have a better version in
+   userspace, in s2ram from the suspend.sf.net project */
static __init int reset_videomode_after_s3(struct dmi_system_id *d)
{
- acpi_video_flags |= 2;
+ acpi_realmode_flags |= 2;
return 0;
}
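
Note on the parser above: the flags accumulate, so booting with
acpi_sleep=s3_bios,s3_beep leaves acpi_realmode_flags at 5 (bit 0 calls the
video BIOS on resume, bit 2 beeps from the wakeup code).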
diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S
index a2295a34b2c..ed0a0f2c159 100644
--- a/arch/i386/kernel/acpi/wakeup.S
+++ b/arch/i386/kernel/acpi/wakeup.S
@@ -13,6 +13,21 @@
# cs = 0x1234, eip = 0x05
#
+#define BEEP \
+ inb $97, %al; \
+ outb %al, $0x80; \
+ movb $3, %al; \
+ outb %al, $97; \
+ outb %al, $0x80; \
+ movb $-74, %al; \
+ outb %al, $67; \
+ outb %al, $0x80; \
+ movb $-119, %al; \
+ outb %al, $66; \
+ outb %al, $0x80; \
+ movb $15, %al; \
+ outb %al, $66;
+
ALIGN
.align 4096
ENTRY(wakeup_start)
@@ -31,6 +46,11 @@ wakeup_code:
movw %cs, %ax
movw %ax, %ds # Make ds:0 point to wakeup_start
movw %ax, %ss
+
+ testl $4, realmode_flags - wakeup_code
+ jz 1f
+ BEEP
+1:
mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board
movw $0x0e00 + 'S', %fs:(0x12)
@@ -41,7 +61,7 @@ wakeup_code:
cmpl $0x12345678, %eax
jne bogus_real_magic
- testl $1, video_flags - wakeup_code
+ testl $1, realmode_flags - wakeup_code
jz 1f
lcall $0xc000,$3
movw %cs, %ax
@@ -49,7 +69,7 @@ wakeup_code:
movw %ax, %ss
1:
- testl $2, video_flags - wakeup_code
+ testl $2, realmode_flags - wakeup_code
jz 1f
mov video_mode - wakeup_code, %ax
call mode_set
@@ -88,7 +108,11 @@ wakeup_code:
cmpl $0x12345678, %eax
jne bogus_real_magic
- ljmpl $__KERNEL_CS,$wakeup_pmode_return
+ testl $8, realmode_flags - wakeup_code
+ jz 1f
+ BEEP
+1:
+ ljmpl $__KERNEL_CS, $wakeup_pmode_return
real_save_gdt: .word 0
.long 0
@@ -97,7 +121,8 @@ real_save_cr3: .long 0
real_save_cr4: .long 0
real_magic: .long 0
video_mode: .long 0
-video_flags: .long 0
+realmode_flags: .long 0
+beep_flags: .long 0
real_efer_save_restore: .long 0
real_save_efer_edx: .long 0
real_save_efer_eax: .long 0
@@ -260,8 +285,8 @@ ENTRY(acpi_copy_wakeup_routine)
movl saved_videomode, %edx
movl %edx, video_mode - wakeup_start (%eax)
- movl acpi_video_flags, %edx
- movl %edx, video_flags - wakeup_start (%eax)
+ movl acpi_realmode_flags, %edx
+ movl %edx, realmode_flags - wakeup_start (%eax)
movl $0x12345678, real_magic - wakeup_start (%eax)
movl $0x12345678, saved_magic
popl %ebx
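
The BEEP macro in this file writes its PIT programming bytes as signed
immediates, which obscures the values. A rough C restatement of what it does,
for reference only (the real macro also pulses port 0x80 between writes as an
I/O delay; outb() from <asm/io.h> assumed):

static void beep_sketch(void)
{
	outb(3, 0x61);		/* gate PIT channel 2 to the PC speaker */
	outb(0xb6, 0x43);	/* $-74 above: channel 2, lobyte/hibyte, square wave */
	outb(0x89, 0x42);	/* $-119: low byte of the divisor */
	outb(0x0f, 0x42);	/* $15: high byte; 0x0f89 = 3977, about a 300 Hz tone */
}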
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index d8cda14fff8..c3750c2c411 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -2,12 +2,17 @@
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/list.h>
+#include <linux/kprobes.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
#include <asm/alternative.h>
#include <asm/sections.h>
+#include <asm/pgtable.h>
+#include <asm/mce.h>
+#include <asm/nmi.h>
-static int noreplace_smp = 0;
-static int smp_alt_once = 0;
-static int debug_alternative = 0;
+#ifdef CONFIG_HOTPLUG_CPU
+static int smp_alt_once;
static int __init bootonly(char *str)
{
@@ -15,6 +20,11 @@ static int __init bootonly(char *str)
return 1;
}
__setup("smp-alt-boot", bootonly);
+#else
+#define smp_alt_once 1
+#endif
+
+static int debug_alternative;
static int __init debug_alt(char *str)
{
@@ -23,6 +33,8 @@ static int __init debug_alt(char *str)
}
__setup("debug-alternative", debug_alt);
+static int noreplace_smp;
+
static int __init setup_noreplace_smp(char *str)
{
noreplace_smp = 1;
@@ -144,7 +156,7 @@ static void nop_out(void *insns, unsigned int len)
unsigned int noplen = len;
if (noplen > ASM_NOP_MAX)
noplen = ASM_NOP_MAX;
- memcpy(insns, noptable[noplen], noplen);
+ text_poke(insns, noptable[noplen], noplen);
insns += noplen;
len -= noplen;
}
@@ -196,7 +208,7 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
continue;
if (*ptr > text_end)
continue;
- **ptr = 0xf0; /* lock prefix */
+ text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
};
}
@@ -354,10 +366,6 @@ void apply_paravirt(struct paravirt_patch_site *start,
/* Pad the rest with nops */
nop_out(p->instr + used, p->len - used);
}
-
- /* Sync to be conservative, in case we patched following
- * instructions */
- sync_core();
}
extern struct paravirt_patch_site __start_parainstructions[],
__stop_parainstructions[];
@@ -367,6 +375,14 @@ void __init alternative_instructions(void)
{
unsigned long flags;
+	/* The patching is not fully atomic, so try to avoid local
+	   interruptions that might execute the code being patched.
+	   Other CPUs are not running. */
+ stop_nmi();
+#ifdef CONFIG_MCE
+ stop_mce();
+#endif
+
local_irq_save(flags);
apply_alternatives(__alt_instructions, __alt_instructions_end);
@@ -376,8 +392,6 @@ void __init alternative_instructions(void)
#ifdef CONFIG_HOTPLUG_CPU
if (num_possible_cpus() < 2)
smp_alt_once = 1;
-#else
- smp_alt_once = 1;
#endif
#ifdef CONFIG_SMP
@@ -401,4 +415,37 @@ void __init alternative_instructions(void)
#endif
apply_paravirt(__parainstructions, __parainstructions_end);
local_irq_restore(flags);
+
+ restart_nmi();
+#ifdef CONFIG_MCE
+ restart_mce();
+#endif
+}
+
+/*
+ * Warning:
+ * When you use this code to patch more than one byte of an instruction
+ * you need to make sure that other CPUs cannot execute this code in parallel.
+ * Also no thread must be currently preempted in the middle of these instructions.
+ * And on the local CPU you need to be protected against NMI or MCE handlers
+ * seeing an inconsistent instruction while you patch.
+ */
+void __kprobes text_poke(void *oaddr, unsigned char *opcode, int len)
+{
+ u8 *addr = oaddr;
+ if (!pte_write(*lookup_address((unsigned long)addr))) {
+ struct page *p[2] = { virt_to_page(addr), virt_to_page(addr+PAGE_SIZE) };
+ addr = vmap(p, 2, VM_MAP, PAGE_KERNEL);
+ if (!addr)
+ return;
+ addr += ((unsigned long)oaddr) % PAGE_SIZE;
+ }
+ memcpy(addr, opcode, len);
+ sync_core();
+ /* Not strictly needed, but can speed CPU recovery up. Ignore cross cacheline
+ case. */
+ if (cpu_has_clflush)
+ asm("clflush (%0) " :: "r" (oaddr) : "memory");
+ if (addr != oaddr)
+ vunmap(addr);
}
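
A minimal caller sketch for the new text_poke() helper (hypothetical, not part
of this commit), showing the intended calling convention; the helper itself
remaps write-protected text through vmap() as above:

static void patch_byte_with_nop(void *insn)
{
	unsigned char nop = 0x90;	/* one-byte x86 NOP */

	/* per the warning above: other CPUs stopped, NMI/MCE quiesced */
	text_poke(insn, &nop, 1);
}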
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 67824f3bb97..bfc6cb7df7e 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -263,6 +263,9 @@ static void lapic_timer_setup(enum clock_event_mode mode,
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
apic_write_around(APIC_LVTT, v);
break;
+ case CLOCK_EVT_MODE_RESUME:
+ /* Nothing to do here */
+ break;
}
local_irq_restore(flags);
@@ -315,7 +318,7 @@ static void __devinit setup_APIC_timer(void)
#define LAPIC_CAL_LOOPS (HZ/10)
-static __initdata volatile int lapic_cal_loops = -1;
+static __initdata int lapic_cal_loops = -1;
static __initdata long lapic_cal_t1, lapic_cal_t2;
static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
@@ -485,7 +488,7 @@ void __init setup_boot_APIC_clock(void)
/* Let the interrupts run */
local_irq_enable();
- while(lapic_cal_loops <= LAPIC_CAL_LOOPS)
+ while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
cpu_relax();
local_irq_disable();
@@ -521,6 +524,9 @@ void __init setup_boot_APIC_clock(void)
*/
if (nmi_watchdog != NMI_IO_APIC)
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+ else
+ printk(KERN_WARNING "APIC timer registered as dummy,"
+ " due to nmi_watchdog=1!\n");
}
/* Setup the lapic or request the broadcast */
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 4112afe712b..47001d50a08 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -222,6 +222,7 @@
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/kernel.h>
+#include <linux/freezer.h>
#include <linux/smp.h>
#include <linux/dmi.h>
#include <linux/suspend.h>
@@ -2311,7 +2312,6 @@ static int __init apm_init(void)
remove_proc_entry("apm", NULL);
return err;
}
- kapmd_task->flags |= PF_NOFREEZE;
wake_up_process(kapmd_task);
if (num_online_cpus() > 1 && !smp ) {
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index 27a776c9044..7288ac88d74 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -17,6 +17,13 @@
#include <asm/thread_info.h>
#include <asm/elf.h>
+#include <xen/interface/xen.h>
+
+#ifdef CONFIG_LGUEST_GUEST
+#include <linux/lguest.h>
+#include "../../../drivers/lguest/lg.h"
+#endif
+
#define DEFINE(sym, val) \
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -59,6 +66,7 @@ void foo(void)
OFFSET(TI_addr_limit, thread_info, addr_limit);
OFFSET(TI_restart_block, thread_info, restart_block);
OFFSET(TI_sysenter_return, thread_info, sysenter_return);
+ OFFSET(TI_cpu, thread_info, cpu);
BLANK();
OFFSET(GDS_size, Xgt_desc_struct, size);
@@ -115,4 +123,25 @@ void foo(void)
OFFSET(PARAVIRT_iret, paravirt_ops, iret);
OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0);
#endif
+
+#ifdef CONFIG_XEN
+ BLANK();
+ OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
+ OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+#endif
+
+#ifdef CONFIG_LGUEST_GUEST
+ BLANK();
+ OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
+ OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc);
+ OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc);
+ OFFSET(LGUEST_PAGES_host_cr3, lguest_pages, state.host_cr3);
+ OFFSET(LGUEST_PAGES_host_sp, lguest_pages, state.host_sp);
+ OFFSET(LGUEST_PAGES_guest_gdt_desc, lguest_pages,state.guest_gdt_desc);
+ OFFSET(LGUEST_PAGES_guest_idt_desc, lguest_pages,state.guest_idt_desc);
+ OFFSET(LGUEST_PAGES_guest_gdt, lguest_pages, state.guest_gdt);
+ OFFSET(LGUEST_PAGES_regs_trapnum, lguest_pages, regs.trapnum);
+ OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode);
+ OFFSET(LGUEST_PAGES_regs, lguest_pages, regs);
+#endif
}
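
For reference, each OFFSET() entry compiles into an assembly marker that the
build scrapes into the generated asm-offsets.h, so assembly code can name
struct offsets symbolically; the value below is illustrative, the real one
depends on the struct layout:

#define TI_cpu 16	/* offsetof(struct thread_info, cpu), value illustrative */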
diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile
index 74f27a463db..778396c78d6 100644
--- a/arch/i386/kernel/cpu/Makefile
+++ b/arch/i386/kernel/cpu/Makefile
@@ -8,8 +8,7 @@ obj-y += amd.o
obj-y += cyrix.o
obj-y += centaur.o
obj-y += transmeta.o
-obj-y += intel.o intel_cacheinfo.o
-obj-y += rise.o
+obj-y += intel.o intel_cacheinfo.o addon_cpuid_features.o
obj-y += nexgen.o
obj-y += umc.o
diff --git a/arch/i386/kernel/cpu/addon_cpuid_features.c b/arch/i386/kernel/cpu/addon_cpuid_features.c
new file mode 100644
index 00000000000..3e91d3ee26e
--- /dev/null
+++ b/arch/i386/kernel/cpu/addon_cpuid_features.c
@@ -0,0 +1,50 @@
+
+/*
+ * Routines to identify additional cpu features that are scattered in
+ * cpuid space.
+ */
+
+#include <linux/cpu.h>
+
+#include <asm/processor.h>
+
+struct cpuid_bit {
+ u16 feature;
+ u8 reg;
+ u8 bit;
+ u32 level;
+};
+
+enum cpuid_regs {
+ CR_EAX = 0,
+ CR_ECX,
+ CR_EDX,
+ CR_EBX
+};
+
+void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
+{
+ u32 max_level;
+ u32 regs[4];
+ const struct cpuid_bit *cb;
+
+ static const struct cpuid_bit cpuid_bits[] = {
+ { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
+ { 0, 0, 0, 0 }
+ };
+
+ for (cb = cpuid_bits; cb->feature; cb++) {
+
+ /* Verify that the level is valid */
+ max_level = cpuid_eax(cb->level & 0xffff0000);
+ if (max_level < cb->level ||
+ max_level > (cb->level | 0xffff))
+ continue;
+
+ cpuid(cb->level, &regs[CR_EAX], &regs[CR_EBX],
+ &regs[CR_ECX], &regs[CR_EDX]);
+
+ if (regs[cb->reg] & (1 << cb->bit))
+ set_bit(cb->feature, c->x86_capability);
+ }
+}
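
Since the scan is table-driven, hooking up another scattered feature would be
a one-entry change; a hypothetical sketch (the feature name and cpuid location
are invented for illustration):

	static const struct cpuid_bit cpuid_bits[] = {
		{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
		{ X86_FEATURE_EXAMPLE, CR_ECX, 3, 0x00000007 },	/* hypothetical */
		{ 0, 0, 0, 0 }
	};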
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 6f47eeeb93e..c7ba455d5ac 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -231,6 +231,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
switch (c->x86) {
case 15:
+ /* Use K8 tuning for Fam10h and Fam11h */
+ case 0x10:
+ case 0x11:
set_bit(X86_FEATURE_K8, c->x86_capability);
break;
case 6:
@@ -272,8 +275,12 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
}
#endif
- if (cpuid_eax(0x80000000) >= 0x80000006)
- num_cache_leaves = 3;
+ if (cpuid_eax(0x80000000) >= 0x80000006) {
+ if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000))
+ num_cache_leaves = 4;
+ else
+ num_cache_leaves = 3;
+ }
if (amd_apic_timer_broken())
set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability);
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 794d593c47e..d506201d397 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -353,6 +353,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
if ( xlvl >= 0x80000004 )
get_model_name(c); /* Default name */
}
+
+ init_scattered_cpuid_features(c);
}
early_intel_workaround(c);
@@ -604,7 +606,6 @@ extern int nsc_init_cpu(void);
extern int amd_init_cpu(void);
extern int centaur_init_cpu(void);
extern int transmeta_init_cpu(void);
-extern int rise_init_cpu(void);
extern int nexgen_init_cpu(void);
extern int umc_init_cpu(void);
@@ -616,7 +617,6 @@ void __init early_cpu_init(void)
amd_init_cpu();
centaur_init_cpu();
transmeta_init_cpu();
- rise_init_cpu();
nexgen_init_cpu();
umc_init_cpu();
early_cpu_detect();
diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig
index e912aae9473..094118ba00d 100644
--- a/arch/i386/kernel/cpu/cpufreq/Kconfig
+++ b/arch/i386/kernel/cpu/cpufreq/Kconfig
@@ -90,10 +90,17 @@ config X86_POWERNOW_K8
If in doubt, say N.
config X86_POWERNOW_K8_ACPI
- bool
- depends on X86_POWERNOW_K8 && ACPI_PROCESSOR
- depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m)
+ bool "ACPI Support"
+ select ACPI_PROCESSOR
+ depends on X86_POWERNOW_K8
default y
+ help
+ This provides access to the K8s Processor Performance States via ACPI.
+ This driver is probably required for CPUFreq to work with multi-socket and
+ SMP systems. It is not required on at least some single-socket yet
+ multi-core systems, even if SMP is enabled.
+
+ It is safe to say Y here.
config X86_GX_SUSPMOD
tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
@@ -109,7 +116,7 @@ config X86_GX_SUSPMOD
config X86_SPEEDSTEP_CENTRINO
tristate "Intel Enhanced SpeedStep"
select CPU_FREQ_TABLE
- select X86_SPEEDSTEP_CENTRINO_TABLE if (!X86_SPEEDSTEP_CENTRINO_ACPI)
+ select X86_SPEEDSTEP_CENTRINO_TABLE
help
This adds the CPUFreq driver for Enhanced SpeedStep enabled
mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However,
@@ -121,20 +128,6 @@ config X86_SPEEDSTEP_CENTRINO
If in doubt, say N.
-config X86_SPEEDSTEP_CENTRINO_ACPI
- bool "Use ACPI tables to decode valid frequency/voltage (deprecated)"
- depends on X86_SPEEDSTEP_CENTRINO && ACPI_PROCESSOR
- depends on !(X86_SPEEDSTEP_CENTRINO = y && ACPI_PROCESSOR = m)
- help
- This is deprecated and this functionality is now merged into
- acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of
- speedstep_centrino.
- Use primarily the information provided in the BIOS ACPI tables
- to determine valid CPU frequency and voltage pairings. It is
- required for the driver to work on non-Banias CPUs.
-
- If in doubt, say Y.
-
config X86_SPEEDSTEP_CENTRINO_TABLE
bool "Built-in tables for Banias CPUs"
depends on X86_SPEEDSTEP_CENTRINO
@@ -230,7 +223,7 @@ comment "shared options"
config X86_ACPI_CPUFREQ_PROC_INTF
bool "/proc/acpi/processor/../performance interface (deprecated)"
depends on PROC_FS
- depends on X86_ACPI_CPUFREQ || X86_SPEEDSTEP_CENTRINO_ACPI || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI
+ depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI
help
This enables the deprecated /proc/acpi/processor/../performance
interface. While it is helpful for debugging, the generic,
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index 10baa3501ed..6f846bee210 100644
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -167,11 +167,13 @@ static void do_drv_read(struct drv_cmd *cmd)
static void do_drv_write(struct drv_cmd *cmd)
{
- u32 h = 0;
+ u32 lo, hi;
switch (cmd->type) {
case SYSTEM_INTEL_MSR_CAPABLE:
- wrmsr(cmd->addr.msr.reg, cmd->val, h);
+ rdmsr(cmd->addr.msr.reg, lo, hi);
+ lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
+ wrmsr(cmd->addr.msr.reg, lo, hi);
break;
case SYSTEM_IO_CAPABLE:
acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
@@ -372,7 +374,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
struct cpufreq_freqs freqs;
cpumask_t online_policy_cpus;
struct drv_cmd cmd;
- unsigned int msr;
unsigned int next_state = 0; /* Index into freq_table */
unsigned int next_perf_state = 0; /* Index into perf table */
unsigned int i;
@@ -417,11 +418,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
case SYSTEM_INTEL_MSR_CAPABLE:
cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
- msr =
- (u32) perf->states[next_perf_state].
- control & INTEL_MSR_RANGE;
- cmd.val = get_cur_val(online_policy_cpus);
- cmd.val = (cmd.val & ~INTEL_MSR_RANGE) | msr;
+ cmd.val = (u32) perf->states[next_perf_state].control;
break;
case SYSTEM_IO_CAPABLE:
cmd.type = SYSTEM_IO_CAPABLE;
@@ -668,8 +665,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
data->max_freq = perf->states[0].core_frequency * 1000;
/* table init */
for (i=0; i<perf->state_count; i++) {
- if (i>0 && perf->states[i].core_frequency ==
- perf->states[i-1].core_frequency)
+ if (i>0 && perf->states[i].core_frequency >=
+ data->freq_table[valid_states-1].frequency / 1000)
continue;
data->freq_table[valid_states].index = i;
diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c
index 0d49d73d1b7..66acd503991 100644
--- a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -391,8 +391,6 @@ static struct cpufreq_driver nforce2_driver = {
*/
static unsigned int nforce2_detect_chipset(void)
{
- u8 revision;
-
nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
PCI_DEVICE_ID_NVIDIA_NFORCE2,
PCI_ANY_ID, PCI_ANY_ID, NULL);
@@ -400,10 +398,8 @@ static unsigned int nforce2_detect_chipset(void)
if (nforce2_chipset_dev == NULL)
return -ENODEV;
- pci_read_config_byte(nforce2_chipset_dev, PCI_REVISION_ID, &revision);
-
printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n",
- revision);
+ nforce2_chipset_dev->revision);
printk(KERN_INFO
"cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n");
diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
index 6667e9cceb9..461dabc4e49 100644
--- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c
@@ -79,7 +79,7 @@
#include <linux/smp.h>
#include <linux/cpufreq.h>
#include <linux/pci.h>
-#include <asm/processor.h>
+#include <asm/processor-cyrix.h>
#include <asm/errno.h>
/* PCI config registers, all at F0 */
@@ -115,7 +115,6 @@ struct gxfreq_params {
u8 pci_suscfg;
u8 pci_pmer1;
u8 pci_pmer2;
- u8 pci_rev;
struct pci_dev *cs55x0;
};
@@ -276,7 +275,7 @@ static void gx_set_cpuspeed(unsigned int khz)
pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */
pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1);
- if (gx_params->pci_rev < 0x10) { /* CS5530(rev 1.2, 1.3) */
+ if (gx_params->cs55x0->revision < 0x10) { /* CS5530(rev 1.2, 1.3) */
suscfg = gx_params->pci_suscfg | SUSMOD;
} else { /* CS5530A,B.. */
suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE;
@@ -471,7 +470,6 @@ static int __init cpufreq_gx_init(void)
pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2));
pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration));
pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration));
- pci_read_config_byte(params->cs55x0, PCI_REVISION_ID, &params->pci_rev);
if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) {
kfree(params);
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index a3df9c039bd..ef8f0bc3fc7 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -29,6 +29,7 @@
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/delay.h>
#include <asm/msr.h>
#include <asm/timex.h>
@@ -55,7 +56,6 @@
/* Flags */
#define USE_ACPI_C3 (1 << 1)
#define USE_NORTHBRIDGE (1 << 2)
-#define USE_VT8235 (1 << 3)
static int cpu_model;
static unsigned int numscales=16;
@@ -63,22 +63,19 @@ static unsigned int fsb;
static const struct mV_pos *vrm_mV_table;
static const unsigned char *mV_vrm_table;
-struct f_msr {
- u8 vrm;
- u8 pos;
-};
-static struct f_msr f_msr_table[32];
static unsigned int highest_speed, lowest_speed; /* kHz */
static unsigned int minmult, maxmult;
static int can_scale_voltage;
static struct acpi_processor *pr = NULL;
static struct acpi_processor_cx *cx = NULL;
+static u32 acpi_regs_addr;
static u8 longhaul_flags;
-static u8 longhaul_pos;
+static unsigned int longhaul_index;
/* Module parameters */
static int scale_voltage;
+static int disable_acpi_c3;
#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
@@ -144,7 +141,7 @@ static void do_longhaul1(unsigned int clock_ratio_index)
rdmsrl(MSR_VIA_BCR2, bcr2.val);
/* Enable software clock multiplier */
bcr2.bits.ESOFTBF = 1;
- bcr2.bits.CLOCKMUL = clock_ratio_index;
+ bcr2.bits.CLOCKMUL = clock_ratio_index & 0xff;
/* Sync to timer tick */
safe_halt();
@@ -163,14 +160,12 @@ static void do_longhaul1(unsigned int clock_ratio_index)
/* For processor with Longhaul MSR */
-static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
+static void do_powersaver(int cx_address, unsigned int clock_ratio_index,
+ unsigned int dir)
{
union msr_longhaul longhaul;
- u8 dest_pos;
u32 t;
- dest_pos = f_msr_table[clock_ratio_index].pos;
-
rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
/* Setup new frequency */
longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
@@ -178,11 +173,11 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4;
/* Setup new voltage */
if (can_scale_voltage)
- longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm;
+ longhaul.bits.SoftVID = (clock_ratio_index >> 8) & 0x1f;
/* Sync to timer tick */
safe_halt();
/* Raise voltage if necessary */
- if (can_scale_voltage && longhaul_pos < dest_pos) {
+ if (can_scale_voltage && dir) {
longhaul.bits.EnableSoftVID = 1;
wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
/* Change voltage */
@@ -199,7 +194,6 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
}
longhaul.bits.EnableSoftVID = 0;
wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
- longhaul_pos = dest_pos;
}
/* Change frequency on next halt or sleep */
@@ -220,7 +214,7 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
/* Reduce voltage if necessary */
- if (can_scale_voltage && longhaul_pos > dest_pos) {
+ if (can_scale_voltage && !dir) {
longhaul.bits.EnableSoftVID = 1;
wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
/* Change voltage */
@@ -237,7 +231,6 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
}
longhaul.bits.EnableSoftVID = 0;
wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
- longhaul_pos = dest_pos;
}
}
@@ -248,25 +241,28 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
* Sets a new clock ratio.
*/
-static void longhaul_setstate(unsigned int clock_ratio_index)
+static void longhaul_setstate(unsigned int table_index)
{
+ unsigned int clock_ratio_index;
int speed, mult;
struct cpufreq_freqs freqs;
- static unsigned int old_ratio=-1;
unsigned long flags;
unsigned int pic1_mask, pic2_mask;
+ u16 bm_status = 0;
+ u32 bm_timeout = 1000;
+ unsigned int dir = 0;
- if (old_ratio == clock_ratio_index)
- return;
- old_ratio = clock_ratio_index;
-
- mult = clock_ratio[clock_ratio_index];
+ clock_ratio_index = longhaul_table[table_index].index;
+ /* Safety precautions */
+ mult = clock_ratio[clock_ratio_index & 0x1f];
if (mult == -1)
return;
-
speed = calc_speed(mult);
if ((speed > highest_speed) || (speed < lowest_speed))
return;
+ /* Voltage transition before frequency transition? */
+ if (can_scale_voltage && longhaul_index < table_index)
+ dir = 1;
freqs.old = calc_speed(longhaul_get_cpu_mult());
freqs.new = speed;
@@ -285,11 +281,24 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
outb(0xFF,0xA1); /* Overkill */
outb(0xFE,0x21); /* TMR0 only */
+ /* Wait while PCI bus is busy. */
+ if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE
+ || ((pr != NULL) && pr->flags.bm_control))) {
+ bm_status = inw(acpi_regs_addr);
+ bm_status &= 1 << 4;
+ while (bm_status && bm_timeout) {
+ outw(1 << 4, acpi_regs_addr);
+ bm_timeout--;
+ bm_status = inw(acpi_regs_addr);
+ bm_status &= 1 << 4;
+ }
+ }
+
if (longhaul_flags & USE_NORTHBRIDGE) {
/* Disable AGP and PCI arbiters */
outb(3, 0x22);
} else if ((pr != NULL) && pr->flags.bm_control) {
- /* Disable bus master arbitration */
+ /* Disable bus master arbitration */
acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
}
switch (longhaul_version) {
@@ -314,9 +323,9 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
if (longhaul_flags & USE_ACPI_C3) {
/* Don't allow wakeup */
acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
- do_powersaver(cx->address, clock_ratio_index);
+ do_powersaver(cx->address, clock_ratio_index, dir);
} else {
- do_powersaver(0, clock_ratio_index);
+ do_powersaver(0, clock_ratio_index, dir);
}
break;
}
@@ -336,6 +345,9 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
freqs.new = calc_speed(longhaul_get_cpu_mult());
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+ if (!bm_timeout)
+ printk(KERN_INFO PFX "Warning: Timeout while waiting for idle PCI bus.\n");
}
/*
@@ -369,7 +381,8 @@ static int guess_fsb(int mult)
static int __init longhaul_get_ranges(void)
{
- unsigned int j, k = 0;
+ unsigned int i, j, k = 0;
+ unsigned int ratio;
int mult;
/* Get current frequency */
@@ -423,8 +436,7 @@ static int __init longhaul_get_ranges(void)
if(!longhaul_table)
return -ENOMEM;
- for (j=0; j < numscales; j++) {
- unsigned int ratio;
+ for (j = 0; j < numscales; j++) {
ratio = clock_ratio[j];
if (ratio == -1)
continue;
@@ -434,13 +446,41 @@ static int __init longhaul_get_ranges(void)
longhaul_table[k].index = j;
k++;
}
+ if (k <= 1) {
+ kfree(longhaul_table);
+ return -ENODEV;
+ }
+ /* Sort */
+ for (j = 0; j < k - 1; j++) {
+ unsigned int min_f, min_i;
+ min_f = longhaul_table[j].frequency;
+ min_i = j;
+ for (i = j + 1; i < k; i++) {
+ if (longhaul_table[i].frequency < min_f) {
+ min_f = longhaul_table[i].frequency;
+ min_i = i;
+ }
+ }
+ if (min_i != j) {
+ unsigned int temp;
+ temp = longhaul_table[j].frequency;
+ longhaul_table[j].frequency = longhaul_table[min_i].frequency;
+ longhaul_table[min_i].frequency = temp;
+ temp = longhaul_table[j].index;
+ longhaul_table[j].index = longhaul_table[min_i].index;
+ longhaul_table[min_i].index = temp;
+ }
+ }
longhaul_table[k].frequency = CPUFREQ_TABLE_END;
- if (!k) {
- kfree (longhaul_table);
- return -EINVAL;
- }
+ /* Find index we are running on */
+ for (j = 0; j < k; j++) {
+ if (clock_ratio[longhaul_table[j].index & 0x1f] == mult) {
+ longhaul_index = j;
+ break;
+ }
+ }
return 0;
}
@@ -448,7 +488,7 @@ static int __init longhaul_get_ranges(void)
static void __init longhaul_setup_voltagescaling(void)
{
union msr_longhaul longhaul;
- struct mV_pos minvid, maxvid;
+ struct mV_pos minvid, maxvid, vid;
unsigned int j, speed, pos, kHz_step, numvscales;
int min_vid_speed;
@@ -459,11 +499,11 @@ static void __init longhaul_setup_voltagescaling(void)
}
if (!longhaul.bits.VRMRev) {
- printk (KERN_INFO PFX "VRM 8.5\n");
+ printk(KERN_INFO PFX "VRM 8.5\n");
vrm_mV_table = &vrm85_mV[0];
mV_vrm_table = &mV_vrm85[0];
} else {
- printk (KERN_INFO PFX "Mobile VRM\n");
+ printk(KERN_INFO PFX "Mobile VRM\n");
if (cpu_model < CPU_NEHEMIAH)
return;
vrm_mV_table = &mobilevrm_mV[0];
@@ -523,7 +563,6 @@ static void __init longhaul_setup_voltagescaling(void)
/* Calculate kHz for one voltage step */
kHz_step = (highest_speed - min_vid_speed) / numvscales;
-
j = 0;
while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) {
speed = longhaul_table[j].frequency;
@@ -531,15 +570,14 @@ static void __init longhaul_setup_voltagescaling(void)
pos = (speed - min_vid_speed) / kHz_step + minvid.pos;
else
pos = minvid.pos;
- f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos];
- f_msr_table[longhaul_table[j].index].pos = pos;
+ longhaul_table[j].index |= mV_vrm_table[pos] << 8;
+ vid = vrm_mV_table[mV_vrm_table[pos]];
+ printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", speed, j, vid.mV);
j++;
}
- longhaul_pos = maxvid.pos;
can_scale_voltage = 1;
- printk(KERN_INFO PFX "Voltage scaling enabled. "
- "Use of \"conservative\" governor is highly recommended.\n");
+ printk(KERN_INFO PFX "Voltage scaling enabled.\n");
}
@@ -553,15 +591,44 @@ static int longhaul_target(struct cpufreq_policy *policy,
unsigned int target_freq, unsigned int relation)
{
unsigned int table_index = 0;
- unsigned int new_clock_ratio = 0;
+ unsigned int i;
+ unsigned int dir = 0;
+ u8 vid, current_vid;
if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index))
return -EINVAL;
- new_clock_ratio = longhaul_table[table_index].index & 0xFF;
-
- longhaul_setstate(new_clock_ratio);
+ /* Don't set same frequency again */
+ if (longhaul_index == table_index)
+ return 0;
+ if (!can_scale_voltage)
+ longhaul_setstate(table_index);
+ else {
+ /* On test system voltage transitions exceeding single
+ * step up or down were turning motherboard off. Both
+ * "ondemand" and "userspace" are unsafe. C7 is doing
+ * this in hardware, C3 is old and we need to do this
+ * in software. */
+ i = longhaul_index;
+ current_vid = (longhaul_table[longhaul_index].index >> 8) & 0x1f;
+ if (table_index > longhaul_index)
+ dir = 1;
+ while (i != table_index) {
+ vid = (longhaul_table[i].index >> 8) & 0x1f;
+ if (vid != current_vid) {
+ longhaul_setstate(i);
+ current_vid = vid;
+ msleep(200);
+ }
+ if (dir)
+ i++;
+ else
+ i--;
+ }
+ longhaul_setstate(table_index);
+ }
+ longhaul_index = table_index;
return 0;
}
@@ -590,11 +657,10 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
static int enable_arbiter_disable(void)
{
struct pci_dev *dev;
- int status;
+ int status = 1;
int reg;
u8 pci_cmd;
- status = 1;
/* Find PLE133 host bridge */
reg = 0x78;
dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0,
@@ -627,13 +693,17 @@ static int enable_arbiter_disable(void)
return 0;
}
-static int longhaul_setup_vt8235(void)
+static int longhaul_setup_southbridge(void)
{
struct pci_dev *dev;
u8 pci_cmd;
/* Find VT8235 southbridge */
dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL);
+ if (dev == NULL)
+ /* Find VT8237 southbridge */
+ dev = pci_get_device(PCI_VENDOR_ID_VIA,
+ PCI_DEVICE_ID_VIA_8237, NULL);
if (dev != NULL) {
/* Set transition time to max */
pci_read_config_byte(dev, 0xec, &pci_cmd);
@@ -645,6 +715,14 @@ static int longhaul_setup_vt8235(void)
pci_read_config_byte(dev, 0xe5, &pci_cmd);
pci_cmd |= 1 << 7;
pci_write_config_byte(dev, 0xe5, pci_cmd);
+ /* Get address of ACPI registers block*/
+ pci_read_config_byte(dev, 0x81, &pci_cmd);
+ if (pci_cmd & 1 << 7) {
+ pci_read_config_dword(dev, 0x88, &acpi_regs_addr);
+ acpi_regs_addr &= 0xff00;
+ printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", acpi_regs_addr);
+ }
+
pci_dev_put(dev);
return 1;
}
@@ -657,7 +735,6 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
char *cpuname=NULL;
int ret;
u32 lo, hi;
- int vt8235_present;
/* Check what we have on this motherboard */
switch (c->x86_model) {
@@ -755,7 +832,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
};
/* Doesn't hurt */
- vt8235_present = longhaul_setup_vt8235();
+ longhaul_setup_southbridge();
/* Find ACPI data for processor */
acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
@@ -765,35 +842,29 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
/* Check ACPI support for C3 state */
if (pr != NULL && longhaul_version == TYPE_POWERSAVER) {
cx = &pr->power.states[ACPI_STATE_C3];
- if (cx->address > 0 && cx->latency <= 1000) {
+ if (cx->address > 0 && cx->latency <= 1000)
longhaul_flags |= USE_ACPI_C3;
- goto print_support_type;
- }
}
+ /* Disable if it isn't working */
+ if (disable_acpi_c3)
+ longhaul_flags &= ~USE_ACPI_C3;
/* Check if northbridge is friendly */
- if (enable_arbiter_disable()) {
+ if (enable_arbiter_disable())
longhaul_flags |= USE_NORTHBRIDGE;
- goto print_support_type;
- }
- /* Use VT8235 southbridge if present */
- if (longhaul_version == TYPE_POWERSAVER && vt8235_present) {
- longhaul_flags |= USE_VT8235;
- goto print_support_type;
- }
+
/* Check ACPI support for bus master arbiter disable */
- if ((pr == NULL) || !(pr->flags.bm_control)) {
+ if (!(longhaul_flags & USE_ACPI_C3
+ || longhaul_flags & USE_NORTHBRIDGE)
+ && ((pr == NULL) || !(pr->flags.bm_control))) {
printk(KERN_ERR PFX
"No ACPI support. Unsupported northbridge.\n");
return -ENODEV;
}
-print_support_type:
if (longhaul_flags & USE_NORTHBRIDGE)
- printk (KERN_INFO PFX "Using northbridge support.\n");
- else if (longhaul_flags & USE_VT8235)
- printk (KERN_INFO PFX "Using VT8235 support.\n");
- else
- printk (KERN_INFO PFX "Using ACPI support.\n");
+ printk(KERN_INFO PFX "Using northbridge support.\n");
+ if (longhaul_flags & USE_ACPI_C3)
+ printk(KERN_INFO PFX "Using ACPI support.\n");
ret = longhaul_get_ranges();
if (ret != 0)
@@ -885,6 +956,9 @@ static void __exit longhaul_exit(void)
kfree(longhaul_table);
}
+module_param (disable_acpi_c3, int, 0644);
+MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support");
+
module_param (scale_voltage, int, 0644);
MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
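
Usage note for the new parameter: loading the driver with
modprobe longhaul disable_acpi_c3=1 (or booting with
longhaul.disable_acpi_c3=1 when built in) clears USE_ACPI_C3 before the
support-type checks above run.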
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h
index 102548f1284..4fcc320997d 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.h
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h
@@ -180,7 +180,7 @@ static const int __initdata ezrat_clock_ratio[32] = {
-1, /* 0000 -> RESERVED (10.0x) */
110, /* 0001 -> 11.0x */
- 120, /* 0010 -> 12.0x */
+ -1, /* 0010 -> 12.0x */
-1, /* 0011 -> RESERVED (9.0x)*/
105, /* 0100 -> 10.5x */
115, /* 0101 -> 11.5x */
@@ -237,7 +237,7 @@ static const int __initdata ezrat_eblcr[32] = {
static const int __initdata nehemiah_clock_ratio[32] = {
100, /* 0000 -> 10.0x */
- 160, /* 0001 -> 16.0x */
+ -1, /* 0001 -> 16.0x */
40, /* 0010 -> 4.0x */
90, /* 0011 -> 9.0x */
95, /* 0100 -> 9.5x */
@@ -252,10 +252,10 @@ static const int __initdata nehemiah_clock_ratio[32] = {
75, /* 1101 -> 7.5x */
85, /* 1110 -> 8.5x */
120, /* 1111 -> 12.0x */
- 100, /* 0000 -> 10.0x */
+ -1, /* 0000 -> 10.0x */
110, /* 0001 -> 11.0x */
- 120, /* 0010 -> 12.0x */
- 90, /* 0011 -> 9.0x */
+ -1, /* 0010 -> 12.0x */
+ -1, /* 0011 -> 9.0x */
105, /* 0100 -> 10.5x */
115, /* 0101 -> 11.5x */
125, /* 0110 -> 12.5x */
@@ -267,7 +267,7 @@ static const int __initdata nehemiah_clock_ratio[32] = {
145, /* 1100 -> 14.5x */
155, /* 1101 -> 15.5x */
-1, /* 1110 -> RESERVED (13.0x) */
- 120, /* 1111 -> 12.0x */
+ -1, /* 1111 -> 12.0x */
};
static const int __initdata nehemiah_eblcr[32] = {
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
index 4ade55c5f33..34ed53a0673 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
@@ -599,14 +599,17 @@ static void print_basics(struct powernow_k8_data *data)
for (j = 0; j < data->numps; j++) {
if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) {
if (cpu_family == CPU_HW_PSTATE) {
- printk(KERN_INFO PFX " %d : fid 0x%x gid 0x%x (%d MHz)\n", j, (data->powernow_table[j].index & 0xff00) >> 8,
- (data->powernow_table[j].index & 0xff0000) >> 16,
- data->powernow_table[j].frequency/1000);
+ printk(KERN_INFO PFX " %d : fid 0x%x did 0x%x (%d MHz)\n",
+ j,
+ (data->powernow_table[j].index & 0xff00) >> 8,
+ (data->powernow_table[j].index & 0xff0000) >> 16,
+ data->powernow_table[j].frequency/1000);
} else {
- printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", j,
- data->powernow_table[j].index & 0xff,
- data->powernow_table[j].frequency/1000,
- data->powernow_table[j].index >> 8);
+ printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n",
+ j,
+ data->powernow_table[j].index & 0xff,
+ data->powernow_table[j].frequency/1000,
+ data->powernow_table[j].index >> 8);
}
}
}
@@ -1086,7 +1089,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
if (cpu_family == CPU_HW_PSTATE)
dprintk("targ: curr fid 0x%x, did 0x%x\n",
- data->currfid, data->currvid);
+ data->currfid, data->currdid);
else {
dprintk("targ: curr fid 0x%x, vid 0x%x\n",
data->currfid, data->currvid);
@@ -1322,16 +1325,22 @@ static struct cpufreq_driver cpufreq_amd64_driver = {
static int __cpuinit powernowk8_init(void)
{
unsigned int i, supported_cpus = 0;
+ unsigned int booted_cores = 1;
for_each_online_cpu(i) {
if (check_supported_cpu(i))
supported_cpus++;
}
+#ifdef CONFIG_SMP
+ booted_cores = cpu_data[0].booted_cores;
+#endif
+
if (supported_cpus == num_online_cpus()) {
printk(KERN_INFO PFX "Found %d %s "
- "processors (" VERSION ")\n", supported_cpus,
- boot_cpu_data.x86_model_id);
+ "processors (%d cpu cores) (" VERSION ")\n",
+ supported_cpus/booted_cores,
+ boot_cpu_data.x86_model_id, supported_cpus);
return cpufreq_register_driver(&cpufreq_amd64_driver);
}
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
index 35489fd6885..6c5dc2c85ae 100644
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -21,12 +21,6 @@
#include <linux/delay.h>
#include <linux/compiler.h>
-#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
-#include <linux/acpi.h>
-#include <linux/dmi.h>
-#include <acpi/processor.h>
-#endif
-
#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
@@ -257,9 +251,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
/* Matched a non-match */
dprintk("no table support for CPU model \"%s\"\n",
cpu->x86_model_id);
-#ifndef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
- dprintk("try compiling with CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI enabled\n");
-#endif
+ dprintk("try using the acpi-cpufreq driver\n");
return -ENOENT;
}
@@ -346,213 +338,6 @@ static unsigned int get_cur_freq(unsigned int cpu)
}
-#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
-
-static struct acpi_processor_performance *acpi_perf_data[NR_CPUS];
-
-/*
- * centrino_cpu_early_init_acpi - Do the preregistering with ACPI P-States
- * library
- *
- * Before doing the actual init, we need to do _PSD related setup whenever
- * supported by the BIOS. These are handled by this early_init routine.
- */
-static int centrino_cpu_early_init_acpi(void)
-{
- unsigned int i, j;
- struct acpi_processor_performance *data;
-
- for_each_possible_cpu(i) {
- data = kzalloc(sizeof(struct acpi_processor_performance),
- GFP_KERNEL);
- if (!data) {
- for_each_possible_cpu(j) {
- kfree(acpi_perf_data[j]);
- acpi_perf_data[j] = NULL;
- }
- return (-ENOMEM);
- }
- acpi_perf_data[i] = data;
- }
-
- acpi_processor_preregister_performance(acpi_perf_data);
- return 0;
-}
-
-
-#ifdef CONFIG_SMP
-/*
- * Some BIOSes do SW_ANY coordination internally, either set it up in hw
- * or do it in BIOS firmware and won't inform about it to OS. If not
- * detected, this has a side effect of making CPU run at a different speed
- * than OS intended it to run at. Detect it and handle it cleanly.
- */
-static int bios_with_sw_any_bug;
-static int sw_any_bug_found(struct dmi_system_id *d)
-{
- bios_with_sw_any_bug = 1;
- return 0;
-}
-
-static struct dmi_system_id sw_any_bug_dmi_table[] = {
- {
- .callback = sw_any_bug_found,
- .ident = "Supermicro Server X6DLP",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
- DMI_MATCH(DMI_BIOS_VERSION, "080010"),
- DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
- },
- },
- { }
-};
-#endif
-
-/*
- * centrino_cpu_init_acpi - register with ACPI P-States library
- *
- * Register with the ACPI P-States library (part of drivers/acpi/processor.c)
- * in order to determine correct frequency and voltage pairings by reading
- * the _PSS of the ACPI DSDT or SSDT tables.
- */
-static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
-{
- unsigned long cur_freq;
- int result = 0, i;
- unsigned int cpu = policy->cpu;
- struct acpi_processor_performance *p;
-
- p = acpi_perf_data[cpu];
-
- /* register with ACPI core */
- if (acpi_processor_register_performance(p, cpu)) {
- dprintk(PFX "obtaining ACPI data failed\n");
- return -EIO;
- }
-
- policy->shared_type = p->shared_type;
- /*
- * Will let policy->cpus know about dependency only when software
- * coordination is required.
- */
- if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
- policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
- policy->cpus = p->shared_cpu_map;
- }
-
-#ifdef CONFIG_SMP
- dmi_check_system(sw_any_bug_dmi_table);
- if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
- policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
- policy->cpus = cpu_core_map[cpu];
- }
-#endif
-
- /* verify the acpi_data */
- if (p->state_count <= 1) {
- dprintk("No P-States\n");
- result = -ENODEV;
- goto err_unreg;
- }
-
- if ((p->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
- (p->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
- dprintk("Invalid control/status registers (%x - %x)\n",
- p->control_register.space_id, p->status_register.space_id);
- result = -EIO;
- goto err_unreg;
- }
-
- for (i=0; i<p->state_count; i++) {
- if ((p->states[i].control & INTEL_MSR_RANGE) !=
- (p->states[i].status & INTEL_MSR_RANGE)) {
- dprintk("Different MSR bits in control (%llu) and status (%llu)\n",
- p->states[i].control, p->states[i].status);
- result = -EINVAL;
- goto err_unreg;
- }
-
- if (!p->states[i].core_frequency) {
- dprintk("Zero core frequency for state %u\n", i);
- result = -EINVAL;
- goto err_unreg;
- }
-
- if (p->states[i].core_frequency > p->states[0].core_frequency) {
- dprintk("P%u has larger frequency (%llu) than P0 (%llu), skipping\n", i,
- p->states[i].core_frequency, p->states[0].core_frequency);
- p->states[i].core_frequency = 0;
- continue;
- }
- }
-
- centrino_model[cpu] = kzalloc(sizeof(struct cpu_model), GFP_KERNEL);
- if (!centrino_model[cpu]) {
- result = -ENOMEM;
- goto err_unreg;
- }
-
- centrino_model[cpu]->model_name=NULL;
- centrino_model[cpu]->max_freq = p->states[0].core_frequency * 1000;
- centrino_model[cpu]->op_points = kmalloc(sizeof(struct cpufreq_frequency_table) *
- (p->state_count + 1), GFP_KERNEL);
- if (!centrino_model[cpu]->op_points) {
- result = -ENOMEM;
- goto err_kfree;
- }
-
- for (i=0; i<p->state_count; i++) {
- centrino_model[cpu]->op_points[i].index = p->states[i].control & INTEL_MSR_RANGE;
- centrino_model[cpu]->op_points[i].frequency = p->states[i].core_frequency * 1000;
- dprintk("adding state %i with frequency %u and control value %04x\n",
- i, centrino_model[cpu]->op_points[i].frequency, centrino_model[cpu]->op_points[i].index);
- }
- centrino_model[cpu]->op_points[p->state_count].frequency = CPUFREQ_TABLE_END;
-
- cur_freq = get_cur_freq(cpu);
-
- for (i=0; i<p->state_count; i++) {
- if (!p->states[i].core_frequency) {
- dprintk("skipping state %u\n", i);
- centrino_model[cpu]->op_points[i].frequency = CPUFREQ_ENTRY_INVALID;
- continue;
- }
-
- if (extract_clock(centrino_model[cpu]->op_points[i].index, cpu, 0) !=
- (centrino_model[cpu]->op_points[i].frequency)) {
- dprintk("Invalid encoded frequency (%u vs. %u)\n",
- extract_clock(centrino_model[cpu]->op_points[i].index, cpu, 0),
- centrino_model[cpu]->op_points[i].frequency);
- result = -EINVAL;
- goto err_kfree_all;
- }
-
- if (cur_freq == centrino_model[cpu]->op_points[i].frequency)
- p->state = i;
- }
-
- /* notify BIOS that we exist */
- acpi_processor_notify_smm(THIS_MODULE);
- printk("speedstep-centrino with X86_SPEEDSTEP_CENTRINO_ACPI "
- "config is deprecated.\n "
- "Use X86_ACPI_CPUFREQ (acpi-cpufreq) instead.\n" );
-
- return 0;
-
- err_kfree_all:
- kfree(centrino_model[cpu]->op_points);
- err_kfree:
- kfree(centrino_model[cpu]);
- err_unreg:
- acpi_processor_unregister_performance(p, cpu);
- dprintk(PFX "invalid ACPI data\n");
- return (result);
-}
-#else
-static inline int centrino_cpu_init_acpi(struct cpufreq_policy *policy) { return -ENODEV; }
-static inline int centrino_cpu_early_init_acpi(void) { return 0; }
-#endif
-
static int centrino_cpu_init(struct cpufreq_policy *policy)
{
struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu];
@@ -568,27 +353,25 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC))
centrino_driver.flags |= CPUFREQ_CONST_LOOPS;
- if (centrino_cpu_init_acpi(policy)) {
- if (policy->cpu != 0)
- return -ENODEV;
+ if (policy->cpu != 0)
+ return -ENODEV;
- for (i = 0; i < N_IDS; i++)
- if (centrino_verify_cpu_id(cpu, &cpu_ids[i]))
- break;
+ for (i = 0; i < N_IDS; i++)
+ if (centrino_verify_cpu_id(cpu, &cpu_ids[i]))
+ break;
- if (i != N_IDS)
- centrino_cpu[policy->cpu] = &cpu_ids[i];
+ if (i != N_IDS)
+ centrino_cpu[policy->cpu] = &cpu_ids[i];
- if (!centrino_cpu[policy->cpu]) {
- dprintk("found unsupported CPU with "
- "Enhanced SpeedStep: send /proc/cpuinfo to "
- MAINTAINER "\n");
- return -ENODEV;
- }
+ if (!centrino_cpu[policy->cpu]) {
+ dprintk("found unsupported CPU with "
+ "Enhanced SpeedStep: send /proc/cpuinfo to "
+ MAINTAINER "\n");
+ return -ENODEV;
+ }
- if (centrino_cpu_init_table(policy)) {
- return -ENODEV;
- }
+ if (centrino_cpu_init_table(policy)) {
+ return -ENODEV;
}
/* Check to see if Enhanced SpeedStep is enabled, and try to
@@ -634,20 +417,6 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy)
cpufreq_frequency_table_put_attr(cpu);
-#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
- if (!centrino_model[cpu]->model_name) {
- static struct acpi_processor_performance *p;
-
- if (acpi_perf_data[cpu]) {
- p = acpi_perf_data[cpu];
- dprintk("unregistering and freeing ACPI data\n");
- acpi_processor_unregister_performance(p, cpu);
- kfree(centrino_model[cpu]->op_points);
- kfree(centrino_model[cpu]);
- }
- }
-#endif
-
centrino_model[cpu] = NULL;
return 0;
@@ -849,25 +618,12 @@ static int __init centrino_init(void)
if (!cpu_has(cpu, X86_FEATURE_EST))
return -ENODEV;
- centrino_cpu_early_init_acpi();
-
return cpufreq_register_driver(&centrino_driver);
}
static void __exit centrino_exit(void)
{
-#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
- unsigned int j;
-#endif
-
cpufreq_unregister_driver(&centrino_driver);
-
-#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
- for_each_possible_cpu(j) {
- kfree(acpi_perf_data[j]);
- acpi_perf_data[j] = NULL;
- }
-#endif
}
MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>");
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c
index 698f980eb44..a5b2346faf1 100644
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c
@@ -205,7 +205,6 @@ static unsigned int speedstep_detect_chipset (void)
* host brige. Abort on these systems.
*/
static struct pci_dev *hostbridge;
- u8 rev = 0;
hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_82815_MC,
@@ -216,8 +215,7 @@ static unsigned int speedstep_detect_chipset (void)
if (!hostbridge)
return 2; /* 2-M */
- pci_read_config_byte(hostbridge, PCI_REVISION_ID, &rev);
- if (rev < 5) {
+ if (hostbridge->revision < 5) {
dprintk("hostbridge does not support speedstep\n");
speedstep_chipset_dev = NULL;
pci_dev_put(hostbridge);
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index e88d2fba156..122d2d75aa9 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -4,7 +4,7 @@
#include <linux/pci.h>
#include <asm/dma.h>
#include <asm/io.h>
-#include <asm/processor.h>
+#include <asm/processor-cyrix.h>
#include <asm/timer.h>
#include <asm/pci-direct.h>
#include <asm/tsc.h>
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index e5be819492e..d5a456d27d8 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -4,7 +4,7 @@
* Changes:
* Venkatesh Pallipadi : Adding cache identification through cpuid(4)
* Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
- * Andi Kleen : CPUID4 emulation on AMD.
+ * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
*/
#include <linux/init.h>
@@ -135,7 +135,7 @@ unsigned short num_cache_leaves;
/* AMD doesn't have CPUID4. Emulate it here to report the same
information to the user. This makes some assumptions about the machine:
- No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs.
+ L2 not shared, no SMT etc. that is currently true on AMD CPUs.
In theory the TLBs could be reported as fake type (they are in "dummy").
Maybe later */
@@ -159,13 +159,26 @@ union l2_cache {
unsigned val;
};
+union l3_cache {
+ struct {
+ unsigned line_size : 8;
+ unsigned lines_per_tag : 4;
+ unsigned assoc : 4;
+ unsigned res : 2;
+ unsigned size_encoded : 14;
+ };
+ unsigned val;
+};
+
static const unsigned short assocs[] = {
[1] = 1, [2] = 2, [4] = 4, [6] = 8,
- [8] = 16,
+ [8] = 16, [0xa] = 32, [0xb] = 48,
+ [0xc] = 64,
[0xf] = 0xffff // ??
- };
-static const unsigned char levels[] = { 1, 1, 2 };
-static const unsigned char types[] = { 1, 2, 3 };
+};
+
+static const unsigned char levels[] = { 1, 1, 2, 3 };
+static const unsigned char types[] = { 1, 2, 3, 3 };
static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
union _cpuid4_leaf_ebx *ebx,
@@ -175,37 +188,58 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
unsigned line_size, lines_per_tag, assoc, size_in_kb;
union l1_cache l1i, l1d;
union l2_cache l2;
+ union l3_cache l3;
+ union l1_cache *l1 = &l1d;
eax->full = 0;
ebx->full = 0;
ecx->full = 0;
cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
- cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy);
-
- if (leaf > 2 || !l1d.val || !l1i.val || !l2.val)
- return;
-
- eax->split.is_self_initializing = 1;
- eax->split.type = types[leaf];
- eax->split.level = levels[leaf];
- eax->split.num_threads_sharing = 0;
- eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
-
- if (leaf <= 1) {
- union l1_cache *l1 = leaf == 0 ? &l1d : &l1i;
+ cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
+
+ switch (leaf) {
+ case 1:
+ l1 = &l1i;
+ case 0:
+ if (!l1->val)
+ return;
assoc = l1->assoc;
line_size = l1->line_size;
lines_per_tag = l1->lines_per_tag;
size_in_kb = l1->size_in_kb;
- } else {
+ break;
+ case 2:
+ if (!l2.val)
+ return;
assoc = l2.assoc;
line_size = l2.line_size;
lines_per_tag = l2.lines_per_tag;
/* cpu_data has errata corrections for K7 applied */
size_in_kb = current_cpu_data.x86_cache_size;
+ break;
+ case 3:
+ if (!l3.val)
+ return;
+ assoc = l3.assoc;
+ line_size = l3.line_size;
+ lines_per_tag = l3.lines_per_tag;
+ size_in_kb = l3.size_encoded * 512;
+ break;
+ default:
+ return;
}
+ eax->split.is_self_initializing = 1;
+ eax->split.type = types[leaf];
+ eax->split.level = levels[leaf];
+ if (leaf == 3)
+ eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1;
+ else
+ eax->split.num_threads_sharing = 0;
+ eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
+
+
if (assoc == 0xf)
eax->split.is_fully_associative = 1;
ebx->split.coherency_line_size = line_size - 1;
@@ -239,8 +273,7 @@ static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le
return 0;
}
-/* will only be called once; __init is safe here */
-static int __init find_num_cache_leaves(void)
+static int __cpuinit find_num_cache_leaves(void)
{
unsigned int eax, ebx, ecx, edx;
union _cpuid4_leaf_eax cache_eax;
@@ -710,7 +743,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
return retval;
}
-static void __cpuexit cache_remove_dev(struct sys_device * sys_dev)
+static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
{
unsigned int cpu = sys_dev->id;
unsigned long i;
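
The L3 path added above decodes AMD's CPUID leaf 0x80000006: L2 details come back in ECX and L3 details in EDX, with the L3 size reported in 512 KB units and the associativity as an index into the extended assocs[] table (e.g. 0xa means 32-way). A runnable user-space sketch of the same decode, assuming GCC/Clang on an AMD CPU and the bitfield layout of the l2_cache/l3_cache unions shown in the patch:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned eax, ebx, ecx, edx;

	if (!__get_cpuid(0x80000006, &eax, &ebx, &ecx, &edx))
		return 1;

	/* ECX (L2): bits 31..16 size in KB, 15..12 assoc code, 7..0 line size */
	printf("L2: %u KB, assoc code 0x%x, line %u bytes\n",
	       ecx >> 16, (ecx >> 12) & 0xf, ecx & 0xff);

	/* EDX (L3): bits 31..18 size in 512 KB units, 15..12 assoc code,
	 * 7..0 line size -- matching the l3_cache union in the patch */
	printf("L3: %u KB, assoc code 0x%x, line %u bytes\n",
	       (edx >> 18) * 512, (edx >> 12) & 0xf, edx & 0xff);
	return 0;
}
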
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c
index 56cd485b127..34c781eddee 100644
--- a/arch/i386/kernel/cpu/mcheck/mce.c
+++ b/arch/i386/kernel/cpu/mcheck/mce.c
@@ -60,6 +60,20 @@ void mcheck_init(struct cpuinfo_x86 *c)
}
}
+static unsigned long old_cr4 __initdata;
+
+void __init stop_mce(void)
+{
+ old_cr4 = read_cr4();
+ clear_in_cr4(X86_CR4_MCE);
+}
+
+void __init restart_mce(void)
+{
+ if (old_cr4 & X86_CR4_MCE)
+ set_in_cr4(X86_CR4_MCE);
+}
+
static int __init mcheck_disable(char *str)
{
mce_disabled = 1;
diff --git a/arch/i386/kernel/cpu/mcheck/non-fatal.c b/arch/i386/kernel/cpu/mcheck/non-fatal.c
index 6b5d3518a1c..bf39409b383 100644
--- a/arch/i386/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/i386/kernel/cpu/mcheck/non-fatal.c
@@ -57,7 +57,7 @@ static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
static void mce_work_fn(struct work_struct *work)
{
on_each_cpu(mce_checkregs, NULL, 1, 1);
- schedule_delayed_work(&mce_work, MCE_RATE);
+ schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
}
static int __init init_nonfatal_mce_checker(void)
@@ -82,7 +82,7 @@ static int __init init_nonfatal_mce_checker(void)
/*
* Check for non-fatal errors every MCE_RATE s
*/
- schedule_delayed_work(&mce_work, MCE_RATE);
+ schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
printk(KERN_INFO "Machine check exception polling timer started.\n");
return 0;
}
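
round_jiffies_relative() rounds a relative timeout up so it expires on a whole-second boundary; unrelated periodic timers then tend to fire together and the CPU can stay in deep idle states longer, and a machine-check poll has no need for sub-jiffy precision. A hedged kernel-style sketch of the pattern for any self-rearming work item (names are generic, not from this file):

	static void poll_fn(struct work_struct *work);
	static DECLARE_DELAYED_WORK(poll_work, poll_fn);

	static void poll_fn(struct work_struct *work)
	{
		/* ... do the periodic check ... */
		schedule_delayed_work(&poll_work, round_jiffies_relative(60 * HZ));
	}
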
diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c
index 7ba7c3abd3a..1203dc5ab87 100644
--- a/arch/i386/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c
@@ -134,19 +134,21 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
int err;
sys_dev = get_cpu_sysdev(cpu);
- mutex_lock(&therm_cpu_lock);
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
+ mutex_lock(&therm_cpu_lock);
err = thermal_throttle_add_dev(sys_dev);
+ mutex_unlock(&therm_cpu_lock);
WARN_ON(err);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
+ mutex_lock(&therm_cpu_lock);
thermal_throttle_remove_dev(sys_dev);
+ mutex_unlock(&therm_cpu_lock);
break;
}
- mutex_unlock(&therm_cpu_lock);
return NOTIFY_OK;
}
diff --git a/arch/i386/kernel/cpu/mtrr/cyrix.c b/arch/i386/kernel/cpu/mtrr/cyrix.c
index 1001f1e0fe6..2287d4863a8 100644
--- a/arch/i386/kernel/cpu/mtrr/cyrix.c
+++ b/arch/i386/kernel/cpu/mtrr/cyrix.c
@@ -3,6 +3,7 @@
#include <asm/mtrr.h>
#include <asm/msr.h>
#include <asm/io.h>
+#include <asm/processor-cyrix.h>
#include "mtrr.h"
int arr3_protected;
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index f6e46943e6e..56f64e34829 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -79,7 +79,7 @@ static void print_fixed(unsigned base, unsigned step, const mtrr_type*types)
}
/* Grab all of the MTRR state for this CPU into *state */
-void get_mtrr_state(void)
+void __init get_mtrr_state(void)
{
unsigned int i;
struct mtrr_var_range *vrs;
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 75dc6d5214b..c48b6fea5ab 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -643,7 +643,7 @@ static struct sysdev_driver mtrr_sysdev_driver = {
* initialized (i.e. before smp_init()).
*
*/
-__init void mtrr_bp_init(void)
+void __init mtrr_bp_init(void)
{
init_ifs();
diff --git a/arch/i386/kernel/cpu/mtrr/state.c b/arch/i386/kernel/cpu/mtrr/state.c
index 7b39a2f954d..c9014ca4a57 100644
--- a/arch/i386/kernel/cpu/mtrr/state.c
+++ b/arch/i386/kernel/cpu/mtrr/state.c
@@ -3,6 +3,7 @@
#include <asm/io.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
+#include <asm-i386/processor-cyrix.h>
#include "mtrr.h"
diff --git a/arch/i386/kernel/cpu/perfctr-watchdog.c b/arch/i386/kernel/cpu/perfctr-watchdog.c
index 4d26d514c56..4be488e73be 100644
--- a/arch/i386/kernel/cpu/perfctr-watchdog.c
+++ b/arch/i386/kernel/cpu/perfctr-watchdog.c
@@ -325,7 +325,7 @@ static struct wd_ops k7_wd_ops = {
.stop = single_msr_stop_watchdog,
.perfctr = MSR_K7_PERFCTR0,
.evntsel = MSR_K7_EVNTSEL0,
- .checkbit = 1ULL<<63,
+ .checkbit = 1ULL<<47,
};
/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */
@@ -346,7 +346,9 @@ static int setup_p6_watchdog(unsigned nmi_hz)
perfctr_msr = MSR_P6_PERFCTR0;
evntsel_msr = MSR_P6_EVNTSEL0;
- wrmsrl(perfctr_msr, 0UL);
+ /* KVM doesn't implement this MSR */
+ if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
+ return 0;
evntsel = P6_EVNTSEL_INT
| P6_EVNTSEL_OS
@@ -599,8 +601,8 @@ static struct wd_ops intel_arch_wd_ops = {
.setup = setup_intel_arch_watchdog,
.rearm = p6_rearm,
.stop = single_msr_stop_watchdog,
- .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
- .evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
+ .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
+ .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
};
static void probe_nmi_watchdog(void)
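
Two independent fixes in this file: the K7 overflow test now checks bit 47, since AMD's performance counters are 48 bits wide, and the P6 setup probes the counter MSR with wrmsr_safe(), which returns an error instead of faulting when a hypervisor (KVM at the time) does not implement the register. A sketch of the probe-before-use idiom (kernel-style fragment):

	/* Bail out gracefully if the MSR is unimplemented, e.g. in a guest. */
	if (wrmsr_safe(MSR_P6_PERFCTR0, 0, 0) < 0)
		return 0;	/* no usable perfctr, skip the watchdog */
	/* plain wrmsr/rdmsr on this MSR is safe from here on */
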
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index 89d91e6cc97..1e31b6caffb 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -29,7 +29,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
- NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", "3dnowext", "3dnow",
+ NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
+ "3dnowext", "3dnow",
/* Transmeta-defined */
"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
@@ -40,8 +41,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
/* Other (Linux-defined) */
"cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
NULL, NULL, NULL, NULL,
- "constant_tsc", "up", NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ "constant_tsc", "up", NULL, "arch_perfmon",
+ "pebs", "bts", NULL, "sync_rdtsc",
+ "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* Intel-defined (#2) */
@@ -57,9 +59,16 @@ static int show_cpuinfo(struct seq_file *m, void *v)
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* AMD-defined (#2) */
- "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8legacy", "abm",
- "sse4a", "misalignsse",
- "3dnowprefetch", "osvw", "ibs", NULL, NULL, NULL, NULL, NULL,
+ "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy",
+ "altmovcr8", "abm", "sse4a",
+ "misalignsse", "3dnowprefetch",
+ "osvw", "ibs", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* Auxiliary (Linux-defined) */
+ "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};
diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c
deleted file mode 100644
index 50076f22e90..00000000000
--- a/arch/i386/kernel/cpu/rise.c
+++ /dev/null
@@ -1,52 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <asm/processor.h>
-
-#include "cpu.h"
-
-static void __cpuinit init_rise(struct cpuinfo_x86 *c)
-{
- printk("CPU: Rise iDragon");
- if (c->x86_model > 2)
- printk(" II");
- printk("\n");
-
- /* Unhide possibly hidden capability flags
- The mp6 iDragon family don't have MSRs.
- We switch on extra features with this cpuid weirdness: */
- __asm__ (
- "movl $0x6363452a, %%eax\n\t"
- "movl $0x3231206c, %%ecx\n\t"
- "movl $0x2a32313a, %%edx\n\t"
- "cpuid\n\t"
- "movl $0x63634523, %%eax\n\t"
- "movl $0x32315f6c, %%ecx\n\t"
- "movl $0x2333313a, %%edx\n\t"
- "cpuid\n\t" : : : "eax", "ebx", "ecx", "edx"
- );
- set_bit(X86_FEATURE_CX8, c->x86_capability);
-}
-
-static struct cpu_dev rise_cpu_dev __cpuinitdata = {
- .c_vendor = "Rise",
- .c_ident = { "RiseRiseRise" },
- .c_models = {
- { .vendor = X86_VENDOR_RISE, .family = 5, .model_names =
- {
- [0] = "iDragon",
- [2] = "iDragon",
- [8] = "iDragon II",
- [9] = "iDragon II"
- }
- },
- },
- .c_init = init_rise,
-};
-
-int __init rise_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_RISE] = &rise_cpu_dev;
- return 0;
-}
-
diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c
index 9645bb51f76..e60cddbc4cf 100644
--- a/arch/i386/kernel/e820.c
+++ b/arch/i386/kernel/e820.c
@@ -10,6 +10,7 @@
#include <linux/efi.h>
#include <linux/pfn.h>
#include <linux/uaccess.h>
+#include <linux/suspend.h>
#include <asm/pgtable.h>
#include <asm/page.h>
@@ -320,6 +321,37 @@ static int __init request_standard_resources(void)
subsys_initcall(request_standard_resources);
+#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND)
+/**
+ * e820_mark_nosave_regions - Find the ranges of physical addresses that do not
+ * correspond to e820 RAM areas and mark the corresponding pages as nosave for
+ * hibernation.
+ *
+ * This function requires the e820 map to be sorted and without any
+ * overlapping entries and assumes the first e820 area to be RAM.
+ */
+void __init e820_mark_nosave_regions(void)
+{
+ int i;
+ unsigned long pfn;
+
+ pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
+ for (i = 1; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+
+ if (pfn < PFN_UP(ei->addr))
+ register_nosave_region(pfn, PFN_UP(ei->addr));
+
+ pfn = PFN_DOWN(ei->addr + ei->size);
+ if (ei->type != E820_RAM)
+ register_nosave_region(PFN_UP(ei->addr), pfn);
+
+ if (pfn >= max_low_pfn)
+ break;
+ }
+}
+#endif
+
void __init add_memory_region(unsigned long long start,
unsigned long long size, int type)
{
@@ -734,7 +766,7 @@ void __init print_memory_map(char *who)
case E820_NVS:
printk("(ACPI NVS)\n");
break;
- default: printk("type %lu\n", e820.map[i].type);
+ default: printk("type %u\n", e820.map[i].type);
break;
}
}
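
e820_mark_nosave_regions() walks the sorted map once, registering as nosave both the hole in front of each entry and the entry itself when it is not RAM, so hibernation skips pages with no backing RAM or pages holding firmware data. A runnable user-space toy applying the same interval logic to a hypothetical map (4 KB pages; the addresses are made up for illustration):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PFN_UP(x)   (((x) + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

struct entry { unsigned long addr, size; int ram; };

int main(void)
{
	/* Hypothetical, sorted, non-overlapping map; first entry is RAM. */
	struct entry map[] = {
		{ 0x000000,  0x9f000,   1 },	/* RAM */
		{ 0x0f0000,  0x10000,   0 },	/* ROM */
		{ 0x100000,  0x7f00000, 1 },	/* RAM */
	};
	unsigned long pfn = PFN_DOWN(map[0].addr + map[0].size);

	for (int i = 1; i < 3; i++) {
		if (pfn < PFN_UP(map[i].addr))	/* hole before this entry */
			printf("nosave: pfn %lu-%lu\n", pfn, PFN_UP(map[i].addr));
		pfn = PFN_DOWN(map[i].addr + map[i].size);
		if (!map[i].ram)		/* the non-RAM entry itself */
			printf("nosave: pfn %lu-%lu\n", PFN_UP(map[i].addr), pfn);
	}
	return 0;
}
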
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index a1808022ea1..2452c6fbe99 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -278,7 +278,7 @@ void efi_memmap_walk(efi_freemem_callback_t callback, void *arg)
struct range {
unsigned long start;
unsigned long end;
- } prev, curr;
+ } uninitialized_var(prev), curr;
efi_memory_desc_t *md;
unsigned long start, end;
void *p;
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 3c3c220488c..a714d6b4350 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -409,8 +409,6 @@ restore_nocheck_notrace:
1: INTERRUPT_RETURN
.section .fixup,"ax"
iret_exc:
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
pushl $0 # no error code
pushl $do_iret_error
jmp error_code
@@ -1023,6 +1021,91 @@ ENTRY(kernel_thread_helper)
CFI_ENDPROC
ENDPROC(kernel_thread_helper)
+#ifdef CONFIG_XEN
+ENTRY(xen_hypervisor_callback)
+ CFI_STARTPROC
+ pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ TRACE_IRQS_OFF
+
+ /* Check to see if we got the event in the critical
+ region in xen_iret_direct, after we've reenabled
+ events and checked for pending events. This simulates
+ iret instruction's behaviour where it delivers a
+ pending interrupt when enabling interrupts. */
+ movl PT_EIP(%esp),%eax
+ cmpl $xen_iret_start_crit,%eax
+ jb 1f
+ cmpl $xen_iret_end_crit,%eax
+ jae 1f
+
+ call xen_iret_crit_fixup
+
+1: mov %esp, %eax
+ call xen_evtchn_do_upcall
+ jmp ret_from_intr
+ CFI_ENDPROC
+ENDPROC(xen_hypervisor_callback)
+
+# Hypervisor uses this for application faults while it executes.
+# We get here for two reasons:
+# 1. Fault while reloading DS, ES, FS or GS
+# 2. Fault while executing IRET
+# Category 1 we fix up by reattempting the load, and zeroing the segment
+# register if the load fails.
+# Category 2 we fix up by jumping to do_iret_error. We cannot use the
+# normal Linux return path in this case because if we use the IRET hypercall
+# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
+# We distinguish between categories by maintaining a status value in EAX.
+ENTRY(xen_failsafe_callback)
+ CFI_STARTPROC
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ movl $1,%eax
+1: mov 4(%esp),%ds
+2: mov 8(%esp),%es
+3: mov 12(%esp),%fs
+4: mov 16(%esp),%gs
+ testl %eax,%eax
+ popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
+ lea 16(%esp),%esp
+ CFI_ADJUST_CFA_OFFSET -16
+ jz 5f
+ addl $16,%esp
+ jmp iret_exc # EAX != 0 => Category 2 (Bad IRET)
+5: pushl $0 # EAX == 0 => Category 1 (Bad segment)
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ jmp ret_from_exception
+ CFI_ENDPROC
+
+.section .fixup,"ax"
+6: xorl %eax,%eax
+ movl %eax,4(%esp)
+ jmp 1b
+7: xorl %eax,%eax
+ movl %eax,8(%esp)
+ jmp 2b
+8: xorl %eax,%eax
+ movl %eax,12(%esp)
+ jmp 3b
+9: xorl %eax,%eax
+ movl %eax,16(%esp)
+ jmp 4b
+.previous
+.section __ex_table,"a"
+ .align 4
+ .long 1b,6b
+ .long 2b,7b
+ .long 3b,8b
+ .long 4b,9b
+.previous
+ENDPROC(xen_failsafe_callback)
+
+#endif /* CONFIG_XEN */
+
.section .rodata,"a"
#include "syscall_table.S"
diff --git a/arch/i386/kernel/geode.c b/arch/i386/kernel/geode.c
new file mode 100644
index 00000000000..41e8aec4c61
--- /dev/null
+++ b/arch/i386/kernel/geode.c
@@ -0,0 +1,155 @@
+/*
+ * AMD Geode southbridge support code
+ * Copyright (C) 2006, Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/io.h>
+#include <asm/msr.h>
+#include <asm/geode.h>
+
+static struct {
+ char *name;
+ u32 msr;
+ int size;
+ u32 base;
+} lbars[] = {
+ { "geode-pms", MSR_LBAR_PMS, LBAR_PMS_SIZE, 0 },
+ { "geode-acpi", MSR_LBAR_ACPI, LBAR_ACPI_SIZE, 0 },
+ { "geode-gpio", MSR_LBAR_GPIO, LBAR_GPIO_SIZE, 0 },
+ { "geode-mfgpt", MSR_LBAR_MFGPT, LBAR_MFGPT_SIZE, 0 }
+};
+
+static void __init init_lbars(void)
+{
+ u32 lo, hi;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(lbars); i++) {
+ rdmsr(lbars[i].msr, lo, hi);
+ if (hi & 0x01)
+ lbars[i].base = lo & 0x0000ffff;
+
+ if (lbars[i].base == 0)
+ printk(KERN_ERR "geode: Couldn't initialize '%s'\n",
+ lbars[i].name);
+ }
+}
+
+int geode_get_dev_base(unsigned int dev)
+{
+ BUG_ON(dev >= ARRAY_SIZE(lbars));
+ return lbars[dev].base;
+}
+EXPORT_SYMBOL_GPL(geode_get_dev_base);
+
+/* === GPIO API === */
+
+void geode_gpio_set(unsigned int gpio, unsigned int reg)
+{
+ u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
+
+ if (!base)
+ return;
+
+ if (gpio < 16)
+ outl(1 << gpio, base + reg);
+ else
+ outl(1 << (gpio - 16), base + 0x80 + reg);
+}
+EXPORT_SYMBOL_GPL(geode_gpio_set);
+
+void geode_gpio_clear(unsigned int gpio, unsigned int reg)
+{
+ u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
+
+ if (!base)
+ return;
+
+ if (gpio < 16)
+ outl(1 << (gpio + 16), base + reg);
+ else
+ outl(1 << gpio, base + 0x80 + reg);
+}
+EXPORT_SYMBOL_GPL(geode_gpio_clear);
+
+int geode_gpio_isset(unsigned int gpio, unsigned int reg)
+{
+ u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
+
+ if (!base)
+ return 0;
+
+ if (gpio < 16)
+ return (inl(base + reg) & (1 << gpio)) ? 1 : 0;
+ else
+ return (inl(base + 0x80 + reg) & (1 << (gpio - 16))) ? 1 : 0;
+}
+EXPORT_SYMBOL_GPL(geode_gpio_isset);
+
+void geode_gpio_set_irq(unsigned int group, unsigned int irq)
+{
+ u32 lo, hi;
+
+ if (group > 7 || irq > 15)
+ return;
+
+ rdmsr(MSR_PIC_ZSEL_HIGH, lo, hi);
+
+ lo &= ~(0xF << (group * 4));
+ lo |= (irq & 0xF) << (group * 4);
+
+ wrmsr(MSR_PIC_ZSEL_HIGH, lo, hi);
+}
+EXPORT_SYMBOL_GPL(geode_gpio_set_irq);
+
+void geode_gpio_setup_event(unsigned int gpio, int pair, int pme)
+{
+ u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
+ u32 offset, shift, val;
+
+ if (gpio >= 24)
+ offset = GPIO_MAP_W;
+ else if (gpio >= 16)
+ offset = GPIO_MAP_Z;
+ else if (gpio >= 8)
+ offset = GPIO_MAP_Y;
+ else
+ offset = GPIO_MAP_X;
+
+ shift = (gpio % 8) * 4;
+
+ val = inl(base + offset);
+
+ /* Clear whatever was there before */
+ val &= ~(0xF << shift);
+
+ /* And set the new value */
+
+ val |= ((pair & 7) << shift);
+
+ /* Set the PME bit if this is a PME event */
+
+ if (pme)
+ val |= (1 << (shift + 3));
+
+ outl(val, base + offset);
+}
+EXPORT_SYMBOL_GPL(geode_gpio_setup_event);
+
+static int __init geode_southbridge_init(void)
+{
+ if (!is_geode())
+ return -ENODEV;
+
+ init_lbars();
+ return 0;
+}
+
+postcore_initcall(geode_southbridge_init);
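
A short usage sketch for the API exported above, as a hypothetical Geode platform driver might call it (the GPIO number, ZSEL group and IRQ are illustrative values, and the register offset name is assumed to come from <asm/geode.h>):

	/* Sketch: drive a GPIO and route a PIC group, if the LBAR came up. */
	if (geode_get_dev_base(GEODE_DEV_GPIO)) {
		geode_gpio_set(6, GPIO_OUTPUT_ENABLE);	/* enable GPIO 6 output */
		geode_gpio_set_irq(2, 10);		/* ZSEL group 2 -> IRQ 10 */
	}
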
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index f74dfc419b5..7c52b222207 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -168,6 +168,12 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
.section .init.text,"ax",@progbits
#endif
+ /* Do an early initialization of the fixmap area */
+ movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
+ movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
+ addl $0x007, %eax /* 0x007 = PRESENT+RW+USER */
+ movl %eax, 4092(%edx)
+
#ifdef CONFIG_SMP
ENTRY(startup_32_smp)
cld
@@ -504,9 +510,12 @@ ENTRY(_stext)
/*
* BSS section
*/
-.section ".bss.page_aligned","w"
+.section ".bss.page_aligned","wa"
+ .align PAGE_SIZE_asm
ENTRY(swapper_pg_dir)
.fill 1024,4,0
+ENTRY(swapper_pg_pmd)
+ .fill 1024,4,0
ENTRY(empty_zero_page)
.fill 4096,1,0
@@ -530,6 +539,8 @@ fault_msg:
.ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n"
.asciz "Stack: %p %p %p %p %p %p %p %p\n"
+#include "../xen/xen-head.S"
+
/*
* The IDT and GDT 'descriptors' are a strange 48-bit object
* only used by the lidt and lgdt instructions. They are not
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c
index 17d73459fc5..533d4932bc7 100644
--- a/arch/i386/kernel/hpet.c
+++ b/arch/i386/kernel/hpet.c
@@ -5,6 +5,7 @@
#include <linux/init.h>
#include <linux/sysdev.h>
#include <linux/pm.h>
+#include <linux/delay.h>
#include <asm/hpet.h>
#include <asm/io.h>
@@ -187,6 +188,10 @@ static void hpet_set_mode(enum clock_event_mode mode,
cfg &= ~HPET_TN_ENABLE;
hpet_writel(cfg, HPET_T0_CFG);
break;
+
+ case CLOCK_EVT_MODE_RESUME:
+ hpet_enable_int();
+ break;
}
}
@@ -217,6 +222,7 @@ static struct clocksource clocksource_hpet = {
.mask = HPET_MASK,
.shift = HPET_SHIFT,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .resume = hpet_start_counter,
};
/*
@@ -226,7 +232,8 @@ int __init hpet_enable(void)
{
unsigned long id;
uint64_t hpet_freq;
- u64 tmp;
+ u64 tmp, start, now;
+ cycle_t t1;
if (!is_hpet_capable())
return 0;
@@ -273,6 +280,27 @@ int __init hpet_enable(void)
/* Start the counter */
hpet_start_counter();
+ /* Verify whether hpet counter works */
+ t1 = read_hpet();
+ rdtscll(start);
+
+ /*
+ * We don't know the TSC frequency yet, but waiting for
+ * 200000 TSC cycles is safe:
+ * 4 GHz == 50us
+ * 1 GHz == 200us
+ */
+ do {
+ rep_nop();
+ rdtscll(now);
+ } while ((now - start) < 200000UL);
+
+ if (t1 == read_hpet()) {
+ printk(KERN_WARNING
+ "HPET counter not counting. HPET disabled\n");
+ goto out_nohpet;
+ }
+
/* Initialize and register HPET clocksource
*
* hpet period is in femto seconds per cycle
	 * hpet period is in femtoseconds per cycle
@@ -291,7 +319,6 @@ int __init hpet_enable(void)
clocksource_register(&clocksource_hpet);
-
if (id & HPET_ID_LEGSUP) {
hpet_enable_int();
hpet_reserve_platform_timers(id);
@@ -299,7 +326,7 @@ int __init hpet_enable(void)
* Start hpet with the boot cpu mask and make it
* global after the IO_APIC has been initialized.
*/
- hpet_clockevent.cpumask =cpumask_of_cpu(0);
+ hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
clockevents_register_device(&hpet_clockevent);
global_clock_event = &hpet_clockevent;
return 1;
@@ -524,68 +551,3 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
#endif
-
-
-/*
- * Suspend/resume part
- */
-
-#ifdef CONFIG_PM
-
-static int hpet_suspend(struct sys_device *sys_device, pm_message_t state)
-{
- unsigned long cfg = hpet_readl(HPET_CFG);
-
- cfg &= ~(HPET_CFG_ENABLE|HPET_CFG_LEGACY);
- hpet_writel(cfg, HPET_CFG);
-
- return 0;
-}
-
-static int hpet_resume(struct sys_device *sys_device)
-{
- unsigned int id;
-
- hpet_start_counter();
-
- id = hpet_readl(HPET_ID);
-
- if (id & HPET_ID_LEGSUP)
- hpet_enable_int();
-
- return 0;
-}
-
-static struct sysdev_class hpet_class = {
- set_kset_name("hpet"),
- .suspend = hpet_suspend,
- .resume = hpet_resume,
-};
-
-static struct sys_device hpet_device = {
- .id = 0,
- .cls = &hpet_class,
-};
-
-
-static __init int hpet_register_sysfs(void)
-{
- int err;
-
- if (!is_hpet_capable())
- return 0;
-
- err = sysdev_class_register(&hpet_class);
-
- if (!err) {
- err = sysdev_register(&hpet_device);
- if (err)
- sysdev_class_unregister(&hpet_class);
- }
-
- return err;
-}
-
-device_initcall(hpet_register_sysfs);
-
-#endif
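
The new sanity check reads the HPET main counter, busy-waits roughly 200,000 TSC cycles, and reads it again; if the value has not moved, the HPET is declared dead and never registered as a clocksource. The same calibrated spin, as a runnable user-space sketch (assumes an x86 CPU with a usable TSC):

#include <stdio.h>
#include <x86intrin.h>

int main(void)
{
	unsigned long long start = __rdtsc(), now;

	/* Wait ~200000 TSC cycles: ~50us at 4 GHz, ~200us at 1 GHz. */
	do {
		_mm_pause();		/* the "rep; nop" cpu_relax hint */
		now = __rdtsc();
	} while (now - start < 200000ULL);

	printf("spun %llu cycles\n", now - start);
	return 0;
}
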
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c
index f8a3c4054c7..6d839f2f1b1 100644
--- a/arch/i386/kernel/i8253.c
+++ b/arch/i386/kernel/i8253.c
@@ -3,18 +3,17 @@
*
*/
#include <linux/clockchips.h>
-#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
#include <linux/jiffies.h>
-#include <linux/sysdev.h>
#include <linux/module.h>
-#include <linux/init.h>
+#include <linux/spinlock.h>
#include <asm/smp.h>
#include <asm/delay.h>
#include <asm/i8253.h>
#include <asm/io.h>
-
-#include "io_ports.h"
+#include <asm/timer.h>
DEFINE_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock);
@@ -41,26 +40,27 @@ static void init_pit_timer(enum clock_event_mode mode,
case CLOCK_EVT_MODE_PERIODIC:
/* binary, mode 2, LSB/MSB, ch 0 */
outb_p(0x34, PIT_MODE);
- udelay(10);
outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
- udelay(10);
outb(LATCH >> 8 , PIT_CH0); /* MSB */
break;
- /*
- * Avoid unnecessary state transitions, as it confuses
- * Geode / Cyrix based boxen.
- */
case CLOCK_EVT_MODE_SHUTDOWN:
- if (evt->mode == CLOCK_EVT_MODE_UNUSED)
- break;
case CLOCK_EVT_MODE_UNUSED:
- if (evt->mode == CLOCK_EVT_MODE_SHUTDOWN)
- break;
+ if (evt->mode == CLOCK_EVT_MODE_PERIODIC ||
+ evt->mode == CLOCK_EVT_MODE_ONESHOT) {
+ outb_p(0x30, PIT_MODE);
+ outb_p(0, PIT_CH0);
+ outb_p(0, PIT_CH0);
+ }
+ break;
+
case CLOCK_EVT_MODE_ONESHOT:
/* One shot setup */
outb_p(0x38, PIT_MODE);
- udelay(10);
+ break;
+
+ case CLOCK_EVT_MODE_RESUME:
+ /* Nothing to do here */
break;
}
spin_unlock_irqrestore(&i8253_lock, flags);
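
The PIT command bytes used above: 0x34 selects channel 0, lobyte/hibyte access, mode 2 (rate generator); 0x30 is the same access mode in mode 0, which the shutdown path now writes with a zero count to stop periodic interrupts cleanly; 0x38 is mode 4 for one-shot. A runnable (root-only, and deliberately not something to run on a machine whose kernel still depends on the PIT) user-space sketch of the periodic programming, with constants taken from the 8254 datasheet:

#include <stdio.h>
#include <sys/io.h>

#define PIT_MODE 0x43
#define PIT_CH0  0x40
#define LATCH    11932		/* 1193182 Hz / 100, assuming HZ=100 */

int main(void)
{
	if (ioperm(PIT_CH0, 4, 1)) {	/* ports 0x40-0x43 */
		perror("ioperm");
		return 1;
	}
	outb(0x34, PIT_MODE);		/* ch 0, LSB/MSB, mode 2, binary */
	outb(LATCH & 0xff, PIT_CH0);	/* count low byte */
	outb(LATCH >> 8, PIT_CH0);	/* count high byte */
	puts("PIT channel 0 reprogrammed to ~100 Hz");
	return 0;
}
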
diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c
index cff95d10a4d..d26fc063a76 100644
--- a/arch/i386/kernel/init_task.c
+++ b/arch/i386/kernel/init_task.c
@@ -42,5 +42,5 @@ EXPORT_SYMBOL(init_task);
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
* no more per-task TSS's.
*/
-DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
+DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 7f8b7af2b95..893df828075 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -353,14 +353,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
# include <linux/slab.h> /* kmalloc() */
# include <linux/timer.h> /* time_after() */
-#ifdef CONFIG_BALANCED_IRQ_DEBUG
-# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
-# define Dprintk(x...) do { TDprintk(x); } while (0)
-# else
-# define TDprintk(x...)
-# define Dprintk(x...)
-# endif
-
#define IRQBALANCE_CHECK_ARCH -999
#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
@@ -443,7 +435,7 @@ static inline void balance_irq(int cpu, int irq)
static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
{
int i, j;
- Dprintk("Rotating IRQs among CPUs.\n");
+
for_each_online_cpu(i) {
for (j = 0; j < NR_IRQS; j++) {
if (!irq_desc[j].action)
@@ -560,19 +552,11 @@ tryanothercpu:
max_loaded = tmp_loaded; /* processor */
imbalance = (max_cpu_irq - min_cpu_irq) / 2;
- Dprintk("max_loaded cpu = %d\n", max_loaded);
- Dprintk("min_loaded cpu = %d\n", min_loaded);
- Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
- Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
- Dprintk("load imbalance = %lu\n", imbalance);
-
/* if imbalance is less than approx 10% of max load, then
* observe diminishing returns action. - quit
*/
- if (imbalance < (max_cpu_irq >> 3)) {
- Dprintk("Imbalance too trivial\n");
+ if (imbalance < (max_cpu_irq >> 3))
goto not_worth_the_effort;
- }
tryanotherirq:
/* if we select an IRQ to move that can't go where we want, then
@@ -629,9 +613,6 @@ tryanotherirq:
cpus_and(tmp, target_cpu_mask, allowed_mask);
if (!cpus_empty(tmp)) {
-
- Dprintk("irq = %d moved to cpu = %d\n",
- selected_irq, min_loaded);
/* mark for change destination */
set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
@@ -651,7 +632,6 @@ not_worth_the_effort:
*/
balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
- Dprintk("IRQ worth rotating not found\n");
return;
}
@@ -667,6 +647,7 @@ static int balanced_irq(void *unused)
set_pending_irq(i, cpumask_of_cpu(0));
}
+ set_freezable();
for ( ; ; ) {
time_remaining = schedule_timeout_interruptible(time_remaining);
try_to_freeze();
@@ -1901,7 +1882,7 @@ __setup("no_timer_check", notimercheck);
* - if this function detects that timer IRQs are defunct, then we fall
* back to ISA timer IRQs
*/
-int __init timer_irq_works(void)
+static int __init timer_irq_works(void)
{
unsigned long t1 = jiffies;
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index d2daf672f4a..dd2b97fc00b 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -21,7 +21,7 @@
#include <asm/apic.h>
#include <asm/uaccess.h>
-DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
DEFINE_PER_CPU(struct pt_regs *, irq_regs);
@@ -149,15 +149,11 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs)
#ifdef CONFIG_4KSTACKS
-/*
- * These should really be __section__(".bss.page_aligned") as well, but
- * gcc's 3.0 and earlier don't handle that correctly.
- */
static char softirq_stack[NR_CPUS * THREAD_SIZE]
- __attribute__((__aligned__(THREAD_SIZE)));
+ __attribute__((__section__(".bss.page_aligned")));
static char hardirq_stack[NR_CPUS * THREAD_SIZE]
- __attribute__((__aligned__(THREAD_SIZE)));
+ __attribute__((__section__(".bss.page_aligned")));
/*
* allocate per-cpu stacks for hardirq and for softirq processing
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index dde828a333c..448a50b1324 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -35,6 +35,7 @@
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/uaccess.h>
+#include <asm/alternative.h>
void jprobe_return_end(void);
@@ -169,16 +170,12 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
- *p->addr = BREAKPOINT_INSTRUCTION;
- flush_icache_range((unsigned long) p->addr,
- (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+ text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
}
void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
- *p->addr = p->opcode;
- flush_icache_range((unsigned long) p->addr,
- (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+ text_poke(p->addr, &p->opcode, 1);
}
void __kprobes arch_remove_kprobe(struct kprobe *p)
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index fba121f7973..99beac7f96c 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -295,7 +295,7 @@ static unsigned int
last_irq_sums [NR_CPUS],
alert_counter [NR_CPUS];
-void touch_nmi_watchdog (void)
+void touch_nmi_watchdog(void)
{
if (nmi_watchdog > 0) {
unsigned cpu;
@@ -304,8 +304,10 @@ void touch_nmi_watchdog (void)
* Just reset the alert counters, (other CPUs might be
* spinning on locks we hold):
*/
- for_each_present_cpu (cpu)
- alert_counter[cpu] = 0;
+ for_each_present_cpu(cpu) {
+ if (alert_counter[cpu])
+ alert_counter[cpu] = 0;
+ }
}
/*
@@ -351,7 +353,7 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
* Take the local apic timer and PIT/HPET into account. We don't
* know which one is active, when we have highres/dyntick on
*/
- sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0);
+ sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_cpu(cpu).irqs[0];
	/* if none of the timers is firing, this cpu isn't doing much */
if (!touched && last_irq_sums[cpu] == sum) {
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index faab09abca5..ea962c0667d 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -124,20 +124,28 @@ unsigned paravirt_patch_ignore(unsigned len)
return len;
}
+struct branch {
+ unsigned char opcode;
+ u32 delta;
+} __attribute__((packed));
+
unsigned paravirt_patch_call(void *target, u16 tgt_clobbers,
void *site, u16 site_clobbers,
unsigned len)
{
unsigned char *call = site;
unsigned long delta = (unsigned long)target - (unsigned long)(call+5);
+ struct branch b;
if (tgt_clobbers & ~site_clobbers)
return len; /* target would clobber too much for this site */
if (len < 5)
return len; /* call too long for patch site */
- *call++ = 0xe8; /* call */
- *(unsigned long *)call = delta;
+ b.opcode = 0xe8; /* call */
+ b.delta = delta;
+ BUILD_BUG_ON(sizeof(b) != 5);
+ text_poke(call, (unsigned char *)&b, 5);
return 5;
}
@@ -146,12 +154,14 @@ unsigned paravirt_patch_jmp(void *target, void *site, unsigned len)
{
unsigned char *jmp = site;
unsigned long delta = (unsigned long)target - (unsigned long)(jmp+5);
+ struct branch b;
if (len < 5)
return len; /* call too long for patch site */
- *jmp++ = 0xe9; /* jmp */
- *(unsigned long *)jmp = delta;
+ b.opcode = 0xe9; /* jmp */
+ b.delta = delta;
+ text_poke(jmp, (unsigned char *)&b, 5);
return 5;
}
@@ -228,6 +238,41 @@ static int __init print_banner(void)
}
core_initcall(print_banner);
+static struct resource reserve_ioports = {
+ .start = 0,
+ .end = IO_SPACE_LIMIT,
+ .name = "paravirt-ioport",
+ .flags = IORESOURCE_IO | IORESOURCE_BUSY,
+};
+
+static struct resource reserve_iomem = {
+ .start = 0,
+ .end = -1,
+ .name = "paravirt-iomem",
+ .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
+};
+
+/*
+ * Reserve the whole legacy IO space to prevent any legacy drivers
+ * from wasting time probing for their hardware. This is a fairly
+ * brute-force approach to disabling all non-virtual drivers.
+ *
+ * Note that this must be called very early to have any effect.
+ */
+int paravirt_disable_iospace(void)
+{
+ int ret;
+
+ ret = request_resource(&ioport_resource, &reserve_ioports);
+ if (ret == 0) {
+ ret = request_resource(&iomem_resource, &reserve_iomem);
+ if (ret)
+ release_resource(&reserve_ioports);
+ }
+
+ return ret;
+}
+
struct paravirt_ops paravirt_ops = {
.name = "bare hardware",
.paravirt_enabled = 0,
@@ -267,7 +312,7 @@ struct paravirt_ops paravirt_ops = {
.write_msr = native_write_msr_safe,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
- .get_scheduled_cycles = native_read_tsc,
+ .sched_clock = native_sched_clock,
.get_cpu_khz = native_calculate_cpu_khz,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
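
Both patchers now assemble the whole 5-byte call/jmp in a packed struct and install it atomically via text_poke() rather than storing the opcode and displacement separately over live code. The displacement is relative to the address of the next instruction, i.e. site + 5. A runnable user-space check of the encoding math (the addresses are hypothetical):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct branch {
	unsigned char opcode;
	uint32_t delta;
} __attribute__((packed));

int main(void)
{
	unsigned long site = 0x1000, target = 0x2345;	/* made-up addresses */
	struct branch b = {
		.opcode = 0xe8,			/* call rel32 */
		.delta = target - (site + 5),	/* relative to next insn */
	};
	unsigned char bytes[5];

	_Static_assert(sizeof(b) == 5, "packed 5-byte call");
	memcpy(bytes, &b, sizeof(b));
	for (int i = 0; i < 5; i++)
		printf("%02x ", bytes[i]);	/* e8 40 13 00 00 */
	putchar('\n');
	return 0;
}
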
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 06dfa65ad18..84664710b78 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -300,6 +300,7 @@ early_param("idle", idle_setup);
void show_regs(struct pt_regs * regs)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+ unsigned long d0, d1, d2, d3, d6, d7;
printk("\n");
printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
@@ -324,6 +325,17 @@ void show_regs(struct pt_regs * regs)
cr3 = read_cr3();
cr4 = read_cr4_safe();
printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
+
+ get_debugreg(d0, 0);
+ get_debugreg(d1, 1);
+ get_debugreg(d2, 2);
+ get_debugreg(d3, 3);
+ printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
+ d0, d1, d2, d3);
+ get_debugreg(d6, 6);
+ get_debugreg(d7, 7);
+ printk("DR6: %08lx DR7: %08lx\n", d6, d7);
+
show_trace(NULL, regs, &regs->esp);
}
@@ -538,8 +550,31 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
return 1;
}
-static noinline void __switch_to_xtra(struct task_struct *next_p,
- struct tss_struct *tss)
+#ifdef CONFIG_SECCOMP
+void hard_disable_TSC(void)
+{
+ write_cr4(read_cr4() | X86_CR4_TSD);
+}
+void disable_TSC(void)
+{
+ preempt_disable();
+ if (!test_and_set_thread_flag(TIF_NOTSC))
+ /*
+ * Must flip the CPU state synchronously with
+ * TIF_NOTSC in the current running context.
+ */
+ hard_disable_TSC();
+ preempt_enable();
+}
+void hard_enable_TSC(void)
+{
+ write_cr4(read_cr4() & ~X86_CR4_TSD);
+}
+#endif /* CONFIG_SECCOMP */
+
+static noinline void
+__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+ struct tss_struct *tss)
{
struct thread_struct *next;
@@ -555,6 +590,17 @@ static noinline void __switch_to_xtra(struct task_struct *next_p,
set_debugreg(next->debugreg[7], 7);
}
+#ifdef CONFIG_SECCOMP
+ if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
+ test_tsk_thread_flag(next_p, TIF_NOTSC)) {
+ /* prev and next are different */
+ if (test_tsk_thread_flag(next_p, TIF_NOTSC))
+ hard_disable_TSC();
+ else
+ hard_enable_TSC();
+ }
+#endif
+
if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
/*
* Disable the bitmap via an invalid offset. We still cache
@@ -586,33 +632,6 @@ static noinline void __switch_to_xtra(struct task_struct *next_p,
}
/*
- * This function selects if the context switch from prev to next
- * has to tweak the TSC disable bit in the cr4.
- */
-static inline void disable_tsc(struct task_struct *prev_p,
- struct task_struct *next_p)
-{
- struct thread_info *prev, *next;
-
- /*
- * gcc should eliminate the ->thread_info dereference if
- * has_secure_computing returns 0 at compile time (SECCOMP=n).
- */
- prev = task_thread_info(prev_p);
- next = task_thread_info(next_p);
-
- if (has_secure_computing(prev) || has_secure_computing(next)) {
- /* slow path here */
- if (has_secure_computing(prev) &&
- !has_secure_computing(next)) {
- write_cr4(read_cr4() & ~X86_CR4_TSD);
- } else if (!has_secure_computing(prev) &&
- has_secure_computing(next))
- write_cr4(read_cr4() | X86_CR4_TSD);
- }
-}
-
-/*
 * switch_to(x,y) should switch tasks from x to y.
*
* We fsave/fwait so that an exception goes off at the right time
@@ -689,11 +708,9 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
/*
* Now maybe handle debug registers and/or IO bitmaps
*/
- if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)
- || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)))
- __switch_to_xtra(next_p, tss);
-
- disable_tsc(prev_p, next_p);
+ if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
+ task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
+ __switch_to_xtra(prev_p, next_p, tss);
/*
* Leave lazy mode, flushing any hypercalls made here.
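
The TIF_NOTSC handling replaces the old per-switch seccomp test: CR4.TSD is rewritten only when the outgoing and incoming task disagree on the flag, which an XOR detects in one operation. A tiny runnable demonstration of that transition table:

#include <stdio.h>

int main(void)
{
	/* Flip hardware state only when the flag differs across the switch. */
	for (int prev = 0; prev <= 1; prev++)
		for (int next = 0; next <= 1; next++)
			printf("prev=%d next=%d -> %s\n", prev, next,
			       (prev ^ next)
				       ? (next ? "disable TSC" : "enable TSC")
				       : "no CR4 write");
	return 0;
}
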
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index 0c0ceec5de0..0c8f00e69c4 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -164,14 +164,22 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_
u32 *desc;
unsigned long base;
- down(&child->mm->context.sem);
- desc = child->mm->context.ldt + (seg & ~7);
- base = (desc[0] >> 16) | ((desc[1] & 0xff) << 16) | (desc[1] & 0xff000000);
+ seg &= ~7UL;
- /* 16-bit code segment? */
- if (!((desc[1] >> 22) & 1))
- addr &= 0xffff;
- addr += base;
+ down(&child->mm->context.sem);
+ if (unlikely((seg >> 3) >= child->mm->context.size))
+ addr = -1L; /* bogus selector, access would fault */
+ else {
+ desc = child->mm->context.ldt + seg;
+ base = ((desc[0] >> 16) |
+ ((desc[1] & 0xff) << 16) |
+ (desc[1] & 0xff000000));
+
+ /* 16-bit code segment? */
+ if (!((desc[1] >> 22) & 1))
+ addr &= 0xffff;
+ addr += base;
+ }
up(&child->mm->context.sem);
}
return addr;
@@ -358,17 +366,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
switch (request) {
/* when I and D space are separate, these will need to be fixed. */
case PTRACE_PEEKTEXT: /* read word at location addr. */
- case PTRACE_PEEKDATA: {
- unsigned long tmp;
- int copied;
-
- copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
- ret = -EIO;
- if (copied != sizeof(tmp))
- break;
- ret = put_user(tmp, datap);
+ case PTRACE_PEEKDATA:
+ ret = generic_ptrace_peekdata(child, addr, data);
break;
- }
/* read the word at location addr in the USER area. */
case PTRACE_PEEKUSR: {
@@ -395,10 +395,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
/* when I and D space are separate, this will have to be fixed. */
case PTRACE_POKETEXT: /* write the word at location addr. */
case PTRACE_POKEDATA:
- ret = 0;
- if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
- break;
- ret = -EIO;
+ ret = generic_ptrace_pokedata(child, addr, data);
break;
case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
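
The reworked convert_eip_to_linear() masks the RPL/TI bits off the selector first, refuses selectors beyond the LDT's size (a bogus selector would have faulted on access), and only then assembles the 32-bit segment base scattered across the two descriptor words. A runnable check of that base extraction (the descriptor words are constructed test values):

#include <stdio.h>
#include <stdint.h>

/* Base lives in desc[0] bits 31..16, desc[1] bits 7..0 and 31..24. */
static uint32_t desc_base(const uint32_t desc[2])
{
	return (desc[0] >> 16) |
	       ((desc[1] & 0xff) << 16) |
	       (desc[1] & 0xff000000);
}

int main(void)
{
	/* Hypothetical descriptor encoding base 0x12345678. */
	uint32_t d[2] = { 0x5678ffff, 0x12cf9a34 };

	printf("base = 0x%08x\n", desc_base(d));	/* 0x12345678 */
	return 0;
}
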
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index 5513f8d5b5b..0d796248866 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -113,6 +113,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"),
},
},
+	{ /* Handle problems with rebooting on Dell OptiPlex 745's SFF */
+ .callback = set_bios_reboot,
+ .ident = "Dell OptiPlex 745",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+ DMI_MATCH(DMI_BOARD_NAME, "0WF810"),
+ },
+ },
{ /* Handle problems with rebooting on Dell 2400's */
.callback = set_bios_reboot,
.ident = "Dell PowerEdge 2400",
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 698c24fe482..d474cd639bc 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -102,19 +102,10 @@ static unsigned int highmem_pages = -1;
/*
* Setup options
*/
-struct drive_info_struct { char dummy[32]; } drive_info;
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
- defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
-EXPORT_SYMBOL(drive_info);
-#endif
struct screen_info screen_info;
EXPORT_SYMBOL(screen_info);
struct apm_info apm_info;
EXPORT_SYMBOL(apm_info);
-struct sys_desc_table_struct {
- unsigned short length;
- unsigned char table[0];
-};
struct edid_info edid_info;
EXPORT_SYMBOL_GPL(edid_info);
struct ist_info ist_info;
@@ -134,7 +125,7 @@ unsigned long saved_videomode;
static char __initdata command_line[COMMAND_LINE_SIZE];
-unsigned char __initdata boot_params[PARAM_SIZE];
+struct boot_params __initdata boot_params;
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
@@ -282,18 +273,18 @@ unsigned long __init find_max_low_pfn(void)
printk(KERN_WARNING "Warning only %ldMB will be used.\n",
MAXMEM>>20);
if (max_pfn > MAX_NONPAE_PFN)
- printk(KERN_WARNING "Use a PAE enabled kernel.\n");
+ printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
else
printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
max_pfn = MAXMEM_PFN;
#else /* !CONFIG_HIGHMEM */
-#ifndef CONFIG_X86_PAE
+#ifndef CONFIG_HIGHMEM64G
if (max_pfn > MAX_NONPAE_PFN) {
max_pfn = MAX_NONPAE_PFN;
printk(KERN_WARNING "Warning only 4GB will be used.\n");
- printk(KERN_WARNING "Use a PAE enabled kernel.\n");
+ printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
}
-#endif /* !CONFIG_X86_PAE */
+#endif /* !CONFIG_HIGHMEM64G */
#endif /* !CONFIG_HIGHMEM */
} else {
if (highmem_pages == -1)
@@ -475,7 +466,7 @@ void __init setup_bootmem_allocator(void)
*
* This should all compile down to nothing when NUMA is off.
*/
-void __init remapped_pgdat_init(void)
+static void __init remapped_pgdat_init(void)
{
int nid;
@@ -528,7 +519,6 @@ void __init setup_arch(char **cmdline_p)
#endif
ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
- drive_info = DRIVE_INFO;
screen_info = SCREEN_INFO;
edid_info = EDID_INFO;
apm_info.bios = APM_BIOS_INFO;
@@ -611,6 +601,8 @@ void __init setup_arch(char **cmdline_p)
* NOTE: at this point the bootmem allocator is fully available.
*/
+ paravirt_post_allocator_init();
+
dmi_scan_machine();
#ifdef CONFIG_X86_GENERICARCH
@@ -648,6 +640,7 @@ void __init setup_arch(char **cmdline_p)
#endif
e820_register_memory();
+ e820_mark_nosave_regions();
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index d574e38f0f7..f5dd85656c1 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -199,6 +199,13 @@ asmlinkage int sys_sigreturn(unsigned long __unused)
return eax;
badframe:
+ if (show_unhandled_signals && printk_ratelimit())
+ printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx"
+ " esp:%lx oeax:%lx\n",
+ current->pid > 1 ? KERN_INFO : KERN_EMERG,
+ current->comm, current->pid, frame, regs->eip,
+ regs->esp, regs->orig_eax);
+
force_sig(SIGSEGV, current);
return 0;
}
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 6299c080f6e..2d35d850202 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -22,6 +22,7 @@
#include <asm/mtrr.h>
#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
#include <mach_apic.h>
/*
@@ -249,13 +250,13 @@ static unsigned long flush_va;
static DEFINE_SPINLOCK(tlbstate_lock);
/*
- * We cannot call mmdrop() because we are in interrupt context,
+ * We cannot call mmdrop() because we are in interrupt context,
* instead update mm->cpu_vm_mask.
*
* We need to reload %cr3 since the page tables may be going
* away from under us..
*/
-static inline void leave_mm (unsigned long cpu)
+void leave_mm(unsigned long cpu)
{
if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
BUG();
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 0b2954534b8..e4f61d1c624 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -148,7 +148,7 @@ void __init smp_alloc_memory(void)
* a given CPU
*/
-static void __cpuinit smp_store_cpu_info(int id)
+void __cpuinit smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = cpu_data + id;
@@ -308,8 +308,7 @@ cpumask_t cpu_coregroup_map(int cpu)
/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;
-static inline void
-set_cpu_sibling_map(int cpu)
+void __cpuinit set_cpu_sibling_map(int cpu)
{
int i;
struct cpuinfo_x86 *c = cpu_data;
@@ -1144,8 +1143,7 @@ void __init native_smp_prepare_boot_cpu(void)
}
#ifdef CONFIG_HOTPLUG_CPU
-static void
-remove_siblinginfo(int cpu)
+void remove_siblinginfo(int cpu)
{
int sibling;
struct cpuinfo_x86 *c = cpu_data;
diff --git a/arch/i386/kernel/smpcommon.c b/arch/i386/kernel/smpcommon.c
index 1868ae18eb4..bbfe85a0f69 100644
--- a/arch/i386/kernel/smpcommon.c
+++ b/arch/i386/kernel/smpcommon.c
@@ -47,7 +47,7 @@ int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
EXPORT_SYMBOL(smp_call_function);
/**
- * smp_call_function_single - Run a function on another CPU
+ * smp_call_function_single - Run a function on a specific CPU
* @cpu: The target CPU. Cannot be the calling CPU.
* @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function.
@@ -66,9 +66,11 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
int ret;
int me = get_cpu();
if (cpu == me) {
- WARN_ON(1);
+ local_irq_disable();
+ func(info);
+ local_irq_enable();
put_cpu();
- return -EBUSY;
+ return 0;
}
ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
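
With this change a caller no longer has to special-case "target CPU is me": the function simply runs immediately with interrupts disabled, matching the context it would get in the IPI handler on a remote CPU. A hedged kernel-style sketch of the calling convention (the function name is illustrative):

	static void do_flush(void *info)
	{
		/* runs with IRQs off, must not sleep */
	}

	/* Now legal even when cpu == smp_processor_id(); returns 0. */
	err = smp_call_function_single(cpu, do_flush, NULL, 0, 1);
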
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index bf6adce5226..8344c70adf6 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -323,3 +323,4 @@ ENTRY(sys_call_table)
.long sys_signalfd
.long sys_timerfd
.long sys_eventfd
+ .long sys_fallocate
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index ff4ee6f3326..6deb159d08e 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -336,7 +336,9 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
int in_gate_area(struct task_struct *task, unsigned long addr)
{
- return 0;
+ const struct vm_area_struct *vma = get_gate_vma(task);
+
+ return vma && addr >= vma->vm_start && addr < vma->vm_end;
}
int in_gate_area_no_task(unsigned long addr)
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index a665df61f08..19a6c678d02 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -207,55 +207,9 @@ unsigned long read_persistent_clock(void)
return retval;
}
-static void sync_cmos_clock(unsigned long dummy);
-
-static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
-int no_sync_cmos_clock;
-
-static void sync_cmos_clock(unsigned long dummy)
-{
- struct timeval now, next;
- int fail = 1;
-
- /*
- * If we have an externally synchronized Linux clock, then update
- * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
- * called as close as possible to 500 ms before the new second starts.
- * This code is run on a timer. If the clock is set, that timer
- * may not expire at the correct time. Thus, we adjust...
- */
- if (!ntp_synced())
- /*
- * Not synced, exit, do not restart a timer (if one is
- * running, let it run out).
- */
- return;
-
- do_gettimeofday(&now);
- if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
- now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
- fail = set_rtc_mmss(now.tv_sec);
-
- next.tv_usec = USEC_AFTER - now.tv_usec;
- if (next.tv_usec <= 0)
- next.tv_usec += USEC_PER_SEC;
-
- if (!fail)
- next.tv_sec = 659;
- else
- next.tv_sec = 0;
-
- if (next.tv_usec >= USEC_PER_SEC) {
- next.tv_sec++;
- next.tv_usec -= USEC_PER_SEC;
- }
- mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
-}
-
-void notify_arch_cmos_timer(void)
+int update_persistent_clock(struct timespec now)
{
- if (!no_sync_cmos_clock)
- mod_timer(&sync_cmos_timer, jiffies + 1);
+ return set_rtc_mmss(now.tv_sec);
}
extern void (*late_time_init)(void);
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 90da0575fcf..cfffe3dd9e8 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -41,6 +41,10 @@
#include <linux/mca.h>
#endif
+#if defined(CONFIG_EDAC)
+#include <linux/edac.h>
+#endif
+
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/io.h>
@@ -148,7 +152,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (!stack) {
unsigned long dummy;
stack = &dummy;
- if (task && task != current)
+ if (task != current)
stack = (unsigned long *)task->thread.esp;
}
@@ -207,6 +211,7 @@ static void print_trace_address(void *data, unsigned long addr)
{
printk("%s [<%08lx>] ", (char *)data, addr);
print_symbol("%s\n", addr);
+ touch_nmi_watchdog();
}
static struct stacktrace_ops print_trace_ops = {
@@ -390,7 +395,7 @@ void die(const char * str, struct pt_regs * regs, long err)
unsigned long esp;
unsigned short ss;
- report_bug(regs->eip);
+ report_bug(regs->eip, regs);
printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
#ifdef CONFIG_PREEMPT
@@ -433,6 +438,7 @@ void die(const char * str, struct pt_regs * regs, long err)
bust_spinlocks(0);
die.lock_owner = -1;
+ add_taint(TAINT_DIE);
spin_unlock_irqrestore(&die.lock, flags);
if (!regs)
@@ -517,10 +523,12 @@ fastcall void do_##name(struct pt_regs * regs, long error_code) \
do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
}
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
fastcall void do_##name(struct pt_regs * regs, long error_code) \
{ \
siginfo_t info; \
+ if (irq) \
+ local_irq_enable(); \
info.si_signo = signr; \
info.si_errno = 0; \
info.si_code = sicode; \
@@ -560,13 +568,13 @@ DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
#endif
DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip)
+DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip, 0)
DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
-DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
-DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
+DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
+DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
fastcall void __kprobes do_general_protection(struct pt_regs * regs,
long error_code)
@@ -610,6 +618,13 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
current->thread.error_code = error_code;
current->thread.trap_no = 13;
+ if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
+ printk_ratelimit())
+ printk(KERN_INFO
+ "%s[%d] general protection eip:%lx esp:%lx error:%lx\n",
+ current->comm, current->pid,
+ regs->eip, regs->esp, error_code);
+
force_sig(SIGSEGV, current);
return;
@@ -635,6 +650,14 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
"CPU %d.\n", reason, smp_processor_id());
printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
+
+#if defined(CONFIG_EDAC)
+	if (edac_handler_set()) {
+ edac_atomic_assert_error();
+ return;
+ }
+#endif
+
if (panic_on_unrecovered_nmi)
panic("NMI: Not continuing");
@@ -752,6 +775,8 @@ static __kprobes void default_do_nmi(struct pt_regs * regs)
reassert_nmi();
}
+static int ignore_nmis;
+
fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
{
int cpu;
@@ -762,11 +787,24 @@ fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
++nmi_count(cpu);
- default_do_nmi(regs);
+ if (!ignore_nmis)
+ default_do_nmi(regs);
nmi_exit();
}
+void stop_nmi(void)
+{
+ acpi_nmi_disable();
+ ignore_nmis++;
+}
+
+void restart_nmi(void)
+{
+ ignore_nmis--;
+ acpi_nmi_enable();
+}
+
#ifdef CONFIG_KPROBES
fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
{
@@ -1053,6 +1091,7 @@ asmlinkage void math_state_restore(void)
thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
tsk->fpu_counter++;
}
+EXPORT_SYMBOL_GPL(math_state_restore);
#ifndef CONFIG_MATH_EMULATION
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index ea63a30ca3e..debd7dbb415 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -27,6 +27,7 @@ static int tsc_enabled;
* an extra value to store the TSC freq
*/
unsigned int tsc_khz;
+EXPORT_SYMBOL_GPL(tsc_khz);
int tsc_disable;
@@ -58,10 +59,11 @@ __setup("notsc", tsc_setup);
*/
static int tsc_unstable;
-static inline int check_tsc_unstable(void)
+int check_tsc_unstable(void)
{
return tsc_unstable;
}
+EXPORT_SYMBOL_GPL(check_tsc_unstable);
/* Accelerators for sched_clock()
* convert from cycles(64bits) => nanoseconds (64bits)
@@ -84,7 +86,7 @@ static inline int check_tsc_unstable(void)
*
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
-static unsigned long cyc2ns_scale __read_mostly;
+unsigned long cyc2ns_scale __read_mostly;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
@@ -93,15 +95,10 @@ static inline void set_cyc2ns_scale(unsigned long cpu_khz)
cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
}
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
/*
* Scheduler clock - returns current time in nanosec units.
*/
-unsigned long long sched_clock(void)
+unsigned long long native_sched_clock(void)
{
unsigned long long this_offset;
@@ -118,12 +115,24 @@ unsigned long long sched_clock(void)
return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
/* read the Time Stamp Counter: */
- get_scheduled_cycles(this_offset);
+ rdtscll(this_offset);
/* return the value in ns */
return cycles_2_ns(this_offset);
}
+/* We need to define a real function for sched_clock, to override the
+ weak default version */
+#ifdef CONFIG_PARAVIRT
+unsigned long long sched_clock(void)
+{
+ return paravirt_sched_clock();
+}
+#else
+unsigned long long sched_clock(void)
+ __attribute__((alias("native_sched_clock")));
+#endif
+
unsigned long native_calculate_cpu_khz(void)
{
unsigned long long start, end;
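
The cyc2ns conversion described in the comments above is plain fixed-point arithmetic: ns = cycles * (10^6 / cpu_khz), with the ratio pre-scaled by 2^10 so the per-call work is one multiply and one shift. A runnable check with a hypothetical 2 GHz clock:

#include <stdio.h>

#define CYC2NS_SCALE_FACTOR 10	/* 2^10, as in the kernel */

int main(void)
{
	unsigned long cpu_khz = 2000000;	/* hypothetical 2 GHz CPU */
	unsigned long scale = (1000000UL << CYC2NS_SCALE_FACTOR) / cpu_khz;
	unsigned long long cyc = 2000000000ULL;	/* one second of cycles */

	/* ns = cyc * (10^6 / cpu_khz) in fixed point: prints 1000000000 */
	printf("%llu ns\n", (cyc * scale) >> CYC2NS_SCALE_FACTOR);
	return 0;
}
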
diff --git a/arch/i386/kernel/verify_cpu.S b/arch/i386/kernel/verify_cpu.S
deleted file mode 100644
index f1d1eacf4ab..00000000000
--- a/arch/i386/kernel/verify_cpu.S
+++ /dev/null
@@ -1,94 +0,0 @@
-/* Check if CPU has some minimum CPUID bits
- This runs in 16bit mode so that the caller can still use the BIOS
- to output errors on the screen */
-#include <asm/cpufeature.h>
-#include <asm/msr.h>
-
-verify_cpu:
- pushfl # Save caller passed flags
- pushl $0 # Kill any dangerous flags
- popfl
-
-#if CONFIG_X86_MINIMUM_CPU_MODEL >= 4
- pushfl
- pop %eax
- orl $(1<<18),%eax # try setting AC
- push %eax
- popfl
- pushfl
- popl %eax
- testl $(1<<18),%eax
- jz bad
-#endif
-#if REQUIRED_MASK1 != 0
- pushfl # standard way to check for cpuid
- popl %eax
- movl %eax,%ebx
- xorl $0x200000,%eax
- pushl %eax
- popfl
- pushfl
- popl %eax
- cmpl %eax,%ebx
- pushfl # standard way to check for cpuid
- popl %eax
- movl %eax,%ebx
- xorl $0x200000,%eax
- pushl %eax
- popfl
- pushfl
- popl %eax
- cmpl %eax,%ebx
- jz bad # REQUIRED_MASK1 != 0 requires CPUID
-
- movl $0x0,%eax # See if cpuid 1 is implemented
- cpuid
- cmpl $0x1,%eax
- jb bad # no cpuid 1
-
-#if REQUIRED_MASK1 & NEED_CMPXCHG64
- /* Some VIA C3s need magic MSRs to enable CX64. Do this here */
- cmpl $0x746e6543,%ebx # Cent
- jne 1f
- cmpl $0x48727561,%edx # aurH
- jne 1f
- cmpl $0x736c7561,%ecx # auls
- jne 1f
- movl $1,%eax # check model
- cpuid
- movl %eax,%ebx
- shr $8,%ebx
- andl $0xf,%ebx
- cmp $6,%ebx # check family == 6
- jne 1f
- shr $4,%eax
- andl $0xf,%eax
- cmpl $6,%eax # check model >= 6
- jb 1f
- # assume models >= 6 all support this MSR
- movl $MSR_VIA_FCR,%ecx
- rdmsr
- orl $((1<<1)|(1<<7)),%eax # enable CMPXCHG64 and PGE
- wrmsr
-1:
-#endif
- movl $0x1,%eax # Does the cpu have what it takes
- cpuid
-
-#if CONFIG_X86_MINIMUM_CPU_MODEL > 4
-#error add proper model checking here
-#endif
-
- andl $REQUIRED_MASK1,%edx
- xorl $REQUIRED_MASK1,%edx
- jnz bad
-#endif /* REQUIRED_MASK1 */
-
- popfl
- xor %eax,%eax
- ret
-
-bad:
- popfl
- movl $1,%eax
- ret
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
index c12720d7cbc..72042bb7ec9 100644
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -362,7 +362,7 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
}
#endif
-static void vmi_allocate_pt(u32 pfn)
+static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn)
{
vmi_set_page_type(pfn, VMI_PAGE_L1);
vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
@@ -891,7 +891,7 @@ static inline int __init activate_vmi(void)
paravirt_ops.setup_boot_clock = vmi_time_bsp_init;
paravirt_ops.setup_secondary_clock = vmi_time_ap_init;
#endif
- paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
+ paravirt_ops.sched_clock = vmi_sched_clock;
paravirt_ops.get_cpu_khz = vmi_cpu_khz;
/* We have true wallclock functions; disable CMOS clock sync */
diff --git a/arch/i386/kernel/vmiclock.c b/arch/i386/kernel/vmiclock.c
index 26a37f8a876..b1b5ab08b26 100644
--- a/arch/i386/kernel/vmiclock.c
+++ b/arch/i386/kernel/vmiclock.c
@@ -32,6 +32,7 @@
#include <asm/apicdef.h>
#include <asm/apic.h>
#include <asm/timer.h>
+#include <asm/i8253.h>
#include <irq_vectors.h>
#include "io_ports.h"
@@ -64,10 +65,10 @@ int vmi_set_wallclock(unsigned long now)
return 0;
}
-/* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */
-unsigned long long vmi_get_sched_cycles(void)
+/* paravirt_ops.sched_clock = vmi_sched_clock */
+unsigned long long vmi_sched_clock(void)
{
- return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
+ return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE));
}
/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */
@@ -142,6 +143,7 @@ static void vmi_timer_set_mode(enum clock_event_mode mode,
switch (mode) {
case CLOCK_EVT_MODE_ONESHOT:
+ case CLOCK_EVT_MODE_RESUME:
break;
case CLOCK_EVT_MODE_PERIODIC:
cycles_per_hz = vmi_timer_ops.get_cycle_frequency();
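
The vmi_sched_clock() change above matters because sched_clock() must return nanoseconds, not raw cycles; the conversion it now shares with the native path is a fixed-point multiply. A sketch of the scheme in arch/i386/kernel/tsc.c (where CYC2NS_SCALE_FACTOR is 10):

	#define CYC2NS_SCALE_FACTOR 10	/* scale premultiplied by 2^10 */

	static unsigned long cyc2ns_scale;

	static void set_cyc2ns_scale(unsigned long cpu_khz)
	{
		/* ns per cycle = 10^6 / cpu_khz, kept as a 2^10 fixed-point value */
		cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR) / cpu_khz;
	}

	static inline unsigned long long cycles_2_ns(unsigned long long cyc)
	{
		/* at 1 GHz (cpu_khz == 10^6): scale == 1024, so one cycle == 1 ns */
		return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
	}
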
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index aa87b06c7c8..7d72cce0052 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -60,7 +60,9 @@ SECTIONS
__stop___ex_table = .;
}
- BUG_TABLE
+ NOTES :text :note
+
+ BUG_TABLE :text
. = ALIGN(4);
.tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {
@@ -88,6 +90,7 @@ SECTIONS
. = ALIGN(4096);
.data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
+ *(.data.page_aligned)
*(.data.idt)
}
@@ -180,6 +183,7 @@ SECTIONS
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
__per_cpu_start = .;
*(.data.percpu)
+ *(.data.percpu.shared_aligned)
__per_cpu_end = .;
}
. = ALIGN(4096);
@@ -206,6 +210,4 @@ SECTIONS
STABS_DEBUG
DWARF_DEBUG
-
- NOTES
}
diff --git a/arch/i386/kernel/vsyscall-note.S b/arch/i386/kernel/vsyscall-note.S
index d4b5be4f3d5..07c0daf7823 100644
--- a/arch/i386/kernel/vsyscall-note.S
+++ b/arch/i386/kernel/vsyscall-note.S
@@ -3,23 +3,43 @@
* Here we can supply some information useful to userland.
*/
-#include <linux/uts.h>
#include <linux/version.h>
+#include <linux/elfnote.h>
-#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \
- .section name, flags; \
- .balign 4; \
- .long 1f - 0f; /* name length */ \
- .long 3f - 2f; /* data length */ \
- .long type; /* note type */ \
-0: .asciz vendor; /* vendor name */ \
-1: .balign 4; \
-2:
+/* Ideally this would use UTS_NAME, but using a quoted string here
+ doesn't work. Remember to change this when changing the
+ kernel's name. */
+ELFNOTE_START(Linux, 0, "a")
+ .long LINUX_VERSION_CODE
+ELFNOTE_END
-#define ASM_ELF_NOTE_END \
-3: .balign 4; /* pad out section */ \
- .previous
+#ifdef CONFIG_XEN
+/*
+ * Add a special note telling glibc's dynamic linker a fake hardware
+ * flavor that it will use to choose the search path for libraries in the
+ * same way it uses real hardware capabilities like "mmx".
+ * We supply "nosegneg" as the fake capability, to indicate that we
+ * do not like negative offsets in instructions using segment overrides,
+ * since we implement those inefficiently. This makes it possible to
+ * install libraries optimized to avoid those access patterns somewhere
+ * like /lib/i686/tls/nosegneg. Note that an /etc/ld.so.conf.d/ file
+ * corresponding to the bits here is needed to make ldconfig work right.
+ * It should contain:
+ * hwcap 1 nosegneg
+ * to match the mapping of bit to name that we give here.
+ *
+ * At runtime, the fake hardware feature will be considered to be present
+ * if its bit is set in the mask word. So, we start with the mask 0, and
+ * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen.
+ */
- ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0)
- .long LINUX_VERSION_CODE
- ASM_ELF_NOTE_END
+#include "../xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */
+
+ .globl VDSO_NOTE_MASK
+ELFNOTE_START(GNU, 2, "a")
+ .long 1 /* ncaps */
+VDSO_NOTE_MASK:
+ .long 0 /* mask */
+ .byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */
+ELFNOTE_END
+#endif
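
Both the deleted ASM_ELF_NOTE_BEGIN macro and its ELFNOTE_START replacement emit the same on-disk structure: an ELF note header followed by a NUL-terminated vendor name and a 4-byte-aligned payload. For the kernel-version note above, the bytes line up like this hypothetical C view (a sketch, not a kernel declaration):

	#include <stdint.h>

	/* Shape of the note that ELFNOTE_START(Linux, 0, "a") emits. */
	struct linux_version_note {
		uint32_t namesz;	/* 6 = sizeof("Linux"), including the NUL */
		uint32_t descsz;	/* 4 = one .long of payload */
		uint32_t type;		/* 0, the type argument above */
		char     name[8];	/* "Linux\0" padded to 4-byte alignment */
		uint32_t desc;		/* LINUX_VERSION_CODE */
	};

The Xen "nosegneg" note has the same shape with vendor "GNU", type 2, and an {ncaps, mask, bit + name} payload, which is how glibc's dynamic linker locates the mask word that boot code sets when running under Xen.
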