From 23d6f82bd1f07886b3a974c5193baa715475dd37 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:23 +0200 Subject: i386: move kernel/acpi Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/Makefile | 5 + arch/x86/kernel/acpi/Makefile_32 | 10 + arch/x86/kernel/acpi/boot.c | 1326 ++++++++++++++++++++++++++++++++++ arch/x86/kernel/acpi/cstate.c | 164 +++++ arch/x86/kernel/acpi/earlyquirk_32.c | 84 +++ arch/x86/kernel/acpi/processor.c | 75 ++ arch/x86/kernel/acpi/sleep_32.c | 110 +++ arch/x86/kernel/acpi/wakeup_32.S | 321 ++++++++ 8 files changed, 2095 insertions(+) create mode 100644 arch/x86/kernel/acpi/Makefile create mode 100644 arch/x86/kernel/acpi/Makefile_32 create mode 100644 arch/x86/kernel/acpi/boot.c create mode 100644 arch/x86/kernel/acpi/cstate.c create mode 100644 arch/x86/kernel/acpi/earlyquirk_32.c create mode 100644 arch/x86/kernel/acpi/processor.c create mode 100644 arch/x86/kernel/acpi/sleep_32.c create mode 100644 arch/x86/kernel/acpi/wakeup_32.S (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile new file mode 100644 index 00000000000..dd4eb7cef2b --- /dev/null +++ b/arch/x86/kernel/acpi/Makefile @@ -0,0 +1,5 @@ +ifeq ($(CONFIG_X86_32),y) +include ${srctree}/arch/x86/kernel/acpi/Makefile_32 +else +include ${srctree}/arch/x86_64/kernel/acpi/Makefile_64 +endif diff --git a/arch/x86/kernel/acpi/Makefile_32 b/arch/x86/kernel/acpi/Makefile_32 new file mode 100644 index 00000000000..a4852a2e919 --- /dev/null +++ b/arch/x86/kernel/acpi/Makefile_32 @@ -0,0 +1,10 @@ +obj-$(CONFIG_ACPI) += boot.o +ifneq ($(CONFIG_PCI),) +obj-$(CONFIG_X86_IO_APIC) += earlyquirk_32.o +endif +obj-$(CONFIG_ACPI_SLEEP) += sleep_32.o wakeup_32.o + +ifneq ($(CONFIG_ACPI_PROCESSOR),) +obj-y += cstate.o processor.o +endif + diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c new file mode 100644 index 00000000000..cacdd883bf2 --- /dev/null +++ 
b/arch/x86/kernel/acpi/boot.c @@ -0,0 +1,1326 @@ +/* + * boot.c - Architecture-Specific Low-Level ACPI Boot Support + * + * Copyright (C) 2001, 2002 Paul Diefenbaugh + * Copyright (C) 2001 Jun Nakajima + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static int __initdata acpi_force = 0; + +#ifdef CONFIG_ACPI +int acpi_disabled = 0; +#else +int acpi_disabled = 1; +#endif +EXPORT_SYMBOL(acpi_disabled); + +#ifdef CONFIG_X86_64 + +#include + +static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } + + +#else /* X86 */ + +#ifdef CONFIG_X86_LOCAL_APIC +#include +#include +#endif /* CONFIG_X86_LOCAL_APIC */ + +#endif /* X86 */ + +#define BAD_MADT_ENTRY(entry, end) ( \ + (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ + ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) + +#define PREFIX "ACPI: " + +int acpi_noirq; /* skip ACPI IRQ initialization */ +int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization 
*/ +int acpi_ht __initdata = 1; /* enable HT */ + +int acpi_lapic; +int acpi_ioapic; +int acpi_strict; +EXPORT_SYMBOL(acpi_strict); + +u8 acpi_sci_flags __initdata; +int acpi_sci_override_gsi __initdata; +int acpi_skip_timer_override __initdata; +int acpi_use_timer_override __initdata; + +#ifdef CONFIG_X86_LOCAL_APIC +static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; +#endif + +#ifndef __HAVE_ARCH_CMPXCHG +#warning ACPI uses CMPXCHG, i486 and later hardware +#endif + +/* -------------------------------------------------------------------------- + Boot-time Configuration + -------------------------------------------------------------------------- */ + +/* + * The default interrupt routing model is PIC (8259). This gets + * overridden if IOAPICs are enumerated (below). + */ +enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; + +#ifdef CONFIG_X86_64 + +/* rely on all ACPI tables being in the direct mapping */ +char *__acpi_map_table(unsigned long phys_addr, unsigned long size) +{ + if (!phys_addr || !size) + return NULL; + + if (phys_addr+size <= (end_pfn_map << PAGE_SHIFT) + PAGE_SIZE) + return __va(phys_addr); + + return NULL; +} + +#else + +/* + * Temporarily use the virtual area starting from FIX_ACPI_END, + * to map the target physical address. The problem is that set_fixmap() + * provides a single page, and it is possible that the page is not + * sufficient. + * By using this area, we can map the FIX_ACPI_BEGIN..FIX_ACPI_END pages + * temporarily, i.e. until the next __acpi_map_table() call. + * + * Important Safety Note: The fixmap page numbers are *subtracted* + * from the fixed base. That's why we start at FIX_ACPI_END and + * count idx down while incrementing the phys address. 
+ */ +char *__acpi_map_table(unsigned long phys, unsigned long size) +{ + unsigned long base, offset, mapped_size; + int idx; + + if (phys + size < 8 * 1024 * 1024) + return __va(phys); + + offset = phys & (PAGE_SIZE - 1); + mapped_size = PAGE_SIZE - offset; + set_fixmap(FIX_ACPI_END, phys); + base = fix_to_virt(FIX_ACPI_END); + + /* + * Most cases can be covered by the below. + */ + idx = FIX_ACPI_END; + while (mapped_size < size) { + if (--idx < FIX_ACPI_BEGIN) + return NULL; /* cannot handle this */ + phys += PAGE_SIZE; + set_fixmap(idx, phys); + mapped_size += PAGE_SIZE; + } + + return ((unsigned char *)base + offset); +} +#endif + +#ifdef CONFIG_PCI_MMCONFIG +/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ +struct acpi_mcfg_allocation *pci_mmcfg_config; +int pci_mmcfg_config_num; + +int __init acpi_parse_mcfg(struct acpi_table_header *header) +{ + struct acpi_table_mcfg *mcfg; + unsigned long i; + int config_size; + + if (!header || header->length < sizeof(struct acpi_table_mcfg)) + return -EINVAL; + + mcfg = (struct acpi_table_mcfg *)header; + + /* count allocation entries (length >= fixed part, checked above) */ + pci_mmcfg_config_num = 0; + i = header->length - sizeof(struct acpi_table_mcfg); + while (i >= sizeof(struct acpi_mcfg_allocation)) { + ++pci_mmcfg_config_num; + i -= sizeof(struct acpi_mcfg_allocation); + } + if (pci_mmcfg_config_num == 0) { + printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); + return -ENODEV; + } + + config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config); + pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); + if (!pci_mmcfg_config) { + printk(KERN_WARNING PREFIX + "No memory for MCFG config tables\n"); + return -ENOMEM; + } + + memcpy(pci_mmcfg_config, &mcfg[1], config_size); + for (i = 0; i < pci_mmcfg_config_num; ++i) { + if (pci_mmcfg_config[i].address > 0xFFFFFFFF) { + printk(KERN_ERR PREFIX + "MMCONFIG not in low 4GB of memory\n"); + kfree(pci_mmcfg_config); + pci_mmcfg_config_num = 0; + return -ENODEV; + } + } + + return 0; +} +#endif /* CONFIG_PCI_MMCONFIG 
*/ + +#ifdef CONFIG_X86_LOCAL_APIC +static int __init acpi_parse_madt(struct acpi_table_header *table) +{ + struct acpi_table_madt *madt = NULL; + + if (!cpu_has_apic) + return -EINVAL; + + madt = (struct acpi_table_madt *)table; + if (!madt) { + printk(KERN_WARNING PREFIX "Unable to map MADT\n"); + return -ENODEV; + } + + if (madt->address) { + acpi_lapic_addr = (u64) madt->address; + + printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n", + madt->address); + } + + acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); + + return 0; +} + +static int __init +acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) +{ + struct acpi_madt_local_apic *processor = NULL; + + processor = (struct acpi_madt_local_apic *)header; + + if (BAD_MADT_ENTRY(processor, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + /* + * We need to register disabled CPU as well to permit + * counting disabled CPUs. This allows us to size + * cpus_possible_map more accurately, to permit + * to not preallocating memory for all NR_CPUS + * when we use CPU hotplug. + */ + mp_register_lapic(processor->id, /* APIC ID */ + processor->lapic_flags & ACPI_MADT_ENABLED); /* Enabled? 
*/ + + return 0; +} + +static int __init +acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, + const unsigned long end) +{ + struct acpi_madt_local_apic_override *lapic_addr_ovr = NULL; + + lapic_addr_ovr = (struct acpi_madt_local_apic_override *)header; + + if (BAD_MADT_ENTRY(lapic_addr_ovr, end)) + return -EINVAL; + + acpi_lapic_addr = lapic_addr_ovr->address; + + return 0; +} + +static int __init +acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end) +{ + struct acpi_madt_local_apic_nmi *lapic_nmi = NULL; + + lapic_nmi = (struct acpi_madt_local_apic_nmi *)header; + + if (BAD_MADT_ENTRY(lapic_nmi, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + if (lapic_nmi->lint != 1) + printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); + + return 0; +} + +#endif /*CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_X86_IO_APIC + +static int __init +acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) +{ + struct acpi_madt_io_apic *ioapic = NULL; + + ioapic = (struct acpi_madt_io_apic *)header; + + if (BAD_MADT_ENTRY(ioapic, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + mp_register_ioapic(ioapic->id, + ioapic->address, ioapic->global_irq_base); + + return 0; +} + +/* + * Parse Interrupt Source Override for the ACPI SCI + */ +static void __init acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) +{ + if (trigger == 0) /* compatible SCI trigger is level */ + trigger = 3; + + if (polarity == 0) /* compatible SCI polarity is low */ + polarity = 3; + + /* Command-line over-ride via acpi_sci= */ + if (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) + trigger = (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2; + + if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK) + polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; + + /* + * mp_config_acpi_legacy_irqs() already setup IRQs < 16 + * If GSI is < 16, this will update its flags, + * else it will create a new mp_irqs[] entry. 
+ */ + mp_override_legacy_irq(gsi, polarity, trigger, gsi); + + /* + * stash over-ride to indicate we've been here + * and for later update of acpi_gbl_FADT + */ + acpi_sci_override_gsi = gsi; + return; +} + +static int __init +acpi_parse_int_src_ovr(struct acpi_subtable_header * header, + const unsigned long end) +{ + struct acpi_madt_interrupt_override *intsrc = NULL; + + intsrc = (struct acpi_madt_interrupt_override *)header; + + if (BAD_MADT_ENTRY(intsrc, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { + acpi_sci_ioapic_setup(intsrc->global_irq, + intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, + (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2); + return 0; + } + + if (acpi_skip_timer_override && + intsrc->source_irq == 0 && intsrc->global_irq == 2) { + printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); + return 0; + } + + mp_override_legacy_irq(intsrc->source_irq, + intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, + (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2, + intsrc->global_irq); + + return 0; +} + +static int __init +acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end) +{ + struct acpi_madt_nmi_source *nmi_src = NULL; + + nmi_src = (struct acpi_madt_nmi_source *)header; + + if (BAD_MADT_ENTRY(nmi_src, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + /* TBD: Support nmisrc entries? */ + + return 0; +} + +#endif /* CONFIG_X86_IO_APIC */ + +/* + * acpi_pic_sci_set_trigger() + * + * use ELCR to set PIC-mode trigger type for SCI + * + * If a PIC-mode SCI is not recognized or gives spurious IRQ7's + * it may require Edge Trigger -- use "acpi_sci=edge" + * + * Port 0x4d0-4d1 are ELCR1 and ELCR2, the Edge/Level Control Registers + * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge. 
+ * ELCR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0) + * ELCR2 is IRQ's 8-15 (IRQ 8, 13 must be 0) + */ + +void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) +{ + unsigned int mask = 1 << irq; + unsigned int old, new; + + /* Real old ELCR mask */ + old = inb(0x4d0) | (inb(0x4d1) << 8); + + /* + * If we use ACPI to set PCI irq's, then we should clear ELCR + * since we will set it correctly as we enable the PCI irq + * routing. + */ + new = acpi_noirq ? old : 0; + + /* + * Update SCI information in the ELCR, it isn't in the PCI + * routing tables.. + */ + switch (trigger) { + case 1: /* Edge - clear */ + new &= ~mask; + break; + case 3: /* Level - set */ + new |= mask; + break; + } + + if (old == new) + return; + + printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old); + outb(new, 0x4d0); + outb(new >> 8, 0x4d1); +} + +int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) +{ + *irq = gsi; + return 0; +} + +/* + * success: return IRQ number (>=0) + * failure: return < 0 + */ +int acpi_register_gsi(u32 gsi, int triggering, int polarity) +{ + unsigned int irq; + unsigned int plat_gsi = gsi; + +#ifdef CONFIG_PCI + /* + * Make sure all (legacy) PCI IRQs are set as level-triggered. 
+ */ + if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { + extern void eisa_set_level_irq(unsigned int irq); + + if (triggering == ACPI_LEVEL_SENSITIVE) + eisa_set_level_irq(gsi); + } +#endif + +#ifdef CONFIG_X86_IO_APIC + if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { + plat_gsi = mp_register_gsi(gsi, triggering, polarity); + } +#endif + acpi_gsi_to_irq(plat_gsi, &irq); + return irq; +} + +EXPORT_SYMBOL(acpi_register_gsi); + +/* + * ACPI based hotplug support for CPU + */ +#ifdef CONFIG_ACPI_HOTPLUG_CPU +int acpi_map_lsapic(acpi_handle handle, int *pcpu) +{ + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + union acpi_object *obj; + struct acpi_madt_local_apic *lapic; + cpumask_t tmp_map, new_map; + u8 physid; + int cpu; + + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) + return -EINVAL; + + if (!buffer.length || !buffer.pointer) + return -EINVAL; + + obj = buffer.pointer; + if (obj->type != ACPI_TYPE_BUFFER || + obj->buffer.length < sizeof(*lapic)) { + kfree(buffer.pointer); + return -EINVAL; + } + + lapic = (struct acpi_madt_local_apic *)obj->buffer.pointer; + + if (lapic->header.type != ACPI_MADT_TYPE_LOCAL_APIC || + !(lapic->lapic_flags & ACPI_MADT_ENABLED)) { + kfree(buffer.pointer); + return -EINVAL; + } + + physid = lapic->id; + + kfree(buffer.pointer); + buffer.length = ACPI_ALLOCATE_BUFFER; + buffer.pointer = NULL; + /* lapic now dangles; its ENABLED bit was checked above */ + tmp_map = cpu_present_map; + mp_register_lapic(physid, ACPI_MADT_ENABLED); + + /* + * If mp_register_lapic successfully generates a new logical cpu + * number, then the following will get us exactly what was mapped + */ + cpus_andnot(new_map, cpu_present_map, tmp_map); + if (cpus_empty(new_map)) { + printk ("Unable to map lapic to logical cpu number\n"); + return -EINVAL; + } + + cpu = first_cpu(new_map); + + *pcpu = cpu; + return 0; +} + +EXPORT_SYMBOL(acpi_map_lsapic); + +int acpi_unmap_lsapic(int cpu) +{ + x86_cpu_to_apicid[cpu] = -1; + cpu_clear(cpu, cpu_present_map); + 
num_processors--; + + return (0); +} + +EXPORT_SYMBOL(acpi_unmap_lsapic); +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ + +int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) +{ + /* TBD */ + return -EINVAL; +} + +EXPORT_SYMBOL(acpi_register_ioapic); + +int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) +{ + /* TBD */ + return -EINVAL; +} + +EXPORT_SYMBOL(acpi_unregister_ioapic); + +static unsigned long __init +acpi_scan_rsdp(unsigned long start, unsigned long length) +{ + unsigned long offset = 0; + unsigned long sig_len = sizeof("RSD PTR ") - 1; + + /* + * Scan all 16-byte boundaries of the physical memory region for the + * RSDP signature. + */ + for (offset = 0; offset < length; offset += 16) { + if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len)) + continue; + return (start + offset); + } + + return 0; +} + +static int __init acpi_parse_sbf(struct acpi_table_header *table) +{ + struct acpi_table_boot *sb; + + sb = (struct acpi_table_boot *)table; + if (!sb) { + printk(KERN_WARNING PREFIX "Unable to map SBF\n"); + return -ENODEV; + } + + sbf_port = sb->cmos_index; /* Save CMOS port */ + + return 0; +} + +#ifdef CONFIG_HPET_TIMER +#include + +static struct __initdata resource *hpet_res; + +static int __init acpi_parse_hpet(struct acpi_table_header *table) +{ + struct acpi_table_hpet *hpet_tbl; + + hpet_tbl = (struct acpi_table_hpet *)table; + if (!hpet_tbl) { + printk(KERN_WARNING PREFIX "Unable to map HPET\n"); + return -ENODEV; + } + + if (hpet_tbl->address.space_id != ACPI_SPACE_MEM) { + printk(KERN_WARNING PREFIX "HPET timers must be located in " + "memory.\n"); + return -1; + } + + hpet_address = hpet_tbl->address.address; + printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", + hpet_tbl->id, hpet_address); + + /* + * Allocate and initialize the HPET firmware resource for adding into + * the resource tree during the lateinit timeframe. 
+ */ +#define HPET_RESOURCE_NAME_SIZE 9 + hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); + + if (!hpet_res) + return 0; + + memset(hpet_res, 0, sizeof(*hpet_res)); + hpet_res->name = (void *)&hpet_res[1]; + hpet_res->flags = IORESOURCE_MEM; + snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u", + hpet_tbl->sequence); + + hpet_res->start = hpet_address; + hpet_res->end = hpet_address + (1 * 1024) - 1; + + return 0; +} + +/* + * hpet_insert_resource inserts the HPET resources used into the resource + * tree. + */ +static __init int hpet_insert_resource(void) +{ + if (!hpet_res) + return 1; + + return insert_resource(&iomem_resource, hpet_res); +} + +late_initcall(hpet_insert_resource); + +#else +#define acpi_parse_hpet NULL +#endif + +static int __init acpi_parse_fadt(struct acpi_table_header *table) +{ + +#ifdef CONFIG_X86_PM_TIMER + /* detect the location of the ACPI PM Timer */ + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) { + /* FADT rev. 2 */ + if (acpi_gbl_FADT.xpm_timer_block.space_id != + ACPI_ADR_SPACE_SYSTEM_IO) + return 0; + + pmtmr_ioport = acpi_gbl_FADT.xpm_timer_block.address; + /* + * "X" fields are optional extensions to the original V1.0 + * fields, so we must selectively expand V1.0 fields if the + * corresponding X field is zero. + */ + if (!pmtmr_ioport) + pmtmr_ioport = acpi_gbl_FADT.pm_timer_block; + } else { + /* FADT rev. 1 */ + pmtmr_ioport = acpi_gbl_FADT.pm_timer_block; + } + if (pmtmr_ioport) + printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", + pmtmr_ioport); +#endif + return 0; +} + +unsigned long __init acpi_find_rsdp(void) +{ + unsigned long rsdp_phys = 0; + + if (efi_enabled) { + if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) + return efi.acpi20; + else if (efi.acpi != EFI_INVALID_TABLE_ADDR) + return efi.acpi; + } + /* + * Scan memory looking for the RSDP signature. First search EBDA (low + * memory) paragraphs and then search upper memory (E0000-FFFFF). 
+ */ + rsdp_phys = acpi_scan_rsdp(0, 0x400); + if (!rsdp_phys) + rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000); + + return rsdp_phys; +} + +#ifdef CONFIG_X86_LOCAL_APIC +/* + * Parse LAPIC entries in MADT + * returns 0 on success, < 0 on error + */ +static int __init acpi_parse_madt_lapic_entries(void) +{ + int count; + + if (!cpu_has_apic) + return -ENODEV; + + /* + * Note that the LAPIC address is obtained from the MADT (32-bit value) + * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value). + */ + + count = + acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, + acpi_parse_lapic_addr_ovr, 0); + if (count < 0) { + printk(KERN_ERR PREFIX + "Error parsing LAPIC address override entry\n"); + return count; + } + + mp_register_lapic_address(acpi_lapic_addr); + + count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_parse_lapic, + MAX_APICS); + if (!count) { + printk(KERN_ERR PREFIX "No LAPIC entries present\n"); + /* TBD: Cleanup to allow fallback to MPS */ + return -ENODEV; + } else if (count < 0) { + printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n"); + /* TBD: Cleanup to allow fallback to MPS */ + return count; + } + + count = + acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0); + if (count < 0) { + printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); + /* TBD: Cleanup to allow fallback to MPS */ + return count; + } + return 0; +} +#endif /* CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_X86_IO_APIC +/* + * Parse IOAPIC related entries in MADT + * returns 0 on success, < 0 on error + */ +static int __init acpi_parse_madt_ioapic_entries(void) +{ + int count; + + /* + * ACPI interpreter is required to complete interrupt setup, + * so if it is off, don't enumerate the io-apics with ACPI. 
+ * If MPS is present, it will handle them, + * otherwise the system will stay in PIC mode + */ + if (acpi_disabled || acpi_noirq) { + return -ENODEV; + } + + if (!cpu_has_apic) + return -ENODEV; + + /* + * if "noapic" boot option, don't look for IO-APICs + */ + if (skip_ioapic_setup) { + printk(KERN_INFO PREFIX "Skipping IOAPIC probe " + "due to 'noapic' option.\n"); + return -ENODEV; + } + + count = + acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic, + MAX_IO_APICS); + if (!count) { + printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); + return -ENODEV; + } else if (count < 0) { + printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n"); + return count; + } + + count = + acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, + NR_IRQ_VECTORS); + if (count < 0) { + printk(KERN_ERR PREFIX + "Error parsing interrupt source overrides entry\n"); + /* TBD: Cleanup to allow fallback to MPS */ + return count; + } + + /* + * If BIOS did not supply an INT_SRC_OVR for the SCI + * pretend we got one so we can set the SCI flags. 
+ */ + if (!acpi_sci_override_gsi) + acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0); + + /* Fill in identity legacy mappings where no override */ + mp_config_acpi_legacy_irqs(); + + count = + acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, + NR_IRQ_VECTORS); + if (count < 0) { + printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); + /* TBD: Cleanup to allow fallback to MPS */ + return count; + } + + return 0; +} +#else +static inline int acpi_parse_madt_ioapic_entries(void) +{ + return -1; +} +#endif /* !CONFIG_X86_IO_APIC */ + +static void __init acpi_process_madt(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + int error; + + if (!acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { + + /* + * Parse MADT LAPIC entries + */ + error = acpi_parse_madt_lapic_entries(); + if (!error) { + acpi_lapic = 1; + +#ifdef CONFIG_X86_GENERICARCH + generic_bigsmp_probe(); +#endif + /* + * Parse MADT IO-APIC entries + */ + error = acpi_parse_madt_ioapic_entries(); + if (!error) { + acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; + acpi_irq_balance_set(NULL); + acpi_ioapic = 1; + + smp_found_config = 1; + setup_apic_routing(); + } + } + if (error == -EINVAL) { + /* + * Dell Precision Workstation 410, 610 come here. 
+ */ + printk(KERN_ERR PREFIX + "Invalid BIOS MADT, disabling ACPI\n"); + disable_acpi(); + } + } +#endif + return; +} + +#ifdef __i386__ + +static int __init disable_acpi_irq(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n", + d->ident); + acpi_noirq_set(); + } + return 0; +} + +static int __init disable_acpi_pci(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n", + d->ident); + acpi_disable_pci(); + } + return 0; +} + +static int __init dmi_disable_acpi(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: acpi off\n", d->ident); + disable_acpi(); + } else { + printk(KERN_NOTICE + "Warning: DMI blacklist says broken, but acpi forced\n"); + } + return 0; +} + +/* + * Limit ACPI to CPU enumeration for HT + */ +static int __init force_acpi_ht(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", + d->ident); + disable_acpi(); + acpi_ht = 1; + } else { + printk(KERN_NOTICE + "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); + } + return 0; +} + +/* + * If your system is blacklisted here, but you find that acpi=force + * works for you, please contact acpi-devel@sourceforge.net + */ +static struct dmi_system_id __initdata acpi_dmi_table[] = { + /* + * Boxes that need ACPI disabled + */ + { + .callback = dmi_disable_acpi, + .ident = "IBM Thinkpad", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "2629H1G"), + }, + }, + + /* + * Boxes that need acpi=ht + */ + { + .callback = force_acpi_ht, + .ident = "FSC Primergy T850", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "HP VISUALIZE NT Workstation", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT 
Workstation"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "Compaq Workstation W8000", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), + DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ASUS P4B266", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P4B266"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ASUS P2B-DS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ASUS CUR-DLS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ABIT i440BX-W83977", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ABIT "), + DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM Bladecenter", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM eServer xSeries 360", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM eserver xSeries 330", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM eserver xSeries 440", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), + }, + }, + + /* + * Boxes that need ACPI PCI IRQ routing disabled + */ + { + .callback = disable_acpi_irq, + .ident = "ASUS A7V", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), + DMI_MATCH(DMI_BOARD_NAME, ""), + /* newer BIOS, Revision 1011, does work */ + DMI_MATCH(DMI_BIOS_VERSION, + "ASUS A7V ACPI 
BIOS Revision 1007"), + }, + }, + { + /* + * Latest BIOS for IBM 600E (1.16) has bad pcinum + * for LPC bridge, which is needed for the PCI + * interrupt links to work. DSDT fix is in bug 5966. + * 2645, 2646 model numbers are shared with 600/600E/600X + */ + .callback = disable_acpi_irq, + .ident = "IBM Thinkpad 600 Series 2645", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "2645"), + }, + }, + { + .callback = disable_acpi_irq, + .ident = "IBM Thinkpad 600 Series 2646", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "2646"), + }, + }, + /* + * Boxes that need ACPI PCI IRQ routing and PCI scan disabled + */ + { /* _BBN 0 bug */ + .callback = disable_acpi_pci, + .ident = "ASUS PR-DLS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"), + DMI_MATCH(DMI_BIOS_VERSION, + "ASUS PR-DLS ACPI BIOS Revision 1010"), + DMI_MATCH(DMI_BIOS_DATE, "03/21/2003") + }, + }, + { + .callback = disable_acpi_pci, + .ident = "Acer TravelMate 36x Laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), + }, + }, + {} +}; + +#endif /* __i386__ */ + +/* + * acpi_boot_table_init() and acpi_boot_init() + * called from setup_arch(), always. + * 1. checksums all tables + * 2. enumerates lapics + * 3. enumerates io-apics + * + * acpi_table_init() is separate to allow reading SRAT without + * other side effects. + * + * side effects of acpi_boot_init: + * acpi_lapic = 1 if LAPIC found + * acpi_ioapic = 1 if IOAPIC found + * if (acpi_lapic && acpi_ioapic) smp_found_config = 1; + * if acpi_blacklisted() acpi_disabled = 1; + * acpi_irq_model=... + * ... 
+ * + * return value: (currently ignored) + * 0: success + * !0: failure + */ + +int __init acpi_boot_table_init(void) +{ + int error; + +#ifdef __i386__ + dmi_check_system(acpi_dmi_table); +#endif + + /* + * If acpi_disabled, bail out + * One exception: acpi=ht continues far enough to enumerate LAPICs + */ + if (acpi_disabled && !acpi_ht) + return 1; + + /* + * Initialize the ACPI boot-time table parser. + */ + error = acpi_table_init(); + if (error) { + disable_acpi(); + return error; + } + + acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); + + /* + * blacklist may disable ACPI entirely + */ + error = acpi_blacklisted(); + if (error) { + if (acpi_force) { + printk(KERN_WARNING PREFIX "acpi=force override\n"); + } else { + printk(KERN_WARNING PREFIX "Disabling ACPI support\n"); + disable_acpi(); + return error; + } + } + + return 0; +} + +int __init acpi_boot_init(void) +{ + /* + * If acpi_disabled, bail out + * One exception: acpi=ht continues far enough to enumerate LAPICs + */ + if (acpi_disabled && !acpi_ht) + return 1; + + acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); + + /* + * set sci_int and PM timer address + */ + acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt); + + /* + * Process the Multiple APIC Description Table (MADT), if present + */ + acpi_process_madt(); + + acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet); + + return 0; +} + +static int __init parse_acpi(char *arg) +{ + if (!arg) + return -EINVAL; + + /* "acpi=off" disables both ACPI table parsing and interpreter */ + if (strcmp(arg, "off") == 0) { + disable_acpi(); + } + /* acpi=force to over-ride black-list */ + else if (strcmp(arg, "force") == 0) { + acpi_force = 1; + acpi_ht = 1; + acpi_disabled = 0; + } + /* acpi=strict disables out-of-spec workarounds */ + else if (strcmp(arg, "strict") == 0) { + acpi_strict = 1; + } + /* Limit ACPI just to boot-time to enable HT */ + else if (strcmp(arg, "ht") == 0) { + if (!acpi_force) + disable_acpi(); + acpi_ht = 1; + } + /* "acpi=noirq" disables 
ACPI interrupt routing */ + else if (strcmp(arg, "noirq") == 0) { + acpi_noirq_set(); + } else { + /* Core will printk when we return error. */ + return -EINVAL; + } + return 0; +} +early_param("acpi", parse_acpi); + +/* FIXME: Using pci= for an ACPI parameter is a travesty. */ +static int __init parse_pci(char *arg) +{ + if (arg && strcmp(arg, "noacpi") == 0) + acpi_disable_pci(); + return 0; +} +early_param("pci", parse_pci); + +#ifdef CONFIG_X86_IO_APIC +static int __init parse_acpi_skip_timer_override(char *arg) +{ + acpi_skip_timer_override = 1; + return 0; +} +early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override); + +static int __init parse_acpi_use_timer_override(char *arg) +{ + acpi_use_timer_override = 1; + return 0; +} +early_param("acpi_use_timer_override", parse_acpi_use_timer_override); +#endif /* CONFIG_X86_IO_APIC */ + +static int __init setup_acpi_sci(char *s) +{ + if (!s) + return -EINVAL; + if (!strcmp(s, "edge")) + acpi_sci_flags = ACPI_MADT_TRIGGER_EDGE | + (acpi_sci_flags & ~ACPI_MADT_TRIGGER_MASK); + else if (!strcmp(s, "level")) + acpi_sci_flags = ACPI_MADT_TRIGGER_LEVEL | + (acpi_sci_flags & ~ACPI_MADT_TRIGGER_MASK); + else if (!strcmp(s, "high")) + acpi_sci_flags = ACPI_MADT_POLARITY_ACTIVE_HIGH | + (acpi_sci_flags & ~ACPI_MADT_POLARITY_MASK); + else if (!strcmp(s, "low")) + acpi_sci_flags = ACPI_MADT_POLARITY_ACTIVE_LOW | + (acpi_sci_flags & ~ACPI_MADT_POLARITY_MASK); + else + return -EINVAL; + return 0; +} +early_param("acpi_sci", setup_acpi_sci); + +int __acpi_acquire_global_lock(unsigned int *lock) +{ + unsigned int old, new, val; + do { + old = *lock; + new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1)); + val = cmpxchg(lock, old, new); + } while (unlikely (val != old)); + return (new < 3) ? 
-1 : 0; +} + +int __acpi_release_global_lock(unsigned int *lock) +{ + unsigned int old, new, val; + do { + old = *lock; + new = old & ~0x3; + val = cmpxchg(lock, old, new); + } while (unlikely (val != old)); + return old & 0x1; +} diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c new file mode 100644 index 00000000000..2d39f55d29a --- /dev/null +++ b/arch/x86/kernel/acpi/cstate.c @@ -0,0 +1,164 @@ +/* + * arch/i386/kernel/acpi/cstate.c + * + * Copyright (C) 2005 Intel Corporation + * Venkatesh Pallipadi + * - Added _PDC for SMP C-states on Intel CPUs + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * Initialize bm_flags based on the CPU cache properties + * On SMP it depends on cache configuration + * - When cache is not shared among all CPUs, we flush cache + * before entering C3. + * - When cache is shared among all CPUs, we use bm_check + * mechanism as in UP case + * + * This routine is called only after all the CPUs are online + */ +void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, + unsigned int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + + flags->bm_check = 0; + if (num_online_cpus() == 1) + flags->bm_check = 1; + else if (c->x86_vendor == X86_VENDOR_INTEL) { + /* + * Today all CPUs that support C3 share cache. + * TBD: This needs to look at cache shared map, once + * multi-core detection patch makes to the base. 
+ */ + flags->bm_check = 1; + } +} +EXPORT_SYMBOL(acpi_processor_power_init_bm_check); + +/* The code below handles cstate entry with monitor-mwait pair on Intel*/ + +struct cstate_entry { + struct { + unsigned int eax; + unsigned int ecx; + } states[ACPI_PROCESSOR_MAX_POWER]; +}; +static struct cstate_entry *cpu_cstate_entry; /* per CPU ptr */ + +static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; + +#define MWAIT_SUBSTATE_MASK (0xf) +#define MWAIT_SUBSTATE_SIZE (4) + +#define CPUID_MWAIT_LEAF (5) +#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1) +#define CPUID5_ECX_INTERRUPT_BREAK (0x2) + +#define MWAIT_ECX_INTERRUPT_BREAK (0x1) + +#define NATIVE_CSTATE_BEYOND_HALT (2) + +int acpi_processor_ffh_cstate_probe(unsigned int cpu, + struct acpi_processor_cx *cx, struct acpi_power_register *reg) +{ + struct cstate_entry *percpu_entry; + struct cpuinfo_x86 *c = cpu_data + cpu; + + cpumask_t saved_mask; + int retval; + unsigned int eax, ebx, ecx, edx; + unsigned int edx_part; + unsigned int cstate_type; /* C-state type and not ACPI C-state type */ + unsigned int num_cstate_subtype; + + if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF ) + return -1; + + if (reg->bit_offset != NATIVE_CSTATE_BEYOND_HALT) + return -1; + + percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); + percpu_entry->states[cx->index].eax = 0; + percpu_entry->states[cx->index].ecx = 0; + + /* Make sure we are running on right CPU */ + saved_mask = current->cpus_allowed; + retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (retval) + return -1; + + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); + + /* Check whether this particular cx_type (in CST) is supported or not */ + cstate_type = (cx->address >> MWAIT_SUBSTATE_SIZE) + 1; + edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE); + num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK; + + retval = 0; + if (num_cstate_subtype < (cx->address & MWAIT_SUBSTATE_MASK)) { + retval = -1; + goto out; + } + + /* mwait ecx extensions 
INTERRUPT_BREAK should be supported for C2/C3 */ + if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || + !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) { + retval = -1; + goto out; + } + percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK; + + /* Use the hint in CST */ + percpu_entry->states[cx->index].eax = cx->address; + + if (!mwait_supported[cstate_type]) { + mwait_supported[cstate_type] = 1; + printk(KERN_DEBUG "Monitor-Mwait will be used to enter C-%d " + "state\n", cx->type); + } + +out: + set_cpus_allowed(current, saved_mask); + return retval; +} +EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); + +void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) +{ + unsigned int cpu = smp_processor_id(); + struct cstate_entry *percpu_entry; + + percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); + mwait_idle_with_hints(percpu_entry->states[cx->index].eax, + percpu_entry->states[cx->index].ecx); +} +EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_enter); + +static int __init ffh_cstate_init(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + if (c->x86_vendor != X86_VENDOR_INTEL) + return -1; + + cpu_cstate_entry = alloc_percpu(struct cstate_entry); + return 0; +} + +static void __exit ffh_cstate_exit(void) +{ + free_percpu(cpu_cstate_entry); + cpu_cstate_entry = NULL; +} + +arch_initcall(ffh_cstate_init); +__exitcall(ffh_cstate_exit); diff --git a/arch/x86/kernel/acpi/earlyquirk_32.c b/arch/x86/kernel/acpi/earlyquirk_32.c new file mode 100644 index 00000000000..23f78efc577 --- /dev/null +++ b/arch/x86/kernel/acpi/earlyquirk_32.c @@ -0,0 +1,84 @@ +/* + * Do early PCI probing for bug detection when the main PCI subsystem is + * not up yet. 
+ */ +#include +#include +#include +#include + +#include +#include +#include + +#ifdef CONFIG_ACPI + +static int __init nvidia_hpet_check(struct acpi_table_header *header) +{ + return 0; +} +#endif + +static int __init check_bridge(int vendor, int device) +{ +#ifdef CONFIG_ACPI + static int warned; + /* According to Nvidia all timer overrides are bogus unless HPET + is enabled. */ + if (!acpi_use_timer_override && vendor == PCI_VENDOR_ID_NVIDIA) { + if (!warned && acpi_table_parse(ACPI_SIG_HPET, + nvidia_hpet_check)) { + warned = 1; + acpi_skip_timer_override = 1; + printk(KERN_INFO "Nvidia board " + "detected. Ignoring ACPI " + "timer override.\n"); + printk(KERN_INFO "If you got timer trouble " + "try acpi_use_timer_override\n"); + + } + } +#endif + if (vendor == PCI_VENDOR_ID_ATI && timer_over_8254 == 1) { + timer_over_8254 = 0; + printk(KERN_INFO "ATI board detected. Disabling timer routing " + "over 8254.\n"); + } + return 0; +} + +void __init check_acpi_pci(void) +{ + int num, slot, func; + + /* Assume the machine supports type 1. If not it will + always read ffffffff and should not have any side effect. + Actually a few buggy systems can machine check. 
Allow the user + to disable it by command line option at least -AK */ + if (!early_pci_allowed()) + return; + + /* Poor man's PCI discovery */ + for (num = 0; num < 32; num++) { + for (slot = 0; slot < 32; slot++) { + for (func = 0; func < 8; func++) { + u32 class; + u32 vendor; + class = read_pci_config(num, slot, func, + PCI_CLASS_REVISION); + if (class == 0xffffffff) + break; + + if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) + continue; + + vendor = read_pci_config(num, slot, func, + PCI_VENDOR_ID); + + if (check_bridge(vendor & 0xffff, vendor >> 16)) + return; + } + + } + } +} diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c new file mode 100644 index 00000000000..b54fded4983 --- /dev/null +++ b/arch/x86/kernel/acpi/processor.c @@ -0,0 +1,75 @@ +/* + * arch/i386/kernel/acpi/processor.c + * + * Copyright (C) 2005 Intel Corporation + * Venkatesh Pallipadi + * - Added _PDC for platforms with Intel CPUs + */ + +#include +#include +#include +#include + +#include +#include + +static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) +{ + struct acpi_object_list *obj_list; + union acpi_object *obj; + u32 *buf; + + /* allocate and initialize pdc. It will be used later. 
*/ + obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL); + if (!obj_list) { + printk(KERN_ERR "Memory allocation error\n"); + return; + } + + obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); + if (!obj) { + printk(KERN_ERR "Memory allocation error\n"); + kfree(obj_list); + return; + } + + buf = kmalloc(12, GFP_KERNEL); + if (!buf) { + printk(KERN_ERR "Memory allocation error\n"); + kfree(obj); + kfree(obj_list); + return; + } + + buf[0] = ACPI_PDC_REVISION_ID; + buf[1] = 1; + buf[2] = ACPI_PDC_C_CAPABILITY_SMP; + + if (cpu_has(c, X86_FEATURE_EST)) + buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; + + obj->type = ACPI_TYPE_BUFFER; + obj->buffer.length = 12; + obj->buffer.pointer = (u8 *) buf; + obj_list->count = 1; + obj_list->pointer = obj; + pr->pdc = obj_list; + + return; +} + +/* Initialize _PDC data based on the CPU vendor */ +void arch_acpi_processor_init_pdc(struct acpi_processor *pr) +{ + unsigned int cpu = pr->id; + struct cpuinfo_x86 *c = cpu_data + cpu; + + pr->pdc = NULL; + if (c->x86_vendor == X86_VENDOR_INTEL) + init_intel_pdc(pr, c); + + return; +} + +EXPORT_SYMBOL(arch_acpi_processor_init_pdc); diff --git a/arch/x86/kernel/acpi/sleep_32.c b/arch/x86/kernel/acpi/sleep_32.c new file mode 100644 index 00000000000..c42b5ab49de --- /dev/null +++ b/arch/x86/kernel/acpi/sleep_32.c @@ -0,0 +1,110 @@ +/* + * sleep.c - x86-specific ACPI sleep support. + * + * Copyright (C) 2001-2003 Patrick Mochel + * Copyright (C) 2001-2003 Pavel Machek + */ + +#include +#include +#include +#include + +#include + +/* address in low memory of the wakeup routine. */ +unsigned long acpi_wakeup_address = 0; +unsigned long acpi_realmode_flags; +extern char wakeup_start, wakeup_end; + +extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); + +/** + * acpi_save_state_mem - save kernel state + * + * Create an identity mapped page table and copy the wakeup routine to + * low memory. 
+ */ +int acpi_save_state_mem(void) +{ + if (!acpi_wakeup_address) + return 1; + memcpy((void *)acpi_wakeup_address, &wakeup_start, + &wakeup_end - &wakeup_start); + acpi_copy_wakeup_routine(acpi_wakeup_address); + + return 0; +} + +/* + * acpi_restore_state - undo effects of acpi_save_state_mem + */ +void acpi_restore_state_mem(void) +{ +} + +/** + * acpi_reserve_bootmem - do _very_ early ACPI initialisation + * + * We allocate a page from the first 1MB of memory for the wakeup + * routine for when we come back from a sleep state. The + * runtime allocator allows specification of <16MB pages, but not + * <1MB pages. + */ +void __init acpi_reserve_bootmem(void) +{ + if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) { + printk(KERN_ERR + "ACPI: Wakeup code way too big, S3 disabled.\n"); + return; + } + + acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); + if (!acpi_wakeup_address) + printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); +} + +static int __init acpi_sleep_setup(char *str) +{ + while ((str != NULL) && (*str != '\0')) { + if (strncmp(str, "s3_bios", 7) == 0) + acpi_realmode_flags |= 1; + if (strncmp(str, "s3_mode", 7) == 0) + acpi_realmode_flags |= 2; + if (strncmp(str, "s3_beep", 7) == 0) + acpi_realmode_flags |= 4; + str = strchr(str, ','); + if (str != NULL) + str += strspn(str, ", \t"); + } + return 1; +} + +__setup("acpi_sleep=", acpi_sleep_setup); + +/* Ouch, we want to delete this. 
We already have better version in userspace, in + s2ram from suspend.sf.net project */ +static __init int reset_videomode_after_s3(struct dmi_system_id *d) +{ + acpi_realmode_flags |= 2; + return 0; +} + +static __initdata struct dmi_system_id acpisleep_dmi_table[] = { + { /* Reset video mode after returning from ACPI S3 sleep */ + .callback = reset_videomode_after_s3, + .ident = "Toshiba Satellite 4030cdt", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), + }, + }, + {} +}; + +static int __init acpisleep_dmi_init(void) +{ + dmi_check_system(acpisleep_dmi_table); + return 0; +} + +core_initcall(acpisleep_dmi_init); diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S new file mode 100644 index 00000000000..f22ba8534d2 --- /dev/null +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -0,0 +1,321 @@ +.text +#include +#include +#include + +# +# wakeup_code runs in real mode, and at unknown address (determined at run-time). +# Therefore it must only use relative jumps/calls. +# +# Do we need to deal with A20? It is okay: ACPI specs says A20 must be enabled +# +# If physical address of wakeup_code is 0x12345, BIOS should call us with +# cs = 0x1234, eip = 0x05 +# + +#define BEEP \ + inb $97, %al; \ + outb %al, $0x80; \ + movb $3, %al; \ + outb %al, $97; \ + outb %al, $0x80; \ + movb $-74, %al; \ + outb %al, $67; \ + outb %al, $0x80; \ + movb $-119, %al; \ + outb %al, $66; \ + outb %al, $0x80; \ + movb $15, %al; \ + outb %al, $66; + +ALIGN + .align 4096 +ENTRY(wakeup_start) +wakeup_code: + wakeup_code_start = . 
+ .code16 + + movw $0xb800, %ax + movw %ax,%fs + movw $0x0e00 + 'L', %fs:(0x10) + + cli + cld + + # setup data segment + movw %cs, %ax + movw %ax, %ds # Make ds:0 point to wakeup_start + movw %ax, %ss + + testl $4, realmode_flags - wakeup_code + jz 1f + BEEP +1: + mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board + movw $0x0e00 + 'S', %fs:(0x12) + + pushl $0 # Kill any dangerous flags + popfl + + movl real_magic - wakeup_code, %eax + cmpl $0x12345678, %eax + jne bogus_real_magic + + testl $1, realmode_flags - wakeup_code + jz 1f + lcall $0xc000,$3 + movw %cs, %ax + movw %ax, %ds # Bios might have played with that + movw %ax, %ss +1: + + testl $2, realmode_flags - wakeup_code + jz 1f + mov video_mode - wakeup_code, %ax + call mode_set +1: + + # set up page table + movl $swsusp_pg_dir-__PAGE_OFFSET, %eax + movl %eax, %cr3 + + testl $1, real_efer_save_restore - wakeup_code + jz 4f + # restore efer setting + movl real_save_efer_edx - wakeup_code, %edx + movl real_save_efer_eax - wakeup_code, %eax + mov $0xc0000080, %ecx + wrmsr +4: + # make sure %cr4 is set correctly (features, etc) + movl real_save_cr4 - wakeup_code, %eax + movl %eax, %cr4 + movw $0xb800, %ax + movw %ax,%fs + movw $0x0e00 + 'i', %fs:(0x12) + + # need a gdt -- use lgdtl to force 32-bit operands, in case + # the GDT is located past 16 megabytes. 
+ lgdtl real_save_gdt - wakeup_code + + movl real_save_cr0 - wakeup_code, %eax + movl %eax, %cr0 + jmp 1f +1: + movw $0x0e00 + 'n', %fs:(0x14) + + movl real_magic - wakeup_code, %eax + cmpl $0x12345678, %eax + jne bogus_real_magic + + testl $8, realmode_flags - wakeup_code + jz 1f + BEEP +1: + ljmpl $__KERNEL_CS, $wakeup_pmode_return + +real_save_gdt: .word 0 + .long 0 +real_save_cr0: .long 0 +real_save_cr3: .long 0 +real_save_cr4: .long 0 +real_magic: .long 0 +video_mode: .long 0 +realmode_flags: .long 0 +beep_flags: .long 0 +real_efer_save_restore: .long 0 +real_save_efer_edx: .long 0 +real_save_efer_eax: .long 0 + +bogus_real_magic: + movw $0x0e00 + 'B', %fs:(0x12) + jmp bogus_real_magic + +/* This code uses an extended set of video mode numbers. These include: + * Aliases for standard modes + * NORMAL_VGA (-1) + * EXTENDED_VGA (-2) + * ASK_VGA (-3) + * Video modes numbered by menu position -- NOT RECOMMENDED because of lack + * of compatibility when extending the table. These are between 0x00 and 0xff. + */ +#define VIDEO_FIRST_MENU 0x0000 + +/* Standard BIOS video modes (BIOS number + 0x0100) */ +#define VIDEO_FIRST_BIOS 0x0100 + +/* VESA BIOS video modes (VESA number + 0x0200) */ +#define VIDEO_FIRST_VESA 0x0200 + +/* Video7 special modes (BIOS number + 0x0900) */ +#define VIDEO_FIRST_V7 0x0900 + +# Setting of user mode (AX=mode ID) => CF=success + +# For now, we only handle VESA modes (0x0200..0x03ff). To handle other +# modes, we should probably compile in the video code from the boot +# directory. 
+mode_set: + movw %ax, %bx + subb $VIDEO_FIRST_VESA>>8, %bh + cmpb $2, %bh + jb check_vesa + +setbad: + clc + ret + +check_vesa: + orw $0x4000, %bx # Use linear frame buffer + movw $0x4f02, %ax # VESA BIOS mode set call + int $0x10 + cmpw $0x004f, %ax # AL=4f if implemented + jnz setbad # AH=0 if OK + + stc + ret + + .code32 + ALIGN + +.org 0x800 +wakeup_stack_begin: # Stack grows down + +.org 0xff0 # Just below end of page +wakeup_stack: +ENTRY(wakeup_end) + +.org 0x1000 + +wakeup_pmode_return: + movw $__KERNEL_DS, %ax + movw %ax, %ss + movw %ax, %ds + movw %ax, %es + movw %ax, %fs + movw %ax, %gs + movw $0x0e00 + 'u', 0xb8016 + + # reload the gdt, as we need the full 32 bit address + lgdt saved_gdt + lidt saved_idt + lldt saved_ldt + ljmp $(__KERNEL_CS),$1f +1: + movl %cr3, %eax + movl %eax, %cr3 + wbinvd + + # and restore the stack ... but you need gdt for this to work + movl saved_context_esp, %esp + + movl %cs:saved_magic, %eax + cmpl $0x12345678, %eax + jne bogus_magic + + # jump to place where we left off + movl saved_eip,%eax + jmp *%eax + +bogus_magic: + movw $0x0e00 + 'B', 0xb8018 + jmp bogus_magic + + +## +# acpi_copy_wakeup_routine +# +# Copy the above routine to low memory. 
+# +# Parameters: +# %eax: place to copy wakeup routine to +# +# Returned address is location of code in low memory (past data and stack) +# +ENTRY(acpi_copy_wakeup_routine) + + pushl %ebx + sgdt saved_gdt + sidt saved_idt + sldt saved_ldt + str saved_tss + + movl nx_enabled, %edx + movl %edx, real_efer_save_restore - wakeup_start (%eax) + testl $1, real_efer_save_restore - wakeup_start (%eax) + jz 2f + # save efer setting + pushl %eax + movl %eax, %ebx + mov $0xc0000080, %ecx + rdmsr + movl %edx, real_save_efer_edx - wakeup_start (%ebx) + movl %eax, real_save_efer_eax - wakeup_start (%ebx) + popl %eax +2: + + movl %cr3, %edx + movl %edx, real_save_cr3 - wakeup_start (%eax) + movl %cr4, %edx + movl %edx, real_save_cr4 - wakeup_start (%eax) + movl %cr0, %edx + movl %edx, real_save_cr0 - wakeup_start (%eax) + sgdt real_save_gdt - wakeup_start (%eax) + + movl saved_videomode, %edx + movl %edx, video_mode - wakeup_start (%eax) + movl acpi_realmode_flags, %edx + movl %edx, realmode_flags - wakeup_start (%eax) + movl $0x12345678, real_magic - wakeup_start (%eax) + movl $0x12345678, saved_magic + popl %ebx + ret + +save_registers: + leal 4(%esp), %eax + movl %eax, saved_context_esp + movl %ebx, saved_context_ebx + movl %ebp, saved_context_ebp + movl %esi, saved_context_esi + movl %edi, saved_context_edi + pushfl ; popl saved_context_eflags + + movl $ret_point, saved_eip + ret + + +restore_registers: + movl saved_context_ebp, %ebp + movl saved_context_ebx, %ebx + movl saved_context_esi, %esi + movl saved_context_edi, %edi + pushl saved_context_eflags ; popfl + ret + +ENTRY(do_suspend_lowlevel) + call save_processor_state + call save_registers + pushl $3 + call acpi_enter_sleep_state + addl $4, %esp + +# In case of S3 failure, we'll emerge here. 
Jump +# to ret_point to recover + jmp ret_point + .p2align 4,,7 +ret_point: + call restore_registers + call restore_processor_state + ret + +.data +ALIGN +ENTRY(saved_magic) .long 0 +ENTRY(saved_eip) .long 0 + +# saved registers +saved_gdt: .long 0,0 +saved_idt: .long 0,0 +saved_ldt: .long 0 +saved_tss: .long 0 + -- cgit v1.2.3-70-g09d2 From c18db0d7e299791c73d4dbe5ae7905b2ab8ba332 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:25 +0200 Subject: i386: move kernel/cpu/mcheck Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/kernel/cpu/Makefile | 2 +- arch/i386/kernel/cpu/mcheck/Makefile | 2 - arch/i386/kernel/cpu/mcheck/k7.c | 102 ------------ arch/i386/kernel/cpu/mcheck/mce.c | 90 ----------- arch/i386/kernel/cpu/mcheck/mce.h | 14 -- arch/i386/kernel/cpu/mcheck/non-fatal.c | 91 ----------- arch/i386/kernel/cpu/mcheck/p4.c | 253 ------------------------------ arch/i386/kernel/cpu/mcheck/p5.c | 53 ------- arch/i386/kernel/cpu/mcheck/p6.c | 119 -------------- arch/i386/kernel/cpu/mcheck/therm_throt.c | 186 ---------------------- arch/i386/kernel/cpu/mcheck/winchip.c | 36 ----- arch/x86/kernel/cpu/mcheck/Makefile | 2 + arch/x86/kernel/cpu/mcheck/k7.c | 102 ++++++++++++ arch/x86/kernel/cpu/mcheck/mce.c | 90 +++++++++++ arch/x86/kernel/cpu/mcheck/mce.h | 14 ++ arch/x86/kernel/cpu/mcheck/non-fatal.c | 91 +++++++++++ arch/x86/kernel/cpu/mcheck/p4.c | 253 ++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/mcheck/p5.c | 53 +++++++ arch/x86/kernel/cpu/mcheck/p6.c | 119 ++++++++++++++ arch/x86/kernel/cpu/mcheck/therm_throt.c | 186 ++++++++++++++++++++++ arch/x86/kernel/cpu/mcheck/winchip.c | 36 +++++ arch/x86_64/kernel/Makefile_64 | 2 +- 22 files changed, 948 insertions(+), 948 deletions(-) delete mode 100644 arch/i386/kernel/cpu/mcheck/Makefile delete mode 100644 arch/i386/kernel/cpu/mcheck/k7.c delete mode 100644 arch/i386/kernel/cpu/mcheck/mce.c delete mode 100644 arch/i386/kernel/cpu/mcheck/mce.h delete mode 100644 
arch/i386/kernel/cpu/mcheck/non-fatal.c delete mode 100644 arch/i386/kernel/cpu/mcheck/p4.c delete mode 100644 arch/i386/kernel/cpu/mcheck/p5.c delete mode 100644 arch/i386/kernel/cpu/mcheck/p6.c delete mode 100644 arch/i386/kernel/cpu/mcheck/therm_throt.c delete mode 100644 arch/i386/kernel/cpu/mcheck/winchip.c create mode 100644 arch/x86/kernel/cpu/mcheck/Makefile create mode 100644 arch/x86/kernel/cpu/mcheck/k7.c create mode 100644 arch/x86/kernel/cpu/mcheck/mce.c create mode 100644 arch/x86/kernel/cpu/mcheck/mce.h create mode 100644 arch/x86/kernel/cpu/mcheck/non-fatal.c create mode 100644 arch/x86/kernel/cpu/mcheck/p4.c create mode 100644 arch/x86/kernel/cpu/mcheck/p5.c create mode 100644 arch/x86/kernel/cpu/mcheck/p6.c create mode 100644 arch/x86/kernel/cpu/mcheck/therm_throt.c create mode 100644 arch/x86/kernel/cpu/mcheck/winchip.c (limited to 'arch/x86/kernel') diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile index 778396c78d6..09effc02e35 100644 --- a/arch/i386/kernel/cpu/Makefile +++ b/arch/i386/kernel/cpu/Makefile @@ -12,7 +12,7 @@ obj-y += intel.o intel_cacheinfo.o addon_cpuid_features.o obj-y += nexgen.o obj-y += umc.o -obj-$(CONFIG_X86_MCE) += mcheck/ +obj-$(CONFIG_X86_MCE) += ../../../x86/kernel/cpu/mcheck/ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ diff --git a/arch/i386/kernel/cpu/mcheck/Makefile b/arch/i386/kernel/cpu/mcheck/Makefile deleted file mode 100644 index f1ebe1c1c17..00000000000 --- a/arch/i386/kernel/cpu/mcheck/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o -obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c deleted file mode 100644 index eef63e3630c..00000000000 --- a/arch/i386/kernel/cpu/mcheck/k7.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Athlon/Hammer specific Machine Check Exception Reporting - * (C) Copyright 2002 Dave Jones - */ - -#include -#include -#include 
-#include -#include - -#include -#include -#include - -#include "mce.h" - -/* Machine Check Handler For AMD Athlon/Duron */ -static fastcall void k7_machine_check(struct pt_regs * regs, long error_code) -{ - int recover=1; - u32 alow, ahigh, high, low; - u32 mcgstl, mcgsth; - int i; - - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); - if (mcgstl & (1<<0)) /* Recoverable ? */ - recover=0; - - printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", - smp_processor_id(), mcgsth, mcgstl); - - for (i=1; i, Dave Jones - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mce.h" - -int mce_disabled = 0; -int nr_mce_banks; - -EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ - -/* Handle unconfigured int18 (should never happen) */ -static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code) -{ - printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); -} - -/* Call the installed machine check handler for this CPU setup. 
*/ -void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; - -/* This has to be run for each processor */ -void mcheck_init(struct cpuinfo_x86 *c) -{ - if (mce_disabled==1) - return; - - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - amd_mcheck_init(c); - break; - - case X86_VENDOR_INTEL: - if (c->x86==5) - intel_p5_mcheck_init(c); - if (c->x86==6) - intel_p6_mcheck_init(c); - if (c->x86==15) - intel_p4_mcheck_init(c); - break; - - case X86_VENDOR_CENTAUR: - if (c->x86==5) - winchip_mcheck_init(c); - break; - - default: - break; - } -} - -static unsigned long old_cr4 __initdata; - -void __init stop_mce(void) -{ - old_cr4 = read_cr4(); - clear_in_cr4(X86_CR4_MCE); -} - -void __init restart_mce(void) -{ - if (old_cr4 & X86_CR4_MCE) - set_in_cr4(X86_CR4_MCE); -} - -static int __init mcheck_disable(char *str) -{ - mce_disabled = 1; - return 1; -} - -static int __init mcheck_enable(char *str) -{ - mce_disabled = -1; - return 1; -} - -__setup("nomce", mcheck_disable); -__setup("mce", mcheck_enable); diff --git a/arch/i386/kernel/cpu/mcheck/mce.h b/arch/i386/kernel/cpu/mcheck/mce.h deleted file mode 100644 index 81fb6e2d35f..00000000000 --- a/arch/i386/kernel/cpu/mcheck/mce.h +++ /dev/null @@ -1,14 +0,0 @@ -#include -#include - -void amd_mcheck_init(struct cpuinfo_x86 *c); -void intel_p4_mcheck_init(struct cpuinfo_x86 *c); -void intel_p5_mcheck_init(struct cpuinfo_x86 *c); -void intel_p6_mcheck_init(struct cpuinfo_x86 *c); -void winchip_mcheck_init(struct cpuinfo_x86 *c); - -/* Call the installed machine check handler for this CPU setup. 
*/ -extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); - -extern int nr_mce_banks; - diff --git a/arch/i386/kernel/cpu/mcheck/non-fatal.c b/arch/i386/kernel/cpu/mcheck/non-fatal.c deleted file mode 100644 index bf39409b383..00000000000 --- a/arch/i386/kernel/cpu/mcheck/non-fatal.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Non Fatal Machine Check Exception Reporting - * - * (C) Copyright 2002 Dave Jones. - * - * This file contains routines to check for non-fatal MCEs every 15s - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mce.h" - -static int firstbank; - -#define MCE_RATE 15*HZ /* timer rate is 15s */ - -static void mce_checkregs (void *info) -{ - u32 low, high; - int i; - - for (i=firstbank; i -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -#include "mce.h" - -/* as supported by the P4/Xeon family */ -struct intel_mce_extended_msrs { - u32 eax; - u32 ebx; - u32 ecx; - u32 edx; - u32 esi; - u32 edi; - u32 ebp; - u32 esp; - u32 eflags; - u32 eip; - /* u32 *reserved[]; */ -}; - -static int mce_num_extended_msrs = 0; - - -#ifdef CONFIG_X86_MCE_P4THERMAL -static void unexpected_thermal_interrupt(struct pt_regs *regs) -{ - printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", - smp_processor_id()); - add_taint(TAINT_MACHINE_CHECK); -} - -/* P4/Xeon Thermal transition interrupt handler */ -static void intel_thermal_interrupt(struct pt_regs *regs) -{ - __u64 msr_val; - - ack_APIC_irq(); - - rdmsrl(MSR_IA32_THERM_STATUS, msr_val); - therm_throt_process(msr_val & 0x1); -} - -/* Thermal interrupt handler for this CPU setup */ -static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; - -fastcall void smp_thermal_interrupt(struct pt_regs *regs) -{ - irq_enter(); - vendor_thermal_interrupt(regs); - irq_exit(); -} - -/* P4/Xeon Thermal regulation detect and init */ -static 
void intel_init_thermal(struct cpuinfo_x86 *c) -{ - u32 l, h; - unsigned int cpu = smp_processor_id(); - - /* Thermal monitoring */ - if (!cpu_has(c, X86_FEATURE_ACPI)) - return; /* -ENODEV */ - - /* Clock modulation */ - if (!cpu_has(c, X86_FEATURE_ACC)) - return; /* -ENODEV */ - - /* first check if its enabled already, in which case there might - * be some SMM goo which handles it, so we can't even put a handler - * since it might be delivered via SMI already -zwanem. - */ - rdmsr (MSR_IA32_MISC_ENABLE, l, h); - h = apic_read(APIC_LVTTHMR); - if ((l & (1<<3)) && (h & APIC_DM_SMI)) { - printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", - cpu); - return; /* -EBUSY */ - } - - /* check whether a vector already exists, temporarily masked? */ - if (h & APIC_VECTOR_MASK) { - printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " - "installed\n", - cpu, (h & APIC_VECTOR_MASK)); - return; /* -EBUSY */ - } - - /* The temperature transition interrupt handler setup */ - h = THERMAL_APIC_VECTOR; /* our delivery vector */ - h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ - apic_write_around(APIC_LVTTHMR, h); - - rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); - wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); - - /* ok we're good to go... 
*/ - vendor_thermal_interrupt = intel_thermal_interrupt; - - rdmsr (MSR_IA32_MISC_ENABLE, l, h); - wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); - - l = apic_read (APIC_LVTTHMR); - apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); - - /* enable thermal throttle processing */ - atomic_set(&therm_throt_en, 1); - return; -} -#endif /* CONFIG_X86_MCE_P4THERMAL */ - - -/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ -static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) -{ - u32 h; - - rdmsr (MSR_IA32_MCG_EAX, r->eax, h); - rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); - rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); - rdmsr (MSR_IA32_MCG_EDX, r->edx, h); - rdmsr (MSR_IA32_MCG_ESI, r->esi, h); - rdmsr (MSR_IA32_MCG_EDI, r->edi, h); - rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); - rdmsr (MSR_IA32_MCG_ESP, r->esp, h); - rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); - rdmsr (MSR_IA32_MCG_EIP, r->eip, h); -} - -static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) -{ - int recover=1; - u32 alow, ahigh, high, low; - u32 mcgstl, mcgsth; - int i; - - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); - if (mcgstl & (1<<0)) /* Recoverable ? 
*/ - recover=0; - - printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", - smp_processor_id(), mcgsth, mcgstl); - - if (mce_num_extended_msrs > 0) { - struct intel_mce_extended_msrs dbg; - intel_get_extended_msrs(&dbg); - printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", - smp_processor_id(), dbg.eip, dbg.eflags); - printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", - dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); - printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", - dbg.esi, dbg.edi, dbg.ebp, dbg.esp); - } - - for (i=0; i> 16) & 0xff; - printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" - " available\n", - smp_processor_id(), mce_num_extended_msrs); - -#ifdef CONFIG_X86_MCE_P4THERMAL - /* Check for P4/Xeon Thermal monitor */ - intel_init_thermal(c); -#endif - } -} diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c deleted file mode 100644 index 94bc43d950c..00000000000 --- a/arch/i386/kernel/cpu/mcheck/p5.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * P5 specific Machine Check Exception Reporting - * (C) Copyright 2002 Alan Cox - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mce.h" - -/* Machine check handler for Pentium class Intel */ -static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code) -{ - u32 loaddr, hi, lotype; - rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); - rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); - printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype); - if(lotype&(1<<5)) - printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id()); - add_taint(TAINT_MACHINE_CHECK); -} - -/* Set up machine check reporting for processors with Intel style MCE */ -void intel_p5_mcheck_init(struct cpuinfo_x86 *c) -{ - u32 l, h; - - /*Check for MCE support */ - if( !cpu_has(c, X86_FEATURE_MCE) ) - return; - - /* Default P5 to off as 
its often misconnected */ - if(mce_disabled != -1) - return; - machine_check_vector = pentium_machine_check; - wmb(); - - /* Read registers before enabling */ - rdmsr(MSR_IA32_P5_MC_ADDR, l, h); - rdmsr(MSR_IA32_P5_MC_TYPE, l, h); - printk(KERN_INFO "Intel old style machine check architecture supported.\n"); - - /* Enable MCE */ - set_in_cr4(X86_CR4_MCE); - printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id()); -} diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c deleted file mode 100644 index deeae42ce19..00000000000 --- a/arch/i386/kernel/cpu/mcheck/p6.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * P6 specific Machine Check Exception Reporting - * (C) Copyright 2002 Alan Cox - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mce.h" - -/* Machine Check Handler For PII/PIII */ -static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) -{ - int recover=1; - u32 alow, ahigh, high, low; - u32 mcgstl, mcgsth; - int i; - - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); - if (mcgstl & (1<<0)) /* Recoverable ? 
*/ - recover=0; - - printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", - smp_processor_id(), mcgsth, mcgstl); - - for (i=0; i -#include -#include -#include -#include -#include -#include - -/* How long to wait between reporting thermal events */ -#define CHECK_INTERVAL (300 * HZ) - -static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; -static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); -atomic_t therm_throt_en = ATOMIC_INIT(0); - -#ifdef CONFIG_SYSFS -#define define_therm_throt_sysdev_one_ro(_name) \ - static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) - -#define define_therm_throt_sysdev_show_func(name) \ -static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ - char *buf) \ -{ \ - unsigned int cpu = dev->id; \ - ssize_t ret; \ - \ - preempt_disable(); /* CPU hotplug */ \ - if (cpu_online(cpu)) \ - ret = sprintf(buf, "%lu\n", \ - per_cpu(thermal_throttle_##name, cpu)); \ - else \ - ret = 0; \ - preempt_enable(); \ - \ - return ret; \ -} - -define_therm_throt_sysdev_show_func(count); -define_therm_throt_sysdev_one_ro(count); - -static struct attribute *thermal_throttle_attrs[] = { - &attr_count.attr, - NULL -}; - -static struct attribute_group thermal_throttle_attr_group = { - .attrs = thermal_throttle_attrs, - .name = "thermal_throttle" -}; -#endif /* CONFIG_SYSFS */ - -/*** - * therm_throt_process - Process thermal throttling event from interrupt - * @curr: Whether the condition is current or not (boolean), since the - * thermal interrupt normally gets called both when the thermal - * event begins and once the event has ended. - * - * This function is called by the thermal interrupt after the - * IRQ has been acknowledged. - * - * It will take care of rate limiting and printing messages to the syslog. - * - * Returns: 0 : Event should NOT be further logged, i.e. still in - * "timeout" from previous log message. 
- * 1 : Event should be logged further, and a message has been - * printed to the syslog. - */ -int therm_throt_process(int curr) -{ - unsigned int cpu = smp_processor_id(); - __u64 tmp_jiffs = get_jiffies_64(); - - if (curr) - __get_cpu_var(thermal_throttle_count)++; - - if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) - return 0; - - __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; - - /* if we just entered the thermal event */ - if (curr) { - printk(KERN_CRIT "CPU%d: Temperature above threshold, " - "cpu clock throttled (total events = %lu)\n", cpu, - __get_cpu_var(thermal_throttle_count)); - - add_taint(TAINT_MACHINE_CHECK); - } else { - printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); - } - - return 1; -} - -#ifdef CONFIG_SYSFS -/* Add/Remove thermal_throttle interface for CPU device */ -static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) -{ - return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); -} - -static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) -{ - return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); -} - -/* Mutex protecting device creation against CPU hotplug */ -static DEFINE_MUTEX(therm_cpu_lock); - -/* Get notified when a cpu comes on/off. Be hotplug friendly. 
*/ -static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; - int err; - - sys_dev = get_cpu_sysdev(cpu); - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - mutex_lock(&therm_cpu_lock); - err = thermal_throttle_add_dev(sys_dev); - mutex_unlock(&therm_cpu_lock); - WARN_ON(err); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - mutex_lock(&therm_cpu_lock); - thermal_throttle_remove_dev(sys_dev); - mutex_unlock(&therm_cpu_lock); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block thermal_throttle_cpu_notifier = -{ - .notifier_call = thermal_throttle_cpu_callback, -}; - -static __init int thermal_throttle_init_device(void) -{ - unsigned int cpu = 0; - int err; - - if (!atomic_read(&therm_throt_en)) - return 0; - - register_hotcpu_notifier(&thermal_throttle_cpu_notifier); - -#ifdef CONFIG_HOTPLUG_CPU - mutex_lock(&therm_cpu_lock); -#endif - /* connect live CPUs to sysfs */ - for_each_online_cpu(cpu) { - err = thermal_throttle_add_dev(get_cpu_sysdev(cpu)); - WARN_ON(err); - } -#ifdef CONFIG_HOTPLUG_CPU - mutex_unlock(&therm_cpu_lock); -#endif - - return 0; -} - -device_initcall(thermal_throttle_init_device); -#endif /* CONFIG_SYSFS */ diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c deleted file mode 100644 index 9e424b6c293..00000000000 --- a/arch/i386/kernel/cpu/mcheck/winchip.c +++ /dev/null @@ -1,36 +0,0 @@ -/* - * IDT Winchip specific Machine Check Exception Reporting - * (C) Copyright 2002 Alan Cox - */ - -#include -#include -#include -#include - -#include -#include -#include - -#include "mce.h" - -/* Machine check handler for WinChip C6 */ -static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code) -{ - printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); - add_taint(TAINT_MACHINE_CHECK); -} - -/* Set up machine check reporting on 
the Winchip C6 series */ -void winchip_mcheck_init(struct cpuinfo_x86 *c) -{ - u32 lo, hi; - machine_check_vector = winchip_machine_check; - wmb(); - rdmsr(MSR_IDT_FCR1, lo, hi); - lo|= (1<<2); /* Enable EIERRINT (int 18 MCE) */ - lo&= ~(1<<4); /* Enable MCE */ - wrmsr(MSR_IDT_FCR1, lo, hi); - set_in_cr4(X86_CR4_MCE); - printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n"); -} diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile new file mode 100644 index 00000000000..f1ebe1c1c17 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -0,0 +1,2 @@ +obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o +obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c new file mode 100644 index 00000000000..eef63e3630c --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/k7.c @@ -0,0 +1,102 @@ +/* + * Athlon/Hammer specific Machine Check Exception Reporting + * (C) Copyright 2002 Dave Jones + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +/* Machine Check Handler For AMD Athlon/Duron */ +static fastcall void k7_machine_check(struct pt_regs * regs, long error_code) +{ + int recover=1; + u32 alow, ahigh, high, low; + u32 mcgstl, mcgsth; + int i; + + rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + if (mcgstl & (1<<0)) /* Recoverable ? 
*/ + recover=0; + + printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + smp_processor_id(), mcgsth, mcgstl); + + for (i=1; i, Dave Jones + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +int mce_disabled = 0; +int nr_mce_banks; + +EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ + +/* Handle unconfigured int18 (should never happen) */ +static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code) +{ + printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); +} + +/* Call the installed machine check handler for this CPU setup. */ +void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; + +/* This has to be run for each processor */ +void mcheck_init(struct cpuinfo_x86 *c) +{ + if (mce_disabled==1) + return; + + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + amd_mcheck_init(c); + break; + + case X86_VENDOR_INTEL: + if (c->x86==5) + intel_p5_mcheck_init(c); + if (c->x86==6) + intel_p6_mcheck_init(c); + if (c->x86==15) + intel_p4_mcheck_init(c); + break; + + case X86_VENDOR_CENTAUR: + if (c->x86==5) + winchip_mcheck_init(c); + break; + + default: + break; + } +} + +static unsigned long old_cr4 __initdata; + +void __init stop_mce(void) +{ + old_cr4 = read_cr4(); + clear_in_cr4(X86_CR4_MCE); +} + +void __init restart_mce(void) +{ + if (old_cr4 & X86_CR4_MCE) + set_in_cr4(X86_CR4_MCE); +} + +static int __init mcheck_disable(char *str) +{ + mce_disabled = 1; + return 1; +} + +static int __init mcheck_enable(char *str) +{ + mce_disabled = -1; + return 1; +} + +__setup("nomce", mcheck_disable); +__setup("mce", mcheck_enable); diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h new file mode 100644 index 00000000000..81fb6e2d35f --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce.h @@ -0,0 +1,14 @@ +#include +#include + +void amd_mcheck_init(struct cpuinfo_x86 
*c); +void intel_p4_mcheck_init(struct cpuinfo_x86 *c); +void intel_p5_mcheck_init(struct cpuinfo_x86 *c); +void intel_p6_mcheck_init(struct cpuinfo_x86 *c); +void winchip_mcheck_init(struct cpuinfo_x86 *c); + +/* Call the installed machine check handler for this CPU setup. */ +extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); + +extern int nr_mce_banks; + diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c new file mode 100644 index 00000000000..bf39409b383 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c @@ -0,0 +1,91 @@ +/* + * Non Fatal Machine Check Exception Reporting + * + * (C) Copyright 2002 Dave Jones. + * + * This file contains routines to check for non-fatal MCEs every 15s + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +static int firstbank; + +#define MCE_RATE 15*HZ /* timer rate is 15s */ + +static void mce_checkregs (void *info) +{ + u32 low, high; + int i; + + for (i=firstbank; i +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "mce.h" + +/* as supported by the P4/Xeon family */ +struct intel_mce_extended_msrs { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + u32 esi; + u32 edi; + u32 ebp; + u32 esp; + u32 eflags; + u32 eip; + /* u32 *reserved[]; */ +}; + +static int mce_num_extended_msrs = 0; + + +#ifdef CONFIG_X86_MCE_P4THERMAL +static void unexpected_thermal_interrupt(struct pt_regs *regs) +{ + printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", + smp_processor_id()); + add_taint(TAINT_MACHINE_CHECK); +} + +/* P4/Xeon Thermal transition interrupt handler */ +static void intel_thermal_interrupt(struct pt_regs *regs) +{ + __u64 msr_val; + + ack_APIC_irq(); + + rdmsrl(MSR_IA32_THERM_STATUS, msr_val); + therm_throt_process(msr_val & 0x1); +} + +/* Thermal interrupt handler for this CPU setup */ +static void 
(*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; + +fastcall void smp_thermal_interrupt(struct pt_regs *regs) +{ + irq_enter(); + vendor_thermal_interrupt(regs); + irq_exit(); +} + +/* P4/Xeon Thermal regulation detect and init */ +static void intel_init_thermal(struct cpuinfo_x86 *c) +{ + u32 l, h; + unsigned int cpu = smp_processor_id(); + + /* Thermal monitoring */ + if (!cpu_has(c, X86_FEATURE_ACPI)) + return; /* -ENODEV */ + + /* Clock modulation */ + if (!cpu_has(c, X86_FEATURE_ACC)) + return; /* -ENODEV */ + + /* first check if its enabled already, in which case there might + * be some SMM goo which handles it, so we can't even put a handler + * since it might be delivered via SMI already -zwanem. + */ + rdmsr (MSR_IA32_MISC_ENABLE, l, h); + h = apic_read(APIC_LVTTHMR); + if ((l & (1<<3)) && (h & APIC_DM_SMI)) { + printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", + cpu); + return; /* -EBUSY */ + } + + /* check whether a vector already exists, temporarily masked? */ + if (h & APIC_VECTOR_MASK) { + printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " + "installed\n", + cpu, (h & APIC_VECTOR_MASK)); + return; /* -EBUSY */ + } + + /* The temperature transition interrupt handler setup */ + h = THERMAL_APIC_VECTOR; /* our delivery vector */ + h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ + apic_write_around(APIC_LVTTHMR, h); + + rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); + wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); + + /* ok we're good to go... 
*/ + vendor_thermal_interrupt = intel_thermal_interrupt; + + rdmsr (MSR_IA32_MISC_ENABLE, l, h); + wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); + + l = apic_read (APIC_LVTTHMR); + apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); + + /* enable thermal throttle processing */ + atomic_set(&therm_throt_en, 1); + return; +} +#endif /* CONFIG_X86_MCE_P4THERMAL */ + + +/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ +static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) +{ + u32 h; + + rdmsr (MSR_IA32_MCG_EAX, r->eax, h); + rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); + rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); + rdmsr (MSR_IA32_MCG_EDX, r->edx, h); + rdmsr (MSR_IA32_MCG_ESI, r->esi, h); + rdmsr (MSR_IA32_MCG_EDI, r->edi, h); + rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); + rdmsr (MSR_IA32_MCG_ESP, r->esp, h); + rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); + rdmsr (MSR_IA32_MCG_EIP, r->eip, h); +} + +static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) +{ + int recover=1; + u32 alow, ahigh, high, low; + u32 mcgstl, mcgsth; + int i; + + rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + if (mcgstl & (1<<0)) /* Recoverable ? 
*/ + recover=0; + + printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + smp_processor_id(), mcgsth, mcgstl); + + if (mce_num_extended_msrs > 0) { + struct intel_mce_extended_msrs dbg; + intel_get_extended_msrs(&dbg); + printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", + smp_processor_id(), dbg.eip, dbg.eflags); + printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", + dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); + printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", + dbg.esi, dbg.edi, dbg.ebp, dbg.esp); + } + + for (i=0; i> 16) & 0xff; + printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" + " available\n", + smp_processor_id(), mce_num_extended_msrs); + +#ifdef CONFIG_X86_MCE_P4THERMAL + /* Check for P4/Xeon Thermal monitor */ + intel_init_thermal(c); +#endif + } +} diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c new file mode 100644 index 00000000000..94bc43d950c --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -0,0 +1,53 @@ +/* + * P5 specific Machine Check Exception Reporting + * (C) Copyright 2002 Alan Cox + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +/* Machine check handler for Pentium class Intel */ +static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code) +{ + u32 loaddr, hi, lotype; + rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); + rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); + printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype); + if(lotype&(1<<5)) + printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id()); + add_taint(TAINT_MACHINE_CHECK); +} + +/* Set up machine check reporting for processors with Intel style MCE */ +void intel_p5_mcheck_init(struct cpuinfo_x86 *c) +{ + u32 l, h; + + /*Check for MCE support */ + if( !cpu_has(c, X86_FEATURE_MCE) ) + return; + + /* Default P5 to off as its often 
misconnected */ + if(mce_disabled != -1) + return; + machine_check_vector = pentium_machine_check; + wmb(); + + /* Read registers before enabling */ + rdmsr(MSR_IA32_P5_MC_ADDR, l, h); + rdmsr(MSR_IA32_P5_MC_TYPE, l, h); + printk(KERN_INFO "Intel old style machine check architecture supported.\n"); + + /* Enable MCE */ + set_in_cr4(X86_CR4_MCE); + printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id()); +} diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c new file mode 100644 index 00000000000..deeae42ce19 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/p6.c @@ -0,0 +1,119 @@ +/* + * P6 specific Machine Check Exception Reporting + * (C) Copyright 2002 Alan Cox + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +/* Machine Check Handler For PII/PIII */ +static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) +{ + int recover=1; + u32 alow, ahigh, high, low; + u32 mcgstl, mcgsth; + int i; + + rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + if (mcgstl & (1<<0)) /* Recoverable ? 
*/ + recover=0; + + printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + smp_processor_id(), mcgsth, mcgstl); + + for (i=0; i +#include +#include +#include +#include +#include +#include + +/* How long to wait between reporting thermal events */ +#define CHECK_INTERVAL (300 * HZ) + +static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; +static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); +atomic_t therm_throt_en = ATOMIC_INIT(0); + +#ifdef CONFIG_SYSFS +#define define_therm_throt_sysdev_one_ro(_name) \ + static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) + +#define define_therm_throt_sysdev_show_func(name) \ +static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ + char *buf) \ +{ \ + unsigned int cpu = dev->id; \ + ssize_t ret; \ + \ + preempt_disable(); /* CPU hotplug */ \ + if (cpu_online(cpu)) \ + ret = sprintf(buf, "%lu\n", \ + per_cpu(thermal_throttle_##name, cpu)); \ + else \ + ret = 0; \ + preempt_enable(); \ + \ + return ret; \ +} + +define_therm_throt_sysdev_show_func(count); +define_therm_throt_sysdev_one_ro(count); + +static struct attribute *thermal_throttle_attrs[] = { + &attr_count.attr, + NULL +}; + +static struct attribute_group thermal_throttle_attr_group = { + .attrs = thermal_throttle_attrs, + .name = "thermal_throttle" +}; +#endif /* CONFIG_SYSFS */ + +/*** + * therm_throt_process - Process thermal throttling event from interrupt + * @curr: Whether the condition is current or not (boolean), since the + * thermal interrupt normally gets called both when the thermal + * event begins and once the event has ended. + * + * This function is called by the thermal interrupt after the + * IRQ has been acknowledged. + * + * It will take care of rate limiting and printing messages to the syslog. + * + * Returns: 0 : Event should NOT be further logged, i.e. still in + * "timeout" from previous log message. 
+ * 1 : Event should be logged further, and a message has been + * printed to the syslog. + */ +int therm_throt_process(int curr) +{ + unsigned int cpu = smp_processor_id(); + __u64 tmp_jiffs = get_jiffies_64(); + + if (curr) + __get_cpu_var(thermal_throttle_count)++; + + if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) + return 0; + + __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; + + /* if we just entered the thermal event */ + if (curr) { + printk(KERN_CRIT "CPU%d: Temperature above threshold, " + "cpu clock throttled (total events = %lu)\n", cpu, + __get_cpu_var(thermal_throttle_count)); + + add_taint(TAINT_MACHINE_CHECK); + } else { + printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); + } + + return 1; +} + +#ifdef CONFIG_SYSFS +/* Add/Remove thermal_throttle interface for CPU device */ +static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) +{ + return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); +} + +static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) +{ + return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); +} + +/* Mutex protecting device creation against CPU hotplug */ +static DEFINE_MUTEX(therm_cpu_lock); + +/* Get notified when a cpu comes on/off. Be hotplug friendly. 
*/ +static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + int err; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + mutex_lock(&therm_cpu_lock); + err = thermal_throttle_add_dev(sys_dev); + mutex_unlock(&therm_cpu_lock); + WARN_ON(err); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + mutex_lock(&therm_cpu_lock); + thermal_throttle_remove_dev(sys_dev); + mutex_unlock(&therm_cpu_lock); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block thermal_throttle_cpu_notifier = +{ + .notifier_call = thermal_throttle_cpu_callback, +}; + +static __init int thermal_throttle_init_device(void) +{ + unsigned int cpu = 0; + int err; + + if (!atomic_read(&therm_throt_en)) + return 0; + + register_hotcpu_notifier(&thermal_throttle_cpu_notifier); + +#ifdef CONFIG_HOTPLUG_CPU + mutex_lock(&therm_cpu_lock); +#endif + /* connect live CPUs to sysfs */ + for_each_online_cpu(cpu) { + err = thermal_throttle_add_dev(get_cpu_sysdev(cpu)); + WARN_ON(err); + } +#ifdef CONFIG_HOTPLUG_CPU + mutex_unlock(&therm_cpu_lock); +#endif + + return 0; +} + +device_initcall(thermal_throttle_init_device); +#endif /* CONFIG_SYSFS */ diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c new file mode 100644 index 00000000000..9e424b6c293 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -0,0 +1,36 @@ +/* + * IDT Winchip specific Machine Check Exception Reporting + * (C) Copyright 2002 Alan Cox + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +/* Machine check handler for WinChip C6 */ +static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code) +{ + printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); + add_taint(TAINT_MACHINE_CHECK); +} + +/* Set up machine check reporting on the 
Winchip C6 series */ +void winchip_mcheck_init(struct cpuinfo_x86 *c) +{ + u32 lo, hi; + machine_check_vector = winchip_machine_check; + wmb(); + rdmsr(MSR_IDT_FCR1, lo, hi); + lo|= (1<<2); /* Enable EIERRINT (int 18 MCE) */ + lo&= ~(1<<4); /* Enable MCE */ + wrmsr(MSR_IDT_FCR1, lo, hi); + set_in_cr4(X86_CR4_MCE); + printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n"); +} diff --git a/arch/x86_64/kernel/Makefile_64 b/arch/x86_64/kernel/Makefile_64 index 8253617af12..1c9de796fa1 100644 --- a/arch/x86_64/kernel/Makefile_64 +++ b/arch/x86_64/kernel/Makefile_64 @@ -48,7 +48,7 @@ obj-y += pcspeaker.o CFLAGS_vsyscall_64.o := $(PROFILING) -g0 -therm_throt-y += ../../i386/kernel/cpu/mcheck/therm_throt.o +therm_throt-y += ../../x86/kernel/cpu/mcheck/therm_throt.o bootflag-y += ../../i386/kernel/bootflag.o cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o topology-y += ../../i386/kernel/topology.o -- cgit v1.2.3-70-g09d2 From ee580dc91efd83e6b55955e7261e8ad2a0e08d1a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:27 +0200 Subject: i386: move kernel/cpu/cpufreq Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Kconfig | 2 +- arch/i386/kernel/cpu/Makefile | 2 +- arch/i386/kernel/cpu/cpufreq/Kconfig | 250 ---- arch/i386/kernel/cpu/cpufreq/Makefile | 16 - arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c | 799 ------------ arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c | 441 ------- arch/i386/kernel/cpu/cpufreq/e_powersaver.c | 334 ----- arch/i386/kernel/cpu/cpufreq/elanfreq.c | 309 ----- arch/i386/kernel/cpu/cpufreq/gx-suspmod.c | 495 -------- arch/i386/kernel/cpu/cpufreq/longhaul.c | 1024 ---------------- arch/i386/kernel/cpu/cpufreq/longhaul.h | 353 ------ arch/i386/kernel/cpu/cpufreq/longrun.c | 325 ----- arch/i386/kernel/cpu/cpufreq/p4-clockmod.c | 316 ----- arch/i386/kernel/cpu/cpufreq/powernow-k6.c | 256 ---- arch/i386/kernel/cpu/cpufreq/powernow-k7.c | 703 ----------- 
arch/i386/kernel/cpu/cpufreq/powernow-k7.h | 44 - arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 1363 --------------------- arch/i386/kernel/cpu/cpufreq/powernow-k8.h | 232 ---- arch/i386/kernel/cpu/cpufreq/sc520_freq.c | 191 --- arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c | 634 ---------- arch/i386/kernel/cpu/cpufreq/speedstep-ich.c | 440 ------- arch/i386/kernel/cpu/cpufreq/speedstep-lib.c | 444 ------- arch/i386/kernel/cpu/cpufreq/speedstep-lib.h | 49 - arch/i386/kernel/cpu/cpufreq/speedstep-smi.c | 424 ------- arch/x86/kernel/cpu/cpufreq/Kconfig | 250 ++++ arch/x86/kernel/cpu/cpufreq/Makefile | 16 + arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 799 ++++++++++++ arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 441 +++++++ arch/x86/kernel/cpu/cpufreq/e_powersaver.c | 334 +++++ arch/x86/kernel/cpu/cpufreq/elanfreq.c | 309 +++++ arch/x86/kernel/cpu/cpufreq/gx-suspmod.c | 495 ++++++++ arch/x86/kernel/cpu/cpufreq/longhaul.c | 1024 ++++++++++++++++ arch/x86/kernel/cpu/cpufreq/longhaul.h | 353 ++++++ arch/x86/kernel/cpu/cpufreq/longrun.c | 325 +++++ arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 316 +++++ arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 256 ++++ arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 703 +++++++++++ arch/x86/kernel/cpu/cpufreq/powernow-k7.h | 44 + arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 1363 +++++++++++++++++++++ arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 232 ++++ arch/x86/kernel/cpu/cpufreq/sc520_freq.c | 191 +++ arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 634 ++++++++++ arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 440 +++++++ arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 444 +++++++ arch/x86/kernel/cpu/cpufreq/speedstep-lib.h | 49 + arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | 424 +++++++ arch/x86_64/kernel/Makefile_64 | 2 +- 47 files changed, 9445 insertions(+), 9445 deletions(-) delete mode 100644 arch/i386/kernel/cpu/cpufreq/Kconfig delete mode 100644 arch/i386/kernel/cpu/cpufreq/Makefile delete mode 100644 
arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/e_powersaver.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/elanfreq.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/gx-suspmod.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/longhaul.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/longhaul.h delete mode 100644 arch/i386/kernel/cpu/cpufreq/longrun.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/p4-clockmod.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/powernow-k6.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/powernow-k7.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/powernow-k7.h delete mode 100644 arch/i386/kernel/cpu/cpufreq/powernow-k8.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/powernow-k8.h delete mode 100644 arch/i386/kernel/cpu/cpufreq/sc520_freq.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/speedstep-ich.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/speedstep-lib.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/speedstep-lib.h delete mode 100644 arch/i386/kernel/cpu/cpufreq/speedstep-smi.c create mode 100644 arch/x86/kernel/cpu/cpufreq/Kconfig create mode 100644 arch/x86/kernel/cpu/cpufreq/Makefile create mode 100644 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c create mode 100644 arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c create mode 100644 arch/x86/kernel/cpu/cpufreq/e_powersaver.c create mode 100644 arch/x86/kernel/cpu/cpufreq/elanfreq.c create mode 100644 arch/x86/kernel/cpu/cpufreq/gx-suspmod.c create mode 100644 arch/x86/kernel/cpu/cpufreq/longhaul.c create mode 100644 arch/x86/kernel/cpu/cpufreq/longhaul.h create mode 100644 arch/x86/kernel/cpu/cpufreq/longrun.c create mode 100644 arch/x86/kernel/cpu/cpufreq/p4-clockmod.c create mode 100644 arch/x86/kernel/cpu/cpufreq/powernow-k6.c create mode 100644 
arch/x86/kernel/cpu/cpufreq/powernow-k7.c create mode 100644 arch/x86/kernel/cpu/cpufreq/powernow-k7.h create mode 100644 arch/x86/kernel/cpu/cpufreq/powernow-k8.c create mode 100644 arch/x86/kernel/cpu/cpufreq/powernow-k8.h create mode 100644 arch/x86/kernel/cpu/cpufreq/sc520_freq.c create mode 100644 arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c create mode 100644 arch/x86/kernel/cpu/cpufreq/speedstep-ich.c create mode 100644 arch/x86/kernel/cpu/cpufreq/speedstep-lib.c create mode 100644 arch/x86/kernel/cpu/cpufreq/speedstep-lib.h create mode 100644 arch/x86/kernel/cpu/cpufreq/speedstep-smi.c (limited to 'arch/x86/kernel') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 97b64d7d6bf..fc86d41d252 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -1067,7 +1067,7 @@ config APM_REAL_MODE_POWER_OFF endif # APM -source "arch/i386/kernel/cpu/cpufreq/Kconfig" +source "arch/x86/kernel/cpu/cpufreq/Kconfig" endmenu diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile index 09effc02e35..8d9ce0232ad 100644 --- a/arch/i386/kernel/cpu/Makefile +++ b/arch/i386/kernel/cpu/Makefile @@ -15,6 +15,6 @@ obj-y += umc.o obj-$(CONFIG_X86_MCE) += ../../../x86/kernel/cpu/mcheck/ obj-$(CONFIG_MTRR) += mtrr/ -obj-$(CONFIG_CPU_FREQ) += cpufreq/ +obj-$(CONFIG_CPU_FREQ) += ../../../x86/kernel/cpu/cpufreq/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig deleted file mode 100644 index d8c6f132dc7..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ /dev/null @@ -1,250 +0,0 @@ -# -# CPU Frequency scaling -# - -menu "CPU Frequency scaling" - -source "drivers/cpufreq/Kconfig" - -if CPU_FREQ - -comment "CPUFreq processor drivers" - -config X86_ACPI_CPUFREQ - tristate "ACPI Processor P-States driver" - select CPU_FREQ_TABLE - depends on ACPI_PROCESSOR - help - This driver adds a CPUFreq driver which utilizes the ACPI - Processor Performance States. 
- This driver also supports Intel Enhanced Speedstep. - - For details, take a look at . - - If in doubt, say N. - -config ELAN_CPUFREQ - tristate "AMD Elan SC400 and SC410" - select CPU_FREQ_TABLE - depends on X86_ELAN - ---help--- - This adds the CPUFreq driver for AMD Elan SC400 and SC410 - processors. - - You need to specify the processor maximum speed as boot - parameter: elanfreq=maxspeed (in kHz) or as module - parameter "max_freq". - - For details, take a look at . - - If in doubt, say N. - -config SC520_CPUFREQ - tristate "AMD Elan SC520" - select CPU_FREQ_TABLE - depends on X86_ELAN - ---help--- - This adds the CPUFreq driver for AMD Elan SC520 processor. - - For details, take a look at . - - If in doubt, say N. - - -config X86_POWERNOW_K6 - tristate "AMD Mobile K6-2/K6-3 PowerNow!" - select CPU_FREQ_TABLE - help - This adds the CPUFreq driver for mobile AMD K6-2+ and mobile - AMD K6-3+ processors. - - For details, take a look at . - - If in doubt, say N. - -config X86_POWERNOW_K7 - tristate "AMD Mobile Athlon/Duron PowerNow!" - select CPU_FREQ_TABLE - help - This adds the CPUFreq driver for mobile AMD K7 mobile processors. - - For details, take a look at . - - If in doubt, say N. - -config X86_POWERNOW_K7_ACPI - bool - depends on X86_POWERNOW_K7 && ACPI_PROCESSOR - depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m) - default y - -config X86_POWERNOW_K8 - tristate "AMD Opteron/Athlon64 PowerNow!" - select CPU_FREQ_TABLE - depends on EXPERIMENTAL - help - This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. - - For details, take a look at . - - If in doubt, say N. - -config X86_POWERNOW_K8_ACPI - bool "ACPI Support" - select ACPI_PROCESSOR - depends on ACPI && X86_POWERNOW_K8 - default y - help - This provides access to the K8s Processor Performance States via ACPI. - This driver is probably required for CPUFreq to work with multi-socket and - SMP systems. 
It is not required on at least some single-socket yet - multi-core systems, even if SMP is enabled. - - It is safe to say Y here. - -config X86_GX_SUSPMOD - tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" - depends on PCI - help - This add the CPUFreq driver for NatSemi Geode processors which - support suspend modulation. - - For details, take a look at . - - If in doubt, say N. - -config X86_SPEEDSTEP_CENTRINO - tristate "Intel Enhanced SpeedStep" - select CPU_FREQ_TABLE - select X86_SPEEDSTEP_CENTRINO_TABLE - help - This adds the CPUFreq driver for Enhanced SpeedStep enabled - mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However, - you also need to say Y to "Use ACPI tables to decode..." below - [which might imply enabling ACPI] if you want to use this driver - on non-Banias CPUs. - - For details, take a look at . - - If in doubt, say N. - -config X86_SPEEDSTEP_CENTRINO_TABLE - bool "Built-in tables for Banias CPUs" - depends on X86_SPEEDSTEP_CENTRINO - default y - help - Use built-in tables for Banias CPUs if ACPI encoding - is not available. - - If in doubt, say N. - -config X86_SPEEDSTEP_ICH - tristate "Intel Speedstep on ICH-M chipsets (ioport interface)" - select CPU_FREQ_TABLE - help - This adds the CPUFreq driver for certain mobile Intel Pentium III - (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all - mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2, - ICH3 or ICH4 southbridge. - - For details, take a look at . - - If in doubt, say N. - -config X86_SPEEDSTEP_SMI - tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)" - select CPU_FREQ_TABLE - depends on EXPERIMENTAL - help - This adds the CPUFreq driver for certain mobile Intel Pentium III - (Coppermine), all mobile Intel Pentium III-M (Tualatin) - on systems which have an Intel 440BX/ZX/MX southbridge. - - For details, take a look at . - - If in doubt, say N. 
- -config X86_P4_CLOCKMOD - tristate "Intel Pentium 4 clock modulation" - select CPU_FREQ_TABLE - help - This adds the CPUFreq driver for Intel Pentium 4 / XEON - processors. - - For details, take a look at . - - If in doubt, say N. - -config X86_CPUFREQ_NFORCE2 - tristate "nVidia nForce2 FSB changing" - depends on EXPERIMENTAL - help - This adds the CPUFreq driver for FSB changing on nVidia nForce2 - platforms. - - For details, take a look at . - - If in doubt, say N. - -config X86_LONGRUN - tristate "Transmeta LongRun" - help - This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors - which support LongRun. - - For details, take a look at . - - If in doubt, say N. - -config X86_LONGHAUL - tristate "VIA Cyrix III Longhaul" - select CPU_FREQ_TABLE - depends on ACPI_PROCESSOR - help - This adds the CPUFreq driver for VIA Samuel/CyrixIII, - VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T - processors. - - For details, take a look at . - - If in doubt, say N. - -config X86_E_POWERSAVER - tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" - select CPU_FREQ_TABLE - depends on EXPERIMENTAL - help - This adds the CPUFreq driver for VIA C7 processors. - - If in doubt, say N. - -comment "shared options" - -config X86_ACPI_CPUFREQ_PROC_INTF - bool "/proc/acpi/processor/../performance interface (deprecated)" - depends on PROC_FS - depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI - help - This enables the deprecated /proc/acpi/processor/../performance - interface. While it is helpful for debugging, the generic, - cross-architecture cpufreq interfaces should be used. - - If in doubt, say N. 
- -config X86_SPEEDSTEP_LIB - tristate - default X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD - -config X86_SPEEDSTEP_RELAXED_CAP_CHECK - bool "Relaxed speedstep capability checks" - depends on (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) - help - Don't perform all checks for a speedstep capable system which would - normally be done. Some ancient or strange systems, though speedstep - capable, don't always indicate that they are speedstep capable. This - option lets the probing code bypass some of those checks if the - parameter "relaxed_check=1" is passed to the module. - -endif # CPU_FREQ - -endmenu diff --git a/arch/i386/kernel/cpu/cpufreq/Makefile b/arch/i386/kernel/cpu/cpufreq/Makefile deleted file mode 100644 index 560f7760dae..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o -obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o -obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o -obj-$(CONFIG_X86_LONGHAUL) += longhaul.o -obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o -obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o -obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o -obj-$(CONFIG_X86_LONGRUN) += longrun.o -obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o -obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o -obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o -obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o -obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o -obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o -obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o -obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c deleted file mode 100644 index 705e13a3078..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ /dev/null @@ -1,799 +0,0 @@ -/* - * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $) - * - * Copyright (C) 2001, 2002 Andy Grover - * Copyright (C) 
2001, 2002 Paul Diefenbaugh - * Copyright (C) 2002 - 2004 Dominik Brodowski - * Copyright (C) 2006 Denis Sadykov - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) - -MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); -MODULE_DESCRIPTION("ACPI Processor P-States Driver"); -MODULE_LICENSE("GPL"); - -enum { - UNDEFINED_CAPABLE = 0, - SYSTEM_INTEL_MSR_CAPABLE, - SYSTEM_IO_CAPABLE, -}; - -#define INTEL_MSR_RANGE (0xffff) -#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) - -struct acpi_cpufreq_data { - struct acpi_processor_performance *acpi_data; - struct cpufreq_frequency_table *freq_table; - unsigned int max_freq; - unsigned int resume; - unsigned int cpu_feature; -}; - -static struct acpi_cpufreq_data *drv_data[NR_CPUS]; -/* acpi_perf_data is a pointer to percpu data. 
*/ -static struct acpi_processor_performance *acpi_perf_data; - -static struct cpufreq_driver acpi_cpufreq_driver; - -static unsigned int acpi_pstate_strict; - -static int check_est_cpu(unsigned int cpuid) -{ - struct cpuinfo_x86 *cpu = &cpu_data[cpuid]; - - if (cpu->x86_vendor != X86_VENDOR_INTEL || - !cpu_has(cpu, X86_FEATURE_EST)) - return 0; - - return 1; -} - -static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) -{ - struct acpi_processor_performance *perf; - int i; - - perf = data->acpi_data; - - for (i=0; istate_count; i++) { - if (value == perf->states[i].status) - return data->freq_table[i].frequency; - } - return 0; -} - -static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) -{ - int i; - struct acpi_processor_performance *perf; - - msr &= INTEL_MSR_RANGE; - perf = data->acpi_data; - - for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { - if (msr == perf->states[data->freq_table[i].index].status) - return data->freq_table[i].frequency; - } - return data->freq_table[0].frequency; -} - -static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) -{ - switch (data->cpu_feature) { - case SYSTEM_INTEL_MSR_CAPABLE: - return extract_msr(val, data); - case SYSTEM_IO_CAPABLE: - return extract_io(val, data); - default: - return 0; - } -} - -struct msr_addr { - u32 reg; -}; - -struct io_addr { - u16 port; - u8 bit_width; -}; - -typedef union { - struct msr_addr msr; - struct io_addr io; -} drv_addr_union; - -struct drv_cmd { - unsigned int type; - cpumask_t mask; - drv_addr_union addr; - u32 val; -}; - -static void do_drv_read(struct drv_cmd *cmd) -{ - u32 h; - - switch (cmd->type) { - case SYSTEM_INTEL_MSR_CAPABLE: - rdmsr(cmd->addr.msr.reg, cmd->val, h); - break; - case SYSTEM_IO_CAPABLE: - acpi_os_read_port((acpi_io_address)cmd->addr.io.port, - &cmd->val, - (u32)cmd->addr.io.bit_width); - break; - default: - break; - } -} - -static void do_drv_write(struct drv_cmd *cmd) -{ - u32 lo, hi; - - switch 
(cmd->type) { - case SYSTEM_INTEL_MSR_CAPABLE: - rdmsr(cmd->addr.msr.reg, lo, hi); - lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE); - wrmsr(cmd->addr.msr.reg, lo, hi); - break; - case SYSTEM_IO_CAPABLE: - acpi_os_write_port((acpi_io_address)cmd->addr.io.port, - cmd->val, - (u32)cmd->addr.io.bit_width); - break; - default: - break; - } -} - -static void drv_read(struct drv_cmd *cmd) -{ - cpumask_t saved_mask = current->cpus_allowed; - cmd->val = 0; - - set_cpus_allowed(current, cmd->mask); - do_drv_read(cmd); - set_cpus_allowed(current, saved_mask); -} - -static void drv_write(struct drv_cmd *cmd) -{ - cpumask_t saved_mask = current->cpus_allowed; - unsigned int i; - - for_each_cpu_mask(i, cmd->mask) { - set_cpus_allowed(current, cpumask_of_cpu(i)); - do_drv_write(cmd); - } - - set_cpus_allowed(current, saved_mask); - return; -} - -static u32 get_cur_val(cpumask_t mask) -{ - struct acpi_processor_performance *perf; - struct drv_cmd cmd; - - if (unlikely(cpus_empty(mask))) - return 0; - - switch (drv_data[first_cpu(mask)]->cpu_feature) { - case SYSTEM_INTEL_MSR_CAPABLE: - cmd.type = SYSTEM_INTEL_MSR_CAPABLE; - cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; - break; - case SYSTEM_IO_CAPABLE: - cmd.type = SYSTEM_IO_CAPABLE; - perf = drv_data[first_cpu(mask)]->acpi_data; - cmd.addr.io.port = perf->control_register.address; - cmd.addr.io.bit_width = perf->control_register.bit_width; - break; - default: - return 0; - } - - cmd.mask = mask; - - drv_read(&cmd); - - dprintk("get_cur_val = %u\n", cmd.val); - - return cmd.val; -} - -/* - * Return the measured active (C0) frequency on this CPU since last call - * to this function. - * Input: cpu number - * Return: Average CPU frequency in terms of max frequency (zero on error) - * - * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance - * over a period of time, while CPU is in C0 state. 
- * IA32_MPERF counts at the rate of max advertised frequency - * IA32_APERF counts at the rate of actual CPU frequency - * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and - * no meaning should be associated with absolute values of these MSRs. - */ -static unsigned int get_measured_perf(unsigned int cpu) -{ - union { - struct { - u32 lo; - u32 hi; - } split; - u64 whole; - } aperf_cur, mperf_cur; - - cpumask_t saved_mask; - unsigned int perf_percent; - unsigned int retval; - - saved_mask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (get_cpu() != cpu) { - /* We were not able to run on requested processor */ - put_cpu(); - return 0; - } - - rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi); - rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi); - - wrmsr(MSR_IA32_APERF, 0,0); - wrmsr(MSR_IA32_MPERF, 0,0); - -#ifdef __i386__ - /* - * We dont want to do 64 bit divide with 32 bit kernel - * Get an approximate value. Return failure in case we cannot get - * an approximate value. 
- */ - if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) { - int shift_count; - u32 h; - - h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi); - shift_count = fls(h); - - aperf_cur.whole >>= shift_count; - mperf_cur.whole >>= shift_count; - } - - if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) { - int shift_count = 7; - aperf_cur.split.lo >>= shift_count; - mperf_cur.split.lo >>= shift_count; - } - - if (aperf_cur.split.lo && mperf_cur.split.lo) - perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo; - else - perf_percent = 0; - -#else - if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) { - int shift_count = 7; - aperf_cur.whole >>= shift_count; - mperf_cur.whole >>= shift_count; - } - - if (aperf_cur.whole && mperf_cur.whole) - perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole; - else - perf_percent = 0; - -#endif - - retval = drv_data[cpu]->max_freq * perf_percent / 100; - - put_cpu(); - set_cpus_allowed(current, saved_mask); - - dprintk("cpu %d: performance percent %d\n", cpu, perf_percent); - return retval; -} - -static unsigned int get_cur_freq_on_cpu(unsigned int cpu) -{ - struct acpi_cpufreq_data *data = drv_data[cpu]; - unsigned int freq; - - dprintk("get_cur_freq_on_cpu (%d)\n", cpu); - - if (unlikely(data == NULL || - data->acpi_data == NULL || data->freq_table == NULL)) { - return 0; - } - - freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data); - dprintk("cur freq = %u\n", freq); - - return freq; -} - -static unsigned int check_freqs(cpumask_t mask, unsigned int freq, - struct acpi_cpufreq_data *data) -{ - unsigned int cur_freq; - unsigned int i; - - for (i=0; i<100; i++) { - cur_freq = extract_freq(get_cur_val(mask), data); - if (cur_freq == freq) - return 1; - udelay(10); - } - return 0; -} - -static int acpi_cpufreq_target(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) -{ - struct acpi_cpufreq_data *data = drv_data[policy->cpu]; - struct acpi_processor_performance 
*perf; - struct cpufreq_freqs freqs; - cpumask_t online_policy_cpus; - struct drv_cmd cmd; - unsigned int next_state = 0; /* Index into freq_table */ - unsigned int next_perf_state = 0; /* Index into perf table */ - unsigned int i; - int result = 0; - - dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); - - if (unlikely(data == NULL || - data->acpi_data == NULL || data->freq_table == NULL)) { - return -ENODEV; - } - - perf = data->acpi_data; - result = cpufreq_frequency_table_target(policy, - data->freq_table, - target_freq, - relation, &next_state); - if (unlikely(result)) - return -ENODEV; - -#ifdef CONFIG_HOTPLUG_CPU - /* cpufreq holds the hotplug lock, so we are safe from here on */ - cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); -#else - online_policy_cpus = policy->cpus; -#endif - - next_perf_state = data->freq_table[next_state].index; - if (perf->state == next_perf_state) { - if (unlikely(data->resume)) { - dprintk("Called after resume, resetting to P%d\n", - next_perf_state); - data->resume = 0; - } else { - dprintk("Already at target state (P%d)\n", - next_perf_state); - return 0; - } - } - - switch (data->cpu_feature) { - case SYSTEM_INTEL_MSR_CAPABLE: - cmd.type = SYSTEM_INTEL_MSR_CAPABLE; - cmd.addr.msr.reg = MSR_IA32_PERF_CTL; - cmd.val = (u32) perf->states[next_perf_state].control; - break; - case SYSTEM_IO_CAPABLE: - cmd.type = SYSTEM_IO_CAPABLE; - cmd.addr.io.port = perf->control_register.address; - cmd.addr.io.bit_width = perf->control_register.bit_width; - cmd.val = (u32) perf->states[next_perf_state].control; - break; - default: - return -ENODEV; - } - - cpus_clear(cmd.mask); - - if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) - cmd.mask = online_policy_cpus; - else - cpu_set(policy->cpu, cmd.mask); - - freqs.old = perf->states[perf->state].core_frequency * 1000; - freqs.new = data->freq_table[next_state].frequency; - for_each_cpu_mask(i, cmd.mask) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, 
CPUFREQ_PRECHANGE); - } - - drv_write(&cmd); - - if (acpi_pstate_strict) { - if (!check_freqs(cmd.mask, freqs.new, data)) { - dprintk("acpi_cpufreq_target failed (%d)\n", - policy->cpu); - return -EAGAIN; - } - } - - for_each_cpu_mask(i, cmd.mask) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - perf->state = next_perf_state; - - return result; -} - -static int acpi_cpufreq_verify(struct cpufreq_policy *policy) -{ - struct acpi_cpufreq_data *data = drv_data[policy->cpu]; - - dprintk("acpi_cpufreq_verify\n"); - - return cpufreq_frequency_table_verify(policy, data->freq_table); -} - -static unsigned long -acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) -{ - struct acpi_processor_performance *perf = data->acpi_data; - - if (cpu_khz) { - /* search the closest match to cpu_khz */ - unsigned int i; - unsigned long freq; - unsigned long freqn = perf->states[0].core_frequency * 1000; - - for (i=0; i<(perf->state_count-1); i++) { - freq = freqn; - freqn = perf->states[i+1].core_frequency * 1000; - if ((2 * cpu_khz) > (freqn + freq)) { - perf->state = i; - return freq; - } - } - perf->state = perf->state_count-1; - return freqn; - } else { - /* assume CPU is at P0... */ - perf->state = 0; - return perf->states[0].core_frequency * 1000; - } -} - -/* - * acpi_cpufreq_early_init - initialize ACPI P-States library - * - * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c) - * in order to determine correct frequency and voltage pairings. We can - * do _PDC and _PSD and find out the processor dependency for the - * actual init that will happen later... 
- */ -static int __init acpi_cpufreq_early_init(void) -{ - dprintk("acpi_cpufreq_early_init\n"); - - acpi_perf_data = alloc_percpu(struct acpi_processor_performance); - if (!acpi_perf_data) { - dprintk("Memory allocation error for acpi_perf_data.\n"); - return -ENOMEM; - } - - /* Do initialization in ACPI core */ - acpi_processor_preregister_performance(acpi_perf_data); - return 0; -} - -#ifdef CONFIG_SMP -/* - * Some BIOSes do SW_ANY coordination internally, either set it up in hw - * or do it in BIOS firmware and won't inform about it to OS. If not - * detected, this has a side effect of making CPU run at a different speed - * than OS intended it to run at. Detect it and handle it cleanly. - */ -static int bios_with_sw_any_bug; - -static int sw_any_bug_found(struct dmi_system_id *d) -{ - bios_with_sw_any_bug = 1; - return 0; -} - -static struct dmi_system_id sw_any_bug_dmi_table[] = { - { - .callback = sw_any_bug_found, - .ident = "Supermicro Server X6DLP", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), - DMI_MATCH(DMI_BIOS_VERSION, "080010"), - DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), - }, - }, - { } -}; -#endif - -static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - unsigned int i; - unsigned int valid_states = 0; - unsigned int cpu = policy->cpu; - struct acpi_cpufreq_data *data; - unsigned int result = 0; - struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; - struct acpi_processor_performance *perf; - - dprintk("acpi_cpufreq_cpu_init\n"); - - data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); - if (!data) - return -ENOMEM; - - data->acpi_data = percpu_ptr(acpi_perf_data, cpu); - drv_data[cpu] = data; - - if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) - acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; - - result = acpi_processor_register_performance(data->acpi_data, cpu); - if (result) - goto err_free; - - perf = data->acpi_data; - policy->shared_type = perf->shared_type; - - /* - * Will let policy->cpus know about dependency only 
when software - * coordination is required. - */ - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || - policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { - policy->cpus = perf->shared_cpu_map; - } - -#ifdef CONFIG_SMP - dmi_check_system(sw_any_bug_dmi_table); - if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { - policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; - policy->cpus = cpu_core_map[cpu]; - } -#endif - - /* capability check */ - if (perf->state_count <= 1) { - dprintk("No P-States\n"); - result = -ENODEV; - goto err_unreg; - } - - if (perf->control_register.space_id != perf->status_register.space_id) { - result = -ENODEV; - goto err_unreg; - } - - switch (perf->control_register.space_id) { - case ACPI_ADR_SPACE_SYSTEM_IO: - dprintk("SYSTEM IO addr space\n"); - data->cpu_feature = SYSTEM_IO_CAPABLE; - break; - case ACPI_ADR_SPACE_FIXED_HARDWARE: - dprintk("HARDWARE addr space\n"); - if (!check_est_cpu(cpu)) { - result = -ENODEV; - goto err_unreg; - } - data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; - break; - default: - dprintk("Unknown addr space %d\n", - (u32) (perf->control_register.space_id)); - result = -ENODEV; - goto err_unreg; - } - - data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * - (perf->state_count+1), GFP_KERNEL); - if (!data->freq_table) { - result = -ENOMEM; - goto err_unreg; - } - - /* detect transition latency */ - policy->cpuinfo.transition_latency = 0; - for (i=0; istate_count; i++) { - if ((perf->states[i].transition_latency * 1000) > - policy->cpuinfo.transition_latency) - policy->cpuinfo.transition_latency = - perf->states[i].transition_latency * 1000; - } - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - - data->max_freq = perf->states[0].core_frequency * 1000; - /* table init */ - for (i=0; istate_count; i++) { - if (i>0 && perf->states[i].core_frequency >= - data->freq_table[valid_states-1].frequency / 1000) - continue; - - data->freq_table[valid_states].index = i; - 
data->freq_table[valid_states].frequency = - perf->states[i].core_frequency * 1000; - valid_states++; - } - data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END; - perf->state = 0; - - result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table); - if (result) - goto err_freqfree; - - switch (perf->control_register.space_id) { - case ACPI_ADR_SPACE_SYSTEM_IO: - /* Current speed is unknown and not detectable by IO port */ - policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu); - break; - case ACPI_ADR_SPACE_FIXED_HARDWARE: - acpi_cpufreq_driver.get = get_cur_freq_on_cpu; - policy->cur = get_cur_freq_on_cpu(cpu); - break; - default: - break; - } - - /* notify BIOS that we exist */ - acpi_processor_notify_smm(THIS_MODULE); - - /* Check for APERF/MPERF support in hardware */ - if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) { - unsigned int ecx; - ecx = cpuid_ecx(6); - if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) - acpi_cpufreq_driver.getavg = get_measured_perf; - } - - dprintk("CPU%u - ACPI performance management activated.\n", cpu); - for (i = 0; i < perf->state_count; i++) - dprintk(" %cP%d: %d MHz, %d mW, %d uS\n", - (i == perf->state ? '*' : ' '), i, - (u32) perf->states[i].core_frequency, - (u32) perf->states[i].power, - (u32) perf->states[i].transition_latency); - - cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu); - - /* - * the first call to ->target() should result in us actually - * writing something to the appropriate registers. 
- */ - data->resume = 1; - - return result; - -err_freqfree: - kfree(data->freq_table); -err_unreg: - acpi_processor_unregister_performance(perf, cpu); -err_free: - kfree(data); - drv_data[cpu] = NULL; - - return result; -} - -static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) -{ - struct acpi_cpufreq_data *data = drv_data[policy->cpu]; - - dprintk("acpi_cpufreq_cpu_exit\n"); - - if (data) { - cpufreq_frequency_table_put_attr(policy->cpu); - drv_data[policy->cpu] = NULL; - acpi_processor_unregister_performance(data->acpi_data, - policy->cpu); - kfree(data); - } - - return 0; -} - -static int acpi_cpufreq_resume(struct cpufreq_policy *policy) -{ - struct acpi_cpufreq_data *data = drv_data[policy->cpu]; - - dprintk("acpi_cpufreq_resume\n"); - - data->resume = 1; - - return 0; -} - -static struct freq_attr *acpi_cpufreq_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver acpi_cpufreq_driver = { - .verify = acpi_cpufreq_verify, - .target = acpi_cpufreq_target, - .init = acpi_cpufreq_cpu_init, - .exit = acpi_cpufreq_cpu_exit, - .resume = acpi_cpufreq_resume, - .name = "acpi-cpufreq", - .owner = THIS_MODULE, - .attr = acpi_cpufreq_attr, -}; - -static int __init acpi_cpufreq_init(void) -{ - int ret; - - dprintk("acpi_cpufreq_init\n"); - - ret = acpi_cpufreq_early_init(); - if (ret) - return ret; - - return cpufreq_register_driver(&acpi_cpufreq_driver); -} - -static void __exit acpi_cpufreq_exit(void) -{ - dprintk("acpi_cpufreq_exit\n"); - - cpufreq_unregister_driver(&acpi_cpufreq_driver); - - free_percpu(acpi_perf_data); - - return; -} - -module_param(acpi_pstate_strict, uint, 0644); -MODULE_PARM_DESC(acpi_pstate_strict, - "value 0 or non-zero. 
non-zero -> strict ACPI checks are " - "performed during frequency changes."); - -late_initcall(acpi_cpufreq_init); -module_exit(acpi_cpufreq_exit); - -MODULE_ALIAS("acpi"); diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c deleted file mode 100644 index 66acd503991..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ /dev/null @@ -1,441 +0,0 @@ -/* - * (C) 2004-2006 Sebastian Witt - * - * Licensed under the terms of the GNU GPL License version 2. - * Based upon reverse engineered information - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - -#include -#include -#include -#include -#include -#include -#include - -#define NFORCE2_XTAL 25 -#define NFORCE2_BOOTFSB 0x48 -#define NFORCE2_PLLENABLE 0xa8 -#define NFORCE2_PLLREG 0xa4 -#define NFORCE2_PLLADR 0xa0 -#define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div) - -#define NFORCE2_MIN_FSB 50 -#define NFORCE2_SAFE_DISTANCE 50 - -/* Delay in ms between FSB changes */ -//#define NFORCE2_DELAY 10 - -/* nforce2_chipset: - * FSB is changed using the chipset - */ -static struct pci_dev *nforce2_chipset_dev; - -/* fid: - * multiplier * 10 - */ -static int fid = 0; - -/* min_fsb, max_fsb: - * minimum and maximum FSB (= FSB at boot time) - */ -static int min_fsb = 0; -static int max_fsb = 0; - -MODULE_AUTHOR("Sebastian Witt "); -MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver"); -MODULE_LICENSE("GPL"); - -module_param(fid, int, 0444); -module_param(min_fsb, int, 0444); - -MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); -MODULE_PARM_DESC(min_fsb, - "Minimum FSB to use, if not defined: current FSB - 50"); - -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) - -/** - * nforce2_calc_fsb - calculate FSB - * @pll: PLL value - * - * Calculates FSB from PLL value - */ -static int nforce2_calc_fsb(int pll) -{ - unsigned char mul, div; - - mul = (pll >> 8) & 0xff; - div = pll & 0xff; - - if (div > 0) - return NFORCE2_XTAL * mul / div; - - return 0; -} - -/** - * nforce2_calc_pll - calculate PLL value - * @fsb: FSB - * - * Calculate PLL value for given FSB - */ -static int nforce2_calc_pll(unsigned int fsb) -{ - unsigned char xmul, xdiv; - unsigned char mul = 0, div = 0; - int tried = 0; - - /* Try to calculate multiplier and divider up to 4 times */ - while (((mul == 0) || (div == 0)) && (tried <= 3)) { - for (xdiv = 2; xdiv <= 0x80; xdiv++) - for (xmul = 1; xmul <= 0xfe; xmul++) - if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) == - fsb + tried) { - mul = xmul; - div = xdiv; - } - tried++; - } - - if ((mul == 0) || (div == 0)) - return -1; - - return NFORCE2_PLL(mul, div); -} - -/** - * nforce2_write_pll - write PLL value to chipset - * @pll: PLL value - * - * Writes new FSB PLL value to chipset - */ -static void nforce2_write_pll(int pll) -{ - int temp; - - /* Set the pll addr. 
to 0x00 */ - pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, 0); - - /* Now write the value in all 64 registers */ - for (temp = 0; temp <= 0x3f; temp++) - pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, pll); - - return; -} - -/** - * nforce2_fsb_read - Read FSB - * - * Read FSB from chipset - * If bootfsb != 0, return FSB at boot-time - */ -static unsigned int nforce2_fsb_read(int bootfsb) -{ - struct pci_dev *nforce2_sub5; - u32 fsb, temp = 0; - - /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ - nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, - 0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL); - if (!nforce2_sub5) - return 0; - - pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb); - fsb /= 1000000; - - /* Check if PLL register is already set */ - pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); - - if(bootfsb || !temp) - return fsb; - - /* Use PLL register FSB value */ - pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp); - fsb = nforce2_calc_fsb(temp); - - return fsb; -} - -/** - * nforce2_set_fsb - set new FSB - * @fsb: New FSB - * - * Sets new FSB - */ -static int nforce2_set_fsb(unsigned int fsb) -{ - u32 temp = 0; - unsigned int tfsb; - int diff; - int pll = 0; - - if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { - printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb); - return -EINVAL; - } - - tfsb = nforce2_fsb_read(0); - if (!tfsb) { - printk(KERN_ERR "cpufreq: Error while reading the FSB\n"); - return -EINVAL; - } - - /* First write? 
Then set actual value */ - pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); - if (!temp) { - pll = nforce2_calc_pll(tfsb); - - if (pll < 0) - return -EINVAL; - - nforce2_write_pll(pll); - } - - /* Enable write access */ - temp = 0x01; - pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8)temp); - - diff = tfsb - fsb; - - if (!diff) - return 0; - - while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) { - if (diff < 0) - tfsb++; - else - tfsb--; - - /* Calculate the PLL reg. value */ - if ((pll = nforce2_calc_pll(tfsb)) == -1) - return -EINVAL; - - nforce2_write_pll(pll); -#ifdef NFORCE2_DELAY - mdelay(NFORCE2_DELAY); -#endif - } - - temp = 0x40; - pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLADR, (u8)temp); - - return 0; -} - -/** - * nforce2_get - get the CPU frequency - * @cpu: CPU number - * - * Returns the CPU frequency - */ -static unsigned int nforce2_get(unsigned int cpu) -{ - if (cpu) - return 0; - return nforce2_fsb_read(0) * fid * 100; -} - -/** - * nforce2_target - set a new CPUFreq policy - * @policy: new policy - * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) - * - * Sets a new CPUFreq policy. 
- */ -static int nforce2_target(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) -{ -// unsigned long flags; - struct cpufreq_freqs freqs; - unsigned int target_fsb; - - if ((target_freq > policy->max) || (target_freq < policy->min)) - return -EINVAL; - - target_fsb = target_freq / (fid * 100); - - freqs.old = nforce2_get(policy->cpu); - freqs.new = target_fsb * fid * 100; - freqs.cpu = 0; /* Only one CPU on nForce2 plattforms */ - - if (freqs.old == freqs.new) - return 0; - - dprintk("Old CPU frequency %d kHz, new %d kHz\n", - freqs.old, freqs.new); - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - /* Disable IRQs */ - //local_irq_save(flags); - - if (nforce2_set_fsb(target_fsb) < 0) - printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", - target_fsb); - else - dprintk("Changed FSB successfully to %d\n", - target_fsb); - - /* Enable IRQs */ - //local_irq_restore(flags); - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - - return 0; -} - -/** - * nforce2_verify - verifies a new CPUFreq policy - * @policy: new policy - */ -static int nforce2_verify(struct cpufreq_policy *policy) -{ - unsigned int fsb_pol_max; - - fsb_pol_max = policy->max / (fid * 100); - - if (policy->min < (fsb_pol_max * fid * 100)) - policy->max = (fsb_pol_max + 1) * fid * 100; - - cpufreq_verify_within_limits(policy, - policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); - return 0; -} - -static int nforce2_cpu_init(struct cpufreq_policy *policy) -{ - unsigned int fsb; - unsigned int rfid; - - /* capability check */ - if (policy->cpu != 0) - return -ENODEV; - - /* Get current FSB */ - fsb = nforce2_fsb_read(0); - - if (!fsb) - return -EIO; - - /* FIX: Get FID from CPU */ - if (!fid) { - if (!cpu_khz) { - printk(KERN_WARNING - "cpufreq: cpu_khz not set, can't calculate multiplier!\n"); - return -ENODEV; - } - - fid = cpu_khz / (fsb * 100); - rfid = fid % 5; - - if (rfid) { - if (rfid > 2) - fid += 5 - rfid; - else - fid -= rfid; - } - } 
- - printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb, - fid / 10, fid % 10); - - /* Set maximum FSB to FSB at boot time */ - max_fsb = nforce2_fsb_read(1); - - if(!max_fsb) - return -EIO; - - if (!min_fsb) - min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE; - - if (min_fsb < NFORCE2_MIN_FSB) - min_fsb = NFORCE2_MIN_FSB; - - /* cpuinfo and default policy values */ - policy->cpuinfo.min_freq = min_fsb * fid * 100; - policy->cpuinfo.max_freq = max_fsb * fid * 100; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - policy->cur = nforce2_get(policy->cpu); - policy->min = policy->cpuinfo.min_freq; - policy->max = policy->cpuinfo.max_freq; - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - - return 0; -} - -static int nforce2_cpu_exit(struct cpufreq_policy *policy) -{ - return 0; -} - -static struct cpufreq_driver nforce2_driver = { - .name = "nforce2", - .verify = nforce2_verify, - .target = nforce2_target, - .get = nforce2_get, - .init = nforce2_cpu_init, - .exit = nforce2_cpu_exit, - .owner = THIS_MODULE, -}; - -/** - * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic - * - * Detects nForce2 A2 and C1 stepping - * - */ -static unsigned int nforce2_detect_chipset(void) -{ - nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, - PCI_DEVICE_ID_NVIDIA_NFORCE2, - PCI_ANY_ID, PCI_ANY_ID, NULL); - - if (nforce2_chipset_dev == NULL) - return -ENODEV; - - printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n", - nforce2_chipset_dev->revision); - printk(KERN_INFO - "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n"); - - return 0; -} - -/** - * nforce2_init - initializes the nForce2 CPUFreq driver - * - * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported - * devices, -EINVAL on problems during initiatization, and zero on - * success. - */ -static int __init nforce2_init(void) -{ - /* TODO: do we need to detect the processor? 
*/ - - /* detect chipset */ - if (nforce2_detect_chipset()) { - printk(KERN_ERR "cpufreq: No nForce2 chipset.\n"); - return -ENODEV; - } - - return cpufreq_register_driver(&nforce2_driver); -} - -/** - * nforce2_exit - unregisters cpufreq module - * - * Unregisters nForce2 FSB change support. - */ -static void __exit nforce2_exit(void) -{ - cpufreq_unregister_driver(&nforce2_driver); -} - -module_init(nforce2_init); -module_exit(nforce2_exit); - diff --git a/arch/i386/kernel/cpu/cpufreq/e_powersaver.c b/arch/i386/kernel/cpu/cpufreq/e_powersaver.c deleted file mode 100644 index f43d98e11cc..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/e_powersaver.c +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Based on documentation provided by Dave Jones. Thanks! - * - * Licensed under the terms of the GNU GPL License version 2. - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#define EPS_BRAND_C7M 0 -#define EPS_BRAND_C7 1 -#define EPS_BRAND_EDEN 2 -#define EPS_BRAND_C3 3 - -struct eps_cpu_data { - u32 fsb; - struct cpufreq_frequency_table freq_table[]; -}; - -static struct eps_cpu_data *eps_cpu[NR_CPUS]; - - -static unsigned int eps_get(unsigned int cpu) -{ - struct eps_cpu_data *centaur; - u32 lo, hi; - - if (cpu) - return 0; - centaur = eps_cpu[cpu]; - if (centaur == NULL) - return 0; - - /* Return current frequency */ - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - return centaur->fsb * ((lo >> 8) & 0xff); -} - -static int eps_set_state(struct eps_cpu_data *centaur, - unsigned int cpu, - u32 dest_state) -{ - struct cpufreq_freqs freqs; - u32 lo, hi; - int err = 0; - int i; - - freqs.old = eps_get(cpu); - freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff); - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - /* Wait while CPU is busy */ - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - i = 0; - while (lo & ((1 << 16) | (1 << 
17))) { - udelay(16); - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - i++; - if (unlikely(i > 64)) { - err = -ENODEV; - goto postchange; - } - } - /* Set new multiplier and voltage */ - wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0); - /* Wait until transition end */ - i = 0; - do { - udelay(16); - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - i++; - if (unlikely(i > 64)) { - err = -ENODEV; - goto postchange; - } - } while (lo & ((1 << 16) | (1 << 17))); - - /* Return current frequency */ -postchange: - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - freqs.new = centaur->fsb * ((lo >> 8) & 0xff); - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - return err; -} - -static int eps_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - struct eps_cpu_data *centaur; - unsigned int newstate = 0; - unsigned int cpu = policy->cpu; - unsigned int dest_state; - int ret; - - if (unlikely(eps_cpu[cpu] == NULL)) - return -ENODEV; - centaur = eps_cpu[cpu]; - - if (unlikely(cpufreq_frequency_table_target(policy, - &eps_cpu[cpu]->freq_table[0], - target_freq, - relation, - &newstate))) { - return -EINVAL; - } - - /* Make frequency transition */ - dest_state = centaur->freq_table[newstate].index & 0xffff; - ret = eps_set_state(centaur, cpu, dest_state); - if (ret) - printk(KERN_ERR "eps: Timeout!\n"); - return ret; -} - -static int eps_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, - &eps_cpu[policy->cpu]->freq_table[0]); -} - -static int eps_cpu_init(struct cpufreq_policy *policy) -{ - unsigned int i; - u32 lo, hi; - u64 val; - u8 current_multiplier, current_voltage; - u8 max_multiplier, max_voltage; - u8 min_multiplier, min_voltage; - u8 brand; - u32 fsb; - struct eps_cpu_data *centaur; - struct cpufreq_frequency_table *f_table; - int k, step, voltage; - int ret; - int states; - - if (policy->cpu != 0) - return -ENODEV; - - /* Check brand */ - printk("eps: Detected VIA "); - rdmsr(0x1153, lo, hi); - brand = 
(((lo >> 2) ^ lo) >> 18) & 3; - switch(brand) { - case EPS_BRAND_C7M: - printk("C7-M\n"); - break; - case EPS_BRAND_C7: - printk("C7\n"); - break; - case EPS_BRAND_EDEN: - printk("Eden\n"); - break; - case EPS_BRAND_C3: - printk("C3\n"); - return -ENODEV; - break; - } - /* Enable Enhanced PowerSaver */ - rdmsrl(MSR_IA32_MISC_ENABLE, val); - if (!(val & 1 << 16)) { - val |= 1 << 16; - wrmsrl(MSR_IA32_MISC_ENABLE, val); - /* Can be locked at 0 */ - rdmsrl(MSR_IA32_MISC_ENABLE, val); - if (!(val & 1 << 16)) { - printk("eps: Can't enable Enhanced PowerSaver\n"); - return -ENODEV; - } - } - - /* Print voltage and multiplier */ - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - current_voltage = lo & 0xff; - printk("eps: Current voltage = %dmV\n", current_voltage * 16 + 700); - current_multiplier = (lo >> 8) & 0xff; - printk("eps: Current multiplier = %d\n", current_multiplier); - - /* Print limits */ - max_voltage = hi & 0xff; - printk("eps: Highest voltage = %dmV\n", max_voltage * 16 + 700); - max_multiplier = (hi >> 8) & 0xff; - printk("eps: Highest multiplier = %d\n", max_multiplier); - min_voltage = (hi >> 16) & 0xff; - printk("eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700); - min_multiplier = (hi >> 24) & 0xff; - printk("eps: Lowest multiplier = %d\n", min_multiplier); - - /* Sanity checks */ - if (current_multiplier == 0 || max_multiplier == 0 - || min_multiplier == 0) - return -EINVAL; - if (current_multiplier > max_multiplier - || max_multiplier <= min_multiplier) - return -EINVAL; - if (current_voltage > 0x1c || max_voltage > 0x1c) - return -EINVAL; - if (max_voltage < min_voltage) - return -EINVAL; - - /* Calc FSB speed */ - fsb = cpu_khz / current_multiplier; - /* Calc number of p-states supported */ - if (brand == EPS_BRAND_C7M) - states = max_multiplier - min_multiplier + 1; - else - states = 2; - - /* Allocate private data and frequency table for current cpu */ - centaur = kzalloc(sizeof(struct eps_cpu_data) - + (states + 1) * sizeof(struct 
cpufreq_frequency_table), - GFP_KERNEL); - if (!centaur) - return -ENOMEM; - eps_cpu[0] = centaur; - - /* Copy basic values */ - centaur->fsb = fsb; - - /* Fill frequency and MSR value table */ - f_table = ¢aur->freq_table[0]; - if (brand != EPS_BRAND_C7M) { - f_table[0].frequency = fsb * min_multiplier; - f_table[0].index = (min_multiplier << 8) | min_voltage; - f_table[1].frequency = fsb * max_multiplier; - f_table[1].index = (max_multiplier << 8) | max_voltage; - f_table[2].frequency = CPUFREQ_TABLE_END; - } else { - k = 0; - step = ((max_voltage - min_voltage) * 256) - / (max_multiplier - min_multiplier); - for (i = min_multiplier; i <= max_multiplier; i++) { - voltage = (k * step) / 256 + min_voltage; - f_table[k].frequency = fsb * i; - f_table[k].index = (i << 8) | voltage; - k++; - } - f_table[k].frequency = CPUFREQ_TABLE_END; - } - - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */ - policy->cur = fsb * current_multiplier; - - ret = cpufreq_frequency_table_cpuinfo(policy, ¢aur->freq_table[0]); - if (ret) { - kfree(centaur); - return ret; - } - - cpufreq_frequency_table_get_attr(¢aur->freq_table[0], policy->cpu); - return 0; -} - -static int eps_cpu_exit(struct cpufreq_policy *policy) -{ - unsigned int cpu = policy->cpu; - struct eps_cpu_data *centaur; - u32 lo, hi; - - if (eps_cpu[cpu] == NULL) - return -ENODEV; - centaur = eps_cpu[cpu]; - - /* Get max frequency */ - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - /* Set max frequency */ - eps_set_state(centaur, cpu, hi & 0xffff); - /* Bye */ - cpufreq_frequency_table_put_attr(policy->cpu); - kfree(eps_cpu[cpu]); - eps_cpu[cpu] = NULL; - return 0; -} - -static struct freq_attr* eps_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver eps_driver = { - .verify = eps_verify, - .target = eps_target, - .init = eps_cpu_init, - .exit = eps_cpu_exit, - .get = eps_get, - .name = "e_powersaver", - .owner = 
THIS_MODULE, - .attr = eps_attr, -}; - -static int __init eps_init(void) -{ - struct cpuinfo_x86 *c = cpu_data; - - /* This driver will work only on Centaur C7 processors with - * Enhanced SpeedStep/PowerSaver registers */ - if (c->x86_vendor != X86_VENDOR_CENTAUR - || c->x86 != 6 || c->x86_model != 10) - return -ENODEV; - if (!cpu_has(c, X86_FEATURE_EST)) - return -ENODEV; - - if (cpufreq_register_driver(&eps_driver)) - return -EINVAL; - return 0; -} - -static void __exit eps_exit(void) -{ - cpufreq_unregister_driver(&eps_driver); -} - -MODULE_AUTHOR("Rafa³ Bilski "); -MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's."); -MODULE_LICENSE("GPL"); - -module_init(eps_init); -module_exit(eps_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/elanfreq.c b/arch/i386/kernel/cpu/cpufreq/elanfreq.c deleted file mode 100644 index f317276afa7..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/elanfreq.c +++ /dev/null @@ -1,309 +0,0 @@ -/* - * elanfreq: cpufreq driver for the AMD ELAN family - * - * (c) Copyright 2002 Robert Schwebel - * - * Parts of this code are (c) Sven Geggus - * - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * 2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel - * - */ - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -#define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ -#define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ - -/* Module parameter */ -static int max_freq; - -struct s_elan_multiplier { - int clock; /* frequency in kHz */ - int val40h; /* PMU Force Mode register */ - int val80h; /* CPU Clock Speed Register */ -}; - -/* - * It is important that the frequencies - * are listed in ascending order here! 
- */ -struct s_elan_multiplier elan_multiplier[] = { - {1000, 0x02, 0x18}, - {2000, 0x02, 0x10}, - {4000, 0x02, 0x08}, - {8000, 0x00, 0x00}, - {16000, 0x00, 0x02}, - {33000, 0x00, 0x04}, - {66000, 0x01, 0x04}, - {99000, 0x01, 0x05} -}; - -static struct cpufreq_frequency_table elanfreq_table[] = { - {0, 1000}, - {1, 2000}, - {2, 4000}, - {3, 8000}, - {4, 16000}, - {5, 33000}, - {6, 66000}, - {7, 99000}, - {0, CPUFREQ_TABLE_END}, -}; - - -/** - * elanfreq_get_cpu_frequency: determine current cpu speed - * - * Finds out at which frequency the CPU of the Elan SOC runs - * at the moment. Frequencies from 1 to 33 MHz are generated - * the normal way, 66 and 99 MHz are called "Hyperspeed Mode" - * and have the rest of the chip running with 33 MHz. - */ - -static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) -{ - u8 clockspeed_reg; /* Clock Speed Register */ - - local_irq_disable(); - outb_p(0x80,REG_CSCIR); - clockspeed_reg = inb_p(REG_CSCDR); - local_irq_enable(); - - if ((clockspeed_reg & 0xE0) == 0xE0) - return 0; - - /* Are we in CPU clock multiplied mode (66/99 MHz)? */ - if ((clockspeed_reg & 0xE0) == 0xC0) { - if ((clockspeed_reg & 0x01) == 0) - return 66000; - else - return 99000; - } - - /* 33 MHz is not 32 MHz... */ - if ((clockspeed_reg & 0xE0)==0xA0) - return 33000; - - return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000); -} - - -/** - * elanfreq_set_cpu_frequency: Change the CPU core frequency - * @cpu: cpu number - * @freq: frequency in kHz - * - * This function takes a frequency value and changes the CPU frequency - * according to this. Note that the frequency has to be checked by - * elanfreq_validatespeed() for correctness! - * - * There is no return value. 
- */ - -static void elanfreq_set_cpu_state (unsigned int state) -{ - struct cpufreq_freqs freqs; - - freqs.old = elanfreq_get_cpu_frequency(0); - freqs.new = elan_multiplier[state].clock; - freqs.cpu = 0; /* elanfreq.c is UP only driver */ - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n", - elan_multiplier[state].clock); - - - /* - * Access to the Elan's internal registers is indexed via - * 0x22: Chip Setup & Control Register Index Register (CSCI) - * 0x23: Chip Setup & Control Register Data Register (CSCD) - * - */ - - /* - * 0x40 is the Power Management Unit's Force Mode Register. - * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency) - */ - - local_irq_disable(); - outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */ - outb_p(0x00,REG_CSCDR); - local_irq_enable(); /* wait till internal pipelines and */ - udelay(1000); /* buffers have cleaned up */ - - local_irq_disable(); - - /* now, set the CPU clock speed register (0x80) */ - outb_p(0x80,REG_CSCIR); - outb_p(elan_multiplier[state].val80h,REG_CSCDR); - - /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ - outb_p(0x40,REG_CSCIR); - outb_p(elan_multiplier[state].val40h,REG_CSCDR); - udelay(10000); - local_irq_enable(); - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); -}; - - -/** - * elanfreq_validatespeed: test if frequency range is valid - * @policy: the policy to validate - * - * This function checks if a given frequency range in kHz is valid - * for the hardware supported by the driver. 
- */ - -static int elanfreq_verify (struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); -} - -static int elanfreq_target (struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - - if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate)) - return -EINVAL; - - elanfreq_set_cpu_state(newstate); - - return 0; -} - - -/* - * Module init and exit code - */ - -static int elanfreq_cpu_init(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *c = cpu_data; - unsigned int i; - int result; - - /* capability check */ - if ((c->x86_vendor != X86_VENDOR_AMD) || - (c->x86 != 4) || (c->x86_model!=10)) - return -ENODEV; - - /* max freq */ - if (!max_freq) - max_freq = elanfreq_get_cpu_frequency(0); - - /* table init */ - for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { - if (elanfreq_table[i].frequency > max_freq) - elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; - } - - /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - policy->cur = elanfreq_get_cpu_frequency(0); - - result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); - if (result) - return (result); - - cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); - return 0; -} - - -static int elanfreq_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - - -#ifndef MODULE -/** - * elanfreq_setup - elanfreq command line parameter parsing - * - * elanfreq command line parameter. Use: - * elanfreq=66000 - * to set the maximum CPU frequency to 66 MHz. Note that in - * case you do not give this boot parameter, the maximum - * frequency will fall back to _current_ CPU frequency which - * might be lower. If you build this as a module, use the - * max_freq module parameter instead. 
- */ -static int __init elanfreq_setup(char *str) -{ - max_freq = simple_strtoul(str, &str, 0); - printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n"); - return 1; -} -__setup("elanfreq=", elanfreq_setup); -#endif - - -static struct freq_attr* elanfreq_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - - -static struct cpufreq_driver elanfreq_driver = { - .get = elanfreq_get_cpu_frequency, - .verify = elanfreq_verify, - .target = elanfreq_target, - .init = elanfreq_cpu_init, - .exit = elanfreq_cpu_exit, - .name = "elanfreq", - .owner = THIS_MODULE, - .attr = elanfreq_attr, -}; - - -static int __init elanfreq_init(void) -{ - struct cpuinfo_x86 *c = cpu_data; - - /* Test if we have the right hardware */ - if ((c->x86_vendor != X86_VENDOR_AMD) || - (c->x86 != 4) || (c->x86_model!=10)) { - printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); - return -ENODEV; - } - return cpufreq_register_driver(&elanfreq_driver); -} - - -static void __exit elanfreq_exit(void) -{ - cpufreq_unregister_driver(&elanfreq_driver); -} - - -module_param (max_freq, int, 0444); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Robert Schwebel , Sven Geggus "); -MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs"); - -module_init(elanfreq_init); -module_exit(elanfreq_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c deleted file mode 100644 index 461dabc4e49..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c +++ /dev/null @@ -1,495 +0,0 @@ -/* - * Cyrix MediaGX and NatSemi Geode Suspend Modulation - * (C) 2002 Zwane Mwaikambo - * (C) 2002 Hiroshi Miura - * All Rights Reserved - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation - * - * The author(s) of this software shall not be held liable for 
damages - * of any nature resulting due to the use of this software. This - * software is provided AS-IS with no warranties. - * - * Theoritical note: - * - * (see Geode(tm) CS5530 manual (rev.4.1) page.56) - * - * CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0 - * are based on Suspend Moduration. - * - * Suspend Modulation works by asserting and de-asserting the SUSP# pin - * to CPU(GX1/GXLV) for configurable durations. When asserting SUSP# - * the CPU enters an idle state. GX1 stops its core clock when SUSP# is - * asserted then power consumption is reduced. - * - * Suspend Modulation's OFF/ON duration are configurable - * with 'Suspend Modulation OFF Count Register' - * and 'Suspend Modulation ON Count Register'. - * These registers are 8bit counters that represent the number of - * 32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF) - * to the processor. - * - * These counters define a ratio which is the effective frequency - * of operation of the system. - * - * OFF Count - * F_eff = Fgx * ---------------------- - * OFF Count + ON Count - * - * 0 <= On Count, Off Count <= 255 - * - * From these limits, we can get register values - * - * off_duration + on_duration <= MAX_DURATION - * on_duration = off_duration * (stock_freq - freq) / freq - * - * off_duration = (freq * DURATION) / stock_freq - * on_duration = DURATION - off_duration - * - * - *--------------------------------------------------------------------------- - * - * ChangeLog: - * Dec. 12, 2003 Hiroshi Miura - * - fix on/off register mistake - * - fix cpu_khz calc when it stops cpu modulation. - * - * Dec. 11, 2002 Hiroshi Miura - * - rewrite for Cyrix MediaGX Cx5510/5520 and - * NatSemi Geode Cs5530(A). - * - * Jul. ??, 2002 Zwane Mwaikambo - * - cs5530_mod patch for 2.4.19-rc1. 
- * - *--------------------------------------------------------------------------- - * - * Todo - * Test on machines with 5510, 5530, 5530A - */ - -/************************************************************************ - * Suspend Modulation - Definitions * - ************************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* PCI config registers, all at F0 */ -#define PCI_PMER1 0x80 /* power management enable register 1 */ -#define PCI_PMER2 0x81 /* power management enable register 2 */ -#define PCI_PMER3 0x82 /* power management enable register 3 */ -#define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */ -#define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */ -#define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */ -#define PCI_MODON 0x95 /* suspend modulation ON counter register */ -#define PCI_SUSCFG 0x96 /* suspend configuration register */ - -/* PMER1 bits */ -#define GPM (1<<0) /* global power management */ -#define GIT (1<<1) /* globally enable PM device idle timers */ -#define GTR (1<<2) /* globally enable IO traps */ -#define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */ -#define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */ - -/* SUSCFG bits */ -#define SUSMOD (1<<0) /* enable/disable suspend modulation */ -/* the belows support only with cs5530 (after rev.1.2)/cs5530A */ -#define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */ - /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */ -#define SUSCFG (1<<2) /* enable powering down a GXLV processor. 
"Special 3Volt Suspend" mode */ -/* the belows support only with cs5530A */ -#define PWRSVE_ISA (1<<3) /* stop ISA clock */ -#define PWRSVE (1<<4) /* active idle */ - -struct gxfreq_params { - u8 on_duration; - u8 off_duration; - u8 pci_suscfg; - u8 pci_pmer1; - u8 pci_pmer2; - struct pci_dev *cs55x0; -}; - -static struct gxfreq_params *gx_params; -static int stock_freq; - -/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */ -static int pci_busclk = 0; -module_param (pci_busclk, int, 0444); - -/* maximum duration for which the cpu may be suspended - * (32us * MAX_DURATION). If no parameter is given, this defaults - * to 255. - * Note that this leads to a maximum of 8 ms(!) where the CPU clock - * is suspended -- processing power is just 0.39% of what it used to be, - * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */ -static int max_duration = 255; -module_param (max_duration, int, 0444); - -/* For the default policy, we want at least some processing power - * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV) - */ -#define POLICY_MIN_DIV 20 - - -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg) - -/** - * we can detect a core multipiler from dir0_lsb - * from GX1 datasheet p.56, - * MULT[3:0]: - * 0000 = SYSCLK multiplied by 4 (test only) - * 0001 = SYSCLK multiplied by 10 - * 0010 = SYSCLK multiplied by 4 - * 0011 = SYSCLK multiplied by 6 - * 0100 = SYSCLK multiplied by 9 - * 0101 = SYSCLK multiplied by 5 - * 0110 = SYSCLK multiplied by 7 - * 0111 = SYSCLK multiplied by 8 - * of 33.3MHz - **/ -static int gx_freq_mult[16] = { - 4, 10, 4, 6, 9, 5, 7, 8, - 0, 0, 0, 0, 0, 0, 0, 0 -}; - - -/**************************************************************** - * Low Level chipset interface * - ****************************************************************/ -static struct pci_device_id gx_chipset_tbl[] __initdata = { - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID }, - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID }, - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID }, - { 0, }, -}; - -/** - * gx_detect_chipset: - * - **/ -static __init struct pci_dev *gx_detect_chipset(void) -{ - struct pci_dev *gx_pci = NULL; - - /* check if CPU is a MediaGX or a Geode. */ - if ((current_cpu_data.x86_vendor != X86_VENDOR_NSC) && - (current_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) { - dprintk("error: no MediaGX/Geode processor found!\n"); - return NULL; - } - - /* detect which companion chip is used */ - while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) { - if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) - return gx_pci; - } - - dprintk("error: no supported chipset found!\n"); - return NULL; -} - -/** - * gx_get_cpuspeed: - * - * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi Geode CPU runs. 
- */ -static unsigned int gx_get_cpuspeed(unsigned int cpu) -{ - if ((gx_params->pci_suscfg & SUSMOD) == 0) - return stock_freq; - - return (stock_freq * gx_params->off_duration) - / (gx_params->on_duration + gx_params->off_duration); -} - -/** - * gx_validate_speed: - * determine current cpu speed - * - **/ - -static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration) -{ - unsigned int i; - u8 tmp_on, tmp_off; - int old_tmp_freq = stock_freq; - int tmp_freq; - - *off_duration=1; - *on_duration=0; - - for (i=max_duration; i>0; i--) { - tmp_off = ((khz * i) / stock_freq) & 0xff; - tmp_on = i - tmp_off; - tmp_freq = (stock_freq * tmp_off) / i; - /* if this relation is closer to khz, use this. If it's equal, - * prefer it, too - lower latency */ - if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) { - *on_duration = tmp_on; - *off_duration = tmp_off; - old_tmp_freq = tmp_freq; - } - } - - return old_tmp_freq; -} - - -/** - * gx_set_cpuspeed: - * set cpu speed in khz. 
- **/ - -static void gx_set_cpuspeed(unsigned int khz) -{ - u8 suscfg, pmer1; - unsigned int new_khz; - unsigned long flags; - struct cpufreq_freqs freqs; - - freqs.cpu = 0; - freqs.old = gx_get_cpuspeed(0); - - new_khz = gx_validate_speed(khz, &gx_params->on_duration, &gx_params->off_duration); - - freqs.new = new_khz; - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - local_irq_save(flags); - - if (new_khz != stock_freq) { /* if new khz == 100% of CPU speed, it is special case */ - switch (gx_params->cs55x0->device) { - case PCI_DEVICE_ID_CYRIX_5530_LEGACY: - pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP; - /* FIXME: need to test other values -- Zwane,Miura */ - pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */ - pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */ - pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1); - - if (gx_params->cs55x0->revision < 0x10) { /* CS5530(rev 1.2, 1.3) */ - suscfg = gx_params->pci_suscfg | SUSMOD; - } else { /* CS5530A,B.. 
*/ - suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE; - } - break; - case PCI_DEVICE_ID_CYRIX_5520: - case PCI_DEVICE_ID_CYRIX_5510: - suscfg = gx_params->pci_suscfg | SUSMOD; - break; - default: - local_irq_restore(flags); - dprintk("fatal: try to set unknown chipset.\n"); - return; - } - } else { - suscfg = gx_params->pci_suscfg & ~(SUSMOD); - gx_params->off_duration = 0; - gx_params->on_duration = 0; - dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n"); - } - - pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration); - pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration); - - pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg); - pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); - - local_irq_restore(flags); - - gx_params->pci_suscfg = suscfg; - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - - dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", - gx_params->on_duration * 32, gx_params->off_duration * 32); - dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); -} - -/**************************************************************** - * High level functions * - ****************************************************************/ - -/* - * cpufreq_gx_verify: test if frequency range is valid - * - * This function checks if a given frequency range in kHz is valid - * for the hardware supported by the driver. - */ - -static int cpufreq_gx_verify(struct cpufreq_policy *policy) -{ - unsigned int tmp_freq = 0; - u8 tmp1, tmp2; - - if (!stock_freq || !policy) - return -EINVAL; - - policy->cpu = 0; - cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); - - /* it needs to be assured that at least one supported frequency is - * within policy->min and policy->max. If it is not, policy->max - * needs to be increased until one freuqency is supported. - * policy->min may not be decreased, though. 
This way we guarantee a - * specific processing capacity. - */ - tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2); - if (tmp_freq < policy->min) - tmp_freq += stock_freq / max_duration; - policy->min = tmp_freq; - if (policy->min > policy->max) - policy->max = tmp_freq; - tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2); - if (tmp_freq > policy->max) - tmp_freq -= stock_freq / max_duration; - policy->max = tmp_freq; - if (policy->max < policy->min) - policy->max = policy->min; - cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); - - return 0; -} - -/* - * cpufreq_gx_target: - * - */ -static int cpufreq_gx_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - u8 tmp1, tmp2; - unsigned int tmp_freq; - - if (!stock_freq || !policy) - return -EINVAL; - - policy->cpu = 0; - - tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2); - while (tmp_freq < policy->min) { - tmp_freq += stock_freq / max_duration; - tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); - } - while (tmp_freq > policy->max) { - tmp_freq -= stock_freq / max_duration; - tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); - } - - gx_set_cpuspeed(tmp_freq); - - return 0; -} - -static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) -{ - unsigned int maxfreq, curfreq; - - if (!policy || policy->cpu != 0) - return -ENODEV; - - /* determine maximum frequency */ - if (pci_busclk) { - maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; - } else if (cpu_khz) { - maxfreq = cpu_khz; - } else { - maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; - } - stock_freq = maxfreq; - curfreq = gx_get_cpuspeed(0); - - dprintk("cpu max frequency is %d.\n", maxfreq); - dprintk("cpu current frequency is %dkHz.\n",curfreq); - - /* setup basic struct for cpufreq API */ - policy->cpu = 0; - - if (max_duration < POLICY_MIN_DIV) - policy->min = maxfreq / max_duration; - else - policy->min = maxfreq / POLICY_MIN_DIV; 
- policy->max = maxfreq; - policy->cur = curfreq; - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.min_freq = maxfreq / max_duration; - policy->cpuinfo.max_freq = maxfreq; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - - return 0; -} - -/* - * cpufreq_gx_init: - * MediaGX/Geode GX initialize cpufreq driver - */ -static struct cpufreq_driver gx_suspmod_driver = { - .get = gx_get_cpuspeed, - .verify = cpufreq_gx_verify, - .target = cpufreq_gx_target, - .init = cpufreq_gx_cpu_init, - .name = "gx-suspmod", - .owner = THIS_MODULE, -}; - -static int __init cpufreq_gx_init(void) -{ - int ret; - struct gxfreq_params *params; - struct pci_dev *gx_pci; - - /* Test if we have the right hardware */ - if ((gx_pci = gx_detect_chipset()) == NULL) - return -ENODEV; - - /* check whether module parameters are sane */ - if (max_duration > 0xff) - max_duration = 0xff; - - dprintk("geode suspend modulation available.\n"); - - params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL); - if (params == NULL) - return -ENOMEM; - - params->cs55x0 = gx_pci; - gx_params = params; - - /* keep cs55x0 configurations */ - pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg)); - pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1)); - pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); - pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); - pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration)); - - if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { - kfree(params); - return ret; /* register error! 
*/ - } - - return 0; -} - -static void __exit cpufreq_gx_exit(void) -{ - cpufreq_unregister_driver(&gx_suspmod_driver); - pci_dev_put(gx_params->cs55x0); - kfree(gx_params); -} - -MODULE_AUTHOR ("Hiroshi Miura "); -MODULE_DESCRIPTION ("Cpufreq driver for Cyrix MediaGX and NatSemi Geode"); -MODULE_LICENSE ("GPL"); - -module_init(cpufreq_gx_init); -module_exit(cpufreq_gx_exit); - diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c deleted file mode 100644 index f0cce3c2dc3..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ /dev/null @@ -1,1024 +0,0 @@ -/* - * (C) 2001-2004 Dave Jones. - * (C) 2002 Padraig Brady. - * - * Licensed under the terms of the GNU GPL License version 2. - * Based upon datasheets & sample CPUs kindly provided by VIA. - * - * VIA have currently 3 different versions of Longhaul. - * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147. - * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0. - * Version 2 of longhaul is backward compatible with v1, but adds - * LONGHAUL MSR for purpose of both frequency and voltage scaling. - * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C). - * Version 3 of longhaul got renamed to Powersaver and redesigned - * to use only the POWERSAVER MSR at 0x110a. - * It is present in Ezra-T (C5M), Nehemiah (C5X) and above. - * It's pretty much the same feature wise to longhaul v2, though - * there is provision for scaling FSB too, but this doesn't work - * too well in practice so we don't even try to use this. - * - * BIG FAT DISCLAIMER: Work in progress code. 
Possibly *dangerous* - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "longhaul.h" - -#define PFX "longhaul: " - -#define TYPE_LONGHAUL_V1 1 -#define TYPE_LONGHAUL_V2 2 -#define TYPE_POWERSAVER 3 - -#define CPU_SAMUEL 1 -#define CPU_SAMUEL2 2 -#define CPU_EZRA 3 -#define CPU_EZRA_T 4 -#define CPU_NEHEMIAH 5 -#define CPU_NEHEMIAH_C 6 - -/* Flags */ -#define USE_ACPI_C3 (1 << 1) -#define USE_NORTHBRIDGE (1 << 2) - -static int cpu_model; -static unsigned int numscales=16; -static unsigned int fsb; - -static const struct mV_pos *vrm_mV_table; -static const unsigned char *mV_vrm_table; - -static unsigned int highest_speed, lowest_speed; /* kHz */ -static unsigned int minmult, maxmult; -static int can_scale_voltage; -static struct acpi_processor *pr = NULL; -static struct acpi_processor_cx *cx = NULL; -static u32 acpi_regs_addr; -static u8 longhaul_flags; -static unsigned int longhaul_index; - -/* Module parameters */ -static int scale_voltage; -static int disable_acpi_c3; -static int revid_errata; - -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) - - -/* Clock ratios multiplied by 10 */ -static int clock_ratio[32]; -static int eblcr_table[32]; -static int longhaul_version; -static struct cpufreq_frequency_table *longhaul_table; - -#ifdef CONFIG_CPU_FREQ_DEBUG -static char speedbuffer[8]; - -static char *print_speed(int speed) -{ - if (speed < 1000) { - snprintf(speedbuffer, sizeof(speedbuffer),"%dMHz", speed); - return speedbuffer; - } - - if (speed%1000 == 0) - snprintf(speedbuffer, sizeof(speedbuffer), - "%dGHz", speed/1000); - else - snprintf(speedbuffer, sizeof(speedbuffer), - "%d.%dGHz", speed/1000, (speed%1000)/100); - - return speedbuffer; -} -#endif - - -static unsigned int calc_speed(int mult) -{ - int khz; - khz = (mult/10)*fsb; - if (mult%10) - khz += fsb/2; - khz *= 1000; - return khz; -} - - -static int longhaul_get_cpu_mult(void) -{ - unsigned long invalue=0,lo, hi; - - rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); - invalue = (lo & (1<<22|1<<23|1<<24|1<<25)) >>22; - if (longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) { - if (lo & (1<<27)) - invalue+=16; - } - return eblcr_table[invalue]; -} - -/* For processor with BCR2 MSR */ - -static void do_longhaul1(unsigned int clock_ratio_index) -{ - union msr_bcr2 bcr2; - - rdmsrl(MSR_VIA_BCR2, bcr2.val); - /* Enable software clock multiplier */ - bcr2.bits.ESOFTBF = 1; - bcr2.bits.CLOCKMUL = clock_ratio_index & 0xff; - - /* Sync to timer tick */ - safe_halt(); - /* Change frequency on next halt or sleep */ - wrmsrl(MSR_VIA_BCR2, bcr2.val); - /* Invoke transition */ - ACPI_FLUSH_CPU_CACHE(); - halt(); - - /* Disable software clock multiplier */ - local_irq_disable(); - rdmsrl(MSR_VIA_BCR2, bcr2.val); - bcr2.bits.ESOFTBF = 0; - wrmsrl(MSR_VIA_BCR2, bcr2.val); -} - -/* For processor with Longhaul MSR */ - -static void do_powersaver(int cx_address, unsigned int clock_ratio_index, - unsigned int dir) -{ - union msr_longhaul longhaul; - u32 t; - - rdmsrl(MSR_VIA_LONGHAUL, 
longhaul.val); - /* Setup new frequency */ - if (!revid_errata) - longhaul.bits.RevisionKey = longhaul.bits.RevisionID; - else - longhaul.bits.RevisionKey = 0; - longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; - longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; - /* Setup new voltage */ - if (can_scale_voltage) - longhaul.bits.SoftVID = (clock_ratio_index >> 8) & 0x1f; - /* Sync to timer tick */ - safe_halt(); - /* Raise voltage if necessary */ - if (can_scale_voltage && dir) { - longhaul.bits.EnableSoftVID = 1; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - /* Change voltage */ - if (!cx_address) { - ACPI_FLUSH_CPU_CACHE(); - halt(); - } else { - ACPI_FLUSH_CPU_CACHE(); - /* Invoke C3 */ - inb(cx_address); - /* Dummy op - must do something useless after P_LVL3 - * read */ - t = inl(acpi_gbl_FADT.xpm_timer_block.address); - } - longhaul.bits.EnableSoftVID = 0; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - } - - /* Change frequency on next halt or sleep */ - longhaul.bits.EnableSoftBusRatio = 1; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - if (!cx_address) { - ACPI_FLUSH_CPU_CACHE(); - halt(); - } else { - ACPI_FLUSH_CPU_CACHE(); - /* Invoke C3 */ - inb(cx_address); - /* Dummy op - must do something useless after P_LVL3 read */ - t = inl(acpi_gbl_FADT.xpm_timer_block.address); - } - /* Disable bus ratio bit */ - longhaul.bits.EnableSoftBusRatio = 0; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - - /* Reduce voltage if necessary */ - if (can_scale_voltage && !dir) { - longhaul.bits.EnableSoftVID = 1; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - /* Change voltage */ - if (!cx_address) { - ACPI_FLUSH_CPU_CACHE(); - halt(); - } else { - ACPI_FLUSH_CPU_CACHE(); - /* Invoke C3 */ - inb(cx_address); - /* Dummy op - must do something useless after P_LVL3 - * read */ - t = inl(acpi_gbl_FADT.xpm_timer_block.address); - } - longhaul.bits.EnableSoftVID = 0; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - } -} - -/** - * longhaul_set_cpu_frequency() - * @clock_ratio_index : 
bitpattern of the new multiplier. - * - * Sets a new clock ratio. - */ - -static void longhaul_setstate(unsigned int table_index) -{ - unsigned int clock_ratio_index; - int speed, mult; - struct cpufreq_freqs freqs; - unsigned long flags; - unsigned int pic1_mask, pic2_mask; - u16 bm_status = 0; - u32 bm_timeout = 1000; - unsigned int dir = 0; - - clock_ratio_index = longhaul_table[table_index].index; - /* Safety precautions */ - mult = clock_ratio[clock_ratio_index & 0x1f]; - if (mult == -1) - return; - speed = calc_speed(mult); - if ((speed > highest_speed) || (speed < lowest_speed)) - return; - /* Voltage transition before frequency transition? */ - if (can_scale_voltage && longhaul_index < table_index) - dir = 1; - - freqs.old = calc_speed(longhaul_get_cpu_mult()); - freqs.new = speed; - freqs.cpu = 0; /* longhaul.c is UP only driver */ - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", - fsb, mult/10, mult%10, print_speed(speed/1000)); -retry_loop: - preempt_disable(); - local_irq_save(flags); - - pic2_mask = inb(0xA1); - pic1_mask = inb(0x21); /* works on C3. save mask. */ - outb(0xFF,0xA1); /* Overkill */ - outb(0xFE,0x21); /* TMR0 only */ - - /* Wait while PCI bus is busy. */ - if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE - || ((pr != NULL) && pr->flags.bm_control))) { - bm_status = inw(acpi_regs_addr); - bm_status &= 1 << 4; - while (bm_status && bm_timeout) { - outw(1 << 4, acpi_regs_addr); - bm_timeout--; - bm_status = inw(acpi_regs_addr); - bm_status &= 1 << 4; - } - } - - if (longhaul_flags & USE_NORTHBRIDGE) { - /* Disable AGP and PCI arbiters */ - outb(3, 0x22); - } else if ((pr != NULL) && pr->flags.bm_control) { - /* Disable bus master arbitration */ - acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1); - } - switch (longhaul_version) { - - /* - * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B]) - * Software controlled multipliers only. 
- */ - case TYPE_LONGHAUL_V1: - do_longhaul1(clock_ratio_index); - break; - - /* - * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C] - * - * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N]) - * Nehemiah can do FSB scaling too, but this has never been proven - * to work in practice. - */ - case TYPE_LONGHAUL_V2: - case TYPE_POWERSAVER: - if (longhaul_flags & USE_ACPI_C3) { - /* Don't allow wakeup */ - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); - do_powersaver(cx->address, clock_ratio_index, dir); - } else { - do_powersaver(0, clock_ratio_index, dir); - } - break; - } - - if (longhaul_flags & USE_NORTHBRIDGE) { - /* Enable arbiters */ - outb(0, 0x22); - } else if ((pr != NULL) && pr->flags.bm_control) { - /* Enable bus master arbitration */ - acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); - } - outb(pic2_mask,0xA1); /* restore mask */ - outb(pic1_mask,0x21); - - local_irq_restore(flags); - preempt_enable(); - - freqs.new = calc_speed(longhaul_get_cpu_mult()); - /* Check if requested frequency is set. */ - if (unlikely(freqs.new != speed)) { - printk(KERN_INFO PFX "Failed to set requested frequency!\n"); - /* Revision ID = 1 but processor is expecting revision key - * equal to 0. Jumpers at the bottom of processor will change - * multiplier and FSB, but will not change bits in Longhaul - * MSR nor enable voltage scaling. */ - if (!revid_errata) { - printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" " - "option.\n"); - revid_errata = 1; - msleep(200); - goto retry_loop; - } - /* Why ACPI C3 sometimes doesn't work is a mystery for me. - * But it does happen. Processor is entering ACPI C3 state, - * but it doesn't change frequency. I tried poking various - * bits in northbridge registers, but without success. 
*/ - if (longhaul_flags & USE_ACPI_C3) { - printk(KERN_INFO PFX "Disabling ACPI C3 support.\n"); - longhaul_flags &= ~USE_ACPI_C3; - if (revid_errata) { - printk(KERN_INFO PFX "Disabling \"Ignore " - "Revision ID\" option.\n"); - revid_errata = 0; - } - msleep(200); - goto retry_loop; - } - /* This shouldn't happen. Longhaul ver. 2 was reported not - * working on processors without voltage scaling, but with - * RevID = 1. RevID errata will make things right. Just - * to be 100% sure. */ - if (longhaul_version == TYPE_LONGHAUL_V2) { - printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n"); - longhaul_version = TYPE_LONGHAUL_V1; - msleep(200); - goto retry_loop; - } - } - /* Report true CPU frequency */ - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - - if (!bm_timeout) - printk(KERN_INFO PFX "Warning: Timeout while waiting for idle PCI bus.\n"); -} - -/* - * Centaur decided to make life a little more tricky. - * Only longhaul v1 is allowed to read EBLCR BSEL[0:1]. - * Samuel2 and above have to try and guess what the FSB is. - * We do this by assuming we booted at maximum multiplier, and interpolate - * between that value multiplied by possible FSBs and cpu_mhz which - * was calculated at boot time. Really ugly, but no other way to do this. 
- */ - -#define ROUNDING 0xf - -static int guess_fsb(int mult) -{ - int speed = cpu_khz / 1000; - int i; - int speeds[] = { 666, 1000, 1333, 2000 }; - int f_max, f_min; - - for (i = 0; i < 4; i++) { - f_max = ((speeds[i] * mult) + 50) / 100; - f_max += (ROUNDING / 2); - f_min = f_max - ROUNDING; - if ((speed <= f_max) && (speed >= f_min)) - return speeds[i] / 10; - } - return 0; -} - - -static int __init longhaul_get_ranges(void) -{ - unsigned int i, j, k = 0; - unsigned int ratio; - int mult; - - /* Get current frequency */ - mult = longhaul_get_cpu_mult(); - if (mult == -1) { - printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n"); - return -EINVAL; - } - fsb = guess_fsb(mult); - if (fsb == 0) { - printk(KERN_INFO PFX "Invalid (reserved) FSB!\n"); - return -EINVAL; - } - /* Get max multiplier - as we always did. - * Longhaul MSR is usefull only when voltage scaling is enabled. - * C3 is booting at max anyway. */ - maxmult = mult; - /* Get min multiplier */ - switch (cpu_model) { - case CPU_NEHEMIAH: - minmult = 50; - break; - case CPU_NEHEMIAH_C: - minmult = 40; - break; - default: - minmult = 30; - break; - } - - dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", - minmult/10, minmult%10, maxmult/10, maxmult%10); - - highest_speed = calc_speed(maxmult); - lowest_speed = calc_speed(minmult); - dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, - print_speed(lowest_speed/1000), - print_speed(highest_speed/1000)); - - if (lowest_speed == highest_speed) { - printk (KERN_INFO PFX "highestspeed == lowest, aborting.\n"); - return -EINVAL; - } - if (lowest_speed > highest_speed) { - printk (KERN_INFO PFX "nonsense! 
lowest (%d > %d) !\n", - lowest_speed, highest_speed); - return -EINVAL; - } - - longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL); - if(!longhaul_table) - return -ENOMEM; - - for (j = 0; j < numscales; j++) { - ratio = clock_ratio[j]; - if (ratio == -1) - continue; - if (ratio > maxmult || ratio < minmult) - continue; - longhaul_table[k].frequency = calc_speed(ratio); - longhaul_table[k].index = j; - k++; - } - if (k <= 1) { - kfree(longhaul_table); - return -ENODEV; - } - /* Sort */ - for (j = 0; j < k - 1; j++) { - unsigned int min_f, min_i; - min_f = longhaul_table[j].frequency; - min_i = j; - for (i = j + 1; i < k; i++) { - if (longhaul_table[i].frequency < min_f) { - min_f = longhaul_table[i].frequency; - min_i = i; - } - } - if (min_i != j) { - unsigned int temp; - temp = longhaul_table[j].frequency; - longhaul_table[j].frequency = longhaul_table[min_i].frequency; - longhaul_table[min_i].frequency = temp; - temp = longhaul_table[j].index; - longhaul_table[j].index = longhaul_table[min_i].index; - longhaul_table[min_i].index = temp; - } - } - - longhaul_table[k].frequency = CPUFREQ_TABLE_END; - - /* Find index we are running on */ - for (j = 0; j < k; j++) { - if (clock_ratio[longhaul_table[j].index & 0x1f] == mult) { - longhaul_index = j; - break; - } - } - return 0; -} - - -static void __init longhaul_setup_voltagescaling(void) -{ - union msr_longhaul longhaul; - struct mV_pos minvid, maxvid, vid; - unsigned int j, speed, pos, kHz_step, numvscales; - int min_vid_speed; - - rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); - if (!(longhaul.bits.RevisionID & 1)) { - printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n"); - return; - } - - if (!longhaul.bits.VRMRev) { - printk(KERN_INFO PFX "VRM 8.5\n"); - vrm_mV_table = &vrm85_mV[0]; - mV_vrm_table = &mV_vrm85[0]; - } else { - printk(KERN_INFO PFX "Mobile VRM\n"); - if (cpu_model < CPU_NEHEMIAH) - return; - vrm_mV_table = &mobilevrm_mV[0]; - mV_vrm_table = 
&mV_mobilevrm[0]; - } - - minvid = vrm_mV_table[longhaul.bits.MinimumVID]; - maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; - - if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { - printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " - "Voltage scaling disabled.\n", - minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000); - return; - } - - if (minvid.mV == maxvid.mV) { - printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are " - "both %d.%03d. Voltage scaling disabled\n", - maxvid.mV/1000, maxvid.mV%1000); - return; - } - - /* How many voltage steps */ - numvscales = maxvid.pos - minvid.pos + 1; - printk(KERN_INFO PFX - "Max VID=%d.%03d " - "Min VID=%d.%03d, " - "%d possible voltage scales\n", - maxvid.mV/1000, maxvid.mV%1000, - minvid.mV/1000, minvid.mV%1000, - numvscales); - - /* Calculate max frequency at min voltage */ - j = longhaul.bits.MinMHzBR; - if (longhaul.bits.MinMHzBR4) - j += 16; - min_vid_speed = eblcr_table[j]; - if (min_vid_speed == -1) - return; - switch (longhaul.bits.MinMHzFSB) { - case 0: - min_vid_speed *= 13333; - break; - case 1: - min_vid_speed *= 10000; - break; - case 3: - min_vid_speed *= 6666; - break; - default: - return; - break; - } - if (min_vid_speed >= highest_speed) - return; - /* Calculate kHz for one voltage step */ - kHz_step = (highest_speed - min_vid_speed) / numvscales; - - j = 0; - while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { - speed = longhaul_table[j].frequency; - if (speed > min_vid_speed) - pos = (speed - min_vid_speed) / kHz_step + minvid.pos; - else - pos = minvid.pos; - longhaul_table[j].index |= mV_vrm_table[pos] << 8; - vid = vrm_mV_table[mV_vrm_table[pos]]; - printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", speed, j, vid.mV); - j++; - } - - can_scale_voltage = 1; - printk(KERN_INFO PFX "Voltage scaling enabled.\n"); -} - - -static int longhaul_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, 
longhaul_table); -} - - -static int longhaul_target(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) -{ - unsigned int table_index = 0; - unsigned int i; - unsigned int dir = 0; - u8 vid, current_vid; - - if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index)) - return -EINVAL; - - /* Don't set same frequency again */ - if (longhaul_index == table_index) - return 0; - - if (!can_scale_voltage) - longhaul_setstate(table_index); - else { - /* On test system voltage transitions exceeding single - * step up or down were turning motherboard off. Both - * "ondemand" and "userspace" are unsafe. C7 is doing - * this in hardware, C3 is old and we need to do this - * in software. */ - i = longhaul_index; - current_vid = (longhaul_table[longhaul_index].index >> 8) & 0x1f; - if (table_index > longhaul_index) - dir = 1; - while (i != table_index) { - vid = (longhaul_table[i].index >> 8) & 0x1f; - if (vid != current_vid) { - longhaul_setstate(i); - current_vid = vid; - msleep(200); - } - if (dir) - i++; - else - i--; - } - longhaul_setstate(table_index); - } - longhaul_index = table_index; - return 0; -} - - -static unsigned int longhaul_get(unsigned int cpu) -{ - if (cpu) - return 0; - return calc_speed(longhaul_get_cpu_mult()); -} - -static acpi_status longhaul_walk_callback(acpi_handle obj_handle, - u32 nesting_level, - void *context, void **return_value) -{ - struct acpi_device *d; - - if ( acpi_bus_get_device(obj_handle, &d) ) { - return 0; - } - *return_value = (void *)acpi_driver_data(d); - return 1; -} - -/* VIA don't support PM2 reg, but have something similar */ -static int enable_arbiter_disable(void) -{ - struct pci_dev *dev; - int status = 1; - int reg; - u8 pci_cmd; - - /* Find PLE133 host bridge */ - reg = 0x78; - dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, - NULL); - /* Find CLE266 host bridge */ - if (dev == NULL) { - reg = 0x76; - dev = 
pci_get_device(PCI_VENDOR_ID_VIA, - PCI_DEVICE_ID_VIA_862X_0, NULL); - /* Find CN400 V-Link host bridge */ - if (dev == NULL) - dev = pci_get_device(PCI_VENDOR_ID_VIA, 0x7259, NULL); - } - if (dev != NULL) { - /* Enable access to port 0x22 */ - pci_read_config_byte(dev, reg, &pci_cmd); - if (!(pci_cmd & 1<<7)) { - pci_cmd |= 1<<7; - pci_write_config_byte(dev, reg, pci_cmd); - pci_read_config_byte(dev, reg, &pci_cmd); - if (!(pci_cmd & 1<<7)) { - printk(KERN_ERR PFX - "Can't enable access to port 0x22.\n"); - status = 0; - } - } - pci_dev_put(dev); - return status; - } - return 0; -} - -static int longhaul_setup_southbridge(void) -{ - struct pci_dev *dev; - u8 pci_cmd; - - /* Find VT8235 southbridge */ - dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); - if (dev == NULL) - /* Find VT8237 southbridge */ - dev = pci_get_device(PCI_VENDOR_ID_VIA, - PCI_DEVICE_ID_VIA_8237, NULL); - if (dev != NULL) { - /* Set transition time to max */ - pci_read_config_byte(dev, 0xec, &pci_cmd); - pci_cmd &= ~(1 << 2); - pci_write_config_byte(dev, 0xec, pci_cmd); - pci_read_config_byte(dev, 0xe4, &pci_cmd); - pci_cmd &= ~(1 << 7); - pci_write_config_byte(dev, 0xe4, pci_cmd); - pci_read_config_byte(dev, 0xe5, &pci_cmd); - pci_cmd |= 1 << 7; - pci_write_config_byte(dev, 0xe5, pci_cmd); - /* Get address of ACPI registers block*/ - pci_read_config_byte(dev, 0x81, &pci_cmd); - if (pci_cmd & 1 << 7) { - pci_read_config_dword(dev, 0x88, &acpi_regs_addr); - acpi_regs_addr &= 0xff00; - printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", acpi_regs_addr); - } - - pci_dev_put(dev); - return 1; - } - return 0; -} - -static int __init longhaul_cpu_init(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *c = cpu_data; - char *cpuname=NULL; - int ret; - u32 lo, hi; - - /* Check what we have on this motherboard */ - switch (c->x86_model) { - case 6: - cpu_model = CPU_SAMUEL; - cpuname = "C3 'Samuel' [C5A]"; - longhaul_version = TYPE_LONGHAUL_V1; - memcpy (clock_ratio, 
samuel1_clock_ratio, sizeof(samuel1_clock_ratio)); - memcpy (eblcr_table, samuel1_eblcr, sizeof(samuel1_eblcr)); - break; - - case 7: - switch (c->x86_mask) { - case 0: - longhaul_version = TYPE_LONGHAUL_V1; - cpu_model = CPU_SAMUEL2; - cpuname = "C3 'Samuel 2' [C5B]"; - /* Note, this is not a typo, early Samuel2's had - * Samuel1 ratios. */ - memcpy(clock_ratio, samuel1_clock_ratio, - sizeof(samuel1_clock_ratio)); - memcpy(eblcr_table, samuel2_eblcr, - sizeof(samuel2_eblcr)); - break; - case 1 ... 15: - longhaul_version = TYPE_LONGHAUL_V1; - if (c->x86_mask < 8) { - cpu_model = CPU_SAMUEL2; - cpuname = "C3 'Samuel 2' [C5B]"; - } else { - cpu_model = CPU_EZRA; - cpuname = "C3 'Ezra' [C5C]"; - } - memcpy(clock_ratio, ezra_clock_ratio, - sizeof(ezra_clock_ratio)); - memcpy(eblcr_table, ezra_eblcr, - sizeof(ezra_eblcr)); - break; - } - break; - - case 8: - cpu_model = CPU_EZRA_T; - cpuname = "C3 'Ezra-T' [C5M]"; - longhaul_version = TYPE_POWERSAVER; - numscales=32; - memcpy (clock_ratio, ezrat_clock_ratio, sizeof(ezrat_clock_ratio)); - memcpy (eblcr_table, ezrat_eblcr, sizeof(ezrat_eblcr)); - break; - - case 9: - longhaul_version = TYPE_POWERSAVER; - numscales = 32; - memcpy(clock_ratio, - nehemiah_clock_ratio, - sizeof(nehemiah_clock_ratio)); - memcpy(eblcr_table, nehemiah_eblcr, sizeof(nehemiah_eblcr)); - switch (c->x86_mask) { - case 0 ... 1: - cpu_model = CPU_NEHEMIAH; - cpuname = "C3 'Nehemiah A' [C5XLOE]"; - break; - case 2 ... 4: - cpu_model = CPU_NEHEMIAH; - cpuname = "C3 'Nehemiah B' [C5XLOH]"; - break; - case 5 ... 15: - cpu_model = CPU_NEHEMIAH_C; - cpuname = "C3 'Nehemiah C' [C5P]"; - break; - } - break; - - default: - cpuname = "Unknown"; - break; - } - /* Check Longhaul ver. 2 */ - if (longhaul_version == TYPE_LONGHAUL_V2) { - rdmsr(MSR_VIA_LONGHAUL, lo, hi); - if (lo == 0 && hi == 0) - /* Looks like MSR isn't present */ - longhaul_version = TYPE_LONGHAUL_V1; - } - - printk (KERN_INFO PFX "VIA %s CPU detected. 
", cpuname); - switch (longhaul_version) { - case TYPE_LONGHAUL_V1: - case TYPE_LONGHAUL_V2: - printk ("Longhaul v%d supported.\n", longhaul_version); - break; - case TYPE_POWERSAVER: - printk ("Powersaver supported.\n"); - break; - }; - - /* Doesn't hurt */ - longhaul_setup_southbridge(); - - /* Find ACPI data for processor */ - acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, &longhaul_walk_callback, - NULL, (void *)&pr); - - /* Check ACPI support for C3 state */ - if (pr != NULL && longhaul_version == TYPE_POWERSAVER) { - cx = &pr->power.states[ACPI_STATE_C3]; - if (cx->address > 0 && cx->latency <= 1000) - longhaul_flags |= USE_ACPI_C3; - } - /* Disable if it isn't working */ - if (disable_acpi_c3) - longhaul_flags &= ~USE_ACPI_C3; - /* Check if northbridge is friendly */ - if (enable_arbiter_disable()) - longhaul_flags |= USE_NORTHBRIDGE; - - /* Check ACPI support for bus master arbiter disable */ - if (!(longhaul_flags & USE_ACPI_C3 - || longhaul_flags & USE_NORTHBRIDGE) - && ((pr == NULL) || !(pr->flags.bm_control))) { - printk(KERN_ERR PFX - "No ACPI support. 
Unsupported northbridge.\n"); - return -ENODEV; - } - - if (longhaul_flags & USE_NORTHBRIDGE) - printk(KERN_INFO PFX "Using northbridge support.\n"); - if (longhaul_flags & USE_ACPI_C3) - printk(KERN_INFO PFX "Using ACPI support.\n"); - - ret = longhaul_get_ranges(); - if (ret != 0) - return ret; - - if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0)) - longhaul_setup_voltagescaling(); - - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = 200000; /* nsec */ - policy->cur = calc_speed(longhaul_get_cpu_mult()); - - ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table); - if (ret) - return ret; - - cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu); - - return 0; -} - -static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static struct freq_attr* longhaul_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver longhaul_driver = { - .verify = longhaul_verify, - .target = longhaul_target, - .get = longhaul_get, - .init = longhaul_cpu_init, - .exit = __devexit_p(longhaul_cpu_exit), - .name = "longhaul", - .owner = THIS_MODULE, - .attr = longhaul_attr, -}; - - -static int __init longhaul_init(void) -{ - struct cpuinfo_x86 *c = cpu_data; - - if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6) - return -ENODEV; - -#ifdef CONFIG_SMP - if (num_online_cpus() > 1) { - printk(KERN_ERR PFX "More than 1 CPU detected, longhaul disabled.\n"); - return -ENODEV; - } -#endif -#ifdef CONFIG_X86_IO_APIC - if (cpu_has_apic) { - printk(KERN_ERR PFX "APIC detected. Longhaul is currently broken in this configuration.\n"); - return -ENODEV; - } -#endif - switch (c->x86_model) { - case 6 ... 
9: - return cpufreq_register_driver(&longhaul_driver); - case 10: - printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); - default: - ;; - } - - return -ENODEV; -} - - -static void __exit longhaul_exit(void) -{ - int i; - - for (i=0; i < numscales; i++) { - if (clock_ratio[i] == maxmult) { - longhaul_setstate(i); - break; - } - } - - cpufreq_unregister_driver(&longhaul_driver); - kfree(longhaul_table); -} - -/* Even if BIOS is exporting ACPI C3 state, and it is used - * with success when CPU is idle, this state doesn't - * trigger frequency transition in some cases. */ -module_param (disable_acpi_c3, int, 0644); -MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); -/* Change CPU voltage with frequency. Very usefull to save - * power, but most VIA C3 processors aren't supporting it. */ -module_param (scale_voltage, int, 0644); -MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); -/* Force revision key to 0 for processors which doesn't - * support voltage scaling, but are introducing itself as - * such. */ -module_param(revid_errata, int, 0644); -MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); - -MODULE_AUTHOR ("Dave Jones "); -MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); -MODULE_LICENSE ("GPL"); - -late_initcall(longhaul_init); -module_exit(longhaul_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h deleted file mode 100644 index 4fcc320997d..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.h +++ /dev/null @@ -1,353 +0,0 @@ -/* - * longhaul.h - * (C) 2003 Dave Jones. - * - * Licensed under the terms of the GNU GPL License version 2. 
- * - * VIA-specific information - */ - -union msr_bcr2 { - struct { - unsigned Reseved:19, // 18:0 - ESOFTBF:1, // 19 - Reserved2:3, // 22:20 - CLOCKMUL:4, // 26:23 - Reserved3:5; // 31:27 - } bits; - unsigned long val; -}; - -union msr_longhaul { - struct { - unsigned RevisionID:4, // 3:0 - RevisionKey:4, // 7:4 - EnableSoftBusRatio:1, // 8 - EnableSoftVID:1, // 9 - EnableSoftBSEL:1, // 10 - Reserved:3, // 11:13 - SoftBusRatio4:1, // 14 - VRMRev:1, // 15 - SoftBusRatio:4, // 19:16 - SoftVID:5, // 24:20 - Reserved2:3, // 27:25 - SoftBSEL:2, // 29:28 - Reserved3:2, // 31:30 - MaxMHzBR:4, // 35:32 - MaximumVID:5, // 40:36 - MaxMHzFSB:2, // 42:41 - MaxMHzBR4:1, // 43 - Reserved4:4, // 47:44 - MinMHzBR:4, // 51:48 - MinimumVID:5, // 56:52 - MinMHzFSB:2, // 58:57 - MinMHzBR4:1, // 59 - Reserved5:4; // 63:60 - } bits; - unsigned long long val; -}; - -/* - * Clock ratio tables. Div/Mod by 10 to get ratio. - * The eblcr ones specify the ratio read from the CPU. - * The clock_ratio ones specify what to write to the CPU. 
- */ - -/* - * VIA C3 Samuel 1 & Samuel 2 (stepping 0) - */ -static const int __initdata samuel1_clock_ratio[16] = { - -1, /* 0000 -> RESERVED */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - -1, /* 0011 -> RESERVED */ - -1, /* 0100 -> RESERVED */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 55, /* 0111 -> 5.5x */ - 60, /* 1000 -> 6.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 50, /* 1011 -> 5.0x */ - 65, /* 1100 -> 6.5x */ - 75, /* 1101 -> 7.5x */ - -1, /* 1110 -> RESERVED */ - -1, /* 1111 -> RESERVED */ -}; - -static const int __initdata samuel1_eblcr[16] = { - 50, /* 0000 -> RESERVED */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - -1, /* 0011 -> RESERVED */ - 55, /* 0100 -> 5.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - -1, /* 0111 -> RESERVED */ - -1, /* 1000 -> RESERVED */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 60, /* 1011 -> 6.0x */ - -1, /* 1100 -> RESERVED */ - 75, /* 1101 -> 7.5x */ - -1, /* 1110 -> RESERVED */ - 65, /* 1111 -> 6.5x */ -}; - -/* - * VIA C3 Samuel2 Stepping 1->15 - */ -static const int __initdata samuel2_eblcr[16] = { - 50, /* 0000 -> 5.0x */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - 100, /* 0011 -> 10.0x */ - 55, /* 0100 -> 5.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 110, /* 0111 -> 11.0x */ - 90, /* 1000 -> 9.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 60, /* 1011 -> 6.0x */ - 120, /* 1100 -> 12.0x */ - 75, /* 1101 -> 7.5x */ - 130, /* 1110 -> 13.0x */ - 65, /* 1111 -> 6.5x */ -}; - -/* - * VIA C3 Ezra - */ -static const int __initdata ezra_clock_ratio[16] = { - 100, /* 0000 -> 10.0x */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - 90, /* 0011 -> 9.0x */ - 95, /* 0100 -> 9.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 55, /* 0111 -> 5.5x */ - 60, /* 1000 -> 6.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 50, /* 1011 -> 5.0x */ - 65, /* 1100 -> 6.5x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 120, /* 
1111 -> 12.0x */ -}; - -static const int __initdata ezra_eblcr[16] = { - 50, /* 0000 -> 5.0x */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - 100, /* 0011 -> 10.0x */ - 55, /* 0100 -> 5.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 95, /* 0111 -> 9.5x */ - 90, /* 1000 -> 9.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 60, /* 1011 -> 6.0x */ - 120, /* 1100 -> 12.0x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 65, /* 1111 -> 6.5x */ -}; - -/* - * VIA C3 (Ezra-T) [C5M]. - */ -static const int __initdata ezrat_clock_ratio[32] = { - 100, /* 0000 -> 10.0x */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - 90, /* 0011 -> 9.0x */ - 95, /* 0100 -> 9.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 55, /* 0111 -> 5.5x */ - 60, /* 1000 -> 6.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 50, /* 1011 -> 5.0x */ - 65, /* 1100 -> 6.5x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 120, /* 1111 -> 12.0x */ - - -1, /* 0000 -> RESERVED (10.0x) */ - 110, /* 0001 -> 11.0x */ - -1, /* 0010 -> 12.0x */ - -1, /* 0011 -> RESERVED (9.0x)*/ - 105, /* 0100 -> 10.5x */ - 115, /* 0101 -> 11.5x */ - 125, /* 0110 -> 12.5x */ - 135, /* 0111 -> 13.5x */ - 140, /* 1000 -> 14.0x */ - 150, /* 1001 -> 15.0x */ - 160, /* 1010 -> 16.0x */ - 130, /* 1011 -> 13.0x */ - 145, /* 1100 -> 14.5x */ - 155, /* 1101 -> 15.5x */ - -1, /* 1110 -> RESERVED (13.0x) */ - -1, /* 1111 -> RESERVED (12.0x) */ -}; - -static const int __initdata ezrat_eblcr[32] = { - 50, /* 0000 -> 5.0x */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - 100, /* 0011 -> 10.0x */ - 55, /* 0100 -> 5.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 95, /* 0111 -> 9.5x */ - 90, /* 1000 -> 9.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 60, /* 1011 -> 6.0x */ - 120, /* 1100 -> 12.0x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 65, /* 1111 -> 6.5x */ - - -1, /* 0000 -> RESERVED (9.0x) */ - 110, /* 0001 -> 11.0x */ - 120, /* 0010 -> 12.0x */ - 
-1, /* 0011 -> RESERVED (10.0x)*/ - 135, /* 0100 -> 13.5x */ - 115, /* 0101 -> 11.5x */ - 125, /* 0110 -> 12.5x */ - 105, /* 0111 -> 10.5x */ - 130, /* 1000 -> 13.0x */ - 150, /* 1001 -> 15.0x */ - 160, /* 1010 -> 16.0x */ - 140, /* 1011 -> 14.0x */ - -1, /* 1100 -> RESERVED (12.0x) */ - 155, /* 1101 -> 15.5x */ - -1, /* 1110 -> RESERVED (13.0x) */ - 145, /* 1111 -> 14.5x */ -}; - -/* - * VIA C3 Nehemiah */ - -static const int __initdata nehemiah_clock_ratio[32] = { - 100, /* 0000 -> 10.0x */ - -1, /* 0001 -> 16.0x */ - 40, /* 0010 -> 4.0x */ - 90, /* 0011 -> 9.0x */ - 95, /* 0100 -> 9.5x */ - -1, /* 0101 -> RESERVED */ - 45, /* 0110 -> 4.5x */ - 55, /* 0111 -> 5.5x */ - 60, /* 1000 -> 6.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 50, /* 1011 -> 5.0x */ - 65, /* 1100 -> 6.5x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 120, /* 1111 -> 12.0x */ - -1, /* 0000 -> 10.0x */ - 110, /* 0001 -> 11.0x */ - -1, /* 0010 -> 12.0x */ - -1, /* 0011 -> 9.0x */ - 105, /* 0100 -> 10.5x */ - 115, /* 0101 -> 11.5x */ - 125, /* 0110 -> 12.5x */ - 135, /* 0111 -> 13.5x */ - 140, /* 1000 -> 14.0x */ - 150, /* 1001 -> 15.0x */ - 160, /* 1010 -> 16.0x */ - 130, /* 1011 -> 13.0x */ - 145, /* 1100 -> 14.5x */ - 155, /* 1101 -> 15.5x */ - -1, /* 1110 -> RESERVED (13.0x) */ - -1, /* 1111 -> 12.0x */ -}; - -static const int __initdata nehemiah_eblcr[32] = { - 50, /* 0000 -> 5.0x */ - 160, /* 0001 -> 16.0x */ - 40, /* 0010 -> 4.0x */ - 100, /* 0011 -> 10.0x */ - 55, /* 0100 -> 5.5x */ - -1, /* 0101 -> RESERVED */ - 45, /* 0110 -> 4.5x */ - 95, /* 0111 -> 9.5x */ - 90, /* 1000 -> 9.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 60, /* 1011 -> 6.0x */ - 120, /* 1100 -> 12.0x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 65, /* 1111 -> 6.5x */ - 90, /* 0000 -> 9.0x */ - 110, /* 0001 -> 11.0x */ - 120, /* 0010 -> 12.0x */ - 100, /* 0011 -> 10.0x */ - 135, /* 0100 -> 13.5x */ - 115, /* 0101 -> 11.5x */ - 125, /* 0110 -> 12.5x */ - 105, /* 0111 -> 10.5x */ 
- 130, /* 1000 -> 13.0x */ - 150, /* 1001 -> 15.0x */ - 160, /* 1010 -> 16.0x */ - 140, /* 1011 -> 14.0x */ - 120, /* 1100 -> 12.0x */ - 155, /* 1101 -> 15.5x */ - -1, /* 1110 -> RESERVED (13.0x) */ - 145 /* 1111 -> 14.5x */ -}; - -/* - * Voltage scales. Div/Mod by 1000 to get actual voltage. - * Which scale to use depends on the VRM type in use. - */ - -struct mV_pos { - unsigned short mV; - unsigned short pos; -}; - -static const struct mV_pos __initdata vrm85_mV[32] = { - {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, - {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, - {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, - {1450, 16}, {1400, 14}, {1350, 12}, {1300, 10}, - {1275, 9}, {1225, 7}, {1175, 5}, {1125, 3}, - {1075, 1}, {1825, 31}, {1775, 29}, {1725, 27}, - {1675, 25}, {1625, 23}, {1575, 21}, {1525, 19}, - {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} -}; - -static const unsigned char __initdata mV_vrm85[32] = { - 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, - 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, - 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, - 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 -}; - -static const struct mV_pos __initdata mobilevrm_mV[32] = { - {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, - {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, - {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, - {1150, 19}, {1100, 18}, {1050, 17}, {1000, 16}, - {975, 15}, {950, 14}, {925, 13}, {900, 12}, - {875, 11}, {850, 10}, {825, 9}, {800, 8}, - {775, 7}, {750, 6}, {725, 5}, {700, 4}, - {675, 3}, {650, 2}, {625, 1}, {600, 0} -}; - -static const unsigned char __initdata mV_mobilevrm[32] = { - 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, - 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, - 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, - 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 -}; - diff --git a/arch/i386/kernel/cpu/cpufreq/longrun.c b/arch/i386/kernel/cpu/cpufreq/longrun.c deleted file mode 100644 index b2689514295..00000000000 --- 
a/arch/i386/kernel/cpu/cpufreq/longrun.c +++ /dev/null @@ -1,325 +0,0 @@ -/* - * (C) 2002 - 2003 Dominik Brodowski - * - * Licensed under the terms of the GNU GPL License version 2. - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longrun", msg) - -static struct cpufreq_driver longrun_driver; - -/** - * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz - * values into per cent values. In TMTA microcode, the following is valid: - * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) - */ -static unsigned int longrun_low_freq, longrun_high_freq; - - -/** - * longrun_get_policy - get the current LongRun policy - * @policy: struct cpufreq_policy where current policy is written into - * - * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS - * and MSR_TMTA_LONGRUN_CTRL - */ -static void __init longrun_get_policy(struct cpufreq_policy *policy) -{ - u32 msr_lo, msr_hi; - - rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); - dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi); - if (msr_lo & 0x01) - policy->policy = CPUFREQ_POLICY_PERFORMANCE; - else - policy->policy = CPUFREQ_POLICY_POWERSAVE; - - rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi); - msr_lo &= 0x0000007F; - msr_hi &= 0x0000007F; - - if ( longrun_high_freq <= longrun_low_freq ) { - /* Assume degenerate Longrun table */ - policy->min = policy->max = longrun_high_freq; - } else { - policy->min = longrun_low_freq + msr_lo * - ((longrun_high_freq - longrun_low_freq) / 100); - policy->max = longrun_low_freq + msr_hi * - ((longrun_high_freq - longrun_low_freq) / 100); - } - policy->cpu = 0; -} - - -/** - * longrun_set_policy - sets a new CPUFreq policy - * @policy: new policy - * - * Sets a new CPUFreq policy on LongRun-capable 
processors. This function - * has to be called with cpufreq_driver locked. - */ -static int longrun_set_policy(struct cpufreq_policy *policy) -{ - u32 msr_lo, msr_hi; - u32 pctg_lo, pctg_hi; - - if (!policy) - return -EINVAL; - - if ( longrun_high_freq <= longrun_low_freq ) { - /* Assume degenerate Longrun table */ - pctg_lo = pctg_hi = 100; - } else { - pctg_lo = (policy->min - longrun_low_freq) / - ((longrun_high_freq - longrun_low_freq) / 100); - pctg_hi = (policy->max - longrun_low_freq) / - ((longrun_high_freq - longrun_low_freq) / 100); - } - - if (pctg_hi > 100) - pctg_hi = 100; - if (pctg_lo > pctg_hi) - pctg_lo = pctg_hi; - - /* performance or economy mode */ - rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); - msr_lo &= 0xFFFFFFFE; - switch (policy->policy) { - case CPUFREQ_POLICY_PERFORMANCE: - msr_lo |= 0x00000001; - break; - case CPUFREQ_POLICY_POWERSAVE: - break; - } - wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); - - /* lower and upper boundary */ - rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - msr_lo &= 0xFFFFFF80; - msr_hi &= 0xFFFFFF80; - msr_lo |= pctg_lo; - msr_hi |= pctg_hi; - wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - - return 0; -} - - -/** - * longrun_verify_poliy - verifies a new CPUFreq policy - * @policy: the policy to verify - * - * Validates a new CPUFreq policy. This function has to be called with - * cpufreq_driver locked. 
- */ -static int longrun_verify_policy(struct cpufreq_policy *policy) -{ - if (!policy) - return -EINVAL; - - policy->cpu = 0; - cpufreq_verify_within_limits(policy, - policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); - - if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) && - (policy->policy != CPUFREQ_POLICY_PERFORMANCE)) - return -EINVAL; - - return 0; -} - -static unsigned int longrun_get(unsigned int cpu) -{ - u32 eax, ebx, ecx, edx; - - if (cpu) - return 0; - - cpuid(0x80860007, &eax, &ebx, &ecx, &edx); - dprintk("cpuid eax is %u\n", eax); - - return (eax * 1000); -} - -/** - * longrun_determine_freqs - determines the lowest and highest possible core frequency - * @low_freq: an int to put the lowest frequency into - * @high_freq: an int to put the highest frequency into - * - * Determines the lowest and highest possible core frequencies on this CPU. - * This is necessary to calculate the performance percentage according to - * TMTA rules: - * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) - */ -static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, - unsigned int *high_freq) -{ - u32 msr_lo, msr_hi; - u32 save_lo, save_hi; - u32 eax, ebx, ecx, edx; - u32 try_hi; - struct cpuinfo_x86 *c = cpu_data; - - if (!low_freq || !high_freq) - return -EINVAL; - - if (cpu_has(c, X86_FEATURE_LRTI)) { - /* if the LongRun Table Interface is present, the - * detection is a bit easier: - * For minimum frequency, read out the maximum - * level (msr_hi), write that into "currently - * selected level", and read out the frequency. - * For maximum frequency, read out level zero. 
- */ - /* minimum */ - rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi); - wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi); - rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); - *low_freq = msr_lo * 1000; /* to kHz */ - - /* maximum */ - wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi); - rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); - *high_freq = msr_lo * 1000; /* to kHz */ - - dprintk("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq); - - if (*low_freq > *high_freq) - *low_freq = *high_freq; - return 0; - } - - /* set the upper border to the value determined during TSC init */ - *high_freq = (cpu_khz / 1000); - *high_freq = *high_freq * 1000; - dprintk("high frequency is %u kHz\n", *high_freq); - - /* get current borders */ - rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - save_lo = msr_lo & 0x0000007F; - save_hi = msr_hi & 0x0000007F; - - /* if current perf_pctg is larger than 90%, we need to decrease the - * upper limit to make the calculation more accurate. - */ - cpuid(0x80860007, &eax, &ebx, &ecx, &edx); - /* try decreasing in 10% steps, some processors react only - * on some barrier values */ - for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -=10) { - /* set to 0 to try_hi perf_pctg */ - msr_lo &= 0xFFFFFF80; - msr_hi &= 0xFFFFFF80; - msr_hi |= try_hi; - wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - - /* read out current core MHz and current perf_pctg */ - cpuid(0x80860007, &eax, &ebx, &ecx, &edx); - - /* restore values */ - wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi); - } - dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax); - - /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) - * eqals - * low_freq * ( 1 - perf_pctg) = (cur_freq - high_freq * perf_pctg) - * - * high_freq * perf_pctg is stored tempoarily into "ebx". 
- */ - ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */ - - if ((ecx > 95) || (ecx == 0) || (eax < ebx)) - return -EIO; - - edx = (eax - ebx) / (100 - ecx); - *low_freq = edx * 1000; /* back to kHz */ - - dprintk("low frequency is %u kHz\n", *low_freq); - - if (*low_freq > *high_freq) - *low_freq = *high_freq; - - return 0; -} - - -static int __init longrun_cpu_init(struct cpufreq_policy *policy) -{ - int result = 0; - - /* capability check */ - if (policy->cpu != 0) - return -ENODEV; - - /* detect low and high frequency */ - result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq); - if (result) - return result; - - /* cpuinfo and default policy values */ - policy->cpuinfo.min_freq = longrun_low_freq; - policy->cpuinfo.max_freq = longrun_high_freq; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - longrun_get_policy(policy); - - return 0; -} - - -static struct cpufreq_driver longrun_driver = { - .flags = CPUFREQ_CONST_LOOPS, - .verify = longrun_verify_policy, - .setpolicy = longrun_set_policy, - .get = longrun_get, - .init = longrun_cpu_init, - .name = "longrun", - .owner = THIS_MODULE, -}; - - -/** - * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver - * - * Initializes the LongRun support. 
- */ -static int __init longrun_init(void) -{ - struct cpuinfo_x86 *c = cpu_data; - - if (c->x86_vendor != X86_VENDOR_TRANSMETA || - !cpu_has(c, X86_FEATURE_LONGRUN)) - return -ENODEV; - - return cpufreq_register_driver(&longrun_driver); -} - - -/** - * longrun_exit - unregisters LongRun support - */ -static void __exit longrun_exit(void) -{ - cpufreq_unregister_driver(&longrun_driver); -} - - -MODULE_AUTHOR ("Dominik Brodowski "); -MODULE_DESCRIPTION ("LongRun driver for Transmeta Crusoe and Efficeon processors."); -MODULE_LICENSE ("GPL"); - -module_init(longrun_init); -module_exit(longrun_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c deleted file mode 100644 index 4c76b511e19..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c +++ /dev/null @@ -1,316 +0,0 @@ -/* - * Pentium 4/Xeon CPU on demand clock modulation/speed scaling - * (C) 2002 - 2003 Dominik Brodowski - * (C) 2002 Zwane Mwaikambo - * (C) 2002 Arjan van de Ven - * (C) 2002 Tora T. Engstad - * All Rights Reserved - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * The author(s) of this software shall not be held liable for damages - * of any nature resulting due to the use of this software. This - * software is provided AS-IS with no warranties. - * - * Date Errata Description - * 20020525 N44, O17 12.5% or 25% DC causes lockup - * - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "speedstep-lib.h" - -#define PFX "p4-clockmod: " -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg) - -/* - * Duty Cycle (3bits), note DC_DISABLE is not specified in - * intel docs i just use it to mean disable - */ -enum { - DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT, - DC_64PT, DC_75PT, DC_88PT, DC_DISABLE -}; - -#define DC_ENTRIES 8 - - -static int has_N44_O17_errata[NR_CPUS]; -static unsigned int stock_freq; -static struct cpufreq_driver p4clockmod_driver; -static unsigned int cpufreq_p4_get(unsigned int cpu); - -static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) -{ - u32 l, h; - - if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV)) - return -EINVAL; - - rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h); - - if (l & 0x01) - dprintk("CPU#%d currently thermal throttled\n", cpu); - - if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT)) - newstate = DC_38PT; - - rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); - if (newstate == DC_DISABLE) { - dprintk("CPU#%d disabling modulation\n", cpu); - wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h); - } else { - dprintk("CPU#%d setting duty cycle to %d%%\n", - cpu, ((125 * newstate) / 10)); - /* bits 63 - 5 : reserved - * bit 4 : enable/disable - * bits 3-1 : duty cycle - * bit 0 : reserved - */ - l = (l & ~14); - l = l | (1<<4) | ((newstate & 0x7)<<1); - wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l, h); - } - - return 0; -} - - -static struct cpufreq_frequency_table p4clockmod_table[] = { - {DC_RESV, CPUFREQ_ENTRY_INVALID}, - {DC_DFLT, 0}, - {DC_25PT, 0}, - {DC_38PT, 0}, - {DC_50PT, 0}, - {DC_64PT, 0}, - {DC_75PT, 0}, - {DC_88PT, 0}, - {DC_DISABLE, 0}, - {DC_RESV, CPUFREQ_TABLE_END}, -}; - - -static int cpufreq_p4_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = DC_RESV; - struct cpufreq_freqs freqs; - int i; - - if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate)) - 
return -EINVAL; - - freqs.old = cpufreq_p4_get(policy->cpu); - freqs.new = stock_freq * p4clockmod_table[newstate].index / 8; - - if (freqs.new == freqs.old) - return 0; - - /* notifiers */ - for_each_cpu_mask(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } - - /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software - * Developer's Manual, Volume 3 - */ - for_each_cpu_mask(i, policy->cpus) - cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); - - /* notifiers */ - for_each_cpu_mask(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - - return 0; -} - - -static int cpufreq_p4_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]); -} - - -static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) -{ - if (c->x86 == 0x06) { - if (cpu_has(c, X86_FEATURE_EST)) - printk(KERN_WARNING PFX "Warning: EST-capable CPU detected. " - "The acpi-cpufreq module offers voltage scaling" - " in addition of frequency scaling. You should use " - "that instead of p4-clockmod, if possible.\n"); - switch (c->x86_model) { - case 0x0E: /* Core */ - case 0x0F: /* Core Duo */ - p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); - case 0x0D: /* Pentium M (Dothan) */ - p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; - /* fall through */ - case 0x09: /* Pentium M (Banias) */ - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); - } - } - - if (c->x86 != 0xF) { - printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to \n"); - return 0; - } - - /* on P-4s, the TSC runs with constant frequency independent whether - * throttling is active or not. 
*/ - p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; - - if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) { - printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. " - "The speedstep-ich or acpi cpufreq modules offer " - "voltage scaling in addition of frequency scaling. " - "You should use either one instead of p4-clockmod, " - "if possible.\n"); - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M); - } - - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D); -} - - - -static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; - int cpuid = 0; - unsigned int i; - -#ifdef CONFIG_SMP - policy->cpus = cpu_sibling_map[policy->cpu]; -#endif - - /* Errata workaround */ - cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask; - switch (cpuid) { - case 0x0f07: - case 0x0f0a: - case 0x0f11: - case 0x0f12: - has_N44_O17_errata[policy->cpu] = 1; - dprintk("has errata -- disabling low frequencies\n"); - } - - /* get max frequency */ - stock_freq = cpufreq_p4_get_frequency(c); - if (!stock_freq) - return -EINVAL; - - /* table init */ - for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { - if ((i<2) && (has_N44_O17_errata[policy->cpu])) - p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; - else - p4clockmod_table[i].frequency = (stock_freq * i)/8; - } - cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu); - - /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = 1000000; /* assumed */ - policy->cur = stock_freq; - - return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]); -} - - -static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static unsigned int cpufreq_p4_get(unsigned int cpu) -{ - u32 l, h; - - rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); - - if (l & 0x10) { - l = l 
>> 1; - l &= 0x7; - } else - l = DC_DISABLE; - - if (l != DC_DISABLE) - return (stock_freq * l / 8); - - return stock_freq; -} - -static struct freq_attr* p4clockmod_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver p4clockmod_driver = { - .verify = cpufreq_p4_verify, - .target = cpufreq_p4_target, - .init = cpufreq_p4_cpu_init, - .exit = cpufreq_p4_cpu_exit, - .get = cpufreq_p4_get, - .name = "p4-clockmod", - .owner = THIS_MODULE, - .attr = p4clockmod_attr, -}; - - -static int __init cpufreq_p4_init(void) -{ - struct cpuinfo_x86 *c = cpu_data; - int ret; - - /* - * THERM_CONTROL is architectural for IA32 now, so - * we can rely on the capability checks - */ - if (c->x86_vendor != X86_VENDOR_INTEL) - return -ENODEV; - - if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) || - !test_bit(X86_FEATURE_ACC, c->x86_capability)) - return -ENODEV; - - ret = cpufreq_register_driver(&p4clockmod_driver); - if (!ret) - printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n"); - - return (ret); -} - - -static void __exit cpufreq_p4_exit(void) -{ - cpufreq_unregister_driver(&p4clockmod_driver); -} - - -MODULE_AUTHOR ("Zwane Mwaikambo "); -MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)"); -MODULE_LICENSE ("GPL"); - -late_initcall(cpufreq_p4_init); -module_exit(cpufreq_p4_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k6.c b/arch/i386/kernel/cpu/cpufreq/powernow-k6.c deleted file mode 100644 index f89524051e4..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k6.c +++ /dev/null @@ -1,256 +0,0 @@ -/* - * This file was based upon code in Powertweak Linux (http://powertweak.sf.net) - * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski. - * - * Licensed under the terms of the GNU GPL License version 2. - * - * BIG FAT DISCLAIMER: Work in progress code. 
Possibly *dangerous* - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - - -#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long - as it is unused */ - -static unsigned int busfreq; /* FSB, in 10 kHz */ -static unsigned int max_multiplier; - - -/* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */ -static struct cpufreq_frequency_table clock_ratio[] = { - {45, /* 000 -> 4.5x */ 0}, - {50, /* 001 -> 5.0x */ 0}, - {40, /* 010 -> 4.0x */ 0}, - {55, /* 011 -> 5.5x */ 0}, - {20, /* 100 -> 2.0x */ 0}, - {30, /* 101 -> 3.0x */ 0}, - {60, /* 110 -> 6.0x */ 0}, - {35, /* 111 -> 3.5x */ 0}, - {0, CPUFREQ_TABLE_END} -}; - - -/** - * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier - * - * Returns the current setting of the frequency multiplier. Core clock - * speed is frequency of the Front-Side Bus multiplied with this value. - */ -static int powernow_k6_get_cpu_multiplier(void) -{ - u64 invalue = 0; - u32 msrval; - - msrval = POWERNOW_IOPORT + 0x1; - wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ - invalue=inl(POWERNOW_IOPORT + 0x8); - msrval = POWERNOW_IOPORT + 0x0; - wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ - - return clock_ratio[(invalue >> 5)&7].index; -} - - -/** - * powernow_k6_set_state - set the PowerNow! multiplier - * @best_i: clock_ratio[best_i] is the target multiplier - * - * Tries to change the PowerNow! 
multiplier - */ -static void powernow_k6_set_state (unsigned int best_i) -{ - unsigned long outvalue=0, invalue=0; - unsigned long msrval; - struct cpufreq_freqs freqs; - - if (clock_ratio[best_i].index > max_multiplier) { - printk(KERN_ERR "cpufreq: invalid target frequency\n"); - return; - } - - freqs.old = busfreq * powernow_k6_get_cpu_multiplier(); - freqs.new = busfreq * clock_ratio[best_i].index; - freqs.cpu = 0; /* powernow-k6.c is UP only driver */ - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - /* we now need to transform best_i to the BVC format, see AMD#23446 */ - - outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5); - - msrval = POWERNOW_IOPORT + 0x1; - wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ - invalue=inl(POWERNOW_IOPORT + 0x8); - invalue = invalue & 0xf; - outvalue = outvalue | invalue; - outl(outvalue ,(POWERNOW_IOPORT + 0x8)); - msrval = POWERNOW_IOPORT + 0x0; - wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - - return; -} - - -/** - * powernow_k6_verify - verifies a new CPUfreq policy - * @policy: new policy - * - * Policy must be within lowest and highest possible CPU Frequency, - * and at least one possible state must be within min and max. 
- */ -static int powernow_k6_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &clock_ratio[0]); -} - - -/** - * powernow_k6_setpolicy - sets a new CPUFreq policy - * @policy: new policy - * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) - * - * sets a new CPUFreq policy - */ -static int powernow_k6_target (struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - - if (cpufreq_frequency_table_target(policy, &clock_ratio[0], target_freq, relation, &newstate)) - return -EINVAL; - - powernow_k6_set_state(newstate); - - return 0; -} - - -static int powernow_k6_cpu_init(struct cpufreq_policy *policy) -{ - unsigned int i; - int result; - - if (policy->cpu != 0) - return -ENODEV; - - /* get frequencies */ - max_multiplier = powernow_k6_get_cpu_multiplier(); - busfreq = cpu_khz / max_multiplier; - - /* table init */ - for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { - if (clock_ratio[i].index > max_multiplier) - clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; - else - clock_ratio[i].frequency = busfreq * clock_ratio[i].index; - } - - /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - policy->cur = busfreq * max_multiplier; - - result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); - if (result) - return (result); - - cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); - - return 0; -} - - -static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) -{ - unsigned int i; - for (i=0; i<8; i++) { - if (i==max_multiplier) - powernow_k6_set_state(i); - } - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static unsigned int powernow_k6_get(unsigned int cpu) -{ - return busfreq * powernow_k6_get_cpu_multiplier(); -} - -static struct freq_attr* 
powernow_k6_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver powernow_k6_driver = { - .verify = powernow_k6_verify, - .target = powernow_k6_target, - .init = powernow_k6_cpu_init, - .exit = powernow_k6_cpu_exit, - .get = powernow_k6_get, - .name = "powernow-k6", - .owner = THIS_MODULE, - .attr = powernow_k6_attr, -}; - - -/** - * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver - * - * Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported - * devices, -EINVAL or -ENOMEM on problems during initiatization, and zero - * on success. - */ -static int __init powernow_k6_init(void) -{ - struct cpuinfo_x86 *c = cpu_data; - - if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) || - ((c->x86_model != 12) && (c->x86_model != 13))) - return -ENODEV; - - if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) { - printk("cpufreq: PowerNow IOPORT region already used.\n"); - return -EIO; - } - - if (cpufreq_register_driver(&powernow_k6_driver)) { - release_region (POWERNOW_IOPORT, 16); - return -EINVAL; - } - - return 0; -} - - -/** - * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support - * - * Unregisters AMD K6-2+ / K6-3+ PowerNow! support. - */ -static void __exit powernow_k6_exit(void) -{ - cpufreq_unregister_driver(&powernow_k6_driver); - release_region (POWERNOW_IOPORT, 16); -} - - -MODULE_AUTHOR ("Arjan van de Ven , Dave Jones , Dominik Brodowski "); -MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); -MODULE_LICENSE ("GPL"); - -module_init(powernow_k6_init); -module_exit(powernow_k6_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c deleted file mode 100644 index ca3e1d34188..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c +++ /dev/null @@ -1,703 +0,0 @@ -/* - * AMD K7 Powernow driver. - * (C) 2003 Dave Jones on behalf of SuSE Labs. 
- * (C) 2003-2004 Dave Jones - * - * Licensed under the terms of the GNU GPL License version 2. - * Based upon datasheets & sample CPUs kindly provided by AMD. - * - * Errata 5: Processor may fail to execute a FID/VID change in presence of interrupt. - * - We cli/sti on stepping A0 CPUs around the FID/VID transition. - * Errata 15: Processors with half frequency multipliers may hang upon wakeup from disconnect. - * - We disable half multipliers if ACPI is used on A0 stepping CPUs. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#ifdef CONFIG_X86_POWERNOW_K7_ACPI -#include -#include -#endif - -#include "powernow-k7.h" - -#define PFX "powernow: " - - -struct psb_s { - u8 signature[10]; - u8 tableversion; - u8 flags; - u16 settlingtime; - u8 reserved1; - u8 numpst; -}; - -struct pst_s { - u32 cpuid; - u8 fsbspeed; - u8 maxfid; - u8 startvid; - u8 numpstates; -}; - -#ifdef CONFIG_X86_POWERNOW_K7_ACPI -union powernow_acpi_control_t { - struct { - unsigned long fid:5, - vid:5, - sgtc:20, - res1:2; - } bits; - unsigned long val; -}; -#endif - -#ifdef CONFIG_CPU_FREQ_DEBUG -/* divide by 1000 to get VCore voltage in V. */ -static const int mobile_vid_table[32] = { - 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, - 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0, - 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, - 1075, 1050, 1025, 1000, 975, 950, 925, 0, -}; -#endif - -/* divide by 10 to get FID. */ -static const int fid_codes[32] = { - 110, 115, 120, 125, 50, 55, 60, 65, - 70, 75, 80, 85, 90, 95, 100, 105, - 30, 190, 40, 200, 130, 135, 140, 210, - 150, 225, 160, 165, 170, 180, -1, -1, -}; - -/* This parameter is used in order to force ACPI instead of legacy method for - * configuration purpose. 
- */ - -static int acpi_force; - -static struct cpufreq_frequency_table *powernow_table; - -static unsigned int can_scale_bus; -static unsigned int can_scale_vid; -static unsigned int minimum_speed=-1; -static unsigned int maximum_speed; -static unsigned int number_scales; -static unsigned int fsb; -static unsigned int latency; -static char have_a0; - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k7", msg) - -static int check_fsb(unsigned int fsbspeed) -{ - int delta; - unsigned int f = fsb / 1000; - - delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed; - return (delta < 5); -} - -static int check_powernow(void) -{ - struct cpuinfo_x86 *c = cpu_data; - unsigned int maxei, eax, ebx, ecx, edx; - - if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) { -#ifdef MODULE - printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n"); -#endif - return 0; - } - - /* Get maximum capabilities */ - maxei = cpuid_eax (0x80000000); - if (maxei < 0x80000007) { /* Any powernow info ? */ -#ifdef MODULE - printk (KERN_INFO PFX "No powernow capabilities detected\n"); -#endif - return 0; - } - - if ((c->x86_model == 6) && (c->x86_mask == 0)) { - printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n"); - have_a0 = 1; - } - - cpuid(0x80000007, &eax, &ebx, &ecx, &edx); - - /* Check we can actually do something before we say anything.*/ - if (!(edx & (1 << 1 | 1 << 2))) - return 0; - - printk (KERN_INFO PFX "PowerNOW! Technology present. 
Can scale: "); - - if (edx & 1 << 1) { - printk ("frequency"); - can_scale_bus=1; - } - - if ((edx & (1 << 1 | 1 << 2)) == 0x6) - printk (" and "); - - if (edx & 1 << 2) { - printk ("voltage"); - can_scale_vid=1; - } - - printk (".\n"); - return 1; -} - - -static int get_ranges (unsigned char *pst) -{ - unsigned int j; - unsigned int speed; - u8 fid, vid; - - powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL); - if (!powernow_table) - return -ENOMEM; - - for (j=0 ; j < number_scales; j++) { - fid = *pst++; - - powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10; - powernow_table[j].index = fid; /* lower 8 bits */ - - speed = powernow_table[j].frequency; - - if ((fid_codes[fid] % 10)==5) { -#ifdef CONFIG_X86_POWERNOW_K7_ACPI - if (have_a0 == 1) - powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID; -#endif - } - - if (speed < minimum_speed) - minimum_speed = speed; - if (speed > maximum_speed) - maximum_speed = speed; - - vid = *pst++; - powernow_table[j].index |= (vid << 8); /* upper 8 bits */ - - dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " - "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, - fid_codes[fid] % 10, speed/1000, vid, - mobile_vid_table[vid]/1000, - mobile_vid_table[vid]%1000); - } - powernow_table[number_scales].frequency = CPUFREQ_TABLE_END; - powernow_table[number_scales].index = 0; - - return 0; -} - - -static void change_FID(int fid) -{ - union msr_fidvidctl fidvidctl; - - rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); - if (fidvidctl.bits.FID != fid) { - fidvidctl.bits.SGTC = latency; - fidvidctl.bits.FID = fid; - fidvidctl.bits.VIDC = 0; - fidvidctl.bits.FIDC = 1; - wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); - } -} - - -static void change_VID(int vid) -{ - union msr_fidvidctl fidvidctl; - - rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); - if (fidvidctl.bits.VID != vid) { - fidvidctl.bits.SGTC = latency; - fidvidctl.bits.VID = vid; - fidvidctl.bits.FIDC = 0; - fidvidctl.bits.VIDC = 1; - wrmsrl 
(MSR_K7_FID_VID_CTL, fidvidctl.val); - } -} - - -static void change_speed (unsigned int index) -{ - u8 fid, vid; - struct cpufreq_freqs freqs; - union msr_fidvidstatus fidvidstatus; - int cfid; - - /* fid are the lower 8 bits of the index we stored into - * the cpufreq frequency table in powernow_decode_bios, - * vid are the upper 8 bits. - */ - - fid = powernow_table[index].index & 0xFF; - vid = (powernow_table[index].index & 0xFF00) >> 8; - - freqs.cpu = 0; - - rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); - cfid = fidvidstatus.bits.CFID; - freqs.old = fsb * fid_codes[cfid] / 10; - - freqs.new = powernow_table[index].frequency; - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - /* Now do the magic poking into the MSRs. */ - - if (have_a0 == 1) /* A0 errata 5 */ - local_irq_disable(); - - if (freqs.old > freqs.new) { - /* Going down, so change FID first */ - change_FID(fid); - change_VID(vid); - } else { - /* Going up, so change VID first */ - change_VID(vid); - change_FID(fid); - } - - - if (have_a0 == 1) - local_irq_enable(); - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); -} - - -#ifdef CONFIG_X86_POWERNOW_K7_ACPI - -static struct acpi_processor_performance *acpi_processor_perf; - -static int powernow_acpi_init(void) -{ - int i; - int retval = 0; - union powernow_acpi_control_t pc; - - if (acpi_processor_perf != NULL && powernow_table != NULL) { - retval = -EINVAL; - goto err0; - } - - acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance), - GFP_KERNEL); - if (!acpi_processor_perf) { - retval = -ENOMEM; - goto err0; - } - - if (acpi_processor_register_performance(acpi_processor_perf, 0)) { - retval = -EIO; - goto err1; - } - - if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { - retval = -ENODEV; - goto err2; - } - - if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { - retval = -ENODEV; - goto err2; - } - - number_scales = 
acpi_processor_perf->state_count; - - if (number_scales < 2) { - retval = -ENODEV; - goto err2; - } - - powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL); - if (!powernow_table) { - retval = -ENOMEM; - goto err2; - } - - pc.val = (unsigned long) acpi_processor_perf->states[0].control; - for (i = 0; i < number_scales; i++) { - u8 fid, vid; - struct acpi_processor_px *state = - &acpi_processor_perf->states[i]; - unsigned int speed, speed_mhz; - - pc.val = (unsigned long) state->control; - dprintk ("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", - i, - (u32) state->core_frequency, - (u32) state->power, - (u32) state->transition_latency, - (u32) state->control, - pc.bits.sgtc); - - vid = pc.bits.vid; - fid = pc.bits.fid; - - powernow_table[i].frequency = fsb * fid_codes[fid] / 10; - powernow_table[i].index = fid; /* lower 8 bits */ - powernow_table[i].index |= (vid << 8); /* upper 8 bits */ - - speed = powernow_table[i].frequency; - speed_mhz = speed / 1000; - - /* processor_perflib will multiply the MHz value by 1000 to - * get a KHz value (e.g. 1266000). However, powernow-k7 works - * with true KHz values (e.g. 1266768). To ensure that all - * powernow frequencies are available, we must ensure that - * ACPI doesn't restrict them, so we round up the MHz value - * to ensure that perflib's computed KHz value is greater than - * or equal to powernow's KHz value. 
- */ - if (speed % 1000 > 0) - speed_mhz++; - - if ((fid_codes[fid] % 10)==5) { - if (have_a0 == 1) - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - } - - dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " - "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, - fid_codes[fid] % 10, speed_mhz, vid, - mobile_vid_table[vid]/1000, - mobile_vid_table[vid]%1000); - - if (state->core_frequency != speed_mhz) { - state->core_frequency = speed_mhz; - dprintk(" Corrected ACPI frequency to %d\n", - speed_mhz); - } - - if (latency < pc.bits.sgtc) - latency = pc.bits.sgtc; - - if (speed < minimum_speed) - minimum_speed = speed; - if (speed > maximum_speed) - maximum_speed = speed; - } - - powernow_table[i].frequency = CPUFREQ_TABLE_END; - powernow_table[i].index = 0; - - /* notify BIOS that we exist */ - acpi_processor_notify_smm(THIS_MODULE); - - return 0; - -err2: - acpi_processor_unregister_performance(acpi_processor_perf, 0); -err1: - kfree(acpi_processor_perf); -err0: - printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n"); - acpi_processor_perf = NULL; - return retval; -} -#else -static int powernow_acpi_init(void) -{ - printk(KERN_INFO PFX "no support for ACPI processor found." 
- " Please recompile your kernel with ACPI processor\n"); - return -EINVAL; -} -#endif - -static int powernow_decode_bios (int maxfid, int startvid) -{ - struct psb_s *psb; - struct pst_s *pst; - unsigned int i, j; - unsigned char *p; - unsigned int etuple; - unsigned int ret; - - etuple = cpuid_eax(0x80000001); - - for (i=0xC0000; i < 0xffff0 ; i+=16) { - - p = phys_to_virt(i); - - if (memcmp(p, "AMDK7PNOW!", 10) == 0){ - dprintk ("Found PSB header at %p\n", p); - psb = (struct psb_s *) p; - dprintk ("Table version: 0x%x\n", psb->tableversion); - if (psb->tableversion != 0x12) { - printk (KERN_INFO PFX "Sorry, only v1.2 tables supported right now\n"); - return -ENODEV; - } - - dprintk ("Flags: 0x%x\n", psb->flags); - if ((psb->flags & 1)==0) { - dprintk ("Mobile voltage regulator\n"); - } else { - dprintk ("Desktop voltage regulator\n"); - } - - latency = psb->settlingtime; - if (latency < 100) { - printk (KERN_INFO PFX "BIOS set settling time to %d microseconds." - "Should be at least 100. Correcting.\n", latency); - latency = 100; - } - dprintk ("Settling Time: %d microseconds.\n", psb->settlingtime); - dprintk ("Has %d PST tables. 
(Only dumping ones relevant to this CPU).\n", psb->numpst); - - p += sizeof (struct psb_s); - - pst = (struct pst_s *) p; - - for (j=0; jnumpst; j++) { - pst = (struct pst_s *) p; - number_scales = pst->numpstates; - - if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) && - (maxfid==pst->maxfid) && (startvid==pst->startvid)) - { - dprintk ("PST:%d (@%p)\n", j, pst); - dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", - pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); - - ret = get_ranges ((char *) pst + sizeof (struct pst_s)); - return ret; - } else { - unsigned int k; - p = (char *) pst + sizeof (struct pst_s); - for (k=0; k= 5) - m += 5; - - m /= 10; - - sgtc = 100 * m * latency; - sgtc = sgtc / 3; - if (sgtc > 0xfffff) { - printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc); - sgtc = 0xfffff; - } - return sgtc; -} - -static unsigned int powernow_get(unsigned int cpu) -{ - union msr_fidvidstatus fidvidstatus; - unsigned int cfid; - - if (cpu) - return 0; - rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); - cfid = fidvidstatus.bits.CFID; - - return (fsb * fid_codes[cfid] / 10); -} - - -static int __init acer_cpufreq_pst(struct dmi_system_id *d) -{ - printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident); - printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n"); - printk(KERN_WARNING "cpufreq scaling has been disabled as a result of this.\n"); - return 0; -} - -/* - * Some Athlon laptops have really fucked PST tables. - * A BIOS update is all that can save them. - * Mention this, and disable cpufreq. 
- */ -static struct dmi_system_id __initdata powernow_dmi_table[] = { - { - .callback = acer_cpufreq_pst, - .ident = "Acer Aspire", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"), - DMI_MATCH(DMI_BIOS_VERSION, "3A71"), - }, - }, - { } -}; - -static int __init powernow_cpu_init (struct cpufreq_policy *policy) -{ - union msr_fidvidstatus fidvidstatus; - int result; - - if (policy->cpu != 0) - return -ENODEV; - - rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); - - recalibrate_cpu_khz(); - - fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; - if (!fsb) { - printk(KERN_WARNING PFX "can not determine bus frequency\n"); - return -EINVAL; - } - dprintk("FSB: %3dMHz\n", fsb/1000); - - if (dmi_check_system(powernow_dmi_table) || acpi_force) { - printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n"); - result = powernow_acpi_init(); - } else { - result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID); - if (result) { - printk (KERN_INFO PFX "Trying ACPI perflib\n"); - maximum_speed = 0; - minimum_speed = -1; - latency = 0; - result = powernow_acpi_init(); - if (result) { - printk (KERN_INFO PFX "ACPI and legacy methods failed\n"); - printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.html\n"); - } - } else { - /* SGTC use the bus clock as timer */ - latency = fixup_sgtc(); - printk(KERN_INFO PFX "SGTC: %d\n", latency); - } - } - - if (result) - return result; - - printk (KERN_INFO PFX "Minimum speed %d MHz. 
Maximum speed %d MHz.\n", - minimum_speed/1000, maximum_speed/1000); - - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - - policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency); - - policy->cur = powernow_get(0); - - cpufreq_frequency_table_get_attr(powernow_table, policy->cpu); - - return cpufreq_frequency_table_cpuinfo(policy, powernow_table); -} - -static int powernow_cpu_exit (struct cpufreq_policy *policy) { - cpufreq_frequency_table_put_attr(policy->cpu); - -#ifdef CONFIG_X86_POWERNOW_K7_ACPI - if (acpi_processor_perf) { - acpi_processor_unregister_performance(acpi_processor_perf, 0); - kfree(acpi_processor_perf); - } -#endif - - kfree(powernow_table); - return 0; -} - -static struct freq_attr* powernow_table_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver powernow_driver = { - .verify = powernow_verify, - .target = powernow_target, - .get = powernow_get, - .init = powernow_cpu_init, - .exit = powernow_cpu_exit, - .name = "powernow-k7", - .owner = THIS_MODULE, - .attr = powernow_table_attr, -}; - -static int __init powernow_init (void) -{ - if (check_powernow()==0) - return -ENODEV; - return cpufreq_register_driver(&powernow_driver); -} - - -static void __exit powernow_exit (void) -{ - cpufreq_unregister_driver(&powernow_driver); -} - -module_param(acpi_force, int, 0444); -MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); - -MODULE_AUTHOR ("Dave Jones "); -MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); -MODULE_LICENSE ("GPL"); - -late_initcall(powernow_init); -module_exit(powernow_exit); - diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.h b/arch/i386/kernel/cpu/cpufreq/powernow-k7.h deleted file mode 100644 index f8a63b3664e..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $ - * (C) 2003 Dave Jones. 
- * - * Licensed under the terms of the GNU GPL License version 2. - * - * AMD-specific information - * - */ - -union msr_fidvidctl { - struct { - unsigned FID:5, // 4:0 - reserved1:3, // 7:5 - VID:5, // 12:8 - reserved2:3, // 15:13 - FIDC:1, // 16 - VIDC:1, // 17 - reserved3:2, // 19:18 - FIDCHGRATIO:1, // 20 - reserved4:11, // 31-21 - SGTC:20, // 32:51 - reserved5:12; // 63:52 - } bits; - unsigned long long val; -}; - -union msr_fidvidstatus { - struct { - unsigned CFID:5, // 4:0 - reserved1:3, // 7:5 - SFID:5, // 12:8 - reserved2:3, // 15:13 - MFID:5, // 20:16 - reserved3:11, // 31:21 - CVID:5, // 36:32 - reserved4:3, // 39:37 - SVID:5, // 44:40 - reserved5:3, // 47:45 - MVID:5, // 52:48 - reserved6:11; // 63:53 - } bits; - unsigned long long val; -}; diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c deleted file mode 100644 index 34ed53a0673..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ /dev/null @@ -1,1363 +0,0 @@ -/* - * (c) 2003-2006 Advanced Micro Devices, Inc. - * Your use of this code is subject to the terms and conditions of the - * GNU general public license version 2. See "COPYING" or - * http://www.gnu.org/licenses/gpl.html - * - * Support : mark.langsdorf@amd.com - * - * Based on the powernow-k7.c module written by Dave Jones. - * (C) 2003 Dave Jones on behalf of SuSE Labs - * (C) 2004 Dominik Brodowski - * (C) 2004 Pavel Machek - * Licensed under the terms of the GNU GPL License version 2. - * Based upon datasheets & sample CPUs kindly provided by AMD. - * - * Valuable input gratefully received from Dave Jones, Pavel Machek, - * Dominik Brodowski, Jacob Shin, and others. - * Originally developed by Paul Devriendt. 
- * Processor information obtained from Chapter 9 (Power and Thermal Management) - * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD - * Opteron Processors" available for download from www.amd.com - * - * Tables for specific CPUs can be inferred from - * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for current / set_cpus_allowed() */ - -#include -#include -#include - -#ifdef CONFIG_X86_POWERNOW_K8_ACPI -#include -#include -#include -#endif - -#define PFX "powernow-k8: " -#define BFX PFX "BIOS error: " -#define VERSION "version 2.00.00" -#include "powernow-k8.h" - -/* serialize freq changes */ -static DEFINE_MUTEX(fidvid_mutex); - -static struct powernow_k8_data *powernow_data[NR_CPUS]; - -static int cpu_family = CPU_OPTERON; - -#ifndef CONFIG_SMP -static cpumask_t cpu_core_map[1]; -#endif - -/* Return a frequency in MHz, given an input fid */ -static u32 find_freq_from_fid(u32 fid) -{ - return 800 + (fid * 100); -} - - -/* Return a frequency in KHz, given an input fid */ -static u32 find_khz_freq_from_fid(u32 fid) -{ - return 1000 * find_freq_from_fid(fid); -} - -/* Return a frequency in MHz, given an input fid and did */ -static u32 find_freq_from_fiddid(u32 fid, u32 did) -{ - return 100 * (fid + 0x10) >> did; -} - -static u32 find_khz_freq_from_fiddid(u32 fid, u32 did) -{ - return 1000 * find_freq_from_fiddid(fid, did); -} - -static u32 find_fid_from_pstate(u32 pstate) -{ - u32 hi, lo; - rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); - return lo & HW_PSTATE_FID_MASK; -} - -static u32 find_did_from_pstate(u32 pstate) -{ - u32 hi, lo; - rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); - return (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; -} - -/* Return the vco fid for an input fid - * - * Each "low" fid has corresponding "high" fid, and you can get to "low" fids - * only from corresponding high fids. 
This returns "high" fid corresponding to - * "low" one. - */ -static u32 convert_fid_to_vco_fid(u32 fid) -{ - if (fid < HI_FID_TABLE_BOTTOM) - return 8 + (2 * fid); - else - return fid; -} - -/* - * Return 1 if the pending bit is set. Unless we just instructed the processor - * to transition to a new state, seeing this bit set is really bad news. - */ -static int pending_bit_stuck(void) -{ - u32 lo, hi; - - if (cpu_family == CPU_HW_PSTATE) - return 0; - - rdmsr(MSR_FIDVID_STATUS, lo, hi); - return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; -} - -/* - * Update the global current fid / vid values from the status msr. - * Returns 1 on error. - */ -static int query_current_values_with_pending_wait(struct powernow_k8_data *data) -{ - u32 lo, hi; - u32 i = 0; - - if (cpu_family == CPU_HW_PSTATE) { - rdmsr(MSR_PSTATE_STATUS, lo, hi); - i = lo & HW_PSTATE_MASK; - rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi); - data->currfid = lo & HW_PSTATE_FID_MASK; - data->currdid = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; - return 0; - } - do { - if (i++ > 10000) { - dprintk("detected change pending stuck\n"); - return 1; - } - rdmsr(MSR_FIDVID_STATUS, lo, hi); - } while (lo & MSR_S_LO_CHANGE_PENDING); - - data->currvid = hi & MSR_S_HI_CURRENT_VID; - data->currfid = lo & MSR_S_LO_CURRENT_FID; - - return 0; -} - -/* the isochronous relief time */ -static void count_off_irt(struct powernow_k8_data *data) -{ - udelay((1 << data->irt) * 10); - return; -} - -/* the voltage stabalization time */ -static void count_off_vst(struct powernow_k8_data *data) -{ - udelay(data->vstable * VST_UNITS_20US); - return; -} - -/* need to init the control msr to a safe value (for each cpu) */ -static void fidvid_msr_init(void) -{ - u32 lo, hi; - u8 fid, vid; - - rdmsr(MSR_FIDVID_STATUS, lo, hi); - vid = hi & MSR_S_HI_CURRENT_VID; - fid = lo & MSR_S_LO_CURRENT_FID; - lo = fid | (vid << MSR_C_LO_VID_SHIFT); - hi = MSR_C_HI_STP_GNT_BENIGN; - dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi); - 
wrmsr(MSR_FIDVID_CTL, lo, hi); -} - - -/* write the new fid value along with the other control fields to the msr */ -static int write_new_fid(struct powernow_k8_data *data, u32 fid) -{ - u32 lo; - u32 savevid = data->currvid; - u32 i = 0; - - if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) { - printk(KERN_ERR PFX "internal error - overflow on fid write\n"); - return 1; - } - - lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; - - dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", - fid, lo, data->plllock * PLL_LOCK_CONVERSION); - - do { - wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); - if (i++ > 100) { - printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n"); - return 1; - } - } while (query_current_values_with_pending_wait(data)); - - count_off_irt(data); - - if (savevid != data->currvid) { - printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n", - savevid, data->currvid); - return 1; - } - - if (fid != data->currfid) { - printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid, - data->currfid); - return 1; - } - - return 0; -} - -/* Write a new vid to the hardware */ -static int write_new_vid(struct powernow_k8_data *data, u32 vid) -{ - u32 lo; - u32 savefid = data->currfid; - int i = 0; - - if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { - printk(KERN_ERR PFX "internal error - overflow on vid write\n"); - return 1; - } - - lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; - - dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", - vid, lo, STOP_GRANT_5NS); - - do { - wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); - if (i++ > 100) { - printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); - return 1; - } - } while (query_current_values_with_pending_wait(data)); - - if (savefid != data->currfid) { - printk(KERN_ERR PFX "fid changed on vid trans, old 
0x%x new 0x%x\n", - savefid, data->currfid); - return 1; - } - - if (vid != data->currvid) { - printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid, - data->currvid); - return 1; - } - - return 0; -} - -/* - * Reduce the vid by the max of step or reqvid. - * Decreasing vid codes represent increasing voltages: - * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. - */ -static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step) -{ - if ((data->currvid - reqvid) > step) - reqvid = data->currvid - step; - - if (write_new_vid(data, reqvid)) - return 1; - - count_off_vst(data); - - return 0; -} - -/* Change hardware pstate by single MSR write */ -static int transition_pstate(struct powernow_k8_data *data, u32 pstate) -{ - wrmsr(MSR_PSTATE_CTRL, pstate, 0); - data->currfid = find_fid_from_pstate(pstate); - return 0; -} - -/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ -static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid) -{ - if (core_voltage_pre_transition(data, reqvid)) - return 1; - - if (core_frequency_transition(data, reqfid)) - return 1; - - if (core_voltage_post_transition(data, reqvid)) - return 1; - - if (query_current_values_with_pending_wait(data)) - return 1; - - if ((reqfid != data->currfid) || (reqvid != data->currvid)) { - printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n", - smp_processor_id(), - reqfid, reqvid, data->currfid, data->currvid); - return 1; - } - - dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n", - smp_processor_id(), data->currfid, data->currvid); - - return 0; -} - -/* Phase 1 - core voltage transition ... 
setup voltage */ -static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid) -{ - u32 rvosteps = data->rvo; - u32 savefid = data->currfid; - u32 maxvid, lo; - - dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n", - smp_processor_id(), - data->currfid, data->currvid, reqvid, data->rvo); - - rdmsr(MSR_FIDVID_STATUS, lo, maxvid); - maxvid = 0x1f & (maxvid >> 16); - dprintk("ph1 maxvid=0x%x\n", maxvid); - if (reqvid < maxvid) /* lower numbers are higher voltages */ - reqvid = maxvid; - - while (data->currvid > reqvid) { - dprintk("ph1: curr 0x%x, req vid 0x%x\n", - data->currvid, reqvid); - if (decrease_vid_code_by_step(data, reqvid, data->vidmvs)) - return 1; - } - - while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { - if (data->currvid == maxvid) { - rvosteps = 0; - } else { - dprintk("ph1: changing vid for rvo, req 0x%x\n", - data->currvid - 1); - if (decrease_vid_code_by_step(data, data->currvid - 1, 1)) - return 1; - rvosteps--; - } - } - - if (query_current_values_with_pending_wait(data)) - return 1; - - if (savefid != data->currfid) { - printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid); - return 1; - } - - dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n", - data->currfid, data->currvid); - - return 0; -} - -/* Phase 2 - core frequency transition */ -static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) -{ - u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid; - - if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { - printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n", - reqfid, data->currfid); - return 1; - } - - if (data->currfid == reqfid) { - printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid); - return 0; - } - - dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n", - smp_processor_id(), - data->currfid, data->currvid, reqfid); - - 
vcoreqfid = convert_fid_to_vco_fid(reqfid); - vcocurrfid = convert_fid_to_vco_fid(data->currfid); - vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid - : vcoreqfid - vcocurrfid; - - while (vcofiddiff > 2) { - (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); - - if (reqfid > data->currfid) { - if (data->currfid > LO_FID_TABLE_TOP) { - if (write_new_fid(data, data->currfid + fid_interval)) { - return 1; - } - } else { - if (write_new_fid - (data, 2 + convert_fid_to_vco_fid(data->currfid))) { - return 1; - } - } - } else { - if (write_new_fid(data, data->currfid - fid_interval)) - return 1; - } - - vcocurrfid = convert_fid_to_vco_fid(data->currfid); - vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid - : vcoreqfid - vcocurrfid; - } - - if (write_new_fid(data, reqfid)) - return 1; - - if (query_current_values_with_pending_wait(data)) - return 1; - - if (data->currfid != reqfid) { - printk(KERN_ERR PFX - "ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n", - data->currfid, reqfid); - return 1; - } - - if (savevid != data->currvid) { - printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n", - savevid, data->currvid); - return 1; - } - - dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n", - data->currfid, data->currvid); - - return 0; -} - -/* Phase 3 - core voltage transition flow ... jump to the final vid. 
*/ -static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid) -{ - u32 savefid = data->currfid; - u32 savereqvid = reqvid; - - dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n", - smp_processor_id(), - data->currfid, data->currvid); - - if (reqvid != data->currvid) { - if (write_new_vid(data, reqvid)) - return 1; - - if (savefid != data->currfid) { - printk(KERN_ERR PFX - "ph3: bad fid change, save 0x%x, curr 0x%x\n", - savefid, data->currfid); - return 1; - } - - if (data->currvid != reqvid) { - printk(KERN_ERR PFX - "ph3: failed vid transition\n, req 0x%x, curr 0x%x", - reqvid, data->currvid); - return 1; - } - } - - if (query_current_values_with_pending_wait(data)) - return 1; - - if (savereqvid != data->currvid) { - dprintk("ph3 failed, currvid 0x%x\n", data->currvid); - return 1; - } - - if (savefid != data->currfid) { - dprintk("ph3 failed, currfid changed 0x%x\n", - data->currfid); - return 1; - } - - dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n", - data->currfid, data->currvid); - - return 0; -} - -static int check_supported_cpu(unsigned int cpu) -{ - cpumask_t oldmask = CPU_MASK_ALL; - u32 eax, ebx, ecx, edx; - unsigned int rc = 0; - - oldmask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - - if (smp_processor_id() != cpu) { - printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); - goto out; - } - - if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) - goto out; - - eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && - ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) - goto out; - - if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { - if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || - ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { - printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); - goto out; - } - - eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); - if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { - printk(KERN_INFO PFX - "No frequency change 
capabilities detected\n"); - goto out; - } - - cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); - if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) { - printk(KERN_INFO PFX "Power state transitions not supported\n"); - goto out; - } - } else { /* must be a HW Pstate capable processor */ - cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); - if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) - cpu_family = CPU_HW_PSTATE; - else - goto out; - } - - rc = 1; - -out: - set_cpus_allowed(current, oldmask); - return rc; -} - -static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) -{ - unsigned int j; - u8 lastfid = 0xff; - - for (j = 0; j < data->numps; j++) { - if (pst[j].vid > LEAST_VID) { - printk(KERN_ERR PFX "vid %d invalid : 0x%x\n", j, pst[j].vid); - return -EINVAL; - } - if (pst[j].vid < data->rvo) { /* vid + rvo >= 0 */ - printk(KERN_ERR BFX "0 vid exceeded with pstate %d\n", j); - return -ENODEV; - } - if (pst[j].vid < maxvid + data->rvo) { /* vid + rvo >= maxvid */ - printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j); - return -ENODEV; - } - if (pst[j].fid > MAX_FID) { - printk(KERN_ERR BFX "maxfid exceeded with pstate %d\n", j); - return -ENODEV; - } - if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { - /* Only first fid is allowed to be in "low" range */ - printk(KERN_ERR BFX "two low fids - %d : 0x%x\n", j, pst[j].fid); - return -EINVAL; - } - if (pst[j].fid < lastfid) - lastfid = pst[j].fid; - } - if (lastfid & 1) { - printk(KERN_ERR BFX "lastfid invalid\n"); - return -EINVAL; - } - if (lastfid > LO_FID_TABLE_TOP) - printk(KERN_INFO BFX "first fid not from lo freq table\n"); - - return 0; -} - -static void print_basics(struct powernow_k8_data *data) -{ - int j; - for (j = 0; j < data->numps; j++) { - if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) { - if (cpu_family == CPU_HW_PSTATE) { - printk(KERN_INFO PFX " %d : fid 0x%x did 0x%x (%d MHz)\n", - j, - 
(data->powernow_table[j].index & 0xff00) >> 8, - (data->powernow_table[j].index & 0xff0000) >> 16, - data->powernow_table[j].frequency/1000); - } else { - printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", - j, - data->powernow_table[j].index & 0xff, - data->powernow_table[j].frequency/1000, - data->powernow_table[j].index >> 8); - } - } - } - if (data->batps) - printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps); -} - -static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) -{ - struct cpufreq_frequency_table *powernow_table; - unsigned int j; - - if (data->batps) { /* use ACPI support to get full speed on mains power */ - printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range\n", data->batps); - data->numps = data->batps; - } - - for ( j=1; jnumps; j++ ) { - if (pst[j-1].fid >= pst[j].fid) { - printk(KERN_ERR PFX "PST out of sequence\n"); - return -EINVAL; - } - } - - if (data->numps < 2) { - printk(KERN_ERR PFX "no p states to transition\n"); - return -ENODEV; - } - - if (check_pst_table(data, pst, maxvid)) - return -EINVAL; - - powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) - * (data->numps + 1)), GFP_KERNEL); - if (!powernow_table) { - printk(KERN_ERR PFX "powernow_table memory alloc failure\n"); - return -ENOMEM; - } - - for (j = 0; j < data->numps; j++) { - powernow_table[j].index = pst[j].fid; /* lower 8 bits */ - powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ - powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid); - } - powernow_table[data->numps].frequency = CPUFREQ_TABLE_END; - powernow_table[data->numps].index = 0; - - if (query_current_values_with_pending_wait(data)) { - kfree(powernow_table); - return -EIO; - } - - dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); - data->powernow_table = powernow_table; - if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) - print_basics(data); - - for (j = 0; j < 
data->numps; j++) - if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid)) - return 0; - - dprintk("currfid/vid do not match PST, ignoring\n"); - return 0; -} - -/* Find and validate the PSB/PST table in BIOS. */ -static int find_psb_table(struct powernow_k8_data *data) -{ - struct psb_s *psb; - unsigned int i; - u32 mvs; - u8 maxvid; - u32 cpst = 0; - u32 thiscpuid; - - for (i = 0xc0000; i < 0xffff0; i += 0x10) { - /* Scan BIOS looking for the signature. */ - /* It can not be at ffff0 - it is too big. */ - - psb = phys_to_virt(i); - if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0) - continue; - - dprintk("found PSB header at 0x%p\n", psb); - - dprintk("table vers: 0x%x\n", psb->tableversion); - if (psb->tableversion != PSB_VERSION_1_4) { - printk(KERN_ERR BFX "PSB table is not v1.4\n"); - return -ENODEV; - } - - dprintk("flags: 0x%x\n", psb->flags1); - if (psb->flags1) { - printk(KERN_ERR BFX "unknown flags\n"); - return -ENODEV; - } - - data->vstable = psb->vstable; - dprintk("voltage stabilization time: %d(*20us)\n", data->vstable); - - dprintk("flags2: 0x%x\n", psb->flags2); - data->rvo = psb->flags2 & 3; - data->irt = ((psb->flags2) >> 2) & 3; - mvs = ((psb->flags2) >> 4) & 3; - data->vidmvs = 1 << mvs; - data->batps = ((psb->flags2) >> 6) & 3; - - dprintk("ramp voltage offset: %d\n", data->rvo); - dprintk("isochronous relief time: %d\n", data->irt); - dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs); - - dprintk("numpst: 0x%x\n", psb->num_tables); - cpst = psb->num_tables; - if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0) ){ - thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0) ) { - cpst = 1; - } - } - if (cpst != 1) { - printk(KERN_ERR BFX "numpst must be 1\n"); - return -ENODEV; - } - - data->plllock = psb->plllocktime; - dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime); - dprintk("maxfid: 0x%x\n", psb->maxfid); - dprintk("maxvid: 0x%x\n", 
psb->maxvid); - maxvid = psb->maxvid; - - data->numps = psb->numps; - dprintk("numpstates: 0x%x\n", data->numps); - return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid); - } - /* - * If you see this message, complain to BIOS manufacturer. If - * he tells you "we do not support Linux" or some similar - * nonsense, remember that Windows 2000 uses the same legacy - * mechanism that the old Linux PSB driver uses. Tell them it - * is broken with Windows 2000. - * - * The reference to the AMD documentation is chapter 9 in the - * BIOS and Kernel Developer's Guide, which is available on - * www.amd.com - */ - printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n"); - return -ENODEV; -} - -#ifdef CONFIG_X86_POWERNOW_K8_ACPI -static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) -{ - if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) - return; - - data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; - data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; - data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; - data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; - data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); - data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; -} - -static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) -{ - struct cpufreq_frequency_table *powernow_table; - int ret_val; - - if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { - dprintk("register performance failed: bad ACPI data\n"); - return -EIO; - } - - /* verify the data contained in the ACPI structures */ - if (data->acpi_data.state_count <= 1) { - dprintk("No ACPI P-States\n"); - goto err_out; - } - - if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || - 
(data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { - dprintk("Invalid control/status registers (%x - %x)\n", - data->acpi_data.control_register.space_id, - data->acpi_data.status_register.space_id); - goto err_out; - } - - /* fill in data->powernow_table */ - powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) - * (data->acpi_data.state_count + 1)), GFP_KERNEL); - if (!powernow_table) { - dprintk("powernow_table memory alloc failure\n"); - goto err_out; - } - - if (cpu_family == CPU_HW_PSTATE) - ret_val = fill_powernow_table_pstate(data, powernow_table); - else - ret_val = fill_powernow_table_fidvid(data, powernow_table); - if (ret_val) - goto err_out_mem; - - powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; - powernow_table[data->acpi_data.state_count].index = 0; - data->powernow_table = powernow_table; - - /* fill in data */ - data->numps = data->acpi_data.state_count; - if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) - print_basics(data); - powernow_k8_acpi_pst_values(data, 0); - - /* notify BIOS that we exist */ - acpi_processor_notify_smm(THIS_MODULE); - - return 0; - -err_out_mem: - kfree(powernow_table); - -err_out: - acpi_processor_unregister_performance(&data->acpi_data, data->cpu); - - /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ - data->acpi_data.state_count = 0; - - return -ENODEV; -} - -static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) -{ - int i; - - for (i = 0; i < data->acpi_data.state_count; i++) { - u32 index; - u32 hi = 0, lo = 0; - u32 fid; - u32 did; - - index = data->acpi_data.states[i].control & HW_PSTATE_MASK; - if (index > MAX_HW_PSTATE) { - printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); - printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); - } - rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); - if (!(hi & HW_PSTATE_VALID_MASK)) { - 
dprintk("invalid pstate %d, ignoring\n", index); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } - - fid = lo & HW_PSTATE_FID_MASK; - did = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; - - dprintk(" %d : fid 0x%x, did 0x%x\n", index, fid, did); - - powernow_table[i].index = index | (fid << HW_FID_INDEX_SHIFT) | (did << HW_DID_INDEX_SHIFT); - - powernow_table[i].frequency = find_khz_freq_from_fiddid(fid, did); - - if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { - printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", - powernow_table[i].frequency, - (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } - } - return 0; -} - -static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) -{ - int i; - int cntlofreq = 0; - for (i = 0; i < data->acpi_data.state_count; i++) { - u32 fid; - u32 vid; - - if (data->exttype) { - fid = data->acpi_data.states[i].status & EXT_FID_MASK; - vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; - } else { - fid = data->acpi_data.states[i].control & FID_MASK; - vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; - } - - dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); - - powernow_table[i].index = fid; /* lower 8 bits */ - powernow_table[i].index |= (vid << 8); /* upper 8 bits */ - powernow_table[i].frequency = find_khz_freq_from_fid(fid); - - /* verify frequency is OK */ - if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) || - (powernow_table[i].frequency < (MIN_FREQ * 1000))) { - dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } - - /* verify voltage is OK - BIOSs are using "off" to indicate invalid */ - if (vid == VID_OFF) { - dprintk("invalid vid %u, ignoring\n", vid); - 
powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } - - /* verify only 1 entry from the lo frequency table */ - if (fid < HI_FID_TABLE_BOTTOM) { - if (cntlofreq) { - /* if both entries are the same, ignore this one ... */ - if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || - (powernow_table[i].index != powernow_table[cntlofreq].index)) { - printk(KERN_ERR PFX "Too many lo freq table entries\n"); - return 1; - } - - dprintk("double low frequency table entry, ignoring it.\n"); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } else - cntlofreq = i; - } - - if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { - printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", - powernow_table[i].frequency, - (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } - } - return 0; -} - -static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) -{ - if (data->acpi_data.state_count) - acpi_processor_unregister_performance(&data->acpi_data, data->cpu); -} - -#else -static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } -static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } -static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } -#endif /* CONFIG_X86_POWERNOW_K8_ACPI */ - -/* Take a frequency, and issue the fid/vid transition command */ -static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index) -{ - u32 fid = 0; - u32 vid = 0; - int res, i; - struct cpufreq_freqs freqs; - - dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); - - /* fid/vid correctness check for k8 */ - /* fid are the lower 8 bits of the index we stored into - * the cpufreq frequency table in find_psb_table, vid - * are the upper 8 bits. 
- */ - fid = data->powernow_table[index].index & 0xFF; - vid = (data->powernow_table[index].index & 0xFF00) >> 8; - - dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid); - - if (query_current_values_with_pending_wait(data)) - return 1; - - if ((data->currvid == vid) && (data->currfid == fid)) { - dprintk("target matches current values (fid 0x%x, vid 0x%x)\n", - fid, vid); - return 0; - } - - if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { - printk(KERN_ERR PFX - "ignoring illegal change in lo freq table-%x to 0x%x\n", - data->currfid, fid); - return 1; - } - - dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", - smp_processor_id(), fid, vid); - freqs.old = find_khz_freq_from_fid(data->currfid); - freqs.new = find_khz_freq_from_fid(fid); - - for_each_cpu_mask(i, *(data->available_cores)) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } - - res = transition_fid_vid(data, fid, vid); - freqs.new = find_khz_freq_from_fid(data->currfid); - - for_each_cpu_mask(i, *(data->available_cores)) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - return res; -} - -/* Take a frequency, and issue the hardware pstate transition command */ -static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index) -{ - u32 fid = 0; - u32 did = 0; - u32 pstate = 0; - int res, i; - struct cpufreq_freqs freqs; - - dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); - - /* get fid did for hardware pstate transition */ - pstate = index & HW_PSTATE_MASK; - if (pstate > MAX_HW_PSTATE) - return 0; - fid = (index & HW_FID_INDEX_MASK) >> HW_FID_INDEX_SHIFT; - did = (index & HW_DID_INDEX_MASK) >> HW_DID_INDEX_SHIFT; - freqs.old = find_khz_freq_from_fiddid(data->currfid, data->currdid); - freqs.new = find_khz_freq_from_fiddid(fid, did); - - for_each_cpu_mask(i, *(data->available_cores)) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - 
} - - res = transition_pstate(data, pstate); - data->currfid = find_fid_from_pstate(pstate); - data->currdid = find_did_from_pstate(pstate); - freqs.new = find_khz_freq_from_fiddid(data->currfid, data->currdid); - - for_each_cpu_mask(i, *(data->available_cores)) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - return res; -} - -/* Driver entry point to switch to the target frequency */ -static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) -{ - cpumask_t oldmask = CPU_MASK_ALL; - struct powernow_k8_data *data = powernow_data[pol->cpu]; - u32 checkfid; - u32 checkvid; - unsigned int newstate; - int ret = -EIO; - - if (!data) - return -EINVAL; - - checkfid = data->currfid; - checkvid = data->currvid; - - /* only run on specific CPU from here on */ - oldmask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); - - if (smp_processor_id() != pol->cpu) { - printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); - goto err_out; - } - - if (pending_bit_stuck()) { - printk(KERN_ERR PFX "failing targ, change pending bit set\n"); - goto err_out; - } - - dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", - pol->cpu, targfreq, pol->min, pol->max, relation); - - if (query_current_values_with_pending_wait(data)) - goto err_out; - - if (cpu_family == CPU_HW_PSTATE) - dprintk("targ: curr fid 0x%x, did 0x%x\n", - data->currfid, data->currdid); - else { - dprintk("targ: curr fid 0x%x, vid 0x%x\n", - data->currfid, data->currvid); - - if ((checkvid != data->currvid) || (checkfid != data->currfid)) { - printk(KERN_INFO PFX - "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n", - checkfid, data->currfid, checkvid, data->currvid); - } - } - - if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) - goto err_out; - - mutex_lock(&fidvid_mutex); - - powernow_k8_acpi_pst_values(data, newstate); - - if (cpu_family == CPU_HW_PSTATE) - 
ret = transition_frequency_pstate(data, newstate); - else - ret = transition_frequency_fidvid(data, newstate); - if (ret) { - printk(KERN_ERR PFX "transition frequency failed\n"); - ret = 1; - mutex_unlock(&fidvid_mutex); - goto err_out; - } - mutex_unlock(&fidvid_mutex); - - if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); - else - pol->cur = find_khz_freq_from_fid(data->currfid); - ret = 0; - -err_out: - set_cpus_allowed(current, oldmask); - return ret; -} - -/* Driver entry point to verify the policy and range of frequencies */ -static int powernowk8_verify(struct cpufreq_policy *pol) -{ - struct powernow_k8_data *data = powernow_data[pol->cpu]; - - if (!data) - return -EINVAL; - - return cpufreq_frequency_table_verify(pol, data->powernow_table); -} - -/* per CPU init entry point to the driver */ -static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) -{ - struct powernow_k8_data *data; - cpumask_t oldmask = CPU_MASK_ALL; - int rc; - - if (!cpu_online(pol->cpu)) - return -ENODEV; - - if (!check_supported_cpu(pol->cpu)) - return -ENODEV; - - data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); - if (!data) { - printk(KERN_ERR PFX "unable to alloc powernow_k8_data"); - return -ENOMEM; - } - - data->cpu = pol->cpu; - - if (powernow_k8_cpu_init_acpi(data)) { - /* - * Use the PSB BIOS structure. This is only availabe on - * an UP version, and is deprecated by AMD. 
- */ - if (num_online_cpus() != 1) { - printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n"); - kfree(data); - return -ENODEV; - } - if (pol->cpu != 0) { - printk(KERN_ERR PFX "No _PSS objects for CPU other than CPU0\n"); - kfree(data); - return -ENODEV; - } - rc = find_psb_table(data); - if (rc) { - kfree(data); - return -ENODEV; - } - } - - /* only run on specific CPU from here on */ - oldmask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); - - if (smp_processor_id() != pol->cpu) { - printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); - goto err_out; - } - - if (pending_bit_stuck()) { - printk(KERN_ERR PFX "failing init, change pending bit set\n"); - goto err_out; - } - - if (query_current_values_with_pending_wait(data)) - goto err_out; - - if (cpu_family == CPU_OPTERON) - fidvid_msr_init(); - - /* run on any CPU again */ - set_cpus_allowed(current, oldmask); - - pol->governor = CPUFREQ_DEFAULT_GOVERNOR; - if (cpu_family == CPU_HW_PSTATE) - pol->cpus = cpumask_of_cpu(pol->cpu); - else - pol->cpus = cpu_core_map[pol->cpu]; - data->available_cores = &(pol->cpus); - - /* Take a crude guess here. 
- * That guess was in microseconds, so multiply with 1000 */ - pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US) - + (3 * (1 << data->irt) * 10)) * 1000; - - if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); - else - pol->cur = find_khz_freq_from_fid(data->currfid); - dprintk("policy current frequency %d kHz\n", pol->cur); - - /* min/max the cpu is capable of */ - if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { - printk(KERN_ERR PFX "invalid powernow_table\n"); - powernow_k8_cpu_exit_acpi(data); - kfree(data->powernow_table); - kfree(data); - return -EINVAL; - } - - cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); - - if (cpu_family == CPU_HW_PSTATE) - dprintk("cpu_init done, current fid 0x%x, did 0x%x\n", - data->currfid, data->currdid); - else - dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", - data->currfid, data->currvid); - - powernow_data[pol->cpu] = data; - - return 0; - -err_out: - set_cpus_allowed(current, oldmask); - powernow_k8_cpu_exit_acpi(data); - - kfree(data); - return -ENODEV; -} - -static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) -{ - struct powernow_k8_data *data = powernow_data[pol->cpu]; - - if (!data) - return -EINVAL; - - powernow_k8_cpu_exit_acpi(data); - - cpufreq_frequency_table_put_attr(pol->cpu); - - kfree(data->powernow_table); - kfree(data); - - return 0; -} - -static unsigned int powernowk8_get (unsigned int cpu) -{ - struct powernow_k8_data *data; - cpumask_t oldmask = current->cpus_allowed; - unsigned int khz = 0; - - data = powernow_data[first_cpu(cpu_core_map[cpu])]; - - if (!data) - return -EINVAL; - - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (smp_processor_id() != cpu) { - printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); - set_cpus_allowed(current, oldmask); - return 0; - } - - if (query_current_values_with_pending_wait(data)) - goto out; - - 
if (cpu_family == CPU_HW_PSTATE) - khz = find_khz_freq_from_fiddid(data->currfid, data->currdid); - else - khz = find_khz_freq_from_fid(data->currfid); - - -out: - set_cpus_allowed(current, oldmask); - return khz; -} - -static struct freq_attr* powernow_k8_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver cpufreq_amd64_driver = { - .verify = powernowk8_verify, - .target = powernowk8_target, - .init = powernowk8_cpu_init, - .exit = __devexit_p(powernowk8_cpu_exit), - .get = powernowk8_get, - .name = "powernow-k8", - .owner = THIS_MODULE, - .attr = powernow_k8_attr, -}; - -/* driver entry point for init */ -static int __cpuinit powernowk8_init(void) -{ - unsigned int i, supported_cpus = 0; - unsigned int booted_cores = 1; - - for_each_online_cpu(i) { - if (check_supported_cpu(i)) - supported_cpus++; - } - -#ifdef CONFIG_SMP - booted_cores = cpu_data[0].booted_cores; -#endif - - if (supported_cpus == num_online_cpus()) { - printk(KERN_INFO PFX "Found %d %s " - "processors (%d cpu cores) (" VERSION ")\n", - supported_cpus/booted_cores, - boot_cpu_data.x86_model_id, supported_cpus); - return cpufreq_register_driver(&cpufreq_amd64_driver); - } - - return -ENODEV; -} - -/* driver entry point for term */ -static void __exit powernowk8_exit(void) -{ - dprintk("exit\n"); - - cpufreq_unregister_driver(&cpufreq_amd64_driver); -} - -MODULE_AUTHOR("Paul Devriendt and Mark Langsdorf "); -MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); -MODULE_LICENSE("GPL"); - -late_initcall(powernowk8_init); -module_exit(powernowk8_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h deleted file mode 100644 index b06c812208c..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * (c) 2003-2006 Advanced Micro Devices, Inc. 
- * Your use of this code is subject to the terms and conditions of the - * GNU general public license version 2. See "COPYING" or - * http://www.gnu.org/licenses/gpl.html - */ - -struct powernow_k8_data { - unsigned int cpu; - - u32 numps; /* number of p-states */ - u32 batps; /* number of p-states supported on battery */ - - /* these values are constant when the PSB is used to determine - * vid/fid pairings, but are modified during the ->target() call - * when ACPI is used */ - u32 rvo; /* ramp voltage offset */ - u32 irt; /* isochronous relief time */ - u32 vidmvs; /* usable value calculated from mvs */ - u32 vstable; /* voltage stabilization time, units 20 us */ - u32 plllock; /* pll lock time, units 1 us */ - u32 exttype; /* extended interface = 1 */ - - /* keep track of the current fid / vid or did */ - u32 currvid, currfid, currdid; - - /* the powernow_table includes all frequency and vid/fid pairings: - * fid are the lower 8 bits of the index, vid are the upper 8 bits. - * frequency is in kHz */ - struct cpufreq_frequency_table *powernow_table; - -#ifdef CONFIG_X86_POWERNOW_K8_ACPI - /* the acpi table needs to be kept. 
it's only available if ACPI was - * used to determine valid frequency/vid/fid states */ - struct acpi_processor_performance acpi_data; -#endif - /* we need to keep track of associated cores, but let cpufreq - * handle hotplug events - so just point at cpufreq pol->cpus - * structure */ - cpumask_t *available_cores; -}; - - -/* processor's cpuid instruction support */ -#define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ -#define CPUID_XFAM 0x0ff00000 /* extended family */ -#define CPUID_XFAM_K8 0 -#define CPUID_XMOD 0x000f0000 /* extended model */ -#define CPUID_XMOD_REV_MASK 0x00080000 -#define CPUID_XFAM_10H 0x00100000 /* family 0x10 */ -#define CPUID_USE_XFAM_XMOD 0x00000f00 -#define CPUID_GET_MAX_CAPABILITIES 0x80000000 -#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 -#define P_STATE_TRANSITION_CAPABLE 6 - -/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For */ -/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and */ -/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */ -/* the register number is placed in ecx, and the data is returned in edx:eax. 
*/ - -#define MSR_FIDVID_CTL 0xc0010041 -#define MSR_FIDVID_STATUS 0xc0010042 - -/* Field definitions within the FID VID Low Control MSR : */ -#define MSR_C_LO_INIT_FID_VID 0x00010000 -#define MSR_C_LO_NEW_VID 0x00003f00 -#define MSR_C_LO_NEW_FID 0x0000003f -#define MSR_C_LO_VID_SHIFT 8 - -/* Field definitions within the FID VID High Control MSR : */ -#define MSR_C_HI_STP_GNT_TO 0x000fffff - -/* Field definitions within the FID VID Low Status MSR : */ -#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */ -#define MSR_S_LO_MAX_RAMP_VID 0x3f000000 -#define MSR_S_LO_MAX_FID 0x003f0000 -#define MSR_S_LO_START_FID 0x00003f00 -#define MSR_S_LO_CURRENT_FID 0x0000003f - -/* Field definitions within the FID VID High Status MSR : */ -#define MSR_S_HI_MIN_WORKING_VID 0x3f000000 -#define MSR_S_HI_MAX_WORKING_VID 0x003f0000 -#define MSR_S_HI_START_VID 0x00003f00 -#define MSR_S_HI_CURRENT_VID 0x0000003f -#define MSR_C_HI_STP_GNT_BENIGN 0x00000001 - - -/* Hardware Pstate _PSS and MSR definitions */ -#define USE_HW_PSTATE 0x00000080 -#define HW_PSTATE_FID_MASK 0x0000003f -#define HW_PSTATE_DID_MASK 0x000001c0 -#define HW_PSTATE_DID_SHIFT 6 -#define HW_PSTATE_MASK 0x00000007 -#define HW_PSTATE_VALID_MASK 0x80000000 -#define HW_FID_INDEX_SHIFT 8 -#define HW_FID_INDEX_MASK 0x0000ff00 -#define HW_DID_INDEX_SHIFT 16 -#define HW_DID_INDEX_MASK 0x00ff0000 -#define HW_WATTS_MASK 0xff -#define HW_PWR_DVR_MASK 0x300 -#define HW_PWR_DVR_SHIFT 8 -#define HW_PWR_MAX_MULT 3 -#define MAX_HW_PSTATE 8 /* hw pstate supports up to 8 */ -#define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */ -#define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */ -#define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */ - -/* define the two driver architectures */ -#define CPU_OPTERON 0 -#define CPU_HW_PSTATE 1 - - -/* - * There are restrictions frequencies have to follow: - * - only 1 entry in the low fid table ( <=1.4GHz ) - * - lowest entry in the high fid table must be >= 2 * 
the entry in the - * low fid table - * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry - * in the low fid table - * - the parts can only step at <= 200 MHz intervals, odd fid values are - * supported in revision G and later revisions. - * - lowest frequency must be >= interprocessor hypertransport link speed - * (only applies to MP systems obviously) - */ - -/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */ -#define LO_FID_TABLE_TOP 7 /* fid values marking the boundary */ -#define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */ - -#define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */ -#define HI_VCOFREQ_TABLE_BOTTOM 1600 - -#define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */ - -#define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */ -#define LEAST_VID 0x3e /* Lowest (numerically highest) useful vid value */ - -#define MIN_FREQ 800 /* Min and max freqs, per spec */ -#define MAX_FREQ 5000 - -#define INVALID_FID_MASK 0xffffffc0 /* not a valid fid if these bits are set */ -#define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */ - -#define VID_OFF 0x3f - -#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */ - -#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */ - -#define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */ -#define VST_UNITS_20US 20 /* Voltage Stabalization Time is in units of 20us */ - -/* - * Most values of interest are enocoded in a single field of the _PSS - * entries: the "control" value. 
- */ - -#define IRT_SHIFT 30 -#define RVO_SHIFT 28 -#define EXT_TYPE_SHIFT 27 -#define PLL_L_SHIFT 20 -#define MVS_SHIFT 18 -#define VST_SHIFT 11 -#define VID_SHIFT 6 -#define IRT_MASK 3 -#define RVO_MASK 3 -#define EXT_TYPE_MASK 1 -#define PLL_L_MASK 0x7f -#define MVS_MASK 3 -#define VST_MASK 0x7f -#define VID_MASK 0x1f -#define FID_MASK 0x1f -#define EXT_VID_MASK 0x3f -#define EXT_FID_MASK 0x3f - - -/* - * Version 1.4 of the PSB table. This table is constructed by BIOS and is - * to tell the OS's power management driver which VIDs and FIDs are - * supported by this particular processor. - * If the data in the PSB / PST is wrong, then this driver will program the - * wrong values into hardware, which is very likely to lead to a crash. - */ - -#define PSB_ID_STRING "AMDK7PNOW!" -#define PSB_ID_STRING_LEN 10 - -#define PSB_VERSION_1_4 0x14 - -struct psb_s { - u8 signature[10]; - u8 tableversion; - u8 flags1; - u16 vstable; - u8 flags2; - u8 num_tables; - u32 cpuid; - u8 plllocktime; - u8 maxfid; - u8 maxvid; - u8 numps; -}; - -/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */ -struct pst_s { - u8 fid; - u8 vid; -}; - -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg) - -static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid); -static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid); -static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); - -static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); - -#ifdef CONFIG_X86_POWERNOW_K8_ACPI -static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); -static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); -#endif - -#ifdef CONFIG_SMP -static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) -{ -} -#else -static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) -{ - cpu_set(0, cpu_sharedcore_mask[0]); -} -#endif diff --git a/arch/i386/kernel/cpu/cpufreq/sc520_freq.c b/arch/i386/kernel/cpu/cpufreq/sc520_freq.c deleted file mode 100644 index b8fb4b521c6..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/sc520_freq.c +++ /dev/null @@ -1,191 +0,0 @@ -/* - * sc520_freq.c: cpufreq driver for the AMD Elan sc520 - * - * Copyright (C) 2005 Sean Young - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Based on elanfreq.c - * - * 2005-03-30: - initial revision - */ - -#include -#include -#include - -#include -#include - -#include -#include -#include - -#define MMCR_BASE 0xfffef000 /* The default base address */ -#define OFFS_CPUCTL 0x2 /* CPU Control Register */ - -static __u8 __iomem *cpuctl; - -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "sc520_freq", msg) - -static struct cpufreq_frequency_table sc520_freq_table[] = { - {0x01, 100000}, - {0x02, 133000}, - {0, CPUFREQ_TABLE_END}, -}; - -static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) -{ - u8 clockspeed_reg = *cpuctl; - - switch (clockspeed_reg & 0x03) { - default: - printk(KERN_ERR "sc520_freq: error: cpuctl register has unexpected value %02x\n", clockspeed_reg); - case 0x01: - return 100000; - case 0x02: - return 133000; - } -} - -static void sc520_freq_set_cpu_state (unsigned int state) -{ - - struct cpufreq_freqs freqs; - u8 clockspeed_reg; - - freqs.old = sc520_freq_get_cpu_frequency(0); - freqs.new = sc520_freq_table[state].frequency; - freqs.cpu = 0; /* AMD Elan is UP */ - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - dprintk("attempting to set frequency to %i kHz\n", - sc520_freq_table[state].frequency); - - local_irq_disable(); - - clockspeed_reg = *cpuctl & ~0x03; - *cpuctl = clockspeed_reg | sc520_freq_table[state].index; - - local_irq_enable(); - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); -}; - -static int sc520_freq_verify (struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]); -} - -static int sc520_freq_target (struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - - if (cpufreq_frequency_table_target(policy, sc520_freq_table, target_freq, relation, &newstate)) - return -EINVAL; - - sc520_freq_set_cpu_state(newstate); - - return 0; -} - - -/* - * Module init and exit code - */ - -static int sc520_freq_cpu_init(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *c = cpu_data; - int result; - - /* capability check */ - if (c->x86_vendor != X86_VENDOR_AMD || - c->x86 != 4 || c->x86_model != 9) - return -ENODEV; - - /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - 
policy->cpuinfo.transition_latency = 1000000; /* 1ms */ - policy->cur = sc520_freq_get_cpu_frequency(0); - - result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table); - if (result) - return (result); - - cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu); - - return 0; -} - - -static int sc520_freq_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - - -static struct freq_attr* sc520_freq_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - - -static struct cpufreq_driver sc520_freq_driver = { - .get = sc520_freq_get_cpu_frequency, - .verify = sc520_freq_verify, - .target = sc520_freq_target, - .init = sc520_freq_cpu_init, - .exit = sc520_freq_cpu_exit, - .name = "sc520_freq", - .owner = THIS_MODULE, - .attr = sc520_freq_attr, -}; - - -static int __init sc520_freq_init(void) -{ - struct cpuinfo_x86 *c = cpu_data; - int err; - - /* Test if we have the right hardware */ - if(c->x86_vendor != X86_VENDOR_AMD || - c->x86 != 4 || c->x86_model != 9) { - dprintk("no Elan SC520 processor found!\n"); - return -ENODEV; - } - cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); - if(!cpuctl) { - printk(KERN_ERR "sc520_freq: error: failed to remap memory\n"); - return -ENOMEM; - } - - err = cpufreq_register_driver(&sc520_freq_driver); - if (err) - iounmap(cpuctl); - - return err; -} - - -static void __exit sc520_freq_exit(void) -{ - cpufreq_unregister_driver(&sc520_freq_driver); - iounmap(cpuctl); -} - - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Sean Young "); -MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU"); - -module_init(sc520_freq_init); -module_exit(sc520_freq_exit); - diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c deleted file mode 100644 index 6c5dc2c85ae..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ /dev/null @@ -1,634 +0,0 @@ -/* - * cpufreq driver for 
Enhanced SpeedStep, as found in Intel's Pentium - * M (part of the Centrino chipset). - * - * Since the original Pentium M, most new Intel CPUs support Enhanced - * SpeedStep. - * - * Despite the "SpeedStep" in the name, this is almost entirely unlike - * traditional SpeedStep. - * - * Modelled on speedstep.c - * - * Copyright (C) 2003 Jeremy Fitzhardinge - */ - -#include -#include -#include -#include -#include /* current */ -#include -#include - -#include -#include -#include - -#define PFX "speedstep-centrino: " -#define MAINTAINER "cpufreq@lists.linux.org.uk" - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) - -#define INTEL_MSR_RANGE (0xffff) - -struct cpu_id -{ - __u8 x86; /* CPU family */ - __u8 x86_model; /* model */ - __u8 x86_mask; /* stepping */ -}; - -enum { - CPU_BANIAS, - CPU_DOTHAN_A1, - CPU_DOTHAN_A2, - CPU_DOTHAN_B0, - CPU_MP4HT_D0, - CPU_MP4HT_E0, -}; - -static const struct cpu_id cpu_ids[] = { - [CPU_BANIAS] = { 6, 9, 5 }, - [CPU_DOTHAN_A1] = { 6, 13, 1 }, - [CPU_DOTHAN_A2] = { 6, 13, 2 }, - [CPU_DOTHAN_B0] = { 6, 13, 6 }, - [CPU_MP4HT_D0] = {15, 3, 4 }, - [CPU_MP4HT_E0] = {15, 4, 1 }, -}; -#define N_IDS ARRAY_SIZE(cpu_ids) - -struct cpu_model -{ - const struct cpu_id *cpu_id; - const char *model_name; - unsigned max_freq; /* max clock in kHz */ - - struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ -}; -static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x); - -/* Operating points for current CPU */ -static struct cpu_model *centrino_model[NR_CPUS]; -static const struct cpu_id *centrino_cpu[NR_CPUS]; - -static struct cpufreq_driver centrino_driver; - -#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE - -/* Computes the correct form for IA32_PERF_CTL MSR for a particular - frequency/voltage operating point; frequency in MHz, volts in mV. - This is stored as "index" in the structure. 
*/ -#define OP(mhz, mv) \ - { \ - .frequency = (mhz) * 1000, \ - .index = (((mhz)/100) << 8) | ((mv - 700) / 16) \ - } - -/* - * These voltage tables were derived from the Intel Pentium M - * datasheet, document 25261202.pdf, Table 5. I have verified they - * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium - * M. - */ - -/* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */ -static struct cpufreq_frequency_table banias_900[] = -{ - OP(600, 844), - OP(800, 988), - OP(900, 1004), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */ -static struct cpufreq_frequency_table banias_1000[] = -{ - OP(600, 844), - OP(800, 972), - OP(900, 988), - OP(1000, 1004), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */ -static struct cpufreq_frequency_table banias_1100[] = -{ - OP( 600, 956), - OP( 800, 1020), - OP( 900, 1100), - OP(1000, 1164), - OP(1100, 1180), - { .frequency = CPUFREQ_TABLE_END } -}; - - -/* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */ -static struct cpufreq_frequency_table banias_1200[] = -{ - OP( 600, 956), - OP( 800, 1004), - OP( 900, 1020), - OP(1000, 1100), - OP(1100, 1164), - OP(1200, 1180), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Intel Pentium M processor 1.30GHz (Banias) */ -static struct cpufreq_frequency_table banias_1300[] = -{ - OP( 600, 956), - OP( 800, 1260), - OP(1000, 1292), - OP(1200, 1356), - OP(1300, 1388), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Intel Pentium M processor 1.40GHz (Banias) */ -static struct cpufreq_frequency_table banias_1400[] = -{ - OP( 600, 956), - OP( 800, 1180), - OP(1000, 1308), - OP(1200, 1436), - OP(1400, 1484), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Intel Pentium M processor 1.50GHz (Banias) */ -static struct cpufreq_frequency_table banias_1500[] = -{ - OP( 600, 956), - OP( 800, 1116), - OP(1000, 1228), - OP(1200, 1356), - OP(1400, 1452), - 
OP(1500, 1484), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Intel Pentium M processor 1.60GHz (Banias) */ -static struct cpufreq_frequency_table banias_1600[] = -{ - OP( 600, 956), - OP( 800, 1036), - OP(1000, 1164), - OP(1200, 1276), - OP(1400, 1420), - OP(1600, 1484), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Intel Pentium M processor 1.70GHz (Banias) */ -static struct cpufreq_frequency_table banias_1700[] = -{ - OP( 600, 956), - OP( 800, 1004), - OP(1000, 1116), - OP(1200, 1228), - OP(1400, 1308), - OP(1700, 1484), - { .frequency = CPUFREQ_TABLE_END } -}; -#undef OP - -#define _BANIAS(cpuid, max, name) \ -{ .cpu_id = cpuid, \ - .model_name = "Intel(R) Pentium(R) M processor " name "MHz", \ - .max_freq = (max)*1000, \ - .op_points = banias_##max, \ -} -#define BANIAS(max) _BANIAS(&cpu_ids[CPU_BANIAS], max, #max) - -/* CPU models, their operating frequency range, and freq/voltage - operating points */ -static struct cpu_model models[] = -{ - _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"), - BANIAS(1000), - BANIAS(1100), - BANIAS(1200), - BANIAS(1300), - BANIAS(1400), - BANIAS(1500), - BANIAS(1600), - BANIAS(1700), - - /* NULL model_name is a wildcard */ - { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL }, - { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL }, - { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL }, - { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL }, - { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, - - { NULL, } -}; -#undef _BANIAS -#undef BANIAS - -static int centrino_cpu_init_table(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; - struct cpu_model *model; - - for(model = models; model->cpu_id != NULL; model++) - if (centrino_verify_cpu_id(cpu, model->cpu_id) && - (model->model_name == NULL || - strcmp(cpu->x86_model_id, model->model_name) == 0)) - break; - - if (model->cpu_id == NULL) { - /* No match at all */ - dprintk("no support for CPU model \"%s\": " - "send /proc/cpuinfo to " MAINTAINER "\n", - cpu->x86_model_id); - return -ENOENT; - } 
- - if (model->op_points == NULL) { - /* Matched a non-match */ - dprintk("no table support for CPU model \"%s\"\n", - cpu->x86_model_id); - dprintk("try using the acpi-cpufreq driver\n"); - return -ENOENT; - } - - centrino_model[policy->cpu] = model; - - dprintk("found \"%s\": max frequency: %dkHz\n", - model->model_name, model->max_freq); - - return 0; -} - -#else -static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; } -#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ - -static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x) -{ - if ((c->x86 == x->x86) && - (c->x86_model == x->x86_model) && - (c->x86_mask == x->x86_mask)) - return 1; - return 0; -} - -/* To be called only after centrino_model is initialized */ -static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe) -{ - int i; - - /* - * Extract clock in kHz from PERF_CTL value - * for centrino, as some DSDTs are buggy. - * Ideally, this can be done using the acpi_data structure. 
- */ - if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) || - (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) || - (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) { - msr = (msr >> 8) & 0xff; - return msr * 100000; - } - - if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points)) - return 0; - - msr &= 0xffff; - for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) { - if (msr == centrino_model[cpu]->op_points[i].index) - return centrino_model[cpu]->op_points[i].frequency; - } - if (failsafe) - return centrino_model[cpu]->op_points[i-1].frequency; - else - return 0; -} - -/* Return the current CPU frequency in kHz */ -static unsigned int get_cur_freq(unsigned int cpu) -{ - unsigned l, h; - unsigned clock_freq; - cpumask_t saved_mask; - - saved_mask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (smp_processor_id() != cpu) - return 0; - - rdmsr(MSR_IA32_PERF_STATUS, l, h); - clock_freq = extract_clock(l, cpu, 0); - - if (unlikely(clock_freq == 0)) { - /* - * On some CPUs, we can see transient MSR values (which are - * not present in _PSS), while CPU is doing some automatic - * P-state transition (like TM2). Get the last freq set - * in PERF_CTL. 
- */ - rdmsr(MSR_IA32_PERF_CTL, l, h); - clock_freq = extract_clock(l, cpu, 1); - } - - set_cpus_allowed(current, saved_mask); - return clock_freq; -} - - -static int centrino_cpu_init(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; - unsigned freq; - unsigned l, h; - int ret; - int i; - - /* Only Intel makes Enhanced Speedstep-capable CPUs */ - if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) - return -ENODEV; - - if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) - centrino_driver.flags |= CPUFREQ_CONST_LOOPS; - - if (policy->cpu != 0) - return -ENODEV; - - for (i = 0; i < N_IDS; i++) - if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) - break; - - if (i != N_IDS) - centrino_cpu[policy->cpu] = &cpu_ids[i]; - - if (!centrino_cpu[policy->cpu]) { - dprintk("found unsupported CPU with " - "Enhanced SpeedStep: send /proc/cpuinfo to " - MAINTAINER "\n"); - return -ENODEV; - } - - if (centrino_cpu_init_table(policy)) { - return -ENODEV; - } - - /* Check to see if Enhanced SpeedStep is enabled, and try to - enable it if not. 
*/ - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - - if (!(l & (1<<16))) { - l |= (1<<16); - dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); - wrmsr(MSR_IA32_MISC_ENABLE, l, h); - - /* check to see if it stuck */ - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - if (!(l & (1<<16))) { - printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n"); - return -ENODEV; - } - } - - freq = get_cur_freq(policy->cpu); - - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */ - policy->cur = freq; - - dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); - - ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points); - if (ret) - return (ret); - - cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu); - - return 0; -} - -static int centrino_cpu_exit(struct cpufreq_policy *policy) -{ - unsigned int cpu = policy->cpu; - - if (!centrino_model[cpu]) - return -ENODEV; - - cpufreq_frequency_table_put_attr(cpu); - - centrino_model[cpu] = NULL; - - return 0; -} - -/** - * centrino_verify - verifies a new CPUFreq policy - * @policy: new policy - * - * Limit must be within this model's frequency range at least one - * border included. - */ -static int centrino_verify (struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points); -} - -/** - * centrino_setpolicy - set a new CPUFreq policy - * @policy: new policy - * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) - * - * Sets a new CPUFreq policy. 
- */ -static int centrino_target (struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; - struct cpufreq_freqs freqs; - cpumask_t online_policy_cpus; - cpumask_t saved_mask; - cpumask_t set_mask; - cpumask_t covered_cpus; - int retval = 0; - unsigned int j, k, first_cpu, tmp; - - if (unlikely(centrino_model[cpu] == NULL)) - return -ENODEV; - - if (unlikely(cpufreq_frequency_table_target(policy, - centrino_model[cpu]->op_points, - target_freq, - relation, - &newstate))) { - return -EINVAL; - } - -#ifdef CONFIG_HOTPLUG_CPU - /* cpufreq holds the hotplug lock, so we are safe from here on */ - cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); -#else - online_policy_cpus = policy->cpus; -#endif - - saved_mask = current->cpus_allowed; - first_cpu = 1; - cpus_clear(covered_cpus); - for_each_cpu_mask(j, online_policy_cpus) { - /* - * Support for SMP systems. - * Make sure we are running on CPU that wants to change freq - */ - cpus_clear(set_mask); - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) - cpus_or(set_mask, set_mask, online_policy_cpus); - else - cpu_set(j, set_mask); - - set_cpus_allowed(current, set_mask); - preempt_disable(); - if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { - dprintk("couldn't limit to CPUs in this domain\n"); - retval = -EAGAIN; - if (first_cpu) { - /* We haven't started the transition yet. 
*/ - goto migrate_end; - } - preempt_enable(); - break; - } - - msr = centrino_model[cpu]->op_points[newstate].index; - - if (first_cpu) { - rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); - if (msr == (oldmsr & 0xffff)) { - dprintk("no change needed - msr was and needs " - "to be %x\n", oldmsr); - retval = 0; - goto migrate_end; - } - - freqs.old = extract_clock(oldmsr, cpu, 0); - freqs.new = extract_clock(msr, cpu, 0); - - dprintk("target=%dkHz old=%d new=%d msr=%04x\n", - target_freq, freqs.old, freqs.new, msr); - - for_each_cpu_mask(k, online_policy_cpus) { - freqs.cpu = k; - cpufreq_notify_transition(&freqs, - CPUFREQ_PRECHANGE); - } - - first_cpu = 0; - /* all but 16 LSB are reserved, treat them with care */ - oldmsr &= ~0xffff; - msr &= 0xffff; - oldmsr |= msr; - } - - wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { - preempt_enable(); - break; - } - - cpu_set(j, covered_cpus); - preempt_enable(); - } - - for_each_cpu_mask(k, online_policy_cpus) { - freqs.cpu = k; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - - if (unlikely(retval)) { - /* - * We have failed halfway through the frequency change. - * We have sent callbacks to policy->cpus and - * MSRs have already been written on coverd_cpus. - * Best effort undo.. 
- */ - - if (!cpus_empty(covered_cpus)) { - for_each_cpu_mask(j, covered_cpus) { - set_cpus_allowed(current, cpumask_of_cpu(j)); - wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); - } - } - - tmp = freqs.new; - freqs.new = freqs.old; - freqs.old = tmp; - for_each_cpu_mask(j, online_policy_cpus) { - freqs.cpu = j; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - } - set_cpus_allowed(current, saved_mask); - return 0; - -migrate_end: - preempt_enable(); - set_cpus_allowed(current, saved_mask); - return 0; -} - -static struct freq_attr* centrino_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver centrino_driver = { - .name = "centrino", /* should be speedstep-centrino, - but there's a 16 char limit */ - .init = centrino_cpu_init, - .exit = centrino_cpu_exit, - .verify = centrino_verify, - .target = centrino_target, - .get = get_cur_freq, - .attr = centrino_attr, - .owner = THIS_MODULE, -}; - - -/** - * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver - * - * Initializes the Enhanced SpeedStep support. Returns -ENODEV on - * unsupported devices, -ENOENT if there's no voltage table for this - * particular CPU model, -EINVAL on problems during initiatization, - * and zero on success. - * - * This is quite picky. Not only does the CPU have to advertise the - * "est" flag in the cpuid capability flags, we look for a specific - * CPU model and stepping, and we need to have the exact model name in - * our voltage tables. That is, be paranoid about not releasing - * someone's valuable magic smoke. 
- */ -static int __init centrino_init(void) -{ - struct cpuinfo_x86 *cpu = cpu_data; - - if (!cpu_has(cpu, X86_FEATURE_EST)) - return -ENODEV; - - return cpufreq_register_driver(¢rino_driver); -} - -static void __exit centrino_exit(void) -{ - cpufreq_unregister_driver(¢rino_driver); -} - -MODULE_AUTHOR ("Jeremy Fitzhardinge "); -MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors."); -MODULE_LICENSE ("GPL"); - -late_initcall(centrino_init); -module_exit(centrino_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c deleted file mode 100644 index a5b2346faf1..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c +++ /dev/null @@ -1,440 +0,0 @@ -/* - * (C) 2001 Dave Jones, Arjan van de ven. - * (C) 2002 - 2003 Dominik Brodowski - * - * Licensed under the terms of the GNU GPL License version 2. - * Based upon reverse engineered information, and on Intel documentation - * for chipsets ICH2-M and ICH3-M. - * - * Many thanks to Ducrot Bruno for finding and fixing the last - * "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler - * for extensive testing. - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - - -/********************************************************************* - * SPEEDSTEP - DEFINITIONS * - *********************************************************************/ - -#include -#include -#include -#include -#include -#include -#include - -#include "speedstep-lib.h" - - -/* speedstep_chipset: - * It is necessary to know which chipset is used. As accesses to - * this device occur at various places in this module, we need a - * static struct pci_dev * pointing to that device. - */ -static struct pci_dev *speedstep_chipset_dev; - - -/* speedstep_processor - */ -static unsigned int speedstep_processor = 0; - -static u32 pmbase; - -/* - * There are only two frequency states for each processor. Values - * are in kHz for the time being. 
- */ -static struct cpufreq_frequency_table speedstep_freqs[] = { - {SPEEDSTEP_HIGH, 0}, - {SPEEDSTEP_LOW, 0}, - {0, CPUFREQ_TABLE_END}, -}; - - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg) - - -/** - * speedstep_find_register - read the PMBASE address - * - * Returns: -ENODEV if no register could be found - */ -static int speedstep_find_register (void) -{ - if (!speedstep_chipset_dev) - return -ENODEV; - - /* get PMBASE */ - pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase); - if (!(pmbase & 0x01)) { - printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); - return -ENODEV; - } - - pmbase &= 0xFFFFFFFE; - if (!pmbase) { - printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); - return -ENODEV; - } - - dprintk("pmbase is 0x%x\n", pmbase); - return 0; -} - -/** - * speedstep_set_state - set the SpeedStep state - * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) - * - * Tries to change the SpeedStep state. 
- */ -static void speedstep_set_state (unsigned int state) -{ - u8 pm2_blk; - u8 value; - unsigned long flags; - - if (state > 0x1) - return; - - /* Disable IRQs */ - local_irq_save(flags); - - /* read state */ - value = inb(pmbase + 0x50); - - dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); - - /* write new state */ - value &= 0xFE; - value |= state; - - dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase); - - /* Disable bus master arbitration */ - pm2_blk = inb(pmbase + 0x20); - pm2_blk |= 0x01; - outb(pm2_blk, (pmbase + 0x20)); - - /* Actual transition */ - outb(value, (pmbase + 0x50)); - - /* Restore bus master arbitration */ - pm2_blk &= 0xfe; - outb(pm2_blk, (pmbase + 0x20)); - - /* check if transition was successful */ - value = inb(pmbase + 0x50); - - /* Enable IRQs */ - local_irq_restore(flags); - - dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); - - if (state == (value & 0x1)) { - dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000)); - } else { - printk (KERN_ERR "cpufreq: change failed - I/O error\n"); - } - - return; -} - - -/** - * speedstep_activate - activate SpeedStep control in the chipset - * - * Tries to activate the SpeedStep status and control registers. - * Returns -EINVAL on an unsupported chipset, and zero on success. - */ -static int speedstep_activate (void) -{ - u16 value = 0; - - if (!speedstep_chipset_dev) - return -EINVAL; - - pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value); - if (!(value & 0x08)) { - value |= 0x08; - dprintk("activating SpeedStep (TM) registers\n"); - pci_write_config_word(speedstep_chipset_dev, 0x00A0, value); - } - - return 0; -} - - -/** - * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic - * - * Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to - * the LPC bridge / PM module which contains all power-management - * functions. 
Returns the SPEEDSTEP_CHIPSET_-number for the detected - * chipset, or zero on failure. - */ -static unsigned int speedstep_detect_chipset (void) -{ - speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82801DB_12, - PCI_ANY_ID, - PCI_ANY_ID, - NULL); - if (speedstep_chipset_dev) - return 4; /* 4-M */ - - speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82801CA_12, - PCI_ANY_ID, - PCI_ANY_ID, - NULL); - if (speedstep_chipset_dev) - return 3; /* 3-M */ - - - speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82801BA_10, - PCI_ANY_ID, - PCI_ANY_ID, - NULL); - if (speedstep_chipset_dev) { - /* speedstep.c causes lockups on Dell Inspirons 8000 and - * 8100 which use a pretty old revision of the 82815 - * host brige. Abort on these systems. - */ - static struct pci_dev *hostbridge; - - hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82815_MC, - PCI_ANY_ID, - PCI_ANY_ID, - NULL); - - if (!hostbridge) - return 2; /* 2-M */ - - if (hostbridge->revision < 5) { - dprintk("hostbridge does not support speedstep\n"); - speedstep_chipset_dev = NULL; - pci_dev_put(hostbridge); - return 0; - } - - pci_dev_put(hostbridge); - return 2; /* 2-M */ - } - - return 0; -} - -static unsigned int _speedstep_get(cpumask_t cpus) -{ - unsigned int speed; - cpumask_t cpus_allowed; - - cpus_allowed = current->cpus_allowed; - set_cpus_allowed(current, cpus); - speed = speedstep_get_processor_frequency(speedstep_processor); - set_cpus_allowed(current, cpus_allowed); - dprintk("detected %u kHz as current frequency\n", speed); - return speed; -} - -static unsigned int speedstep_get(unsigned int cpu) -{ - return _speedstep_get(cpumask_of_cpu(cpu)); -} - -/** - * speedstep_target - set a new CPUFreq policy - * @policy: new policy - * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) - * - * Sets 
a new CPUFreq policy. - */ -static int speedstep_target (struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - struct cpufreq_freqs freqs; - cpumask_t cpus_allowed; - int i; - - if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) - return -EINVAL; - - freqs.old = _speedstep_get(policy->cpus); - freqs.new = speedstep_freqs[newstate].frequency; - freqs.cpu = policy->cpu; - - dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new); - - /* no transition necessary */ - if (freqs.old == freqs.new) - return 0; - - cpus_allowed = current->cpus_allowed; - - for_each_cpu_mask(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } - - /* switch to physical CPU where state is to be changed */ - set_cpus_allowed(current, policy->cpus); - - speedstep_set_state(newstate); - - /* allow to be run on all CPUs */ - set_cpus_allowed(current, cpus_allowed); - - for_each_cpu_mask(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - - return 0; -} - - -/** - * speedstep_verify - verifies a new CPUFreq policy - * @policy: new policy - * - * Limit must be within speedstep_low_freq and speedstep_high_freq, with - * at least one border included. 
- */ -static int speedstep_verify (struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); -} - - -static int speedstep_cpu_init(struct cpufreq_policy *policy) -{ - int result = 0; - unsigned int speed; - cpumask_t cpus_allowed; - - /* only run on CPU to be set, or on its sibling */ -#ifdef CONFIG_SMP - policy->cpus = cpu_sibling_map[policy->cpu]; -#endif - - cpus_allowed = current->cpus_allowed; - set_cpus_allowed(current, policy->cpus); - - /* detect low and high frequency and transition latency */ - result = speedstep_get_freqs(speedstep_processor, - &speedstep_freqs[SPEEDSTEP_LOW].frequency, - &speedstep_freqs[SPEEDSTEP_HIGH].frequency, - &policy->cpuinfo.transition_latency, - &speedstep_set_state); - set_cpus_allowed(current, cpus_allowed); - if (result) - return result; - - /* get current speed setting */ - speed = _speedstep_get(policy->cpus); - if (!speed) - return -EIO; - - dprintk("currently at %s speed setting - %i MHz\n", - (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? 
"low" : "high", - (speed / 1000)); - - /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cur = speed; - - result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); - if (result) - return (result); - - cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); - - return 0; -} - - -static int speedstep_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static struct freq_attr* speedstep_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - - -static struct cpufreq_driver speedstep_driver = { - .name = "speedstep-ich", - .verify = speedstep_verify, - .target = speedstep_target, - .init = speedstep_cpu_init, - .exit = speedstep_cpu_exit, - .get = speedstep_get, - .owner = THIS_MODULE, - .attr = speedstep_attr, -}; - - -/** - * speedstep_init - initializes the SpeedStep CPUFreq driver - * - * Initializes the SpeedStep support. Returns -ENODEV on unsupported - * devices, -EINVAL on problems during initiatization, and zero on - * success. - */ -static int __init speedstep_init(void) -{ - /* detect processor */ - speedstep_processor = speedstep_detect_processor(); - if (!speedstep_processor) { - dprintk("Intel(R) SpeedStep(TM) capable processor not found\n"); - return -ENODEV; - } - - /* detect chipset */ - if (!speedstep_detect_chipset()) { - dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n"); - return -ENODEV; - } - - /* activate speedstep support */ - if (speedstep_activate()) { - pci_dev_put(speedstep_chipset_dev); - return -EINVAL; - } - - if (speedstep_find_register()) - return -ENODEV; - - return cpufreq_register_driver(&speedstep_driver); -} - - -/** - * speedstep_exit - unregisters SpeedStep support - * - * Unregisters SpeedStep support. 
- */ -static void __exit speedstep_exit(void) -{ - pci_dev_put(speedstep_chipset_dev); - cpufreq_unregister_driver(&speedstep_driver); -} - - -MODULE_AUTHOR ("Dave Jones , Dominik Brodowski "); -MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); -MODULE_LICENSE ("GPL"); - -module_init(speedstep_init); -module_exit(speedstep_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c deleted file mode 100644 index b1acc8ce316..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c +++ /dev/null @@ -1,444 +0,0 @@ -/* - * (C) 2002 - 2003 Dominik Brodowski - * - * Licensed under the terms of the GNU GPL License version 2. - * - * Library for common functions for Intel SpeedStep v.1 and v.2 support - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - -#include -#include -#include -#include -#include -#include - -#include -#include "speedstep-lib.h" - -#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-lib", msg) - -#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK -static int relaxed_check = 0; -#else -#define relaxed_check 0 -#endif - -/********************************************************************* - * GET PROCESSOR CORE SPEED IN KHZ * - *********************************************************************/ - -static unsigned int pentium3_get_frequency (unsigned int processor) -{ - /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ - struct { - unsigned int ratio; /* Frequency Multiplier (x10) */ - u8 bitmap; /* power on configuration bits - [27, 25:22] (in MSR 0x2a) */ - } msr_decode_mult [] = { - { 30, 0x01 }, - { 35, 0x05 }, - { 40, 0x02 }, - { 45, 0x06 }, - { 50, 0x00 }, - { 55, 0x04 }, - { 60, 0x0b }, - { 65, 0x0f }, - { 70, 0x09 }, - { 75, 0x0d }, - { 80, 0x0a }, - { 85, 0x26 }, - { 90, 0x20 }, - { 100, 0x2b }, - { 0, 0xff } /* error or unknown value */ - }; - - /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */ - struct { - unsigned int value; /* Front Side Bus speed in MHz */ - u8 bitmap; /* power on configuration bits [18: 19] - (in MSR 0x2a) */ - } msr_decode_fsb [] = { - { 66, 0x0 }, - { 100, 0x2 }, - { 133, 0x1 }, - { 0, 0xff} - }; - - u32 msr_lo, msr_tmp; - int i = 0, j = 0; - - /* read MSR 0x2a - we only need the low 32 bits */ - rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); - msr_tmp = msr_lo; - - /* decode the FSB */ - msr_tmp &= 0x00c0000; - msr_tmp >>= 18; - while (msr_tmp != msr_decode_fsb[i].bitmap) { - if (msr_decode_fsb[i].bitmap == 0xff) - return 0; - i++; - } - - /* decode the multiplier */ - if (processor == SPEEDSTEP_PROCESSOR_PIII_C_EARLY) { - dprintk("workaround for early PIIIs\n"); - msr_lo &= 0x03c00000; - } else - msr_lo &= 0x0bc00000; - msr_lo >>= 22; - while (msr_lo != msr_decode_mult[j].bitmap) { - if (msr_decode_mult[j].bitmap == 0xff) - return 0; - j++; - } - - dprintk("speed 
is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); - - return (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100); -} - - -static unsigned int pentiumM_get_frequency(void) -{ - u32 msr_lo, msr_tmp; - - rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); - - /* see table B-2 of 24547212.pdf */ - if (msr_lo & 0x00040000) { - printk(KERN_DEBUG "speedstep-lib: PM - invalid FSB: 0x%x 0x%x\n", msr_lo, msr_tmp); - return 0; - } - - msr_tmp = (msr_lo >> 22) & 0x1f; - dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000)); - - return (msr_tmp * 100 * 1000); -} - -static unsigned int pentium_core_get_frequency(void) -{ - u32 fsb = 0; - u32 msr_lo, msr_tmp; - - rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp); - /* see table B-2 of 25366920.pdf */ - switch (msr_lo & 0x07) { - case 5: - fsb = 100000; - break; - case 1: - fsb = 133333; - break; - case 3: - fsb = 166667; - break; - default: - printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value"); - } - - rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); - - msr_tmp = (msr_lo >> 22) & 0x1f; - dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * fsb)); - - return (msr_tmp * fsb); -} - - -static unsigned int pentium4_get_frequency(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - u32 msr_lo, msr_hi, mult; - unsigned int fsb = 0; - - rdmsr(0x2c, msr_lo, msr_hi); - - dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi); - - /* decode the FSB: see IA-32 Intel (C) Architecture Software - * Developer's Manual, Volume 3: System Prgramming Guide, - * revision #12 in Table B-1: MSRs in the Pentium 4 and - * Intel Xeon Processors, on page B-4 and B-5. 
- */ - if (c->x86_model < 2) - fsb = 100 * 1000; - else { - u8 fsb_code = (msr_lo >> 16) & 0x7; - switch (fsb_code) { - case 0: - fsb = 100 * 1000; - break; - case 1: - fsb = 13333 * 10; - break; - case 2: - fsb = 200 * 1000; - break; - } - } - - if (!fsb) - printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to \n"); - - /* Multiplier. */ - if (c->x86_model < 2) - mult = msr_lo >> 27; - else - mult = msr_lo >> 24; - - dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult)); - - return (fsb * mult); -} - - -unsigned int speedstep_get_processor_frequency(unsigned int processor) -{ - switch (processor) { - case SPEEDSTEP_PROCESSOR_PCORE: - return pentium_core_get_frequency(); - case SPEEDSTEP_PROCESSOR_PM: - return pentiumM_get_frequency(); - case SPEEDSTEP_PROCESSOR_P4D: - case SPEEDSTEP_PROCESSOR_P4M: - return pentium4_get_frequency(); - case SPEEDSTEP_PROCESSOR_PIII_T: - case SPEEDSTEP_PROCESSOR_PIII_C: - case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: - return pentium3_get_frequency(processor); - default: - return 0; - }; - return 0; -} -EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency); - - -/********************************************************************* - * DETECT SPEEDSTEP-CAPABLE PROCESSOR * - *********************************************************************/ - -unsigned int speedstep_detect_processor (void) -{ - struct cpuinfo_x86 *c = cpu_data; - u32 ebx, msr_lo, msr_hi; - - dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); - - if ((c->x86_vendor != X86_VENDOR_INTEL) || - ((c->x86 != 6) && (c->x86 != 0xF))) - return 0; - - if (c->x86 == 0xF) { - /* Intel Mobile Pentium 4-M - * or Intel Mobile Pentium 4 with 533 MHz FSB */ - if (c->x86_model != 2) - return 0; - - ebx = cpuid_ebx(0x00000001); - ebx &= 0x000000FF; - - dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); - - switch (c->x86_mask) { - case 4: - /* - * B-stepping [M-P4-M] - * sample has ebx = 0x0f, production has 0x0e. 
- */ - if ((ebx == 0x0e) || (ebx == 0x0f)) - return SPEEDSTEP_PROCESSOR_P4M; - break; - case 7: - /* - * C-stepping [M-P4-M] - * needs to have ebx=0x0e, else it's a celeron: - * cf. 25130917.pdf / page 7, footnote 5 even - * though 25072120.pdf / page 7 doesn't say - * samples are only of B-stepping... - */ - if (ebx == 0x0e) - return SPEEDSTEP_PROCESSOR_P4M; - break; - case 9: - /* - * D-stepping [M-P4-M or M-P4/533] - * - * this is totally strange: CPUID 0x0F29 is - * used by M-P4-M, M-P4/533 and(!) Celeron CPUs. - * The latter need to be sorted out as they don't - * support speedstep. - * Celerons with CPUID 0x0F29 may have either - * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything - * specific. - * M-P4-Ms may have either ebx=0xe or 0xf [see above] - * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf] - * also, M-P4M HTs have ebx=0x8, too - * For now, they are distinguished by the model_id string - */ - if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL)) - return SPEEDSTEP_PROCESSOR_P4M; - break; - default: - break; - } - return 0; - } - - switch (c->x86_model) { - case 0x0B: /* Intel PIII [Tualatin] */ - /* cpuid_ebx(1) is 0x04 for desktop PIII, 0x06 for mobile PIII-M */ - ebx = cpuid_ebx(0x00000001); - dprintk("ebx is %x\n", ebx); - - ebx &= 0x000000FF; - - if (ebx != 0x06) - return 0; - - /* So far all PIII-M processors support SpeedStep. See - * Intel's 24540640.pdf of June 2003 - */ - return SPEEDSTEP_PROCESSOR_PIII_T; - - case 0x08: /* Intel PIII [Coppermine] */ - - /* all mobile PIII Coppermines have FSB 100 MHz - * ==> sort out a few desktop PIIIs. 
*/ - rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi); - dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi); - msr_lo &= 0x00c0000; - if (msr_lo != 0x0080000) - return 0; - - /* - * If the processor is a mobile version, - * platform ID has bit 50 set - * it has SpeedStep technology if either - * bit 56 or 57 is set - */ - rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi); - dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi); - if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) { - if (c->x86_mask == 0x01) { - dprintk("early PIII version\n"); - return SPEEDSTEP_PROCESSOR_PIII_C_EARLY; - } else - return SPEEDSTEP_PROCESSOR_PIII_C; - } - - default: - return 0; - } -} -EXPORT_SYMBOL_GPL(speedstep_detect_processor); - - -/********************************************************************* - * DETECT SPEEDSTEP SPEEDS * - *********************************************************************/ - -unsigned int speedstep_get_freqs(unsigned int processor, - unsigned int *low_speed, - unsigned int *high_speed, - unsigned int *transition_latency, - void (*set_state) (unsigned int state)) -{ - unsigned int prev_speed; - unsigned int ret = 0; - unsigned long flags; - struct timeval tv1, tv2; - - if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) - return -EINVAL; - - dprintk("trying to determine both speeds\n"); - - /* get current speed */ - prev_speed = speedstep_get_processor_frequency(processor); - if (!prev_speed) - return -EIO; - - dprintk("previous speed is %u\n", prev_speed); - - local_irq_save(flags); - - /* switch to low state */ - set_state(SPEEDSTEP_LOW); - *low_speed = speedstep_get_processor_frequency(processor); - if (!*low_speed) { - ret = -EIO; - goto out; - } - - dprintk("low speed is %u\n", *low_speed); - - /* start latency measurement */ - if (transition_latency) - do_gettimeofday(&tv1); - - /* switch to high state */ - set_state(SPEEDSTEP_HIGH); - - /* end latency measurement */ - 
if (transition_latency) - do_gettimeofday(&tv2); - - *high_speed = speedstep_get_processor_frequency(processor); - if (!*high_speed) { - ret = -EIO; - goto out; - } - - dprintk("high speed is %u\n", *high_speed); - - if (*low_speed == *high_speed) { - ret = -ENODEV; - goto out; - } - - /* switch to previous state, if necessary */ - if (*high_speed != prev_speed) - set_state(SPEEDSTEP_LOW); - - if (transition_latency) { - *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC + - tv2.tv_usec - tv1.tv_usec; - dprintk("transition latency is %u uSec\n", *transition_latency); - - /* convert uSec to nSec and add 20% for safety reasons */ - *transition_latency *= 1200; - - /* check if the latency measurement is too high or too low - * and set it to a safe value (500uSec) in that case - */ - if (*transition_latency > 10000000 || *transition_latency < 50000) { - printk (KERN_WARNING "speedstep: frequency transition measured seems out of " - "range (%u nSec), falling back to a safe one of %u nSec.\n", - *transition_latency, 500000); - *transition_latency = 500000; - } - } - -out: - local_irq_restore(flags); - return (ret); -} -EXPORT_SYMBOL_GPL(speedstep_get_freqs); - -#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK -module_param(relaxed_check, int, 0444); -MODULE_PARM_DESC(relaxed_check, "Don't do all checks for speedstep capability."); -#endif - -MODULE_AUTHOR ("Dominik Brodowski "); -MODULE_DESCRIPTION ("Library for Intel SpeedStep 1 or 2 cpufreq drivers."); -MODULE_LICENSE ("GPL"); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h deleted file mode 100644 index b11bcc608ca..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * (C) 2002 - 2003 Dominik Brodowski - * - * Licensed under the terms of the GNU GPL License version 2. - * - * Library for common functions for Intel SpeedStep v.1 and v.2 support - * - * BIG FAT DISCLAIMER: Work in progress code. 
Possibly *dangerous* - */ - - - -/* processors */ - -#define SPEEDSTEP_PROCESSOR_PIII_C_EARLY 0x00000001 /* Coppermine core */ -#define SPEEDSTEP_PROCESSOR_PIII_C 0x00000002 /* Coppermine core */ -#define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */ -#define SPEEDSTEP_PROCESSOR_P4M 0x00000004 /* P4-M */ - -/* the following processors are not speedstep-capable and are not auto-detected - * in speedstep_detect_processor(). However, their speed can be detected using - * the speedstep_get_processor_frequency() call. */ -#define SPEEDSTEP_PROCESSOR_PM 0xFFFFFF03 /* Pentium M */ -#define SPEEDSTEP_PROCESSOR_P4D 0xFFFFFF04 /* desktop P4 */ -#define SPEEDSTEP_PROCESSOR_PCORE 0xFFFFFF05 /* Core */ - -/* speedstep states -- only two of them */ - -#define SPEEDSTEP_HIGH 0x00000000 -#define SPEEDSTEP_LOW 0x00000001 - - -/* detect a speedstep-capable processor */ -extern unsigned int speedstep_detect_processor (void); - -/* detect the current speed (in khz) of the processor */ -extern unsigned int speedstep_get_processor_frequency(unsigned int processor); - - -/* detect the low and high speeds of the processor. The callback - * set_state"'s first argument is either SPEEDSTEP_HIGH or - * SPEEDSTEP_LOW; the second argument is zero so that no - * cpufreq_notify_transition calls are initiated. - */ -extern unsigned int speedstep_get_freqs(unsigned int processor, - unsigned int *low_speed, - unsigned int *high_speed, - unsigned int *transition_latency, - void (*set_state) (unsigned int state)); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c deleted file mode 100644 index e1c509aa305..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +++ /dev/null @@ -1,424 +0,0 @@ -/* - * Intel SpeedStep SMI driver. - * - * (C) 2003 Hiroshi Miura - * - * Licensed under the terms of the GNU GPL License version 2. 
- * - */ - - -/********************************************************************* - * SPEEDSTEP - DEFINITIONS * - *********************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "speedstep-lib.h" - -/* speedstep system management interface port/command. - * - * These parameters are got from IST-SMI BIOS call. - * If user gives it, these are used. - * - */ -static int smi_port = 0; -static int smi_cmd = 0; -static unsigned int smi_sig = 0; - -/* info about the processor */ -static unsigned int speedstep_processor = 0; - -/* - * There are only two frequency states for each processor. Values - * are in kHz for the time being. - */ -static struct cpufreq_frequency_table speedstep_freqs[] = { - {SPEEDSTEP_HIGH, 0}, - {SPEEDSTEP_LOW, 0}, - {0, CPUFREQ_TABLE_END}, -}; - -#define GET_SPEEDSTEP_OWNER 0 -#define GET_SPEEDSTEP_STATE 1 -#define SET_SPEEDSTEP_STATE 2 -#define GET_SPEEDSTEP_FREQS 4 - -/* how often shall the SMI call be tried if it failed, e.g. because - * of DMA activity going on? */ -#define SMI_TRIES 5 - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-smi", msg) - -/** - * speedstep_smi_ownership - */ -static int speedstep_smi_ownership (void) -{ - u32 command, result, magic; - u32 function = GET_SPEEDSTEP_OWNER; - unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation"; - - command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - magic = virt_to_phys(magic_data); - - dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port); - - __asm__ __volatile__( - "out %%al, (%%dx)\n" - : "=D" (result) - : "a" (command), "b" (function), "c" (0), "d" (smi_port), - "D" (0), "S" (magic) - : "memory" - ); - - dprintk("result is %x\n", result); - - return result; -} - -/** - * speedstep_smi_get_freqs - get SpeedStep preferred & current freq. 
- * @low: the low frequency value is placed here - * @high: the high frequency value is placed here - * - * Only available on later SpeedStep-enabled systems, returns false results or - * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing - * shows that the latter occurs if !(ist_info.event & 0xFFFF). - */ -static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) -{ - u32 command, result = 0, edi, high_mhz, low_mhz; - u32 state=0; - u32 function = GET_SPEEDSTEP_FREQS; - - if (!(ist_info.event & 0xFFFF)) { - dprintk("bug #1422 -- can't read freqs from BIOS\n"); - return -ENODEV; - } - - command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - - dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port); - - __asm__ __volatile__("movl $0, %%edi\n" - "out %%al, (%%dx)\n" - : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi) - : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0) - ); - - dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz); - - /* abort if results are obviously incorrect... 
*/ - if ((high_mhz + low_mhz) < 600) - return -EINVAL; - - *high = high_mhz * 1000; - *low = low_mhz * 1000; - - return result; -} - -/** - * speedstep_get_state - set the SpeedStep state - * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) - * - */ -static int speedstep_get_state (void) -{ - u32 function=GET_SPEEDSTEP_STATE; - u32 result, state, edi, command; - - command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - - dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port); - - __asm__ __volatile__("movl $0, %%edi\n" - "out %%al, (%%dx)\n" - : "=a" (result), "=b" (state), "=D" (edi) - : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0) - ); - - dprintk("state is %x, result is %x\n", state, result); - - return (state & 1); -} - - -/** - * speedstep_set_state - set the SpeedStep state - * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) - * - */ -static void speedstep_set_state (unsigned int state) -{ - unsigned int result = 0, command, new_state; - unsigned long flags; - unsigned int function=SET_SPEEDSTEP_STATE; - unsigned int retry = 0; - - if (state > 0x1) - return; - - /* Disable IRQs */ - local_irq_save(flags); - - command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - - dprintk("trying to set frequency to state %u with command %x at port %x\n", state, command, smi_port); - - do { - if (retry) { - dprintk("retry %u, previous result %u, waiting...\n", retry, result); - mdelay(retry * 50); - } - retry++; - __asm__ __volatile__( - "movl $0, %%edi\n" - "out %%al, (%%dx)\n" - : "=b" (new_state), "=D" (result) - : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0) - ); - } while ((new_state != state) && (retry <= SMI_TRIES)); - - /* enable IRQs */ - local_irq_restore(flags); - - if (new_state == state) { - dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result); - } else { - 
printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result); - } - - return; -} - - -/** - * speedstep_target - set a new CPUFreq policy - * @policy: new policy - * @target_freq: new freq - * @relation: - * - * Sets a new CPUFreq policy/freq. - */ -static int speedstep_target (struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) -{ - unsigned int newstate = 0; - struct cpufreq_freqs freqs; - - if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) - return -EINVAL; - - freqs.old = speedstep_freqs[speedstep_get_state()].frequency; - freqs.new = speedstep_freqs[newstate].frequency; - freqs.cpu = 0; /* speedstep.c is UP only driver */ - - if (freqs.old == freqs.new) - return 0; - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - speedstep_set_state(newstate); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - - return 0; -} - - -/** - * speedstep_verify - verifies a new CPUFreq policy - * @policy: new policy - * - * Limit must be within speedstep_low_freq and speedstep_high_freq, with - * at least one border included. 
- */ -static int speedstep_verify (struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); -} - - -static int speedstep_cpu_init(struct cpufreq_policy *policy) -{ - int result; - unsigned int speed,state; - - /* capability check */ - if (policy->cpu != 0) - return -ENODEV; - - result = speedstep_smi_ownership(); - if (result) { - dprintk("fails in aquiring ownership of a SMI interface.\n"); - return -EINVAL; - } - - /* detect low and high frequency */ - result = speedstep_smi_get_freqs(&speedstep_freqs[SPEEDSTEP_LOW].frequency, - &speedstep_freqs[SPEEDSTEP_HIGH].frequency); - if (result) { - /* fall back to speedstep_lib.c dection mechanism: try both states out */ - dprintk("could not detect low and high frequencies by SMI call.\n"); - result = speedstep_get_freqs(speedstep_processor, - &speedstep_freqs[SPEEDSTEP_LOW].frequency, - &speedstep_freqs[SPEEDSTEP_HIGH].frequency, - NULL, - &speedstep_set_state); - - if (result) { - dprintk("could not detect two different speeds -- aborting.\n"); - return result; - } else - dprintk("workaround worked.\n"); - } - - /* get current speed setting */ - state = speedstep_get_state(); - speed = speedstep_freqs[state].frequency; - - dprintk("currently at %s speed setting - %i MHz\n", - (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? 
"low" : "high", - (speed / 1000)); - - /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - policy->cur = speed; - - result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); - if (result) - return (result); - - cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); - - return 0; -} - -static int speedstep_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static unsigned int speedstep_get(unsigned int cpu) -{ - if (cpu) - return -ENODEV; - return speedstep_get_processor_frequency(speedstep_processor); -} - - -static int speedstep_resume(struct cpufreq_policy *policy) -{ - int result = speedstep_smi_ownership(); - - if (result) - dprintk("fails in re-aquiring ownership of a SMI interface.\n"); - - return result; -} - -static struct freq_attr* speedstep_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver speedstep_driver = { - .name = "speedstep-smi", - .verify = speedstep_verify, - .target = speedstep_target, - .init = speedstep_cpu_init, - .exit = speedstep_cpu_exit, - .get = speedstep_get, - .resume = speedstep_resume, - .owner = THIS_MODULE, - .attr = speedstep_attr, -}; - -/** - * speedstep_init - initializes the SpeedStep CPUFreq driver - * - * Initializes the SpeedStep support. Returns -ENODEV on unsupported - * BIOS, -EINVAL on problems during initiatization, and zero on - * success. 
- */ -static int __init speedstep_init(void) -{ - speedstep_processor = speedstep_detect_processor(); - - switch (speedstep_processor) { - case SPEEDSTEP_PROCESSOR_PIII_T: - case SPEEDSTEP_PROCESSOR_PIII_C: - case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: - break; - default: - speedstep_processor = 0; - } - - if (!speedstep_processor) { - dprintk ("No supported Intel CPU detected.\n"); - return -ENODEV; - } - - dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n", - ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level); - - /* Error if no IST-SMI BIOS or no PARM - sig= 'ISGE' aka 'Intel Speedstep Gate E' */ - if ((ist_info.signature != 0x47534943) && ( - (smi_port == 0) || (smi_cmd == 0))) - return -ENODEV; - - if (smi_sig == 1) - smi_sig = 0x47534943; - else - smi_sig = ist_info.signature; - - /* setup smi_port from MODLULE_PARM or BIOS */ - if ((smi_port > 0xff) || (smi_port < 0)) - return -EINVAL; - else if (smi_port == 0) - smi_port = ist_info.command & 0xff; - - if ((smi_cmd > 0xff) || (smi_cmd < 0)) - return -EINVAL; - else if (smi_cmd == 0) - smi_cmd = (ist_info.command >> 16) & 0xff; - - return cpufreq_register_driver(&speedstep_driver); -} - - -/** - * speedstep_exit - unregisters SpeedStep support - * - * Unregisters SpeedStep support. 
- */ -static void __exit speedstep_exit(void) -{ - cpufreq_unregister_driver(&speedstep_driver); -} - -module_param(smi_port, int, 0444); -module_param(smi_cmd, int, 0444); -module_param(smi_sig, uint, 0444); - -MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value -- Intel's default setting is 0xb2"); -MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value -- Intel's default setting is 0x82"); -MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the SMI interface."); - -MODULE_AUTHOR ("Hiroshi Miura"); -MODULE_DESCRIPTION ("Speedstep driver for IST applet SMI interface."); -MODULE_LICENSE ("GPL"); - -module_init(speedstep_init); -module_exit(speedstep_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig new file mode 100644 index 00000000000..d8c6f132dc7 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -0,0 +1,250 @@ +# +# CPU Frequency scaling +# + +menu "CPU Frequency scaling" + +source "drivers/cpufreq/Kconfig" + +if CPU_FREQ + +comment "CPUFreq processor drivers" + +config X86_ACPI_CPUFREQ + tristate "ACPI Processor P-States driver" + select CPU_FREQ_TABLE + depends on ACPI_PROCESSOR + help + This driver adds a CPUFreq driver which utilizes the ACPI + Processor Performance States. + This driver also supports Intel Enhanced Speedstep. + + For details, take a look at . + + If in doubt, say N. + +config ELAN_CPUFREQ + tristate "AMD Elan SC400 and SC410" + select CPU_FREQ_TABLE + depends on X86_ELAN + ---help--- + This adds the CPUFreq driver for AMD Elan SC400 and SC410 + processors. + + You need to specify the processor maximum speed as boot + parameter: elanfreq=maxspeed (in kHz) or as module + parameter "max_freq". + + For details, take a look at . + + If in doubt, say N. + +config SC520_CPUFREQ + tristate "AMD Elan SC520" + select CPU_FREQ_TABLE + depends on X86_ELAN + ---help--- + This adds the CPUFreq driver for AMD Elan SC520 processor. 
+ + For details, take a look at . + + If in doubt, say N. + + +config X86_POWERNOW_K6 + tristate "AMD Mobile K6-2/K6-3 PowerNow!" + select CPU_FREQ_TABLE + help + This adds the CPUFreq driver for mobile AMD K6-2+ and mobile + AMD K6-3+ processors. + + For details, take a look at . + + If in doubt, say N. + +config X86_POWERNOW_K7 + tristate "AMD Mobile Athlon/Duron PowerNow!" + select CPU_FREQ_TABLE + help + This adds the CPUFreq driver for mobile AMD K7 mobile processors. + + For details, take a look at . + + If in doubt, say N. + +config X86_POWERNOW_K7_ACPI + bool + depends on X86_POWERNOW_K7 && ACPI_PROCESSOR + depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m) + default y + +config X86_POWERNOW_K8 + tristate "AMD Opteron/Athlon64 PowerNow!" + select CPU_FREQ_TABLE + depends on EXPERIMENTAL + help + This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. + + For details, take a look at . + + If in doubt, say N. + +config X86_POWERNOW_K8_ACPI + bool "ACPI Support" + select ACPI_PROCESSOR + depends on ACPI && X86_POWERNOW_K8 + default y + help + This provides access to the K8s Processor Performance States via ACPI. + This driver is probably required for CPUFreq to work with multi-socket and + SMP systems. It is not required on at least some single-socket yet + multi-core systems, even if SMP is enabled. + + It is safe to say Y here. + +config X86_GX_SUSPMOD + tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" + depends on PCI + help + This add the CPUFreq driver for NatSemi Geode processors which + support suspend modulation. + + For details, take a look at . + + If in doubt, say N. + +config X86_SPEEDSTEP_CENTRINO + tristate "Intel Enhanced SpeedStep" + select CPU_FREQ_TABLE + select X86_SPEEDSTEP_CENTRINO_TABLE + help + This adds the CPUFreq driver for Enhanced SpeedStep enabled + mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However, + you also need to say Y to "Use ACPI tables to decode..." 
below + [which might imply enabling ACPI] if you want to use this driver + on non-Banias CPUs. + + For details, take a look at . + + If in doubt, say N. + +config X86_SPEEDSTEP_CENTRINO_TABLE + bool "Built-in tables for Banias CPUs" + depends on X86_SPEEDSTEP_CENTRINO + default y + help + Use built-in tables for Banias CPUs if ACPI encoding + is not available. + + If in doubt, say N. + +config X86_SPEEDSTEP_ICH + tristate "Intel Speedstep on ICH-M chipsets (ioport interface)" + select CPU_FREQ_TABLE + help + This adds the CPUFreq driver for certain mobile Intel Pentium III + (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all + mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2, + ICH3 or ICH4 southbridge. + + For details, take a look at . + + If in doubt, say N. + +config X86_SPEEDSTEP_SMI + tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)" + select CPU_FREQ_TABLE + depends on EXPERIMENTAL + help + This adds the CPUFreq driver for certain mobile Intel Pentium III + (Coppermine), all mobile Intel Pentium III-M (Tualatin) + on systems which have an Intel 440BX/ZX/MX southbridge. + + For details, take a look at . + + If in doubt, say N. + +config X86_P4_CLOCKMOD + tristate "Intel Pentium 4 clock modulation" + select CPU_FREQ_TABLE + help + This adds the CPUFreq driver for Intel Pentium 4 / XEON + processors. + + For details, take a look at . + + If in doubt, say N. + +config X86_CPUFREQ_NFORCE2 + tristate "nVidia nForce2 FSB changing" + depends on EXPERIMENTAL + help + This adds the CPUFreq driver for FSB changing on nVidia nForce2 + platforms. + + For details, take a look at . + + If in doubt, say N. + +config X86_LONGRUN + tristate "Transmeta LongRun" + help + This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors + which support LongRun. + + For details, take a look at . + + If in doubt, say N. 
+ +config X86_LONGHAUL + tristate "VIA Cyrix III Longhaul" + select CPU_FREQ_TABLE + depends on ACPI_PROCESSOR + help + This adds the CPUFreq driver for VIA Samuel/CyrixIII, + VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T + processors. + + For details, take a look at . + + If in doubt, say N. + +config X86_E_POWERSAVER + tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" + select CPU_FREQ_TABLE + depends on EXPERIMENTAL + help + This adds the CPUFreq driver for VIA C7 processors. + + If in doubt, say N. + +comment "shared options" + +config X86_ACPI_CPUFREQ_PROC_INTF + bool "/proc/acpi/processor/../performance interface (deprecated)" + depends on PROC_FS + depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI + help + This enables the deprecated /proc/acpi/processor/../performance + interface. While it is helpful for debugging, the generic, + cross-architecture cpufreq interfaces should be used. + + If in doubt, say N. + +config X86_SPEEDSTEP_LIB + tristate + default X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD + +config X86_SPEEDSTEP_RELAXED_CAP_CHECK + bool "Relaxed speedstep capability checks" + depends on (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) + help + Don't perform all checks for a speedstep capable system which would + normally be done. Some ancient or strange systems, though speedstep + capable, don't always indicate that they are speedstep capable. This + option lets the probing code bypass some of those checks if the + parameter "relaxed_check=1" is passed to the module. 
+ +endif # CPU_FREQ + +endmenu diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile new file mode 100644 index 00000000000..560f7760dae --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/Makefile @@ -0,0 +1,16 @@ +obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o +obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o +obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o +obj-$(CONFIG_X86_LONGHAUL) += longhaul.o +obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o +obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o +obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o +obj-$(CONFIG_X86_LONGRUN) += longrun.o +obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o +obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o +obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o +obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o +obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o +obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o +obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o +obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c new file mode 100644 index 00000000000..705e13a3078 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -0,0 +1,799 @@ +/* + * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $) + * + * Copyright (C) 2001, 2002 Andy Grover + * Copyright (C) 2001, 2002 Paul Diefenbaugh + * Copyright (C) 2002 - 2004 Dominik Brodowski + * Copyright (C) 2006 Denis Sadykov + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) + +MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); +MODULE_DESCRIPTION("ACPI Processor P-States Driver"); +MODULE_LICENSE("GPL"); + +enum { + UNDEFINED_CAPABLE = 0, + SYSTEM_INTEL_MSR_CAPABLE, + SYSTEM_IO_CAPABLE, +}; + +#define INTEL_MSR_RANGE (0xffff) +#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) + +struct acpi_cpufreq_data { + struct acpi_processor_performance *acpi_data; + struct cpufreq_frequency_table *freq_table; + unsigned int max_freq; + unsigned int resume; + unsigned int cpu_feature; +}; + +static struct acpi_cpufreq_data *drv_data[NR_CPUS]; +/* acpi_perf_data is a pointer to percpu data. 
*/ +static struct acpi_processor_performance *acpi_perf_data; + +static struct cpufreq_driver acpi_cpufreq_driver; + +static unsigned int acpi_pstate_strict; + +static int check_est_cpu(unsigned int cpuid) +{ + struct cpuinfo_x86 *cpu = &cpu_data[cpuid]; + + if (cpu->x86_vendor != X86_VENDOR_INTEL || + !cpu_has(cpu, X86_FEATURE_EST)) + return 0; + + return 1; +} + +static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) +{ + struct acpi_processor_performance *perf; + int i; + + perf = data->acpi_data; + + for (i=0; istate_count; i++) { + if (value == perf->states[i].status) + return data->freq_table[i].frequency; + } + return 0; +} + +static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) +{ + int i; + struct acpi_processor_performance *perf; + + msr &= INTEL_MSR_RANGE; + perf = data->acpi_data; + + for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { + if (msr == perf->states[data->freq_table[i].index].status) + return data->freq_table[i].frequency; + } + return data->freq_table[0].frequency; +} + +static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) +{ + switch (data->cpu_feature) { + case SYSTEM_INTEL_MSR_CAPABLE: + return extract_msr(val, data); + case SYSTEM_IO_CAPABLE: + return extract_io(val, data); + default: + return 0; + } +} + +struct msr_addr { + u32 reg; +}; + +struct io_addr { + u16 port; + u8 bit_width; +}; + +typedef union { + struct msr_addr msr; + struct io_addr io; +} drv_addr_union; + +struct drv_cmd { + unsigned int type; + cpumask_t mask; + drv_addr_union addr; + u32 val; +}; + +static void do_drv_read(struct drv_cmd *cmd) +{ + u32 h; + + switch (cmd->type) { + case SYSTEM_INTEL_MSR_CAPABLE: + rdmsr(cmd->addr.msr.reg, cmd->val, h); + break; + case SYSTEM_IO_CAPABLE: + acpi_os_read_port((acpi_io_address)cmd->addr.io.port, + &cmd->val, + (u32)cmd->addr.io.bit_width); + break; + default: + break; + } +} + +static void do_drv_write(struct drv_cmd *cmd) +{ + u32 lo, hi; + + switch 
(cmd->type) { + case SYSTEM_INTEL_MSR_CAPABLE: + rdmsr(cmd->addr.msr.reg, lo, hi); + lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE); + wrmsr(cmd->addr.msr.reg, lo, hi); + break; + case SYSTEM_IO_CAPABLE: + acpi_os_write_port((acpi_io_address)cmd->addr.io.port, + cmd->val, + (u32)cmd->addr.io.bit_width); + break; + default: + break; + } +} + +static void drv_read(struct drv_cmd *cmd) +{ + cpumask_t saved_mask = current->cpus_allowed; + cmd->val = 0; + + set_cpus_allowed(current, cmd->mask); + do_drv_read(cmd); + set_cpus_allowed(current, saved_mask); +} + +static void drv_write(struct drv_cmd *cmd) +{ + cpumask_t saved_mask = current->cpus_allowed; + unsigned int i; + + for_each_cpu_mask(i, cmd->mask) { + set_cpus_allowed(current, cpumask_of_cpu(i)); + do_drv_write(cmd); + } + + set_cpus_allowed(current, saved_mask); + return; +} + +static u32 get_cur_val(cpumask_t mask) +{ + struct acpi_processor_performance *perf; + struct drv_cmd cmd; + + if (unlikely(cpus_empty(mask))) + return 0; + + switch (drv_data[first_cpu(mask)]->cpu_feature) { + case SYSTEM_INTEL_MSR_CAPABLE: + cmd.type = SYSTEM_INTEL_MSR_CAPABLE; + cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; + break; + case SYSTEM_IO_CAPABLE: + cmd.type = SYSTEM_IO_CAPABLE; + perf = drv_data[first_cpu(mask)]->acpi_data; + cmd.addr.io.port = perf->control_register.address; + cmd.addr.io.bit_width = perf->control_register.bit_width; + break; + default: + return 0; + } + + cmd.mask = mask; + + drv_read(&cmd); + + dprintk("get_cur_val = %u\n", cmd.val); + + return cmd.val; +} + +/* + * Return the measured active (C0) frequency on this CPU since last call + * to this function. + * Input: cpu number + * Return: Average CPU frequency in terms of max frequency (zero on error) + * + * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance + * over a period of time, while CPU is in C0 state. 
+ * IA32_MPERF counts at the rate of max advertised frequency + * IA32_APERF counts at the rate of actual CPU frequency + * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and + * no meaning should be associated with absolute values of these MSRs. + */ +static unsigned int get_measured_perf(unsigned int cpu) +{ + union { + struct { + u32 lo; + u32 hi; + } split; + u64 whole; + } aperf_cur, mperf_cur; + + cpumask_t saved_mask; + unsigned int perf_percent; + unsigned int retval; + + saved_mask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (get_cpu() != cpu) { + /* We were not able to run on requested processor */ + put_cpu(); + return 0; + } + + rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi); + rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi); + + wrmsr(MSR_IA32_APERF, 0,0); + wrmsr(MSR_IA32_MPERF, 0,0); + +#ifdef __i386__ + /* + * We dont want to do 64 bit divide with 32 bit kernel + * Get an approximate value. Return failure in case we cannot get + * an approximate value. 
+ */ + if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) { + int shift_count; + u32 h; + + h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi); + shift_count = fls(h); + + aperf_cur.whole >>= shift_count; + mperf_cur.whole >>= shift_count; + } + + if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) { + int shift_count = 7; + aperf_cur.split.lo >>= shift_count; + mperf_cur.split.lo >>= shift_count; + } + + if (aperf_cur.split.lo && mperf_cur.split.lo) + perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo; + else + perf_percent = 0; + +#else + if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) { + int shift_count = 7; + aperf_cur.whole >>= shift_count; + mperf_cur.whole >>= shift_count; + } + + if (aperf_cur.whole && mperf_cur.whole) + perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole; + else + perf_percent = 0; + +#endif + + retval = drv_data[cpu]->max_freq * perf_percent / 100; + + put_cpu(); + set_cpus_allowed(current, saved_mask); + + dprintk("cpu %d: performance percent %d\n", cpu, perf_percent); + return retval; +} + +static unsigned int get_cur_freq_on_cpu(unsigned int cpu) +{ + struct acpi_cpufreq_data *data = drv_data[cpu]; + unsigned int freq; + + dprintk("get_cur_freq_on_cpu (%d)\n", cpu); + + if (unlikely(data == NULL || + data->acpi_data == NULL || data->freq_table == NULL)) { + return 0; + } + + freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data); + dprintk("cur freq = %u\n", freq); + + return freq; +} + +static unsigned int check_freqs(cpumask_t mask, unsigned int freq, + struct acpi_cpufreq_data *data) +{ + unsigned int cur_freq; + unsigned int i; + + for (i=0; i<100; i++) { + cur_freq = extract_freq(get_cur_val(mask), data); + if (cur_freq == freq) + return 1; + udelay(10); + } + return 0; +} + +static int acpi_cpufreq_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + struct acpi_processor_performance 
*perf; + struct cpufreq_freqs freqs; + cpumask_t online_policy_cpus; + struct drv_cmd cmd; + unsigned int next_state = 0; /* Index into freq_table */ + unsigned int next_perf_state = 0; /* Index into perf table */ + unsigned int i; + int result = 0; + + dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); + + if (unlikely(data == NULL || + data->acpi_data == NULL || data->freq_table == NULL)) { + return -ENODEV; + } + + perf = data->acpi_data; + result = cpufreq_frequency_table_target(policy, + data->freq_table, + target_freq, + relation, &next_state); + if (unlikely(result)) + return -ENODEV; + +#ifdef CONFIG_HOTPLUG_CPU + /* cpufreq holds the hotplug lock, so we are safe from here on */ + cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); +#else + online_policy_cpus = policy->cpus; +#endif + + next_perf_state = data->freq_table[next_state].index; + if (perf->state == next_perf_state) { + if (unlikely(data->resume)) { + dprintk("Called after resume, resetting to P%d\n", + next_perf_state); + data->resume = 0; + } else { + dprintk("Already at target state (P%d)\n", + next_perf_state); + return 0; + } + } + + switch (data->cpu_feature) { + case SYSTEM_INTEL_MSR_CAPABLE: + cmd.type = SYSTEM_INTEL_MSR_CAPABLE; + cmd.addr.msr.reg = MSR_IA32_PERF_CTL; + cmd.val = (u32) perf->states[next_perf_state].control; + break; + case SYSTEM_IO_CAPABLE: + cmd.type = SYSTEM_IO_CAPABLE; + cmd.addr.io.port = perf->control_register.address; + cmd.addr.io.bit_width = perf->control_register.bit_width; + cmd.val = (u32) perf->states[next_perf_state].control; + break; + default: + return -ENODEV; + } + + cpus_clear(cmd.mask); + + if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) + cmd.mask = online_policy_cpus; + else + cpu_set(policy->cpu, cmd.mask); + + freqs.old = perf->states[perf->state].core_frequency * 1000; + freqs.new = data->freq_table[next_state].frequency; + for_each_cpu_mask(i, cmd.mask) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, 
CPUFREQ_PRECHANGE); + } + + drv_write(&cmd); + + if (acpi_pstate_strict) { + if (!check_freqs(cmd.mask, freqs.new, data)) { + dprintk("acpi_cpufreq_target failed (%d)\n", + policy->cpu); + return -EAGAIN; + } + } + + for_each_cpu_mask(i, cmd.mask) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + perf->state = next_perf_state; + + return result; +} + +static int acpi_cpufreq_verify(struct cpufreq_policy *policy) +{ + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + + dprintk("acpi_cpufreq_verify\n"); + + return cpufreq_frequency_table_verify(policy, data->freq_table); +} + +static unsigned long +acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) +{ + struct acpi_processor_performance *perf = data->acpi_data; + + if (cpu_khz) { + /* search the closest match to cpu_khz */ + unsigned int i; + unsigned long freq; + unsigned long freqn = perf->states[0].core_frequency * 1000; + + for (i=0; i<(perf->state_count-1); i++) { + freq = freqn; + freqn = perf->states[i+1].core_frequency * 1000; + if ((2 * cpu_khz) > (freqn + freq)) { + perf->state = i; + return freq; + } + } + perf->state = perf->state_count-1; + return freqn; + } else { + /* assume CPU is at P0... */ + perf->state = 0; + return perf->states[0].core_frequency * 1000; + } +} + +/* + * acpi_cpufreq_early_init - initialize ACPI P-States library + * + * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c) + * in order to determine correct frequency and voltage pairings. We can + * do _PDC and _PSD and find out the processor dependency for the + * actual init that will happen later... 
+ */ +static int __init acpi_cpufreq_early_init(void) +{ + dprintk("acpi_cpufreq_early_init\n"); + + acpi_perf_data = alloc_percpu(struct acpi_processor_performance); + if (!acpi_perf_data) { + dprintk("Memory allocation error for acpi_perf_data.\n"); + return -ENOMEM; + } + + /* Do initialization in ACPI core */ + acpi_processor_preregister_performance(acpi_perf_data); + return 0; +} + +#ifdef CONFIG_SMP +/* + * Some BIOSes do SW_ANY coordination internally, either set it up in hw + * or do it in BIOS firmware and won't inform about it to OS. If not + * detected, this has a side effect of making CPU run at a different speed + * than OS intended it to run at. Detect it and handle it cleanly. + */ +static int bios_with_sw_any_bug; + +static int sw_any_bug_found(struct dmi_system_id *d) +{ + bios_with_sw_any_bug = 1; + return 0; +} + +static struct dmi_system_id sw_any_bug_dmi_table[] = { + { + .callback = sw_any_bug_found, + .ident = "Supermicro Server X6DLP", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), + DMI_MATCH(DMI_BIOS_VERSION, "080010"), + DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), + }, + }, + { } +}; +#endif + +static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int i; + unsigned int valid_states = 0; + unsigned int cpu = policy->cpu; + struct acpi_cpufreq_data *data; + unsigned int result = 0; + struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; + struct acpi_processor_performance *perf; + + dprintk("acpi_cpufreq_cpu_init\n"); + + data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->acpi_data = percpu_ptr(acpi_perf_data, cpu); + drv_data[cpu] = data; + + if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) + acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; + + result = acpi_processor_register_performance(data->acpi_data, cpu); + if (result) + goto err_free; + + perf = data->acpi_data; + policy->shared_type = perf->shared_type; + + /* + * Will let policy->cpus know about dependency only 
when software + * coordination is required. + */ + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || + policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { + policy->cpus = perf->shared_cpu_map; + } + +#ifdef CONFIG_SMP + dmi_check_system(sw_any_bug_dmi_table); + if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { + policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; + policy->cpus = cpu_core_map[cpu]; + } +#endif + + /* capability check */ + if (perf->state_count <= 1) { + dprintk("No P-States\n"); + result = -ENODEV; + goto err_unreg; + } + + if (perf->control_register.space_id != perf->status_register.space_id) { + result = -ENODEV; + goto err_unreg; + } + + switch (perf->control_register.space_id) { + case ACPI_ADR_SPACE_SYSTEM_IO: + dprintk("SYSTEM IO addr space\n"); + data->cpu_feature = SYSTEM_IO_CAPABLE; + break; + case ACPI_ADR_SPACE_FIXED_HARDWARE: + dprintk("HARDWARE addr space\n"); + if (!check_est_cpu(cpu)) { + result = -ENODEV; + goto err_unreg; + } + data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; + break; + default: + dprintk("Unknown addr space %d\n", + (u32) (perf->control_register.space_id)); + result = -ENODEV; + goto err_unreg; + } + + data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * + (perf->state_count+1), GFP_KERNEL); + if (!data->freq_table) { + result = -ENOMEM; + goto err_unreg; + } + + /* detect transition latency */ + policy->cpuinfo.transition_latency = 0; + for (i=0; istate_count; i++) { + if ((perf->states[i].transition_latency * 1000) > + policy->cpuinfo.transition_latency) + policy->cpuinfo.transition_latency = + perf->states[i].transition_latency * 1000; + } + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + + data->max_freq = perf->states[0].core_frequency * 1000; + /* table init */ + for (i=0; istate_count; i++) { + if (i>0 && perf->states[i].core_frequency >= + data->freq_table[valid_states-1].frequency / 1000) + continue; + + data->freq_table[valid_states].index = i; + 
data->freq_table[valid_states].frequency = + perf->states[i].core_frequency * 1000; + valid_states++; + } + data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END; + perf->state = 0; + + result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table); + if (result) + goto err_freqfree; + + switch (perf->control_register.space_id) { + case ACPI_ADR_SPACE_SYSTEM_IO: + /* Current speed is unknown and not detectable by IO port */ + policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu); + break; + case ACPI_ADR_SPACE_FIXED_HARDWARE: + acpi_cpufreq_driver.get = get_cur_freq_on_cpu; + policy->cur = get_cur_freq_on_cpu(cpu); + break; + default: + break; + } + + /* notify BIOS that we exist */ + acpi_processor_notify_smm(THIS_MODULE); + + /* Check for APERF/MPERF support in hardware */ + if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) { + unsigned int ecx; + ecx = cpuid_ecx(6); + if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) + acpi_cpufreq_driver.getavg = get_measured_perf; + } + + dprintk("CPU%u - ACPI performance management activated.\n", cpu); + for (i = 0; i < perf->state_count; i++) + dprintk(" %cP%d: %d MHz, %d mW, %d uS\n", + (i == perf->state ? '*' : ' '), i, + (u32) perf->states[i].core_frequency, + (u32) perf->states[i].power, + (u32) perf->states[i].transition_latency); + + cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu); + + /* + * the first call to ->target() should result in us actually + * writing something to the appropriate registers. 
+ */ + data->resume = 1; + + return result; + +err_freqfree: + kfree(data->freq_table); +err_unreg: + acpi_processor_unregister_performance(perf, cpu); +err_free: + kfree(data); + drv_data[cpu] = NULL; + + return result; +} + +static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) +{ + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + + dprintk("acpi_cpufreq_cpu_exit\n"); + + if (data) { + cpufreq_frequency_table_put_attr(policy->cpu); + drv_data[policy->cpu] = NULL; + acpi_processor_unregister_performance(data->acpi_data, + policy->cpu); + kfree(data); + } + + return 0; +} + +static int acpi_cpufreq_resume(struct cpufreq_policy *policy) +{ + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + + dprintk("acpi_cpufreq_resume\n"); + + data->resume = 1; + + return 0; +} + +static struct freq_attr *acpi_cpufreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver acpi_cpufreq_driver = { + .verify = acpi_cpufreq_verify, + .target = acpi_cpufreq_target, + .init = acpi_cpufreq_cpu_init, + .exit = acpi_cpufreq_cpu_exit, + .resume = acpi_cpufreq_resume, + .name = "acpi-cpufreq", + .owner = THIS_MODULE, + .attr = acpi_cpufreq_attr, +}; + +static int __init acpi_cpufreq_init(void) +{ + int ret; + + dprintk("acpi_cpufreq_init\n"); + + ret = acpi_cpufreq_early_init(); + if (ret) + return ret; + + return cpufreq_register_driver(&acpi_cpufreq_driver); +} + +static void __exit acpi_cpufreq_exit(void) +{ + dprintk("acpi_cpufreq_exit\n"); + + cpufreq_unregister_driver(&acpi_cpufreq_driver); + + free_percpu(acpi_perf_data); + + return; +} + +module_param(acpi_pstate_strict, uint, 0644); +MODULE_PARM_DESC(acpi_pstate_strict, + "value 0 or non-zero. 
non-zero -> strict ACPI checks are " + "performed during frequency changes."); + +late_initcall(acpi_cpufreq_init); +module_exit(acpi_cpufreq_exit); + +MODULE_ALIAS("acpi"); diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c new file mode 100644 index 00000000000..66acd503991 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -0,0 +1,441 @@ +/* + * (C) 2004-2006 Sebastian Witt + * + * Licensed under the terms of the GNU GPL License version 2. + * Based upon reverse engineered information + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + +#include +#include +#include +#include +#include +#include +#include + +#define NFORCE2_XTAL 25 +#define NFORCE2_BOOTFSB 0x48 +#define NFORCE2_PLLENABLE 0xa8 +#define NFORCE2_PLLREG 0xa4 +#define NFORCE2_PLLADR 0xa0 +#define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div) + +#define NFORCE2_MIN_FSB 50 +#define NFORCE2_SAFE_DISTANCE 50 + +/* Delay in ms between FSB changes */ +//#define NFORCE2_DELAY 10 + +/* nforce2_chipset: + * FSB is changed using the chipset + */ +static struct pci_dev *nforce2_chipset_dev; + +/* fid: + * multiplier * 10 + */ +static int fid = 0; + +/* min_fsb, max_fsb: + * minimum and maximum FSB (= FSB at boot time) + */ +static int min_fsb = 0; +static int max_fsb = 0; + +MODULE_AUTHOR("Sebastian Witt "); +MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver"); +MODULE_LICENSE("GPL"); + +module_param(fid, int, 0444); +module_param(min_fsb, int, 0444); + +MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); +MODULE_PARM_DESC(min_fsb, + "Minimum FSB to use, if not defined: current FSB - 50"); + +#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) + +/** + * nforce2_calc_fsb - calculate FSB + * @pll: PLL value + * + * Calculates FSB from PLL value + */ +static int nforce2_calc_fsb(int pll) +{ + unsigned char mul, div; + + mul = (pll >> 8) & 0xff; + div = pll & 0xff; + + if (div > 0) + return NFORCE2_XTAL * mul / div; + + return 0; +} + +/** + * nforce2_calc_pll - calculate PLL value + * @fsb: FSB + * + * Calculate PLL value for given FSB + */ +static int nforce2_calc_pll(unsigned int fsb) +{ + unsigned char xmul, xdiv; + unsigned char mul = 0, div = 0; + int tried = 0; + + /* Try to calculate multiplier and divider up to 4 times */ + while (((mul == 0) || (div == 0)) && (tried <= 3)) { + for (xdiv = 2; xdiv <= 0x80; xdiv++) + for (xmul = 1; xmul <= 0xfe; xmul++) + if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) == + fsb + tried) { + mul = xmul; + div = xdiv; + } + tried++; + } + + if ((mul == 0) || (div == 0)) + return -1; + + return NFORCE2_PLL(mul, div); +} + +/** + * nforce2_write_pll - write PLL value to chipset + * @pll: PLL value + * + * Writes new FSB PLL value to chipset + */ +static void nforce2_write_pll(int pll) +{ + int temp; + + /* Set the pll addr. 
to 0x00 */ + pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, 0); + + /* Now write the value in all 64 registers */ + for (temp = 0; temp <= 0x3f; temp++) + pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, pll); + + return; +} + +/** + * nforce2_fsb_read - Read FSB + * + * Read FSB from chipset + * If bootfsb != 0, return FSB at boot-time + */ +static unsigned int nforce2_fsb_read(int bootfsb) +{ + struct pci_dev *nforce2_sub5; + u32 fsb, temp = 0; + + /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ + nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, + 0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL); + if (!nforce2_sub5) + return 0; + + pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb); + fsb /= 1000000; + + /* Check if PLL register is already set */ + pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); + + if(bootfsb || !temp) + return fsb; + + /* Use PLL register FSB value */ + pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp); + fsb = nforce2_calc_fsb(temp); + + return fsb; +} + +/** + * nforce2_set_fsb - set new FSB + * @fsb: New FSB + * + * Sets new FSB + */ +static int nforce2_set_fsb(unsigned int fsb) +{ + u32 temp = 0; + unsigned int tfsb; + int diff; + int pll = 0; + + if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { + printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb); + return -EINVAL; + } + + tfsb = nforce2_fsb_read(0); + if (!tfsb) { + printk(KERN_ERR "cpufreq: Error while reading the FSB\n"); + return -EINVAL; + } + + /* First write? 
Then set actual value */ + pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); + if (!temp) { + pll = nforce2_calc_pll(tfsb); + + if (pll < 0) + return -EINVAL; + + nforce2_write_pll(pll); + } + + /* Enable write access */ + temp = 0x01; + pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8)temp); + + diff = tfsb - fsb; + + if (!diff) + return 0; + + while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) { + if (diff < 0) + tfsb++; + else + tfsb--; + + /* Calculate the PLL reg. value */ + if ((pll = nforce2_calc_pll(tfsb)) == -1) + return -EINVAL; + + nforce2_write_pll(pll); +#ifdef NFORCE2_DELAY + mdelay(NFORCE2_DELAY); +#endif + } + + temp = 0x40; + pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLADR, (u8)temp); + + return 0; +} + +/** + * nforce2_get - get the CPU frequency + * @cpu: CPU number + * + * Returns the CPU frequency + */ +static unsigned int nforce2_get(unsigned int cpu) +{ + if (cpu) + return 0; + return nforce2_fsb_read(0) * fid * 100; +} + +/** + * nforce2_target - set a new CPUFreq policy + * @policy: new policy + * @target_freq: the target frequency + * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * + * Sets a new CPUFreq policy. 
+ */ +static int nforce2_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ +// unsigned long flags; + struct cpufreq_freqs freqs; + unsigned int target_fsb; + + if ((target_freq > policy->max) || (target_freq < policy->min)) + return -EINVAL; + + target_fsb = target_freq / (fid * 100); + + freqs.old = nforce2_get(policy->cpu); + freqs.new = target_fsb * fid * 100; + freqs.cpu = 0; /* Only one CPU on nForce2 plattforms */ + + if (freqs.old == freqs.new) + return 0; + + dprintk("Old CPU frequency %d kHz, new %d kHz\n", + freqs.old, freqs.new); + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + /* Disable IRQs */ + //local_irq_save(flags); + + if (nforce2_set_fsb(target_fsb) < 0) + printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", + target_fsb); + else + dprintk("Changed FSB successfully to %d\n", + target_fsb); + + /* Enable IRQs */ + //local_irq_restore(flags); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + return 0; +} + +/** + * nforce2_verify - verifies a new CPUFreq policy + * @policy: new policy + */ +static int nforce2_verify(struct cpufreq_policy *policy) +{ + unsigned int fsb_pol_max; + + fsb_pol_max = policy->max / (fid * 100); + + if (policy->min < (fsb_pol_max * fid * 100)) + policy->max = (fsb_pol_max + 1) * fid * 100; + + cpufreq_verify_within_limits(policy, + policy->cpuinfo.min_freq, + policy->cpuinfo.max_freq); + return 0; +} + +static int nforce2_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int fsb; + unsigned int rfid; + + /* capability check */ + if (policy->cpu != 0) + return -ENODEV; + + /* Get current FSB */ + fsb = nforce2_fsb_read(0); + + if (!fsb) + return -EIO; + + /* FIX: Get FID from CPU */ + if (!fid) { + if (!cpu_khz) { + printk(KERN_WARNING + "cpufreq: cpu_khz not set, can't calculate multiplier!\n"); + return -ENODEV; + } + + fid = cpu_khz / (fsb * 100); + rfid = fid % 5; + + if (rfid) { + if (rfid > 2) + fid += 5 - rfid; + else + fid -= rfid; + } + } 
+ + printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb, + fid / 10, fid % 10); + + /* Set maximum FSB to FSB at boot time */ + max_fsb = nforce2_fsb_read(1); + + if(!max_fsb) + return -EIO; + + if (!min_fsb) + min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE; + + if (min_fsb < NFORCE2_MIN_FSB) + min_fsb = NFORCE2_MIN_FSB; + + /* cpuinfo and default policy values */ + policy->cpuinfo.min_freq = min_fsb * fid * 100; + policy->cpuinfo.max_freq = max_fsb * fid * 100; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = nforce2_get(policy->cpu); + policy->min = policy->cpuinfo.min_freq; + policy->max = policy->cpuinfo.max_freq; + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + + return 0; +} + +static int nforce2_cpu_exit(struct cpufreq_policy *policy) +{ + return 0; +} + +static struct cpufreq_driver nforce2_driver = { + .name = "nforce2", + .verify = nforce2_verify, + .target = nforce2_target, + .get = nforce2_get, + .init = nforce2_cpu_init, + .exit = nforce2_cpu_exit, + .owner = THIS_MODULE, +}; + +/** + * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic + * + * Detects nForce2 A2 and C1 stepping + * + */ +static unsigned int nforce2_detect_chipset(void) +{ + nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, + PCI_DEVICE_ID_NVIDIA_NFORCE2, + PCI_ANY_ID, PCI_ANY_ID, NULL); + + if (nforce2_chipset_dev == NULL) + return -ENODEV; + + printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n", + nforce2_chipset_dev->revision); + printk(KERN_INFO + "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n"); + + return 0; +} + +/** + * nforce2_init - initializes the nForce2 CPUFreq driver + * + * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported + * devices, -EINVAL on problems during initiatization, and zero on + * success. + */ +static int __init nforce2_init(void) +{ + /* TODO: do we need to detect the processor? 
*/ + + /* detect chipset */ + if (nforce2_detect_chipset()) { + printk(KERN_ERR "cpufreq: No nForce2 chipset.\n"); + return -ENODEV; + } + + return cpufreq_register_driver(&nforce2_driver); +} + +/** + * nforce2_exit - unregisters cpufreq module + * + * Unregisters nForce2 FSB change support. + */ +static void __exit nforce2_exit(void) +{ + cpufreq_unregister_driver(&nforce2_driver); +} + +module_init(nforce2_init); +module_exit(nforce2_exit); + diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c new file mode 100644 index 00000000000..f43d98e11cc --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c @@ -0,0 +1,334 @@ +/* + * Based on documentation provided by Dave Jones. Thanks! + * + * Licensed under the terms of the GNU GPL License version 2. + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define EPS_BRAND_C7M 0 +#define EPS_BRAND_C7 1 +#define EPS_BRAND_EDEN 2 +#define EPS_BRAND_C3 3 + +struct eps_cpu_data { + u32 fsb; + struct cpufreq_frequency_table freq_table[]; +}; + +static struct eps_cpu_data *eps_cpu[NR_CPUS]; + + +static unsigned int eps_get(unsigned int cpu) +{ + struct eps_cpu_data *centaur; + u32 lo, hi; + + if (cpu) + return 0; + centaur = eps_cpu[cpu]; + if (centaur == NULL) + return 0; + + /* Return current frequency */ + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + return centaur->fsb * ((lo >> 8) & 0xff); +} + +static int eps_set_state(struct eps_cpu_data *centaur, + unsigned int cpu, + u32 dest_state) +{ + struct cpufreq_freqs freqs; + u32 lo, hi; + int err = 0; + int i; + + freqs.old = eps_get(cpu); + freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff); + freqs.cpu = cpu; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + /* Wait while CPU is busy */ + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + i = 0; + while (lo & ((1 << 16) | (1 << 17))) { 
+ udelay(16); + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + i++; + if (unlikely(i > 64)) { + err = -ENODEV; + goto postchange; + } + } + /* Set new multiplier and voltage */ + wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0); + /* Wait until transition end */ + i = 0; + do { + udelay(16); + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + i++; + if (unlikely(i > 64)) { + err = -ENODEV; + goto postchange; + } + } while (lo & ((1 << 16) | (1 << 17))); + + /* Return current frequency */ +postchange: + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + freqs.new = centaur->fsb * ((lo >> 8) & 0xff); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + return err; +} + +static int eps_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + struct eps_cpu_data *centaur; + unsigned int newstate = 0; + unsigned int cpu = policy->cpu; + unsigned int dest_state; + int ret; + + if (unlikely(eps_cpu[cpu] == NULL)) + return -ENODEV; + centaur = eps_cpu[cpu]; + + if (unlikely(cpufreq_frequency_table_target(policy, + &eps_cpu[cpu]->freq_table[0], + target_freq, + relation, + &newstate))) { + return -EINVAL; + } + + /* Make frequency transition */ + dest_state = centaur->freq_table[newstate].index & 0xffff; + ret = eps_set_state(centaur, cpu, dest_state); + if (ret) + printk(KERN_ERR "eps: Timeout!\n"); + return ret; +} + +static int eps_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, + &eps_cpu[policy->cpu]->freq_table[0]); +} + +static int eps_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int i; + u32 lo, hi; + u64 val; + u8 current_multiplier, current_voltage; + u8 max_multiplier, max_voltage; + u8 min_multiplier, min_voltage; + u8 brand; + u32 fsb; + struct eps_cpu_data *centaur; + struct cpufreq_frequency_table *f_table; + int k, step, voltage; + int ret; + int states; + + if (policy->cpu != 0) + return -ENODEV; + + /* Check brand */ + printk("eps: Detected VIA "); + rdmsr(0x1153, lo, hi); + brand = (((lo >> 
2) ^ lo) >> 18) & 3; + switch(brand) { + case EPS_BRAND_C7M: + printk("C7-M\n"); + break; + case EPS_BRAND_C7: + printk("C7\n"); + break; + case EPS_BRAND_EDEN: + printk("Eden\n"); + break; + case EPS_BRAND_C3: + printk("C3\n"); + return -ENODEV; + break; + } + /* Enable Enhanced PowerSaver */ + rdmsrl(MSR_IA32_MISC_ENABLE, val); + if (!(val & 1 << 16)) { + val |= 1 << 16; + wrmsrl(MSR_IA32_MISC_ENABLE, val); + /* Can be locked at 0 */ + rdmsrl(MSR_IA32_MISC_ENABLE, val); + if (!(val & 1 << 16)) { + printk("eps: Can't enable Enhanced PowerSaver\n"); + return -ENODEV; + } + } + + /* Print voltage and multiplier */ + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + current_voltage = lo & 0xff; + printk("eps: Current voltage = %dmV\n", current_voltage * 16 + 700); + current_multiplier = (lo >> 8) & 0xff; + printk("eps: Current multiplier = %d\n", current_multiplier); + + /* Print limits */ + max_voltage = hi & 0xff; + printk("eps: Highest voltage = %dmV\n", max_voltage * 16 + 700); + max_multiplier = (hi >> 8) & 0xff; + printk("eps: Highest multiplier = %d\n", max_multiplier); + min_voltage = (hi >> 16) & 0xff; + printk("eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700); + min_multiplier = (hi >> 24) & 0xff; + printk("eps: Lowest multiplier = %d\n", min_multiplier); + + /* Sanity checks */ + if (current_multiplier == 0 || max_multiplier == 0 + || min_multiplier == 0) + return -EINVAL; + if (current_multiplier > max_multiplier + || max_multiplier <= min_multiplier) + return -EINVAL; + if (current_voltage > 0x1c || max_voltage > 0x1c) + return -EINVAL; + if (max_voltage < min_voltage) + return -EINVAL; + + /* Calc FSB speed */ + fsb = cpu_khz / current_multiplier; + /* Calc number of p-states supported */ + if (brand == EPS_BRAND_C7M) + states = max_multiplier - min_multiplier + 1; + else + states = 2; + + /* Allocate private data and frequency table for current cpu */ + centaur = kzalloc(sizeof(struct eps_cpu_data) + + (states + 1) * sizeof(struct cpufreq_frequency_table), 
+ GFP_KERNEL); + if (!centaur) + return -ENOMEM; + eps_cpu[0] = centaur; + + /* Copy basic values */ + centaur->fsb = fsb; + + /* Fill frequency and MSR value table */ + f_table = ¢aur->freq_table[0]; + if (brand != EPS_BRAND_C7M) { + f_table[0].frequency = fsb * min_multiplier; + f_table[0].index = (min_multiplier << 8) | min_voltage; + f_table[1].frequency = fsb * max_multiplier; + f_table[1].index = (max_multiplier << 8) | max_voltage; + f_table[2].frequency = CPUFREQ_TABLE_END; + } else { + k = 0; + step = ((max_voltage - min_voltage) * 256) + / (max_multiplier - min_multiplier); + for (i = min_multiplier; i <= max_multiplier; i++) { + voltage = (k * step) / 256 + min_voltage; + f_table[k].frequency = fsb * i; + f_table[k].index = (i << 8) | voltage; + k++; + } + f_table[k].frequency = CPUFREQ_TABLE_END; + } + + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */ + policy->cur = fsb * current_multiplier; + + ret = cpufreq_frequency_table_cpuinfo(policy, ¢aur->freq_table[0]); + if (ret) { + kfree(centaur); + return ret; + } + + cpufreq_frequency_table_get_attr(¢aur->freq_table[0], policy->cpu); + return 0; +} + +static int eps_cpu_exit(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + struct eps_cpu_data *centaur; + u32 lo, hi; + + if (eps_cpu[cpu] == NULL) + return -ENODEV; + centaur = eps_cpu[cpu]; + + /* Get max frequency */ + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + /* Set max frequency */ + eps_set_state(centaur, cpu, hi & 0xffff); + /* Bye */ + cpufreq_frequency_table_put_attr(policy->cpu); + kfree(eps_cpu[cpu]); + eps_cpu[cpu] = NULL; + return 0; +} + +static struct freq_attr* eps_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver eps_driver = { + .verify = eps_verify, + .target = eps_target, + .init = eps_cpu_init, + .exit = eps_cpu_exit, + .get = eps_get, + .name = "e_powersaver", + .owner = THIS_MODULE, + .attr = 
eps_attr, +}; + +static int __init eps_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + /* This driver will work only on Centaur C7 processors with + * Enhanced SpeedStep/PowerSaver registers */ + if (c->x86_vendor != X86_VENDOR_CENTAUR + || c->x86 != 6 || c->x86_model != 10) + return -ENODEV; + if (!cpu_has(c, X86_FEATURE_EST)) + return -ENODEV; + + if (cpufreq_register_driver(&eps_driver)) + return -EINVAL; + return 0; +} + +static void __exit eps_exit(void) +{ + cpufreq_unregister_driver(&eps_driver); +} + +MODULE_AUTHOR("Rafa³ Bilski "); +MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's."); +MODULE_LICENSE("GPL"); + +module_init(eps_init); +module_exit(eps_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c new file mode 100644 index 00000000000..f317276afa7 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -0,0 +1,309 @@ +/* + * elanfreq: cpufreq driver for the AMD ELAN family + * + * (c) Copyright 2002 Robert Schwebel + * + * Parts of this code are (c) Sven Geggus + * + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * 2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel + * + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ +#define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ + +/* Module parameter */ +static int max_freq; + +struct s_elan_multiplier { + int clock; /* frequency in kHz */ + int val40h; /* PMU Force Mode register */ + int val80h; /* CPU Clock Speed Register */ +}; + +/* + * It is important that the frequencies + * are listed in ascending order here! 
+ */ +struct s_elan_multiplier elan_multiplier[] = { + {1000, 0x02, 0x18}, + {2000, 0x02, 0x10}, + {4000, 0x02, 0x08}, + {8000, 0x00, 0x00}, + {16000, 0x00, 0x02}, + {33000, 0x00, 0x04}, + {66000, 0x01, 0x04}, + {99000, 0x01, 0x05} +}; + +static struct cpufreq_frequency_table elanfreq_table[] = { + {0, 1000}, + {1, 2000}, + {2, 4000}, + {3, 8000}, + {4, 16000}, + {5, 33000}, + {6, 66000}, + {7, 99000}, + {0, CPUFREQ_TABLE_END}, +}; + + +/** + * elanfreq_get_cpu_frequency: determine current cpu speed + * + * Finds out at which frequency the CPU of the Elan SOC runs + * at the moment. Frequencies from 1 to 33 MHz are generated + * the normal way, 66 and 99 MHz are called "Hyperspeed Mode" + * and have the rest of the chip running with 33 MHz. + */ + +static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) +{ + u8 clockspeed_reg; /* Clock Speed Register */ + + local_irq_disable(); + outb_p(0x80,REG_CSCIR); + clockspeed_reg = inb_p(REG_CSCDR); + local_irq_enable(); + + if ((clockspeed_reg & 0xE0) == 0xE0) + return 0; + + /* Are we in CPU clock multiplied mode (66/99 MHz)? */ + if ((clockspeed_reg & 0xE0) == 0xC0) { + if ((clockspeed_reg & 0x01) == 0) + return 66000; + else + return 99000; + } + + /* 33 MHz is not 32 MHz... */ + if ((clockspeed_reg & 0xE0)==0xA0) + return 33000; + + return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000); +} + + +/** + * elanfreq_set_cpu_frequency: Change the CPU core frequency + * @cpu: cpu number + * @freq: frequency in kHz + * + * This function takes a frequency value and changes the CPU frequency + * according to this. Note that the frequency has to be checked by + * elanfreq_validatespeed() for correctness! + * + * There is no return value. 
+ */ + +static void elanfreq_set_cpu_state (unsigned int state) +{ + struct cpufreq_freqs freqs; + + freqs.old = elanfreq_get_cpu_frequency(0); + freqs.new = elan_multiplier[state].clock; + freqs.cpu = 0; /* elanfreq.c is UP only driver */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n", + elan_multiplier[state].clock); + + + /* + * Access to the Elan's internal registers is indexed via + * 0x22: Chip Setup & Control Register Index Register (CSCI) + * 0x23: Chip Setup & Control Register Data Register (CSCD) + * + */ + + /* + * 0x40 is the Power Management Unit's Force Mode Register. + * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency) + */ + + local_irq_disable(); + outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */ + outb_p(0x00,REG_CSCDR); + local_irq_enable(); /* wait till internal pipelines and */ + udelay(1000); /* buffers have cleaned up */ + + local_irq_disable(); + + /* now, set the CPU clock speed register (0x80) */ + outb_p(0x80,REG_CSCIR); + outb_p(elan_multiplier[state].val80h,REG_CSCDR); + + /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ + outb_p(0x40,REG_CSCIR); + outb_p(elan_multiplier[state].val40h,REG_CSCDR); + udelay(10000); + local_irq_enable(); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); +}; + + +/** + * elanfreq_validatespeed: test if frequency range is valid + * @policy: the policy to validate + * + * This function checks if a given frequency range in kHz is valid + * for the hardware supported by the driver. 
+ */ + +static int elanfreq_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); +} + +static int elanfreq_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate)) + return -EINVAL; + + elanfreq_set_cpu_state(newstate); + + return 0; +} + + +/* + * Module init and exit code + */ + +static int elanfreq_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *c = cpu_data; + unsigned int i; + int result; + + /* capability check */ + if ((c->x86_vendor != X86_VENDOR_AMD) || + (c->x86 != 4) || (c->x86_model!=10)) + return -ENODEV; + + /* max freq */ + if (!max_freq) + max_freq = elanfreq_get_cpu_frequency(0); + + /* table init */ + for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { + if (elanfreq_table[i].frequency > max_freq) + elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; + } + + /* cpuinfo and default policy values */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = elanfreq_get_cpu_frequency(0); + + result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); + return 0; +} + + +static int elanfreq_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + + +#ifndef MODULE +/** + * elanfreq_setup - elanfreq command line parameter parsing + * + * elanfreq command line parameter. Use: + * elanfreq=66000 + * to set the maximum CPU frequency to 66 MHz. Note that in + * case you do not give this boot parameter, the maximum + * frequency will fall back to _current_ CPU frequency which + * might be lower. If you build this as a module, use the + * max_freq module parameter instead. 
+ */ +static int __init elanfreq_setup(char *str) +{ + max_freq = simple_strtoul(str, &str, 0); + printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n"); + return 1; +} +__setup("elanfreq=", elanfreq_setup); +#endif + + +static struct freq_attr* elanfreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + + +static struct cpufreq_driver elanfreq_driver = { + .get = elanfreq_get_cpu_frequency, + .verify = elanfreq_verify, + .target = elanfreq_target, + .init = elanfreq_cpu_init, + .exit = elanfreq_cpu_exit, + .name = "elanfreq", + .owner = THIS_MODULE, + .attr = elanfreq_attr, +}; + + +static int __init elanfreq_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + /* Test if we have the right hardware */ + if ((c->x86_vendor != X86_VENDOR_AMD) || + (c->x86 != 4) || (c->x86_model!=10)) { + printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); + return -ENODEV; + } + return cpufreq_register_driver(&elanfreq_driver); +} + + +static void __exit elanfreq_exit(void) +{ + cpufreq_unregister_driver(&elanfreq_driver); +} + + +module_param (max_freq, int, 0444); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Robert Schwebel , Sven Geggus "); +MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs"); + +module_init(elanfreq_init); +module_exit(elanfreq_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c new file mode 100644 index 00000000000..461dabc4e49 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c @@ -0,0 +1,495 @@ +/* + * Cyrix MediaGX and NatSemi Geode Suspend Modulation + * (C) 2002 Zwane Mwaikambo + * (C) 2002 Hiroshi Miura + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation + * + * The author(s) of this software shall not be held liable for damages + 
* of any nature resulting due to the use of this software. This + * software is provided AS-IS with no warranties. + * + * Theoretical note: + * + * (see Geode(tm) CS5530 manual (rev.4.1) page.56) + * + * CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0 + * is based on Suspend Modulation. + * + * Suspend Modulation works by asserting and de-asserting the SUSP# pin + * to CPU(GX1/GXLV) for configurable durations. When asserting SUSP# + * the CPU enters an idle state. GX1 stops its core clock when SUSP# is + * asserted, so power consumption is reduced. + * + * Suspend Modulation's OFF/ON durations are configurable + * with 'Suspend Modulation OFF Count Register' + * and 'Suspend Modulation ON Count Register'. + * These registers are 8-bit counters that represent the number of + * 32us intervals for which the SUSP# pin is asserted(ON)/de-asserted(OFF) + * to the processor. + * + * These counters define a ratio which is the effective frequency + * of operation of the system. + * + * F_eff = Fgx * OFF Count / (OFF Count + ON Count) + * + * 0 <= On Count, Off Count <= 255 + * + * From these limits, we can get register values + * + * off_duration + on_duration <= MAX_DURATION + * on_duration = off_duration * (stock_freq - freq) / freq + * + * off_duration = (freq * DURATION) / stock_freq + * on_duration = DURATION - off_duration + * + * + *--------------------------------------------------------------------------- + * + * ChangeLog: + * Dec. 12, 2003 Hiroshi Miura + * - fix on/off register mistake + * - fix cpu_khz calc when it stops cpu modulation. + * + * Dec. 11, 2002 Hiroshi Miura + * - rewrite for Cyrix MediaGX Cx5510/5520 and + * NatSemi Geode Cs5530(A). + * + * Jul. ??, 2002 Zwane Mwaikambo + * - cs5530_mod patch for 2.4.19-rc1. 
+ * + *--------------------------------------------------------------------------- + * + * Todo + * Test on machines with 5510, 5530, 5530A + */ + +/************************************************************************ + * Suspend Modulation - Definitions * + ************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* PCI config registers, all at F0 */ +#define PCI_PMER1 0x80 /* power management enable register 1 */ +#define PCI_PMER2 0x81 /* power management enable register 2 */ +#define PCI_PMER3 0x82 /* power management enable register 3 */ +#define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */ +#define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */ +#define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */ +#define PCI_MODON 0x95 /* suspend modulation ON counter register */ +#define PCI_SUSCFG 0x96 /* suspend configuration register */ + +/* PMER1 bits */ +#define GPM (1<<0) /* global power management */ +#define GIT (1<<1) /* globally enable PM device idle timers */ +#define GTR (1<<2) /* globally enable IO traps */ +#define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */ +#define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */ + +/* SUSCFG bits */ +#define SUSMOD (1<<0) /* enable/disable suspend modulation */ +/* the belows support only with cs5530 (after rev.1.2)/cs5530A */ +#define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */ + /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */ +#define SUSCFG (1<<2) /* enable powering down a GXLV processor. 
"Special 3Volt Suspend" mode */ +/* the belows support only with cs5530A */ +#define PWRSVE_ISA (1<<3) /* stop ISA clock */ +#define PWRSVE (1<<4) /* active idle */ + +struct gxfreq_params { + u8 on_duration; + u8 off_duration; + u8 pci_suscfg; + u8 pci_pmer1; + u8 pci_pmer2; + struct pci_dev *cs55x0; +}; + +static struct gxfreq_params *gx_params; +static int stock_freq; + +/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */ +static int pci_busclk = 0; +module_param (pci_busclk, int, 0444); + +/* maximum duration for which the cpu may be suspended + * (32us * MAX_DURATION). If no parameter is given, this defaults + * to 255. + * Note that this leads to a maximum of 8 ms(!) where the CPU clock + * is suspended -- processing power is just 0.39% of what it used to be, + * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */ +static int max_duration = 255; +module_param (max_duration, int, 0444); + +/* For the default policy, we want at least some processing power + * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV) + */ +#define POLICY_MIN_DIV 20 + + +#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg) + +/** + * we can detect a core multiplier from dir0_lsb + * from GX1 datasheet p.56, + * MULT[3:0]: + * 0000 = SYSCLK multiplied by 4 (test only) + * 0001 = SYSCLK multiplied by 10 + * 0010 = SYSCLK multiplied by 4 + * 0011 = SYSCLK multiplied by 6 + * 0100 = SYSCLK multiplied by 9 + * 0101 = SYSCLK multiplied by 5 + * 0110 = SYSCLK multiplied by 7 + * 0111 = SYSCLK multiplied by 8 + * of 33.3MHz + **/ +static int gx_freq_mult[16] = { + 4, 10, 4, 6, 9, 5, 7, 8, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + + +/**************************************************************** + * Low Level chipset interface * + ****************************************************************/ +static struct pci_device_id gx_chipset_tbl[] __initdata = { + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID }, + { 0, }, +}; + +/** + * gx_detect_chipset: + * return the first PCI device listed in gx_chipset_tbl; NULL if the CPU vendor is not NSC/Cyrix or no listed chip is found + **/ +static __init struct pci_dev *gx_detect_chipset(void) +{ + struct pci_dev *gx_pci = NULL; + + /* check if CPU is a MediaGX or a Geode. */ + if ((current_cpu_data.x86_vendor != X86_VENDOR_NSC) && + (current_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) { + dprintk("error: no MediaGX/Geode processor found!\n"); + return NULL; + } + + /* detect which companion chip is used */ + while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) { + if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) + return gx_pci; + } + + dprintk("error: no supported chipset found!\n"); + return NULL; +} + +/** + * gx_get_cpuspeed: + * + * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi Geode CPU runs. 
+ */ +static unsigned int gx_get_cpuspeed(unsigned int cpu) +{ + if ((gx_params->pci_suscfg & SUSMOD) == 0) + return stock_freq; + + return (stock_freq * gx_params->off_duration) + / (gx_params->on_duration + gx_params->off_duration); +} + +/** + * gx_validate_speed: + * find the ON/OFF durations (total <= max_duration) whose speed is closest to khz, store them in *on_duration and *off_duration + * and return that speed in kHz (stock_freq when no candidate is at least as close) + **/ + +static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration) +{ + unsigned int i; + u8 tmp_on, tmp_off; + int old_tmp_freq = stock_freq; + int tmp_freq; + + *off_duration=1; + *on_duration=0; + + for (i=max_duration; i>0; i--) { + tmp_off = ((khz * i) / stock_freq) & 0xff; + tmp_on = i - tmp_off; + tmp_freq = (stock_freq * tmp_off) / i; + /* if this relation is closer to khz, use this. If it's equal, + * prefer it, too - lower latency */ + if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) { + *on_duration = tmp_on; + *off_duration = tmp_off; + old_tmp_freq = tmp_freq; + } + } + + return old_tmp_freq; +} + + +/** + * gx_set_cpuspeed: + * set cpu speed in khz. 
+ **/ + +static void gx_set_cpuspeed(unsigned int khz) +{ + u8 suscfg, pmer1; + unsigned int new_khz; + unsigned long flags; + struct cpufreq_freqs freqs; + + freqs.cpu = 0; + freqs.old = gx_get_cpuspeed(0); + + new_khz = gx_validate_speed(khz, &gx_params->on_duration, &gx_params->off_duration); + + freqs.new = new_khz; + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + local_irq_save(flags); + + if (new_khz != stock_freq) { /* if new khz == 100% of CPU speed, it is special case */ + switch (gx_params->cs55x0->device) { + case PCI_DEVICE_ID_CYRIX_5530_LEGACY: + pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP; + /* FIXME: need to test other values -- Zwane,Miura */ + pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */ + pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */ + pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1); + + if (gx_params->cs55x0->revision < 0x10) { /* CS5530(rev 1.2, 1.3) */ + suscfg = gx_params->pci_suscfg | SUSMOD; + } else { /* CS5530A,B.. 
*/ + suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE; + } + break; + case PCI_DEVICE_ID_CYRIX_5520: + case PCI_DEVICE_ID_CYRIX_5510: + suscfg = gx_params->pci_suscfg | SUSMOD; + break; + default: + local_irq_restore(flags); + dprintk("fatal: try to set unknown chipset.\n"); + return; + } + } else { + suscfg = gx_params->pci_suscfg & ~(SUSMOD); + gx_params->off_duration = 0; + gx_params->on_duration = 0; + dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n"); + } + + pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration); + pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration); + + pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg); + pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); + + local_irq_restore(flags); + + gx_params->pci_suscfg = suscfg; + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", + gx_params->on_duration * 32, gx_params->off_duration * 32); + dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); +} + +/**************************************************************** + * High level functions * + ****************************************************************/ + +/* + * cpufreq_gx_verify: test if frequency range is valid + * + * This function checks if a given frequency range in kHz is valid + * for the hardware supported by the driver. + */ + +static int cpufreq_gx_verify(struct cpufreq_policy *policy) +{ + unsigned int tmp_freq = 0; + u8 tmp1, tmp2; + + if (!stock_freq || !policy) + return -EINVAL; + + policy->cpu = 0; + cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); + + /* it needs to be assured that at least one supported frequency is + * within policy->min and policy->max. If it is not, policy->max + * needs to be increased until one freuqency is supported. + * policy->min may not be decreased, though. 
This way we guarantee a + * specific processing capacity. + */ + tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2); + if (tmp_freq < policy->min) + tmp_freq += stock_freq / max_duration; + policy->min = tmp_freq; + if (policy->min > policy->max) + policy->max = tmp_freq; + tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2); + if (tmp_freq > policy->max) + tmp_freq -= stock_freq / max_duration; + policy->max = tmp_freq; + if (policy->max < policy->min) + policy->max = policy->min; + cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); + + return 0; +} + +/* + * cpufreq_gx_target: + * + */ +static int cpufreq_gx_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + u8 tmp1, tmp2; + unsigned int tmp_freq; + + if (!stock_freq || !policy) + return -EINVAL; + + policy->cpu = 0; + + tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2); + while (tmp_freq < policy->min) { + tmp_freq += stock_freq / max_duration; + tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); + } + while (tmp_freq > policy->max) { + tmp_freq -= stock_freq / max_duration; + tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); + } + + gx_set_cpuspeed(tmp_freq); + + return 0; +} + +static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int maxfreq, curfreq; + + if (!policy || policy->cpu != 0) + return -ENODEV; + + /* determine maximum frequency */ + if (pci_busclk) { + maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; + } else if (cpu_khz) { + maxfreq = cpu_khz; + } else { + maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; + } + stock_freq = maxfreq; + curfreq = gx_get_cpuspeed(0); + + dprintk("cpu max frequency is %d.\n", maxfreq); + dprintk("cpu current frequency is %dkHz.\n",curfreq); + + /* setup basic struct for cpufreq API */ + policy->cpu = 0; + + if (max_duration < POLICY_MIN_DIV) + policy->min = maxfreq / max_duration; + else + policy->min = maxfreq / POLICY_MIN_DIV; 
+ policy->max = maxfreq; + policy->cur = curfreq; + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.min_freq = maxfreq / max_duration; + policy->cpuinfo.max_freq = maxfreq; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + + return 0; +} + +/* + * cpufreq_gx_init: + * MediaGX/Geode GX initialize cpufreq driver + */ +static struct cpufreq_driver gx_suspmod_driver = { + .get = gx_get_cpuspeed, + .verify = cpufreq_gx_verify, + .target = cpufreq_gx_target, + .init = cpufreq_gx_cpu_init, + .name = "gx-suspmod", + .owner = THIS_MODULE, +}; + +static int __init cpufreq_gx_init(void) +{ + int ret; + struct gxfreq_params *params; + struct pci_dev *gx_pci; + + /* Test if we have the right hardware */ + if ((gx_pci = gx_detect_chipset()) == NULL) + return -ENODEV; + + /* check whether module parameters are sane */ + if (max_duration > 0xff) + max_duration = 0xff; + + dprintk("geode suspend modulation available.\n"); + + params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL); + if (params == NULL) + return -ENOMEM; + + params->cs55x0 = gx_pci; + gx_params = params; + + /* keep cs55x0 configurations */ + pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg)); + pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1)); + pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); + pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); + pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration)); + + if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { + kfree(params); + return ret; /* register error! 
*/ + } + + return 0; +} + +static void __exit cpufreq_gx_exit(void) +{ + cpufreq_unregister_driver(&gx_suspmod_driver); + pci_dev_put(gx_params->cs55x0); + kfree(gx_params); +} + +MODULE_AUTHOR ("Hiroshi Miura "); +MODULE_DESCRIPTION ("Cpufreq driver for Cyrix MediaGX and NatSemi Geode"); +MODULE_LICENSE ("GPL"); + +module_init(cpufreq_gx_init); +module_exit(cpufreq_gx_exit); + diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c new file mode 100644 index 00000000000..f0cce3c2dc3 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -0,0 +1,1024 @@ +/* + * (C) 2001-2004 Dave Jones. + * (C) 2002 Padraig Brady. + * + * Licensed under the terms of the GNU GPL License version 2. + * Based upon datasheets & sample CPUs kindly provided by VIA. + * + * VIA have currently 3 different versions of Longhaul. + * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147. + * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0. + * Version 2 of longhaul is backward compatible with v1, but adds + * LONGHAUL MSR for purpose of both frequency and voltage scaling. + * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C). + * Version 3 of longhaul got renamed to Powersaver and redesigned + * to use only the POWERSAVER MSR at 0x110a. + * It is present in Ezra-T (C5M), Nehemiah (C5X) and above. + * It's pretty much the same feature wise to longhaul v2, though + * there is provision for scaling FSB too, but this doesn't work + * too well in practice so we don't even try to use this. + * + * BIG FAT DISCLAIMER: Work in progress code. 
Possibly *dangerous* + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "longhaul.h" + +#define PFX "longhaul: " + +#define TYPE_LONGHAUL_V1 1 +#define TYPE_LONGHAUL_V2 2 +#define TYPE_POWERSAVER 3 + +#define CPU_SAMUEL 1 +#define CPU_SAMUEL2 2 +#define CPU_EZRA 3 +#define CPU_EZRA_T 4 +#define CPU_NEHEMIAH 5 +#define CPU_NEHEMIAH_C 6 + +/* Flags */ +#define USE_ACPI_C3 (1 << 1) +#define USE_NORTHBRIDGE (1 << 2) + +static int cpu_model; +static unsigned int numscales=16; +static unsigned int fsb; + +static const struct mV_pos *vrm_mV_table; +static const unsigned char *mV_vrm_table; + +static unsigned int highest_speed, lowest_speed; /* kHz */ +static unsigned int minmult, maxmult; +static int can_scale_voltage; +static struct acpi_processor *pr = NULL; +static struct acpi_processor_cx *cx = NULL; +static u32 acpi_regs_addr; +static u8 longhaul_flags; +static unsigned int longhaul_index; + +/* Module parameters */ +static int scale_voltage; +static int disable_acpi_c3; +static int revid_errata; + +#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) + + +/* Clock ratios multiplied by 10 */ +static int clock_ratio[32]; +static int eblcr_table[32]; +static int longhaul_version; +static struct cpufreq_frequency_table *longhaul_table; + +#ifdef CONFIG_CPU_FREQ_DEBUG +static char speedbuffer[8]; + +static char *print_speed(int speed) +{ + if (speed < 1000) { + snprintf(speedbuffer, sizeof(speedbuffer),"%dMHz", speed); + return speedbuffer; + } + + if (speed%1000 == 0) + snprintf(speedbuffer, sizeof(speedbuffer), + "%dGHz", speed/1000); + else + snprintf(speedbuffer, sizeof(speedbuffer), + "%d.%dGHz", speed/1000, (speed%1000)/100); + + return speedbuffer; +} +#endif + + +static unsigned int calc_speed(int mult) +{ + int khz; + khz = (mult/10)*fsb; + if (mult%10) + khz += fsb/2; + khz *= 1000; + return khz; +} + + +static int longhaul_get_cpu_mult(void) +{ + unsigned long invalue=0,lo, hi; + + rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); + invalue = (lo & (1<<22|1<<23|1<<24|1<<25)) >>22; + if (longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) { + if (lo & (1<<27)) + invalue+=16; + } + return eblcr_table[invalue]; +} + +/* For processor with BCR2 MSR */ + +static void do_longhaul1(unsigned int clock_ratio_index) +{ + union msr_bcr2 bcr2; + + rdmsrl(MSR_VIA_BCR2, bcr2.val); + /* Enable software clock multiplier */ + bcr2.bits.ESOFTBF = 1; + bcr2.bits.CLOCKMUL = clock_ratio_index & 0xff; + + /* Sync to timer tick */ + safe_halt(); + /* Change frequency on next halt or sleep */ + wrmsrl(MSR_VIA_BCR2, bcr2.val); + /* Invoke transition */ + ACPI_FLUSH_CPU_CACHE(); + halt(); + + /* Disable software clock multiplier */ + local_irq_disable(); + rdmsrl(MSR_VIA_BCR2, bcr2.val); + bcr2.bits.ESOFTBF = 0; + wrmsrl(MSR_VIA_BCR2, bcr2.val); +} + +/* For processor with Longhaul MSR */ + +static void do_powersaver(int cx_address, unsigned int clock_ratio_index, + unsigned int dir) +{ + union msr_longhaul longhaul; + u32 t; + + rdmsrl(MSR_VIA_LONGHAUL, 
longhaul.val); + /* Setup new frequency */ + if (!revid_errata) + longhaul.bits.RevisionKey = longhaul.bits.RevisionID; + else + longhaul.bits.RevisionKey = 0; + longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; + longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; + /* Setup new voltage */ + if (can_scale_voltage) + longhaul.bits.SoftVID = (clock_ratio_index >> 8) & 0x1f; + /* Sync to timer tick */ + safe_halt(); + /* Raise voltage if necessary */ + if (can_scale_voltage && dir) { + longhaul.bits.EnableSoftVID = 1; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + /* Change voltage */ + if (!cx_address) { + ACPI_FLUSH_CPU_CACHE(); + halt(); + } else { + ACPI_FLUSH_CPU_CACHE(); + /* Invoke C3 */ + inb(cx_address); + /* Dummy op - must do something useless after P_LVL3 + * read */ + t = inl(acpi_gbl_FADT.xpm_timer_block.address); + } + longhaul.bits.EnableSoftVID = 0; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + } + + /* Change frequency on next halt or sleep */ + longhaul.bits.EnableSoftBusRatio = 1; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + if (!cx_address) { + ACPI_FLUSH_CPU_CACHE(); + halt(); + } else { + ACPI_FLUSH_CPU_CACHE(); + /* Invoke C3 */ + inb(cx_address); + /* Dummy op - must do something useless after P_LVL3 read */ + t = inl(acpi_gbl_FADT.xpm_timer_block.address); + } + /* Disable bus ratio bit */ + longhaul.bits.EnableSoftBusRatio = 0; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + + /* Reduce voltage if necessary */ + if (can_scale_voltage && !dir) { + longhaul.bits.EnableSoftVID = 1; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + /* Change voltage */ + if (!cx_address) { + ACPI_FLUSH_CPU_CACHE(); + halt(); + } else { + ACPI_FLUSH_CPU_CACHE(); + /* Invoke C3 */ + inb(cx_address); + /* Dummy op - must do something useless after P_LVL3 + * read */ + t = inl(acpi_gbl_FADT.xpm_timer_block.address); + } + longhaul.bits.EnableSoftVID = 0; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + } +} + +/** + * longhaul_set_cpu_frequency() + * @clock_ratio_index : 
bitpattern of the new multiplier. + * + * Sets a new clock ratio. + */ + +static void longhaul_setstate(unsigned int table_index) +{ + unsigned int clock_ratio_index; + int speed, mult; + struct cpufreq_freqs freqs; + unsigned long flags; + unsigned int pic1_mask, pic2_mask; + u16 bm_status = 0; + u32 bm_timeout = 1000; + unsigned int dir = 0; + + clock_ratio_index = longhaul_table[table_index].index; + /* Safety precautions */ + mult = clock_ratio[clock_ratio_index & 0x1f]; + if (mult == -1) + return; + speed = calc_speed(mult); + if ((speed > highest_speed) || (speed < lowest_speed)) + return; + /* Voltage transition before frequency transition? */ + if (can_scale_voltage && longhaul_index < table_index) + dir = 1; + + freqs.old = calc_speed(longhaul_get_cpu_mult()); + freqs.new = speed; + freqs.cpu = 0; /* longhaul.c is UP only driver */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", + fsb, mult/10, mult%10, print_speed(speed/1000)); +retry_loop: + preempt_disable(); + local_irq_save(flags); + + pic2_mask = inb(0xA1); + pic1_mask = inb(0x21); /* works on C3. save mask. */ + outb(0xFF,0xA1); /* Overkill */ + outb(0xFE,0x21); /* TMR0 only */ + + /* Wait while PCI bus is busy. */ + if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE + || ((pr != NULL) && pr->flags.bm_control))) { + bm_status = inw(acpi_regs_addr); + bm_status &= 1 << 4; + while (bm_status && bm_timeout) { + outw(1 << 4, acpi_regs_addr); + bm_timeout--; + bm_status = inw(acpi_regs_addr); + bm_status &= 1 << 4; + } + } + + if (longhaul_flags & USE_NORTHBRIDGE) { + /* Disable AGP and PCI arbiters */ + outb(3, 0x22); + } else if ((pr != NULL) && pr->flags.bm_control) { + /* Disable bus master arbitration */ + acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1); + } + switch (longhaul_version) { + + /* + * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B]) + * Software controlled multipliers only. 
+ */ + case TYPE_LONGHAUL_V1: + do_longhaul1(clock_ratio_index); + break; + + /* + * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C] + * + * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N]) + * Nehemiah can do FSB scaling too, but this has never been proven + * to work in practice. + */ + case TYPE_LONGHAUL_V2: + case TYPE_POWERSAVER: + if (longhaul_flags & USE_ACPI_C3) { + /* Don't allow wakeup */ + acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); + do_powersaver(cx->address, clock_ratio_index, dir); + } else { + do_powersaver(0, clock_ratio_index, dir); + } + break; + } + + if (longhaul_flags & USE_NORTHBRIDGE) { + /* Enable arbiters */ + outb(0, 0x22); + } else if ((pr != NULL) && pr->flags.bm_control) { + /* Enable bus master arbitration */ + acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); + } + outb(pic2_mask,0xA1); /* restore mask */ + outb(pic1_mask,0x21); + + local_irq_restore(flags); + preempt_enable(); + + freqs.new = calc_speed(longhaul_get_cpu_mult()); + /* Check if requested frequency is set. */ + if (unlikely(freqs.new != speed)) { + printk(KERN_INFO PFX "Failed to set requested frequency!\n"); + /* Revision ID = 1 but processor is expecting revision key + * equal to 0. Jumpers at the bottom of processor will change + * multiplier and FSB, but will not change bits in Longhaul + * MSR nor enable voltage scaling. */ + if (!revid_errata) { + printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" " + "option.\n"); + revid_errata = 1; + msleep(200); + goto retry_loop; + } + /* Why ACPI C3 sometimes doesn't work is a mystery for me. + * But it does happen. Processor is entering ACPI C3 state, + * but it doesn't change frequency. I tried poking various + * bits in northbridge registers, but without success. 
*/ + if (longhaul_flags & USE_ACPI_C3) { + printk(KERN_INFO PFX "Disabling ACPI C3 support.\n"); + longhaul_flags &= ~USE_ACPI_C3; + if (revid_errata) { + printk(KERN_INFO PFX "Disabling \"Ignore " + "Revision ID\" option.\n"); + revid_errata = 0; + } + msleep(200); + goto retry_loop; + } + /* This shouldn't happen. Longhaul ver. 2 was reported not + * working on processors without voltage scaling, but with + * RevID = 1. RevID errata will make things right. Just + * to be 100% sure. */ + if (longhaul_version == TYPE_LONGHAUL_V2) { + printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n"); + longhaul_version = TYPE_LONGHAUL_V1; + msleep(200); + goto retry_loop; + } + } + /* Report true CPU frequency */ + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + if (!bm_timeout) + printk(KERN_INFO PFX "Warning: Timeout while waiting for idle PCI bus.\n"); +} + +/* + * Centaur decided to make life a little more tricky. + * Only longhaul v1 is allowed to read EBLCR BSEL[0:1]. + * Samuel2 and above have to try and guess what the FSB is. + * We do this by assuming we booted at maximum multiplier, and interpolate + * between that value multiplied by possible FSBs and cpu_mhz which + * was calculated at boot time. Really ugly, but no other way to do this. 
+ */ + +#define ROUNDING 0xf + +static int guess_fsb(int mult) +{ + int speed = cpu_khz / 1000; + int i; + int speeds[] = { 666, 1000, 1333, 2000 }; + int f_max, f_min; + + for (i = 0; i < 4; i++) { + f_max = ((speeds[i] * mult) + 50) / 100; + f_max += (ROUNDING / 2); + f_min = f_max - ROUNDING; + if ((speed <= f_max) && (speed >= f_min)) + return speeds[i] / 10; + } + return 0; +} + + +static int __init longhaul_get_ranges(void) +{ + unsigned int i, j, k = 0; + unsigned int ratio; + int mult; + + /* Get current frequency */ + mult = longhaul_get_cpu_mult(); + if (mult == -1) { + printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n"); + return -EINVAL; + } + fsb = guess_fsb(mult); + if (fsb == 0) { + printk(KERN_INFO PFX "Invalid (reserved) FSB!\n"); + return -EINVAL; + } + /* Get max multiplier - as we always did. + * Longhaul MSR is usefull only when voltage scaling is enabled. + * C3 is booting at max anyway. */ + maxmult = mult; + /* Get min multiplier */ + switch (cpu_model) { + case CPU_NEHEMIAH: + minmult = 50; + break; + case CPU_NEHEMIAH_C: + minmult = 40; + break; + default: + minmult = 30; + break; + } + + dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", + minmult/10, minmult%10, maxmult/10, maxmult%10); + + highest_speed = calc_speed(maxmult); + lowest_speed = calc_speed(minmult); + dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, + print_speed(lowest_speed/1000), + print_speed(highest_speed/1000)); + + if (lowest_speed == highest_speed) { + printk (KERN_INFO PFX "highestspeed == lowest, aborting.\n"); + return -EINVAL; + } + if (lowest_speed > highest_speed) { + printk (KERN_INFO PFX "nonsense! 
lowest (%d > %d) !\n", + lowest_speed, highest_speed); + return -EINVAL; + } + + longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL); + if(!longhaul_table) + return -ENOMEM; + + for (j = 0; j < numscales; j++) { + ratio = clock_ratio[j]; + if (ratio == -1) + continue; + if (ratio > maxmult || ratio < minmult) + continue; + longhaul_table[k].frequency = calc_speed(ratio); + longhaul_table[k].index = j; + k++; + } + if (k <= 1) { + kfree(longhaul_table); + return -ENODEV; + } + /* Sort */ + for (j = 0; j < k - 1; j++) { + unsigned int min_f, min_i; + min_f = longhaul_table[j].frequency; + min_i = j; + for (i = j + 1; i < k; i++) { + if (longhaul_table[i].frequency < min_f) { + min_f = longhaul_table[i].frequency; + min_i = i; + } + } + if (min_i != j) { + unsigned int temp; + temp = longhaul_table[j].frequency; + longhaul_table[j].frequency = longhaul_table[min_i].frequency; + longhaul_table[min_i].frequency = temp; + temp = longhaul_table[j].index; + longhaul_table[j].index = longhaul_table[min_i].index; + longhaul_table[min_i].index = temp; + } + } + + longhaul_table[k].frequency = CPUFREQ_TABLE_END; + + /* Find index we are running on */ + for (j = 0; j < k; j++) { + if (clock_ratio[longhaul_table[j].index & 0x1f] == mult) { + longhaul_index = j; + break; + } + } + return 0; +} + + +static void __init longhaul_setup_voltagescaling(void) +{ + union msr_longhaul longhaul; + struct mV_pos minvid, maxvid, vid; + unsigned int j, speed, pos, kHz_step, numvscales; + int min_vid_speed; + + rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); + if (!(longhaul.bits.RevisionID & 1)) { + printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n"); + return; + } + + if (!longhaul.bits.VRMRev) { + printk(KERN_INFO PFX "VRM 8.5\n"); + vrm_mV_table = &vrm85_mV[0]; + mV_vrm_table = &mV_vrm85[0]; + } else { + printk(KERN_INFO PFX "Mobile VRM\n"); + if (cpu_model < CPU_NEHEMIAH) + return; + vrm_mV_table = &mobilevrm_mV[0]; + mV_vrm_table = 
&mV_mobilevrm[0]; + } + + minvid = vrm_mV_table[longhaul.bits.MinimumVID]; + maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; + + if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { + printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " + "Voltage scaling disabled.\n", + minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000); + return; + } + + if (minvid.mV == maxvid.mV) { + printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are " + "both %d.%03d. Voltage scaling disabled\n", + maxvid.mV/1000, maxvid.mV%1000); + return; + } + + /* How many voltage steps */ + numvscales = maxvid.pos - minvid.pos + 1; + printk(KERN_INFO PFX + "Max VID=%d.%03d " + "Min VID=%d.%03d, " + "%d possible voltage scales\n", + maxvid.mV/1000, maxvid.mV%1000, + minvid.mV/1000, minvid.mV%1000, + numvscales); + + /* Calculate max frequency at min voltage */ + j = longhaul.bits.MinMHzBR; + if (longhaul.bits.MinMHzBR4) + j += 16; + min_vid_speed = eblcr_table[j]; + if (min_vid_speed == -1) + return; + switch (longhaul.bits.MinMHzFSB) { + case 0: + min_vid_speed *= 13333; + break; + case 1: + min_vid_speed *= 10000; + break; + case 3: + min_vid_speed *= 6666; + break; + default: + return; + break; + } + if (min_vid_speed >= highest_speed) + return; + /* Calculate kHz for one voltage step */ + kHz_step = (highest_speed - min_vid_speed) / numvscales; + + j = 0; + while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { + speed = longhaul_table[j].frequency; + if (speed > min_vid_speed) + pos = (speed - min_vid_speed) / kHz_step + minvid.pos; + else + pos = minvid.pos; + longhaul_table[j].index |= mV_vrm_table[pos] << 8; + vid = vrm_mV_table[mV_vrm_table[pos]]; + printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", speed, j, vid.mV); + j++; + } + + can_scale_voltage = 1; + printk(KERN_INFO PFX "Voltage scaling enabled.\n"); +} + + +static int longhaul_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, 
longhaul_table); +} + + +static int longhaul_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + unsigned int table_index = 0; + unsigned int i; + unsigned int dir = 0; + u8 vid, current_vid; + + if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index)) + return -EINVAL; + + /* Don't set same frequency again */ + if (longhaul_index == table_index) + return 0; + + if (!can_scale_voltage) + longhaul_setstate(table_index); + else { + /* On test system voltage transitions exceeding single + * step up or down were turning motherboard off. Both + * "ondemand" and "userspace" are unsafe. C7 is doing + * this in hardware, C3 is old and we need to do this + * in software. */ + i = longhaul_index; + current_vid = (longhaul_table[longhaul_index].index >> 8) & 0x1f; + if (table_index > longhaul_index) + dir = 1; + while (i != table_index) { + vid = (longhaul_table[i].index >> 8) & 0x1f; + if (vid != current_vid) { + longhaul_setstate(i); + current_vid = vid; + msleep(200); + } + if (dir) + i++; + else + i--; + } + longhaul_setstate(table_index); + } + longhaul_index = table_index; + return 0; +} + + +static unsigned int longhaul_get(unsigned int cpu) +{ + if (cpu) + return 0; + return calc_speed(longhaul_get_cpu_mult()); +} + +static acpi_status longhaul_walk_callback(acpi_handle obj_handle, + u32 nesting_level, + void *context, void **return_value) +{ + struct acpi_device *d; + + if ( acpi_bus_get_device(obj_handle, &d) ) { + return 0; + } + *return_value = (void *)acpi_driver_data(d); + return 1; +} + +/* VIA don't support PM2 reg, but have something similar */ +static int enable_arbiter_disable(void) +{ + struct pci_dev *dev; + int status = 1; + int reg; + u8 pci_cmd; + + /* Find PLE133 host bridge */ + reg = 0x78; + dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, + NULL); + /* Find CLE266 host bridge */ + if (dev == NULL) { + reg = 0x76; + dev = 
pci_get_device(PCI_VENDOR_ID_VIA, + PCI_DEVICE_ID_VIA_862X_0, NULL); + /* Find CN400 V-Link host bridge */ + if (dev == NULL) + dev = pci_get_device(PCI_VENDOR_ID_VIA, 0x7259, NULL); + } + if (dev != NULL) { + /* Enable access to port 0x22 */ + pci_read_config_byte(dev, reg, &pci_cmd); + if (!(pci_cmd & 1<<7)) { + pci_cmd |= 1<<7; + pci_write_config_byte(dev, reg, pci_cmd); + pci_read_config_byte(dev, reg, &pci_cmd); + if (!(pci_cmd & 1<<7)) { + printk(KERN_ERR PFX + "Can't enable access to port 0x22.\n"); + status = 0; + } + } + pci_dev_put(dev); + return status; + } + return 0; +} + +static int longhaul_setup_southbridge(void) +{ + struct pci_dev *dev; + u8 pci_cmd; + + /* Find VT8235 southbridge */ + dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); + if (dev == NULL) + /* Find VT8237 southbridge */ + dev = pci_get_device(PCI_VENDOR_ID_VIA, + PCI_DEVICE_ID_VIA_8237, NULL); + if (dev != NULL) { + /* Set transition time to max */ + pci_read_config_byte(dev, 0xec, &pci_cmd); + pci_cmd &= ~(1 << 2); + pci_write_config_byte(dev, 0xec, pci_cmd); + pci_read_config_byte(dev, 0xe4, &pci_cmd); + pci_cmd &= ~(1 << 7); + pci_write_config_byte(dev, 0xe4, pci_cmd); + pci_read_config_byte(dev, 0xe5, &pci_cmd); + pci_cmd |= 1 << 7; + pci_write_config_byte(dev, 0xe5, pci_cmd); + /* Get address of ACPI registers block*/ + pci_read_config_byte(dev, 0x81, &pci_cmd); + if (pci_cmd & 1 << 7) { + pci_read_config_dword(dev, 0x88, &acpi_regs_addr); + acpi_regs_addr &= 0xff00; + printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", acpi_regs_addr); + } + + pci_dev_put(dev); + return 1; + } + return 0; +} + +static int __init longhaul_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *c = cpu_data; + char *cpuname=NULL; + int ret; + u32 lo, hi; + + /* Check what we have on this motherboard */ + switch (c->x86_model) { + case 6: + cpu_model = CPU_SAMUEL; + cpuname = "C3 'Samuel' [C5A]"; + longhaul_version = TYPE_LONGHAUL_V1; + memcpy (clock_ratio, 
samuel1_clock_ratio, sizeof(samuel1_clock_ratio)); + memcpy (eblcr_table, samuel1_eblcr, sizeof(samuel1_eblcr)); + break; + + case 7: + switch (c->x86_mask) { + case 0: + longhaul_version = TYPE_LONGHAUL_V1; + cpu_model = CPU_SAMUEL2; + cpuname = "C3 'Samuel 2' [C5B]"; + /* Note, this is not a typo, early Samuel2's had + * Samuel1 ratios. */ + memcpy(clock_ratio, samuel1_clock_ratio, + sizeof(samuel1_clock_ratio)); + memcpy(eblcr_table, samuel2_eblcr, + sizeof(samuel2_eblcr)); + break; + case 1 ... 15: + longhaul_version = TYPE_LONGHAUL_V1; + if (c->x86_mask < 8) { + cpu_model = CPU_SAMUEL2; + cpuname = "C3 'Samuel 2' [C5B]"; + } else { + cpu_model = CPU_EZRA; + cpuname = "C3 'Ezra' [C5C]"; + } + memcpy(clock_ratio, ezra_clock_ratio, + sizeof(ezra_clock_ratio)); + memcpy(eblcr_table, ezra_eblcr, + sizeof(ezra_eblcr)); + break; + } + break; + + case 8: + cpu_model = CPU_EZRA_T; + cpuname = "C3 'Ezra-T' [C5M]"; + longhaul_version = TYPE_POWERSAVER; + numscales=32; + memcpy (clock_ratio, ezrat_clock_ratio, sizeof(ezrat_clock_ratio)); + memcpy (eblcr_table, ezrat_eblcr, sizeof(ezrat_eblcr)); + break; + + case 9: + longhaul_version = TYPE_POWERSAVER; + numscales = 32; + memcpy(clock_ratio, + nehemiah_clock_ratio, + sizeof(nehemiah_clock_ratio)); + memcpy(eblcr_table, nehemiah_eblcr, sizeof(nehemiah_eblcr)); + switch (c->x86_mask) { + case 0 ... 1: + cpu_model = CPU_NEHEMIAH; + cpuname = "C3 'Nehemiah A' [C5XLOE]"; + break; + case 2 ... 4: + cpu_model = CPU_NEHEMIAH; + cpuname = "C3 'Nehemiah B' [C5XLOH]"; + break; + case 5 ... 15: + cpu_model = CPU_NEHEMIAH_C; + cpuname = "C3 'Nehemiah C' [C5P]"; + break; + } + break; + + default: + cpuname = "Unknown"; + break; + } + /* Check Longhaul ver. 2 */ + if (longhaul_version == TYPE_LONGHAUL_V2) { + rdmsr(MSR_VIA_LONGHAUL, lo, hi); + if (lo == 0 && hi == 0) + /* Looks like MSR isn't present */ + longhaul_version = TYPE_LONGHAUL_V1; + } + + printk (KERN_INFO PFX "VIA %s CPU detected. 
", cpuname); + switch (longhaul_version) { + case TYPE_LONGHAUL_V1: + case TYPE_LONGHAUL_V2: + printk ("Longhaul v%d supported.\n", longhaul_version); + break; + case TYPE_POWERSAVER: + printk ("Powersaver supported.\n"); + break; + }; + + /* Doesn't hurt */ + longhaul_setup_southbridge(); + + /* Find ACPI data for processor */ + acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, + ACPI_UINT32_MAX, &longhaul_walk_callback, + NULL, (void *)&pr); + + /* Check ACPI support for C3 state */ + if (pr != NULL && longhaul_version == TYPE_POWERSAVER) { + cx = &pr->power.states[ACPI_STATE_C3]; + if (cx->address > 0 && cx->latency <= 1000) + longhaul_flags |= USE_ACPI_C3; + } + /* Disable if it isn't working */ + if (disable_acpi_c3) + longhaul_flags &= ~USE_ACPI_C3; + /* Check if northbridge is friendly */ + if (enable_arbiter_disable()) + longhaul_flags |= USE_NORTHBRIDGE; + + /* Check ACPI support for bus master arbiter disable */ + if (!(longhaul_flags & USE_ACPI_C3 + || longhaul_flags & USE_NORTHBRIDGE) + && ((pr == NULL) || !(pr->flags.bm_control))) { + printk(KERN_ERR PFX + "No ACPI support. 
Unsupported northbridge.\n"); + return -ENODEV; + } + + if (longhaul_flags & USE_NORTHBRIDGE) + printk(KERN_INFO PFX "Using northbridge support.\n"); + if (longhaul_flags & USE_ACPI_C3) + printk(KERN_INFO PFX "Using ACPI support.\n"); + + ret = longhaul_get_ranges(); + if (ret != 0) + return ret; + + if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0)) + longhaul_setup_voltagescaling(); + + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = 200000; /* nsec */ + policy->cur = calc_speed(longhaul_get_cpu_mult()); + + ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table); + if (ret) + return ret; + + cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu); + + return 0; +} + +static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static struct freq_attr* longhaul_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver longhaul_driver = { + .verify = longhaul_verify, + .target = longhaul_target, + .get = longhaul_get, + .init = longhaul_cpu_init, + .exit = __devexit_p(longhaul_cpu_exit), + .name = "longhaul", + .owner = THIS_MODULE, + .attr = longhaul_attr, +}; + + +static int __init longhaul_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6) + return -ENODEV; + +#ifdef CONFIG_SMP + if (num_online_cpus() > 1) { + printk(KERN_ERR PFX "More than 1 CPU detected, longhaul disabled.\n"); + return -ENODEV; + } +#endif +#ifdef CONFIG_X86_IO_APIC + if (cpu_has_apic) { + printk(KERN_ERR PFX "APIC detected. Longhaul is currently broken in this configuration.\n"); + return -ENODEV; + } +#endif + switch (c->x86_model) { + case 6 ... 
9: + return cpufreq_register_driver(&longhaul_driver); + case 10: + printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); + default: + ;; + } + + return -ENODEV; +} + + +static void __exit longhaul_exit(void) +{ + int i; + + for (i=0; i < numscales; i++) { + if (clock_ratio[i] == maxmult) { + longhaul_setstate(i); + break; + } + } + + cpufreq_unregister_driver(&longhaul_driver); + kfree(longhaul_table); +} + +/* Even if BIOS is exporting ACPI C3 state, and it is used + * with success when CPU is idle, this state doesn't + * trigger frequency transition in some cases. */ +module_param (disable_acpi_c3, int, 0644); +MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); +/* Change CPU voltage with frequency. Very useful to save + * power, but most VIA C3 processors aren't supporting it. */ +module_param (scale_voltage, int, 0644); +MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); +/* Force revision key to 0 for processors which don't + * support voltage scaling, but are introducing themselves as + * such. */ +module_param(revid_errata, int, 0644); +MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); + +MODULE_AUTHOR ("Dave Jones "); +MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); +MODULE_LICENSE ("GPL"); + +late_initcall(longhaul_init); +module_exit(longhaul_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h new file mode 100644 index 00000000000..4fcc320997d --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h @@ -0,0 +1,353 @@ +/* + * longhaul.h + * (C) 2003 Dave Jones. + * + * Licensed under the terms of the GNU GPL License version 2. 
+ * + * VIA-specific information + */ + +union msr_bcr2 { + struct { + unsigned Reseved:19, // 18:0 + ESOFTBF:1, // 19 + Reserved2:3, // 22:20 + CLOCKMUL:4, // 26:23 + Reserved3:5; // 31:27 + } bits; + unsigned long val; +}; + +union msr_longhaul { + struct { + unsigned RevisionID:4, // 3:0 + RevisionKey:4, // 7:4 + EnableSoftBusRatio:1, // 8 + EnableSoftVID:1, // 9 + EnableSoftBSEL:1, // 10 + Reserved:3, // 11:13 + SoftBusRatio4:1, // 14 + VRMRev:1, // 15 + SoftBusRatio:4, // 19:16 + SoftVID:5, // 24:20 + Reserved2:3, // 27:25 + SoftBSEL:2, // 29:28 + Reserved3:2, // 31:30 + MaxMHzBR:4, // 35:32 + MaximumVID:5, // 40:36 + MaxMHzFSB:2, // 42:41 + MaxMHzBR4:1, // 43 + Reserved4:4, // 47:44 + MinMHzBR:4, // 51:48 + MinimumVID:5, // 56:52 + MinMHzFSB:2, // 58:57 + MinMHzBR4:1, // 59 + Reserved5:4; // 63:60 + } bits; + unsigned long long val; +}; + +/* + * Clock ratio tables. Div/Mod by 10 to get ratio. + * The eblcr ones specify the ratio read from the CPU. + * The clock_ratio ones specify what to write to the CPU. 
+ */ + +/* + * VIA C3 Samuel 1 & Samuel 2 (stepping 0) + */ +static const int __initdata samuel1_clock_ratio[16] = { + -1, /* 0000 -> RESERVED */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + -1, /* 0011 -> RESERVED */ + -1, /* 0100 -> RESERVED */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 55, /* 0111 -> 5.5x */ + 60, /* 1000 -> 6.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 50, /* 1011 -> 5.0x */ + 65, /* 1100 -> 6.5x */ + 75, /* 1101 -> 7.5x */ + -1, /* 1110 -> RESERVED */ + -1, /* 1111 -> RESERVED */ +}; + +static const int __initdata samuel1_eblcr[16] = { + 50, /* 0000 -> RESERVED */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + -1, /* 0011 -> RESERVED */ + 55, /* 0100 -> 5.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + -1, /* 0111 -> RESERVED */ + -1, /* 1000 -> RESERVED */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 60, /* 1011 -> 6.0x */ + -1, /* 1100 -> RESERVED */ + 75, /* 1101 -> 7.5x */ + -1, /* 1110 -> RESERVED */ + 65, /* 1111 -> 6.5x */ +}; + +/* + * VIA C3 Samuel2 Stepping 1->15 + */ +static const int __initdata samuel2_eblcr[16] = { + 50, /* 0000 -> 5.0x */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + 100, /* 0011 -> 10.0x */ + 55, /* 0100 -> 5.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 110, /* 0111 -> 11.0x */ + 90, /* 1000 -> 9.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 60, /* 1011 -> 6.0x */ + 120, /* 1100 -> 12.0x */ + 75, /* 1101 -> 7.5x */ + 130, /* 1110 -> 13.0x */ + 65, /* 1111 -> 6.5x */ +}; + +/* + * VIA C3 Ezra + */ +static const int __initdata ezra_clock_ratio[16] = { + 100, /* 0000 -> 10.0x */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + 90, /* 0011 -> 9.0x */ + 95, /* 0100 -> 9.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 55, /* 0111 -> 5.5x */ + 60, /* 1000 -> 6.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 50, /* 1011 -> 5.0x */ + 65, /* 1100 -> 6.5x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 120, /* 
1111 -> 12.0x */ +}; + +static const int __initdata ezra_eblcr[16] = { + 50, /* 0000 -> 5.0x */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + 100, /* 0011 -> 10.0x */ + 55, /* 0100 -> 5.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 95, /* 0111 -> 9.5x */ + 90, /* 1000 -> 9.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 60, /* 1011 -> 6.0x */ + 120, /* 1100 -> 12.0x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 65, /* 1111 -> 6.5x */ +}; + +/* + * VIA C3 (Ezra-T) [C5M]. + */ +static const int __initdata ezrat_clock_ratio[32] = { + 100, /* 0000 -> 10.0x */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + 90, /* 0011 -> 9.0x */ + 95, /* 0100 -> 9.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 55, /* 0111 -> 5.5x */ + 60, /* 1000 -> 6.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 50, /* 1011 -> 5.0x */ + 65, /* 1100 -> 6.5x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 120, /* 1111 -> 12.0x */ + + -1, /* 0000 -> RESERVED (10.0x) */ + 110, /* 0001 -> 11.0x */ + -1, /* 0010 -> 12.0x */ + -1, /* 0011 -> RESERVED (9.0x)*/ + 105, /* 0100 -> 10.5x */ + 115, /* 0101 -> 11.5x */ + 125, /* 0110 -> 12.5x */ + 135, /* 0111 -> 13.5x */ + 140, /* 1000 -> 14.0x */ + 150, /* 1001 -> 15.0x */ + 160, /* 1010 -> 16.0x */ + 130, /* 1011 -> 13.0x */ + 145, /* 1100 -> 14.5x */ + 155, /* 1101 -> 15.5x */ + -1, /* 1110 -> RESERVED (13.0x) */ + -1, /* 1111 -> RESERVED (12.0x) */ +}; + +static const int __initdata ezrat_eblcr[32] = { + 50, /* 0000 -> 5.0x */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + 100, /* 0011 -> 10.0x */ + 55, /* 0100 -> 5.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 95, /* 0111 -> 9.5x */ + 90, /* 1000 -> 9.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 60, /* 1011 -> 6.0x */ + 120, /* 1100 -> 12.0x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 65, /* 1111 -> 6.5x */ + + -1, /* 0000 -> RESERVED (9.0x) */ + 110, /* 0001 -> 11.0x */ + 120, /* 0010 -> 12.0x */ + 
-1, /* 0011 -> RESERVED (10.0x)*/ + 135, /* 0100 -> 13.5x */ + 115, /* 0101 -> 11.5x */ + 125, /* 0110 -> 12.5x */ + 105, /* 0111 -> 10.5x */ + 130, /* 1000 -> 13.0x */ + 150, /* 1001 -> 15.0x */ + 160, /* 1010 -> 16.0x */ + 140, /* 1011 -> 14.0x */ + -1, /* 1100 -> RESERVED (12.0x) */ + 155, /* 1101 -> 15.5x */ + -1, /* 1110 -> RESERVED (13.0x) */ + 145, /* 1111 -> 14.5x */ +}; + +/* + * VIA C3 Nehemiah */ + +static const int __initdata nehemiah_clock_ratio[32] = { + 100, /* 0000 -> 10.0x */ + -1, /* 0001 -> 16.0x */ + 40, /* 0010 -> 4.0x */ + 90, /* 0011 -> 9.0x */ + 95, /* 0100 -> 9.5x */ + -1, /* 0101 -> RESERVED */ + 45, /* 0110 -> 4.5x */ + 55, /* 0111 -> 5.5x */ + 60, /* 1000 -> 6.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 50, /* 1011 -> 5.0x */ + 65, /* 1100 -> 6.5x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 120, /* 1111 -> 12.0x */ + -1, /* 0000 -> 10.0x */ + 110, /* 0001 -> 11.0x */ + -1, /* 0010 -> 12.0x */ + -1, /* 0011 -> 9.0x */ + 105, /* 0100 -> 10.5x */ + 115, /* 0101 -> 11.5x */ + 125, /* 0110 -> 12.5x */ + 135, /* 0111 -> 13.5x */ + 140, /* 1000 -> 14.0x */ + 150, /* 1001 -> 15.0x */ + 160, /* 1010 -> 16.0x */ + 130, /* 1011 -> 13.0x */ + 145, /* 1100 -> 14.5x */ + 155, /* 1101 -> 15.5x */ + -1, /* 1110 -> RESERVED (13.0x) */ + -1, /* 1111 -> 12.0x */ +}; + +static const int __initdata nehemiah_eblcr[32] = { + 50, /* 0000 -> 5.0x */ + 160, /* 0001 -> 16.0x */ + 40, /* 0010 -> 4.0x */ + 100, /* 0011 -> 10.0x */ + 55, /* 0100 -> 5.5x */ + -1, /* 0101 -> RESERVED */ + 45, /* 0110 -> 4.5x */ + 95, /* 0111 -> 9.5x */ + 90, /* 1000 -> 9.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 60, /* 1011 -> 6.0x */ + 120, /* 1100 -> 12.0x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 65, /* 1111 -> 6.5x */ + 90, /* 0000 -> 9.0x */ + 110, /* 0001 -> 11.0x */ + 120, /* 0010 -> 12.0x */ + 100, /* 0011 -> 10.0x */ + 135, /* 0100 -> 13.5x */ + 115, /* 0101 -> 11.5x */ + 125, /* 0110 -> 12.5x */ + 105, /* 0111 -> 10.5x */ 
+ 130, /* 1000 -> 13.0x */ + 150, /* 1001 -> 15.0x */ + 160, /* 1010 -> 16.0x */ + 140, /* 1011 -> 14.0x */ + 120, /* 1100 -> 12.0x */ + 155, /* 1101 -> 15.5x */ + -1, /* 1110 -> RESERVED (13.0x) */ + 145 /* 1111 -> 14.5x */ +}; + +/* + * Voltage scales. Div/Mod by 1000 to get actual voltage. + * Which scale to use depends on the VRM type in use. + */ + +struct mV_pos { + unsigned short mV; + unsigned short pos; +}; + +static const struct mV_pos __initdata vrm85_mV[32] = { + {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, + {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, + {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, + {1450, 16}, {1400, 14}, {1350, 12}, {1300, 10}, + {1275, 9}, {1225, 7}, {1175, 5}, {1125, 3}, + {1075, 1}, {1825, 31}, {1775, 29}, {1725, 27}, + {1675, 25}, {1625, 23}, {1575, 21}, {1525, 19}, + {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} +}; + +static const unsigned char __initdata mV_vrm85[32] = { + 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, + 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, + 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, + 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 +}; + +static const struct mV_pos __initdata mobilevrm_mV[32] = { + {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, + {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, + {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, + {1150, 19}, {1100, 18}, {1050, 17}, {1000, 16}, + {975, 15}, {950, 14}, {925, 13}, {900, 12}, + {875, 11}, {850, 10}, {825, 9}, {800, 8}, + {775, 7}, {750, 6}, {725, 5}, {700, 4}, + {675, 3}, {650, 2}, {625, 1}, {600, 0} +}; + +static const unsigned char __initdata mV_mobilevrm[32] = { + 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, + 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, + 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 +}; + diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c new file mode 100644 index 00000000000..b2689514295 --- /dev/null +++ 
b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -0,0 +1,325 @@ +/* + * (C) 2002 - 2003 Dominik Brodowski + * + * Licensed under the terms of the GNU GPL License version 2. + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longrun", msg) + +static struct cpufreq_driver longrun_driver; + +/** + * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz + * values into per cent values. In TMTA microcode, the following is valid: + * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) + */ +static unsigned int longrun_low_freq, longrun_high_freq; + + +/** + * longrun_get_policy - get the current LongRun policy + * @policy: struct cpufreq_policy where current policy is written into + * + * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS + * and MSR_TMTA_LONGRUN_CTRL + */ +static void __init longrun_get_policy(struct cpufreq_policy *policy) +{ + u32 msr_lo, msr_hi; + + rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); + dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi); + if (msr_lo & 0x01) + policy->policy = CPUFREQ_POLICY_PERFORMANCE; + else + policy->policy = CPUFREQ_POLICY_POWERSAVE; + + rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); + dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi); + msr_lo &= 0x0000007F; + msr_hi &= 0x0000007F; + + if ( longrun_high_freq <= longrun_low_freq ) { + /* Assume degenerate Longrun table */ + policy->min = policy->max = longrun_high_freq; + } else { + policy->min = longrun_low_freq + msr_lo * + ((longrun_high_freq - longrun_low_freq) / 100); + policy->max = longrun_low_freq + msr_hi * + ((longrun_high_freq - longrun_low_freq) / 100); + } + policy->cpu = 0; +} + + +/** + * longrun_set_policy - sets a new CPUFreq policy + * @policy: new policy + * + * Sets a new CPUFreq policy on LongRun-capable processors. 
This function + * has to be called with cpufreq_driver locked. + */ +static int longrun_set_policy(struct cpufreq_policy *policy) +{ + u32 msr_lo, msr_hi; + u32 pctg_lo, pctg_hi; + + if (!policy) + return -EINVAL; + + if ( longrun_high_freq <= longrun_low_freq ) { + /* Assume degenerate Longrun table */ + pctg_lo = pctg_hi = 100; + } else { + pctg_lo = (policy->min - longrun_low_freq) / + ((longrun_high_freq - longrun_low_freq) / 100); + pctg_hi = (policy->max - longrun_low_freq) / + ((longrun_high_freq - longrun_low_freq) / 100); + } + + if (pctg_hi > 100) + pctg_hi = 100; + if (pctg_lo > pctg_hi) + pctg_lo = pctg_hi; + + /* performance or economy mode */ + rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); + msr_lo &= 0xFFFFFFFE; + switch (policy->policy) { + case CPUFREQ_POLICY_PERFORMANCE: + msr_lo |= 0x00000001; + break; + case CPUFREQ_POLICY_POWERSAVE: + break; + } + wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); + + /* lower and upper boundary */ + rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); + msr_lo &= 0xFFFFFF80; + msr_hi &= 0xFFFFFF80; + msr_lo |= pctg_lo; + msr_hi |= pctg_hi; + wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); + + return 0; +} + + +/** + * longrun_verify_policy - verifies a new CPUFreq policy + * @policy: the policy to verify + * + * Validates a new CPUFreq policy. This function has to be called with + * cpufreq_driver locked. 
+ */ +static int longrun_verify_policy(struct cpufreq_policy *policy) +{ + if (!policy) + return -EINVAL; + + policy->cpu = 0; + cpufreq_verify_within_limits(policy, + policy->cpuinfo.min_freq, + policy->cpuinfo.max_freq); + + if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) && + (policy->policy != CPUFREQ_POLICY_PERFORMANCE)) + return -EINVAL; + + return 0; +} + +static unsigned int longrun_get(unsigned int cpu) +{ + u32 eax, ebx, ecx, edx; + + if (cpu) + return 0; + + cpuid(0x80860007, &eax, &ebx, &ecx, &edx); + dprintk("cpuid eax is %u\n", eax); + + return (eax * 1000); +} + +/** + * longrun_determine_freqs - determines the lowest and highest possible core frequency + * @low_freq: an int to put the lowest frequency into + * @high_freq: an int to put the highest frequency into + * + * Determines the lowest and highest possible core frequencies on this CPU. + * This is necessary to calculate the performance percentage according to + * TMTA rules: + * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) + */ +static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, + unsigned int *high_freq) +{ + u32 msr_lo, msr_hi; + u32 save_lo, save_hi; + u32 eax, ebx, ecx, edx; + u32 try_hi; + struct cpuinfo_x86 *c = cpu_data; + + if (!low_freq || !high_freq) + return -EINVAL; + + if (cpu_has(c, X86_FEATURE_LRTI)) { + /* if the LongRun Table Interface is present, the + * detection is a bit easier: + * For minimum frequency, read out the maximum + * level (msr_hi), write that into "currently + * selected level", and read out the frequency. + * For maximum frequency, read out level zero. 
+ */ + /* minimum */ + rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi); + wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi); + rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); + *low_freq = msr_lo * 1000; /* to kHz */ + + /* maximum */ + wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi); + rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); + *high_freq = msr_lo * 1000; /* to kHz */ + + dprintk("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq); + + if (*low_freq > *high_freq) + *low_freq = *high_freq; + return 0; + } + + /* set the upper border to the value determined during TSC init */ + *high_freq = (cpu_khz / 1000); + *high_freq = *high_freq * 1000; + dprintk("high frequency is %u kHz\n", *high_freq); + + /* get current borders */ + rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); + save_lo = msr_lo & 0x0000007F; + save_hi = msr_hi & 0x0000007F; + + /* if current perf_pctg is larger than 90%, we need to decrease the + * upper limit to make the calculation more accurate. + */ + cpuid(0x80860007, &eax, &ebx, &ecx, &edx); + /* try decreasing in 10% steps, some processors react only + * on some barrier values */ + for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -=10) { + /* set to 0 to try_hi perf_pctg */ + msr_lo &= 0xFFFFFF80; + msr_hi &= 0xFFFFFF80; + msr_hi |= try_hi; + wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); + + /* read out current core MHz and current perf_pctg */ + cpuid(0x80860007, &eax, &ebx, &ecx, &edx); + + /* restore values */ + wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi); + } + dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax); + + /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) + * equals + * low_freq * ( 1 - perf_pctg) = (cur_freq - high_freq * perf_pctg) + * + * high_freq * perf_pctg is stored temporarily into "ebx". 
+ */ + ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */ + + if ((ecx > 95) || (ecx == 0) || (eax < ebx)) + return -EIO; + + edx = (eax - ebx) / (100 - ecx); + *low_freq = edx * 1000; /* back to kHz */ + + dprintk("low frequency is %u kHz\n", *low_freq); + + if (*low_freq > *high_freq) + *low_freq = *high_freq; + + return 0; +} + + +static int __init longrun_cpu_init(struct cpufreq_policy *policy) +{ + int result = 0; + + /* capability check */ + if (policy->cpu != 0) + return -ENODEV; + + /* detect low and high frequency */ + result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq); + if (result) + return result; + + /* cpuinfo and default policy values */ + policy->cpuinfo.min_freq = longrun_low_freq; + policy->cpuinfo.max_freq = longrun_high_freq; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + longrun_get_policy(policy); + + return 0; +} + + +static struct cpufreq_driver longrun_driver = { + .flags = CPUFREQ_CONST_LOOPS, + .verify = longrun_verify_policy, + .setpolicy = longrun_set_policy, + .get = longrun_get, + .init = longrun_cpu_init, + .name = "longrun", + .owner = THIS_MODULE, +}; + + +/** + * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver + * + * Initializes the LongRun support. 
+ */ +static int __init longrun_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + if (c->x86_vendor != X86_VENDOR_TRANSMETA || + !cpu_has(c, X86_FEATURE_LONGRUN)) + return -ENODEV; + + return cpufreq_register_driver(&longrun_driver); +} + + +/** + * longrun_exit - unregisters LongRun support + */ +static void __exit longrun_exit(void) +{ + cpufreq_unregister_driver(&longrun_driver); +} + + +MODULE_AUTHOR ("Dominik Brodowski "); +MODULE_DESCRIPTION ("LongRun driver for Transmeta Crusoe and Efficeon processors."); +MODULE_LICENSE ("GPL"); + +module_init(longrun_init); +module_exit(longrun_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c new file mode 100644 index 00000000000..4c76b511e19 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -0,0 +1,316 @@ +/* + * Pentium 4/Xeon CPU on demand clock modulation/speed scaling + * (C) 2002 - 2003 Dominik Brodowski + * (C) 2002 Zwane Mwaikambo + * (C) 2002 Arjan van de Ven + * (C) 2002 Tora T. Engstad + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * The author(s) of this software shall not be held liable for damages + * of any nature resulting due to the use of this software. This + * software is provided AS-IS with no warranties. + * + * Date Errata Description + * 20020525 N44, O17 12.5% or 25% DC causes lockup + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "speedstep-lib.h" + +#define PFX "p4-clockmod: " +#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg) + +/* + * Duty Cycle (3bits), note DC_DISABLE is not specified in + * intel docs i just use it to mean disable + */ +enum { + DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT, + DC_64PT, DC_75PT, DC_88PT, DC_DISABLE +}; + +#define DC_ENTRIES 8 + + +static int has_N44_O17_errata[NR_CPUS]; +static unsigned int stock_freq; +static struct cpufreq_driver p4clockmod_driver; +static unsigned int cpufreq_p4_get(unsigned int cpu); + +static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) +{ + u32 l, h; + + if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV)) + return -EINVAL; + + rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h); + + if (l & 0x01) + dprintk("CPU#%d currently thermal throttled\n", cpu); + + if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT)) + newstate = DC_38PT; + + rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); + if (newstate == DC_DISABLE) { + dprintk("CPU#%d disabling modulation\n", cpu); + wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h); + } else { + dprintk("CPU#%d setting duty cycle to %d%%\n", + cpu, ((125 * newstate) / 10)); + /* bits 63 - 5 : reserved + * bit 4 : enable/disable + * bits 3-1 : duty cycle + * bit 0 : reserved + */ + l = (l & ~14); + l = l | (1<<4) | ((newstate & 0x7)<<1); + wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l, h); + } + + return 0; +} + + +static struct cpufreq_frequency_table p4clockmod_table[] = { + {DC_RESV, CPUFREQ_ENTRY_INVALID}, + {DC_DFLT, 0}, + {DC_25PT, 0}, + {DC_38PT, 0}, + {DC_50PT, 0}, + {DC_64PT, 0}, + {DC_75PT, 0}, + {DC_88PT, 0}, + {DC_DISABLE, 0}, + {DC_RESV, CPUFREQ_TABLE_END}, +}; + + +static int cpufreq_p4_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = DC_RESV; + struct cpufreq_freqs freqs; + int i; + + if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate)) + 
return -EINVAL; + + freqs.old = cpufreq_p4_get(policy->cpu); + freqs.new = stock_freq * p4clockmod_table[newstate].index / 8; + + if (freqs.new == freqs.old) + return 0; + + /* notifiers */ + for_each_cpu_mask(i, policy->cpus) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software + * Developer's Manual, Volume 3 + */ + for_each_cpu_mask(i, policy->cpus) + cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); + + /* notifiers */ + for_each_cpu_mask(i, policy->cpus) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + + return 0; +} + + +static int cpufreq_p4_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]); +} + + +static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) +{ + if (c->x86 == 0x06) { + if (cpu_has(c, X86_FEATURE_EST)) + printk(KERN_WARNING PFX "Warning: EST-capable CPU detected. " + "The acpi-cpufreq module offers voltage scaling" + " in addition of frequency scaling. You should use " + "that instead of p4-clockmod, if possible.\n"); + switch (c->x86_model) { + case 0x0E: /* Core */ + case 0x0F: /* Core Duo */ + p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; + return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); + case 0x0D: /* Pentium M (Dothan) */ + p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; + /* fall through */ + case 0x09: /* Pentium M (Banias) */ + return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); + } + } + + if (c->x86 != 0xF) { + printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to \n"); + return 0; + } + + /* on P-4s, the TSC runs with constant frequency independent whether + * throttling is active or not. 
*/ + p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; + + if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) { + printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. " + "The speedstep-ich or acpi cpufreq modules offer " + "voltage scaling in addition of frequency scaling. " + "You should use either one instead of p4-clockmod, " + "if possible.\n"); + return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M); + } + + return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D); +} + + + +static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; + int cpuid = 0; + unsigned int i; + +#ifdef CONFIG_SMP + policy->cpus = cpu_sibling_map[policy->cpu]; +#endif + + /* Errata workaround */ + cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask; + switch (cpuid) { + case 0x0f07: + case 0x0f0a: + case 0x0f11: + case 0x0f12: + has_N44_O17_errata[policy->cpu] = 1; + dprintk("has errata -- disabling low frequencies\n"); + } + + /* get max frequency */ + stock_freq = cpufreq_p4_get_frequency(c); + if (!stock_freq) + return -EINVAL; + + /* table init */ + for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { + if ((i<2) && (has_N44_O17_errata[policy->cpu])) + p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; + else + p4clockmod_table[i].frequency = (stock_freq * i)/8; + } + cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu); + + /* cpuinfo and default policy values */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = 1000000; /* assumed */ + policy->cur = stock_freq; + + return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]); +} + + +static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static unsigned int cpufreq_p4_get(unsigned int cpu) +{ + u32 l, h; + + rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); + + if (l & 0x10) { + l = l 
>> 1; + l &= 0x7; + } else + l = DC_DISABLE; + + if (l != DC_DISABLE) + return (stock_freq * l / 8); + + return stock_freq; +} + +static struct freq_attr* p4clockmod_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver p4clockmod_driver = { + .verify = cpufreq_p4_verify, + .target = cpufreq_p4_target, + .init = cpufreq_p4_cpu_init, + .exit = cpufreq_p4_cpu_exit, + .get = cpufreq_p4_get, + .name = "p4-clockmod", + .owner = THIS_MODULE, + .attr = p4clockmod_attr, +}; + + +static int __init cpufreq_p4_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + int ret; + + /* + * THERM_CONTROL is architectural for IA32 now, so + * we can rely on the capability checks + */ + if (c->x86_vendor != X86_VENDOR_INTEL) + return -ENODEV; + + if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) || + !test_bit(X86_FEATURE_ACC, c->x86_capability)) + return -ENODEV; + + ret = cpufreq_register_driver(&p4clockmod_driver); + if (!ret) + printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n"); + + return (ret); +} + + +static void __exit cpufreq_p4_exit(void) +{ + cpufreq_unregister_driver(&p4clockmod_driver); +} + + +MODULE_AUTHOR ("Zwane Mwaikambo "); +MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)"); +MODULE_LICENSE ("GPL"); + +late_initcall(cpufreq_p4_init); +module_exit(cpufreq_p4_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c new file mode 100644 index 00000000000..f89524051e4 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -0,0 +1,256 @@ +/* + * This file was based upon code in Powertweak Linux (http://powertweak.sf.net) + * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * BIG FAT DISCLAIMER: Work in progress code. 
Possibly *dangerous* + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long + as it is unused */ + +static unsigned int busfreq; /* FSB, in 10 kHz */ +static unsigned int max_multiplier; + + +/* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */ +static struct cpufreq_frequency_table clock_ratio[] = { + {45, /* 000 -> 4.5x */ 0}, + {50, /* 001 -> 5.0x */ 0}, + {40, /* 010 -> 4.0x */ 0}, + {55, /* 011 -> 5.5x */ 0}, + {20, /* 100 -> 2.0x */ 0}, + {30, /* 101 -> 3.0x */ 0}, + {60, /* 110 -> 6.0x */ 0}, + {35, /* 111 -> 3.5x */ 0}, + {0, CPUFREQ_TABLE_END} +}; + + +/** + * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier + * + * Returns the current setting of the frequency multiplier. Core clock + * speed is frequency of the Front-Side Bus multiplied with this value. + */ +static int powernow_k6_get_cpu_multiplier(void) +{ + u64 invalue = 0; + u32 msrval; + + msrval = POWERNOW_IOPORT + 0x1; + wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ + invalue=inl(POWERNOW_IOPORT + 0x8); + msrval = POWERNOW_IOPORT + 0x0; + wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ + + return clock_ratio[(invalue >> 5)&7].index; +} + + +/** + * powernow_k6_set_state - set the PowerNow! multiplier + * @best_i: clock_ratio[best_i] is the target multiplier + * + * Tries to change the PowerNow! 
multiplier + */ +static void powernow_k6_set_state (unsigned int best_i) +{ + unsigned long outvalue=0, invalue=0; + unsigned long msrval; + struct cpufreq_freqs freqs; + + if (clock_ratio[best_i].index > max_multiplier) { + printk(KERN_ERR "cpufreq: invalid target frequency\n"); + return; + } + + freqs.old = busfreq * powernow_k6_get_cpu_multiplier(); + freqs.new = busfreq * clock_ratio[best_i].index; + freqs.cpu = 0; /* powernow-k6.c is UP only driver */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + /* we now need to transform best_i to the BVC format, see AMD#23446 */ + + outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5); + + msrval = POWERNOW_IOPORT + 0x1; + wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ + invalue=inl(POWERNOW_IOPORT + 0x8); + invalue = invalue & 0xf; + outvalue = outvalue | invalue; + outl(outvalue ,(POWERNOW_IOPORT + 0x8)); + msrval = POWERNOW_IOPORT + 0x0; + wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + return; +} + + +/** + * powernow_k6_verify - verifies a new CPUfreq policy + * @policy: new policy + * + * Policy must be within lowest and highest possible CPU Frequency, + * and at least one possible state must be within min and max. 
+ */ +static int powernow_k6_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &clock_ratio[0]); +} + + +/** + * powernow_k6_setpolicy - sets a new CPUFreq policy + * @policy: new policy + * @target_freq: the target frequency + * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * + * sets a new CPUFreq policy + */ +static int powernow_k6_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, &clock_ratio[0], target_freq, relation, &newstate)) + return -EINVAL; + + powernow_k6_set_state(newstate); + + return 0; +} + + +static int powernow_k6_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int i; + int result; + + if (policy->cpu != 0) + return -ENODEV; + + /* get frequencies */ + max_multiplier = powernow_k6_get_cpu_multiplier(); + busfreq = cpu_khz / max_multiplier; + + /* table init */ + for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { + if (clock_ratio[i].index > max_multiplier) + clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; + else + clock_ratio[i].frequency = busfreq * clock_ratio[i].index; + } + + /* cpuinfo and default policy values */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = busfreq * max_multiplier; + + result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); + + return 0; +} + + +static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) +{ + unsigned int i; + for (i=0; i<8; i++) { + if (i==max_multiplier) + powernow_k6_set_state(i); + } + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static unsigned int powernow_k6_get(unsigned int cpu) +{ + return busfreq * powernow_k6_get_cpu_multiplier(); +} + +static struct freq_attr* 
powernow_k6_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver powernow_k6_driver = { + .verify = powernow_k6_verify, + .target = powernow_k6_target, + .init = powernow_k6_cpu_init, + .exit = powernow_k6_cpu_exit, + .get = powernow_k6_get, + .name = "powernow-k6", + .owner = THIS_MODULE, + .attr = powernow_k6_attr, +}; + + +/** + * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver + * + * Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported + * devices, -EINVAL or -ENOMEM on problems during initiatization, and zero + * on success. + */ +static int __init powernow_k6_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) || + ((c->x86_model != 12) && (c->x86_model != 13))) + return -ENODEV; + + if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) { + printk("cpufreq: PowerNow IOPORT region already used.\n"); + return -EIO; + } + + if (cpufreq_register_driver(&powernow_k6_driver)) { + release_region (POWERNOW_IOPORT, 16); + return -EINVAL; + } + + return 0; +} + + +/** + * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support + * + * Unregisters AMD K6-2+ / K6-3+ PowerNow! support. + */ +static void __exit powernow_k6_exit(void) +{ + cpufreq_unregister_driver(&powernow_k6_driver); + release_region (POWERNOW_IOPORT, 16); +} + + +MODULE_AUTHOR ("Arjan van de Ven , Dave Jones , Dominik Brodowski "); +MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); +MODULE_LICENSE ("GPL"); + +module_init(powernow_k6_init); +module_exit(powernow_k6_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c new file mode 100644 index 00000000000..ca3e1d34188 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -0,0 +1,703 @@ +/* + * AMD K7 Powernow driver. + * (C) 2003 Dave Jones on behalf of SuSE Labs. 
+ * (C) 2003-2004 Dave Jones + * + * Licensed under the terms of the GNU GPL License version 2. + * Based upon datasheets & sample CPUs kindly provided by AMD. + * + * Errata 5: Processor may fail to execute a FID/VID change in presence of interrupt. + * - We cli/sti on stepping A0 CPUs around the FID/VID transition. + * Errata 15: Processors with half frequency multipliers may hang upon wakeup from disconnect. + * - We disable half multipliers if ACPI is used on A0 stepping CPUs. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_POWERNOW_K7_ACPI +#include +#include +#endif + +#include "powernow-k7.h" + +#define PFX "powernow: " + + +struct psb_s { + u8 signature[10]; + u8 tableversion; + u8 flags; + u16 settlingtime; + u8 reserved1; + u8 numpst; +}; + +struct pst_s { + u32 cpuid; + u8 fsbspeed; + u8 maxfid; + u8 startvid; + u8 numpstates; +}; + +#ifdef CONFIG_X86_POWERNOW_K7_ACPI +union powernow_acpi_control_t { + struct { + unsigned long fid:5, + vid:5, + sgtc:20, + res1:2; + } bits; + unsigned long val; +}; +#endif + +#ifdef CONFIG_CPU_FREQ_DEBUG +/* divide by 1000 to get VCore voltage in V. */ +static const int mobile_vid_table[32] = { + 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, + 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0, + 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, + 1075, 1050, 1025, 1000, 975, 950, 925, 0, +}; +#endif + +/* divide by 10 to get FID. */ +static const int fid_codes[32] = { + 110, 115, 120, 125, 50, 55, 60, 65, + 70, 75, 80, 85, 90, 95, 100, 105, + 30, 190, 40, 200, 130, 135, 140, 210, + 150, 225, 160, 165, 170, 180, -1, -1, +}; + +/* This parameter is used in order to force ACPI instead of legacy method for + * configuration purpose. 
+ */ + +static int acpi_force; + +static struct cpufreq_frequency_table *powernow_table; + +static unsigned int can_scale_bus; +static unsigned int can_scale_vid; +static unsigned int minimum_speed=-1; +static unsigned int maximum_speed; +static unsigned int number_scales; +static unsigned int fsb; +static unsigned int latency; +static char have_a0; + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k7", msg) + +static int check_fsb(unsigned int fsbspeed) +{ + int delta; + unsigned int f = fsb / 1000; + + delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed; + return (delta < 5); +} + +static int check_powernow(void) +{ + struct cpuinfo_x86 *c = cpu_data; + unsigned int maxei, eax, ebx, ecx, edx; + + if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) { +#ifdef MODULE + printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n"); +#endif + return 0; + } + + /* Get maximum capabilities */ + maxei = cpuid_eax (0x80000000); + if (maxei < 0x80000007) { /* Any powernow info ? */ +#ifdef MODULE + printk (KERN_INFO PFX "No powernow capabilities detected\n"); +#endif + return 0; + } + + if ((c->x86_model == 6) && (c->x86_mask == 0)) { + printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n"); + have_a0 = 1; + } + + cpuid(0x80000007, &eax, &ebx, &ecx, &edx); + + /* Check we can actually do something before we say anything.*/ + if (!(edx & (1 << 1 | 1 << 2))) + return 0; + + printk (KERN_INFO PFX "PowerNOW! Technology present. 
Can scale: "); + + if (edx & 1 << 1) { + printk ("frequency"); + can_scale_bus=1; + } + + if ((edx & (1 << 1 | 1 << 2)) == 0x6) + printk (" and "); + + if (edx & 1 << 2) { + printk ("voltage"); + can_scale_vid=1; + } + + printk (".\n"); + return 1; +} + + +static int get_ranges (unsigned char *pst) +{ + unsigned int j; + unsigned int speed; + u8 fid, vid; + + powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL); + if (!powernow_table) + return -ENOMEM; + + for (j=0 ; j < number_scales; j++) { + fid = *pst++; + + powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10; + powernow_table[j].index = fid; /* lower 8 bits */ + + speed = powernow_table[j].frequency; + + if ((fid_codes[fid] % 10)==5) { +#ifdef CONFIG_X86_POWERNOW_K7_ACPI + if (have_a0 == 1) + powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID; +#endif + } + + if (speed < minimum_speed) + minimum_speed = speed; + if (speed > maximum_speed) + maximum_speed = speed; + + vid = *pst++; + powernow_table[j].index |= (vid << 8); /* upper 8 bits */ + + dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " + "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, + fid_codes[fid] % 10, speed/1000, vid, + mobile_vid_table[vid]/1000, + mobile_vid_table[vid]%1000); + } + powernow_table[number_scales].frequency = CPUFREQ_TABLE_END; + powernow_table[number_scales].index = 0; + + return 0; +} + + +static void change_FID(int fid) +{ + union msr_fidvidctl fidvidctl; + + rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + if (fidvidctl.bits.FID != fid) { + fidvidctl.bits.SGTC = latency; + fidvidctl.bits.FID = fid; + fidvidctl.bits.VIDC = 0; + fidvidctl.bits.FIDC = 1; + wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + } +} + + +static void change_VID(int vid) +{ + union msr_fidvidctl fidvidctl; + + rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + if (fidvidctl.bits.VID != vid) { + fidvidctl.bits.SGTC = latency; + fidvidctl.bits.VID = vid; + fidvidctl.bits.FIDC = 0; + fidvidctl.bits.VIDC = 1; + wrmsrl 
(MSR_K7_FID_VID_CTL, fidvidctl.val); + } +} + + +static void change_speed (unsigned int index) +{ + u8 fid, vid; + struct cpufreq_freqs freqs; + union msr_fidvidstatus fidvidstatus; + int cfid; + + /* fid are the lower 8 bits of the index we stored into + * the cpufreq frequency table in powernow_decode_bios, + * vid are the upper 8 bits. + */ + + fid = powernow_table[index].index & 0xFF; + vid = (powernow_table[index].index & 0xFF00) >> 8; + + freqs.cpu = 0; + + rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); + cfid = fidvidstatus.bits.CFID; + freqs.old = fsb * fid_codes[cfid] / 10; + + freqs.new = powernow_table[index].frequency; + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + /* Now do the magic poking into the MSRs. */ + + if (have_a0 == 1) /* A0 errata 5 */ + local_irq_disable(); + + if (freqs.old > freqs.new) { + /* Going down, so change FID first */ + change_FID(fid); + change_VID(vid); + } else { + /* Going up, so change VID first */ + change_VID(vid); + change_FID(fid); + } + + + if (have_a0 == 1) + local_irq_enable(); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); +} + + +#ifdef CONFIG_X86_POWERNOW_K7_ACPI + +static struct acpi_processor_performance *acpi_processor_perf; + +static int powernow_acpi_init(void) +{ + int i; + int retval = 0; + union powernow_acpi_control_t pc; + + if (acpi_processor_perf != NULL && powernow_table != NULL) { + retval = -EINVAL; + goto err0; + } + + acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance), + GFP_KERNEL); + if (!acpi_processor_perf) { + retval = -ENOMEM; + goto err0; + } + + if (acpi_processor_register_performance(acpi_processor_perf, 0)) { + retval = -EIO; + goto err1; + } + + if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { + retval = -ENODEV; + goto err2; + } + + if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { + retval = -ENODEV; + goto err2; + } + + number_scales = 
acpi_processor_perf->state_count; + + if (number_scales < 2) { + retval = -ENODEV; + goto err2; + } + + powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL); + if (!powernow_table) { + retval = -ENOMEM; + goto err2; + } + + pc.val = (unsigned long) acpi_processor_perf->states[0].control; + for (i = 0; i < number_scales; i++) { + u8 fid, vid; + struct acpi_processor_px *state = + &acpi_processor_perf->states[i]; + unsigned int speed, speed_mhz; + + pc.val = (unsigned long) state->control; + dprintk ("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", + i, + (u32) state->core_frequency, + (u32) state->power, + (u32) state->transition_latency, + (u32) state->control, + pc.bits.sgtc); + + vid = pc.bits.vid; + fid = pc.bits.fid; + + powernow_table[i].frequency = fsb * fid_codes[fid] / 10; + powernow_table[i].index = fid; /* lower 8 bits */ + powernow_table[i].index |= (vid << 8); /* upper 8 bits */ + + speed = powernow_table[i].frequency; + speed_mhz = speed / 1000; + + /* processor_perflib will multiply the MHz value by 1000 to + * get a KHz value (e.g. 1266000). However, powernow-k7 works + * with true KHz values (e.g. 1266768). To ensure that all + * powernow frequencies are available, we must ensure that + * ACPI doesn't restrict them, so we round up the MHz value + * to ensure that perflib's computed KHz value is greater than + * or equal to powernow's KHz value. 
+ */ + if (speed % 1000 > 0) + speed_mhz++; + + if ((fid_codes[fid] % 10)==5) { + if (have_a0 == 1) + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + } + + dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " + "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, + fid_codes[fid] % 10, speed_mhz, vid, + mobile_vid_table[vid]/1000, + mobile_vid_table[vid]%1000); + + if (state->core_frequency != speed_mhz) { + state->core_frequency = speed_mhz; + dprintk(" Corrected ACPI frequency to %d\n", + speed_mhz); + } + + if (latency < pc.bits.sgtc) + latency = pc.bits.sgtc; + + if (speed < minimum_speed) + minimum_speed = speed; + if (speed > maximum_speed) + maximum_speed = speed; + } + + powernow_table[i].frequency = CPUFREQ_TABLE_END; + powernow_table[i].index = 0; + + /* notify BIOS that we exist */ + acpi_processor_notify_smm(THIS_MODULE); + + return 0; + +err2: + acpi_processor_unregister_performance(acpi_processor_perf, 0); +err1: + kfree(acpi_processor_perf); +err0: + printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n"); + acpi_processor_perf = NULL; + return retval; +} +#else +static int powernow_acpi_init(void) +{ + printk(KERN_INFO PFX "no support for ACPI processor found." 
+ " Please recompile your kernel with ACPI processor\n"); + return -EINVAL; +} +#endif + +static int powernow_decode_bios (int maxfid, int startvid) +{ + struct psb_s *psb; + struct pst_s *pst; + unsigned int i, j; + unsigned char *p; + unsigned int etuple; + unsigned int ret; + + etuple = cpuid_eax(0x80000001); + + for (i=0xC0000; i < 0xffff0 ; i+=16) { + + p = phys_to_virt(i); + + if (memcmp(p, "AMDK7PNOW!", 10) == 0){ + dprintk ("Found PSB header at %p\n", p); + psb = (struct psb_s *) p; + dprintk ("Table version: 0x%x\n", psb->tableversion); + if (psb->tableversion != 0x12) { + printk (KERN_INFO PFX "Sorry, only v1.2 tables supported right now\n"); + return -ENODEV; + } + + dprintk ("Flags: 0x%x\n", psb->flags); + if ((psb->flags & 1)==0) { + dprintk ("Mobile voltage regulator\n"); + } else { + dprintk ("Desktop voltage regulator\n"); + } + + latency = psb->settlingtime; + if (latency < 100) { + printk (KERN_INFO PFX "BIOS set settling time to %d microseconds." + "Should be at least 100. Correcting.\n", latency); + latency = 100; + } + dprintk ("Settling Time: %d microseconds.\n", psb->settlingtime); + dprintk ("Has %d PST tables. 
(Only dumping ones relevant to this CPU).\n", psb->numpst); + + p += sizeof (struct psb_s); + + pst = (struct pst_s *) p; + + for (j=0; jnumpst; j++) { + pst = (struct pst_s *) p; + number_scales = pst->numpstates; + + if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) && + (maxfid==pst->maxfid) && (startvid==pst->startvid)) + { + dprintk ("PST:%d (@%p)\n", j, pst); + dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", + pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); + + ret = get_ranges ((char *) pst + sizeof (struct pst_s)); + return ret; + } else { + unsigned int k; + p = (char *) pst + sizeof (struct pst_s); + for (k=0; k= 5) + m += 5; + + m /= 10; + + sgtc = 100 * m * latency; + sgtc = sgtc / 3; + if (sgtc > 0xfffff) { + printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc); + sgtc = 0xfffff; + } + return sgtc; +} + +static unsigned int powernow_get(unsigned int cpu) +{ + union msr_fidvidstatus fidvidstatus; + unsigned int cfid; + + if (cpu) + return 0; + rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); + cfid = fidvidstatus.bits.CFID; + + return (fsb * fid_codes[cfid] / 10); +} + + +static int __init acer_cpufreq_pst(struct dmi_system_id *d) +{ + printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident); + printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n"); + printk(KERN_WARNING "cpufreq scaling has been disabled as a result of this.\n"); + return 0; +} + +/* + * Some Athlon laptops have really fucked PST tables. + * A BIOS update is all that can save them. + * Mention this, and disable cpufreq. 
+ */ +static struct dmi_system_id __initdata powernow_dmi_table[] = { + { + .callback = acer_cpufreq_pst, + .ident = "Acer Aspire", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"), + DMI_MATCH(DMI_BIOS_VERSION, "3A71"), + }, + }, + { } +}; + +static int __init powernow_cpu_init (struct cpufreq_policy *policy) +{ + union msr_fidvidstatus fidvidstatus; + int result; + + if (policy->cpu != 0) + return -ENODEV; + + rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); + + recalibrate_cpu_khz(); + + fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; + if (!fsb) { + printk(KERN_WARNING PFX "can not determine bus frequency\n"); + return -EINVAL; + } + dprintk("FSB: %3dMHz\n", fsb/1000); + + if (dmi_check_system(powernow_dmi_table) || acpi_force) { + printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n"); + result = powernow_acpi_init(); + } else { + result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID); + if (result) { + printk (KERN_INFO PFX "Trying ACPI perflib\n"); + maximum_speed = 0; + minimum_speed = -1; + latency = 0; + result = powernow_acpi_init(); + if (result) { + printk (KERN_INFO PFX "ACPI and legacy methods failed\n"); + printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.html\n"); + } + } else { + /* SGTC use the bus clock as timer */ + latency = fixup_sgtc(); + printk(KERN_INFO PFX "SGTC: %d\n", latency); + } + } + + if (result) + return result; + + printk (KERN_INFO PFX "Minimum speed %d MHz. 
Maximum speed %d MHz.\n", + minimum_speed/1000, maximum_speed/1000); + + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + + policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency); + + policy->cur = powernow_get(0); + + cpufreq_frequency_table_get_attr(powernow_table, policy->cpu); + + return cpufreq_frequency_table_cpuinfo(policy, powernow_table); +} + +static int powernow_cpu_exit (struct cpufreq_policy *policy) { + cpufreq_frequency_table_put_attr(policy->cpu); + +#ifdef CONFIG_X86_POWERNOW_K7_ACPI + if (acpi_processor_perf) { + acpi_processor_unregister_performance(acpi_processor_perf, 0); + kfree(acpi_processor_perf); + } +#endif + + kfree(powernow_table); + return 0; +} + +static struct freq_attr* powernow_table_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver powernow_driver = { + .verify = powernow_verify, + .target = powernow_target, + .get = powernow_get, + .init = powernow_cpu_init, + .exit = powernow_cpu_exit, + .name = "powernow-k7", + .owner = THIS_MODULE, + .attr = powernow_table_attr, +}; + +static int __init powernow_init (void) +{ + if (check_powernow()==0) + return -ENODEV; + return cpufreq_register_driver(&powernow_driver); +} + + +static void __exit powernow_exit (void) +{ + cpufreq_unregister_driver(&powernow_driver); +} + +module_param(acpi_force, int, 0444); +MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); + +MODULE_AUTHOR ("Dave Jones "); +MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); +MODULE_LICENSE ("GPL"); + +late_initcall(powernow_init); +module_exit(powernow_exit); + diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h new file mode 100644 index 00000000000..f8a63b3664e --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h @@ -0,0 +1,44 @@ +/* + * $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $ + * (C) 2003 Dave Jones. 
/*
 * 64-bit FID/VID control register image, accessed either as a whole
 * (val) or field by field (bits).  The bit ranges in the trailing
 * comments are written high:low and assume bit-fields are allocated
 * starting at the least significant bit -- NOTE(review): confirm for
 * the compilers this is built with.
 */
union msr_fidvidctl {
	struct {
		unsigned FID:5,			// 4:0
		reserved1:3,	// 7:5
		VID:5,			// 12:8
		reserved2:3,	// 15:13
		FIDC:1,			// 16
		VIDC:1,			// 17
		reserved3:2,	// 19:18
		FIDCHGRATIO:1,	// 20
		reserved4:11,	// 31:21
		SGTC:20,		// 51:32
		reserved5:12;	// 63:52
	} bits;
	unsigned long long val;
};

/*
 * 64-bit FID/VID status register image, accessed either as a whole
 * (val) or field by field (bits).  Bit ranges are written high:low,
 * under the same bit-field allocation assumption as above.
 */
union msr_fidvidstatus {
	struct {
		unsigned CFID:5,			// 4:0
		reserved1:3,	// 7:5
		SFID:5,			// 12:8
		reserved2:3,	// 15:13
		MFID:5,			// 20:16
		reserved3:11,	// 31:21
		CVID:5,			// 36:32
		reserved4:3,	// 39:37
		SVID:5,			// 44:40
		reserved5:3,	// 47:45
		MVID:5,			// 52:48
		reserved6:11;	// 63:53
	} bits;
	unsigned long long val;
};
+ * Processor information obtained from Chapter 9 (Power and Thermal Management) + * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD + * Opteron Processors" available for download from www.amd.com + * + * Tables for specific CPUs can be inferred from + * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for current / set_cpus_allowed() */ + +#include +#include +#include + +#ifdef CONFIG_X86_POWERNOW_K8_ACPI +#include +#include +#include +#endif + +#define PFX "powernow-k8: " +#define BFX PFX "BIOS error: " +#define VERSION "version 2.00.00" +#include "powernow-k8.h" + +/* serialize freq changes */ +static DEFINE_MUTEX(fidvid_mutex); + +static struct powernow_k8_data *powernow_data[NR_CPUS]; + +static int cpu_family = CPU_OPTERON; + +#ifndef CONFIG_SMP +static cpumask_t cpu_core_map[1]; +#endif + +/* Return a frequency in MHz, given an input fid */ +static u32 find_freq_from_fid(u32 fid) +{ + return 800 + (fid * 100); +} + + +/* Return a frequency in KHz, given an input fid */ +static u32 find_khz_freq_from_fid(u32 fid) +{ + return 1000 * find_freq_from_fid(fid); +} + +/* Return a frequency in MHz, given an input fid and did */ +static u32 find_freq_from_fiddid(u32 fid, u32 did) +{ + return 100 * (fid + 0x10) >> did; +} + +static u32 find_khz_freq_from_fiddid(u32 fid, u32 did) +{ + return 1000 * find_freq_from_fiddid(fid, did); +} + +static u32 find_fid_from_pstate(u32 pstate) +{ + u32 hi, lo; + rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); + return lo & HW_PSTATE_FID_MASK; +} + +static u32 find_did_from_pstate(u32 pstate) +{ + u32 hi, lo; + rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); + return (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; +} + +/* Return the vco fid for an input fid + * + * Each "low" fid has corresponding "high" fid, and you can get to "low" fids + * only from corresponding high fids. 
This returns "high" fid corresponding to + * "low" one. + */ +static u32 convert_fid_to_vco_fid(u32 fid) +{ + if (fid < HI_FID_TABLE_BOTTOM) + return 8 + (2 * fid); + else + return fid; +} + +/* + * Return 1 if the pending bit is set. Unless we just instructed the processor + * to transition to a new state, seeing this bit set is really bad news. + */ +static int pending_bit_stuck(void) +{ + u32 lo, hi; + + if (cpu_family == CPU_HW_PSTATE) + return 0; + + rdmsr(MSR_FIDVID_STATUS, lo, hi); + return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; +} + +/* + * Update the global current fid / vid values from the status msr. + * Returns 1 on error. + */ +static int query_current_values_with_pending_wait(struct powernow_k8_data *data) +{ + u32 lo, hi; + u32 i = 0; + + if (cpu_family == CPU_HW_PSTATE) { + rdmsr(MSR_PSTATE_STATUS, lo, hi); + i = lo & HW_PSTATE_MASK; + rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi); + data->currfid = lo & HW_PSTATE_FID_MASK; + data->currdid = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; + return 0; + } + do { + if (i++ > 10000) { + dprintk("detected change pending stuck\n"); + return 1; + } + rdmsr(MSR_FIDVID_STATUS, lo, hi); + } while (lo & MSR_S_LO_CHANGE_PENDING); + + data->currvid = hi & MSR_S_HI_CURRENT_VID; + data->currfid = lo & MSR_S_LO_CURRENT_FID; + + return 0; +} + +/* the isochronous relief time */ +static void count_off_irt(struct powernow_k8_data *data) +{ + udelay((1 << data->irt) * 10); + return; +} + +/* the voltage stabalization time */ +static void count_off_vst(struct powernow_k8_data *data) +{ + udelay(data->vstable * VST_UNITS_20US); + return; +} + +/* need to init the control msr to a safe value (for each cpu) */ +static void fidvid_msr_init(void) +{ + u32 lo, hi; + u8 fid, vid; + + rdmsr(MSR_FIDVID_STATUS, lo, hi); + vid = hi & MSR_S_HI_CURRENT_VID; + fid = lo & MSR_S_LO_CURRENT_FID; + lo = fid | (vid << MSR_C_LO_VID_SHIFT); + hi = MSR_C_HI_STP_GNT_BENIGN; + dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi); + 
wrmsr(MSR_FIDVID_CTL, lo, hi); +} + + +/* write the new fid value along with the other control fields to the msr */ +static int write_new_fid(struct powernow_k8_data *data, u32 fid) +{ + u32 lo; + u32 savevid = data->currvid; + u32 i = 0; + + if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) { + printk(KERN_ERR PFX "internal error - overflow on fid write\n"); + return 1; + } + + lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; + + dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", + fid, lo, data->plllock * PLL_LOCK_CONVERSION); + + do { + wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); + if (i++ > 100) { + printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n"); + return 1; + } + } while (query_current_values_with_pending_wait(data)); + + count_off_irt(data); + + if (savevid != data->currvid) { + printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n", + savevid, data->currvid); + return 1; + } + + if (fid != data->currfid) { + printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid, + data->currfid); + return 1; + } + + return 0; +} + +/* Write a new vid to the hardware */ +static int write_new_vid(struct powernow_k8_data *data, u32 vid) +{ + u32 lo; + u32 savefid = data->currfid; + int i = 0; + + if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { + printk(KERN_ERR PFX "internal error - overflow on vid write\n"); + return 1; + } + + lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; + + dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", + vid, lo, STOP_GRANT_5NS); + + do { + wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); + if (i++ > 100) { + printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); + return 1; + } + } while (query_current_values_with_pending_wait(data)); + + if (savefid != data->currfid) { + printk(KERN_ERR PFX "fid changed on vid trans, old 
0x%x new 0x%x\n", + savefid, data->currfid); + return 1; + } + + if (vid != data->currvid) { + printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid, + data->currvid); + return 1; + } + + return 0; +} + +/* + * Reduce the vid by the max of step or reqvid. + * Decreasing vid codes represent increasing voltages: + * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. + */ +static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step) +{ + if ((data->currvid - reqvid) > step) + reqvid = data->currvid - step; + + if (write_new_vid(data, reqvid)) + return 1; + + count_off_vst(data); + + return 0; +} + +/* Change hardware pstate by single MSR write */ +static int transition_pstate(struct powernow_k8_data *data, u32 pstate) +{ + wrmsr(MSR_PSTATE_CTRL, pstate, 0); + data->currfid = find_fid_from_pstate(pstate); + return 0; +} + +/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ +static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid) +{ + if (core_voltage_pre_transition(data, reqvid)) + return 1; + + if (core_frequency_transition(data, reqfid)) + return 1; + + if (core_voltage_post_transition(data, reqvid)) + return 1; + + if (query_current_values_with_pending_wait(data)) + return 1; + + if ((reqfid != data->currfid) || (reqvid != data->currvid)) { + printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n", + smp_processor_id(), + reqfid, reqvid, data->currfid, data->currvid); + return 1; + } + + dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n", + smp_processor_id(), data->currfid, data->currvid); + + return 0; +} + +/* Phase 1 - core voltage transition ... 
setup voltage */ +static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid) +{ + u32 rvosteps = data->rvo; + u32 savefid = data->currfid; + u32 maxvid, lo; + + dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n", + smp_processor_id(), + data->currfid, data->currvid, reqvid, data->rvo); + + rdmsr(MSR_FIDVID_STATUS, lo, maxvid); + maxvid = 0x1f & (maxvid >> 16); + dprintk("ph1 maxvid=0x%x\n", maxvid); + if (reqvid < maxvid) /* lower numbers are higher voltages */ + reqvid = maxvid; + + while (data->currvid > reqvid) { + dprintk("ph1: curr 0x%x, req vid 0x%x\n", + data->currvid, reqvid); + if (decrease_vid_code_by_step(data, reqvid, data->vidmvs)) + return 1; + } + + while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { + if (data->currvid == maxvid) { + rvosteps = 0; + } else { + dprintk("ph1: changing vid for rvo, req 0x%x\n", + data->currvid - 1); + if (decrease_vid_code_by_step(data, data->currvid - 1, 1)) + return 1; + rvosteps--; + } + } + + if (query_current_values_with_pending_wait(data)) + return 1; + + if (savefid != data->currfid) { + printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid); + return 1; + } + + dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n", + data->currfid, data->currvid); + + return 0; +} + +/* Phase 2 - core frequency transition */ +static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) +{ + u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid; + + if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { + printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n", + reqfid, data->currfid); + return 1; + } + + if (data->currfid == reqfid) { + printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid); + return 0; + } + + dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n", + smp_processor_id(), + data->currfid, data->currvid, reqfid); + + 
vcoreqfid = convert_fid_to_vco_fid(reqfid); + vcocurrfid = convert_fid_to_vco_fid(data->currfid); + vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid + : vcoreqfid - vcocurrfid; + + while (vcofiddiff > 2) { + (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); + + if (reqfid > data->currfid) { + if (data->currfid > LO_FID_TABLE_TOP) { + if (write_new_fid(data, data->currfid + fid_interval)) { + return 1; + } + } else { + if (write_new_fid + (data, 2 + convert_fid_to_vco_fid(data->currfid))) { + return 1; + } + } + } else { + if (write_new_fid(data, data->currfid - fid_interval)) + return 1; + } + + vcocurrfid = convert_fid_to_vco_fid(data->currfid); + vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid + : vcoreqfid - vcocurrfid; + } + + if (write_new_fid(data, reqfid)) + return 1; + + if (query_current_values_with_pending_wait(data)) + return 1; + + if (data->currfid != reqfid) { + printk(KERN_ERR PFX + "ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n", + data->currfid, reqfid); + return 1; + } + + if (savevid != data->currvid) { + printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n", + savevid, data->currvid); + return 1; + } + + dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n", + data->currfid, data->currvid); + + return 0; +} + +/* Phase 3 - core voltage transition flow ... jump to the final vid. 
*/ +static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid) +{ + u32 savefid = data->currfid; + u32 savereqvid = reqvid; + + dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n", + smp_processor_id(), + data->currfid, data->currvid); + + if (reqvid != data->currvid) { + if (write_new_vid(data, reqvid)) + return 1; + + if (savefid != data->currfid) { + printk(KERN_ERR PFX + "ph3: bad fid change, save 0x%x, curr 0x%x\n", + savefid, data->currfid); + return 1; + } + + if (data->currvid != reqvid) { + printk(KERN_ERR PFX + "ph3: failed vid transition\n, req 0x%x, curr 0x%x", + reqvid, data->currvid); + return 1; + } + } + + if (query_current_values_with_pending_wait(data)) + return 1; + + if (savereqvid != data->currvid) { + dprintk("ph3 failed, currvid 0x%x\n", data->currvid); + return 1; + } + + if (savefid != data->currfid) { + dprintk("ph3 failed, currfid changed 0x%x\n", + data->currfid); + return 1; + } + + dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n", + data->currfid, data->currvid); + + return 0; +} + +static int check_supported_cpu(unsigned int cpu) +{ + cpumask_t oldmask = CPU_MASK_ALL; + u32 eax, ebx, ecx, edx; + unsigned int rc = 0; + + oldmask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + + if (smp_processor_id() != cpu) { + printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); + goto out; + } + + if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) + goto out; + + eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && + ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) + goto out; + + if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { + if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || + ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { + printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); + goto out; + } + + eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); + if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { + printk(KERN_INFO PFX + "No frequency change 
capabilities detected\n"); + goto out; + } + + cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); + if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) { + printk(KERN_INFO PFX "Power state transitions not supported\n"); + goto out; + } + } else { /* must be a HW Pstate capable processor */ + cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); + if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) + cpu_family = CPU_HW_PSTATE; + else + goto out; + } + + rc = 1; + +out: + set_cpus_allowed(current, oldmask); + return rc; +} + +static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) +{ + unsigned int j; + u8 lastfid = 0xff; + + for (j = 0; j < data->numps; j++) { + if (pst[j].vid > LEAST_VID) { + printk(KERN_ERR PFX "vid %d invalid : 0x%x\n", j, pst[j].vid); + return -EINVAL; + } + if (pst[j].vid < data->rvo) { /* vid + rvo >= 0 */ + printk(KERN_ERR BFX "0 vid exceeded with pstate %d\n", j); + return -ENODEV; + } + if (pst[j].vid < maxvid + data->rvo) { /* vid + rvo >= maxvid */ + printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j); + return -ENODEV; + } + if (pst[j].fid > MAX_FID) { + printk(KERN_ERR BFX "maxfid exceeded with pstate %d\n", j); + return -ENODEV; + } + if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { + /* Only first fid is allowed to be in "low" range */ + printk(KERN_ERR BFX "two low fids - %d : 0x%x\n", j, pst[j].fid); + return -EINVAL; + } + if (pst[j].fid < lastfid) + lastfid = pst[j].fid; + } + if (lastfid & 1) { + printk(KERN_ERR BFX "lastfid invalid\n"); + return -EINVAL; + } + if (lastfid > LO_FID_TABLE_TOP) + printk(KERN_INFO BFX "first fid not from lo freq table\n"); + + return 0; +} + +static void print_basics(struct powernow_k8_data *data) +{ + int j; + for (j = 0; j < data->numps; j++) { + if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) { + if (cpu_family == CPU_HW_PSTATE) { + printk(KERN_INFO PFX " %d : fid 0x%x did 0x%x (%d MHz)\n", + j, + 
(data->powernow_table[j].index & 0xff00) >> 8, + (data->powernow_table[j].index & 0xff0000) >> 16, + data->powernow_table[j].frequency/1000); + } else { + printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", + j, + data->powernow_table[j].index & 0xff, + data->powernow_table[j].frequency/1000, + data->powernow_table[j].index >> 8); + } + } + } + if (data->batps) + printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps); +} + +static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) +{ + struct cpufreq_frequency_table *powernow_table; + unsigned int j; + + if (data->batps) { /* use ACPI support to get full speed on mains power */ + printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range\n", data->batps); + data->numps = data->batps; + } + + for ( j=1; jnumps; j++ ) { + if (pst[j-1].fid >= pst[j].fid) { + printk(KERN_ERR PFX "PST out of sequence\n"); + return -EINVAL; + } + } + + if (data->numps < 2) { + printk(KERN_ERR PFX "no p states to transition\n"); + return -ENODEV; + } + + if (check_pst_table(data, pst, maxvid)) + return -EINVAL; + + powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) + * (data->numps + 1)), GFP_KERNEL); + if (!powernow_table) { + printk(KERN_ERR PFX "powernow_table memory alloc failure\n"); + return -ENOMEM; + } + + for (j = 0; j < data->numps; j++) { + powernow_table[j].index = pst[j].fid; /* lower 8 bits */ + powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ + powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid); + } + powernow_table[data->numps].frequency = CPUFREQ_TABLE_END; + powernow_table[data->numps].index = 0; + + if (query_current_values_with_pending_wait(data)) { + kfree(powernow_table); + return -EIO; + } + + dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); + data->powernow_table = powernow_table; + if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) + print_basics(data); + + for (j = 0; j < 
data->numps; j++) + if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid)) + return 0; + + dprintk("currfid/vid do not match PST, ignoring\n"); + return 0; +} + +/* Find and validate the PSB/PST table in BIOS. */ +static int find_psb_table(struct powernow_k8_data *data) +{ + struct psb_s *psb; + unsigned int i; + u32 mvs; + u8 maxvid; + u32 cpst = 0; + u32 thiscpuid; + + for (i = 0xc0000; i < 0xffff0; i += 0x10) { + /* Scan BIOS looking for the signature. */ + /* It can not be at ffff0 - it is too big. */ + + psb = phys_to_virt(i); + if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0) + continue; + + dprintk("found PSB header at 0x%p\n", psb); + + dprintk("table vers: 0x%x\n", psb->tableversion); + if (psb->tableversion != PSB_VERSION_1_4) { + printk(KERN_ERR BFX "PSB table is not v1.4\n"); + return -ENODEV; + } + + dprintk("flags: 0x%x\n", psb->flags1); + if (psb->flags1) { + printk(KERN_ERR BFX "unknown flags\n"); + return -ENODEV; + } + + data->vstable = psb->vstable; + dprintk("voltage stabilization time: %d(*20us)\n", data->vstable); + + dprintk("flags2: 0x%x\n", psb->flags2); + data->rvo = psb->flags2 & 3; + data->irt = ((psb->flags2) >> 2) & 3; + mvs = ((psb->flags2) >> 4) & 3; + data->vidmvs = 1 << mvs; + data->batps = ((psb->flags2) >> 6) & 3; + + dprintk("ramp voltage offset: %d\n", data->rvo); + dprintk("isochronous relief time: %d\n", data->irt); + dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs); + + dprintk("numpst: 0x%x\n", psb->num_tables); + cpst = psb->num_tables; + if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0) ){ + thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0) ) { + cpst = 1; + } + } + if (cpst != 1) { + printk(KERN_ERR BFX "numpst must be 1\n"); + return -ENODEV; + } + + data->plllock = psb->plllocktime; + dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime); + dprintk("maxfid: 0x%x\n", psb->maxfid); + dprintk("maxvid: 0x%x\n", 
psb->maxvid); + maxvid = psb->maxvid; + + data->numps = psb->numps; + dprintk("numpstates: 0x%x\n", data->numps); + return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid); + } + /* + * If you see this message, complain to BIOS manufacturer. If + * he tells you "we do not support Linux" or some similar + * nonsense, remember that Windows 2000 uses the same legacy + * mechanism that the old Linux PSB driver uses. Tell them it + * is broken with Windows 2000. + * + * The reference to the AMD documentation is chapter 9 in the + * BIOS and Kernel Developer's Guide, which is available on + * www.amd.com + */ + printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n"); + return -ENODEV; +} + +#ifdef CONFIG_X86_POWERNOW_K8_ACPI +static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) +{ + if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) + return; + + data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; + data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; + data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; + data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; + data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); + data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; +} + +static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) +{ + struct cpufreq_frequency_table *powernow_table; + int ret_val; + + if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { + dprintk("register performance failed: bad ACPI data\n"); + return -EIO; + } + + /* verify the data contained in the ACPI structures */ + if (data->acpi_data.state_count <= 1) { + dprintk("No ACPI P-States\n"); + goto err_out; + } + + if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || + 
(data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { + dprintk("Invalid control/status registers (%x - %x)\n", + data->acpi_data.control_register.space_id, + data->acpi_data.status_register.space_id); + goto err_out; + } + + /* fill in data->powernow_table */ + powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) + * (data->acpi_data.state_count + 1)), GFP_KERNEL); + if (!powernow_table) { + dprintk("powernow_table memory alloc failure\n"); + goto err_out; + } + + if (cpu_family == CPU_HW_PSTATE) + ret_val = fill_powernow_table_pstate(data, powernow_table); + else + ret_val = fill_powernow_table_fidvid(data, powernow_table); + if (ret_val) + goto err_out_mem; + + powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; + powernow_table[data->acpi_data.state_count].index = 0; + data->powernow_table = powernow_table; + + /* fill in data */ + data->numps = data->acpi_data.state_count; + if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) + print_basics(data); + powernow_k8_acpi_pst_values(data, 0); + + /* notify BIOS that we exist */ + acpi_processor_notify_smm(THIS_MODULE); + + return 0; + +err_out_mem: + kfree(powernow_table); + +err_out: + acpi_processor_unregister_performance(&data->acpi_data, data->cpu); + + /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ + data->acpi_data.state_count = 0; + + return -ENODEV; +} + +static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) +{ + int i; + + for (i = 0; i < data->acpi_data.state_count; i++) { + u32 index; + u32 hi = 0, lo = 0; + u32 fid; + u32 did; + + index = data->acpi_data.states[i].control & HW_PSTATE_MASK; + if (index > MAX_HW_PSTATE) { + printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); + printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); + } + rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); + if (!(hi & HW_PSTATE_VALID_MASK)) { + 
dprintk("invalid pstate %d, ignoring\n", index); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + + fid = lo & HW_PSTATE_FID_MASK; + did = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; + + dprintk(" %d : fid 0x%x, did 0x%x\n", index, fid, did); + + powernow_table[i].index = index | (fid << HW_FID_INDEX_SHIFT) | (did << HW_DID_INDEX_SHIFT); + + powernow_table[i].frequency = find_khz_freq_from_fiddid(fid, did); + + if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { + printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", + powernow_table[i].frequency, + (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + } + return 0; +} + +static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) +{ + int i; + int cntlofreq = 0; + for (i = 0; i < data->acpi_data.state_count; i++) { + u32 fid; + u32 vid; + + if (data->exttype) { + fid = data->acpi_data.states[i].status & EXT_FID_MASK; + vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; + } else { + fid = data->acpi_data.states[i].control & FID_MASK; + vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; + } + + dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); + + powernow_table[i].index = fid; /* lower 8 bits */ + powernow_table[i].index |= (vid << 8); /* upper 8 bits */ + powernow_table[i].frequency = find_khz_freq_from_fid(fid); + + /* verify frequency is OK */ + if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) || + (powernow_table[i].frequency < (MIN_FREQ * 1000))) { + dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + + /* verify voltage is OK - BIOSs are using "off" to indicate invalid */ + if (vid == VID_OFF) { + dprintk("invalid vid %u, ignoring\n", vid); + 
powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + + /* verify only 1 entry from the lo frequency table */ + if (fid < HI_FID_TABLE_BOTTOM) { + if (cntlofreq) { + /* if both entries are the same, ignore this one ... */ + if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || + (powernow_table[i].index != powernow_table[cntlofreq].index)) { + printk(KERN_ERR PFX "Too many lo freq table entries\n"); + return 1; + } + + dprintk("double low frequency table entry, ignoring it.\n"); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } else + cntlofreq = i; + } + + if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { + printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", + powernow_table[i].frequency, + (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + } + return 0; +} + +static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) +{ + if (data->acpi_data.state_count) + acpi_processor_unregister_performance(&data->acpi_data, data->cpu); +} + +#else +static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } +static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } +static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } +#endif /* CONFIG_X86_POWERNOW_K8_ACPI */ + +/* Take a frequency, and issue the fid/vid transition command */ +static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index) +{ + u32 fid = 0; + u32 vid = 0; + int res, i; + struct cpufreq_freqs freqs; + + dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); + + /* fid/vid correctness check for k8 */ + /* fid are the lower 8 bits of the index we stored into + * the cpufreq frequency table in find_psb_table, vid + * are the upper 8 bits. 
+ */ + fid = data->powernow_table[index].index & 0xFF; + vid = (data->powernow_table[index].index & 0xFF00) >> 8; + + dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid); + + if (query_current_values_with_pending_wait(data)) + return 1; + + if ((data->currvid == vid) && (data->currfid == fid)) { + dprintk("target matches current values (fid 0x%x, vid 0x%x)\n", + fid, vid); + return 0; + } + + if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { + printk(KERN_ERR PFX + "ignoring illegal change in lo freq table-%x to 0x%x\n", + data->currfid, fid); + return 1; + } + + dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", + smp_processor_id(), fid, vid); + freqs.old = find_khz_freq_from_fid(data->currfid); + freqs.new = find_khz_freq_from_fid(fid); + + for_each_cpu_mask(i, *(data->available_cores)) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + res = transition_fid_vid(data, fid, vid); + freqs.new = find_khz_freq_from_fid(data->currfid); + + for_each_cpu_mask(i, *(data->available_cores)) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + return res; +} + +/* Take a frequency, and issue the hardware pstate transition command */ +static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index) +{ + u32 fid = 0; + u32 did = 0; + u32 pstate = 0; + int res, i; + struct cpufreq_freqs freqs; + + dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); + + /* get fid did for hardware pstate transition */ + pstate = index & HW_PSTATE_MASK; + if (pstate > MAX_HW_PSTATE) + return 0; + fid = (index & HW_FID_INDEX_MASK) >> HW_FID_INDEX_SHIFT; + did = (index & HW_DID_INDEX_MASK) >> HW_DID_INDEX_SHIFT; + freqs.old = find_khz_freq_from_fiddid(data->currfid, data->currdid); + freqs.new = find_khz_freq_from_fiddid(fid, did); + + for_each_cpu_mask(i, *(data->available_cores)) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + 
} + + res = transition_pstate(data, pstate); + data->currfid = find_fid_from_pstate(pstate); + data->currdid = find_did_from_pstate(pstate); + freqs.new = find_khz_freq_from_fiddid(data->currfid, data->currdid); + + for_each_cpu_mask(i, *(data->available_cores)) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + return res; +} + +/* Driver entry point to switch to the target frequency */ +static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) +{ + cpumask_t oldmask = CPU_MASK_ALL; + struct powernow_k8_data *data = powernow_data[pol->cpu]; + u32 checkfid; + u32 checkvid; + unsigned int newstate; + int ret = -EIO; + + if (!data) + return -EINVAL; + + checkfid = data->currfid; + checkvid = data->currvid; + + /* only run on specific CPU from here on */ + oldmask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); + + if (smp_processor_id() != pol->cpu) { + printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); + goto err_out; + } + + if (pending_bit_stuck()) { + printk(KERN_ERR PFX "failing targ, change pending bit set\n"); + goto err_out; + } + + dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", + pol->cpu, targfreq, pol->min, pol->max, relation); + + if (query_current_values_with_pending_wait(data)) + goto err_out; + + if (cpu_family == CPU_HW_PSTATE) + dprintk("targ: curr fid 0x%x, did 0x%x\n", + data->currfid, data->currdid); + else { + dprintk("targ: curr fid 0x%x, vid 0x%x\n", + data->currfid, data->currvid); + + if ((checkvid != data->currvid) || (checkfid != data->currfid)) { + printk(KERN_INFO PFX + "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n", + checkfid, data->currfid, checkvid, data->currvid); + } + } + + if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) + goto err_out; + + mutex_lock(&fidvid_mutex); + + powernow_k8_acpi_pst_values(data, newstate); + + if (cpu_family == CPU_HW_PSTATE) + 
ret = transition_frequency_pstate(data, newstate); + else + ret = transition_frequency_fidvid(data, newstate); + if (ret) { + printk(KERN_ERR PFX "transition frequency failed\n"); + ret = 1; + mutex_unlock(&fidvid_mutex); + goto err_out; + } + mutex_unlock(&fidvid_mutex); + + if (cpu_family == CPU_HW_PSTATE) + pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); + else + pol->cur = find_khz_freq_from_fid(data->currfid); + ret = 0; + +err_out: + set_cpus_allowed(current, oldmask); + return ret; +} + +/* Driver entry point to verify the policy and range of frequencies */ +static int powernowk8_verify(struct cpufreq_policy *pol) +{ + struct powernow_k8_data *data = powernow_data[pol->cpu]; + + if (!data) + return -EINVAL; + + return cpufreq_frequency_table_verify(pol, data->powernow_table); +} + +/* per CPU init entry point to the driver */ +static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) +{ + struct powernow_k8_data *data; + cpumask_t oldmask = CPU_MASK_ALL; + int rc; + + if (!cpu_online(pol->cpu)) + return -ENODEV; + + if (!check_supported_cpu(pol->cpu)) + return -ENODEV; + + data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); + if (!data) { + printk(KERN_ERR PFX "unable to alloc powernow_k8_data"); + return -ENOMEM; + } + + data->cpu = pol->cpu; + + if (powernow_k8_cpu_init_acpi(data)) { + /* + * Use the PSB BIOS structure. This is only availabe on + * an UP version, and is deprecated by AMD. 
+ */ + if (num_online_cpus() != 1) { + printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n"); + kfree(data); + return -ENODEV; + } + if (pol->cpu != 0) { + printk(KERN_ERR PFX "No _PSS objects for CPU other than CPU0\n"); + kfree(data); + return -ENODEV; + } + rc = find_psb_table(data); + if (rc) { + kfree(data); + return -ENODEV; + } + } + + /* only run on specific CPU from here on */ + oldmask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); + + if (smp_processor_id() != pol->cpu) { + printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); + goto err_out; + } + + if (pending_bit_stuck()) { + printk(KERN_ERR PFX "failing init, change pending bit set\n"); + goto err_out; + } + + if (query_current_values_with_pending_wait(data)) + goto err_out; + + if (cpu_family == CPU_OPTERON) + fidvid_msr_init(); + + /* run on any CPU again */ + set_cpus_allowed(current, oldmask); + + pol->governor = CPUFREQ_DEFAULT_GOVERNOR; + if (cpu_family == CPU_HW_PSTATE) + pol->cpus = cpumask_of_cpu(pol->cpu); + else + pol->cpus = cpu_core_map[pol->cpu]; + data->available_cores = &(pol->cpus); + + /* Take a crude guess here. 
+ * That guess was in microseconds, so multiply with 1000 */ + pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US) + + (3 * (1 << data->irt) * 10)) * 1000; + + if (cpu_family == CPU_HW_PSTATE) + pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); + else + pol->cur = find_khz_freq_from_fid(data->currfid); + dprintk("policy current frequency %d kHz\n", pol->cur); + + /* min/max the cpu is capable of */ + if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { + printk(KERN_ERR PFX "invalid powernow_table\n"); + powernow_k8_cpu_exit_acpi(data); + kfree(data->powernow_table); + kfree(data); + return -EINVAL; + } + + cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); + + if (cpu_family == CPU_HW_PSTATE) + dprintk("cpu_init done, current fid 0x%x, did 0x%x\n", + data->currfid, data->currdid); + else + dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", + data->currfid, data->currvid); + + powernow_data[pol->cpu] = data; + + return 0; + +err_out: + set_cpus_allowed(current, oldmask); + powernow_k8_cpu_exit_acpi(data); + + kfree(data); + return -ENODEV; +} + +static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) +{ + struct powernow_k8_data *data = powernow_data[pol->cpu]; + + if (!data) + return -EINVAL; + + powernow_k8_cpu_exit_acpi(data); + + cpufreq_frequency_table_put_attr(pol->cpu); + + kfree(data->powernow_table); + kfree(data); + + return 0; +} + +static unsigned int powernowk8_get (unsigned int cpu) +{ + struct powernow_k8_data *data; + cpumask_t oldmask = current->cpus_allowed; + unsigned int khz = 0; + + data = powernow_data[first_cpu(cpu_core_map[cpu])]; + + if (!data) + return -EINVAL; + + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (smp_processor_id() != cpu) { + printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); + set_cpus_allowed(current, oldmask); + return 0; + } + + if (query_current_values_with_pending_wait(data)) + goto out; + + 
if (cpu_family == CPU_HW_PSTATE) + khz = find_khz_freq_from_fiddid(data->currfid, data->currdid); + else + khz = find_khz_freq_from_fid(data->currfid); + + +out: + set_cpus_allowed(current, oldmask); + return khz; +} + +static struct freq_attr* powernow_k8_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver cpufreq_amd64_driver = { + .verify = powernowk8_verify, + .target = powernowk8_target, + .init = powernowk8_cpu_init, + .exit = __devexit_p(powernowk8_cpu_exit), + .get = powernowk8_get, + .name = "powernow-k8", + .owner = THIS_MODULE, + .attr = powernow_k8_attr, +}; + +/* driver entry point for init */ +static int __cpuinit powernowk8_init(void) +{ + unsigned int i, supported_cpus = 0; + unsigned int booted_cores = 1; + + for_each_online_cpu(i) { + if (check_supported_cpu(i)) + supported_cpus++; + } + +#ifdef CONFIG_SMP + booted_cores = cpu_data[0].booted_cores; +#endif + + if (supported_cpus == num_online_cpus()) { + printk(KERN_INFO PFX "Found %d %s " + "processors (%d cpu cores) (" VERSION ")\n", + supported_cpus/booted_cores, + boot_cpu_data.x86_model_id, supported_cpus); + return cpufreq_register_driver(&cpufreq_amd64_driver); + } + + return -ENODEV; +} + +/* driver entry point for term */ +static void __exit powernowk8_exit(void) +{ + dprintk("exit\n"); + + cpufreq_unregister_driver(&cpufreq_amd64_driver); +} + +MODULE_AUTHOR("Paul Devriendt and Mark Langsdorf "); +MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); +MODULE_LICENSE("GPL"); + +late_initcall(powernowk8_init); +module_exit(powernowk8_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h new file mode 100644 index 00000000000..b06c812208c --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -0,0 +1,232 @@ +/* + * (c) 2003-2006 Advanced Micro Devices, Inc. 
+ * Your use of this code is subject to the terms and conditions of the + * GNU general public license version 2. See "COPYING" or + * http://www.gnu.org/licenses/gpl.html + */ + +struct powernow_k8_data { + unsigned int cpu; + + u32 numps; /* number of p-states */ + u32 batps; /* number of p-states supported on battery */ + + /* these values are constant when the PSB is used to determine + * vid/fid pairings, but are modified during the ->target() call + * when ACPI is used */ + u32 rvo; /* ramp voltage offset */ + u32 irt; /* isochronous relief time */ + u32 vidmvs; /* usable value calculated from mvs */ + u32 vstable; /* voltage stabilization time, units 20 us */ + u32 plllock; /* pll lock time, units 1 us */ + u32 exttype; /* extended interface = 1 */ + + /* keep track of the current fid / vid or did */ + u32 currvid, currfid, currdid; + + /* the powernow_table includes all frequency and vid/fid pairings: + * fid are the lower 8 bits of the index, vid are the upper 8 bits. + * frequency is in kHz */ + struct cpufreq_frequency_table *powernow_table; + +#ifdef CONFIG_X86_POWERNOW_K8_ACPI + /* the acpi table needs to be kept. 
it's only available if ACPI was + * used to determine valid frequency/vid/fid states */ + struct acpi_processor_performance acpi_data; +#endif + /* we need to keep track of associated cores, but let cpufreq + * handle hotplug events - so just point at cpufreq pol->cpus + * structure */ + cpumask_t *available_cores; +}; + + +/* processor's cpuid instruction support */ +#define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ +#define CPUID_XFAM 0x0ff00000 /* extended family */ +#define CPUID_XFAM_K8 0 +#define CPUID_XMOD 0x000f0000 /* extended model */ +#define CPUID_XMOD_REV_MASK 0x00080000 +#define CPUID_XFAM_10H 0x00100000 /* family 0x10 */ +#define CPUID_USE_XFAM_XMOD 0x00000f00 +#define CPUID_GET_MAX_CAPABILITIES 0x80000000 +#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 +#define P_STATE_TRANSITION_CAPABLE 6 + +/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For */ +/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and */ +/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */ +/* the register number is placed in ecx, and the data is returned in edx:eax. 
*/ + +#define MSR_FIDVID_CTL 0xc0010041 +#define MSR_FIDVID_STATUS 0xc0010042 + +/* Field definitions within the FID VID Low Control MSR : */ +#define MSR_C_LO_INIT_FID_VID 0x00010000 +#define MSR_C_LO_NEW_VID 0x00003f00 +#define MSR_C_LO_NEW_FID 0x0000003f +#define MSR_C_LO_VID_SHIFT 8 + +/* Field definitions within the FID VID High Control MSR : */ +#define MSR_C_HI_STP_GNT_TO 0x000fffff + +/* Field definitions within the FID VID Low Status MSR : */ +#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */ +#define MSR_S_LO_MAX_RAMP_VID 0x3f000000 +#define MSR_S_LO_MAX_FID 0x003f0000 +#define MSR_S_LO_START_FID 0x00003f00 +#define MSR_S_LO_CURRENT_FID 0x0000003f + +/* Field definitions within the FID VID High Status MSR : */ +#define MSR_S_HI_MIN_WORKING_VID 0x3f000000 +#define MSR_S_HI_MAX_WORKING_VID 0x003f0000 +#define MSR_S_HI_START_VID 0x00003f00 +#define MSR_S_HI_CURRENT_VID 0x0000003f +#define MSR_C_HI_STP_GNT_BENIGN 0x00000001 + + +/* Hardware Pstate _PSS and MSR definitions */ +#define USE_HW_PSTATE 0x00000080 +#define HW_PSTATE_FID_MASK 0x0000003f +#define HW_PSTATE_DID_MASK 0x000001c0 +#define HW_PSTATE_DID_SHIFT 6 +#define HW_PSTATE_MASK 0x00000007 +#define HW_PSTATE_VALID_MASK 0x80000000 +#define HW_FID_INDEX_SHIFT 8 +#define HW_FID_INDEX_MASK 0x0000ff00 +#define HW_DID_INDEX_SHIFT 16 +#define HW_DID_INDEX_MASK 0x00ff0000 +#define HW_WATTS_MASK 0xff +#define HW_PWR_DVR_MASK 0x300 +#define HW_PWR_DVR_SHIFT 8 +#define HW_PWR_MAX_MULT 3 +#define MAX_HW_PSTATE 8 /* hw pstate supports up to 8 */ +#define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */ +#define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */ +#define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */ + +/* define the two driver architectures */ +#define CPU_OPTERON 0 +#define CPU_HW_PSTATE 1 + + +/* + * There are restrictions frequencies have to follow: + * - only 1 entry in the low fid table ( <=1.4GHz ) + * - lowest entry in the high fid table must be >= 2 * 
the entry in the + * low fid table + * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry + * in the low fid table + * - the parts can only step at <= 200 MHz intervals, odd fid values are + * supported in revision G and later revisions. + * - lowest frequency must be >= interprocessor hypertransport link speed + * (only applies to MP systems obviously) + */ + +/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */ +#define LO_FID_TABLE_TOP 7 /* fid values marking the boundary */ +#define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */ + +#define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */ +#define HI_VCOFREQ_TABLE_BOTTOM 1600 + +#define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */ + +#define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */ +#define LEAST_VID 0x3e /* Lowest (numerically highest) useful vid value */ + +#define MIN_FREQ 800 /* Min and max freqs, per spec */ +#define MAX_FREQ 5000 + +#define INVALID_FID_MASK 0xffffffc0 /* not a valid fid if these bits are set */ +#define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */ + +#define VID_OFF 0x3f + +#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */ + +#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */ + +#define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */ +#define VST_UNITS_20US 20 /* Voltage Stabalization Time is in units of 20us */ + +/* + * Most values of interest are enocoded in a single field of the _PSS + * entries: the "control" value. 
+ */ + +#define IRT_SHIFT 30 +#define RVO_SHIFT 28 +#define EXT_TYPE_SHIFT 27 +#define PLL_L_SHIFT 20 +#define MVS_SHIFT 18 +#define VST_SHIFT 11 +#define VID_SHIFT 6 +#define IRT_MASK 3 +#define RVO_MASK 3 +#define EXT_TYPE_MASK 1 +#define PLL_L_MASK 0x7f +#define MVS_MASK 3 +#define VST_MASK 0x7f +#define VID_MASK 0x1f +#define FID_MASK 0x1f +#define EXT_VID_MASK 0x3f +#define EXT_FID_MASK 0x3f + + +/* + * Version 1.4 of the PSB table. This table is constructed by BIOS and is + * to tell the OS's power management driver which VIDs and FIDs are + * supported by this particular processor. + * If the data in the PSB / PST is wrong, then this driver will program the + * wrong values into hardware, which is very likely to lead to a crash. + */ + +#define PSB_ID_STRING "AMDK7PNOW!" +#define PSB_ID_STRING_LEN 10 + +#define PSB_VERSION_1_4 0x14 + +struct psb_s { + u8 signature[10]; + u8 tableversion; + u8 flags1; + u16 vstable; + u8 flags2; + u8 num_tables; + u32 cpuid; + u8 plllocktime; + u8 maxfid; + u8 maxvid; + u8 numps; +}; + +/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */ +struct pst_s { + u8 fid; + u8 vid; +}; + +#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg) + +static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid); +static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid); +static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); + +static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); + +#ifdef CONFIG_X86_POWERNOW_K8_ACPI +static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); +static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); +#endif + +#ifdef CONFIG_SMP +static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) +{ +} +#else +static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) +{ + cpu_set(0, cpu_sharedcore_mask[0]); +} +#endif diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c new file mode 100644 index 00000000000..b8fb4b521c6 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c @@ -0,0 +1,191 @@ +/* + * sc520_freq.c: cpufreq driver for the AMD Elan sc520 + * + * Copyright (C) 2005 Sean Young + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on elanfreq.c + * + * 2005-03-30: - initial revision + */ + +#include +#include +#include + +#include +#include + +#include +#include +#include + +#define MMCR_BASE 0xfffef000 /* The default base address */ +#define OFFS_CPUCTL 0x2 /* CPU Control Register */ + +static __u8 __iomem *cpuctl; + +#define dprintk(msg...) 
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "sc520_freq", msg) + +static struct cpufreq_frequency_table sc520_freq_table[] = { + {0x01, 100000}, + {0x02, 133000}, + {0, CPUFREQ_TABLE_END}, +}; + +static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) +{ + u8 clockspeed_reg = *cpuctl; + + switch (clockspeed_reg & 0x03) { + default: + printk(KERN_ERR "sc520_freq: error: cpuctl register has unexpected value %02x\n", clockspeed_reg); + case 0x01: + return 100000; + case 0x02: + return 133000; + } +} + +static void sc520_freq_set_cpu_state (unsigned int state) +{ + + struct cpufreq_freqs freqs; + u8 clockspeed_reg; + + freqs.old = sc520_freq_get_cpu_frequency(0); + freqs.new = sc520_freq_table[state].frequency; + freqs.cpu = 0; /* AMD Elan is UP */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + dprintk("attempting to set frequency to %i kHz\n", + sc520_freq_table[state].frequency); + + local_irq_disable(); + + clockspeed_reg = *cpuctl & ~0x03; + *cpuctl = clockspeed_reg | sc520_freq_table[state].index; + + local_irq_enable(); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); +}; + +static int sc520_freq_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]); +} + +static int sc520_freq_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, sc520_freq_table, target_freq, relation, &newstate)) + return -EINVAL; + + sc520_freq_set_cpu_state(newstate); + + return 0; +} + + +/* + * Module init and exit code + */ + +static int sc520_freq_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *c = cpu_data; + int result; + + /* capability check */ + if (c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 4 || c->x86_model != 9) + return -ENODEV; + + /* cpuinfo and default policy values */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + 
policy->cpuinfo.transition_latency = 1000000; /* 1ms */ + policy->cur = sc520_freq_get_cpu_frequency(0); + + result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu); + + return 0; +} + + +static int sc520_freq_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + + +static struct freq_attr* sc520_freq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + + +static struct cpufreq_driver sc520_freq_driver = { + .get = sc520_freq_get_cpu_frequency, + .verify = sc520_freq_verify, + .target = sc520_freq_target, + .init = sc520_freq_cpu_init, + .exit = sc520_freq_cpu_exit, + .name = "sc520_freq", + .owner = THIS_MODULE, + .attr = sc520_freq_attr, +}; + + +static int __init sc520_freq_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + int err; + + /* Test if we have the right hardware */ + if(c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 4 || c->x86_model != 9) { + dprintk("no Elan SC520 processor found!\n"); + return -ENODEV; + } + cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); + if(!cpuctl) { + printk(KERN_ERR "sc520_freq: error: failed to remap memory\n"); + return -ENOMEM; + } + + err = cpufreq_register_driver(&sc520_freq_driver); + if (err) + iounmap(cpuctl); + + return err; +} + + +static void __exit sc520_freq_exit(void) +{ + cpufreq_unregister_driver(&sc520_freq_driver); + iounmap(cpuctl); +} + + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Sean Young "); +MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU"); + +module_init(sc520_freq_init); +module_exit(sc520_freq_exit); + diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c new file mode 100644 index 00000000000..6c5dc2c85ae --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -0,0 +1,634 @@ +/* + * cpufreq driver for Enhanced 
SpeedStep, as found in Intel's Pentium + * M (part of the Centrino chipset). + * + * Since the original Pentium M, most new Intel CPUs support Enhanced + * SpeedStep. + * + * Despite the "SpeedStep" in the name, this is almost entirely unlike + * traditional SpeedStep. + * + * Modelled on speedstep.c + * + * Copyright (C) 2003 Jeremy Fitzhardinge + */ + +#include +#include +#include +#include +#include /* current */ +#include +#include + +#include +#include +#include + +#define PFX "speedstep-centrino: " +#define MAINTAINER "cpufreq@lists.linux.org.uk" + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) + +#define INTEL_MSR_RANGE (0xffff) + +struct cpu_id +{ + __u8 x86; /* CPU family */ + __u8 x86_model; /* model */ + __u8 x86_mask; /* stepping */ +}; + +enum { + CPU_BANIAS, + CPU_DOTHAN_A1, + CPU_DOTHAN_A2, + CPU_DOTHAN_B0, + CPU_MP4HT_D0, + CPU_MP4HT_E0, +}; + +static const struct cpu_id cpu_ids[] = { + [CPU_BANIAS] = { 6, 9, 5 }, + [CPU_DOTHAN_A1] = { 6, 13, 1 }, + [CPU_DOTHAN_A2] = { 6, 13, 2 }, + [CPU_DOTHAN_B0] = { 6, 13, 6 }, + [CPU_MP4HT_D0] = {15, 3, 4 }, + [CPU_MP4HT_E0] = {15, 4, 1 }, +}; +#define N_IDS ARRAY_SIZE(cpu_ids) + +struct cpu_model +{ + const struct cpu_id *cpu_id; + const char *model_name; + unsigned max_freq; /* max clock in kHz */ + + struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ +}; +static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x); + +/* Operating points for current CPU */ +static struct cpu_model *centrino_model[NR_CPUS]; +static const struct cpu_id *centrino_cpu[NR_CPUS]; + +static struct cpufreq_driver centrino_driver; + +#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE + +/* Computes the correct form for IA32_PERF_CTL MSR for a particular + frequency/voltage operating point; frequency in MHz, volts in mV. + This is stored as "index" in the structure. 
*/ +#define OP(mhz, mv) \ + { \ + .frequency = (mhz) * 1000, \ + .index = (((mhz)/100) << 8) | ((mv - 700) / 16) \ + } + +/* + * These voltage tables were derived from the Intel Pentium M + * datasheet, document 25261202.pdf, Table 5. I have verified they + * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium + * M. + */ + +/* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */ +static struct cpufreq_frequency_table banias_900[] = +{ + OP(600, 844), + OP(800, 988), + OP(900, 1004), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */ +static struct cpufreq_frequency_table banias_1000[] = +{ + OP(600, 844), + OP(800, 972), + OP(900, 988), + OP(1000, 1004), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */ +static struct cpufreq_frequency_table banias_1100[] = +{ + OP( 600, 956), + OP( 800, 1020), + OP( 900, 1100), + OP(1000, 1164), + OP(1100, 1180), + { .frequency = CPUFREQ_TABLE_END } +}; + + +/* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */ +static struct cpufreq_frequency_table banias_1200[] = +{ + OP( 600, 956), + OP( 800, 1004), + OP( 900, 1020), + OP(1000, 1100), + OP(1100, 1164), + OP(1200, 1180), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Intel Pentium M processor 1.30GHz (Banias) */ +static struct cpufreq_frequency_table banias_1300[] = +{ + OP( 600, 956), + OP( 800, 1260), + OP(1000, 1292), + OP(1200, 1356), + OP(1300, 1388), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Intel Pentium M processor 1.40GHz (Banias) */ +static struct cpufreq_frequency_table banias_1400[] = +{ + OP( 600, 956), + OP( 800, 1180), + OP(1000, 1308), + OP(1200, 1436), + OP(1400, 1484), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Intel Pentium M processor 1.50GHz (Banias) */ +static struct cpufreq_frequency_table banias_1500[] = +{ + OP( 600, 956), + OP( 800, 1116), + OP(1000, 1228), + OP(1200, 1356), + OP(1400, 1452), + 
OP(1500, 1484), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Intel Pentium M processor 1.60GHz (Banias) */ +static struct cpufreq_frequency_table banias_1600[] = +{ + OP( 600, 956), + OP( 800, 1036), + OP(1000, 1164), + OP(1200, 1276), + OP(1400, 1420), + OP(1600, 1484), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Intel Pentium M processor 1.70GHz (Banias) */ +static struct cpufreq_frequency_table banias_1700[] = +{ + OP( 600, 956), + OP( 800, 1004), + OP(1000, 1116), + OP(1200, 1228), + OP(1400, 1308), + OP(1700, 1484), + { .frequency = CPUFREQ_TABLE_END } +}; +#undef OP + +#define _BANIAS(cpuid, max, name) \ +{ .cpu_id = cpuid, \ + .model_name = "Intel(R) Pentium(R) M processor " name "MHz", \ + .max_freq = (max)*1000, \ + .op_points = banias_##max, \ +} +#define BANIAS(max) _BANIAS(&cpu_ids[CPU_BANIAS], max, #max) + +/* CPU models, their operating frequency range, and freq/voltage + operating points */ +static struct cpu_model models[] = +{ + _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"), + BANIAS(1000), + BANIAS(1100), + BANIAS(1200), + BANIAS(1300), + BANIAS(1400), + BANIAS(1500), + BANIAS(1600), + BANIAS(1700), + + /* NULL model_name is a wildcard */ + { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL }, + { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL }, + { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL }, + { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL }, + { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, + + { NULL, } +}; +#undef _BANIAS +#undef BANIAS + +static int centrino_cpu_init_table(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; + struct cpu_model *model; + + for(model = models; model->cpu_id != NULL; model++) + if (centrino_verify_cpu_id(cpu, model->cpu_id) && + (model->model_name == NULL || + strcmp(cpu->x86_model_id, model->model_name) == 0)) + break; + + if (model->cpu_id == NULL) { + /* No match at all */ + dprintk("no support for CPU model \"%s\": " + "send /proc/cpuinfo to " MAINTAINER "\n", + cpu->x86_model_id); + return -ENOENT; + } 
+ + if (model->op_points == NULL) { + /* Matched a non-match */ + dprintk("no table support for CPU model \"%s\"\n", + cpu->x86_model_id); + dprintk("try using the acpi-cpufreq driver\n"); + return -ENOENT; + } + + centrino_model[policy->cpu] = model; + + dprintk("found \"%s\": max frequency: %dkHz\n", + model->model_name, model->max_freq); + + return 0; +} + +#else +static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; } +#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ + +static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x) +{ + if ((c->x86 == x->x86) && + (c->x86_model == x->x86_model) && + (c->x86_mask == x->x86_mask)) + return 1; + return 0; +} + +/* To be called only after centrino_model is initialized */ +static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe) +{ + int i; + + /* + * Extract clock in kHz from PERF_CTL value + * for centrino, as some DSDTs are buggy. + * Ideally, this can be done using the acpi_data structure. 
+ */ + if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) || + (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) || + (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) { + msr = (msr >> 8) & 0xff; + return msr * 100000; + } + + if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points)) + return 0; + + msr &= 0xffff; + for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) { + if (msr == centrino_model[cpu]->op_points[i].index) + return centrino_model[cpu]->op_points[i].frequency; + } + if (failsafe) + return centrino_model[cpu]->op_points[i-1].frequency; + else + return 0; +} + +/* Return the current CPU frequency in kHz */ +static unsigned int get_cur_freq(unsigned int cpu) +{ + unsigned l, h; + unsigned clock_freq; + cpumask_t saved_mask; + + saved_mask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (smp_processor_id() != cpu) + return 0; + + rdmsr(MSR_IA32_PERF_STATUS, l, h); + clock_freq = extract_clock(l, cpu, 0); + + if (unlikely(clock_freq == 0)) { + /* + * On some CPUs, we can see transient MSR values (which are + * not present in _PSS), while CPU is doing some automatic + * P-state transition (like TM2). Get the last freq set + * in PERF_CTL. 
+ */ + rdmsr(MSR_IA32_PERF_CTL, l, h); + clock_freq = extract_clock(l, cpu, 1); + } + + set_cpus_allowed(current, saved_mask); + return clock_freq; +} + + +static int centrino_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; + unsigned freq; + unsigned l, h; + int ret; + int i; + + /* Only Intel makes Enhanced Speedstep-capable CPUs */ + if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) + return -ENODEV; + + if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) + centrino_driver.flags |= CPUFREQ_CONST_LOOPS; + + if (policy->cpu != 0) + return -ENODEV; + + for (i = 0; i < N_IDS; i++) + if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) + break; + + if (i != N_IDS) + centrino_cpu[policy->cpu] = &cpu_ids[i]; + + if (!centrino_cpu[policy->cpu]) { + dprintk("found unsupported CPU with " + "Enhanced SpeedStep: send /proc/cpuinfo to " + MAINTAINER "\n"); + return -ENODEV; + } + + if (centrino_cpu_init_table(policy)) { + return -ENODEV; + } + + /* Check to see if Enhanced SpeedStep is enabled, and try to + enable it if not. 
*/ + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + + if (!(l & (1<<16))) { + l |= (1<<16); + dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); + wrmsr(MSR_IA32_MISC_ENABLE, l, h); + + /* check to see if it stuck */ + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + if (!(l & (1<<16))) { + printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n"); + return -ENODEV; + } + } + + freq = get_cur_freq(policy->cpu); + + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */ + policy->cur = freq; + + dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); + + ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points); + if (ret) + return (ret); + + cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu); + + return 0; +} + +static int centrino_cpu_exit(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + + if (!centrino_model[cpu]) + return -ENODEV; + + cpufreq_frequency_table_put_attr(cpu); + + centrino_model[cpu] = NULL; + + return 0; +} + +/** + * centrino_verify - verifies a new CPUFreq policy + * @policy: new policy + * + * Limit must be within this model's frequency range at least one + * border included. + */ +static int centrino_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points); +} + +/** + * centrino_setpolicy - set a new CPUFreq policy + * @policy: new policy + * @target_freq: the target frequency + * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * + * Sets a new CPUFreq policy. 
+ */ +static int centrino_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; + struct cpufreq_freqs freqs; + cpumask_t online_policy_cpus; + cpumask_t saved_mask; + cpumask_t set_mask; + cpumask_t covered_cpus; + int retval = 0; + unsigned int j, k, first_cpu, tmp; + + if (unlikely(centrino_model[cpu] == NULL)) + return -ENODEV; + + if (unlikely(cpufreq_frequency_table_target(policy, + centrino_model[cpu]->op_points, + target_freq, + relation, + &newstate))) { + return -EINVAL; + } + +#ifdef CONFIG_HOTPLUG_CPU + /* cpufreq holds the hotplug lock, so we are safe from here on */ + cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); +#else + online_policy_cpus = policy->cpus; +#endif + + saved_mask = current->cpus_allowed; + first_cpu = 1; + cpus_clear(covered_cpus); + for_each_cpu_mask(j, online_policy_cpus) { + /* + * Support for SMP systems. + * Make sure we are running on CPU that wants to change freq + */ + cpus_clear(set_mask); + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) + cpus_or(set_mask, set_mask, online_policy_cpus); + else + cpu_set(j, set_mask); + + set_cpus_allowed(current, set_mask); + preempt_disable(); + if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { + dprintk("couldn't limit to CPUs in this domain\n"); + retval = -EAGAIN; + if (first_cpu) { + /* We haven't started the transition yet. 
*/ + goto migrate_end; + } + preempt_enable(); + break; + } + + msr = centrino_model[cpu]->op_points[newstate].index; + + if (first_cpu) { + rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); + if (msr == (oldmsr & 0xffff)) { + dprintk("no change needed - msr was and needs " + "to be %x\n", oldmsr); + retval = 0; + goto migrate_end; + } + + freqs.old = extract_clock(oldmsr, cpu, 0); + freqs.new = extract_clock(msr, cpu, 0); + + dprintk("target=%dkHz old=%d new=%d msr=%04x\n", + target_freq, freqs.old, freqs.new, msr); + + for_each_cpu_mask(k, online_policy_cpus) { + freqs.cpu = k; + cpufreq_notify_transition(&freqs, + CPUFREQ_PRECHANGE); + } + + first_cpu = 0; + /* all but 16 LSB are reserved, treat them with care */ + oldmsr &= ~0xffff; + msr &= 0xffff; + oldmsr |= msr; + } + + wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { + preempt_enable(); + break; + } + + cpu_set(j, covered_cpus); + preempt_enable(); + } + + for_each_cpu_mask(k, online_policy_cpus) { + freqs.cpu = k; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + + if (unlikely(retval)) { + /* + * We have failed halfway through the frequency change. + * We have sent callbacks to policy->cpus and + * MSRs have already been written on coverd_cpus. + * Best effort undo.. 
+ */ + + if (!cpus_empty(covered_cpus)) { + for_each_cpu_mask(j, covered_cpus) { + set_cpus_allowed(current, cpumask_of_cpu(j)); + wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); + } + } + + tmp = freqs.new; + freqs.new = freqs.old; + freqs.old = tmp; + for_each_cpu_mask(j, online_policy_cpus) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + } + set_cpus_allowed(current, saved_mask); + return 0; + +migrate_end: + preempt_enable(); + set_cpus_allowed(current, saved_mask); + return 0; +} + +static struct freq_attr* centrino_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver centrino_driver = { + .name = "centrino", /* should be speedstep-centrino, + but there's a 16 char limit */ + .init = centrino_cpu_init, + .exit = centrino_cpu_exit, + .verify = centrino_verify, + .target = centrino_target, + .get = get_cur_freq, + .attr = centrino_attr, + .owner = THIS_MODULE, +}; + + +/** + * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver + * + * Initializes the Enhanced SpeedStep support. Returns -ENODEV on + * unsupported devices, -ENOENT if there's no voltage table for this + * particular CPU model, -EINVAL on problems during initiatization, + * and zero on success. + * + * This is quite picky. Not only does the CPU have to advertise the + * "est" flag in the cpuid capability flags, we look for a specific + * CPU model and stepping, and we need to have the exact model name in + * our voltage tables. That is, be paranoid about not releasing + * someone's valuable magic smoke. 
+ */ +static int __init centrino_init(void) +{ + struct cpuinfo_x86 *cpu = cpu_data; + + if (!cpu_has(cpu, X86_FEATURE_EST)) + return -ENODEV; + + return cpufreq_register_driver(¢rino_driver); +} + +static void __exit centrino_exit(void) +{ + cpufreq_unregister_driver(¢rino_driver); +} + +MODULE_AUTHOR ("Jeremy Fitzhardinge "); +MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors."); +MODULE_LICENSE ("GPL"); + +late_initcall(centrino_init); +module_exit(centrino_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c new file mode 100644 index 00000000000..a5b2346faf1 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -0,0 +1,440 @@ +/* + * (C) 2001 Dave Jones, Arjan van de ven. + * (C) 2002 - 2003 Dominik Brodowski + * + * Licensed under the terms of the GNU GPL License version 2. + * Based upon reverse engineered information, and on Intel documentation + * for chipsets ICH2-M and ICH3-M. + * + * Many thanks to Ducrot Bruno for finding and fixing the last + * "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler + * for extensive testing. + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + + +/********************************************************************* + * SPEEDSTEP - DEFINITIONS * + *********************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "speedstep-lib.h" + + +/* speedstep_chipset: + * It is necessary to know which chipset is used. As accesses to + * this device occur at various places in this module, we need a + * static struct pci_dev * pointing to that device. + */ +static struct pci_dev *speedstep_chipset_dev; + + +/* speedstep_processor + */ +static unsigned int speedstep_processor = 0; + +static u32 pmbase; + +/* + * There are only two frequency states for each processor. Values + * are in kHz for the time being. 
+ */ +static struct cpufreq_frequency_table speedstep_freqs[] = { + {SPEEDSTEP_HIGH, 0}, + {SPEEDSTEP_LOW, 0}, + {0, CPUFREQ_TABLE_END}, +}; + + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg) + + +/** + * speedstep_find_register - read the PMBASE address + * + * Returns: -ENODEV if no register could be found + */ +static int speedstep_find_register (void) +{ + if (!speedstep_chipset_dev) + return -ENODEV; + + /* get PMBASE */ + pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase); + if (!(pmbase & 0x01)) { + printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); + return -ENODEV; + } + + pmbase &= 0xFFFFFFFE; + if (!pmbase) { + printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); + return -ENODEV; + } + + dprintk("pmbase is 0x%x\n", pmbase); + return 0; +} + +/** + * speedstep_set_state - set the SpeedStep state + * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) + * + * Tries to change the SpeedStep state. 
+ */ +static void speedstep_set_state (unsigned int state) +{ + u8 pm2_blk; + u8 value; + unsigned long flags; + + if (state > 0x1) + return; + + /* Disable IRQs */ + local_irq_save(flags); + + /* read state */ + value = inb(pmbase + 0x50); + + dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); + + /* write new state */ + value &= 0xFE; + value |= state; + + dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase); + + /* Disable bus master arbitration */ + pm2_blk = inb(pmbase + 0x20); + pm2_blk |= 0x01; + outb(pm2_blk, (pmbase + 0x20)); + + /* Actual transition */ + outb(value, (pmbase + 0x50)); + + /* Restore bus master arbitration */ + pm2_blk &= 0xfe; + outb(pm2_blk, (pmbase + 0x20)); + + /* check if transition was successful */ + value = inb(pmbase + 0x50); + + /* Enable IRQs */ + local_irq_restore(flags); + + dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); + + if (state == (value & 0x1)) { + dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000)); + } else { + printk (KERN_ERR "cpufreq: change failed - I/O error\n"); + } + + return; +} + + +/** + * speedstep_activate - activate SpeedStep control in the chipset + * + * Tries to activate the SpeedStep status and control registers. + * Returns -EINVAL on an unsupported chipset, and zero on success. + */ +static int speedstep_activate (void) +{ + u16 value = 0; + + if (!speedstep_chipset_dev) + return -EINVAL; + + pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value); + if (!(value & 0x08)) { + value |= 0x08; + dprintk("activating SpeedStep (TM) registers\n"); + pci_write_config_word(speedstep_chipset_dev, 0x00A0, value); + } + + return 0; +} + + +/** + * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic + * + * Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to + * the LPC bridge / PM module which contains all power-management + * functions. 
Returns the SPEEDSTEP_CHIPSET_-number for the detected + * chipset, or zero on failure. + */ +static unsigned int speedstep_detect_chipset (void) +{ + /* on success the reference obtained here stays in speedstep_chipset_dev */ + speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82801DB_12, + PCI_ANY_ID, + PCI_ANY_ID, + NULL); + if (speedstep_chipset_dev) + return 4; /* 4-M */ + + speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82801CA_12, + PCI_ANY_ID, + PCI_ANY_ID, + NULL); + if (speedstep_chipset_dev) + return 3; /* 3-M */ + + + speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82801BA_10, + PCI_ANY_ID, + PCI_ANY_ID, + NULL); + if (speedstep_chipset_dev) { + /* speedstep.c causes lockups on Dell Inspirons 8000 and + * 8100 which use a pretty old revision of the 82815 + * host bridge. Abort on these systems. + */ + struct pci_dev *hostbridge; + + hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82815_MC, + PCI_ANY_ID, + PCI_ANY_ID, + NULL); + + if (!hostbridge) + return 2; /* 2-M */ + + if (hostbridge->revision < 5) { + dprintk("hostbridge does not support speedstep\n"); + /* drop the ICH2-M reference before forgetting the device */ + pci_dev_put(speedstep_chipset_dev); + speedstep_chipset_dev = NULL; + pci_dev_put(hostbridge); + return 0; + } + + pci_dev_put(hostbridge); + return 2; /* 2-M */ + } + + return 0; +} + +/* + * _speedstep_get - read the current frequency while bound to @cpus + * + * Temporarily restricts the current task to @cpus, reads the frequency + * and restores the previous affinity mask. Returns the value reported + * by speedstep_get_processor_frequency(). + */ +static unsigned int _speedstep_get(cpumask_t cpus) +{ + unsigned int speed; + cpumask_t cpus_allowed; + + cpus_allowed = current->cpus_allowed; + set_cpus_allowed(current, cpus); + speed = speedstep_get_processor_frequency(speedstep_processor); + set_cpus_allowed(current, cpus_allowed); + dprintk("detected %u kHz as current frequency\n", speed); + return speed; +} + +/* cpufreq ->get callback: current frequency as read on @cpu */ +static unsigned int speedstep_get(unsigned int cpu) +{ + return _speedstep_get(cpumask_of_cpu(cpu)); +} + +/** + * speedstep_target - set a new CPUFreq policy + * @policy: new policy + * @target_freq: the target frequency + * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * + * Sets
a new CPUFreq policy. + * + * Returns -EINVAL if no table entry matches the request, 0 otherwise; + * a failed hardware transition is only logged by speedstep_set_state(). + */ +static int speedstep_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + struct cpufreq_freqs freqs; + cpumask_t cpus_allowed; + int i; + + if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) + return -EINVAL; + + freqs.old = _speedstep_get(policy->cpus); + freqs.new = speedstep_freqs[newstate].frequency; + freqs.cpu = policy->cpu; + + dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new); + + /* no transition necessary */ + if (freqs.old == freqs.new) + return 0; + + /* remember the caller's affinity mask so it can be restored below */ + cpus_allowed = current->cpus_allowed; + + /* announce the change for every CPU covered by the policy */ + for_each_cpu_mask(i, policy->cpus) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + /* switch to physical CPU where state is to be changed */ + set_cpus_allowed(current, policy->cpus); + + speedstep_set_state(newstate); + + /* restore the affinity mask saved above */ + set_cpus_allowed(current, cpus_allowed); + + for_each_cpu_mask(i, policy->cpus) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + + return 0; +} + + +/** + * speedstep_verify - verifies a new CPUFreq policy + * @policy: new policy + * + * Limit must be within speedstep_low_freq and speedstep_high_freq, with + * at least one border included.
+ */ +static int speedstep_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); +} + + +static int speedstep_cpu_init(struct cpufreq_policy *policy) +{ + int result = 0; + unsigned int speed; + cpumask_t cpus_allowed; + + /* only run on CPU to be set, or on its sibling */ +#ifdef CONFIG_SMP + policy->cpus = cpu_sibling_map[policy->cpu]; +#endif + + cpus_allowed = current->cpus_allowed; + set_cpus_allowed(current, policy->cpus); + + /* detect low and high frequency and transition latency */ + result = speedstep_get_freqs(speedstep_processor, + &speedstep_freqs[SPEEDSTEP_LOW].frequency, + &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + &policy->cpuinfo.transition_latency, + &speedstep_set_state); + set_cpus_allowed(current, cpus_allowed); + if (result) + return result; + + /* get current speed setting */ + speed = _speedstep_get(policy->cpus); + if (!speed) + return -EIO; + + dprintk("currently at %s speed setting - %i MHz\n", + (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? 
"low" : "high", + (speed / 1000)); + + /* cpuinfo and default policy values */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cur = speed; + + result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); + + return 0; +} + + +static int speedstep_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static struct freq_attr* speedstep_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + + +static struct cpufreq_driver speedstep_driver = { + .name = "speedstep-ich", + .verify = speedstep_verify, + .target = speedstep_target, + .init = speedstep_cpu_init, + .exit = speedstep_cpu_exit, + .get = speedstep_get, + .owner = THIS_MODULE, + .attr = speedstep_attr, +}; + + +/** + * speedstep_init - initializes the SpeedStep CPUFreq driver + * + * Initializes the SpeedStep support. Returns -ENODEV on unsupported + * devices, -EINVAL on problems during initiatization, and zero on + * success. + */ +static int __init speedstep_init(void) +{ + /* detect processor */ + speedstep_processor = speedstep_detect_processor(); + if (!speedstep_processor) { + dprintk("Intel(R) SpeedStep(TM) capable processor not found\n"); + return -ENODEV; + } + + /* detect chipset */ + if (!speedstep_detect_chipset()) { + dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n"); + return -ENODEV; + } + + /* activate speedstep support */ + if (speedstep_activate()) { + pci_dev_put(speedstep_chipset_dev); + return -EINVAL; + } + + if (speedstep_find_register()) + return -ENODEV; + + return cpufreq_register_driver(&speedstep_driver); +} + + +/** + * speedstep_exit - unregisters SpeedStep support + * + * Unregisters SpeedStep support. 
+ */ +static void __exit speedstep_exit(void) +{ + /* release the chipset device reference taken in speedstep_detect_chipset() */ + /* NOTE(review): unregistering the driver first would mirror speedstep_init() -- confirm the order is intentional */ + pci_dev_put(speedstep_chipset_dev); + cpufreq_unregister_driver(&speedstep_driver); +} + + +MODULE_AUTHOR ("Dave Jones , Dominik Brodowski "); +MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); +MODULE_LICENSE ("GPL"); + +module_init(speedstep_init); +module_exit(speedstep_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c new file mode 100644 index 00000000000..b1acc8ce316 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -0,0 +1,444 @@ +/* + * (C) 2002 - 2003 Dominik Brodowski + * + * Licensed under the terms of the GNU GPL License version 2. + * + * Library for common functions for Intel SpeedStep v.1 and v.2 support + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + +#include +#include +#include +#include +#include +#include + +#include +#include "speedstep-lib.h" + +#define dprintk(msg...)
cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-lib", msg) + +#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK +static int relaxed_check = 0; +#else +#define relaxed_check 0 +#endif + +/********************************************************************* + * GET PROCESSOR CORE SPEED IN KHZ * + *********************************************************************/ + +/* + * pentium3_get_frequency - decode the PIII core clock from MSR_IA32_EBL_CR_POWERON + * + * Returns the FSB speed multiplied by the frequency multiplier, in kHz, + * or 0 if either bit pattern is not found in the tables below. + */ +static unsigned int pentium3_get_frequency (unsigned int processor) +{ + /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ + struct { + unsigned int ratio; /* Frequency Multiplier (x10) */ + u8 bitmap; /* power on configuration bits + [27, 25:22] (in MSR 0x2a) */ + } msr_decode_mult [] = { + { 30, 0x01 }, + { 35, 0x05 }, + { 40, 0x02 }, + { 45, 0x06 }, + { 50, 0x00 }, + { 55, 0x04 }, + { 60, 0x0b }, + { 65, 0x0f }, + { 70, 0x09 }, + { 75, 0x0d }, + { 80, 0x0a }, + { 85, 0x26 }, + { 90, 0x20 }, + { 100, 0x2b }, + { 0, 0xff } /* error or unknown value */ + }; + + /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */ + struct { + unsigned int value; /* Front Side Bus speed in MHz */ + u8 bitmap; /* power on configuration bits [18: 19] + (in MSR 0x2a) */ + } msr_decode_fsb [] = { + { 66, 0x0 }, + { 100, 0x2 }, + { 133, 0x1 }, + { 0, 0xff} + }; + + u32 msr_lo, msr_tmp; + int i = 0, j = 0; + + /* read MSR 0x2a - we only need the low 32 bits */ + rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); + dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); + msr_tmp = msr_lo; + + /* decode the FSB */ + msr_tmp &= 0x00c0000; + msr_tmp >>= 18; + /* linear search; reaching the 0xff terminator means the code is unknown */ + while (msr_tmp != msr_decode_fsb[i].bitmap) { + if (msr_decode_fsb[i].bitmap == 0xff) + return 0; + i++; + } + + /* decode the multiplier */ + if (processor == SPEEDSTEP_PROCESSOR_PIII_C_EARLY) { + /* same mask as below, but without bit 27 */ + dprintk("workaround for early PIIIs\n"); + msr_lo &= 0x03c00000; + } else + msr_lo &= 0x0bc00000; + msr_lo >>= 22; + while (msr_lo != msr_decode_mult[j].bitmap) { + if (msr_decode_mult[j].bitmap == 0xff) + return 0; + j++; + } + + /* ratio is x10 and the FSB is in MHz, hence the factor 100 to get kHz */ + dprintk("speed
is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); + + return (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100); +} + + +static unsigned int pentiumM_get_frequency(void) +{ + u32 msr_lo, msr_tmp; + + rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); + dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); + + /* see table B-2 of 24547212.pdf */ + if (msr_lo & 0x00040000) { + printk(KERN_DEBUG "speedstep-lib: PM - invalid FSB: 0x%x 0x%x\n", msr_lo, msr_tmp); + return 0; + } + + msr_tmp = (msr_lo >> 22) & 0x1f; + dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000)); + + return (msr_tmp * 100 * 1000); +} + +static unsigned int pentium_core_get_frequency(void) +{ + u32 fsb = 0; + u32 msr_lo, msr_tmp; + + rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp); + /* see table B-2 of 25366920.pdf */ + switch (msr_lo & 0x07) { + case 5: + fsb = 100000; + break; + case 1: + fsb = 133333; + break; + case 3: + fsb = 166667; + break; + default: + printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value"); + } + + rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); + dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); + + msr_tmp = (msr_lo >> 22) & 0x1f; + dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * fsb)); + + return (msr_tmp * fsb); +} + + +static unsigned int pentium4_get_frequency(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + u32 msr_lo, msr_hi, mult; + unsigned int fsb = 0; + + rdmsr(0x2c, msr_lo, msr_hi); + + dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi); + + /* decode the FSB: see IA-32 Intel (C) Architecture Software + * Developer's Manual, Volume 3: System Prgramming Guide, + * revision #12 in Table B-1: MSRs in the Pentium 4 and + * Intel Xeon Processors, on page B-4 and B-5. 
+ */ + if (c->x86_model < 2) + fsb = 100 * 1000; + else { + u8 fsb_code = (msr_lo >> 16) & 0x7; + /* any other code leaves fsb at 0, which is reported below */ + switch (fsb_code) { + case 0: + fsb = 100 * 1000; + break; + case 1: + fsb = 13333 * 10; + break; + case 2: + fsb = 200 * 1000; + break; + } + } + + if (!fsb) + printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to \n"); + + /* Multiplier. */ + if (c->x86_model < 2) + mult = msr_lo >> 27; + else + mult = msr_lo >> 24; + + dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult)); + + /* with an undetected FSB (fsb == 0) this is 0 */ + return (fsb * mult); +} + + +/* + * speedstep_get_processor_frequency - current core speed for @processor + * + * Dispatches to the decoder matching the SPEEDSTEP_PROCESSOR_* value and + * returns its result; returns 0 for any other value. + */ +unsigned int speedstep_get_processor_frequency(unsigned int processor) +{ + switch (processor) { + case SPEEDSTEP_PROCESSOR_PCORE: + return pentium_core_get_frequency(); + case SPEEDSTEP_PROCESSOR_PM: + return pentiumM_get_frequency(); + case SPEEDSTEP_PROCESSOR_P4D: + case SPEEDSTEP_PROCESSOR_P4M: + return pentium4_get_frequency(); + case SPEEDSTEP_PROCESSOR_PIII_T: + case SPEEDSTEP_PROCESSOR_PIII_C: + case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: + return pentium3_get_frequency(processor); + default: + return 0; + }; + /* not reached: every case above returns */ + return 0; +} +EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency); + + +/********************************************************************* + * DETECT SPEEDSTEP-CAPABLE PROCESSOR * + *********************************************************************/ + +unsigned int speedstep_detect_processor (void) +{ + struct cpuinfo_x86 *c = cpu_data; + u32 ebx, msr_lo, msr_hi; + + dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); + + /* only Intel family 6 and family 0xF processors are considered */ + if ((c->x86_vendor != X86_VENDOR_INTEL) || + ((c->x86 != 6) && (c->x86 != 0xF))) + return 0; + + if (c->x86 == 0xF) { + /* Intel Mobile Pentium 4-M + * or Intel Mobile Pentium 4 with 533 MHz FSB */ + if (c->x86_model != 2) + return 0; + + ebx = cpuid_ebx(0x00000001); + ebx &= 0x000000FF; + + dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); + + switch (c->x86_mask) { + case 4: + /* + * B-stepping [M-P4-M] + * sample has ebx = 0x0f, production has 0x0e.
+ */ + if ((ebx == 0x0e) || (ebx == 0x0f)) + return SPEEDSTEP_PROCESSOR_P4M; + break; + case 7: + /* + * C-stepping [M-P4-M] + * needs to have ebx=0x0e, else it's a celeron: + * cf. 25130917.pdf / page 7, footnote 5 even + * though 25072120.pdf / page 7 doesn't say + * samples are only of B-stepping... + */ + if (ebx == 0x0e) + return SPEEDSTEP_PROCESSOR_P4M; + break; + case 9: + /* + * D-stepping [M-P4-M or M-P4/533] + * + * this is totally strange: CPUID 0x0F29 is + * used by M-P4-M, M-P4/533 and(!) Celeron CPUs. + * The latter need to be sorted out as they don't + * support speedstep. + * Celerons with CPUID 0x0F29 may have either + * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything + * specific. + * M-P4-Ms may have either ebx=0xe or 0xf [see above] + * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf] + * also, M-P4M HTs have ebx=0x8, too + * For now, they are distinguished by the model_id string + */ + if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL)) + return SPEEDSTEP_PROCESSOR_P4M; + break; + default: + break; + } + /* family 0xF, model 2, but no stepping/ebx match: not supported */ + return 0; + } + + /* only family 6 gets here */ + switch (c->x86_model) { + case 0x0B: /* Intel PIII [Tualatin] */ + /* cpuid_ebx(1) is 0x04 for desktop PIII, 0x06 for mobile PIII-M */ + ebx = cpuid_ebx(0x00000001); + dprintk("ebx is %x\n", ebx); + + ebx &= 0x000000FF; + + if (ebx != 0x06) + return 0; + + /* So far all PIII-M processors support SpeedStep. See + * Intel's 24540640.pdf of June 2003 + */ + return SPEEDSTEP_PROCESSOR_PIII_T; + + case 0x08: /* Intel PIII [Coppermine] */ + + /* all mobile PIII Coppermines have FSB 100 MHz + * ==> sort out a few desktop PIIIs.
*/ + rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi); + dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi); + msr_lo &= 0x00c0000; + if (msr_lo != 0x0080000) + return 0; + + /* + * If the processor is a mobile version, + * platform ID has bit 50 set + * it has SpeedStep technology if either + * bit 56 or 57 is set + * (bits 50, 56 and 57 are bits 18, 24 and 25 of msr_hi) + */ + rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi); + dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi); + if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) { + if (c->x86_mask == 0x01) { + dprintk("early PIII version\n"); + return SPEEDSTEP_PROCESSOR_PIII_C_EARLY; + } else + return SPEEDSTEP_PROCESSOR_PIII_C; + } + + /* fall through: this Coppermine is not SpeedStep capable */ + default: + return 0; + } +} +EXPORT_SYMBOL_GPL(speedstep_detect_processor); + + +/********************************************************************* + * DETECT SPEEDSTEP SPEEDS * + *********************************************************************/ + +/* + * speedstep_get_freqs - determine both speeds by switching states + * @processor: SPEEDSTEP_PROCESSOR_* value, must be non-zero + * @low_speed: receives the speed read in the low state + * @high_speed: receives the speed read in the high state + * @transition_latency: if not NULL, receives the measured low-to-high + * switching time in nSec + * @set_state: callback used to switch between the two states + * + * Returns 0 on success. Errors are negative errno values carried in the + * unsigned return type: -EINVAL for missing arguments, -EIO if a speed + * reads as 0, -ENODEV if both states report the same speed. + */ +unsigned int speedstep_get_freqs(unsigned int processor, + unsigned int *low_speed, + unsigned int *high_speed, + unsigned int *transition_latency, + void (*set_state) (unsigned int state)) +{ + unsigned int prev_speed; + unsigned int ret = 0; + unsigned long flags; + struct timeval tv1, tv2; + + if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) + return -EINVAL; + + dprintk("trying to determine both speeds\n"); + + /* get current speed */ + prev_speed = speedstep_get_processor_frequency(processor); + if (!prev_speed) + return -EIO; + + dprintk("previous speed is %u\n", prev_speed); + + /* interrupts stay disabled until the "out" label */ + local_irq_save(flags); + + /* switch to low state */ + set_state(SPEEDSTEP_LOW); + *low_speed = speedstep_get_processor_frequency(processor); + if (!*low_speed) { + ret = -EIO; + goto out; + } + + dprintk("low speed is %u\n", *low_speed); + + /* start latency measurement */ + if (transition_latency) + do_gettimeofday(&tv1); + + /* switch to high state */ + set_state(SPEEDSTEP_HIGH); + + /* end latency measurement */ +
if (transition_latency) + do_gettimeofday(&tv2); + + *high_speed = speedstep_get_processor_frequency(processor); + if (!*high_speed) { + ret = -EIO; + goto out; + } + + dprintk("high speed is %u\n", *high_speed); + + /* identical readings mean the state switch had no visible effect */ + if (*low_speed == *high_speed) { + ret = -ENODEV; + goto out; + } + + /* the CPU is in the high state now; go back to low if the + * speed read on entry was not the high speed + */ + if (*high_speed != prev_speed) + set_state(SPEEDSTEP_LOW); + + if (transition_latency) { + *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC + + tv2.tv_usec - tv1.tv_usec; + dprintk("transition latency is %u uSec\n", *transition_latency); + + /* convert uSec to nSec and add 20% for safety reasons */ + *transition_latency *= 1200; + + /* check if the latency measurement is too high (above 10 mSec) + * or too low (below 50 uSec) and set it to a safe value + * (500uSec) in that case + */ + if (*transition_latency > 10000000 || *transition_latency < 50000) { + printk (KERN_WARNING "speedstep: frequency transition measured seems out of " + "range (%u nSec), falling back to a safe one of %u nSec.\n", + *transition_latency, 500000); + *transition_latency = 500000; + } + } + +out: + local_irq_restore(flags); + return (ret); +} +EXPORT_SYMBOL_GPL(speedstep_get_freqs); + +#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK +module_param(relaxed_check, int, 0444); +MODULE_PARM_DESC(relaxed_check, "Don't do all checks for speedstep capability."); +#endif + +MODULE_AUTHOR ("Dominik Brodowski "); +MODULE_DESCRIPTION ("Library for Intel SpeedStep 1 or 2 cpufreq drivers."); +MODULE_LICENSE ("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h new file mode 100644 index 00000000000..b11bcc608ca --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h @@ -0,0 +1,49 @@ +/* + * (C) 2002 - 2003 Dominik Brodowski + * + * Licensed under the terms of the GNU GPL License version 2. + * + * Library for common functions for Intel SpeedStep v.1 and v.2 support + * + * BIG FAT DISCLAIMER: Work in progress code.
Possibly *dangerous* + */ + + + +/* processors */ + +#define SPEEDSTEP_PROCESSOR_PIII_C_EARLY 0x00000001 /* Coppermine core */ +#define SPEEDSTEP_PROCESSOR_PIII_C 0x00000002 /* Coppermine core */ +#define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */ +#define SPEEDSTEP_PROCESSOR_P4M 0x00000004 /* P4-M */ + +/* the following processors are not speedstep-capable and are not auto-detected + * in speedstep_detect_processor(). However, their speed can be detected using + * the speedstep_get_processor_frequency() call. */ +#define SPEEDSTEP_PROCESSOR_PM 0xFFFFFF03 /* Pentium M */ +#define SPEEDSTEP_PROCESSOR_P4D 0xFFFFFF04 /* desktop P4 */ +#define SPEEDSTEP_PROCESSOR_PCORE 0xFFFFFF05 /* Core */ + +/* speedstep states -- only two of them */ + +#define SPEEDSTEP_HIGH 0x00000000 +#define SPEEDSTEP_LOW 0x00000001 + + +/* detect a speedstep-capable processor */ +extern unsigned int speedstep_detect_processor (void); + +/* detect the current speed (in kHz) of the processor */ +extern unsigned int speedstep_get_processor_frequency(unsigned int processor); + + +/* detect the low and high speeds of the processor. The callback + * set_state takes a single argument, either SPEEDSTEP_HIGH or + * SPEEDSTEP_LOW, and is called to switch between the two states. + * If transition_latency is not NULL it receives the measured + * transition latency. + */ +extern unsigned int speedstep_get_freqs(unsigned int processor, + unsigned int *low_speed, + unsigned int *high_speed, + unsigned int *transition_latency, + void (*set_state) (unsigned int state)); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c new file mode 100644 index 00000000000..e1c509aa305 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c @@ -0,0 +1,424 @@ +/* + * Intel SpeedStep SMI driver. + * + * (C) 2003 Hiroshi Miura + * + * Licensed under the terms of the GNU GPL License version 2.
+ * + */ + + +/********************************************************************* + * SPEEDSTEP - DEFINITIONS * + *********************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "speedstep-lib.h" + +/* speedstep system management interface port/command. + * + * These parameters are obtained from the IST-SMI BIOS call. + * If the user supplies them as module parameters, those values are used. + * + */ +static int smi_port = 0; +static int smi_cmd = 0; +static unsigned int smi_sig = 0; + +/* info about the processor */ +static unsigned int speedstep_processor = 0; + +/* + * There are only two frequency states for each processor. Values + * are in kHz for the time being. + */ +static struct cpufreq_frequency_table speedstep_freqs[] = { + {SPEEDSTEP_HIGH, 0}, + {SPEEDSTEP_LOW, 0}, + {0, CPUFREQ_TABLE_END}, +}; + +/* function codes passed in EBX to the SMI call */ +#define GET_SPEEDSTEP_OWNER 0 +#define GET_SPEEDSTEP_STATE 1 +#define SET_SPEEDSTEP_STATE 2 +#define GET_SPEEDSTEP_FREQS 4 + +/* how often shall the SMI call be tried if it failed, e.g. because + * of DMA activity going on? */ +#define SMI_TRIES 5 + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-smi", msg) + +/** + * speedstep_smi_ownership - issue the GET_SPEEDSTEP_OWNER SMI call + * + * Returns the value the call leaves in EDI; the callers in this file + * treat any non-zero value as failure. + */ +static int speedstep_smi_ownership (void) +{ + u32 command, result, magic; + u32 function = GET_SPEEDSTEP_OWNER; + unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation"; + + command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); + /* the physical address of the magic string is handed over in ESI */ + magic = virt_to_phys(magic_data); + + dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port); + + __asm__ __volatile__( + "out %%al, (%%dx)\n" + : "=D" (result) + : "a" (command), "b" (function), "c" (0), "d" (smi_port), + "D" (0), "S" (magic) + : "memory" + ); + + dprintk("result is %x\n", result); + + return result; +} + +/** + * speedstep_smi_get_freqs - get SpeedStep preferred & current freq.
+ * @low: the low frequency value is placed here + * @high: the high frequency value is placed here + * + * Only available on later SpeedStep-enabled systems, returns false results or + * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing + * shows that the latter occurs if !(ist_info.event & 0xFFFF). + * + * Returns -ENODEV when that check fails, -EINVAL when the reported + * frequencies are implausible, otherwise the value the SMI call leaves + * in EAX (the caller treats non-zero as failure). + */ +static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) +{ + u32 command, result = 0, edi, high_mhz, low_mhz; + u32 state=0; + u32 function = GET_SPEEDSTEP_FREQS; + + if (!(ist_info.event & 0xFFFF)) { + dprintk("bug #1422 -- can't read freqs from BIOS\n"); + return -ENODEV; + } + + command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); + + dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port); + + /* outputs: EAX = result, EBX = high frequency, ECX = low frequency */ + __asm__ __volatile__("movl $0, %%edi\n" + "out %%al, (%%dx)\n" + : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi) + : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0) + ); + + dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz); + + /* abort if results are obviously incorrect...
*/ + if ((high_mhz + low_mhz) < 600) + return -EINVAL; + + /* the BIOS values are in MHz, the frequency table holds kHz */ + *high = high_mhz * 1000; + *low = low_mhz * 1000; + + return result; +} + +/** + * speedstep_get_state - get the current SpeedStep state + * + * Returns bit 0 of the state reported by the GET_SPEEDSTEP_STATE SMI + * call; callers use it as an index into speedstep_freqs[]. + */ +static int speedstep_get_state (void) +{ + u32 function=GET_SPEEDSTEP_STATE; + u32 result, state, edi, command; + + command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); + + dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port); + + __asm__ __volatile__("movl $0, %%edi\n" + "out %%al, (%%dx)\n" + : "=a" (result), "=b" (state), "=D" (edi) + : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0) + ); + + dprintk("state is %x, result is %x\n", state, result); + + return (state & 1); +} + + +/** + * speedstep_set_state - set the SpeedStep state + * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) + * + * The SMI call is issued up to SMI_TRIES + 1 times; a failure is only + * logged, it is not reported to the caller. + */ +static void speedstep_set_state (unsigned int state) +{ + unsigned int result = 0, command, new_state; + unsigned long flags; + unsigned int function=SET_SPEEDSTEP_STATE; + unsigned int retry = 0; + + /* only states 0 and 1 exist; anything else is silently ignored */ + if (state > 0x1) + return; + + /* Disable IRQs */ + local_irq_save(flags); + + command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); + + dprintk("trying to set frequency to state %u with command %x at port %x\n", state, command, smi_port); + + do { + if (retry) { + /* wait longer before each further attempt (retry * 50) */ + dprintk("retry %u, previous result %u, waiting...\n", retry, result); + mdelay(retry * 50); + } + retry++; + __asm__ __volatile__( + "movl $0, %%edi\n" + "out %%al, (%%dx)\n" + : "=b" (new_state), "=D" (result) + : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0) + ); + } while ((new_state != state) && (retry <= SMI_TRIES)); + + /* enable IRQs */ + local_irq_restore(flags); + + if (new_state == state) { + dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result); + } else {
printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result); + } + + return; +} + + +/** + * speedstep_target - set a new CPUFreq policy + * @policy: new policy + * @target_freq: new freq + * @relation: how target_freq relates to the achieved frequency (handed to cpufreq_frequency_table_target()) + * + * Sets a new CPUFreq policy/freq. + * + * Returns -EINVAL if no table entry matches the request, 0 otherwise; + * a failed transition is only logged by speedstep_set_state(). + */ +static int speedstep_target (struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + unsigned int newstate = 0; + struct cpufreq_freqs freqs; + + if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) + return -EINVAL; + + freqs.old = speedstep_freqs[speedstep_get_state()].frequency; + freqs.new = speedstep_freqs[newstate].frequency; + freqs.cpu = 0; /* speedstep.c is UP only driver */ + + /* nothing to do if the CPU is already in the requested state */ + if (freqs.old == freqs.new) + return 0; + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + speedstep_set_state(newstate); + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + return 0; +} + + +/** + * speedstep_verify - verifies a new CPUFreq policy + * @policy: new policy + * + * Limit must be within speedstep_low_freq and speedstep_high_freq, with + * at least one border included.
+ */ +static int speedstep_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); +} + + +static int speedstep_cpu_init(struct cpufreq_policy *policy) +{ + int result; + unsigned int speed,state; + + /* capability check */ + if (policy->cpu != 0) + return -ENODEV; + + result = speedstep_smi_ownership(); + if (result) { + dprintk("fails in aquiring ownership of a SMI interface.\n"); + return -EINVAL; + } + + /* detect low and high frequency */ + result = speedstep_smi_get_freqs(&speedstep_freqs[SPEEDSTEP_LOW].frequency, + &speedstep_freqs[SPEEDSTEP_HIGH].frequency); + if (result) { + /* fall back to speedstep_lib.c dection mechanism: try both states out */ + dprintk("could not detect low and high frequencies by SMI call.\n"); + result = speedstep_get_freqs(speedstep_processor, + &speedstep_freqs[SPEEDSTEP_LOW].frequency, + &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + NULL, + &speedstep_set_state); + + if (result) { + dprintk("could not detect two different speeds -- aborting.\n"); + return result; + } else + dprintk("workaround worked.\n"); + } + + /* get current speed setting */ + state = speedstep_get_state(); + speed = speedstep_freqs[state].frequency; + + dprintk("currently at %s speed setting - %i MHz\n", + (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? 
"low" : "high", + (speed / 1000)); + + /* cpuinfo and default policy values */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = speed; + + result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); + + return 0; +} + +static int speedstep_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static unsigned int speedstep_get(unsigned int cpu) +{ + if (cpu) + return -ENODEV; + return speedstep_get_processor_frequency(speedstep_processor); +} + + +static int speedstep_resume(struct cpufreq_policy *policy) +{ + int result = speedstep_smi_ownership(); + + if (result) + dprintk("fails in re-aquiring ownership of a SMI interface.\n"); + + return result; +} + +static struct freq_attr* speedstep_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver speedstep_driver = { + .name = "speedstep-smi", + .verify = speedstep_verify, + .target = speedstep_target, + .init = speedstep_cpu_init, + .exit = speedstep_cpu_exit, + .get = speedstep_get, + .resume = speedstep_resume, + .owner = THIS_MODULE, + .attr = speedstep_attr, +}; + +/** + * speedstep_init - initializes the SpeedStep CPUFreq driver + * + * Initializes the SpeedStep support. Returns -ENODEV on unsupported + * BIOS, -EINVAL on problems during initiatization, and zero on + * success. 
+ */ +static int __init speedstep_init(void) +{ + speedstep_processor = speedstep_detect_processor(); + + /* only the three PIII variants are accepted; anything else counts as undetected */ + switch (speedstep_processor) { + case SPEEDSTEP_PROCESSOR_PIII_T: + case SPEEDSTEP_PROCESSOR_PIII_C: + case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: + break; + default: + speedstep_processor = 0; + } + + if (!speedstep_processor) { + dprintk ("No supported Intel CPU detected.\n"); + return -ENODEV; + } + + dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n", + ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level); + + /* Error if no IST-SMI BIOS or no PARM + sig= 'ISGE' aka 'Intel Speedstep Gate E' */ + if ((ist_info.signature != 0x47534943) && ( + (smi_port == 0) || (smi_cmd == 0))) + return -ENODEV; + + /* smi_sig == 1 requests the faked signature; any other value is replaced by the BIOS one */ + if (smi_sig == 1) + smi_sig = 0x47534943; + else + smi_sig = ist_info.signature; + + /* setup smi_port from MODULE_PARM or BIOS */ + if ((smi_port > 0xff) || (smi_port < 0)) + return -EINVAL; + else if (smi_port == 0) + smi_port = ist_info.command & 0xff; + + /* same for smi_cmd, taken from bits 16-23 of the BIOS value if unset */ + if ((smi_cmd > 0xff) || (smi_cmd < 0)) + return -EINVAL; + else if (smi_cmd == 0) + smi_cmd = (ist_info.command >> 16) & 0xff; + + return cpufreq_register_driver(&speedstep_driver); +} + + +/** + * speedstep_exit - unregisters SpeedStep support + * + * Unregisters SpeedStep support.
+ */ +static void __exit speedstep_exit(void) +{ + cpufreq_unregister_driver(&speedstep_driver); +} + +module_param(smi_port, int, 0444); +module_param(smi_cmd, int, 0444); +module_param(smi_sig, uint, 0444); + +MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value -- Intel's default setting is 0xb2"); +MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value -- Intel's default setting is 0x82"); +MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the SMI interface."); + +MODULE_AUTHOR ("Hiroshi Miura"); +MODULE_DESCRIPTION ("Speedstep driver for IST applet SMI interface."); +MODULE_LICENSE ("GPL"); + +module_init(speedstep_init); +module_exit(speedstep_exit); diff --git a/arch/x86_64/kernel/Makefile_64 b/arch/x86_64/kernel/Makefile_64 index 1c9de796fa1..a6d8216084a 100644 --- a/arch/x86_64/kernel/Makefile_64 +++ b/arch/x86_64/kernel/Makefile_64 @@ -27,7 +27,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_64.o relocate_kernel_64.o crash_64.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_64.o obj-$(CONFIG_PM) += suspend_64.o obj-$(CONFIG_HIBERNATION) += suspend_asm_64.o -obj-$(CONFIG_CPU_FREQ) += ../../i386/kernel/cpu/cpufreq/ +obj-$(CONFIG_CPU_FREQ) += ../../x86/kernel/cpu/cpufreq/ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_IOMMU) += pci-gart_64.o aperture_64.o obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o -- cgit v1.2.3-70-g09d2 From 2ec1df4130c60d1eb49dc0fa0ed15858fede6b05 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:28 +0200 Subject: i386: move kernel/cpu/mtrr Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/kernel/cpu/Makefile | 2 +- arch/i386/kernel/cpu/mtrr/Makefile | 3 - arch/i386/kernel/cpu/mtrr/amd.c | 121 ------ arch/i386/kernel/cpu/mtrr/centaur.c | 224 ----------- arch/i386/kernel/cpu/mtrr/cyrix.c | 380 ------------------ arch/i386/kernel/cpu/mtrr/generic.c | 509 ------------------------ arch/i386/kernel/cpu/mtrr/if.c | 
439 --------------------- arch/i386/kernel/cpu/mtrr/main.c | 768 ------------------------------------ arch/i386/kernel/cpu/mtrr/mtrr.h | 98 ----- arch/i386/kernel/cpu/mtrr/state.c | 79 ---- arch/x86/kernel/cpu/mtrr/Makefile | 3 + arch/x86/kernel/cpu/mtrr/amd.c | 121 ++++++ arch/x86/kernel/cpu/mtrr/centaur.c | 224 +++++++++++ arch/x86/kernel/cpu/mtrr/cyrix.c | 380 ++++++++++++++++++ arch/x86/kernel/cpu/mtrr/generic.c | 509 ++++++++++++++++++++++++ arch/x86/kernel/cpu/mtrr/if.c | 439 +++++++++++++++++++++ arch/x86/kernel/cpu/mtrr/main.c | 768 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/mtrr/mtrr.h | 98 +++++ arch/x86/kernel/cpu/mtrr/state.c | 79 ++++ arch/x86_64/kernel/Makefile_64 | 2 +- 20 files changed, 2623 insertions(+), 2623 deletions(-) delete mode 100644 arch/i386/kernel/cpu/mtrr/Makefile delete mode 100644 arch/i386/kernel/cpu/mtrr/amd.c delete mode 100644 arch/i386/kernel/cpu/mtrr/centaur.c delete mode 100644 arch/i386/kernel/cpu/mtrr/cyrix.c delete mode 100644 arch/i386/kernel/cpu/mtrr/generic.c delete mode 100644 arch/i386/kernel/cpu/mtrr/if.c delete mode 100644 arch/i386/kernel/cpu/mtrr/main.c delete mode 100644 arch/i386/kernel/cpu/mtrr/mtrr.h delete mode 100644 arch/i386/kernel/cpu/mtrr/state.c create mode 100644 arch/x86/kernel/cpu/mtrr/Makefile create mode 100644 arch/x86/kernel/cpu/mtrr/amd.c create mode 100644 arch/x86/kernel/cpu/mtrr/centaur.c create mode 100644 arch/x86/kernel/cpu/mtrr/cyrix.c create mode 100644 arch/x86/kernel/cpu/mtrr/generic.c create mode 100644 arch/x86/kernel/cpu/mtrr/if.c create mode 100644 arch/x86/kernel/cpu/mtrr/main.c create mode 100644 arch/x86/kernel/cpu/mtrr/mtrr.h create mode 100644 arch/x86/kernel/cpu/mtrr/state.c (limited to 'arch/x86/kernel') diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile index 8d9ce0232ad..6687f6d5ad2 100644 --- a/arch/i386/kernel/cpu/Makefile +++ b/arch/i386/kernel/cpu/Makefile @@ -14,7 +14,7 @@ obj-y += umc.o obj-$(CONFIG_X86_MCE) += 
../../../x86/kernel/cpu/mcheck/ -obj-$(CONFIG_MTRR) += mtrr/ +obj-$(CONFIG_MTRR) += ../../../x86/kernel/cpu/mtrr/ obj-$(CONFIG_CPU_FREQ) += ../../../x86/kernel/cpu/cpufreq/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/i386/kernel/cpu/mtrr/Makefile b/arch/i386/kernel/cpu/mtrr/Makefile deleted file mode 100644 index 191fc053364..00000000000 --- a/arch/i386/kernel/cpu/mtrr/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-y := main.o if.o generic.o state.o -obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o - diff --git a/arch/i386/kernel/cpu/mtrr/amd.c b/arch/i386/kernel/cpu/mtrr/amd.c deleted file mode 100644 index 0949cdbf848..00000000000 --- a/arch/i386/kernel/cpu/mtrr/amd.c +++ /dev/null @@ -1,121 +0,0 @@ -#include -#include -#include -#include - -#include "mtrr.h" - -static void -amd_get_mtrr(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type * type) -{ - unsigned long low, high; - - rdmsr(MSR_K6_UWCCR, low, high); - /* Upper dword is region 1, lower is region 0 */ - if (reg == 1) - low = high; - /* The base masks off on the right alignment */ - *base = (low & 0xFFFE0000) >> PAGE_SHIFT; - *type = 0; - if (low & 1) - *type = MTRR_TYPE_UNCACHABLE; - if (low & 2) - *type = MTRR_TYPE_WRCOMB; - if (!(low & 3)) { - *size = 0; - return; - } - /* - * This needs a little explaining. The size is stored as an - * inverted mask of bits of 128K granularity 15 bits long offset - * 2 bits - * - * So to get a size we do invert the mask and add 1 to the lowest - * mask bit (4 as its 2 bits in). This gives us a size we then shift - * to turn into 128K blocks - * - * eg 111 1111 1111 1100 is 512K - * - * invert 000 0000 0000 0011 - * +1 000 0000 0000 0100 - * *128K ... - */ - low = (~low) & 0x1FFFC; - *size = (low + 4) << (15 - PAGE_SHIFT); - return; -} - -static void amd_set_mtrr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) -/* [SUMMARY] Set variable MTRR register on the local CPU. - The register to set. 
- The base address of the region. - The size of the region. If this is 0 the region is disabled. - The type of the region. - If TRUE, do the change safely. If FALSE, safety measures should - be done externally. - [RETURNS] Nothing. -*/ -{ - u32 regs[2]; - - /* - * Low is MTRR0 , High MTRR 1 - */ - rdmsr(MSR_K6_UWCCR, regs[0], regs[1]); - /* - * Blank to disable - */ - if (size == 0) - regs[reg] = 0; - else - /* Set the register to the base, the type (off by one) and an - inverted bitmask of the size The size is the only odd - bit. We are fed say 512K We invert this and we get 111 1111 - 1111 1011 but if you subtract one and invert you get the - desired 111 1111 1111 1100 mask - - But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */ - regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC) - | (base << PAGE_SHIFT) | (type + 1); - - /* - * The writeback rule is quite specific. See the manual. Its - * disable local interrupts, write back the cache, set the mtrr - */ - wbinvd(); - wrmsr(MSR_K6_UWCCR, regs[0], regs[1]); -} - -static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) -{ - /* Apply the K6 block alignment and size rules - In order - o Uncached or gathering only - o 128K or bigger block - o Power of 2 block - o base suitably aligned to the power - */ - if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT)) - || (size & ~(size - 1)) - size || (base & (size - 1))) - return -EINVAL; - return 0; -} - -static struct mtrr_ops amd_mtrr_ops = { - .vendor = X86_VENDOR_AMD, - .set = amd_set_mtrr, - .get = amd_get_mtrr, - .get_free_region = generic_get_free_region, - .validate_add_page = amd_validate_add_page, - .have_wrcomb = positive_have_wrcomb, -}; - -int __init amd_init_mtrr(void) -{ - set_mtrr_ops(&amd_mtrr_ops); - return 0; -} - -//arch_initcall(amd_mtrr_init); diff --git a/arch/i386/kernel/cpu/mtrr/centaur.c b/arch/i386/kernel/cpu/mtrr/centaur.c deleted file mode 100644 index cb9aa3a7a7a..00000000000 --- 
a/arch/i386/kernel/cpu/mtrr/centaur.c +++ /dev/null @@ -1,224 +0,0 @@ -#include -#include -#include -#include -#include "mtrr.h" - -static struct { - unsigned long high; - unsigned long low; -} centaur_mcr[8]; - -static u8 centaur_mcr_reserved; -static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */ - -/* - * Report boot time MCR setups - */ - -static int -centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) -/* [SUMMARY] Get a free MTRR. - The starting (base) address of the region. - The size (in bytes) of the region. - [RETURNS] The index of the region on success, else -1 on error. -*/ -{ - int i, max; - mtrr_type ltype; - unsigned long lbase, lsize; - - max = num_var_ranges; - if (replace_reg >= 0 && replace_reg < max) - return replace_reg; - for (i = 0; i < max; ++i) { - if (centaur_mcr_reserved & (1 << i)) - continue; - mtrr_if->get(i, &lbase, &lsize, <ype); - if (lsize == 0) - return i; - } - return -ENOSPC; -} - -void -mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) -{ - centaur_mcr[mcr].low = lo; - centaur_mcr[mcr].high = hi; -} - -static void -centaur_get_mcr(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type * type) -{ - *base = centaur_mcr[reg].high >> PAGE_SHIFT; - *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT; - *type = MTRR_TYPE_WRCOMB; /* If it is there, it is write-combining */ - if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2)) - *type = MTRR_TYPE_UNCACHABLE; - if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25) - *type = MTRR_TYPE_WRBACK; - if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31) - *type = MTRR_TYPE_WRBACK; - -} - -static void centaur_set_mcr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) -{ - unsigned long low, high; - - if (size == 0) { - /* Disable */ - high = low = 0; - } else { - high = base << PAGE_SHIFT; - if (centaur_mcr_type == 0) - low = -size << PAGE_SHIFT | 0x1f; /* only support 
write-combining... */ - else { - if (type == MTRR_TYPE_UNCACHABLE) - low = -size << PAGE_SHIFT | 0x02; /* NC */ - else - low = -size << PAGE_SHIFT | 0x09; /* WWO,WC */ - } - } - centaur_mcr[reg].high = high; - centaur_mcr[reg].low = low; - wrmsr(MSR_IDT_MCR0 + reg, low, high); -} - -#if 0 -/* - * Initialise the later (saner) Winchip MCR variant. In this version - * the BIOS can pass us the registers it has used (but not their values) - * and the control register is read/write - */ - -static void __init -centaur_mcr1_init(void) -{ - unsigned i; - u32 lo, hi; - - /* Unfortunately, MCR's are read-only, so there is no way to - * find out what the bios might have done. - */ - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */ - lo &= ~0x1C0; /* clear key */ - lo |= 0x040; /* set key to 1 */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */ - } - - centaur_mcr_type = 1; - - /* - * Clear any unconfigured MCR's. - */ - - for (i = 0; i < 8; ++i) { - if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) { - if (!(lo & (1 << (9 + i)))) - wrmsr(MSR_IDT_MCR0 + i, 0, 0); - else - /* - * If the BIOS set up an MCR we cannot see it - * but we don't wish to obliterate it - */ - centaur_mcr_reserved |= (1 << i); - } - } - /* - * Throw the main write-combining switch... - * However if OOSTORE is enabled then people have already done far - * cleverer things and we should behave. - */ - - lo |= 15; /* Write combine enables */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); -} - -/* - * Initialise the original winchip with read only MCR registers - * no used bitmask for the BIOS to pass on and write only control - */ - -static void __init -centaur_mcr0_init(void) -{ - unsigned i; - - /* Unfortunately, MCR's are read-only, so there is no way to - * find out what the bios might have done. - */ - - /* Clear any unconfigured MCR's. - * This way we are sure that the centaur_mcr array contains the actual - * values. 
The disadvantage is that any BIOS tweaks are thus undone. - * - */ - for (i = 0; i < 8; ++i) { - if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) - wrmsr(MSR_IDT_MCR0 + i, 0, 0); - } - - wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */ -} - -/* - * Initialise Winchip series MCR registers - */ - -static void __init -centaur_mcr_init(void) -{ - struct set_mtrr_context ctxt; - - set_mtrr_prepare_save(&ctxt); - set_mtrr_cache_disable(&ctxt); - - if (boot_cpu_data.x86_model == 4) - centaur_mcr0_init(); - else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9) - centaur_mcr1_init(); - - set_mtrr_done(&ctxt); -} -#endif - -static int centaur_validate_add_page(unsigned long base, - unsigned long size, unsigned int type) -{ - /* - * FIXME: Winchip2 supports uncached - */ - if (type != MTRR_TYPE_WRCOMB && - (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { - printk(KERN_WARNING - "mtrr: only write-combining%s supported\n", - centaur_mcr_type ? " and uncacheable are" - : " is"); - return -EINVAL; - } - return 0; -} - -static struct mtrr_ops centaur_mtrr_ops = { - .vendor = X86_VENDOR_CENTAUR, -// .init = centaur_mcr_init, - .set = centaur_set_mcr, - .get = centaur_get_mcr, - .get_free_region = centaur_get_free_region, - .validate_add_page = centaur_validate_add_page, - .have_wrcomb = positive_have_wrcomb, -}; - -int __init centaur_init_mtrr(void) -{ - set_mtrr_ops(¢aur_mtrr_ops); - return 0; -} - -//arch_initcall(centaur_init_mtrr); diff --git a/arch/i386/kernel/cpu/mtrr/cyrix.c b/arch/i386/kernel/cpu/mtrr/cyrix.c deleted file mode 100644 index 2287d4863a8..00000000000 --- a/arch/i386/kernel/cpu/mtrr/cyrix.c +++ /dev/null @@ -1,380 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "mtrr.h" - -int arr3_protected; - -static void -cyrix_get_arr(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type * type) -{ - unsigned long flags; - unsigned char arr, ccr3, rcr, shift; - - arr = CX86_ARR_BASE + 
(reg << 1) + reg; /* avoid multiplication by 3 */ - - /* Save flags and disable interrupts */ - local_irq_save(flags); - - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - ((unsigned char *) base)[3] = getCx86(arr); - ((unsigned char *) base)[2] = getCx86(arr + 1); - ((unsigned char *) base)[1] = getCx86(arr + 2); - rcr = getCx86(CX86_RCR_BASE + reg); - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ - - /* Enable interrupts if it was enabled previously */ - local_irq_restore(flags); - shift = ((unsigned char *) base)[1] & 0x0f; - *base >>= PAGE_SHIFT; - - /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 - * Note: shift==0xf means 4G, this is unsupported. - */ - if (shift) - *size = (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1); - else - *size = 0; - - /* Bit 0 is Cache Enable on ARR7, Cache Disable on ARR0-ARR6 */ - if (reg < 7) { - switch (rcr) { - case 1: - *type = MTRR_TYPE_UNCACHABLE; - break; - case 8: - *type = MTRR_TYPE_WRBACK; - break; - case 9: - *type = MTRR_TYPE_WRCOMB; - break; - case 24: - default: - *type = MTRR_TYPE_WRTHROUGH; - break; - } - } else { - switch (rcr) { - case 0: - *type = MTRR_TYPE_UNCACHABLE; - break; - case 8: - *type = MTRR_TYPE_WRCOMB; - break; - case 9: - *type = MTRR_TYPE_WRBACK; - break; - case 25: - default: - *type = MTRR_TYPE_WRTHROUGH; - break; - } - } -} - -static int -cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) -/* [SUMMARY] Get a free ARR. - The starting (base) address of the region. - The size (in bytes) of the region. - [RETURNS] The index of the region on success, else -1 on error. 
-*/ -{ - int i; - mtrr_type ltype; - unsigned long lbase, lsize; - - switch (replace_reg) { - case 7: - if (size < 0x40) - break; - case 6: - case 5: - case 4: - return replace_reg; - case 3: - if (arr3_protected) - break; - case 2: - case 1: - case 0: - return replace_reg; - } - /* If we are to set up a region >32M then look at ARR7 immediately */ - if (size > 0x2000) { - cyrix_get_arr(7, &lbase, &lsize, <ype); - if (lsize == 0) - return 7; - /* Else try ARR0-ARR6 first */ - } else { - for (i = 0; i < 7; i++) { - cyrix_get_arr(i, &lbase, &lsize, <ype); - if ((i == 3) && arr3_protected) - continue; - if (lsize == 0) - return i; - } - /* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */ - cyrix_get_arr(i, &lbase, &lsize, <ype); - if ((lsize == 0) && (size >= 0x40)) - return i; - } - return -ENOSPC; -} - -static u32 cr4 = 0; -static u32 ccr3; - -static void prepare_set(void) -{ - u32 cr0; - - /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if ( cpu_has_pge ) { - cr4 = read_cr4(); - write_cr4(cr4 & ~X86_CR4_PGE); - } - - /* Disable and flush caches. 
Note that wbinvd flushes the TLBs as - a side-effect */ - cr0 = read_cr0() | 0x40000000; - wbinvd(); - write_cr0(cr0); - wbinvd(); - - /* Cyrix ARRs - everything else were excluded at the top */ - ccr3 = getCx86(CX86_CCR3); - - /* Cyrix ARRs - everything else were excluded at the top */ - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); - -} - -static void post_set(void) -{ - /* Flush caches and TLBs */ - wbinvd(); - - /* Cyrix ARRs - everything else was excluded at the top */ - setCx86(CX86_CCR3, ccr3); - - /* Enable caches */ - write_cr0(read_cr0() & 0xbfffffff); - - /* Restore value of CR4 */ - if ( cpu_has_pge ) - write_cr4(cr4); -} - -static void cyrix_set_arr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) -{ - unsigned char arr, arr_type, arr_size; - - arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ - - /* count down from 32M (ARR0-ARR6) or from 2G (ARR7) */ - if (reg >= 7) - size >>= 6; - - size &= 0x7fff; /* make sure arr_size <= 14 */ - for (arr_size = 0; size; arr_size++, size >>= 1) ; - - if (reg < 7) { - switch (type) { - case MTRR_TYPE_UNCACHABLE: - arr_type = 1; - break; - case MTRR_TYPE_WRCOMB: - arr_type = 9; - break; - case MTRR_TYPE_WRTHROUGH: - arr_type = 24; - break; - default: - arr_type = 8; - break; - } - } else { - switch (type) { - case MTRR_TYPE_UNCACHABLE: - arr_type = 0; - break; - case MTRR_TYPE_WRCOMB: - arr_type = 8; - break; - case MTRR_TYPE_WRTHROUGH: - arr_type = 25; - break; - default: - arr_type = 9; - break; - } - } - - prepare_set(); - - base <<= PAGE_SHIFT; - setCx86(arr, ((unsigned char *) &base)[3]); - setCx86(arr + 1, ((unsigned char *) &base)[2]); - setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size); - setCx86(CX86_RCR_BASE + reg, arr_type); - - post_set(); -} - -typedef struct { - unsigned long base; - unsigned long size; - mtrr_type type; -} arr_state_t; - -static arr_state_t arr_state[8] = { - {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, - 
{0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL} -}; - -static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 }; - -static void cyrix_set_all(void) -{ - int i; - - prepare_set(); - - /* the CCRs are not contiguous */ - for (i = 0; i < 4; i++) - setCx86(CX86_CCR0 + i, ccr_state[i]); - for (; i < 7; i++) - setCx86(CX86_CCR4 + i, ccr_state[i]); - for (i = 0; i < 8; i++) - cyrix_set_arr(i, arr_state[i].base, - arr_state[i].size, arr_state[i].type); - - post_set(); -} - -#if 0 -/* - * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection - * with the SMM (System Management Mode) mode. So we need the following: - * Check whether SMI_LOCK (CCR3 bit 0) is set - * if it is set, write a warning message: ARR3 cannot be changed! - * (it cannot be changed until the next processor reset) - * if it is reset, then we can change it, set all the needed bits: - * - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset) - * - disable access to SMM memory (CCR1 bit 2 reset) - * - disable SMM mode (CCR1 bit 1 reset) - * - disable write protection of ARR3 (CCR6 bit 1 reset) - * - (maybe) disable ARR3 - * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set) - */ -static void __init -cyrix_arr_init(void) -{ - struct set_mtrr_context ctxt; - unsigned char ccr[7]; - int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 }; -#ifdef CONFIG_SMP - int i; -#endif - - /* flush cache and enable MAPEN */ - set_mtrr_prepare_save(&ctxt); - set_mtrr_cache_disable(&ctxt); - - /* Save all CCRs locally */ - ccr[0] = getCx86(CX86_CCR0); - ccr[1] = getCx86(CX86_CCR1); - ccr[2] = getCx86(CX86_CCR2); - ccr[3] = ctxt.ccr3; - ccr[4] = getCx86(CX86_CCR4); - ccr[5] = getCx86(CX86_CCR5); - ccr[6] = getCx86(CX86_CCR6); - - if (ccr[3] & 1) { - ccrc[3] = 1; - arr3_protected = 1; - } else { - /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and - * access to SMM memory through ARR3 (bit 7). 
- */ - if (ccr[1] & 0x80) { - ccr[1] &= 0x7f; - ccrc[1] |= 0x80; - } - if (ccr[1] & 0x04) { - ccr[1] &= 0xfb; - ccrc[1] |= 0x04; - } - if (ccr[1] & 0x02) { - ccr[1] &= 0xfd; - ccrc[1] |= 0x02; - } - arr3_protected = 0; - if (ccr[6] & 0x02) { - ccr[6] &= 0xfd; - ccrc[6] = 1; /* Disable write protection of ARR3 */ - setCx86(CX86_CCR6, ccr[6]); - } - /* Disable ARR3. This is safe now that we disabled SMM. */ - /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */ - } - /* If we changed CCR1 in memory, change it in the processor, too. */ - if (ccrc[1]) - setCx86(CX86_CCR1, ccr[1]); - - /* Enable ARR usage by the processor */ - if (!(ccr[5] & 0x20)) { - ccr[5] |= 0x20; - ccrc[5] = 1; - setCx86(CX86_CCR5, ccr[5]); - } -#ifdef CONFIG_SMP - for (i = 0; i < 7; i++) - ccr_state[i] = ccr[i]; - for (i = 0; i < 8; i++) - cyrix_get_arr(i, - &arr_state[i].base, &arr_state[i].size, - &arr_state[i].type); -#endif - - set_mtrr_done(&ctxt); /* flush cache and disable MAPEN */ - - if (ccrc[5]) - printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n"); - if (ccrc[3]) - printk(KERN_INFO "mtrr: ARR3 cannot be changed\n"); -/* - if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n"); - if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n"); - if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n"); -*/ - if (ccrc[6]) - printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n"); -} -#endif - -static struct mtrr_ops cyrix_mtrr_ops = { - .vendor = X86_VENDOR_CYRIX, -// .init = cyrix_arr_init, - .set_all = cyrix_set_all, - .set = cyrix_set_arr, - .get = cyrix_get_arr, - .get_free_region = cyrix_get_free_region, - .validate_add_page = generic_validate_add_page, - .have_wrcomb = positive_have_wrcomb, -}; - -int __init cyrix_init_mtrr(void) -{ - set_mtrr_ops(&cyrix_mtrr_ops); - return 0; -} - -//arch_initcall(cyrix_init_mtrr); diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c deleted file mode 100644 index 
56f64e34829..00000000000 --- a/arch/i386/kernel/cpu/mtrr/generic.c +++ /dev/null @@ -1,509 +0,0 @@ -/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong - because MTRRs can span upto 40 bits (36bits on most modern x86) */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "mtrr.h" - -struct mtrr_state { - struct mtrr_var_range *var_ranges; - mtrr_type fixed_ranges[NUM_FIXED_RANGES]; - unsigned char enabled; - unsigned char have_fixed; - mtrr_type def_type; -}; - -struct fixed_range_block { - int base_msr; /* start address of an MTRR block */ - int ranges; /* number of MTRRs in this block */ -}; - -static struct fixed_range_block fixed_range_blocks[] = { - { MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */ - { MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */ - { MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */ - {} -}; - -static unsigned long smp_changes_mask; -static struct mtrr_state mtrr_state = {}; - -#undef MODULE_PARAM_PREFIX -#define MODULE_PARAM_PREFIX "mtrr." 
- -static int mtrr_show; -module_param_named(show, mtrr_show, bool, 0); - -/* Get the MSR pair relating to a var range */ -static void -get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) -{ - rdmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); - rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); -} - -static void -get_fixed_ranges(mtrr_type * frs) -{ - unsigned int *p = (unsigned int *) frs; - int i; - - rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]); - - for (i = 0; i < 2; i++) - rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]); - for (i = 0; i < 8; i++) - rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]); -} - -void mtrr_save_fixed_ranges(void *info) -{ - if (cpu_has_mtrr) - get_fixed_ranges(mtrr_state.fixed_ranges); -} - -static void print_fixed(unsigned base, unsigned step, const mtrr_type*types) -{ - unsigned i; - - for (i = 0; i < 8; ++i, ++types, base += step) - printk(KERN_INFO "MTRR %05X-%05X %s\n", - base, base + step - 1, mtrr_attrib_to_str(*types)); -} - -/* Grab all of the MTRR state for this CPU into *state */ -void __init get_mtrr_state(void) -{ - unsigned int i; - struct mtrr_var_range *vrs; - unsigned lo, dummy; - - if (!mtrr_state.var_ranges) { - mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range), - GFP_KERNEL); - if (!mtrr_state.var_ranges) - return; - } - vrs = mtrr_state.var_ranges; - - rdmsr(MTRRcap_MSR, lo, dummy); - mtrr_state.have_fixed = (lo >> 8) & 1; - - for (i = 0; i < num_var_ranges; i++) - get_mtrr_var_range(i, &vrs[i]); - if (mtrr_state.have_fixed) - get_fixed_ranges(mtrr_state.fixed_ranges); - - rdmsr(MTRRdefType_MSR, lo, dummy); - mtrr_state.def_type = (lo & 0xff); - mtrr_state.enabled = (lo & 0xc00) >> 10; - - if (mtrr_show) { - int high_width; - - printk(KERN_INFO "MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type)); - if (mtrr_state.have_fixed) { - printk(KERN_INFO "MTRR fixed ranges %sabled:\n", - mtrr_state.enabled & 1 ? 
"en" : "dis"); - print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); - for (i = 0; i < 2; ++i) - print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); - for (i = 0; i < 8; ++i) - print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); - } - printk(KERN_INFO "MTRR variable ranges %sabled:\n", - mtrr_state.enabled & 2 ? "en" : "dis"); - high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4; - for (i = 0; i < num_var_ranges; ++i) { - if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) - printk(KERN_INFO "MTRR %u base %0*X%05X000 mask %0*X%05X000 %s\n", - i, - high_width, - mtrr_state.var_ranges[i].base_hi, - mtrr_state.var_ranges[i].base_lo >> 12, - high_width, - mtrr_state.var_ranges[i].mask_hi, - mtrr_state.var_ranges[i].mask_lo >> 12, - mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); - else - printk(KERN_INFO "MTRR %u disabled\n", i); - } - } -} - -/* Some BIOS's are fucked and don't set all MTRRs the same! */ -void __init mtrr_state_warn(void) -{ - unsigned long mask = smp_changes_mask; - - if (!mask) - return; - if (mask & MTRR_CHANGE_MASK_FIXED) - printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n"); - if (mask & MTRR_CHANGE_MASK_VARIABLE) - printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n"); - if (mask & MTRR_CHANGE_MASK_DEFTYPE) - printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n"); - printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); - printk(KERN_INFO "mtrr: corrected configuration.\n"); -} - -/* Doesn't attempt to pass an error out to MTRR users - because it's quite complicated in some cases and probably not - worth it because the best error handling is to ignore it. 
*/ -void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) -{ - if (wrmsr_safe(msr, a, b) < 0) - printk(KERN_ERR - "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", - smp_processor_id(), msr, a, b); -} - -/** - * Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs - * see AMD publication no. 24593, chapter 3.2.1 for more information - */ -static inline void k8_enable_fixed_iorrs(void) -{ - unsigned lo, hi; - - rdmsr(MSR_K8_SYSCFG, lo, hi); - mtrr_wrmsr(MSR_K8_SYSCFG, lo - | K8_MTRRFIXRANGE_DRAM_ENABLE - | K8_MTRRFIXRANGE_DRAM_MODIFY, hi); -} - -/** - * Checks and updates an fixed-range MTRR if it differs from the value it - * should have. If K8 extenstions are wanted, update the K8 SYSCFG MSR also. - * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information - * \param msr MSR address of the MTTR which should be checked and updated - * \param changed pointer which indicates whether the MTRR needed to be changed - * \param msrwords pointer to the MSR values which the MSR should have - */ -static void set_fixed_range(int msr, int * changed, unsigned int * msrwords) -{ - unsigned lo, hi; - - rdmsr(msr, lo, hi); - - if (lo != msrwords[0] || hi != msrwords[1]) { - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 == 15 && - ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) - k8_enable_fixed_iorrs(); - mtrr_wrmsr(msr, msrwords[0], msrwords[1]); - *changed = TRUE; - } -} - -int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) -/* [SUMMARY] Get a free MTRR. - The starting (base) address of the region. - The size (in bytes) of the region. - [RETURNS] The index of the region on success, else -1 on error. 
-*/ -{ - int i, max; - mtrr_type ltype; - unsigned long lbase, lsize; - - max = num_var_ranges; - if (replace_reg >= 0 && replace_reg < max) - return replace_reg; - for (i = 0; i < max; ++i) { - mtrr_if->get(i, &lbase, &lsize, <ype); - if (lsize == 0) - return i; - } - return -ENOSPC; -} - -static void generic_get_mtrr(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type *type) -{ - unsigned int mask_lo, mask_hi, base_lo, base_hi; - - rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); - if ((mask_lo & 0x800) == 0) { - /* Invalid (i.e. free) range */ - *base = 0; - *size = 0; - *type = 0; - return; - } - - rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); - - /* Work out the shifted address mask. */ - mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) - | mask_lo >> PAGE_SHIFT; - - /* This works correctly if size is a power of two, i.e. a - contiguous range. */ - *size = -mask_lo; - *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; - *type = base_lo & 0xff; -} - -/** - * Checks and updates the fixed-range MTRRs if they differ from the saved set - * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges() - */ -static int set_fixed_ranges(mtrr_type * frs) -{ - unsigned long long *saved = (unsigned long long *) frs; - int changed = FALSE; - int block=-1, range; - - while (fixed_range_blocks[++block].ranges) - for (range=0; range < fixed_range_blocks[block].ranges; range++) - set_fixed_range(fixed_range_blocks[block].base_msr + range, - &changed, (unsigned int *) saved++); - - return changed; -} - -/* Set the MSR pair relating to a var range. 
Returns TRUE if - changes are made */ -static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) -{ - unsigned int lo, hi; - int changed = FALSE; - - rdmsr(MTRRphysBase_MSR(index), lo, hi); - if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) - || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != - (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { - mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); - changed = TRUE; - } - - rdmsr(MTRRphysMask_MSR(index), lo, hi); - - if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL) - || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != - (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { - mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); - changed = TRUE; - } - return changed; -} - -static u32 deftype_lo, deftype_hi; - -static unsigned long set_mtrr_state(void) -/* [SUMMARY] Set the MTRR state for this CPU. - The MTRR state information to read. - Some relevant CPU context. - [NOTE] The CPU must already be in a safe state for MTRR changes. - [RETURNS] 0 if no changes made, else a mask indication what was changed. 
-*/ -{ - unsigned int i; - unsigned long change_mask = 0; - - for (i = 0; i < num_var_ranges; i++) - if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i])) - change_mask |= MTRR_CHANGE_MASK_VARIABLE; - - if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges)) - change_mask |= MTRR_CHANGE_MASK_FIXED; - - /* Set_mtrr_restore restores the old value of MTRRdefType, - so to set it we fiddle with the saved value */ - if ((deftype_lo & 0xff) != mtrr_state.def_type - || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { - deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10); - change_mask |= MTRR_CHANGE_MASK_DEFTYPE; - } - - return change_mask; -} - - -static unsigned long cr4 = 0; -static DEFINE_SPINLOCK(set_atomicity_lock); - -/* - * Since we are disabling the cache don't allow any interrupts - they - * would run extremely slow and would only increase the pain. The caller must - * ensure that local interrupts are disabled and are reenabled after post_set() - * has been called. - */ - -static void prepare_set(void) __acquires(set_atomicity_lock) -{ - unsigned long cr0; - - /* Note that this is not ideal, since the cache is only flushed/disabled - for this CPU while the MTRRs are changed, but changing this requires - more invasive changes to the way the kernel boots */ - - spin_lock(&set_atomicity_lock); - - /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. 
*/ - cr0 = read_cr0() | 0x40000000; /* set CD flag */ - write_cr0(cr0); - wbinvd(); - - /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if ( cpu_has_pge ) { - cr4 = read_cr4(); - write_cr4(cr4 & ~X86_CR4_PGE); - } - - /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ - __flush_tlb(); - - /* Save MTRR state */ - rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); - - /* Disable MTRRs, and set the default type to uncached */ - mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi); -} - -static void post_set(void) __releases(set_atomicity_lock) -{ - /* Flush TLBs (no need to flush caches - they are disabled) */ - __flush_tlb(); - - /* Intel (P6) standard MTRRs */ - mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); - - /* Enable caches */ - write_cr0(read_cr0() & 0xbfffffff); - - /* Restore value of CR4 */ - if ( cpu_has_pge ) - write_cr4(cr4); - spin_unlock(&set_atomicity_lock); -} - -static void generic_set_all(void) -{ - unsigned long mask, count; - unsigned long flags; - - local_irq_save(flags); - prepare_set(); - - /* Actually set the state */ - mask = set_mtrr_state(); - - post_set(); - local_irq_restore(flags); - - /* Use the atomic bitops to update the global mask */ - for (count = 0; count < sizeof mask * 8; ++count) { - if (mask & 0x01) - set_bit(count, &smp_changes_mask); - mask >>= 1; - } - -} - -static void generic_set_mtrr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) -/* [SUMMARY] Set variable MTRR register on the local CPU. - The register to set. - The base address of the region. - The size of the region. If this is 0 the region is disabled. - The type of the region. - If TRUE, do the change safely. If FALSE, safety measures should - be done externally. - [RETURNS] Nothing. 
-*/ -{ - unsigned long flags; - struct mtrr_var_range *vr; - - vr = &mtrr_state.var_ranges[reg]; - - local_irq_save(flags); - prepare_set(); - - if (size == 0) { - /* The invalid bit is kept in the mask, so we simply clear the - relevant mask register to disable a range. */ - mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); - memset(vr, 0, sizeof(struct mtrr_var_range)); - } else { - vr->base_lo = base << PAGE_SHIFT | type; - vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT); - vr->mask_lo = -size << PAGE_SHIFT | 0x800; - vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT); - - mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi); - mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi); - } - - post_set(); - local_irq_restore(flags); -} - -int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) -{ - unsigned long lbase, last; - - /* For Intel PPro stepping <= 7, must be 4 MiB aligned - and not touch 0x70000000->0x7003FFFF */ - if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask <= 7) { - if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { - printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); - return -EINVAL; - } - if (!(base + size < 0x70000 || base > 0x7003F) && - (type == MTRR_TYPE_WRCOMB - || type == MTRR_TYPE_WRBACK)) { - printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); - return -EINVAL; - } - } - - /* Check upper bits of base and last are equal and lower bits are 0 - for base and 1 for last */ - last = base + size - 1; - for (lbase = base; !(lbase & 1) && (last & 1); - lbase = lbase >> 1, last = last >> 1) ; - if (lbase != last) { - printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", - base, size); - return -EINVAL; - } - return 0; -} - - -static int generic_have_wrcomb(void) -{ - unsigned long config, dummy; - rdmsr(MTRRcap_MSR, config, dummy); - return 
(config & (1 << 10)); -} - -int positive_have_wrcomb(void) -{ - return 1; -} - -/* generic structure... - */ -struct mtrr_ops generic_mtrr_ops = { - .use_intel_if = 1, - .set_all = generic_set_all, - .get = generic_get_mtrr, - .get_free_region = generic_get_free_region, - .set = generic_set_mtrr, - .validate_add_page = generic_validate_add_page, - .have_wrcomb = generic_have_wrcomb, -}; diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c deleted file mode 100644 index c7d8f175674..00000000000 --- a/arch/i386/kernel/cpu/mtrr/if.c +++ /dev/null @@ -1,439 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#define LINE_SIZE 80 - -#include -#include "mtrr.h" - -/* RED-PEN: this is accessed without any locking */ -extern unsigned int *usage_table; - - -#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) - -static const char *const mtrr_strings[MTRR_NUM_TYPES] = -{ - "uncachable", /* 0 */ - "write-combining", /* 1 */ - "?", /* 2 */ - "?", /* 3 */ - "write-through", /* 4 */ - "write-protect", /* 5 */ - "write-back", /* 6 */ -}; - -const char *mtrr_attrib_to_str(int x) -{ - return (x <= 6) ? 
mtrr_strings[x] : "?"; -} - -#ifdef CONFIG_PROC_FS - -static int -mtrr_file_add(unsigned long base, unsigned long size, - unsigned int type, char increment, struct file *file, int page) -{ - int reg, max; - unsigned int *fcount = FILE_FCOUNT(file); - - max = num_var_ranges; - if (fcount == NULL) { - fcount = kzalloc(max * sizeof *fcount, GFP_KERNEL); - if (!fcount) - return -ENOMEM; - FILE_FCOUNT(file) = fcount; - } - if (!page) { - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) - return -EINVAL; - base >>= PAGE_SHIFT; - size >>= PAGE_SHIFT; - } - reg = mtrr_add_page(base, size, type, 1); - if (reg >= 0) - ++fcount[reg]; - return reg; -} - -static int -mtrr_file_del(unsigned long base, unsigned long size, - struct file *file, int page) -{ - int reg; - unsigned int *fcount = FILE_FCOUNT(file); - - if (!page) { - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) - return -EINVAL; - base >>= PAGE_SHIFT; - size >>= PAGE_SHIFT; - } - reg = mtrr_del_page(-1, base, size); - if (reg < 0) - return reg; - if (fcount == NULL) - return reg; - if (fcount[reg] < 1) - return -EINVAL; - --fcount[reg]; - return reg; -} - -/* RED-PEN: seq_file can seek now. this is ignored. 
*/ -static ssize_t -mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) -/* Format of control line: - "base=%Lx size=%Lx type=%s" OR: - "disable=%d" -*/ -{ - int i, err; - unsigned long reg; - unsigned long long base, size; - char *ptr; - char line[LINE_SIZE]; - size_t linelen; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (!len) - return -EINVAL; - memset(line, 0, LINE_SIZE); - if (len > LINE_SIZE) - len = LINE_SIZE; - if (copy_from_user(line, buf, len - 1)) - return -EFAULT; - linelen = strlen(line); - ptr = line + linelen - 1; - if (linelen && *ptr == '\n') - *ptr = '\0'; - if (!strncmp(line, "disable=", 8)) { - reg = simple_strtoul(line + 8, &ptr, 0); - err = mtrr_del_page(reg, 0, 0); - if (err < 0) - return err; - return len; - } - if (strncmp(line, "base=", 5)) - return -EINVAL; - base = simple_strtoull(line + 5, &ptr, 0); - for (; isspace(*ptr); ++ptr) ; - if (strncmp(ptr, "size=", 5)) - return -EINVAL; - size = simple_strtoull(ptr + 5, &ptr, 0); - if ((base & 0xfff) || (size & 0xfff)) - return -EINVAL; - for (; isspace(*ptr); ++ptr) ; - if (strncmp(ptr, "type=", 5)) - return -EINVAL; - ptr += 5; - for (; isspace(*ptr); ++ptr) ; - for (i = 0; i < MTRR_NUM_TYPES; ++i) { - if (strcmp(ptr, mtrr_strings[i])) - continue; - base >>= PAGE_SHIFT; - size >>= PAGE_SHIFT; - err = - mtrr_add_page((unsigned long) base, (unsigned long) size, i, - 1); - if (err < 0) - return err; - return len; - } - return -EINVAL; -} - -static long -mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) -{ - int err = 0; - mtrr_type type; - unsigned long size; - struct mtrr_sentry sentry; - struct mtrr_gentry gentry; - void __user *arg = (void __user *) __arg; - - switch (cmd) { - case MTRRIOC_ADD_ENTRY: - case MTRRIOC_SET_ENTRY: - case MTRRIOC_DEL_ENTRY: - case MTRRIOC_KILL_ENTRY: - case MTRRIOC_ADD_PAGE_ENTRY: - case MTRRIOC_SET_PAGE_ENTRY: - case MTRRIOC_DEL_PAGE_ENTRY: - case MTRRIOC_KILL_PAGE_ENTRY: - if (copy_from_user(&sentry, 
arg, sizeof sentry)) - return -EFAULT; - break; - case MTRRIOC_GET_ENTRY: - case MTRRIOC_GET_PAGE_ENTRY: - if (copy_from_user(&gentry, arg, sizeof gentry)) - return -EFAULT; - break; -#ifdef CONFIG_COMPAT - case MTRRIOC32_ADD_ENTRY: - case MTRRIOC32_SET_ENTRY: - case MTRRIOC32_DEL_ENTRY: - case MTRRIOC32_KILL_ENTRY: - case MTRRIOC32_ADD_PAGE_ENTRY: - case MTRRIOC32_SET_PAGE_ENTRY: - case MTRRIOC32_DEL_PAGE_ENTRY: - case MTRRIOC32_KILL_PAGE_ENTRY: { - struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg; - err = get_user(sentry.base, &s32->base); - err |= get_user(sentry.size, &s32->size); - err |= get_user(sentry.type, &s32->type); - if (err) - return err; - break; - } - case MTRRIOC32_GET_ENTRY: - case MTRRIOC32_GET_PAGE_ENTRY: { - struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; - err = get_user(gentry.regnum, &g32->regnum); - err |= get_user(gentry.base, &g32->base); - err |= get_user(gentry.size, &g32->size); - err |= get_user(gentry.type, &g32->type); - if (err) - return err; - break; - } -#endif - } - - switch (cmd) { - default: - return -ENOTTY; - case MTRRIOC_ADD_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_ADD_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = - mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, - file, 0); - break; - case MTRRIOC_SET_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_SET_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); - break; - case MTRRIOC_DEL_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_DEL_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_file_del(sentry.base, sentry.size, file, 0); - break; - case MTRRIOC_KILL_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_KILL_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_del(-1, sentry.base, sentry.size); - break; - case MTRRIOC_GET_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_GET_ENTRY: 
-#endif - if (gentry.regnum >= num_var_ranges) - return -EINVAL; - mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); - - /* Hide entries that go above 4GB */ - if (gentry.base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)) - || size >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))) - gentry.base = gentry.size = gentry.type = 0; - else { - gentry.base <<= PAGE_SHIFT; - gentry.size = size << PAGE_SHIFT; - gentry.type = type; - } - - break; - case MTRRIOC_ADD_PAGE_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_ADD_PAGE_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = - mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, - file, 1); - break; - case MTRRIOC_SET_PAGE_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_SET_PAGE_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); - break; - case MTRRIOC_DEL_PAGE_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_DEL_PAGE_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_file_del(sentry.base, sentry.size, file, 1); - break; - case MTRRIOC_KILL_PAGE_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_KILL_PAGE_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_del_page(-1, sentry.base, sentry.size); - break; - case MTRRIOC_GET_PAGE_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_GET_PAGE_ENTRY: -#endif - if (gentry.regnum >= num_var_ranges) - return -EINVAL; - mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); - /* Hide entries that would overflow */ - if (size != (__typeof__(gentry.size))size) - gentry.base = gentry.size = gentry.type = 0; - else { - gentry.size = size; - gentry.type = type; - } - break; - } - - if (err) - return err; - - switch(cmd) { - case MTRRIOC_GET_ENTRY: - case MTRRIOC_GET_PAGE_ENTRY: - if (copy_to_user(arg, &gentry, sizeof gentry)) - err = -EFAULT; - break; -#ifdef CONFIG_COMPAT - case MTRRIOC32_GET_ENTRY: - case MTRRIOC32_GET_PAGE_ENTRY: 
{ - struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; - err = put_user(gentry.base, &g32->base); - err |= put_user(gentry.size, &g32->size); - err |= put_user(gentry.regnum, &g32->regnum); - err |= put_user(gentry.type, &g32->type); - break; - } -#endif - } - return err; -} - -static int -mtrr_close(struct inode *ino, struct file *file) -{ - int i, max; - unsigned int *fcount = FILE_FCOUNT(file); - - if (fcount != NULL) { - max = num_var_ranges; - for (i = 0; i < max; ++i) { - while (fcount[i] > 0) { - mtrr_del(i, 0, 0); - --fcount[i]; - } - } - kfree(fcount); - FILE_FCOUNT(file) = NULL; - } - return single_release(ino, file); -} - -static int mtrr_seq_show(struct seq_file *seq, void *offset); - -static int mtrr_open(struct inode *inode, struct file *file) -{ - if (!mtrr_if) - return -EIO; - if (!mtrr_if->get) - return -ENXIO; - return single_open(file, mtrr_seq_show, NULL); -} - -static const struct file_operations mtrr_fops = { - .owner = THIS_MODULE, - .open = mtrr_open, - .read = seq_read, - .llseek = seq_lseek, - .write = mtrr_write, - .unlocked_ioctl = mtrr_ioctl, - .compat_ioctl = mtrr_ioctl, - .release = mtrr_close, -}; - - -static struct proc_dir_entry *proc_root_mtrr; - - -static int mtrr_seq_show(struct seq_file *seq, void *offset) -{ - char factor; - int i, max, len; - mtrr_type type; - unsigned long base, size; - - len = 0; - max = num_var_ranges; - for (i = 0; i < max; i++) { - mtrr_if->get(i, &base, &size, &type); - if (size == 0) - usage_table[i] = 0; - else { - if (size < (0x100000 >> PAGE_SHIFT)) { - /* less than 1MB */ - factor = 'K'; - size <<= PAGE_SHIFT - 10; - } else { - factor = 'M'; - size >>= 20 - PAGE_SHIFT; - } - /* RED-PEN: base can be > 32bit */ - len += seq_printf(seq, - "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", - i, base, base >> (20 - PAGE_SHIFT), size, factor, - mtrr_attrib_to_str(type), usage_table[i]); - } - } - return 0; -} - -static int __init mtrr_if_init(void) -{ - struct 
cpuinfo_x86 *c = &boot_cpu_data; - - if ((!cpu_has(c, X86_FEATURE_MTRR)) && - (!cpu_has(c, X86_FEATURE_K6_MTRR)) && - (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) && - (!cpu_has(c, X86_FEATURE_CENTAUR_MCR))) - return -ENODEV; - - proc_root_mtrr = - create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root); - if (proc_root_mtrr) { - proc_root_mtrr->owner = THIS_MODULE; - proc_root_mtrr->proc_fops = &mtrr_fops; - } - return 0; -} - -arch_initcall(mtrr_if_init); -#endif /* CONFIG_PROC_FS */ diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c deleted file mode 100644 index c48b6fea5ab..00000000000 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ /dev/null @@ -1,768 +0,0 @@ -/* Generic MTRR (Memory Type Range Register) driver. - - Copyright (C) 1997-2000 Richard Gooch - Copyright (c) 2002 Patrick Mochel - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with this library; if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - Richard Gooch may be reached by email at rgooch@atnf.csiro.au - The postal address is: - Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. - - Source: "Pentium Pro Family Developer's Manual, Volume 3: - Operating System Writer's Guide" (Intel document number 242692), - section 11.11.7 - - This was cleaned and made readable by Patrick Mochel - on 6-7 March 2002. 
- Source: Intel Architecture Software Developers Manual, Volume 3: - System Programming Guide; Section 9.11. (1997 edition - PPro). -*/ - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include "mtrr.h" - -u32 num_var_ranges = 0; - -unsigned int *usage_table; -static DEFINE_MUTEX(mtrr_mutex); - -u64 size_or_mask, size_and_mask; - -static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; - -struct mtrr_ops * mtrr_if = NULL; - -static void set_mtrr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type); - -#ifndef CONFIG_X86_64 -extern int arr3_protected; -#else -#define arr3_protected 0 -#endif - -void set_mtrr_ops(struct mtrr_ops * ops) -{ - if (ops->vendor && ops->vendor < X86_VENDOR_NUM) - mtrr_ops[ops->vendor] = ops; -} - -/* Returns non-zero if we have the write-combining memory type */ -static int have_wrcomb(void) -{ - struct pci_dev *dev; - u8 rev; - - if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) { - /* ServerWorks LE chipsets < rev 6 have problems with write-combining - Don't allow it and leave room for other chipsets to be tagged */ - if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && - dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { - pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); - if (rev <= 5) { - printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); - pci_dev_put(dev); - return 0; - } - } - /* Intel 450NX errata # 23. Non ascending cacheline evictions to - write combining memory may resulting in data corruption */ - if (dev->vendor == PCI_VENDOR_ID_INTEL && - dev->device == PCI_DEVICE_ID_INTEL_82451NX) { - printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); - pci_dev_put(dev); - return 0; - } - pci_dev_put(dev); - } - return (mtrr_if->have_wrcomb ? 
mtrr_if->have_wrcomb() : 0); -} - -/* This function returns the number of variable MTRRs */ -static void __init set_num_var_ranges(void) -{ - unsigned long config = 0, dummy; - - if (use_intel()) { - rdmsr(MTRRcap_MSR, config, dummy); - } else if (is_cpu(AMD)) - config = 2; - else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) - config = 8; - num_var_ranges = config & 0xff; -} - -static void __init init_table(void) -{ - int i, max; - - max = num_var_ranges; - if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL)) - == NULL) { - printk(KERN_ERR "mtrr: could not allocate\n"); - return; - } - for (i = 0; i < max; i++) - usage_table[i] = 1; -} - -struct set_mtrr_data { - atomic_t count; - atomic_t gate; - unsigned long smp_base; - unsigned long smp_size; - unsigned int smp_reg; - mtrr_type smp_type; -}; - -#ifdef CONFIG_SMP - -static void ipi_handler(void *info) -/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. - [RETURNS] Nothing. -*/ -{ - struct set_mtrr_data *data = info; - unsigned long flags; - - local_irq_save(flags); - - atomic_dec(&data->count); - while(!atomic_read(&data->gate)) - cpu_relax(); - - /* The master has cleared me to execute */ - if (data->smp_reg != ~0U) - mtrr_if->set(data->smp_reg, data->smp_base, - data->smp_size, data->smp_type); - else - mtrr_if->set_all(); - - atomic_dec(&data->count); - while(atomic_read(&data->gate)) - cpu_relax(); - - atomic_dec(&data->count); - local_irq_restore(flags); -} - -#endif - -static inline int types_compatible(mtrr_type type1, mtrr_type type2) { - return type1 == MTRR_TYPE_UNCACHABLE || - type2 == MTRR_TYPE_UNCACHABLE || - (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || - (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH); -} - -/** - * set_mtrr - update mtrrs on all processors - * @reg: mtrr in question - * @base: mtrr base - * @size: mtrr size - * @type: mtrr type - * - * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: - * - * 1. 
Send IPI to do the following: - * 2. Disable Interrupts - * 3. Wait for all procs to do so - * 4. Enter no-fill cache mode - * 5. Flush caches - * 6. Clear PGE bit - * 7. Flush all TLBs - * 8. Disable all range registers - * 9. Update the MTRRs - * 10. Enable all range registers - * 11. Flush all TLBs and caches again - * 12. Enter normal cache mode and reenable caching - * 13. Set PGE - * 14. Wait for buddies to catch up - * 15. Enable interrupts. - * - * What does that mean for us? Well, first we set data.count to the number - * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait - * until it hits 0 and proceed. We set the data.gate flag and reset data.count. - * Meanwhile, they are waiting for that flag to be set. Once it's set, each - * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it - * differently, so we call mtrr_if->set() callback and let them take care of it. - * When they're done, they again decrement data->count and wait for data.gate to - * be reset. - * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag. - * Everyone then enables interrupts and we all continue on. - * - * Note that the mechanism is the same for UP systems, too; all the SMP stuff - * becomes nops. 
- */ -static void set_mtrr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) -{ - struct set_mtrr_data data; - unsigned long flags; - - data.smp_reg = reg; - data.smp_base = base; - data.smp_size = size; - data.smp_type = type; - atomic_set(&data.count, num_booting_cpus() - 1); - /* make sure data.count is visible before unleashing other CPUs */ - smp_wmb(); - atomic_set(&data.gate,0); - - /* Start the ball rolling on other CPUs */ - if (smp_call_function(ipi_handler, &data, 1, 0) != 0) - panic("mtrr: timed out waiting for other CPUs\n"); - - local_irq_save(flags); - - while(atomic_read(&data.count)) - cpu_relax(); - - /* ok, reset count and toggle gate */ - atomic_set(&data.count, num_booting_cpus() - 1); - smp_wmb(); - atomic_set(&data.gate,1); - - /* do our MTRR business */ - - /* HACK! - * We use this same function to initialize the mtrrs on boot. - * The state of the boot cpu's mtrrs has been saved, and we want - * to replicate across all the APs. - * If we're doing that @reg is set to something special... - */ - if (reg != ~0U) - mtrr_if->set(reg,base,size,type); - - /* wait for the others */ - while(atomic_read(&data.count)) - cpu_relax(); - - atomic_set(&data.count, num_booting_cpus() - 1); - smp_wmb(); - atomic_set(&data.gate,0); - - /* - * Wait here for everyone to have seen the gate change - * So we're the last ones to touch 'data' - */ - while(atomic_read(&data.count)) - cpu_relax(); - - local_irq_restore(flags); -} - -/** - * mtrr_add_page - Add a memory type region - * @base: Physical base address of region in pages (in units of 4 kB!) - * @size: Physical size of region in pages (4 kB) - * @type: Type of MTRR desired - * @increment: If this is true do usage counting on the region - * - * Memory type region registers control the caching on newer Intel and - * non Intel processors. This function allows drivers to request an - * MTRR is added. 
The details and hardware specifics of each processor's - * implementation are hidden from the caller, but nevertheless the - * caller should expect to need to provide a power of two size on an - * equivalent power of two boundary. - * - * If the region cannot be added either because all regions are in use - * or the CPU cannot support it a negative value is returned. On success - * the register number for this entry is returned, but should be treated - * as a cookie only. - * - * On a multiprocessor machine the changes are made to all processors. - * This is required on x86 by the Intel processors. - * - * The available types are - * - * %MTRR_TYPE_UNCACHABLE - No caching - * - * %MTRR_TYPE_WRBACK - Write data back in bursts whenever - * - * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts - * - * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes - * - * BUGS: Needs a quiet flag for the cases where drivers do not mind - * failures and do not wish system log messages to be sent. 
- */ - -int mtrr_add_page(unsigned long base, unsigned long size, - unsigned int type, char increment) -{ - int i, replace, error; - mtrr_type ltype; - unsigned long lbase, lsize; - - if (!mtrr_if) - return -ENXIO; - - if ((error = mtrr_if->validate_add_page(base,size,type))) - return error; - - if (type >= MTRR_NUM_TYPES) { - printk(KERN_WARNING "mtrr: type: %u invalid\n", type); - return -EINVAL; - } - - /* If the type is WC, check that this processor supports it */ - if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { - printk(KERN_WARNING - "mtrr: your processor doesn't support write-combining\n"); - return -ENOSYS; - } - - if (!size) { - printk(KERN_WARNING "mtrr: zero sized request\n"); - return -EINVAL; - } - - if (base & size_or_mask || size & size_or_mask) { - printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n"); - return -EINVAL; - } - - error = -EINVAL; - replace = -1; - - /* No CPU hotplug when we change MTRR entries */ - lock_cpu_hotplug(); - /* Search for existing MTRR */ - mutex_lock(&mtrr_mutex); - for (i = 0; i < num_var_ranges; ++i) { - mtrr_if->get(i, &lbase, &lsize, <ype); - if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase) - continue; - /* At this point we know there is some kind of overlap/enclosure */ - if (base < lbase || base + size - 1 > lbase + lsize - 1) { - if (base <= lbase && base + size - 1 >= lbase + lsize - 1) { - /* New region encloses an existing region */ - if (type == ltype) { - replace = replace == -1 ? 
i : -2; - continue; - } - else if (types_compatible(type, ltype)) - continue; - } - printk(KERN_WARNING - "mtrr: 0x%lx000,0x%lx000 overlaps existing" - " 0x%lx000,0x%lx000\n", base, size, lbase, - lsize); - goto out; - } - /* New region is enclosed by an existing region */ - if (ltype != type) { - if (types_compatible(type, ltype)) - continue; - printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", - base, size, mtrr_attrib_to_str(ltype), - mtrr_attrib_to_str(type)); - goto out; - } - if (increment) - ++usage_table[i]; - error = i; - goto out; - } - /* Search for an empty MTRR */ - i = mtrr_if->get_free_region(base, size, replace); - if (i >= 0) { - set_mtrr(i, base, size, type); - if (likely(replace < 0)) - usage_table[i] = 1; - else { - usage_table[i] = usage_table[replace] + !!increment; - if (unlikely(replace != i)) { - set_mtrr(replace, 0, 0, 0); - usage_table[replace] = 0; - } - } - } else - printk(KERN_INFO "mtrr: no more MTRRs available\n"); - error = i; - out: - mutex_unlock(&mtrr_mutex); - unlock_cpu_hotplug(); - return error; -} - -static int mtrr_check(unsigned long base, unsigned long size) -{ - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - printk(KERN_WARNING - "mtrr: size and base must be multiples of 4 kiB\n"); - printk(KERN_DEBUG - "mtrr: size: 0x%lx base: 0x%lx\n", size, base); - dump_stack(); - return -1; - } - return 0; -} - -/** - * mtrr_add - Add a memory type region - * @base: Physical base address of region - * @size: Physical size of region - * @type: Type of MTRR desired - * @increment: If this is true do usage counting on the region - * - * Memory type region registers control the caching on newer Intel and - * non Intel processors. This function allows drivers to request an - * MTRR is added. 
The details and hardware specifics of each processor's - * implementation are hidden from the caller, but nevertheless the - * caller should expect to need to provide a power of two size on an - * equivalent power of two boundary. - * - * If the region cannot be added either because all regions are in use - * or the CPU cannot support it a negative value is returned. On success - * the register number for this entry is returned, but should be treated - * as a cookie only. - * - * On a multiprocessor machine the changes are made to all processors. - * This is required on x86 by the Intel processors. - * - * The available types are - * - * %MTRR_TYPE_UNCACHABLE - No caching - * - * %MTRR_TYPE_WRBACK - Write data back in bursts whenever - * - * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts - * - * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes - * - * BUGS: Needs a quiet flag for the cases where drivers do not mind - * failures and do not wish system log messages to be sent. - */ - -int -mtrr_add(unsigned long base, unsigned long size, unsigned int type, - char increment) -{ - if (mtrr_check(base, size)) - return -EINVAL; - return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, - increment); -} - -/** - * mtrr_del_page - delete a memory type region - * @reg: Register returned by mtrr_add - * @base: Physical base address - * @size: Size of region - * - * If register is supplied then base and size are ignored. This is - * how drivers should call it. - * - * Releases an MTRR region. If the usage count drops to zero the - * register is freed and the region returns to default state. - * On success the register is returned, on failure a negative error - * code. 
- */ - -int mtrr_del_page(int reg, unsigned long base, unsigned long size) -{ - int i, max; - mtrr_type ltype; - unsigned long lbase, lsize; - int error = -EINVAL; - - if (!mtrr_if) - return -ENXIO; - - max = num_var_ranges; - /* No CPU hotplug when we change MTRR entries */ - lock_cpu_hotplug(); - mutex_lock(&mtrr_mutex); - if (reg < 0) { - /* Search for existing MTRR */ - for (i = 0; i < max; ++i) { - mtrr_if->get(i, &lbase, &lsize, <ype); - if (lbase == base && lsize == size) { - reg = i; - break; - } - } - if (reg < 0) { - printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base, - size); - goto out; - } - } - if (reg >= max) { - printk(KERN_WARNING "mtrr: register: %d too big\n", reg); - goto out; - } - if (is_cpu(CYRIX) && !use_intel()) { - if ((reg == 3) && arr3_protected) { - printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n"); - goto out; - } - } - mtrr_if->get(reg, &lbase, &lsize, <ype); - if (lsize < 1) { - printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); - goto out; - } - if (usage_table[reg] < 1) { - printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); - goto out; - } - if (--usage_table[reg] < 1) - set_mtrr(reg, 0, 0, 0); - error = reg; - out: - mutex_unlock(&mtrr_mutex); - unlock_cpu_hotplug(); - return error; -} -/** - * mtrr_del - delete a memory type region - * @reg: Register returned by mtrr_add - * @base: Physical base address - * @size: Size of region - * - * If register is supplied then base and size are ignored. This is - * how drivers should call it. - * - * Releases an MTRR region. If the usage count drops to zero the - * register is freed and the region returns to default state. - * On success the register is returned, on failure a negative error - * code. - */ - -int -mtrr_del(int reg, unsigned long base, unsigned long size) -{ - if (mtrr_check(base, size)) - return -EINVAL; - return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); -} - -EXPORT_SYMBOL(mtrr_add); -EXPORT_SYMBOL(mtrr_del); - -/* HACK ALERT! 
- * These should be called implicitly, but we can't yet until all the initcall - * stuff is done... - */ -extern void amd_init_mtrr(void); -extern void cyrix_init_mtrr(void); -extern void centaur_init_mtrr(void); - -static void __init init_ifs(void) -{ -#ifndef CONFIG_X86_64 - amd_init_mtrr(); - cyrix_init_mtrr(); - centaur_init_mtrr(); -#endif -} - -/* The suspend/resume methods are only for CPU without MTRR. CPU using generic - * MTRR driver doesn't require this - */ -struct mtrr_value { - mtrr_type ltype; - unsigned long lbase; - unsigned long lsize; -}; - -static struct mtrr_value * mtrr_state; - -static int mtrr_save(struct sys_device * sysdev, pm_message_t state) -{ - int i; - int size = num_var_ranges * sizeof(struct mtrr_value); - - mtrr_state = kzalloc(size,GFP_ATOMIC); - if (!mtrr_state) - return -ENOMEM; - - for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, - &mtrr_state[i].lbase, - &mtrr_state[i].lsize, - &mtrr_state[i].ltype); - } - return 0; -} - -static int mtrr_restore(struct sys_device * sysdev) -{ - int i; - - for (i = 0; i < num_var_ranges; i++) { - if (mtrr_state[i].lsize) - set_mtrr(i, - mtrr_state[i].lbase, - mtrr_state[i].lsize, - mtrr_state[i].ltype); - } - kfree(mtrr_state); - return 0; -} - - - -static struct sysdev_driver mtrr_sysdev_driver = { - .suspend = mtrr_save, - .resume = mtrr_restore, -}; - - -/** - * mtrr_bp_init - initialize mtrrs on the boot CPU - * - * This needs to be called early; before any of the other CPUs are - * initialized (i.e. before smp_init()). - * - */ -void __init mtrr_bp_init(void) -{ - init_ifs(); - - if (cpu_has_mtrr) { - mtrr_if = &generic_mtrr_ops; - size_or_mask = 0xff000000; /* 36 bits */ - size_and_mask = 0x00f00000; - - /* This is an AMD specific MSR, but we assume(hope?) that - Intel will implement it to when they extend the address - bus of the Xeon. 
*/ - if (cpuid_eax(0x80000000) >= 0x80000008) { - u32 phys_addr; - phys_addr = cpuid_eax(0x80000008) & 0xff; - /* CPUID workaround for Intel 0F33/0F34 CPU */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 0xF && - boot_cpu_data.x86_model == 0x3 && - (boot_cpu_data.x86_mask == 0x3 || - boot_cpu_data.x86_mask == 0x4)) - phys_addr = 36; - - size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1); - size_and_mask = ~size_or_mask & 0xfffff00000ULL; - } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && - boot_cpu_data.x86 == 6) { - /* VIA C* family have Intel style MTRRs, but - don't support PAE */ - size_or_mask = 0xfff00000; /* 32 bits */ - size_and_mask = 0; - } - } else { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if (cpu_has_k6_mtrr) { - /* Pre-Athlon (K6) AMD CPU MTRRs */ - mtrr_if = mtrr_ops[X86_VENDOR_AMD]; - size_or_mask = 0xfff00000; /* 32 bits */ - size_and_mask = 0; - } - break; - case X86_VENDOR_CENTAUR: - if (cpu_has_centaur_mcr) { - mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR]; - size_or_mask = 0xfff00000; /* 32 bits */ - size_and_mask = 0; - } - break; - case X86_VENDOR_CYRIX: - if (cpu_has_cyrix_arr) { - mtrr_if = mtrr_ops[X86_VENDOR_CYRIX]; - size_or_mask = 0xfff00000; /* 32 bits */ - size_and_mask = 0; - } - break; - default: - break; - } - } - - if (mtrr_if) { - set_num_var_ranges(); - init_table(); - if (use_intel()) - get_mtrr_state(); - } -} - -void mtrr_ap_init(void) -{ - unsigned long flags; - - if (!mtrr_if || !use_intel()) - return; - /* - * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed, - * but this routine will be called in cpu boot time, holding the lock - * breaks it. This routine is called in two cases: 1.very earily time - * of software resume, when there absolutely isn't mtrr entry changes; - * 2.cpu hotadd time. 
We let mtrr_add/del_page hold cpuhotplug lock to - * prevent mtrr entry changes - */ - local_irq_save(flags); - - mtrr_if->set_all(); - - local_irq_restore(flags); -} - -/** - * Save current fixed-range MTRR state of the BSP - */ -void mtrr_save_state(void) -{ - int cpu = get_cpu(); - - if (cpu == 0) - mtrr_save_fixed_ranges(NULL); - else - smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); - put_cpu(); -} - -static int __init mtrr_init_finialize(void) -{ - if (!mtrr_if) - return 0; - if (use_intel()) - mtrr_state_warn(); - else { - /* The CPUs haven't MTRR and seemes not support SMP. They have - * specific drivers, we use a tricky method to support - * suspend/resume for them. - * TBD: is there any system with such CPU which supports - * suspend/resume? if no, we should remove the code. - */ - sysdev_driver_register(&cpu_sysdev_class, - &mtrr_sysdev_driver); - } - return 0; -} -subsys_initcall(mtrr_init_finialize); diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h deleted file mode 100644 index 289dfe6030e..00000000000 --- a/arch/i386/kernel/cpu/mtrr/mtrr.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * local mtrr defines. 
- */ - -#ifndef TRUE -#define TRUE 1 -#define FALSE 0 -#endif - -#define MTRRcap_MSR 0x0fe -#define MTRRdefType_MSR 0x2ff - -#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) -#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) - -#define NUM_FIXED_RANGES 88 -#define MTRRfix64K_00000_MSR 0x250 -#define MTRRfix16K_80000_MSR 0x258 -#define MTRRfix16K_A0000_MSR 0x259 -#define MTRRfix4K_C0000_MSR 0x268 -#define MTRRfix4K_C8000_MSR 0x269 -#define MTRRfix4K_D0000_MSR 0x26a -#define MTRRfix4K_D8000_MSR 0x26b -#define MTRRfix4K_E0000_MSR 0x26c -#define MTRRfix4K_E8000_MSR 0x26d -#define MTRRfix4K_F0000_MSR 0x26e -#define MTRRfix4K_F8000_MSR 0x26f - -#define MTRR_CHANGE_MASK_FIXED 0x01 -#define MTRR_CHANGE_MASK_VARIABLE 0x02 -#define MTRR_CHANGE_MASK_DEFTYPE 0x04 - -/* In the Intel processor's MTRR interface, the MTRR type is always held in - an 8 bit field: */ -typedef u8 mtrr_type; - -struct mtrr_ops { - u32 vendor; - u32 use_intel_if; -// void (*init)(void); - void (*set)(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type); - void (*set_all)(void); - - void (*get)(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type * type); - int (*get_free_region)(unsigned long base, unsigned long size, - int replace_reg); - int (*validate_add_page)(unsigned long base, unsigned long size, - unsigned int type); - int (*have_wrcomb)(void); -}; - -extern int generic_get_free_region(unsigned long base, unsigned long size, - int replace_reg); -extern int generic_validate_add_page(unsigned long base, unsigned long size, - unsigned int type); - -extern struct mtrr_ops generic_mtrr_ops; - -extern int positive_have_wrcomb(void); - -/* library functions for processor-specific routines */ -struct set_mtrr_context { - unsigned long flags; - unsigned long cr4val; - u32 deftype_lo; - u32 deftype_hi; - u32 ccr3; -}; - -struct mtrr_var_range { - u32 base_lo; - u32 base_hi; - u32 mask_lo; - u32 mask_hi; -}; - -void set_mtrr_done(struct set_mtrr_context 
*ctxt); -void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); -void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); - -void get_mtrr_state(void); - -extern void set_mtrr_ops(struct mtrr_ops * ops); - -extern u64 size_or_mask, size_and_mask; -extern struct mtrr_ops * mtrr_if; - -#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) -#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) - -extern unsigned int num_var_ranges; - -void mtrr_state_warn(void); -const char *mtrr_attrib_to_str(int x); -void mtrr_wrmsr(unsigned, unsigned, unsigned); - diff --git a/arch/i386/kernel/cpu/mtrr/state.c b/arch/i386/kernel/cpu/mtrr/state.c deleted file mode 100644 index c9014ca4a57..00000000000 --- a/arch/i386/kernel/cpu/mtrr/state.c +++ /dev/null @@ -1,79 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "mtrr.h" - - -/* Put the processor into a state where MTRRs can be safely set */ -void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) -{ - unsigned int cr0; - - /* Disable interrupts locally */ - local_irq_save(ctxt->flags); - - if (use_intel() || is_cpu(CYRIX)) { - - /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if ( cpu_has_pge ) { - ctxt->cr4val = read_cr4(); - write_cr4(ctxt->cr4val & ~X86_CR4_PGE); - } - - /* Disable and flush caches. 
Note that wbinvd flushes the TLBs as - a side-effect */ - cr0 = read_cr0() | 0x40000000; - wbinvd(); - write_cr0(cr0); - wbinvd(); - - if (use_intel()) - /* Save MTRR state */ - rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); - else - /* Cyrix ARRs - everything else were excluded at the top */ - ctxt->ccr3 = getCx86(CX86_CCR3); - } -} - -void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) -{ - if (use_intel()) - /* Disable MTRRs, and set the default type to uncached */ - mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, - ctxt->deftype_hi); - else if (is_cpu(CYRIX)) - /* Cyrix ARRs - everything else were excluded at the top */ - setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); -} - -/* Restore the processor after a set_mtrr_prepare */ -void set_mtrr_done(struct set_mtrr_context *ctxt) -{ - if (use_intel() || is_cpu(CYRIX)) { - - /* Flush caches and TLBs */ - wbinvd(); - - /* Restore MTRRdefType */ - if (use_intel()) - /* Intel (P6) standard MTRRs */ - mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); - else - /* Cyrix ARRs - everything else was excluded at the top */ - setCx86(CX86_CCR3, ctxt->ccr3); - - /* Enable caches */ - write_cr0(read_cr0() & 0xbfffffff); - - /* Restore value of CR4 */ - if ( cpu_has_pge ) - write_cr4(ctxt->cr4val); - } - /* Re-enable interrupts locally (if enabled previously) */ - local_irq_restore(ctxt->flags); -} - diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile new file mode 100644 index 00000000000..191fc053364 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/Makefile @@ -0,0 +1,3 @@ +obj-y := main.o if.o generic.o state.o +obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o + diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c new file mode 100644 index 00000000000..0949cdbf848 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/amd.c @@ -0,0 +1,121 @@ +#include +#include +#include +#include + +#include "mtrr.h" + +static void +amd_get_mtrr(unsigned int 
reg, unsigned long *base, + unsigned long *size, mtrr_type * type) +{ + unsigned long low, high; + + rdmsr(MSR_K6_UWCCR, low, high); + /* Upper dword is region 1, lower is region 0 */ + if (reg == 1) + low = high; + /* The base masks off on the right alignment */ + *base = (low & 0xFFFE0000) >> PAGE_SHIFT; + *type = 0; + if (low & 1) + *type = MTRR_TYPE_UNCACHABLE; + if (low & 2) + *type = MTRR_TYPE_WRCOMB; + if (!(low & 3)) { + *size = 0; + return; + } + /* + * This needs a little explaining. The size is stored as an + * inverted mask of bits of 128K granularity 15 bits long offset + * 2 bits + * + * So to get a size we do invert the mask and add 1 to the lowest + * mask bit (4 as its 2 bits in). This gives us a size we then shift + * to turn into 128K blocks + * + * eg 111 1111 1111 1100 is 512K + * + * invert 000 0000 0000 0011 + * +1 000 0000 0000 0100 + * *128K ... + */ + low = (~low) & 0x1FFFC; + *size = (low + 4) << (15 - PAGE_SHIFT); + return; +} + +static void amd_set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +/* [SUMMARY] Set variable MTRR register on the local CPU. + The register to set. + The base address of the region. + The size of the region. If this is 0 the region is disabled. + The type of the region. + If TRUE, do the change safely. If FALSE, safety measures should + be done externally. + [RETURNS] Nothing. +*/ +{ + u32 regs[2]; + + /* + * Low is MTRR0 , High MTRR 1 + */ + rdmsr(MSR_K6_UWCCR, regs[0], regs[1]); + /* + * Blank to disable + */ + if (size == 0) + regs[reg] = 0; + else + /* Set the register to the base, the type (off by one) and an + inverted bitmask of the size The size is the only odd + bit. We are fed say 512K We invert this and we get 111 1111 + 1111 1011 but if you subtract one and invert you get the + desired 111 1111 1111 1100 mask + + But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! 
*/ + regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC) + | (base << PAGE_SHIFT) | (type + 1); + + /* + * The writeback rule is quite specific. See the manual. Its + * disable local interrupts, write back the cache, set the mtrr + */ + wbinvd(); + wrmsr(MSR_K6_UWCCR, regs[0], regs[1]); +} + +static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) +{ + /* Apply the K6 block alignment and size rules + In order + o Uncached or gathering only + o 128K or bigger block + o Power of 2 block + o base suitably aligned to the power + */ + if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT)) + || (size & ~(size - 1)) - size || (base & (size - 1))) + return -EINVAL; + return 0; +} + +static struct mtrr_ops amd_mtrr_ops = { + .vendor = X86_VENDOR_AMD, + .set = amd_set_mtrr, + .get = amd_get_mtrr, + .get_free_region = generic_get_free_region, + .validate_add_page = amd_validate_add_page, + .have_wrcomb = positive_have_wrcomb, +}; + +int __init amd_init_mtrr(void) +{ + set_mtrr_ops(&amd_mtrr_ops); + return 0; +} + +//arch_initcall(amd_mtrr_init); diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c new file mode 100644 index 00000000000..cb9aa3a7a7a --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/centaur.c @@ -0,0 +1,224 @@ +#include +#include +#include +#include +#include "mtrr.h" + +static struct { + unsigned long high; + unsigned long low; +} centaur_mcr[8]; + +static u8 centaur_mcr_reserved; +static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */ + +/* + * Report boot time MCR setups + */ + +static int +centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) +/* [SUMMARY] Get a free MTRR. + The starting (base) address of the region. + The size (in bytes) of the region. + [RETURNS] The index of the region on success, else -1 on error. 
+*/ +{ + int i, max; + mtrr_type ltype; + unsigned long lbase, lsize; + + max = num_var_ranges; + if (replace_reg >= 0 && replace_reg < max) + return replace_reg; + for (i = 0; i < max; ++i) { + if (centaur_mcr_reserved & (1 << i)) + continue; + mtrr_if->get(i, &lbase, &lsize, &ltype); + if (lsize == 0) + return i; + } + return -ENOSPC; +} + +void +mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) +{ + centaur_mcr[mcr].low = lo; + centaur_mcr[mcr].high = hi; +} + +static void +centaur_get_mcr(unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type * type) +{ + *base = centaur_mcr[reg].high >> PAGE_SHIFT; + *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT; + *type = MTRR_TYPE_WRCOMB; /* If it is there, it is write-combining */ + if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2)) + *type = MTRR_TYPE_UNCACHABLE; + if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25) + *type = MTRR_TYPE_WRBACK; + if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31) + *type = MTRR_TYPE_WRBACK; + +} + +static void centaur_set_mcr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +{ + unsigned long low, high; + + if (size == 0) { + /* Disable */ + high = low = 0; + } else { + high = base << PAGE_SHIFT; + if (centaur_mcr_type == 0) + low = -size << PAGE_SHIFT | 0x1f; /* only support write-combining... */ + else { + if (type == MTRR_TYPE_UNCACHABLE) + low = -size << PAGE_SHIFT | 0x02; /* NC */ + else + low = -size << PAGE_SHIFT | 0x09; /* WWO,WC */ + } + } + centaur_mcr[reg].high = high; + centaur_mcr[reg].low = low; + wrmsr(MSR_IDT_MCR0 + reg, low, high); +} + +#if 0 +/* + * Initialise the later (saner) Winchip MCR variant.
In this version + * the BIOS can pass us the registers it has used (but not their values) + * and the control register is read/write + */ + +static void __init +centaur_mcr1_init(void) +{ + unsigned i; + u32 lo, hi; + + /* Unfortunately, MCR's are read-only, so there is no way to + * find out what the bios might have done. + */ + + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */ + lo &= ~0x1C0; /* clear key */ + lo |= 0x040; /* set key to 1 */ + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */ + } + + centaur_mcr_type = 1; + + /* + * Clear any unconfigured MCR's. + */ + + for (i = 0; i < 8; ++i) { + if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) { + if (!(lo & (1 << (9 + i)))) + wrmsr(MSR_IDT_MCR0 + i, 0, 0); + else + /* + * If the BIOS set up an MCR we cannot see it + * but we don't wish to obliterate it + */ + centaur_mcr_reserved |= (1 << i); + } + } + /* + * Throw the main write-combining switch... + * However if OOSTORE is enabled then people have already done far + * cleverer things and we should behave. + */ + + lo |= 15; /* Write combine enables */ + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); +} + +/* + * Initialise the original winchip with read only MCR registers + * no used bitmask for the BIOS to pass on and write only control + */ + +static void __init +centaur_mcr0_init(void) +{ + unsigned i; + + /* Unfortunately, MCR's are read-only, so there is no way to + * find out what the bios might have done. + */ + + /* Clear any unconfigured MCR's. + * This way we are sure that the centaur_mcr array contains the actual + * values. The disadvantage is that any BIOS tweaks are thus undone. 
+ * + */ + for (i = 0; i < 8; ++i) { + if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) + wrmsr(MSR_IDT_MCR0 + i, 0, 0); + } + + wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */ +} + +/* + * Initialise Winchip series MCR registers + */ + +static void __init +centaur_mcr_init(void) +{ + struct set_mtrr_context ctxt; + + set_mtrr_prepare_save(&ctxt); + set_mtrr_cache_disable(&ctxt); + + if (boot_cpu_data.x86_model == 4) + centaur_mcr0_init(); + else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9) + centaur_mcr1_init(); + + set_mtrr_done(&ctxt); +} +#endif + +static int centaur_validate_add_page(unsigned long base, + unsigned long size, unsigned int type) +{ + /* + * FIXME: Winchip2 supports uncached + */ + if (type != MTRR_TYPE_WRCOMB && + (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { + printk(KERN_WARNING + "mtrr: only write-combining%s supported\n", + centaur_mcr_type ? " and uncacheable are" + : " is"); + return -EINVAL; + } + return 0; +} + +static struct mtrr_ops centaur_mtrr_ops = { + .vendor = X86_VENDOR_CENTAUR, +// .init = centaur_mcr_init, + .set = centaur_set_mcr, + .get = centaur_get_mcr, + .get_free_region = centaur_get_free_region, + .validate_add_page = centaur_validate_add_page, + .have_wrcomb = positive_have_wrcomb, +}; + +int __init centaur_init_mtrr(void) +{ + set_mtrr_ops(&centaur_mtrr_ops); + return 0; +} + +//arch_initcall(centaur_init_mtrr); diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c new file mode 100644 index 00000000000..2287d4863a8 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -0,0 +1,380 @@ +#include +#include +#include +#include +#include +#include +#include "mtrr.h" + +int arr3_protected; + +static void +cyrix_get_arr(unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type * type) +{ + unsigned long flags; + unsigned char arr, ccr3, rcr, shift; + + arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ + + /* Save
flags and disable interrupts */ + local_irq_save(flags); + + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + ((unsigned char *) base)[3] = getCx86(arr); + ((unsigned char *) base)[2] = getCx86(arr + 1); + ((unsigned char *) base)[1] = getCx86(arr + 2); + rcr = getCx86(CX86_RCR_BASE + reg); + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + + /* Enable interrupts if it was enabled previously */ + local_irq_restore(flags); + shift = ((unsigned char *) base)[1] & 0x0f; + *base >>= PAGE_SHIFT; + + /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 + * Note: shift==0xf means 4G, this is unsupported. + */ + if (shift) + *size = (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1); + else + *size = 0; + + /* Bit 0 is Cache Enable on ARR7, Cache Disable on ARR0-ARR6 */ + if (reg < 7) { + switch (rcr) { + case 1: + *type = MTRR_TYPE_UNCACHABLE; + break; + case 8: + *type = MTRR_TYPE_WRBACK; + break; + case 9: + *type = MTRR_TYPE_WRCOMB; + break; + case 24: + default: + *type = MTRR_TYPE_WRTHROUGH; + break; + } + } else { + switch (rcr) { + case 0: + *type = MTRR_TYPE_UNCACHABLE; + break; + case 8: + *type = MTRR_TYPE_WRCOMB; + break; + case 9: + *type = MTRR_TYPE_WRBACK; + break; + case 25: + default: + *type = MTRR_TYPE_WRTHROUGH; + break; + } + } +} + +static int +cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) +/* [SUMMARY] Get a free ARR. + The starting (base) address of the region. + The size (in bytes) of the region. + [RETURNS] The index of the region on success, else -1 on error. 
+*/ +{ + int i; + mtrr_type ltype; + unsigned long lbase, lsize; + + switch (replace_reg) { + case 7: + if (size < 0x40) + break; + case 6: + case 5: + case 4: + return replace_reg; + case 3: + if (arr3_protected) + break; + case 2: + case 1: + case 0: + return replace_reg; + } + /* If we are to set up a region >32M then look at ARR7 immediately */ + if (size > 0x2000) { + cyrix_get_arr(7, &lbase, &lsize, &ltype); + if (lsize == 0) + return 7; + /* Else try ARR0-ARR6 first */ + } else { + for (i = 0; i < 7; i++) { + cyrix_get_arr(i, &lbase, &lsize, &ltype); + if ((i == 3) && arr3_protected) + continue; + if (lsize == 0) + return i; + } + /* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */ + cyrix_get_arr(i, &lbase, &lsize, &ltype); + if ((lsize == 0) && (size >= 0x40)) + return i; + } + return -ENOSPC; +} + +static u32 cr4 = 0; +static u32 ccr3; + +static void prepare_set(void) +{ + u32 cr0; + + /* Save value of CR4 and clear Page Global Enable (bit 7) */ + if ( cpu_has_pge ) { + cr4 = read_cr4(); + write_cr4(cr4 & ~X86_CR4_PGE); + } + + /* Disable and flush caches.
Note that wbinvd flushes the TLBs as + a side-effect */ + cr0 = read_cr0() | 0x40000000; + wbinvd(); + write_cr0(cr0); + wbinvd(); + + /* Cyrix ARRs - everything else were excluded at the top */ + ccr3 = getCx86(CX86_CCR3); + + /* Cyrix ARRs - everything else were excluded at the top */ + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); + +} + +static void post_set(void) +{ + /* Flush caches and TLBs */ + wbinvd(); + + /* Cyrix ARRs - everything else was excluded at the top */ + setCx86(CX86_CCR3, ccr3); + + /* Enable caches */ + write_cr0(read_cr0() & 0xbfffffff); + + /* Restore value of CR4 */ + if ( cpu_has_pge ) + write_cr4(cr4); +} + +static void cyrix_set_arr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +{ + unsigned char arr, arr_type, arr_size; + + arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ + + /* count down from 32M (ARR0-ARR6) or from 2G (ARR7) */ + if (reg >= 7) + size >>= 6; + + size &= 0x7fff; /* make sure arr_size <= 14 */ + for (arr_size = 0; size; arr_size++, size >>= 1) ; + + if (reg < 7) { + switch (type) { + case MTRR_TYPE_UNCACHABLE: + arr_type = 1; + break; + case MTRR_TYPE_WRCOMB: + arr_type = 9; + break; + case MTRR_TYPE_WRTHROUGH: + arr_type = 24; + break; + default: + arr_type = 8; + break; + } + } else { + switch (type) { + case MTRR_TYPE_UNCACHABLE: + arr_type = 0; + break; + case MTRR_TYPE_WRCOMB: + arr_type = 8; + break; + case MTRR_TYPE_WRTHROUGH: + arr_type = 25; + break; + default: + arr_type = 9; + break; + } + } + + prepare_set(); + + base <<= PAGE_SHIFT; + setCx86(arr, ((unsigned char *) &base)[3]); + setCx86(arr + 1, ((unsigned char *) &base)[2]); + setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size); + setCx86(CX86_RCR_BASE + reg, arr_type); + + post_set(); +} + +typedef struct { + unsigned long base; + unsigned long size; + mtrr_type type; +} arr_state_t; + +static arr_state_t arr_state[8] = { + {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, + 
{0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL} +}; + +static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 }; + +static void cyrix_set_all(void) +{ + int i; + + prepare_set(); + + /* the CCRs are not contiguous */ + for (i = 0; i < 4; i++) + setCx86(CX86_CCR0 + i, ccr_state[i]); + for (; i < 7; i++) + setCx86(CX86_CCR4 + i, ccr_state[i]); + for (i = 0; i < 8; i++) + cyrix_set_arr(i, arr_state[i].base, + arr_state[i].size, arr_state[i].type); + + post_set(); +} + +#if 0 +/* + * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection + * with the SMM (System Management Mode) mode. So we need the following: + * Check whether SMI_LOCK (CCR3 bit 0) is set + * if it is set, write a warning message: ARR3 cannot be changed! + * (it cannot be changed until the next processor reset) + * if it is reset, then we can change it, set all the needed bits: + * - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset) + * - disable access to SMM memory (CCR1 bit 2 reset) + * - disable SMM mode (CCR1 bit 1 reset) + * - disable write protection of ARR3 (CCR6 bit 1 reset) + * - (maybe) disable ARR3 + * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set) + */ +static void __init +cyrix_arr_init(void) +{ + struct set_mtrr_context ctxt; + unsigned char ccr[7]; + int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 }; +#ifdef CONFIG_SMP + int i; +#endif + + /* flush cache and enable MAPEN */ + set_mtrr_prepare_save(&ctxt); + set_mtrr_cache_disable(&ctxt); + + /* Save all CCRs locally */ + ccr[0] = getCx86(CX86_CCR0); + ccr[1] = getCx86(CX86_CCR1); + ccr[2] = getCx86(CX86_CCR2); + ccr[3] = ctxt.ccr3; + ccr[4] = getCx86(CX86_CCR4); + ccr[5] = getCx86(CX86_CCR5); + ccr[6] = getCx86(CX86_CCR6); + + if (ccr[3] & 1) { + ccrc[3] = 1; + arr3_protected = 1; + } else { + /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and + * access to SMM memory through ARR3 (bit 7). 
+ */ + if (ccr[1] & 0x80) { + ccr[1] &= 0x7f; + ccrc[1] |= 0x80; + } + if (ccr[1] & 0x04) { + ccr[1] &= 0xfb; + ccrc[1] |= 0x04; + } + if (ccr[1] & 0x02) { + ccr[1] &= 0xfd; + ccrc[1] |= 0x02; + } + arr3_protected = 0; + if (ccr[6] & 0x02) { + ccr[6] &= 0xfd; + ccrc[6] = 1; /* Disable write protection of ARR3 */ + setCx86(CX86_CCR6, ccr[6]); + } + /* Disable ARR3. This is safe now that we disabled SMM. */ + /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */ + } + /* If we changed CCR1 in memory, change it in the processor, too. */ + if (ccrc[1]) + setCx86(CX86_CCR1, ccr[1]); + + /* Enable ARR usage by the processor */ + if (!(ccr[5] & 0x20)) { + ccr[5] |= 0x20; + ccrc[5] = 1; + setCx86(CX86_CCR5, ccr[5]); + } +#ifdef CONFIG_SMP + for (i = 0; i < 7; i++) + ccr_state[i] = ccr[i]; + for (i = 0; i < 8; i++) + cyrix_get_arr(i, + &arr_state[i].base, &arr_state[i].size, + &arr_state[i].type); +#endif + + set_mtrr_done(&ctxt); /* flush cache and disable MAPEN */ + + if (ccrc[5]) + printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n"); + if (ccrc[3]) + printk(KERN_INFO "mtrr: ARR3 cannot be changed\n"); +/* + if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n"); + if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n"); + if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n"); +*/ + if (ccrc[6]) + printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n"); +} +#endif + +static struct mtrr_ops cyrix_mtrr_ops = { + .vendor = X86_VENDOR_CYRIX, +// .init = cyrix_arr_init, + .set_all = cyrix_set_all, + .set = cyrix_set_arr, + .get = cyrix_get_arr, + .get_free_region = cyrix_get_free_region, + .validate_add_page = generic_validate_add_page, + .have_wrcomb = positive_have_wrcomb, +}; + +int __init cyrix_init_mtrr(void) +{ + set_mtrr_ops(&cyrix_mtrr_ops); + return 0; +} + +//arch_initcall(cyrix_init_mtrr); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c new file mode 100644 index 
00000000000..56f64e34829 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -0,0 +1,509 @@ +/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong + because MTRRs can span upto 40 bits (36bits on most modern x86) */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mtrr.h" + +struct mtrr_state { + struct mtrr_var_range *var_ranges; + mtrr_type fixed_ranges[NUM_FIXED_RANGES]; + unsigned char enabled; + unsigned char have_fixed; + mtrr_type def_type; +}; + +struct fixed_range_block { + int base_msr; /* start address of an MTRR block */ + int ranges; /* number of MTRRs in this block */ +}; + +static struct fixed_range_block fixed_range_blocks[] = { + { MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */ + { MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */ + { MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */ + {} +}; + +static unsigned long smp_changes_mask; +static struct mtrr_state mtrr_state = {}; + +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "mtrr." 
+ +static int mtrr_show; +module_param_named(show, mtrr_show, bool, 0); + +/* Get the MSR pair relating to a var range */ +static void +get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) +{ + rdmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); + rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); +} + +static void +get_fixed_ranges(mtrr_type * frs) +{ + unsigned int *p = (unsigned int *) frs; + int i; + + rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]); + + for (i = 0; i < 2; i++) + rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]); + for (i = 0; i < 8; i++) + rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]); +} + +void mtrr_save_fixed_ranges(void *info) +{ + if (cpu_has_mtrr) + get_fixed_ranges(mtrr_state.fixed_ranges); +} + +static void print_fixed(unsigned base, unsigned step, const mtrr_type*types) +{ + unsigned i; + + for (i = 0; i < 8; ++i, ++types, base += step) + printk(KERN_INFO "MTRR %05X-%05X %s\n", + base, base + step - 1, mtrr_attrib_to_str(*types)); +} + +/* Grab all of the MTRR state for this CPU into *state */ +void __init get_mtrr_state(void) +{ + unsigned int i; + struct mtrr_var_range *vrs; + unsigned lo, dummy; + + if (!mtrr_state.var_ranges) { + mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range), + GFP_KERNEL); + if (!mtrr_state.var_ranges) + return; + } + vrs = mtrr_state.var_ranges; + + rdmsr(MTRRcap_MSR, lo, dummy); + mtrr_state.have_fixed = (lo >> 8) & 1; + + for (i = 0; i < num_var_ranges; i++) + get_mtrr_var_range(i, &vrs[i]); + if (mtrr_state.have_fixed) + get_fixed_ranges(mtrr_state.fixed_ranges); + + rdmsr(MTRRdefType_MSR, lo, dummy); + mtrr_state.def_type = (lo & 0xff); + mtrr_state.enabled = (lo & 0xc00) >> 10; + + if (mtrr_show) { + int high_width; + + printk(KERN_INFO "MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type)); + if (mtrr_state.have_fixed) { + printk(KERN_INFO "MTRR fixed ranges %sabled:\n", + mtrr_state.enabled & 1 ? 
"en" : "dis"); + print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); + for (i = 0; i < 2; ++i) + print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); + for (i = 0; i < 8; ++i) + print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); + } + printk(KERN_INFO "MTRR variable ranges %sabled:\n", + mtrr_state.enabled & 2 ? "en" : "dis"); + high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4; + for (i = 0; i < num_var_ranges; ++i) { + if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) + printk(KERN_INFO "MTRR %u base %0*X%05X000 mask %0*X%05X000 %s\n", + i, + high_width, + mtrr_state.var_ranges[i].base_hi, + mtrr_state.var_ranges[i].base_lo >> 12, + high_width, + mtrr_state.var_ranges[i].mask_hi, + mtrr_state.var_ranges[i].mask_lo >> 12, + mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); + else + printk(KERN_INFO "MTRR %u disabled\n", i); + } + } +} + +/* Some BIOS's are fucked and don't set all MTRRs the same! */ +void __init mtrr_state_warn(void) +{ + unsigned long mask = smp_changes_mask; + + if (!mask) + return; + if (mask & MTRR_CHANGE_MASK_FIXED) + printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n"); + if (mask & MTRR_CHANGE_MASK_VARIABLE) + printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n"); + if (mask & MTRR_CHANGE_MASK_DEFTYPE) + printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n"); + printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); + printk(KERN_INFO "mtrr: corrected configuration.\n"); +} + +/* Doesn't attempt to pass an error out to MTRR users + because it's quite complicated in some cases and probably not + worth it because the best error handling is to ignore it. 
*/ +void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) +{ + if (wrmsr_safe(msr, a, b) < 0) + printk(KERN_ERR + "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", + smp_processor_id(), msr, a, b); +} + +/** + * Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs + * see AMD publication no. 24593, chapter 3.2.1 for more information + */ +static inline void k8_enable_fixed_iorrs(void) +{ + unsigned lo, hi; + + rdmsr(MSR_K8_SYSCFG, lo, hi); + mtrr_wrmsr(MSR_K8_SYSCFG, lo + | K8_MTRRFIXRANGE_DRAM_ENABLE + | K8_MTRRFIXRANGE_DRAM_MODIFY, hi); +} + +/** + * Checks and updates an fixed-range MTRR if it differs from the value it + * should have. If K8 extenstions are wanted, update the K8 SYSCFG MSR also. + * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information + * \param msr MSR address of the MTTR which should be checked and updated + * \param changed pointer which indicates whether the MTRR needed to be changed + * \param msrwords pointer to the MSR values which the MSR should have + */ +static void set_fixed_range(int msr, int * changed, unsigned int * msrwords) +{ + unsigned lo, hi; + + rdmsr(msr, lo, hi); + + if (lo != msrwords[0] || hi != msrwords[1]) { + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 == 15 && + ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) + k8_enable_fixed_iorrs(); + mtrr_wrmsr(msr, msrwords[0], msrwords[1]); + *changed = TRUE; + } +} + +int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) +/* [SUMMARY] Get a free MTRR. + The starting (base) address of the region. + The size (in bytes) of the region. + [RETURNS] The index of the region on success, else -1 on error. 
+*/ +{ + int i, max; + mtrr_type ltype; + unsigned long lbase, lsize; + + max = num_var_ranges; + if (replace_reg >= 0 && replace_reg < max) + return replace_reg; + for (i = 0; i < max; ++i) { + mtrr_if->get(i, &lbase, &lsize, &ltype); + if (lsize == 0) + return i; + } + return -ENOSPC; +} + +static void generic_get_mtrr(unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type *type) +{ + unsigned int mask_lo, mask_hi, base_lo, base_hi; + + rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); + if ((mask_lo & 0x800) == 0) { + /* Invalid (i.e. free) range */ + *base = 0; + *size = 0; + *type = 0; + return; + } + + rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); + + /* Work out the shifted address mask. */ + mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) + | mask_lo >> PAGE_SHIFT; + + /* This works correctly if size is a power of two, i.e. a + contiguous range. */ + *size = -mask_lo; + *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; + *type = base_lo & 0xff; +} + +/** + * Checks and updates the fixed-range MTRRs if they differ from the saved set + * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges() + */ +static int set_fixed_ranges(mtrr_type * frs) +{ + unsigned long long *saved = (unsigned long long *) frs; + int changed = FALSE; + int block=-1, range; + + while (fixed_range_blocks[++block].ranges) + for (range=0; range < fixed_range_blocks[block].ranges; range++) + set_fixed_range(fixed_range_blocks[block].base_msr + range, + &changed, (unsigned int *) saved++); + + return changed; +} + +/* Set the MSR pair relating to a var range.
Returns TRUE if + changes are made */ +static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) +{ + unsigned int lo, hi; + int changed = FALSE; + + rdmsr(MTRRphysBase_MSR(index), lo, hi); + if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) + || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != + (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { + mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); + changed = TRUE; + } + + rdmsr(MTRRphysMask_MSR(index), lo, hi); + + if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL) + || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != + (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { + mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); + changed = TRUE; + } + return changed; +} + +static u32 deftype_lo, deftype_hi; + +static unsigned long set_mtrr_state(void) +/* [SUMMARY] Set the MTRR state for this CPU. + The MTRR state information to read. + Some relevant CPU context. + [NOTE] The CPU must already be in a safe state for MTRR changes. + [RETURNS] 0 if no changes made, else a mask indication what was changed. 
+*/ +{ + unsigned int i; + unsigned long change_mask = 0; + + for (i = 0; i < num_var_ranges; i++) + if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i])) + change_mask |= MTRR_CHANGE_MASK_VARIABLE; + + if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges)) + change_mask |= MTRR_CHANGE_MASK_FIXED; + + /* Set_mtrr_restore restores the old value of MTRRdefType, + so to set it we fiddle with the saved value */ + if ((deftype_lo & 0xff) != mtrr_state.def_type + || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { + deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10); + change_mask |= MTRR_CHANGE_MASK_DEFTYPE; + } + + return change_mask; +} + + +static unsigned long cr4 = 0; +static DEFINE_SPINLOCK(set_atomicity_lock); + +/* + * Since we are disabling the cache don't allow any interrupts - they + * would run extremely slow and would only increase the pain. The caller must + * ensure that local interrupts are disabled and are reenabled after post_set() + * has been called. + */ + +static void prepare_set(void) __acquires(set_atomicity_lock) +{ + unsigned long cr0; + + /* Note that this is not ideal, since the cache is only flushed/disabled + for this CPU while the MTRRs are changed, but changing this requires + more invasive changes to the way the kernel boots */ + + spin_lock(&set_atomicity_lock); + + /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. 
*/ + cr0 = read_cr0() | 0x40000000; /* set CD flag */ + write_cr0(cr0); + wbinvd(); + + /* Save value of CR4 and clear Page Global Enable (bit 7) */ + if ( cpu_has_pge ) { + cr4 = read_cr4(); + write_cr4(cr4 & ~X86_CR4_PGE); + } + + /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ + __flush_tlb(); + + /* Save MTRR state */ + rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); + + /* Disable MTRRs, and set the default type to uncached */ + mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi); +} + +static void post_set(void) __releases(set_atomicity_lock) +{ + /* Flush TLBs (no need to flush caches - they are disabled) */ + __flush_tlb(); + + /* Intel (P6) standard MTRRs */ + mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); + + /* Enable caches */ + write_cr0(read_cr0() & 0xbfffffff); + + /* Restore value of CR4 */ + if ( cpu_has_pge ) + write_cr4(cr4); + spin_unlock(&set_atomicity_lock); +} + +static void generic_set_all(void) +{ + unsigned long mask, count; + unsigned long flags; + + local_irq_save(flags); + prepare_set(); + + /* Actually set the state */ + mask = set_mtrr_state(); + + post_set(); + local_irq_restore(flags); + + /* Use the atomic bitops to update the global mask */ + for (count = 0; count < sizeof mask * 8; ++count) { + if (mask & 0x01) + set_bit(count, &smp_changes_mask); + mask >>= 1; + } + +} + +static void generic_set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +/* [SUMMARY] Set variable MTRR register on the local CPU. + The register to set. + The base address of the region. + The size of the region. If this is 0 the region is disabled. + The type of the region. + If TRUE, do the change safely. If FALSE, safety measures should + be done externally. + [RETURNS] Nothing. 
+*/ +{ + unsigned long flags; + struct mtrr_var_range *vr; + + vr = &mtrr_state.var_ranges[reg]; + + local_irq_save(flags); + prepare_set(); + + if (size == 0) { + /* The invalid bit is kept in the mask, so we simply clear the + relevant mask register to disable a range. */ + mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); + memset(vr, 0, sizeof(struct mtrr_var_range)); + } else { + vr->base_lo = base << PAGE_SHIFT | type; + vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT); + vr->mask_lo = -size << PAGE_SHIFT | 0x800; + vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT); + + mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi); + mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi); + } + + post_set(); + local_irq_restore(flags); +} + +int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) +{ + unsigned long lbase, last; + + /* For Intel PPro stepping <= 7, must be 4 MiB aligned + and not touch 0x70000000->0x7003FFFF */ + if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == 1 && + boot_cpu_data.x86_mask <= 7) { + if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { + printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); + return -EINVAL; + } + if (!(base + size < 0x70000 || base > 0x7003F) && + (type == MTRR_TYPE_WRCOMB + || type == MTRR_TYPE_WRBACK)) { + printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); + return -EINVAL; + } + } + + /* Check upper bits of base and last are equal and lower bits are 0 + for base and 1 for last */ + last = base + size - 1; + for (lbase = base; !(lbase & 1) && (last & 1); + lbase = lbase >> 1, last = last >> 1) ; + if (lbase != last) { + printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", + base, size); + return -EINVAL; + } + return 0; +} + + +static int generic_have_wrcomb(void) +{ + unsigned long config, dummy; + rdmsr(MTRRcap_MSR, config, dummy); + return 
(config & (1 << 10)); +} + +int positive_have_wrcomb(void) +{ + return 1; +} + +/* generic structure... + */ +struct mtrr_ops generic_mtrr_ops = { + .use_intel_if = 1, + .set_all = generic_set_all, + .get = generic_get_mtrr, + .get_free_region = generic_get_free_region, + .set = generic_set_mtrr, + .validate_add_page = generic_validate_add_page, + .have_wrcomb = generic_have_wrcomb, +}; diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c new file mode 100644 index 00000000000..c7d8f175674 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -0,0 +1,439 @@ +#include +#include +#include +#include +#include +#include +#include + +#define LINE_SIZE 80 + +#include +#include "mtrr.h" + +/* RED-PEN: this is accessed without any locking */ +extern unsigned int *usage_table; + + +#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) + +static const char *const mtrr_strings[MTRR_NUM_TYPES] = +{ + "uncachable", /* 0 */ + "write-combining", /* 1 */ + "?", /* 2 */ + "?", /* 3 */ + "write-through", /* 4 */ + "write-protect", /* 5 */ + "write-back", /* 6 */ +}; + +const char *mtrr_attrib_to_str(int x) +{ + return (x <= 6) ? 
mtrr_strings[x] : "?"; +} + +#ifdef CONFIG_PROC_FS + +static int +mtrr_file_add(unsigned long base, unsigned long size, + unsigned int type, char increment, struct file *file, int page) +{ + int reg, max; + unsigned int *fcount = FILE_FCOUNT(file); + + max = num_var_ranges; + if (fcount == NULL) { + fcount = kzalloc(max * sizeof *fcount, GFP_KERNEL); + if (!fcount) + return -ENOMEM; + FILE_FCOUNT(file) = fcount; + } + if (!page) { + if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) + return -EINVAL; + base >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + } + reg = mtrr_add_page(base, size, type, 1); + if (reg >= 0) + ++fcount[reg]; + return reg; +} + +static int +mtrr_file_del(unsigned long base, unsigned long size, + struct file *file, int page) +{ + int reg; + unsigned int *fcount = FILE_FCOUNT(file); + + if (!page) { + if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) + return -EINVAL; + base >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + } + reg = mtrr_del_page(-1, base, size); + if (reg < 0) + return reg; + if (fcount == NULL) + return reg; + if (fcount[reg] < 1) + return -EINVAL; + --fcount[reg]; + return reg; +} + +/* RED-PEN: seq_file can seek now. this is ignored. 
*/ +static ssize_t +mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) +/* Format of control line: + "base=%Lx size=%Lx type=%s" OR: + "disable=%d" +*/ +{ + int i, err; + unsigned long reg; + unsigned long long base, size; + char *ptr; + char line[LINE_SIZE]; + size_t linelen; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!len) + return -EINVAL; + memset(line, 0, LINE_SIZE); + if (len > LINE_SIZE) + len = LINE_SIZE; + if (copy_from_user(line, buf, len - 1)) + return -EFAULT; + linelen = strlen(line); + ptr = line + linelen - 1; + if (linelen && *ptr == '\n') + *ptr = '\0'; + if (!strncmp(line, "disable=", 8)) { + reg = simple_strtoul(line + 8, &ptr, 0); + err = mtrr_del_page(reg, 0, 0); + if (err < 0) + return err; + return len; + } + if (strncmp(line, "base=", 5)) + return -EINVAL; + base = simple_strtoull(line + 5, &ptr, 0); + for (; isspace(*ptr); ++ptr) ; + if (strncmp(ptr, "size=", 5)) + return -EINVAL; + size = simple_strtoull(ptr + 5, &ptr, 0); + if ((base & 0xfff) || (size & 0xfff)) + return -EINVAL; + for (; isspace(*ptr); ++ptr) ; + if (strncmp(ptr, "type=", 5)) + return -EINVAL; + ptr += 5; + for (; isspace(*ptr); ++ptr) ; + for (i = 0; i < MTRR_NUM_TYPES; ++i) { + if (strcmp(ptr, mtrr_strings[i])) + continue; + base >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + err = + mtrr_add_page((unsigned long) base, (unsigned long) size, i, + 1); + if (err < 0) + return err; + return len; + } + return -EINVAL; +} + +static long +mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) +{ + int err = 0; + mtrr_type type; + unsigned long size; + struct mtrr_sentry sentry; + struct mtrr_gentry gentry; + void __user *arg = (void __user *) __arg; + + switch (cmd) { + case MTRRIOC_ADD_ENTRY: + case MTRRIOC_SET_ENTRY: + case MTRRIOC_DEL_ENTRY: + case MTRRIOC_KILL_ENTRY: + case MTRRIOC_ADD_PAGE_ENTRY: + case MTRRIOC_SET_PAGE_ENTRY: + case MTRRIOC_DEL_PAGE_ENTRY: + case MTRRIOC_KILL_PAGE_ENTRY: + if (copy_from_user(&sentry, 
arg, sizeof sentry)) + return -EFAULT; + break; + case MTRRIOC_GET_ENTRY: + case MTRRIOC_GET_PAGE_ENTRY: + if (copy_from_user(&gentry, arg, sizeof gentry)) + return -EFAULT; + break; +#ifdef CONFIG_COMPAT + case MTRRIOC32_ADD_ENTRY: + case MTRRIOC32_SET_ENTRY: + case MTRRIOC32_DEL_ENTRY: + case MTRRIOC32_KILL_ENTRY: + case MTRRIOC32_ADD_PAGE_ENTRY: + case MTRRIOC32_SET_PAGE_ENTRY: + case MTRRIOC32_DEL_PAGE_ENTRY: + case MTRRIOC32_KILL_PAGE_ENTRY: { + struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg; + err = get_user(sentry.base, &s32->base); + err |= get_user(sentry.size, &s32->size); + err |= get_user(sentry.type, &s32->type); + if (err) + return err; + break; + } + case MTRRIOC32_GET_ENTRY: + case MTRRIOC32_GET_PAGE_ENTRY: { + struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; + err = get_user(gentry.regnum, &g32->regnum); + err |= get_user(gentry.base, &g32->base); + err |= get_user(gentry.size, &g32->size); + err |= get_user(gentry.type, &g32->type); + if (err) + return err; + break; + } +#endif + } + + switch (cmd) { + default: + return -ENOTTY; + case MTRRIOC_ADD_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_ADD_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = + mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, + file, 0); + break; + case MTRRIOC_SET_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_SET_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); + break; + case MTRRIOC_DEL_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_DEL_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_file_del(sentry.base, sentry.size, file, 0); + break; + case MTRRIOC_KILL_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_KILL_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_del(-1, sentry.base, sentry.size); + break; + case MTRRIOC_GET_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_GET_ENTRY: 
+#endif + if (gentry.regnum >= num_var_ranges) + return -EINVAL; + mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); + + /* Hide entries that go above 4GB */ + if (gentry.base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)) + || size >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))) + gentry.base = gentry.size = gentry.type = 0; + else { + gentry.base <<= PAGE_SHIFT; + gentry.size = size << PAGE_SHIFT; + gentry.type = type; + } + + break; + case MTRRIOC_ADD_PAGE_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_ADD_PAGE_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = + mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, + file, 1); + break; + case MTRRIOC_SET_PAGE_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_SET_PAGE_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); + break; + case MTRRIOC_DEL_PAGE_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_DEL_PAGE_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_file_del(sentry.base, sentry.size, file, 1); + break; + case MTRRIOC_KILL_PAGE_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_KILL_PAGE_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_del_page(-1, sentry.base, sentry.size); + break; + case MTRRIOC_GET_PAGE_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_GET_PAGE_ENTRY: +#endif + if (gentry.regnum >= num_var_ranges) + return -EINVAL; + mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); + /* Hide entries that would overflow */ + if (size != (__typeof__(gentry.size))size) + gentry.base = gentry.size = gentry.type = 0; + else { + gentry.size = size; + gentry.type = type; + } + break; + } + + if (err) + return err; + + switch(cmd) { + case MTRRIOC_GET_ENTRY: + case MTRRIOC_GET_PAGE_ENTRY: + if (copy_to_user(arg, &gentry, sizeof gentry)) + err = -EFAULT; + break; +#ifdef CONFIG_COMPAT + case MTRRIOC32_GET_ENTRY: + case MTRRIOC32_GET_PAGE_ENTRY: 
{ + struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; + err = put_user(gentry.base, &g32->base); + err |= put_user(gentry.size, &g32->size); + err |= put_user(gentry.regnum, &g32->regnum); + err |= put_user(gentry.type, &g32->type); + break; + } +#endif + } + return err; +} + +static int +mtrr_close(struct inode *ino, struct file *file) +{ + int i, max; + unsigned int *fcount = FILE_FCOUNT(file); + + if (fcount != NULL) { + max = num_var_ranges; + for (i = 0; i < max; ++i) { + while (fcount[i] > 0) { + mtrr_del(i, 0, 0); + --fcount[i]; + } + } + kfree(fcount); + FILE_FCOUNT(file) = NULL; + } + return single_release(ino, file); +} + +static int mtrr_seq_show(struct seq_file *seq, void *offset); + +static int mtrr_open(struct inode *inode, struct file *file) +{ + if (!mtrr_if) + return -EIO; + if (!mtrr_if->get) + return -ENXIO; + return single_open(file, mtrr_seq_show, NULL); +} + +static const struct file_operations mtrr_fops = { + .owner = THIS_MODULE, + .open = mtrr_open, + .read = seq_read, + .llseek = seq_lseek, + .write = mtrr_write, + .unlocked_ioctl = mtrr_ioctl, + .compat_ioctl = mtrr_ioctl, + .release = mtrr_close, +}; + + +static struct proc_dir_entry *proc_root_mtrr; + + +static int mtrr_seq_show(struct seq_file *seq, void *offset) +{ + char factor; + int i, max, len; + mtrr_type type; + unsigned long base, size; + + len = 0; + max = num_var_ranges; + for (i = 0; i < max; i++) { + mtrr_if->get(i, &base, &size, &type); + if (size == 0) + usage_table[i] = 0; + else { + if (size < (0x100000 >> PAGE_SHIFT)) { + /* less than 1MB */ + factor = 'K'; + size <<= PAGE_SHIFT - 10; + } else { + factor = 'M'; + size >>= 20 - PAGE_SHIFT; + } + /* RED-PEN: base can be > 32bit */ + len += seq_printf(seq, + "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", + i, base, base >> (20 - PAGE_SHIFT), size, factor, + mtrr_attrib_to_str(type), usage_table[i]); + } + } + return 0; +} + +static int __init mtrr_if_init(void) +{ + struct 
cpuinfo_x86 *c = &boot_cpu_data; + + if ((!cpu_has(c, X86_FEATURE_MTRR)) && + (!cpu_has(c, X86_FEATURE_K6_MTRR)) && + (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) && + (!cpu_has(c, X86_FEATURE_CENTAUR_MCR))) + return -ENODEV; + + proc_root_mtrr = + create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root); + if (proc_root_mtrr) { + proc_root_mtrr->owner = THIS_MODULE; + proc_root_mtrr->proc_fops = &mtrr_fops; + } + return 0; +} + +arch_initcall(mtrr_if_init); +#endif /* CONFIG_PROC_FS */ diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c new file mode 100644 index 00000000000..c48b6fea5ab --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -0,0 +1,768 @@ +/* Generic MTRR (Memory Type Range Register) driver. + + Copyright (C) 1997-2000 Richard Gooch + Copyright (c) 2002 Patrick Mochel + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Richard Gooch may be reached by email at rgooch@atnf.csiro.au + The postal address is: + Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. + + Source: "Pentium Pro Family Developer's Manual, Volume 3: + Operating System Writer's Guide" (Intel document number 242692), + section 11.11.7 + + This was cleaned and made readable by Patrick Mochel + on 6-7 March 2002. 
+ Source: Intel Architecture Software Developers Manual, Volume 3: + System Programming Guide; Section 9.11. (1997 edition - PPro). +*/ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include "mtrr.h" + +u32 num_var_ranges = 0; + +unsigned int *usage_table; +static DEFINE_MUTEX(mtrr_mutex); + +u64 size_or_mask, size_and_mask; + +static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; + +struct mtrr_ops * mtrr_if = NULL; + +static void set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type); + +#ifndef CONFIG_X86_64 +extern int arr3_protected; +#else +#define arr3_protected 0 +#endif + +void set_mtrr_ops(struct mtrr_ops * ops) +{ + if (ops->vendor && ops->vendor < X86_VENDOR_NUM) + mtrr_ops[ops->vendor] = ops; +} + +/* Returns non-zero if we have the write-combining memory type */ +static int have_wrcomb(void) +{ + struct pci_dev *dev; + u8 rev; + + if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) { + /* ServerWorks LE chipsets < rev 6 have problems with write-combining + Don't allow it and leave room for other chipsets to be tagged */ + if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && + dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { + pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); + if (rev <= 5) { + printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); + pci_dev_put(dev); + return 0; + } + } + /* Intel 450NX errata # 23. Non ascending cacheline evictions to + write combining memory may resulting in data corruption */ + if (dev->vendor == PCI_VENDOR_ID_INTEL && + dev->device == PCI_DEVICE_ID_INTEL_82451NX) { + printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); + pci_dev_put(dev); + return 0; + } + pci_dev_put(dev); + } + return (mtrr_if->have_wrcomb ? 
mtrr_if->have_wrcomb() : 0); +} + +/* This function returns the number of variable MTRRs */ +static void __init set_num_var_ranges(void) +{ + unsigned long config = 0, dummy; + + if (use_intel()) { + rdmsr(MTRRcap_MSR, config, dummy); + } else if (is_cpu(AMD)) + config = 2; + else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) + config = 8; + num_var_ranges = config & 0xff; +} + +static void __init init_table(void) +{ + int i, max; + + max = num_var_ranges; + if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL)) + == NULL) { + printk(KERN_ERR "mtrr: could not allocate\n"); + return; + } + for (i = 0; i < max; i++) + usage_table[i] = 1; +} + +struct set_mtrr_data { + atomic_t count; + atomic_t gate; + unsigned long smp_base; + unsigned long smp_size; + unsigned int smp_reg; + mtrr_type smp_type; +}; + +#ifdef CONFIG_SMP + +static void ipi_handler(void *info) +/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. + [RETURNS] Nothing. +*/ +{ + struct set_mtrr_data *data = info; + unsigned long flags; + + local_irq_save(flags); + + atomic_dec(&data->count); + while(!atomic_read(&data->gate)) + cpu_relax(); + + /* The master has cleared me to execute */ + if (data->smp_reg != ~0U) + mtrr_if->set(data->smp_reg, data->smp_base, + data->smp_size, data->smp_type); + else + mtrr_if->set_all(); + + atomic_dec(&data->count); + while(atomic_read(&data->gate)) + cpu_relax(); + + atomic_dec(&data->count); + local_irq_restore(flags); +} + +#endif + +static inline int types_compatible(mtrr_type type1, mtrr_type type2) { + return type1 == MTRR_TYPE_UNCACHABLE || + type2 == MTRR_TYPE_UNCACHABLE || + (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || + (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH); +} + +/** + * set_mtrr - update mtrrs on all processors + * @reg: mtrr in question + * @base: mtrr base + * @size: mtrr size + * @type: mtrr type + * + * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: + * + * 1. 
Send IPI to do the following: + * 2. Disable Interrupts + * 3. Wait for all procs to do so + * 4. Enter no-fill cache mode + * 5. Flush caches + * 6. Clear PGE bit + * 7. Flush all TLBs + * 8. Disable all range registers + * 9. Update the MTRRs + * 10. Enable all range registers + * 11. Flush all TLBs and caches again + * 12. Enter normal cache mode and reenable caching + * 13. Set PGE + * 14. Wait for buddies to catch up + * 15. Enable interrupts. + * + * What does that mean for us? Well, first we set data.count to the number + * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait + * until it hits 0 and proceed. We set the data.gate flag and reset data.count. + * Meanwhile, they are waiting for that flag to be set. Once it's set, each + * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it + * differently, so we call mtrr_if->set() callback and let them take care of it. + * When they're done, they again decrement data->count and wait for data.gate to + * be reset. + * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag. + * Everyone then enables interrupts and we all continue on. + * + * Note that the mechanism is the same for UP systems, too; all the SMP stuff + * becomes nops. 
+ */ +static void set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +{ + struct set_mtrr_data data; + unsigned long flags; + + data.smp_reg = reg; + data.smp_base = base; + data.smp_size = size; + data.smp_type = type; + atomic_set(&data.count, num_booting_cpus() - 1); + /* make sure data.count is visible before unleashing other CPUs */ + smp_wmb(); + atomic_set(&data.gate,0); + + /* Start the ball rolling on other CPUs */ + if (smp_call_function(ipi_handler, &data, 1, 0) != 0) + panic("mtrr: timed out waiting for other CPUs\n"); + + local_irq_save(flags); + + while(atomic_read(&data.count)) + cpu_relax(); + + /* ok, reset count and toggle gate */ + atomic_set(&data.count, num_booting_cpus() - 1); + smp_wmb(); + atomic_set(&data.gate,1); + + /* do our MTRR business */ + + /* HACK! + * We use this same function to initialize the mtrrs on boot. + * The state of the boot cpu's mtrrs has been saved, and we want + * to replicate across all the APs. + * If we're doing that @reg is set to something special... + */ + if (reg != ~0U) + mtrr_if->set(reg,base,size,type); + + /* wait for the others */ + while(atomic_read(&data.count)) + cpu_relax(); + + atomic_set(&data.count, num_booting_cpus() - 1); + smp_wmb(); + atomic_set(&data.gate,0); + + /* + * Wait here for everyone to have seen the gate change + * So we're the last ones to touch 'data' + */ + while(atomic_read(&data.count)) + cpu_relax(); + + local_irq_restore(flags); +} + +/** + * mtrr_add_page - Add a memory type region + * @base: Physical base address of region in pages (in units of 4 kB!) + * @size: Physical size of region in pages (4 kB) + * @type: Type of MTRR desired + * @increment: If this is true do usage counting on the region + * + * Memory type region registers control the caching on newer Intel and + * non Intel processors. This function allows drivers to request an + * MTRR is added. 
The details and hardware specifics of each processor's + * implementation are hidden from the caller, but nevertheless the + * caller should expect to need to provide a power of two size on an + * equivalent power of two boundary. + * + * If the region cannot be added either because all regions are in use + * or the CPU cannot support it a negative value is returned. On success + * the register number for this entry is returned, but should be treated + * as a cookie only. + * + * On a multiprocessor machine the changes are made to all processors. + * This is required on x86 by the Intel processors. + * + * The available types are + * + * %MTRR_TYPE_UNCACHABLE - No caching + * + * %MTRR_TYPE_WRBACK - Write data back in bursts whenever + * + * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts + * + * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes + * + * BUGS: Needs a quiet flag for the cases where drivers do not mind + * failures and do not wish system log messages to be sent. 
+ */ + +int mtrr_add_page(unsigned long base, unsigned long size, + unsigned int type, char increment) +{ + int i, replace, error; + mtrr_type ltype; + unsigned long lbase, lsize; + + if (!mtrr_if) + return -ENXIO; + + if ((error = mtrr_if->validate_add_page(base,size,type))) + return error; + + if (type >= MTRR_NUM_TYPES) { + printk(KERN_WARNING "mtrr: type: %u invalid\n", type); + return -EINVAL; + } + + /* If the type is WC, check that this processor supports it */ + if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { + printk(KERN_WARNING + "mtrr: your processor doesn't support write-combining\n"); + return -ENOSYS; + } + + if (!size) { + printk(KERN_WARNING "mtrr: zero sized request\n"); + return -EINVAL; + } + + if (base & size_or_mask || size & size_or_mask) { + printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n"); + return -EINVAL; + } + + error = -EINVAL; + replace = -1; + + /* No CPU hotplug when we change MTRR entries */ + lock_cpu_hotplug(); + /* Search for existing MTRR */ + mutex_lock(&mtrr_mutex); + for (i = 0; i < num_var_ranges; ++i) { + mtrr_if->get(i, &lbase, &lsize, <ype); + if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase) + continue; + /* At this point we know there is some kind of overlap/enclosure */ + if (base < lbase || base + size - 1 > lbase + lsize - 1) { + if (base <= lbase && base + size - 1 >= lbase + lsize - 1) { + /* New region encloses an existing region */ + if (type == ltype) { + replace = replace == -1 ? 
i : -2; + continue; + } + else if (types_compatible(type, ltype)) + continue; + } + printk(KERN_WARNING + "mtrr: 0x%lx000,0x%lx000 overlaps existing" + " 0x%lx000,0x%lx000\n", base, size, lbase, + lsize); + goto out; + } + /* New region is enclosed by an existing region */ + if (ltype != type) { + if (types_compatible(type, ltype)) + continue; + printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", + base, size, mtrr_attrib_to_str(ltype), + mtrr_attrib_to_str(type)); + goto out; + } + if (increment) + ++usage_table[i]; + error = i; + goto out; + } + /* Search for an empty MTRR */ + i = mtrr_if->get_free_region(base, size, replace); + if (i >= 0) { + set_mtrr(i, base, size, type); + if (likely(replace < 0)) + usage_table[i] = 1; + else { + usage_table[i] = usage_table[replace] + !!increment; + if (unlikely(replace != i)) { + set_mtrr(replace, 0, 0, 0); + usage_table[replace] = 0; + } + } + } else + printk(KERN_INFO "mtrr: no more MTRRs available\n"); + error = i; + out: + mutex_unlock(&mtrr_mutex); + unlock_cpu_hotplug(); + return error; +} + +static int mtrr_check(unsigned long base, unsigned long size) +{ + if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { + printk(KERN_WARNING + "mtrr: size and base must be multiples of 4 kiB\n"); + printk(KERN_DEBUG + "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + dump_stack(); + return -1; + } + return 0; +} + +/** + * mtrr_add - Add a memory type region + * @base: Physical base address of region + * @size: Physical size of region + * @type: Type of MTRR desired + * @increment: If this is true do usage counting on the region + * + * Memory type region registers control the caching on newer Intel and + * non Intel processors. This function allows drivers to request an + * MTRR is added. 
The details and hardware specifics of each processor's + * implementation are hidden from the caller, but nevertheless the + * caller should expect to need to provide a power of two size on an + * equivalent power of two boundary. + * + * If the region cannot be added either because all regions are in use + * or the CPU cannot support it a negative value is returned. On success + * the register number for this entry is returned, but should be treated + * as a cookie only. + * + * On a multiprocessor machine the changes are made to all processors. + * This is required on x86 by the Intel processors. + * + * The available types are + * + * %MTRR_TYPE_UNCACHABLE - No caching + * + * %MTRR_TYPE_WRBACK - Write data back in bursts whenever + * + * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts + * + * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes + * + * BUGS: Needs a quiet flag for the cases where drivers do not mind + * failures and do not wish system log messages to be sent. + */ + +int +mtrr_add(unsigned long base, unsigned long size, unsigned int type, + char increment) +{ + if (mtrr_check(base, size)) + return -EINVAL; + return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, + increment); +} + +/** + * mtrr_del_page - delete a memory type region + * @reg: Register returned by mtrr_add + * @base: Physical base address + * @size: Size of region + * + * If register is supplied then base and size are ignored. This is + * how drivers should call it. + * + * Releases an MTRR region. If the usage count drops to zero the + * register is freed and the region returns to default state. + * On success the register is returned, on failure a negative error + * code. 
+ */ + +int mtrr_del_page(int reg, unsigned long base, unsigned long size) +{ + int i, max; + mtrr_type ltype; + unsigned long lbase, lsize; + int error = -EINVAL; + + if (!mtrr_if) + return -ENXIO; + + max = num_var_ranges; + /* No CPU hotplug when we change MTRR entries */ + lock_cpu_hotplug(); + mutex_lock(&mtrr_mutex); + if (reg < 0) { + /* Search for existing MTRR */ + for (i = 0; i < max; ++i) { + mtrr_if->get(i, &lbase, &lsize, <ype); + if (lbase == base && lsize == size) { + reg = i; + break; + } + } + if (reg < 0) { + printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base, + size); + goto out; + } + } + if (reg >= max) { + printk(KERN_WARNING "mtrr: register: %d too big\n", reg); + goto out; + } + if (is_cpu(CYRIX) && !use_intel()) { + if ((reg == 3) && arr3_protected) { + printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n"); + goto out; + } + } + mtrr_if->get(reg, &lbase, &lsize, <ype); + if (lsize < 1) { + printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); + goto out; + } + if (usage_table[reg] < 1) { + printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); + goto out; + } + if (--usage_table[reg] < 1) + set_mtrr(reg, 0, 0, 0); + error = reg; + out: + mutex_unlock(&mtrr_mutex); + unlock_cpu_hotplug(); + return error; +} +/** + * mtrr_del - delete a memory type region + * @reg: Register returned by mtrr_add + * @base: Physical base address + * @size: Size of region + * + * If register is supplied then base and size are ignored. This is + * how drivers should call it. + * + * Releases an MTRR region. If the usage count drops to zero the + * register is freed and the region returns to default state. + * On success the register is returned, on failure a negative error + * code. + */ + +int +mtrr_del(int reg, unsigned long base, unsigned long size) +{ + if (mtrr_check(base, size)) + return -EINVAL; + return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); +} + +EXPORT_SYMBOL(mtrr_add); +EXPORT_SYMBOL(mtrr_del); + +/* HACK ALERT! 
+ * These should be called implicitly, but we can't yet until all the initcall + * stuff is done... + */ +extern void amd_init_mtrr(void); +extern void cyrix_init_mtrr(void); +extern void centaur_init_mtrr(void); + +static void __init init_ifs(void) +{ +#ifndef CONFIG_X86_64 + amd_init_mtrr(); + cyrix_init_mtrr(); + centaur_init_mtrr(); +#endif +} + +/* The suspend/resume methods are only for CPU without MTRR. CPU using generic + * MTRR driver doesn't require this + */ +struct mtrr_value { + mtrr_type ltype; + unsigned long lbase; + unsigned long lsize; +}; + +static struct mtrr_value * mtrr_state; + +static int mtrr_save(struct sys_device * sysdev, pm_message_t state) +{ + int i; + int size = num_var_ranges * sizeof(struct mtrr_value); + + mtrr_state = kzalloc(size,GFP_ATOMIC); + if (!mtrr_state) + return -ENOMEM; + + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, + &mtrr_state[i].lbase, + &mtrr_state[i].lsize, + &mtrr_state[i].ltype); + } + return 0; +} + +static int mtrr_restore(struct sys_device * sysdev) +{ + int i; + + for (i = 0; i < num_var_ranges; i++) { + if (mtrr_state[i].lsize) + set_mtrr(i, + mtrr_state[i].lbase, + mtrr_state[i].lsize, + mtrr_state[i].ltype); + } + kfree(mtrr_state); + return 0; +} + + + +static struct sysdev_driver mtrr_sysdev_driver = { + .suspend = mtrr_save, + .resume = mtrr_restore, +}; + + +/** + * mtrr_bp_init - initialize mtrrs on the boot CPU + * + * This needs to be called early; before any of the other CPUs are + * initialized (i.e. before smp_init()). + * + */ +void __init mtrr_bp_init(void) +{ + init_ifs(); + + if (cpu_has_mtrr) { + mtrr_if = &generic_mtrr_ops; + size_or_mask = 0xff000000; /* 36 bits */ + size_and_mask = 0x00f00000; + + /* This is an AMD specific MSR, but we assume(hope?) that + Intel will implement it to when they extend the address + bus of the Xeon. 
*/ + if (cpuid_eax(0x80000000) >= 0x80000008) { + u32 phys_addr; + phys_addr = cpuid_eax(0x80000008) & 0xff; + /* CPUID workaround for Intel 0F33/0F34 CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 0xF && + boot_cpu_data.x86_model == 0x3 && + (boot_cpu_data.x86_mask == 0x3 || + boot_cpu_data.x86_mask == 0x4)) + phys_addr = 36; + + size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1); + size_and_mask = ~size_or_mask & 0xfffff00000ULL; + } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && + boot_cpu_data.x86 == 6) { + /* VIA C* family have Intel style MTRRs, but + don't support PAE */ + size_or_mask = 0xfff00000; /* 32 bits */ + size_and_mask = 0; + } + } else { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + if (cpu_has_k6_mtrr) { + /* Pre-Athlon (K6) AMD CPU MTRRs */ + mtrr_if = mtrr_ops[X86_VENDOR_AMD]; + size_or_mask = 0xfff00000; /* 32 bits */ + size_and_mask = 0; + } + break; + case X86_VENDOR_CENTAUR: + if (cpu_has_centaur_mcr) { + mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR]; + size_or_mask = 0xfff00000; /* 32 bits */ + size_and_mask = 0; + } + break; + case X86_VENDOR_CYRIX: + if (cpu_has_cyrix_arr) { + mtrr_if = mtrr_ops[X86_VENDOR_CYRIX]; + size_or_mask = 0xfff00000; /* 32 bits */ + size_and_mask = 0; + } + break; + default: + break; + } + } + + if (mtrr_if) { + set_num_var_ranges(); + init_table(); + if (use_intel()) + get_mtrr_state(); + } +} + +void mtrr_ap_init(void) +{ + unsigned long flags; + + if (!mtrr_if || !use_intel()) + return; + /* + * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed, + * but this routine will be called in cpu boot time, holding the lock + * breaks it. This routine is called in two cases: 1.very earily time + * of software resume, when there absolutely isn't mtrr entry changes; + * 2.cpu hotadd time. 
We let mtrr_add/del_page hold cpuhotplug lock to + * prevent mtrr entry changes + */ + local_irq_save(flags); + + mtrr_if->set_all(); + + local_irq_restore(flags); +} + +/** + * Save current fixed-range MTRR state of the BSP + */ +void mtrr_save_state(void) +{ + int cpu = get_cpu(); + + if (cpu == 0) + mtrr_save_fixed_ranges(NULL); + else + smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); + put_cpu(); +} + +static int __init mtrr_init_finialize(void) +{ + if (!mtrr_if) + return 0; + if (use_intel()) + mtrr_state_warn(); + else { + /* The CPUs haven't MTRR and seemes not support SMP. They have + * specific drivers, we use a tricky method to support + * suspend/resume for them. + * TBD: is there any system with such CPU which supports + * suspend/resume? if no, we should remove the code. + */ + sysdev_driver_register(&cpu_sysdev_class, + &mtrr_sysdev_driver); + } + return 0; +} +subsys_initcall(mtrr_init_finialize); diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h new file mode 100644 index 00000000000..289dfe6030e --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -0,0 +1,98 @@ +/* + * local mtrr defines. 
+ */ + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif + +#define MTRRcap_MSR 0x0fe +#define MTRRdefType_MSR 0x2ff + +#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) +#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) + +#define NUM_FIXED_RANGES 88 +#define MTRRfix64K_00000_MSR 0x250 +#define MTRRfix16K_80000_MSR 0x258 +#define MTRRfix16K_A0000_MSR 0x259 +#define MTRRfix4K_C0000_MSR 0x268 +#define MTRRfix4K_C8000_MSR 0x269 +#define MTRRfix4K_D0000_MSR 0x26a +#define MTRRfix4K_D8000_MSR 0x26b +#define MTRRfix4K_E0000_MSR 0x26c +#define MTRRfix4K_E8000_MSR 0x26d +#define MTRRfix4K_F0000_MSR 0x26e +#define MTRRfix4K_F8000_MSR 0x26f + +#define MTRR_CHANGE_MASK_FIXED 0x01 +#define MTRR_CHANGE_MASK_VARIABLE 0x02 +#define MTRR_CHANGE_MASK_DEFTYPE 0x04 + +/* In the Intel processor's MTRR interface, the MTRR type is always held in + an 8 bit field: */ +typedef u8 mtrr_type; + +struct mtrr_ops { + u32 vendor; + u32 use_intel_if; +// void (*init)(void); + void (*set)(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type); + void (*set_all)(void); + + void (*get)(unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type * type); + int (*get_free_region)(unsigned long base, unsigned long size, + int replace_reg); + int (*validate_add_page)(unsigned long base, unsigned long size, + unsigned int type); + int (*have_wrcomb)(void); +}; + +extern int generic_get_free_region(unsigned long base, unsigned long size, + int replace_reg); +extern int generic_validate_add_page(unsigned long base, unsigned long size, + unsigned int type); + +extern struct mtrr_ops generic_mtrr_ops; + +extern int positive_have_wrcomb(void); + +/* library functions for processor-specific routines */ +struct set_mtrr_context { + unsigned long flags; + unsigned long cr4val; + u32 deftype_lo; + u32 deftype_hi; + u32 ccr3; +}; + +struct mtrr_var_range { + u32 base_lo; + u32 base_hi; + u32 mask_lo; + u32 mask_hi; +}; + +void set_mtrr_done(struct set_mtrr_context 
*ctxt); +void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); +void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); + +void get_mtrr_state(void); + +extern void set_mtrr_ops(struct mtrr_ops * ops); + +extern u64 size_or_mask, size_and_mask; +extern struct mtrr_ops * mtrr_if; + +#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) +#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) + +extern unsigned int num_var_ranges; + +void mtrr_state_warn(void); +const char *mtrr_attrib_to_str(int x); +void mtrr_wrmsr(unsigned, unsigned, unsigned); + diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c new file mode 100644 index 00000000000..c9014ca4a57 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/state.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include +#include +#include "mtrr.h" + + +/* Put the processor into a state where MTRRs can be safely set */ +void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) +{ + unsigned int cr0; + + /* Disable interrupts locally */ + local_irq_save(ctxt->flags); + + if (use_intel() || is_cpu(CYRIX)) { + + /* Save value of CR4 and clear Page Global Enable (bit 7) */ + if ( cpu_has_pge ) { + ctxt->cr4val = read_cr4(); + write_cr4(ctxt->cr4val & ~X86_CR4_PGE); + } + + /* Disable and flush caches. 
Note that wbinvd flushes the TLBs as + a side-effect */ + cr0 = read_cr0() | 0x40000000; + wbinvd(); + write_cr0(cr0); + wbinvd(); + + if (use_intel()) + /* Save MTRR state */ + rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); + else + /* Cyrix ARRs - everything else were excluded at the top */ + ctxt->ccr3 = getCx86(CX86_CCR3); + } +} + +void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) +{ + if (use_intel()) + /* Disable MTRRs, and set the default type to uncached */ + mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, + ctxt->deftype_hi); + else if (is_cpu(CYRIX)) + /* Cyrix ARRs - everything else were excluded at the top */ + setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); +} + +/* Restore the processor after a set_mtrr_prepare */ +void set_mtrr_done(struct set_mtrr_context *ctxt) +{ + if (use_intel() || is_cpu(CYRIX)) { + + /* Flush caches and TLBs */ + wbinvd(); + + /* Restore MTRRdefType */ + if (use_intel()) + /* Intel (P6) standard MTRRs */ + mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); + else + /* Cyrix ARRs - everything else was excluded at the top */ + setCx86(CX86_CCR3, ctxt->ccr3); + + /* Enable caches */ + write_cr0(read_cr0() & 0xbfffffff); + + /* Restore value of CR4 */ + if ( cpu_has_pge ) + write_cr4(ctxt->cr4val); + } + /* Re-enable interrupts locally (if enabled previously) */ + local_irq_restore(ctxt->flags); +} + diff --git a/arch/x86_64/kernel/Makefile_64 b/arch/x86_64/kernel/Makefile_64 index a6d8216084a..e7480509103 100644 --- a/arch/x86_64/kernel/Makefile_64 +++ b/arch/x86_64/kernel/Makefile_64 @@ -15,7 +15,7 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_X86_MCE) += mce_64.o therm_throt.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o -obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/ +obj-$(CONFIG_MTRR) += ../../x86/kernel/cpu/mtrr/ obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_MICROCODE) += microcode.o -- cgit 
v1.2.3-70-g09d2 From f7627e2513987bb5d4e8cb13c4e0a478352141ac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:58 +0200 Subject: i386: move kernel/cpu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/kernel/Makefile_32 | 2 +- arch/i386/kernel/cpu/Makefile | 20 - arch/i386/kernel/cpu/addon_cpuid_features.c | 50 -- arch/i386/kernel/cpu/amd.c | 337 ------------ arch/i386/kernel/cpu/bugs.c | 192 ------- arch/i386/kernel/cpu/centaur.c | 471 ---------------- arch/i386/kernel/cpu/common.c | 733 ------------------------- arch/i386/kernel/cpu/cpu.h | 28 - arch/i386/kernel/cpu/cyrix.c | 463 ---------------- arch/i386/kernel/cpu/intel.c | 333 ------------ arch/i386/kernel/cpu/intel_cacheinfo.c | 806 ---------------------------- arch/i386/kernel/cpu/nexgen.c | 60 --- arch/i386/kernel/cpu/perfctr-watchdog.c | 713 ------------------------ arch/i386/kernel/cpu/proc.c | 192 ------- arch/i386/kernel/cpu/transmeta.c | 116 ---- arch/i386/kernel/cpu/umc.c | 26 - arch/x86/kernel/cpu/Makefile | 20 + arch/x86/kernel/cpu/addon_cpuid_features.c | 50 ++ arch/x86/kernel/cpu/amd.c | 337 ++++++++++++ arch/x86/kernel/cpu/bugs.c | 192 +++++++ arch/x86/kernel/cpu/centaur.c | 471 ++++++++++++++++ arch/x86/kernel/cpu/common.c | 733 +++++++++++++++++++++++++ arch/x86/kernel/cpu/cpu.h | 28 + arch/x86/kernel/cpu/cyrix.c | 463 ++++++++++++++++ arch/x86/kernel/cpu/intel.c | 333 ++++++++++++ arch/x86/kernel/cpu/intel_cacheinfo.c | 806 ++++++++++++++++++++++++++++ arch/x86/kernel/cpu/nexgen.c | 60 +++ arch/x86/kernel/cpu/perfctr-watchdog.c | 713 ++++++++++++++++++++++++ arch/x86/kernel/cpu/proc.c | 192 +++++++ arch/x86/kernel/cpu/transmeta.c | 116 ++++ arch/x86/kernel/cpu/umc.c | 26 + arch/x86_64/kernel/Makefile_64 | 6 +- 32 files changed, 4544 insertions(+), 4544 deletions(-) delete mode 100644 arch/i386/kernel/cpu/Makefile delete mode 100644 arch/i386/kernel/cpu/addon_cpuid_features.c delete mode 100644 arch/i386/kernel/cpu/amd.c delete mode 100644 
arch/i386/kernel/cpu/bugs.c delete mode 100644 arch/i386/kernel/cpu/centaur.c delete mode 100644 arch/i386/kernel/cpu/common.c delete mode 100644 arch/i386/kernel/cpu/cpu.h delete mode 100644 arch/i386/kernel/cpu/cyrix.c delete mode 100644 arch/i386/kernel/cpu/intel.c delete mode 100644 arch/i386/kernel/cpu/intel_cacheinfo.c delete mode 100644 arch/i386/kernel/cpu/nexgen.c delete mode 100644 arch/i386/kernel/cpu/perfctr-watchdog.c delete mode 100644 arch/i386/kernel/cpu/proc.c delete mode 100644 arch/i386/kernel/cpu/transmeta.c delete mode 100644 arch/i386/kernel/cpu/umc.c create mode 100644 arch/x86/kernel/cpu/Makefile create mode 100644 arch/x86/kernel/cpu/addon_cpuid_features.c create mode 100644 arch/x86/kernel/cpu/amd.c create mode 100644 arch/x86/kernel/cpu/bugs.c create mode 100644 arch/x86/kernel/cpu/centaur.c create mode 100644 arch/x86/kernel/cpu/common.c create mode 100644 arch/x86/kernel/cpu/cpu.h create mode 100644 arch/x86/kernel/cpu/cyrix.c create mode 100644 arch/x86/kernel/cpu/intel.c create mode 100644 arch/x86/kernel/cpu/intel_cacheinfo.c create mode 100644 arch/x86/kernel/cpu/nexgen.c create mode 100644 arch/x86/kernel/cpu/perfctr-watchdog.c create mode 100644 arch/x86/kernel/cpu/proc.c create mode 100644 arch/x86/kernel/cpu/transmeta.c create mode 100644 arch/x86/kernel/cpu/umc.c (limited to 'arch/x86/kernel') diff --git a/arch/i386/kernel/Makefile_32 b/arch/i386/kernel/Makefile_32 index af8304b921d..5096f486d38 100644 --- a/arch/i386/kernel/Makefile_32 +++ b/arch/i386/kernel/Makefile_32 @@ -10,7 +10,7 @@ obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ quirks.o i8237.o topology.o alternative.o i8253_32.o tsc_32.o obj-$(CONFIG_STACKTRACE) += stacktrace.o -obj-y += cpu/ +obj-y += ../../x86/kernel/cpu/ obj-y += ../../x86/kernel/acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot_32.o obj-$(CONFIG_MCA) += mca_32.o diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile deleted file mode 100644 index 
6687f6d5ad2..00000000000 --- a/arch/i386/kernel/cpu/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# -# Makefile for x86-compatible CPU details and quirks -# - -obj-y := common.o proc.o bugs.o - -obj-y += amd.o -obj-y += cyrix.o -obj-y += centaur.o -obj-y += transmeta.o -obj-y += intel.o intel_cacheinfo.o addon_cpuid_features.o -obj-y += nexgen.o -obj-y += umc.o - -obj-$(CONFIG_X86_MCE) += ../../../x86/kernel/cpu/mcheck/ - -obj-$(CONFIG_MTRR) += ../../../x86/kernel/cpu/mtrr/ -obj-$(CONFIG_CPU_FREQ) += ../../../x86/kernel/cpu/cpufreq/ - -obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/i386/kernel/cpu/addon_cpuid_features.c b/arch/i386/kernel/cpu/addon_cpuid_features.c deleted file mode 100644 index 3e91d3ee26e..00000000000 --- a/arch/i386/kernel/cpu/addon_cpuid_features.c +++ /dev/null @@ -1,50 +0,0 @@ - -/* - * Routines to indentify additional cpu features that are scattered in - * cpuid space. - */ - -#include - -#include - -struct cpuid_bit { - u16 feature; - u8 reg; - u8 bit; - u32 level; -}; - -enum cpuid_regs { - CR_EAX = 0, - CR_ECX, - CR_EDX, - CR_EBX -}; - -void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) -{ - u32 max_level; - u32 regs[4]; - const struct cpuid_bit *cb; - - static const struct cpuid_bit cpuid_bits[] = { - { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, - { 0, 0, 0, 0 } - }; - - for (cb = cpuid_bits; cb->feature; cb++) { - - /* Verify that the level is valid */ - max_level = cpuid_eax(cb->level & 0xffff0000); - if (max_level < cb->level || - max_level > (cb->level | 0xffff)) - continue; - - cpuid(cb->level, &regs[CR_EAX], &regs[CR_EBX], - &regs[CR_ECX], &regs[CR_EDX]); - - if (regs[cb->reg] & (1 << cb->bit)) - set_bit(cb->feature, c->x86_capability); - } -} diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c deleted file mode 100644 index dcf6bbb1c7c..00000000000 --- a/arch/i386/kernel/cpu/amd.c +++ /dev/null @@ -1,337 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "cpu.h" - 
-/* - * B step AMD K6 before B 9730xxxx have hardware bugs that can cause - * misexecution of code under Linux. Owners of such processors should - * contact AMD for precise details and a CPU swap. - * - * See http://www.multimania.com/poulot/k6bug.html - * http://www.amd.com/K6/k6docs/revgd.html - * - * The following test is erm.. interesting. AMD neglected to up - * the chip setting when fixing the bug but they also tweaked some - * performance at the same time.. - */ - -extern void vide(void); -__asm__(".align 4\nvide: ret"); - -#ifdef CONFIG_X86_LOCAL_APIC -#define ENABLE_C1E_MASK 0x18000000 -#define CPUID_PROCESSOR_SIGNATURE 1 -#define CPUID_XFAM 0x0ff00000 -#define CPUID_XFAM_K8 0x00000000 -#define CPUID_XFAM_10H 0x00100000 -#define CPUID_XFAM_11H 0x00200000 -#define CPUID_XMOD 0x000f0000 -#define CPUID_XMOD_REV_F 0x00040000 - -/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ -static __cpuinit int amd_apic_timer_broken(void) -{ - u32 lo, hi; - u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - switch (eax & CPUID_XFAM) { - case CPUID_XFAM_K8: - if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) - break; - case CPUID_XFAM_10H: - case CPUID_XFAM_11H: - rdmsr(MSR_K8_ENABLE_C1E, lo, hi); - if (lo & ENABLE_C1E_MASK) - return 1; - break; - default: - /* err on the side of caution */ - return 1; - } - return 0; -} -#endif - -int force_mwait __cpuinitdata; - -static void __cpuinit init_amd(struct cpuinfo_x86 *c) -{ - u32 l, h; - int mbytes = num_physpages >> (20-PAGE_SHIFT); - int r; - -#ifdef CONFIG_SMP - unsigned long long value; - - /* Disable TLB flush filter by setting HWCR.FFDIS on K8 - * bit 6 of msr C001_0015 - * - * Errata 63 for SH-B3 steppings - * Errata 122 for all steppings (F+ have it disabled by default) - */ - if (c->x86 == 15) { - rdmsrl(MSR_K7_HWCR, value); - value |= 1 << 6; - wrmsrl(MSR_K7_HWCR, value); - } -#endif - - /* - * FIXME: We should handle the K5 here. 
Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) - */ - - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, c->x86_capability); - - r = get_model_name(c); - - switch(c->x86) - { - case 4: - /* - * General Systems BIOSen alias the cpu frequency registers - * of the Elan at 0x000df000. Unfortuantly, one of the Linux - * drivers subsequently pokes it, and changes the CPU speed. - * Workaround : Remove the unneeded alias. - */ -#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ -#define CBAR_ENB (0x80000000) -#define CBAR_KEY (0X000000CB) - if (c->x86_model==9 || c->x86_model == 10) { - if (inl (CBAR) & CBAR_ENB) - outl (0 | CBAR_KEY, CBAR); - } - break; - case 5: - if( c->x86_model < 6 ) - { - /* Based on AMD doc 20734R - June 2000 */ - if ( c->x86_model == 0 ) { - clear_bit(X86_FEATURE_APIC, c->x86_capability); - set_bit(X86_FEATURE_PGE, c->x86_capability); - } - break; - } - - if ( c->x86_model == 6 && c->x86_mask == 1 ) { - const int K6_BUG_LOOP = 1000000; - int n; - void (*f_vide)(void); - unsigned long d, d2; - - printk(KERN_INFO "AMD K6 stepping B detected - "); - - /* - * It looks like AMD fixed the 2.6.2 bug and improved indirect - * calls at the same time. 
- */ - - n = K6_BUG_LOOP; - f_vide = vide; - rdtscl(d); - while (n--) - f_vide(); - rdtscl(d2); - d = d2-d; - - if (d > 20*K6_BUG_LOOP) - printk("system stability may be impaired when more than 32 MB are used.\n"); - else - printk("probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); - } - - /* K6 with old style WHCR */ - if (c->x86_model < 8 || - (c->x86_model== 8 && c->x86_mask < 8)) { - /* We can only write allocate on the low 508Mb */ - if(mbytes>508) - mbytes=508; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0x0000FFFF)==0) { - unsigned long flags; - l=(1<<0)|((mbytes/4)<<1); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", - mbytes); - } - break; - } - - if ((c->x86_model == 8 && c->x86_mask >7) || - c->x86_model == 9 || c->x86_model == 13) { - /* The more serious chips .. */ - - if(mbytes>4092) - mbytes=4092; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0xFFFF0000)==0) { - unsigned long flags; - l=((mbytes>>2)<<22)|(1<<16); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", - mbytes); - } - - /* Set MTRR capability flag if appropriate */ - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_bit(X86_FEATURE_K6_MTRR, c->x86_capability); - break; - } - - if (c->x86_model == 10) { - /* AMD Geode LX is model 10 */ - /* placeholder for any needed mods */ - break; - } - break; - case 6: /* An Athlon/Duron */ - - /* Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. - * If the BIOS didn't enable it already, enable it here. 
- */ - if (c->x86_model >= 6 && c->x86_model <= 10) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - rdmsr(MSR_K7_HWCR, l, h); - l &= ~0x00008000; - wrmsr(MSR_K7_HWCR, l, h); - set_bit(X86_FEATURE_XMM, c->x86_capability); - } - } - - /* It's been determined by AMD that Athlons since model 8 stepping 1 - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ - if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, - ((l & 0x000fffff)|0x20000000)); - wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); - } - } - break; - } - - switch (c->x86) { - case 15: - /* Use K8 tuning for Fam10h and Fam11h */ - case 0x10: - case 0x11: - set_bit(X86_FEATURE_K8, c->x86_capability); - break; - case 6: - set_bit(X86_FEATURE_K7, c->x86_capability); - break; - } - if (c->x86 >= 6) - set_bit(X86_FEATURE_FXSAVE_LEAK, c->x86_capability); - - display_cacheinfo(c); - - if (cpuid_eax(0x80000000) >= 0x80000008) { - c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - } - - if (cpuid_eax(0x80000000) >= 0x80000007) { - c->x86_power = cpuid_edx(0x80000007); - if (c->x86_power & (1<<8)) - set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); - } - -#ifdef CONFIG_X86_HT - /* - * On a AMD multi core setup the lower bits of the APIC id - * distingush the cores. 
- */ - if (c->x86_max_cores > 1) { - int cpu = smp_processor_id(); - unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; - - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1); - c->phys_proc_id >>= bits; - printk(KERN_INFO "CPU %d(%d) -> Core %d\n", - cpu, c->x86_max_cores, c->cpu_core_id); - } -#endif - - if (cpuid_eax(0x80000000) >= 0x80000006) { - if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) - num_cache_leaves = 4; - else - num_cache_leaves = 3; - } - -#ifdef CONFIG_X86_LOCAL_APIC - if (amd_apic_timer_broken()) - local_apic_timer_disabled = 1; -#endif - - if (c->x86 == 0x10 && !force_mwait) - clear_bit(X86_FEATURE_MWAIT, c->x86_capability); - - /* K6s reports MCEs but don't actually have all the MSRs */ - if (c->x86 < 6) - clear_bit(X86_FEATURE_MCE, c->x86_capability); -} - -static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) -{ - /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { - if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ - size = 64; - if (c->x86_model == 4 && - (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */ - size = 256; - } - return size; -} - -static struct cpu_dev amd_cpu_dev __cpuinitdata = { - .c_vendor = "AMD", - .c_ident = { "AuthenticAMD" }, - .c_models = { - { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = - { - [3] = "486 DX/2", - [7] = "486 DX/2-WB", - [8] = "486 DX/4", - [9] = "486 DX/4-WB", - [14] = "Am5x86-WT", - [15] = "Am5x86-WB" - } - }, - }, - .c_init = init_amd, - .c_size_cache = amd_size_cache, -}; - -int __init amd_init_cpu(void) -{ - cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev; - return 0; -} diff --git a/arch/i386/kernel/cpu/bugs.c b/arch/i386/kernel/cpu/bugs.c deleted file mode 100644 index 59266f03d1c..00000000000 --- a/arch/i386/kernel/cpu/bugs.c +++ /dev/null @@ -1,192 +0,0 @@ -/* - * arch/i386/cpu/bugs.c - * - * Copyright (C) 1994 Linus Torvalds - * - * Cyrix stuff, June 1998 by: - * 
- Rafael R. Reilova (moved everything from head.S), - * - * - Channing Corn (tests & fixes), - * - Andrew D. Balsa (code cleanup). - */ -#include -#include -#include -#include -#include -#include -#include -#include - -static int __init no_halt(char *s) -{ - boot_cpu_data.hlt_works_ok = 0; - return 1; -} - -__setup("no-hlt", no_halt); - -static int __init mca_pentium(char *s) -{ - mca_pentium_flag = 1; - return 1; -} - -__setup("mca-pentium", mca_pentium); - -static int __init no_387(char *s) -{ - boot_cpu_data.hard_math = 0; - write_cr0(0xE | read_cr0()); - return 1; -} - -__setup("no387", no_387); - -static double __initdata x = 4195835.0; -static double __initdata y = 3145727.0; - -/* - * This used to check for exceptions.. - * However, it turns out that to support that, - * the XMM trap handlers basically had to - * be buggy. So let's have a correct XMM trap - * handler, and forget about printing out - * some status at boot. - * - * We should really only care about bugs here - * anyway. Not features. - */ -static void __init check_fpu(void) -{ - if (!boot_cpu_data.hard_math) { -#ifndef CONFIG_MATH_EMULATION - printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); - printk(KERN_EMERG "Giving up.\n"); - for (;;) ; -#endif - return; - } - -/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ - /* Test for the divl bug.. */ - __asm__("fninit\n\t" - "fldl %1\n\t" - "fdivl %2\n\t" - "fmull %2\n\t" - "fldl %1\n\t" - "fsubp %%st,%%st(1)\n\t" - "fistpl %0\n\t" - "fwait\n\t" - "fninit" - : "=m" (*&boot_cpu_data.fdiv_bug) - : "m" (*&x), "m" (*&y)); - if (boot_cpu_data.fdiv_bug) - printk("Hmm, FPU with FDIV bug.\n"); -} - -static void __init check_hlt(void) -{ - if (paravirt_enabled()) - return; - - printk(KERN_INFO "Checking 'hlt' instruction... 
"); - if (!boot_cpu_data.hlt_works_ok) { - printk("disabled\n"); - return; - } - halt(); - halt(); - halt(); - halt(); - printk("OK.\n"); -} - -/* - * Most 386 processors have a bug where a POPAD can lock the - * machine even from user space. - */ - -static void __init check_popad(void) -{ -#ifndef CONFIG_X86_POPAD_OK - int res, inp = (int) &res; - - printk(KERN_INFO "Checking for popad bug... "); - __asm__ __volatile__( - "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " - : "=&a" (res) - : "d" (inp) - : "ecx", "edi" ); - /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ - if (res != 12345678) printk( "Buggy.\n" ); - else printk( "OK.\n" ); -#endif -} - -/* - * Check whether we are able to run this kernel safely on SMP. - * - * - In order to run on a i386, we need to be compiled for i386 - * (for due to lack of "invlpg" and working WP on a i386) - * - In order to run on anything without a TSC, we need to be - * compiled for a i486. - * - In order to support the local APIC on a buggy Pentium machine, - * we need to be compiled with CONFIG_X86_GOOD_APIC disabled, - * which happens implicitly if compiled for a Pentium or lower - * (unless an advanced selection of CPU features is used) as an - * otherwise config implies a properly working local APIC without - * the need to do extra reads from the APIC. -*/ - -static void __init check_config(void) -{ -/* - * We'd better not be a i386 if we're configured to use some - * i486+ only features! (WP works in supervisor mode and the - * new "invlpg" and "bswap" instructions) - */ -#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) - if (boot_cpu_data.x86 == 3) - panic("Kernel requires i486+ for 'invlpg' and other features"); -#endif - -/* - * If we configured ourselves for a TSC, we'd better have one! 
- */ -#ifdef CONFIG_X86_TSC - if (!cpu_has_tsc && !tsc_disable) - panic("Kernel compiled for Pentium+, requires TSC feature!"); -#endif - -/* - * If we were told we had a good local APIC, check for buggy Pentia, - * i.e. all B steppings and the C2 stepping of P54C when using their - * integrated APIC (see 11AP erratum in "Pentium Processor - * Specification Update"). - */ -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC) - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL - && cpu_has_apic - && boot_cpu_data.x86 == 5 - && boot_cpu_data.x86_model == 2 - && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) - panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!"); -#endif -} - - -void __init check_bugs(void) -{ - identify_boot_cpu(); -#ifndef CONFIG_SMP - printk("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif - check_config(); - check_fpu(); - check_hlt(); - check_popad(); - init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 
6 : boot_cpu_data.x86); - alternative_instructions(); -} diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c deleted file mode 100644 index 473eac883c7..00000000000 --- a/arch/i386/kernel/cpu/centaur.c +++ /dev/null @@ -1,471 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include "cpu.h" - -#ifdef CONFIG_X86_OOSTORE - -static u32 __cpuinit power2(u32 x) -{ - u32 s=1; - while(s<=x) - s<<=1; - return s>>=1; -} - - -/* - * Set up an actual MCR - */ - -static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key) -{ - u32 lo, hi; - - hi = base & ~0xFFF; - lo = ~(size-1); /* Size is a power of 2 so this makes a mask */ - lo &= ~0xFFF; /* Remove the ctrl value bits */ - lo |= key; /* Attribute we wish to set */ - wrmsr(reg+MSR_IDT_MCR0, lo, hi); - mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */ -} - -/* - * Figure what we can cover with MCR's - * - * Shortcut: We know you can't put 4Gig of RAM on a winchip - */ - -static u32 __cpuinit ramtop(void) /* 16388 */ -{ - int i; - u32 top = 0; - u32 clip = 0xFFFFFFFFUL; - - for (i = 0; i < e820.nr_map; i++) { - unsigned long start, end; - - if (e820.map[i].addr > 0xFFFFFFFFUL) - continue; - /* - * Don't MCR over reserved space. Ignore the ISA hole - * we frob around that catastrophy already - */ - - if (e820.map[i].type == E820_RESERVED) - { - if(e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip) - clip = e820.map[i].addr; - continue; - } - start = e820.map[i].addr; - end = e820.map[i].addr + e820.map[i].size; - if (start >= end) - continue; - if (end > top) - top = end; - } - /* Everything below 'top' should be RAM except for the ISA hole. 
- Because of the limited MCR's we want to map NV/ACPI into our - MCR range for gunk in RAM - - Clip might cause us to MCR insufficient RAM but that is an - acceptable failure mode and should only bite obscure boxes with - a VESA hole at 15Mb - - The second case Clip sometimes kicks in is when the EBDA is marked - as reserved. Again we fail safe with reasonable results - */ - - if(top>clip) - top=clip; - - return top; -} - -/* - * Compute a set of MCR's to give maximum coverage - */ - -static int __cpuinit centaur_mcr_compute(int nr, int key) -{ - u32 mem = ramtop(); - u32 root = power2(mem); - u32 base = root; - u32 top = root; - u32 floor = 0; - int ct = 0; - - while(ct high && fspace > low) - { - centaur_mcr_insert(ct, floor, fspace, key); - floor += fspace; - } - else if(high > low) - { - centaur_mcr_insert(ct, top, high, key); - top += high; - } - else if(low > 0) - { - base -= low; - centaur_mcr_insert(ct, base, low, key); - } - else break; - ct++; - } - /* - * We loaded ct values. We now need to set the mask. The caller - * must do this bit. - */ - - return ct; -} - -static void __cpuinit centaur_create_optimal_mcr(void) -{ - int i; - /* - * Allocate up to 6 mcrs to mark as much of ram as possible - * as write combining and weak write ordered. - * - * To experiment with: Linux never uses stack operations for - * mmio spaces so we could globally enable stack operation wc - * - * Load the registers with type 31 - full write combining, all - * writes weakly ordered. - */ - int used = centaur_mcr_compute(6, 31); - - /* - * Wipe unused MCRs - */ - - for(i=used;i<8;i++) - wrmsr(MSR_IDT_MCR0+i, 0, 0); -} - -static void __cpuinit winchip2_create_optimal_mcr(void) -{ - u32 lo, hi; - int i; - - /* - * Allocate up to 6 mcrs to mark as much of ram as possible - * as write combining, weak store ordered. 
- * - * Load the registers with type 25 - * 8 - weak write ordering - * 16 - weak read ordering - * 1 - write combining - */ - - int used = centaur_mcr_compute(6, 25); - - /* - * Mark the registers we are using. - */ - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - for(i=0;i>17) & 7; - lo |= key<<6; /* replace with unlock key */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); -} - -static void __cpuinit winchip2_protect_mcr(void) -{ - u32 lo, hi; - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - lo&=~0x1C0; /* blank bits 8-6 */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); -} -#endif /* CONFIG_X86_OOSTORE */ - -#define ACE_PRESENT (1 << 6) -#define ACE_ENABLED (1 << 7) -#define ACE_FCR (1 << 28) /* MSR_VIA_FCR */ - -#define RNG_PRESENT (1 << 2) -#define RNG_ENABLED (1 << 3) -#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ - -static void __cpuinit init_c3(struct cpuinfo_x86 *c) -{ - u32 lo, hi; - - /* Test for Centaur Extended Feature Flags presence */ - if (cpuid_eax(0xC0000000) >= 0xC0000001) { - u32 tmp = cpuid_edx(0xC0000001); - - /* enable ACE unit, if present and disabled */ - if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) { - rdmsr (MSR_VIA_FCR, lo, hi); - lo |= ACE_FCR; /* enable ACE unit */ - wrmsr (MSR_VIA_FCR, lo, hi); - printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n"); - } - - /* enable RNG unit, if present and disabled */ - if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) { - rdmsr (MSR_VIA_RNG, lo, hi); - lo |= RNG_ENABLE; /* enable RNG unit */ - wrmsr (MSR_VIA_RNG, lo, hi); - printk(KERN_INFO "CPU: Enabled h/w RNG\n"); - } - - /* store Centaur Extended Feature Flags as - * word 5 of the CPU capability bit array - */ - c->x86_capability[5] = cpuid_edx(0xC0000001); - } - - /* Cyrix III family needs CX8 & PGE explicity enabled. */ - if (c->x86_model >=6 && c->x86_model <= 9) { - rdmsr (MSR_VIA_FCR, lo, hi); - lo |= (1<<1 | 1<<7); - wrmsr (MSR_VIA_FCR, lo, hi); - set_bit(X86_FEATURE_CX8, c->x86_capability); - } - - /* Before Nehemiah, the C3's had 3dNOW! 
*/ - if (c->x86_model >=6 && c->x86_model <9) - set_bit(X86_FEATURE_3DNOW, c->x86_capability); - - get_model_name(c); - display_cacheinfo(c); -} - -static void __cpuinit init_centaur(struct cpuinfo_x86 *c) -{ - enum { - ECX8=1<<1, - EIERRINT=1<<2, - DPM=1<<3, - DMCE=1<<4, - DSTPCLK=1<<5, - ELINEAR=1<<6, - DSMC=1<<7, - DTLOCK=1<<8, - EDCTLB=1<<8, - EMMX=1<<9, - DPDC=1<<11, - EBRPRED=1<<12, - DIC=1<<13, - DDC=1<<14, - DNA=1<<15, - ERETSTK=1<<16, - E2MMX=1<<19, - EAMD3D=1<<20, - }; - - char *name; - u32 fcr_set=0; - u32 fcr_clr=0; - u32 lo,hi,newlo; - u32 aa,bb,cc,dd; - - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, c->x86_capability); - - switch (c->x86) { - - case 5: - switch(c->x86_model) { - case 4: - name="C6"; - fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK; - fcr_clr=DPDC; - printk(KERN_NOTICE "Disabling bugged TSC.\n"); - clear_bit(X86_FEATURE_TSC, c->x86_capability); -#ifdef CONFIG_X86_OOSTORE - centaur_create_optimal_mcr(); - /* Enable - write combining on non-stack, non-string - write combining on string, all types - weak write ordering - - The C6 original lacks weak read order - - Note 0x120 is write only on Winchip 1 */ - - wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); -#endif - break; - case 8: - switch(c->x86_mask) { - default: - name="2"; - break; - case 7 ... 9: - name="2A"; - break; - case 10 ... 
15: - name="2B"; - break; - } - fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; - fcr_clr=DPDC; -#ifdef CONFIG_X86_OOSTORE - winchip2_unprotect_mcr(); - winchip2_create_optimal_mcr(); - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - /* Enable - write combining on non-stack, non-string - write combining on string, all types - weak write ordering - */ - lo|=31; - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - winchip2_protect_mcr(); -#endif - break; - case 9: - name="3"; - fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; - fcr_clr=DPDC; -#ifdef CONFIG_X86_OOSTORE - winchip2_unprotect_mcr(); - winchip2_create_optimal_mcr(); - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - /* Enable - write combining on non-stack, non-string - write combining on string, all types - weak write ordering - */ - lo|=31; - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - winchip2_protect_mcr(); -#endif - break; - default: - name="??"; - } - - rdmsr(MSR_IDT_FCR1, lo, hi); - newlo=(lo|fcr_set) & (~fcr_clr); - - if (newlo!=lo) { - printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo ); - wrmsr(MSR_IDT_FCR1, newlo, hi ); - } else { - printk(KERN_INFO "Centaur FCR is 0x%X\n",lo); - } - /* Emulate MTRRs using Centaur's MCR. */ - set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability); - /* Report CX8 */ - set_bit(X86_FEATURE_CX8, c->x86_capability); - /* Set 3DNow! on Winchip 2 and above. */ - if (c->x86_model >=8) - set_bit(X86_FEATURE_3DNOW, c->x86_capability); - /* See if we can find out some more. */ - if ( cpuid_eax(0x80000000) >= 0x80000005 ) { - /* Yes, we can. */ - cpuid(0x80000005,&aa,&bb,&cc,&dd); - /* Add L1 data and code cache sizes. */ - c->x86_cache_size = (cc>>24)+(dd>>24); - } - sprintf( c->x86_model_id, "WinChip %s", name ); - break; - - case 6: - init_c3(c); - break; - } -} - -static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size) -{ - /* VIA C3 CPUs (670-68F) need further shifting. 
*/ - if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) - size >>= 8; - - /* VIA also screwed up Nehemiah stepping 1, and made - it return '65KB' instead of '64KB' - - Note, it seems this may only be in engineering samples. */ - if ((c->x86==6) && (c->x86_model==9) && (c->x86_mask==1) && (size==65)) - size -=1; - - return size; -} - -static struct cpu_dev centaur_cpu_dev __cpuinitdata = { - .c_vendor = "Centaur", - .c_ident = { "CentaurHauls" }, - .c_init = init_centaur, - .c_size_cache = centaur_size_cache, -}; - -int __init centaur_init_cpu(void) -{ - cpu_devs[X86_VENDOR_CENTAUR] = ¢aur_cpu_dev; - return 0; -} diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c deleted file mode 100644 index d506201d397..00000000000 --- a/arch/i386/kernel/cpu/common.c +++ /dev/null @@ -1,733 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_X86_LOCAL_APIC -#include -#include -#include -#endif - -#include "cpu.h" - -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { - [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 }, - [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 }, - [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 }, - [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 }, - /* - * Segments used for calling PnP BIOS have byte granularity. - * They code segments and data segments have fixed 64k limits, - * the transfer segment sizes are set at run time. 
- */ - [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ - [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */ - [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */ - /* - * The APM segments have byte granularity and their bases - * are set at run time. All have 64k limits. - */ - [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ - /* 16-bit code */ - [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 }, - [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */ - - [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 }, - [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 }, -} }; -EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); - -static int cachesize_override __cpuinitdata = -1; -static int disable_x86_fxsr __cpuinitdata; -static int disable_x86_serial_nr __cpuinitdata = 1; -static int disable_x86_sep __cpuinitdata; - -struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; - -extern int disable_pse; - -static void __cpuinit default_init(struct cpuinfo_x86 * c) -{ - /* Not much we can do here... */ - /* Check if at least it has cpuid */ - if (c->cpuid_level == -1) { - /* No cpuid. 
It must be an ancient CPU */ - if (c->x86 == 4) - strcpy(c->x86_model_id, "486"); - else if (c->x86 == 3) - strcpy(c->x86_model_id, "386"); - } -} - -static struct cpu_dev __cpuinitdata default_cpu = { - .c_init = default_init, - .c_vendor = "Unknown", -}; -static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu; - -static int __init cachesize_setup(char *str) -{ - get_option (&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); - -int __cpuinit get_model_name(struct cpuinfo_x86 *c) -{ - unsigned int *v; - char *p, *q; - - if (cpuid_eax(0x80000000) < 0x80000004) - return 0; - - v = (unsigned int *) c->x86_model_id; - cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); - cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); - cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); - c->x86_model_id[48] = 0; - - /* Intel chips right-justify this string for some dumb reason; - undo that brain damage */ - p = q = &c->x86_model_id[0]; - while ( *p == ' ' ) - p++; - if ( p != q ) { - while ( *p ) - *q++ = *p++; - while ( q <= &c->x86_model_id[48] ) - *q++ = '\0'; /* Zero-pad the rest */ - } - - return 1; -} - - -void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) -{ - unsigned int n, dummy, ecx, edx, l2size; - - n = cpuid_eax(0x80000000); - - if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size=(ecx>>24)+(edx>>24); - } - - if (n < 0x80000006) /* Some chips just has a large L1. */ - return; - - ecx = cpuid_ecx(0x80000006); - l2size = ecx >> 16; - - /* do processor-specific cache resizing */ - if (this_cpu->c_size_cache) - l2size = this_cpu->c_size_cache(c,l2size); - - /* Allow user to override all this if necessary. 
*/ - if (cachesize_override != -1) - l2size = cachesize_override; - - if ( l2size == 0 ) - return; /* Again, no L2 cache is possible */ - - c->x86_cache_size = l2size; - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); -} - -/* Naming convention should be: [()] */ -/* This table only is used unless init_() below doesn't set it; */ -/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if ( c->x86_model >= 16 ) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ -} - - -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) -{ - char *v = c->x86_vendor_id; - int i; - static int printed; - - for (i = 0; i < X86_VENDOR_NUM; i++) { - if (cpu_devs[i]) { - if (!strcmp(v,cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v,cpu_devs[i]->c_ident[1]))) { - c->x86_vendor = i; - if (!early) - this_cpu = cpu_devs[i]; - return; - } - } - } - if (!printed) { - printed++; - printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); - printk(KERN_ERR "CPU: Your system may be unstable.\n"); - } - c->x86_vendor = X86_VENDOR_UNKNOWN; - this_cpu = &default_cpu; -} - - -static int __init x86_fxsr_setup(char * s) -{ - /* Tell all the other CPU's to not use it... */ - disable_x86_fxsr = 1; - - /* - * ... and clear the bits early in the boot_cpu_data - * so that the bootup process doesn't try to do this - * either. 
- */ - clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability); - clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - - -static int __init x86_sep_setup(char * s) -{ - disable_x86_sep = 1; - return 1; -} -__setup("nosep", x86_sep_setup); - - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - - -/* Probe for the CPUID instruction */ -static int __cpuinit have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - -void __init cpu_detect(struct cpuinfo_x86 *c) -{ - /* Get vendor name */ - cpuid(0x00000000, &c->cpuid_level, - (int *)&c->x86_vendor_id[0], - (int *)&c->x86_vendor_id[8], - (int *)&c->x86_vendor_id[4]); - - c->x86 = 4; - if (c->cpuid_level >= 0x00000001) { - u32 junk, tfms, cap0, misc; - cpuid(0x00000001, &tfms, &misc, &junk, &cap0); - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - c->x86_mask = tfms & 15; - if (cap0 & (1<<19)) - c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; - } -} - -/* Do minimum CPU detection early. - Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. - The others are not touched to avoid unwanted side effects. - - WARNING: this function is only called on the BP. Don't add code here - that is supposed to run on all CPUs. 
*/ -static void __init early_cpu_detect(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - - c->x86_cache_alignment = 32; - - if (!have_cpuid_p()) - return; - - cpu_detect(c); - - get_cpu_vendor(c, 1); -} - -static void __cpuinit generic_identify(struct cpuinfo_x86 * c) -{ - u32 tfms, xlvl; - int ebx; - - if (have_cpuid_p()) { - /* Get vendor name */ - cpuid(0x00000000, &c->cpuid_level, - (int *)&c->x86_vendor_id[0], - (int *)&c->x86_vendor_id[8], - (int *)&c->x86_vendor_id[4]); - - get_cpu_vendor(c, 0); - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - - /* Intel-defined flags: level 0x00000001 */ - if ( c->cpuid_level >= 0x00000001 ) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - c->x86_mask = tfms & 15; -#ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); -#else - c->apicid = (ebx >> 24) & 0xFF; -#endif - if (c->x86_capability[0] & (1<<19)) - c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; - } - - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ( (xlvl & 0xffff0000) == 0x80000000 ) { - if ( xlvl >= 0x80000001 ) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } - if ( xlvl >= 0x80000004 ) - get_model_name(c); /* Default name */ - } - - init_scattered_cpuid_features(c); - } - - early_intel_workaround(c); - -#ifdef CONFIG_X86_HT - c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; -#endif -} - -static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { - /* Disable 
processor serial number */ - unsigned long lo,hi; - rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi); - lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_bit(X86_FEATURE_PN, c->x86_capability); - - /* Disabling the serial number may affect the cpuid level */ - c->cpuid_level = cpuid_eax(0); - } -} - -static int __init x86_serial_nr_setup(char *s) -{ - disable_x86_serial_nr = 0; - return 1; -} -__setup("serialnumber", x86_serial_nr_setup); - - - -/* - * This does the hard work of actually picking apart the CPU stuff... - */ -static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) -{ - int i; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->cpuid_level = -1; /* CPUID not detected */ - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_max_cores = 1; - c->x86_clflush_size = 32; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - - if (!have_cpuid_p()) { - /* First of all, decide if this is a 486 or higher */ - /* It's a 486 if we can modify the AC flag */ - if ( flag_is_changeable_p(X86_EFLAGS_AC) ) - c->x86 = 4; - else - c->x86 = 3; - } - - generic_identify(c); - - printk(KERN_DEBUG "CPU: After generic identify, caps:"); - for (i = 0; i < NCAPINTS; i++) - printk(" %08lx", c->x86_capability[i]); - printk("\n"); - - if (this_cpu->c_identify) { - this_cpu->c_identify(c); - - printk(KERN_DEBUG "CPU: After vendor identify, caps:"); - for (i = 0; i < NCAPINTS; i++) - printk(" %08lx", c->x86_capability[i]); - printk("\n"); - } - - /* - * Vendor-specific initialization. In this section we - * canonicalize the feature flags, meaning if there are - * features a certain CPU supports which CPUID doesn't - * tell us, CPUID claiming incorrect flags, or other bugs, - * we handle them here. 
- * - * At the end of this section, c->x86_capability better - * indicate the features this CPU genuinely supports! - */ - if (this_cpu->c_init) - this_cpu->c_init(c); - - /* Disable the PN if appropriate */ - squash_the_stupid_serial_number(c); - - /* - * The vendor-specific functions might have changed features. Now - * we do "generic changes." - */ - - /* TSC disabled? */ - if ( tsc_disable ) - clear_bit(X86_FEATURE_TSC, c->x86_capability); - - /* FXSR disabled? */ - if (disable_x86_fxsr) { - clear_bit(X86_FEATURE_FXSR, c->x86_capability); - clear_bit(X86_FEATURE_XMM, c->x86_capability); - } - - /* SEP disabled? */ - if (disable_x86_sep) - clear_bit(X86_FEATURE_SEP, c->x86_capability); - - if (disable_pse) - clear_bit(X86_FEATURE_PSE, c->x86_capability); - - /* If the model name is still unset, do table lookup. */ - if ( !c->x86_model_id[0] ) { - char *p; - p = table_lookup_model(c); - if ( p ) - strcpy(c->x86_model_id, p); - else - /* Last resort... */ - sprintf(c->x86_model_id, "%02x/%02x", - c->x86, c->x86_model); - } - - /* Now the feature flags better reflect actual CPU features! */ - - printk(KERN_DEBUG "CPU: After all inits, caps:"); - for (i = 0; i < NCAPINTS; i++) - printk(" %08lx", c->x86_capability[i]); - printk("\n"); - - /* - * On SMP, boot_cpu_data holds the common feature set between - * all CPUs; so make sure that we indicate which features are - * common between the CPUs. The first time this routine gets - * executed, c == &boot_cpu_data. - */ - if ( c != &boot_cpu_data ) { - /* AND the already accumulated flags with these */ - for ( i = 0 ; i < NCAPINTS ; i++ ) - boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; - } - - /* Init Machine Check Exception if available. 
*/ - mcheck_init(c); -} - -void __init identify_boot_cpu(void) -{ - identify_cpu(&boot_cpu_data); - sysenter_setup(); - enable_sep_cpu(); - mtrr_bp_init(); -} - -void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) -{ - BUG_ON(c == &boot_cpu_data); - identify_cpu(c); - enable_sep_cpu(); - mtrr_ap_init(); -} - -#ifdef CONFIG_X86_HT -void __cpuinit detect_ht(struct cpuinfo_x86 *c) -{ - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) - return; - - smp_num_siblings = (ebx & 0xff0000) >> 16; - - if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1 ) { - - if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of the " - "siblings %d", smp_num_siblings); - smp_num_siblings = 1; - return; - } - - index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); - - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings) ; - - core_bits = get_count_order(c->x86_max_cores); - - c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & - ((1 << core_bits) - 1); - - if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); - } -} -#endif - -void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) -{ - char *vendor = NULL; - - if (c->x86_vendor < X86_VENDOR_NUM) - vendor = this_cpu->c_vendor; - else if (c->cpuid_level >= 0) - vendor = c->x86_vendor_id; - - if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) - printk("%s ", vendor); - - if (!c->x86_model_id[0]) - printk("%d86", c->x86); - else - printk("%s", c->x86_model_id); - - if (c->x86_mask || c->cpuid_level >= 0) - printk(" stepping %02x\n", c->x86_mask); - else - printk("\n"); -} - 
-cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; - -/* This is hacky. :) - * We're emulating future behavior. - * In the future, the cpu-specific init functions will be called implicitly - * via the magic of initcalls. - * They will insert themselves into the cpu_devs structure. - * Then, when cpu_init() is called, we can just iterate over that array. - */ - -extern int intel_cpu_init(void); -extern int cyrix_init_cpu(void); -extern int nsc_init_cpu(void); -extern int amd_init_cpu(void); -extern int centaur_init_cpu(void); -extern int transmeta_init_cpu(void); -extern int nexgen_init_cpu(void); -extern int umc_init_cpu(void); - -void __init early_cpu_init(void) -{ - intel_cpu_init(); - cyrix_init_cpu(); - nsc_init_cpu(); - amd_init_cpu(); - centaur_init_cpu(); - transmeta_init_cpu(); - nexgen_init_cpu(); - umc_init_cpu(); - early_cpu_detect(); - -#ifdef CONFIG_DEBUG_PAGEALLOC - /* pse is not compatible with on-the-fly unmapping, - * disable it even if the cpus claim to support it. - */ - clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); - disable_pse = 1; -#endif -} - -/* Make sure %fs is initialized properly in idle threads */ -struct pt_regs * __devinit idle_regs(struct pt_regs *regs) -{ - memset(regs, 0, sizeof(struct pt_regs)); - regs->xfs = __KERNEL_PERCPU; - return regs; -} - -/* Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. */ -void switch_to_new_gdt(void) -{ - struct Xgt_desc_struct gdt_descr; - - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); -} - -/* - * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. 
- */ -void __cpuinit cpu_init(void) -{ - int cpu = smp_processor_id(); - struct task_struct *curr = current; - struct tss_struct * t = &per_cpu(init_tss, cpu); - struct thread_struct *thread = &curr->thread; - - if (cpu_test_and_set(cpu, cpu_initialized)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); - for (;;) local_irq_enable(); - } - - printk(KERN_INFO "Initializing CPU#%d\n", cpu); - - if (cpu_has_vme || cpu_has_tsc || cpu_has_de) - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - if (tsc_disable && cpu_has_tsc) { - printk(KERN_NOTICE "Disabling TSC...\n"); - /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ - clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); - set_in_cr4(X86_CR4_TSD); - } - - load_idt(&idt_descr); - switch_to_new_gdt(); - - /* - * Set up and load the per-CPU TSS and LDT - */ - atomic_inc(&init_mm.mm_count); - curr->active_mm = &init_mm; - if (curr->mm) - BUG(); - enter_lazy_tlb(&init_mm, curr); - - load_esp0(t, thread); - set_tss_desc(cpu,t); - load_TR_desc(); - load_LDT(&init_mm.context); - -#ifdef CONFIG_DOUBLEFAULT - /* Set up doublefault TSS pointer in the GDT */ - __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); -#endif - - /* Clear %gs. 
*/ - asm volatile ("mov %0, %%gs" : : "r" (0)); - - /* Clear all 6 debug registers: */ - set_debugreg(0, 0); - set_debugreg(0, 1); - set_debugreg(0, 2); - set_debugreg(0, 3); - set_debugreg(0, 6); - set_debugreg(0, 7); - - /* - * Force FPU initialization: - */ - current_thread_info()->status = 0; - clear_used_math(); - mxcsr_feature_mask_init(); -} - -#ifdef CONFIG_HOTPLUG_CPU -void __cpuinit cpu_uninit(void) -{ - int cpu = raw_smp_processor_id(); - cpu_clear(cpu, cpu_initialized); - - /* lazy TLB state */ - per_cpu(cpu_tlbstate, cpu).state = 0; - per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; -} -#endif diff --git a/arch/i386/kernel/cpu/cpu.h b/arch/i386/kernel/cpu/cpu.h deleted file mode 100644 index 2f6432cef6f..00000000000 --- a/arch/i386/kernel/cpu/cpu.h +++ /dev/null @@ -1,28 +0,0 @@ - -struct cpu_model_info { - int vendor; - int family; - char *model_names[16]; -}; - -/* attempt to consolidate cpu attributes */ -struct cpu_dev { - char * c_vendor; - - /* some have two possibilities for cpuid string */ - char * c_ident[2]; - - struct cpu_model_info c_models[4]; - - void (*c_init)(struct cpuinfo_x86 * c); - void (*c_identify)(struct cpuinfo_x86 * c); - unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); -}; - -extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; - -extern int get_model_name(struct cpuinfo_x86 *c); -extern void display_cacheinfo(struct cpuinfo_x86 *c); - -extern void early_intel_workaround(struct cpuinfo_x86 *c); - diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c deleted file mode 100644 index 122d2d75aa9..00000000000 --- a/arch/i386/kernel/cpu/cyrix.c +++ /dev/null @@ -1,463 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cpu.h" - -/* - * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. 
about the CPU - */ -static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) -{ - unsigned char ccr2, ccr3; - unsigned long flags; - - /* we test for DEVID by checking whether CCR3 is writable */ - local_irq_save(flags); - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, ccr3 ^ 0x80); - getCx86(0xc0); /* dummy to change bus */ - - if (getCx86(CX86_CCR3) == ccr3) { /* no DEVID regs. */ - ccr2 = getCx86(CX86_CCR2); - setCx86(CX86_CCR2, ccr2 ^ 0x04); - getCx86(0xc0); /* dummy */ - - if (getCx86(CX86_CCR2) == ccr2) /* old Cx486SLC/DLC */ - *dir0 = 0xfd; - else { /* Cx486S A step */ - setCx86(CX86_CCR2, ccr2); - *dir0 = 0xfe; - } - } - else { - setCx86(CX86_CCR3, ccr3); /* restore CCR3 */ - - /* read DIR0 and DIR1 CPU registers */ - *dir0 = getCx86(CX86_DIR0); - *dir1 = getCx86(CX86_DIR1); - } - local_irq_restore(flags); -} - -/* - * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in - * order to identify the Cyrix CPU model after we're out of setup.c - * - * Actually since bugs.h doesn't even reference this perhaps someone should - * fix the documentation ??? - */ -static unsigned char Cx86_dir0_msb __cpuinitdata = 0; - -static char Cx86_model[][9] __cpuinitdata = { - "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", - "M II ", "Unknown" -}; -static char Cx486_name[][5] __cpuinitdata = { - "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", - "SRx2", "DRx2" -}; -static char Cx486S_name[][4] __cpuinitdata = { - "S", "S2", "Se", "S2e" -}; -static char Cx486D_name[][4] __cpuinitdata = { - "DX", "DX2", "?", "?", "?", "DX4" -}; -static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock"; -static char cyrix_model_mult1[] __cpuinitdata = "12??43"; -static char cyrix_model_mult2[] __cpuinitdata = "12233445"; - -/* - * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old - * BIOSes for compatibility with DOS games. This makes the udelay loop - * work correctly, and improves performance. 
- * - * FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP - */ - -extern void calibrate_delay(void) __init; - -static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c) -{ - unsigned long flags; - - if (Cx86_dir0_msb == 3) { - unsigned char ccr3, ccr5; - - local_irq_save(flags); - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - ccr5 = getCx86(CX86_CCR5); - if (ccr5 & 2) - setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */ - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ - local_irq_restore(flags); - - if (ccr5 & 2) { /* possible wrong calibration done */ - printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n"); - calibrate_delay(); - c->loops_per_jiffy = loops_per_jiffy; - } - } -} - - -static void __cpuinit set_cx86_reorder(void) -{ - u8 ccr3; - - printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n"); - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */ - - /* Load/Store Serialize to mem access disable (=reorder it)  */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); - /* set load/store serialize from 1GB to 4GB */ - ccr3 |= 0xe0; - setCx86(CX86_CCR3, ccr3); -} - -static void __cpuinit set_cx86_memwb(void) -{ - u32 cr0; - - printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); - - /* CCR2 bit 2: unlock NW bit */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); - /* set 'Not Write-through' */ - cr0 = 0x20000000; - write_cr0(read_cr0() | cr0); - /* CCR2 bit 2: lock NW bit and set WT1 */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); -} - -static void __cpuinit set_cx86_inc(void) -{ - unsigned char ccr3; - - printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n"); - - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */ - /* PCR1 -- Performance Control */ - /* Incrementor on, whatever that is */ - setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); - /* PCR0 -- 
Performance Control */ - /* Incrementor Margin 10 */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ -} - -/* - * Configure later MediaGX and/or Geode processor. - */ - -static void __cpuinit geode_configure(void) -{ - unsigned long flags; - u8 ccr3; - local_irq_save(flags); - - /* Suspend on halt power saving and enable #SUSP pin */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); - - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - - - /* FPU fast, DTE cache, Mem bypass */ - setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ - - set_cx86_memwb(); - set_cx86_reorder(); - set_cx86_inc(); - - local_irq_restore(flags); -} - - -static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) -{ - unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; - char *buf = c->x86_model_id; - const char *p = NULL; - - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, c->x86_capability); - - /* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */ - if ( test_bit(1*32+24, c->x86_capability) ) { - clear_bit(1*32+24, c->x86_capability); - set_bit(X86_FEATURE_CXMMX, c->x86_capability); - } - - do_cyrix_devid(&dir0, &dir1); - - check_cx686_slop(c); - - Cx86_dir0_msb = dir0_msn = dir0 >> 4; /* identifies CPU "family" */ - dir0_lsn = dir0 & 0xf; /* model or clock multiplier */ - - /* common case step number/rev -- exceptions handled below */ - c->x86_model = (dir1 >> 4) + 1; - c->x86_mask = dir1 & 0xf; - - /* Now cook; the original recipe is by Channing Corn, from Cyrix. - * We do the same thing for each generation: we work out - * the model, multiplier and stepping. Black magic included, - * to make the silicon step/rev numbers match the printed ones. 
- */ - - switch (dir0_msn) { - unsigned char tmp; - - case 0: /* Cx486SLC/DLC/SRx/DRx */ - p = Cx486_name[dir0_lsn & 7]; - break; - - case 1: /* Cx486S/DX/DX2/DX4 */ - p = (dir0_lsn & 8) ? Cx486D_name[dir0_lsn & 5] - : Cx486S_name[dir0_lsn & 3]; - break; - - case 2: /* 5x86 */ - Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; - p = Cx86_cb+2; - break; - - case 3: /* 6x86/6x86L */ - Cx86_cb[1] = ' '; - Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; - if (dir1 > 0x21) { /* 686L */ - Cx86_cb[0] = 'L'; - p = Cx86_cb; - (c->x86_model)++; - } else /* 686 */ - p = Cx86_cb+1; - /* Emulate MTRRs using Cyrix's ARRs. */ - set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); - /* 6x86's contain this bug */ - c->coma_bug = 1; - break; - - case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */ -#ifdef CONFIG_PCI - { - u32 vendor, device; - /* It isn't really a PCI quirk directly, but the cure is the - same. The MediaGX has deep magic SMM stuff that handles the - SB emulation. It thows away the fifo on disable_dma() which - is wrong and ruins the audio. - - Bug2: VSA1 has a wrap bug so that using maximum sized DMA - causes bad things. According to NatSemi VSA2 has another - bug to do with 'hlt'. I've not seen any boards using VSA2 - and X doesn't seem to support it either so who cares 8). - VSA1 we work around however. - */ - - printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); - isa_dma_bridge_buggy = 2; - - /* We do this before the PCI layer is running. However we - are safe here as we know the bridge must be a Cyrix - companion and must be present */ - vendor = read_pci_config_16(0, 0, 0x12, PCI_VENDOR_ID); - device = read_pci_config_16(0, 0, 0x12, PCI_DEVICE_ID); - - /* - * The 5510/5520 companion chips have a funky PIT. 
- */ - if (vendor == PCI_VENDOR_ID_CYRIX && - (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) - mark_tsc_unstable("cyrix 5510/5520 detected"); - } -#endif - c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ - - /* GXm supports extended cpuid levels 'ala' AMD */ - if (c->cpuid_level == 2) { - /* Enable cxMMX extensions (GX1 Datasheet 54) */ - setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); - - /* - * GXm : 0x30 ... 0x5f GXm datasheet 51 - * GXlv: 0x6x GXlv datasheet 54 - * ? : 0x7x - * GX1 : 0x8x GX1 datasheet 56 - */ - if((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <=dir1 && dir1 <= 0x8f)) - geode_configure(); - get_model_name(c); /* get CPU marketing name */ - return; - } - else { /* MediaGX */ - Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; - p = Cx86_cb+2; - c->x86_model = (dir1 & 0x20) ? 1 : 2; - } - break; - - case 5: /* 6x86MX/M II */ - if (dir1 > 7) - { - dir0_msn++; /* M II */ - /* Enable MMX extensions (App note 108) */ - setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); - } - else - { - c->coma_bug = 1; /* 6x86MX, it has the bug. */ - } - tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0; - Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7]; - p = Cx86_cb+tmp; - if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20)) - (c->x86_model)++; - /* Emulate MTRRs using Cyrix's ARRs. */ - set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); - break; - - case 0xf: /* Cyrix 486 without DEVID registers */ - switch (dir0_lsn) { - case 0xd: /* either a 486SLC or DLC w/o DEVID */ - dir0_msn = 0; - p = Cx486_name[(c->hard_math) ? 
1 : 0]; - break; - - case 0xe: /* a 486S A step */ - dir0_msn = 0; - p = Cx486S_name[0]; - break; - } - break; - - default: /* unknown (shouldn't happen, we know everyone ;-) */ - dir0_msn = 7; - break; - } - strcpy(buf, Cx86_model[dir0_msn & 7]); - if (p) strcat(buf, p); - return; -} - -/* - * Handle National Semiconductor branded processors - */ -static void __cpuinit init_nsc(struct cpuinfo_x86 *c) -{ - /* There may be GX1 processors in the wild that are branded - * NSC and not Cyrix. - * - * This function only handles the GX processor, and kicks every - * thing else to the Cyrix init function above - that should - * cover any processors that might have been branded differently - * after NSC acquired Cyrix. - * - * If this breaks your GX1 horribly, please e-mail - * info-linux@ldcmail.amd.com to tell us. - */ - - /* Handle the GX (Formally known as the GX2) */ - - if (c->x86 == 5 && c->x86_model == 5) - display_cacheinfo(c); - else - init_cyrix(c); -} - -/* - * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected - * by the fact that they preserve the flags across the division of 5/2. - * PII and PPro exhibit this behavior too, but they have cpuid available. - */ - -/* - * Perform the Cyrix 5/2 test. A Cyrix won't change - * the flags, while other 486 chips will. - */ -static inline int test_cyrix_52div(void) -{ - unsigned int test; - - __asm__ __volatile__( - "sahf\n\t" /* clear flags (%eax = 0x0005) */ - "div %b2\n\t" /* divide 5 by 2 */ - "lahf" /* store flags into %ah */ - : "=a" (test) - : "0" (5), "q" (2) - : "cc"); - - /* AH is 0x02 on Cyrix after the divide.. 
*/ - return (unsigned char) (test >> 8) == 0x02; -} - -static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c) -{ - /* Detect Cyrix with disabled CPUID */ - if ( c->x86 == 4 && test_cyrix_52div() ) { - unsigned char dir0, dir1; - - strcpy(c->x86_vendor_id, "CyrixInstead"); - c->x86_vendor = X86_VENDOR_CYRIX; - - /* Actually enable cpuid on the older cyrix */ - - /* Retrieve CPU revisions */ - - do_cyrix_devid(&dir0, &dir1); - - dir0>>=4; - - /* Check it is an affected model */ - - if (dir0 == 5 || dir0 == 3) - { - unsigned char ccr3; - unsigned long flags; - printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); - local_irq_save(flags); - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ - local_irq_restore(flags); - } - } -} - -static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { - .c_vendor = "Cyrix", - .c_ident = { "CyrixInstead" }, - .c_init = init_cyrix, - .c_identify = cyrix_identify, -}; - -int __init cyrix_init_cpu(void) -{ - cpu_devs[X86_VENDOR_CYRIX] = &cyrix_cpu_dev; - return 0; -} - -static struct cpu_dev nsc_cpu_dev __cpuinitdata = { - .c_vendor = "NSC", - .c_ident = { "Geode by NSC" }, - .c_init = init_nsc, -}; - -int __init nsc_init_cpu(void) -{ - cpu_devs[X86_VENDOR_NSC] = &nsc_cpu_dev; - return 0; -} - diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c deleted file mode 100644 index dc4e08147b1..00000000000 --- a/arch/i386/kernel/cpu/intel.c +++ /dev/null @@ -1,333 +0,0 @@ -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "cpu.h" - -#ifdef CONFIG_X86_LOCAL_APIC -#include -#include -#include -#endif - -extern int trap_init_f00f_bug(void); - -#ifdef CONFIG_X86_INTEL_USERCOPY -/* - * Alignment at which movsl is preferred for bulk memory copies. 
- */ -struct movsl_mask movsl_mask __read_mostly; -#endif - -void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c) -{ - if (c->x86_vendor != X86_VENDOR_INTEL) - return; - /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ - if (c->x86 == 15 && c->x86_cache_alignment == 64) - c->x86_cache_alignment = 128; -} - -/* - * Early probe support logic for ppro memory erratum #50 - * - * This is called before we do cpu ident work - */ - -int __cpuinit ppro_with_ram_bug(void) -{ - /* Uses data from early_cpu_detect now */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask < 8) { - printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); - return 1; - } - return 0; -} - - -/* - * P4 Xeon errata 037 workaround. - * Hardware prefetcher may cause stale data to be loaded into the cache. - */ -static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) -{ - unsigned long lo, hi; - - if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { - rdmsr (MSR_IA32_MISC_ENABLE, lo, hi); - if ((lo & (1<<9)) == 0) { - printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); - printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); - lo |= (1<<9); /* Disable hw prefetching */ - wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); - } - } -} - - -/* - * find out the number of processor cores on the die - */ -static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, ebx, ecx, edx; - - if (c->cpuid_level < 4) - return 1; - - /* Intel has a non-standard dependency on %ecx for this CPUID level. 
*/ - cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); - if (eax & 0x1f) - return ((eax >> 26) + 1); - else - return 1; -} - -static void __cpuinit init_intel(struct cpuinfo_x86 *c) -{ - unsigned int l2 = 0; - char *p = NULL; - -#ifdef CONFIG_X86_F00F_BUG - /* - * All current models of Pentium and Pentium with MMX technology CPUs - * have the F0 0F bug, which lets nonprivileged users lock up the system. - * Note that the workaround only should be initialized once... - */ - c->f00f_bug = 0; - if (!paravirt_enabled() && c->x86 == 5) { - static int f00f_workaround_enabled = 0; - - c->f00f_bug = 1; - if ( !f00f_workaround_enabled ) { - trap_init_f00f_bug(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); - f00f_workaround_enabled = 1; - } - } -#endif - - select_idle_routine(c); - l2 = init_intel_cacheinfo(c); - if (c->cpuid_level > 9 ) { - unsigned eax = cpuid_eax(10); - /* Check for version and the number of counters */ - if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) - set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); - } - - /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) - clear_bit(X86_FEATURE_SEP, c->x86_capability); - - /* Names for the Pentium II/Celeron processors - detectable only by also checking the cache size. - Dixon is NOT a Celeron. 
*/ - if (c->x86 == 6) { - switch (c->x86_model) { - case 5: - if (c->x86_mask == 0) { - if (l2 == 0) - p = "Celeron (Covington)"; - else if (l2 == 256) - p = "Mobile Pentium II (Dixon)"; - } - break; - - case 6: - if (l2 == 128) - p = "Celeron (Mendocino)"; - else if (c->x86_mask == 0 || c->x86_mask == 5) - p = "Celeron-A"; - break; - - case 8: - if (l2 == 128) - p = "Celeron (Coppermine)"; - break; - } - } - - if ( p ) - strcpy(c->x86_model_id, p); - - c->x86_max_cores = num_cpu_cores(c); - - detect_ht(c); - - /* Work around errata */ - Intel_errata_workarounds(c); - -#ifdef CONFIG_X86_INTEL_USERCOPY - /* - * Set up the preferred alignment for movsl bulk memory moves - */ - switch (c->x86) { - case 4: /* 486: untested */ - break; - case 5: /* Old Pentia: untested */ - break; - case 6: /* PII/PIII only like movsl with 8-byte alignment */ - movsl_mask.mask = 7; - break; - case 15: /* P4 is OK down to 8-byte alignment */ - movsl_mask.mask = 7; - break; - } -#endif - - if (c->x86 == 15) { - set_bit(X86_FEATURE_P4, c->x86_capability); - set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability); - } - if (c->x86 == 6) - set_bit(X86_FEATURE_P3, c->x86_capability); - if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); - - if (cpu_has_ds) { - unsigned int l1; - rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); - if (!(l1 & (1<<11))) - set_bit(X86_FEATURE_BTS, c->x86_capability); - if (!(l1 & (1<<12))) - set_bit(X86_FEATURE_PEBS, c->x86_capability); - } -} - -static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) -{ - /* Intel PIII Tualatin. This comes in two flavours. - * One has 256kb of cache, the other 512. We have no way - * to determine which, so we use a boottime override - * for the 512kb model, and assume 256 otherwise. 
- */ - if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) - size = 256; - return size; -} - -static struct cpu_dev intel_cpu_dev __cpuinitdata = { - .c_vendor = "Intel", - .c_ident = { "GenuineIntel" }, - .c_models = { - { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = - { - [0] = "486 DX-25/33", - [1] = "486 DX-50", - [2] = "486 SX", - [3] = "486 DX/2", - [4] = "486 SL", - [5] = "486 SX/2", - [7] = "486 DX/2-WB", - [8] = "486 DX/4", - [9] = "486 DX/4-WB" - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = - { - [0] = "Pentium 60/66 A-step", - [1] = "Pentium 60/66", - [2] = "Pentium 75 - 200", - [3] = "OverDrive PODP5V83", - [4] = "Pentium MMX", - [7] = "Mobile Pentium 75 - 200", - [8] = "Mobile Pentium MMX" - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = - { - [0] = "Pentium Pro A-step", - [1] = "Pentium Pro", - [3] = "Pentium II (Klamath)", - [4] = "Pentium II (Deschutes)", - [5] = "Pentium II (Deschutes)", - [6] = "Mobile Pentium II", - [7] = "Pentium III (Katmai)", - [8] = "Pentium III (Coppermine)", - [10] = "Pentium III (Cascades)", - [11] = "Pentium III (Tualatin)", - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names = - { - [0] = "Pentium 4 (Unknown)", - [1] = "Pentium 4 (Willamette)", - [2] = "Pentium 4 (Northwood)", - [4] = "Pentium 4 (Foster)", - [5] = "Pentium 4 (Foster)", - } - }, - }, - .c_init = init_intel, - .c_size_cache = intel_size_cache, -}; - -__init int intel_cpu_init(void) -{ - cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev; - return 0; -} - -#ifndef CONFIG_X86_CMPXCHG -unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) -{ - u8 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. 
Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u8 *)ptr; - if (prev == old) - *(u8 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u8); - -unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) -{ - u16 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u16 *)ptr; - if (prev == old) - *(u16 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u16); - -unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) -{ - u32 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u32 *)ptr; - if (prev == old) - *(u32 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u32); -#endif - -// arch_initcall(intel_cpu_init); - diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c deleted file mode 100644 index db6c25aa577..00000000000 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ /dev/null @@ -1,806 +0,0 @@ -/* - * Routines to indentify caches on Intel CPU. - * - * Changes: - * Venkatesh Pallipadi : Adding cache identification through cpuid(4) - * Ashok Raj : Work with CPU hotplug infrastructure. - * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. 
- */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -#define LVL_1_INST 1 -#define LVL_1_DATA 2 -#define LVL_2 3 -#define LVL_3 4 -#define LVL_TRACE 5 - -struct _cache_table -{ - unsigned char descriptor; - char cache_type; - short size; -}; - -/* all the cache descriptor types we care about (no TLB or trace cache entries) */ -static struct _cache_table cache_table[] __cpuinitdata = -{ - { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ - { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ - { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ - { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ - { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ - { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ - { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */ - { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */ - { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */ - { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ - { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ - { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ - { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */ - { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */ - { 0x46, LVL_3, 
4096 }, /* 4-way set assoc, 64 byte line size */ - { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */ - { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ - { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */ - { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ - { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ - { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ - { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */ - { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ - { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ - { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ - { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */ - { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */ - { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ - { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ - { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ - { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */ - { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ - { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ - { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ - { 0x00, 0, 0} -}; - - -enum _cache_type -{ - CACHE_TYPE_NULL = 0, - CACHE_TYPE_DATA = 1, - 
CACHE_TYPE_INST = 2, - CACHE_TYPE_UNIFIED = 3 -}; - -union _cpuid4_leaf_eax { - struct { - enum _cache_type type:5; - unsigned int level:3; - unsigned int is_self_initializing:1; - unsigned int is_fully_associative:1; - unsigned int reserved:4; - unsigned int num_threads_sharing:12; - unsigned int num_cores_on_die:6; - } split; - u32 full; -}; - -union _cpuid4_leaf_ebx { - struct { - unsigned int coherency_line_size:12; - unsigned int physical_line_partition:10; - unsigned int ways_of_associativity:10; - } split; - u32 full; -}; - -union _cpuid4_leaf_ecx { - struct { - unsigned int number_of_sets:32; - } split; - u32 full; -}; - -struct _cpuid4_info { - union _cpuid4_leaf_eax eax; - union _cpuid4_leaf_ebx ebx; - union _cpuid4_leaf_ecx ecx; - unsigned long size; - cpumask_t shared_cpu_map; -}; - -unsigned short num_cache_leaves; - -/* AMD doesn't have CPUID4. Emulate it here to report the same - information to the user. This makes some assumptions about the machine: - L2 not shared, no SMT etc. that is currently true on AMD CPUs. - - In theory the TLBs could be reported as fake type (they are in "dummy"). - Maybe later */ -union l1_cache { - struct { - unsigned line_size : 8; - unsigned lines_per_tag : 8; - unsigned assoc : 8; - unsigned size_in_kb : 8; - }; - unsigned val; -}; - -union l2_cache { - struct { - unsigned line_size : 8; - unsigned lines_per_tag : 4; - unsigned assoc : 4; - unsigned size_in_kb : 16; - }; - unsigned val; -}; - -union l3_cache { - struct { - unsigned line_size : 8; - unsigned lines_per_tag : 4; - unsigned assoc : 4; - unsigned res : 2; - unsigned size_encoded : 14; - }; - unsigned val; -}; - -static const unsigned short assocs[] = { - [1] = 1, [2] = 2, [4] = 4, [6] = 8, - [8] = 16, [0xa] = 32, [0xb] = 48, - [0xc] = 64, - [0xf] = 0xffff // ?? 
-}; - -static const unsigned char levels[] = { 1, 1, 2, 3 }; -static const unsigned char types[] = { 1, 2, 3, 3 }; - -static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, - union _cpuid4_leaf_ebx *ebx, - union _cpuid4_leaf_ecx *ecx) -{ - unsigned dummy; - unsigned line_size, lines_per_tag, assoc, size_in_kb; - union l1_cache l1i, l1d; - union l2_cache l2; - union l3_cache l3; - union l1_cache *l1 = &l1d; - - eax->full = 0; - ebx->full = 0; - ecx->full = 0; - - cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); - cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val); - - switch (leaf) { - case 1: - l1 = &l1i; - case 0: - if (!l1->val) - return; - assoc = l1->assoc; - line_size = l1->line_size; - lines_per_tag = l1->lines_per_tag; - size_in_kb = l1->size_in_kb; - break; - case 2: - if (!l2.val) - return; - assoc = l2.assoc; - line_size = l2.line_size; - lines_per_tag = l2.lines_per_tag; - /* cpu_data has errata corrections for K7 applied */ - size_in_kb = current_cpu_data.x86_cache_size; - break; - case 3: - if (!l3.val) - return; - assoc = l3.assoc; - line_size = l3.line_size; - lines_per_tag = l3.lines_per_tag; - size_in_kb = l3.size_encoded * 512; - break; - default: - return; - } - - eax->split.is_self_initializing = 1; - eax->split.type = types[leaf]; - eax->split.level = levels[leaf]; - if (leaf == 3) - eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1; - else - eax->split.num_threads_sharing = 0; - eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; - - - if (assoc == 0xf) - eax->split.is_fully_associative = 1; - ebx->split.coherency_line_size = line_size - 1; - ebx->split.ways_of_associativity = assocs[assoc] - 1; - ebx->split.physical_line_partition = lines_per_tag - 1; - ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / - (ebx->split.ways_of_associativity + 1) - 1; -} - -static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) -{ - union _cpuid4_leaf_eax eax; - union 
_cpuid4_leaf_ebx ebx; - union _cpuid4_leaf_ecx ecx; - unsigned edx; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - amd_cpuid4(index, &eax, &ebx, &ecx); - else - cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); - if (eax.split.type == CACHE_TYPE_NULL) - return -EIO; /* better error ? */ - - this_leaf->eax = eax; - this_leaf->ebx = ebx; - this_leaf->ecx = ecx; - this_leaf->size = (ecx.split.number_of_sets + 1) * - (ebx.split.coherency_line_size + 1) * - (ebx.split.physical_line_partition + 1) * - (ebx.split.ways_of_associativity + 1); - return 0; -} - -static int __cpuinit find_num_cache_leaves(void) -{ - unsigned int eax, ebx, ecx, edx; - union _cpuid4_leaf_eax cache_eax; - int i = -1; - - do { - ++i; - /* Do cpuid(4) loop to find out num_cache_leaves */ - cpuid_count(4, i, &eax, &ebx, &ecx, &edx); - cache_eax.full = eax; - } while (cache_eax.split.type != CACHE_TYPE_NULL); - return i; -} - -unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) -{ - unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ - unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ - unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ - unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; -#ifdef CONFIG_X86_HT - unsigned int cpu = (c == &boot_cpu_data) ? 
0 : (c - cpu_data); -#endif - - if (c->cpuid_level > 3) { - static int is_initialized; - - if (is_initialized == 0) { - /* Init num_cache_leaves from boot CPU */ - num_cache_leaves = find_num_cache_leaves(); - is_initialized++; - } - - /* - * Whenever possible use cpuid(4), deterministic cache - * parameters cpuid leaf to find the cache details - */ - for (i = 0; i < num_cache_leaves; i++) { - struct _cpuid4_info this_leaf; - - int retval; - - retval = cpuid4_cache_lookup(i, &this_leaf); - if (retval >= 0) { - switch(this_leaf.eax.split.level) { - case 1: - if (this_leaf.eax.split.type == - CACHE_TYPE_DATA) - new_l1d = this_leaf.size/1024; - else if (this_leaf.eax.split.type == - CACHE_TYPE_INST) - new_l1i = this_leaf.size/1024; - break; - case 2: - new_l2 = this_leaf.size/1024; - num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; - index_msb = get_count_order(num_threads_sharing); - l2_id = c->apicid >> index_msb; - break; - case 3: - new_l3 = this_leaf.size/1024; - num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; - index_msb = get_count_order(num_threads_sharing); - l3_id = c->apicid >> index_msb; - break; - default: - break; - } - } - } - } - /* - * Don't use cpuid2 if cpuid4 is supported. 
For P4, we use cpuid2 for - * trace cache - */ - if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { - /* supports eax=2 call */ - int i, j, n; - int regs[4]; - unsigned char *dp = (unsigned char *)regs; - int only_trace = 0; - - if (num_cache_leaves != 0 && c->x86 == 15) - only_trace = 1; - - /* Number of times to iterate */ - n = cpuid_eax(2) & 0xFF; - - for ( i = 0 ; i < n ; i++ ) { - cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); - - /* If bit 31 is set, this is an unknown format */ - for ( j = 0 ; j < 3 ; j++ ) { - if ( regs[j] < 0 ) regs[j] = 0; - } - - /* Byte 0 is level count, not a descriptor */ - for ( j = 1 ; j < 16 ; j++ ) { - unsigned char des = dp[j]; - unsigned char k = 0; - - /* look up this descriptor in the table */ - while (cache_table[k].descriptor != 0) - { - if (cache_table[k].descriptor == des) { - if (only_trace && cache_table[k].cache_type != LVL_TRACE) - break; - switch (cache_table[k].cache_type) { - case LVL_1_INST: - l1i += cache_table[k].size; - break; - case LVL_1_DATA: - l1d += cache_table[k].size; - break; - case LVL_2: - l2 += cache_table[k].size; - break; - case LVL_3: - l3 += cache_table[k].size; - break; - case LVL_TRACE: - trace += cache_table[k].size; - break; - } - - break; - } - - k++; - } - } - } - } - - if (new_l1d) - l1d = new_l1d; - - if (new_l1i) - l1i = new_l1i; - - if (new_l2) { - l2 = new_l2; -#ifdef CONFIG_X86_HT - cpu_llc_id[cpu] = l2_id; -#endif - } - - if (new_l3) { - l3 = new_l3; -#ifdef CONFIG_X86_HT - cpu_llc_id[cpu] = l3_id; -#endif - } - - if (trace) - printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); - else if ( l1i ) - printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); - - if (l1d) - printk(", L1 D cache: %dK\n", l1d); - else - printk("\n"); - - if (l2) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - - if (l3) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - - c->x86_cache_size = l3 ? l3 : (l2 ? 
l2 : (l1i+l1d)); - - return l2; -} - -/* pointer to _cpuid4_info array (for each cache leaf) */ -static struct _cpuid4_info *cpuid4_info[NR_CPUS]; -#define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y])) - -#ifdef CONFIG_SMP -static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) -{ - struct _cpuid4_info *this_leaf, *sibling_leaf; - unsigned long num_threads_sharing; - int index_msb, i; - struct cpuinfo_x86 *c = cpu_data; - - this_leaf = CPUID4_INFO_IDX(cpu, index); - num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; - - if (num_threads_sharing == 1) - cpu_set(cpu, this_leaf->shared_cpu_map); - else { - index_msb = get_count_order(num_threads_sharing); - - for_each_online_cpu(i) { - if (c[i].apicid >> index_msb == - c[cpu].apicid >> index_msb) { - cpu_set(i, this_leaf->shared_cpu_map); - if (i != cpu && cpuid4_info[i]) { - sibling_leaf = CPUID4_INFO_IDX(i, index); - cpu_set(cpu, sibling_leaf->shared_cpu_map); - } - } - } - } -} -static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) -{ - struct _cpuid4_info *this_leaf, *sibling_leaf; - int sibling; - - this_leaf = CPUID4_INFO_IDX(cpu, index); - for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { - sibling_leaf = CPUID4_INFO_IDX(sibling, index); - cpu_clear(cpu, sibling_leaf->shared_cpu_map); - } -} -#else -static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} -static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {} -#endif - -static void free_cache_attributes(unsigned int cpu) -{ - kfree(cpuid4_info[cpu]); - cpuid4_info[cpu] = NULL; -} - -static int __cpuinit detect_cache_attributes(unsigned int cpu) -{ - struct _cpuid4_info *this_leaf; - unsigned long j; - int retval; - cpumask_t oldmask; - - if (num_cache_leaves == 0) - return -ENOENT; - - cpuid4_info[cpu] = kzalloc( - sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); - if (cpuid4_info[cpu] == NULL) - return -ENOMEM; - - oldmask = 
current->cpus_allowed; - retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (retval) - goto out; - - /* Do cpuid and store the results */ - retval = 0; - for (j = 0; j < num_cache_leaves; j++) { - this_leaf = CPUID4_INFO_IDX(cpu, j); - retval = cpuid4_cache_lookup(j, this_leaf); - if (unlikely(retval < 0)) - break; - cache_shared_cpu_map_setup(cpu, j); - } - set_cpus_allowed(current, oldmask); - -out: - if (retval) - free_cache_attributes(cpu); - return retval; -} - -#ifdef CONFIG_SYSFS - -#include -#include - -extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ - -/* pointer to kobject for cpuX/cache */ -static struct kobject * cache_kobject[NR_CPUS]; - -struct _index_kobject { - struct kobject kobj; - unsigned int cpu; - unsigned short index; -}; - -/* pointer to array of kobjects for cpuX/cache/indexY */ -static struct _index_kobject *index_kobject[NR_CPUS]; -#define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y])) - -#define show_one_plus(file_name, object, val) \ -static ssize_t show_##file_name \ - (struct _cpuid4_info *this_leaf, char *buf) \ -{ \ - return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \ -} - -show_one_plus(level, eax.split.level, 0); -show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1); -show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1); -show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1); -show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); - -static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) -{ - return sprintf (buf, "%luK\n", this_leaf->size / 1024); -} - -static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf) -{ - char mask_str[NR_CPUS]; - cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map); - return sprintf(buf, "%s\n", mask_str); -} - -static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { - switch(this_leaf->eax.split.type) { - case 
CACHE_TYPE_DATA: - return sprintf(buf, "Data\n"); - break; - case CACHE_TYPE_INST: - return sprintf(buf, "Instruction\n"); - break; - case CACHE_TYPE_UNIFIED: - return sprintf(buf, "Unified\n"); - break; - default: - return sprintf(buf, "Unknown\n"); - break; - } -} - -struct _cache_attr { - struct attribute attr; - ssize_t (*show)(struct _cpuid4_info *, char *); - ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); -}; - -#define define_one_ro(_name) \ -static struct _cache_attr _name = \ - __ATTR(_name, 0444, show_##_name, NULL) - -define_one_ro(level); -define_one_ro(type); -define_one_ro(coherency_line_size); -define_one_ro(physical_line_partition); -define_one_ro(ways_of_associativity); -define_one_ro(number_of_sets); -define_one_ro(size); -define_one_ro(shared_cpu_map); - -static struct attribute * default_attrs[] = { - &type.attr, - &level.attr, - &coherency_line_size.attr, - &physical_line_partition.attr, - &ways_of_associativity.attr, - &number_of_sets.attr, - &size.attr, - &shared_cpu_map.attr, - NULL -}; - -#define to_object(k) container_of(k, struct _index_kobject, kobj) -#define to_attr(a) container_of(a, struct _cache_attr, attr) - -static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) -{ - struct _cache_attr *fattr = to_attr(attr); - struct _index_kobject *this_leaf = to_object(kobj); - ssize_t ret; - - ret = fattr->show ? 
- fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), - buf) : - 0; - return ret; -} - -static ssize_t store(struct kobject * kobj, struct attribute * attr, - const char * buf, size_t count) -{ - return 0; -} - -static struct sysfs_ops sysfs_ops = { - .show = show, - .store = store, -}; - -static struct kobj_type ktype_cache = { - .sysfs_ops = &sysfs_ops, - .default_attrs = default_attrs, -}; - -static struct kobj_type ktype_percpu_entry = { - .sysfs_ops = &sysfs_ops, -}; - -static void cpuid4_cache_sysfs_exit(unsigned int cpu) -{ - kfree(cache_kobject[cpu]); - kfree(index_kobject[cpu]); - cache_kobject[cpu] = NULL; - index_kobject[cpu] = NULL; - free_cache_attributes(cpu); -} - -static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) -{ - - if (num_cache_leaves == 0) - return -ENOENT; - - detect_cache_attributes(cpu); - if (cpuid4_info[cpu] == NULL) - return -ENOENT; - - /* Allocate all required memory */ - cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL); - if (unlikely(cache_kobject[cpu] == NULL)) - goto err_out; - - index_kobject[cpu] = kzalloc( - sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); - if (unlikely(index_kobject[cpu] == NULL)) - goto err_out; - - return 0; - -err_out: - cpuid4_cache_sysfs_exit(cpu); - return -ENOMEM; -} - -/* Add/Remove cache interface for CPU device */ -static int __cpuinit cache_add_dev(struct sys_device * sys_dev) -{ - unsigned int cpu = sys_dev->id; - unsigned long i, j; - struct _index_kobject *this_object; - int retval = 0; - - retval = cpuid4_cache_sysfs_init(cpu); - if (unlikely(retval < 0)) - return retval; - - cache_kobject[cpu]->parent = &sys_dev->kobj; - kobject_set_name(cache_kobject[cpu], "%s", "cache"); - cache_kobject[cpu]->ktype = &ktype_percpu_entry; - retval = kobject_register(cache_kobject[cpu]); - - for (i = 0; i < num_cache_leaves; i++) { - this_object = INDEX_KOBJECT_PTR(cpu,i); - this_object->cpu = cpu; - this_object->index = i; - this_object->kobj.parent = 
cache_kobject[cpu]; - kobject_set_name(&(this_object->kobj), "index%1lu", i); - this_object->kobj.ktype = &ktype_cache; - retval = kobject_register(&(this_object->kobj)); - if (unlikely(retval)) { - for (j = 0; j < i; j++) { - kobject_unregister( - &(INDEX_KOBJECT_PTR(cpu,j)->kobj)); - } - kobject_unregister(cache_kobject[cpu]); - cpuid4_cache_sysfs_exit(cpu); - break; - } - } - return retval; -} - -static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) -{ - unsigned int cpu = sys_dev->id; - unsigned long i; - - if (cpuid4_info[cpu] == NULL) - return; - for (i = 0; i < num_cache_leaves; i++) { - cache_remove_shared_cpu_map(cpu, i); - kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); - } - kobject_unregister(cache_kobject[cpu]); - cpuid4_cache_sysfs_exit(cpu); - return; -} - -static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; - - sys_dev = get_cpu_sysdev(cpu); - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - cache_add_dev(sys_dev); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - cache_remove_dev(sys_dev); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = -{ - .notifier_call = cacheinfo_cpu_callback, -}; - -static int __cpuinit cache_sysfs_init(void) -{ - int i; - - if (num_cache_leaves == 0) - return 0; - - register_hotcpu_notifier(&cacheinfo_cpu_notifier); - - for_each_online_cpu(i) { - cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE, - (void *)(long)i); - } - - return 0; -} - -device_initcall(cache_sysfs_init); - -#endif diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c deleted file mode 100644 index 961fbe1a748..00000000000 --- a/arch/i386/kernel/cpu/nexgen.c +++ /dev/null @@ -1,60 +0,0 @@ -#include -#include -#include -#include - -#include "cpu.h" - -/* - * Detect a NexGen CPU running without BIOS 
hypercode new enough - * to have CPUID. (Thanks to Herbert Oppmann) - */ - -static int __cpuinit deep_magic_nexgen_probe(void) -{ - int ret; - - __asm__ __volatile__ ( - " movw $0x5555, %%ax\n" - " xorw %%dx,%%dx\n" - " movw $2, %%cx\n" - " divw %%cx\n" - " movl $0, %%eax\n" - " jnz 1f\n" - " movl $1, %%eax\n" - "1:\n" - : "=a" (ret) : : "cx", "dx" ); - return ret; -} - -static void __cpuinit init_nexgen(struct cpuinfo_x86 * c) -{ - c->x86_cache_size = 256; /* A few had 1 MB... */ -} - -static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c) -{ - /* Detect NexGen with old hypercode */ - if ( deep_magic_nexgen_probe() ) { - strcpy(c->x86_vendor_id, "NexGenDriven"); - } -} - -static struct cpu_dev nexgen_cpu_dev __cpuinitdata = { - .c_vendor = "Nexgen", - .c_ident = { "NexGenDriven" }, - .c_models = { - { .vendor = X86_VENDOR_NEXGEN, - .family = 5, - .model_names = { [1] = "Nx586" } - }, - }, - .c_init = init_nexgen, - .c_identify = nexgen_identify, -}; - -int __init nexgen_init_cpu(void) -{ - cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev; - return 0; -} diff --git a/arch/i386/kernel/cpu/perfctr-watchdog.c b/arch/i386/kernel/cpu/perfctr-watchdog.c deleted file mode 100644 index 93fecd4b03d..00000000000 --- a/arch/i386/kernel/cpu/perfctr-watchdog.c +++ /dev/null @@ -1,713 +0,0 @@ -/* local apic based NMI watchdog for various CPUs. - This file also handles reservation of performance counters for coordination - with other users (like oprofile). - - Note that these events normally don't tick when the CPU idles. This means - the frequency varies with CPU load. 
- - Original code for K7/P6 written by Keith Owens */ - -#include -#include -#include -#include -#include -#include -#include -#include - -struct nmi_watchdog_ctlblk { - unsigned int cccr_msr; - unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ - unsigned int evntsel_msr; /* the MSR to select the events to handle */ -}; - -/* Interface defining a CPU specific perfctr watchdog */ -struct wd_ops { - int (*reserve)(void); - void (*unreserve)(void); - int (*setup)(unsigned nmi_hz); - void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); - void (*stop)(void); - unsigned perfctr; - unsigned evntsel; - u64 checkbit; -}; - -static struct wd_ops *wd_ops; - -/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's - * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) - */ -#define NMI_MAX_COUNTER_BITS 66 - -/* perfctr_nmi_owner tracks the ownership of the perfctr registers: - * evtsel_nmi_owner tracks the ownership of the event selection - * - different performance counters/ event selection may be reserved for - * different subsystems this reservation system just tries to coordinate - * things a little - */ -static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); -static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); - -static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); - -/* converts an msr to an appropriate reservation bit */ -static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) -{ - /* returns the bit offset of the performance counter register */ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - return (msr - MSR_K7_PERFCTR0); - case X86_VENDOR_INTEL: - if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) - return (msr - MSR_ARCH_PERFMON_PERFCTR0); - - switch (boot_cpu_data.x86) { - case 6: - return (msr - MSR_P6_PERFCTR0); - case 15: - return (msr - MSR_P4_BPU_PERFCTR0); - } - } - return 0; -} - -/* converts an msr to an appropriate 
reservation bit */ -/* returns the bit offset of the event selection register */ -static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) -{ - /* returns the bit offset of the event selection register */ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - return (msr - MSR_K7_EVNTSEL0); - case X86_VENDOR_INTEL: - if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) - return (msr - MSR_ARCH_PERFMON_EVENTSEL0); - - switch (boot_cpu_data.x86) { - case 6: - return (msr - MSR_P6_EVNTSEL0); - case 15: - return (msr - MSR_P4_BSU_ESCR0); - } - } - return 0; - -} - -/* checks for a bit availability (hack for oprofile) */ -int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) -{ - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - return (!test_bit(counter, perfctr_nmi_owner)); -} - -/* checks the an msr for availability */ -int avail_to_resrv_perfctr_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - return (!test_bit(counter, perfctr_nmi_owner)); -} - -int reserve_perfctr_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - if (!test_and_set_bit(counter, perfctr_nmi_owner)) - return 1; - return 0; -} - -void release_perfctr_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - clear_bit(counter, perfctr_nmi_owner); -} - -int reserve_evntsel_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_evntsel_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - if (!test_and_set_bit(counter, evntsel_nmi_owner)) - return 1; - return 0; -} - -void release_evntsel_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_evntsel_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - clear_bit(counter, evntsel_nmi_owner); -} - -EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); 
-EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); -EXPORT_SYMBOL(reserve_perfctr_nmi); -EXPORT_SYMBOL(release_perfctr_nmi); -EXPORT_SYMBOL(reserve_evntsel_nmi); -EXPORT_SYMBOL(release_evntsel_nmi); - -void disable_lapic_nmi_watchdog(void) -{ - BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); - - if (atomic_read(&nmi_active) <= 0) - return; - - on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); - wd_ops->unreserve(); - - BUG_ON(atomic_read(&nmi_active) != 0); -} - -void enable_lapic_nmi_watchdog(void) -{ - BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); - - /* are we already enabled */ - if (atomic_read(&nmi_active) != 0) - return; - - /* are we lapic aware */ - if (!wd_ops) - return; - if (!wd_ops->reserve()) { - printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n"); - return; - } - - on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); - touch_nmi_watchdog(); -} - -/* - * Activate the NMI watchdog via the local APIC. - */ - -static unsigned int adjust_for_32bit_ctr(unsigned int hz) -{ - u64 counter_val; - unsigned int retval = hz; - - /* - * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter - * are writable, with higher bits sign extending from bit 31. - * So, we can only program the counter with 31 bit values and - * 32nd bit should be 1, for 33.. to be 1. 
- * Find the appropriate nmi_hz - */ - counter_val = (u64)cpu_khz * 1000; - do_div(counter_val, retval); - if (counter_val > 0x7fffffffULL) { - u64 count = (u64)cpu_khz * 1000; - do_div(count, 0x7fffffffUL); - retval = count + 1; - } - return retval; -} - -static void -write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz) -{ - u64 count = (u64)cpu_khz * 1000; - - do_div(count, nmi_hz); - if(descr) - Dprintk("setting %s to -0x%08Lx\n", descr, count); - wrmsrl(perfctr_msr, 0 - count); -} - -static void write_watchdog_counter32(unsigned int perfctr_msr, - const char *descr, unsigned nmi_hz) -{ - u64 count = (u64)cpu_khz * 1000; - - do_div(count, nmi_hz); - if(descr) - Dprintk("setting %s to -0x%08Lx\n", descr, count); - wrmsr(perfctr_msr, (u32)(-count), 0); -} - -/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface - nicely stable so there is not much variety */ - -#define K7_EVNTSEL_ENABLE (1 << 22) -#define K7_EVNTSEL_INT (1 << 20) -#define K7_EVNTSEL_OS (1 << 17) -#define K7_EVNTSEL_USR (1 << 16) -#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 -#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING - -static int setup_k7_watchdog(unsigned nmi_hz) -{ - unsigned int perfctr_msr, evntsel_msr; - unsigned int evntsel; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - perfctr_msr = wd_ops->perfctr; - evntsel_msr = wd_ops->evntsel; - - wrmsrl(perfctr_msr, 0UL); - - evntsel = K7_EVNTSEL_INT - | K7_EVNTSEL_OS - | K7_EVNTSEL_USR - | K7_NMI_EVENT; - - /* setup the timer */ - wrmsr(evntsel_msr, evntsel, 0); - write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= K7_EVNTSEL_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); - - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; //unused - return 1; -} - -static void single_msr_stop_watchdog(void) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - 
- wrmsr(wd->evntsel_msr, 0, 0); -} - -static int single_msr_reserve(void) -{ - if (!reserve_perfctr_nmi(wd_ops->perfctr)) - return 0; - - if (!reserve_evntsel_nmi(wd_ops->evntsel)) { - release_perfctr_nmi(wd_ops->perfctr); - return 0; - } - return 1; -} - -static void single_msr_unreserve(void) -{ - release_evntsel_nmi(wd_ops->evntsel); - release_perfctr_nmi(wd_ops->perfctr); -} - -static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) -{ - /* start the cycle over again */ - write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); -} - -static struct wd_ops k7_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_k7_watchdog, - .rearm = single_msr_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_K7_PERFCTR0, - .evntsel = MSR_K7_EVNTSEL0, - .checkbit = 1ULL<<47, -}; - -/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */ - -#define P6_EVNTSEL0_ENABLE (1 << 22) -#define P6_EVNTSEL_INT (1 << 20) -#define P6_EVNTSEL_OS (1 << 17) -#define P6_EVNTSEL_USR (1 << 16) -#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 -#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED - -static int setup_p6_watchdog(unsigned nmi_hz) -{ - unsigned int perfctr_msr, evntsel_msr; - unsigned int evntsel; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - perfctr_msr = wd_ops->perfctr; - evntsel_msr = wd_ops->evntsel; - - /* KVM doesn't implement this MSR */ - if (wrmsr_safe(perfctr_msr, 0, 0) < 0) - return 0; - - evntsel = P6_EVNTSEL_INT - | P6_EVNTSEL_OS - | P6_EVNTSEL_USR - | P6_NMI_EVENT; - - /* setup the timer */ - wrmsr(evntsel_msr, evntsel, 0); - nmi_hz = adjust_for_32bit_ctr(nmi_hz); - write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= P6_EVNTSEL0_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); - - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; //unused - return 1; -} - -static void p6_rearm(struct 
nmi_watchdog_ctlblk *wd, unsigned nmi_hz) -{ - /* P6 based Pentium M need to re-unmask - * the apic vector but it doesn't hurt - * other P6 variant. - * ArchPerfom/Core Duo also needs this */ - apic_write(APIC_LVTPC, APIC_DM_NMI); - /* P6/ARCH_PERFMON has 32 bit counter write */ - write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); -} - -static struct wd_ops p6_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_p6_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_P6_PERFCTR0, - .evntsel = MSR_P6_EVNTSEL0, - .checkbit = 1ULL<<39, -}; - -/* Intel P4 performance counters. By far the most complicated of all. */ - -#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) -#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) -#define P4_ESCR_OS (1<<3) -#define P4_ESCR_USR (1<<2) -#define P4_CCCR_OVF_PMI0 (1<<26) -#define P4_CCCR_OVF_PMI1 (1<<27) -#define P4_CCCR_THRESHOLD(N) ((N)<<20) -#define P4_CCCR_COMPLEMENT (1<<19) -#define P4_CCCR_COMPARE (1<<18) -#define P4_CCCR_REQUIRED (3<<16) -#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) -#define P4_CCCR_ENABLE (1<<12) -#define P4_CCCR_OVF (1<<31) - -/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter - CRU_ESCR0 (with any non-null event selector) through a complemented - max threshold. 
[IA32-Vol3, Section 14.9.9] */ - -static int setup_p4_watchdog(unsigned nmi_hz) -{ - unsigned int perfctr_msr, evntsel_msr, cccr_msr; - unsigned int evntsel, cccr_val; - unsigned int misc_enable, dummy; - unsigned int ht_num; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); - if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) - return 0; - -#ifdef CONFIG_SMP - /* detect which hyperthread we are on */ - if (smp_num_siblings == 2) { - unsigned int ebx, apicid; - - ebx = cpuid_ebx(1); - apicid = (ebx >> 24) & 0xff; - ht_num = apicid & 1; - } else -#endif - ht_num = 0; - - /* performance counters are shared resources - * assign each hyperthread its own set - * (re-use the ESCR0 register, seems safe - * and keeps the cccr_val the same) - */ - if (!ht_num) { - /* logical cpu 0 */ - perfctr_msr = MSR_P4_IQ_PERFCTR0; - evntsel_msr = MSR_P4_CRU_ESCR0; - cccr_msr = MSR_P4_IQ_CCCR0; - cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); - } else { - /* logical cpu 1 */ - perfctr_msr = MSR_P4_IQ_PERFCTR1; - evntsel_msr = MSR_P4_CRU_ESCR0; - cccr_msr = MSR_P4_IQ_CCCR1; - cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); - } - - evntsel = P4_ESCR_EVENT_SELECT(0x3F) - | P4_ESCR_OS - | P4_ESCR_USR; - - cccr_val |= P4_CCCR_THRESHOLD(15) - | P4_CCCR_COMPLEMENT - | P4_CCCR_COMPARE - | P4_CCCR_REQUIRED; - - wrmsr(evntsel_msr, evntsel, 0); - wrmsr(cccr_msr, cccr_val, 0); - write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - cccr_val |= P4_CCCR_ENABLE; - wrmsr(cccr_msr, cccr_val, 0); - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = cccr_msr; - return 1; -} - -static void stop_p4_watchdog(void) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - wrmsr(wd->cccr_msr, 0, 0); - wrmsr(wd->evntsel_msr, 0, 0); -} - -static int p4_reserve(void) -{ - if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0)) - return 0; 
-#ifdef CONFIG_SMP - if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1)) - goto fail1; -#endif - if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) - goto fail2; - /* RED-PEN why is ESCR1 not reserved here? */ - return 1; - fail2: -#ifdef CONFIG_SMP - if (smp_num_siblings > 1) - release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); - fail1: -#endif - release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); - return 0; -} - -static void p4_unreserve(void) -{ -#ifdef CONFIG_SMP - if (smp_num_siblings > 1) - release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); -#endif - release_evntsel_nmi(MSR_P4_CRU_ESCR0); - release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); -} - -static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) -{ - unsigned dummy; - /* - * P4 quirks: - * - An overflown perfctr will assert its interrupt - * until the OVF flag in its CCCR is cleared. - * - LVTPC is masked on interrupt and must be - * unmasked by the LVTPC handler. - */ - rdmsrl(wd->cccr_msr, dummy); - dummy &= ~P4_CCCR_OVF; - wrmsrl(wd->cccr_msr, dummy); - apic_write(APIC_LVTPC, APIC_DM_NMI); - /* start the cycle over again */ - write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); -} - -static struct wd_ops p4_wd_ops = { - .reserve = p4_reserve, - .unreserve = p4_unreserve, - .setup = setup_p4_watchdog, - .rearm = p4_rearm, - .stop = stop_p4_watchdog, - /* RED-PEN this is wrong for the other sibling */ - .perfctr = MSR_P4_BPU_PERFCTR0, - .evntsel = MSR_P4_BSU_ESCR0, - .checkbit = 1ULL<<39, -}; - -/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully - all future Intel CPUs. 
*/ - -#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL -#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK - -static int setup_intel_arch_watchdog(unsigned nmi_hz) -{ - unsigned int ebx; - union cpuid10_eax eax; - unsigned int unused; - unsigned int perfctr_msr, evntsel_msr; - unsigned int evntsel; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - /* - * Check whether the Architectural PerfMon supports - * Unhalted Core Cycles Event or not. - * NOTE: Corresponding bit = 0 in ebx indicates event present. - */ - cpuid(10, &(eax.full), &ebx, &unused, &unused); - if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || - (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) - return 0; - - perfctr_msr = wd_ops->perfctr; - evntsel_msr = wd_ops->evntsel; - - wrmsrl(perfctr_msr, 0UL); - - evntsel = ARCH_PERFMON_EVENTSEL_INT - | ARCH_PERFMON_EVENTSEL_OS - | ARCH_PERFMON_EVENTSEL_USR - | ARCH_PERFMON_NMI_EVENT_SEL - | ARCH_PERFMON_NMI_EVENT_UMASK; - - /* setup the timer */ - wrmsr(evntsel_msr, evntsel, 0); - nmi_hz = adjust_for_32bit_ctr(nmi_hz); - write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); - - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; //unused - wd_ops->checkbit = 1ULL << (eax.split.bit_width - 1); - return 1; -} - -static struct wd_ops intel_arch_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_intel_arch_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_ARCH_PERFMON_PERFCTR1, - .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, -}; - -static struct wd_ops coreduo_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_intel_arch_watchdog, - .rearm = p6_rearm, - .stop = 
single_msr_stop_watchdog, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .evntsel = MSR_ARCH_PERFMON_EVENTSEL0, -}; - -static void probe_nmi_watchdog(void) -{ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && - boot_cpu_data.x86 != 16) - return; - wd_ops = &k7_wd_ops; - break; - case X86_VENDOR_INTEL: - /* Work around Core Duo (Yonah) errata AE49 where perfctr1 - doesn't have a working enable bit. */ - if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { - wd_ops = &coreduo_wd_ops; - break; - } - if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { - wd_ops = &intel_arch_wd_ops; - break; - } - switch (boot_cpu_data.x86) { - case 6: - if (boot_cpu_data.x86_model > 0xd) - return; - - wd_ops = &p6_wd_ops; - break; - case 15: - if (boot_cpu_data.x86_model > 0x4) - return; - - wd_ops = &p4_wd_ops; - break; - default: - return; - } - break; - } -} - -/* Interface to nmi.c */ - -int lapic_watchdog_init(unsigned nmi_hz) -{ - if (!wd_ops) { - probe_nmi_watchdog(); - if (!wd_ops) - return -1; - - if (!wd_ops->reserve()) { - printk(KERN_ERR - "NMI watchdog: cannot reserve perfctrs\n"); - return -1; - } - } - - if (!(wd_ops->setup(nmi_hz))) { - printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n", - raw_smp_processor_id()); - return -1; - } - - return 0; -} - -void lapic_watchdog_stop(void) -{ - if (wd_ops) - wd_ops->stop(); -} - -unsigned lapic_adjust_nmi_hz(unsigned hz) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - if (wd->perfctr_msr == MSR_P6_PERFCTR0 || - wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) - hz = adjust_for_32bit_ctr(hz); - return hz; -} - -int lapic_wd_event(unsigned nmi_hz) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - u64 ctr; - rdmsrl(wd->perfctr_msr, ctr); - if (ctr & wd_ops->checkbit) { /* perfctr still running? 
*/ - return 0; - } - wd_ops->rearm(wd, nmi_hz); - return 1; -} - -int lapic_watchdog_ok(void) -{ - return wd_ops != NULL; -} diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c deleted file mode 100644 index 1e31b6caffb..00000000000 --- a/arch/i386/kernel/cpu/proc.c +++ /dev/null @@ -1,192 +0,0 @@ -#include -#include -#include -#include -#include -#include - -/* - * Get CPU information for use by the procfs. - */ -static int show_cpuinfo(struct seq_file *m, void *v) -{ - /* - * These flag bits must match the definitions in . - * NULL means this bit is undefined or reserved; either way it doesn't - * have meaning as far as Linux is concerned. Note that it's important - * to realize there is a difference between this table and CPUID -- if - * applications want to get the raw CPUID data, they should access - * /dev/cpu//cpuid instead. - */ - static const char * const x86_cap_flags[] = { - /* Intel-defined */ - "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", - "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", - "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", - - /* AMD-defined */ - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, - NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", - "3dnowext", "3dnow", - - /* Transmeta-defined */ - "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Other (Linux-defined) */ - "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", - NULL, NULL, NULL, NULL, - "constant_tsc", "up", NULL, "arch_perfmon", - "pebs", "bts", NULL, "sync_rdtsc", - "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Intel-defined (#2) */ 
- "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", - "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* VIA/Cyrix/Centaur-defined */ - NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", - "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", - "altmovcr8", "abm", "sse4a", - "misalignsse", "3dnowprefetch", - "osvw", "ibs", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Auxiliary (Linux-defined) */ - "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - }; - static const char * const x86_power_flags[] = { - "ts", /* temperature sensor */ - "fid", /* frequency id control */ - "vid", /* voltage id control */ - "ttp", /* thermal trip */ - "tm", - "stc", - "100mhzsteps", - "hwpstate", - "", /* constant_tsc - moved to flags */ - /* nothing */ - }; - struct cpuinfo_x86 *c = v; - int i, n = c - cpu_data; - int fpu_exception; - -#ifdef CONFIG_SMP - if (!cpu_online(n)) - return 0; -#endif - seq_printf(m, "processor\t: %d\n" - "vendor_id\t: %s\n" - "cpu family\t: %d\n" - "model\t\t: %d\n" - "model name\t: %s\n", - n, - c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", - c->x86, - c->x86_model, - c->x86_model_id[0] ? 
c->x86_model_id : "unknown"); - - if (c->x86_mask || c->cpuid_level >= 0) - seq_printf(m, "stepping\t: %d\n", c->x86_mask); - else - seq_printf(m, "stepping\t: unknown\n"); - - if ( cpu_has(c, X86_FEATURE_TSC) ) { - unsigned int freq = cpufreq_quick_get(n); - if (!freq) - freq = cpu_khz; - seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - freq / 1000, (freq % 1000)); - } - - /* Cache size */ - if (c->x86_cache_size >= 0) - seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); -#ifdef CONFIG_X86_HT - if (c->x86_max_cores * smp_num_siblings > 1) { - seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); - seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); - seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); - } -#endif - - /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */ - fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu); - seq_printf(m, "fdiv_bug\t: %s\n" - "hlt_bug\t\t: %s\n" - "f00f_bug\t: %s\n" - "coma_bug\t: %s\n" - "fpu\t\t: %s\n" - "fpu_exception\t: %s\n" - "cpuid level\t: %d\n" - "wp\t\t: %s\n" - "flags\t\t:", - c->fdiv_bug ? "yes" : "no", - c->hlt_works_ok ? "no" : "yes", - c->f00f_bug ? "yes" : "no", - c->coma_bug ? "yes" : "no", - c->hard_math ? "yes" : "no", - fpu_exception ? "yes" : "no", - c->cpuid_level, - c->wp_works_ok ? "yes" : "no"); - - for ( i = 0 ; i < 32*NCAPINTS ; i++ ) - if ( test_bit(i, c->x86_capability) && - x86_cap_flags[i] != NULL ) - seq_printf(m, " %s", x86_cap_flags[i]); - - for (i = 0; i < 32; i++) - if (c->x86_power & (1 << i)) { - if (i < ARRAY_SIZE(x86_power_flags) && - x86_power_flags[i]) - seq_printf(m, "%s%s", - x86_power_flags[i][0]?" 
":"", - x86_power_flags[i]); - else - seq_printf(m, " [%d]", i); - } - - seq_printf(m, "\nbogomips\t: %lu.%02lu\n", - c->loops_per_jiffy/(500000/HZ), - (c->loops_per_jiffy/(5000/HZ)) % 100); - seq_printf(m, "clflush size\t: %u\n\n", c->x86_clflush_size); - - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - return *pos < NR_CPUS ? cpu_data + *pos : NULL; -} -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} -static void c_stop(struct seq_file *m, void *v) -{ -} -struct seq_operations cpuinfo_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c deleted file mode 100644 index 200fb3f9ebf..00000000000 --- a/arch/i386/kernel/cpu/transmeta.c +++ /dev/null @@ -1,116 +0,0 @@ -#include -#include -#include -#include -#include -#include "cpu.h" - -static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) -{ - unsigned int cap_mask, uk, max, dummy; - unsigned int cms_rev1, cms_rev2; - unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; - char cpu_info[65]; - - get_model_name(c); /* Same as AMD/Cyrix */ - display_cacheinfo(c); - - /* Print CMS and CPU revision */ - max = cpuid_eax(0x80860000); - cpu_rev = 0; - if ( max >= 0x80860001 ) { - cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); - if (cpu_rev != 0x02000000) { - printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n", - (cpu_rev >> 24) & 0xff, - (cpu_rev >> 16) & 0xff, - (cpu_rev >> 8) & 0xff, - cpu_rev & 0xff, - cpu_freq); - } - } - if ( max >= 0x80860002 ) { - cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy); - if (cpu_rev == 0x02000000) { - printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n", - new_cpu_rev, cpu_freq); - } - printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n", - (cms_rev1 >> 24) & 0xff, - (cms_rev1 >> 16) & 0xff, - (cms_rev1 >> 8) & 0xff, 
- cms_rev1 & 0xff, - cms_rev2); - } - if ( max >= 0x80860006 ) { - cpuid(0x80860003, - (void *)&cpu_info[0], - (void *)&cpu_info[4], - (void *)&cpu_info[8], - (void *)&cpu_info[12]); - cpuid(0x80860004, - (void *)&cpu_info[16], - (void *)&cpu_info[20], - (void *)&cpu_info[24], - (void *)&cpu_info[28]); - cpuid(0x80860005, - (void *)&cpu_info[32], - (void *)&cpu_info[36], - (void *)&cpu_info[40], - (void *)&cpu_info[44]); - cpuid(0x80860006, - (void *)&cpu_info[48], - (void *)&cpu_info[52], - (void *)&cpu_info[56], - (void *)&cpu_info[60]); - cpu_info[64] = '\0'; - printk(KERN_INFO "CPU: %s\n", cpu_info); - } - - /* Unhide possibly hidden capability flags */ - rdmsr(0x80860004, cap_mask, uk); - wrmsr(0x80860004, ~0, uk); - c->x86_capability[0] = cpuid_edx(0x00000001); - wrmsr(0x80860004, cap_mask, uk); - - /* All Transmeta CPUs have a constant TSC */ - set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); - - /* If we can run i686 user-space code, call us an i686 */ -#define USER686 ((1 << X86_FEATURE_TSC)|\ - (1 << X86_FEATURE_CX8)|\ - (1 << X86_FEATURE_CMOV)) - if (c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686) - c->x86 = 6; - -#ifdef CONFIG_SYSCTL - /* randomize_va_space slows us down enormously; - it probably triggers retranslation of x86->native bytecode */ - randomize_va_space = 0; -#endif -} - -static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c) -{ - u32 xlvl; - - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ( (xlvl & 0xffff0000) == 0x80860000 ) { - if ( xlvl >= 0x80860001 ) - c->x86_capability[2] = cpuid_edx(0x80860001); - } -} - -static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { - .c_vendor = "Transmeta", - .c_ident = { "GenuineTMx86", "TransmetaCPU" }, - .c_init = init_transmeta, - .c_identify = transmeta_identify, -}; - -int __init transmeta_init_cpu(void) -{ - cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev; - return 0; -} diff --git a/arch/i386/kernel/cpu/umc.c 
b/arch/i386/kernel/cpu/umc.c deleted file mode 100644 index a7a4e75bdcd..00000000000 --- a/arch/i386/kernel/cpu/umc.c +++ /dev/null @@ -1,26 +0,0 @@ -#include -#include -#include -#include "cpu.h" - -/* UMC chips appear to be only either 386 or 486, so no special init takes place. - */ - -static struct cpu_dev umc_cpu_dev __cpuinitdata = { - .c_vendor = "UMC", - .c_ident = { "UMC UMC UMC" }, - .c_models = { - { .vendor = X86_VENDOR_UMC, .family = 4, .model_names = - { - [1] = "U5D", - [2] = "U5S", - } - }, - }, -}; - -int __init umc_init_cpu(void) -{ - cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev; - return 0; -} diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile new file mode 100644 index 00000000000..6687f6d5ad2 --- /dev/null +++ b/arch/x86/kernel/cpu/Makefile @@ -0,0 +1,20 @@ +# +# Makefile for x86-compatible CPU details and quirks +# + +obj-y := common.o proc.o bugs.o + +obj-y += amd.o +obj-y += cyrix.o +obj-y += centaur.o +obj-y += transmeta.o +obj-y += intel.o intel_cacheinfo.o addon_cpuid_features.o +obj-y += nexgen.o +obj-y += umc.o + +obj-$(CONFIG_X86_MCE) += ../../../x86/kernel/cpu/mcheck/ + +obj-$(CONFIG_MTRR) += ../../../x86/kernel/cpu/mtrr/ +obj-$(CONFIG_CPU_FREQ) += ../../../x86/kernel/cpu/cpufreq/ + +obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c new file mode 100644 index 00000000000..3e91d3ee26e --- /dev/null +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -0,0 +1,50 @@ + +/* + * Routines to indentify additional cpu features that are scattered in + * cpuid space. 
+ */ + +#include + +#include + +struct cpuid_bit { + u16 feature; + u8 reg; + u8 bit; + u32 level; +}; + +enum cpuid_regs { + CR_EAX = 0, + CR_ECX, + CR_EDX, + CR_EBX +}; + +void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) +{ + u32 max_level; + u32 regs[4]; + const struct cpuid_bit *cb; + + static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, + { 0, 0, 0, 0 } + }; + + for (cb = cpuid_bits; cb->feature; cb++) { + + /* Verify that the level is valid */ + max_level = cpuid_eax(cb->level & 0xffff0000); + if (max_level < cb->level || + max_level > (cb->level | 0xffff)) + continue; + + cpuid(cb->level, &regs[CR_EAX], &regs[CR_EBX], + &regs[CR_ECX], &regs[CR_EDX]); + + if (regs[cb->reg] & (1 << cb->bit)) + set_bit(cb->feature, c->x86_capability); + } +} diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c new file mode 100644 index 00000000000..dcf6bbb1c7c --- /dev/null +++ b/arch/x86/kernel/cpu/amd.c @@ -0,0 +1,337 @@ +#include +#include +#include +#include +#include +#include + +#include "cpu.h" + +/* + * B step AMD K6 before B 9730xxxx have hardware bugs that can cause + * misexecution of code under Linux. Owners of such processors should + * contact AMD for precise details and a CPU swap. + * + * See http://www.multimania.com/poulot/k6bug.html + * http://www.amd.com/K6/k6docs/revgd.html + * + * The following test is erm.. interesting. AMD neglected to up + * the chip setting when fixing the bug but they also tweaked some + * performance at the same time.. + */ + +extern void vide(void); +__asm__(".align 4\nvide: ret"); + +#ifdef CONFIG_X86_LOCAL_APIC +#define ENABLE_C1E_MASK 0x18000000 +#define CPUID_PROCESSOR_SIGNATURE 1 +#define CPUID_XFAM 0x0ff00000 +#define CPUID_XFAM_K8 0x00000000 +#define CPUID_XFAM_10H 0x00100000 +#define CPUID_XFAM_11H 0x00200000 +#define CPUID_XMOD 0x000f0000 +#define CPUID_XMOD_REV_F 0x00040000 + +/* AMD systems with C1E don't have a working lAPIC timer. Check for that.
*/ +static __cpuinit int amd_apic_timer_broken(void) +{ + u32 lo, hi; + u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + switch (eax & CPUID_XFAM) { + case CPUID_XFAM_K8: + if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) + break; + case CPUID_XFAM_10H: + case CPUID_XFAM_11H: + rdmsr(MSR_K8_ENABLE_C1E, lo, hi); + if (lo & ENABLE_C1E_MASK) + return 1; + break; + default: + /* err on the side of caution */ + return 1; + } + return 0; +} +#endif + +int force_mwait __cpuinitdata; + +static void __cpuinit init_amd(struct cpuinfo_x86 *c) +{ + u32 l, h; + int mbytes = num_physpages >> (20-PAGE_SHIFT); + int r; + +#ifdef CONFIG_SMP + unsigned long long value; + + /* Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 15) { + rdmsrl(MSR_K7_HWCR, value); + value |= 1 << 6; + wrmsrl(MSR_K7_HWCR, value); + } +#endif + + /* + * FIXME: We should handle the K5 here. Set up the write + * range and also turn on MSR 83 bits 4 and 31 (write alloc, + * no bus pipeline) + */ + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, c->x86_capability); + + r = get_model_name(c); + + switch(c->x86) + { + case 4: + /* + * General Systems BIOSen alias the cpu frequency registers + * of the Elan at 0x000df000. Unfortuantly, one of the Linux + * drivers subsequently pokes it, and changes the CPU speed. + * Workaround : Remove the unneeded alias. 
+ */ +#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ +#define CBAR_ENB (0x80000000) +#define CBAR_KEY (0X000000CB) + if (c->x86_model==9 || c->x86_model == 10) { + if (inl (CBAR) & CBAR_ENB) + outl (0 | CBAR_KEY, CBAR); + } + break; + case 5: + if( c->x86_model < 6 ) + { + /* Based on AMD doc 20734R - June 2000 */ + if ( c->x86_model == 0 ) { + clear_bit(X86_FEATURE_APIC, c->x86_capability); + set_bit(X86_FEATURE_PGE, c->x86_capability); + } + break; + } + + if ( c->x86_model == 6 && c->x86_mask == 1 ) { + const int K6_BUG_LOOP = 1000000; + int n; + void (*f_vide)(void); + unsigned long d, d2; + + printk(KERN_INFO "AMD K6 stepping B detected - "); + + /* + * It looks like AMD fixed the 2.6.2 bug and improved indirect + * calls at the same time. + */ + + n = K6_BUG_LOOP; + f_vide = vide; + rdtscl(d); + while (n--) + f_vide(); + rdtscl(d2); + d = d2-d; + + if (d > 20*K6_BUG_LOOP) + printk("system stability may be impaired when more than 32 MB are used.\n"); + else + printk("probably OK (after B9730xxxx).\n"); + printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); + } + + /* K6 with old style WHCR */ + if (c->x86_model < 8 || + (c->x86_model== 8 && c->x86_mask < 8)) { + /* We can only write allocate on the low 508Mb */ + if(mbytes>508) + mbytes=508; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0x0000FFFF)==0) { + unsigned long flags; + l=(1<<0)|((mbytes/4)<<1); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + mbytes); + } + break; + } + + if ((c->x86_model == 8 && c->x86_mask >7) || + c->x86_model == 9 || c->x86_model == 13) { + /* The more serious chips .. 
*/ + + if(mbytes>4092) + mbytes=4092; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0xFFFF0000)==0) { + unsigned long flags; + l=((mbytes>>2)<<22)|(1<<16); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + mbytes); + } + + /* Set MTRR capability flag if appropriate */ + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_bit(X86_FEATURE_K6_MTRR, c->x86_capability); + break; + } + + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + break; + } + break; + case 6: /* An Athlon/Duron */ + + /* Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. + * If the BIOS didn't enable it already, enable it here. + */ + if (c->x86_model >= 6 && c->x86_model <= 10) { + if (!cpu_has(c, X86_FEATURE_XMM)) { + printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + rdmsr(MSR_K7_HWCR, l, h); + l &= ~0x00008000; + wrmsr(MSR_K7_HWCR, l, h); + set_bit(X86_FEATURE_XMM, c->x86_capability); + } + } + + /* It's been determined by AMD that Athlons since model 8 stepping 1 + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ + if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + printk ("CPU: CLK_CTL MSR was %x. 
Reprogramming to %x\n", l, + ((l & 0x000fffff)|0x20000000)); + wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); + } + } + break; + } + + switch (c->x86) { + case 15: + /* Use K8 tuning for Fam10h and Fam11h */ + case 0x10: + case 0x11: + set_bit(X86_FEATURE_K8, c->x86_capability); + break; + case 6: + set_bit(X86_FEATURE_K7, c->x86_capability); + break; + } + if (c->x86 >= 6) + set_bit(X86_FEATURE_FXSAVE_LEAK, c->x86_capability); + + display_cacheinfo(c); + + if (cpuid_eax(0x80000000) >= 0x80000008) { + c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; + } + + if (cpuid_eax(0x80000000) >= 0x80000007) { + c->x86_power = cpuid_edx(0x80000007); + if (c->x86_power & (1<<8)) + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); + } + +#ifdef CONFIG_X86_HT + /* + * On a AMD multi core setup the lower bits of the APIC id + * distingush the cores. + */ + if (c->x86_max_cores > 1) { + int cpu = smp_processor_id(); + unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; + + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } + c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1); + c->phys_proc_id >>= bits; + printk(KERN_INFO "CPU %d(%d) -> Core %d\n", + cpu, c->x86_max_cores, c->cpu_core_id); + } +#endif + + if (cpuid_eax(0x80000000) >= 0x80000006) { + if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) + num_cache_leaves = 4; + else + num_cache_leaves = 3; + } + +#ifdef CONFIG_X86_LOCAL_APIC + if (amd_apic_timer_broken()) + local_apic_timer_disabled = 1; +#endif + + if (c->x86 == 0x10 && !force_mwait) + clear_bit(X86_FEATURE_MWAIT, c->x86_capability); + + /* K6s reports MCEs but don't actually have all the MSRs */ + if (c->x86 < 6) + clear_bit(X86_FEATURE_MCE, c->x86_capability); +} + +static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) +{ + /* AMD errata T13 (order #21922) */ + if ((c->x86 == 6)) { + if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ + size = 64; + if (c->x86_model == 4 && + (c->x86_mask==0
|| c->x86_mask==1)) /* Tbird rev A1/A2 */ + size = 256; + } + return size; +} + +static struct cpu_dev amd_cpu_dev __cpuinitdata = { + .c_vendor = "AMD", + .c_ident = { "AuthenticAMD" }, + .c_models = { + { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = + { + [3] = "486 DX/2", + [7] = "486 DX/2-WB", + [8] = "486 DX/4", + [9] = "486 DX/4-WB", + [14] = "Am5x86-WT", + [15] = "Am5x86-WB" + } + }, + }, + .c_init = init_amd, + .c_size_cache = amd_size_cache, +}; + +int __init amd_init_cpu(void) +{ + cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev; + return 0; +} diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c new file mode 100644 index 00000000000..59266f03d1c --- /dev/null +++ b/arch/x86/kernel/cpu/bugs.c @@ -0,0 +1,192 @@ +/* + * arch/i386/cpu/bugs.c + * + * Copyright (C) 1994 Linus Torvalds + * + * Cyrix stuff, June 1998 by: + * - Rafael R. Reilova (moved everything from head.S), + * + * - Channing Corn (tests & fixes), + * - Andrew D. Balsa (code cleanup). + */ +#include +#include +#include +#include +#include +#include +#include +#include + +static int __init no_halt(char *s) +{ + boot_cpu_data.hlt_works_ok = 0; + return 1; +} + +__setup("no-hlt", no_halt); + +static int __init mca_pentium(char *s) +{ + mca_pentium_flag = 1; + return 1; +} + +__setup("mca-pentium", mca_pentium); + +static int __init no_387(char *s) +{ + boot_cpu_data.hard_math = 0; + write_cr0(0xE | read_cr0()); + return 1; +} + +__setup("no387", no_387); + +static double __initdata x = 4195835.0; +static double __initdata y = 3145727.0; + +/* + * This used to check for exceptions.. + * However, it turns out that to support that, + * the XMM trap handlers basically had to + * be buggy. So let's have a correct XMM trap + * handler, and forget about printing out + * some status at boot. + * + * We should really only care about bugs here + * anyway. Not features. 
+ */ +static void __init check_fpu(void) +{ + if (!boot_cpu_data.hard_math) { +#ifndef CONFIG_MATH_EMULATION + printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); + printk(KERN_EMERG "Giving up.\n"); + for (;;) ; +#endif + return; + } + +/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ + /* Test for the divl bug.. */ + __asm__("fninit\n\t" + "fldl %1\n\t" + "fdivl %2\n\t" + "fmull %2\n\t" + "fldl %1\n\t" + "fsubp %%st,%%st(1)\n\t" + "fistpl %0\n\t" + "fwait\n\t" + "fninit" + : "=m" (*&boot_cpu_data.fdiv_bug) + : "m" (*&x), "m" (*&y)); + if (boot_cpu_data.fdiv_bug) + printk("Hmm, FPU with FDIV bug.\n"); +} + +static void __init check_hlt(void) +{ + if (paravirt_enabled()) + return; + + printk(KERN_INFO "Checking 'hlt' instruction... "); + if (!boot_cpu_data.hlt_works_ok) { + printk("disabled\n"); + return; + } + halt(); + halt(); + halt(); + halt(); + printk("OK.\n"); +} + +/* + * Most 386 processors have a bug where a POPAD can lock the + * machine even from user space. + */ + +static void __init check_popad(void) +{ +#ifndef CONFIG_X86_POPAD_OK + int res, inp = (int) &res; + + printk(KERN_INFO "Checking for popad bug... "); + __asm__ __volatile__( + "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " + : "=&a" (res) + : "d" (inp) + : "ecx", "edi" ); + /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ + if (res != 12345678) printk( "Buggy.\n" ); + else printk( "OK.\n" ); +#endif +} + +/* + * Check whether we are able to run this kernel safely on SMP. + * + * - In order to run on a i386, we need to be compiled for i386 + * (for due to lack of "invlpg" and working WP on a i386) + * - In order to run on anything without a TSC, we need to be + * compiled for a i486. 
+ * - In order to support the local APIC on a buggy Pentium machine, + * we need to be compiled with CONFIG_X86_GOOD_APIC disabled, + * which happens implicitly if compiled for a Pentium or lower + * (unless an advanced selection of CPU features is used) as an + * otherwise config implies a properly working local APIC without + * the need to do extra reads from the APIC. +*/ + +static void __init check_config(void) +{ +/* + * We'd better not be a i386 if we're configured to use some + * i486+ only features! (WP works in supervisor mode and the + * new "invlpg" and "bswap" instructions) + */ +#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) + if (boot_cpu_data.x86 == 3) + panic("Kernel requires i486+ for 'invlpg' and other features"); +#endif + +/* + * If we configured ourselves for a TSC, we'd better have one! + */ +#ifdef CONFIG_X86_TSC + if (!cpu_has_tsc && !tsc_disable) + panic("Kernel compiled for Pentium+, requires TSC feature!"); +#endif + +/* + * If we were told we had a good local APIC, check for buggy Pentia, + * i.e. all B steppings and the C2 stepping of P54C when using their + * integrated APIC (see 11AP erratum in "Pentium Processor + * Specification Update"). + */ +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC) + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL + && cpu_has_apic + && boot_cpu_data.x86 == 5 + && boot_cpu_data.x86_model == 2 + && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) + panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!"); +#endif +} + + +void __init check_bugs(void) +{ + identify_boot_cpu(); +#ifndef CONFIG_SMP + printk("CPU: "); + print_cpu_info(&boot_cpu_data); +#endif + check_config(); + check_fpu(); + check_hlt(); + check_popad(); + init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 
6 : boot_cpu_data.x86); + alternative_instructions(); +} diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c new file mode 100644 index 00000000000..473eac883c7 --- /dev/null +++ b/arch/x86/kernel/cpu/centaur.c @@ -0,0 +1,471 @@ +#include +#include +#include +#include +#include +#include +#include +#include "cpu.h" + +#ifdef CONFIG_X86_OOSTORE + +static u32 __cpuinit power2(u32 x) +{ + u32 s=1; + while(s<=x) + s<<=1; + return s>>=1; +} + + +/* + * Set up an actual MCR + */ + +static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key) +{ + u32 lo, hi; + + hi = base & ~0xFFF; + lo = ~(size-1); /* Size is a power of 2 so this makes a mask */ + lo &= ~0xFFF; /* Remove the ctrl value bits */ + lo |= key; /* Attribute we wish to set */ + wrmsr(reg+MSR_IDT_MCR0, lo, hi); + mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */ +} + +/* + * Figure what we can cover with MCR's + * + * Shortcut: We know you can't put 4Gig of RAM on a winchip + */ + +static u32 __cpuinit ramtop(void) /* 16388 */ +{ + int i; + u32 top = 0; + u32 clip = 0xFFFFFFFFUL; + + for (i = 0; i < e820.nr_map; i++) { + unsigned long start, end; + + if (e820.map[i].addr > 0xFFFFFFFFUL) + continue; + /* + * Don't MCR over reserved space. Ignore the ISA hole + * we frob around that catastrophy already + */ + + if (e820.map[i].type == E820_RESERVED) + { + if(e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip) + clip = e820.map[i].addr; + continue; + } + start = e820.map[i].addr; + end = e820.map[i].addr + e820.map[i].size; + if (start >= end) + continue; + if (end > top) + top = end; + } + /* Everything below 'top' should be RAM except for the ISA hole. 
+ Because of the limited MCR's we want to map NV/ACPI into our + MCR range for gunk in RAM + + Clip might cause us to MCR insufficient RAM but that is an + acceptable failure mode and should only bite obscure boxes with + a VESA hole at 15Mb + + The second case Clip sometimes kicks in is when the EBDA is marked + as reserved. Again we fail safe with reasonable results + */ + + if(top>clip) + top=clip; + + return top; +} + +/* + * Compute a set of MCR's to give maximum coverage + */ + +static int __cpuinit centaur_mcr_compute(int nr, int key) +{ + u32 mem = ramtop(); + u32 root = power2(mem); + u32 base = root; + u32 top = root; + u32 floor = 0; + int ct = 0; + + while(ct high && fspace > low) + { + centaur_mcr_insert(ct, floor, fspace, key); + floor += fspace; + } + else if(high > low) + { + centaur_mcr_insert(ct, top, high, key); + top += high; + } + else if(low > 0) + { + base -= low; + centaur_mcr_insert(ct, base, low, key); + } + else break; + ct++; + } + /* + * We loaded ct values. We now need to set the mask. The caller + * must do this bit. + */ + + return ct; +} + +static void __cpuinit centaur_create_optimal_mcr(void) +{ + int i; + /* + * Allocate up to 6 mcrs to mark as much of ram as possible + * as write combining and weak write ordered. + * + * To experiment with: Linux never uses stack operations for + * mmio spaces so we could globally enable stack operation wc + * + * Load the registers with type 31 - full write combining, all + * writes weakly ordered. + */ + int used = centaur_mcr_compute(6, 31); + + /* + * Wipe unused MCRs + */ + + for(i=used;i<8;i++) + wrmsr(MSR_IDT_MCR0+i, 0, 0); +} + +static void __cpuinit winchip2_create_optimal_mcr(void) +{ + u32 lo, hi; + int i; + + /* + * Allocate up to 6 mcrs to mark as much of ram as possible + * as write combining, weak store ordered. 
+ * + * Load the registers with type 25 + * 8 - weak write ordering + * 16 - weak read ordering + * 1 - write combining + */ + + int used = centaur_mcr_compute(6, 25); + + /* + * Mark the registers we are using. + */ + + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + for(i=0;i>17) & 7; + lo |= key<<6; /* replace with unlock key */ + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); +} + +static void __cpuinit winchip2_protect_mcr(void) +{ + u32 lo, hi; + + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + lo&=~0x1C0; /* blank bits 8-6 */ + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); +} +#endif /* CONFIG_X86_OOSTORE */ + +#define ACE_PRESENT (1 << 6) +#define ACE_ENABLED (1 << 7) +#define ACE_FCR (1 << 28) /* MSR_VIA_FCR */ + +#define RNG_PRESENT (1 << 2) +#define RNG_ENABLED (1 << 3) +#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ + +static void __cpuinit init_c3(struct cpuinfo_x86 *c) +{ + u32 lo, hi; + + /* Test for Centaur Extended Feature Flags presence */ + if (cpuid_eax(0xC0000000) >= 0xC0000001) { + u32 tmp = cpuid_edx(0xC0000001); + + /* enable ACE unit, if present and disabled */ + if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) { + rdmsr (MSR_VIA_FCR, lo, hi); + lo |= ACE_FCR; /* enable ACE unit */ + wrmsr (MSR_VIA_FCR, lo, hi); + printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n"); + } + + /* enable RNG unit, if present and disabled */ + if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) { + rdmsr (MSR_VIA_RNG, lo, hi); + lo |= RNG_ENABLE; /* enable RNG unit */ + wrmsr (MSR_VIA_RNG, lo, hi); + printk(KERN_INFO "CPU: Enabled h/w RNG\n"); + } + + /* store Centaur Extended Feature Flags as + * word 5 of the CPU capability bit array + */ + c->x86_capability[5] = cpuid_edx(0xC0000001); + } + + /* Cyrix III family needs CX8 & PGE explicity enabled. */ + if (c->x86_model >=6 && c->x86_model <= 9) { + rdmsr (MSR_VIA_FCR, lo, hi); + lo |= (1<<1 | 1<<7); + wrmsr (MSR_VIA_FCR, lo, hi); + set_bit(X86_FEATURE_CX8, c->x86_capability); + } + + /* Before Nehemiah, the C3's had 3dNOW! 
*/ + if (c->x86_model >=6 && c->x86_model <9) + set_bit(X86_FEATURE_3DNOW, c->x86_capability); + + get_model_name(c); + display_cacheinfo(c); +} + +static void __cpuinit init_centaur(struct cpuinfo_x86 *c) +{ + enum { + ECX8=1<<1, + EIERRINT=1<<2, + DPM=1<<3, + DMCE=1<<4, + DSTPCLK=1<<5, + ELINEAR=1<<6, + DSMC=1<<7, + DTLOCK=1<<8, + EDCTLB=1<<8, + EMMX=1<<9, + DPDC=1<<11, + EBRPRED=1<<12, + DIC=1<<13, + DDC=1<<14, + DNA=1<<15, + ERETSTK=1<<16, + E2MMX=1<<19, + EAMD3D=1<<20, + }; + + char *name; + u32 fcr_set=0; + u32 fcr_clr=0; + u32 lo,hi,newlo; + u32 aa,bb,cc,dd; + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, c->x86_capability); + + switch (c->x86) { + + case 5: + switch(c->x86_model) { + case 4: + name="C6"; + fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK; + fcr_clr=DPDC; + printk(KERN_NOTICE "Disabling bugged TSC.\n"); + clear_bit(X86_FEATURE_TSC, c->x86_capability); +#ifdef CONFIG_X86_OOSTORE + centaur_create_optimal_mcr(); + /* Enable + write combining on non-stack, non-string + write combining on string, all types + weak write ordering + + The C6 original lacks weak read order + + Note 0x120 is write only on Winchip 1 */ + + wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); +#endif + break; + case 8: + switch(c->x86_mask) { + default: + name="2"; + break; + case 7 ... 9: + name="2A"; + break; + case 10 ... 
15: + name="2B"; + break; + } + fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; + fcr_clr=DPDC; +#ifdef CONFIG_X86_OOSTORE + winchip2_unprotect_mcr(); + winchip2_create_optimal_mcr(); + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + /* Enable + write combining on non-stack, non-string + write combining on string, all types + weak write ordering + */ + lo|=31; + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); + winchip2_protect_mcr(); +#endif + break; + case 9: + name="3"; + fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; + fcr_clr=DPDC; +#ifdef CONFIG_X86_OOSTORE + winchip2_unprotect_mcr(); + winchip2_create_optimal_mcr(); + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + /* Enable + write combining on non-stack, non-string + write combining on string, all types + weak write ordering + */ + lo|=31; + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); + winchip2_protect_mcr(); +#endif + break; + default: + name="??"; + } + + rdmsr(MSR_IDT_FCR1, lo, hi); + newlo=(lo|fcr_set) & (~fcr_clr); + + if (newlo!=lo) { + printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo ); + wrmsr(MSR_IDT_FCR1, newlo, hi ); + } else { + printk(KERN_INFO "Centaur FCR is 0x%X\n",lo); + } + /* Emulate MTRRs using Centaur's MCR. */ + set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability); + /* Report CX8 */ + set_bit(X86_FEATURE_CX8, c->x86_capability); + /* Set 3DNow! on Winchip 2 and above. */ + if (c->x86_model >=8) + set_bit(X86_FEATURE_3DNOW, c->x86_capability); + /* See if we can find out some more. */ + if ( cpuid_eax(0x80000000) >= 0x80000005 ) { + /* Yes, we can. */ + cpuid(0x80000005,&aa,&bb,&cc,&dd); + /* Add L1 data and code cache sizes. */ + c->x86_cache_size = (cc>>24)+(dd>>24); + } + sprintf( c->x86_model_id, "WinChip %s", name ); + break; + + case 6: + init_c3(c); + break; + } +} + +static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size) +{ + /* VIA C3 CPUs (670-68F) need further shifting. 
*/ + if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) + size >>= 8; + + /* VIA also screwed up Nehemiah stepping 1, and made + it return '65KB' instead of '64KB' + - Note, it seems this may only be in engineering samples. */ + if ((c->x86==6) && (c->x86_model==9) && (c->x86_mask==1) && (size==65)) + size -=1; + + return size; +} + +static struct cpu_dev centaur_cpu_dev __cpuinitdata = { + .c_vendor = "Centaur", + .c_ident = { "CentaurHauls" }, + .c_init = init_centaur, + .c_size_cache = centaur_size_cache, +}; + +int __init centaur_init_cpu(void) +{ + cpu_devs[X86_VENDOR_CENTAUR] = &centaur_cpu_dev; + return 0; +} diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c new file mode 100644 index 00000000000..d506201d397 --- /dev/null +++ b/arch/x86/kernel/cpu/common.c @@ -0,0 +1,733 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_X86_LOCAL_APIC +#include +#include +#include +#endif + +#include "cpu.h" + +DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 }, + [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 }, + [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 }, + [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 }, + /* + * Segments used for calling PnP BIOS have byte granularity. + * They code segments and data segments have fixed 64k limits, + * the transfer segment sizes are set at run time. + */ + [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ + [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */ + [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */ + /* + * The APM segments have byte granularity and their bases + * are set at run time.
All have 64k limits. + */ + [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ + /* 16-bit code */ + [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 }, + [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */ + + [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 }, + [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 }, +} }; +EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); + +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_fxsr __cpuinitdata; +static int disable_x86_serial_nr __cpuinitdata = 1; +static int disable_x86_sep __cpuinitdata; + +struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; + +extern int disable_pse; + +static void __cpuinit default_init(struct cpuinfo_x86 * c) +{ + /* Not much we can do here... */ + /* Check if at least it has cpuid */ + if (c->cpuid_level == -1) { + /* No cpuid. It must be an ancient CPU */ + if (c->x86 == 4) + strcpy(c->x86_model_id, "486"); + else if (c->x86 == 3) + strcpy(c->x86_model_id, "386"); + } +} + +static struct cpu_dev __cpuinitdata default_cpu = { + .c_init = default_init, + .c_vendor = "Unknown", +}; +static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu; + +static int __init cachesize_setup(char *str) +{ + get_option (&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + +int __cpuinit get_model_name(struct cpuinfo_x86 *c) +{ + unsigned int *v; + char *p, *q; + + if (cpuid_eax(0x80000000) < 0x80000004) + return 0; + + v = (unsigned int *) c->x86_model_id; + cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); + cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); + cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); + c->x86_model_id[48] = 0; + + /* Intel chips right-justify this string for some dumb reason; + undo that brain damage */ + p = q = &c->x86_model_id[0]; + while ( *p == ' ' ) + p++; + if ( p != q ) { + while ( *p ) + *q++ = *p++; + while ( q <= &c->x86_model_id[48] ) + *q++ = '\0'; /* Zero-pad the rest */ + } + + return 1; +} + + 
+void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) +{ + unsigned int n, dummy, ecx, edx, l2size; + + n = cpuid_eax(0x80000000); + + if (n >= 0x80000005) { + cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size=(ecx>>24)+(edx>>24); + } + + if (n < 0x80000006) /* Some chips just has a large L1. */ + return; + + ecx = cpuid_ecx(0x80000006); + l2size = ecx >> 16; + + /* do processor-specific cache resizing */ + if (this_cpu->c_size_cache) + l2size = this_cpu->c_size_cache(c,l2size); + + /* Allow user to override all this if necessary. */ + if (cachesize_override != -1) + l2size = cachesize_override; + + if ( l2size == 0 ) + return; /* Again, no L2 cache is possible */ + + c->x86_cache_size = l2size; + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", + l2size, ecx & 0xFF); +} + +/* Naming convention should be: [()] */ +/* This table only is used unless init_() below doesn't set it; */ +/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ + +/* Look up CPU names by table lookup. 
*/ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if ( c->x86_model >= 16 ) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + + +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) +{ + char *v = c->x86_vendor_id; + int i; + static int printed; + + for (i = 0; i < X86_VENDOR_NUM; i++) { + if (cpu_devs[i]) { + if (!strcmp(v,cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v,cpu_devs[i]->c_ident[1]))) { + c->x86_vendor = i; + if (!early) + this_cpu = cpu_devs[i]; + return; + } + } + } + if (!printed) { + printed++; + printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); + printk(KERN_ERR "CPU: Your system may be unstable.\n"); + } + c->x86_vendor = X86_VENDOR_UNKNOWN; + this_cpu = &default_cpu; +} + + +static int __init x86_fxsr_setup(char * s) +{ + /* Tell all the other CPU's to not use it... */ + disable_x86_fxsr = 1; + + /* + * ... and clear the bits early in the boot_cpu_data + * so that the bootup process doesn't try to do this + * either. 
+ */ + clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability); + clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability); + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + + +static int __init x86_sep_setup(char * s) +{ + disable_x86_sep = 1; + return 1; +} +__setup("nosep", x86_sep_setup); + + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + + +/* Probe for the CPUID instruction */ +static int __cpuinit have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +void __init cpu_detect(struct cpuinfo_x86 *c) +{ + /* Get vendor name */ + cpuid(0x00000000, &c->cpuid_level, + (int *)&c->x86_vendor_id[0], + (int *)&c->x86_vendor_id[8], + (int *)&c->x86_vendor_id[4]); + + c->x86 = 4; + if (c->cpuid_level >= 0x00000001) { + u32 junk, tfms, cap0, misc; + cpuid(0x00000001, &tfms, &misc, &junk, &cap0); + c->x86 = (tfms >> 8) & 15; + c->x86_model = (tfms >> 4) & 15; + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) + c->x86_model += ((tfms >> 16) & 0xF) << 4; + c->x86_mask = tfms & 15; + if (cap0 & (1<<19)) + c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; + } +} + +/* Do minimum CPU detection early. + Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. + The others are not touched to avoid unwanted side effects. + + WARNING: this function is only called on the BP. Don't add code here + that is supposed to run on all CPUs. 
*/ +static void __init early_cpu_detect(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + c->x86_cache_alignment = 32; + + if (!have_cpuid_p()) + return; + + cpu_detect(c); + + get_cpu_vendor(c, 1); +} + +static void __cpuinit generic_identify(struct cpuinfo_x86 * c) +{ + u32 tfms, xlvl; + int ebx; + + if (have_cpuid_p()) { + /* Get vendor name */ + cpuid(0x00000000, &c->cpuid_level, + (int *)&c->x86_vendor_id[0], + (int *)&c->x86_vendor_id[8], + (int *)&c->x86_vendor_id[4]); + + get_cpu_vendor(c, 0); + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + + /* Intel-defined flags: level 0x00000001 */ + if ( c->cpuid_level >= 0x00000001 ) { + u32 capability, excap; + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; + c->x86 = (tfms >> 8) & 15; + c->x86_model = (tfms >> 4) & 15; + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) + c->x86_model += ((tfms >> 16) & 0xF) << 4; + c->x86_mask = tfms & 15; +#ifdef CONFIG_X86_HT + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); +#else + c->apicid = (ebx >> 24) & 0xFF; +#endif + if (c->x86_capability[0] & (1<<19)) + c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; + } + + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + if ( (xlvl & 0xffff0000) == 0x80000000 ) { + if ( xlvl >= 0x80000001 ) { + c->x86_capability[1] = cpuid_edx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); + } + if ( xlvl >= 0x80000004 ) + get_model_name(c); /* Default name */ + } + + init_scattered_cpuid_features(c); + } + + early_intel_workaround(c); + +#ifdef CONFIG_X86_HT + c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; +#endif +} + +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { + /* Disable 
processor serial number */ + unsigned long lo,hi; + rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi); + lo |= 0x200000; + wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi); + printk(KERN_NOTICE "CPU serial number disabled.\n"); + clear_bit(X86_FEATURE_PN, c->x86_capability); + + /* Disabling the serial number may affect the cpuid level */ + c->cpuid_level = cpuid_eax(0); + } +} + +static int __init x86_serial_nr_setup(char *s) +{ + disable_x86_serial_nr = 0; + return 1; +} +__setup("serialnumber", x86_serial_nr_setup); + + + +/* + * This does the hard work of actually picking apart the CPU stuff... + */ +static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) +{ + int i; + + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + c->x86_max_cores = 1; + c->x86_clflush_size = 32; + memset(&c->x86_capability, 0, sizeof c->x86_capability); + + if (!have_cpuid_p()) { + /* First of all, decide if this is a 486 or higher */ + /* It's a 486 if we can modify the AC flag */ + if ( flag_is_changeable_p(X86_EFLAGS_AC) ) + c->x86 = 4; + else + c->x86 = 3; + } + + generic_identify(c); + + printk(KERN_DEBUG "CPU: After generic identify, caps:"); + for (i = 0; i < NCAPINTS; i++) + printk(" %08lx", c->x86_capability[i]); + printk("\n"); + + if (this_cpu->c_identify) { + this_cpu->c_identify(c); + + printk(KERN_DEBUG "CPU: After vendor identify, caps:"); + for (i = 0; i < NCAPINTS; i++) + printk(" %08lx", c->x86_capability[i]); + printk("\n"); + } + + /* + * Vendor-specific initialization. In this section we + * canonicalize the feature flags, meaning if there are + * features a certain CPU supports which CPUID doesn't + * tell us, CPUID claiming incorrect flags, or other bugs, + * we handle them here. 
+ * + * At the end of this section, c->x86_capability better + * indicate the features this CPU genuinely supports! + */ + if (this_cpu->c_init) + this_cpu->c_init(c); + + /* Disable the PN if appropriate */ + squash_the_stupid_serial_number(c); + + /* + * The vendor-specific functions might have changed features. Now + * we do "generic changes." + */ + + /* TSC disabled? */ + if ( tsc_disable ) + clear_bit(X86_FEATURE_TSC, c->x86_capability); + + /* FXSR disabled? */ + if (disable_x86_fxsr) { + clear_bit(X86_FEATURE_FXSR, c->x86_capability); + clear_bit(X86_FEATURE_XMM, c->x86_capability); + } + + /* SEP disabled? */ + if (disable_x86_sep) + clear_bit(X86_FEATURE_SEP, c->x86_capability); + + if (disable_pse) + clear_bit(X86_FEATURE_PSE, c->x86_capability); + + /* If the model name is still unset, do table lookup. */ + if ( !c->x86_model_id[0] ) { + char *p; + p = table_lookup_model(c); + if ( p ) + strcpy(c->x86_model_id, p); + else + /* Last resort... */ + sprintf(c->x86_model_id, "%02x/%02x", + c->x86, c->x86_model); + } + + /* Now the feature flags better reflect actual CPU features! */ + + printk(KERN_DEBUG "CPU: After all inits, caps:"); + for (i = 0; i < NCAPINTS; i++) + printk(" %08lx", c->x86_capability[i]); + printk("\n"); + + /* + * On SMP, boot_cpu_data holds the common feature set between + * all CPUs; so make sure that we indicate which features are + * common between the CPUs. The first time this routine gets + * executed, c == &boot_cpu_data. + */ + if ( c != &boot_cpu_data ) { + /* AND the already accumulated flags with these */ + for ( i = 0 ; i < NCAPINTS ; i++ ) + boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; + } + + /* Init Machine Check Exception if available. 
*/ + mcheck_init(c); +} + +void __init identify_boot_cpu(void) +{ + identify_cpu(&boot_cpu_data); + sysenter_setup(); + enable_sep_cpu(); + mtrr_bp_init(); +} + +void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) +{ + BUG_ON(c == &boot_cpu_data); + identify_cpu(c); + enable_sep_cpu(); + mtrr_ap_init(); +} + +#ifdef CONFIG_X86_HT +void __cpuinit detect_ht(struct cpuinfo_x86 *c) +{ + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; + + cpuid(1, &eax, &ebx, &ecx, &edx); + + if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) + return; + + smp_num_siblings = (ebx & 0xff0000) >> 16; + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1 ) { + + if (smp_num_siblings > NR_CPUS) { + printk(KERN_WARNING "CPU: Unsupported number of the " + "siblings %d", smp_num_siblings); + smp_num_siblings = 1; + return; + } + + index_msb = get_count_order(smp_num_siblings); + c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); + + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + + smp_num_siblings = smp_num_siblings / c->x86_max_cores; + + index_msb = get_count_order(smp_num_siblings) ; + + core_bits = get_count_order(c->x86_max_cores); + + c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & + ((1 << core_bits) - 1); + + if (c->x86_max_cores > 1) + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + } +} +#endif + +void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) +{ + char *vendor = NULL; + + if (c->x86_vendor < X86_VENDOR_NUM) + vendor = this_cpu->c_vendor; + else if (c->cpuid_level >= 0) + vendor = c->x86_vendor_id; + + if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) + printk("%s ", vendor); + + if (!c->x86_model_id[0]) + printk("%d86", c->x86); + else + printk("%s", c->x86_model_id); + + if (c->x86_mask || c->cpuid_level >= 0) + printk(" stepping %02x\n", c->x86_mask); + else + printk("\n"); +} + 
+cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; + +/* This is hacky. :) + * We're emulating future behavior. + * In the future, the cpu-specific init functions will be called implicitly + * via the magic of initcalls. + * They will insert themselves into the cpu_devs structure. + * Then, when cpu_init() is called, we can just iterate over that array. + */ + +extern int intel_cpu_init(void); +extern int cyrix_init_cpu(void); +extern int nsc_init_cpu(void); +extern int amd_init_cpu(void); +extern int centaur_init_cpu(void); +extern int transmeta_init_cpu(void); +extern int nexgen_init_cpu(void); +extern int umc_init_cpu(void); + +void __init early_cpu_init(void) +{ + intel_cpu_init(); + cyrix_init_cpu(); + nsc_init_cpu(); + amd_init_cpu(); + centaur_init_cpu(); + transmeta_init_cpu(); + nexgen_init_cpu(); + umc_init_cpu(); + early_cpu_detect(); + +#ifdef CONFIG_DEBUG_PAGEALLOC + /* pse is not compatible with on-the-fly unmapping, + * disable it even if the cpus claim to support it. + */ + clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); + disable_pse = 1; +#endif +} + +/* Make sure %fs is initialized properly in idle threads */ +struct pt_regs * __devinit idle_regs(struct pt_regs *regs) +{ + memset(regs, 0, sizeof(struct pt_regs)); + regs->xfs = __KERNEL_PERCPU; + return regs; +} + +/* Current gdt points %fs at the "master" per-cpu area: after this, + * it's on the real one. */ +void switch_to_new_gdt(void) +{ + struct Xgt_desc_struct gdt_descr; + + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +} + +/* + * cpu_init() initializes state that is per-CPU. Some data is already + * initialized (naturally) in the bootstrap process, such as the GDT + * and IDT. We reload them nevertheless, this function acts as a + * 'CPU state barrier', nothing should get across. 
+ */ +void __cpuinit cpu_init(void) +{ + int cpu = smp_processor_id(); + struct task_struct *curr = current; + struct tss_struct * t = &per_cpu(init_tss, cpu); + struct thread_struct *thread = &curr->thread; + + if (cpu_test_and_set(cpu, cpu_initialized)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); + for (;;) local_irq_enable(); + } + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + if (cpu_has_vme || cpu_has_tsc || cpu_has_de) + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + if (tsc_disable && cpu_has_tsc) { + printk(KERN_NOTICE "Disabling TSC...\n"); + /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ + clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); + set_in_cr4(X86_CR4_TSD); + } + + load_idt(&idt_descr); + switch_to_new_gdt(); + + /* + * Set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); + curr->active_mm = &init_mm; + if (curr->mm) + BUG(); + enter_lazy_tlb(&init_mm, curr); + + load_esp0(t, thread); + set_tss_desc(cpu,t); + load_TR_desc(); + load_LDT(&init_mm.context); + +#ifdef CONFIG_DOUBLEFAULT + /* Set up doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); +#endif + + /* Clear %gs. 
*/ + asm volatile ("mov %0, %%gs" : : "r" (0)); + + /* Clear all 6 debug registers: */ + set_debugreg(0, 0); + set_debugreg(0, 1); + set_debugreg(0, 2); + set_debugreg(0, 3); + set_debugreg(0, 6); + set_debugreg(0, 7); + + /* + * Force FPU initialization: + */ + current_thread_info()->status = 0; + clear_used_math(); + mxcsr_feature_mask_init(); +} + +#ifdef CONFIG_HOTPLUG_CPU +void __cpuinit cpu_uninit(void) +{ + int cpu = raw_smp_processor_id(); + cpu_clear(cpu, cpu_initialized); + + /* lazy TLB state */ + per_cpu(cpu_tlbstate, cpu).state = 0; + per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; +} +#endif diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h new file mode 100644 index 00000000000..2f6432cef6f --- /dev/null +++ b/arch/x86/kernel/cpu/cpu.h @@ -0,0 +1,28 @@ + +struct cpu_model_info { + int vendor; + int family; + char *model_names[16]; +}; + +/* attempt to consolidate cpu attributes */ +struct cpu_dev { + char * c_vendor; + + /* some have two possibilities for cpuid string */ + char * c_ident[2]; + + struct cpu_model_info c_models[4]; + + void (*c_init)(struct cpuinfo_x86 * c); + void (*c_identify)(struct cpuinfo_x86 * c); + unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); +}; + +extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; + +extern int get_model_name(struct cpuinfo_x86 *c); +extern void display_cacheinfo(struct cpuinfo_x86 *c); + +extern void early_intel_workaround(struct cpuinfo_x86 *c); + diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c new file mode 100644 index 00000000000..122d2d75aa9 --- /dev/null +++ b/arch/x86/kernel/cpu/cyrix.c @@ -0,0 +1,463 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cpu.h" + +/* + * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. 
about the CPU + */ +static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +{ + unsigned char ccr2, ccr3; + unsigned long flags; + + /* we test for DEVID by checking whether CCR3 is writable */ + local_irq_save(flags); + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, ccr3 ^ 0x80); + getCx86(0xc0); /* dummy to change bus */ + + if (getCx86(CX86_CCR3) == ccr3) { /* no DEVID regs. */ + ccr2 = getCx86(CX86_CCR2); + setCx86(CX86_CCR2, ccr2 ^ 0x04); + getCx86(0xc0); /* dummy */ + + if (getCx86(CX86_CCR2) == ccr2) /* old Cx486SLC/DLC */ + *dir0 = 0xfd; + else { /* Cx486S A step */ + setCx86(CX86_CCR2, ccr2); + *dir0 = 0xfe; + } + } + else { + setCx86(CX86_CCR3, ccr3); /* restore CCR3 */ + + /* read DIR0 and DIR1 CPU registers */ + *dir0 = getCx86(CX86_DIR0); + *dir1 = getCx86(CX86_DIR1); + } + local_irq_restore(flags); +} + +/* + * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in + * order to identify the Cyrix CPU model after we're out of setup.c + * + * Actually since bugs.h doesn't even reference this perhaps someone should + * fix the documentation ??? + */ +static unsigned char Cx86_dir0_msb __cpuinitdata = 0; + +static char Cx86_model[][9] __cpuinitdata = { + "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", + "M II ", "Unknown" +}; +static char Cx486_name[][5] __cpuinitdata = { + "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", + "SRx2", "DRx2" +}; +static char Cx486S_name[][4] __cpuinitdata = { + "S", "S2", "Se", "S2e" +}; +static char Cx486D_name[][4] __cpuinitdata = { + "DX", "DX2", "?", "?", "?", "DX4" +}; +static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock"; +static char cyrix_model_mult1[] __cpuinitdata = "12??43"; +static char cyrix_model_mult2[] __cpuinitdata = "12233445"; + +/* + * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old + * BIOSes for compatibility with DOS games. This makes the udelay loop + * work correctly, and improves performance. 
+ * + * FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP + */ + +extern void calibrate_delay(void) __init; + +static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c) +{ + unsigned long flags; + + if (Cx86_dir0_msb == 3) { + unsigned char ccr3, ccr5; + + local_irq_save(flags); + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + ccr5 = getCx86(CX86_CCR5); + if (ccr5 & 2) + setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */ + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + local_irq_restore(flags); + + if (ccr5 & 2) { /* possible wrong calibration done */ + printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n"); + calibrate_delay(); + c->loops_per_jiffy = loops_per_jiffy; + } + } +} + + +static void __cpuinit set_cx86_reorder(void) +{ + u8 ccr3; + + printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n"); + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */ + + /* Load/Store Serialize to mem access disable (=reorder it)  */ + setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); + /* set load/store serialize from 1GB to 4GB */ + ccr3 |= 0xe0; + setCx86(CX86_CCR3, ccr3); +} + +static void __cpuinit set_cx86_memwb(void) +{ + u32 cr0; + + printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); + + /* CCR2 bit 2: unlock NW bit */ + setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); + /* set 'Not Write-through' */ + cr0 = 0x20000000; + write_cr0(read_cr0() | cr0); + /* CCR2 bit 2: lock NW bit and set WT1 */ + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); +} + +static void __cpuinit set_cx86_inc(void) +{ + unsigned char ccr3; + + printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n"); + + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */ + /* PCR1 -- Performance Control */ + /* Incrementor on, whatever that is */ + setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); + /* PCR0 -- 
Performance Control */ + /* Incrementor Margin 10 */ + setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ +} + +/* + * Configure later MediaGX and/or Geode processor. + */ + +static void __cpuinit geode_configure(void) +{ + unsigned long flags; + u8 ccr3; + local_irq_save(flags); + + /* Suspend on halt power saving and enable #SUSP pin */ + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); + + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + + + /* FPU fast, DTE cache, Mem bypass */ + setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + + set_cx86_memwb(); + set_cx86_reorder(); + set_cx86_inc(); + + local_irq_restore(flags); +} + + +static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) +{ + unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; + char *buf = c->x86_model_id; + const char *p = NULL; + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, c->x86_capability); + + /* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */ + if ( test_bit(1*32+24, c->x86_capability) ) { + clear_bit(1*32+24, c->x86_capability); + set_bit(X86_FEATURE_CXMMX, c->x86_capability); + } + + do_cyrix_devid(&dir0, &dir1); + + check_cx686_slop(c); + + Cx86_dir0_msb = dir0_msn = dir0 >> 4; /* identifies CPU "family" */ + dir0_lsn = dir0 & 0xf; /* model or clock multiplier */ + + /* common case step number/rev -- exceptions handled below */ + c->x86_model = (dir1 >> 4) + 1; + c->x86_mask = dir1 & 0xf; + + /* Now cook; the original recipe is by Channing Corn, from Cyrix. + * We do the same thing for each generation: we work out + * the model, multiplier and stepping. Black magic included, + * to make the silicon step/rev numbers match the printed ones. 
+ */ + + switch (dir0_msn) { + unsigned char tmp; + + case 0: /* Cx486SLC/DLC/SRx/DRx */ + p = Cx486_name[dir0_lsn & 7]; + break; + + case 1: /* Cx486S/DX/DX2/DX4 */ + p = (dir0_lsn & 8) ? Cx486D_name[dir0_lsn & 5] + : Cx486S_name[dir0_lsn & 3]; + break; + + case 2: /* 5x86 */ + Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; + p = Cx86_cb+2; + break; + + case 3: /* 6x86/6x86L */ + Cx86_cb[1] = ' '; + Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; + if (dir1 > 0x21) { /* 686L */ + Cx86_cb[0] = 'L'; + p = Cx86_cb; + (c->x86_model)++; + } else /* 686 */ + p = Cx86_cb+1; + /* Emulate MTRRs using Cyrix's ARRs. */ + set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); + /* 6x86's contain this bug */ + c->coma_bug = 1; + break; + + case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */ +#ifdef CONFIG_PCI + { + u32 vendor, device; + /* It isn't really a PCI quirk directly, but the cure is the + same. The MediaGX has deep magic SMM stuff that handles the + SB emulation. It thows away the fifo on disable_dma() which + is wrong and ruins the audio. + + Bug2: VSA1 has a wrap bug so that using maximum sized DMA + causes bad things. According to NatSemi VSA2 has another + bug to do with 'hlt'. I've not seen any boards using VSA2 + and X doesn't seem to support it either so who cares 8). + VSA1 we work around however. + */ + + printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); + isa_dma_bridge_buggy = 2; + + /* We do this before the PCI layer is running. However we + are safe here as we know the bridge must be a Cyrix + companion and must be present */ + vendor = read_pci_config_16(0, 0, 0x12, PCI_VENDOR_ID); + device = read_pci_config_16(0, 0, 0x12, PCI_DEVICE_ID); + + /* + * The 5510/5520 companion chips have a funky PIT. 
+ */ + if (vendor == PCI_VENDOR_ID_CYRIX && + (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) + mark_tsc_unstable("cyrix 5510/5520 detected"); + } +#endif + c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ + + /* GXm supports extended cpuid levels 'ala' AMD */ + if (c->cpuid_level == 2) { + /* Enable cxMMX extensions (GX1 Datasheet 54) */ + setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); + + /* + * GXm : 0x30 ... 0x5f GXm datasheet 51 + * GXlv: 0x6x GXlv datasheet 54 + * ? : 0x7x + * GX1 : 0x8x GX1 datasheet 56 + */ + if((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <=dir1 && dir1 <= 0x8f)) + geode_configure(); + get_model_name(c); /* get CPU marketing name */ + return; + } + else { /* MediaGX */ + Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; + p = Cx86_cb+2; + c->x86_model = (dir1 & 0x20) ? 1 : 2; + } + break; + + case 5: /* 6x86MX/M II */ + if (dir1 > 7) + { + dir0_msn++; /* M II */ + /* Enable MMX extensions (App note 108) */ + setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); + } + else + { + c->coma_bug = 1; /* 6x86MX, it has the bug. */ + } + tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0; + Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7]; + p = Cx86_cb+tmp; + if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20)) + (c->x86_model)++; + /* Emulate MTRRs using Cyrix's ARRs. */ + set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); + break; + + case 0xf: /* Cyrix 486 without DEVID registers */ + switch (dir0_lsn) { + case 0xd: /* either a 486SLC or DLC w/o DEVID */ + dir0_msn = 0; + p = Cx486_name[(c->hard_math) ? 
1 : 0]; + break; + + case 0xe: /* a 486S A step */ + dir0_msn = 0; + p = Cx486S_name[0]; + break; + } + break; + + default: /* unknown (shouldn't happen, we know everyone ;-) */ + dir0_msn = 7; + break; + } + strcpy(buf, Cx86_model[dir0_msn & 7]); + if (p) strcat(buf, p); + return; +} + +/* + * Handle National Semiconductor branded processors + */ +static void __cpuinit init_nsc(struct cpuinfo_x86 *c) +{ + /* There may be GX1 processors in the wild that are branded + * NSC and not Cyrix. + * + * This function only handles the GX processor, and kicks every + * thing else to the Cyrix init function above - that should + * cover any processors that might have been branded differently + * after NSC acquired Cyrix. + * + * If this breaks your GX1 horribly, please e-mail + * info-linux@ldcmail.amd.com to tell us. + */ + + /* Handle the GX (Formally known as the GX2) */ + + if (c->x86 == 5 && c->x86_model == 5) + display_cacheinfo(c); + else + init_cyrix(c); +} + +/* + * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected + * by the fact that they preserve the flags across the division of 5/2. + * PII and PPro exhibit this behavior too, but they have cpuid available. + */ + +/* + * Perform the Cyrix 5/2 test. A Cyrix won't change + * the flags, while other 486 chips will. + */ +static inline int test_cyrix_52div(void) +{ + unsigned int test; + + __asm__ __volatile__( + "sahf\n\t" /* clear flags (%eax = 0x0005) */ + "div %b2\n\t" /* divide 5 by 2 */ + "lahf" /* store flags into %ah */ + : "=a" (test) + : "0" (5), "q" (2) + : "cc"); + + /* AH is 0x02 on Cyrix after the divide.. 
*/ + return (unsigned char) (test >> 8) == 0x02; +} + +static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c) +{ + /* Detect Cyrix with disabled CPUID */ + if ( c->x86 == 4 && test_cyrix_52div() ) { + unsigned char dir0, dir1; + + strcpy(c->x86_vendor_id, "CyrixInstead"); + c->x86_vendor = X86_VENDOR_CYRIX; + + /* Actually enable cpuid on the older cyrix */ + + /* Retrieve CPU revisions */ + + do_cyrix_devid(&dir0, &dir1); + + dir0>>=4; + + /* Check it is an affected model */ + + if (dir0 == 5 || dir0 == 3) + { + unsigned char ccr3; + unsigned long flags; + printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); + local_irq_save(flags); + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + local_irq_restore(flags); + } + } +} + +static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { + .c_vendor = "Cyrix", + .c_ident = { "CyrixInstead" }, + .c_init = init_cyrix, + .c_identify = cyrix_identify, +}; + +int __init cyrix_init_cpu(void) +{ + cpu_devs[X86_VENDOR_CYRIX] = &cyrix_cpu_dev; + return 0; +} + +static struct cpu_dev nsc_cpu_dev __cpuinitdata = { + .c_vendor = "NSC", + .c_ident = { "Geode by NSC" }, + .c_init = init_nsc, +}; + +int __init nsc_init_cpu(void) +{ + cpu_devs[X86_VENDOR_NSC] = &nsc_cpu_dev; + return 0; +} + diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c new file mode 100644 index 00000000000..dc4e08147b1 --- /dev/null +++ b/arch/x86/kernel/cpu/intel.c @@ -0,0 +1,333 @@ +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "cpu.h" + +#ifdef CONFIG_X86_LOCAL_APIC +#include +#include +#include +#endif + +extern int trap_init_f00f_bug(void); + +#ifdef CONFIG_X86_INTEL_USERCOPY +/* + * Alignment at which movsl is preferred for bulk memory copies. 
+ */ +struct movsl_mask movsl_mask __read_mostly; +#endif + +void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c) +{ + if (c->x86_vendor != X86_VENDOR_INTEL) + return; + /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ + if (c->x86 == 15 && c->x86_cache_alignment == 64) + c->x86_cache_alignment = 128; +} + +/* + * Early probe support logic for ppro memory erratum #50 + * + * This is called before we do cpu ident work + */ + +int __cpuinit ppro_with_ram_bug(void) +{ + /* Uses data from early_cpu_detect now */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == 1 && + boot_cpu_data.x86_mask < 8) { + printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); + return 1; + } + return 0; +} + + +/* + * P4 Xeon errata 037 workaround. + * Hardware prefetcher may cause stale data to be loaded into the cache. + */ +static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) +{ + unsigned long lo, hi; + + if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { + rdmsr (MSR_IA32_MISC_ENABLE, lo, hi); + if ((lo & (1<<9)) == 0) { + printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); + printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); + lo |= (1<<9); /* Disable hw prefetching */ + wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); + } + } +} + + +/* + * find out the number of processor cores on the die + */ +static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) +{ + unsigned int eax, ebx, ecx, edx; + + if (c->cpuid_level < 4) + return 1; + + /* Intel has a non-standard dependency on %ecx for this CPUID level. 
*/ + cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); + if (eax & 0x1f) + return ((eax >> 26) + 1); + else + return 1; +} + +static void __cpuinit init_intel(struct cpuinfo_x86 *c) +{ + unsigned int l2 = 0; + char *p = NULL; + +#ifdef CONFIG_X86_F00F_BUG + /* + * All current models of Pentium and Pentium with MMX technology CPUs + * have the F0 0F bug, which lets nonprivileged users lock up the system. + * Note that the workaround only should be initialized once... + */ + c->f00f_bug = 0; + if (!paravirt_enabled() && c->x86 == 5) { + static int f00f_workaround_enabled = 0; + + c->f00f_bug = 1; + if ( !f00f_workaround_enabled ) { + trap_init_f00f_bug(); + printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + f00f_workaround_enabled = 1; + } + } +#endif + + select_idle_routine(c); + l2 = init_intel_cacheinfo(c); + if (c->cpuid_level > 9 ) { + unsigned eax = cpuid_eax(10); + /* Check for version and the number of counters */ + if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) + set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); + } + + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ + if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + clear_bit(X86_FEATURE_SEP, c->x86_capability); + + /* Names for the Pentium II/Celeron processors + detectable only by also checking the cache size. + Dixon is NOT a Celeron. 
*/ + if (c->x86 == 6) { + switch (c->x86_model) { + case 5: + if (c->x86_mask == 0) { + if (l2 == 0) + p = "Celeron (Covington)"; + else if (l2 == 256) + p = "Mobile Pentium II (Dixon)"; + } + break; + + case 6: + if (l2 == 128) + p = "Celeron (Mendocino)"; + else if (c->x86_mask == 0 || c->x86_mask == 5) + p = "Celeron-A"; + break; + + case 8: + if (l2 == 128) + p = "Celeron (Coppermine)"; + break; + } + } + + if ( p ) + strcpy(c->x86_model_id, p); + + c->x86_max_cores = num_cpu_cores(c); + + detect_ht(c); + + /* Work around errata */ + Intel_errata_workarounds(c); + +#ifdef CONFIG_X86_INTEL_USERCOPY + /* + * Set up the preferred alignment for movsl bulk memory moves + */ + switch (c->x86) { + case 4: /* 486: untested */ + break; + case 5: /* Old Pentia: untested */ + break; + case 6: /* PII/PIII only like movsl with 8-byte alignment */ + movsl_mask.mask = 7; + break; + case 15: /* P4 is OK down to 8-byte alignment */ + movsl_mask.mask = 7; + break; + } +#endif + + if (c->x86 == 15) { + set_bit(X86_FEATURE_P4, c->x86_capability); + set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability); + } + if (c->x86 == 6) + set_bit(X86_FEATURE_P3, c->x86_capability); + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); + + if (cpu_has_ds) { + unsigned int l1; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_bit(X86_FEATURE_BTS, c->x86_capability); + if (!(l1 & (1<<12))) + set_bit(X86_FEATURE_PEBS, c->x86_capability); + } +} + +static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) +{ + /* Intel PIII Tualatin. This comes in two flavours. + * One has 256kb of cache, the other 512. We have no way + * to determine which, so we use a boottime override + * for the 512kb model, and assume 256 otherwise. 
+ */ + if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) + size = 256; + return size; +} + +static struct cpu_dev intel_cpu_dev __cpuinitdata = { + .c_vendor = "Intel", + .c_ident = { "GenuineIntel" }, + .c_models = { + { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = + { + [0] = "486 DX-25/33", + [1] = "486 DX-50", + [2] = "486 SX", + [3] = "486 DX/2", + [4] = "486 SL", + [5] = "486 SX/2", + [7] = "486 DX/2-WB", + [8] = "486 DX/4", + [9] = "486 DX/4-WB" + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = + { + [0] = "Pentium 60/66 A-step", + [1] = "Pentium 60/66", + [2] = "Pentium 75 - 200", + [3] = "OverDrive PODP5V83", + [4] = "Pentium MMX", + [7] = "Mobile Pentium 75 - 200", + [8] = "Mobile Pentium MMX" + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = + { + [0] = "Pentium Pro A-step", + [1] = "Pentium Pro", + [3] = "Pentium II (Klamath)", + [4] = "Pentium II (Deschutes)", + [5] = "Pentium II (Deschutes)", + [6] = "Mobile Pentium II", + [7] = "Pentium III (Katmai)", + [8] = "Pentium III (Coppermine)", + [10] = "Pentium III (Cascades)", + [11] = "Pentium III (Tualatin)", + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names = + { + [0] = "Pentium 4 (Unknown)", + [1] = "Pentium 4 (Willamette)", + [2] = "Pentium 4 (Northwood)", + [4] = "Pentium 4 (Foster)", + [5] = "Pentium 4 (Foster)", + } + }, + }, + .c_init = init_intel, + .c_size_cache = intel_size_cache, +}; + +__init int intel_cpu_init(void) +{ + cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev; + return 0; +} + +#ifndef CONFIG_X86_CMPXCHG +unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) +{ + u8 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. 
Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u8 *)ptr; + if (prev == old) + *(u8 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u8); + +unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) +{ + u16 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u16 *)ptr; + if (prev == old) + *(u16 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u16); + +unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) +{ + u32 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u32 *)ptr; + if (prev == old) + *(u32 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u32); +#endif + +// arch_initcall(intel_cpu_init); + diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c new file mode 100644 index 00000000000..db6c25aa577 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -0,0 +1,806 @@ +/* + * Routines to identify caches on Intel CPU. + * + * Changes: + * Venkatesh Pallipadi : Adding cache identification through cpuid(4) + * Ashok Raj : Work with CPU hotplug infrastructure. + * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#define LVL_1_INST 1 +#define LVL_1_DATA 2 +#define LVL_2 3 +#define LVL_3 4 +#define LVL_TRACE 5 + +struct _cache_table +{ + unsigned char descriptor; + char cache_type; + short size; +}; + +/* all the cache descriptor types we care about (no TLB or trace cache entries) */ +static struct _cache_table cache_table[] __cpuinitdata = +{ + { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ + { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ + { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ + { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ + { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ + { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ + { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */ + { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */ + { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */ + { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ + { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ + { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ + { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */ + { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */ + { 0x46, LVL_3, 
4096 }, /* 4-way set assoc, 64 byte line size */ + { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */ + { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ + { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */ + { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ + { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ + { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ + { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */ + { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ + { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ + { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ + { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */ + { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */ + { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ + { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ + { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ + { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */ + { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ + { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ + { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ + { 0x00, 0, 0} +}; + + +enum _cache_type +{ + CACHE_TYPE_NULL = 0, + CACHE_TYPE_DATA = 1, + 
CACHE_TYPE_INST = 2, + CACHE_TYPE_UNIFIED = 3 +}; + +union _cpuid4_leaf_eax { + struct { + enum _cache_type type:5; + unsigned int level:3; + unsigned int is_self_initializing:1; + unsigned int is_fully_associative:1; + unsigned int reserved:4; + unsigned int num_threads_sharing:12; + unsigned int num_cores_on_die:6; + } split; + u32 full; +}; + +union _cpuid4_leaf_ebx { + struct { + unsigned int coherency_line_size:12; + unsigned int physical_line_partition:10; + unsigned int ways_of_associativity:10; + } split; + u32 full; +}; + +union _cpuid4_leaf_ecx { + struct { + unsigned int number_of_sets:32; + } split; + u32 full; +}; + +struct _cpuid4_info { + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned long size; + cpumask_t shared_cpu_map; +}; + +unsigned short num_cache_leaves; + +/* AMD doesn't have CPUID4. Emulate it here to report the same + information to the user. This makes some assumptions about the machine: + L2 not shared, no SMT etc. that is currently true on AMD CPUs. + + In theory the TLBs could be reported as fake type (they are in "dummy"). + Maybe later */ +union l1_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 8; + unsigned assoc : 8; + unsigned size_in_kb : 8; + }; + unsigned val; +}; + +union l2_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 4; + unsigned assoc : 4; + unsigned size_in_kb : 16; + }; + unsigned val; +}; + +union l3_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 4; + unsigned assoc : 4; + unsigned res : 2; + unsigned size_encoded : 14; + }; + unsigned val; +}; + +static const unsigned short assocs[] = { + [1] = 1, [2] = 2, [4] = 4, [6] = 8, + [8] = 16, [0xa] = 32, [0xb] = 48, + [0xc] = 64, + [0xf] = 0xffff // ?? 
+}; + +static const unsigned char levels[] = { 1, 1, 2, 3 }; +static const unsigned char types[] = { 1, 2, 3, 3 }; + +static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, + union _cpuid4_leaf_ebx *ebx, + union _cpuid4_leaf_ecx *ecx) +{ + unsigned dummy; + unsigned line_size, lines_per_tag, assoc, size_in_kb; + union l1_cache l1i, l1d; + union l2_cache l2; + union l3_cache l3; + union l1_cache *l1 = &l1d; + + eax->full = 0; + ebx->full = 0; + ecx->full = 0; + + cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); + cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val); + + switch (leaf) { + case 1: + l1 = &l1i; + case 0: + if (!l1->val) + return; + assoc = l1->assoc; + line_size = l1->line_size; + lines_per_tag = l1->lines_per_tag; + size_in_kb = l1->size_in_kb; + break; + case 2: + if (!l2.val) + return; + assoc = l2.assoc; + line_size = l2.line_size; + lines_per_tag = l2.lines_per_tag; + /* cpu_data has errata corrections for K7 applied */ + size_in_kb = current_cpu_data.x86_cache_size; + break; + case 3: + if (!l3.val) + return; + assoc = l3.assoc; + line_size = l3.line_size; + lines_per_tag = l3.lines_per_tag; + size_in_kb = l3.size_encoded * 512; + break; + default: + return; + } + + eax->split.is_self_initializing = 1; + eax->split.type = types[leaf]; + eax->split.level = levels[leaf]; + if (leaf == 3) + eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1; + else + eax->split.num_threads_sharing = 0; + eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; + + + if (assoc == 0xf) + eax->split.is_fully_associative = 1; + ebx->split.coherency_line_size = line_size - 1; + ebx->split.ways_of_associativity = assocs[assoc] - 1; + ebx->split.physical_line_partition = lines_per_tag - 1; + ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / + (ebx->split.ways_of_associativity + 1) - 1; +} + +static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +{ + union _cpuid4_leaf_eax eax; + union 
_cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned edx; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + amd_cpuid4(index, &eax, &ebx, &ecx); + else + cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + if (eax.split.type == CACHE_TYPE_NULL) + return -EIO; /* better error ? */ + + this_leaf->eax = eax; + this_leaf->ebx = ebx; + this_leaf->ecx = ecx; + this_leaf->size = (ecx.split.number_of_sets + 1) * + (ebx.split.coherency_line_size + 1) * + (ebx.split.physical_line_partition + 1) * + (ebx.split.ways_of_associativity + 1); + return 0; +} + +static int __cpuinit find_num_cache_leaves(void) +{ + unsigned int eax, ebx, ecx, edx; + union _cpuid4_leaf_eax cache_eax; + int i = -1; + + do { + ++i; + /* Do cpuid(4) loop to find out num_cache_leaves */ + cpuid_count(4, i, &eax, &ebx, &ecx, &edx); + cache_eax.full = eax; + } while (cache_eax.split.type != CACHE_TYPE_NULL); + return i; +} + +unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) +{ + unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ + unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ + unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ + unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; +#ifdef CONFIG_X86_HT + unsigned int cpu = (c == &boot_cpu_data) ? 
0 : (c - cpu_data); +#endif + + if (c->cpuid_level > 3) { + static int is_initialized; + + if (is_initialized == 0) { + /* Init num_cache_leaves from boot CPU */ + num_cache_leaves = find_num_cache_leaves(); + is_initialized++; + } + + /* + * Whenever possible use cpuid(4), deterministic cache + * parameters cpuid leaf to find the cache details + */ + for (i = 0; i < num_cache_leaves; i++) { + struct _cpuid4_info this_leaf; + + int retval; + + retval = cpuid4_cache_lookup(i, &this_leaf); + if (retval >= 0) { + switch(this_leaf.eax.split.level) { + case 1: + if (this_leaf.eax.split.type == + CACHE_TYPE_DATA) + new_l1d = this_leaf.size/1024; + else if (this_leaf.eax.split.type == + CACHE_TYPE_INST) + new_l1i = this_leaf.size/1024; + break; + case 2: + new_l2 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l2_id = c->apicid >> index_msb; + break; + case 3: + new_l3 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l3_id = c->apicid >> index_msb; + break; + default: + break; + } + } + } + } + /* + * Don't use cpuid2 if cpuid4 is supported. 
For P4, we use cpuid2 for + * trace cache + */ + if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { + /* supports eax=2 call */ + int i, j, n; + int regs[4]; + unsigned char *dp = (unsigned char *)regs; + int only_trace = 0; + + if (num_cache_leaves != 0 && c->x86 == 15) + only_trace = 1; + + /* Number of times to iterate */ + n = cpuid_eax(2) & 0xFF; + + for ( i = 0 ; i < n ; i++ ) { + cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); + + /* If bit 31 is set, this is an unknown format */ + for ( j = 0 ; j < 3 ; j++ ) { + if ( regs[j] < 0 ) regs[j] = 0; + } + + /* Byte 0 is level count, not a descriptor */ + for ( j = 1 ; j < 16 ; j++ ) { + unsigned char des = dp[j]; + unsigned char k = 0; + + /* look up this descriptor in the table */ + while (cache_table[k].descriptor != 0) + { + if (cache_table[k].descriptor == des) { + if (only_trace && cache_table[k].cache_type != LVL_TRACE) + break; + switch (cache_table[k].cache_type) { + case LVL_1_INST: + l1i += cache_table[k].size; + break; + case LVL_1_DATA: + l1d += cache_table[k].size; + break; + case LVL_2: + l2 += cache_table[k].size; + break; + case LVL_3: + l3 += cache_table[k].size; + break; + case LVL_TRACE: + trace += cache_table[k].size; + break; + } + + break; + } + + k++; + } + } + } + } + + if (new_l1d) + l1d = new_l1d; + + if (new_l1i) + l1i = new_l1i; + + if (new_l2) { + l2 = new_l2; +#ifdef CONFIG_X86_HT + cpu_llc_id[cpu] = l2_id; +#endif + } + + if (new_l3) { + l3 = new_l3; +#ifdef CONFIG_X86_HT + cpu_llc_id[cpu] = l3_id; +#endif + } + + if (trace) + printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); + else if ( l1i ) + printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); + + if (l1d) + printk(", L1 D cache: %dK\n", l1d); + else + printk("\n"); + + if (l2) + printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); + + if (l3) + printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + + c->x86_cache_size = l3 ? l3 : (l2 ? 
l2 : (l1i+l1d)); + + return l2; +} + +/* pointer to _cpuid4_info array (for each cache leaf) */ +static struct _cpuid4_info *cpuid4_info[NR_CPUS]; +#define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y])) + +#ifdef CONFIG_SMP +static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) +{ + struct _cpuid4_info *this_leaf, *sibling_leaf; + unsigned long num_threads_sharing; + int index_msb, i; + struct cpuinfo_x86 *c = cpu_data; + + this_leaf = CPUID4_INFO_IDX(cpu, index); + num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; + + if (num_threads_sharing == 1) + cpu_set(cpu, this_leaf->shared_cpu_map); + else { + index_msb = get_count_order(num_threads_sharing); + + for_each_online_cpu(i) { + if (c[i].apicid >> index_msb == + c[cpu].apicid >> index_msb) { + cpu_set(i, this_leaf->shared_cpu_map); + if (i != cpu && cpuid4_info[i]) { + sibling_leaf = CPUID4_INFO_IDX(i, index); + cpu_set(cpu, sibling_leaf->shared_cpu_map); + } + } + } + } +} +static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) +{ + struct _cpuid4_info *this_leaf, *sibling_leaf; + int sibling; + + this_leaf = CPUID4_INFO_IDX(cpu, index); + for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { + sibling_leaf = CPUID4_INFO_IDX(sibling, index); + cpu_clear(cpu, sibling_leaf->shared_cpu_map); + } +} +#else +static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} +static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {} +#endif + +static void free_cache_attributes(unsigned int cpu) +{ + kfree(cpuid4_info[cpu]); + cpuid4_info[cpu] = NULL; +} + +static int __cpuinit detect_cache_attributes(unsigned int cpu) +{ + struct _cpuid4_info *this_leaf; + unsigned long j; + int retval; + cpumask_t oldmask; + + if (num_cache_leaves == 0) + return -ENOENT; + + cpuid4_info[cpu] = kzalloc( + sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); + if (cpuid4_info[cpu] == NULL) + return -ENOMEM; + + oldmask = 
current->cpus_allowed; + retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (retval) + goto out; + + /* Do cpuid and store the results */ + retval = 0; + for (j = 0; j < num_cache_leaves; j++) { + this_leaf = CPUID4_INFO_IDX(cpu, j); + retval = cpuid4_cache_lookup(j, this_leaf); + if (unlikely(retval < 0)) + break; + cache_shared_cpu_map_setup(cpu, j); + } + set_cpus_allowed(current, oldmask); + +out: + if (retval) + free_cache_attributes(cpu); + return retval; +} + +#ifdef CONFIG_SYSFS + +#include +#include + +extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ + +/* pointer to kobject for cpuX/cache */ +static struct kobject * cache_kobject[NR_CPUS]; + +struct _index_kobject { + struct kobject kobj; + unsigned int cpu; + unsigned short index; +}; + +/* pointer to array of kobjects for cpuX/cache/indexY */ +static struct _index_kobject *index_kobject[NR_CPUS]; +#define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y])) + +#define show_one_plus(file_name, object, val) \ +static ssize_t show_##file_name \ + (struct _cpuid4_info *this_leaf, char *buf) \ +{ \ + return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \ +} + +show_one_plus(level, eax.split.level, 0); +show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1); +show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1); +show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1); +show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); + +static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) +{ + return sprintf (buf, "%luK\n", this_leaf->size / 1024); +} + +static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf) +{ + char mask_str[NR_CPUS]; + cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map); + return sprintf(buf, "%s\n", mask_str); +} + +static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { + switch(this_leaf->eax.split.type) { + case 
CACHE_TYPE_DATA: + return sprintf(buf, "Data\n"); + break; + case CACHE_TYPE_INST: + return sprintf(buf, "Instruction\n"); + break; + case CACHE_TYPE_UNIFIED: + return sprintf(buf, "Unified\n"); + break; + default: + return sprintf(buf, "Unknown\n"); + break; + } +} + +struct _cache_attr { + struct attribute attr; + ssize_t (*show)(struct _cpuid4_info *, char *); + ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); +}; + +#define define_one_ro(_name) \ +static struct _cache_attr _name = \ + __ATTR(_name, 0444, show_##_name, NULL) + +define_one_ro(level); +define_one_ro(type); +define_one_ro(coherency_line_size); +define_one_ro(physical_line_partition); +define_one_ro(ways_of_associativity); +define_one_ro(number_of_sets); +define_one_ro(size); +define_one_ro(shared_cpu_map); + +static struct attribute * default_attrs[] = { + &type.attr, + &level.attr, + &coherency_line_size.attr, + &physical_line_partition.attr, + &ways_of_associativity.attr, + &number_of_sets.attr, + &size.attr, + &shared_cpu_map.attr, + NULL +}; + +#define to_object(k) container_of(k, struct _index_kobject, kobj) +#define to_attr(a) container_of(a, struct _cache_attr, attr) + +static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) +{ + struct _cache_attr *fattr = to_attr(attr); + struct _index_kobject *this_leaf = to_object(kobj); + ssize_t ret; + + ret = fattr->show ? 
+ fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), + buf) : + 0; + return ret; +} + +static ssize_t store(struct kobject * kobj, struct attribute * attr, + const char * buf, size_t count) +{ + return 0; +} + +static struct sysfs_ops sysfs_ops = { + .show = show, + .store = store, +}; + +static struct kobj_type ktype_cache = { + .sysfs_ops = &sysfs_ops, + .default_attrs = default_attrs, +}; + +static struct kobj_type ktype_percpu_entry = { + .sysfs_ops = &sysfs_ops, +}; + +static void cpuid4_cache_sysfs_exit(unsigned int cpu) +{ + kfree(cache_kobject[cpu]); + kfree(index_kobject[cpu]); + cache_kobject[cpu] = NULL; + index_kobject[cpu] = NULL; + free_cache_attributes(cpu); +} + +static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) +{ + + if (num_cache_leaves == 0) + return -ENOENT; + + detect_cache_attributes(cpu); + if (cpuid4_info[cpu] == NULL) + return -ENOENT; + + /* Allocate all required memory */ + cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL); + if (unlikely(cache_kobject[cpu] == NULL)) + goto err_out; + + index_kobject[cpu] = kzalloc( + sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); + if (unlikely(index_kobject[cpu] == NULL)) + goto err_out; + + return 0; + +err_out: + cpuid4_cache_sysfs_exit(cpu); + return -ENOMEM; +} + +/* Add/Remove cache interface for CPU device */ +static int __cpuinit cache_add_dev(struct sys_device * sys_dev) +{ + unsigned int cpu = sys_dev->id; + unsigned long i, j; + struct _index_kobject *this_object; + int retval = 0; + + retval = cpuid4_cache_sysfs_init(cpu); + if (unlikely(retval < 0)) + return retval; + + cache_kobject[cpu]->parent = &sys_dev->kobj; + kobject_set_name(cache_kobject[cpu], "%s", "cache"); + cache_kobject[cpu]->ktype = &ktype_percpu_entry; + retval = kobject_register(cache_kobject[cpu]); + + for (i = 0; i < num_cache_leaves; i++) { + this_object = INDEX_KOBJECT_PTR(cpu,i); + this_object->cpu = cpu; + this_object->index = i; + this_object->kobj.parent = 
cache_kobject[cpu]; + kobject_set_name(&(this_object->kobj), "index%1lu", i); + this_object->kobj.ktype = &ktype_cache; + retval = kobject_register(&(this_object->kobj)); + if (unlikely(retval)) { + for (j = 0; j < i; j++) { + kobject_unregister( + &(INDEX_KOBJECT_PTR(cpu,j)->kobj)); + } + kobject_unregister(cache_kobject[cpu]); + cpuid4_cache_sysfs_exit(cpu); + break; + } + } + return retval; +} + +static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) +{ + unsigned int cpu = sys_dev->id; + unsigned long i; + + if (cpuid4_info[cpu] == NULL) + return; + for (i = 0; i < num_cache_leaves; i++) { + cache_remove_shared_cpu_map(cpu, i); + kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); + } + kobject_unregister(cache_kobject[cpu]); + cpuid4_cache_sysfs_exit(cpu); + return; +} + +static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + cache_add_dev(sys_dev); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + cache_remove_dev(sys_dev); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = +{ + .notifier_call = cacheinfo_cpu_callback, +}; + +static int __cpuinit cache_sysfs_init(void) +{ + int i; + + if (num_cache_leaves == 0) + return 0; + + register_hotcpu_notifier(&cacheinfo_cpu_notifier); + + for_each_online_cpu(i) { + cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE, + (void *)(long)i); + } + + return 0; +} + +device_initcall(cache_sysfs_init); + +#endif diff --git a/arch/x86/kernel/cpu/nexgen.c b/arch/x86/kernel/cpu/nexgen.c new file mode 100644 index 00000000000..961fbe1a748 --- /dev/null +++ b/arch/x86/kernel/cpu/nexgen.c @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +#include "cpu.h" + +/* + * Detect a NexGen CPU running without BIOS hypercode 
new enough + * to have CPUID. (Thanks to Herbert Oppmann) + */ + +static int __cpuinit deep_magic_nexgen_probe(void) +{ + int ret; + + __asm__ __volatile__ ( + " movw $0x5555, %%ax\n" + " xorw %%dx,%%dx\n" + " movw $2, %%cx\n" + " divw %%cx\n" + " movl $0, %%eax\n" + " jnz 1f\n" + " movl $1, %%eax\n" + "1:\n" + : "=a" (ret) : : "cx", "dx" ); + return ret; +} + +static void __cpuinit init_nexgen(struct cpuinfo_x86 * c) +{ + c->x86_cache_size = 256; /* A few had 1 MB... */ +} + +static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c) +{ + /* Detect NexGen with old hypercode */ + if ( deep_magic_nexgen_probe() ) { + strcpy(c->x86_vendor_id, "NexGenDriven"); + } +} + +static struct cpu_dev nexgen_cpu_dev __cpuinitdata = { + .c_vendor = "Nexgen", + .c_ident = { "NexGenDriven" }, + .c_models = { + { .vendor = X86_VENDOR_NEXGEN, + .family = 5, + .model_names = { [1] = "Nx586" } + }, + }, + .c_init = init_nexgen, + .c_identify = nexgen_identify, +}; + +int __init nexgen_init_cpu(void) +{ + cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev; + return 0; +} diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c new file mode 100644 index 00000000000..93fecd4b03d --- /dev/null +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -0,0 +1,713 @@ +/* local apic based NMI watchdog for various CPUs. + This file also handles reservation of performance counters for coordination + with other users (like oprofile). + + Note that these events normally don't tick when the CPU idles. This means + the frequency varies with CPU load. 
+ + Original code for K7/P6 written by Keith Owens */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nmi_watchdog_ctlblk { + unsigned int cccr_msr; + unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ + unsigned int evntsel_msr; /* the MSR to select the events to handle */ +}; + +/* Interface defining a CPU specific perfctr watchdog */ +struct wd_ops { + int (*reserve)(void); + void (*unreserve)(void); + int (*setup)(unsigned nmi_hz); + void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); + void (*stop)(void); + unsigned perfctr; + unsigned evntsel; + u64 checkbit; +}; + +static struct wd_ops *wd_ops; + +/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its + * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) + */ +#define NMI_MAX_COUNTER_BITS 66 + +/* perfctr_nmi_owner tracks the ownership of the perfctr registers: + * evntsel_nmi_owner tracks the ownership of the event selection + * - different performance counters/ event selection may be reserved for + * different subsystems this reservation system just tries to coordinate + * things a little + */ +static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); +static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); + +static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); + +/* converts an msr to an appropriate reservation bit */ +static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) +{ + /* returns the bit offset of the performance counter register */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + return (msr - MSR_K7_PERFCTR0); + case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return (msr - MSR_ARCH_PERFMON_PERFCTR0); + + switch (boot_cpu_data.x86) { + case 6: + return (msr - MSR_P6_PERFCTR0); + case 15: + return (msr - MSR_P4_BPU_PERFCTR0); + } + } + return 0; +} + +/* converts an msr to an appropriate 
reservation bit */ +/* returns the bit offset of the event selection register */ +static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) +{ + /* returns the bit offset of the event selection register */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + return (msr - MSR_K7_EVNTSEL0); + case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return (msr - MSR_ARCH_PERFMON_EVENTSEL0); + + switch (boot_cpu_data.x86) { + case 6: + return (msr - MSR_P6_EVNTSEL0); + case 15: + return (msr - MSR_P4_BSU_ESCR0); + } + } + return 0; + +} + +/* checks a bit for availability (hack for oprofile) */ +int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) +{ + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + return (!test_bit(counter, perfctr_nmi_owner)); +} + +/* checks an msr for availability */ +int avail_to_resrv_perfctr_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_perfctr_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + return (!test_bit(counter, perfctr_nmi_owner)); +} + +int reserve_perfctr_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_perfctr_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + if (!test_and_set_bit(counter, perfctr_nmi_owner)) + return 1; + return 0; +} + +void release_perfctr_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_perfctr_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + clear_bit(counter, perfctr_nmi_owner); +} + +int reserve_evntsel_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_evntsel_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + if (!test_and_set_bit(counter, evntsel_nmi_owner)) + return 1; + return 0; +} + +void release_evntsel_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_evntsel_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + clear_bit(counter, evntsel_nmi_owner); +} + +EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); 
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); +EXPORT_SYMBOL(reserve_perfctr_nmi); +EXPORT_SYMBOL(release_perfctr_nmi); +EXPORT_SYMBOL(reserve_evntsel_nmi); +EXPORT_SYMBOL(release_evntsel_nmi); + +void disable_lapic_nmi_watchdog(void) +{ + BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); + + if (atomic_read(&nmi_active) <= 0) + return; + + on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); + wd_ops->unreserve(); + + BUG_ON(atomic_read(&nmi_active) != 0); +} + +void enable_lapic_nmi_watchdog(void) +{ + BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); + + /* are we already enabled */ + if (atomic_read(&nmi_active) != 0) + return; + + /* are we lapic aware */ + if (!wd_ops) + return; + if (!wd_ops->reserve()) { + printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n"); + return; + } + + on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); + touch_nmi_watchdog(); +} + +/* + * Activate the NMI watchdog via the local APIC. + */ + +static unsigned int adjust_for_32bit_ctr(unsigned int hz) +{ + u64 counter_val; + unsigned int retval = hz; + + /* + * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter + * are writable, with higher bits sign extending from bit 31. + * So, we can only program the counter with 31 bit values and + * 32nd bit should be 1, for 33.. to be 1. 
+ * Find the appropriate nmi_hz + */ + counter_val = (u64)cpu_khz * 1000; + do_div(counter_val, retval); + if (counter_val > 0x7fffffffULL) { + u64 count = (u64)cpu_khz * 1000; + do_div(count, 0x7fffffffUL); + retval = count + 1; + } + return retval; +} + +static void +write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz) +{ + u64 count = (u64)cpu_khz * 1000; + + do_div(count, nmi_hz); + if(descr) + Dprintk("setting %s to -0x%08Lx\n", descr, count); + wrmsrl(perfctr_msr, 0 - count); +} + +static void write_watchdog_counter32(unsigned int perfctr_msr, + const char *descr, unsigned nmi_hz) +{ + u64 count = (u64)cpu_khz * 1000; + + do_div(count, nmi_hz); + if(descr) + Dprintk("setting %s to -0x%08Lx\n", descr, count); + wrmsr(perfctr_msr, (u32)(-count), 0); +} + +/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface + nicely stable so there is not much variety */ + +#define K7_EVNTSEL_ENABLE (1 << 22) +#define K7_EVNTSEL_INT (1 << 20) +#define K7_EVNTSEL_OS (1 << 17) +#define K7_EVNTSEL_USR (1 << 16) +#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 +#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING + +static int setup_k7_watchdog(unsigned nmi_hz) +{ + unsigned int perfctr_msr, evntsel_msr; + unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + perfctr_msr = wd_ops->perfctr; + evntsel_msr = wd_ops->evntsel; + + wrmsrl(perfctr_msr, 0UL); + + evntsel = K7_EVNTSEL_INT + | K7_EVNTSEL_OS + | K7_EVNTSEL_USR + | K7_NMI_EVENT; + + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= K7_EVNTSEL_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; //unused + return 1; +} + +static void single_msr_stop_watchdog(void) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + 
+ wrmsr(wd->evntsel_msr, 0, 0); +} + +static int single_msr_reserve(void) +{ + if (!reserve_perfctr_nmi(wd_ops->perfctr)) + return 0; + + if (!reserve_evntsel_nmi(wd_ops->evntsel)) { + release_perfctr_nmi(wd_ops->perfctr); + return 0; + } + return 1; +} + +static void single_msr_unreserve(void) +{ + release_evntsel_nmi(wd_ops->evntsel); + release_perfctr_nmi(wd_ops->perfctr); +} + +static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +{ + /* start the cycle over again */ + write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); +} + +static struct wd_ops k7_wd_ops = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_k7_watchdog, + .rearm = single_msr_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_K7_PERFCTR0, + .evntsel = MSR_K7_EVNTSEL0, + .checkbit = 1ULL<<47, +}; + +/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */ + +#define P6_EVNTSEL0_ENABLE (1 << 22) +#define P6_EVNTSEL_INT (1 << 20) +#define P6_EVNTSEL_OS (1 << 17) +#define P6_EVNTSEL_USR (1 << 16) +#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 +#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED + +static int setup_p6_watchdog(unsigned nmi_hz) +{ + unsigned int perfctr_msr, evntsel_msr; + unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + perfctr_msr = wd_ops->perfctr; + evntsel_msr = wd_ops->evntsel; + + /* KVM doesn't implement this MSR */ + if (wrmsr_safe(perfctr_msr, 0, 0) < 0) + return 0; + + evntsel = P6_EVNTSEL_INT + | P6_EVNTSEL_OS + | P6_EVNTSEL_USR + | P6_NMI_EVENT; + + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + nmi_hz = adjust_for_32bit_ctr(nmi_hz); + write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= P6_EVNTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; //unused + return 1; +} + +static void p6_rearm(struct 
nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +{ + /* P6 based Pentium M need to re-unmask + * the apic vector but it doesn't hurt + * other P6 variant. + * ArchPerfom/Core Duo also needs this */ + apic_write(APIC_LVTPC, APIC_DM_NMI); + /* P6/ARCH_PERFMON has 32 bit counter write */ + write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); +} + +static struct wd_ops p6_wd_ops = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_p6_watchdog, + .rearm = p6_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_P6_PERFCTR0, + .evntsel = MSR_P6_EVNTSEL0, + .checkbit = 1ULL<<39, +}; + +/* Intel P4 performance counters. By far the most complicated of all. */ + +#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) +#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) +#define P4_ESCR_OS (1<<3) +#define P4_ESCR_USR (1<<2) +#define P4_CCCR_OVF_PMI0 (1<<26) +#define P4_CCCR_OVF_PMI1 (1<<27) +#define P4_CCCR_THRESHOLD(N) ((N)<<20) +#define P4_CCCR_COMPLEMENT (1<<19) +#define P4_CCCR_COMPARE (1<<18) +#define P4_CCCR_REQUIRED (3<<16) +#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) +#define P4_CCCR_ENABLE (1<<12) +#define P4_CCCR_OVF (1<<31) + +/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter + CRU_ESCR0 (with any non-null event selector) through a complemented + max threshold. 
[IA32-Vol3, Section 14.9.9] */ + +static int setup_p4_watchdog(unsigned nmi_hz) +{ + unsigned int perfctr_msr, evntsel_msr, cccr_msr; + unsigned int evntsel, cccr_val; + unsigned int misc_enable, dummy; + unsigned int ht_num; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); + if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) + return 0; + +#ifdef CONFIG_SMP + /* detect which hyperthread we are on */ + if (smp_num_siblings == 2) { + unsigned int ebx, apicid; + + ebx = cpuid_ebx(1); + apicid = (ebx >> 24) & 0xff; + ht_num = apicid & 1; + } else +#endif + ht_num = 0; + + /* performance counters are shared resources + * assign each hyperthread its own set + * (re-use the ESCR0 register, seems safe + * and keeps the cccr_val the same) + */ + if (!ht_num) { + /* logical cpu 0 */ + perfctr_msr = MSR_P4_IQ_PERFCTR0; + evntsel_msr = MSR_P4_CRU_ESCR0; + cccr_msr = MSR_P4_IQ_CCCR0; + cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); + } else { + /* logical cpu 1 */ + perfctr_msr = MSR_P4_IQ_PERFCTR1; + evntsel_msr = MSR_P4_CRU_ESCR0; + cccr_msr = MSR_P4_IQ_CCCR1; + cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); + } + + evntsel = P4_ESCR_EVENT_SELECT(0x3F) + | P4_ESCR_OS + | P4_ESCR_USR; + + cccr_val |= P4_CCCR_THRESHOLD(15) + | P4_CCCR_COMPLEMENT + | P4_CCCR_COMPARE + | P4_CCCR_REQUIRED; + + wrmsr(evntsel_msr, evntsel, 0); + wrmsr(cccr_msr, cccr_val, 0); + write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); + apic_write(APIC_LVTPC, APIC_DM_NMI); + cccr_val |= P4_CCCR_ENABLE; + wrmsr(cccr_msr, cccr_val, 0); + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = cccr_msr; + return 1; +} + +static void stop_p4_watchdog(void) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + wrmsr(wd->cccr_msr, 0, 0); + wrmsr(wd->evntsel_msr, 0, 0); +} + +static int p4_reserve(void) +{ + if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0)) + return 0; 
+#ifdef CONFIG_SMP + if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1)) + goto fail1; +#endif + if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) + goto fail2; + /* RED-PEN why is ESCR1 not reserved here? */ + return 1; + fail2: +#ifdef CONFIG_SMP + if (smp_num_siblings > 1) + release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); + fail1: +#endif + release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); + return 0; +} + +static void p4_unreserve(void) +{ +#ifdef CONFIG_SMP + if (smp_num_siblings > 1) + release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); +#endif + release_evntsel_nmi(MSR_P4_CRU_ESCR0); + release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); +} + +static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +{ + unsigned dummy; + /* + * P4 quirks: + * - An overflown perfctr will assert its interrupt + * until the OVF flag in its CCCR is cleared. + * - LVTPC is masked on interrupt and must be + * unmasked by the LVTPC handler. + */ + rdmsrl(wd->cccr_msr, dummy); + dummy &= ~P4_CCCR_OVF; + wrmsrl(wd->cccr_msr, dummy); + apic_write(APIC_LVTPC, APIC_DM_NMI); + /* start the cycle over again */ + write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); +} + +static struct wd_ops p4_wd_ops = { + .reserve = p4_reserve, + .unreserve = p4_unreserve, + .setup = setup_p4_watchdog, + .rearm = p4_rearm, + .stop = stop_p4_watchdog, + /* RED-PEN this is wrong for the other sibling */ + .perfctr = MSR_P4_BPU_PERFCTR0, + .evntsel = MSR_P4_BSU_ESCR0, + .checkbit = 1ULL<<39, +}; + +/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully + all future Intel CPUs. 
*/ + +#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL +#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK + +static int setup_intel_arch_watchdog(unsigned nmi_hz) +{ + unsigned int ebx; + union cpuid10_eax eax; + unsigned int unused; + unsigned int perfctr_msr, evntsel_msr; + unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + /* + * Check whether the Architectural PerfMon supports + * Unhalted Core Cycles Event or not. + * NOTE: Corresponding bit = 0 in ebx indicates event present. + */ + cpuid(10, &(eax.full), &ebx, &unused, &unused); + if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || + (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) + return 0; + + perfctr_msr = wd_ops->perfctr; + evntsel_msr = wd_ops->evntsel; + + wrmsrl(perfctr_msr, 0UL); + + evntsel = ARCH_PERFMON_EVENTSEL_INT + | ARCH_PERFMON_EVENTSEL_OS + | ARCH_PERFMON_EVENTSEL_USR + | ARCH_PERFMON_NMI_EVENT_SEL + | ARCH_PERFMON_NMI_EVENT_UMASK; + + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + nmi_hz = adjust_for_32bit_ctr(nmi_hz); + write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; //unused + wd_ops->checkbit = 1ULL << (eax.split.bit_width - 1); + return 1; +} + +static struct wd_ops intel_arch_wd_ops = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_intel_arch_watchdog, + .rearm = p6_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_ARCH_PERFMON_PERFCTR1, + .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, +}; + +static struct wd_ops coreduo_wd_ops = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_intel_arch_watchdog, + .rearm = p6_rearm, + .stop = 
single_msr_stop_watchdog, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .evntsel = MSR_ARCH_PERFMON_EVENTSEL0, +}; + +static void probe_nmi_watchdog(void) +{ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && + boot_cpu_data.x86 != 16) + return; + wd_ops = &k7_wd_ops; + break; + case X86_VENDOR_INTEL: + /* Work around Core Duo (Yonah) errata AE49 where perfctr1 + doesn't have a working enable bit. */ + if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { + wd_ops = &coreduo_wd_ops; + break; + } + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + wd_ops = &intel_arch_wd_ops; + break; + } + switch (boot_cpu_data.x86) { + case 6: + if (boot_cpu_data.x86_model > 0xd) + return; + + wd_ops = &p6_wd_ops; + break; + case 15: + if (boot_cpu_data.x86_model > 0x4) + return; + + wd_ops = &p4_wd_ops; + break; + default: + return; + } + break; + } +} + +/* Interface to nmi.c */ + +int lapic_watchdog_init(unsigned nmi_hz) +{ + if (!wd_ops) { + probe_nmi_watchdog(); + if (!wd_ops) + return -1; + + if (!wd_ops->reserve()) { + printk(KERN_ERR + "NMI watchdog: cannot reserve perfctrs\n"); + return -1; + } + } + + if (!(wd_ops->setup(nmi_hz))) { + printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n", + raw_smp_processor_id()); + return -1; + } + + return 0; +} + +void lapic_watchdog_stop(void) +{ + if (wd_ops) + wd_ops->stop(); +} + +unsigned lapic_adjust_nmi_hz(unsigned hz) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + if (wd->perfctr_msr == MSR_P6_PERFCTR0 || + wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) + hz = adjust_for_32bit_ctr(hz); + return hz; +} + +int lapic_wd_event(unsigned nmi_hz) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + u64 ctr; + rdmsrl(wd->perfctr_msr, ctr); + if (ctr & wd_ops->checkbit) { /* perfctr still running? 
*/ + return 0; + } + wd_ops->rearm(wd, nmi_hz); + return 1; +} + +int lapic_watchdog_ok(void) +{ + return wd_ops != NULL; +} diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c new file mode 100644 index 00000000000..1e31b6caffb --- /dev/null +++ b/arch/x86/kernel/cpu/proc.c @@ -0,0 +1,192 @@ +#include +#include +#include +#include +#include +#include + +/* + * Get CPU information for use by the procfs. + */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + /* + * These flag bits must match the definitions in . + * NULL means this bit is undefined or reserved; either way it doesn't + * have meaning as far as Linux is concerned. Note that it's important + * to realize there is a difference between this table and CPUID -- if + * applications want to get the raw CPUID data, they should access + * /dev/cpu//cpuid instead. + */ + static const char * const x86_cap_flags[] = { + /* Intel-defined */ + "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", + "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", + "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", + "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", + + /* AMD-defined */ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, + NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", + "3dnowext", "3dnow", + + /* Transmeta-defined */ + "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Other (Linux-defined) */ + "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", + NULL, NULL, NULL, NULL, + "constant_tsc", "up", NULL, "arch_perfmon", + "pebs", "bts", NULL, "sync_rdtsc", + "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Intel-defined (#2) */ + 
"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", + "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, + NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* VIA/Cyrix/Centaur-defined */ + NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", + "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* AMD-defined (#2) */ + "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", + "altmovcr8", "abm", "sse4a", + "misalignsse", "3dnowprefetch", + "osvw", "ibs", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Auxiliary (Linux-defined) */ + "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + }; + static const char * const x86_power_flags[] = { + "ts", /* temperature sensor */ + "fid", /* frequency id control */ + "vid", /* voltage id control */ + "ttp", /* thermal trip */ + "tm", + "stc", + "100mhzsteps", + "hwpstate", + "", /* constant_tsc - moved to flags */ + /* nothing */ + }; + struct cpuinfo_x86 *c = v; + int i, n = c - cpu_data; + int fpu_exception; + +#ifdef CONFIG_SMP + if (!cpu_online(n)) + return 0; +#endif + seq_printf(m, "processor\t: %d\n" + "vendor_id\t: %s\n" + "cpu family\t: %d\n" + "model\t\t: %d\n" + "model name\t: %s\n", + n, + c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", + c->x86, + c->x86_model, + c->x86_model_id[0] ? 
c->x86_model_id : "unknown"); + + if (c->x86_mask || c->cpuid_level >= 0) + seq_printf(m, "stepping\t: %d\n", c->x86_mask); + else + seq_printf(m, "stepping\t: unknown\n"); + + if ( cpu_has(c, X86_FEATURE_TSC) ) { + unsigned int freq = cpufreq_quick_get(n); + if (!freq) + freq = cpu_khz; + seq_printf(m, "cpu MHz\t\t: %u.%03u\n", + freq / 1000, (freq % 1000)); + } + + /* Cache size */ + if (c->x86_cache_size >= 0) + seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); +#ifdef CONFIG_X86_HT + if (c->x86_max_cores * smp_num_siblings > 1) { + seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); + seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); + seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); + } +#endif + + /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */ + fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu); + seq_printf(m, "fdiv_bug\t: %s\n" + "hlt_bug\t\t: %s\n" + "f00f_bug\t: %s\n" + "coma_bug\t: %s\n" + "fpu\t\t: %s\n" + "fpu_exception\t: %s\n" + "cpuid level\t: %d\n" + "wp\t\t: %s\n" + "flags\t\t:", + c->fdiv_bug ? "yes" : "no", + c->hlt_works_ok ? "no" : "yes", + c->f00f_bug ? "yes" : "no", + c->coma_bug ? "yes" : "no", + c->hard_math ? "yes" : "no", + fpu_exception ? "yes" : "no", + c->cpuid_level, + c->wp_works_ok ? "yes" : "no"); + + for ( i = 0 ; i < 32*NCAPINTS ; i++ ) + if ( test_bit(i, c->x86_capability) && + x86_cap_flags[i] != NULL ) + seq_printf(m, " %s", x86_cap_flags[i]); + + for (i = 0; i < 32; i++) + if (c->x86_power & (1 << i)) { + if (i < ARRAY_SIZE(x86_power_flags) && + x86_power_flags[i]) + seq_printf(m, "%s%s", + x86_power_flags[i][0]?" 
":"", + x86_power_flags[i]); + else + seq_printf(m, " [%d]", i); + } + + seq_printf(m, "\nbogomips\t: %lu.%02lu\n", + c->loops_per_jiffy/(500000/HZ), + (c->loops_per_jiffy/(5000/HZ)) % 100); + seq_printf(m, "clflush size\t: %u\n\n", c->x86_clflush_size); + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < NR_CPUS ? cpu_data + *pos : NULL; +} +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} +static void c_stop(struct seq_file *m, void *v) +{ +} +struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c new file mode 100644 index 00000000000..200fb3f9ebf --- /dev/null +++ b/arch/x86/kernel/cpu/transmeta.c @@ -0,0 +1,116 @@ +#include +#include +#include +#include +#include +#include "cpu.h" + +static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) +{ + unsigned int cap_mask, uk, max, dummy; + unsigned int cms_rev1, cms_rev2; + unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; + char cpu_info[65]; + + get_model_name(c); /* Same as AMD/Cyrix */ + display_cacheinfo(c); + + /* Print CMS and CPU revision */ + max = cpuid_eax(0x80860000); + cpu_rev = 0; + if ( max >= 0x80860001 ) { + cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); + if (cpu_rev != 0x02000000) { + printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n", + (cpu_rev >> 24) & 0xff, + (cpu_rev >> 16) & 0xff, + (cpu_rev >> 8) & 0xff, + cpu_rev & 0xff, + cpu_freq); + } + } + if ( max >= 0x80860002 ) { + cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy); + if (cpu_rev == 0x02000000) { + printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n", + new_cpu_rev, cpu_freq); + } + printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n", + (cms_rev1 >> 24) & 0xff, + (cms_rev1 >> 16) & 0xff, + (cms_rev1 >> 8) & 0xff, + 
cms_rev1 & 0xff, + cms_rev2); + } + if ( max >= 0x80860006 ) { + cpuid(0x80860003, + (void *)&cpu_info[0], + (void *)&cpu_info[4], + (void *)&cpu_info[8], + (void *)&cpu_info[12]); + cpuid(0x80860004, + (void *)&cpu_info[16], + (void *)&cpu_info[20], + (void *)&cpu_info[24], + (void *)&cpu_info[28]); + cpuid(0x80860005, + (void *)&cpu_info[32], + (void *)&cpu_info[36], + (void *)&cpu_info[40], + (void *)&cpu_info[44]); + cpuid(0x80860006, + (void *)&cpu_info[48], + (void *)&cpu_info[52], + (void *)&cpu_info[56], + (void *)&cpu_info[60]); + cpu_info[64] = '\0'; + printk(KERN_INFO "CPU: %s\n", cpu_info); + } + + /* Unhide possibly hidden capability flags */ + rdmsr(0x80860004, cap_mask, uk); + wrmsr(0x80860004, ~0, uk); + c->x86_capability[0] = cpuid_edx(0x00000001); + wrmsr(0x80860004, cap_mask, uk); + + /* All Transmeta CPUs have a constant TSC */ + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); + + /* If we can run i686 user-space code, call us an i686 */ +#define USER686 ((1 << X86_FEATURE_TSC)|\ + (1 << X86_FEATURE_CX8)|\ + (1 << X86_FEATURE_CMOV)) + if (c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686) + c->x86 = 6; + +#ifdef CONFIG_SYSCTL + /* randomize_va_space slows us down enormously; + it probably triggers retranslation of x86->native bytecode */ + randomize_va_space = 0; +#endif +} + +static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c) +{ + u32 xlvl; + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ( (xlvl & 0xffff0000) == 0x80860000 ) { + if ( xlvl >= 0x80860001 ) + c->x86_capability[2] = cpuid_edx(0x80860001); + } +} + +static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { + .c_vendor = "Transmeta", + .c_ident = { "GenuineTMx86", "TransmetaCPU" }, + .c_init = init_transmeta, + .c_identify = transmeta_identify, +}; + +int __init transmeta_init_cpu(void) +{ + cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev; + return 0; +} diff --git a/arch/x86/kernel/cpu/umc.c 
b/arch/x86/kernel/cpu/umc.c new file mode 100644 index 00000000000..a7a4e75bdcd --- /dev/null +++ b/arch/x86/kernel/cpu/umc.c @@ -0,0 +1,26 @@ +#include +#include +#include +#include "cpu.h" + +/* UMC chips appear to be only either 386 or 486, so no special init takes place. + */ + +static struct cpu_dev umc_cpu_dev __cpuinitdata = { + .c_vendor = "UMC", + .c_ident = { "UMC UMC UMC" }, + .c_models = { + { .vendor = X86_VENDOR_UMC, .family = 4, .model_names = + { + [1] = "U5D", + [2] = "U5S", + } + }, + }, +}; + +int __init umc_init_cpu(void) +{ + cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev; + return 0; +} diff --git a/arch/x86_64/kernel/Makefile_64 b/arch/x86_64/kernel/Makefile_64 index e7480509103..690aebf37c3 100644 --- a/arch/x86_64/kernel/Makefile_64 +++ b/arch/x86_64/kernel/Makefile_64 @@ -53,11 +53,11 @@ bootflag-y += ../../i386/kernel/bootflag.o cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o topology-y += ../../i386/kernel/topology.o microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o -intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o -addon_cpuid_features-y += ../../i386/kernel/cpu/addon_cpuid_features.o +intel_cacheinfo-y += ../../x86/kernel/cpu/intel_cacheinfo.o +addon_cpuid_features-y += ../../x86/kernel/cpu/addon_cpuid_features.o quirks-y += ../../i386/kernel/quirks.o i8237-y += ../../i386/kernel/i8237.o msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o alternative-y += ../../i386/kernel/alternative.o pcspeaker-y += ../../i386/kernel/pcspeaker.o -perfctr-watchdog-y += ../../i386/kernel/cpu/perfctr-watchdog.o +perfctr-watchdog-y += ../../x86/kernel/cpu/perfctr-watchdog.o -- cgit v1.2.3-70-g09d2 From 9a163ed8e0552fdcffe405d2ea7134819a81456e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:17:01 +0200 Subject: i386: move kernel Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Makefile | 11 +- arch/i386/kernel/.gitignore | 1 - 
arch/i386/kernel/Makefile | 5 - arch/i386/kernel/Makefile_32 | 88 - arch/i386/kernel/alternative.c | 450 ----- arch/i386/kernel/apic_32.c | 1566 ---------------- arch/i386/kernel/apm_32.c | 2403 ------------------------- arch/i386/kernel/asm-offsets.c | 5 - arch/i386/kernel/asm-offsets_32.c | 147 -- arch/i386/kernel/bootflag.c | 98 - arch/i386/kernel/cpuid.c | 242 --- arch/i386/kernel/crash_32.c | 137 -- arch/i386/kernel/crash_dump_32.c | 74 - arch/i386/kernel/doublefault_32.c | 70 - arch/i386/kernel/e820_32.c | 944 ---------- arch/i386/kernel/early_printk.c | 2 - arch/i386/kernel/efi_32.c | 712 -------- arch/i386/kernel/efi_stub_32.S | 122 -- arch/i386/kernel/entry_32.S | 1112 ------------ arch/i386/kernel/geode_32.c | 155 -- arch/i386/kernel/head_32.S | 578 ------ arch/i386/kernel/hpet_32.c | 553 ------ arch/i386/kernel/i386_ksyms_32.c | 30 - arch/i386/kernel/i387_32.c | 546 ------ arch/i386/kernel/i8237.c | 72 - arch/i386/kernel/i8253_32.c | 206 --- arch/i386/kernel/i8259_32.c | 420 ----- arch/i386/kernel/init_task_32.c | 46 - arch/i386/kernel/io_apic_32.c | 2847 ------------------------------ arch/i386/kernel/ioport_32.c | 153 -- arch/i386/kernel/irq_32.c | 343 ---- arch/i386/kernel/kprobes_32.c | 751 -------- arch/i386/kernel/ldt_32.c | 250 --- arch/i386/kernel/machine_kexec_32.c | 171 -- arch/i386/kernel/mca_32.c | 470 ----- arch/i386/kernel/microcode.c | 850 --------- arch/i386/kernel/module_32.c | 152 -- arch/i386/kernel/mpparse_32.c | 1132 ------------ arch/i386/kernel/msr.c | 224 --- arch/i386/kernel/nmi_32.c | 468 ----- arch/i386/kernel/numaq_32.c | 89 - arch/i386/kernel/paravirt_32.c | 392 ---- arch/i386/kernel/pci-dma_32.c | 177 -- arch/i386/kernel/pcspeaker.c | 20 - arch/i386/kernel/process_32.c | 951 ---------- arch/i386/kernel/ptrace_32.c | 723 -------- arch/i386/kernel/quirks.c | 49 - arch/i386/kernel/reboot_32.c | 413 ----- arch/i386/kernel/reboot_fixups_32.c | 68 - arch/i386/kernel/relocate_kernel_32.S | 252 --- arch/i386/kernel/scx200_32.c | 131 
-- arch/i386/kernel/setup_32.c | 653 ------- arch/i386/kernel/sigframe_32.h | 21 - arch/i386/kernel/signal_32.c | 667 ------- arch/i386/kernel/smp_32.c | 707 -------- arch/i386/kernel/smpboot_32.c | 1322 -------------- arch/i386/kernel/smpcommon_32.c | 81 - arch/i386/kernel/srat_32.c | 360 ---- arch/i386/kernel/summit_32.c | 180 -- arch/i386/kernel/sys_i386_32.c | 265 --- arch/i386/kernel/syscall_table_32.S | 326 ---- arch/i386/kernel/sysenter_32.c | 348 ---- arch/i386/kernel/time_32.c | 236 --- arch/i386/kernel/topology.c | 77 - arch/i386/kernel/trampoline_32.S | 85 - arch/i386/kernel/traps_32.c | 1250 ------------- arch/i386/kernel/tsc_32.c | 413 ----- arch/i386/kernel/tsc_sync.c | 1 - arch/i386/kernel/vm86_32.c | 843 --------- arch/i386/kernel/vmi_32.c | 981 ---------- arch/i386/kernel/vmiclock_32.c | 320 ---- arch/i386/kernel/vmlinux.lds.S | 5 - arch/i386/kernel/vmlinux_32.lds.S | 213 --- arch/i386/kernel/vsyscall-int80_32.S | 53 - arch/i386/kernel/vsyscall-note_32.S | 45 - arch/i386/kernel/vsyscall-sigreturn_32.S | 143 -- arch/i386/kernel/vsyscall-sysenter_32.S | 122 -- arch/i386/kernel/vsyscall_32.S | 15 - arch/i386/kernel/vsyscall_32.lds.S | 67 - arch/um/sys-i386/sys_call_table.S | 2 +- arch/x86/kernel/.gitignore | 1 + arch/x86/kernel/Makefile | 5 + arch/x86/kernel/Makefile_32 | 88 + arch/x86/kernel/alternative.c | 450 +++++ arch/x86/kernel/apic_32.c | 1566 ++++++++++++++++ arch/x86/kernel/apm_32.c | 2403 +++++++++++++++++++++++++ arch/x86/kernel/asm-offsets.c | 5 + arch/x86/kernel/asm-offsets_32.c | 147 ++ arch/x86/kernel/bootflag.c | 98 + arch/x86/kernel/cpuid.c | 242 +++ arch/x86/kernel/crash_32.c | 137 ++ arch/x86/kernel/crash_dump_32.c | 74 + arch/x86/kernel/doublefault_32.c | 70 + arch/x86/kernel/e820_32.c | 944 ++++++++++ arch/x86/kernel/early_printk.c | 2 + arch/x86/kernel/efi_32.c | 712 ++++++++ arch/x86/kernel/efi_stub_32.S | 122 ++ arch/x86/kernel/entry_32.S | 1112 ++++++++++++ arch/x86/kernel/geode_32.c | 155 ++ arch/x86/kernel/head_32.S | 578 
++++++ arch/x86/kernel/hpet_32.c | 553 ++++++ arch/x86/kernel/i386_ksyms_32.c | 30 + arch/x86/kernel/i387_32.c | 546 ++++++ arch/x86/kernel/i8237.c | 72 + arch/x86/kernel/i8253_32.c | 206 +++ arch/x86/kernel/i8259_32.c | 420 +++++ arch/x86/kernel/init_task_32.c | 46 + arch/x86/kernel/io_apic_32.c | 2847 ++++++++++++++++++++++++++++++ arch/x86/kernel/ioport_32.c | 153 ++ arch/x86/kernel/irq_32.c | 343 ++++ arch/x86/kernel/kprobes_32.c | 751 ++++++++ arch/x86/kernel/ldt_32.c | 250 +++ arch/x86/kernel/machine_kexec_32.c | 171 ++ arch/x86/kernel/mca_32.c | 470 +++++ arch/x86/kernel/microcode.c | 850 +++++++++ arch/x86/kernel/module_32.c | 152 ++ arch/x86/kernel/mpparse_32.c | 1132 ++++++++++++ arch/x86/kernel/msr.c | 224 +++ arch/x86/kernel/nmi_32.c | 468 +++++ arch/x86/kernel/numaq_32.c | 89 + arch/x86/kernel/paravirt_32.c | 392 ++++ arch/x86/kernel/pci-dma_32.c | 177 ++ arch/x86/kernel/pcspeaker.c | 20 + arch/x86/kernel/process_32.c | 951 ++++++++++ arch/x86/kernel/ptrace_32.c | 723 ++++++++ arch/x86/kernel/quirks.c | 49 + arch/x86/kernel/reboot_32.c | 413 +++++ arch/x86/kernel/reboot_fixups_32.c | 68 + arch/x86/kernel/relocate_kernel_32.S | 252 +++ arch/x86/kernel/scx200_32.c | 131 ++ arch/x86/kernel/setup_32.c | 653 +++++++ arch/x86/kernel/sigframe_32.h | 21 + arch/x86/kernel/signal_32.c | 667 +++++++ arch/x86/kernel/smp_32.c | 707 ++++++++ arch/x86/kernel/smpboot_32.c | 1322 ++++++++++++++ arch/x86/kernel/smpcommon_32.c | 81 + arch/x86/kernel/srat_32.c | 360 ++++ arch/x86/kernel/summit_32.c | 180 ++ arch/x86/kernel/sys_i386_32.c | 265 +++ arch/x86/kernel/syscall_table_32.S | 326 ++++ arch/x86/kernel/sysenter_32.c | 348 ++++ arch/x86/kernel/time_32.c | 236 +++ arch/x86/kernel/topology.c | 77 + arch/x86/kernel/trampoline_32.S | 85 + arch/x86/kernel/traps_32.c | 1250 +++++++++++++ arch/x86/kernel/tsc_32.c | 413 +++++ arch/x86/kernel/tsc_sync.c | 1 + arch/x86/kernel/vm86_32.c | 843 +++++++++ arch/x86/kernel/vmi_32.c | 981 ++++++++++ arch/x86/kernel/vmiclock_32.c | 320 
++++ arch/x86/kernel/vmlinux.lds.S | 5 + arch/x86/kernel/vmlinux_32.lds.S | 213 +++ arch/x86/kernel/vsyscall-int80_32.S | 53 + arch/x86/kernel/vsyscall-note_32.S | 45 + arch/x86/kernel/vsyscall-sigreturn_32.S | 143 ++ arch/x86/kernel/vsyscall-sysenter_32.S | 122 ++ arch/x86/kernel/vsyscall_32.S | 15 + arch/x86/kernel/vsyscall_32.lds.S | 67 + arch/x86/mach-generic/Makefile | 2 +- arch/x86/mach-voyager/Makefile | 2 +- arch/x86_64/ia32/vsyscall-sigreturn.S | 2 +- arch/x86_64/kernel/Makefile | 2 +- arch/x86_64/kernel/Makefile_64 | 18 +- 163 files changed, 31682 insertions(+), 31675 deletions(-) delete mode 100644 arch/i386/kernel/.gitignore delete mode 100644 arch/i386/kernel/Makefile delete mode 100644 arch/i386/kernel/Makefile_32 delete mode 100644 arch/i386/kernel/alternative.c delete mode 100644 arch/i386/kernel/apic_32.c delete mode 100644 arch/i386/kernel/apm_32.c delete mode 100644 arch/i386/kernel/asm-offsets.c delete mode 100644 arch/i386/kernel/asm-offsets_32.c delete mode 100644 arch/i386/kernel/bootflag.c delete mode 100644 arch/i386/kernel/cpuid.c delete mode 100644 arch/i386/kernel/crash_32.c delete mode 100644 arch/i386/kernel/crash_dump_32.c delete mode 100644 arch/i386/kernel/doublefault_32.c delete mode 100644 arch/i386/kernel/e820_32.c delete mode 100644 arch/i386/kernel/early_printk.c delete mode 100644 arch/i386/kernel/efi_32.c delete mode 100644 arch/i386/kernel/efi_stub_32.S delete mode 100644 arch/i386/kernel/entry_32.S delete mode 100644 arch/i386/kernel/geode_32.c delete mode 100644 arch/i386/kernel/head_32.S delete mode 100644 arch/i386/kernel/hpet_32.c delete mode 100644 arch/i386/kernel/i386_ksyms_32.c delete mode 100644 arch/i386/kernel/i387_32.c delete mode 100644 arch/i386/kernel/i8237.c delete mode 100644 arch/i386/kernel/i8253_32.c delete mode 100644 arch/i386/kernel/i8259_32.c delete mode 100644 arch/i386/kernel/init_task_32.c delete mode 100644 arch/i386/kernel/io_apic_32.c delete mode 100644 arch/i386/kernel/ioport_32.c delete mode 
100644 arch/i386/kernel/irq_32.c delete mode 100644 arch/i386/kernel/kprobes_32.c delete mode 100644 arch/i386/kernel/ldt_32.c delete mode 100644 arch/i386/kernel/machine_kexec_32.c delete mode 100644 arch/i386/kernel/mca_32.c delete mode 100644 arch/i386/kernel/microcode.c delete mode 100644 arch/i386/kernel/module_32.c delete mode 100644 arch/i386/kernel/mpparse_32.c delete mode 100644 arch/i386/kernel/msr.c delete mode 100644 arch/i386/kernel/nmi_32.c delete mode 100644 arch/i386/kernel/numaq_32.c delete mode 100644 arch/i386/kernel/paravirt_32.c delete mode 100644 arch/i386/kernel/pci-dma_32.c delete mode 100644 arch/i386/kernel/pcspeaker.c delete mode 100644 arch/i386/kernel/process_32.c delete mode 100644 arch/i386/kernel/ptrace_32.c delete mode 100644 arch/i386/kernel/quirks.c delete mode 100644 arch/i386/kernel/reboot_32.c delete mode 100644 arch/i386/kernel/reboot_fixups_32.c delete mode 100644 arch/i386/kernel/relocate_kernel_32.S delete mode 100644 arch/i386/kernel/scx200_32.c delete mode 100644 arch/i386/kernel/setup_32.c delete mode 100644 arch/i386/kernel/sigframe_32.h delete mode 100644 arch/i386/kernel/signal_32.c delete mode 100644 arch/i386/kernel/smp_32.c delete mode 100644 arch/i386/kernel/smpboot_32.c delete mode 100644 arch/i386/kernel/smpcommon_32.c delete mode 100644 arch/i386/kernel/srat_32.c delete mode 100644 arch/i386/kernel/summit_32.c delete mode 100644 arch/i386/kernel/sys_i386_32.c delete mode 100644 arch/i386/kernel/syscall_table_32.S delete mode 100644 arch/i386/kernel/sysenter_32.c delete mode 100644 arch/i386/kernel/time_32.c delete mode 100644 arch/i386/kernel/topology.c delete mode 100644 arch/i386/kernel/trampoline_32.S delete mode 100644 arch/i386/kernel/traps_32.c delete mode 100644 arch/i386/kernel/tsc_32.c delete mode 100644 arch/i386/kernel/tsc_sync.c delete mode 100644 arch/i386/kernel/vm86_32.c delete mode 100644 arch/i386/kernel/vmi_32.c delete mode 100644 arch/i386/kernel/vmiclock_32.c delete mode 100644 
arch/i386/kernel/vmlinux.lds.S delete mode 100644 arch/i386/kernel/vmlinux_32.lds.S delete mode 100644 arch/i386/kernel/vsyscall-int80_32.S delete mode 100644 arch/i386/kernel/vsyscall-note_32.S delete mode 100644 arch/i386/kernel/vsyscall-sigreturn_32.S delete mode 100644 arch/i386/kernel/vsyscall-sysenter_32.S delete mode 100644 arch/i386/kernel/vsyscall_32.S delete mode 100644 arch/i386/kernel/vsyscall_32.lds.S create mode 100644 arch/x86/kernel/.gitignore create mode 100644 arch/x86/kernel/Makefile create mode 100644 arch/x86/kernel/Makefile_32 create mode 100644 arch/x86/kernel/alternative.c create mode 100644 arch/x86/kernel/apic_32.c create mode 100644 arch/x86/kernel/apm_32.c create mode 100644 arch/x86/kernel/asm-offsets.c create mode 100644 arch/x86/kernel/asm-offsets_32.c create mode 100644 arch/x86/kernel/bootflag.c create mode 100644 arch/x86/kernel/cpuid.c create mode 100644 arch/x86/kernel/crash_32.c create mode 100644 arch/x86/kernel/crash_dump_32.c create mode 100644 arch/x86/kernel/doublefault_32.c create mode 100644 arch/x86/kernel/e820_32.c create mode 100644 arch/x86/kernel/early_printk.c create mode 100644 arch/x86/kernel/efi_32.c create mode 100644 arch/x86/kernel/efi_stub_32.S create mode 100644 arch/x86/kernel/entry_32.S create mode 100644 arch/x86/kernel/geode_32.c create mode 100644 arch/x86/kernel/head_32.S create mode 100644 arch/x86/kernel/hpet_32.c create mode 100644 arch/x86/kernel/i386_ksyms_32.c create mode 100644 arch/x86/kernel/i387_32.c create mode 100644 arch/x86/kernel/i8237.c create mode 100644 arch/x86/kernel/i8253_32.c create mode 100644 arch/x86/kernel/i8259_32.c create mode 100644 arch/x86/kernel/init_task_32.c create mode 100644 arch/x86/kernel/io_apic_32.c create mode 100644 arch/x86/kernel/ioport_32.c create mode 100644 arch/x86/kernel/irq_32.c create mode 100644 arch/x86/kernel/kprobes_32.c create mode 100644 arch/x86/kernel/ldt_32.c create mode 100644 arch/x86/kernel/machine_kexec_32.c create mode 100644 
arch/x86/kernel/mca_32.c create mode 100644 arch/x86/kernel/microcode.c create mode 100644 arch/x86/kernel/module_32.c create mode 100644 arch/x86/kernel/mpparse_32.c create mode 100644 arch/x86/kernel/msr.c create mode 100644 arch/x86/kernel/nmi_32.c create mode 100644 arch/x86/kernel/numaq_32.c create mode 100644 arch/x86/kernel/paravirt_32.c create mode 100644 arch/x86/kernel/pci-dma_32.c create mode 100644 arch/x86/kernel/pcspeaker.c create mode 100644 arch/x86/kernel/process_32.c create mode 100644 arch/x86/kernel/ptrace_32.c create mode 100644 arch/x86/kernel/quirks.c create mode 100644 arch/x86/kernel/reboot_32.c create mode 100644 arch/x86/kernel/reboot_fixups_32.c create mode 100644 arch/x86/kernel/relocate_kernel_32.S create mode 100644 arch/x86/kernel/scx200_32.c create mode 100644 arch/x86/kernel/setup_32.c create mode 100644 arch/x86/kernel/sigframe_32.h create mode 100644 arch/x86/kernel/signal_32.c create mode 100644 arch/x86/kernel/smp_32.c create mode 100644 arch/x86/kernel/smpboot_32.c create mode 100644 arch/x86/kernel/smpcommon_32.c create mode 100644 arch/x86/kernel/srat_32.c create mode 100644 arch/x86/kernel/summit_32.c create mode 100644 arch/x86/kernel/sys_i386_32.c create mode 100644 arch/x86/kernel/syscall_table_32.S create mode 100644 arch/x86/kernel/sysenter_32.c create mode 100644 arch/x86/kernel/time_32.c create mode 100644 arch/x86/kernel/topology.c create mode 100644 arch/x86/kernel/trampoline_32.S create mode 100644 arch/x86/kernel/traps_32.c create mode 100644 arch/x86/kernel/tsc_32.c create mode 100644 arch/x86/kernel/tsc_sync.c create mode 100644 arch/x86/kernel/vm86_32.c create mode 100644 arch/x86/kernel/vmi_32.c create mode 100644 arch/x86/kernel/vmiclock_32.c create mode 100644 arch/x86/kernel/vmlinux.lds.S create mode 100644 arch/x86/kernel/vmlinux_32.lds.S create mode 100644 arch/x86/kernel/vsyscall-int80_32.S create mode 100644 arch/x86/kernel/vsyscall-note_32.S create mode 100644 arch/x86/kernel/vsyscall-sigreturn_32.S 
create mode 100644 arch/x86/kernel/vsyscall-sysenter_32.S create mode 100644 arch/x86/kernel/vsyscall_32.S create mode 100644 arch/x86/kernel/vsyscall_32.lds.S (limited to 'arch/x86/kernel') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 397cfedb4b1..9c1da722964 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -17,6 +17,13 @@ # 20050320 Kianusch Sayah Karadji # Added support for GEODE CPU +# Fill in SRCARCH +SRCARCH := x86 + +archprepare: + @mkdir -p ${objtree}/arch/x86/kernel + + HAS_BIARCH := $(call cc-option-yn, -m32) ifeq ($(HAS_BIARCH),y) AS := $(AS) --32 @@ -99,10 +106,10 @@ core-$(CONFIG_XEN) += arch/x86/xen/ # default subarch .h files mflags-y += -Iinclude/asm-i386/mach-default -head-y := arch/i386/kernel/head_32.o arch/i386/kernel/init_task_32.o +head-y := arch/x86/kernel/head_32.o arch/x86/kernel/init_task_32.o libs-y += arch/x86/lib/ -core-y += arch/i386/kernel/ \ +core-y += arch/x86/kernel/ \ arch/x86/mm/ \ $(mcore-y)/ \ arch/x86/crypto/ diff --git a/arch/i386/kernel/.gitignore b/arch/i386/kernel/.gitignore deleted file mode 100644 index 40836ad9079..00000000000 --- a/arch/i386/kernel/.gitignore +++ /dev/null @@ -1 +0,0 @@ -vsyscall.lds diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile deleted file mode 100644 index d3ebd169982..00000000000 --- a/arch/i386/kernel/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -ifeq ($(CONFIG_X86_32),y) -include ${srctree}/arch/i386/kernel/Makefile_32 -else -include ${srctree}/arch/x86_64/kernel/Makefile_64 -endif diff --git a/arch/i386/kernel/Makefile_32 b/arch/i386/kernel/Makefile_32 deleted file mode 100644 index 5096f486d38..00000000000 --- a/arch/i386/kernel/Makefile_32 +++ /dev/null @@ -1,88 +0,0 @@ -# -# Makefile for the linux kernel. 
-# - -extra-y := head_32.o init_task_32.o vmlinux.lds - -obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ - ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \ - pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\ - quirks.o i8237.o topology.o alternative.o i8253_32.o tsc_32.o - -obj-$(CONFIG_STACKTRACE) += stacktrace.o -obj-y += ../../x86/kernel/cpu/ -obj-y += ../../x86/kernel/acpi/ -obj-$(CONFIG_X86_BIOS_REBOOT) += reboot_32.o -obj-$(CONFIG_MCA) += mca_32.o -obj-$(CONFIG_X86_MSR) += msr.o -obj-$(CONFIG_X86_CPUID) += cpuid.o -obj-$(CONFIG_MICROCODE) += microcode.o -obj-$(CONFIG_APM) += apm_32.o -obj-$(CONFIG_X86_SMP) += smp_32.o smpboot_32.o tsc_sync.o -obj-$(CONFIG_SMP) += smpcommon_32.o -obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_32.o -obj-$(CONFIG_X86_MPPARSE) += mpparse_32.o -obj-$(CONFIG_X86_LOCAL_APIC) += apic_32.o nmi_32.o -obj-$(CONFIG_X86_IO_APIC) += io_apic_32.o -obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o -obj-$(CONFIG_KEXEC) += machine_kexec_32.o relocate_kernel_32.o crash_32.o -obj-$(CONFIG_CRASH_DUMP) += crash_dump_32.o -obj-$(CONFIG_X86_NUMAQ) += numaq_32.o -obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o -obj-$(CONFIG_KPROBES) += kprobes_32.o -obj-$(CONFIG_MODULES) += module_32.o -obj-y += sysenter_32.o vsyscall_32.o -obj-$(CONFIG_ACPI_SRAT) += srat_32.o -obj-$(CONFIG_EFI) += efi_32.o efi_stub_32.o -obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o -obj-$(CONFIG_VM86) += vm86_32.o -obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -obj-$(CONFIG_HPET_TIMER) += hpet_32.o -obj-$(CONFIG_K8_NB) += k8.o -obj-$(CONFIG_MGEODE_LX) += geode_32.o - -obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o -obj-$(CONFIG_PARAVIRT) += paravirt_32.o -obj-y += pcspeaker.o - -obj-$(CONFIG_SCx200) += scx200_32.o - -# vsyscall_32.o contains the vsyscall DSO images as __initdata. -# We must build both images before we can assemble it. 
-# Note: kbuild does not track this dependency due to usage of .incbin -$(obj)/vsyscall_32.o: $(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so -targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so) -targets += vsyscall-note_32.o vsyscall_32.lds - -# The DSO images are built using a special linker script. -quiet_cmd_syscall = SYSCALL $@ - cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \ - -Wl,-T,$(filter-out FORCE,$^) -o $@ - -export CPPFLAGS_vsyscall_32.lds += -P -C -U$(ARCH) - -vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) -SYSCFLAGS_vsyscall-sysenter_32.so = $(vsyscall-flags) -SYSCFLAGS_vsyscall-int80_32.so = $(vsyscall-flags) - -$(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so: \ -$(obj)/vsyscall-%.so: $(src)/vsyscall_32.lds \ - $(obj)/vsyscall-%.o $(obj)/vsyscall-note_32.o FORCE - $(call if_changed,syscall) - -# We also create a special relocatable object that should mirror the symbol -# table and layout of the linked DSO. With ld -R we can then refer to -# these symbols in the kernel code rather than hand-coded addresses. 
-extra-y += vsyscall-syms.o -$(obj)/built-in.o: $(obj)/vsyscall-syms.o -$(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o - -SYSCFLAGS_vsyscall-syms.o = -r -$(obj)/vsyscall-syms.o: $(src)/vsyscall_32.lds \ - $(obj)/vsyscall-sysenter_32.o $(obj)/vsyscall-note_32.o FORCE - $(call if_changed,syscall) - -k8-y += ../../x86_64/kernel/k8.o -stacktrace-y += ../../x86_64/kernel/stacktrace.o - diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c deleted file mode 100644 index bd72d94e713..00000000000 --- a/arch/i386/kernel/alternative.c +++ /dev/null @@ -1,450 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MAX_PATCH_LEN (255-1) - -#ifdef CONFIG_HOTPLUG_CPU -static int smp_alt_once; - -static int __init bootonly(char *str) -{ - smp_alt_once = 1; - return 1; -} -__setup("smp-alt-boot", bootonly); -#else -#define smp_alt_once 1 -#endif - -static int debug_alternative; - -static int __init debug_alt(char *str) -{ - debug_alternative = 1; - return 1; -} -__setup("debug-alternative", debug_alt); - -static int noreplace_smp; - -static int __init setup_noreplace_smp(char *str) -{ - noreplace_smp = 1; - return 1; -} -__setup("noreplace-smp", setup_noreplace_smp); - -#ifdef CONFIG_PARAVIRT -static int noreplace_paravirt = 0; - -static int __init setup_noreplace_paravirt(char *str) -{ - noreplace_paravirt = 1; - return 1; -} -__setup("noreplace-paravirt", setup_noreplace_paravirt); -#endif - -#define DPRINTK(fmt, args...) if (debug_alternative) \ - printk(KERN_DEBUG fmt, args) - -#ifdef GENERIC_NOP1 -/* Use inline assembly to define this because the nops are defined - as inline assembly strings in the include files and we cannot - get them easily into strings. 
*/ -asm("\t.data\nintelnops: " - GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 - GENERIC_NOP7 GENERIC_NOP8); -extern unsigned char intelnops[]; -static unsigned char *intel_nops[ASM_NOP_MAX+1] = { - NULL, - intelnops, - intelnops + 1, - intelnops + 1 + 2, - intelnops + 1 + 2 + 3, - intelnops + 1 + 2 + 3 + 4, - intelnops + 1 + 2 + 3 + 4 + 5, - intelnops + 1 + 2 + 3 + 4 + 5 + 6, - intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -#endif - -#ifdef K8_NOP1 -asm("\t.data\nk8nops: " - K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 - K8_NOP7 K8_NOP8); -extern unsigned char k8nops[]; -static unsigned char *k8_nops[ASM_NOP_MAX+1] = { - NULL, - k8nops, - k8nops + 1, - k8nops + 1 + 2, - k8nops + 1 + 2 + 3, - k8nops + 1 + 2 + 3 + 4, - k8nops + 1 + 2 + 3 + 4 + 5, - k8nops + 1 + 2 + 3 + 4 + 5 + 6, - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -#endif - -#ifdef K7_NOP1 -asm("\t.data\nk7nops: " - K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 - K7_NOP7 K7_NOP8); -extern unsigned char k7nops[]; -static unsigned char *k7_nops[ASM_NOP_MAX+1] = { - NULL, - k7nops, - k7nops + 1, - k7nops + 1 + 2, - k7nops + 1 + 2 + 3, - k7nops + 1 + 2 + 3 + 4, - k7nops + 1 + 2 + 3 + 4 + 5, - k7nops + 1 + 2 + 3 + 4 + 5 + 6, - k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -#endif - -#ifdef CONFIG_X86_64 - -extern char __vsyscall_0; -static inline unsigned char** find_nop_table(void) -{ - return k8_nops; -} - -#else /* CONFIG_X86_64 */ - -static struct nop { - int cpuid; - unsigned char **noptable; -} noptypes[] = { - { X86_FEATURE_K8, k8_nops }, - { X86_FEATURE_K7, k7_nops }, - { -1, NULL } -}; - -static unsigned char** find_nop_table(void) -{ - unsigned char **noptable = intel_nops; - int i; - - for (i = 0; noptypes[i].cpuid >= 0; i++) { - if (boot_cpu_has(noptypes[i].cpuid)) { - noptable = noptypes[i].noptable; - break; - } - } - return noptable; -} - -#endif /* CONFIG_X86_64 */ - -/* Use this to add nops to a buffer, then text_poke the whole buffer. 
*/ -static void add_nops(void *insns, unsigned int len) -{ - unsigned char **noptable = find_nop_table(); - - while (len > 0) { - unsigned int noplen = len; - if (noplen > ASM_NOP_MAX) - noplen = ASM_NOP_MAX; - memcpy(insns, noptable[noplen], noplen); - insns += noplen; - len -= noplen; - } -} - -extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; -extern u8 *__smp_locks[], *__smp_locks_end[]; - -/* Replace instructions with better alternatives for this CPU type. - This runs before SMP is initialized to avoid SMP problems with - self modifying code. This implies that assymetric systems where - APs have less capabilities than the boot processor are not handled. - Tough. Make sure you disable such features by hand. */ - -void apply_alternatives(struct alt_instr *start, struct alt_instr *end) -{ - struct alt_instr *a; - char insnbuf[MAX_PATCH_LEN]; - - DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); - for (a = start; a < end; a++) { - u8 *instr = a->instr; - BUG_ON(a->replacementlen > a->instrlen); - BUG_ON(a->instrlen > sizeof(insnbuf)); - if (!boot_cpu_has(a->cpuid)) - continue; -#ifdef CONFIG_X86_64 - /* vsyscall code is not mapped yet. resolve it manually. 
*/ - if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { - instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); - DPRINTK("%s: vsyscall fixup: %p => %p\n", - __FUNCTION__, a->instr, instr); - } -#endif - memcpy(insnbuf, a->replacement, a->replacementlen); - add_nops(insnbuf + a->replacementlen, - a->instrlen - a->replacementlen); - text_poke(instr, insnbuf, a->instrlen); - } -} - -#ifdef CONFIG_SMP - -static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) -{ - u8 **ptr; - - for (ptr = start; ptr < end; ptr++) { - if (*ptr < text) - continue; - if (*ptr > text_end) - continue; - text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */ - }; -} - -static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) -{ - u8 **ptr; - char insn[1]; - - if (noreplace_smp) - return; - - add_nops(insn, 1); - for (ptr = start; ptr < end; ptr++) { - if (*ptr < text) - continue; - if (*ptr > text_end) - continue; - text_poke(*ptr, insn, 1); - }; -} - -struct smp_alt_module { - /* what is this ??? */ - struct module *mod; - char *name; - - /* ptrs to lock prefixes */ - u8 **locks; - u8 **locks_end; - - /* .text segment, needed to avoid patching init code ;) */ - u8 *text; - u8 *text_end; - - struct list_head next; -}; -static LIST_HEAD(smp_alt_modules); -static DEFINE_SPINLOCK(smp_alt); - -void alternatives_smp_module_add(struct module *mod, char *name, - void *locks, void *locks_end, - void *text, void *text_end) -{ - struct smp_alt_module *smp; - unsigned long flags; - - if (noreplace_smp) - return; - - if (smp_alt_once) { - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(locks, locks_end, - text, text_end); - return; - } - - smp = kzalloc(sizeof(*smp), GFP_KERNEL); - if (NULL == smp) - return; /* we'll run the (safe but slow) SMP code then ... 
*/ - - smp->mod = mod; - smp->name = name; - smp->locks = locks; - smp->locks_end = locks_end; - smp->text = text; - smp->text_end = text_end; - DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n", - __FUNCTION__, smp->locks, smp->locks_end, - smp->text, smp->text_end, smp->name); - - spin_lock_irqsave(&smp_alt, flags); - list_add_tail(&smp->next, &smp_alt_modules); - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(smp->locks, smp->locks_end, - smp->text, smp->text_end); - spin_unlock_irqrestore(&smp_alt, flags); -} - -void alternatives_smp_module_del(struct module *mod) -{ - struct smp_alt_module *item; - unsigned long flags; - - if (smp_alt_once || noreplace_smp) - return; - - spin_lock_irqsave(&smp_alt, flags); - list_for_each_entry(item, &smp_alt_modules, next) { - if (mod != item->mod) - continue; - list_del(&item->next); - spin_unlock_irqrestore(&smp_alt, flags); - DPRINTK("%s: %s\n", __FUNCTION__, item->name); - kfree(item); - return; - } - spin_unlock_irqrestore(&smp_alt, flags); -} - -void alternatives_smp_switch(int smp) -{ - struct smp_alt_module *mod; - unsigned long flags; - -#ifdef CONFIG_LOCKDEP - /* - * A not yet fixed binutils section handling bug prevents - * alternatives-replacement from working reliably, so turn - * it off: - */ - printk("lockdep: not fixing up alternatives.\n"); - return; -#endif - - if (noreplace_smp || smp_alt_once) - return; - BUG_ON(!smp && (num_online_cpus() > 1)); - - spin_lock_irqsave(&smp_alt, flags); - if (smp) { - printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); - clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); - list_for_each_entry(mod, &smp_alt_modules, next) - alternatives_smp_lock(mod->locks, mod->locks_end, - mod->text, mod->text_end); - } else { - printk(KERN_INFO "SMP alternatives: switching to UP code\n"); - set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); 
- list_for_each_entry(mod, &smp_alt_modules, next) - alternatives_smp_unlock(mod->locks, mod->locks_end, - mod->text, mod->text_end); - } - spin_unlock_irqrestore(&smp_alt, flags); -} - -#endif - -#ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end) -{ - struct paravirt_patch_site *p; - char insnbuf[MAX_PATCH_LEN]; - - if (noreplace_paravirt) - return; - - for (p = start; p < end; p++) { - unsigned int used; - - BUG_ON(p->len > MAX_PATCH_LEN); - /* prep the buffer with the original instructions */ - memcpy(insnbuf, p->instr, p->len); - used = paravirt_ops.patch(p->instrtype, p->clobbers, insnbuf, - (unsigned long)p->instr, p->len); - - BUG_ON(used > p->len); - - /* Pad the rest with nops */ - add_nops(insnbuf + used, p->len - used); - text_poke(p->instr, insnbuf, p->len); - } -} -extern struct paravirt_patch_site __start_parainstructions[], - __stop_parainstructions[]; -#endif /* CONFIG_PARAVIRT */ - -void __init alternative_instructions(void) -{ - unsigned long flags; - - /* The patching is not fully atomic, so try to avoid local interruptions - that might execute the to be patched code. - Other CPUs are not running. 
*/ - stop_nmi(); -#ifdef CONFIG_X86_MCE - stop_mce(); -#endif - - local_irq_save(flags); - apply_alternatives(__alt_instructions, __alt_instructions_end); - - /* switch to patch-once-at-boottime-only mode and free the - * tables in case we know the number of CPUs will never ever - * change */ -#ifdef CONFIG_HOTPLUG_CPU - if (num_possible_cpus() < 2) - smp_alt_once = 1; -#endif - -#ifdef CONFIG_SMP - if (smp_alt_once) { - if (1 == num_possible_cpus()) { - printk(KERN_INFO "SMP alternatives: switching to UP code\n"); - set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); - alternatives_smp_unlock(__smp_locks, __smp_locks_end, - _text, _etext); - } - free_init_pages("SMP alternatives", - (unsigned long)__smp_locks, - (unsigned long)__smp_locks_end); - } else { - alternatives_smp_module_add(NULL, "core kernel", - __smp_locks, __smp_locks_end, - _text, _etext); - alternatives_smp_switch(0); - } -#endif - apply_paravirt(__parainstructions, __parainstructions_end); - local_irq_restore(flags); - - restart_nmi(); -#ifdef CONFIG_X86_MCE - restart_mce(); -#endif -} - -/* - * Warning: - * When you use this code to patch more than one byte of an instruction - * you need to make sure that other CPUs cannot execute this code in parallel. - * Also no thread must be currently preempted in the middle of these instructions. - * And on the local CPU you need to be protected again NMI or MCE handlers - * seeing an inconsistent instruction while you patch. - */ -void __kprobes text_poke(void *addr, unsigned char *opcode, int len) -{ - memcpy(addr, opcode, len); - sync_core(); - /* Could also do a CLFLUSH here to speed up CPU recovery; but - that causes hangs on some VIA CPUs. 
*/ -} diff --git a/arch/i386/kernel/apic_32.c b/arch/i386/kernel/apic_32.c deleted file mode 100644 index 3d67ae18d76..00000000000 --- a/arch/i386/kernel/apic_32.c +++ /dev/null @@ -1,1566 +0,0 @@ -/* - * Local APIC handling, local APIC timers - * - * (c) 1999, 2000 Ingo Molnar - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively. - * Maciej W. Rozycki : Various updates and fixes. - * Mikael Pettersson : Power Management for UP-APIC. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "io_ports.h" - -/* - * Sanity check - */ -#if (SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F -# error SPURIOUS_APIC_VECTOR definition error -#endif - -/* - * Knob to control our willingness to enable the local APIC. - * - * -1=force-disable, +1=force-enable - */ -static int enable_local_apic __initdata = 0; - -/* Local APIC timer verification ok */ -static int local_apic_timer_verify_ok; -/* Disable local APIC timer from the kernel commandline or via dmi quirk - or using CPU MSR check */ -int local_apic_timer_disabled; -/* Local APIC timer works in C2 */ -int local_apic_timer_c2_ok; -EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); - -/* - * Debug level, exported for io_apic.c - */ -int apic_verbosity; - -static unsigned int calibration_result; - -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt); -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt); -static void lapic_timer_broadcast(cpumask_t mask); -static void apic_pm_activate(void); - -/* - * The local apic timer can be used for any function which is CPU local. 
- */ -static struct clock_event_device lapic_clockevent = { - .name = "lapic", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT - | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, - .shift = 32, - .set_mode = lapic_timer_setup, - .set_next_event = lapic_next_event, - .broadcast = lapic_timer_broadcast, - .rating = 100, - .irq = -1, -}; -static DEFINE_PER_CPU(struct clock_event_device, lapic_events); - -/* Local APIC was disabled by the BIOS and enabled by the kernel */ -static int enabled_via_apicbase; - -/* - * Get the LAPIC version - */ -static inline int lapic_get_version(void) -{ - return GET_APIC_VERSION(apic_read(APIC_LVR)); -} - -/* - * Check, if the APIC is integrated or a seperate chip - */ -static inline int lapic_is_integrated(void) -{ - return APIC_INTEGRATED(lapic_get_version()); -} - -/* - * Check, whether this is a modern or a first generation APIC - */ -static int modern_apic(void) -{ - /* AMD systems use old APIC versions, so check the CPU */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 >= 0xf) - return 1; - return lapic_get_version() >= 0x14; -} - -void apic_wait_icr_idle(void) -{ - while (apic_read(APIC_ICR) & APIC_ICR_BUSY) - cpu_relax(); -} - -unsigned long safe_apic_wait_icr_idle(void) -{ - unsigned long send_status; - int timeout; - - timeout = 0; - do { - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - if (!send_status) - break; - udelay(100); - } while (timeout++ < 1000); - - return send_status; -} - -/** - * enable_NMI_through_LVT0 - enable NMI through local vector table 0 - */ -void enable_NMI_through_LVT0 (void * dummy) -{ - unsigned int v = APIC_DM_NMI; - - /* Level triggered for 82489DX */ - if (!lapic_is_integrated()) - v |= APIC_LVT_LEVEL_TRIGGER; - apic_write_around(APIC_LVT0, v); -} - -/** - * get_physical_broadcast - Get number of physical broadcast IDs - */ -int get_physical_broadcast(void) -{ - return modern_apic() ? 
0xff : 0xf; -} - -/** - * lapic_get_maxlvt - get the maximum number of local vector table entries - */ -int lapic_get_maxlvt(void) -{ - unsigned int v = apic_read(APIC_LVR); - - /* 82489DXs do not report # of LVT entries. */ - return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; -} - -/* - * Local APIC timer - */ - -/* Clock divisor is set to 16 */ -#define APIC_DIVISOR 16 - -/* - * This function sets up the local APIC timer, with a timeout of - * 'clocks' APIC bus clock. During calibration we actually call - * this function twice on the boot CPU, once with a bogus timeout - * value, second time for real. The other (noncalibrating) CPUs - * call this function only once, with the real, calibrated value. - * - * We do reads before writes even if unnecessary, to get around the - * P5 APIC double write bug. - */ -static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) -{ - unsigned int lvtt_value, tmp_value; - - lvtt_value = LOCAL_TIMER_VECTOR; - if (!oneshot) - lvtt_value |= APIC_LVT_TIMER_PERIODIC; - if (!lapic_is_integrated()) - lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); - - if (!irqen) - lvtt_value |= APIC_LVT_MASKED; - - apic_write_around(APIC_LVTT, lvtt_value); - - /* - * Divide PICLK by 16 - */ - tmp_value = apic_read(APIC_TDCR); - apic_write_around(APIC_TDCR, (tmp_value - & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) - | APIC_TDR_DIV_16); - - if (!oneshot) - apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); -} - -/* - * Program the next event, relative to now - */ -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - apic_write_around(APIC_TMICT, delta); - return 0; -} - -/* - * Setup the lapic timer in periodic or oneshot mode - */ -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - unsigned long flags; - unsigned int v; - - /* Lapic used for broadcast ? 
*/ - if (!local_apic_timer_verify_ok) - return; - - local_irq_save(flags); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - case CLOCK_EVT_MODE_ONESHOT: - __setup_APIC_LVTT(calibration_result, - mode != CLOCK_EVT_MODE_PERIODIC, 1); - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - v = apic_read(APIC_LVTT); - v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write_around(APIC_LVTT, v); - break; - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here */ - break; - } - - local_irq_restore(flags); -} - -/* - * Local APIC timer broadcast function - */ -static void lapic_timer_broadcast(cpumask_t mask) -{ -#ifdef CONFIG_SMP - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); -#endif -} - -/* - * Setup the local APIC timer for this CPU. Copy the initilized values - * of the boot CPU and register the clock event in the framework. - */ -static void __devinit setup_APIC_timer(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); - - memcpy(levt, &lapic_clockevent, sizeof(*levt)); - levt->cpumask = cpumask_of_cpu(smp_processor_id()); - - clockevents_register_device(levt); -} - -/* - * In this functions we calibrate APIC bus clocks to the external timer. - * - * We want to do the calibration only once since we want to have local timer - * irqs syncron. CPUs connected by the same APIC bus have the very same bus - * frequency. - * - * This was previously done by reading the PIT/HPET and waiting for a wrap - * around to find out, that a tick has elapsed. I have a box, where the PIT - * readout is broken, so it never gets out of the wait loop again. This was - * also reported by others. - * - * Monitoring the jiffies value is inaccurate and the clockevents - * infrastructure allows us to do a simple substitution of the interrupt - * handler. - * - * The calibration routine also uses the pm_timer when possible, as the PIT - * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes - * back to normal later in the boot process). 
- */ - -#define LAPIC_CAL_LOOPS (HZ/10) - -static __initdata int lapic_cal_loops = -1; -static __initdata long lapic_cal_t1, lapic_cal_t2; -static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; -static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; -static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; - -/* - * Temporary interrupt handler. - */ -static void __init lapic_cal_handler(struct clock_event_device *dev) -{ - unsigned long long tsc = 0; - long tapic = apic_read(APIC_TMCCT); - unsigned long pm = acpi_pm_read_early(); - - if (cpu_has_tsc) - rdtscll(tsc); - - switch (lapic_cal_loops++) { - case 0: - lapic_cal_t1 = tapic; - lapic_cal_tsc1 = tsc; - lapic_cal_pm1 = pm; - lapic_cal_j1 = jiffies; - break; - - case LAPIC_CAL_LOOPS: - lapic_cal_t2 = tapic; - lapic_cal_tsc2 = tsc; - if (pm < lapic_cal_pm1) - pm += ACPI_PM_OVRRUN; - lapic_cal_pm2 = pm; - lapic_cal_j2 = jiffies; - break; - } -} - -/* - * Setup the boot APIC - * - * Calibrate and verify the result. - */ -void __init setup_boot_APIC_clock(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); - const long pm_100ms = PMTMR_TICKS_PER_SEC/10; - const long pm_thresh = pm_100ms/100; - void (*real_handler)(struct clock_event_device *dev); - unsigned long deltaj; - long delta, deltapm; - int pm_referenced = 0; - - /* - * The local apic timer can be disabled via the kernel - * commandline or from the CPU detection code. Register the lapic - * timer as a dummy clock event source on SMP systems, so the - * broadcast mechanism is used. On UP systems simply ignore it. - */ - if (local_apic_timer_disabled) { - /* No broadcast on UP ! 
*/ - if (num_possible_cpus() > 1) - setup_APIC_timer(); - return; - } - - apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" - "calibrating APIC timer ...\n"); - - local_irq_disable(); - - /* Replace the global interrupt handler */ - real_handler = global_clock_event->event_handler; - global_clock_event->event_handler = lapic_cal_handler; - - /* - * Setup the APIC counter to 1e9. There is no way the lapic - * can underflow in the 100ms detection time frame - */ - __setup_APIC_LVTT(1000000000, 0, 0); - - /* Let the interrupts run */ - local_irq_enable(); - - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); - - local_irq_disable(); - - /* Restore the real event handler */ - global_clock_event->event_handler = real_handler; - - /* Build delta t1-t2 as apic timer counts down */ - delta = lapic_cal_t1 - lapic_cal_t2; - apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); - - /* Check, if the PM timer is available */ - deltapm = lapic_cal_pm2 - lapic_cal_pm1; - apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); - - if (deltapm) { - unsigned long mult; - u64 res; - - mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); - - if (deltapm > (pm_100ms - pm_thresh) && - deltapm < (pm_100ms + pm_thresh)) { - apic_printk(APIC_VERBOSE, "... 
PM timer result ok\n"); - } else { - res = (((u64) deltapm) * mult) >> 22; - do_div(res, 1000000); - printk(KERN_WARNING "APIC calibration not consistent " - "with PM Timer: %ldms instead of 100ms\n", - (long)res); - /* Correct the lapic counter value */ - res = (((u64) delta ) * pm_100ms); - do_div(res, deltapm); - printk(KERN_INFO "APIC delta adjusted to PM-Timer: " - "%lu (%ld)\n", (unsigned long) res, delta); - delta = (long) res; - } - pm_referenced = 1; - } - - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 32); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); - - calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; - - apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); - apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); - apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", - calibration_result); - - if (cpu_has_tsc) { - delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); - apic_printk(APIC_VERBOSE, "..... CPU clock speed is " - "%ld.%04ld MHz.\n", - (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), - (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); - } - - apic_printk(APIC_VERBOSE, "..... host bus clock speed is " - "%u.%04u MHz.\n", - calibration_result / (1000000 / HZ), - calibration_result % (1000000 / HZ)); - - local_apic_timer_verify_ok = 1; - - /* We trust the pm timer based calibration */ - if (!pm_referenced) { - apic_printk(APIC_VERBOSE, "... 
verify APIC timer\n"); - - /* - * Setup the apic timer manually - */ - levt->event_handler = lapic_cal_handler; - lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt); - lapic_cal_loops = -1; - - /* Let the interrupts run */ - local_irq_enable(); - - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); - - local_irq_disable(); - - /* Stop the lapic timer */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); - - local_irq_enable(); - - /* Jiffies delta */ - deltaj = lapic_cal_j2 - lapic_cal_j1; - apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); - - /* Check, if the jiffies result is consistent */ - if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) - apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); - else - local_apic_timer_verify_ok = 0; - } else - local_irq_enable(); - - if (!local_apic_timer_verify_ok) { - printk(KERN_WARNING - "APIC timer disabled due to verification failure.\n"); - /* No broadcast on UP ! */ - if (num_possible_cpus() == 1) - return; - } else { - /* - * If nmi_watchdog is set to IO_APIC, we need the - * PIT/HPET going. Otherwise register lapic as a dummy - * device. - */ - if (nmi_watchdog != NMI_IO_APIC) - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - else - printk(KERN_WARNING "APIC timer registered as dummy," - " due to nmi_watchdog=1!\n"); - } - - /* Setup the lapic or request the broadcast */ - setup_APIC_timer(); -} - -void __devinit setup_secondary_APIC_clock(void) -{ - setup_APIC_timer(); -} - -/* - * The guts of the apic timer interrupt - */ -static void local_apic_timer_interrupt(void) -{ - int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(lapic_events, cpu); - - /* - * Normally we should not be here till LAPIC has been initialized but - * in some cases like kdump, its possible that there is a pending LAPIC - * timer interrupt from previous kernel's context and is delivered in - * new kernel the moment interrupts are enabled. 
- * - * Interrupts are enabled early and LAPIC is setup much later, hence - * its possible that when we get here evt->event_handler is NULL. - * Check for event_handler being NULL and discard the interrupt as - * spurious. - */ - if (!evt->event_handler) { - printk(KERN_WARNING - "Spurious LAPIC timer interrupt on cpu %d\n", cpu); - /* Switch it off */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); - return; - } - - per_cpu(irq_stat, cpu).apic_timer_irqs++; - - evt->event_handler(evt); -} - -/* - * Local APIC timer interrupt. This is the most natural way for doing - * local interrupts, but local timer interrupts can be emulated by - * broadcast interrupts too. [in case the hw doesn't support APIC timers] - * - * [ if a single-CPU system runs an SMP kernel then we call the local - * interrupt as well. Thus we cannot inline the local irq ... ] - */ - -void fastcall smp_apic_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - /* - * NOTE! We'd better ACK the irq immediately, - * because timer handling can be slow. - */ - ack_APIC_irq(); - /* - * update_process_times() expects us to have done irq_enter(). - * Besides, if we don't timer interrupts ignore the global - * interrupt lock, which is the WrongThing (tm) to do. - */ - irq_enter(); - local_apic_timer_interrupt(); - irq_exit(); - - set_irq_regs(old_regs); -} - -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - -/* - * Local APIC start and shutdown - */ - -/** - * clear_local_APIC - shutdown the local APIC - * - * This is called, when a CPU is disabled and before rebooting, so the state of - * the local APIC has no dangling leftovers. Also used to cleanout any BIOS - * leftovers during boot. - */ -void clear_local_APIC(void) -{ - int maxlvt = lapic_get_maxlvt(); - unsigned long v; - - /* - * Masking an LVT entry can trigger a local APIC error - * if the vector is zero. Mask LVTERR first to prevent this. 
- */ - if (maxlvt >= 3) { - v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ - apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED); - } - /* - * Careful: we have to set masks only first to deassert - * any level-triggered sources. - */ - v = apic_read(APIC_LVTT); - apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT0); - apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT1); - apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED); - if (maxlvt >= 4) { - v = apic_read(APIC_LVTPC); - apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); - } - - /* lets not touch this if we didn't frob it */ -#ifdef CONFIG_X86_MCE_P4THERMAL - if (maxlvt >= 5) { - v = apic_read(APIC_LVTTHMR); - apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED); - } -#endif - /* - * Clean APIC state for other OSs: - */ - apic_write_around(APIC_LVTT, APIC_LVT_MASKED); - apic_write_around(APIC_LVT0, APIC_LVT_MASKED); - apic_write_around(APIC_LVT1, APIC_LVT_MASKED); - if (maxlvt >= 3) - apic_write_around(APIC_LVTERR, APIC_LVT_MASKED); - if (maxlvt >= 4) - apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); - -#ifdef CONFIG_X86_MCE_P4THERMAL - if (maxlvt >= 5) - apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED); -#endif - /* Integrated APIC (!82489DX) ? */ - if (lapic_is_integrated()) { - if (maxlvt > 3) - /* Clear ESR due to Pentium errata 3AP and 11AP */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } -} - -/** - * disable_local_APIC - clear and disable the local APIC - */ -void disable_local_APIC(void) -{ - unsigned long value; - - clear_local_APIC(); - - /* - * Disable APIC (implies clearing of registers - * for 82489DX!). - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_SPIV_APIC_ENABLED; - apic_write_around(APIC_SPIV, value); - - /* - * When LAPIC was disabled by the BIOS and enabled by the kernel, - * restore the disabled state. 
- */ - if (enabled_via_apicbase) { - unsigned int l, h; - - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_ENABLE; - wrmsr(MSR_IA32_APICBASE, l, h); - } -} - -/* - * If Linux enabled the LAPIC against the BIOS default disable it down before - * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and - * not power-off. Additionally clear all LVT entries before disable_local_APIC - * for the case where Linux didn't enable the LAPIC. - */ -void lapic_shutdown(void) -{ - unsigned long flags; - - if (!cpu_has_apic) - return; - - local_irq_save(flags); - clear_local_APIC(); - - if (enabled_via_apicbase) - disable_local_APIC(); - - local_irq_restore(flags); -} - -/* - * This is to verify that we're looking at a real local APIC. - * Check these against your board if the CPUs aren't getting - * started for no apparent reason. - */ -int __init verify_local_APIC(void) -{ - unsigned int reg0, reg1; - - /* - * The version register is read-only in a real APIC. - */ - reg0 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); - apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); - reg1 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); - - /* - * The two version reads above should print the same - * numbers. If the second one is different, then we - * poke at a non-APIC. - */ - if (reg1 != reg0) - return 0; - - /* - * Check if the version looks reasonably. - */ - reg1 = GET_APIC_VERSION(reg0); - if (reg1 == 0x00 || reg1 == 0xff) - return 0; - reg1 = lapic_get_maxlvt(); - if (reg1 < 0x02 || reg1 == 0xff) - return 0; - - /* - * The ID register is read/write in a real APIC. - */ - reg0 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); - - /* - * The next two are just to see if we have sane values. - * They're only really relevant if we're in Virtual Wire - * compatibility mode, but most boxes are anymore. 
- */ - reg0 = apic_read(APIC_LVT0); - apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); - reg1 = apic_read(APIC_LVT1); - apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); - - return 1; -} - -/** - * sync_Arb_IDs - synchronize APIC bus arbitration IDs - */ -void __init sync_Arb_IDs(void) -{ - /* - * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not - * needed on AMD. - */ - if (modern_apic()) - return; - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG - | APIC_DM_INIT); -} - -/* - * An initial setup of the virtual wire mode. - */ -void __init init_bsp_APIC(void) -{ - unsigned long value; - - /* - * Don't do the setup now if we have a SMP BIOS as the - * through-I/O-APIC virtual wire mode might be active. - */ - if (smp_found_config || !cpu_has_apic) - return; - - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - - /* - * Enable APIC. - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - - /* This bit is reserved on P4/Xeon and should be cleared */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - (boot_cpu_data.x86 == 15)) - value &= ~APIC_SPIV_FOCUS_DISABLED; - else - value |= APIC_SPIV_FOCUS_DISABLED; - value |= SPURIOUS_APIC_VECTOR; - apic_write_around(APIC_SPIV, value); - - /* - * Set up the virtual wire mode. 
- */ - apic_write_around(APIC_LVT0, APIC_DM_EXTINT); - value = APIC_DM_NMI; - if (!lapic_is_integrated()) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; - apic_write_around(APIC_LVT1, value); -} - -/** - * setup_local_APIC - setup the local APIC - */ -void __devinit setup_local_APIC(void) -{ - unsigned long oldvalue, value, maxlvt, integrated; - int i, j; - - /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (esr_disable) { - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - } - - integrated = lapic_is_integrated(); - - /* - * Double-check whether this APIC is really registered. - */ - if (!apic_id_registered()) - BUG(); - - /* - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ - init_apic_ldr(); - - /* - * Set Task Priority to 'accept all'. We never change this - * later on. - */ - value = apic_read(APIC_TASKPRI); - value &= ~APIC_TPRI_MASK; - apic_write_around(APIC_TASKPRI, value); - - /* - * After a crash, we no longer service the interrupts and a pending - * interrupt from previous kernel might still have ISR bit set. - * - * Most probably by now CPU has serviced that pending interrupt and - * it might not have done the ack_APIC_irq() because it thought, - * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it - * does not clear the ISR bit and cpu thinks it has already serivced - * the interrupt. Hence a vector might get locked. It was noticed - * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. - */ - for (i = APIC_ISR_NR - 1; i >= 0; i--) { - value = apic_read(APIC_ISR + i*0x10); - for (j = 31; j >= 0; j--) { - if (value & (1< 3) /* Due to the Pentium erratum 3AP. 
*/ - apic_write(APIC_ESR, 0); - oldvalue = apic_read(APIC_ESR); - - /* enables sending errors */ - value = ERROR_APIC_VECTOR; - apic_write_around(APIC_LVTERR, value); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - if (value != oldvalue) - apic_printk(APIC_VERBOSE, "ESR value before enabling " - "vector: 0x%08lx after: 0x%08lx\n", - oldvalue, value); - } else { - if (esr_disable) - /* - * Something untraceble is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - else - printk(KERN_INFO "No ESR for 82489DX.\n"); - } - - /* Disable the local apic timer */ - value = apic_read(APIC_LVTT); - value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write_around(APIC_LVTT, value); - - setup_apic_nmi_watchdog(NULL); - apic_pm_activate(); -} - -/* - * Detect and initialize APIC - */ -static int __init detect_init_APIC (void) -{ - u32 h, l, features; - - /* Disabled by kernel option? */ - if (enable_local_apic < 0) - return -1; - - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || - (boot_cpu_data.x86 == 15)) - break; - goto no_apic; - case X86_VENDOR_INTEL: - if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || - (boot_cpu_data.x86 == 5 && cpu_has_apic)) - break; - goto no_apic; - default: - goto no_apic; - } - - if (!cpu_has_apic) { - /* - * Over-ride BIOS and try to enable the local APIC only if - * "lapic" specified. - */ - if (enable_local_apic <= 0) { - printk(KERN_INFO "Local APIC disabled by BIOS -- " - "you can enable it with \"lapic\"\n"); - return -1; - } - /* - * Some BIOSes disable the local APIC in the APIC_BASE - * MSR. This can only be done in software for Intel P6 or later - * and AMD K7 (Model > 1) or later. 
- */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (!(l & MSR_IA32_APICBASE_ENABLE)) { - printk(KERN_INFO - "Local APIC disabled by BIOS -- reenabling.\n"); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; - wrmsr(MSR_IA32_APICBASE, l, h); - enabled_via_apicbase = 1; - } - } - /* - * The APIC feature bit should now be enabled - * in `cpuid' - */ - features = cpuid_edx(1); - if (!(features & (1 << X86_FEATURE_APIC))) { - printk(KERN_WARNING "Could not enable APIC!\n"); - return -1; - } - set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* The BIOS may have set up the APIC at some other address */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (l & MSR_IA32_APICBASE_ENABLE) - mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; - - if (nmi_watchdog != NMI_NONE && nmi_watchdog != NMI_DISABLED) - nmi_watchdog = NMI_LOCAL_APIC; - - printk(KERN_INFO "Found and enabled local APIC!\n"); - - apic_pm_activate(); - - return 0; - -no_apic: - printk(KERN_INFO "No local APIC present or hardware disabled\n"); - return -1; -} - -/** - * init_apic_mappings - initialize APIC mappings - */ -void __init init_apic_mappings(void) -{ - unsigned long apic_phys; - - /* - * If no local APIC can be found then set up a fake all - * zeroes page to simulate the local APIC and another - * one for the IO-APIC. - */ - if (!smp_found_config && detect_init_APIC()) { - apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); - apic_phys = __pa(apic_phys); - } else - apic_phys = mp_lapic_addr; - - set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE, - apic_phys); - - /* - * Fetch the APIC ID of the BSP in case we have a - * default configuration (or the MP table is broken). 
- */ - if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); - -#ifdef CONFIG_X86_IO_APIC - { - unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - int i; - - for (i = 0; i < nr_ioapics; i++) { - if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mpc_apicaddr; - if (!ioapic_phys) { - printk(KERN_ERR - "WARNING: bogus zero IO-APIC " - "address found in MPTABLE, " - "disabling IO/APIC support!\n"); - smp_found_config = 0; - skip_ioapic_setup = 1; - goto fake_ioapic_page; - } - } else { -fake_ioapic_page: - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); - ioapic_phys = __pa(ioapic_phys); - } - set_fixmap_nocache(idx, ioapic_phys); - printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); - idx++; - } - } -#endif -} - -/* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. - */ -int __init APIC_init_uniprocessor (void) -{ - if (enable_local_apic < 0) - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - - if (!smp_found_config && !cpu_has_apic) - return -1; - - /* - * Complain if the BIOS pretends there is one. - */ - if (!cpu_has_apic && - APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - return -1; - } - - verify_local_APIC(); - - connect_bsp_APIC(); - - /* - * Hack: In case of kdump, after a crash, kernel might be booting - * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid - * might be zero if read from MP tables. Get it from LAPIC. 
- */ -#ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); -#endif - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); - - setup_local_APIC(); - -#ifdef CONFIG_X86_IO_APIC - if (smp_found_config) - if (!skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); -#endif - setup_boot_clock(); - - return 0; -} - -/* - * APIC command line parameters - */ -static int __init parse_lapic(char *arg) -{ - enable_local_apic = 1; - return 0; -} -early_param("lapic", parse_lapic); - -static int __init parse_nolapic(char *arg) -{ - enable_local_apic = -1; - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - return 0; -} -early_param("nolapic", parse_nolapic); - -static int __init parse_disable_lapic_timer(char *arg) -{ - local_apic_timer_disabled = 1; - return 0; -} -early_param("nolapic_timer", parse_disable_lapic_timer); - -static int __init parse_lapic_timer_c2_ok(char *arg) -{ - local_apic_timer_c2_ok = 1; - return 0; -} -early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); - -static int __init apic_set_verbosity(char *str) -{ - if (strcmp("debug", str) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) - apic_verbosity = APIC_VERBOSE; - return 1; -} - -__setup("apic=", apic_set_verbosity); - - -/* - * Local APIC interrupts - */ - -/* - * This interrupt should _never_ happen with our APIC/SMP architecture - */ -void smp_spurious_interrupt(struct pt_regs *regs) -{ - unsigned long v; - - irq_enter(); - /* - * Check if this really is a spurious interrupt and ACK it - * if it is a vectored one. Just in case... - * Spurious interrupts should not be ACKed. 
- */ - v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); - if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) - ack_APIC_irq(); - - /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " - "should never happen.\n", smp_processor_id()); - irq_exit(); -} - -/* - * This interrupt should never happen with our APIC/SMP architecture - */ -void smp_error_interrupt(struct pt_regs *regs) -{ - unsigned long v, v1; - - irq_enter(); - /* First tickle the hardware, only then report what went on. -- REW */ - v = apic_read(APIC_ESR); - apic_write(APIC_ESR, 0); - v1 = apic_read(APIC_ESR); - ack_APIC_irq(); - atomic_inc(&irq_err_count); - - /* Here is what the APIC error bits mean: - 0: Send CS error - 1: Receive CS error - 2: Send accept error - 3: Receive accept error - 4: Reserved - 5: Send illegal vector - 6: Received illegal vector - 7: Illegal register address - */ - printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", - smp_processor_id(), v , v1); - irq_exit(); -} - -/* - * Initialize APIC interrupts - */ -void __init apic_intr_init(void) -{ -#ifdef CONFIG_SMP - smp_intr_init(); -#endif - /* self generated IPI for local APIC timer */ - set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - - /* thermal monitor LVT interrupt */ -#ifdef CONFIG_X86_MCE_P4THERMAL - set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); -#endif -} - -/** - * connect_bsp_APIC - attach the APIC to the interrupt system - */ -void __init connect_bsp_APIC(void) -{ - if (pic_mode) { - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - /* - * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's - * local APIC to INT and NMI lines. 
- */ - apic_printk(APIC_VERBOSE, "leaving PIC mode, " - "enabling APIC mode.\n"); - outb(0x70, 0x22); - outb(0x01, 0x23); - } - enable_apic_mode(); -} - -/** - * disconnect_bsp_APIC - detach the APIC from the interrupt system - * @virt_wire_setup: indicates, whether virtual wire mode is selected - * - * Virtual wire mode is necessary to deliver legacy interrupts even when the - * APIC is disabled. - */ -void disconnect_bsp_APIC(int virt_wire_setup) -{ - if (pic_mode) { - /* - * Put the board back into PIC mode (has an effect only on - * certain older boards). Note that APIC interrupts, including - * IPIs, won't work beyond this point! The only exception are - * INIT IPIs. - */ - apic_printk(APIC_VERBOSE, "disabling APIC mode, " - "entering PIC mode.\n"); - outb(0x70, 0x22); - outb(0x00, 0x23); - } else { - /* Go back to Virtual Wire compatibility mode */ - unsigned long value; - - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write_around(APIC_SPIV, value); - - if (!virt_wire_setup) { - /* - * For LVT0 make it edge triggered, active high, - * external and enabled - */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write_around(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write_around(APIC_LVT0, APIC_LVT_MASKED); - } - - /* - * For LVT1 make it edge triggered, active high, nmi and - * enabled - */ - value = apic_read(APIC_LVT1); - value &= ~( - APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - 
apic_write_around(APIC_LVT1, value); - } -} - -/* - * Power management - */ -#ifdef CONFIG_PM - -static struct { - int active; - /* r/w apic fields */ - unsigned int apic_id; - unsigned int apic_taskpri; - unsigned int apic_ldr; - unsigned int apic_dfr; - unsigned int apic_spiv; - unsigned int apic_lvtt; - unsigned int apic_lvtpc; - unsigned int apic_lvt0; - unsigned int apic_lvt1; - unsigned int apic_lvterr; - unsigned int apic_tmict; - unsigned int apic_tdcr; - unsigned int apic_thmr; -} apic_pm_state; - -static int lapic_suspend(struct sys_device *dev, pm_message_t state) -{ - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = lapic_get_maxlvt(); - - apic_pm_state.apic_id = apic_read(APIC_ID); - apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); - apic_pm_state.apic_ldr = apic_read(APIC_LDR); - apic_pm_state.apic_dfr = apic_read(APIC_DFR); - apic_pm_state.apic_spiv = apic_read(APIC_SPIV); - apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - if (maxlvt >= 4) - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); - apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); - apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); - apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); - apic_pm_state.apic_tmict = apic_read(APIC_TMICT); - apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_MCE_P4THERMAL - if (maxlvt >= 5) - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); -#endif - - local_irq_save(flags); - disable_local_APIC(); - local_irq_restore(flags); - return 0; -} - -static int lapic_resume(struct sys_device *dev) -{ - unsigned int l, h; - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = lapic_get_maxlvt(); - - local_irq_save(flags); - - /* - * Make sure the APICBASE points to the right address - * - * FIXME! This will be wrong if we ever support suspend on - * SMP! We'll need to do this as part of the CPU restore! 
- */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); - - apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); - apic_write(APIC_ID, apic_pm_state.apic_id); - apic_write(APIC_DFR, apic_pm_state.apic_dfr); - apic_write(APIC_LDR, apic_pm_state.apic_ldr); - apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); - apic_write(APIC_SPIV, apic_pm_state.apic_spiv); - apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); - apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#ifdef CONFIG_X86_MCE_P4THERMAL - if (maxlvt >= 5) - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); -#endif - if (maxlvt >= 4) - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); - apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); - apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); - apic_write(APIC_TMICT, apic_pm_state.apic_tmict); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - local_irq_restore(flags); - return 0; -} - -/* - * This device has no shutdown method - fully functioning local APICs - * are needed on every CPU up until machine_halt/restart/poweroff. - */ - -static struct sysdev_class lapic_sysclass = { - set_kset_name("lapic"), - .resume = lapic_resume, - .suspend = lapic_suspend, -}; - -static struct sys_device device_lapic = { - .id = 0, - .cls = &lapic_sysclass, -}; - -static void __devinit apic_pm_activate(void) -{ - apic_pm_state.active = 1; -} - -static int __init init_lapic_sysfs(void) -{ - int error; - - if (!cpu_has_apic) - return 0; - /* XXX: remove suspend/resume procs if !apic_pm_state.active? 
*/ - - error = sysdev_class_register(&lapic_sysclass); - if (!error) - error = sysdev_register(&device_lapic); - return error; -} -device_initcall(init_lapic_sysfs); - -#else /* CONFIG_PM */ - -static void apic_pm_activate(void) { } - -#endif /* CONFIG_PM */ diff --git a/arch/i386/kernel/apm_32.c b/arch/i386/kernel/apm_32.c deleted file mode 100644 index f02a8aca826..00000000000 --- a/arch/i386/kernel/apm_32.c +++ /dev/null @@ -1,2403 +0,0 @@ -/* -*- linux-c -*- - * APM BIOS driver for Linux - * Copyright 1994-2001 Stephen Rothwell (sfr@canb.auug.org.au) - * - * Initial development of this driver was funded by NEC Australia P/L - * and NEC Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * October 1995, Rik Faith (faith@cs.unc.edu): - * Minor enhancements and updates (to the patch set) for 1.3.x - * Documentation - * January 1996, Rik Faith (faith@cs.unc.edu): - * Make /proc/apm easy to format (bump driver version) - * March 1996, Rik Faith (faith@cs.unc.edu): - * Prohibit APM BIOS calls unless apm_enabled. 
- * (Thanks to Ulrich Windl ) - * April 1996, Stephen Rothwell (sfr@canb.auug.org.au) - * Version 1.0 and 1.1 - * May 1996, Version 1.2 - * Feb 1998, Version 1.3 - * Feb 1998, Version 1.4 - * Aug 1998, Version 1.5 - * Sep 1998, Version 1.6 - * Nov 1998, Version 1.7 - * Jan 1999, Version 1.8 - * Jan 1999, Version 1.9 - * Oct 1999, Version 1.10 - * Nov 1999, Version 1.11 - * Jan 2000, Version 1.12 - * Feb 2000, Version 1.13 - * Nov 2000, Version 1.14 - * Oct 2001, Version 1.15 - * Jan 2002, Version 1.16 - * Oct 2002, Version 1.16ac - * - * History: - * 0.6b: first version in official kernel, Linux 1.3.46 - * 0.7: changed /proc/apm format, Linux 1.3.58 - * 0.8: fixed gcc 2.7.[12] compilation problems, Linux 1.3.59 - * 0.9: only call bios if bios is present, Linux 1.3.72 - * 1.0: use fixed device number, consolidate /proc/apm into this file, - * Linux 1.3.85 - * 1.1: support user-space standby and suspend, power off after system - * halted, Linux 1.3.98 - * 1.2: When resetting RTC after resume, take care so that the time - * is only incorrect by 30-60mS (vs. 1S previously) (Gabor J. Toth - * ); improve interaction between - * screen-blanking and gpm (Stephen Rothwell); Linux 1.99.4 - * 1.2a:Simple change to stop mysterious bug reports with SMP also added - * levels to the printk calls. APM is not defined for SMP machines. - * The new replacment for it is, but Linux doesn't yet support this. - * Alan Cox Linux 2.1.55 - * 1.3: Set up a valid data descriptor 0x40 for buggy BIOS's - * 1.4: Upgraded to support APM 1.2. Integrated ThinkPad suspend patch by - * Dean Gaudet . - * C. Scott Ananian Linux 2.1.87 - * 1.5: Fix segment register reloading (in case of bad segments saved - * across BIOS call). - * Stephen Rothwell - * 1.6: Cope with complier/assembler differences. - * Only try to turn off the first display device. 
- * Fix OOPS at power off with no APM BIOS by Jan Echternach - * - * Stephen Rothwell - * 1.7: Modify driver's cached copy of the disabled/disengaged flags - * to reflect current state of APM BIOS. - * Chris Rankin - * Reset interrupt 0 timer to 100Hz after suspend - * Chad Miller - * Add CONFIG_APM_IGNORE_SUSPEND_BOUNCE - * Richard Gooch - * Allow boot time disabling of APM - * Make boot messages far less verbose by default - * Make asm safer - * Stephen Rothwell - * 1.8: Add CONFIG_APM_RTC_IS_GMT - * Richard Gooch - * change APM_NOINTS to CONFIG_APM_ALLOW_INTS - * remove dependency on CONFIG_PROC_FS - * Stephen Rothwell - * 1.9: Fix small typo. - * Try to cope with BIOS's that need to have all display - * devices blanked and not just the first one. - * Ross Paterson - * Fix segment limit setting it has always been wrong as - * the segments needed to have byte granularity. - * Mark a few things __init. - * Add hack to allow power off of SMP systems by popular request. - * Use CONFIG_SMP instead of __SMP__ - * Ignore BOUNCES for three seconds. - * Stephen Rothwell - * 1.10: Fix for Thinkpad return code. - * Merge 2.2 and 2.3 drivers. - * Remove APM dependencies in arch/i386/kernel/process.c - * Remove APM dependencies in drivers/char/sysrq.c - * Reset time across standby. - * Allow more inititialisation on SMP. - * Remove CONFIG_APM_POWER_OFF and make it boot time - * configurable (default on). - * Make debug only a boot time parameter (remove APM_DEBUG). - * Try to blank all devices on any error. - * 1.11: Remove APM dependencies in drivers/char/console.c - * Check nr_running to detect if we are idle (from - * Borislav Deianov ) - * Fix for bioses that don't zero the top part of the - * entrypoint offset (Mario Sitta ) - * (reported by Panos Katsaloulis ). - * Real mode power off patch (Walter Hofmann - * ). - * 1.12: Remove CONFIG_SMP as the compiler will optimize - * the code away anyway (smp_num_cpus == 1 in UP) - * noted by Artur Skawina . 
- * Make power off under SMP work again. - * Fix thinko with initial engaging of BIOS. - * Make sure power off only happens on CPU 0 - * (Paul "Rusty" Russell ). - * Do error notification to user mode if BIOS calls fail. - * Move entrypoint offset fix to ...boot/setup.S - * where it belongs (Cosmos ). - * Remove smp-power-off. SMP users must now specify - * "apm=power-off" on the kernel command line. Suggested - * by Jim Avera , modified by Alan Cox - * . - * Register the /proc/apm entry even on SMP so that - * scripts that check for it before doing power off - * work (Jim Avera ). - * 1.13: Changes for new pm_ interfaces (Andy Henroid - * ). - * Modularize the code. - * Fix the Thinkpad (again) :-( (CONFIG_APM_IGNORE_MULTIPLE_SUSPENDS - * is now the way life works). - * Fix thinko in suspend() (wrong return). - * Notify drivers on critical suspend. - * Make kapmd absorb more idle time (Pavel Machek - * modified by sfr). - * Disable interrupts while we are suspended (Andy Henroid - * fixed by sfr). - * Make power off work on SMP again (Tony Hoyle - * and ) modified by sfr. - * Remove CONFIG_APM_SUSPEND_BOUNCE. The bounce ignore - * interval is now configurable. - * 1.14: Make connection version persist across module unload/load. - * Enable and engage power management earlier. - * Disengage power management on module unload. - * Changed to use the sysrq-register hack for registering the - * power off function called by magic sysrq based upon discussions - * in irc://irc.openprojects.net/#kernelnewbies - * (Crutcher Dunnavant ). - * Make CONFIG_APM_REAL_MODE_POWER_OFF run time configurable. - * (Arjan van de Ven ) modified by sfr. - * Work around byte swap bug in one of the Vaio's BIOS's - * (Marc Boucher ). - * Exposed the disable flag to dmi so that we can handle known - * broken APM (Alan Cox ). - * 1.14ac: If the BIOS says "I slowed the CPU down" then don't spin - * calling it - instead idle. 
(Alan Cox ) - * If an APM idle fails log it and idle sensibly - * 1.15: Don't queue events to clients who open the device O_WRONLY. - * Don't expect replies from clients who open the device O_RDONLY. - * (Idea from Thomas Hood) - * Minor waitqueue cleanups. (John Fremlin ) - * 1.16: Fix idle calling. (Andreas Steinmetz et al.) - * Notify listeners of standby or suspend events before notifying - * drivers. Return EBUSY to ioctl() if suspend is rejected. - * (Russell King and Thomas Hood) - * Ignore first resume after we generate our own resume event - * after a suspend (Thomas Hood) - * Daemonize now gets rid of our controlling terminal (sfr). - * CONFIG_APM_CPU_IDLE now just affects the default value of - * idle_threshold (sfr). - * Change name of kernel apm daemon (as it no longer idles) (sfr). - * 1.16ac: Fix up SMP support somewhat. You can now force SMP on and we - * make _all_ APM calls on the CPU#0. Fix unsafe sign bug. - * TODO: determine if its "boot CPU" or "CPU0" we want to lock to. - * - * APM 1.1 Reference: - * - * Intel Corporation, Microsoft Corporation. Advanced Power Management - * (APM) BIOS Interface Specification, Revision 1.1, September 1993. - * Intel Order Number 241704-001. Microsoft Part Number 781-110-X01. - * - * [This document is available free from Intel by calling 800.628.8686 (fax - * 916.356.6100) or 800.548.4725; or via anonymous ftp from - * ftp://ftp.intel.com/pub/IAL/software_specs/apmv11.doc. It is also - * available from Microsoft by calling 206.882.8080.] - * - * APM 1.2 Reference: - * Intel Corporation, Microsoft Corporation. Advanced Power Management - * (APM) BIOS Interface Specification, Revision 1.2, February 1996. 
- * - * [This document is available from Microsoft at: - * http://www.microsoft.com/whdc/archive/amp_12.mspx] - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "io_ports.h" - -#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) -extern int (*console_blank_hook)(int); -#endif - -/* - * The apm_bios device is one of the misc char devices. - * This is its minor number. - */ -#define APM_MINOR_DEV 134 - -/* - * See Documentation/Config.help for the configuration options. - * - * Various options can be changed at boot time as follows: - * (We allow underscores for compatibility with the modules code) - * apm=on/off enable/disable APM - * [no-]allow[-_]ints allow interrupts during BIOS calls - * [no-]broken[-_]psr BIOS has a broken GetPowerStatus call - * [no-]realmode[-_]power[-_]off switch to real mode before - * powering off - * [no-]debug log some debugging messages - * [no-]power[-_]off power off on shutdown - * [no-]smp Use apm even on an SMP box - * bounce[-_]interval= number of ticks to ignore suspend - * bounces - * idle[-_]threshold= System idle percentage above which to - * make APM BIOS idle calls. Set it to - * 100 to disable. - * idle[-_]period= Period (in 1/100s of a second) over - * which the idle percentage is - * calculated. - */ - -/* KNOWN PROBLEM MACHINES: - * - * U: TI 4000M TravelMate: BIOS is *NOT* APM compliant - * [Confirmed by TI representative] - * ?: ACER 486DX4/75: uses dseg 0040, in violation of APM specification - * [Confirmed by BIOS disassembly] - * [This may work now ...] 
- * P: Toshiba 1950S: battery life information only gets updated after resume - * P: Midwest Micro Soundbook Elite DX2/66 monochrome: screen blanking - * broken in BIOS [Reported by Garst R. Reese ] - * ?: AcerNote-950: oops on reading /proc/apm - workaround is a WIP - * Neale Banks December 2000 - * - * Legend: U = unusable with APM patches - * P = partially usable with APM patches - */ - -/* - * Define as 1 to make the driver always call the APM BIOS busy - * routine even if the clock was not reported as slowed by the - * idle routine. Otherwise, define as 0. - */ -#define ALWAYS_CALL_BUSY 1 - -/* - * Define to make the APM BIOS calls zero all data segment registers (so - * that an incorrect BIOS implementation will cause a kernel panic if it - * tries to write to arbitrary memory). - */ -#define APM_ZERO_SEGS - -#include "apm.h" - -/* - * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. - * This patched by Chad Miller , original code by - * David Chen - */ -#undef INIT_TIMER_AFTER_SUSPEND - -#ifdef INIT_TIMER_AFTER_SUSPEND -#include -#include -#include -#endif - -/* - * Need to poll the APM BIOS every second - */ -#define APM_CHECK_TIMEOUT (HZ) - -/* - * Ignore suspend events for this amount of time after a resume - */ -#define DEFAULT_BOUNCE_INTERVAL (3 * HZ) - -/* - * Maximum number of events stored - */ -#define APM_MAX_EVENTS 20 - -/* - * The per-file APM data - */ -struct apm_user { - int magic; - struct apm_user * next; - unsigned int suser: 1; - unsigned int writer: 1; - unsigned int reader: 1; - unsigned int suspend_wait: 1; - int suspend_result; - int suspends_pending; - int standbys_pending; - int suspends_read; - int standbys_read; - int event_head; - int event_tail; - apm_event_t events[APM_MAX_EVENTS]; -}; - -/* - * The magic number in apm_user - */ -#define APM_BIOS_MAGIC 0x4101 - -/* - * idle percentage above which bios idle calls are done - */ -#ifdef CONFIG_APM_CPU_IDLE -#define DEFAULT_IDLE_THRESHOLD 95 -#else -#define 
DEFAULT_IDLE_THRESHOLD 100 -#endif -#define DEFAULT_IDLE_PERIOD (100 / 3) - -/* - * Local variables - */ -static struct { - unsigned long offset; - unsigned short segment; -} apm_bios_entry; -static int clock_slowed; -static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD; -static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD; -static int set_pm_idle; -static int suspends_pending; -static int standbys_pending; -static int ignore_sys_suspend; -static int ignore_normal_resume; -static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; - -static int debug __read_mostly; -static int smp __read_mostly; -static int apm_disabled = -1; -#ifdef CONFIG_SMP -static int power_off; -#else -static int power_off = 1; -#endif -#ifdef CONFIG_APM_REAL_MODE_POWER_OFF -static int realmode_power_off = 1; -#else -static int realmode_power_off; -#endif -#ifdef CONFIG_APM_ALLOW_INTS -static int allow_ints = 1; -#else -static int allow_ints; -#endif -static int broken_psr; - -static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); -static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); -static struct apm_user * user_list; -static DEFINE_SPINLOCK(user_list_lock); -static const struct desc_struct bad_bios_desc = { 0, 0x00409200 }; - -static const char driver_version[] = "1.16ac"; /* no spaces */ - -static struct task_struct *kapmd_task; - -/* - * APM event names taken from the APM 1.2 specification. 
These are - * the message codes that the BIOS uses to tell us about events - */ -static const char * const apm_event_name[] = { - "system standby", - "system suspend", - "normal resume", - "critical resume", - "low battery", - "power status change", - "update time", - "critical suspend", - "user standby", - "user suspend", - "system standby resume", - "capabilities change" -}; -#define NR_APM_EVENT_NAME ARRAY_SIZE(apm_event_name) - -typedef struct lookup_t { - int key; - char * msg; -} lookup_t; - -/* - * The BIOS returns a set of standard error codes in AX when the - * carry flag is set. - */ - -static const lookup_t error_table[] = { -/* N/A { APM_SUCCESS, "Operation succeeded" }, */ - { APM_DISABLED, "Power management disabled" }, - { APM_CONNECTED, "Real mode interface already connected" }, - { APM_NOT_CONNECTED, "Interface not connected" }, - { APM_16_CONNECTED, "16 bit interface already connected" }, -/* N/A { APM_16_UNSUPPORTED, "16 bit interface not supported" }, */ - { APM_32_CONNECTED, "32 bit interface already connected" }, - { APM_32_UNSUPPORTED, "32 bit interface not supported" }, - { APM_BAD_DEVICE, "Unrecognized device ID" }, - { APM_BAD_PARAM, "Parameter out of range" }, - { APM_NOT_ENGAGED, "Interface not engaged" }, - { APM_BAD_FUNCTION, "Function not supported" }, - { APM_RESUME_DISABLED, "Resume timer disabled" }, - { APM_BAD_STATE, "Unable to enter requested state" }, -/* N/A { APM_NO_EVENTS, "No events pending" }, */ - { APM_NO_ERROR, "BIOS did not set a return code" }, - { APM_NOT_PRESENT, "No APM present" } -}; -#define ERROR_COUNT ARRAY_SIZE(error_table) - -/** - * apm_error - display an APM error - * @str: information string - * @err: APM BIOS return code - * - * Write a meaningful log entry to the kernel log in the event of - * an APM error. 
- */ - -static void apm_error(char *str, int err) -{ - int i; - - for (i = 0; i < ERROR_COUNT; i++) - if (error_table[i].key == err) break; - if (i < ERROR_COUNT) - printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); - else - printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", - str, err); -} - -/* - * Lock APM functionality to physical CPU 0 - */ - -#ifdef CONFIG_SMP - -static cpumask_t apm_save_cpus(void) -{ - cpumask_t x = current->cpus_allowed; - /* Some bioses don't like being called from CPU != 0 */ - set_cpus_allowed(current, cpumask_of_cpu(0)); - BUG_ON(smp_processor_id() != 0); - return x; -} - -static inline void apm_restore_cpus(cpumask_t mask) -{ - set_cpus_allowed(current, mask); -} - -#else - -/* - * No CPU lockdown needed on a uniprocessor - */ - -#define apm_save_cpus() (current->cpus_allowed) -#define apm_restore_cpus(x) (void)(x) - -#endif - -/* - * These are the actual BIOS calls. Depending on APM_ZERO_SEGS and - * apm_info.allow_ints, we are being really paranoid here! Not only - * are interrupts disabled, but all the segment registers (except SS) - * are saved and zeroed this means that if the BIOS tries to reference - * any data without explicitly loading the segment registers, the kernel - * will fault immediately rather than have some unforeseen circumstances - * for the rest of the kernel. And it will be very obvious! :-) Doing - * this depends on CS referring to the same physical memory as DS so that - * DS can be zeroed before the call. Unfortunately, we can't do anything - * about the stack segment/pointer. Also, we tell the compiler that - * everything could change. - * - * Also, we KNOW that for the non error case of apm_bios_call, there - * is no useful data returned in the low order 8 bits of eax. 
- */ - -static inline unsigned long __apm_irq_save(void) -{ - unsigned long flags; - local_save_flags(flags); - if (apm_info.allow_ints) { - if (irqs_disabled_flags(flags)) - local_irq_enable(); - } else - local_irq_disable(); - - return flags; -} - -#define apm_irq_save(flags) \ - do { flags = __apm_irq_save(); } while (0) - -static inline void apm_irq_restore(unsigned long flags) -{ - if (irqs_disabled_flags(flags)) - local_irq_disable(); - else if (irqs_disabled()) - local_irq_enable(); -} - -#ifdef APM_ZERO_SEGS -# define APM_DECL_SEGS \ - unsigned int saved_fs; unsigned int saved_gs; -# define APM_DO_SAVE_SEGS \ - savesegment(fs, saved_fs); savesegment(gs, saved_gs) -# define APM_DO_RESTORE_SEGS \ - loadsegment(fs, saved_fs); loadsegment(gs, saved_gs) -#else -# define APM_DECL_SEGS -# define APM_DO_SAVE_SEGS -# define APM_DO_RESTORE_SEGS -#endif - -/** - * apm_bios_call - Make an APM BIOS 32bit call - * @func: APM function to execute - * @ebx_in: EBX register for call entry - * @ecx_in: ECX register for call entry - * @eax: EAX register return - * @ebx: EBX register return - * @ecx: ECX register return - * @edx: EDX register return - * @esi: ESI register return - * - * Make an APM call using the 32bit protected mode interface. The - * caller is responsible for knowing if APM BIOS is configured and - * enabled. This call can disable interrupts for a long period of - * time on some laptops. The return value is in AH and the carry - * flag is loaded into AL. If there is an error, then the error - * code is returned in AH (bits 8-15 of eax) and this function - * returns non-zero. 
- */ - -static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, - u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi) -{ - APM_DECL_SEGS - unsigned long flags; - cpumask_t cpus; - int cpu; - struct desc_struct save_desc_40; - struct desc_struct *gdt; - - cpus = apm_save_cpus(); - - cpu = get_cpu(); - gdt = get_cpu_gdt_table(cpu); - save_desc_40 = gdt[0x40 / 8]; - gdt[0x40 / 8] = bad_bios_desc; - - apm_irq_save(flags); - APM_DO_SAVE_SEGS; - apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); - APM_DO_RESTORE_SEGS; - apm_irq_restore(flags); - gdt[0x40 / 8] = save_desc_40; - put_cpu(); - apm_restore_cpus(cpus); - - return *eax & 0xff; -} - -/** - * apm_bios_call_simple - make a simple APM BIOS 32bit call - * @func: APM function to invoke - * @ebx_in: EBX register value for BIOS call - * @ecx_in: ECX register value for BIOS call - * @eax: EAX register on return from the BIOS call - * - * Make a BIOS call that returns one value only, or just status. - * If there is an error, then the error code is returned in AH - * (bits 8-15 of eax) and this function returns non-zero. This is - * used for simpler BIOS operations. This call may hold interrupts - * off for a long time on some laptops. - */ - -static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax) -{ - u8 error; - APM_DECL_SEGS - unsigned long flags; - cpumask_t cpus; - int cpu; - struct desc_struct save_desc_40; - struct desc_struct *gdt; - - cpus = apm_save_cpus(); - - cpu = get_cpu(); - gdt = get_cpu_gdt_table(cpu); - save_desc_40 = gdt[0x40 / 8]; - gdt[0x40 / 8] = bad_bios_desc; - - apm_irq_save(flags); - APM_DO_SAVE_SEGS; - error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); - APM_DO_RESTORE_SEGS; - apm_irq_restore(flags); - gdt[0x40 / 8] = save_desc_40; - put_cpu(); - apm_restore_cpus(cpus); - return error; -} - -/** - * apm_driver_version - APM driver version - * @val: loaded with the APM version on return - * - * Retrieve the APM version supported by the BIOS. 
This is only - * supported for APM 1.1 or higher. An error indicates APM 1.0 is - * probably present. - * - * On entry val should point to a value indicating the APM driver - * version with the high byte being the major and the low byte the - * minor number both in BCD - * - * On return it will hold the BIOS revision supported in the - * same format. - */ - -static int apm_driver_version(u_short *val) -{ - u32 eax; - - if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax)) - return (eax >> 8) & 0xff; - *val = eax; - return APM_SUCCESS; -} - -/** - * apm_get_event - get an APM event from the BIOS - * @event: pointer to the event - * @info: point to the event information - * - * The APM BIOS provides a polled information for event - * reporting. The BIOS expects to be polled at least every second - * when events are pending. When a message is found the caller should - * poll until no more messages are present. However, this causes - * problems on some laptops where a suspend event notification is - * not cleared until it is acknowledged. - * - * Additional information is returned in the info pointer, providing - * that APM 1.2 is in use. If no messges are pending the value 0x80 - * is returned (No power management events pending). - */ - -static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info) -{ - u32 eax; - u32 ebx; - u32 ecx; - u32 dummy; - - if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx, - &dummy, &dummy)) - return (eax >> 8) & 0xff; - *event = ebx; - if (apm_info.connection_version < 0x0102) - *info = ~0; /* indicate info not valid */ - else - *info = ecx; - return APM_SUCCESS; -} - -/** - * set_power_state - set the power management state - * @what: which items to transition - * @state: state to transition to - * - * Request an APM change of state for one or more system devices. The - * processor state must be transitioned last of all. 
what holds the - * class of device in the upper byte and the device number (0xFF for - * all) for the object to be transitioned. - * - * The state holds the state to transition to, which may in fact - * be an acceptance of a BIOS requested state change. - */ - -static int set_power_state(u_short what, u_short state) -{ - u32 eax; - - if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax)) - return (eax >> 8) & 0xff; - return APM_SUCCESS; -} - -/** - * set_system_power_state - set system wide power state - * @state: which state to enter - * - * Transition the entire system into a new APM power state. - */ - -static int set_system_power_state(u_short state) -{ - return set_power_state(APM_DEVICE_ALL, state); -} - -/** - * apm_do_idle - perform power saving - * - * This function notifies the BIOS that the processor is (in the view - * of the OS) idle. It returns -1 in the event that the BIOS refuses - * to handle the idle request. On a success the function returns 1 - * if the BIOS did clock slowing or 0 otherwise. - */ - -static int apm_do_idle(void) -{ - u32 eax; - u8 ret = 0; - int idled = 0; - int polling; - - polling = !!(current_thread_info()->status & TS_POLLING); - if (polling) { - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we - * test NEED_RESCHED: - */ - smp_mb(); - } - if (!need_resched()) { - idled = 1; - ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); - } - if (polling) - current_thread_info()->status |= TS_POLLING; - - if (!idled) - return 0; - - if (ret) { - static unsigned long t; - - /* This always fails on some SMP boards running UP kernels. - * Only report the failure the first 5 times. 
- */ - if (++t < 5) - { - printk(KERN_DEBUG "apm_do_idle failed (%d)\n", - (eax >> 8) & 0xff); - t = jiffies; - } - return -1; - } - clock_slowed = (apm_info.bios.flags & APM_IDLE_SLOWS_CLOCK) != 0; - return clock_slowed; -} - -/** - * apm_do_busy - inform the BIOS the CPU is busy - * - * Request that the BIOS brings the CPU back to full performance. - */ - -static void apm_do_busy(void) -{ - u32 dummy; - - if (clock_slowed || ALWAYS_CALL_BUSY) { - (void) apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy); - clock_slowed = 0; - } -} - -/* - * If no process has really been interested in - * the CPU for some time, we want to call BIOS - * power management - we probably want - * to conserve power. - */ -#define IDLE_CALC_LIMIT (HZ * 100) -#define IDLE_LEAKY_MAX 16 - -static void (*original_pm_idle)(void) __read_mostly; - -/** - * apm_cpu_idle - cpu idling for APM capable Linux - * - * This is the idling function the kernel executes when APM is available. It - * tries to do BIOS powermanagement based on the average system idle time. - * Furthermore it calls the system default idle routine. 
- */ - -static void apm_cpu_idle(void) -{ - static int use_apm_idle; /* = 0 */ - static unsigned int last_jiffies; /* = 0 */ - static unsigned int last_stime; /* = 0 */ - - int apm_idle_done = 0; - unsigned int jiffies_since_last_check = jiffies - last_jiffies; - unsigned int bucket; - -recalc: - if (jiffies_since_last_check > IDLE_CALC_LIMIT) { - use_apm_idle = 0; - last_jiffies = jiffies; - last_stime = current->stime; - } else if (jiffies_since_last_check > idle_period) { - unsigned int idle_percentage; - - idle_percentage = current->stime - last_stime; - idle_percentage *= 100; - idle_percentage /= jiffies_since_last_check; - use_apm_idle = (idle_percentage > idle_threshold); - if (apm_info.forbid_idle) - use_apm_idle = 0; - last_jiffies = jiffies; - last_stime = current->stime; - } - - bucket = IDLE_LEAKY_MAX; - - while (!need_resched()) { - if (use_apm_idle) { - unsigned int t; - - t = jiffies; - switch (apm_do_idle()) { - case 0: apm_idle_done = 1; - if (t != jiffies) { - if (bucket) { - bucket = IDLE_LEAKY_MAX; - continue; - } - } else if (bucket) { - bucket--; - continue; - } - break; - case 1: apm_idle_done = 1; - break; - default: /* BIOS refused */ - break; - } - } - if (original_pm_idle) - original_pm_idle(); - else - default_idle(); - jiffies_since_last_check = jiffies - last_jiffies; - if (jiffies_since_last_check > idle_period) - goto recalc; - } - - if (apm_idle_done) - apm_do_busy(); -} - -/** - * apm_power_off - ask the BIOS to power off - * - * Handle the power off sequence. This is the one piece of code we - * will execute even on SMP machines. In order to deal with BIOS - * bugs we support real mode APM BIOS power off calls. We also make - * the SMP call on CPU0 as some systems will only honour this call - * on their first cpu. 
- */ - -static void apm_power_off(void) -{ - unsigned char po_bios_call[] = { - 0xb8, 0x00, 0x10, /* movw $0x1000,ax */ - 0x8e, 0xd0, /* movw ax,ss */ - 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */ - 0xb8, 0x07, 0x53, /* movw $0x5307,ax */ - 0xbb, 0x01, 0x00, /* movw $0x0001,bx */ - 0xb9, 0x03, 0x00, /* movw $0x0003,cx */ - 0xcd, 0x15 /* int $0x15 */ - }; - - /* Some bioses don't like being called from CPU != 0 */ - if (apm_info.realmode_power_off) - { - (void)apm_save_cpus(); - machine_real_restart(po_bios_call, sizeof(po_bios_call)); - } - else - (void) set_system_power_state(APM_STATE_OFF); -} - -#ifdef CONFIG_APM_DO_ENABLE - -/** - * apm_enable_power_management - enable BIOS APM power management - * @enable: enable yes/no - * - * Enable or disable the APM BIOS power services. - */ - -static int apm_enable_power_management(int enable) -{ - u32 eax; - - if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED)) - return APM_NOT_ENGAGED; - if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL, - enable, &eax)) - return (eax >> 8) & 0xff; - if (enable) - apm_info.bios.flags &= ~APM_BIOS_DISABLED; - else - apm_info.bios.flags |= APM_BIOS_DISABLED; - return APM_SUCCESS; -} -#endif - -/** - * apm_get_power_status - get current power state - * @status: returned status - * @bat: battery info - * @life: estimated life - * - * Obtain the current power status from the APM BIOS. We return a - * status which gives the rough battery status, and current power - * source. The bat value returned give an estimate as a percentage - * of life and a status value for the battery. The estimated life - * if reported is a lifetime in secodnds/minutes at current powwer - * consumption. 
- */ - -static int apm_get_power_status(u_short *status, u_short *bat, u_short *life) -{ - u32 eax; - u32 ebx; - u32 ecx; - u32 edx; - u32 dummy; - - if (apm_info.get_power_status_broken) - return APM_32_UNSUPPORTED; - if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0, - &eax, &ebx, &ecx, &edx, &dummy)) - return (eax >> 8) & 0xff; - *status = ebx; - *bat = ecx; - if (apm_info.get_power_status_swabinminutes) { - *life = swab16((u16)edx); - *life |= 0x8000; - } else - *life = edx; - return APM_SUCCESS; -} - -#if 0 -static int apm_get_battery_status(u_short which, u_short *status, - u_short *bat, u_short *life, u_short *nbat) -{ - u32 eax; - u32 ebx; - u32 ecx; - u32 edx; - u32 esi; - - if (apm_info.connection_version < 0x0102) { - /* pretend we only have one battery. */ - if (which != 1) - return APM_BAD_DEVICE; - *nbat = 1; - return apm_get_power_status(status, bat, life); - } - - if (apm_bios_call(APM_FUNC_GET_STATUS, (0x8000 | (which)), 0, &eax, - &ebx, &ecx, &edx, &esi)) - return (eax >> 8) & 0xff; - *status = ebx; - *bat = ecx; - *life = edx; - *nbat = esi; - return APM_SUCCESS; -} -#endif - -/** - * apm_engage_power_management - enable PM on a device - * @device: identity of device - * @enable: on/off - * - * Activate or deactive power management on either a specific device - * or the entire system (%APM_DEVICE_ALL). 
- */ - -static int apm_engage_power_management(u_short device, int enable) -{ - u32 eax; - - if ((enable == 0) && (device == APM_DEVICE_ALL) - && (apm_info.bios.flags & APM_BIOS_DISABLED)) - return APM_DISABLED; - if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, &eax)) - return (eax >> 8) & 0xff; - if (device == APM_DEVICE_ALL) { - if (enable) - apm_info.bios.flags &= ~APM_BIOS_DISENGAGED; - else - apm_info.bios.flags |= APM_BIOS_DISENGAGED; - } - return APM_SUCCESS; -} - -#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) - -/** - * apm_console_blank - blank the display - * @blank: on/off - * - * Attempt to blank the console, firstly by blanking just video device - * zero, and if that fails (some BIOSes don't support it) then it blanks - * all video devices. Typically the BIOS will do laptop backlight and - * monitor powerdown for us. - */ - -static int apm_console_blank(int blank) -{ - int error = APM_NOT_ENGAGED; /* silence gcc */ - int i; - u_short state; - static const u_short dev[3] = { 0x100, 0x1FF, 0x101 }; - - state = blank ? 
APM_STATE_STANDBY : APM_STATE_READY; - - for (i = 0; i < ARRAY_SIZE(dev); i++) { - error = set_power_state(dev[i], state); - - if ((error == APM_SUCCESS) || (error == APM_NO_ERROR)) - return 1; - - if (error == APM_NOT_ENGAGED) - break; - } - - if (error == APM_NOT_ENGAGED) { - static int tried; - int eng_error; - if (tried++ == 0) { - eng_error = apm_engage_power_management(APM_DEVICE_ALL, 1); - if (eng_error) { - apm_error("set display", error); - apm_error("engage interface", eng_error); - return 0; - } else - return apm_console_blank(blank); - } - } - apm_error("set display", error); - return 0; -} -#endif - -static int queue_empty(struct apm_user *as) -{ - return as->event_head == as->event_tail; -} - -static apm_event_t get_queued_event(struct apm_user *as) -{ - if (++as->event_tail >= APM_MAX_EVENTS) - as->event_tail = 0; - return as->events[as->event_tail]; -} - -static void queue_event(apm_event_t event, struct apm_user *sender) -{ - struct apm_user * as; - - spin_lock(&user_list_lock); - if (user_list == NULL) - goto out; - for (as = user_list; as != NULL; as = as->next) { - if ((as == sender) || (!as->reader)) - continue; - if (++as->event_head >= APM_MAX_EVENTS) - as->event_head = 0; - - if (as->event_head == as->event_tail) { - static int notified; - - if (notified++ == 0) - printk(KERN_ERR "apm: an event queue overflowed\n"); - if (++as->event_tail >= APM_MAX_EVENTS) - as->event_tail = 0; - } - as->events[as->event_head] = event; - if ((!as->suser) || (!as->writer)) - continue; - switch (event) { - case APM_SYS_SUSPEND: - case APM_USER_SUSPEND: - as->suspends_pending++; - suspends_pending++; - break; - - case APM_SYS_STANDBY: - case APM_USER_STANDBY: - as->standbys_pending++; - standbys_pending++; - break; - } - } - wake_up_interruptible(&apm_waitqueue); -out: - spin_unlock(&user_list_lock); -} - -static void reinit_timer(void) -{ -#ifdef INIT_TIMER_AFTER_SUSPEND - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); - /* set the clock to 
HZ */ - outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ - udelay(10); - outb_p(LATCH & 0xff, PIT_CH0); /* LSB */ - udelay(10); - outb(LATCH >> 8, PIT_CH0); /* MSB */ - udelay(10); - spin_unlock_irqrestore(&i8253_lock, flags); -#endif -} - -static int suspend(int vetoable) -{ - int err; - struct apm_user *as; - - if (pm_send_all(PM_SUSPEND, (void *)3)) { - /* Vetoed */ - if (vetoable) { - if (apm_info.connection_version > 0x100) - set_system_power_state(APM_STATE_REJECT); - err = -EBUSY; - ignore_sys_suspend = 0; - printk(KERN_WARNING "apm: suspend was vetoed.\n"); - goto out; - } - printk(KERN_CRIT "apm: suspend was vetoed, but suspending anyway.\n"); - } - - device_suspend(PMSG_SUSPEND); - local_irq_disable(); - device_power_down(PMSG_SUSPEND); - - local_irq_enable(); - - save_processor_state(); - err = set_system_power_state(APM_STATE_SUSPEND); - ignore_normal_resume = 1; - restore_processor_state(); - - local_irq_disable(); - reinit_timer(); - - if (err == APM_NO_ERROR) - err = APM_SUCCESS; - if (err != APM_SUCCESS) - apm_error("suspend", err); - err = (err == APM_SUCCESS) ? 
0 : -EIO; - device_power_up(); - local_irq_enable(); - device_resume(); - pm_send_all(PM_RESUME, (void *)0); - queue_event(APM_NORMAL_RESUME, NULL); - out: - spin_lock(&user_list_lock); - for (as = user_list; as != NULL; as = as->next) { - as->suspend_wait = 0; - as->suspend_result = err; - } - spin_unlock(&user_list_lock); - wake_up_interruptible(&apm_suspend_waitqueue); - return err; -} - -static void standby(void) -{ - int err; - - local_irq_disable(); - device_power_down(PMSG_SUSPEND); - local_irq_enable(); - - err = set_system_power_state(APM_STATE_STANDBY); - if ((err != APM_SUCCESS) && (err != APM_NO_ERROR)) - apm_error("standby", err); - - local_irq_disable(); - device_power_up(); - local_irq_enable(); -} - -static apm_event_t get_event(void) -{ - int error; - apm_event_t event = APM_NO_EVENTS; /* silence gcc */ - apm_eventinfo_t info; - - static int notified; - - /* we don't use the eventinfo */ - error = apm_get_event(&event, &info); - if (error == APM_SUCCESS) - return event; - - if ((error != APM_NO_EVENTS) && (notified++ == 0)) - apm_error("get_event", error); - - return 0; -} - -static void check_events(void) -{ - apm_event_t event; - static unsigned long last_resume; - static int ignore_bounce; - - while ((event = get_event()) != 0) { - if (debug) { - if (event <= NR_APM_EVENT_NAME) - printk(KERN_DEBUG "apm: received %s notify\n", - apm_event_name[event - 1]); - else - printk(KERN_DEBUG "apm: received unknown " - "event 0x%02x\n", event); - } - if (ignore_bounce - && ((jiffies - last_resume) > bounce_interval)) - ignore_bounce = 0; - - switch (event) { - case APM_SYS_STANDBY: - case APM_USER_STANDBY: - queue_event(event, NULL); - if (standbys_pending <= 0) - standby(); - break; - - case APM_USER_SUSPEND: -#ifdef CONFIG_APM_IGNORE_USER_SUSPEND - if (apm_info.connection_version > 0x100) - set_system_power_state(APM_STATE_REJECT); - break; -#endif - case APM_SYS_SUSPEND: - if (ignore_bounce) { - if (apm_info.connection_version > 0x100) - 
set_system_power_state(APM_STATE_REJECT); - break; - } - /* - * If we are already processing a SUSPEND, - * then further SUSPEND events from the BIOS - * will be ignored. We also return here to - * cope with the fact that the Thinkpads keep - * sending a SUSPEND event until something else - * happens! - */ - if (ignore_sys_suspend) - return; - ignore_sys_suspend = 1; - queue_event(event, NULL); - if (suspends_pending <= 0) - (void) suspend(1); - break; - - case APM_NORMAL_RESUME: - case APM_CRITICAL_RESUME: - case APM_STANDBY_RESUME: - ignore_sys_suspend = 0; - last_resume = jiffies; - ignore_bounce = 1; - if ((event != APM_NORMAL_RESUME) - || (ignore_normal_resume == 0)) { - device_resume(); - pm_send_all(PM_RESUME, (void *)0); - queue_event(event, NULL); - } - ignore_normal_resume = 0; - break; - - case APM_CAPABILITY_CHANGE: - case APM_LOW_BATTERY: - case APM_POWER_STATUS_CHANGE: - queue_event(event, NULL); - /* If needed, notify drivers here */ - break; - - case APM_UPDATE_TIME: - break; - - case APM_CRITICAL_SUSPEND: - /* - * We are not allowed to reject a critical suspend. - */ - (void) suspend(0); - break; - } - } -} - -static void apm_event_handler(void) -{ - static int pending_count = 4; - int err; - - if ((standbys_pending > 0) || (suspends_pending > 0)) { - if ((apm_info.connection_version > 0x100) && - (pending_count-- <= 0)) { - pending_count = 4; - if (debug) - printk(KERN_DEBUG "apm: setting state busy\n"); - err = set_system_power_state(APM_STATE_BUSY); - if (err) - apm_error("busy", err); - } - } else - pending_count = 4; - check_events(); -} - -/* - * This is the APM thread main loop. 
- */ - -static void apm_mainloop(void) -{ - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(&apm_waitqueue, &wait); - set_current_state(TASK_INTERRUPTIBLE); - for (;;) { - schedule_timeout(APM_CHECK_TIMEOUT); - if (kthread_should_stop()) - break; - /* - * Ok, check all events, check for idle (and mark us sleeping - * so as not to count towards the load average).. - */ - set_current_state(TASK_INTERRUPTIBLE); - apm_event_handler(); - } - remove_wait_queue(&apm_waitqueue, &wait); -} - -static int check_apm_user(struct apm_user *as, const char *func) -{ - if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) { - printk(KERN_ERR "apm: %s passed bad filp\n", func); - return 1; - } - return 0; -} - -static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos) -{ - struct apm_user * as; - int i; - apm_event_t event; - - as = fp->private_data; - if (check_apm_user(as, "read")) - return -EIO; - if ((int)count < sizeof(apm_event_t)) - return -EINVAL; - if ((queue_empty(as)) && (fp->f_flags & O_NONBLOCK)) - return -EAGAIN; - wait_event_interruptible(apm_waitqueue, !queue_empty(as)); - i = count; - while ((i >= sizeof(event)) && !queue_empty(as)) { - event = get_queued_event(as); - if (copy_to_user(buf, &event, sizeof(event))) { - if (i < count) - break; - return -EFAULT; - } - switch (event) { - case APM_SYS_SUSPEND: - case APM_USER_SUSPEND: - as->suspends_read++; - break; - - case APM_SYS_STANDBY: - case APM_USER_STANDBY: - as->standbys_read++; - break; - } - buf += sizeof(event); - i -= sizeof(event); - } - if (i < count) - return count - i; - if (signal_pending(current)) - return -ERESTARTSYS; - return 0; -} - -static unsigned int do_poll(struct file *fp, poll_table * wait) -{ - struct apm_user * as; - - as = fp->private_data; - if (check_apm_user(as, "poll")) - return 0; - poll_wait(fp, &apm_waitqueue, wait); - if (!queue_empty(as)) - return POLLIN | POLLRDNORM; - return 0; -} - -static int do_ioctl(struct inode * inode, struct file *filp, - 
u_int cmd, u_long arg) -{ - struct apm_user * as; - - as = filp->private_data; - if (check_apm_user(as, "ioctl")) - return -EIO; - if ((!as->suser) || (!as->writer)) - return -EPERM; - switch (cmd) { - case APM_IOC_STANDBY: - if (as->standbys_read > 0) { - as->standbys_read--; - as->standbys_pending--; - standbys_pending--; - } else - queue_event(APM_USER_STANDBY, as); - if (standbys_pending <= 0) - standby(); - break; - case APM_IOC_SUSPEND: - if (as->suspends_read > 0) { - as->suspends_read--; - as->suspends_pending--; - suspends_pending--; - } else - queue_event(APM_USER_SUSPEND, as); - if (suspends_pending <= 0) { - return suspend(1); - } else { - as->suspend_wait = 1; - wait_event_interruptible(apm_suspend_waitqueue, - as->suspend_wait == 0); - return as->suspend_result; - } - break; - default: - return -EINVAL; - } - return 0; -} - -static int do_release(struct inode * inode, struct file * filp) -{ - struct apm_user * as; - - as = filp->private_data; - if (check_apm_user(as, "release")) - return 0; - filp->private_data = NULL; - if (as->standbys_pending > 0) { - standbys_pending -= as->standbys_pending; - if (standbys_pending <= 0) - standby(); - } - if (as->suspends_pending > 0) { - suspends_pending -= as->suspends_pending; - if (suspends_pending <= 0) - (void) suspend(1); - } - spin_lock(&user_list_lock); - if (user_list == as) - user_list = as->next; - else { - struct apm_user * as1; - - for (as1 = user_list; - (as1 != NULL) && (as1->next != as); - as1 = as1->next) - ; - if (as1 == NULL) - printk(KERN_ERR "apm: filp not in user list\n"); - else - as1->next = as->next; - } - spin_unlock(&user_list_lock); - kfree(as); - return 0; -} - -static int do_open(struct inode * inode, struct file * filp) -{ - struct apm_user * as; - - as = kmalloc(sizeof(*as), GFP_KERNEL); - if (as == NULL) { - printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", - sizeof(*as)); - return -ENOMEM; - } - as->magic = APM_BIOS_MAGIC; - as->event_tail = as->event_head = 0; - 
as->suspends_pending = as->standbys_pending = 0; - as->suspends_read = as->standbys_read = 0; - /* - * XXX - this is a tiny bit broken, when we consider BSD - * process accounting. If the device is opened by root, we - * instantly flag that we used superuser privs. Who knows, - * we might close the device immediately without doing a - * privileged operation -- cevans - */ - as->suser = capable(CAP_SYS_ADMIN); - as->writer = (filp->f_mode & FMODE_WRITE) == FMODE_WRITE; - as->reader = (filp->f_mode & FMODE_READ) == FMODE_READ; - spin_lock(&user_list_lock); - as->next = user_list; - user_list = as; - spin_unlock(&user_list_lock); - filp->private_data = as; - return 0; -} - -static int proc_apm_show(struct seq_file *m, void *v) -{ - unsigned short bx; - unsigned short cx; - unsigned short dx; - int error; - unsigned short ac_line_status = 0xff; - unsigned short battery_status = 0xff; - unsigned short battery_flag = 0xff; - int percentage = -1; - int time_units = -1; - char *units = "?"; - - if ((num_online_cpus() == 1) && - !(error = apm_get_power_status(&bx, &cx, &dx))) { - ac_line_status = (bx >> 8) & 0xff; - battery_status = bx & 0xff; - if ((cx & 0xff) != 0xff) - percentage = cx & 0xff; - - if (apm_info.connection_version > 0x100) { - battery_flag = (cx >> 8) & 0xff; - if (dx != 0xffff) { - units = (dx & 0x8000) ? "min" : "sec"; - time_units = dx & 0x7fff; - } - } - } - /* Arguments, with symbols from linux/apm_bios.h. Information is - from the Get Power Status (0x0a) call unless otherwise noted. - - 0) Linux driver version (this will change if format changes) - 1) APM BIOS Version. Usually 1.0, 1.1 or 1.2. 
- 2) APM flags from APM Installation Check (0x00): - bit 0: APM_16_BIT_SUPPORT - bit 1: APM_32_BIT_SUPPORT - bit 2: APM_IDLE_SLOWS_CLOCK - bit 3: APM_BIOS_DISABLED - bit 4: APM_BIOS_DISENGAGED - 3) AC line status - 0x00: Off-line - 0x01: On-line - 0x02: On backup power (BIOS >= 1.1 only) - 0xff: Unknown - 4) Battery status - 0x00: High - 0x01: Low - 0x02: Critical - 0x03: Charging - 0x04: Selected battery not present (BIOS >= 1.2 only) - 0xff: Unknown - 5) Battery flag - bit 0: High - bit 1: Low - bit 2: Critical - bit 3: Charging - bit 7: No system battery - 0xff: Unknown - 6) Remaining battery life (percentage of charge): - 0-100: valid - -1: Unknown - 7) Remaining battery life (time units): - Number of remaining minutes or seconds - -1: Unknown - 8) min = minutes; sec = seconds */ - - seq_printf(m, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n", - driver_version, - (apm_info.bios.version >> 8) & 0xff, - apm_info.bios.version & 0xff, - apm_info.bios.flags, - ac_line_status, - battery_status, - battery_flag, - percentage, - time_units, - units); - return 0; -} - -static int proc_apm_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_apm_show, NULL); -} - -static const struct file_operations apm_file_ops = { - .owner = THIS_MODULE, - .open = proc_apm_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int apm(void *unused) -{ - unsigned short bx; - unsigned short cx; - unsigned short dx; - int error; - char * power_stat; - char * bat_stat; - -#ifdef CONFIG_SMP - /* 2002/08/01 - WT - * This is to avoid random crashes at boot time during initialization - * on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D. - * Some bioses don't like being called from CPU != 0. - * Method suggested by Ingo Molnar. 
- */ - set_cpus_allowed(current, cpumask_of_cpu(0)); - BUG_ON(smp_processor_id() != 0); -#endif - - if (apm_info.connection_version == 0) { - apm_info.connection_version = apm_info.bios.version; - if (apm_info.connection_version > 0x100) { - /* - * We only support BIOSs up to version 1.2 - */ - if (apm_info.connection_version > 0x0102) - apm_info.connection_version = 0x0102; - error = apm_driver_version(&apm_info.connection_version); - if (error != APM_SUCCESS) { - apm_error("driver version", error); - /* Fall back to an APM 1.0 connection. */ - apm_info.connection_version = 0x100; - } - } - } - - if (debug) - printk(KERN_INFO "apm: Connection version %d.%d\n", - (apm_info.connection_version >> 8) & 0xff, - apm_info.connection_version & 0xff); - -#ifdef CONFIG_APM_DO_ENABLE - if (apm_info.bios.flags & APM_BIOS_DISABLED) { - /* - * This call causes my NEC UltraLite Versa 33/C to hang if it - * is booted with PM disabled but not in the docking station. - * Unfortunate ... - */ - error = apm_enable_power_management(1); - if (error) { - apm_error("enable power management", error); - return -1; - } - } -#endif - - if ((apm_info.bios.flags & APM_BIOS_DISENGAGED) - && (apm_info.connection_version > 0x0100)) { - error = apm_engage_power_management(APM_DEVICE_ALL, 1); - if (error) { - apm_error("engage power management", error); - return -1; - } - } - - if (debug && (num_online_cpus() == 1 || smp )) { - error = apm_get_power_status(&bx, &cx, &dx); - if (error) - printk(KERN_INFO "apm: power status not available\n"); - else { - switch ((bx >> 8) & 0xff) { - case 0: power_stat = "off line"; break; - case 1: power_stat = "on line"; break; - case 2: power_stat = "on backup power"; break; - default: power_stat = "unknown"; break; - } - switch (bx & 0xff) { - case 0: bat_stat = "high"; break; - case 1: bat_stat = "low"; break; - case 2: bat_stat = "critical"; break; - case 3: bat_stat = "charging"; break; - default: bat_stat = "unknown"; break; - } - printk(KERN_INFO - "apm: AC 
%s, battery status %s, battery life ", - power_stat, bat_stat); - if ((cx & 0xff) == 0xff) - printk("unknown\n"); - else - printk("%d%%\n", cx & 0xff); - if (apm_info.connection_version > 0x100) { - printk(KERN_INFO - "apm: battery flag 0x%02x, battery life ", - (cx >> 8) & 0xff); - if (dx == 0xffff) - printk("unknown\n"); - else - printk("%d %s\n", dx & 0x7fff, - (dx & 0x8000) ? - "minutes" : "seconds"); - } - } - } - - /* Install our power off handler.. */ - if (power_off) - pm_power_off = apm_power_off; - - if (num_online_cpus() == 1 || smp) { -#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) - console_blank_hook = apm_console_blank; -#endif - apm_mainloop(); -#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) - console_blank_hook = NULL; -#endif - } - - return 0; -} - -#ifndef MODULE -static int __init apm_setup(char *str) -{ - int invert; - - while ((str != NULL) && (*str != '\0')) { - if (strncmp(str, "off", 3) == 0) - apm_disabled = 1; - if (strncmp(str, "on", 2) == 0) - apm_disabled = 0; - if ((strncmp(str, "bounce-interval=", 16) == 0) || - (strncmp(str, "bounce_interval=", 16) == 0)) - bounce_interval = simple_strtol(str + 16, NULL, 0); - if ((strncmp(str, "idle-threshold=", 15) == 0) || - (strncmp(str, "idle_threshold=", 15) == 0)) - idle_threshold = simple_strtol(str + 15, NULL, 0); - if ((strncmp(str, "idle-period=", 12) == 0) || - (strncmp(str, "idle_period=", 12) == 0)) - idle_period = simple_strtol(str + 12, NULL, 0); - invert = (strncmp(str, "no-", 3) == 0) || - (strncmp(str, "no_", 3) == 0); - if (invert) - str += 3; - if (strncmp(str, "debug", 5) == 0) - debug = !invert; - if ((strncmp(str, "power-off", 9) == 0) || - (strncmp(str, "power_off", 9) == 0)) - power_off = !invert; - if (strncmp(str, "smp", 3) == 0) - { - smp = !invert; - idle_threshold = 100; - } - if ((strncmp(str, "allow-ints", 10) == 0) || - (strncmp(str, "allow_ints", 10) == 0)) - apm_info.allow_ints = !invert; - if ((strncmp(str, "broken-psr", 10) == 0) || - 
(strncmp(str, "broken_psr", 10) == 0)) - apm_info.get_power_status_broken = !invert; - if ((strncmp(str, "realmode-power-off", 18) == 0) || - (strncmp(str, "realmode_power_off", 18) == 0)) - apm_info.realmode_power_off = !invert; - str = strchr(str, ','); - if (str != NULL) - str += strspn(str, ", \t"); - } - return 1; -} - -__setup("apm=", apm_setup); -#endif - -static const struct file_operations apm_bios_fops = { - .owner = THIS_MODULE, - .read = do_read, - .poll = do_poll, - .ioctl = do_ioctl, - .open = do_open, - .release = do_release, -}; - -static struct miscdevice apm_device = { - APM_MINOR_DEV, - "apm_bios", - &apm_bios_fops -}; - - -/* Simple "print if true" callback */ -static int __init print_if_true(struct dmi_system_id *d) -{ - printk("%s\n", d->ident); - return 0; -} - -/* - * Some Bioses enable the PS/2 mouse (touchpad) at resume, even if it was - * disabled before the suspend. Linux used to get terribly confused by that. - */ -static int __init broken_ps2_resume(struct dmi_system_id *d) -{ - printk(KERN_INFO "%s machine detected. Mousepad Resume Bug workaround hopefully not needed.\n", d->ident); - return 0; -} - -/* Some bioses have a broken protected mode poweroff and need to use realmode */ -static int __init set_realmode_power_off(struct dmi_system_id *d) -{ - if (apm_info.realmode_power_off == 0) { - apm_info.realmode_power_off = 1; - printk(KERN_INFO "%s bios detected. Using realmode poweroff only.\n", d->ident); - } - return 0; -} - -/* Some laptops require interrupts to be enabled during APM calls */ -static int __init set_apm_ints(struct dmi_system_id *d) -{ - if (apm_info.allow_ints == 0) { - apm_info.allow_ints = 1; - printk(KERN_INFO "%s machine detected. 
Enabling interrupts during APM calls.\n", d->ident); - } - return 0; -} - -/* Some APM bioses corrupt memory or just plain do not work */ -static int __init apm_is_horked(struct dmi_system_id *d) -{ - if (apm_info.disabled == 0) { - apm_info.disabled = 1; - printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident); - } - return 0; -} - -static int __init apm_is_horked_d850md(struct dmi_system_id *d) -{ - if (apm_info.disabled == 0) { - apm_info.disabled = 1; - printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident); - printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n"); - printk(KERN_INFO "download from support.intel.com \n"); - } - return 0; -} - -/* Some APM bioses hang on APM idle calls */ -static int __init apm_likes_to_melt(struct dmi_system_id *d) -{ - if (apm_info.forbid_idle == 0) { - apm_info.forbid_idle = 1; - printk(KERN_INFO "%s machine detected. Disabling APM idle calls.\n", d->ident); - } - return 0; -} - -/* - * Check for clue free BIOS implementations who use - * the following QA technique - * - * [ Write BIOS Code ]<------ - * | ^ - * < Does it Compile >----N-- - * |Y ^ - * < Does it Boot Win98 >-N-- - * |Y - * [Ship It] - * - * Phoenix A04 08/24/2000 is known bad (Dell Inspiron 5000e) - * Phoenix A07 09/29/2000 is known good (Dell Inspiron 5000) - */ -static int __init broken_apm_power(struct dmi_system_id *d) -{ - apm_info.get_power_status_broken = 1; - printk(KERN_WARNING "BIOS strings suggest APM bugs, disabling power status reporting.\n"); - return 0; -} - -/* - * This bios swaps the APM minute reporting bytes over (Many sony laptops - * have this problem). 
- */ -static int __init swab_apm_power_in_minutes(struct dmi_system_id *d) -{ - apm_info.get_power_status_swabinminutes = 1; - printk(KERN_WARNING "BIOS strings suggest APM reports battery life in minutes and wrong byte order.\n"); - return 0; -} - -static struct dmi_system_id __initdata apm_dmi_table[] = { - { - print_if_true, - KERN_WARNING "IBM T23 - BIOS 1.03b+ and controller firmware 1.02+ may be needed for Linux APM.", - { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), - DMI_MATCH(DMI_BIOS_VERSION, "1AET38WW (1.01b)"), }, - }, - { /* Handle problems with APM on the C600 */ - broken_ps2_resume, "Dell Latitude C600", - { DMI_MATCH(DMI_SYS_VENDOR, "Dell"), - DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C600"), }, - }, - { /* Allow interrupts during suspend on Dell Latitude laptops*/ - set_apm_ints, "Dell Latitude", - { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C510"), } - }, - { /* APM crashes */ - apm_is_horked, "Dell Inspiron 2500", - { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"), - DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, - }, - { /* Allow interrupts during suspend on Dell Inspiron laptops*/ - set_apm_ints, "Dell Inspiron", { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 4000"), }, - }, - { /* Handle problems with APM on Inspiron 5000e */ - broken_apm_power, "Dell Inspiron 5000e", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "A04"), - DMI_MATCH(DMI_BIOS_DATE, "08/24/2000"), }, - }, - { /* Handle problems with APM on Inspiron 2500 */ - broken_apm_power, "Dell Inspiron 2500", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "A12"), - DMI_MATCH(DMI_BIOS_DATE, "02/04/2002"), }, - }, - { /* APM crashes */ - apm_is_horked, "Dell Dimension 4100", - { DMI_MATCH(DMI_SYS_VENDOR, "Dell 
Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), - DMI_MATCH(DMI_BIOS_VENDOR,"Intel Corp."), - DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, - }, - { /* Allow interrupts during suspend on Compaq Laptops*/ - set_apm_ints, "Compaq 12XL125", - { DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), - DMI_MATCH(DMI_PRODUCT_NAME, "Compaq PC"), - DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION,"4.06"), }, - }, - { /* Allow interrupts during APM or the clock goes slow */ - set_apm_ints, "ASUSTeK", - { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "L8400K series Notebook PC"), }, - }, - { /* APM blows on shutdown */ - apm_is_horked, "ABIT KX7-333[R]", - { DMI_MATCH(DMI_BOARD_VENDOR, "ABIT"), - DMI_MATCH(DMI_BOARD_NAME, "VT8367-8233A (KX7-333[R])"), }, - }, - { /* APM crashes */ - apm_is_horked, "Trigem Delhi3", - { DMI_MATCH(DMI_SYS_VENDOR, "TriGem Computer, Inc"), - DMI_MATCH(DMI_PRODUCT_NAME, "Delhi3"), }, - }, - { /* APM crashes */ - apm_is_horked, "Fujitsu-Siemens", - { DMI_MATCH(DMI_BIOS_VENDOR, "hoenix/FUJITSU SIEMENS"), - DMI_MATCH(DMI_BIOS_VERSION, "Version1.01"), }, - }, - { /* APM crashes */ - apm_is_horked_d850md, "Intel D850MD", - { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), - DMI_MATCH(DMI_BIOS_VERSION, "MV85010A.86A.0016.P07.0201251536"), }, - }, - { /* APM crashes */ - apm_is_horked, "Intel D810EMO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), - DMI_MATCH(DMI_BIOS_VERSION, "MO81010A.86A.0008.P04.0004170800"), }, - }, - { /* APM crashes */ - apm_is_horked, "Dell XPS-Z", - { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), - DMI_MATCH(DMI_BIOS_VERSION, "A11"), - DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), }, - }, - { /* APM crashes */ - apm_is_horked, "Sharp PC-PJ/AX", - { DMI_MATCH(DMI_SYS_VENDOR, "SHARP"), - DMI_MATCH(DMI_PRODUCT_NAME, "PC-PJ/AX"), - DMI_MATCH(DMI_BIOS_VENDOR,"SystemSoft"), - DMI_MATCH(DMI_BIOS_VERSION,"Version R2.08"), }, - }, - { /* APM crashes */ - apm_is_horked, "Dell Inspiron 2500", 
- { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"), - DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, - }, - { /* APM idle hangs */ - apm_likes_to_melt, "Jabil AMD", - { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), - DMI_MATCH(DMI_BIOS_VERSION, "0AASNP06"), }, - }, - { /* APM idle hangs */ - apm_likes_to_melt, "AMI Bios", - { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), - DMI_MATCH(DMI_BIOS_VERSION, "0AASNP05"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-N505X(DE) */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0206H"), - DMI_MATCH(DMI_BIOS_DATE, "08/23/99"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-N505VX */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "W2K06H0"), - DMI_MATCH(DMI_BIOS_DATE, "02/03/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-XG29 */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0117A0"), - DMI_MATCH(DMI_BIOS_DATE, "04/25/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z600NE */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0121Z1"), - DMI_MATCH(DMI_BIOS_DATE, "05/11/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z600NE */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "WME01Z1"), - DMI_MATCH(DMI_BIOS_DATE, "08/11/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z600LEK(DE) */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, 
"R0206Z3"), - DMI_MATCH(DMI_BIOS_DATE, "12/25/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z505LS */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0203D0"), - DMI_MATCH(DMI_BIOS_DATE, "05/12/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z505LS */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0203Z3"), - DMI_MATCH(DMI_BIOS_DATE, "08/25/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z505LS (with updated BIOS) */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0209Z3"), - DMI_MATCH(DMI_BIOS_DATE, "05/12/01"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-F104K */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0204K2"), - DMI_MATCH(DMI_BIOS_DATE, "08/28/00"), }, - }, - - { /* Handle problems with APM on Sony Vaio PCG-C1VN/C1VE */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0208P1"), - DMI_MATCH(DMI_BIOS_DATE, "11/09/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-C1VE */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0204P1"), - DMI_MATCH(DMI_BIOS_DATE, "09/12/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-C1VE */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "WXPO1Z3"), - DMI_MATCH(DMI_BIOS_DATE, "10/26/01"), }, - }, - { /* broken PM poweroff bios */ - set_realmode_power_off, "Award Software v4.60 PGMA", - { DMI_MATCH(DMI_BIOS_VENDOR, "Award Software International, Inc."), - 
DMI_MATCH(DMI_BIOS_VERSION, "4.60 PGMA"), - DMI_MATCH(DMI_BIOS_DATE, "134526184"), }, - }, - - /* Generic per vendor APM settings */ - - { /* Allow interrupts during suspend on IBM laptops */ - set_apm_ints, "IBM", - { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), }, - }, - - { } -}; - -/* - * Just start the APM thread. We do NOT want to do APM BIOS - * calls from anything but the APM thread, if for no other reason - * than the fact that we don't trust the APM BIOS. This way, - * most common APM BIOS problems that lead to protection errors - * etc will have at least some level of being contained... - * - * In short, if something bad happens, at least we have a choice - * of just killing the apm thread.. - */ -static int __init apm_init(void) -{ - struct proc_dir_entry *apm_proc; - struct desc_struct *gdt; - int err; - - dmi_check_system(apm_dmi_table); - - if (apm_info.bios.version == 0 || paravirt_enabled()) { - printk(KERN_INFO "apm: BIOS not found.\n"); - return -ENODEV; - } - printk(KERN_INFO - "apm: BIOS version %d.%d Flags 0x%02x (Driver version %s)\n", - ((apm_info.bios.version >> 8) & 0xff), - (apm_info.bios.version & 0xff), - apm_info.bios.flags, - driver_version); - if ((apm_info.bios.flags & APM_32_BIT_SUPPORT) == 0) { - printk(KERN_INFO "apm: no 32 bit BIOS support\n"); - return -ENODEV; - } - - if (allow_ints) - apm_info.allow_ints = 1; - if (broken_psr) - apm_info.get_power_status_broken = 1; - if (realmode_power_off) - apm_info.realmode_power_off = 1; - /* User can override, but default is to trust DMI */ - if (apm_disabled != -1) - apm_info.disabled = apm_disabled; - - /* - * Fix for the Compaq Contura 3/25c which reports BIOS version 0.1 - * but is reportedly a 1.0 BIOS. 
- */ - if (apm_info.bios.version == 0x001) - apm_info.bios.version = 0x100; - - /* BIOS < 1.2 doesn't set cseg_16_len */ - if (apm_info.bios.version < 0x102) - apm_info.bios.cseg_16_len = 0; /* 64k */ - - if (debug) { - printk(KERN_INFO "apm: entry %x:%x cseg16 %x dseg %x", - apm_info.bios.cseg, apm_info.bios.offset, - apm_info.bios.cseg_16, apm_info.bios.dseg); - if (apm_info.bios.version > 0x100) - printk(" cseg len %x, dseg len %x", - apm_info.bios.cseg_len, - apm_info.bios.dseg_len); - if (apm_info.bios.version > 0x101) - printk(" cseg16 len %x", apm_info.bios.cseg_16_len); - printk("\n"); - } - - if (apm_info.disabled) { - printk(KERN_NOTICE "apm: disabled on user request.\n"); - return -ENODEV; - } - if ((num_online_cpus() > 1) && !power_off && !smp) { - printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n"); - apm_info.disabled = 1; - return -ENODEV; - } - if (PM_IS_ACTIVE()) { - printk(KERN_NOTICE "apm: overridden by ACPI.\n"); - apm_info.disabled = 1; - return -ENODEV; - } -#ifdef CONFIG_PM_LEGACY - pm_active = 1; -#endif - - /* - * Set up a segment that references the real mode segment 0x40 - * that extends up to the end of page zero (that we have reserved). - * This is for buggy BIOS's that refer to (real mode) segment 0x40 - * even though they are called in protected mode. - */ - set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); - _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); - - /* - * Set up the long jump entry point to the APM BIOS, which is called - * from inline assembly. - */ - apm_bios_entry.offset = apm_info.bios.offset; - apm_bios_entry.segment = APM_CS; - - /* - * The APM 1.1 BIOS is supposed to provide limit information that it - * recognizes. Many machines do this correctly, but many others do - * not restrict themselves to their claimed limit. When this happens, - * they will cause a segmentation violation in the kernel at boot time. - * Most BIOS's, however, will respect a 64k limit, so we use that. 
- * - * Note we only set APM segments on CPU zero, since we pin the APM - * code to that CPU. - */ - gdt = get_cpu_gdt_table(0); - set_base(gdt[APM_CS >> 3], - __va((unsigned long)apm_info.bios.cseg << 4)); - set_base(gdt[APM_CS_16 >> 3], - __va((unsigned long)apm_info.bios.cseg_16 << 4)); - set_base(gdt[APM_DS >> 3], - __va((unsigned long)apm_info.bios.dseg << 4)); - - apm_proc = create_proc_entry("apm", 0, NULL); - if (apm_proc) - apm_proc->proc_fops = &apm_file_ops; - - kapmd_task = kthread_create(apm, NULL, "kapmd"); - if (IS_ERR(kapmd_task)) { - printk(KERN_ERR "apm: disabled - Unable to start kernel " - "thread.\n"); - err = PTR_ERR(kapmd_task); - kapmd_task = NULL; - remove_proc_entry("apm", NULL); - return err; - } - wake_up_process(kapmd_task); - - if (num_online_cpus() > 1 && !smp ) { - printk(KERN_NOTICE - "apm: disabled - APM is not SMP safe (power off active).\n"); - return 0; - } - - /* - * Note we don't actually care if the misc_device cannot be registered. - * this driver can do its job without it, even if userspace can't - * control it. just log the error - */ - if (misc_register(&apm_device)) - printk(KERN_WARNING "apm: Could not register misc device.\n"); - - if (HZ != 100) - idle_period = (idle_period * HZ) / 100; - if (idle_threshold < 100) { - original_pm_idle = pm_idle; - pm_idle = apm_cpu_idle; - set_pm_idle = 1; - } - - return 0; -} - -static void __exit apm_exit(void) -{ - int error; - - if (set_pm_idle) { - pm_idle = original_pm_idle; - /* - * We are about to unload the current idle thread pm callback - * (pm_idle), Wait for all processors to update cached/local - * copies of pm_idle before proceeding. 
- */ - cpu_idle_wait(); - } - if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0) - && (apm_info.connection_version > 0x0100)) { - error = apm_engage_power_management(APM_DEVICE_ALL, 0); - if (error) - apm_error("disengage power management", error); - } - misc_deregister(&apm_device); - remove_proc_entry("apm", NULL); - if (power_off) - pm_power_off = NULL; - if (kapmd_task) { - kthread_stop(kapmd_task); - kapmd_task = NULL; - } -#ifdef CONFIG_PM_LEGACY - pm_active = 0; -#endif -} - -module_init(apm_init); -module_exit(apm_exit); - -MODULE_AUTHOR("Stephen Rothwell"); -MODULE_DESCRIPTION("Advanced Power Management"); -MODULE_LICENSE("GPL"); -module_param(debug, bool, 0644); -MODULE_PARM_DESC(debug, "Enable debug mode"); -module_param(power_off, bool, 0444); -MODULE_PARM_DESC(power_off, "Enable power off"); -module_param(bounce_interval, int, 0444); -MODULE_PARM_DESC(bounce_interval, - "Set the number of ticks to ignore suspend bounces"); -module_param(allow_ints, bool, 0444); -MODULE_PARM_DESC(allow_ints, "Allow interrupts during BIOS calls"); -module_param(broken_psr, bool, 0444); -MODULE_PARM_DESC(broken_psr, "BIOS has a broken GetPowerStatus call"); -module_param(realmode_power_off, bool, 0444); -MODULE_PARM_DESC(realmode_power_off, - "Switch to real mode before powering off"); -module_param(idle_threshold, int, 0444); -MODULE_PARM_DESC(idle_threshold, - "System idle percentage above which to make APM BIOS idle calls"); -module_param(idle_period, int, 0444); -MODULE_PARM_DESC(idle_period, - "Period (in sec/100) over which to caculate the idle percentage"); -module_param(smp, bool, 0444); -MODULE_PARM_DESC(smp, - "Set this to enable APM use on an SMP platform. 
Use with caution on older systems"); -MODULE_ALIAS_MISCDEV(APM_MINOR_DEV); diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c deleted file mode 100644 index cfa82c899f4..00000000000 --- a/arch/i386/kernel/asm-offsets.c +++ /dev/null @@ -1,5 +0,0 @@ -#ifdef CONFIG_X86_32 -# include "asm-offsets_32.c" -#else -# include "asm-offsets_64.c" -#endif diff --git a/arch/i386/kernel/asm-offsets_32.c b/arch/i386/kernel/asm-offsets_32.c deleted file mode 100644 index 8029742c0fc..00000000000 --- a/arch/i386/kernel/asm-offsets_32.c +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Generate definitions needed by assembly language modules. - * This code generates raw asm output which is post-processed - * to extract and format the required data. - */ - -#include -#include -#include -#include -#include -#include -#include "sigframe_32.h" -#include -#include -#include -#include -#include - -#include - -#ifdef CONFIG_LGUEST_GUEST -#include -#include "../../../drivers/lguest/lg.h" -#endif - -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - -#define OFFSET(sym, str, mem) \ - DEFINE(sym, offsetof(struct str, mem)); - -/* workaround for a warning with -Wmissing-prototypes */ -void foo(void); - -void foo(void) -{ - OFFSET(SIGCONTEXT_eax, sigcontext, eax); - OFFSET(SIGCONTEXT_ebx, sigcontext, ebx); - OFFSET(SIGCONTEXT_ecx, sigcontext, ecx); - OFFSET(SIGCONTEXT_edx, sigcontext, edx); - OFFSET(SIGCONTEXT_esi, sigcontext, esi); - OFFSET(SIGCONTEXT_edi, sigcontext, edi); - OFFSET(SIGCONTEXT_ebp, sigcontext, ebp); - OFFSET(SIGCONTEXT_esp, sigcontext, esp); - OFFSET(SIGCONTEXT_eip, sigcontext, eip); - BLANK(); - - OFFSET(CPUINFO_x86, cpuinfo_x86, x86); - OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); - OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); - OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask); - OFFSET(CPUINFO_hard_math, cpuinfo_x86, hard_math); - OFFSET(CPUINFO_cpuid_level, 
cpuinfo_x86, cpuid_level); - OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability); - OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); - BLANK(); - - OFFSET(TI_task, thread_info, task); - OFFSET(TI_exec_domain, thread_info, exec_domain); - OFFSET(TI_flags, thread_info, flags); - OFFSET(TI_status, thread_info, status); - OFFSET(TI_preempt_count, thread_info, preempt_count); - OFFSET(TI_addr_limit, thread_info, addr_limit); - OFFSET(TI_restart_block, thread_info, restart_block); - OFFSET(TI_sysenter_return, thread_info, sysenter_return); - OFFSET(TI_cpu, thread_info, cpu); - BLANK(); - - OFFSET(GDS_size, Xgt_desc_struct, size); - OFFSET(GDS_address, Xgt_desc_struct, address); - OFFSET(GDS_pad, Xgt_desc_struct, pad); - BLANK(); - - OFFSET(PT_EBX, pt_regs, ebx); - OFFSET(PT_ECX, pt_regs, ecx); - OFFSET(PT_EDX, pt_regs, edx); - OFFSET(PT_ESI, pt_regs, esi); - OFFSET(PT_EDI, pt_regs, edi); - OFFSET(PT_EBP, pt_regs, ebp); - OFFSET(PT_EAX, pt_regs, eax); - OFFSET(PT_DS, pt_regs, xds); - OFFSET(PT_ES, pt_regs, xes); - OFFSET(PT_FS, pt_regs, xfs); - OFFSET(PT_ORIG_EAX, pt_regs, orig_eax); - OFFSET(PT_EIP, pt_regs, eip); - OFFSET(PT_CS, pt_regs, xcs); - OFFSET(PT_EFLAGS, pt_regs, eflags); - OFFSET(PT_OLDESP, pt_regs, esp); - OFFSET(PT_OLDSS, pt_regs, xss); - BLANK(); - - OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); - OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); - BLANK(); - - OFFSET(pbe_address, pbe, address); - OFFSET(pbe_orig_address, pbe, orig_address); - OFFSET(pbe_next, pbe, next); - - /* Offset from the sysenter stack to tss.esp0 */ - DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, x86_tss.esp0) - - sizeof(struct tss_struct)); - - DEFINE(PAGE_SIZE_asm, PAGE_SIZE); - DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT); - DEFINE(PTRS_PER_PTE, PTRS_PER_PTE); - DEFINE(PTRS_PER_PMD, PTRS_PER_PMD); - DEFINE(PTRS_PER_PGD, PTRS_PER_PGD); - - DEFINE(VDSO_PRELINK_asm, VDSO_PRELINK); - - OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); - -#ifdef 
CONFIG_PARAVIRT - BLANK(); - OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled); - OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable); - OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable); - OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit); - OFFSET(PARAVIRT_iret, paravirt_ops, iret); - OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0); -#endif - -#ifdef CONFIG_XEN - BLANK(); - OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); - OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); -#endif - -#ifdef CONFIG_LGUEST_GUEST - BLANK(); - OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); - OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc); - OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc); - OFFSET(LGUEST_PAGES_host_cr3, lguest_pages, state.host_cr3); - OFFSET(LGUEST_PAGES_host_sp, lguest_pages, state.host_sp); - OFFSET(LGUEST_PAGES_guest_gdt_desc, lguest_pages,state.guest_gdt_desc); - OFFSET(LGUEST_PAGES_guest_idt_desc, lguest_pages,state.guest_idt_desc); - OFFSET(LGUEST_PAGES_guest_gdt, lguest_pages, state.guest_gdt); - OFFSET(LGUEST_PAGES_regs_trapnum, lguest_pages, regs.trapnum); - OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); - OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); -#endif -} diff --git a/arch/i386/kernel/bootflag.c b/arch/i386/kernel/bootflag.c deleted file mode 100644 index 0b9860530a6..00000000000 --- a/arch/i386/kernel/bootflag.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Implement 'Simple Boot Flag Specification 2.0' - */ - - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - - -#define SBF_RESERVED (0x78) -#define SBF_PNPOS (1<<0) -#define SBF_BOOTING (1<<1) -#define SBF_DIAG (1<<2) -#define SBF_PARITY (1<<7) - - -int sbf_port __initdata = -1; /* set via acpi_boot_init() */ - - -static int __init parity(u8 v) -{ - int x = 0; - int i; - - for(i=0;i<8;i++) - { - x^=(v&1); - v>>=1; - } - return 
x; -} - -static void __init sbf_write(u8 v) -{ - unsigned long flags; - if(sbf_port != -1) - { - v &= ~SBF_PARITY; - if(!parity(v)) - v|=SBF_PARITY; - - printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n", sbf_port, v); - - spin_lock_irqsave(&rtc_lock, flags); - CMOS_WRITE(v, sbf_port); - spin_unlock_irqrestore(&rtc_lock, flags); - } -} - -static u8 __init sbf_read(void) -{ - u8 v; - unsigned long flags; - if(sbf_port == -1) - return 0; - spin_lock_irqsave(&rtc_lock, flags); - v = CMOS_READ(sbf_port); - spin_unlock_irqrestore(&rtc_lock, flags); - return v; -} - -static int __init sbf_value_valid(u8 v) -{ - if(v&SBF_RESERVED) /* Reserved bits */ - return 0; - if(!parity(v)) - return 0; - return 1; -} - -static int __init sbf_init(void) -{ - u8 v; - if(sbf_port == -1) - return 0; - v = sbf_read(); - if(!sbf_value_valid(v)) - printk(KERN_WARNING "Simple Boot Flag value 0x%x read from CMOS RAM was invalid\n",v); - - v &= ~SBF_RESERVED; - v &= ~SBF_BOOTING; - v &= ~SBF_DIAG; -#if defined(CONFIG_ISAPNP) - v |= SBF_PNPOS; -#endif - sbf_write(v); - return 0; -} - -module_init(sbf_init); diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c deleted file mode 100644 index 5c2faa10e9f..00000000000 --- a/arch/i386/kernel/cpuid.c +++ /dev/null @@ -1,242 +0,0 @@ -/* ----------------------------------------------------------------------- * - * - * Copyright 2000 H. Peter Anvin - All Rights Reserved - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, - * USA; either version 2 of the License, or (at your option) any later - * version; incorporated herein by reference. 
- * - * ----------------------------------------------------------------------- */ - -/* - * cpuid.c - * - * x86 CPUID access device - * - * This device is accessed by lseek() to the appropriate CPUID level - * and then read in chunks of 16 bytes. A larger size means multiple - * reads of consecutive levels. - * - * This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on - * an SMP box will direct the access to CPU %d. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -static struct class *cpuid_class; - -#ifdef CONFIG_SMP - -struct cpuid_command { - u32 reg; - u32 *data; -}; - -static void cpuid_smp_cpuid(void *cmd_block) -{ - struct cpuid_command *cmd = (struct cpuid_command *)cmd_block; - - cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2], - &cmd->data[3]); -} - -static inline void do_cpuid(int cpu, u32 reg, u32 * data) -{ - struct cpuid_command cmd; - - preempt_disable(); - if (cpu == smp_processor_id()) { - cpuid(reg, &data[0], &data[1], &data[2], &data[3]); - } else { - cmd.reg = reg; - cmd.data = data; - - smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); - } - preempt_enable(); -} -#else /* ! CONFIG_SMP */ - -static inline void do_cpuid(int cpu, u32 reg, u32 * data) -{ - cpuid(reg, &data[0], &data[1], &data[2], &data[3]); -} - -#endif /* ! 
CONFIG_SMP */ - -static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) -{ - loff_t ret; - - lock_kernel(); - - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - break; - default: - ret = -EINVAL; - } - - unlock_kernel(); - return ret; -} - -static ssize_t cpuid_read(struct file *file, char __user *buf, - size_t count, loff_t * ppos) -{ - char __user *tmp = buf; - u32 data[4]; - u32 reg = *ppos; - int cpu = iminor(file->f_path.dentry->d_inode); - - if (count % 16) - return -EINVAL; /* Invalid chunk size */ - - for (; count; count -= 16) { - do_cpuid(cpu, reg, data); - if (copy_to_user(tmp, &data, 16)) - return -EFAULT; - tmp += 16; - *ppos = reg++; - } - - return tmp - buf; -} - -static int cpuid_open(struct inode *inode, struct file *file) -{ - unsigned int cpu = iminor(file->f_path.dentry->d_inode); - struct cpuinfo_x86 *c = &(cpu_data)[cpu]; - - if (cpu >= NR_CPUS || !cpu_online(cpu)) - return -ENXIO; /* No such CPU */ - if (c->cpuid_level < 0) - return -EIO; /* CPUID not supported */ - - return 0; -} - -/* - * File operations we support - */ -static const struct file_operations cpuid_fops = { - .owner = THIS_MODULE, - .llseek = cpuid_seek, - .read = cpuid_read, - .open = cpuid_open, -}; - -static int cpuid_device_create(int i) -{ - int err = 0; - struct device *dev; - - dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, i), "cpu%d",i); - if (IS_ERR(dev)) - err = PTR_ERR(dev); - return err; -} - -static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - cpuid_device_create(cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier = -{ - 
.notifier_call = cpuid_class_cpu_callback, -}; - -static int __init cpuid_init(void) -{ - int i, err = 0; - i = 0; - - if (register_chrdev(CPUID_MAJOR, "cpu/cpuid", &cpuid_fops)) { - printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n", - CPUID_MAJOR); - err = -EBUSY; - goto out; - } - cpuid_class = class_create(THIS_MODULE, "cpuid"); - if (IS_ERR(cpuid_class)) { - err = PTR_ERR(cpuid_class); - goto out_chrdev; - } - for_each_online_cpu(i) { - err = cpuid_device_create(i); - if (err != 0) - goto out_class; - } - register_hotcpu_notifier(&cpuid_class_cpu_notifier); - - err = 0; - goto out; - -out_class: - i = 0; - for_each_online_cpu(i) { - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, i)); - } - class_destroy(cpuid_class); -out_chrdev: - unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); -out: - return err; -} - -static void __exit cpuid_exit(void) -{ - int cpu = 0; - - for_each_online_cpu(cpu) - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); - class_destroy(cpuid_class); - unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); - unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); -} - -module_init(cpuid_init); -module_exit(cpuid_exit); - -MODULE_AUTHOR("H. Peter Anvin "); -MODULE_DESCRIPTION("x86 generic CPUID driver"); -MODULE_LICENSE("GPL"); diff --git a/arch/i386/kernel/crash_32.c b/arch/i386/kernel/crash_32.c deleted file mode 100644 index 53589d1b1a0..00000000000 --- a/arch/i386/kernel/crash_32.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Architecture specific (i386) functions for kexec based crash dumps. - * - * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) - * - * Copyright (C) IBM Corporation, 2004. All rights reserved. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - - -/* This keeps a track of which one is crashing cpu. 
*/ -static int crashing_cpu; - -#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) -static atomic_t waiting_for_crash_ipi; - -static int crash_nmi_callback(struct notifier_block *self, - unsigned long val, void *data) -{ - struct pt_regs *regs; - struct pt_regs fixed_regs; - int cpu; - - if (val != DIE_NMI_IPI) - return NOTIFY_OK; - - regs = ((struct die_args *)data)->regs; - cpu = raw_smp_processor_id(); - - /* Don't do anything if this handler is invoked on crashing cpu. - * Otherwise, system will completely hang. Crashing cpu can get - * an NMI if system was initially booted with nmi_watchdog parameter. - */ - if (cpu == crashing_cpu) - return NOTIFY_STOP; - local_irq_disable(); - - if (!user_mode_vm(regs)) { - crash_fixup_ss_esp(&fixed_regs, regs); - regs = &fixed_regs; - } - crash_save_cpu(regs, cpu); - disable_local_APIC(); - atomic_dec(&waiting_for_crash_ipi); - /* Assume hlt works */ - halt(); - for (;;) - cpu_relax(); - - return 1; -} - -static void smp_send_nmi_allbutself(void) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(safe_smp_processor_id(), mask); - if (!cpus_empty(mask)) - send_IPI_mask(mask, NMI_VECTOR); -} - -static struct notifier_block crash_nmi_nb = { - .notifier_call = crash_nmi_callback, -}; - -static void nmi_shootdown_cpus(void) -{ - unsigned long msecs; - - atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); - /* Would it be better to replace the trap vector here? */ - if (register_die_notifier(&crash_nmi_nb)) - return; /* return what? 
*/ - /* Ensure the new callback function is set before sending - * out the NMI - */ - wmb(); - - smp_send_nmi_allbutself(); - - msecs = 1000; /* Wait at most a second for the other cpus to stop */ - while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { - mdelay(1); - msecs--; - } - - /* Leave the nmi callback set */ - disable_local_APIC(); -} -#else -static void nmi_shootdown_cpus(void) -{ - /* There are no cpus to shootdown */ -} -#endif - -void machine_crash_shutdown(struct pt_regs *regs) -{ - /* This function is only called after the system - * has panicked or is otherwise in a critical state. - * The minimum amount of code to allow a kexec'd kernel - * to run successfully needs to happen here. - * - * In practice this means shooting down the other cpus in - * an SMP system. - */ - /* The kernel is broken so disable interrupts */ - local_irq_disable(); - - /* Make a note of crashing cpu. Will be used in NMI callback.*/ - crashing_cpu = safe_smp_processor_id(); - nmi_shootdown_cpus(); - lapic_shutdown(); -#if defined(CONFIG_X86_IO_APIC) - disable_IO_APIC(); -#endif - crash_save_cpu(regs, safe_smp_processor_id()); -} diff --git a/arch/i386/kernel/crash_dump_32.c b/arch/i386/kernel/crash_dump_32.c deleted file mode 100644 index 3f532df488b..00000000000 --- a/arch/i386/kernel/crash_dump_32.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * kernel/crash_dump.c - Memory preserving reboot related code. - * - * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) - * Copyright (C) IBM Corporation, 2004. 
All rights reserved - */ - -#include -#include -#include - -#include - -static void *kdump_buf_page; - -/** - * copy_oldmem_page - copy one page from "oldmem" - * @pfn: page frame number to be copied - * @buf: target memory address for the copy; this can be in kernel address - * space or user address space (see @userbuf) - * @csize: number of bytes to copy - * @offset: offset in bytes into the page (based on pfn) to begin the copy - * @userbuf: if set, @buf is in user address space, use copy_to_user(), - * otherwise @buf is in kernel address space, use memcpy(). - * - * Copy a page from "oldmem". For this page, there is no pte mapped - * in the current kernel. We stitch up a pte, similar to kmap_atomic. - * - * Calling copy_to_user() in atomic context is not desirable. Hence first - * copying the data to a pre-allocated kernel page and then copying to user - * space in non-atomic context. - */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) -{ - void *vaddr; - - if (!csize) - return 0; - - vaddr = kmap_atomic_pfn(pfn, KM_PTE0); - - if (!userbuf) { - memcpy(buf, (vaddr + offset), csize); - kunmap_atomic(vaddr, KM_PTE0); - } else { - if (!kdump_buf_page) { - printk(KERN_WARNING "Kdump: Kdump buffer page not" - " allocated\n"); - return -EFAULT; - } - copy_page(kdump_buf_page, vaddr); - kunmap_atomic(vaddr, KM_PTE0); - if (copy_to_user(buf, (kdump_buf_page + offset), csize)) - return -EFAULT; - } - - return csize; -} - -static int __init kdump_buf_page_init(void) -{ - int ret = 0; - - kdump_buf_page = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!kdump_buf_page) { - printk(KERN_WARNING "Kdump: Failed to allocate kdump buffer" - " page\n"); - ret = -ENOMEM; - } - - return ret; -} -arch_initcall(kdump_buf_page_init); diff --git a/arch/i386/kernel/doublefault_32.c b/arch/i386/kernel/doublefault_32.c deleted file mode 100644 index 40978af630e..00000000000 --- a/arch/i386/kernel/doublefault_32.c +++ /dev/null @@ -1,70 
+0,0 @@ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#define DOUBLEFAULT_STACKSIZE (1024) -static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; -#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) - -#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM) - -static void doublefault_fn(void) -{ - struct Xgt_desc_struct gdt_desc = {0, 0}; - unsigned long gdt, tss; - - store_gdt(&gdt_desc); - gdt = gdt_desc.address; - - printk(KERN_EMERG "PANIC: double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size); - - if (ptr_ok(gdt)) { - gdt += GDT_ENTRY_TSS << 3; - tss = *(u16 *)(gdt+2); - tss += *(u8 *)(gdt+4) << 16; - tss += *(u8 *)(gdt+7) << 24; - printk(KERN_EMERG "double fault, tss at %08lx\n", tss); - - if (ptr_ok(tss)) { - struct i386_hw_tss *t = (struct i386_hw_tss *)tss; - - printk(KERN_EMERG "eip = %08lx, esp = %08lx\n", t->eip, t->esp); - - printk(KERN_EMERG "eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", - t->eax, t->ebx, t->ecx, t->edx); - printk(KERN_EMERG "esi = %08lx, edi = %08lx\n", - t->esi, t->edi); - } - } - - for (;;) - cpu_relax(); -} - -struct tss_struct doublefault_tss __cacheline_aligned = { - .x86_tss = { - .esp0 = STACK_START, - .ss0 = __KERNEL_DS, - .ldt = 0, - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, - - .eip = (unsigned long) doublefault_fn, - /* 0x2 bit is always set */ - .eflags = X86_EFLAGS_SF | 0x2, - .esp = STACK_START, - .es = __USER_DS, - .cs = __KERNEL_CS, - .ss = __KERNEL_DS, - .ds = __USER_DS, - .fs = __KERNEL_PERCPU, - - .__cr3 = __pa(swapper_pg_dir) - } -}; diff --git a/arch/i386/kernel/e820_32.c b/arch/i386/kernel/e820_32.c deleted file mode 100644 index 3c86b979a40..00000000000 --- a/arch/i386/kernel/e820_32.c +++ /dev/null @@ -1,944 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#ifdef 
CONFIG_EFI -int efi_enabled = 0; -EXPORT_SYMBOL(efi_enabled); -#endif - -struct e820map e820; -struct change_member { - struct e820entry *pbios; /* pointer to original bios entry */ - unsigned long long addr; /* address for this change point */ -}; -static struct change_member change_point_list[2*E820MAX] __initdata; -static struct change_member *change_point[2*E820MAX] __initdata; -static struct e820entry *overlap_list[E820MAX] __initdata; -static struct e820entry new_bios[E820MAX] __initdata; -/* For PCI or other memory-mapped resources */ -unsigned long pci_mem_start = 0x10000000; -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_mem_start); -#endif -extern int user_defined_memmap; -struct resource data_resource = { - .name = "Kernel data", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -struct resource code_resource = { - .name = "Kernel code", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource system_rom_resource = { - .name = "System ROM", - .start = 0xf0000, - .end = 0xfffff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource extension_rom_resource = { - .name = "Extension ROM", - .start = 0xe0000, - .end = 0xeffff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource adapter_rom_resources[] = { { - .name = "Adapter ROM", - .start = 0xc8000, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { 
- .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -} }; - -static struct resource video_rom_resource = { - .name = "Video ROM", - .start = 0xc0000, - .end = 0xc7fff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource video_ram_resource = { - .name = "Video RAM area", - .start = 0xa0000, - .end = 0xbffff, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource standard_io_resources[] = { { - .name = "dma1", - .start = 0x0000, - .end = 0x001f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic1", - .start = 0x0020, - .end = 0x0021, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer0", - .start = 0x0040, - .end = 0x0043, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer1", - .start = 0x0050, - .end = 0x0053, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "keyboard", - .start = 0x0060, - .end = 0x006f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma page reg", - .start = 0x0080, - .end = 0x008f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic2", - .start = 0x00a0, - .end = 0x00a1, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma2", - .start = 0x00c0, - .end = 0x00df, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "fpu", - .start = 0x00f0, - .end = 0x00ff, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -} }; - -#define ROMSIGNATURE 0xaa55 - -static int __init romsignature(const unsigned char *rom) -{ - const unsigned short * const ptr = (const unsigned short *)rom; - unsigned short sig; - - return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; -} - -static int __init romchecksum(const unsigned char *rom, unsigned long length) -{ - unsigned char sum, c; - - for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) - sum += c; - return !length && !sum; -} - -static void __init probe_roms(void) -{ - const 
unsigned char *rom; - unsigned long start, length, upper; - unsigned char c; - int i; - - /* video rom */ - upper = adapter_rom_resources[0].start; - for (start = video_rom_resource.start; start < upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - video_rom_resource.start = start; - - if (probe_kernel_address(rom + 2, c) != 0) - continue; - - /* 0 < length <= 0x7f * 512, historically */ - length = c * 512; - - /* if checksum okay, trust length byte */ - if (length && romchecksum(rom, length)) - video_rom_resource.end = start + length - 1; - - request_resource(&iomem_resource, &video_rom_resource); - break; - } - - start = (video_rom_resource.end + 1 + 2047) & ~2047UL; - if (start < upper) - start = upper; - - /* system rom */ - request_resource(&iomem_resource, &system_rom_resource); - upper = system_rom_resource.start; - - /* check for extension rom (ignore length byte!) */ - rom = isa_bus_to_virt(extension_rom_resource.start); - if (romsignature(rom)) { - length = extension_rom_resource.end - extension_rom_resource.start + 1; - if (romchecksum(rom, length)) { - request_resource(&iomem_resource, &extension_rom_resource); - upper = extension_rom_resource.start; - } - } - - /* check for adapter roms on 2k boundaries */ - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - if (probe_kernel_address(rom + 2, c) != 0) - continue; - - /* 0 < length <= 0x7f * 512, historically */ - length = c * 512; - - /* but accept any length that fits if checksum okay */ - if (!length || start + length > upper || !romchecksum(rom, length)) - continue; - - adapter_rom_resources[i].start = start; - adapter_rom_resources[i].end = start + length - 1; - request_resource(&iomem_resource, &adapter_rom_resources[i]); - - start = adapter_rom_resources[i++].end & ~2047UL; - } -} - -/* - * Request address space for all standard RAM and ROM 
resources - * and also for regions reported as reserved by the e820. - */ -static void __init -legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) -{ - int i; - - probe_roms(); - for (i = 0; i < e820.nr_map; i++) { - struct resource *res; -#ifndef CONFIG_RESOURCES_64BIT - if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) - continue; -#endif - res = kzalloc(sizeof(struct resource), GFP_ATOMIC); - switch (e820.map[i].type) { - case E820_RAM: res->name = "System RAM"; break; - case E820_ACPI: res->name = "ACPI Tables"; break; - case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; - default: res->name = "reserved"; - } - res->start = e820.map[i].addr; - res->end = res->start + e820.map[i].size - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - if (request_resource(&iomem_resource, res)) { - kfree(res); - continue; - } - if (e820.map[i].type == E820_RAM) { - /* - * We don't know which RAM region contains kernel data, - * so we try it repeatedly and let the resource manager - * test it. - */ - request_resource(res, code_resource); - request_resource(res, data_resource); -#ifdef CONFIG_KEXEC - request_resource(res, &crashk_res); -#endif - } - } -} - -/* - * Request address space for all standard resources - * - * This is called just before pcibios_init(), which is also a - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). 
- */ -static int __init request_standard_resources(void) -{ - int i; - - printk("Setting up standard PCI resources\n"); - if (efi_enabled) - efi_initialize_iomem_resources(&code_resource, &data_resource); - else - legacy_init_iomem_resources(&code_resource, &data_resource); - - /* EFI systems may still have VGA */ - request_resource(&iomem_resource, &video_ram_resource); - - /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) - request_resource(&ioport_resource, &standard_io_resources[i]); - return 0; -} - -subsys_initcall(request_standard_resources); - -#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) -/** - * e820_mark_nosave_regions - Find the ranges of physical addresses that do not - * correspond to e820 RAM areas and mark the corresponding pages as nosave for - * hibernation. - * - * This function requires the e820 map to be sorted and without any - * overlapping entries and assumes the first e820 area to be RAM. - */ -void __init e820_mark_nosave_regions(void) -{ - int i; - unsigned long pfn; - - pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); - for (i = 1; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (pfn < PFN_UP(ei->addr)) - register_nosave_region(pfn, PFN_UP(ei->addr)); - - pfn = PFN_DOWN(ei->addr + ei->size); - if (ei->type != E820_RAM) - register_nosave_region(PFN_UP(ei->addr), pfn); - - if (pfn >= max_low_pfn) - break; - } -} -#endif - -void __init add_memory_region(unsigned long long start, - unsigned long long size, int type) -{ - int x; - - if (!efi_enabled) { - x = e820.nr_map; - - if (x == E820MAX) { - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); - return; - } - - e820.map[x].addr = start; - e820.map[x].size = size; - e820.map[x].type = type; - e820.nr_map++; - } -} /* add_memory_region */ - -/* - * Sanitize the BIOS e820 map. - * - * Some e820 responses include overlapping entries. 
The following - * replaces the original e820 map with a new one, removing overlaps. - * - */ -int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) -{ - struct change_member *change_tmp; - unsigned long current_type, last_type; - unsigned long long last_addr; - int chgidx, still_changing; - int overlap_entries; - int new_bios_entry; - int old_nr, new_nr, chg_nr; - int i; - - /* - Visually we're performing the following (1,2,3,4 = memory types)... - - Sample memory map (w/overlaps): - ____22__________________ - ______________________4_ - ____1111________________ - _44_____________________ - 11111111________________ - ____________________33__ - ___________44___________ - __________33333_________ - ______________22________ - ___________________2222_ - _________111111111______ - _____________________11_ - _________________4______ - - Sanitized equivalent (no overlap): - 1_______________________ - _44_____________________ - ___1____________________ - ____22__________________ - ______11________________ - _________1______________ - __________3_____________ - ___________44___________ - _____________33_________ - _______________2________ - ________________1_______ - _________________4______ - ___________________2____ - ____________________33__ - ______________________4_ - */ - /* if there's only one memory region, don't bother */ - if (*pnr_map < 2) { - return -1; - } - - old_nr = *pnr_map; - - /* bail out if we find any unreasonable addresses in bios map */ - for (i=0; iaddr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; - } - } - chg_nr = chgidx; /* true number of change-points */ - - /* sort change-point list by memory addresses (low -> high) */ - still_changing = 1; - while (still_changing) { - still_changing = 0; - for (i=1; i < chg_nr; i++) { - /* if > , swap */ - /* or, if current= & last=, swap */ - if 
((change_point[i]->addr < change_point[i-1]->addr) || - ((change_point[i]->addr == change_point[i-1]->addr) && - (change_point[i]->addr == change_point[i]->pbios->addr) && - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) - ) - { - change_tmp = change_point[i]; - change_point[i] = change_point[i-1]; - change_point[i-1] = change_tmp; - still_changing=1; - } - } - } - - /* create a new bios memory map, removing overlaps */ - overlap_entries=0; /* number of entries in the overlap table */ - new_bios_entry=0; /* index for creating new bios map entries */ - last_type = 0; /* start with undefined memory type */ - last_addr = 0; /* start with 0 as last starting address */ - /* loop through change-points, determining affect on the new bios map */ - for (chgidx=0; chgidx < chg_nr; chgidx++) - { - /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) - { - /* add map entry to overlap list (> 1 entry implies an overlap) */ - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; - } - else - { - /* remove entry from list (order independent, so swap with last) */ - for (i=0; ipbios) - overlap_list[i] = overlap_list[overlap_entries-1]; - } - overlap_entries--; - } - /* if there are overlapping entries, decide which "type" to use */ - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ - current_type = 0; - for (i=0; itype > current_type) - current_type = overlap_list[i]->type; - /* continue building up new bios map based on this information */ - if (current_type != last_type) { - if (last_type != 0) { - new_bios[new_bios_entry].size = - change_point[chgidx]->addr - last_addr; - /* move forward only if the new size was non-zero */ - if (new_bios[new_bios_entry].size != 0) - if (++new_bios_entry >= E820MAX) - break; /* no more space left for new bios entries */ - } - if (current_type != 0) { - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; - 
new_bios[new_bios_entry].type = current_type; - last_addr=change_point[chgidx]->addr; - } - last_type = current_type; - } - } - new_nr = new_bios_entry; /* retain count for new bios entries */ - - /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); - *pnr_map = new_nr; - - return 0; -} - -/* - * Copy the BIOS e820 map into a safe place. - * - * Sanity-check it while we're at it.. - * - * If we're lucky and live on a modern system, the setup code - * will have given us a memory map that we can use to properly - * set up memory. If we aren't, we'll fake a memory map. - * - * We check to see that the memory map contains at least 2 elements - * before we'll use it, because the detection code in setup.S may - * not be perfect and most every PC known to man has two memory - * regions: one from 0 to 640k, and one from 1mb up. (The IBM - * thinkpad 560x, for example, does not cooperate with the memory - * detection code.) - */ -int __init copy_e820_map(struct e820entry * biosmap, int nr_map) -{ - /* Only one memory region (or negative)? Ignore it */ - if (nr_map < 2) - return -1; - - do { - unsigned long long start = biosmap->addr; - unsigned long long size = biosmap->size; - unsigned long long end = start + size; - unsigned long type = biosmap->type; - - /* Overflow in 64 bits? Ignore the memory map. */ - if (start > end) - return -1; - - /* - * Some BIOSes claim RAM in the 640k - 1M region. - * Not right. Fix it up. - */ - if (type == E820_RAM) { - if (start < 0x100000ULL && end > 0xA0000ULL) { - if (start < 0xA0000ULL) - add_memory_region(start, 0xA0000ULL-start, type); - if (end <= 0x100000ULL) - continue; - start = 0x100000ULL; - size = end - start; - } - } - add_memory_region(start, size, type); - } while (biosmap++,--nr_map); - return 0; -} - -/* - * Callback for efi_memory_walk. 
- */ -static int __init -efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) -{ - unsigned long *max_pfn = arg, pfn; - - if (start < end) { - pfn = PFN_UP(end -1); - if (pfn > *max_pfn) - *max_pfn = pfn; - } - return 0; -} - -static int __init -efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) -{ - memory_present(0, PFN_UP(start), PFN_DOWN(end)); - return 0; -} - -/* - * Find the highest page frame number we have available - */ -void __init find_max_pfn(void) -{ - int i; - - max_pfn = 0; - if (efi_enabled) { - efi_memmap_walk(efi_find_max_pfn, &max_pfn); - efi_memmap_walk(efi_memory_present_wrapper, NULL); - return; - } - - for (i = 0; i < e820.nr_map; i++) { - unsigned long start, end; - /* RAM? */ - if (e820.map[i].type != E820_RAM) - continue; - start = PFN_UP(e820.map[i].addr); - end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); - if (start >= end) - continue; - if (end > max_pfn) - max_pfn = end; - memory_present(0, start, end); - } -} - -/* - * Free all available memory for boot time allocation. Used - * as a callback function by efi_memory_walk() - */ - -static int __init -free_available_memory(unsigned long start, unsigned long end, void *arg) -{ - /* check max_low_pfn */ - if (start >= (max_low_pfn << PAGE_SHIFT)) - return 0; - if (end >= (max_low_pfn << PAGE_SHIFT)) - end = max_low_pfn << PAGE_SHIFT; - if (start < end) - free_bootmem(start, end - start); - - return 0; -} -/* - * Register fully available low RAM pages with the bootmem allocator. 
- */ -void __init register_bootmem_low_pages(unsigned long max_low_pfn) -{ - int i; - - if (efi_enabled) { - efi_memmap_walk(free_available_memory, NULL); - return; - } - for (i = 0; i < e820.nr_map; i++) { - unsigned long curr_pfn, last_pfn, size; - /* - * Reserve usable low memory - */ - if (e820.map[i].type != E820_RAM) - continue; - /* - * We are rounding up the start address of usable memory: - */ - curr_pfn = PFN_UP(e820.map[i].addr); - if (curr_pfn >= max_low_pfn) - continue; - /* - * ... and at the end of the usable range downwards: - */ - last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); - - if (last_pfn > max_low_pfn) - last_pfn = max_low_pfn; - - /* - * .. finally, did all the rounding and playing - * around just make the area go away? - */ - if (last_pfn <= curr_pfn) - continue; - - size = last_pfn - curr_pfn; - free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); - } -} - -void __init e820_register_memory(void) -{ - unsigned long gapstart, gapsize, round; - unsigned long long last; - int i; - - /* - * Search for the bigest gap in the low 32 bits of the e820 - * memory space. - */ - last = 0x100000000ull; - gapstart = 0x10000000; - gapsize = 0x400000; - i = e820.nr_map; - while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; - - /* - * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true - */ - if (last > end) { - unsigned long gap = last - end; - - if (gap > gapsize) { - gapsize = gap; - gapstart = end; - } - } - if (start < last) - last = start; - } - - /* - * See how much we want to round up: start off with - * rounding to the next 1MB area. 
- */ - round = 0x100000; - while ((gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gapstart + round) & -round; - - printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", - pci_mem_start, gapstart, gapsize); -} - -void __init print_memory_map(char *who) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - printk(" %s: %016Lx - %016Lx ", who, - e820.map[i].addr, - e820.map[i].addr + e820.map[i].size); - switch (e820.map[i].type) { - case E820_RAM: printk("(usable)\n"); - break; - case E820_RESERVED: - printk("(reserved)\n"); - break; - case E820_ACPI: - printk("(ACPI data)\n"); - break; - case E820_NVS: - printk("(ACPI NVS)\n"); - break; - default: printk("type %u\n", e820.map[i].type); - break; - } - } -} - -static __init __always_inline void efi_limit_regions(unsigned long long size) -{ - unsigned long long current_addr = 0; - efi_memory_desc_t *md, *next_md; - void *p, *p1; - int i, j; - - j = 0; - p1 = memmap.map; - for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { - md = p; - next_md = p1; - current_addr = md->phys_addr + - PFN_PHYS(md->num_pages); - if (is_available_memory(md)) { - if (md->phys_addr >= size) continue; - memcpy(next_md, md, memmap.desc_size); - if (current_addr >= size) { - next_md->num_pages -= - PFN_UP(current_addr-size); - } - p1 += memmap.desc_size; - next_md = p1; - j++; - } else if ((md->attribute & EFI_MEMORY_RUNTIME) == - EFI_MEMORY_RUNTIME) { - /* In order to make runtime services - * available we have to include runtime - * memory regions in memory map */ - memcpy(next_md, md, memmap.desc_size); - p1 += memmap.desc_size; - next_md = p1; - j++; - } - } - memmap.nr_map = j; - memmap.map_end = memmap.map + - (memmap.nr_map * memmap.desc_size); -} - -void __init limit_regions(unsigned long long size) -{ - unsigned long long current_addr; - int i; - - print_memory_map("limit_regions start"); - if (efi_enabled) { - efi_limit_regions(size); - return; - } - 
for (i = 0; i < e820.nr_map; i++) { - current_addr = e820.map[i].addr + e820.map[i].size; - if (current_addr < size) - continue; - - if (e820.map[i].type != E820_RAM) - continue; - - if (e820.map[i].addr >= size) { - /* - * This region starts past the end of the - * requested size, skip it completely. - */ - e820.nr_map = i; - } else { - e820.nr_map = i + 1; - e820.map[i].size -= current_addr - size; - } - print_memory_map("limit_regions endfor"); - return; - } - print_memory_map("limit_regions endfunc"); -} - -/* - * This function checks if any part of the range is mapped - * with type. - */ -int -e820_any_mapped(u64 start, u64 end, unsigned type) -{ - int i; - for (i = 0; i < e820.nr_map; i++) { - const struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) - continue; - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - return 1; - } - return 0; -} -EXPORT_SYMBOL_GPL(e820_any_mapped); - - /* - * This function checks if the entire range is mapped with type. 
- * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case - */ -int __init -e820_all_mapped(unsigned long s, unsigned long e, unsigned type) -{ - u64 start = s; - u64 end = e; - int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) - continue; - /* is the region (part) in overlap with the current region ?*/ - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - /* if the region is at the beginning of we move - * start to the end of the region since it's ok until there - */ - if (ei->addr <= start) - start = ei->addr + ei->size; - /* if start is now at or beyond end, we're done, full - * coverage */ - if (start >= end) - return 1; /* we're done */ - } - return 0; -} - -static int __init parse_memmap(char *arg) -{ - if (!arg) - return -EINVAL; - - if (strcmp(arg, "exactmap") == 0) { -#ifdef CONFIG_CRASH_DUMP - /* If we are doing a crash dump, we - * still need to know the real mem - * size before original memory map is - * reset. - */ - find_max_pfn(); - saved_max_pfn = max_pfn; -#endif - e820.nr_map = 0; - user_defined_memmap = 1; - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. 
- */ - unsigned long long start_at, mem_size; - - mem_size = memparse(arg, &arg); - if (*arg == '@') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_RAM); - } else if (*arg == '#') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_ACPI); - } else if (*arg == '$') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_RESERVED); - } else { - limit_regions(mem_size); - user_defined_memmap = 1; - } - } - return 0; -} -early_param("memmap", parse_memmap); diff --git a/arch/i386/kernel/early_printk.c b/arch/i386/kernel/early_printk.c deleted file mode 100644 index 92f812ba275..00000000000 --- a/arch/i386/kernel/early_printk.c +++ /dev/null @@ -1,2 +0,0 @@ - -#include "../../x86_64/kernel/early_printk.c" diff --git a/arch/i386/kernel/efi_32.c b/arch/i386/kernel/efi_32.c deleted file mode 100644 index 2452c6fbe99..00000000000 --- a/arch/i386/kernel/efi_32.c +++ /dev/null @@ -1,712 +0,0 @@ -/* - * Extensible Firmware Interface - * - * Based on Extensible Firmware Interface Specification version 1.0 - * - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 1999-2002 Hewlett-Packard Co. - * David Mosberger-Tang - * Stephane Eranian - * - * All EFI Runtime Services are not implemented yet as EFI only - * supports physical mode addressing on SoftSDV. This is to be fixed - * in a future version. --drummond 1999-07-20 - * - * Implemented EFI runtime services and virtual mode calls. --davidm - * - * Goutham Rao: - * Skip non-WB memory and ignore empty memory ranges. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#define EFI_DEBUG 0 -#define PFX "EFI: " - -extern efi_status_t asmlinkage efi_call_phys(void *, ...); - -struct efi efi; -EXPORT_SYMBOL(efi); -static struct efi efi_phys; -struct efi_memory_map memmap; - -/* - * We require an early boot_ioremap mapping mechanism initially - */ -extern void * boot_ioremap(unsigned long, unsigned long); - -/* - * To make EFI call EFI runtime service in physical addressing mode we need - * prelog/epilog before/after the invocation to disable interrupt, to - * claim EFI runtime service handler exclusively and to duplicate a memory in - * low memory space say 0 - 3G. - */ - -static unsigned long efi_rt_eflags; -static DEFINE_SPINLOCK(efi_rt_lock); -static pgd_t efi_bak_pg_dir_pointer[2]; - -static void efi_call_phys_prelog(void) __acquires(efi_rt_lock) -{ - unsigned long cr4; - unsigned long temp; - struct Xgt_desc_struct gdt_descr; - - spin_lock(&efi_rt_lock); - local_irq_save(efi_rt_eflags); - - /* - * If I don't have PSE, I should just duplicate two entries in page - * directory. If I have PSE, I just need to duplicate one entry in - * page directory. - */ - cr4 = read_cr4(); - - if (cr4 & X86_CR4_PSE) { - efi_bak_pg_dir_pointer[0].pgd = - swapper_pg_dir[pgd_index(0)].pgd; - swapper_pg_dir[0].pgd = - swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; - } else { - efi_bak_pg_dir_pointer[0].pgd = - swapper_pg_dir[pgd_index(0)].pgd; - efi_bak_pg_dir_pointer[1].pgd = - swapper_pg_dir[pgd_index(0x400000)].pgd; - swapper_pg_dir[pgd_index(0)].pgd = - swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; - temp = PAGE_OFFSET + 0x400000; - swapper_pg_dir[pgd_index(0x400000)].pgd = - swapper_pg_dir[pgd_index(temp)].pgd; - } - - /* - * After the lock is released, the original page table is restored. 
- */ - local_flush_tlb(); - - gdt_descr.address = __pa(get_cpu_gdt_table(0)); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); -} - -static void efi_call_phys_epilog(void) __releases(efi_rt_lock) -{ - unsigned long cr4; - struct Xgt_desc_struct gdt_descr; - - gdt_descr.address = (unsigned long)get_cpu_gdt_table(0); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); - - cr4 = read_cr4(); - - if (cr4 & X86_CR4_PSE) { - swapper_pg_dir[pgd_index(0)].pgd = - efi_bak_pg_dir_pointer[0].pgd; - } else { - swapper_pg_dir[pgd_index(0)].pgd = - efi_bak_pg_dir_pointer[0].pgd; - swapper_pg_dir[pgd_index(0x400000)].pgd = - efi_bak_pg_dir_pointer[1].pgd; - } - - /* - * After the lock is released, the original page table is restored. - */ - local_flush_tlb(); - - local_irq_restore(efi_rt_eflags); - spin_unlock(&efi_rt_lock); -} - -static efi_status_t -phys_efi_set_virtual_address_map(unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) -{ - efi_status_t status; - - efi_call_phys_prelog(); - status = efi_call_phys(efi_phys.set_virtual_address_map, - memory_map_size, descriptor_size, - descriptor_version, virtual_map); - efi_call_phys_epilog(); - return status; -} - -static efi_status_t -phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) -{ - efi_status_t status; - - efi_call_phys_prelog(); - status = efi_call_phys(efi_phys.get_time, tm, tc); - efi_call_phys_epilog(); - return status; -} - -inline int efi_set_rtc_mmss(unsigned long nowtime) -{ - int real_seconds, real_minutes; - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; - - spin_lock(&efi_rt_lock); - status = efi.get_time(&eft, &cap); - spin_unlock(&efi_rt_lock); - if (status != EFI_SUCCESS) - panic("Ooops, efitime: can't read time!\n"); - real_seconds = nowtime % 60; - real_minutes = nowtime / 60; - - if (((abs(real_minutes - eft.minute) + 15)/30) & 1) - real_minutes += 30; - real_minutes %= 60; - - eft.minute = real_minutes; - 
eft.second = real_seconds; - - if (status != EFI_SUCCESS) { - printk("Ooops: efitime: can't read time!\n"); - return -1; - } - return 0; -} -/* - * This is used during kernel init before runtime - * services have been remapped and also during suspend, therefore, - * we'll need to call both in physical and virtual modes. - */ -inline unsigned long efi_get_time(void) -{ - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; - - if (efi.get_time) { - /* if we are in virtual mode use remapped function */ - status = efi.get_time(&eft, &cap); - } else { - /* we are in physical mode */ - status = phys_efi_get_time(&eft, &cap); - } - - if (status != EFI_SUCCESS) - printk("Oops: efitime: can't read time status: 0x%lx\n",status); - - return mktime(eft.year, eft.month, eft.day, eft.hour, - eft.minute, eft.second); -} - -int is_available_memory(efi_memory_desc_t * md) -{ - if (!(md->attribute & EFI_MEMORY_WB)) - return 0; - - switch (md->type) { - case EFI_LOADER_CODE: - case EFI_LOADER_DATA: - case EFI_BOOT_SERVICES_CODE: - case EFI_BOOT_SERVICES_DATA: - case EFI_CONVENTIONAL_MEMORY: - return 1; - } - return 0; -} - -/* - * We need to map the EFI memory map again after paging_init(). 
- */ -void __init efi_map_memmap(void) -{ - memmap.map = NULL; - - memmap.map = bt_ioremap((unsigned long) memmap.phys_map, - (memmap.nr_map * memmap.desc_size)); - if (memmap.map == NULL) - printk(KERN_ERR PFX "Could not remap the EFI memmap!\n"); - - memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); -} - -#if EFI_DEBUG -static void __init print_efi_memmap(void) -{ - efi_memory_desc_t *md; - void *p; - int i; - - for (p = memmap.map, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { - md = p; - printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, " - "range=[0x%016llx-0x%016llx) (%lluMB)\n", - i, md->type, md->attribute, md->phys_addr, - md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), - (md->num_pages >> (20 - EFI_PAGE_SHIFT))); - } -} -#endif /* EFI_DEBUG */ - -/* - * Walks the EFI memory map and calls CALLBACK once for each EFI - * memory descriptor that has memory that is available for kernel use. - */ -void efi_memmap_walk(efi_freemem_callback_t callback, void *arg) -{ - int prev_valid = 0; - struct range { - unsigned long start; - unsigned long end; - } uninitialized_var(prev), curr; - efi_memory_desc_t *md; - unsigned long start, end; - void *p; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - - if ((md->num_pages == 0) || (!is_available_memory(md))) - continue; - - curr.start = md->phys_addr; - curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT); - - if (!prev_valid) { - prev = curr; - prev_valid = 1; - } else { - if (curr.start < prev.start) - printk(KERN_INFO PFX "Unordered memory map\n"); - if (prev.end == curr.start) - prev.end = curr.end; - else { - start = - (unsigned long) (PAGE_ALIGN(prev.start)); - end = (unsigned long) (prev.end & PAGE_MASK); - if ((end > start) - && (*callback) (start, end, arg) < 0) - return; - prev = curr; - } - } - } - if (prev_valid) { - start = (unsigned long) PAGE_ALIGN(prev.start); - end = (unsigned long) (prev.end & PAGE_MASK); - if (end > start) - (*callback) 
(start, end, arg); - } -} - -void __init efi_init(void) -{ - efi_config_table_t *config_tables; - efi_runtime_services_t *runtime; - efi_char16_t *c16; - char vendor[100] = "unknown"; - unsigned long num_config_tables; - int i = 0; - - memset(&efi, 0, sizeof(efi) ); - memset(&efi_phys, 0, sizeof(efi_phys)); - - efi_phys.systab = EFI_SYSTAB; - memmap.phys_map = EFI_MEMMAP; - memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE; - memmap.desc_version = EFI_MEMDESC_VERSION; - memmap.desc_size = EFI_MEMDESC_SIZE; - - efi.systab = (efi_system_table_t *) - boot_ioremap((unsigned long) efi_phys.systab, - sizeof(efi_system_table_t)); - /* - * Verify the EFI Table - */ - if (efi.systab == NULL) - printk(KERN_ERR PFX "Woah! Couldn't map the EFI system table.\n"); - if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) - printk(KERN_ERR PFX "Woah! EFI system table signature incorrect\n"); - if ((efi.systab->hdr.revision >> 16) == 0) - printk(KERN_ERR PFX "Warning: EFI system table version " - "%d.%02d, expected 1.00 or greater\n", - efi.systab->hdr.revision >> 16, - efi.systab->hdr.revision & 0xffff); - - /* - * Grab some details from the system table - */ - num_config_tables = efi.systab->nr_tables; - config_tables = (efi_config_table_t *)efi.systab->tables; - runtime = efi.systab->runtime; - - /* - * Show what we know for posterity - */ - c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2); - if (c16) { - for (i = 0; i < (sizeof(vendor) - 1) && *c16; ++i) - vendor[i] = *c16++; - vendor[i] = '\0'; - } else - printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); - - printk(KERN_INFO PFX "EFI v%u.%.02u by %s \n", - efi.systab->hdr.revision >> 16, - efi.systab->hdr.revision & 0xffff, vendor); - - /* - * Let's see what config tables the firmware passed to us. 
- */ - config_tables = (efi_config_table_t *) - boot_ioremap((unsigned long) config_tables, - num_config_tables * sizeof(efi_config_table_t)); - - if (config_tables == NULL) - printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n"); - - efi.mps = EFI_INVALID_TABLE_ADDR; - efi.acpi = EFI_INVALID_TABLE_ADDR; - efi.acpi20 = EFI_INVALID_TABLE_ADDR; - efi.smbios = EFI_INVALID_TABLE_ADDR; - efi.sal_systab = EFI_INVALID_TABLE_ADDR; - efi.boot_info = EFI_INVALID_TABLE_ADDR; - efi.hcdp = EFI_INVALID_TABLE_ADDR; - efi.uga = EFI_INVALID_TABLE_ADDR; - - for (i = 0; i < num_config_tables; i++) { - if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { - efi.mps = config_tables[i].table; - printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table); - } else - if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { - efi.acpi20 = config_tables[i].table; - printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table); - } else - if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { - efi.acpi = config_tables[i].table; - printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table); - } else - if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { - efi.smbios = config_tables[i].table; - printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table); - } else - if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { - efi.hcdp = config_tables[i].table; - printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table); - } else - if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) { - efi.uga = config_tables[i].table; - printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table); - } - } - printk("\n"); - - /* - * Check out the runtime services table. We need to map - * the runtime services table so that we can grab the physical - * address of several of the EFI runtime functions, needed to - * set the firmware into virtual mode. 
- */ - - runtime = (efi_runtime_services_t *) boot_ioremap((unsigned long) - runtime, - sizeof(efi_runtime_services_t)); - if (runtime != NULL) { - /* - * We will only need *early* access to the following - * two EFI runtime services before set_virtual_address_map - * is invoked. - */ - efi_phys.get_time = (efi_get_time_t *) runtime->get_time; - efi_phys.set_virtual_address_map = - (efi_set_virtual_address_map_t *) - runtime->set_virtual_address_map; - } else - printk(KERN_ERR PFX "Could not map the runtime service table!\n"); - - /* Map the EFI memory map for use until paging_init() */ - memmap.map = boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE); - if (memmap.map == NULL) - printk(KERN_ERR PFX "Could not map the EFI memory map!\n"); - - memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); - -#if EFI_DEBUG - print_efi_memmap(); -#endif -} - -static inline void __init check_range_for_systab(efi_memory_desc_t *md) -{ - if (((unsigned long)md->phys_addr <= (unsigned long)efi_phys.systab) && - ((unsigned long)efi_phys.systab < md->phys_addr + - ((unsigned long)md->num_pages << EFI_PAGE_SHIFT))) { - unsigned long addr; - - addr = md->virt_addr - md->phys_addr + - (unsigned long)efi_phys.systab; - efi.systab = (efi_system_table_t *)addr; - } -} - -/* - * Wrap all the virtual calls in a way that forces the parameters on the stack. - */ - -#define efi_call_virt(f, args...) 
\ - ((efi_##f##_t __attribute__((regparm(0)))*)efi.systab->runtime->f)(args) - -static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) -{ - return efi_call_virt(get_time, tm, tc); -} - -static efi_status_t virt_efi_set_time (efi_time_t *tm) -{ - return efi_call_virt(set_time, tm); -} - -static efi_status_t virt_efi_get_wakeup_time (efi_bool_t *enabled, - efi_bool_t *pending, - efi_time_t *tm) -{ - return efi_call_virt(get_wakeup_time, enabled, pending, tm); -} - -static efi_status_t virt_efi_set_wakeup_time (efi_bool_t enabled, - efi_time_t *tm) -{ - return efi_call_virt(set_wakeup_time, enabled, tm); -} - -static efi_status_t virt_efi_get_variable (efi_char16_t *name, - efi_guid_t *vendor, u32 *attr, - unsigned long *data_size, void *data) -{ - return efi_call_virt(get_variable, name, vendor, attr, data_size, data); -} - -static efi_status_t virt_efi_get_next_variable (unsigned long *name_size, - efi_char16_t *name, - efi_guid_t *vendor) -{ - return efi_call_virt(get_next_variable, name_size, name, vendor); -} - -static efi_status_t virt_efi_set_variable (efi_char16_t *name, - efi_guid_t *vendor, - unsigned long attr, - unsigned long data_size, void *data) -{ - return efi_call_virt(set_variable, name, vendor, attr, data_size, data); -} - -static efi_status_t virt_efi_get_next_high_mono_count (u32 *count) -{ - return efi_call_virt(get_next_high_mono_count, count); -} - -static void virt_efi_reset_system (int reset_type, efi_status_t status, - unsigned long data_size, - efi_char16_t *data) -{ - efi_call_virt(reset_system, reset_type, status, data_size, data); -} - -/* - * This function will switch the EFI runtime services to virtual mode. - * Essentially, look through the EFI memmap and map every region that - * has the runtime attribute bit set in its memory descriptor and update - * that memory descriptor with the virtual address obtained from ioremap(). 
- * This enables the runtime services to be called without having to - * thunk back into physical mode for every invocation. - */ - -void __init efi_enter_virtual_mode(void) -{ - efi_memory_desc_t *md; - efi_status_t status; - void *p; - - efi.systab = NULL; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - - md->virt_addr = (unsigned long)ioremap(md->phys_addr, - md->num_pages << EFI_PAGE_SHIFT); - if (!(unsigned long)md->virt_addr) { - printk(KERN_ERR PFX "ioremap of 0x%lX failed\n", - (unsigned long)md->phys_addr); - } - /* update the virtual address of the EFI system table */ - check_range_for_systab(md); - } - - BUG_ON(!efi.systab); - - status = phys_efi_set_virtual_address_map( - memmap.desc_size * memmap.nr_map, - memmap.desc_size, - memmap.desc_version, - memmap.phys_map); - - if (status != EFI_SUCCESS) { - printk (KERN_ALERT "You are screwed! " - "Unable to switch EFI into virtual mode " - "(status=%lx)\n", status); - panic("EFI call to SetVirtualAddressMap() failed!"); - } - - /* - * Now that EFI is in virtual mode, update the function - * pointers in the runtime service table to the new virtual addresses. 
- */ - - efi.get_time = virt_efi_get_time; - efi.set_time = virt_efi_set_time; - efi.get_wakeup_time = virt_efi_get_wakeup_time; - efi.set_wakeup_time = virt_efi_set_wakeup_time; - efi.get_variable = virt_efi_get_variable; - efi.get_next_variable = virt_efi_get_next_variable; - efi.set_variable = virt_efi_set_variable; - efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; - efi.reset_system = virt_efi_reset_system; -} - -void __init -efi_initialize_iomem_resources(struct resource *code_resource, - struct resource *data_resource) -{ - struct resource *res; - efi_memory_desc_t *md; - void *p; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - - if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > - 0x100000000ULL) - continue; - res = kzalloc(sizeof(struct resource), GFP_ATOMIC); - switch (md->type) { - case EFI_RESERVED_TYPE: - res->name = "Reserved Memory"; - break; - case EFI_LOADER_CODE: - res->name = "Loader Code"; - break; - case EFI_LOADER_DATA: - res->name = "Loader Data"; - break; - case EFI_BOOT_SERVICES_DATA: - res->name = "BootServices Data"; - break; - case EFI_BOOT_SERVICES_CODE: - res->name = "BootServices Code"; - break; - case EFI_RUNTIME_SERVICES_CODE: - res->name = "Runtime Service Code"; - break; - case EFI_RUNTIME_SERVICES_DATA: - res->name = "Runtime Service Data"; - break; - case EFI_CONVENTIONAL_MEMORY: - res->name = "Conventional Memory"; - break; - case EFI_UNUSABLE_MEMORY: - res->name = "Unusable Memory"; - break; - case EFI_ACPI_RECLAIM_MEMORY: - res->name = "ACPI Reclaim"; - break; - case EFI_ACPI_MEMORY_NVS: - res->name = "ACPI NVS"; - break; - case EFI_MEMORY_MAPPED_IO: - res->name = "Memory Mapped IO"; - break; - case EFI_MEMORY_MAPPED_IO_PORT_SPACE: - res->name = "Memory Mapped IO Port Space"; - break; - default: - res->name = "Reserved"; - break; - } - res->start = md->phys_addr; - res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1); - res->flags = IORESOURCE_MEM | 
IORESOURCE_BUSY; - if (request_resource(&iomem_resource, res) < 0) - printk(KERN_ERR PFX "Failed to allocate res %s : " - "0x%llx-0x%llx\n", res->name, - (unsigned long long)res->start, - (unsigned long long)res->end); - /* - * We don't know which region contains kernel data so we try - * it repeatedly and let the resource manager test it. - */ - if (md->type == EFI_CONVENTIONAL_MEMORY) { - request_resource(res, code_resource); - request_resource(res, data_resource); -#ifdef CONFIG_KEXEC - request_resource(res, &crashk_res); -#endif - } - } -} - -/* - * Convenience functions to obtain memory types and attributes - */ - -u32 efi_mem_type(unsigned long phys_addr) -{ - efi_memory_desc_t *md; - void *p; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - if ((md->phys_addr <= phys_addr) && (phys_addr < - (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) - return md->type; - } - return 0; -} - -u64 efi_mem_attributes(unsigned long phys_addr) -{ - efi_memory_desc_t *md; - void *p; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - if ((md->phys_addr <= phys_addr) && (phys_addr < - (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) - return md->attribute; - } - return 0; -} diff --git a/arch/i386/kernel/efi_stub_32.S b/arch/i386/kernel/efi_stub_32.S deleted file mode 100644 index ef00bb77d7e..00000000000 --- a/arch/i386/kernel/efi_stub_32.S +++ /dev/null @@ -1,122 +0,0 @@ -/* - * EFI call stub for IA32. - * - * This stub allows us to make EFI calls in physical mode with interrupts - * turned off. - */ - -#include -#include - -/* - * efi_call_phys(void *, ...) is a function with variable parameters. - * All the callers of this function assure that all the parameters are 4-bytes. - */ - -/* - * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. 
- * So we'd better save all of them at the beginning of this function and restore - * at the end no matter how many we use, because we can not assure EFI runtime - * service functions will comply with gcc calling convention, too. - */ - -.text -ENTRY(efi_call_phys) - /* - * 0. The function can only be called in Linux kernel. So CS has been - * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found - * the values of these registers are the same. And, the corresponding - * GDT entries are identical. So I will do nothing about segment reg - * and GDT, but change GDT base register in prelog and epilog. - */ - - /* - * 1. Now I am running with EIP = + PAGE_OFFSET. - * But to make it smoothly switch from virtual mode to flat mode. - * The mapping of lower virtual memory has been created in prelog and - * epilog. - */ - movl $1f, %edx - subl $__PAGE_OFFSET, %edx - jmp *%edx -1: - - /* - * 2. Now on the top of stack is the return - * address in the caller of efi_call_phys(), then parameter 1, - * parameter 2, ..., param n. To make things easy, we save the return - * address of efi_call_phys in a global variable. - */ - popl %edx - movl %edx, saved_return_addr - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr - movl $2f, %edx - subl $__PAGE_OFFSET, %edx - pushl %edx - - /* - * 3. Clear PG bit in %CR0. - */ - movl %cr0, %edx - andl $0x7fffffff, %edx - movl %edx, %cr0 - jmp 1f -1: - - /* - * 4. Adjust stack pointer. - */ - subl $__PAGE_OFFSET, %esp - - /* - * 5. Call the physical function. - */ - jmp *%ecx - -2: - /* - * 6. After EFI runtime service returns, control will return to - * following instruction. We'd better readjust stack pointer first. - */ - addl $__PAGE_OFFSET, %esp - - /* - * 7. Restore PG bit - */ - movl %cr0, %edx - orl $0x80000000, %edx - movl %edx, %cr0 - jmp 1f -1: - /* - * 8. Now restore the virtual mode from flat mode by - * adding EIP with PAGE_OFFSET. - */ - movl $1f, %edx - jmp *%edx -1: - - /* - * 9. 
Balance the stack. And because EAX contain the return value, - * we'd better not clobber it. - */ - leal efi_rt_function_ptr, %edx - movl (%edx), %ecx - pushl %ecx - - /* - * 10. Push the saved return address onto the stack and return. - */ - leal saved_return_addr, %edx - movl (%edx), %ecx - pushl %ecx - ret -.previous - -.data -saved_return_addr: - .long 0 -efi_rt_function_ptr: - .long 0 diff --git a/arch/i386/kernel/entry_32.S b/arch/i386/kernel/entry_32.S deleted file mode 100644 index 290b7bc82da..00000000000 --- a/arch/i386/kernel/entry_32.S +++ /dev/null @@ -1,1112 +0,0 @@ -/* - * linux/arch/i386/entry.S - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -/* - * entry.S contains the system-call and fault low-level handling routines. - * This also contains the timer-interrupt handler, as well as all interrupts - * and faults that can result in a task-switch. - * - * NOTE: This code handles signal-recognition, which happens every time - * after a timer-interrupt and after each system call. - * - * I changed all the .align's to 4 (16 byte alignment), as that's faster - * on a 486. - * - * Stack layout in 'syscall_exit': - * ptrace needs to have all regs on the stack. - * if the order here is changed, it needs to be - * updated in fork.c:copy_process, signal.c:do_signal, - * ptrace.c and ptrace.h - * - * 0(%esp) - %ebx - * 4(%esp) - %ecx - * 8(%esp) - %edx - * C(%esp) - %esi - * 10(%esp) - %edi - * 14(%esp) - %ebp - * 18(%esp) - %eax - * 1C(%esp) - %ds - * 20(%esp) - %es - * 24(%esp) - %fs - * 28(%esp) - orig_eax - * 2C(%esp) - %eip - * 30(%esp) - %cs - * 34(%esp) - %eflags - * 38(%esp) - %oldesp - * 3C(%esp) - %oldss - * - * "current" is in register %ebx during any slow entries. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "irq_vectors.h" - -/* - * We use macros for low-level operations which need to be overridden - * for paravirtualization. 
The following will never clobber any registers: - * INTERRUPT_RETURN (aka. "iret") - * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") - * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). - * - * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must - * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). - * Allowing a register to be clobbered can shrink the paravirt replacement - * enough to patch inline, increasing performance. - */ - -#define nr_syscalls ((syscall_table_size)/4) - -CF_MASK = 0x00000001 -TF_MASK = 0x00000100 -IF_MASK = 0x00000200 -DF_MASK = 0x00000400 -NT_MASK = 0x00004000 -VM_MASK = 0x00020000 - -#ifdef CONFIG_PREEMPT -#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF -#else -#define preempt_stop(clobbers) -#define resume_kernel restore_nocheck -#endif - -.macro TRACE_IRQS_IRET -#ifdef CONFIG_TRACE_IRQFLAGS - testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off? - jz 1f - TRACE_IRQS_ON -1: -#endif -.endm - -#ifdef CONFIG_VM86 -#define resume_userspace_sig check_userspace -#else -#define resume_userspace_sig resume_userspace -#endif - -#define SAVE_ALL \ - cld; \ - pushl %fs; \ - CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET fs, 0;*/\ - pushl %es; \ - CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET es, 0;*/\ - pushl %ds; \ - CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET ds, 0;*/\ - pushl %eax; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET eax, 0;\ - pushl %ebp; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET ebp, 0;\ - pushl %edi; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET edi, 0;\ - pushl %esi; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET esi, 0;\ - pushl %edx; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET edx, 0;\ - pushl %ecx; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET ecx, 0;\ - pushl %ebx; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET ebx, 0;\ - movl $(__USER_DS), %edx; \ - movl %edx, %ds; \ - movl %edx, %es; \ - movl $(__KERNEL_PERCPU), %edx; \ - movl %edx, %fs - -#define 
RESTORE_INT_REGS \ - popl %ebx; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE ebx;\ - popl %ecx; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE ecx;\ - popl %edx; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE edx;\ - popl %esi; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE esi;\ - popl %edi; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE edi;\ - popl %ebp; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE ebp;\ - popl %eax; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE eax - -#define RESTORE_REGS \ - RESTORE_INT_REGS; \ -1: popl %ds; \ - CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE ds;*/\ -2: popl %es; \ - CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE es;*/\ -3: popl %fs; \ - CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE fs;*/\ -.pushsection .fixup,"ax"; \ -4: movl $0,(%esp); \ - jmp 1b; \ -5: movl $0,(%esp); \ - jmp 2b; \ -6: movl $0,(%esp); \ - jmp 3b; \ -.section __ex_table,"a";\ - .align 4; \ - .long 1b,4b; \ - .long 2b,5b; \ - .long 3b,6b; \ -.popsection - -#define RING0_INT_FRAME \ - CFI_STARTPROC simple;\ - CFI_SIGNAL_FRAME;\ - CFI_DEF_CFA esp, 3*4;\ - /*CFI_OFFSET cs, -2*4;*/\ - CFI_OFFSET eip, -3*4 - -#define RING0_EC_FRAME \ - CFI_STARTPROC simple;\ - CFI_SIGNAL_FRAME;\ - CFI_DEF_CFA esp, 4*4;\ - /*CFI_OFFSET cs, -2*4;*/\ - CFI_OFFSET eip, -3*4 - -#define RING0_PTREGS_FRAME \ - CFI_STARTPROC simple;\ - CFI_SIGNAL_FRAME;\ - CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ - /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ - CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ - /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ - /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ - CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ - CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ - CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ - CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ - CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ - CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ - CFI_OFFSET ebx, PT_EBX-PT_OLDESP - -ENTRY(ret_from_fork) - CFI_STARTPROC - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - call schedule_tail - GET_THREAD_INFO(%ebp) - popl %eax - CFI_ADJUST_CFA_OFFSET -4 - pushl $0x0202 # Reset kernel eflags - 
CFI_ADJUST_CFA_OFFSET 4 - popfl - CFI_ADJUST_CFA_OFFSET -4 - jmp syscall_exit - CFI_ENDPROC -END(ret_from_fork) - -/* - * Return to user mode is not as complex as all this looks, - * but we want the default path for a system call return to - * go as quickly as possible which is why some of this is - * less clear than it otherwise should be. - */ - - # userspace resumption stub bypassing syscall exit tracing - ALIGN - RING0_PTREGS_FRAME -ret_from_exception: - preempt_stop(CLBR_ANY) -ret_from_intr: - GET_THREAD_INFO(%ebp) -check_userspace: - movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS - movb PT_CS(%esp), %al - andl $(VM_MASK | SEGMENT_RPL_MASK), %eax - cmpl $USER_RPL, %eax - jb resume_kernel # not returning to v8086 or userspace - -ENTRY(resume_userspace) - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret - movl TI_flags(%ebp), %ecx - andl $_TIF_WORK_MASK, %ecx # is there any work to be done on - # int/exception return? - jne work_pending - jmp restore_all -END(ret_from_exception) - -#ifdef CONFIG_PREEMPT -ENTRY(resume_kernel) - DISABLE_INTERRUPTS(CLBR_ANY) - cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? - jnz restore_nocheck -need_resched: - movl TI_flags(%ebp), %ecx # need_resched set ? - testb $_TIF_NEED_RESCHED, %cl - jz restore_all - testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ? - jz restore_all - call preempt_schedule_irq - jmp need_resched -END(resume_kernel) -#endif - CFI_ENDPROC - -/* SYSENTER_RETURN points to after the "sysenter" instruction in - the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. 
*/ - - # sysenter call handler stub -ENTRY(sysenter_entry) - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA esp, 0 - CFI_REGISTER esp, ebp - movl TSS_sysenter_esp0(%esp),%esp -sysenter_past_esp: - /* - * No need to follow this irqs on/off section: the syscall - * disabled irqs and here we enable it straight after entry: - */ - ENABLE_INTERRUPTS(CLBR_NONE) - pushl $(__USER_DS) - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET ss, 0*/ - pushl %ebp - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET esp, 0 - pushfl - CFI_ADJUST_CFA_OFFSET 4 - pushl $(__USER_CS) - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET cs, 0*/ - /* - * Push current_thread_info()->sysenter_return to the stack. - * A tiny bit of offset fixup is necessary - 4*4 means the 4 words - * pushed above; +8 corresponds to copy_thread's esp0 setting. - */ - pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET eip, 0 - -/* - * Load the potential sixth argument from user stack. - * Careful about security. 
- */ - cmpl $__PAGE_OFFSET-3,%ebp - jae syscall_fault -1: movl (%ebp),%ebp -.section __ex_table,"a" - .align 4 - .long 1b,syscall_fault -.previous - - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - GET_THREAD_INFO(%ebp) - - /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) - jnz syscall_trace_entry - cmpl $(nr_syscalls), %eax - jae syscall_badsys - call *sys_call_table(,%eax,4) - movl %eax,PT_EAX(%esp) - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - testw $_TIF_ALLWORK_MASK, %cx - jne syscall_exit_work -/* if something modifies registers it must also disable sysexit */ - movl PT_EIP(%esp), %edx - movl PT_OLDESP(%esp), %ecx - xorl %ebp,%ebp - TRACE_IRQS_ON -1: mov PT_FS(%esp), %fs - ENABLE_INTERRUPTS_SYSEXIT - CFI_ENDPROC -.pushsection .fixup,"ax" -2: movl $0,PT_FS(%esp) - jmp 1b -.section __ex_table,"a" - .align 4 - .long 1b,2b -.popsection -ENDPROC(sysenter_entry) - - # system call handler stub -ENTRY(system_call) - RING0_INT_FRAME # can't unwind into user space anyway - pushl %eax # save orig_eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - GET_THREAD_INFO(%ebp) - # system call tracing in operation / emulation - /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) - jnz syscall_trace_entry - cmpl $(nr_syscalls), %eax - jae syscall_badsys -syscall_call: - call *sys_call_table(,%eax,4) - movl %eax,PT_EAX(%esp) # store the return value -syscall_exit: - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret - TRACE_IRQS_OFF - testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit - jz no_singlestep - orl $_TIF_SINGLESTEP,TI_flags(%ebp) -no_singlestep: - movl TI_flags(%ebp), %ecx - testw 
$_TIF_ALLWORK_MASK, %cx # current->work - jne syscall_exit_work - -restore_all: - movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS - # Warning: PT_OLDSS(%esp) contains the wrong/random values if we - # are returning to the kernel. - # See comments in process.c:copy_thread() for details. - movb PT_OLDSS(%esp), %ah - movb PT_CS(%esp), %al - andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax - cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax - CFI_REMEMBER_STATE - je ldt_ss # returning to user-space with LDT SS -restore_nocheck: - TRACE_IRQS_IRET -restore_nocheck_notrace: - RESTORE_REGS - addl $4, %esp # skip orig_eax/error_code - CFI_ADJUST_CFA_OFFSET -4 -1: INTERRUPT_RETURN -.section .fixup,"ax" -iret_exc: - pushl $0 # no error code - pushl $do_iret_error - jmp error_code -.previous -.section __ex_table,"a" - .align 4 - .long 1b,iret_exc -.previous - - CFI_RESTORE_STATE -ldt_ss: - larl PT_OLDSS(%esp), %eax - jnz restore_nocheck - testl $0x00400000, %eax # returning to 32bit stack? - jnz restore_nocheck # allright, normal return - -#ifdef CONFIG_PARAVIRT - /* - * The kernel can't run on a non-flat stack if paravirt mode - * is active. Rather than try to fixup the high bits of - * ESP, bypass this code entirely. This may break DOSemu - * and/or Wine support in a paravirt VM, although the option - * is still available to implement the setting of the high - * 16-bits in the INTERRUPT_RETURN paravirt-op. - */ - cmpl $0, paravirt_ops+PARAVIRT_enabled - jne restore_nocheck -#endif - - /* If returning to userspace with 16bit stack, - * try to fix the higher word of ESP, as the CPU - * won't restore it. - * This is an "official" bug of all the x86-compatible - * CPUs, which we can try to work around to make - * dosemu and wine happy. 
*/ - movl PT_OLDESP(%esp), %eax - movl %esp, %edx - call patch_espfix_desc - pushl $__ESPFIX_SS - CFI_ADJUST_CFA_OFFSET 4 - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - DISABLE_INTERRUPTS(CLBR_EAX) - TRACE_IRQS_OFF - lss (%esp), %esp - CFI_ADJUST_CFA_OFFSET -8 - jmp restore_nocheck - CFI_ENDPROC -ENDPROC(system_call) - - # perform work that needs to be done immediately before resumption - ALIGN - RING0_PTREGS_FRAME # can't unwind into user space anyway -work_pending: - testb $_TIF_NEED_RESCHED, %cl - jz work_notifysig -work_resched: - call schedule - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret - TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - andl $_TIF_WORK_MASK, %ecx # is there any work to be done other - # than syscall tracing? - jz restore_all - testb $_TIF_NEED_RESCHED, %cl - jnz work_resched - -work_notifysig: # deal with pending signals and - # notify-resume requests -#ifdef CONFIG_VM86 - testl $VM_MASK, PT_EFLAGS(%esp) - movl %esp, %eax - jne work_notifysig_v86 # returning to kernel-space or - # vm86-space - xorl %edx, %edx - call do_notify_resume - jmp resume_userspace_sig - - ALIGN -work_notifysig_v86: - pushl %ecx # save ti_flags for do_notify_resume - CFI_ADJUST_CFA_OFFSET 4 - call save_v86_state # %eax contains pt_regs pointer - popl %ecx - CFI_ADJUST_CFA_OFFSET -4 - movl %eax, %esp -#else - movl %esp, %eax -#endif - xorl %edx, %edx - call do_notify_resume - jmp resume_userspace_sig -END(work_pending) - - # perform syscall exit tracing - ALIGN -syscall_trace_entry: - movl $-ENOSYS,PT_EAX(%esp) - movl %esp, %eax - xorl %edx,%edx - call do_syscall_trace - cmpl $0, %eax - jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, - # so must skip actual syscall - movl PT_ORIG_EAX(%esp), %eax - cmpl $(nr_syscalls), %eax - jnae syscall_call - jmp syscall_exit -END(syscall_trace_entry) - - # perform syscall exit tracing - ALIGN -syscall_exit_work: - testb 
$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl - jz work_pending - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call - # schedule() instead - movl %esp, %eax - movl $1, %edx - call do_syscall_trace - jmp resume_userspace -END(syscall_exit_work) - CFI_ENDPROC - - RING0_INT_FRAME # can't unwind into user space anyway -syscall_fault: - pushl %eax # save orig_eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - GET_THREAD_INFO(%ebp) - movl $-EFAULT,PT_EAX(%esp) - jmp resume_userspace -END(syscall_fault) - -syscall_badsys: - movl $-ENOSYS,PT_EAX(%esp) - jmp resume_userspace -END(syscall_badsys) - CFI_ENDPROC - -#define FIXUP_ESPFIX_STACK \ - /* since we are on a wrong stack, we cant make it a C code :( */ \ - PER_CPU(gdt_page, %ebx); \ - GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ - addl %esp, %eax; \ - pushl $__KERNEL_DS; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl %eax; \ - CFI_ADJUST_CFA_OFFSET 4; \ - lss (%esp), %esp; \ - CFI_ADJUST_CFA_OFFSET -8; -#define UNWIND_ESPFIX_STACK \ - movl %ss, %eax; \ - /* see if on espfix stack */ \ - cmpw $__ESPFIX_SS, %ax; \ - jne 27f; \ - movl $__KERNEL_DS, %eax; \ - movl %eax, %ds; \ - movl %eax, %es; \ - /* switch to normal stack */ \ - FIXUP_ESPFIX_STACK; \ -27:; - -/* - * Build the entry stubs and pointer table with - * some assembler magic. 
- */ -.data -ENTRY(interrupt) -.text - -ENTRY(irq_entries_start) - RING0_INT_FRAME -vector=0 -.rept NR_IRQS - ALIGN - .if vector - CFI_ADJUST_CFA_OFFSET -4 - .endif -1: pushl $~(vector) - CFI_ADJUST_CFA_OFFSET 4 - jmp common_interrupt - .previous - .long 1b - .text -vector=vector+1 -.endr -END(irq_entries_start) - -.previous -END(interrupt) -.previous - -/* - * the CPU automatically disables interrupts when executing an IRQ vector, - * so IRQ-flags tracing has to follow that: - */ - ALIGN -common_interrupt: - SAVE_ALL - TRACE_IRQS_OFF - movl %esp,%eax - call do_IRQ - jmp ret_from_intr -ENDPROC(common_interrupt) - CFI_ENDPROC - -#define BUILD_INTERRUPT(name, nr) \ -ENTRY(name) \ - RING0_INT_FRAME; \ - pushl $~(nr); \ - CFI_ADJUST_CFA_OFFSET 4; \ - SAVE_ALL; \ - TRACE_IRQS_OFF \ - movl %esp,%eax; \ - call smp_##name; \ - jmp ret_from_intr; \ - CFI_ENDPROC; \ -ENDPROC(name) - -/* The include is where all of the SMP etc. interrupts come from */ -#include "entry_arch.h" - -KPROBE_ENTRY(page_fault) - RING0_EC_FRAME - pushl $do_page_fault - CFI_ADJUST_CFA_OFFSET 4 - ALIGN -error_code: - /* the function address is in %fs's slot on the stack */ - pushl %es - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET es, 0*/ - pushl %ds - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET ds, 0*/ - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET eax, 0 - pushl %ebp - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ebp, 0 - pushl %edi - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edi, 0 - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET esi, 0 - pushl %edx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edx, 0 - pushl %ecx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ecx, 0 - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ebx, 0 - cld - pushl %fs - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET fs, 0*/ - movl $(__KERNEL_PERCPU), %ecx - movl %ecx, %fs - UNWIND_ESPFIX_STACK - popl %ecx - CFI_ADJUST_CFA_OFFSET -4 - /*CFI_REGISTER es, ecx*/ - movl PT_FS(%esp), %edi # get the function address - movl 
PT_ORIG_EAX(%esp), %edx # get the error code - movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart - mov %ecx, PT_FS(%esp) - /*CFI_REL_OFFSET fs, ES*/ - movl $(__USER_DS), %ecx - movl %ecx, %ds - movl %ecx, %es - movl %esp,%eax # pt_regs pointer - call *%edi - jmp ret_from_exception - CFI_ENDPROC -KPROBE_END(page_fault) - -ENTRY(coprocessor_error) - RING0_INT_FRAME - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_coprocessor_error - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(coprocessor_error) - -ENTRY(simd_coprocessor_error) - RING0_INT_FRAME - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_simd_coprocessor_error - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(simd_coprocessor_error) - -ENTRY(device_not_available) - RING0_INT_FRAME - pushl $-1 # mark this as an int - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - GET_CR0_INTO_EAX - testl $0x4, %eax # EM (math emulation bit) - jne device_not_available_emulate - preempt_stop(CLBR_ANY) - call math_state_restore - jmp ret_from_exception -device_not_available_emulate: - pushl $0 # temporary storage for ORIG_EIP - CFI_ADJUST_CFA_OFFSET 4 - call math_emulate - addl $4, %esp - CFI_ADJUST_CFA_OFFSET -4 - jmp ret_from_exception - CFI_ENDPROC -END(device_not_available) - -/* - * Debug traps and NMI can happen at the one SYSENTER instruction - * that sets up the real kernel stack. Check here, since we can't - * allow the wrong stack to be used. - * - * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have - * already pushed 3 words if it hits on the sysenter instruction: - * eflags, cs and eip. - * - * We just load the right stack, and push the three (known) values - * by hand onto the new stack - while updating the return eip past - * the instruction that would have done it for sysenter. 
- */ -#define FIX_STACK(offset, ok, label) \ - cmpw $__KERNEL_CS,4(%esp); \ - jne ok; \ -label: \ - movl TSS_sysenter_esp0+offset(%esp),%esp; \ - CFI_DEF_CFA esp, 0; \ - CFI_UNDEFINED eip; \ - pushfl; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl $__KERNEL_CS; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl $sysenter_past_esp; \ - CFI_ADJUST_CFA_OFFSET 4; \ - CFI_REL_OFFSET eip, 0 - -KPROBE_ENTRY(debug) - RING0_INT_FRAME - cmpl $sysenter_entry,(%esp) - jne debug_stack_correct - FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) -debug_stack_correct: - pushl $-1 # mark this as an int - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - xorl %edx,%edx # error code 0 - movl %esp,%eax # pt_regs pointer - call do_debug - jmp ret_from_exception - CFI_ENDPROC -KPROBE_END(debug) - -/* - * NMI is doubly nasty. It can happen _while_ we're handling - * a debug fault, and the debug fault hasn't yet been able to - * clear up the stack. So we first check whether we got an - * NMI on the sysenter entry path, but after that we need to - * check whether we got an NMI on the debug path where the debug - * fault happened on the sysenter path. - */ -KPROBE_ENTRY(nmi) - RING0_INT_FRAME - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - movl %ss, %eax - cmpw $__ESPFIX_SS, %ax - popl %eax - CFI_ADJUST_CFA_OFFSET -4 - je nmi_espfix_stack - cmpl $sysenter_entry,(%esp) - je nmi_stack_fixup - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - movl %esp,%eax - /* Do not access memory above the end of our stack page, - * it might not exist. 
- */ - andl $(THREAD_SIZE-1),%eax - cmpl $(THREAD_SIZE-20),%eax - popl %eax - CFI_ADJUST_CFA_OFFSET -4 - jae nmi_stack_correct - cmpl $sysenter_entry,12(%esp) - je nmi_debug_stack_check -nmi_stack_correct: - /* We have a RING0_INT_FRAME here */ - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - xorl %edx,%edx # zero error code - movl %esp,%eax # pt_regs pointer - call do_nmi - jmp restore_nocheck_notrace - CFI_ENDPROC - -nmi_stack_fixup: - RING0_INT_FRAME - FIX_STACK(12,nmi_stack_correct, 1) - jmp nmi_stack_correct - -nmi_debug_stack_check: - /* We have a RING0_INT_FRAME here */ - cmpw $__KERNEL_CS,16(%esp) - jne nmi_stack_correct - cmpl $debug,(%esp) - jb nmi_stack_correct - cmpl $debug_esp_fix_insn,(%esp) - ja nmi_stack_correct - FIX_STACK(24,nmi_stack_correct, 1) - jmp nmi_stack_correct - -nmi_espfix_stack: - /* We have a RING0_INT_FRAME here. - * - * create the pointer to lss back - */ - pushl %ss - CFI_ADJUST_CFA_OFFSET 4 - pushl %esp - CFI_ADJUST_CFA_OFFSET 4 - addw $4, (%esp) - /* copy the iret frame of 12 bytes */ - .rept 3 - pushl 16(%esp) - CFI_ADJUST_CFA_OFFSET 4 - .endr - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - FIXUP_ESPFIX_STACK # %eax == %esp - xorl %edx,%edx # zero error code - call do_nmi - RESTORE_REGS - lss 12+4(%esp), %esp # back to espfix stack - CFI_ADJUST_CFA_OFFSET -24 -1: INTERRUPT_RETURN - CFI_ENDPROC -.section __ex_table,"a" - .align 4 - .long 1b,iret_exc -.previous -KPROBE_END(nmi) - -#ifdef CONFIG_PARAVIRT -ENTRY(native_iret) -1: iret -.section __ex_table,"a" - .align 4 - .long 1b,iret_exc -.previous -END(native_iret) - -ENTRY(native_irq_enable_sysexit) - sti - sysexit -END(native_irq_enable_sysexit) -#endif - -KPROBE_ENTRY(int3) - RING0_INT_FRAME - pushl $-1 # mark this as an int - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - xorl %edx,%edx # zero error code - movl %esp,%eax # pt_regs pointer - call do_int3 - jmp ret_from_exception - CFI_ENDPROC -KPROBE_END(int3) - -ENTRY(overflow) - RING0_INT_FRAME - pushl $0 - CFI_ADJUST_CFA_OFFSET 
4 - pushl $do_overflow - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(overflow) - -ENTRY(bounds) - RING0_INT_FRAME - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_bounds - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(bounds) - -ENTRY(invalid_op) - RING0_INT_FRAME - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_invalid_op - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(invalid_op) - -ENTRY(coprocessor_segment_overrun) - RING0_INT_FRAME - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_coprocessor_segment_overrun - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(coprocessor_segment_overrun) - -ENTRY(invalid_TSS) - RING0_EC_FRAME - pushl $do_invalid_TSS - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(invalid_TSS) - -ENTRY(segment_not_present) - RING0_EC_FRAME - pushl $do_segment_not_present - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(segment_not_present) - -ENTRY(stack_segment) - RING0_EC_FRAME - pushl $do_stack_segment - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(stack_segment) - -KPROBE_ENTRY(general_protection) - RING0_EC_FRAME - pushl $do_general_protection - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -KPROBE_END(general_protection) - -ENTRY(alignment_check) - RING0_EC_FRAME - pushl $do_alignment_check - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(alignment_check) - -ENTRY(divide_error) - RING0_INT_FRAME - pushl $0 # no error code - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_divide_error - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(divide_error) - -#ifdef CONFIG_X86_MCE -ENTRY(machine_check) - RING0_INT_FRAME - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - pushl machine_check_vector - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(machine_check) -#endif - -ENTRY(spurious_interrupt_bug) - RING0_INT_FRAME - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_spurious_interrupt_bug - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - 
CFI_ENDPROC -END(spurious_interrupt_bug) - -ENTRY(kernel_thread_helper) - pushl $0 # fake return address for unwinder - CFI_STARTPROC - movl %edx,%eax - push %edx - CFI_ADJUST_CFA_OFFSET 4 - call *%ebx - push %eax - CFI_ADJUST_CFA_OFFSET 4 - call do_exit - CFI_ENDPROC -ENDPROC(kernel_thread_helper) - -#ifdef CONFIG_XEN -ENTRY(xen_hypervisor_callback) - CFI_STARTPROC - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - TRACE_IRQS_OFF - - /* Check to see if we got the event in the critical - region in xen_iret_direct, after we've reenabled - events and checked for pending events. This simulates - iret instruction's behaviour where it delivers a - pending interrupt when enabling interrupts. */ - movl PT_EIP(%esp),%eax - cmpl $xen_iret_start_crit,%eax - jb 1f - cmpl $xen_iret_end_crit,%eax - jae 1f - - call xen_iret_crit_fixup - -1: mov %esp, %eax - call xen_evtchn_do_upcall - jmp ret_from_intr - CFI_ENDPROC -ENDPROC(xen_hypervisor_callback) - -# Hypervisor uses this for application faults while it executes. -# We get here for two reasons: -# 1. Fault while reloading DS, ES, FS or GS -# 2. Fault while executing IRET -# Category 1 we fix up by reattempting the load, and zeroing the segment -# register if the load fails. -# Category 2 we fix up by jumping to do_iret_error. We cannot use the -# normal Linux return path in this case because if we use the IRET hypercall -# to pop the stack frame we end up in an infinite loop of failsafe callbacks. -# We distinguish between categories by maintaining a status value in EAX. 
-ENTRY(xen_failsafe_callback) - CFI_STARTPROC - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - movl $1,%eax -1: mov 4(%esp),%ds -2: mov 8(%esp),%es -3: mov 12(%esp),%fs -4: mov 16(%esp),%gs - testl %eax,%eax - popl %eax - CFI_ADJUST_CFA_OFFSET -4 - lea 16(%esp),%esp - CFI_ADJUST_CFA_OFFSET -16 - jz 5f - addl $16,%esp - jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) -5: pushl $0 # EAX == 0 => Category 1 (Bad segment) - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - jmp ret_from_exception - CFI_ENDPROC - -.section .fixup,"ax" -6: xorl %eax,%eax - movl %eax,4(%esp) - jmp 1b -7: xorl %eax,%eax - movl %eax,8(%esp) - jmp 2b -8: xorl %eax,%eax - movl %eax,12(%esp) - jmp 3b -9: xorl %eax,%eax - movl %eax,16(%esp) - jmp 4b -.previous -.section __ex_table,"a" - .align 4 - .long 1b,6b - .long 2b,7b - .long 3b,8b - .long 4b,9b -.previous -ENDPROC(xen_failsafe_callback) - -#endif /* CONFIG_XEN */ - -.section .rodata,"a" -#include "syscall_table_32.S" - -syscall_table_size=(.-sys_call_table) diff --git a/arch/i386/kernel/geode_32.c b/arch/i386/kernel/geode_32.c deleted file mode 100644 index 41e8aec4c61..00000000000 --- a/arch/i386/kernel/geode_32.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * AMD Geode southbridge support code - * Copyright (C) 2006, Advanced Micro Devices, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include -#include -#include - -static struct { - char *name; - u32 msr; - int size; - u32 base; -} lbars[] = { - { "geode-pms", MSR_LBAR_PMS, LBAR_PMS_SIZE, 0 }, - { "geode-acpi", MSR_LBAR_ACPI, LBAR_ACPI_SIZE, 0 }, - { "geode-gpio", MSR_LBAR_GPIO, LBAR_GPIO_SIZE, 0 }, - { "geode-mfgpt", MSR_LBAR_MFGPT, LBAR_MFGPT_SIZE, 0 } -}; - -static void __init init_lbars(void) -{ - u32 lo, hi; - int i; - - for (i = 0; i < ARRAY_SIZE(lbars); i++) { - rdmsr(lbars[i].msr, lo, hi); - if (hi & 0x01) - lbars[i].base = lo & 0x0000ffff; - - if (lbars[i].base == 0) - printk(KERN_ERR "geode: Couldn't initialize '%s'\n", - lbars[i].name); - } -} - -int geode_get_dev_base(unsigned int dev) -{ - BUG_ON(dev >= ARRAY_SIZE(lbars)); - return lbars[dev].base; -} -EXPORT_SYMBOL_GPL(geode_get_dev_base); - -/* === GPIO API === */ - -void geode_gpio_set(unsigned int gpio, unsigned int reg) -{ - u32 base = geode_get_dev_base(GEODE_DEV_GPIO); - - if (!base) - return; - - if (gpio < 16) - outl(1 << gpio, base + reg); - else - outl(1 << (gpio - 16), base + 0x80 + reg); -} -EXPORT_SYMBOL_GPL(geode_gpio_set); - -void geode_gpio_clear(unsigned int gpio, unsigned int reg) -{ - u32 base = geode_get_dev_base(GEODE_DEV_GPIO); - - if (!base) - return; - - if (gpio < 16) - outl(1 << (gpio + 16), base + reg); - else - outl(1 << gpio, base + 0x80 + reg); -} -EXPORT_SYMBOL_GPL(geode_gpio_clear); - -int geode_gpio_isset(unsigned int gpio, unsigned int reg) -{ - u32 base = geode_get_dev_base(GEODE_DEV_GPIO); - - if (!base) - return 0; - - if (gpio < 16) - return (inl(base + reg) & (1 << gpio)) ? 1 : 0; - else - return (inl(base + 0x80 + reg) & (1 << (gpio - 16))) ? 
1 : 0; -} -EXPORT_SYMBOL_GPL(geode_gpio_isset); - -void geode_gpio_set_irq(unsigned int group, unsigned int irq) -{ - u32 lo, hi; - - if (group > 7 || irq > 15) - return; - - rdmsr(MSR_PIC_ZSEL_HIGH, lo, hi); - - lo &= ~(0xF << (group * 4)); - lo |= (irq & 0xF) << (group * 4); - - wrmsr(MSR_PIC_ZSEL_HIGH, lo, hi); -} -EXPORT_SYMBOL_GPL(geode_gpio_set_irq); - -void geode_gpio_setup_event(unsigned int gpio, int pair, int pme) -{ - u32 base = geode_get_dev_base(GEODE_DEV_GPIO); - u32 offset, shift, val; - - if (gpio >= 24) - offset = GPIO_MAP_W; - else if (gpio >= 16) - offset = GPIO_MAP_Z; - else if (gpio >= 8) - offset = GPIO_MAP_Y; - else - offset = GPIO_MAP_X; - - shift = (gpio % 8) * 4; - - val = inl(base + offset); - - /* Clear whatever was there before */ - val &= ~(0xF << shift); - - /* And set the new value */ - - val |= ((pair & 7) << shift); - - /* Set the PME bit if this is a PME event */ - - if (pme) - val |= (1 << (shift + 3)); - - outl(val, base + offset); -} -EXPORT_SYMBOL_GPL(geode_gpio_setup_event); - -static int __init geode_southbridge_init(void) -{ - if (!is_geode()) - return -ENODEV; - - init_lbars(); - return 0; -} - -postcore_initcall(geode_southbridge_init); diff --git a/arch/i386/kernel/head_32.S b/arch/i386/kernel/head_32.S deleted file mode 100644 index 9150ca9b5f8..00000000000 --- a/arch/i386/kernel/head_32.S +++ /dev/null @@ -1,578 +0,0 @@ -/* - * linux/arch/i386/kernel/head.S -- the 32-bit startup code. - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Enhanced CPU detection and feature setting code by Mike Jagdis - * and Martin Mares, November 1997. - */ - -.text -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * References to members of the new_cpu_data structure. 
- */ - -#define X86 new_cpu_data+CPUINFO_x86 -#define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor -#define X86_MODEL new_cpu_data+CPUINFO_x86_model -#define X86_MASK new_cpu_data+CPUINFO_x86_mask -#define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math -#define X86_CPUID new_cpu_data+CPUINFO_cpuid_level -#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability -#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id - -/* - * This is how much memory *in addition to the memory covered up to - * and including _end* we need mapped initially. - * We need: - * - one bit for each possible page, but only in low memory, which means - * 2^32/4096/8 = 128K worst case (4G/4G split.) - * - enough space to map all low memory, which means - * (2^32/4096) / 1024 pages (worst case, non PAE) - * (2^32/4096) / 512 + 4 pages (worst case for PAE) - * - a few pages for allocator use before the kernel pagetable has - * been set up - * - * Modulo rounding, each megabyte assigned here requires a kilobyte of - * memory, which is currently unreclaimed. - * - * This should be a multiple of a page. - */ -LOW_PAGES = 1<<(32-PAGE_SHIFT_asm) - -#if PTRS_PER_PMD > 1 -PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD -#else -PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD) -#endif -BOOTBITMAP_SIZE = LOW_PAGES / 8 -ALLOCATOR_SLOP = 4 - -INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm - -/* - * 32-bit kernel entrypoint; only used by the boot CPU. On entry, - * %esi points to the real-mode code as a 32-bit pointer. - * CS and DS must be 4 GB flat segments, but we don't depend on - * any particular GDT layout, because we load our own as soon as we - * can. - */ -.section .text.head,"ax",@progbits -ENTRY(startup_32) - -/* - * Set segments to known values. - */ - cld - lgdt boot_gdt_descr - __PAGE_OFFSET - movl $(__BOOT_DS),%eax - movl %eax,%ds - movl %eax,%es - movl %eax,%fs - movl %eax,%gs - -/* - * Clear BSS first so that there are no surprises... 
- * No need to cld as DF is already clear from cld above... - */ - xorl %eax,%eax - movl $__bss_start - __PAGE_OFFSET,%edi - movl $__bss_stop - __PAGE_OFFSET,%ecx - subl %edi,%ecx - shrl $2,%ecx - rep ; stosl -/* - * Copy bootup parameters out of the way. - * Note: %esi still has the pointer to the real-mode data. - * With the kexec as boot loader, parameter segment might be loaded beyond - * kernel image and might not even be addressable by early boot page tables. - * (kexec on panic case). Hence copy out the parameters before initializing - * page tables. - */ - movl $(boot_params - __PAGE_OFFSET),%edi - movl $(PARAM_SIZE/4),%ecx - cld - rep - movsl - movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi - andl %esi,%esi - jnz 2f # New command line protocol - cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR - jne 1f - movzwl OLD_CL_OFFSET,%esi - addl $(OLD_CL_BASE_ADDR),%esi -2: - movl $(boot_command_line - __PAGE_OFFSET),%edi - movl $(COMMAND_LINE_SIZE/4),%ecx - rep - movsl -1: - -/* - * Initialize page tables. This creates a PDE and a set of page - * tables, which are located immediately beyond _end. The variable - * init_pg_tables_end is set up to point to the first "safe" location. - * Mappings are created both at virtual address 0 (identity mapping) - * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END. - * - * Warning: don't use %esi or the stack in this code. However, %esp - * can be used as a GPR if you really need it... 
- */ -page_pde_offset = (__PAGE_OFFSET >> 20); - - movl $(pg0 - __PAGE_OFFSET), %edi - movl $(swapper_pg_dir - __PAGE_OFFSET), %edx - movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ -10: - leal 0x007(%edi),%ecx /* Create PDE entry */ - movl %ecx,(%edx) /* Store identity PDE entry */ - movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ - addl $4,%edx - movl $1024, %ecx -11: - stosl - addl $0x1000,%eax - loop 11b - /* End condition: we must map up to and including INIT_MAP_BEYOND_END */ - /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */ - leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp - cmpl %ebp,%eax - jb 10b - movl %edi,(init_pg_tables_end - __PAGE_OFFSET) - - xorl %ebx,%ebx /* This is the boot CPU (BSP) */ - jmp 3f -/* - * Non-boot CPU entry point; entered from trampoline.S - * We can't lgdt here, because lgdt itself uses a data segment, but - * we know the trampoline has already loaded the boot_gdt for us. - * - * If cpu hotplug is not supported then this code can go in init section - * which will be freed later - */ - -#ifndef CONFIG_HOTPLUG_CPU -.section .init.text,"ax",@progbits -#endif - - /* Do an early initialization of the fixmap area */ - movl $(swapper_pg_dir - __PAGE_OFFSET), %edx - movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax - addl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ - movl %eax, 4092(%edx) - -#ifdef CONFIG_SMP -ENTRY(startup_32_smp) - cld - movl $(__BOOT_DS),%eax - movl %eax,%ds - movl %eax,%es - movl %eax,%fs - movl %eax,%gs - -/* - * New page tables may be in 4Mbyte page mode and may - * be using the global pages. - * - * NOTE! If we are on a 486 we may have no cr4 at all! - * So we do not try to touch it unless we really have - * some bits in it to set. This won't work if the BSP - * implements cr4 but this AP does not -- very unlikely - * but be warned! The same applies to the pse feature - * if not equally supported. --macro - * - * NOTE! 
We have to correct for the fact that we're - * not yet offset PAGE_OFFSET.. - */ -#define cr4_bits mmu_cr4_features-__PAGE_OFFSET - movl cr4_bits,%edx - andl %edx,%edx - jz 6f - movl %cr4,%eax # Turn on paging options (PSE,PAE,..) - orl %edx,%eax - movl %eax,%cr4 - - btl $5, %eax # check if PAE is enabled - jnc 6f - - /* Check if extended functions are implemented */ - movl $0x80000000, %eax - cpuid - cmpl $0x80000000, %eax - jbe 6f - mov $0x80000001, %eax - cpuid - /* Execute Disable bit supported? */ - btl $20, %edx - jnc 6f - - /* Setup EFER (Extended Feature Enable Register) */ - movl $0xc0000080, %ecx - rdmsr - - btsl $11, %eax - /* Make changes effective */ - wrmsr - -6: - /* This is a secondary processor (AP) */ - xorl %ebx,%ebx - incl %ebx - -#endif /* CONFIG_SMP */ -3: - -/* - * Enable paging - */ - movl $swapper_pg_dir-__PAGE_OFFSET,%eax - movl %eax,%cr3 /* set the page table pointer.. */ - movl %cr0,%eax - orl $0x80000000,%eax - movl %eax,%cr0 /* ..and set paging (PG) bit */ - ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */ -1: - /* Set up the stack pointer */ - lss stack_start,%esp - -/* - * Initialize eflags. Some BIOS's leave bits like NT set. This would - * confuse the debugger if this code is traced. - * XXX - best to initialize before switching to protected mode. - */ - pushl $0 - popfl - -#ifdef CONFIG_SMP - andl %ebx,%ebx - jz 1f /* Initial CPU cleans BSS */ - jmp checkCPUtype -1: -#endif /* CONFIG_SMP */ - -/* - * start system 32-bit setup. We need to re-do some of the things done - * in 16-bit mode for the "real" operations. - */ - call setup_idt - -checkCPUtype: - - movl $-1,X86_CPUID # -1 for no CPUID initially - -/* check if it is 486 or 386. */ -/* - * XXX - this does a lot of unnecessary setup. Alignment checks don't - * apply at our cpl of 0 and the stack ought to be aligned already, and - * we don't need to preserve eflags. 
- */ - - movb $3,X86 # at least 386 - pushfl # push EFLAGS - popl %eax # get EFLAGS - movl %eax,%ecx # save original EFLAGS - xorl $0x240000,%eax # flip AC and ID bits in EFLAGS - pushl %eax # copy to EFLAGS - popfl # set EFLAGS - pushfl # get new EFLAGS - popl %eax # put it in eax - xorl %ecx,%eax # change in flags - pushl %ecx # restore original EFLAGS - popfl - testl $0x40000,%eax # check if AC bit changed - je is386 - - movb $4,X86 # at least 486 - testl $0x200000,%eax # check if ID bit changed - je is486 - - /* get vendor info */ - xorl %eax,%eax # call CPUID with 0 -> return vendor ID - cpuid - movl %eax,X86_CPUID # save CPUID level - movl %ebx,X86_VENDOR_ID # lo 4 chars - movl %edx,X86_VENDOR_ID+4 # next 4 chars - movl %ecx,X86_VENDOR_ID+8 # last 4 chars - - orl %eax,%eax # do we have processor info as well? - je is486 - - movl $1,%eax # Use the CPUID instruction to get CPU type - cpuid - movb %al,%cl # save reg for future use - andb $0x0f,%ah # mask processor family - movb %ah,X86 - andb $0xf0,%al # mask model - shrb $4,%al - movb %al,X86_MODEL - andb $0x0f,%cl # mask mask revision - movb %cl,X86_MASK - movl %edx,X86_CAPABILITY - -is486: movl $0x50022,%ecx # set AM, WP, NE and MP - jmp 2f - -is386: movl $2,%ecx # set MP -2: movl %cr0,%eax - andl $0x80000011,%eax # Save PG,PE,ET - orl %ecx,%eax - movl %eax,%cr0 - - call check_x87 - lgdt early_gdt_descr - lidt idt_descr - ljmp $(__KERNEL_CS),$1f -1: movl $(__KERNEL_DS),%eax # reload all the segment registers - movl %eax,%ss # after changing gdt. 
- movl %eax,%fs # gets reset once there's real percpu - - movl $(__USER_DS),%eax # DS/ES contains default USER segment - movl %eax,%ds - movl %eax,%es - - xorl %eax,%eax # Clear GS and LDT - movl %eax,%gs - lldt %ax - - cld # gcc2 wants the direction flag cleared at all times - pushl $0 # fake return address for unwinder -#ifdef CONFIG_SMP - movb ready, %cl - movb $1, ready - cmpb $0,%cl # the first CPU calls start_kernel - je 1f - movl $(__KERNEL_PERCPU), %eax - movl %eax,%fs # set this cpu's percpu - jmp initialize_secondary # all other CPUs call initialize_secondary -1: -#endif /* CONFIG_SMP */ - jmp start_kernel - -/* - * We depend on ET to be correct. This checks for 287/387. - */ -check_x87: - movb $0,X86_HARD_MATH - clts - fninit - fstsw %ax - cmpb $0,%al - je 1f - movl %cr0,%eax /* no coprocessor: have to set bits */ - xorl $4,%eax /* set EM */ - movl %eax,%cr0 - ret - ALIGN -1: movb $1,X86_HARD_MATH - .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */ - ret - -/* - * setup_idt - * - * sets up a idt with 256 entries pointing to - * ignore_int, interrupt gates. It doesn't actually load - * idt - that can be done only after paging has been enabled - * and the kernel moved to PAGE_OFFSET. Interrupts - * are enabled elsewhere, when we can be relatively - * sure everything is ok. - * - * Warning: %esi is live across this function. 
- */ -setup_idt: - lea ignore_int,%edx - movl $(__KERNEL_CS << 16),%eax - movw %dx,%ax /* selector = 0x0010 = cs */ - movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ - - lea idt_table,%edi - mov $256,%ecx -rp_sidt: - movl %eax,(%edi) - movl %edx,4(%edi) - addl $8,%edi - dec %ecx - jne rp_sidt - -.macro set_early_handler handler,trapno - lea \handler,%edx - movl $(__KERNEL_CS << 16),%eax - movw %dx,%ax - movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ - lea idt_table,%edi - movl %eax,8*\trapno(%edi) - movl %edx,8*\trapno+4(%edi) -.endm - - set_early_handler handler=early_divide_err,trapno=0 - set_early_handler handler=early_illegal_opcode,trapno=6 - set_early_handler handler=early_protection_fault,trapno=13 - set_early_handler handler=early_page_fault,trapno=14 - - ret - -early_divide_err: - xor %edx,%edx - pushl $0 /* fake errcode */ - jmp early_fault - -early_illegal_opcode: - movl $6,%edx - pushl $0 /* fake errcode */ - jmp early_fault - -early_protection_fault: - movl $13,%edx - jmp early_fault - -early_page_fault: - movl $14,%edx - jmp early_fault - -early_fault: - cld -#ifdef CONFIG_PRINTK - movl $(__KERNEL_DS),%eax - movl %eax,%ds - movl %eax,%es - cmpl $2,early_recursion_flag - je hlt_loop - incl early_recursion_flag - movl %cr2,%eax - pushl %eax - pushl %edx /* trapno */ - pushl $fault_msg -#ifdef CONFIG_EARLY_PRINTK - call early_printk -#else - call printk -#endif -#endif -hlt_loop: - hlt - jmp hlt_loop - -/* This is the default interrupt "handler" :-) */ - ALIGN -ignore_int: - cld -#ifdef CONFIG_PRINTK - pushl %eax - pushl %ecx - pushl %edx - pushl %es - pushl %ds - movl $(__KERNEL_DS),%eax - movl %eax,%ds - movl %eax,%es - cmpl $2,early_recursion_flag - je hlt_loop - incl early_recursion_flag - pushl 16(%esp) - pushl 24(%esp) - pushl 32(%esp) - pushl 40(%esp) - pushl $int_msg -#ifdef CONFIG_EARLY_PRINTK - call early_printk -#else - call printk -#endif - addl $(5*4),%esp - popl %ds - popl %es - popl %edx - popl %ecx - popl %eax -#endif - 
iret - -.section .text -/* - * Real beginning of normal "text" segment - */ -ENTRY(stext) -ENTRY(_stext) - -/* - * BSS section - */ -.section ".bss.page_aligned","wa" - .align PAGE_SIZE_asm -ENTRY(swapper_pg_dir) - .fill 1024,4,0 -ENTRY(swapper_pg_pmd) - .fill 1024,4,0 -ENTRY(empty_zero_page) - .fill 4096,1,0 - -/* - * This starts the data section. - */ -.data -ENTRY(stack_start) - .long init_thread_union+THREAD_SIZE - .long __BOOT_DS - -ready: .byte 0 - -early_recursion_flag: - .long 0 - -int_msg: - .asciz "Unknown interrupt or fault at EIP %p %p %p\n" - -fault_msg: - .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n" - .asciz "Stack: %p %p %p %p %p %p %p %p\n" - -#include "../../x86/xen/xen-head.S" - -/* - * The IDT and GDT 'descriptors' are a strange 48-bit object - * only used by the lidt and lgdt instructions. They are not - * like usual segment descriptors - they consist of a 16-bit - * segment size, and 32-bit linear address value: - */ - -.globl boot_gdt_descr -.globl idt_descr - - ALIGN -# early boot GDT descriptor (must use 1:1 address mapping) - .word 0 # 32 bit align gdt_desc.address -boot_gdt_descr: - .word __BOOT_DS+7 - .long boot_gdt - __PAGE_OFFSET - - .word 0 # 32-bit align idt_desc.address -idt_descr: - .word IDT_ENTRIES*8-1 # idt contains 256 entries - .long idt_table - -# boot GDT descriptor (later on used by CPU#0): - .word 0 # 32 bit align gdt_desc.address -ENTRY(early_gdt_descr) - .word GDT_ENTRIES*8-1 - .long per_cpu__gdt_page /* Overwritten for secondary CPUs */ - -/* - * The boot_gdt must mirror the equivalent in setup.S and is - * used only for booting. 
- */ - .align L1_CACHE_BYTES -ENTRY(boot_gdt) - .fill GDT_ENTRY_BOOT_CS,8,0 - .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ diff --git a/arch/i386/kernel/hpet_32.c b/arch/i386/kernel/hpet_32.c deleted file mode 100644 index 533d4932bc7..00000000000 --- a/arch/i386/kernel/hpet_32.c +++ /dev/null @@ -1,553 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -extern struct clock_event_device *global_clock_event; - -#define HPET_MASK CLOCKSOURCE_MASK(32) -#define HPET_SHIFT 22 - -/* FSEC = 10^-15 NSEC = 10^-9 */ -#define FSEC_PER_NSEC 1000000 - -/* - * HPET address is set in acpi/boot.c, when an ACPI entry exists - */ -unsigned long hpet_address; -static void __iomem * hpet_virt_address; - -static inline unsigned long hpet_readl(unsigned long a) -{ - return readl(hpet_virt_address + a); -} - -static inline void hpet_writel(unsigned long d, unsigned long a) -{ - writel(d, hpet_virt_address + a); -} - -/* - * HPET command line enable / disable - */ -static int boot_hpet_disable; - -static int __init hpet_setup(char* str) -{ - if (str) { - if (!strncmp("disable", str, 7)) - boot_hpet_disable = 1; - } - return 1; -} -__setup("hpet=", hpet_setup); - -static inline int is_hpet_capable(void) -{ - return (!boot_hpet_disable && hpet_address); -} - -/* - * HPET timer interrupt enable / disable - */ -static int hpet_legacy_int_enabled; - -/** - * is_hpet_enabled - check whether the hpet timer interrupt is enabled - */ -int is_hpet_enabled(void) -{ - return is_hpet_capable() && hpet_legacy_int_enabled; -} - -/* - * When the hpet driver (/dev/hpet) is enabled, we need to reserve - * timer 0 and timer 1 in case of RTC emulation. 
- */ -#ifdef CONFIG_HPET -static void hpet_reserve_platform_timers(unsigned long id) -{ - struct hpet __iomem *hpet = hpet_virt_address; - struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; - unsigned int nrtimers, i; - struct hpet_data hd; - - nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; - - memset(&hd, 0, sizeof (hd)); - hd.hd_phys_address = hpet_address; - hd.hd_address = hpet_virt_address; - hd.hd_nirqs = nrtimers; - hd.hd_flags = HPET_DATA_PLATFORM; - hpet_reserve_timer(&hd, 0); - -#ifdef CONFIG_HPET_EMULATE_RTC - hpet_reserve_timer(&hd, 1); -#endif - - hd.hd_irq[0] = HPET_LEGACY_8254; - hd.hd_irq[1] = HPET_LEGACY_RTC; - - for (i = 2; i < nrtimers; timer++, i++) - hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> - Tn_INT_ROUTE_CNF_SHIFT; - - hpet_alloc(&hd); - -} -#else -static void hpet_reserve_platform_timers(unsigned long id) { } -#endif - -/* - * Common hpet info - */ -static unsigned long hpet_period; - -static void hpet_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt); -static int hpet_next_event(unsigned long delta, - struct clock_event_device *evt); - -/* - * The hpet clock event device - */ -static struct clock_event_device hpet_clockevent = { - .name = "hpet", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .set_mode = hpet_set_mode, - .set_next_event = hpet_next_event, - .shift = 32, - .irq = 0, -}; - -static void hpet_start_counter(void) -{ - unsigned long cfg = hpet_readl(HPET_CFG); - - cfg &= ~HPET_CFG_ENABLE; - hpet_writel(cfg, HPET_CFG); - hpet_writel(0, HPET_COUNTER); - hpet_writel(0, HPET_COUNTER + 4); - cfg |= HPET_CFG_ENABLE; - hpet_writel(cfg, HPET_CFG); -} - -static void hpet_enable_int(void) -{ - unsigned long cfg = hpet_readl(HPET_CFG); - - cfg |= HPET_CFG_LEGACY; - hpet_writel(cfg, HPET_CFG); - hpet_legacy_int_enabled = 1; -} - -static void hpet_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - unsigned long cfg, cmp, now; - 
uint64_t delta; - - switch(mode) { - case CLOCK_EVT_MODE_PERIODIC: - delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult; - delta >>= hpet_clockevent.shift; - now = hpet_readl(HPET_COUNTER); - cmp = now + (unsigned long) delta; - cfg = hpet_readl(HPET_T0_CFG); - cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | - HPET_TN_SETVAL | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T0_CFG); - /* - * The first write after writing TN_SETVAL to the - * config register sets the counter value, the second - * write sets the period. - */ - hpet_writel(cmp, HPET_T0_CMP); - udelay(1); - hpet_writel((unsigned long) delta, HPET_T0_CMP); - break; - - case CLOCK_EVT_MODE_ONESHOT: - cfg = hpet_readl(HPET_T0_CFG); - cfg &= ~HPET_TN_PERIODIC; - cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T0_CFG); - break; - - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - cfg = hpet_readl(HPET_T0_CFG); - cfg &= ~HPET_TN_ENABLE; - hpet_writel(cfg, HPET_T0_CFG); - break; - - case CLOCK_EVT_MODE_RESUME: - hpet_enable_int(); - break; - } -} - -static int hpet_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - unsigned long cnt; - - cnt = hpet_readl(HPET_COUNTER); - cnt += delta; - hpet_writel(cnt, HPET_T0_CMP); - - return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0) ? 
-ETIME : 0; -} - -/* - * Clock source related code - */ -static cycle_t read_hpet(void) -{ - return (cycle_t)hpet_readl(HPET_COUNTER); -} - -static struct clocksource clocksource_hpet = { - .name = "hpet", - .rating = 250, - .read = read_hpet, - .mask = HPET_MASK, - .shift = HPET_SHIFT, - .flags = CLOCK_SOURCE_IS_CONTINUOUS, - .resume = hpet_start_counter, -}; - -/* - * Try to setup the HPET timer - */ -int __init hpet_enable(void) -{ - unsigned long id; - uint64_t hpet_freq; - u64 tmp, start, now; - cycle_t t1; - - if (!is_hpet_capable()) - return 0; - - hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); - - /* - * Read the period and check for a sane value: - */ - hpet_period = hpet_readl(HPET_PERIOD); - if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) - goto out_nohpet; - - /* - * The period is a femto seconds value. We need to calculate the - * scaled math multiplication factor for nanosecond to hpet tick - * conversion. - */ - hpet_freq = 1000000000000000ULL; - do_div(hpet_freq, hpet_period); - hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, - NSEC_PER_SEC, 32); - /* Calculate the min / max delta */ - hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, - &hpet_clockevent); - hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30, - &hpet_clockevent); - - /* - * Read the HPET ID register to retrieve the IRQ routing - * information and the number of channels - */ - id = hpet_readl(HPET_ID); - -#ifdef CONFIG_HPET_EMULATE_RTC - /* - * The legacy routing mode needs at least two channels, tick timer - * and the rtc emulation channel. 
- */ - if (!(id & HPET_ID_NUMBER)) - goto out_nohpet; -#endif - - /* Start the counter */ - hpet_start_counter(); - - /* Verify whether hpet counter works */ - t1 = read_hpet(); - rdtscll(start); - - /* - * We don't know the TSC frequency yet, but waiting for - * 200000 TSC cycles is safe: - * 4 GHz == 50us - * 1 GHz == 200us - */ - do { - rep_nop(); - rdtscll(now); - } while ((now - start) < 200000UL); - - if (t1 == read_hpet()) { - printk(KERN_WARNING - "HPET counter not counting. HPET disabled\n"); - goto out_nohpet; - } - - /* Initialize and register HPET clocksource - * - * hpet period is in femto seconds per cycle - * so we need to convert this to ns/cyc units - * aproximated by mult/2^shift - * - * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift - * fsec/cyc * 1ns/1000000fsec * 2^shift = mult - * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult - * (fsec/cyc << shift)/1000000 = mult - * (hpet_period << shift)/FSEC_PER_NSEC = mult - */ - tmp = (u64)hpet_period << HPET_SHIFT; - do_div(tmp, FSEC_PER_NSEC); - clocksource_hpet.mult = (u32)tmp; - - clocksource_register(&clocksource_hpet); - - if (id & HPET_ID_LEGSUP) { - hpet_enable_int(); - hpet_reserve_platform_timers(id); - /* - * Start hpet with the boot cpu mask and make it - * global after the IO_APIC has been initialized. - */ - hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); - clockevents_register_device(&hpet_clockevent); - global_clock_event = &hpet_clockevent; - return 1; - } - return 0; - -out_nohpet: - iounmap(hpet_virt_address); - hpet_virt_address = NULL; - boot_hpet_disable = 1; - return 0; -} - - -#ifdef CONFIG_HPET_EMULATE_RTC - -/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET - * is enabled, we support RTC interrupt functionality in software. 
- * RTC has 3 kinds of interrupts: - * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock - * is updated - * 2) Alarm Interrupt - generate an interrupt at a specific time of day - * 3) Periodic Interrupt - generate periodic interrupt, with frequencies - * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2) - * (1) and (2) above are implemented using polling at a frequency of - * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt - * overhead. (DEFAULT_RTC_INT_FREQ) - * For (3), we use interrupts at 64Hz or user specified periodic - * frequency, whichever is higher. - */ -#include -#include - -#define DEFAULT_RTC_INT_FREQ 64 -#define DEFAULT_RTC_SHIFT 6 -#define RTC_NUM_INTS 1 - -static unsigned long hpet_rtc_flags; -static unsigned long hpet_prev_update_sec; -static struct rtc_time hpet_alarm_time; -static unsigned long hpet_pie_count; -static unsigned long hpet_t1_cmp; -static unsigned long hpet_default_delta; -static unsigned long hpet_pie_delta; -static unsigned long hpet_pie_limit; - -/* - * Timer 1 for RTC emulation. We use one shot mode, as periodic mode - * is not supported by all HPET implementations for timer 1. - * - * hpet_rtc_timer_init() is called when the rtc is initialized. 
- */ -int hpet_rtc_timer_init(void) -{ - unsigned long cfg, cnt, delta, flags; - - if (!is_hpet_enabled()) - return 0; - - if (!hpet_default_delta) { - uint64_t clc; - - clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; - clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT; - hpet_default_delta = (unsigned long) clc; - } - - if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) - delta = hpet_default_delta; - else - delta = hpet_pie_delta; - - local_irq_save(flags); - - cnt = delta + hpet_readl(HPET_COUNTER); - hpet_writel(cnt, HPET_T1_CMP); - hpet_t1_cmp = cnt; - - cfg = hpet_readl(HPET_T1_CFG); - cfg &= ~HPET_TN_PERIODIC; - cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T1_CFG); - - local_irq_restore(flags); - - return 1; -} - -/* - * The functions below are called from rtc driver. - * Return 0 if HPET is not being used. - * Otherwise do the necessary changes and return 1. - */ -int hpet_mask_rtc_irq_bit(unsigned long bit_mask) -{ - if (!is_hpet_enabled()) - return 0; - - hpet_rtc_flags &= ~bit_mask; - return 1; -} - -int hpet_set_rtc_irq_bit(unsigned long bit_mask) -{ - unsigned long oldbits = hpet_rtc_flags; - - if (!is_hpet_enabled()) - return 0; - - hpet_rtc_flags |= bit_mask; - - if (!oldbits) - hpet_rtc_timer_init(); - - return 1; -} - -int hpet_set_alarm_time(unsigned char hrs, unsigned char min, - unsigned char sec) -{ - if (!is_hpet_enabled()) - return 0; - - hpet_alarm_time.tm_hour = hrs; - hpet_alarm_time.tm_min = min; - hpet_alarm_time.tm_sec = sec; - - return 1; -} - -int hpet_set_periodic_freq(unsigned long freq) -{ - uint64_t clc; - - if (!is_hpet_enabled()) - return 0; - - if (freq <= DEFAULT_RTC_INT_FREQ) - hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq; - else { - clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; - do_div(clc, freq); - clc >>= hpet_clockevent.shift; - hpet_pie_delta = (unsigned long) clc; - } - return 1; -} - -int hpet_rtc_dropped_irq(void) -{ - return is_hpet_enabled(); -} - -static void 
hpet_rtc_timer_reinit(void) -{ - unsigned long cfg, delta; - int lost_ints = -1; - - if (unlikely(!hpet_rtc_flags)) { - cfg = hpet_readl(HPET_T1_CFG); - cfg &= ~HPET_TN_ENABLE; - hpet_writel(cfg, HPET_T1_CFG); - return; - } - - if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) - delta = hpet_default_delta; - else - delta = hpet_pie_delta; - - /* - * Increment the comparator value until we are ahead of the - * current count. - */ - do { - hpet_t1_cmp += delta; - hpet_writel(hpet_t1_cmp, HPET_T1_CMP); - lost_ints++; - } while ((long)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); - - if (lost_ints) { - if (hpet_rtc_flags & RTC_PIE) - hpet_pie_count += lost_ints; - if (printk_ratelimit()) - printk(KERN_WARNING "rtc: lost %d interrupts\n", - lost_ints); - } -} - -irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) -{ - struct rtc_time curr_time; - unsigned long rtc_int_flag = 0; - - hpet_rtc_timer_reinit(); - - if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) - rtc_get_rtc_time(&curr_time); - - if (hpet_rtc_flags & RTC_UIE && - curr_time.tm_sec != hpet_prev_update_sec) { - rtc_int_flag = RTC_UF; - hpet_prev_update_sec = curr_time.tm_sec; - } - - if (hpet_rtc_flags & RTC_PIE && - ++hpet_pie_count >= hpet_pie_limit) { - rtc_int_flag |= RTC_PF; - hpet_pie_count = 0; - } - - if (hpet_rtc_flags & RTC_PIE && - (curr_time.tm_sec == hpet_alarm_time.tm_sec) && - (curr_time.tm_min == hpet_alarm_time.tm_min) && - (curr_time.tm_hour == hpet_alarm_time.tm_hour)) - rtc_int_flag |= RTC_AF; - - if (rtc_int_flag) { - rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); - rtc_interrupt(rtc_int_flag, dev_id); - } - return IRQ_HANDLED; -} -#endif diff --git a/arch/i386/kernel/i386_ksyms_32.c b/arch/i386/kernel/i386_ksyms_32.c deleted file mode 100644 index e3d4b73bfdb..00000000000 --- a/arch/i386/kernel/i386_ksyms_32.c +++ /dev/null @@ -1,30 +0,0 @@ -#include -#include -#include - -EXPORT_SYMBOL(__down_failed); -EXPORT_SYMBOL(__down_failed_interruptible); -EXPORT_SYMBOL(__down_failed_trylock); 
-EXPORT_SYMBOL(__up_wakeup); -/* Networking helper routines. */ -EXPORT_SYMBOL(csum_partial_copy_generic); - -EXPORT_SYMBOL(__get_user_1); -EXPORT_SYMBOL(__get_user_2); -EXPORT_SYMBOL(__get_user_4); - -EXPORT_SYMBOL(__put_user_1); -EXPORT_SYMBOL(__put_user_2); -EXPORT_SYMBOL(__put_user_4); -EXPORT_SYMBOL(__put_user_8); - -EXPORT_SYMBOL(strstr); - -#ifdef CONFIG_SMP -extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); -extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); -EXPORT_SYMBOL(__write_lock_failed); -EXPORT_SYMBOL(__read_lock_failed); -#endif - -EXPORT_SYMBOL(csum_partial); diff --git a/arch/i386/kernel/i387_32.c b/arch/i386/kernel/i387_32.c deleted file mode 100644 index 665847281ed..00000000000 --- a/arch/i386/kernel/i387_32.c +++ /dev/null @@ -1,546 +0,0 @@ -/* - * linux/arch/i386/kernel/i387.c - * - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes , May 2000 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_MATH_EMULATION -#define HAVE_HWFP (boot_cpu_data.hard_math) -#else -#define HAVE_HWFP 1 -#endif - -static unsigned long mxcsr_feature_mask __read_mostly = 0xffffffff; - -void mxcsr_feature_mask_init(void) -{ - unsigned long mask = 0; - clts(); - if (cpu_has_fxsr) { - memset(&current->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct)); - asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave)); - mask = current->thread.i387.fxsave.mxcsr_mask; - if (mask == 0) mask = 0x0000ffbf; - } - mxcsr_feature_mask &= mask; - stts(); -} - -/* - * The _current_ task is using the FPU for the first time - * so initialize it and set the mxcsr to its default - * value at reset if we support XMM instructions and then - * remeber the current task has used the FPU.
- */ -void init_fpu(struct task_struct *tsk) -{ - if (cpu_has_fxsr) { - memset(&tsk->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct)); - tsk->thread.i387.fxsave.cwd = 0x37f; - if (cpu_has_xmm) - tsk->thread.i387.fxsave.mxcsr = 0x1f80; - } else { - memset(&tsk->thread.i387.fsave, 0, sizeof(struct i387_fsave_struct)); - tsk->thread.i387.fsave.cwd = 0xffff037fu; - tsk->thread.i387.fsave.swd = 0xffff0000u; - tsk->thread.i387.fsave.twd = 0xffffffffu; - tsk->thread.i387.fsave.fos = 0xffff0000u; - } - /* only the device not available exception or ptrace can call init_fpu */ - set_stopped_child_used_math(tsk); -} - -/* - * FPU lazy state save handling. - */ - -void kernel_fpu_begin(void) -{ - struct thread_info *thread = current_thread_info(); - - preempt_disable(); - if (thread->status & TS_USEDFPU) { - __save_init_fpu(thread->task); - return; - } - clts(); -} -EXPORT_SYMBOL_GPL(kernel_fpu_begin); - -/* - * FPU tag word conversions. - */ - -static inline unsigned short twd_i387_to_fxsr( unsigned short twd ) -{ - unsigned int tmp; /* to avoid 16 bit prefixes in the code */ - - /* Transform each pair of bits into 01 (valid) or 00 (empty) */ - tmp = ~twd; - tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ - /* and move the valid bits to the lower byte. 
*/ - tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ - tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ - tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ - return tmp; -} - -static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave ) -{ - struct _fpxreg *st = NULL; - unsigned long tos = (fxsave->swd >> 11) & 7; - unsigned long twd = (unsigned long) fxsave->twd; - unsigned long tag; - unsigned long ret = 0xffff0000u; - int i; - -#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16); - - for ( i = 0 ; i < 8 ; i++ ) { - if ( twd & 0x1 ) { - st = FPREG_ADDR( fxsave, (i - tos) & 7 ); - - switch ( st->exponent & 0x7fff ) { - case 0x7fff: - tag = 2; /* Special */ - break; - case 0x0000: - if ( !st->significand[0] && - !st->significand[1] && - !st->significand[2] && - !st->significand[3] ) { - tag = 1; /* Zero */ - } else { - tag = 2; /* Special */ - } - break; - default: - if ( st->significand[3] & 0x8000 ) { - tag = 0; /* Valid */ - } else { - tag = 2; /* Special */ - } - break; - } - } else { - tag = 3; /* Empty */ - } - ret |= (tag << (2 * i)); - twd = twd >> 1; - } - return ret; -} - -/* - * FPU state interaction. 
- */ - -unsigned short get_fpu_cwd( struct task_struct *tsk ) -{ - if ( cpu_has_fxsr ) { - return tsk->thread.i387.fxsave.cwd; - } else { - return (unsigned short)tsk->thread.i387.fsave.cwd; - } -} - -unsigned short get_fpu_swd( struct task_struct *tsk ) -{ - if ( cpu_has_fxsr ) { - return tsk->thread.i387.fxsave.swd; - } else { - return (unsigned short)tsk->thread.i387.fsave.swd; - } -} - -#if 0 -unsigned short get_fpu_twd( struct task_struct *tsk ) -{ - if ( cpu_has_fxsr ) { - return tsk->thread.i387.fxsave.twd; - } else { - return (unsigned short)tsk->thread.i387.fsave.twd; - } -} -#endif /* 0 */ - -unsigned short get_fpu_mxcsr( struct task_struct *tsk ) -{ - if ( cpu_has_xmm ) { - return tsk->thread.i387.fxsave.mxcsr; - } else { - return 0x1f80; - } -} - -#if 0 - -void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd ) -{ - if ( cpu_has_fxsr ) { - tsk->thread.i387.fxsave.cwd = cwd; - } else { - tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000u); - } -} - -void set_fpu_swd( struct task_struct *tsk, unsigned short swd ) -{ - if ( cpu_has_fxsr ) { - tsk->thread.i387.fxsave.swd = swd; - } else { - tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000u); - } -} - -void set_fpu_twd( struct task_struct *tsk, unsigned short twd ) -{ - if ( cpu_has_fxsr ) { - tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd); - } else { - tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000u); - } -} - -#endif /* 0 */ - -/* - * FXSR floating point environment conversions. 
- */ - -static int convert_fxsr_to_user( struct _fpstate __user *buf, - struct i387_fxsave_struct *fxsave ) -{ - unsigned long env[7]; - struct _fpreg __user *to; - struct _fpxreg *from; - int i; - - env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul; - env[1] = (unsigned long)fxsave->swd | 0xffff0000ul; - env[2] = twd_fxsr_to_i387(fxsave); - env[3] = fxsave->fip; - env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16); - env[5] = fxsave->foo; - env[6] = fxsave->fos; - - if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) - return 1; - - to = &buf->_st[0]; - from = (struct _fpxreg *) &fxsave->st_space[0]; - for ( i = 0 ; i < 8 ; i++, to++, from++ ) { - unsigned long __user *t = (unsigned long __user *)to; - unsigned long *f = (unsigned long *)from; - - if (__put_user(*f, t) || - __put_user(*(f + 1), t + 1) || - __put_user(from->exponent, &to->exponent)) - return 1; - } - return 0; -} - -static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, - struct _fpstate __user *buf ) -{ - unsigned long env[7]; - struct _fpxreg *to; - struct _fpreg __user *from; - int i; - - if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) - return 1; - - fxsave->cwd = (unsigned short)(env[0] & 0xffff); - fxsave->swd = (unsigned short)(env[1] & 0xffff); - fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff)); - fxsave->fip = env[3]; - fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16); - fxsave->fcs = (env[4] & 0xffff); - fxsave->foo = env[5]; - fxsave->fos = env[6]; - - to = (struct _fpxreg *) &fxsave->st_space[0]; - from = &buf->_st[0]; - for ( i = 0 ; i < 8 ; i++, to++, from++ ) { - unsigned long *t = (unsigned long *)to; - unsigned long __user *f = (unsigned long __user *)from; - - if (__get_user(*t, f) || - __get_user(*(t + 1), f + 1) || - __get_user(to->exponent, &from->exponent)) - return 1; - } - return 0; -} - -/* - * Signal frame handlers. 
- */ - -static inline int save_i387_fsave( struct _fpstate __user *buf ) -{ - struct task_struct *tsk = current; - - unlazy_fpu( tsk ); - tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd; - if ( __copy_to_user( buf, &tsk->thread.i387.fsave, - sizeof(struct i387_fsave_struct) ) ) - return -1; - return 1; -} - -static int save_i387_fxsave( struct _fpstate __user *buf ) -{ - struct task_struct *tsk = current; - int err = 0; - - unlazy_fpu( tsk ); - - if ( convert_fxsr_to_user( buf, &tsk->thread.i387.fxsave ) ) - return -1; - - err |= __put_user( tsk->thread.i387.fxsave.swd, &buf->status ); - err |= __put_user( X86_FXSR_MAGIC, &buf->magic ); - if ( err ) - return -1; - - if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.i387.fxsave, - sizeof(struct i387_fxsave_struct) ) ) - return -1; - return 1; -} - -int save_i387( struct _fpstate __user *buf ) -{ - if ( !used_math() ) - return 0; - - /* This will cause a "finit" to be triggered by the next - * attempted FPU operation by the 'current' process. - */ - clear_used_math(); - - if ( HAVE_HWFP ) { - if ( cpu_has_fxsr ) { - return save_i387_fxsave( buf ); - } else { - return save_i387_fsave( buf ); - } - } else { - return save_i387_soft( &current->thread.i387.soft, buf ); - } -} - -static inline int restore_i387_fsave( struct _fpstate __user *buf ) -{ - struct task_struct *tsk = current; - clear_fpu( tsk ); - return __copy_from_user( &tsk->thread.i387.fsave, buf, - sizeof(struct i387_fsave_struct) ); -} - -static int restore_i387_fxsave( struct _fpstate __user *buf ) -{ - int err; - struct task_struct *tsk = current; - clear_fpu( tsk ); - err = __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0], - sizeof(struct i387_fxsave_struct) ); - /* mxcsr reserved bits must be masked to zero for security reasons */ - tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; - return err ?
1 : convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf ); -} - -int restore_i387( struct _fpstate __user *buf ) -{ - int err; - - if ( HAVE_HWFP ) { - if ( cpu_has_fxsr ) { - err = restore_i387_fxsave( buf ); - } else { - err = restore_i387_fsave( buf ); - } - } else { - err = restore_i387_soft( &current->thread.i387.soft, buf ); - } - set_used_math(); - return err; -} - -/* - * ptrace request handlers. - */ - -static inline int get_fpregs_fsave( struct user_i387_struct __user *buf, - struct task_struct *tsk ) -{ - return __copy_to_user( buf, &tsk->thread.i387.fsave, - sizeof(struct user_i387_struct) ); -} - -static inline int get_fpregs_fxsave( struct user_i387_struct __user *buf, - struct task_struct *tsk ) -{ - return convert_fxsr_to_user( (struct _fpstate __user *)buf, - &tsk->thread.i387.fxsave ); -} - -int get_fpregs( struct user_i387_struct __user *buf, struct task_struct *tsk ) -{ - if ( HAVE_HWFP ) { - if ( cpu_has_fxsr ) { - return get_fpregs_fxsave( buf, tsk ); - } else { - return get_fpregs_fsave( buf, tsk ); - } - } else { - return save_i387_soft( &tsk->thread.i387.soft, - (struct _fpstate __user *)buf ); - } -} - -static inline int set_fpregs_fsave( struct task_struct *tsk, - struct user_i387_struct __user *buf ) -{ - return __copy_from_user( &tsk->thread.i387.fsave, buf, - sizeof(struct user_i387_struct) ); -} - -static inline int set_fpregs_fxsave( struct task_struct *tsk, - struct user_i387_struct __user *buf ) -{ - return convert_fxsr_from_user( &tsk->thread.i387.fxsave, - (struct _fpstate __user *)buf ); -} - -int set_fpregs( struct task_struct *tsk, struct user_i387_struct __user *buf ) -{ - if ( HAVE_HWFP ) { - if ( cpu_has_fxsr ) { - return set_fpregs_fxsave( tsk, buf ); - } else { - return set_fpregs_fsave( tsk, buf ); - } - } else { - return restore_i387_soft( &tsk->thread.i387.soft, - (struct _fpstate __user *)buf ); - } -} - -int get_fpxregs( struct user_fxsr_struct __user *buf, struct task_struct *tsk ) -{ - if ( cpu_has_fxsr ) { - if
(__copy_to_user( buf, &tsk->thread.i387.fxsave, - sizeof(struct user_fxsr_struct) )) - return -EFAULT; - return 0; - } else { - return -EIO; - } -} - -int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct __user *buf ) -{ - int ret = 0; - - if ( cpu_has_fxsr ) { - if (__copy_from_user( &tsk->thread.i387.fxsave, buf, - sizeof(struct user_fxsr_struct) )) - ret = -EFAULT; - /* mxcsr reserved bits must be masked to zero for security reasons */ - tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; - } else { - ret = -EIO; - } - return ret; -} - -/* - * FPU state for core dumps. - */ - -static inline void copy_fpu_fsave( struct task_struct *tsk, - struct user_i387_struct *fpu ) -{ - memcpy( fpu, &tsk->thread.i387.fsave, - sizeof(struct user_i387_struct) ); -} - -static inline void copy_fpu_fxsave( struct task_struct *tsk, - struct user_i387_struct *fpu ) -{ - unsigned short *to; - unsigned short *from; - int i; - - memcpy( fpu, &tsk->thread.i387.fxsave, 7 * sizeof(long) ); - - to = (unsigned short *)&fpu->st_space[0]; - from = (unsigned short *)&tsk->thread.i387.fxsave.st_space[0]; - for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) { - memcpy( to, from, 5 * sizeof(unsigned short) ); - } -} - -int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu ) -{ - int fpvalid; - struct task_struct *tsk = current; - - fpvalid = !!used_math(); - if ( fpvalid ) { - unlazy_fpu( tsk ); - if ( cpu_has_fxsr ) { - copy_fpu_fxsave( tsk, fpu ); - } else { - copy_fpu_fsave( tsk, fpu ); - } - } - - return fpvalid; -} -EXPORT_SYMBOL(dump_fpu); - -int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu) -{ - int fpvalid = !!tsk_used_math(tsk); - - if (fpvalid) { - if (tsk == current) - unlazy_fpu(tsk); - if (cpu_has_fxsr) - copy_fpu_fxsave(tsk, fpu); - else - copy_fpu_fsave(tsk, fpu); - } - return fpvalid; -} - -int dump_task_extended_fpu(struct task_struct *tsk, struct user_fxsr_struct *fpu) -{ - int fpvalid = tsk_used_math(tsk) && cpu_has_fxsr; - - if 
(fpvalid) { - if (tsk == current) - unlazy_fpu(tsk); - memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(*fpu)); - } - return fpvalid; -} diff --git a/arch/i386/kernel/i8237.c b/arch/i386/kernel/i8237.c deleted file mode 100644 index 6f508e8d7c5..00000000000 --- a/arch/i386/kernel/i8237.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * i8237.c: 8237A DMA controller suspend functions. - * - * Written by Pierre Ossman, 2005. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - */ - -#include -#include - -#include - -/* - * This module just handles suspend/resume issues with the - * 8237A DMA controller (used for ISA and LPC). - * Allocation is handled in kernel/dma.c and normal usage is - * in asm/dma.h. - */ - -static int i8237A_resume(struct sys_device *dev) -{ - unsigned long flags; - int i; - - flags = claim_dma_lock(); - - dma_outb(DMA1_RESET_REG, 0); - dma_outb(DMA2_RESET_REG, 0); - - for (i = 0;i < 8;i++) { - set_dma_addr(i, 0x000000); - /* DMA count is a bit weird so this is not 0 */ - set_dma_count(i, 1); - } - - /* Enable cascade DMA or channel 0-3 won't work */ - enable_dma(4); - - release_dma_lock(flags); - - return 0; -} - -static int i8237A_suspend(struct sys_device *dev, pm_message_t state) -{ - return 0; -} - -static struct sysdev_class i8237_sysdev_class = { - set_kset_name("i8237"), - .suspend = i8237A_suspend, - .resume = i8237A_resume, -}; - -static struct sys_device device_i8237A = { - .id = 0, - .cls = &i8237_sysdev_class, -}; - -static int __init i8237A_init_sysfs(void) -{ - int error = sysdev_class_register(&i8237_sysdev_class); - if (!error) - error = sysdev_register(&device_i8237A); - return error; -} - -device_initcall(i8237A_init_sysfs); diff --git a/arch/i386/kernel/i8253_32.c b/arch/i386/kernel/i8253_32.c deleted file mode 100644 index 
6d839f2f1b1..00000000000 --- a/arch/i386/kernel/i8253_32.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * i8253.c 8253/PIT functions - * - */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -DEFINE_SPINLOCK(i8253_lock); -EXPORT_SYMBOL(i8253_lock); - -/* - * HPET replaces the PIT, when enabled. So we need to know, which of - * the two timers is used - */ -struct clock_event_device *global_clock_event; - -/* - * Initialize the PIT timer. - * - * This is also called after resume to bring the PIT into operation again. - */ -static void init_pit_timer(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); - - switch(mode) { - case CLOCK_EVT_MODE_PERIODIC: - /* binary, mode 2, LSB/MSB, ch 0 */ - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ - outb(LATCH >> 8 , PIT_CH0); /* MSB */ - break; - - case CLOCK_EVT_MODE_SHUTDOWN: - case CLOCK_EVT_MODE_UNUSED: - if (evt->mode == CLOCK_EVT_MODE_PERIODIC || - evt->mode == CLOCK_EVT_MODE_ONESHOT) { - outb_p(0x30, PIT_MODE); - outb_p(0, PIT_CH0); - outb_p(0, PIT_CH0); - } - break; - - case CLOCK_EVT_MODE_ONESHOT: - /* One shot setup */ - outb_p(0x38, PIT_MODE); - break; - - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here */ - break; - } - spin_unlock_irqrestore(&i8253_lock, flags); -} - -/* - * Program the next event in oneshot mode - * - * Delta is given in PIT ticks - */ -static int pit_next_event(unsigned long delta, struct clock_event_device *evt) -{ - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); - outb_p(delta & 0xff , PIT_CH0); /* LSB */ - outb(delta >> 8 , PIT_CH0); /* MSB */ - spin_unlock_irqrestore(&i8253_lock, flags); - - return 0; -} - -/* - * On UP the PIT can serve all of the possible timer functions. On SMP systems - * it can be solely used for the global tick. 
- * - * The profiling and update capabilites are switched off once the local apic is - * registered. This mechanism replaces the previous #ifdef LOCAL_APIC - - * !using_apic_timer decisions in do_timer_interrupt_hook() - */ -struct clock_event_device pit_clockevent = { - .name = "pit", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .set_mode = init_pit_timer, - .set_next_event = pit_next_event, - .shift = 32, - .irq = 0, -}; - -/* - * Initialize the conversion factor and the min/max deltas of the clock event - * structure and register the clock event source with the framework. - */ -void __init setup_pit_timer(void) -{ - /* - * Start pit with the boot cpu mask and make it global after the - * IO_APIC has been initialized. - */ - pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); - pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32); - pit_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFF, &pit_clockevent); - pit_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &pit_clockevent); - clockevents_register_device(&pit_clockevent); - global_clock_event = &pit_clockevent; -} - -/* - * Since the PIT overflows every tick, its not very useful - * to just read by itself. So use jiffies to emulate a free - * running counter: - */ -static cycle_t pit_read(void) -{ - unsigned long flags; - int count; - u32 jifs; - static int old_count; - static u32 old_jifs; - - spin_lock_irqsave(&i8253_lock, flags); - /* - * Although our caller may have the read side of xtime_lock, - * this is now a seqlock, and we are cheating in this routine - * by having side effects on state that we cannot undo if - * there is a collision on the seqlock and our caller has to - * retry. (Namely, old_jifs and old_count.) So we must treat - * jiffies as volatile despite the lock. 
We read jiffies - * before latching the timer count to guarantee that although - * the jiffies value might be older than the count (that is, - * the counter may underflow between the last point where - * jiffies was incremented and the point where we latch the - * count), it cannot be newer. - */ - jifs = jiffies; - outb_p(0x00, PIT_MODE); /* latch the count ASAP */ - count = inb_p(PIT_CH0); /* read the latched count */ - count |= inb_p(PIT_CH0) << 8; - - /* VIA686a test code... reset the latch if count > max + 1 */ - if (count > LATCH) { - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff, PIT_CH0); - outb(LATCH >> 8, PIT_CH0); - count = LATCH - 1; - } - - /* - * It's possible for count to appear to go the wrong way for a - * couple of reasons: - * - * 1. The timer counter underflows, but we haven't handled the - * resulting interrupt and incremented jiffies yet. - * 2. Hardware problem with the timer, not giving us continuous time, - * the counter does small "jumps" upwards on some Pentium systems, - * (see c't 95/10 page 335 for Neptun bug.) - * - * Previous attempts to handle these cases intelligently were - * buggy, so we just do the simple thing now. - */ - if (count > old_count && jifs == old_jifs) { - count = old_count; - } - old_count = count; - old_jifs = jifs; - - spin_unlock_irqrestore(&i8253_lock, flags); - - count = (LATCH - 1) - count; - - return (cycle_t)(jifs * LATCH) + count; -} - -static struct clocksource clocksource_pit = { - .name = "pit", - .rating = 110, - .read = pit_read, - .mask = CLOCKSOURCE_MASK(32), - .mult = 0, - .shift = 20, -}; - -static int __init init_pit_clocksource(void) -{ - if (num_possible_cpus() > 1) /* PIT does not scale! 
*/ - return 0; - - clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); - return clocksource_register(&clocksource_pit); -} -arch_initcall(init_pit_clocksource); diff --git a/arch/i386/kernel/i8259_32.c b/arch/i386/kernel/i8259_32.c deleted file mode 100644 index 0499cbe9871..00000000000 --- a/arch/i386/kernel/i8259_32.c +++ /dev/null @@ -1,420 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* - * This is the 'legacy' 8259A Programmable Interrupt Controller, - * present in the majority of PC/AT boxes. - * plus some generic x86 specific things if generic specifics makes - * any sense at all. - * this file should become arch/i386/kernel/irq.c when the old irq.c - * moves to arch independent land - */ - -static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); -static void mask_and_ack_8259A(unsigned int); - -static struct irq_chip i8259A_chip = { - .name = "XT-PIC", - .mask = disable_8259A_irq, - .disable = disable_8259A_irq, - .unmask = enable_8259A_irq, - .mask_ack = mask_and_ack_8259A, -}; - -/* - * 8259A PIC functions to handle ISA devices: - */ - -/* - * This contains the irq mask for both 8259A irq controllers, - */ -unsigned int cached_irq_mask = 0xffff; - -/* - * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) - * boards the timer interrupt is not really connected to any IO-APIC pin, - * it's fed to the master 8259A's IR0 line only. - * - * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. - * this 'mixed mode' IRQ handling costs nothing because it's only used - * at IRQ setup time. 
- */ -unsigned long io_apic_irqs; - -void disable_8259A_irq(unsigned int irq) -{ - unsigned int mask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask |= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -void enable_8259A_irq(unsigned int irq) -{ - unsigned int mask = ~(1 << irq); - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask &= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -int i8259A_irq_pending(unsigned int irq) -{ - unsigned int mask = 1<> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); - - return ret; -} - -void make_8259A_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - io_apic_irqs &= ~(1<> 8); - outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ - return value; -} - -/* - * Careful! The 8259A is a fragile beast, it pretty - * much _has_ to be done exactly like this (mask it - * first, _then_ send the EOI, and the order of EOI - * to the two 8259s is important! - */ -static void mask_and_ack_8259A(unsigned int irq) -{ - unsigned int irqmask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - /* - * Lightweight spurious IRQ detection. We do not want - * to overdo spurious IRQ handling - it's usually a sign - * of hardware problems, so we only do the checks we can - * do without slowing down good hardware unnecessarily. - * - * Note that IRQ7 and IRQ15 (the two spurious IRQs - * usually resulting from the 8259A-1|2 PICs) occur - * even if the IRQ is masked in the 8259A. Thus we - * can check spurious 8259A IRQs without doing the - * quite slow i8259A_irq_real() call for every IRQ. 
- * This does not cover 100% of spurious interrupts, - * but should be enough to warn the user that there - * is something bad going on ... - */ - if (cached_irq_mask & irqmask) - goto spurious_8259A_irq; - cached_irq_mask |= irqmask; - -handle_real_irq: - if (irq & 8) { - inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ - outb(cached_slave_mask, PIC_SLAVE_IMR); - outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ - outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ - } else { - inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ - outb(cached_master_mask, PIC_MASTER_IMR); - outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */ - } - spin_unlock_irqrestore(&i8259A_lock, flags); - return; - -spurious_8259A_irq: - /* - * this is the slow path - should happen rarely. - */ - if (i8259A_irq_real(irq)) - /* - * oops, the IRQ _is_ in service according to the - * 8259A - not spurious, go handle it. - */ - goto handle_real_irq; - - { - static int spurious_irq_mask; - /* - * At this point we can be sure the IRQ is spurious, - * lets ACK and report it. [once per IRQ] - */ - if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); - spurious_irq_mask |= irqmask; - } - atomic_inc(&irq_err_count); - /* - * Theoretically we do not have to handle this IRQ, - * but in Linux this does not cause problems and is - * simpler for us. 
- */ - goto handle_real_irq; - } -} - -static char irq_trigger[2]; -/** - * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ - */ -static void restore_ELCR(char *trigger) -{ - outb(trigger[0], 0x4d0); - outb(trigger[1], 0x4d1); -} - -static void save_ELCR(char *trigger) -{ - /* IRQ 0,1,2,8,13 are marked as reserved */ - trigger[0] = inb(0x4d0) & 0xF8; - trigger[1] = inb(0x4d1) & 0xDE; -} - -static int i8259A_resume(struct sys_device *dev) -{ - init_8259A(i8259A_auto_eoi); - restore_ELCR(irq_trigger); - return 0; -} - -static int i8259A_suspend(struct sys_device *dev, pm_message_t state) -{ - save_ELCR(irq_trigger); - return 0; -} - -static int i8259A_shutdown(struct sys_device *dev) -{ - /* Put the i8259A into a quiescent state that - * the kernel initialization code can get it - * out of. - */ - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ - return 0; -} - -static struct sysdev_class i8259_sysdev_class = { - set_kset_name("i8259"), - .suspend = i8259A_suspend, - .resume = i8259A_resume, - .shutdown = i8259A_shutdown, -}; - -static struct sys_device device_i8259A = { - .id = 0, - .cls = &i8259_sysdev_class, -}; - -static int __init i8259A_init_sysfs(void) -{ - int error = sysdev_class_register(&i8259_sysdev_class); - if (!error) - error = sysdev_register(&device_i8259A); - return error; -} - -device_initcall(i8259A_init_sysfs); - -void init_8259A(int auto_eoi) -{ - unsigned long flags; - - i8259A_auto_eoi = auto_eoi; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - - /* - * outb_p - this has to work on a wide range of PC hardware. 
- */ - outb_p(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - outb_p(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ - outb_p(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ - if (auto_eoi) /* master does Auto EOI */ - outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); - else /* master expects normal EOI */ - outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); - - outb_p(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - outb_p(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ - outb_p(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ - outb_p(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ - if (auto_eoi) - /* - * In AEOI mode we just have to mask the interrupt - * when acking. - */ - i8259A_chip.mask_ack = disable_8259A_irq; - else - i8259A_chip.mask_ack = mask_and_ack_8259A; - - udelay(100); /* wait for 8259A to initialize */ - - outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ - outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -/* - * Note that on a 486, we don't want to do a SIGFPE on an irq13 - * as the irq is unreliable, and exception 16 works correctly - * (ie as explained in the intel literature). On a 386, you - * can't use exception 16 due to bad IBM design, so we have to - * rely on the less exact irq13. - * - * Careful.. Not only is IRQ13 unreliable, but it is also - * leads to races. IBM designers who came up with it should - * be shot. 
- */ - - -static irqreturn_t math_error_irq(int cpl, void *dev_id) -{ - extern void math_error(void __user *); - outb(0,0xF0); - if (ignore_fpu_irq || !boot_cpu_data.hard_math) - return IRQ_NONE; - math_error((void __user *)get_irq_regs()->eip); - return IRQ_HANDLED; -} - -/* - * New motherboards sometimes make IRQ 13 be a PCI interrupt, - * so allow interrupt sharing. - */ -static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL }; - -void __init init_ISA_irqs (void) -{ - int i; - -#ifdef CONFIG_X86_LOCAL_APIC - init_bsp_APIC(); -#endif - init_8259A(0); - - for (i = 0; i < NR_IRQS; i++) { - irq_desc[i].status = IRQ_DISABLED; - irq_desc[i].action = NULL; - irq_desc[i].depth = 1; - - if (i < 16) { - /* - * 16 old-style INTA-cycle interrupts: - */ - set_irq_chip_and_handler_name(i, &i8259A_chip, - handle_level_irq, "XT"); - } else { - /* - * 'high' PCI IRQs filled in on demand - */ - irq_desc[i].chip = &no_irq_chip; - } - } -} - -/* Overridden in paravirt.c */ -void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); - -void __init native_init_IRQ(void) -{ - int i; - - /* all the set up before the call gates are initialised */ - pre_intr_init_hook(); - - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; - if (i >= NR_IRQS) - break; - if (vector != SYSCALL_VECTOR) - set_intr_gate(vector, interrupt[i]); - } - - /* setup after call gates are initialised (usually add in - * the architecture specific gates) - */ - intr_init_hook(); - - /* - * External FPU? Set up irq13 if so, for - * original braindamaged IBM FERR coupling. 
- */ - if (boot_cpu_data.hard_math && !cpu_has_fpu) - setup_irq(FPU_IRQ, &fpu_irq); - - irq_ctx_init(smp_processor_id()); -} diff --git a/arch/i386/kernel/init_task_32.c b/arch/i386/kernel/init_task_32.c deleted file mode 100644 index d26fc063a76..00000000000 --- a/arch/i386/kernel/init_task_32.c +++ /dev/null @@ -1,46 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -struct mm_struct init_mm = INIT_MM(init_mm); - -EXPORT_SYMBOL(init_mm); - -/* - * Initial thread structure. - * - * We need to make sure that this is THREAD_SIZE aligned due to the - * way process stacks are handled. This is done by having a special - * "init_task" linker map entry.. - */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ -struct task_struct init_task = INIT_TASK(init_task); - -EXPORT_SYMBOL(init_task); - -/* - * per-CPU TSS segments. Threads are completely 'soft' on Linux, - * no more per-task TSS's. - */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; - diff --git a/arch/i386/kernel/io_apic_32.c b/arch/i386/kernel/io_apic_32.c deleted file mode 100644 index e2f4a1c6854..00000000000 --- a/arch/i386/kernel/io_apic_32.c +++ /dev/null @@ -1,2847 +0,0 @@ -/* - * Intel IO-APIC support for multi-Pentium hosts. - * - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo - * - * Many thanks to Stig Venaas for trying out countless experimental - * patches and reporting/debugging problems patiently! 
- * - * (c) 1999, Multiple IO-APIC support, developed by - * Ken-ichi Yaku and - * Hidemi Kishimoto , - * further tested and cleaned up by Zach Brown - * and Ingo Molnar - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively - * Paul Diefenbaugh : Added full ACPI support - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "io_ports.h" - -int (*ioapic_renumber_irq)(int ioapic, int irq); -atomic_t irq_mis_count; - -/* Where if anywhere is the i8259 connect in external int mode */ -static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; - -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); - -int timer_over_8254 __initdata = 1; - -/* - * Is the SiS APIC rmw bug present ? - * -1 = don't know, 0 = no, 1 = yes - */ -int sis_apic_bug = -1; - -/* - * # of IRQ routing registers - */ -int nr_ioapic_registers[MAX_IO_APICS]; - -static int disable_timer_pin_1 __initdata; - -/* - * Rough estimation of how many shared IRQs there are, can - * be changed anytime. - */ -#define MAX_PLUS_SHARED_IRQS NR_IRQS -#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) - -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. 
- */ - -static struct irq_pin_list { - int apic, pin, next; -} irq_2_pin[PIN_MAP_SIZE]; - -struct io_apic { - unsigned int index; - unsigned int unused[3]; - unsigned int data; -}; - -static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) -{ - return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); -} - -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - return readl(&io_apic->data); -} - -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - -/* - * Re-write a value: to be used for read-modify-write - * cycles where the read already set up the index register. - * - * Older SiS APIC requires we rewrite the index register - */ -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) -{ - volatile struct io_apic __iomem *io_apic = io_apic_base(apic); - if (sis_apic_bug) - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - -union entry_union { - struct { u32 w1, w2; }; - struct IO_APIC_route_entry entry; -}; - -static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) -{ - union entry_union eu; - unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); - eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); - eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); - return eu.entry; -} - -/* - * When we write a new IO APIC routing entry, we need to write the high - * word first! If the mask bit in the low word is clear, we will enable - * the interrupt, and we need to make sure the entry is fully populated - * before that happens. 
- */ -static void -__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) -{ - union entry_union eu; - eu.entry = e; - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); -} - -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) -{ - unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); - __ioapic_write_entry(apic, pin, e); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -/* - * When we mask an IO APIC routing entry, we need to write the low - * word first, in order to set the mask bit before we change the - * high bits! - */ -static void ioapic_mask_entry(int apic, int pin) -{ - unsigned long flags; - union entry_union eu = { .entry.mask = 1 }; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -/* - * The common case is 1:1 IRQ<->pin mappings. Sometimes there are - * shared ISA-space IRQs, so we have to support them. We are super - * fast in the common case, and fast for shared ISA-space IRQs. - */ -static void add_pin_to_irq(unsigned int irq, int apic, int pin) -{ - static int first_free_entry = NR_IRQS; - struct irq_pin_list *entry = irq_2_pin + irq; - - while (entry->next) - entry = irq_2_pin + entry->next; - - if (entry->pin != -1) { - entry->next = first_free_entry; - entry = irq_2_pin + entry->next; - if (++first_free_entry >= PIN_MAP_SIZE) - panic("io_apic.c: whoops"); - } - entry->apic = apic; - entry->pin = pin; -} - -/* - * Reroute an IRQ to a different pin. 
- */ -static void __init replace_pin_at_irq(unsigned int irq, - int oldapic, int oldpin, - int newapic, int newpin) -{ - struct irq_pin_list *entry = irq_2_pin + irq; - - while (1) { - if (entry->apic == oldapic && entry->pin == oldpin) { - entry->apic = newapic; - entry->pin = newpin; - } - if (!entry->next) - break; - entry = irq_2_pin + entry->next; - } -} - -static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) -{ - struct irq_pin_list *entry = irq_2_pin + irq; - unsigned int pin, reg; - - for (;;) { - pin = entry->pin; - if (pin == -1) - break; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - reg &= ~disable; - reg |= enable; - io_apic_modify(entry->apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = irq_2_pin + entry->next; - } -} - -/* mask = 1 */ -static void __mask_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0x00010000, 0); -} - -/* mask = 0 */ -static void __unmask_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0, 0x00010000); -} - -/* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); -} - -/* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); -} - -static void mask_IO_APIC_irq (unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - __mask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void unmask_IO_APIC_irq (unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) -{ - struct IO_APIC_route_entry entry; - - /* Check delivery_mode to be sure we're not clearing an SMI pin */ - entry = ioapic_read_entry(apic, pin); - if (entry.delivery_mode == 
dest_SMI) - return; - - /* - * Disable it in the IO-APIC irq-routing table: - */ - ioapic_mask_entry(apic, pin); -} - -static void clear_IO_APIC (void) -{ - int apic, pin; - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - clear_IO_APIC_pin(apic, pin); -} - -#ifdef CONFIG_SMP -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) -{ - unsigned long flags; - int pin; - struct irq_pin_list *entry = irq_2_pin + irq; - unsigned int apicid_value; - cpumask_t tmp; - - cpus_and(tmp, cpumask, cpu_online_map); - if (cpus_empty(tmp)) - tmp = TARGET_CPUS; - - cpus_and(cpumask, tmp, CPU_MASK_ALL); - - apicid_value = cpu_mask_to_apicid(cpumask); - /* Prepare to do the io_apic_write */ - apicid_value = apicid_value << 24; - spin_lock_irqsave(&ioapic_lock, flags); - for (;;) { - pin = entry->pin; - if (pin == -1) - break; - io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); - if (!entry->next) - break; - entry = irq_2_pin + entry->next; - } - irq_desc[irq].affinity = cpumask; - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -#if defined(CONFIG_IRQBALANCE) -# include /* kernel_thread() */ -# include /* kstat */ -# include /* kmalloc() */ -# include /* time_after() */ - -#define IRQBALANCE_CHECK_ARCH -999 -#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) -#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) -#define BALANCED_IRQ_MORE_DELTA (HZ/10) -#define BALANCED_IRQ_LESS_DELTA (HZ) - -static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH; -static int physical_balance __read_mostly; -static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; - -static struct irq_cpu_info { - unsigned long * last_irq; - unsigned long * irq_delta; - unsigned long irq; -} irq_cpu_data[NR_CPUS]; - -#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) -#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq]) -#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) - -#define IDLE_ENOUGH(cpu,now) \ - 
(idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) - -#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) - -#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) - -static cpumask_t balance_irq_affinity[NR_IRQS] = { - [0 ... NR_IRQS-1] = CPU_MASK_ALL -}; - -void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) -{ - balance_irq_affinity[irq] = mask; -} - -static unsigned long move(int curr_cpu, cpumask_t allowed_mask, - unsigned long now, int direction) -{ - int search_idle = 1; - int cpu = curr_cpu; - - goto inside; - - do { - if (unlikely(cpu == curr_cpu)) - search_idle = 0; -inside: - if (direction == 1) { - cpu++; - if (cpu >= NR_CPUS) - cpu = 0; - } else { - cpu--; - if (cpu == -1) - cpu = NR_CPUS-1; - } - } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) || - (search_idle && !IDLE_ENOUGH(cpu,now))); - - return cpu; -} - -static inline void balance_irq(int cpu, int irq) -{ - unsigned long now = jiffies; - cpumask_t allowed_mask; - unsigned int new_cpu; - - if (irqbalance_disabled) - return; - - cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); - new_cpu = move(cpu, allowed_mask, now, 1); - if (cpu != new_cpu) { - set_pending_irq(irq, cpumask_of_cpu(new_cpu)); - } -} - -static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) -{ - int i, j; - - for_each_online_cpu(i) { - for (j = 0; j < NR_IRQS; j++) { - if (!irq_desc[j].action) - continue; - /* Is it a significant load ? 
*/ - if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) < - useful_load_threshold) - continue; - balance_irq(i, j); - } - } - balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); - return; -} - -static void do_irq_balance(void) -{ - int i, j; - unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); - unsigned long move_this_load = 0; - int max_loaded = 0, min_loaded = 0; - int load; - unsigned long useful_load_threshold = balanced_irq_interval + 10; - int selected_irq; - int tmp_loaded, first_attempt = 1; - unsigned long tmp_cpu_irq; - unsigned long imbalance = 0; - cpumask_t allowed_mask, target_cpu_mask, tmp; - - for_each_possible_cpu(i) { - int package_index; - CPU_IRQ(i) = 0; - if (!cpu_online(i)) - continue; - package_index = CPU_TO_PACKAGEINDEX(i); - for (j = 0; j < NR_IRQS; j++) { - unsigned long value_now, delta; - /* Is this an active IRQ or balancing disabled ? */ - if (!irq_desc[j].action || irq_balancing_disabled(j)) - continue; - if ( package_index == i ) - IRQ_DELTA(package_index,j) = 0; - /* Determine the total count per processor per IRQ */ - value_now = (unsigned long) kstat_cpu(i).irqs[j]; - - /* Determine the activity per processor per IRQ */ - delta = value_now - LAST_CPU_IRQ(i,j); - - /* Update last_cpu_irq[][] for the next time */ - LAST_CPU_IRQ(i,j) = value_now; - - /* Ignore IRQs whose rate is less than the clock */ - if (delta < useful_load_threshold) - continue; - /* update the load for the processor or package total */ - IRQ_DELTA(package_index,j) += delta; - - /* Keep track of the higher numbered sibling as well */ - if (i != package_index) - CPU_IRQ(i) += delta; - /* - * We have sibling A and sibling B in the package - * - * cpu_irq[A] = load for cpu A + load for cpu B - * cpu_irq[B] = load for cpu B - */ - CPU_IRQ(package_index) += delta; - } - } - /* Find the least loaded processor package */ - for_each_online_cpu(i) { - if (i != CPU_TO_PACKAGEINDEX(i)) - continue; - if (min_cpu_irq > 
CPU_IRQ(i)) { - min_cpu_irq = CPU_IRQ(i); - min_loaded = i; - } - } - max_cpu_irq = ULONG_MAX; - -tryanothercpu: - /* Look for heaviest loaded processor. - * We may come back to get the next heaviest loaded processor. - * Skip processors with trivial loads. - */ - tmp_cpu_irq = 0; - tmp_loaded = -1; - for_each_online_cpu(i) { - if (i != CPU_TO_PACKAGEINDEX(i)) - continue; - if (max_cpu_irq <= CPU_IRQ(i)) - continue; - if (tmp_cpu_irq < CPU_IRQ(i)) { - tmp_cpu_irq = CPU_IRQ(i); - tmp_loaded = i; - } - } - - if (tmp_loaded == -1) { - /* In the case of small number of heavy interrupt sources, - * loading some of the cpus too much. We use Ingo's original - * approach to rotate them around. - */ - if (!first_attempt && imbalance >= useful_load_threshold) { - rotate_irqs_among_cpus(useful_load_threshold); - return; - } - goto not_worth_the_effort; - } - - first_attempt = 0; /* heaviest search */ - max_cpu_irq = tmp_cpu_irq; /* load */ - max_loaded = tmp_loaded; /* processor */ - imbalance = (max_cpu_irq - min_cpu_irq) / 2; - - /* if imbalance is less than approx 10% of max load, then - * observe diminishing returns action. - quit - */ - if (imbalance < (max_cpu_irq >> 3)) - goto not_worth_the_effort; - -tryanotherirq: - /* if we select an IRQ to move that can't go where we want, then - * see if there is another one to try. - */ - move_this_load = 0; - selected_irq = -1; - for (j = 0; j < NR_IRQS; j++) { - /* Is this an active IRQ? */ - if (!irq_desc[j].action) - continue; - if (imbalance <= IRQ_DELTA(max_loaded,j)) - continue; - /* Try to find the IRQ that is closest to the imbalance - * without going over. 
- */ - if (move_this_load < IRQ_DELTA(max_loaded,j)) { - move_this_load = IRQ_DELTA(max_loaded,j); - selected_irq = j; - } - } - if (selected_irq == -1) { - goto tryanothercpu; - } - - imbalance = move_this_load; - - /* For physical_balance case, we accumlated both load - * values in the one of the siblings cpu_irq[], - * to use the same code for physical and logical processors - * as much as possible. - * - * NOTE: the cpu_irq[] array holds the sum of the load for - * sibling A and sibling B in the slot for the lowest numbered - * sibling (A), _AND_ the load for sibling B in the slot for - * the higher numbered sibling. - * - * We seek the least loaded sibling by making the comparison - * (A+B)/2 vs B - */ - load = CPU_IRQ(min_loaded) >> 1; - for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { - if (load > CPU_IRQ(j)) { - /* This won't change cpu_sibling_map[min_loaded] */ - load = CPU_IRQ(j); - min_loaded = j; - } - } - - cpus_and(allowed_mask, - cpu_online_map, - balance_irq_affinity[selected_irq]); - target_cpu_mask = cpumask_of_cpu(min_loaded); - cpus_and(tmp, target_cpu_mask, allowed_mask); - - if (!cpus_empty(tmp)) { - /* mark for change destination */ - set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); - - /* Since we made a change, come back sooner to - * check for more variation. - */ - balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); - return; - } - goto tryanotherirq; - -not_worth_the_effort: - /* - * if we did not find an IRQ to move, then adjust the time interval - * upward - */ - balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, - balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); - return; -} - -static int balanced_irq(void *unused) -{ - int i; - unsigned long prev_balance_time = jiffies; - long time_remaining = balanced_irq_interval; - - /* push everything to CPU 0 to give us a starting point. 
*/ - for (i = 0 ; i < NR_IRQS ; i++) { - irq_desc[i].pending_mask = cpumask_of_cpu(0); - set_pending_irq(i, cpumask_of_cpu(0)); - } - - set_freezable(); - for ( ; ; ) { - time_remaining = schedule_timeout_interruptible(time_remaining); - try_to_freeze(); - if (time_after(jiffies, - prev_balance_time+balanced_irq_interval)) { - preempt_disable(); - do_irq_balance(); - prev_balance_time = jiffies; - time_remaining = balanced_irq_interval; - preempt_enable(); - } - } - return 0; -} - -static int __init balanced_irq_init(void) -{ - int i; - struct cpuinfo_x86 *c; - cpumask_t tmp; - - cpus_shift_right(tmp, cpu_online_map, 2); - c = &boot_cpu_data; - /* When not overwritten by the command line ask subarchitecture. */ - if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) - irqbalance_disabled = NO_BALANCE_IRQ; - if (irqbalance_disabled) - return 0; - - /* disable irqbalance completely if there is only one processor online */ - if (num_online_cpus() < 2) { - irqbalance_disabled = 1; - return 0; - } - /* - * Enable physical balance only if more than 1 physical processor - * is present - */ - if (smp_num_siblings > 1 && !cpus_empty(tmp)) - physical_balance = 1; - - for_each_online_cpu(i) { - irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); - irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); - if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { - printk(KERN_ERR "balanced_irq_init: out of memory"); - goto failed; - } - memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS); - memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS); - } - - printk(KERN_INFO "Starting balanced_irq\n"); - if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) - return 0; - printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); -failed: - for_each_possible_cpu(i) { - kfree(irq_cpu_data[i].irq_delta); - irq_cpu_data[i].irq_delta = NULL; - kfree(irq_cpu_data[i].last_irq); - 
irq_cpu_data[i].last_irq = NULL; - } - return 0; -} - -int __devinit irqbalance_disable(char *str) -{ - irqbalance_disabled = 1; - return 1; -} - -__setup("noirqbalance", irqbalance_disable); - -late_initcall(balanced_irq_init); -#endif /* CONFIG_IRQBALANCE */ -#endif /* CONFIG_SMP */ - -#ifndef CONFIG_SMP -void fastcall send_IPI_self(int vector) -{ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write_around(APIC_ICR, cfg); -} -#endif /* !CONFIG_SMP */ - - -/* - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to - * specific CPU-side IRQs. - */ - -#define MAX_PIRQS 8 -static int pirq_entries [MAX_PIRQS]; -static int pirqs_enabled; -int skip_ioapic_setup; - -static int __init ioapic_pirq_setup(char *str) -{ - int i, max; - int ints[MAX_PIRQS+1]; - - get_options(str, ARRAY_SIZE(ints), ints); - - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; - - pirqs_enabled = 1; - apic_printk(APIC_VERBOSE, KERN_INFO - "PIRQ redirection, working around broken MP-BIOS.\n"); - max = MAX_PIRQS; - if (ints[0] < MAX_PIRQS) - max = ints[0]; - - for (i = 0; i < max; i++) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); - /* - * PIRQs are mapped upside down, usually. - */ - pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; - } - return 1; -} - -__setup("pirq=", ioapic_pirq_setup); - -/* - * Find the IRQ entry number of a certain pin. 
- */ -static int find_irq_entry(int apic, int pin, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == type && - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && - mp_irqs[i].mpc_dstirq == pin) - return i; - - return -1; -} - -/* - * Find the pin to which IRQ[irq] (ISA) is connected - */ -static int __init find_isa_irq_pin(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; - - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || - mp_bus_id_to_type[lbus] == MP_BUS_EISA || - mp_bus_id_to_type[lbus] == MP_BUS_MCA - ) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) - - return mp_irqs[i].mpc_dstirq; - } - return -1; -} - -static int __init find_isa_irq_apic(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; - - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || - mp_bus_id_to_type[lbus] == MP_BUS_EISA || - mp_bus_id_to_type[lbus] == MP_BUS_MCA - ) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) - break; - } - if (i < mp_irq_entries) { - int apic; - for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) - return apic; - } - } - - return -1; -} - -/* - * Find a specific PCI IRQ entry. 
- * Not an __init, possibly needed by modules - */ -static int pin_2_irq(int idx, int apic, int pin); - -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) -{ - int apic, i, best_guess = -1; - - apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " - "slot:%d, pin:%d.\n", bus, slot, pin); - if (mp_bus_id_to_pci_bus[bus] == -1) { - printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); - return -1; - } - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; - - for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) - break; - - if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) && - !mp_irqs[i].mpc_irqtype && - (bus == lbus) && - (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); - - if (!(apic || IO_APIC_IRQ(irq))) - continue; - - if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) - return irq; - /* - * Use the first all-but-pin matching entry as a - * best-guess fuzzy result for broken mptables. 
- */ - if (best_guess < 0) - best_guess = irq; - } - } - return best_guess; -} -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); - -/* - * This function currently is only a helper for the i386 smp boot process where - * we need to reprogram the ioredtbls to cater for the cpus which have come online - * so mask in all cases should simply be TARGET_CPUS - */ -#ifdef CONFIG_SMP -void __init setup_ioapic_dest(void) -{ - int pin, ioapic, irq, irq_entry; - - if (skip_ioapic_setup == 1) - return; - - for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { - for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { - irq_entry = find_irq_entry(ioapic, pin, mp_INT); - if (irq_entry == -1) - continue; - irq = pin_2_irq(irq_entry, ioapic, pin); - set_ioapic_affinity_irq(irq, TARGET_CPUS); - } - - } -} -#endif - -/* - * EISA Edge/Level control register, ELCR - */ -static int EISA_ELCR(unsigned int irq) -{ - if (irq < 16) { - unsigned int port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; - } - apic_printk(APIC_VERBOSE, KERN_INFO - "Broken MPtable reports ISA irq %d\n", irq); - return 0; -} - -/* EISA interrupts are always polarity zero and can be edge or level - * trigger depending on the ELCR value. If an interrupt is listed as - * EISA conforming in the MP table, that means its trigger type must - * be read in from the ELCR */ - -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) -#define default_EISA_polarity(idx) (0) - -/* ISA interrupts are always polarity zero edge triggered, - * when listed as conforming in the MP table. */ - -#define default_ISA_trigger(idx) (0) -#define default_ISA_polarity(idx) (0) - -/* PCI interrupts are always polarity one level triggered, - * when listed as conforming in the MP table. */ - -#define default_PCI_trigger(idx) (1) -#define default_PCI_polarity(idx) (1) - -/* MCA interrupts are always polarity zero level triggered, - * when listed as conforming in the MP table. 
*/ - -#define default_MCA_trigger(idx) (1) -#define default_MCA_polarity(idx) (0) - -static int __init MPBIOS_polarity(int idx) -{ - int bus = mp_irqs[idx].mpc_srcbus; - int polarity; - - /* - * Determine IRQ line polarity (high active or low active): - */ - switch (mp_irqs[idx].mpc_irqflag & 3) - { - case 0: /* conforms, ie. bus-type dependent polarity */ - { - switch (mp_bus_id_to_type[bus]) - { - case MP_BUS_ISA: /* ISA pin */ - { - polarity = default_ISA_polarity(idx); - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - polarity = default_EISA_polarity(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - polarity = default_PCI_polarity(idx); - break; - } - case MP_BUS_MCA: /* MCA pin */ - { - polarity = default_MCA_polarity(idx); - break; - } - default: - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - } - break; - } - case 1: /* high active */ - { - polarity = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - case 3: /* low active */ - { - polarity = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - } - return polarity; -} - -static int MPBIOS_trigger(int idx) -{ - int bus = mp_irqs[idx].mpc_srcbus; - int trigger; - - /* - * Determine IRQ trigger mode (edge or level sensitive): - */ - switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) - { - case 0: /* conforms, ie. 
bus-type dependent */ - { - switch (mp_bus_id_to_type[bus]) - { - case MP_BUS_ISA: /* ISA pin */ - { - trigger = default_ISA_trigger(idx); - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - trigger = default_EISA_trigger(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - trigger = default_PCI_trigger(idx); - break; - } - case MP_BUS_MCA: /* MCA pin */ - { - trigger = default_MCA_trigger(idx); - break; - } - default: - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - } - break; - } - case 1: /* edge */ - { - trigger = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - case 3: /* level */ - { - trigger = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 0; - break; - } - } - return trigger; -} - -static inline int irq_polarity(int idx) -{ - return MPBIOS_polarity(idx); -} - -static inline int irq_trigger(int idx) -{ - return MPBIOS_trigger(idx); -} - -static int pin_2_irq(int idx, int apic, int pin) -{ - int irq, i; - int bus = mp_irqs[idx].mpc_srcbus; - - /* - * Debugging check, we are in big trouble if this message pops up! - */ - if (mp_irqs[idx].mpc_dstirq != pin) - printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); - - switch (mp_bus_id_to_type[bus]) - { - case MP_BUS_ISA: /* ISA pin */ - case MP_BUS_EISA: - case MP_BUS_MCA: - { - irq = mp_irqs[idx].mpc_srcbusirq; - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - /* - * PCI IRQs are mapped in order - */ - i = irq = 0; - while (i < apic) - irq += nr_ioapic_registers[i++]; - irq += pin; - - /* - * For MPS mode, so far only needed by ES7000 platform - */ - if (ioapic_renumber_irq) - irq = ioapic_renumber_irq(apic, irq); - - break; - } - default: - { - printk(KERN_ERR "unknown bus type %d.\n",bus); - irq = 0; - break; - } - } - - /* - * PCI IRQ command line redirection. Yes, limits are hardcoded. 
- */ - if ((pin >= 16) && (pin <= 23)) { - if (pirq_entries[pin-16] != -1) { - if (!pirq_entries[pin-16]) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "disabling PIRQ%d\n", pin-16); - } else { - irq = pirq_entries[pin-16]; - apic_printk(APIC_VERBOSE, KERN_DEBUG - "using PIRQ%d -> IRQ %d\n", - pin-16, irq); - } - } - } - return irq; -} - -static inline int IO_APIC_irq_trigger(int irq) -{ - int apic, idx, pin; - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic,pin,mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) - return irq_trigger(idx); - } - } - /* - * nonexistent IRQs are edge default - */ - return 0; -} - -/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; - -static int __assign_irq_vector(int irq) -{ - static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; - int vector, offset, i; - - BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); - - if (irq_vector[irq] > 0) - return irq_vector[irq]; - - vector = current_vector; - offset = current_offset; -next: - vector += 8; - if (vector >= FIRST_SYSTEM_VECTOR) { - offset = (offset + 1) % 8; - vector = FIRST_DEVICE_VECTOR + offset; - } - if (vector == current_vector) - return -ENOSPC; - if (vector == SYSCALL_VECTOR) - goto next; - for (i = 0; i < NR_IRQ_VECTORS; i++) - if (irq_vector[i] == vector) - goto next; - - current_vector = vector; - current_offset = offset; - irq_vector[irq] = vector; - - return vector; -} - -static int assign_irq_vector(int irq) -{ - unsigned long flags; - int vector; - - spin_lock_irqsave(&vector_lock, flags); - vector = __assign_irq_vector(irq); - spin_unlock_irqrestore(&vector_lock, flags); - - return vector; -} -static struct irq_chip ioapic_chip; - -#define IOAPIC_AUTO -1 -#define IOAPIC_EDGE 0 -#define IOAPIC_LEVEL 1 - -static void ioapic_register_intr(int irq, int vector, unsigned long 
/*
 * Program a redirection entry for every I/O APIC pin that the MP table
 * lists as an mp_INT source.
 *
 * Pins without an MP table entry are only reported (verbose mode) as
 * "not connected".  For connected pins the entry is built from the table's
 * trigger/polarity, level-triggered pins start out masked, the pin is
 * recorded with add_pin_to_irq(), and IRQs that satisfy IO_APIC_IRQ() get a
 * vector and a flow handler before the entry is written under ioapic_lock.
 */
static void __init setup_IO_APIC_irqs(void)
{
	struct IO_APIC_route_entry entry;
	/* first_notcon: 1 until the first unconnected pin has been printed */
	int apic, pin, idx, irq, first_notcon = 1, vector;
	unsigned long flags;

	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");

	for (apic = 0; apic < nr_ioapics; apic++) {
	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {

		/*
		 * add it to the IO-APIC irq-routing table:
		 */
		memset(&entry,0,sizeof(entry));

		entry.delivery_mode = INT_DELIVERY_MODE;
		entry.dest_mode = INT_DEST_MODE;
		entry.mask = 0; /* enable IRQ */
		entry.dest.logical.logical_dest =
					cpu_mask_to_apicid(TARGET_CPUS);

		idx = find_irq_entry(apic,pin,mp_INT);
		if (idx == -1) {
			/* no MP table entry: add the pin to the "not connected" list */
			if (first_notcon) {
				apic_printk(APIC_VERBOSE, KERN_DEBUG
						" IO-APIC (apicid-pin) %d-%d",
						mp_ioapics[apic].mpc_apicid,
						pin);
				first_notcon = 0;
			} else
				apic_printk(APIC_VERBOSE, ", %d-%d",
					mp_ioapics[apic].mpc_apicid, pin);
			continue;
		}

		entry.trigger = irq_trigger(idx);
		entry.polarity = irq_polarity(idx);

		/* level-triggered entries are written masked */
		if (irq_trigger(idx)) {
			entry.trigger = 1;
			entry.mask = 1;
		}

		irq = pin_2_irq(idx, apic, pin);
		/*
		 * skip adding the timer int on secondary nodes, which causes
		 * a small but painful rift in the time-space continuum
		 */
		if (multi_timer_check(apic, irq))
			continue;
		else
			add_pin_to_irq(irq, apic, pin);

		/* on the first I/O APIC, IRQs that fail IO_APIC_IRQ() are left alone */
		if (!apic && !IO_APIC_IRQ(irq))
			continue;

		if (IO_APIC_IRQ(irq)) {
			/*
			 * NOTE(review): assign_irq_vector() can return -ENOSPC;
			 * the result is used here without a check - confirm
			 * whether vector exhaustion is possible at this point.
			 */
			vector = assign_irq_vector(irq);
			entry.vector = vector;
			ioapic_register_intr(irq, vector, IOAPIC_AUTO);

			/* legacy IRQ now served by the first I/O APIC: turn it off at the 8259A */
			if (!apic && (irq < 16))
				disable_8259A_irq(irq);
		}
		spin_lock_irqsave(&ioapic_lock, flags);
		__ioapic_write_entry(apic, pin, entry);
		spin_unlock_irqrestore(&ioapic_lock, flags);
	}
	}

	/* terminate the list of unconnected pins, if one was started */
	if (!first_notcon)
		apic_printk(APIC_VERBOSE, " not connected.\n");
}
- */ - printk(KERN_INFO "testing the IO APIC.......................\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - reg_01.raw = io_apic_read(apic, 1); - if (reg_01.bits.version >= 0x10) - reg_02.raw = io_apic_read(apic, 2); - if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(apic, 3); - spin_unlock_irqrestore(&ioapic_lock, flags); - - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); - printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); - printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); - printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); - printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); - - printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); - printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); - - printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); - printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); - - /* - * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, - * but the value of reg_02 is read as the previous read register - * value, so ignore it if reg_02 == reg_01. - */ - if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); - printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); - } - - /* - * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 - * or reg_03, but the value of reg_0[23] is read as the previous read - * register value, so ignore it if reg_03 == reg_0[12]. - */ - if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && - reg_03.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); - printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); - } - - printk(KERN_DEBUG ".... 
IRQ redirection table:\n"); - - printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" - " Stat Dest Deli Vect: \n"); - - for (i = 0; i <= reg_01.bits.entries; i++) { - struct IO_APIC_route_entry entry; - - entry = ioapic_read_entry(apic, i); - - printk(KERN_DEBUG " %02x %03X %02X ", - i, - entry.dest.logical.logical_dest, - entry.dest.physical.physical_dest - ); - - printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", - entry.mask, - entry.trigger, - entry.irr, - entry.polarity, - entry.delivery_status, - entry.dest_mode, - entry.delivery_mode, - entry.vector - ); - } - } - printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for (i = 0; i < NR_IRQS; i++) { - struct irq_pin_list *entry = irq_2_pin + i; - if (entry->pin < 0) - continue; - printk(KERN_DEBUG "IRQ%d ", i); - for (;;) { - printk("-> %d:%d", entry->apic, entry->pin); - if (!entry->next) - break; - entry = irq_2_pin + entry->next; - } - printk("\n"); - } - - printk(KERN_INFO ".................................... done.\n"); - - return; -} - -#if 0 - -static void print_APIC_bitfield (int base) -{ - unsigned int v; - int i, j; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); - for (i = 0; i < 8; i++) { - v = apic_read(base + i*0x10); - for (j = 0; j < 32; j++) { - if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - v = apic_read(APIC_ESR); - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - } - - v = apic_read(APIC_ICR); - printk(KERN_DEBUG "... APIC ICR: %08x\n", v); - v = apic_read(APIC_ICR2); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); - - v = apic_read(APIC_LVTT); - printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); - - if (maxlvt > 3) { /* PC is LVT#4. */ - v = apic_read(APIC_LVTPC); - printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); - } - v = apic_read(APIC_LVT0); - printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); - v = apic_read(APIC_LVT1); - printk(KERN_DEBUG "... 
APIC LVT1: %08x\n", v); - - if (maxlvt > 2) { /* ERR is LVT#3. */ - v = apic_read(APIC_LVTERR); - printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); - } - - v = apic_read(APIC_TMICT); - printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); - v = apic_read(APIC_TMCCT); - printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); - v = apic_read(APIC_TDCR); - printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); - printk("\n"); -} - -void print_all_local_APICs (void) -{ - on_each_cpu(print_local_APIC, NULL, 1, 1); -} - -void /*__init*/ print_PIC(void) -{ - unsigned int v; - unsigned long flags; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "\nprinting PIC contents\n"); - - spin_lock_irqsave(&i8259A_lock, flags); - - v = inb(0xa1) << 8 | inb(0x21); - printk(KERN_DEBUG "... PIC IMR: %04x\n", v); - - v = inb(0xa0) << 8 | inb(0x20); - printk(KERN_DEBUG "... PIC IRR: %04x\n", v); - - outb(0x0b,0xa0); - outb(0x0b,0x20); - v = inb(0xa0) << 8 | inb(0x20); - outb(0x0a,0xa0); - outb(0x0a,0x20); - - spin_unlock_irqrestore(&i8259A_lock, flags); - - printk(KERN_DEBUG "... PIC ISR: %04x\n", v); - - v = inb(0x4d1) << 8 | inb(0x4d0); - printk(KERN_DEBUG "... 
/*
 * Boot-time preparation of the I/O APICs.
 *
 * Resets the irq_2_pin[] table (and pirq_entries[] unless "pirq=" already
 * filled it), reads the number of redirection entries of each I/O APIC,
 * works out which pin, if any, the i8259 is attached to, and finally clears
 * all I/O APIC entries.
 */
static void __init enable_IO_APIC(void)
{
	union IO_APIC_reg_01 reg_01;
	int i8259_apic, i8259_pin;
	int i, apic;
	unsigned long flags;

	/* mark every IRQ -> pin mapping as unused */
	for (i = 0; i < PIN_MAP_SIZE; i++) {
		irq_2_pin[i].pin = -1;
		irq_2_pin[i].next = 0;
	}
	if (!pirqs_enabled)
		for (i = 0; i < MAX_PIRQS; i++)
			pirq_entries[i] = -1;

	/*
	 * The number of IO-APIC IRQ registers (== #pins):
	 */
	for (apic = 0; apic < nr_ioapics; apic++) {
		spin_lock_irqsave(&ioapic_lock, flags);
		reg_01.raw = io_apic_read(apic, 1);
		spin_unlock_irqrestore(&ioapic_lock, flags);
		/* the register holds the highest entry index, hence the +1 */
		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
	}
	for(apic = 0; apic < nr_ioapics; apic++) {
		int pin;
		/* See if any of the pins is in ExtINT mode */
		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
			struct IO_APIC_route_entry entry;
			entry = ioapic_read_entry(apic, pin);


			/* If the interrupt line is enabled and in ExtInt mode
			 * I have found the pin where the i8259 is connected.
			 */
			if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
				ioapic_i8259.apic = apic;
				ioapic_i8259.pin = pin;
				goto found_i8259;
			}
		}
	}
 found_i8259:
	/* Look at what the MP table has reported for the ExtINT. */
	/* If we could not find the appropriate pin by looking at the ioapic,
	 * the i8259 probably is not connected to the ioapic, but give the
	 * mptable a chance anyway.
	 */
	i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
	i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
	/* Trust the MP table if nothing is setup in the hardware */
	if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
		printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
		ioapic_i8259.pin = i8259_pin;
		ioapic_i8259.apic = i8259_apic;
	}
	/* Complain if the MP table and the hardware disagree */
	if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
		(i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
	{
		printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
	}

	/*
	 * Do not trust the IO-APIC being empty at bootup
	 */
	clear_IO_APIC();
}

/*
 * Not an __init, needed by the reboot code.
 *
 * Clears all I/O APIC entries and, when an i8259 pin is known, reprograms
 * that pin as an unmasked ExtINT entry aimed at the current CPU's local
 * APIC before handing over to disconnect_bsp_APIC().
 */
void disable_IO_APIC(void)
{
	/*
	 * Clear the IO-APIC before rebooting:
	 */
	clear_IO_APIC();

	/*
	 * If the i8259 is routed through an IOAPIC
	 * Put that IOAPIC in virtual wire mode
	 * so legacy interrupts can be delivered.
	 */
	if (ioapic_i8259.pin != -1) {
		struct IO_APIC_route_entry entry;

		memset(&entry, 0, sizeof(entry));
		entry.mask = 0; /* Enabled */
		entry.trigger = 0; /* Edge */
		entry.irr = 0;
		entry.polarity = 0; /* High */
		entry.delivery_status = 0;
		entry.dest_mode = 0; /* Physical */
		entry.delivery_mode = dest_ExtINT; /* ExtInt */
		entry.vector = 0;
		/* deliver to the local APIC of the CPU executing this code */
		entry.dest.physical.physical_dest =
					GET_APIC_ID(apic_read(APIC_ID));

		/*
		 * Add it to the IO-APIC irq-routing table:
		 */
		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
	}
	/* the argument says whether a virtual-wire pin was programmed above */
	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
}
/*
 * Set the I/O APIC physical IDs from the values stored in the MPC table.
 *
 * For each I/O APIC the MPC-provided ID is validated (below the physical
 * broadcast ID, not already present in the physical ID map), replaced by a
 * usable one if necessary, propagated into the MP IRQ routing entries, then
 * written to register 0 and read back for verification.
 * Compiled out to an empty stub when CONFIG_X86_NUMAQ is set.
 */
#ifndef CONFIG_X86_NUMAQ
static void __init setup_ioapic_ids_from_mpc(void)
{
	union IO_APIC_reg_00 reg_00;
	physid_mask_t phys_id_present_map;
	int apic;
	int i;
	unsigned char old_id;
	unsigned long flags;

	/*
	 * Don't check I/O APIC IDs for xAPIC systems. They have
	 * no meaning without the serial APIC bus.
	 */
	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
		return;
	/*
	 * This is broken; anything with a real cpu count has to
	 * circumvent this idiocy regardless.
	 */
	phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);

	/*
	 * Set the IOAPIC ID to the value stored in the MPC table.
	 */
	for (apic = 0; apic < nr_ioapics; apic++) {

		/* Read the register 0 value */
		spin_lock_irqsave(&ioapic_lock, flags);
		reg_00.raw = io_apic_read(apic, 0);
		spin_unlock_irqrestore(&ioapic_lock, flags);

		/* remember the table's ID so routing entries can be patched later */
		old_id = mp_ioapics[apic].mpc_apicid;

		/* an ID at or above the broadcast ID is bogus: keep the hardware's ID */
		if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
				apic, mp_ioapics[apic].mpc_apicid);
			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
				reg_00.bits.ID);
			mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
		}

		/*
		 * Sanity check, is the ID really free? Every APIC in a
		 * system must have a unique ID or we get lots of nice
		 * 'stuck on smp_invalidate_needed IPI wait' messages.
		 */
		if (check_apicid_used(phys_id_present_map,
					mp_ioapics[apic].mpc_apicid)) {
			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
				apic, mp_ioapics[apic].mpc_apicid);
			/* take the lowest ID not yet present in the map */
			for (i = 0; i < get_physical_broadcast(); i++)
				if (!physid_isset(i, phys_id_present_map))
					break;
			if (i >= get_physical_broadcast())
				panic("Max APIC ID exceeded!\n");
			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
				i);
			physid_set(i, phys_id_present_map);
			mp_ioapics[apic].mpc_apicid = i;
		} else {
			/* ID is free: claim it in the map */
			physid_mask_t tmp;
			tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
			apic_printk(APIC_VERBOSE, "Setting %d in the "
					"phys_id_present_map\n",
					mp_ioapics[apic].mpc_apicid);
			physids_or(phys_id_present_map, phys_id_present_map, tmp);
		}


		/*
		 * We need to adjust the IRQ routing table
		 * if the ID changed.
		 */
		if (old_id != mp_ioapics[apic].mpc_apicid)
			for (i = 0; i < mp_irq_entries; i++)
				if (mp_irqs[i].mpc_dstapic == old_id)
					mp_irqs[i].mpc_dstapic
						= mp_ioapics[apic].mpc_apicid;

		/*
		 * Read the right value from the MPC table and
		 * write it into the ID register.
		 */
		/* no newline here: the verdict printed below completes this line */
		apic_printk(APIC_VERBOSE, KERN_INFO
			"...changing IO-APIC physical APIC ID to %d ...",
			mp_ioapics[apic].mpc_apicid);

		reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
		spin_lock_irqsave(&ioapic_lock, flags);
		io_apic_write(apic, 0, reg_00.raw);
		spin_unlock_irqrestore(&ioapic_lock, flags);

		/*
		 * Sanity check: read the register back and compare.
		 */
		spin_lock_irqsave(&ioapic_lock, flags);
		reg_00.raw = io_apic_read(apic, 0);
		spin_unlock_irqrestore(&ioapic_lock, flags);
		if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
			printk("could not set ID!\n");
		else
			apic_printk(APIC_VERBOSE, " ok.\n");
	}
}
#else
static void __init setup_ioapic_ids_from_mpc(void) { }
#endif
We do the following to work around the situation: - * - * - timer IRQ defaults to IO-APIC IRQ - * - if this function detects that timer IRQs are defunct, then we fall - * back to ISA timer IRQs - */ -static int __init timer_irq_works(void) -{ - unsigned long t1 = jiffies; - - if (no_timer_check) - return 1; - - local_irq_enable(); - /* Let ten ticks pass... */ - mdelay((10 * 1000) / HZ); - - /* - * Expect a few ticks at least, to be sure some possible - * glue logic does not lock up after one or two first - * ticks in a non-ExtINT mode. Also the local APIC - * might have cached one ExtINT interrupt. Finally, at - * least one tick may be lost due to delays. - */ - if (jiffies - t1 > 4) - return 1; - - return 0; -} - -/* - * In the SMP+IOAPIC case it might happen that there are an unspecified - * number of pending IRQ events unhandled. These cases are very rare, - * so we 'resend' these IRQs via IPIs, to the same CPU. It's much - * better to do it this way as thus we do not have to be aware of - * 'pending' interrupts in the IRQ path, except at this point. - */ -/* - * Edge triggered needs to resend any interrupt - * that was delayed but this is now handled in the device - * independent code. - */ - -/* - * Startup quirk: - * - * Starting up a edge-triggered IO-APIC interrupt is - * nasty - we need to make sure that we get the edge. - * If it is already asserted for some reason, we need - * return 1 to indicate that is was pending. - * - * This is not complete - we should be able to fake - * an edge even if it isn't on the 8259A... - * - * (We do this for level-triggered IRQs too - it cannot hurt.) 
- */ -static unsigned int startup_ioapic_irq(unsigned int irq) -{ - int was_pending = 0; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - if (irq < 16) { - disable_8259A_irq(irq); - if (i8259A_irq_pending(irq)) - was_pending = 1; - } - __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return was_pending; -} - -static void ack_ioapic_irq(unsigned int irq) -{ - move_native_irq(irq); - ack_APIC_irq(); -} - -static void ack_ioapic_quirk_irq(unsigned int irq) -{ - unsigned long v; - int i; - - move_native_irq(irq); -/* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. 
--macro - */ - i = irq_vector[irq]; - - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); - - ack_APIC_irq(); - - if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); - spin_unlock(&ioapic_lock); - } -} - -static int ioapic_retrigger_irq(unsigned int irq) -{ - send_IPI_self(irq_vector[irq]); - - return 1; -} - -static struct irq_chip ioapic_chip __read_mostly = { - .name = "IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_ioapic_irq, - .eoi = ack_ioapic_quirk_irq, -#ifdef CONFIG_SMP - .set_affinity = set_ioapic_affinity_irq, -#endif - .retrigger = ioapic_retrigger_irq, -}; - - -static inline void init_IO_APIC_traps(void) -{ - int irq; - - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - for (irq = 0; irq < NR_IRQS ; irq++) { - int tmp = irq; - if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) { - /* - * Hmm.. We don't have an entry for this, - * so default to an old-fashioned 8259 - * interrupt if we can.. - */ - if (irq < 16) - make_8259A_irq(irq); - else - /* Strange. Oh, well.. 
*/ - irq_desc[irq].chip = &no_irq_chip; - } - } -} - -/* - * The local APIC irq-chip implementation: - */ - -static void ack_apic(unsigned int irq) -{ - ack_APIC_irq(); -} - -static void mask_lapic_irq (unsigned int irq) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); -} - -static void unmask_lapic_irq (unsigned int irq) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); -} - -static struct irq_chip lapic_chip __read_mostly = { - .name = "local-APIC-edge", - .mask = mask_lapic_irq, - .unmask = unmask_lapic_irq, - .eoi = ack_apic, -}; - -static void setup_nmi (void) -{ - /* - * Dirty trick to enable the NMI watchdog ... - * We put the 8259A master into AEOI mode and - * unmask on all local APICs LVT0 as NMI. - * - * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') - * is from Maciej W. Rozycki - so we do not have to EOI from - * the NMI handler or the timer interrupt. - */ - apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); - - on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1); - - apic_printk(APIC_VERBOSE, " done.\n"); -} - -/* - * This looks a bit hackish but it's about the only one way of sending - * a few INTA cycles to 8259As and any associated glue logic. ICR does - * not support the ExtINT mode, unfortunately. We need to send these - * cycles as some i82489DX-based boards have glue logic that keeps the - * 8259A interrupt line asserted until INTA. 
--macro - */ -static inline void unlock_ExtINT_logic(void) -{ - int apic, pin, i; - struct IO_APIC_route_entry entry0, entry1; - unsigned char save_control, save_freq_select; - - pin = find_isa_irq_pin(8, mp_INT); - if (pin == -1) { - WARN_ON_ONCE(1); - return; - } - apic = find_isa_irq_apic(8, mp_INT); - if (apic == -1) { - WARN_ON_ONCE(1); - return; - } - - entry0 = ioapic_read_entry(apic, pin); - clear_IO_APIC_pin(apic, pin); - - memset(&entry1, 0, sizeof(entry1)); - - entry1.dest_mode = 0; /* physical delivery */ - entry1.mask = 0; /* unmask IRQ now */ - entry1.dest.physical.physical_dest = hard_smp_processor_id(); - entry1.delivery_mode = dest_ExtINT; - entry1.polarity = entry0.polarity; - entry1.trigger = 0; - entry1.vector = 0; - - ioapic_write_entry(apic, pin, entry1); - - save_control = CMOS_READ(RTC_CONTROL); - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, - RTC_FREQ_SELECT); - CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); - - i = 100; - while (i-- > 0) { - mdelay(10); - if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) - i -= 10; - } - - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - clear_IO_APIC_pin(apic, pin); - - ioapic_write_entry(apic, pin, entry0); -} - -int timer_uses_ioapic_pin_0; - -/* - * This code may look a bit paranoid, but it's supposed to cooperate with - * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ - * is so screwy. Thanks to Brian Perkins for testing/hacking this beast - * fanatically on his truly buggy board. - */ -static inline void __init check_timer(void) -{ - int apic1, pin1, apic2, pin2; - int vector; - - /* - * get/set the timer IRQ vector: - */ - disable_8259A_irq(0); - vector = assign_irq_vector(0); - set_intr_gate(vector, interrupt[0]); - - /* - * Subtle, code in do_timer_interrupt() expects an AEOI - * mode for the 8259A whenever interrupts are routed - * through I/O APICs. 
Also IRQ0 has to be enabled in - * the 8259A which implies the virtual wire has to be - * disabled in the local APIC. - */ - apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); - init_8259A(1); - timer_ack = 1; - if (timer_over_8254 > 0) - enable_8259A_irq(0); - - pin1 = find_isa_irq_pin(0, mp_INT); - apic1 = find_isa_irq_apic(0, mp_INT); - pin2 = ioapic_i8259.pin; - apic2 = ioapic_i8259.apic; - - if (pin1 == 0) - timer_uses_ioapic_pin_0 = 1; - - printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", - vector, apic1, pin1, apic2, pin2); - - if (pin1 != -1) { - /* - * Ok, does IRQ0 through the IOAPIC work? - */ - unmask_IO_APIC_irq(0); - if (timer_irq_works()) { - if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); - setup_nmi(); - enable_8259A_irq(0); - } - if (disable_timer_pin_1 > 0) - clear_IO_APIC_pin(0, pin1); - return; - } - clear_IO_APIC_pin(apic1, pin1); - printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to " - "IO-APIC\n"); - } - - printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); - if (pin2 != -1) { - printk("\n..... (found pin %d) ...", pin2); - /* - * legacy devices should be connected to IO APIC #0 - */ - setup_ExtINT_IRQ0_pin(apic2, pin2, vector); - if (timer_irq_works()) { - printk("works.\n"); - if (pin1 != -1) - replace_pin_at_irq(0, apic1, pin1, apic2, pin2); - else - add_pin_to_irq(0, apic2, pin2); - if (nmi_watchdog == NMI_IO_APIC) { - setup_nmi(); - } - return; - } - /* - * Cleanup, just in case ... 
- */ - clear_IO_APIC_pin(apic2, pin2); - } - printk(" failed.\n"); - - if (nmi_watchdog == NMI_IO_APIC) { - printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); - nmi_watchdog = 0; - } - - printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); - - disable_8259A_irq(0); - set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, - "fasteoi"); - apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ - enable_8259A_irq(0); - - if (timer_irq_works()) { - printk(" works.\n"); - return; - } - apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); - printk(" failed.\n"); - - printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); - - timer_ack = 0; - init_8259A(0); - make_8259A_irq(0); - apic_write_around(APIC_LVT0, APIC_DM_EXTINT); - - unlock_ExtINT_logic(); - - if (timer_irq_works()) { - printk(" works.\n"); - return; - } - printk(" failed :(.\n"); - panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " - "report. Then try booting with the 'noapic' option"); -} - -/* - * - * IRQ's that are handled by the PIC in the MPS IOAPIC case. - * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. - * Linux doesn't really care, as it's not actually used - * for any interrupt handling anyway. - */ -#define PIC_IRQS (1 << PIC_CASCADE_IR) - -void __init setup_IO_APIC(void) -{ - enable_IO_APIC(); - - if (acpi_ioapic) - io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ - else - io_apic_irqs = ~PIC_IRQS; - - printk("ENABLING IO-APIC IRQs\n"); - - /* - * Set up IO-APIC IRQ routing. 
- */ - if (!acpi_ioapic) - setup_ioapic_ids_from_mpc(); - sync_Arb_IDs(); - setup_IO_APIC_irqs(); - init_IO_APIC_traps(); - check_timer(); - if (!acpi_ioapic) - print_IO_APIC(); -} - -static int __init setup_disable_8254_timer(char *s) -{ - timer_over_8254 = -1; - return 1; -} -static int __init setup_enable_8254_timer(char *s) -{ - timer_over_8254 = 2; - return 1; -} - -__setup("disable_8254_timer", setup_disable_8254_timer); -__setup("enable_8254_timer", setup_enable_8254_timer); - -/* - * Called after all the initialization is done. If we didnt find any - * APIC bugs then we can allow the modify fast path - */ - -static int __init io_apic_bug_finalize(void) -{ - if(sis_apic_bug == -1) - sis_apic_bug = 0; - return 0; -} - -late_initcall(io_apic_bug_finalize); - -struct sysfs_ioapic_data { - struct sys_device dev; - struct IO_APIC_route_entry entry[0]; -}; -static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; - -static int ioapic_suspend(struct sys_device *dev, pm_message_t state) -{ - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; - int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) - entry[i] = ioapic_read_entry(dev->id, i); - - return 0; -} - -static int ioapic_resume(struct sys_device *dev) -{ - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; - unsigned long flags; - union IO_APIC_reg_00 reg_00; - int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; - io_apic_write(dev->id, 0, reg_00.raw); - } - spin_unlock_irqrestore(&ioapic_lock, flags); - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) - ioapic_write_entry(dev->id, i, entry[i]); - - return 0; -} - -static struct sysdev_class 
ioapic_sysdev_class = { - set_kset_name("ioapic"), - .suspend = ioapic_suspend, - .resume = ioapic_resume, -}; - -static int __init ioapic_init_sysfs(void) -{ - struct sys_device * dev; - int i, size, error = 0; - - error = sysdev_class_register(&ioapic_sysdev_class); - if (error) - return error; - - for (i = 0; i < nr_ioapics; i++ ) { - size = sizeof(struct sys_device) + nr_ioapic_registers[i] - * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); - if (!mp_ioapic_data[i]) { - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - memset(mp_ioapic_data[i], 0, size); - dev = &mp_ioapic_data[i]->dev; - dev->id = i; - dev->cls = &ioapic_sysdev_class; - error = sysdev_register(dev); - if (error) { - kfree(mp_ioapic_data[i]); - mp_ioapic_data[i] = NULL; - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - } - - return 0; -} - -device_initcall(ioapic_init_sysfs); - -/* - * Dynamic irq allocate and deallocation - */ -int create_irq(void) -{ - /* Allocate an unused irq */ - int irq, new, vector = 0; - unsigned long flags; - - irq = -ENOSPC; - spin_lock_irqsave(&vector_lock, flags); - for (new = (NR_IRQS - 1); new >= 0; new--) { - if (platform_legacy_irq(new)) - continue; - if (irq_vector[new] != 0) - continue; - vector = __assign_irq_vector(new); - if (likely(vector > 0)) - irq = new; - break; - } - spin_unlock_irqrestore(&vector_lock, flags); - - if (irq >= 0) { - set_intr_gate(vector, interrupt[irq]); - dynamic_irq_init(irq); - } - return irq; -} - -void destroy_irq(unsigned int irq) -{ - unsigned long flags; - - dynamic_irq_cleanup(irq); - - spin_lock_irqsave(&vector_lock, flags); - irq_vector[irq] = 0; - spin_unlock_irqrestore(&vector_lock, flags); -} - -/* - * MSI mesage composition - */ -#ifdef CONFIG_PCI_MSI -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) -{ - int vector; - unsigned dest; - - vector = assign_irq_vector(irq); - if (vector >= 0) { - 
dest = cpu_mask_to_apicid(TARGET_CPUS); - - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = - MSI_ADDR_BASE_LO | - ((INT_DEST_MODE == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(vector); - } - return vector; -} - -#ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) -{ - struct msi_msg msg; - unsigned int dest; - cpumask_t tmp; - int vector; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - tmp = TARGET_CPUS; - - vector = assign_irq_vector(irq); - if (vector < 0) - return; - - dest = cpu_mask_to_apicid(mask); - - read_msi_msg(irq, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - write_msi_msg(irq, &msg); - irq_desc[irq].affinity = mask; -} -#endif /* CONFIG_SMP */ - -/* - * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, - * which implement the MSI or MSI-X Capability Structure. 
- */ -static struct irq_chip msi_chip = { - .name = "PCI-MSI", - .unmask = unmask_msi_irq, - .mask = mask_msi_irq, - .ack = ack_ioapic_irq, -#ifdef CONFIG_SMP - .set_affinity = set_msi_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) -{ - struct msi_msg msg; - int irq, ret; - irq = create_irq(); - if (irq < 0) - return irq; - - ret = msi_compose_msg(dev, irq, &msg); - if (ret < 0) { - destroy_irq(irq); - return ret; - } - - set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); - - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, - "edge"); - - return 0; -} - -void arch_teardown_msi_irq(unsigned int irq) -{ - destroy_irq(irq); -} - -#endif /* CONFIG_PCI_MSI */ - -/* - * Hypertransport interrupt support - */ -#ifdef CONFIG_HT_IRQ - -#ifdef CONFIG_SMP - -static void target_ht_irq(unsigned int irq, unsigned int dest) -{ - struct ht_irq_msg msg; - fetch_ht_irq_msg(irq, &msg); - - msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK); - msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - - msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest); - msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); - - write_ht_irq_msg(irq, &msg); -} - -static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) -{ - unsigned int dest; - cpumask_t tmp; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - tmp = TARGET_CPUS; - - cpus_and(mask, tmp, CPU_MASK_ALL); - - dest = cpu_mask_to_apicid(mask); - - target_ht_irq(irq, dest); - irq_desc[irq].affinity = mask; -} -#endif - -static struct irq_chip ht_irq_chip = { - .name = "PCI-HT", - .mask = mask_ht_irq, - .unmask = unmask_ht_irq, - .ack = ack_ioapic_irq, -#ifdef CONFIG_SMP - .set_affinity = set_ht_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) -{ - int vector; - - vector = assign_irq_vector(irq); - if (vector >= 0) { - struct ht_irq_msg msg; - unsigned dest; - cpumask_t tmp; - 
- cpus_clear(tmp); - cpu_set(vector >> 8, tmp); - dest = cpu_mask_to_apicid(tmp); - - msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); - - msg.address_lo = - HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(dest) | - HT_IRQ_LOW_VECTOR(vector) | - ((INT_DEST_MODE == 0) ? - HT_IRQ_LOW_DM_PHYSICAL : - HT_IRQ_LOW_DM_LOGICAL) | - HT_IRQ_LOW_RQEOI_EDGE | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - HT_IRQ_LOW_MT_FIXED : - HT_IRQ_LOW_MT_ARBITRATED) | - HT_IRQ_LOW_IRQ_MASKED; - - write_ht_irq_msg(irq, &msg); - - set_irq_chip_and_handler_name(irq, &ht_irq_chip, - handle_edge_irq, "edge"); - } - return vector; -} -#endif /* CONFIG_HT_IRQ */ - -/* -------------------------------------------------------------------------- - ACPI-based IOAPIC Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI - -int __init io_apic_get_unique_id (int ioapic, int apic_id) -{ - union IO_APIC_reg_00 reg_00; - static physid_mask_t apic_id_map = PHYSID_MASK_NONE; - physid_mask_t tmp; - unsigned long flags; - int i = 0; - - /* - * The P4 platform supports up to 256 APIC IDs on two separate APIC - * buses (one for LAPICs, one for IOAPICs), where predecessors only - * supports up to 16 on one shared APIC bus. - * - * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full - * advantage of new APIC bus architecture. - */ - - if (physids_empty(apic_id_map)) - apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - if (apic_id >= get_physical_broadcast()) { - printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " - "%d\n", ioapic, apic_id, reg_00.bits.ID); - apic_id = reg_00.bits.ID; - } - - /* - * Every APIC in a system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. 
- */ - if (check_apicid_used(apic_id_map, apic_id)) { - - for (i = 0; i < get_physical_broadcast(); i++) { - if (!check_apicid_used(apic_id_map, i)) - break; - } - - if (i == get_physical_broadcast()) - panic("Max apic_id exceeded!\n"); - - printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " - "trying %d\n", ioapic, apic_id, i); - - apic_id = i; - } - - tmp = apicid_to_cpu_present(apic_id); - physids_or(apic_id_map, apic_id_map, tmp); - - if (reg_00.bits.ID != apic_id) { - reg_00.bits.ID = apic_id; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic, 0, reg_00.raw); - reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - /* Sanity check */ - if (reg_00.bits.ID != apic_id) { - printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); - return -1; - } - } - - apic_printk(APIC_VERBOSE, KERN_INFO - "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); - - return apic_id; -} - - -int __init io_apic_get_version (int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.version; -} - - -int __init io_apic_get_redir_entries (int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.entries; -} - - -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) -{ - struct IO_APIC_route_entry entry; - unsigned long flags; - - if (!IO_APIC_IRQ(irq)) { - printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - ioapic); - return -EINVAL; - } - - /* - * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. - * Note that we mask (disable) IRQs now -- these get enabled when the - * corresponding device driver registers for this IRQ. 
- */ - - memset(&entry,0,sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.trigger = edge_level; - entry.polarity = active_high_low; - entry.mask = 1; - - /* - * IRQs < 16 are already in the irq_2_pin[] map - */ - if (irq >= 16) - add_pin_to_irq(irq, ioapic, pin); - - entry.vector = assign_irq_vector(irq); - - apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " - "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, - edge_level, active_high_low); - - ioapic_register_intr(irq, entry.vector, edge_level); - - if (!ioapic && (irq < 16)) - disable_8259A_irq(irq); - - spin_lock_irqsave(&ioapic_lock, flags); - __ioapic_write_entry(ioapic, pin, entry); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return 0; -} - -#endif /* CONFIG_ACPI */ - -static int __init parse_disable_timer_pin_1(char *arg) -{ - disable_timer_pin_1 = 1; - return 0; -} -early_param("disable_timer_pin_1", parse_disable_timer_pin_1); - -static int __init parse_enable_timer_pin_1(char *arg) -{ - disable_timer_pin_1 = -1; - return 0; -} -early_param("enable_timer_pin_1", parse_enable_timer_pin_1); - -static int __init parse_noapic(char *arg) -{ - /* disable IO-APIC */ - disable_ioapic_setup(); - return 0; -} -early_param("noapic", parse_noapic); diff --git a/arch/i386/kernel/ioport_32.c b/arch/i386/kernel/ioport_32.c deleted file mode 100644 index 3d310a946d7..00000000000 --- a/arch/i386/kernel/ioport_32.c +++ /dev/null @@ -1,153 +0,0 @@ -/* - * linux/arch/i386/kernel/ioport.c - * - * This contains the io-permission bitmap code - written by obz, with changes - * by Linus. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. 
*/ -static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) -{ - unsigned long mask; - unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG); - unsigned int low_index = base & (BITS_PER_LONG-1); - int length = low_index + extent; - - if (low_index != 0) { - mask = (~0UL << low_index); - if (length < BITS_PER_LONG) - mask &= ~(~0UL << length); - if (new_value) - *bitmap_base++ |= mask; - else - *bitmap_base++ &= ~mask; - length -= BITS_PER_LONG; - } - - mask = (new_value ? ~0UL : 0UL); - while (length >= BITS_PER_LONG) { - *bitmap_base++ = mask; - length -= BITS_PER_LONG; - } - - if (length > 0) { - mask = ~(~0UL << length); - if (new_value) - *bitmap_base++ |= mask; - else - *bitmap_base++ &= ~mask; - } -} - - -/* - * this changes the io permissions bitmap in the current task. - */ -asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) -{ - unsigned long i, max_long, bytes, bytes_updated; - struct thread_struct * t = &current->thread; - struct tss_struct * tss; - unsigned long *bitmap; - - if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) - return -EINVAL; - if (turn_on && !capable(CAP_SYS_RAWIO)) - return -EPERM; - - /* - * If it's the first ioperm() call in this thread's lifetime, set the - * IO bitmap up. ioperm() is much less timing critical than clone(), - * this is why we delay this operation until now: - */ - if (!t->io_bitmap_ptr) { - bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); - if (!bitmap) - return -ENOMEM; - - memset(bitmap, 0xff, IO_BITMAP_BYTES); - t->io_bitmap_ptr = bitmap; - set_thread_flag(TIF_IO_BITMAP); - } - - /* - * do it in the per-thread copy and in the TSS ... - * - * Disable preemption via get_cpu() - we must not switch away - * because the ->io_bitmap_max value must match the bitmap - * contents: - */ - tss = &per_cpu(init_tss, get_cpu()); - - set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); - - /* - * Search for a (possibly new) maximum.
This is simple and stupid, - * to keep it obviously correct: - */ - max_long = 0; - for (i = 0; i < IO_BITMAP_LONGS; i++) - if (t->io_bitmap_ptr[i] != ~0UL) - max_long = i; - - bytes = (max_long + 1) * sizeof(long); - bytes_updated = max(bytes, t->io_bitmap_max); - - t->io_bitmap_max = bytes; - - /* - * Sets the lazy trigger so that the next I/O operation will - * reload the correct bitmap. - * Reset the owner so that a process switch will not set - * tss->io_bitmap_base to IO_BITMAP_OFFSET. - */ - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; - tss->io_bitmap_owner = NULL; - - put_cpu(); - - return 0; -} - -/* - * sys_iopl has to be used when you want to access the IO ports - * beyond the 0x3ff range: to get the full 65536 ports bitmapped - * you'd need 8kB of bitmaps/process, which is a bit excessive. - * - * Here we just change the eflags value on the stack: we allow - * only the super-user to do it. This depends on the stack-layout - * on system-call entry - see also fork() and the signal handling - * code. - */ - -asmlinkage long sys_iopl(unsigned long unused) -{ - volatile struct pt_regs * regs = (struct pt_regs *) &unused; - unsigned int level = regs->ebx; - unsigned int old = (regs->eflags >> 12) & 3; - struct thread_struct *t = &current->thread; - - if (level > 3) - return -EINVAL; - /* Trying to gain more privileges? */ - if (level > old) { - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - } - t->iopl = level << 12; - regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl; - set_iopl_mask(t->iopl); - return 0; -} diff --git a/arch/i386/kernel/irq_32.c b/arch/i386/kernel/irq_32.c deleted file mode 100644 index dd2b97fc00b..00000000000 --- a/arch/i386/kernel/irq_32.c +++ /dev/null @@ -1,343 +0,0 @@ -/* - * linux/arch/i386/kernel/irq.c - * - * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar - * - * This file contains the lowest level x86-specific interrupt - * entry, irq-stacks and irq statistics code.
All the remaining - * irq logic is done by the generic kernel/irq/ code and - * by the x86-specific irq controller code. (e.g. i8259.c and - * io_apic.c.) - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); -EXPORT_PER_CPU_SYMBOL(irq_stat); - -DEFINE_PER_CPU(struct pt_regs *, irq_regs); -EXPORT_PER_CPU_SYMBOL(irq_regs); - -/* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves. - */ -void ack_bad_irq(unsigned int irq) -{ - printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); - -#ifdef CONFIG_X86_LOCAL_APIC - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. - * But only ack when the APIC is enabled -AK - */ - if (cpu_has_apic) - ack_APIC_irq(); -#endif -} - -#ifdef CONFIG_4KSTACKS -/* - * per-CPU IRQ handling contexts (thread information and stack) - */ -union irq_ctx { - struct thread_info tinfo; - u32 stack[THREAD_SIZE/sizeof(u32)]; -}; - -static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; -static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; -#endif - -/* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). 
- */ -fastcall unsigned int do_IRQ(struct pt_regs *regs) -{ - struct pt_regs *old_regs; - /* high bit used in ret_from_ code */ - int irq = ~regs->orig_eax; - struct irq_desc *desc = irq_desc + irq; -#ifdef CONFIG_4KSTACKS - union irq_ctx *curctx, *irqctx; - u32 *isp; -#endif - - if (unlikely((unsigned)irq >= NR_IRQS)) { - printk(KERN_EMERG "%s: cannot handle IRQ %d\n", - __FUNCTION__, irq); - BUG(); - } - - old_regs = set_irq_regs(regs); - irq_enter(); -#ifdef CONFIG_DEBUG_STACKOVERFLOW - /* Debugging check for stack overflow: is there less than 1KB free? */ - { - long esp; - - __asm__ __volatile__("andl %%esp,%0" : - "=r" (esp) : "0" (THREAD_SIZE - 1)); - if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { - printk("do_IRQ: stack overflow: %ld\n", - esp - sizeof(struct thread_info)); - dump_stack(); - } - } -#endif - -#ifdef CONFIG_4KSTACKS - - curctx = (union irq_ctx *) current_thread_info(); - irqctx = hardirq_ctx[smp_processor_id()]; - - /* - * this is where we switch to the IRQ stack. However, if we are - * already using the IRQ stack (because we interrupted a hardirq - * handler) we can't do that and just have to keep using the - * current stack (which is the irq stack already after all) - */ - if (curctx != irqctx) { - int arg1, arg2, ebx; - - /* build the stack frame on the IRQ stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); - irqctx->tinfo.task = curctx->tinfo.task; - irqctx->tinfo.previous_esp = current_stack_pointer; - - /* - * Copy the softirq bits in preempt_count so that the - * softirq checks work in the hardirq context. 
- */ - irqctx->tinfo.preempt_count = - (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | - (curctx->tinfo.preempt_count & SOFTIRQ_MASK); - - asm volatile( - " xchgl %%ebx,%%esp \n" - " call *%%edi \n" - " movl %%ebx,%%esp \n" - : "=a" (arg1), "=d" (arg2), "=b" (ebx) - : "0" (irq), "1" (desc), "2" (isp), - "D" (desc->handle_irq) - : "memory", "cc" - ); - } else -#endif - desc->handle_irq(irq, desc); - - irq_exit(); - set_irq_regs(old_regs); - return 1; -} - -#ifdef CONFIG_4KSTACKS - -static char softirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__section__(".bss.page_aligned"))); - -static char hardirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__section__(".bss.page_aligned"))); - -/* - * allocate per-cpu stacks for hardirq and for softirq processing - */ -void irq_ctx_init(int cpu) -{ - union irq_ctx *irqctx; - - if (hardirq_ctx[cpu]) - return; - - irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; - irqctx->tinfo.task = NULL; - irqctx->tinfo.exec_domain = NULL; - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - - hardirq_ctx[cpu] = irqctx; - - irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE]; - irqctx->tinfo.task = NULL; - irqctx->tinfo.exec_domain = NULL; - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = 0; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - - softirq_ctx[cpu] = irqctx; - - printk("CPU %u irqstacks, hard=%p soft=%p\n", - cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); -} - -void irq_ctx_exit(int cpu) -{ - hardirq_ctx[cpu] = NULL; -} - -extern asmlinkage void __do_softirq(void); - -asmlinkage void do_softirq(void) -{ - unsigned long flags; - struct thread_info *curctx; - union irq_ctx *irqctx; - u32 *isp; - - if (in_interrupt()) - return; - - local_irq_save(flags); - - if (local_softirq_pending()) { - curctx = current_thread_info(); - irqctx = softirq_ctx[smp_processor_id()]; - irqctx->tinfo.task = curctx->task; - irqctx->tinfo.previous_esp = 
current_stack_pointer; - - /* build the stack frame on the softirq stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); - - asm volatile( - " xchgl %%ebx,%%esp \n" - " call __do_softirq \n" - " movl %%ebx,%%esp \n" - : "=b"(isp) - : "0"(isp) - : "memory", "cc", "edx", "ecx", "eax" - ); - /* - * Shouldnt happen, we returned above if in_interrupt(): - */ - WARN_ON_ONCE(softirq_count()); - } - - local_irq_restore(flags); -} - -EXPORT_SYMBOL(do_softirq); -#endif - -/* - * Interrupt statistics: - */ - -atomic_t irq_err_count; - -/* - * /proc/interrupts printing: - */ - -int show_interrupts(struct seq_file *p, void *v) -{ - int i = *(loff_t *) v, j; - struct irqaction * action; - unsigned long flags; - - if (i == 0) { - seq_printf(p, " "); - for_each_online_cpu(j) - seq_printf(p, "CPU%-8d",j); - seq_putc(p, '\n'); - } - - if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); - action = irq_desc[i].action; - if (!action) - goto skip; - seq_printf(p, "%3d: ",i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); -#endif - seq_printf(p, " %8s", irq_desc[i].chip->name); - seq_printf(p, "-%-8s", irq_desc[i].name); - seq_printf(p, " %s", action->name); - - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); - - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); - } else if (i == NR_IRQS) { - seq_printf(p, "NMI: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", nmi_count(j)); - seq_putc(p, '\n'); -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "LOC: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).apic_timer_irqs); - seq_putc(p, '\n'); -#endif - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); -#if defined(CONFIG_X86_IO_APIC) - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); -#endif - } - return 0; -} - -#ifdef CONFIG_HOTPLUG_CPU -#include - -void 
fixup_irqs(cpumask_t map) -{ - unsigned int irq; - static int warned; - - for (irq = 0; irq < NR_IRQS; irq++) { - cpumask_t mask; - if (irq == 2) - continue; - - cpus_and(mask, irq_desc[irq].affinity, map); - if (any_online_cpu(mask) == NR_CPUS) { - printk("Breaking affinity for irq %i\n", irq); - mask = map; - } - if (irq_desc[irq].chip->set_affinity) - irq_desc[irq].chip->set_affinity(irq, mask); - else if (irq_desc[irq].action && !(warned++)) - printk("Cannot set affinity for irq %i\n", irq); - } - -#if 0 - barrier(); - /* Ingo Molnar says: "after the IO-APIC masks have been redirected - [note the nop - the interrupt-enable boundary on x86 is two - instructions from sti] - to flush out pending hardirqs and - IPIs. After this point nothing is supposed to reach this CPU." */ - __asm__ __volatile__("sti; nop; cli"); - barrier(); -#else - /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); -#endif -} -#endif - diff --git a/arch/i386/kernel/kprobes_32.c b/arch/i386/kernel/kprobes_32.c deleted file mode 100644 index 448a50b1324..00000000000 --- a/arch/i386/kernel/kprobes_32.c +++ /dev/null @@ -1,751 +0,0 @@ -/* - * Kernel Probes (KProbes) - * arch/i386/kernel/kprobes.c - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * - * Copyright (C) IBM Corporation, 2002, 2004 - * - * 2002-Oct Created by Vamsi Krishna S Kernel - * Probes initial implementation ( includes contributions from - * Rusty Russell). - * 2004-July Suparna Bhattacharya added jumper probes - * interface to access function arguments. - * 2005-May Hien Nguyen , Jim Keniston - * and Prasanna S Panchamukhi - * added function-return probes. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -void jprobe_return_end(void); - -DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; -DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); - -/* insert a jmp code */ -static __always_inline void set_jmp_op(void *from, void *to) -{ - struct __arch_jmp_op { - char op; - long raddr; - } __attribute__((packed)) *jop; - jop = (struct __arch_jmp_op *)from; - jop->raddr = (long)(to) - ((long)(from) + 5); - jop->op = RELATIVEJUMP_INSTRUCTION; -} - -/* - * returns non-zero if opcodes can be boosted. - */ -static __always_inline int can_boost(kprobe_opcode_t *opcodes) -{ -#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \ - (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ - (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ - (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ - (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ - << (row % 32)) - /* - * Undefined/reserved opcodes, conditional jump, Opcode Extension - * Groups, and some special opcodes can not be boost. 
- */ - static const unsigned long twobyte_is_boostable[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ------------------------------- */ - W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */ - W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */ - W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */ - W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */ - W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */ - W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */ - W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */ - W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */ - W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */ - W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */ - W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */ - W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */ - W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */ - W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */ - W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */ - W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0) /* f0 */ - /* ------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - }; -#undef W - kprobe_opcode_t opcode; - kprobe_opcode_t *orig_opcodes = opcodes; -retry: - if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) - return 0; - opcode = *(opcodes++); - - /* 2nd-byte opcode */ - if (opcode == 0x0f) { - if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) - return 0; - return test_bit(*opcodes, twobyte_is_boostable); - } - - switch (opcode & 0xf0) { - case 0x60: - if (0x63 < opcode && opcode < 0x67) - goto retry; /* prefixes */ - /* can't boost Address-size override and bound */ - return (opcode != 0x62 && opcode != 0x67); - case 0x70: - return 0; /* can't boost conditional jump */ - case 0xc0: - /* can't boost software-interruptions */ - return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf; - case 0xd0: - /* can boost AA* and XLAT */ - return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); - case 0xe0: - /* can boost in/out and absolute jmps */ - return ((opcode & 0x04) || 
opcode == 0xea); - case 0xf0: - if ((opcode & 0x0c) == 0 && opcode != 0xf1) - goto retry; /* lock/rep(ne) prefix */ - /* clear and set flags can be boost */ - return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); - default: - if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e) - goto retry; /* prefixes */ - /* can't boost CS override and call */ - return (opcode != 0x2e && opcode != 0x9a); - } -} - -/* - * returns non-zero if opcode modifies the interrupt flag. - */ -static int __kprobes is_IF_modifier(kprobe_opcode_t opcode) -{ - switch (opcode) { - case 0xfa: /* cli */ - case 0xfb: /* sti */ - case 0xcf: /* iret/iretd */ - case 0x9d: /* popf/popfd */ - return 1; - } - return 0; -} - -int __kprobes arch_prepare_kprobe(struct kprobe *p) -{ - /* insn: must be on special executable page on i386. */ - p->ainsn.insn = get_insn_slot(); - if (!p->ainsn.insn) - return -ENOMEM; - - memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); - p->opcode = *p->addr; - if (can_boost(p->addr)) { - p->ainsn.boostable = 0; - } else { - p->ainsn.boostable = -1; - } - return 0; -} - -void __kprobes arch_arm_kprobe(struct kprobe *p) -{ - text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); -} - -void __kprobes arch_disarm_kprobe(struct kprobe *p) -{ - text_poke(p->addr, &p->opcode, 1); -} - -void __kprobes arch_remove_kprobe(struct kprobe *p) -{ - mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); - mutex_unlock(&kprobe_mutex); -} - -static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) -{ - kcb->prev_kprobe.kp = kprobe_running(); - kcb->prev_kprobe.status = kcb->kprobe_status; - kcb->prev_kprobe.old_eflags = kcb->kprobe_old_eflags; - kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags; -} - -static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) -{ - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; - kcb->kprobe_status = kcb->prev_kprobe.status; - 
kcb->kprobe_old_eflags = kcb->prev_kprobe.old_eflags; - kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags; -} - -static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) -{ - __get_cpu_var(current_kprobe) = p; - kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags - = (regs->eflags & (TF_MASK | IF_MASK)); - if (is_IF_modifier(p->opcode)) - kcb->kprobe_saved_eflags &= ~IF_MASK; -} - -static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) -{ - regs->eflags |= TF_MASK; - regs->eflags &= ~IF_MASK; - /*single step inline if the instruction is an int3*/ - if (p->opcode == BREAKPOINT_INSTRUCTION) - regs->eip = (unsigned long)p->addr; - else - regs->eip = (unsigned long)p->ainsn.insn; -} - -/* Called with kretprobe_lock held */ -void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs) -{ - unsigned long *sara = (unsigned long *)®s->esp; - - ri->ret_addr = (kprobe_opcode_t *) *sara; - - /* Replace the return addr with trampoline addr */ - *sara = (unsigned long) &kretprobe_trampoline; -} - -/* - * Interrupts are disabled on entry as trap3 is an interrupt gate and they - * remain disabled thorough out this function. 
- */ -static int __kprobes kprobe_handler(struct pt_regs *regs) -{ - struct kprobe *p; - int ret = 0; - kprobe_opcode_t *addr; - struct kprobe_ctlblk *kcb; - - addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); - - /* - * We don't want to be preempted for the entire - * duration of kprobe processing - */ - preempt_disable(); - kcb = get_kprobe_ctlblk(); - - /* Check we're not actually recursing */ - if (kprobe_running()) { - p = get_kprobe(addr); - if (p) { - if (kcb->kprobe_status == KPROBE_HIT_SS && - *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { - regs->eflags &= ~TF_MASK; - regs->eflags |= kcb->kprobe_saved_eflags; - goto no_kprobe; - } - /* We have reentered the kprobe_handler(), since - * another probe was hit while within the handler. - * We here save the original kprobes variables and - * just single step on the instruction of the new probe - * without calling any user handlers. - */ - save_previous_kprobe(kcb); - set_current_kprobe(p, regs, kcb); - kprobes_inc_nmissed_count(p); - prepare_singlestep(p, regs); - kcb->kprobe_status = KPROBE_REENTER; - return 1; - } else { - if (*addr != BREAKPOINT_INSTRUCTION) { - /* The breakpoint instruction was removed by - * another cpu right after we hit, no further - * handling of this interrupt is appropriate - */ - regs->eip -= sizeof(kprobe_opcode_t); - ret = 1; - goto no_kprobe; - } - p = __get_cpu_var(current_kprobe); - if (p->break_handler && p->break_handler(p, regs)) { - goto ss_probe; - } - } - goto no_kprobe; - } - - p = get_kprobe(addr); - if (!p) { - if (*addr != BREAKPOINT_INSTRUCTION) { - /* - * The breakpoint instruction was removed right - * after we hit it. Another cpu has removed - * either a probepoint or a debugger breakpoint - * at this address. In either case, no further - * handling of this interrupt is appropriate. - * Back up over the (now missing) int3 and run - * the original instruction. 
- */ - regs->eip -= sizeof(kprobe_opcode_t); - ret = 1; - } - /* Not one of ours: let kernel handle it */ - goto no_kprobe; - } - - set_current_kprobe(p, regs, kcb); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - - if (p->pre_handler && p->pre_handler(p, regs)) - /* handler has already set things up, so skip ss setup */ - return 1; - -ss_probe: -#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) - if (p->ainsn.boostable == 1 && !p->post_handler){ - /* Boost up -- we can execute copied instructions directly */ - reset_current_kprobe(); - regs->eip = (unsigned long)p->ainsn.insn; - preempt_enable_no_resched(); - return 1; - } -#endif - prepare_singlestep(p, regs); - kcb->kprobe_status = KPROBE_HIT_SS; - return 1; - -no_kprobe: - preempt_enable_no_resched(); - return ret; -} - -/* - * For function-return probes, init_kprobes() establishes a probepoint - * here. When a retprobed function returns, this probe is hit and - * trampoline_probe_handler() runs, calling the kretprobe's handler. - */ - void __kprobes kretprobe_trampoline_holder(void) - { - asm volatile ( ".global kretprobe_trampoline\n" - "kretprobe_trampoline: \n" - " pushf\n" - /* skip cs, eip, orig_eax */ - " subl $12, %esp\n" - " pushl %fs\n" - " pushl %ds\n" - " pushl %es\n" - " pushl %eax\n" - " pushl %ebp\n" - " pushl %edi\n" - " pushl %esi\n" - " pushl %edx\n" - " pushl %ecx\n" - " pushl %ebx\n" - " movl %esp, %eax\n" - " call trampoline_handler\n" - /* move eflags to cs */ - " movl 52(%esp), %edx\n" - " movl %edx, 48(%esp)\n" - /* save true return address on eflags */ - " movl %eax, 52(%esp)\n" - " popl %ebx\n" - " popl %ecx\n" - " popl %edx\n" - " popl %esi\n" - " popl %edi\n" - " popl %ebp\n" - " popl %eax\n" - /* skip eip, orig_eax, es, ds, fs */ - " addl $20, %esp\n" - " popf\n" - " ret\n"); -} - -/* - * Called from kretprobe_trampoline - */ -fastcall void *__kprobes trampoline_handler(struct pt_regs *regs) -{ - struct kretprobe_instance *ri = NULL; - struct hlist_head *head, empty_rp; - struct 
hlist_node *node, *tmp; - unsigned long flags, orig_ret_address = 0; - unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; - - INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); - /* fixup registers */ - regs->xcs = __KERNEL_CS | get_kernel_rpl(); - regs->eip = trampoline_address; - regs->orig_eax = 0xffffffff; - - /* - * It is possible to have multiple instances associated with a given - * task either because an multiple functions in the call path - * have a return probe installed on them, and/or more then one return - * return probe was registered for a target function. - * - * We can handle this because: - * - instances are always inserted at the head of the list - * - when multiple return probes are registered for the same - * function, the first instance's ret_addr will point to the - * real return address, and all the rest will point to - * kretprobe_trampoline - */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { - if (ri->task != current) - /* another task is sharing our hash bucket */ - continue; - - if (ri->rp && ri->rp->handler){ - __get_cpu_var(current_kprobe) = &ri->rp->kp; - get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; - ri->rp->handler(ri, regs); - __get_cpu_var(current_kprobe) = NULL; - } - - orig_ret_address = (unsigned long)ri->ret_addr; - recycle_rp_inst(ri, &empty_rp); - - if (orig_ret_address != trampoline_address) - /* - * This is the real return address. Any other - * instances associated with this task are for - * other calls deeper on the call stack - */ - break; - } - - kretprobe_assert(ri, orig_ret_address, trampoline_address); - spin_unlock_irqrestore(&kretprobe_lock, flags); - - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { - hlist_del(&ri->hlist); - kfree(ri); - } - return (void*)orig_ret_address; -} - -/* - * Called after single-stepping. 
p->addr is the address of the - * instruction whose first byte has been replaced by the "int 3" - * instruction. To avoid the SMP problems that can occur when we - * temporarily put back the original opcode to single-step, we - * single-stepped a copy of the instruction. The address of this - * copy is p->ainsn.insn. - * - * This function prepares to return from the post-single-step - * interrupt. We have to fix up the stack as follows: - * - * 0) Except in the case of absolute or indirect jump or call instructions, - * the new eip is relative to the copied instruction. We need to make - * it relative to the original instruction. - * - * 1) If the single-stepped instruction was pushfl, then the TF and IF - * flags are set in the just-pushed eflags, and may need to be cleared. - * - * 2) If the single-stepped instruction was a call, the return address - * that is atop the stack is the address following the copied instruction. - * We need to make it the address following the original instruction. - * - * This function also checks instruction size for preparing direct execution. 
- */ -static void __kprobes resume_execution(struct kprobe *p, - struct pt_regs *regs, struct kprobe_ctlblk *kcb) -{ - unsigned long *tos = (unsigned long *)®s->esp; - unsigned long copy_eip = (unsigned long)p->ainsn.insn; - unsigned long orig_eip = (unsigned long)p->addr; - - regs->eflags &= ~TF_MASK; - switch (p->ainsn.insn[0]) { - case 0x9c: /* pushfl */ - *tos &= ~(TF_MASK | IF_MASK); - *tos |= kcb->kprobe_old_eflags; - break; - case 0xc2: /* iret/ret/lret */ - case 0xc3: - case 0xca: - case 0xcb: - case 0xcf: - case 0xea: /* jmp absolute -- eip is correct */ - /* eip is already adjusted, no more changes required */ - p->ainsn.boostable = 1; - goto no_change; - case 0xe8: /* call relative - Fix return addr */ - *tos = orig_eip + (*tos - copy_eip); - break; - case 0x9a: /* call absolute -- same as call absolute, indirect */ - *tos = orig_eip + (*tos - copy_eip); - goto no_change; - case 0xff: - if ((p->ainsn.insn[1] & 0x30) == 0x10) { - /* - * call absolute, indirect - * Fix return addr; eip is correct. - * But this is not boostable - */ - *tos = orig_eip + (*tos - copy_eip); - goto no_change; - } else if (((p->ainsn.insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ - ((p->ainsn.insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ - /* eip is correct. And this is boostable */ - p->ainsn.boostable = 1; - goto no_change; - } - default: - break; - } - - if (p->ainsn.boostable == 0) { - if ((regs->eip > copy_eip) && - (regs->eip - copy_eip) + 5 < MAX_INSN_SIZE) { - /* - * These instructions can be executed directly if it - * jumps back to correct address. - */ - set_jmp_op((void *)regs->eip, - (void *)orig_eip + (regs->eip - copy_eip)); - p->ainsn.boostable = 1; - } else { - p->ainsn.boostable = -1; - } - } - - regs->eip = orig_eip + (regs->eip - copy_eip); - -no_change: - return; -} - -/* - * Interrupts are disabled on entry as trap1 is an interrupt gate and they - * remain disabled thoroughout this function. 
- */ -static int __kprobes post_kprobe_handler(struct pt_regs *regs) -{ - struct kprobe *cur = kprobe_running(); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - if (!cur) - return 0; - - if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - cur->post_handler(cur, regs, 0); - } - - resume_execution(cur, regs, kcb); - regs->eflags |= kcb->kprobe_saved_eflags; - - /*Restore back the original saved kprobes variables and continue. */ - if (kcb->kprobe_status == KPROBE_REENTER) { - restore_previous_kprobe(kcb); - goto out; - } - reset_current_kprobe(); -out: - preempt_enable_no_resched(); - - /* - * if somebody else is singlestepping across a probe point, eflags - * will have TF set, in which case, continue the remaining processing - * of do_debug, as if this is not a probe hit. - */ - if (regs->eflags & TF_MASK) - return 0; - - return 1; -} - -static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) -{ - struct kprobe *cur = kprobe_running(); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - switch(kcb->kprobe_status) { - case KPROBE_HIT_SS: - case KPROBE_REENTER: - /* - * We are here because the instruction being single - * stepped caused a page fault. We reset the current - * kprobe and the eip points back to the probe address - * and allow the page fault handler to continue as a - * normal page fault. - */ - regs->eip = (unsigned long)cur->addr; - regs->eflags |= kcb->kprobe_old_eflags; - if (kcb->kprobe_status == KPROBE_REENTER) - restore_previous_kprobe(kcb); - else - reset_current_kprobe(); - preempt_enable_no_resched(); - break; - case KPROBE_HIT_ACTIVE: - case KPROBE_HIT_SSDONE: - /* - * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accouting - * these specific fault cases. 
- */ - kprobes_inc_nmissed_count(cur); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it first. - */ - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - /* - * In case the user-specified fault handler returned - * zero, try to fix up. - */ - if (fixup_exception(regs)) - return 1; - - /* - * fixup_exception() could not handle it, - * Let do_page_fault() fix it. - */ - break; - default: - break; - } - return 0; -} - -/* - * Wrapper routine to for handling exceptions. - */ -int __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - if (args->regs && user_mode_vm(args->regs)) - return ret; - - switch (val) { - case DIE_INT3: - if (kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - case DIE_DEBUG: - if (post_kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - case DIE_GPF: - case DIE_PAGE_FAULT: - /* kprobe_running() needs smp_processor_id() */ - preempt_disable(); - if (kprobe_running() && - kprobe_fault_handler(args->regs, args->trapnr)) - ret = NOTIFY_STOP; - preempt_enable(); - break; - default: - break; - } - return ret; -} - -int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct jprobe *jp = container_of(p, struct jprobe, kp); - unsigned long addr; - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - kcb->jprobe_saved_regs = *regs; - kcb->jprobe_saved_esp = ®s->esp; - addr = (unsigned long)(kcb->jprobe_saved_esp); - - /* - * TBD: As Linus pointed out, gcc assumes that the callee - * owns the argument space and could overwrite it, e.g. - * tailcall optimization. So, to be absolutely safe - * we also save and restore enough stack bytes to cover - * the argument area. 
- */ - memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, - MIN_STACK_SIZE(addr)); - regs->eflags &= ~IF_MASK; - regs->eip = (unsigned long)(jp->entry); - return 1; -} - -void __kprobes jprobe_return(void) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - asm volatile (" xchgl %%ebx,%%esp \n" - " int3 \n" - " .globl jprobe_return_end \n" - " jprobe_return_end: \n" - " nop \n"::"b" - (kcb->jprobe_saved_esp):"memory"); -} - -int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - u8 *addr = (u8 *) (regs->eip - 1); - unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp); - struct jprobe *jp = container_of(p, struct jprobe, kp); - - if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { - if (®s->esp != kcb->jprobe_saved_esp) { - struct pt_regs *saved_regs = - container_of(kcb->jprobe_saved_esp, - struct pt_regs, esp); - printk("current esp %p does not match saved esp %p\n", - ®s->esp, kcb->jprobe_saved_esp); - printk("Saved registers for jprobe %p\n", jp); - show_registers(saved_regs); - printk("Current registers\n"); - show_registers(regs); - BUG(); - } - *regs = kcb->jprobe_saved_regs; - memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, - MIN_STACK_SIZE(stack_addr)); - preempt_enable_no_resched(); - return 1; - } - return 0; -} - -int __kprobes arch_trampoline_kprobe(struct kprobe *p) -{ - return 0; -} - -int __init arch_init_kprobes(void) -{ - return 0; -} diff --git a/arch/i386/kernel/ldt_32.c b/arch/i386/kernel/ldt_32.c deleted file mode 100644 index e0b2d17f4f1..00000000000 --- a/arch/i386/kernel/ldt_32.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * linux/arch/i386/kernel/ldt.c - * - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#ifdef CONFIG_SMP /* avoids 
"defined but not used" warnig */ -static void flush_ldt(void *null) -{ - if (current->active_mm) - load_LDT(¤t->active_mm->context); -} -#endif - -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) -{ - void *oldldt; - void *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - - if (reload) { -#ifdef CONFIG_SMP - cpumask_t mask; - preempt_disable(); - load_LDT(pc); - mask = cpumask_of_cpu(smp_processor_id()); - if (!cpus_equal(current->mm->cpu_vm_mask, mask)) - smp_call_function(flush_ldt, NULL, 1, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } - return 0; -} - -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) -{ - int err = alloc_ldt(new, old->size, 0); - if (err < 0) - return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); - return 0; -} - -/* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. 
- */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - struct mm_struct * old_mm; - int retval = 0; - - init_MUTEX(&mm->context.sem); - mm->context.size = 0; - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - down(&old_mm->context.sem); - retval = copy_ldt(&mm->context, &old_mm->context); - up(&old_mm->context.sem); - } - return retval; -} - -/* - * No need to lock the MM as we are the last user - */ -void destroy_context(struct mm_struct *mm) -{ - if (mm->context.size) { - if (mm == current->active_mm) - clear_LDT(); - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } -} - -static int read_ldt(void __user * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - struct mm_struct * mm = current->mm; - - if (!mm->context.size) - return 0; - if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - - down(&mm->context.sem); - size = mm->context.size*LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; - - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - up(&mm->context.sem); - if (err < 0) - goto error_return; - if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr+size, bytecount-size) != 0) { - err = -EFAULT; - goto error_return; - } - } - return bytecount; -error_return: - return err; -} - -static int read_default_ldt(void __user * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - - err = 0; - size = 5*sizeof(struct desc_struct); - if (size > bytecount) - size = bytecount; - - err = size; - if (clear_user(ptr, size)) - err = -EFAULT; - - return err; -} - -static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) -{ - struct mm_struct * mm = current->mm; - __u32 entry_1, entry_2; - int error; - struct user_desc ldt_info; - - error = -EINVAL; - if (bytecount != sizeof(ldt_info)) - goto out; - error = -EFAULT; 
- if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) - goto out; - - error = -EINVAL; - if (ldt_info.entry_number >= LDT_ENTRIES) - goto out; - if (ldt_info.contents == 3) { - if (oldmode) - goto out; - if (ldt_info.seg_not_present == 0) - goto out; - } - - down(&mm->context.sem); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); - if (error < 0) - goto out_unlock; - } - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - entry_1 = 0; - entry_2 = 0; - goto install; - } - } - - entry_1 = LDT_entry_a(&ldt_info); - entry_2 = LDT_entry_b(&ldt_info); - if (oldmode) - entry_2 &= ~(1 << 20); - - /* Install the new entry ... */ -install: - write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2); - error = 0; - -out_unlock: - up(&mm->context.sem); -out: - return error; -} - -asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) -{ - int ret = -ENOSYS; - - switch (func) { - case 0: - ret = read_ldt(ptr, bytecount); - break; - case 1: - ret = write_ldt(ptr, bytecount, 1); - break; - case 2: - ret = read_default_ldt(ptr, bytecount); - break; - case 0x11: - ret = write_ldt(ptr, bytecount, 0); - break; - } - return ret; -} diff --git a/arch/i386/kernel/machine_kexec_32.c b/arch/i386/kernel/machine_kexec_32.c deleted file mode 100644 index 91966bafb3d..00000000000 --- a/arch/i386/kernel/machine_kexec_32.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * machine_kexec.c - handle transition of Linux booting another kernel - * Copyright (C) 2002-2005 Eric Biederman - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) -static u32 kexec_pgd[1024] PAGE_ALIGNED; -#ifdef CONFIG_X86_PAE -static u32 kexec_pmd0[1024] PAGE_ALIGNED; -static u32 kexec_pmd1[1024] PAGE_ALIGNED; -#endif -static u32 kexec_pte0[1024] PAGE_ALIGNED; -static u32 kexec_pte1[1024] PAGE_ALIGNED; - -static void set_idt(void *newidt, __u16 limit) -{ - struct Xgt_desc_struct curidt; - - /* ia32 supports unaliged loads & stores */ - curidt.size = limit; - curidt.address = (unsigned long)newidt; - - load_idt(&curidt); -}; - - -static void set_gdt(void *newgdt, __u16 limit) -{ - struct Xgt_desc_struct curgdt; - - /* ia32 supports unaligned loads & stores */ - curgdt.size = limit; - curgdt.address = (unsigned long)newgdt; - - load_gdt(&curgdt); -}; - -static void load_segments(void) -{ -#define __STR(X) #X -#define STR(X) __STR(X) - - __asm__ __volatile__ ( - "\tljmp $"STR(__KERNEL_CS)",$1f\n" - "\t1:\n" - "\tmovl $"STR(__KERNEL_DS)",%%eax\n" - "\tmovl %%eax,%%ds\n" - "\tmovl %%eax,%%es\n" - "\tmovl %%eax,%%fs\n" - "\tmovl %%eax,%%gs\n" - "\tmovl %%eax,%%ss\n" - ::: "eax", "memory"); -#undef STR -#undef __STR -} - -/* - * A architecture hook called to validate the - * proposed image and prepare the control pages - * as needed. The pages for KEXEC_CONTROL_CODE_SIZE - * have been allocated, but the segments have yet - * been copied into the kernel. - * - * Do what every setup is needed on image and the - * reboot code buffer to allow us to avoid allocations - * later. - * - * Currently nothing. - */ -int machine_kexec_prepare(struct kimage *image) -{ - return 0; -} - -/* - * Undo anything leftover by machine_kexec_prepare - * when an image is freed. - */ -void machine_kexec_cleanup(struct kimage *image) -{ -} - -/* - * Do not allocate memory (or fail in any way) in machine_kexec(). 
- * We are past the point of no return, committed to rebooting now. - */ -NORET_TYPE void machine_kexec(struct kimage *image) -{ - unsigned long page_list[PAGES_NR]; - void *control_page; - - /* Interrupts aren't acceptable while we reboot */ - local_irq_disable(); - - control_page = page_address(image->control_code_page); - memcpy(control_page, relocate_kernel, PAGE_SIZE); - - page_list[PA_CONTROL_PAGE] = __pa(control_page); - page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; - page_list[PA_PGD] = __pa(kexec_pgd); - page_list[VA_PGD] = (unsigned long)kexec_pgd; -#ifdef CONFIG_X86_PAE - page_list[PA_PMD_0] = __pa(kexec_pmd0); - page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; - page_list[PA_PMD_1] = __pa(kexec_pmd1); - page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; -#endif - page_list[PA_PTE_0] = __pa(kexec_pte0); - page_list[VA_PTE_0] = (unsigned long)kexec_pte0; - page_list[PA_PTE_1] = __pa(kexec_pte1); - page_list[VA_PTE_1] = (unsigned long)kexec_pte1; - - /* The segment registers are funny things, they have both a - * visible and an invisible part. Whenever the visible part is - * set to a specific selector, the invisible part is loaded - * with from a table in memory. At no other time is the - * descriptor table in memory accessed. - * - * I take advantage of this here by force loading the - * segments, before I zap the gdt with an invalid value. - */ - load_segments(); - /* The gdt & idt are now invalid. - * If you want to load them you must set up your own idt & gdt. - */ - set_gdt(phys_to_virt(0),0); - set_idt(phys_to_virt(0),0); - - /* now call it */ - relocate_kernel((unsigned long)image->head, (unsigned long)page_list, - image->start, cpu_has_pae); -} - -/* crashkernel=size@addr specifies the location to reserve for - * a crash kernel. By reserving this memory we guarantee - * that linux never sets it up as a DMA target. - * Useful for holding code to do something appropriate - * after a kernel panic. 
- */ -static int __init parse_crashkernel(char *arg) -{ - unsigned long size, base; - size = memparse(arg, &arg); - if (*arg == '@') { - base = memparse(arg+1, &arg); - /* FIXME: Do I want a sanity check - * to validate the memory range? - */ - crashk_res.start = base; - crashk_res.end = base + size - 1; - } - return 0; -} -early_param("crashkernel", parse_crashkernel); diff --git a/arch/i386/kernel/mca_32.c b/arch/i386/kernel/mca_32.c deleted file mode 100644 index b83672b8952..00000000000 --- a/arch/i386/kernel/mca_32.c +++ /dev/null @@ -1,470 +0,0 @@ -/* - * linux/arch/i386/kernel/mca.c - * Written by Martin Kolinek, February 1996 - * - * Changes: - * - * Chris Beauregard July 28th, 1996 - * - Fixed up integrated SCSI detection - * - * Chris Beauregard August 3rd, 1996 - * - Made mca_info local - * - Made integrated registers accessible through standard function calls - * - Added name field - * - More sanity checking - * - * Chris Beauregard August 9th, 1996 - * - Rewrote /proc/mca - * - * Chris Beauregard January 7th, 1997 - * - Added basic NMI-processing - * - Added more information to mca_info structure - * - * David Weinehall October 12th, 1998 - * - Made a lot of cleaning up in the source - * - Added use of save_flags / restore_flags - * - Added the 'driver_loaded' flag in MCA_adapter - * - Added an alternative implemention of ZP Gu's mca_find_unused_adapter - * - * David Weinehall March 24th, 1999 - * - Fixed the output of 'Driver Installed' in /proc/mca/pos - * - Made the Integrated Video & SCSI show up even if they have id 0000 - * - * Alexander Viro November 9th, 1999 - * - Switched to regular procfs methods - * - * Alfred Arnold & David Weinehall August 23rd, 2000 - * - Added support for Planar POS-registers - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned char which_scsi = 0; - -int MCA_bus = 0; 
-EXPORT_SYMBOL(MCA_bus); - -/* - * Motherboard register spinlock. Untested on SMP at the moment, but - * are there any MCA SMP boxes? - * - * Yes - Alan - */ -static DEFINE_SPINLOCK(mca_lock); - -/* Build the status info for the adapter */ - -static void mca_configure_adapter_status(struct mca_device *mca_dev) { - mca_dev->status = MCA_ADAPTER_NONE; - - mca_dev->pos_id = mca_dev->pos[0] - + (mca_dev->pos[1] << 8); - - if(!mca_dev->pos_id && mca_dev->slot < MCA_MAX_SLOT_NR) { - - /* id = 0x0000 usually indicates hardware failure, - * however, ZP Gu (zpg@castle.net> reports that his 9556 - * has 0x0000 as id and everything still works. There - * also seem to be an adapter with id = 0x0000; the - * NCR Parallel Bus Memory Card. Until this is confirmed, - * however, this code will stay. - */ - - mca_dev->status = MCA_ADAPTER_ERROR; - - return; - } else if(mca_dev->pos_id != 0xffff) { - - /* 0xffff usually indicates that there's no adapter, - * however, some integrated adapters may have 0xffff as - * their id and still be valid. Examples are on-board - * VGA of the 55sx, the integrated SCSI of the 56 & 57, - * and possibly also the 95 ULTIMEDIA. 
- */ - - mca_dev->status = MCA_ADAPTER_NORMAL; - } - - if((mca_dev->pos_id == 0xffff || - mca_dev->pos_id == 0x0000) && mca_dev->slot >= MCA_MAX_SLOT_NR) { - int j; - - for(j = 2; j < 8; j++) { - if(mca_dev->pos[j] != 0xff) { - mca_dev->status = MCA_ADAPTER_NORMAL; - break; - } - } - } - - if(!(mca_dev->pos[2] & MCA_ENABLED)) { - - /* enabled bit is in POS 2 */ - - mca_dev->status = MCA_ADAPTER_DISABLED; - } -} /* mca_configure_adapter_status */ - -/*--------------------------------------------------------------------*/ - -static struct resource mca_standard_resources[] = { - { .start = 0x60, .end = 0x60, .name = "system control port B (MCA)" }, - { .start = 0x90, .end = 0x90, .name = "arbitration (MCA)" }, - { .start = 0x91, .end = 0x91, .name = "card Select Feedback (MCA)" }, - { .start = 0x92, .end = 0x92, .name = "system Control port A (MCA)" }, - { .start = 0x94, .end = 0x94, .name = "system board setup (MCA)" }, - { .start = 0x96, .end = 0x97, .name = "POS (MCA)" }, - { .start = 0x100, .end = 0x107, .name = "POS (MCA)" } -}; - -#define MCA_STANDARD_RESOURCES ARRAY_SIZE(mca_standard_resources) - -/** - * mca_read_and_store_pos - read the POS registers into a memory buffer - * @pos: a char pointer to 8 bytes, contains the POS register value on - * successful return - * - * Returns 1 if a card actually exists (i.e. the pos isn't - * all 0xff) or 0 otherwise - */ -static int mca_read_and_store_pos(unsigned char *pos) { - int j; - int found = 0; - - for(j=0; j<8; j++) { - if((pos[j] = inb_p(MCA_POS_REG(j))) != 0xff) { - /* 0xff all across means no device. 0x00 means - * something's broken, but a device is - * probably there. However, if you get 0x00 - * from a motherboard register it won't matter - * what we find. For the record, on the - * 57SLC, the integrated SCSI adapter has - * 0xffff for the adapter ID, but nonzero for - * other registers. 
*/ - - found = 1; - } - } - return found; -} - -static unsigned char mca_pc_read_pos(struct mca_device *mca_dev, int reg) -{ - unsigned char byte; - unsigned long flags; - - if(reg < 0 || reg >= 8) - return 0; - - spin_lock_irqsave(&mca_lock, flags); - if(mca_dev->pos_register) { - /* Disable adapter setup, enable motherboard setup */ - - outb_p(0, MCA_ADAPTER_SETUP_REG); - outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG); - - byte = inb_p(MCA_POS_REG(reg)); - outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); - } else { - - /* Make sure motherboard setup is off */ - - outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); - - /* Read the appropriate register */ - - outb_p(0x8|(mca_dev->slot & 0xf), MCA_ADAPTER_SETUP_REG); - byte = inb_p(MCA_POS_REG(reg)); - outb_p(0, MCA_ADAPTER_SETUP_REG); - } - spin_unlock_irqrestore(&mca_lock, flags); - - mca_dev->pos[reg] = byte; - - return byte; -} - -static void mca_pc_write_pos(struct mca_device *mca_dev, int reg, - unsigned char byte) -{ - unsigned long flags; - - if(reg < 0 || reg >= 8) - return; - - spin_lock_irqsave(&mca_lock, flags); - - /* Make sure motherboard setup is off */ - - outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); - - /* Read in the appropriate register */ - - outb_p(0x8|(mca_dev->slot&0xf), MCA_ADAPTER_SETUP_REG); - outb_p(byte, MCA_POS_REG(reg)); - outb_p(0, MCA_ADAPTER_SETUP_REG); - - spin_unlock_irqrestore(&mca_lock, flags); - - /* Update the global register list, while we have the byte */ - - mca_dev->pos[reg] = byte; - -} - -/* for the primary MCA bus, we have identity transforms */ -static int mca_dummy_transform_irq(struct mca_device * mca_dev, int irq) -{ - return irq; -} - -static int mca_dummy_transform_ioport(struct mca_device * mca_dev, int port) -{ - return port; -} - -static void *mca_dummy_transform_memory(struct mca_device * mca_dev, void *mem) -{ - return mem; -} - - -static int __init mca_init(void) -{ - unsigned int i, j; - struct mca_device *mca_dev; - unsigned char pos[8]; - short mca_builtin_scsi_ports[] 
= {0xf7, 0xfd, 0x00}; - struct mca_bus *bus; - - /* WARNING: Be careful when making changes here. Putting an adapter - * and the motherboard simultaneously into setup mode may result in - * damage to chips (according to The Indispensible PC Hardware Book - * by Hans-Peter Messmer). Also, we disable system interrupts (so - * that we are not disturbed in the middle of this). - */ - - /* Make sure the MCA bus is present */ - - if (mca_system_init()) { - printk(KERN_ERR "MCA bus system initialisation failed\n"); - return -ENODEV; - } - - if (!MCA_bus) - return -ENODEV; - - printk(KERN_INFO "Micro Channel bus detected.\n"); - - /* All MCA systems have at least a primary bus */ - bus = mca_attach_bus(MCA_PRIMARY_BUS); - if (!bus) - goto out_nomem; - bus->default_dma_mask = 0xffffffffLL; - bus->f.mca_write_pos = mca_pc_write_pos; - bus->f.mca_read_pos = mca_pc_read_pos; - bus->f.mca_transform_irq = mca_dummy_transform_irq; - bus->f.mca_transform_ioport = mca_dummy_transform_ioport; - bus->f.mca_transform_memory = mca_dummy_transform_memory; - - /* get the motherboard device */ - mca_dev = kzalloc(sizeof(struct mca_device), GFP_KERNEL); - if(unlikely(!mca_dev)) - goto out_nomem; - - /* - * We do not expect many MCA interrupts during initialization, - * but let us be safe: - */ - spin_lock_irq(&mca_lock); - - /* Make sure adapter setup is off */ - - outb_p(0, MCA_ADAPTER_SETUP_REG); - - /* Read motherboard POS registers */ - - mca_dev->pos_register = 0x7f; - outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG); - mca_dev->name[0] = 0; - mca_read_and_store_pos(mca_dev->pos); - mca_configure_adapter_status(mca_dev); - /* fake POS and slot for a motherboard */ - mca_dev->pos_id = MCA_MOTHERBOARD_POS; - mca_dev->slot = MCA_MOTHERBOARD; - mca_register_device(MCA_PRIMARY_BUS, mca_dev); - - mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); - if(unlikely(!mca_dev)) - goto out_unlock_nomem; - - /* Put motherboard into video setup mode, read integrated video - * POS 
registers, and turn motherboard setup off. - */ - - mca_dev->pos_register = 0xdf; - outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG); - mca_dev->name[0] = 0; - mca_read_and_store_pos(mca_dev->pos); - mca_configure_adapter_status(mca_dev); - /* fake POS and slot for the integrated video */ - mca_dev->pos_id = MCA_INTEGVIDEO_POS; - mca_dev->slot = MCA_INTEGVIDEO; - mca_register_device(MCA_PRIMARY_BUS, mca_dev); - - /* Put motherboard into scsi setup mode, read integrated scsi - * POS registers, and turn motherboard setup off. - * - * It seems there are two possible SCSI registers. Martin says that - * for the 56,57, 0xf7 is the one, but fails on the 76. - * Alfredo (apena@vnet.ibm.com) says - * 0xfd works on his machine. We'll try both of them. I figure it's - * a good bet that only one could be valid at a time. This could - * screw up though if one is used for something else on the other - * machine. - */ - - for(i = 0; (which_scsi = mca_builtin_scsi_ports[i]) != 0; i++) { - outb_p(which_scsi, MCA_MOTHERBOARD_SETUP_REG); - if(mca_read_and_store_pos(pos)) - break; - } - if(which_scsi) { - /* found a scsi card */ - mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); - if(unlikely(!mca_dev)) - goto out_unlock_nomem; - - for(j = 0; j < 8; j++) - mca_dev->pos[j] = pos[j]; - - mca_configure_adapter_status(mca_dev); - /* fake POS and slot for integrated SCSI controller */ - mca_dev->pos_id = MCA_INTEGSCSI_POS; - mca_dev->slot = MCA_INTEGSCSI; - mca_dev->pos_register = which_scsi; - mca_register_device(MCA_PRIMARY_BUS, mca_dev); - } - - /* Turn off motherboard setup */ - - outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); - - /* Now loop over MCA slots: put each adapter into setup mode, and - * read its POS registers. Then put adapter setup off. 
- */ - - for(i=0; ipos[j]=pos[j]; - - mca_dev->driver_loaded = 0; - mca_dev->slot = i; - mca_dev->pos_register = 0; - mca_configure_adapter_status(mca_dev); - mca_register_device(MCA_PRIMARY_BUS, mca_dev); - } - outb_p(0, MCA_ADAPTER_SETUP_REG); - - /* Enable interrupts and return memory start */ - spin_unlock_irq(&mca_lock); - - for (i = 0; i < MCA_STANDARD_RESOURCES; i++) - request_resource(&ioport_resource, mca_standard_resources + i); - - mca_do_proc_init(); - - return 0; - - out_unlock_nomem: - spin_unlock_irq(&mca_lock); - out_nomem: - printk(KERN_EMERG "Failed memory allocation in MCA setup!\n"); - return -ENOMEM; -} - -subsys_initcall(mca_init); - -/*--------------------------------------------------------------------*/ - -static __kprobes void -mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag) -{ - int slot = mca_dev->slot; - - if(slot == MCA_INTEGSCSI) { - printk(KERN_CRIT "NMI: caused by MCA integrated SCSI adapter (%s)\n", - mca_dev->name); - } else if(slot == MCA_INTEGVIDEO) { - printk(KERN_CRIT "NMI: caused by MCA integrated video adapter (%s)\n", - mca_dev->name); - } else if(slot == MCA_MOTHERBOARD) { - printk(KERN_CRIT "NMI: caused by motherboard (%s)\n", - mca_dev->name); - } - - /* More info available in POS 6 and 7? */ - - if(check_flag) { - unsigned char pos6, pos7; - - pos6 = mca_device_read_pos(mca_dev, 6); - pos7 = mca_device_read_pos(mca_dev, 7); - - printk(KERN_CRIT "NMI: POS 6 = 0x%x, POS 7 = 0x%x\n", pos6, pos7); - } - -} /* mca_handle_nmi_slot */ - -/*--------------------------------------------------------------------*/ - -static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data) -{ - struct mca_device *mca_dev = to_mca_device(dev); - unsigned char pos5; - - pos5 = mca_device_read_pos(mca_dev, 5); - - if(!(pos5 & 0x80)) { - /* Bit 7 of POS 5 is reset when this adapter has a hardware - * error. Bit 7 it reset if there's error information - * available in POS 6 and 7. 
- */ - mca_handle_nmi_device(mca_dev, !(pos5 & 0x40)); - return 1; - } - return 0; -} - -void __kprobes mca_handle_nmi(void) -{ - /* First try - scan the various adapters and see if a specific - * adapter was responsible for the error. - */ - bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback); - - mca_nmi_hook(); -} /* mca_handle_nmi */ diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c deleted file mode 100644 index 09cf7811035..00000000000 --- a/arch/i386/kernel/microcode.c +++ /dev/null @@ -1,850 +0,0 @@ -/* - * Intel CPU Microcode Update Driver for Linux - * - * Copyright (C) 2000-2006 Tigran Aivazian - * 2006 Shaohua Li - * - * This driver allows to upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual, - * Order Number 245472 or free download from: - * - * http://developer.intel.com/design/pentium4/manuals/245472.htm - * - * For more information, go to http://www.urbanmyth.org/microcode - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * 1.0 16 Feb 2000, Tigran Aivazian - * Initial release. - * 1.01 18 Feb 2000, Tigran Aivazian - * Added read() support + cleanups. - * 1.02 21 Feb 2000, Tigran Aivazian - * Added 'device trimming' support. open(O_WRONLY) zeroes - * and frees the saved copy of applied microcode. - * 1.03 29 Feb 2000, Tigran Aivazian - * Made to use devfs (/dev/cpu/microcode) + cleanups. - * 1.04 06 Jun 2000, Simon Trimmer - * Added misc device support (now uses both devfs and misc). - * Added MICROCODE_IOCFREE ioctl to clear memory. - * 1.05 09 Jun 2000, Simon Trimmer - * Messages for error cases (non Intel & no suitable microcode). 
- * 1.06 03 Aug 2000, Tigran Aivazian - * Removed ->release(). Removed exclusive open and status bitmap. - * Added microcode_rwsem to serialize read()/write()/ioctl(). - * Removed global kernel lock usage. - * 1.07 07 Sep 2000, Tigran Aivazian - * Write 0 to 0x8B msr and then cpuid before reading revision, - * so that it works even if there were no update done by the - * BIOS. Otherwise, reading from 0x8B gives junk (which happened - * to be 0 on my machine which is why it worked even when I - * disabled update by the BIOS) - * Thanks to Eric W. Biederman for the fix. - * 1.08 11 Dec 2000, Richard Schaal and - * Tigran Aivazian - * Intel Pentium 4 processor support and bugfixes. - * 1.09 30 Oct 2001, Tigran Aivazian - * Bugfix for HT (Hyper-Threading) enabled processors - * whereby processor resources are shared by all logical processors - * in a single CPU package. - * 1.10 28 Feb 2002 Asit K Mallick and - * Tigran Aivazian , - * Serialize updates as required on HT processors due to speculative - * nature of implementation. - * 1.11 22 Mar 2002 Tigran Aivazian - * Fix the panic when writing zero-length microcode chunk. - * 1.12 29 Sep 2003 Nitin Kamble , - * Jun Nakajima - * Support for the microcode updates in the new format. - * 1.13 10 Oct 2003 Tigran Aivazian - * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl - * because we no longer hold a copy of applied microcode - * in kernel memory. - * 1.14 25 Jun 2004 Tigran Aivazian - * Fix sigmatch() macro to handle old CPUs with pf == 0. - * Thanks to Stuart Swales for pointing out this bug. 
- */ - -//#define DEBUG /* pr_debug */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); -MODULE_AUTHOR("Tigran Aivazian "); -MODULE_LICENSE("GPL"); - -#define MICROCODE_VERSION "1.14a" - -#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */ -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ -#define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */ -#define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */ -#define DWSIZE (sizeof (u32)) -#define get_totalsize(mc) \ - (((microcode_t *)mc)->hdr.totalsize ? \ - ((microcode_t *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) -#define get_datasize(mc) \ - (((microcode_t *)mc)->hdr.datasize ? 
\ - ((microcode_t *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) - -#define sigmatch(s1, s2, p1, p2) \ - (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) - -#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) - -/* serialize access to the physical write to MSR 0x79 */ -static DEFINE_SPINLOCK(microcode_update_lock); - -/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -static DEFINE_MUTEX(microcode_mutex); - -static struct ucode_cpu_info { - int valid; - unsigned int sig; - unsigned int pf; - unsigned int rev; - microcode_t *mc; -} ucode_cpu_info[NR_CPUS]; - -static void collect_cpu_info(int cpu_num) -{ - struct cpuinfo_x86 *c = cpu_data + cpu_num; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - unsigned int val[2]; - - /* We should bind the task to the CPU */ - BUG_ON(raw_smp_processor_id() != cpu_num); - uci->pf = uci->rev = 0; - uci->mc = NULL; - uci->valid = 1; - - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64)) { - printk(KERN_ERR "microcode: CPU%d not a capable Intel " - "processor\n", cpu_num); - uci->valid = 0; - return; - } - - uci->sig = cpuid_eax(0x00000001); - - if ((c->x86_model >= 5) || (c->x86 > 6)) { - /* get processor flags from MSR 0x17 */ - rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - uci->pf = 1 << ((val[1] >> 18) & 7); - } - - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - /* see notes above for revision 1.07. 
Apparent chip bug */ - sync_core(); - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); - pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", - uci->sig, uci->pf, uci->rev); -} - -static inline int microcode_update_match(int cpu_num, - microcode_header_t *mc_header, int sig, int pf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - - if (!sigmatch(sig, uci->sig, pf, uci->pf) - || mc_header->rev <= uci->rev) - return 0; - return 1; -} - -static int microcode_sanity_check(void *mc) -{ - microcode_header_t *mc_header = mc; - struct extended_sigtable *ext_header = NULL; - struct extended_signature *ext_sig; - unsigned long total_size, data_size, ext_table_size; - int sum, orig_sum, ext_sigcount = 0, i; - - total_size = get_totalsize(mc_header); - data_size = get_datasize(mc_header); - if (data_size + MC_HEADER_SIZE > total_size) { - printk(KERN_ERR "microcode: error! " - "Bad data size in microcode data file\n"); - return -EINVAL; - } - - if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { - printk(KERN_ERR "microcode: error! " - "Unknown microcode update format\n"); - return -EINVAL; - } - ext_table_size = total_size - (MC_HEADER_SIZE + data_size); - if (ext_table_size) { - if ((ext_table_size < EXT_HEADER_SIZE) - || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { - printk(KERN_ERR "microcode: error! " - "Small exttable size in microcode data file\n"); - return -EINVAL; - } - ext_header = mc + MC_HEADER_SIZE + data_size; - if (ext_table_size != exttable_size(ext_header)) { - printk(KERN_ERR "microcode: error! 
" - "Bad exttable size in microcode data file\n"); - return -EFAULT; - } - ext_sigcount = ext_header->count; - } - - /* check extended table checksum */ - if (ext_table_size) { - int ext_table_sum = 0; - int *ext_tablep = (int *)ext_header; - - i = ext_table_size / DWSIZE; - while (i--) - ext_table_sum += ext_tablep[i]; - if (ext_table_sum) { - printk(KERN_WARNING "microcode: aborting, " - "bad extended signature table checksum\n"); - return -EINVAL; - } - } - - /* calculate the checksum */ - orig_sum = 0; - i = (MC_HEADER_SIZE + data_size) / DWSIZE; - while (i--) - orig_sum += ((int *)mc)[i]; - if (orig_sum) { - printk(KERN_ERR "microcode: aborting, bad checksum\n"); - return -EINVAL; - } - if (!ext_table_size) - return 0; - /* check extended signature checksum */ - for (i = 0; i < ext_sigcount; i++) { - ext_sig = (struct extended_signature *)((void *)ext_header - + EXT_HEADER_SIZE + EXT_SIGNATURE_SIZE * i); - sum = orig_sum - - (mc_header->sig + mc_header->pf + mc_header->cksum) - + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); - if (sum) { - printk(KERN_ERR "microcode: aborting, bad checksum\n"); - return -EINVAL; - } - } - return 0; -} - -/* - * return 0 - no update found - * return 1 - found update - * return < 0 - error - */ -static int get_maching_microcode(void *mc, int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - microcode_header_t *mc_header = mc; - struct extended_sigtable *ext_header; - unsigned long total_size = get_totalsize(mc_header); - int ext_sigcount, i; - struct extended_signature *ext_sig; - void *new_mc; - - if (microcode_update_match(cpu, mc_header, - mc_header->sig, mc_header->pf)) - goto find; - - if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) - return 0; - - ext_header = (struct extended_sigtable *)(mc + - get_datasize(mc_header) + MC_HEADER_SIZE); - ext_sigcount = ext_header->count; - ext_sig = (struct extended_signature *)((void *)ext_header - + EXT_HEADER_SIZE); - for (i = 0; i < ext_sigcount; i++) { - if 
(microcode_update_match(cpu, mc_header, - ext_sig->sig, ext_sig->pf)) - goto find; - ext_sig++; - } - return 0; -find: - pr_debug("microcode: CPU %d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev); - new_mc = vmalloc(total_size); - if (!new_mc) { - printk(KERN_ERR "microcode: error! Can not allocate memory\n"); - return -ENOMEM; - } - - /* free previous update file */ - vfree(uci->mc); - - memcpy(new_mc, mc, total_size); - uci->mc = new_mc; - return 1; -} - -static void apply_microcode(int cpu) -{ - unsigned long flags; - unsigned int val[2]; - int cpu_num = raw_smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - - /* We should bind the task to the CPU */ - BUG_ON(cpu_num != cpu); - - if (uci->mc == NULL) - return; - - /* serialize access to the physical write to MSR 0x79 */ - spin_lock_irqsave(µcode_update_lock, flags); - - /* write microcode via MSR 0x79 */ - wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) uci->mc->bits, - (unsigned long) uci->mc->bits >> 16 >> 16); - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - - /* see notes above for revision 1.07. 
Apparent chip bug */ - sync_core(); - - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - - spin_unlock_irqrestore(µcode_update_lock, flags); - if (val[1] != uci->mc->hdr.rev) { - printk(KERN_ERR "microcode: CPU%d updated from revision " - "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]); - return; - } - pr_debug("microcode: CPU%d updated from revision " - "0x%x to 0x%x, date = %08x \n", - cpu_num, uci->rev, val[1], uci->mc->hdr.date); - uci->rev = val[1]; -} - -#ifdef CONFIG_MICROCODE_OLD_INTERFACE -static void __user *user_buffer; /* user area microcode data buffer */ -static unsigned int user_buffer_size; /* it's size */ - -static long get_next_ucode(void **mc, long offset) -{ - microcode_header_t mc_header; - unsigned long total_size; - - /* No more data */ - if (offset >= user_buffer_size) - return 0; - if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - return -EFAULT; - } - total_size = get_totalsize(&mc_header); - if (offset + total_size > user_buffer_size) { - printk(KERN_ERR "microcode: error! Bad total size in microcode " - "data file\n"); - return -EINVAL; - } - *mc = vmalloc(total_size); - if (!*mc) - return -ENOMEM; - if (copy_from_user(*mc, user_buffer + offset, total_size)) { - printk(KERN_ERR "microcode: error! 
Can not read user data\n"); - vfree(*mc); - return -EFAULT; - } - return offset + total_size; -} - -static int do_microcode_update (void) -{ - long cursor = 0; - int error = 0; - void *new_mc = NULL; - int cpu; - cpumask_t old; - - old = current->cpus_allowed; - - while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { - error = microcode_sanity_check(new_mc); - if (error) - goto out; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - for_each_online_cpu(cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->valid) - continue; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - error = get_maching_microcode(new_mc, cpu); - if (error < 0) - goto out; - if (error == 1) - apply_microcode(cpu); - } - vfree(new_mc); - } -out: - if (cursor > 0) - vfree(new_mc); - if (cursor < 0) - error = cursor; - set_cpus_allowed(current, old); - return error; -} - -static int microcode_open (struct inode *unused1, struct file *unused2) -{ - return capable(CAP_SYS_RAWIO) ? 
0 : -EPERM; -} - -static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) -{ - ssize_t ret; - - if ((len >> PAGE_SHIFT) > num_physpages) { - printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); - return -EINVAL; - } - - lock_cpu_hotplug(); - mutex_lock(µcode_mutex); - - user_buffer = (void __user *) buf; - user_buffer_size = (int) len; - - ret = do_microcode_update(); - if (!ret) - ret = (ssize_t)len; - - mutex_unlock(µcode_mutex); - unlock_cpu_hotplug(); - - return ret; -} - -static const struct file_operations microcode_fops = { - .owner = THIS_MODULE, - .write = microcode_write, - .open = microcode_open, -}; - -static struct miscdevice microcode_dev = { - .minor = MICROCODE_MINOR, - .name = "microcode", - .fops = µcode_fops, -}; - -static int __init microcode_dev_init (void) -{ - int error; - - error = misc_register(µcode_dev); - if (error) { - printk(KERN_ERR - "microcode: can't misc_register on minor=%d\n", - MICROCODE_MINOR); - return error; - } - - return 0; -} - -static void microcode_dev_exit (void) -{ - misc_deregister(µcode_dev); -} - -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); -#else -#define microcode_dev_init() 0 -#define microcode_dev_exit() do { } while(0) -#endif - -static long get_next_ucode_from_buffer(void **mc, void *buf, - unsigned long size, long offset) -{ - microcode_header_t *mc_header; - unsigned long total_size; - - /* No more data */ - if (offset >= size) - return 0; - mc_header = (microcode_header_t *)(buf + offset); - total_size = get_totalsize(mc_header); - - if (offset + total_size > size) { - printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); - return -EINVAL; - } - - *mc = vmalloc(total_size); - if (!*mc) { - printk(KERN_ERR "microcode: error! 
Can not allocate memory\n"); - return -ENOMEM; - } - memcpy(*mc, buf + offset, total_size); - return offset + total_size; -} - -/* fake device for request_firmware */ -static struct platform_device *microcode_pdev; - -static int cpu_request_microcode(int cpu) -{ - char name[30]; - struct cpuinfo_x86 *c = cpu_data + cpu; - const struct firmware *firmware; - void *buf; - unsigned long size; - long offset = 0; - int error; - void *mc; - - /* We should bind the task to the CPU */ - BUG_ON(cpu != raw_smp_processor_id()); - sprintf(name,"intel-ucode/%02x-%02x-%02x", - c->x86, c->x86_model, c->x86_mask); - error = request_firmware(&firmware, name, µcode_pdev->dev); - if (error) { - pr_debug("ucode data file %s load failed\n", name); - return error; - } - buf = (void *)firmware->data; - size = firmware->size; - while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset)) - > 0) { - error = microcode_sanity_check(mc); - if (error) - break; - error = get_maching_microcode(mc, cpu); - if (error < 0) - break; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - if (error == 1) { - apply_microcode(cpu); - error = 0; - } - vfree(mc); - } - if (offset > 0) - vfree(mc); - if (offset < 0) - error = offset; - release_firmware(firmware); - - return error; -} - -static int apply_microcode_check_cpu(int cpu) -{ - struct cpuinfo_x86 *c = cpu_data + cpu; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - cpumask_t old; - unsigned int val[2]; - int err = 0; - - /* Check if the microcode is available */ - if (!uci->mc) - return 0; - - old = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - - /* Check if the microcode we have in memory matches the CPU */ - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001)) - err = -EINVAL; - - if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) { - /* get processor flags from MSR 
0x17 */ - rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - if (uci->pf != (1 << ((val[1] >> 18) & 7))) - err = -EINVAL; - } - - if (!err) { - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - /* see notes above for revision 1.07. Apparent chip bug */ - sync_core(); - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - if (uci->rev != val[1]) - err = -EINVAL; - } - - if (!err) - apply_microcode(cpu); - else - printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" - " sig=0x%x, pf=0x%x, rev=0x%x\n", - cpu, uci->sig, uci->pf, uci->rev); - - set_cpus_allowed(current, old); - return err; -} - -static void microcode_init_cpu(int cpu, int resume) -{ - cpumask_t old; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - old = current->cpus_allowed; - - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - mutex_lock(µcode_mutex); - collect_cpu_info(cpu); - if (uci->valid && system_state == SYSTEM_RUNNING && !resume) - cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - set_cpus_allowed(current, old); -} - -static void microcode_fini_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - mutex_lock(µcode_mutex); - uci->valid = 0; - vfree(uci->mc); - uci->mc = NULL; - mutex_unlock(µcode_mutex); -} - -static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - char *end; - unsigned long val = simple_strtoul(buf, &end, 0); - int err = 0; - int cpu = dev->id; - - if (end == buf) - return -EINVAL; - if (val == 1) { - cpumask_t old; - - old = current->cpus_allowed; - - lock_cpu_hotplug(); - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - - mutex_lock(µcode_mutex); - if (uci->valid) - err = cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - unlock_cpu_hotplug(); - set_cpus_allowed(current, old); - } - if (err) - return err; - return sz; -} - -static ssize_t version_show(struct sys_device *dev, char *buf) -{ - struct ucode_cpu_info 
*uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->rev); -} - -static ssize_t pf_show(struct sys_device *dev, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->pf); -} - -static SYSDEV_ATTR(reload, 0200, NULL, reload_store); -static SYSDEV_ATTR(version, 0400, version_show, NULL); -static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); - -static struct attribute *mc_default_attrs[] = { - &attr_reload.attr, - &attr_version.attr, - &attr_processor_flags.attr, - NULL -}; - -static struct attribute_group mc_attr_group = { - .attrs = mc_default_attrs, - .name = "microcode", -}; - -static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) -{ - int err, cpu = sys_dev->id; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("Microcode:CPU %d added\n", cpu); - memset(uci, 0, sizeof(*uci)); - - err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); - if (err) - return err; - - microcode_init_cpu(cpu, resume); - - return 0; -} - -static int mc_sysdev_add(struct sys_device *sys_dev) -{ - return __mc_sysdev_add(sys_dev, 0); -} - -static int mc_sysdev_remove(struct sys_device *sys_dev) -{ - int cpu = sys_dev->id; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("Microcode:CPU %d removed\n", cpu); - microcode_fini_cpu(cpu); - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - return 0; -} - -static int mc_sysdev_resume(struct sys_device *dev) -{ - int cpu = dev->id; - - if (!cpu_online(cpu)) - return 0; - pr_debug("Microcode:CPU %d resumed\n", cpu); - /* only CPU 0 will apply ucode here */ - apply_microcode(0); - return 0; -} - -static struct sysdev_driver mc_sysdev_driver = { - .add = mc_sysdev_add, - .remove = mc_sysdev_remove, - .resume = mc_sysdev_resume, -}; - -static __cpuinit int -mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct sys_device 
*sys_dev; - - sys_dev = get_cpu_sysdev(cpu); - switch (action) { - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); - break; - case CPU_ONLINE: - case CPU_DOWN_FAILED: - mc_sysdev_add(sys_dev); - break; - case CPU_ONLINE_FROZEN: - /* System-wide resume is in progress, try to apply microcode */ - if (apply_microcode_check_cpu(cpu)) { - /* The application of microcode failed */ - microcode_fini_cpu(cpu); - __mc_sysdev_add(sys_dev, 1); - break; - } - case CPU_DOWN_FAILED_FROZEN: - if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) - printk(KERN_ERR "Microcode: Failed to create the sysfs " - "group for CPU%d\n", cpu); - break; - case CPU_DOWN_PREPARE: - mc_sysdev_remove(sys_dev); - break; - case CPU_DOWN_PREPARE_FROZEN: - /* Suspend is in progress, only remove the interface */ - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata mc_cpu_notifier = { - .notifier_call = mc_cpu_callback, -}; - -static int __init microcode_init (void) -{ - int error; - - error = microcode_dev_init(); - if (error) - return error; - microcode_pdev = platform_device_register_simple("microcode", -1, - NULL, 0); - if (IS_ERR(microcode_pdev)) { - microcode_dev_exit(); - return PTR_ERR(microcode_pdev); - } - - lock_cpu_hotplug(); - error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); - unlock_cpu_hotplug(); - if (error) { - microcode_dev_exit(); - platform_device_unregister(microcode_pdev); - return error; - } - - register_hotcpu_notifier(&mc_cpu_notifier); - - printk(KERN_INFO - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); - return 0; -} - -static void __exit microcode_exit (void) -{ - microcode_dev_exit(); - - unregister_hotcpu_notifier(&mc_cpu_notifier); - - lock_cpu_hotplug(); - sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); - unlock_cpu_hotplug(); - - 
platform_device_unregister(microcode_pdev); -} - -module_init(microcode_init) -module_exit(microcode_exit) diff --git a/arch/i386/kernel/module_32.c b/arch/i386/kernel/module_32.c deleted file mode 100644 index 3db0a5442eb..00000000000 --- a/arch/i386/kernel/module_32.c +++ /dev/null @@ -1,152 +0,0 @@ -/* Kernel module help for i386. - Copyright (C) 2001 Rusty Russell. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -#include -#include -#include -#include -#include -#include -#include - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt...) -#endif - -void *module_alloc(unsigned long size) -{ - if (size == 0) - return NULL; - return vmalloc_exec(size); -} - - -/* Free memory returned from module_alloc */ -void module_free(struct module *mod, void *module_region) -{ - vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ -} - -/* We don't need anything special. 
*/ -int module_frob_arch_sections(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - char *secstrings, - struct module *mod) -{ - return 0; -} - -int apply_relocate(Elf32_Shdr *sechdrs, - const char *strtab, - unsigned int symindex, - unsigned int relsec, - struct module *me) -{ - unsigned int i; - Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; - Elf32_Sym *sym; - uint32_t *location; - - DEBUGP("Applying relocate section %u to %u\n", relsec, - sechdrs[relsec].sh_info); - for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { - /* This is where to make the change */ - location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr - + rel[i].r_offset; - /* This is the symbol it is referring to. Note that all - undefined symbols have been resolved. */ - sym = (Elf32_Sym *)sechdrs[symindex].sh_addr - + ELF32_R_SYM(rel[i].r_info); - - switch (ELF32_R_TYPE(rel[i].r_info)) { - case R_386_32: - /* We add the value into the location given */ - *location += sym->st_value; - break; - case R_386_PC32: - /* Add the value, subtract its postition */ - *location += sym->st_value - (uint32_t)location; - break; - default: - printk(KERN_ERR "module %s: Unknown relocation: %u\n", - me->name, ELF32_R_TYPE(rel[i].r_info)); - return -ENOEXEC; - } - } - return 0; -} - -int apply_relocate_add(Elf32_Shdr *sechdrs, - const char *strtab, - unsigned int symindex, - unsigned int relsec, - struct module *me) -{ - printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", - me->name); - return -ENOEXEC; -} - -int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) -{ - const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, - *para = NULL; - char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - if (!strcmp(".text", secstrings + s->sh_name)) - text = s; - if (!strcmp(".altinstructions", secstrings + s->sh_name)) - alt = s; - if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks= s; - if 
(!strcmp(".parainstructions", secstrings + s->sh_name)) - para = s; - } - - if (alt) { - /* patch .altinstructions */ - void *aseg = (void *)alt->sh_addr; - apply_alternatives(aseg, aseg + alt->sh_size); - } - if (locks && text) { - void *lseg = (void *)locks->sh_addr; - void *tseg = (void *)text->sh_addr; - alternatives_smp_module_add(me, me->name, - lseg, lseg + locks->sh_size, - tseg, tseg + text->sh_size); - } - - if (para) { - void *pseg = (void *)para->sh_addr; - apply_paravirt(pseg, pseg + para->sh_size); - } - - return module_bug_finalize(hdr, sechdrs, me); -} - -void module_arch_cleanup(struct module *mod) -{ - alternatives_smp_module_del(mod); - module_bug_cleanup(mod); -} diff --git a/arch/i386/kernel/mpparse_32.c b/arch/i386/kernel/mpparse_32.c deleted file mode 100644 index 13abb4ebfb7..00000000000 --- a/arch/i386/kernel/mpparse_32.c +++ /dev/null @@ -1,1132 +0,0 @@ -/* - * Intel Multiprocessor Specification 1.1 and 1.4 - * compliant MP-table parsing routines. - * - * (c) 1995 Alan Cox, Building #3 - * (c) 1998, 1999, 2000 Ingo Molnar - * - * Fixes - * Erich Boleyn : MP v1.4 and additional changes. - * Alan Cox : Added EBDA scanning - * Ingo Molnar : various cleanups and rewrites - * Maciej W. Rozycki: Bits for default MP configurations - * Paul Diefenbaugh: Added full ACPI support - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* Have we found an MP table */ -int smp_found_config; -unsigned int __cpuinitdata maxcpus = NR_CPUS; - -/* - * Various Linux-internal data structures created from the - * MP-table. - */ -int apic_version [MAX_APICS]; -int mp_bus_id_to_type [MAX_MP_BUSSES]; -int mp_bus_id_to_node [MAX_MP_BUSSES]; -int mp_bus_id_to_local [MAX_MP_BUSSES]; -int quad_local_to_mp_bus_id [NR_CPUS/4][4]; -int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... 
MAX_MP_BUSSES-1] = -1 }; -static int mp_current_pci_id; - -/* I/O APIC entries */ -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; - -/* # of MP IRQ source entries */ -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; - -/* MP IRQ source entries */ -int mp_irq_entries; - -int nr_ioapics; - -int pic_mode; -unsigned long mp_lapic_addr; - -unsigned int def_to_bigsmp = 0; - -/* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; -/* Internal processor count */ -unsigned int __cpuinitdata num_processors; - -/* Bitmask of physically existing CPUs */ -physid_mask_t phys_cpu_present_map; - -u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; - -/* - * Intel MP BIOS table parsing routines: - */ - - -/* - * Checksum an MP configuration block. - */ - -static int __init mpf_checksum(unsigned char *mp, int len) -{ - int sum = 0; - - while (len--) - sum += *mp++; - - return sum & 0xFF; -} - -/* - * Have to match translation table entries to main table entries by counter - * hence the mpc_record variable .... can't see a less disgusting way of - * doing this .... 
- */ - -static int mpc_record; -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata; - -static void __cpuinit MP_processor_info (struct mpc_config_processor *m) -{ - int ver, apicid; - physid_mask_t phys_cpu; - - if (!(m->mpc_cpuflag & CPU_ENABLED)) - return; - - apicid = mpc_apic_id(m, translation_table[mpc_record]); - - if (m->mpc_featureflag&(1<<0)) - Dprintk(" Floating point unit present.\n"); - if (m->mpc_featureflag&(1<<7)) - Dprintk(" Machine Exception supported.\n"); - if (m->mpc_featureflag&(1<<8)) - Dprintk(" 64 bit compare & exchange supported.\n"); - if (m->mpc_featureflag&(1<<9)) - Dprintk(" Internal APIC present.\n"); - if (m->mpc_featureflag&(1<<11)) - Dprintk(" SEP present.\n"); - if (m->mpc_featureflag&(1<<12)) - Dprintk(" MTRR present.\n"); - if (m->mpc_featureflag&(1<<13)) - Dprintk(" PGE present.\n"); - if (m->mpc_featureflag&(1<<14)) - Dprintk(" MCA present.\n"); - if (m->mpc_featureflag&(1<<15)) - Dprintk(" CMOV present.\n"); - if (m->mpc_featureflag&(1<<16)) - Dprintk(" PAT present.\n"); - if (m->mpc_featureflag&(1<<17)) - Dprintk(" PSE present.\n"); - if (m->mpc_featureflag&(1<<18)) - Dprintk(" PSN present.\n"); - if (m->mpc_featureflag&(1<<19)) - Dprintk(" Cache Line Flush Instruction present.\n"); - /* 20 Reserved */ - if (m->mpc_featureflag&(1<<21)) - Dprintk(" Debug Trace and EMON Store present.\n"); - if (m->mpc_featureflag&(1<<22)) - Dprintk(" ACPI Thermal Throttle Registers present.\n"); - if (m->mpc_featureflag&(1<<23)) - Dprintk(" MMX present.\n"); - if (m->mpc_featureflag&(1<<24)) - Dprintk(" FXSR present.\n"); - if (m->mpc_featureflag&(1<<25)) - Dprintk(" XMM present.\n"); - if (m->mpc_featureflag&(1<<26)) - Dprintk(" Willamette New Instructions present.\n"); - if (m->mpc_featureflag&(1<<27)) - Dprintk(" Self Snoop present.\n"); - if (m->mpc_featureflag&(1<<28)) - Dprintk(" HT present.\n"); - if (m->mpc_featureflag&(1<<29)) - Dprintk(" Thermal Monitor present.\n"); - /* 30, 31 Reserved */ - - - if 
(m->mpc_cpuflag & CPU_BOOTPROCESSOR) { - Dprintk(" Bootup CPU\n"); - boot_cpu_physical_apicid = m->mpc_apicid; - } - - ver = m->mpc_apicver; - - /* - * Validate version - */ - if (ver == 0x0) { - printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - m->mpc_apicid); - ver = 0x10; - } - apic_version[m->mpc_apicid] = ver; - - phys_cpu = apicid_to_cpu_present(apicid); - physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); - - if (num_processors >= NR_CPUS) { - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); - return; - } - - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); - return; - } - - cpu_set(num_processors, cpu_possible_map); - num_processors++; - - /* - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y - * but we need to work other dependencies like SMP_SUSPEND etc - * before this can be done without some confusion. - * if (CPU_HOTPLUG_ENABLED || num_processors > 8) - * - Ashok Raj - */ - if (num_processors > 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(ver)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } - bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; -} - -static void __init MP_bus_info (struct mpc_config_bus *m) -{ - char str[7]; - - memcpy(str, m->mpc_bustype, 6); - str[6] = 0; - - mpc_oem_bus_info(m, str, translation_table[mpc_record]); - -#if MAX_MP_BUSSES < 256 - if (m->mpc_busid >= MAX_MP_BUSSES) { - printk(KERN_WARNING "MP table busid value (%d) for bustype %s " - " is too large, max. 
supported is %d\n", - m->mpc_busid, str, MAX_MP_BUSSES - 1); - return; - } -#endif - - if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; - } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; - } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { - mpc_oem_pci_bus(m, translation_table[mpc_record]); - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; - mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; - mp_current_pci_id++; - } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; - } else { - printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); - } -} - -static void __init MP_ioapic_info (struct mpc_config_ioapic *m) -{ - if (!(m->mpc_flags & MPC_APIC_USABLE)) - return; - - printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n", - m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); - if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n", - MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); - } - if (!m->mpc_apicaddr) { - printk(KERN_ERR "WARNING: bogus zero I/O APIC address" - " found in MP table, skipping!\n"); - return; - } - mp_ioapics[nr_ioapics] = *m; - nr_ioapics++; -} - -static void __init MP_intsrc_info (struct mpc_config_intsrc *m) -{ - mp_irqs [mp_irq_entries] = *m; - Dprintk("Int: type %d, pol %d, trig %d, bus %d," - " IRQ %02x, APIC ID %x, APIC INT %02x\n", - m->mpc_irqtype, m->mpc_irqflag & 3, - (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, - m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!!\n"); -} - -static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) -{ - Dprintk("Lint: type %d, pol %d, trig %d, bus %d," - " IRQ %02x, APIC ID %x, APIC LINT %02x\n", - m->mpc_irqtype, 
m->mpc_irqflag & 3, - (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, - m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); -} - -#ifdef CONFIG_X86_NUMAQ -static void __init MP_translation_info (struct mpc_config_translation *m) -{ - printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local); - - if (mpc_record >= MAX_MPC_ENTRY) - printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); - else - translation_table[mpc_record] = m; /* stash this for later */ - if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) - node_set_online(m->trans_quad); -} - -/* - * Read/parse the MPC oem tables - */ - -static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \ - unsigned short oemsize) -{ - int count = sizeof (*oemtable); /* the header size */ - unsigned char *oemptr = ((unsigned char *)oemtable)+count; - - mpc_record = 0; - printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable); - if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4)) - { - printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", - oemtable->oem_signature[0], - oemtable->oem_signature[1], - oemtable->oem_signature[2], - oemtable->oem_signature[3]); - return; - } - if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length)) - { - printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); - return; - } - while (count < oemtable->oem_length) { - switch (*oemptr) { - case MP_TRANSLATION: - { - struct mpc_config_translation *m= - (struct mpc_config_translation *)oemptr; - MP_translation_info(m); - oemptr += sizeof(*m); - count += sizeof(*m); - ++mpc_record; - break; - } - default: - { - printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr); - return; - } - } - } -} - -static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid) -{ - if (strncmp(oem, "IBM NUMA", 8)) - printk("Warning! 
May not be a NUMA-Q system!\n"); - if (mpc->mpc_oemptr) - smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr, - mpc->mpc_oemsize); -} -#endif /* CONFIG_X86_NUMAQ */ - -/* - * Read/parse the MPC - */ - -static int __init smp_read_mpc(struct mp_config_table *mpc) -{ - char str[16]; - char oem[10]; - int count=sizeof(*mpc); - unsigned char *mpt=((unsigned char *)mpc)+count; - - if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { - printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n", - *(u32 *)mpc->mpc_signature); - return 0; - } - if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { - printk(KERN_ERR "SMP mptable: checksum error!\n"); - return 0; - } - if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { - printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n", - mpc->mpc_spec); - return 0; - } - if (!mpc->mpc_lapic) { - printk(KERN_ERR "SMP mptable: null local APIC address!\n"); - return 0; - } - memcpy(oem,mpc->mpc_oem,8); - oem[8]=0; - printk(KERN_INFO "OEM ID: %s ",oem); - - memcpy(str,mpc->mpc_productid,12); - str[12]=0; - printk("Product ID: %s ",str); - - mps_oem_check(mpc, oem, str); - - printk("APIC at: 0x%lX\n",mpc->mpc_lapic); - - /* - * Save the local APIC address (it might be non-default) -- but only - * if we're not using ACPI. - */ - if (!acpi_lapic) - mp_lapic_addr = mpc->mpc_lapic; - - /* - * Now process the configuration blocks. 
- */ - mpc_record = 0; - while (count < mpc->mpc_length) { - switch(*mpt) { - case MP_PROCESSOR: - { - struct mpc_config_processor *m= - (struct mpc_config_processor *)mpt; - /* ACPI may have already provided this data */ - if (!acpi_lapic) - MP_processor_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - case MP_BUS: - { - struct mpc_config_bus *m= - (struct mpc_config_bus *)mpt; - MP_bus_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - case MP_IOAPIC: - { - struct mpc_config_ioapic *m= - (struct mpc_config_ioapic *)mpt; - MP_ioapic_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - case MP_INTSRC: - { - struct mpc_config_intsrc *m= - (struct mpc_config_intsrc *)mpt; - - MP_intsrc_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - case MP_LINTSRC: - { - struct mpc_config_lintsrc *m= - (struct mpc_config_lintsrc *)mpt; - MP_lintsrc_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - default: - { - count = mpc->mpc_length; - break; - } - } - ++mpc_record; - } - setup_apic_routing(); - if (!num_processors) - printk(KERN_ERR "SMP mptable: no processors registered!\n"); - return num_processors; -} - -static int __init ELCR_trigger(unsigned int irq) -{ - unsigned int port; - - port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; -} - -static void __init construct_default_ioirq_mptable(int mpc_default_type) -{ - struct mpc_config_intsrc intsrc; - int i; - int ELCR_fallback = 0; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqflag = 0; /* conforming */ - intsrc.mpc_srcbus = 0; - intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; - - intsrc.mpc_irqtype = mp_INT; - - /* - * If true, we have an ISA/PCI system with no IRQ entries - * in the MP table. To prevent the PCI interrupts from being set up - * incorrectly, we try to use the ELCR. 
The sanity check to see if - * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can - * never be level sensitive, so we simply see if the ELCR agrees. - * If it does, we assume it's valid. - */ - if (mpc_default_type == 5) { - printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); - - if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13)) - printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n"); - else { - printk(KERN_INFO "Using ELCR to identify PCI interrupts\n"); - ELCR_fallback = 1; - } - } - - for (i = 0; i < 16; i++) { - switch (mpc_default_type) { - case 2: - if (i == 0 || i == 13) - continue; /* IRQ0 & IRQ13 not connected */ - /* fall through */ - default: - if (i == 2) - continue; /* IRQ2 is never connected */ - } - - if (ELCR_fallback) { - /* - * If the ELCR indicates a level-sensitive interrupt, we - * copy that information over to the MP table in the - * irqflag field (level sensitive, active high polarity). - */ - if (ELCR_trigger(i)) - intsrc.mpc_irqflag = 13; - else - intsrc.mpc_irqflag = 0; - } - - intsrc.mpc_srcbusirq = i; - intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ - MP_intsrc_info(&intsrc); - } - - intsrc.mpc_irqtype = mp_ExtINT; - intsrc.mpc_srcbusirq = 0; - intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ - MP_intsrc_info(&intsrc); -} - -static inline void __init construct_default_ISA_mptable(int mpc_default_type) -{ - struct mpc_config_processor processor; - struct mpc_config_bus bus; - struct mpc_config_ioapic ioapic; - struct mpc_config_lintsrc lintsrc; - int linttypes[2] = { mp_ExtINT, mp_NMI }; - int i; - - /* - * local APIC has default address - */ - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* - * 2 CPUs, numbered 0 & 1. - */ - processor.mpc_type = MP_PROCESSOR; - /* Either an integrated APIC or a discrete 82489DX. */ - processor.mpc_apicver = mpc_default_type > 4 ? 
0x10 : 0x01; - processor.mpc_cpuflag = CPU_ENABLED; - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | - boot_cpu_data.x86_mask; - processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; - processor.mpc_reserved[0] = 0; - processor.mpc_reserved[1] = 0; - for (i = 0; i < 2; i++) { - processor.mpc_apicid = i; - MP_processor_info(&processor); - } - - bus.mpc_type = MP_BUS; - bus.mpc_busid = 0; - switch (mpc_default_type) { - default: - printk("???\n"); - printk(KERN_ERR "Unknown standard configuration %d\n", - mpc_default_type); - /* fall through */ - case 1: - case 5: - memcpy(bus.mpc_bustype, "ISA ", 6); - break; - case 2: - case 6: - case 3: - memcpy(bus.mpc_bustype, "EISA ", 6); - break; - case 4: - case 7: - memcpy(bus.mpc_bustype, "MCA ", 6); - } - MP_bus_info(&bus); - if (mpc_default_type > 4) { - bus.mpc_busid = 1; - memcpy(bus.mpc_bustype, "PCI ", 6); - MP_bus_info(&bus); - } - - ioapic.mpc_type = MP_IOAPIC; - ioapic.mpc_apicid = 2; - ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; - ioapic.mpc_flags = MPC_APIC_USABLE; - ioapic.mpc_apicaddr = 0xFEC00000; - MP_ioapic_info(&ioapic); - - /* - * We set up most of the low 16 IO-APIC pins according to MPS rules. - */ - construct_default_ioirq_mptable(mpc_default_type); - - lintsrc.mpc_type = MP_LINTSRC; - lintsrc.mpc_irqflag = 0; /* conforming */ - lintsrc.mpc_srcbusid = 0; - lintsrc.mpc_srcbusirq = 0; - lintsrc.mpc_destapic = MP_APIC_ALL; - for (i = 0; i < 2; i++) { - lintsrc.mpc_irqtype = linttypes[i]; - lintsrc.mpc_destapiclint = i; - MP_lintsrc_info(&lintsrc); - } -} - -static struct intel_mp_floating *mpf_found; - -/* - * Scan the memory blocks for an SMP configuration block. - */ -void __init get_smp_config (void) -{ - struct intel_mp_floating *mpf = mpf_found; - - /* - * ACPI supports both logical (e.g. Hyper-Threading) and physical - * processors, where MPS only supports physical. 
- */ - if (acpi_lapic && acpi_ioapic) { - printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n"); - return; - } - else if (acpi_lapic) - printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n"); - - printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); - if (mpf->mpf_feature2 & (1<<7)) { - printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); - pic_mode = 1; - } else { - printk(KERN_INFO " Virtual Wire compatibility mode.\n"); - pic_mode = 0; - } - - /* - * Now see if we need to read further. - */ - if (mpf->mpf_feature1 != 0) { - - printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1); - construct_default_ISA_mptable(mpf->mpf_feature1); - - } else if (mpf->mpf_physptr) { - - /* - * Read the physical hardware table. Anything here will - * override the defaults. - */ - if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) { - smp_found_config = 0; - printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); - printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); - return; - } - /* - * If there are no explicit MP IRQ entries, then we are - * broken. We set up most of the low 16 IO-APIC pins to - * ISA defaults and hope it will work. - */ - if (!mp_irq_entries) { - struct mpc_config_bus bus; - - printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n"); - - bus.mpc_type = MP_BUS; - bus.mpc_busid = 0; - memcpy(bus.mpc_bustype, "ISA ", 6); - MP_bus_info(&bus); - - construct_default_ioirq_mptable(0); - } - - } else - BUG(); - - printk(KERN_INFO "Processors: %d\n", num_processors); - /* - * Only use the first configuration found. 
- */ -} - -static int __init smp_scan_config (unsigned long base, unsigned long length) -{ - unsigned long *bp = phys_to_virt(base); - struct intel_mp_floating *mpf; - - Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); - if (sizeof(*mpf) != 16) - printk("Error: MPF size\n"); - - while (length > 0) { - mpf = (struct intel_mp_floating *)bp; - if ((*bp == SMP_MAGIC_IDENT) && - (mpf->mpf_length == 1) && - !mpf_checksum((unsigned char *)bp, 16) && - ((mpf->mpf_specification == 1) - || (mpf->mpf_specification == 4)) ) { - - smp_found_config = 1; - printk(KERN_INFO "found SMP MP-table at %08lx\n", - virt_to_phys(mpf)); - reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); - if (mpf->mpf_physptr) { - /* - * We cannot access to MPC table to compute - * table size yet, as only few megabytes from - * the bottom is mapped now. - * PC-9800's MPC table places on the very last - * of physical memory; so that simply reserving - * PAGE_SIZE from mpg->mpf_physptr yields BUG() - * in reserve_bootmem. - */ - unsigned long size = PAGE_SIZE; - unsigned long end = max_low_pfn * PAGE_SIZE; - if (mpf->mpf_physptr + size > end) - size = end - mpf->mpf_physptr; - reserve_bootmem(mpf->mpf_physptr, size); - } - - mpf_found = mpf; - return 1; - } - bp += 4; - length -= 16; - } - return 0; -} - -void __init find_smp_config (void) -{ - unsigned int address; - - /* - * FIXME: Linux assumes you have 640K of base ram.. - * this continues the error... - * - * 1) Scan the bottom 1K for a signature - * 2) Scan the top 1K of base RAM - * 3) Scan the 64K of bios - */ - if (smp_scan_config(0x0,0x400) || - smp_scan_config(639*0x400,0x400) || - smp_scan_config(0xF0000,0x10000)) - return; - /* - * If it is an SMP machine we should know now, unless the - * configuration is in an EISA/MCA bus machine with an - * extended bios data area. - * - * there is a real-mode segmented pointer pointing to the - * 4K EBDA area at 0x40E, calculate and scan it here. - * - * NOTE! 
There are Linux loaders that will corrupt the EBDA - * area, and as such this kind of SMP config may be less - * trustworthy, simply because the SMP table may have been - * stomped on during early boot. These loaders are buggy and - * should be fixed. - * - * MP1.4 SPEC states to only scan first 1K of 4K EBDA. - */ - - address = get_bios_ebda(); - if (address) - smp_scan_config(address, 0x400); -} - -int es7000_plat; - -/* -------------------------------------------------------------------------- - ACPI-based MP Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI - -void __init mp_register_lapic_address(u64 address) -{ - mp_lapic_addr = (unsigned long) address; - - set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); - - if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); - - Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); -} - -void __cpuinit mp_register_lapic (u8 id, u8 enabled) -{ - struct mpc_config_processor processor; - int boot_cpu = 0; - - if (MAX_APICS - id <= 0) { - printk(KERN_WARNING "Processor #%d invalid (max %d)\n", - id, MAX_APICS); - return; - } - - if (id == boot_cpu_physical_apicid) - boot_cpu = 1; - - processor.mpc_type = MP_PROCESSOR; - processor.mpc_apicid = id; - processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR)); - processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); - processor.mpc_cpuflag |= (boot_cpu ? 
CPU_BOOTPROCESSOR : 0); - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; - processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; - processor.mpc_reserved[0] = 0; - processor.mpc_reserved[1] = 0; - - MP_processor_info(&processor); -} - -#ifdef CONFIG_X86_IO_APIC - -#define MP_ISA_BUS 0 -#define MP_MAX_IOAPIC_PIN 127 - -static struct mp_ioapic_routing { - int apic_id; - int gsi_base; - int gsi_end; - u32 pin_programmed[4]; -} mp_ioapic_routing[MAX_IO_APICS]; - -static int mp_find_ioapic (int gsi) -{ - int i = 0; - - /* Find the IOAPIC that manages this GSI. */ - for (i = 0; i < nr_ioapics; i++) { - if ((gsi >= mp_ioapic_routing[i].gsi_base) - && (gsi <= mp_ioapic_routing[i].gsi_end)) - return i; - } - - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); - - return -1; -} - -void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) -{ - int idx = 0; - int tmpid; - - if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " - "(found %d)\n", MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!\n"); - } - if (!address) { - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" - " found in MADT table, skipping!\n"); - return; - } - - idx = nr_ioapics++; - - mp_ioapics[idx].mpc_type = MP_IOAPIC; - mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; - mp_ioapics[idx].mpc_apicaddr = address; - - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - tmpid = io_apic_get_unique_id(idx, id); - else - tmpid = id; - if (tmpid == -1) { - nr_ioapics--; - return; - } - mp_ioapics[idx].mpc_apicid = tmpid; - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); - - /* - * Build basic GSI lookup table to facilitate gsi->io_apic lookups - * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 
- */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; - mp_ioapic_routing[idx].gsi_base = gsi_base; - mp_ioapic_routing[idx].gsi_end = gsi_base + - io_apic_get_redir_entries(idx); - - printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, - mp_ioapic_routing[idx].gsi_base, - mp_ioapic_routing[idx].gsi_end); -} - -void __init -mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) -{ - struct mpc_config_intsrc intsrc; - int ioapic = -1; - int pin = -1; - - /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return; - pin = gsi - mp_ioapic_routing[ioapic].gsi_base; - - /* - * TBD: This check is for faulty timer entries, where the override - * erroneously sets the trigger to level, resulting in a HUGE - * increase of timer interrupts! - */ - if ((bus_irq == 0) && (trigger == 3)) - trigger = 1; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_irqflag = (trigger << 2) | polarity; - intsrc.mpc_srcbus = MP_ISA_BUS; - intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ - intsrc.mpc_dstirq = pin; /* INTIN# */ - - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", - intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); - - mp_irqs[mp_irq_entries] = intsrc; - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!\n"); -} - -void __init mp_config_acpi_legacy_irqs (void) -{ - struct mpc_config_intsrc intsrc; - int i = 0; - int ioapic = -1; - - /* - * Fabricate the legacy ISA bus (bus #31). 
- */ - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); - - /* - * Older generations of ES7000 have no legacy identity mappings - */ - if (es7000_plat == 1) - return; - - /* - * Locate the IOAPIC that manages the ISA IRQs (0-15). - */ - ioapic = mp_find_ioapic(0); - if (ioapic < 0) - return; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqflag = 0; /* Conforming */ - intsrc.mpc_srcbus = MP_ISA_BUS; - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; - - /* - * Use the default configuration for the IRQs 0-15. Unless - * overriden by (MADT) interrupt source override entries. - */ - for (i = 0; i < 16; i++) { - int idx; - - for (idx = 0; idx < mp_irq_entries; idx++) { - struct mpc_config_intsrc *irq = mp_irqs + idx; - - /* Do we already have a mapping for this ISA IRQ? */ - if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i) - break; - - /* Do we already have a mapping for this IOAPIC pin */ - if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && - (irq->mpc_dstirq == i)) - break; - } - - if (idx != mp_irq_entries) { - printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); - continue; /* IRQ already used */ - } - - intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_srcbusirq = i; /* Identity mapped */ - intsrc.mpc_dstirq = i; - - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " - "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, - intsrc.mpc_dstirq); - - mp_irqs[mp_irq_entries] = intsrc; - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!\n"); - } -} - -#define MAX_GSI_NUM 4096 - -int mp_register_gsi(u32 gsi, int triggering, int polarity) -{ - int ioapic = -1; - int ioapic_pin = 0; - int idx, bit = 0; - static int pci_irq = 16; - /* - * Mapping between Global System Interrups, which - * represent all possible interrupts, and IRQs - * assigned to actual devices. 
- */ - static int gsi_to_irq[MAX_GSI_NUM]; - - /* Don't set up the ACPI SCI because it's already set up */ - if (acpi_gbl_FADT.sci_interrupt == gsi) - return gsi; - - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) { - printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); - return gsi; - } - - ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; - - if (ioapic_renumber_irq) - gsi = ioapic_renumber_irq(ioapic, gsi); - - /* - * Avoid pin reprogramming. PRTs typically include entries - * with redundant pin->gsi mappings (but unique PCI devices); - * we only program the IOAPIC on the first. - */ - bit = ioapic_pin % 32; - idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32); - if (idx > 3) { - printk(KERN_ERR "Invalid reference to IOAPIC pin " - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, - ioapic_pin); - return gsi; - } - if ((1< 15), but - * avoid a problem where the 8254 timer (IRQ0) is setup - * via an override (so it's not on pin 0 of the ioapic), - * and at the same time, the pin 0 interrupt is a PCI - * type. The gsi > 15 test could cause these two pins - * to be shared as IRQ0, and they are not shareable. - * So test for this condition, and if necessary, avoid - * the pin collision. - */ - if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0)) - gsi = pci_irq++; - /* - * Don't assign IRQ used by ACPI SCI - */ - if (gsi == acpi_gbl_FADT.sci_interrupt) - gsi = pci_irq++; - gsi_to_irq[irq] = gsi; - } else { - printk(KERN_ERR "GSI %u is too high\n", gsi); - return gsi; - } - } - - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, - triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, - polarity == ACPI_ACTIVE_HIGH ? 0 : 1); - return gsi; -} - -#endif /* CONFIG_X86_IO_APIC */ -#endif /* CONFIG_ACPI */ diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c deleted file mode 100644 index 0c1069b8d63..00000000000 --- a/arch/i386/kernel/msr.c +++ /dev/null @@ -1,224 +0,0 @@ -/* ----------------------------------------------------------------------- * - * - * Copyright 2000 H. 
Peter Anvin - All Rights Reserved - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, - * USA; either version 2 of the License, or (at your option) any later - * version; incorporated herein by reference. - * - * ----------------------------------------------------------------------- */ - -/* - * msr.c - * - * x86 MSR access device - * - * This device is accessed by lseek() to the appropriate register number - * and then read/write in chunks of 8 bytes. A larger size means multiple - * reads or writes of the same register. - * - * This driver uses /dev/cpu/%d/msr where %d is the minor number, and on - * an SMP box will direct the access to CPU %d. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -static struct class *msr_class; - -static loff_t msr_seek(struct file *file, loff_t offset, int orig) -{ - loff_t ret = -EINVAL; - - lock_kernel(); - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - } - unlock_kernel(); - return ret; -} - -static ssize_t msr_read(struct file *file, char __user * buf, - size_t count, loff_t * ppos) -{ - u32 __user *tmp = (u32 __user *) buf; - u32 data[2]; - u32 reg = *ppos; - int cpu = iminor(file->f_path.dentry->d_inode); - int err; - - if (count % 8) - return -EINVAL; /* Invalid chunk size */ - - for (; count; count -= 8) { - err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); - if (err) - return -EIO; - if (copy_to_user(tmp, &data, 8)) - return -EFAULT; - tmp += 2; - } - - return ((char __user *)tmp) - buf; -} - -static ssize_t msr_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - const u32 __user *tmp = (const 
u32 __user *)buf; - u32 data[2]; - u32 reg = *ppos; - int cpu = iminor(file->f_path.dentry->d_inode); - int err; - - if (count % 8) - return -EINVAL; /* Invalid chunk size */ - - for (; count; count -= 8) { - if (copy_from_user(&data, tmp, 8)) - return -EFAULT; - err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); - if (err) - return -EIO; - tmp += 2; - } - - return ((char __user *)tmp) - buf; -} - -static int msr_open(struct inode *inode, struct file *file) -{ - unsigned int cpu = iminor(file->f_path.dentry->d_inode); - struct cpuinfo_x86 *c = &(cpu_data)[cpu]; - - if (cpu >= NR_CPUS || !cpu_online(cpu)) - return -ENXIO; /* No such CPU */ - if (!cpu_has(c, X86_FEATURE_MSR)) - return -EIO; /* MSR not supported */ - - return 0; -} - -/* - * File operations we support - */ -static const struct file_operations msr_fops = { - .owner = THIS_MODULE, - .llseek = msr_seek, - .read = msr_read, - .write = msr_write, - .open = msr_open, -}; - -static int msr_device_create(int i) -{ - int err = 0; - struct device *dev; - - dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, i), "msr%d",i); - if (IS_ERR(dev)) - err = PTR_ERR(dev); - return err; -} - -static int msr_class_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - msr_device_create(cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata msr_class_cpu_notifier = -{ - .notifier_call = msr_class_cpu_callback, -}; - -static int __init msr_init(void) -{ - int i, err = 0; - i = 0; - - if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) { - printk(KERN_ERR "msr: unable to get major %d for msr\n", - MSR_MAJOR); - err = -EBUSY; - goto out; - } - msr_class = class_create(THIS_MODULE, "msr"); - if (IS_ERR(msr_class)) { - err = PTR_ERR(msr_class); - goto 
out_chrdev; - } - for_each_online_cpu(i) { - err = msr_device_create(i); - if (err != 0) - goto out_class; - } - register_hotcpu_notifier(&msr_class_cpu_notifier); - - err = 0; - goto out; - -out_class: - i = 0; - for_each_online_cpu(i) - device_destroy(msr_class, MKDEV(MSR_MAJOR, i)); - class_destroy(msr_class); -out_chrdev: - unregister_chrdev(MSR_MAJOR, "cpu/msr"); -out: - return err; -} - -static void __exit msr_exit(void) -{ - int cpu = 0; - for_each_online_cpu(cpu) - device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); - class_destroy(msr_class); - unregister_chrdev(MSR_MAJOR, "cpu/msr"); - unregister_hotcpu_notifier(&msr_class_cpu_notifier); -} - -module_init(msr_init); -module_exit(msr_exit) - -MODULE_AUTHOR("H. Peter Anvin "); -MODULE_DESCRIPTION("x86 generic MSR driver"); -MODULE_LICENSE("GPL"); diff --git a/arch/i386/kernel/nmi_32.c b/arch/i386/kernel/nmi_32.c deleted file mode 100644 index c7227e2180f..00000000000 --- a/arch/i386/kernel/nmi_32.c +++ /dev/null @@ -1,468 +0,0 @@ -/* - * linux/arch/i386/nmi.c - * - * NMI watchdog support on APIC systems - * - * Started by Ingo Molnar - * - * Fixes: - * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. - * Mikael Pettersson : Power Management for local APIC NMI watchdog. - * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. Disable/enable API. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "mach_traps.h" - -int unknown_nmi_panic; -int nmi_watchdog_enabled; - -static cpumask_t backtrace_mask = CPU_MASK_NONE; - -/* nmi_active: - * >0: the lapic NMI watchdog is active, but can be disabled - * <0: the lapic NMI watchdog has not been set up, and cannot - * be enabled - * 0: the lapic NMI watchdog is disabled, but can be enabled - */ -atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ - -unsigned int nmi_watchdog = NMI_DEFAULT; -static unsigned int nmi_hz = HZ; - -static DEFINE_PER_CPU(short, wd_enabled); - -/* local prototypes */ -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); - -static int endflag __initdata = 0; - -#ifdef CONFIG_SMP -/* The performance counters used by NMI_LOCAL_APIC don't trigger when - * the CPU is idle. To make sure the NMI watchdog really ticks on all - * CPUs during the test make them busy. - */ -static __init void nmi_cpu_busy(void *data) -{ - local_irq_enable_in_hardirq(); - /* Intentionally don't use cpu_relax here. This is - to make sure that the performance counter really ticks, - even if there is a simulator or similar that catches the - pause instruction. On a real HT machine this is fine because - all other CPUs are busy with "useless" delay loops and don't - care if they get somewhat less cycles. */ - while (endflag == 0) - mb(); -} -#endif - -static int __init check_nmi_watchdog(void) -{ - unsigned int *prev_nmi_count; - int cpu; - - if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) - return 0; - - if (!atomic_read(&nmi_active)) - return 0; - - prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); - if (!prev_nmi_count) - return -1; - - printk(KERN_INFO "Testing NMI watchdog ... 
"); - - if (nmi_watchdog == NMI_LOCAL_APIC) - smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); - - for_each_possible_cpu(cpu) - prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; - local_irq_enable(); - mdelay((20*1000)/nmi_hz); // wait 20 ticks - - for_each_possible_cpu(cpu) { -#ifdef CONFIG_SMP - /* Check cpu_callin_map here because that is set - after the timer is started. */ - if (!cpu_isset(cpu, cpu_callin_map)) - continue; -#endif - if (!per_cpu(wd_enabled, cpu)) - continue; - if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { - printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", - cpu, - prev_nmi_count[cpu], - nmi_count(cpu)); - per_cpu(wd_enabled, cpu) = 0; - atomic_dec(&nmi_active); - } - } - endflag = 1; - if (!atomic_read(&nmi_active)) { - kfree(prev_nmi_count); - atomic_set(&nmi_active, -1); - return -1; - } - printk("OK.\n"); - - /* now that we know it works we can reduce NMI frequency to - something more reasonable; makes a difference in some configs */ - if (nmi_watchdog == NMI_LOCAL_APIC) - nmi_hz = lapic_adjust_nmi_hz(1); - - kfree(prev_nmi_count); - return 0; -} -/* This needs to happen later in boot so counters are working */ -late_initcall(check_nmi_watchdog); - -static int __init setup_nmi_watchdog(char *str) -{ - int nmi; - - get_option(&str, &nmi); - - if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) - return 0; - - nmi_watchdog = nmi; - return 1; -} - -__setup("nmi_watchdog=", setup_nmi_watchdog); - - -/* Suspend/resume support */ - -#ifdef CONFIG_PM - -static int nmi_pm_active; /* nmi_active before suspend */ - -static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) -{ - /* only CPU0 goes here, other CPUs should be offline */ - nmi_pm_active = atomic_read(&nmi_active); - stop_apic_nmi_watchdog(NULL); - BUG_ON(atomic_read(&nmi_active) != 0); - return 0; -} - -static int lapic_nmi_resume(struct sys_device *dev) -{ - /* only CPU0 goes here, other CPUs should be offline */ - if (nmi_pm_active > 0) { - 
setup_apic_nmi_watchdog(NULL); - touch_nmi_watchdog(); - } - return 0; -} - - -static struct sysdev_class nmi_sysclass = { - set_kset_name("lapic_nmi"), - .resume = lapic_nmi_resume, - .suspend = lapic_nmi_suspend, -}; - -static struct sys_device device_lapic_nmi = { - .id = 0, - .cls = &nmi_sysclass, -}; - -static int __init init_lapic_nmi_sysfs(void) -{ - int error; - - /* should really be a BUG_ON but b/c this is an - * init call, it just doesn't work. -dcz - */ - if (nmi_watchdog != NMI_LOCAL_APIC) - return 0; - - if (atomic_read(&nmi_active) < 0) - return 0; - - error = sysdev_class_register(&nmi_sysclass); - if (!error) - error = sysdev_register(&device_lapic_nmi); - return error; -} -/* must come after the local APIC's device_initcall() */ -late_initcall(init_lapic_nmi_sysfs); - -#endif /* CONFIG_PM */ - -static void __acpi_nmi_enable(void *__unused) -{ - apic_write_around(APIC_LVT0, APIC_DM_NMI); -} - -/* - * Enable timer based NMIs on all CPUs: - */ -void acpi_nmi_enable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); -} - -static void __acpi_nmi_disable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); -} - -/* - * Disable timer based NMIs on all CPUs: - */ -void acpi_nmi_disable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); -} - -void setup_apic_nmi_watchdog (void *unused) -{ - if (__get_cpu_var(wd_enabled)) - return; - - /* cheap hack to support suspend/resume */ - /* if cpu0 is not active neither should the other cpus */ - if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) - return; - - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - __get_cpu_var(wd_enabled) = 1; /* enable it before to avoid race with handler */ - if (lapic_watchdog_init(nmi_hz) < 0) { - __get_cpu_var(wd_enabled) = 0; - return; - } - /* FALL THROUGH */ - case NMI_IO_APIC: - __get_cpu_var(wd_enabled) = 1; 
- atomic_inc(&nmi_active); - } -} - -void stop_apic_nmi_watchdog(void *unused) -{ - /* only support LOCAL and IO APICs for now */ - if ((nmi_watchdog != NMI_LOCAL_APIC) && - (nmi_watchdog != NMI_IO_APIC)) - return; - if (__get_cpu_var(wd_enabled) == 0) - return; - if (nmi_watchdog == NMI_LOCAL_APIC) - lapic_watchdog_stop(); - __get_cpu_var(wd_enabled) = 0; - atomic_dec(&nmi_active); -} - -/* - * the best way to detect whether a CPU has a 'hard lockup' problem - * is to check it's local APIC timer IRQ counts. If they are not - * changing then that CPU has some problem. - * - * as these watchdog NMI IRQs are generated on every CPU, we only - * have to check the current processor. - * - * since NMIs don't listen to _any_ locks, we have to be extremely - * careful not to rely on unsafe variables. The printk might lock - * up though, so we have to break up any console locks first ... - * [when there will be more tty-related locks, break them up - * here too!] - */ - -static unsigned int - last_irq_sums [NR_CPUS], - alert_counter [NR_CPUS]; - -void touch_nmi_watchdog(void) -{ - if (nmi_watchdog > 0) { - unsigned cpu; - - /* - * Just reset the alert counters, (other CPUs might be - * spinning on locks we hold): - */ - for_each_present_cpu(cpu) { - if (alert_counter[cpu]) - alert_counter[cpu] = 0; - } - } - - /* - * Tickle the softlockup detector too: - */ - touch_softlockup_watchdog(); -} -EXPORT_SYMBOL(touch_nmi_watchdog); - -extern void die_nmi(struct pt_regs *, const char *msg); - -__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) -{ - - /* - * Since current_thread_info()-> is always on the stack, and we - * always switch the stack NMI-atomically, it's safe to use - * smp_processor_id(). 
- */ - unsigned int sum; - int touched = 0; - int cpu = smp_processor_id(); - int rc=0; - - /* check for other users first */ - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) { - rc = 1; - touched = 1; - } - - if (cpu_isset(cpu, backtrace_mask)) { - static DEFINE_SPINLOCK(lock); /* Serialise the printks */ - - spin_lock(&lock); - printk("NMI backtrace for cpu %d\n", cpu); - dump_stack(); - spin_unlock(&lock); - cpu_clear(cpu, backtrace_mask); - } - - /* - * Take the local apic timer and PIT/HPET into account. We don't - * know which one is active, when we have highres/dyntick on - */ - sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_cpu(cpu).irqs[0]; - - /* if the none of the timers isn't firing, this cpu isn't doing much */ - if (!touched && last_irq_sums[cpu] == sum) { - /* - * Ayiee, looks like this CPU is stuck ... - * wait a few IRQs (5 seconds) before doing the oops ... - */ - alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) - /* - * die_nmi will return ONLY if NOTIFY_STOP happens.. - */ - die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); - } else { - last_irq_sums[cpu] = sum; - alert_counter[cpu] = 0; - } - /* see if the nmi watchdog went off */ - if (!__get_cpu_var(wd_enabled)) - return rc; - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - rc |= lapic_wd_event(nmi_hz); - break; - case NMI_IO_APIC: - /* don't know how to accurately check for this. - * just assume it was a watchdog timer interrupt - * This matches the old behaviour. 
- */ - rc = 1; - break; - } - return rc; -} - -int do_nmi_callback(struct pt_regs * regs, int cpu) -{ -#ifdef CONFIG_SYSCTL - if (unknown_nmi_panic) - return unknown_nmi_panic_callback(regs, cpu); -#endif - return 0; -} - -#ifdef CONFIG_SYSCTL - -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) -{ - unsigned char reason = get_nmi_reason(); - char buf[64]; - - sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(regs, buf); - return 0; -} - -/* - * proc handler for /proc/sys/kernel/nmi - */ -int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, - void __user *buffer, size_t *length, loff_t *ppos) -{ - int old_state; - - nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; - old_state = nmi_watchdog_enabled; - proc_dointvec(table, write, file, buffer, length, ppos); - if (!!old_state == !!nmi_watchdog_enabled) - return 0; - - if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) { - printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); - return -EIO; - } - - if (nmi_watchdog == NMI_DEFAULT) { - if (lapic_watchdog_ok()) - nmi_watchdog = NMI_LOCAL_APIC; - else - nmi_watchdog = NMI_IO_APIC; - } - - if (nmi_watchdog == NMI_LOCAL_APIC) { - if (nmi_watchdog_enabled) - enable_lapic_nmi_watchdog(); - else - disable_lapic_nmi_watchdog(); - } else { - printk( KERN_WARNING - "NMI watchdog doesn't know what hardware to touch\n"); - return -EIO; - } - return 0; -} - -#endif - -void __trigger_all_cpu_backtrace(void) -{ - int i; - - backtrace_mask = cpu_online_map; - /* Wait for up to 10 seconds for all CPUs to do the backtrace */ - for (i = 0; i < 10 * 1000; i++) { - if (cpus_empty(backtrace_mask)) - break; - mdelay(1); - } -} - -EXPORT_SYMBOL(nmi_active); -EXPORT_SYMBOL(nmi_watchdog); diff --git a/arch/i386/kernel/numaq_32.c b/arch/i386/kernel/numaq_32.c deleted file mode 100644 index 9000d82c6dc..00000000000 --- a/arch/i386/kernel/numaq_32.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * 
Written by: Patricia Gaughen, IBM Corporation - * - * Copyright (C) 2002, IBM Corp. - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Send feedback to - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) - -/* - * Function: smp_dump_qct() - * - * Description: gets memory layout from the quad config table. This - * function also updates node_online_map with the nodes (quads) present. 
- */ -static void __init smp_dump_qct(void) -{ - int node; - struct eachquadmem *eq; - struct sys_cfg_data *scd = - (struct sys_cfg_data *)__va(SYS_CFG_DATA_PRIV_ADDR); - - nodes_clear(node_online_map); - for_each_node(node) { - if (scd->quads_present31_0 & (1 << node)) { - node_set_online(node); - eq = &scd->eq[node]; - /* Convert to pages */ - node_start_pfn[node] = MB_TO_PAGES( - eq->hi_shrd_mem_start - eq->priv_mem_size); - node_end_pfn[node] = MB_TO_PAGES( - eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); - - memory_present(node, - node_start_pfn[node], node_end_pfn[node]); - node_remap_size[node] = node_memmap_size_bytes(node, - node_start_pfn[node], - node_end_pfn[node]); - } - } -} - -/* - * Unlike Summit, we don't really care to let the NUMA-Q - * fall back to flat mode. Don't compile for NUMA-Q - * unless you really need it! - */ -int __init get_memcfg_numaq(void) -{ - smp_dump_qct(); - return 1; -} - -static int __init numaq_tsc_disable(void) -{ - if (num_online_nodes() > 1) { - printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); - tsc_disable = 1; - } - return 0; -} -arch_initcall(numaq_tsc_disable); diff --git a/arch/i386/kernel/paravirt_32.c b/arch/i386/kernel/paravirt_32.c deleted file mode 100644 index 739cfb207dd..00000000000 --- a/arch/i386/kernel/paravirt_32.c +++ /dev/null @@ -1,392 +0,0 @@ -/* Paravirtualization interfaces - Copyright (C) 2006 Rusty Russell IBM Corporation - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*/ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* nop stub */ -void _paravirt_nop(void) -{ -} - -static void __init default_banner(void) -{ - printk(KERN_INFO "Booting paravirtualized kernel on %s\n", - paravirt_ops.name); -} - -char *memory_setup(void) -{ - return paravirt_ops.memory_setup(); -} - -/* Simple instruction patching code. */ -#define DEF_NATIVE(name, code) \ - extern const char start_##name[], end_##name[]; \ - asm("start_" #name ": " code "; end_" #name ":") - -DEF_NATIVE(irq_disable, "cli"); -DEF_NATIVE(irq_enable, "sti"); -DEF_NATIVE(restore_fl, "push %eax; popf"); -DEF_NATIVE(save_fl, "pushf; pop %eax"); -DEF_NATIVE(iret, "iret"); -DEF_NATIVE(irq_enable_sysexit, "sti; sysexit"); -DEF_NATIVE(read_cr2, "mov %cr2, %eax"); -DEF_NATIVE(write_cr3, "mov %eax, %cr3"); -DEF_NATIVE(read_cr3, "mov %cr3, %eax"); -DEF_NATIVE(clts, "clts"); -DEF_NATIVE(read_tsc, "rdtsc"); - -DEF_NATIVE(ud2a, "ud2a"); - -static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, - unsigned long addr, unsigned len) -{ - const unsigned char *start, *end; - unsigned ret; - - switch(type) { -#define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site - SITE(irq_disable); - SITE(irq_enable); - SITE(restore_fl); - SITE(save_fl); - SITE(iret); - SITE(irq_enable_sysexit); - SITE(read_cr2); - SITE(read_cr3); - SITE(write_cr3); - SITE(clts); - SITE(read_tsc); -#undef SITE - - patch_site: - ret = paravirt_patch_insns(ibuf, len, start, end); - break; - - case PARAVIRT_PATCH(make_pgd): - case PARAVIRT_PATCH(make_pte): - case PARAVIRT_PATCH(pgd_val): - case PARAVIRT_PATCH(pte_val): -#ifdef CONFIG_X86_PAE - case 
PARAVIRT_PATCH(make_pmd): - case PARAVIRT_PATCH(pmd_val): -#endif - /* These functions end up returning exactly what - they're passed, in the same registers. */ - ret = paravirt_patch_nop(); - break; - - default: - ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); - break; - } - - return ret; -} - -unsigned paravirt_patch_nop(void) -{ - return 0; -} - -unsigned paravirt_patch_ignore(unsigned len) -{ - return len; -} - -struct branch { - unsigned char opcode; - u32 delta; -} __attribute__((packed)); - -unsigned paravirt_patch_call(void *insnbuf, - const void *target, u16 tgt_clobbers, - unsigned long addr, u16 site_clobbers, - unsigned len) -{ - struct branch *b = insnbuf; - unsigned long delta = (unsigned long)target - (addr+5); - - if (tgt_clobbers & ~site_clobbers) - return len; /* target would clobber too much for this site */ - if (len < 5) - return len; /* call too long for patch site */ - - b->opcode = 0xe8; /* call */ - b->delta = delta; - BUILD_BUG_ON(sizeof(*b) != 5); - - return 5; -} - -unsigned paravirt_patch_jmp(const void *target, void *insnbuf, - unsigned long addr, unsigned len) -{ - struct branch *b = insnbuf; - unsigned long delta = (unsigned long)target - (addr+5); - - if (len < 5) - return len; /* call too long for patch site */ - - b->opcode = 0xe9; /* jmp */ - b->delta = delta; - - return 5; -} - -unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, - unsigned long addr, unsigned len) -{ - void *opfunc = *((void **)¶virt_ops + type); - unsigned ret; - - if (opfunc == NULL) - /* If there's no function, patch it with a ud2a (BUG) */ - ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a); - else if (opfunc == paravirt_nop) - /* If the operation is a nop, then nop the callsite */ - ret = paravirt_patch_nop(); - else if (type == PARAVIRT_PATCH(iret) || - type == PARAVIRT_PATCH(irq_enable_sysexit)) - /* If operation requires a jmp, then jmp */ - ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len); - else - /* 
Otherwise call the function; assume target could - clobber any caller-save reg */ - ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY, - addr, clobbers, len); - - return ret; -} - -unsigned paravirt_patch_insns(void *insnbuf, unsigned len, - const char *start, const char *end) -{ - unsigned insn_len = end - start; - - if (insn_len > len || start == NULL) - insn_len = len; - else - memcpy(insnbuf, start, insn_len); - - return insn_len; -} - -void init_IRQ(void) -{ - paravirt_ops.init_IRQ(); -} - -static void native_flush_tlb(void) -{ - __native_flush_tlb(); -} - -/* - * Global pages have to be flushed a bit differently. Not a real - * performance problem because this does not happen often. - */ -static void native_flush_tlb_global(void) -{ - __native_flush_tlb_global(); -} - -static void native_flush_tlb_single(unsigned long addr) -{ - __native_flush_tlb_single(addr); -} - -/* These are in entry.S */ -extern void native_iret(void); -extern void native_irq_enable_sysexit(void); - -static int __init print_banner(void) -{ - paravirt_ops.banner(); - return 0; -} -core_initcall(print_banner); - -static struct resource reserve_ioports = { - .start = 0, - .end = IO_SPACE_LIMIT, - .name = "paravirt-ioport", - .flags = IORESOURCE_IO | IORESOURCE_BUSY, -}; - -static struct resource reserve_iomem = { - .start = 0, - .end = -1, - .name = "paravirt-iomem", - .flags = IORESOURCE_MEM | IORESOURCE_BUSY, -}; - -/* - * Reserve the whole legacy IO space to prevent any legacy drivers - * from wasting time probing for their hardware. This is a fairly - * brute-force approach to disabling all non-virtual drivers. - * - * Note that this must be called very early to have any effect. 
- */ -int paravirt_disable_iospace(void) -{ - int ret; - - ret = request_resource(&ioport_resource, &reserve_ioports); - if (ret == 0) { - ret = request_resource(&iomem_resource, &reserve_iomem); - if (ret) - release_resource(&reserve_ioports); - } - - return ret; -} - -struct paravirt_ops paravirt_ops = { - .name = "bare hardware", - .paravirt_enabled = 0, - .kernel_rpl = 0, - .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ - - .patch = native_patch, - .banner = default_banner, - .arch_setup = paravirt_nop, - .memory_setup = machine_specific_memory_setup, - .get_wallclock = native_get_wallclock, - .set_wallclock = native_set_wallclock, - .time_init = hpet_time_init, - .init_IRQ = native_init_IRQ, - - .cpuid = native_cpuid, - .get_debugreg = native_get_debugreg, - .set_debugreg = native_set_debugreg, - .clts = native_clts, - .read_cr0 = native_read_cr0, - .write_cr0 = native_write_cr0, - .read_cr2 = native_read_cr2, - .write_cr2 = native_write_cr2, - .read_cr3 = native_read_cr3, - .write_cr3 = native_write_cr3, - .read_cr4 = native_read_cr4, - .read_cr4_safe = native_read_cr4_safe, - .write_cr4 = native_write_cr4, - .save_fl = native_save_fl, - .restore_fl = native_restore_fl, - .irq_disable = native_irq_disable, - .irq_enable = native_irq_enable, - .safe_halt = native_safe_halt, - .halt = native_halt, - .wbinvd = native_wbinvd, - .read_msr = native_read_msr_safe, - .write_msr = native_write_msr_safe, - .read_tsc = native_read_tsc, - .read_pmc = native_read_pmc, - .sched_clock = native_sched_clock, - .get_cpu_khz = native_calculate_cpu_khz, - .load_tr_desc = native_load_tr_desc, - .set_ldt = native_set_ldt, - .load_gdt = native_load_gdt, - .load_idt = native_load_idt, - .store_gdt = native_store_gdt, - .store_idt = native_store_idt, - .store_tr = native_store_tr, - .load_tls = native_load_tls, - .write_ldt_entry = write_dt_entry, - .write_gdt_entry = write_dt_entry, - .write_idt_entry = write_dt_entry, - .load_esp0 = native_load_esp0, - - 
.set_iopl_mask = native_set_iopl_mask, - .io_delay = native_io_delay, - -#ifdef CONFIG_X86_LOCAL_APIC - .apic_write = native_apic_write, - .apic_write_atomic = native_apic_write_atomic, - .apic_read = native_apic_read, - .setup_boot_clock = setup_boot_APIC_clock, - .setup_secondary_clock = setup_secondary_APIC_clock, - .startup_ipi_hook = paravirt_nop, -#endif - .set_lazy_mode = paravirt_nop, - - .pagetable_setup_start = native_pagetable_setup_start, - .pagetable_setup_done = native_pagetable_setup_done, - - .flush_tlb_user = native_flush_tlb, - .flush_tlb_kernel = native_flush_tlb_global, - .flush_tlb_single = native_flush_tlb_single, - .flush_tlb_others = native_flush_tlb_others, - - .alloc_pt = paravirt_nop, - .alloc_pd = paravirt_nop, - .alloc_pd_clone = paravirt_nop, - .release_pt = paravirt_nop, - .release_pd = paravirt_nop, - - .set_pte = native_set_pte, - .set_pte_at = native_set_pte_at, - .set_pmd = native_set_pmd, - .pte_update = paravirt_nop, - .pte_update_defer = paravirt_nop, - -#ifdef CONFIG_HIGHPTE - .kmap_atomic_pte = kmap_atomic, -#endif - -#ifdef CONFIG_X86_PAE - .set_pte_atomic = native_set_pte_atomic, - .set_pte_present = native_set_pte_present, - .set_pud = native_set_pud, - .pte_clear = native_pte_clear, - .pmd_clear = native_pmd_clear, - - .pmd_val = native_pmd_val, - .make_pmd = native_make_pmd, -#endif - - .pte_val = native_pte_val, - .pgd_val = native_pgd_val, - - .make_pte = native_make_pte, - .make_pgd = native_make_pgd, - - .irq_enable_sysexit = native_irq_enable_sysexit, - .iret = native_iret, - - .dup_mmap = paravirt_nop, - .exit_mmap = paravirt_nop, - .activate_mm = paravirt_nop, -}; - -EXPORT_SYMBOL(paravirt_ops); diff --git a/arch/i386/kernel/pci-dma_32.c b/arch/i386/kernel/pci-dma_32.c deleted file mode 100644 index 048f09b6255..00000000000 --- a/arch/i386/kernel/pci-dma_32.c +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Dynamic DMA mapping support. 
- * - * On i386 there is no hardware dynamic DMA address translation, - * so consistent alloc/free are merely page allocation/freeing. - * The rest of the dynamic DMA mapping interface is implemented - * in asm/pci.h. - */ - -#include -#include -#include -#include -#include -#include -#include - -struct dma_coherent_mem { - void *virt_base; - u32 device_base; - int size; - int flags; - unsigned long *bitmap; -}; - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) -{ - void *ret; - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - int order = get_order(size); - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); - - if (mem) { - int page = bitmap_find_free_region(mem->bitmap, mem->size, - order); - if (page >= 0) { - *dma_handle = mem->device_base + (page << PAGE_SHIFT); - ret = mem->virt_base + (page << PAGE_SHIFT); - memset(ret, 0, size); - return ret; - } - if (mem->flags & DMA_MEMORY_EXCLUSIVE) - return NULL; - } - - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) - gfp |= GFP_DMA; - - ret = (void *)__get_free_pages(gfp, order); - - if (ret != NULL) { - memset(ret, 0, size); - *dma_handle = virt_to_phys(ret); - } - return ret; -} -EXPORT_SYMBOL(dma_alloc_coherent); - -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - struct dma_coherent_mem *mem = dev ? 
dev->dma_mem : NULL; - int order = get_order(size); - - if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - } else - free_pages((unsigned long)vaddr, order); -} -EXPORT_SYMBOL(dma_free_coherent); - -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - void __iomem *mem_base = NULL; - int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - - if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) - goto out; - if (!size) - goto out; - if (dev->dma_mem) - goto out; - - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ - - mem_base = ioremap(bus_addr, size); - if (!mem_base) - goto out; - - dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dev->dma_mem) - goto out; - dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dev->dma_mem->bitmap) - goto free1_out; - - dev->dma_mem->virt_base = mem_base; - dev->dma_mem->device_base = device_addr; - dev->dma_mem->size = pages; - dev->dma_mem->flags = flags; - - if (flags & DMA_MEMORY_MAP) - return DMA_MEMORY_MAP; - - return DMA_MEMORY_IO; - - free1_out: - kfree(dev->dma_mem); - out: - if (mem_base) - iounmap(mem_base); - return 0; -} -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if(!mem) - return; - dev->dma_mem = NULL; - iounmap(mem->virt_base); - kfree(mem->bitmap); - kfree(mem); -} -EXPORT_SYMBOL(dma_release_declared_memory); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; - int pos, err; - - if (!mem) - return ERR_PTR(-EINVAL); 
- - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); - if (err != 0) - return ERR_PTR(err); - return mem->virt_base + (pos << PAGE_SHIFT); -} -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); - -#ifdef CONFIG_PCI -/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ - -int forbid_dac; -EXPORT_SYMBOL(forbid_dac); - -static __devinit void via_no_dac(struct pci_dev *dev) -{ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { - printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n"); - forbid_dac = 1; - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); - -static int check_iommu(char *s) -{ - if (!strcmp(s, "usedac")) { - forbid_dac = -1; - return 1; - } - return 0; -} -__setup("iommu=", check_iommu); -#endif diff --git a/arch/i386/kernel/pcspeaker.c b/arch/i386/kernel/pcspeaker.c deleted file mode 100644 index bc1f2d3ea27..00000000000 --- a/arch/i386/kernel/pcspeaker.c +++ /dev/null @@ -1,20 +0,0 @@ -#include -#include -#include - -static __init int add_pcspkr(void) -{ - struct platform_device *pd; - int ret; - - pd = platform_device_alloc("pcspkr", -1); - if (!pd) - return -ENOMEM; - - ret = platform_device_add(pd); - if (ret) - platform_device_put(pd); - - return ret; -} -device_initcall(add_pcspkr); diff --git a/arch/i386/kernel/process_32.c b/arch/i386/kernel/process_32.c deleted file mode 100644 index 84664710b78..00000000000 --- a/arch/i386/kernel/process_32.c +++ /dev/null @@ -1,951 +0,0 @@ -/* - * linux/arch/i386/kernel/process.c - * - * Copyright (C) 1995 Linus Torvalds - * - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - */ - -/* - * This file handles the architecture-dependent parts of process handling.. 
- */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_MATH_EMULATION -#include -#endif - -#include - -#include -#include - -asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); - -static int hlt_counter; - -unsigned long boot_option_idle_override = 0; -EXPORT_SYMBOL(boot_option_idle_override); - -DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; -EXPORT_PER_CPU_SYMBOL(current_task); - -DEFINE_PER_CPU(int, cpu_number); -EXPORT_PER_CPU_SYMBOL(cpu_number); - -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return ((unsigned long *)tsk->thread.esp)[3]; -} - -/* - * Powermanagement idle function, if any.. - */ -void (*pm_idle)(void); -EXPORT_SYMBOL(pm_idle); -static DEFINE_PER_CPU(unsigned int, cpu_idle_state); - -void disable_hlt(void) -{ - hlt_counter++; -} - -EXPORT_SYMBOL(disable_hlt); - -void enable_hlt(void) -{ - hlt_counter--; -} - -EXPORT_SYMBOL(enable_hlt); - -/* - * We use this if we don't have any better - * idle routine.. - */ -void default_idle(void) -{ - if (!hlt_counter && boot_cpu_data.hlt_works_ok) { - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we - * test NEED_RESCHED: - */ - smp_mb(); - - local_irq_disable(); - if (!need_resched()) - safe_halt(); /* enables interrupts racelessly */ - else - local_irq_enable(); - current_thread_info()->status |= TS_POLLING; - } else { - /* loop is done by the caller */ - cpu_relax(); - } -} -#ifdef CONFIG_APM_MODULE -EXPORT_SYMBOL(default_idle); -#endif - -/* - * On SMP it's slightly faster (but much more power-consuming!) 
- * to poll the ->work.need_resched flag instead of waiting for the - * cross-CPU IPI to arrive. Use this option with caution. - */ -static void poll_idle (void) -{ - cpu_relax(); -} - -#ifdef CONFIG_HOTPLUG_CPU -#include -/* We don't actually take CPU down, just spin without interrupts. */ -static inline void play_dead(void) -{ - /* This must be done before dead CPU ack */ - cpu_exit_clear(); - wbinvd(); - mb(); - /* Ack it */ - __get_cpu_var(cpu_state) = CPU_DEAD; - - /* - * With physical CPU hotplug, we should halt the cpu - */ - local_irq_disable(); - while (1) - halt(); -} -#else -static inline void play_dead(void) -{ - BUG(); -} -#endif /* CONFIG_HOTPLUG_CPU */ - -/* - * The idle thread. There's no useful work to be - * done, so just try to conserve power and have a - * low exit latency (ie sit in a loop waiting for - * somebody to say that they'd like to reschedule) - */ -void cpu_idle(void) -{ - int cpu = smp_processor_id(); - - current_thread_info()->status |= TS_POLLING; - - /* endless idle loop with no priority at all */ - while (1) { - tick_nohz_stop_sched_tick(); - while (!need_resched()) { - void (*idle)(void); - - if (__get_cpu_var(cpu_idle_state)) - __get_cpu_var(cpu_idle_state) = 0; - - check_pgt_cache(); - rmb(); - idle = pm_idle; - - if (!idle) - idle = default_idle; - - if (cpu_is_offline(cpu)) - play_dead(); - - __get_cpu_var(irq_stat).idle_timestamp = jiffies; - idle(); - } - tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - } -} - -void cpu_idle_wait(void) -{ - unsigned int cpu, this_cpu = get_cpu(); - cpumask_t map, tmp = current->cpus_allowed; - - set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); - put_cpu(); - - cpus_clear(map); - for_each_online_cpu(cpu) { - per_cpu(cpu_idle_state, cpu) = 1; - cpu_set(cpu, map); - } - - __get_cpu_var(cpu_idle_state) = 0; - - wmb(); - do { - ssleep(1); - for_each_online_cpu(cpu) { - if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) - 
cpu_clear(cpu, map); - } - cpus_and(map, map, cpu_online_map); - } while (!cpus_empty(map)); - - set_cpus_allowed(current, tmp); -} -EXPORT_SYMBOL_GPL(cpu_idle_wait); - -/* - * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, - * which can obviate IPI to trigger checking of need_resched. - * We execute MONITOR against need_resched and enter optimized wait state - * through MWAIT. Whenever someone changes need_resched, we would be woken - * up from MWAIT (without an IPI). - * - * New with Core Duo processors, MWAIT can take some hints based on CPU - * capability. - */ -void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) -{ - if (!need_resched()) { - __monitor((void *)&current_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __mwait(eax, ecx); - } -} - -/* Default MONITOR/MWAIT with no hints, used for default C1 state */ -static void mwait_idle(void) -{ - local_irq_enable(); - mwait_idle_with_hints(0, 0); -} - -void __devinit select_idle_routine(const struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_MWAIT)) { - printk("monitor/mwait feature present.\n"); - /* - * Skip, if setup has overridden idle. 
- * One CPU supports mwait => All CPUs supports mwait - */ - if (!pm_idle) { - printk("using mwait in idle threads.\n"); - pm_idle = mwait_idle; - } - } -} - -static int __init idle_setup(char *str) -{ - if (!strcmp(str, "poll")) { - printk("using polling idle threads.\n"); - pm_idle = poll_idle; -#ifdef CONFIG_X86_SMP - if (smp_num_siblings > 1) - printk("WARNING: polling idle and HT enabled, performance may degrade.\n"); -#endif - } else if (!strcmp(str, "mwait")) - force_mwait = 1; - else - return -1; - - boot_option_idle_override = 1; - return 0; -} -early_param("idle", idle_setup); - -void show_regs(struct pt_regs * regs) -{ - unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; - unsigned long d0, d1, d2, d3, d6, d7; - - printk("\n"); - printk("Pid: %d, comm: %20s\n", current->pid, current->comm); - printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); - print_symbol("EIP is at %s\n", regs->eip); - - if (user_mode_vm(regs)) - printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); - printk(" EFLAGS: %08lx %s (%s %.*s)\n", - regs->eflags, print_tainted(), init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", - regs->eax,regs->ebx,regs->ecx,regs->edx); - printk("ESI: %08lx EDI: %08lx EBP: %08lx", - regs->esi, regs->edi, regs->ebp); - printk(" DS: %04x ES: %04x FS: %04x\n", - 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); - - cr0 = read_cr0(); - cr2 = read_cr2(); - cr3 = read_cr3(); - cr4 = read_cr4_safe(); - printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); - - get_debugreg(d0, 0); - get_debugreg(d1, 1); - get_debugreg(d2, 2); - get_debugreg(d3, 3); - printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", - d0, d1, d2, d3); - get_debugreg(d6, 6); - get_debugreg(d7, 7); - printk("DR6: %08lx DR7: %08lx\n", d6, d7); - - show_trace(NULL, regs, &regs->esp); -} - -/* - * This gets run with 
%ebx containing the - * function to call, and %edx containing - * the "args". - */ -extern void kernel_thread_helper(void); - -/* - * Create a kernel thread - */ -int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) -{ - struct pt_regs regs; - - memset(&regs, 0, sizeof(regs)); - - regs.ebx = (unsigned long) fn; - regs.edx = (unsigned long) arg; - - regs.xds = __USER_DS; - regs.xes = __USER_DS; - regs.xfs = __KERNEL_PERCPU; - regs.orig_eax = -1; - regs.eip = (unsigned long) kernel_thread_helper; - regs.xcs = __KERNEL_CS | get_kernel_rpl(); - regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; - - /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); -} -EXPORT_SYMBOL(kernel_thread); - -/* - * Free current thread data structures etc.. - */ -void exit_thread(void) -{ - /* The process may have allocated an io port bitmap... nuke it. */ - if (unlikely(test_thread_flag(TIF_IO_BITMAP))) { - struct task_struct *tsk = current; - struct thread_struct *t = &tsk->thread; - int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); - - kfree(t->io_bitmap_ptr); - t->io_bitmap_ptr = NULL; - clear_thread_flag(TIF_IO_BITMAP); - /* - * Careful, clear this in the TSS too: - */ - memset(tss->io_bitmap, 0xff, tss->io_bitmap_max); - t->io_bitmap_max = 0; - tss->io_bitmap_owner = NULL; - tss->io_bitmap_max = 0; - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; - put_cpu(); - } -} - -void flush_thread(void) -{ - struct task_struct *tsk = current; - - memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); - memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - clear_tsk_thread_flag(tsk, TIF_DEBUG); - /* - * Forget coprocessor state.. 
- */ - clear_fpu(tsk); - clear_used_math(); -} - -void release_thread(struct task_struct *dead_task) -{ - BUG_ON(dead_task->mm); - release_vm86_irqs(dead_task); -} - -/* - * This gets called before we allocate a new thread and copy - * the current task into it. - */ -void prepare_to_copy(struct task_struct *tsk) -{ - unlazy_fpu(tsk); -} - -int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, - unsigned long unused, - struct task_struct * p, struct pt_regs * regs) -{ - struct pt_regs * childregs; - struct task_struct *tsk; - int err; - - childregs = task_pt_regs(p); - *childregs = *regs; - childregs->eax = 0; - childregs->esp = esp; - - p->thread.esp = (unsigned long) childregs; - p->thread.esp0 = (unsigned long) (childregs+1); - - p->thread.eip = (unsigned long) ret_from_fork; - - savesegment(gs,p->thread.gs); - - tsk = current; - if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { - p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, - IO_BITMAP_BYTES, GFP_KERNEL); - if (!p->thread.io_bitmap_ptr) { - p->thread.io_bitmap_max = 0; - return -ENOMEM; - } - set_tsk_thread_flag(p, TIF_IO_BITMAP); - } - - /* - * Set a new TLS for the child thread? - */ - if (clone_flags & CLONE_SETTLS) { - struct desc_struct *desc; - struct user_desc info; - int idx; - - err = -EFAULT; - if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info))) - goto out; - err = -EINVAL; - if (LDT_empty(&info)) - goto out; - - idx = info.entry_number; - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - goto out; - - desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - } - - err = 0; - out: - if (err && p->thread.io_bitmap_ptr) { - kfree(p->thread.io_bitmap_ptr); - p->thread.io_bitmap_max = 0; - } - return err; -} - -/* - * fill in the user structure for a core dump.. 
- */ -void dump_thread(struct pt_regs * regs, struct user * dump) -{ - int i; - -/* changed the size calculations - should hopefully work better. lbt */ - dump->magic = CMAGIC; - dump->start_code = 0; - dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); - dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; - dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; - dump->u_dsize -= dump->u_tsize; - dump->u_ssize = 0; - for (i = 0; i < 8; i++) - dump->u_debugreg[i] = current->thread.debugreg[i]; - - if (dump->start_stack < TASK_SIZE) - dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; - - dump->regs.ebx = regs->ebx; - dump->regs.ecx = regs->ecx; - dump->regs.edx = regs->edx; - dump->regs.esi = regs->esi; - dump->regs.edi = regs->edi; - dump->regs.ebp = regs->ebp; - dump->regs.eax = regs->eax; - dump->regs.ds = regs->xds; - dump->regs.es = regs->xes; - dump->regs.fs = regs->xfs; - savesegment(gs,dump->regs.gs); - dump->regs.orig_eax = regs->orig_eax; - dump->regs.eip = regs->eip; - dump->regs.cs = regs->xcs; - dump->regs.eflags = regs->eflags; - dump->regs.esp = regs->esp; - dump->regs.ss = regs->xss; - - dump->u_fpvalid = dump_fpu (regs, &dump->i387); -} -EXPORT_SYMBOL(dump_thread); - -/* - * Capture the user space registers if the task is not running (in user space) - */ -int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) -{ - struct pt_regs ptregs = *task_pt_regs(tsk); - ptregs.xcs &= 0xffff; - ptregs.xds &= 0xffff; - ptregs.xes &= 0xffff; - ptregs.xss &= 0xffff; - - elf_core_copy_regs(regs, &ptregs); - - return 1; -} - -#ifdef CONFIG_SECCOMP -void hard_disable_TSC(void) -{ - write_cr4(read_cr4() | X86_CR4_TSD); -} -void disable_TSC(void) -{ - preempt_disable(); - if (!test_and_set_thread_flag(TIF_NOTSC)) - /* - * Must flip the CPU state synchronously with - * TIF_NOTSC in the current running context. 
- */ - hard_disable_TSC(); - preempt_enable(); -} -void hard_enable_TSC(void) -{ - write_cr4(read_cr4() & ~X86_CR4_TSD); -} -#endif /* CONFIG_SECCOMP */ - -static noinline void -__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) -{ - struct thread_struct *next; - - next = &next_p->thread; - - if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - set_debugreg(next->debugreg[0], 0); - set_debugreg(next->debugreg[1], 1); - set_debugreg(next->debugreg[2], 2); - set_debugreg(next->debugreg[3], 3); - /* no 4 and 5 */ - set_debugreg(next->debugreg[6], 6); - set_debugreg(next->debugreg[7], 7); - } - -#ifdef CONFIG_SECCOMP - if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ - test_tsk_thread_flag(next_p, TIF_NOTSC)) { - /* prev and next are different */ - if (test_tsk_thread_flag(next_p, TIF_NOTSC)) - hard_disable_TSC(); - else - hard_enable_TSC(); - } -#endif - - if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { - /* - * Disable the bitmap via an invalid offset. We still cache - * the previous bitmap owner and the IO bitmap contents: - */ - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; - return; - } - - if (likely(next == tss->io_bitmap_owner)) { - /* - * Previous owner of the bitmap (hence the bitmap content) - * matches the next task, we dont have to do anything but - * to set a valid offset in the TSS: - */ - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; - return; - } - /* - * Lazy TSS's I/O bitmap copy. We set an invalid offset here - * and we let the task to get a GPF in case an I/O instruction - * is performed. The handler of the GPF will verify that the - * faulting task has a valid I/O bitmap and, it true, does the - * real copy and restart the instruction. This will save us - * redundant copies when the currently switched task does not - * perform any I/O during its timeslice. - */ - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; -} - -/* - * switch_to(x,yn) should switch tasks from x to y. 
- * - * We fsave/fwait so that an exception goes off at the right time - * (as a call from the fsave or fwait in effect) rather than to - * the wrong process. Lazy FP saving no longer makes any sense - * with modern CPU's, and this simplifies a lot of things (SMP - * and UP become the same). - * - * NOTE! We used to use the x86 hardware context switching. The - * reason for not using it any more becomes apparent when you - * try to recover gracefully from saved state that is no longer - * valid (stale segment register values in particular). With the - * hardware task-switch, there is no way to fix up bad state in - * a reasonable manner. - * - * The fact that Intel documents the hardware task-switching to - * be slow is a fairly red herring - this code is not noticeably - * faster. However, there _is_ some room for improvement here, - * so the performance issues may eventually be a valid point. - * More important, however, is the fact that this allows us much - * more flexibility. - * - * The return value (in %eax) will be the "prev" task after - * the task-switch, and shows up in ret_from_fork in entry.S, - * for example. - */ -struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) -{ - struct thread_struct *prev = &prev_p->thread, - *next = &next_p->thread; - int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); - - /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ - - __unlazy_fpu(prev_p); - - - /* we're going to use this soon, after a few expensive things */ - if (next_p->fpu_counter > 5) - prefetch(&next->i387.fxsave); - - /* - * Reload esp0. - */ - load_esp0(tss, next); - - /* - * Save away %gs. No need to save %fs, as it was saved on the - * stack on entry. No need to save %es and %ds, as those are - * always kernel segments while inside the kernel. 
Doing this - * before setting the new TLS descriptors avoids the situation - * where we temporarily have non-reloadable segments in %fs - * and %gs. This could be an issue if the NMI handler ever - * used %fs or %gs (it does not today), or if the kernel is - * running inside of a hypervisor layer. - */ - savesegment(gs, prev->gs); - - /* - * Load the per-thread Thread-Local Storage descriptor. - */ - load_TLS(next, cpu); - - /* - * Restore IOPL if needed. In normal use, the flags restore - * in the switch assembly will handle this. But if the kernel - * is running virtualized at a non-zero CPL, the popf will - * not restore flags, so it must be done in a separate step. - */ - if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) - set_iopl_mask(next->iopl); - - /* - * Now maybe handle debug registers and/or IO bitmaps - */ - if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || - task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) - __switch_to_xtra(prev_p, next_p, tss); - - /* - * Leave lazy mode, flushing any hypercalls made here. - * This must be done before restoring TLS segments so - * the GDT and LDT are properly updated, and must be - * done before math_state_restore, so the TS bit is up - * to date. 
- */ - arch_leave_lazy_cpu_mode(); - - /* If the task has used fpu the last 5 timeslices, just do a full - * restore of the math state immediately to avoid the trap; the - * chances of needing FPU soon are obviously high now - */ - if (next_p->fpu_counter > 5) - math_state_restore(); - - /* - * Restore %gs if needed (which is common) - */ - if (prev->gs | next->gs) - loadsegment(gs, next->gs); - - x86_write_percpu(current_task, next_p); - - return prev_p; -} - -asmlinkage int sys_fork(struct pt_regs regs) -{ - return do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL); -} - -asmlinkage int sys_clone(struct pt_regs regs) -{ - unsigned long clone_flags; - unsigned long newsp; - int __user *parent_tidptr, *child_tidptr; - - clone_flags = regs.ebx; - newsp = regs.ecx; - parent_tidptr = (int __user *)regs.edx; - child_tidptr = (int __user *)regs.edi; - if (!newsp) - newsp = regs.esp; - return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr); -} - -/* - * This is trivial, and on the face of it looks like it - * could equally well be done in user mode. - * - * Not so, for quite unobvious reasons - register pressure. - * In user mode vfork() cannot have a stack frame, and if - * done by calling the "clone()" system call directly, you - * do not have enough call-clobbered registers to hold all - * the information you need. - */ -asmlinkage int sys_vfork(struct pt_regs regs) -{ - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0, NULL, NULL); -} - -/* - * sys_execve() executes a new program. - */ -asmlinkage int sys_execve(struct pt_regs regs) -{ - int error; - char * filename; - - filename = getname((char __user *) regs.ebx); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - goto out; - error = do_execve(filename, - (char __user * __user *) regs.ecx, - (char __user * __user *) regs.edx, - &regs); - if (error == 0) { - task_lock(current); - current->ptrace &= ~PT_DTRACE; - task_unlock(current); - /* Make sure we don't return using sysenter.. 
*/ - set_thread_flag(TIF_IRET); - } - putname(filename); -out: - return error; -} - -#define top_esp (THREAD_SIZE - sizeof(unsigned long)) -#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) - -unsigned long get_wchan(struct task_struct *p) -{ - unsigned long ebp, esp, eip; - unsigned long stack_page; - int count = 0; - if (!p || p == current || p->state == TASK_RUNNING) - return 0; - stack_page = (unsigned long)task_stack_page(p); - esp = p->thread.esp; - if (!stack_page || esp < stack_page || esp > top_esp+stack_page) - return 0; - /* include/asm-i386/system.h:switch_to() pushes ebp last. */ - ebp = *(unsigned long *) esp; - do { - if (ebp < stack_page || ebp > top_ebp+stack_page) - return 0; - eip = *(unsigned long *) (ebp+4); - if (!in_sched_functions(eip)) - return eip; - ebp = *(unsigned long *) ebp; - } while (count++ < 16); - return 0; -} - -/* - * sys_alloc_thread_area: get a yet unused TLS descriptor index. - */ -static int get_free_idx(void) -{ - struct thread_struct *t = &current->thread; - int idx; - - for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) - if (desc_empty(t->tls_array + idx)) - return idx + GDT_ENTRY_TLS_MIN; - return -ESRCH; -} - -/* - * Set a given TLS descriptor: - */ -asmlinkage int sys_set_thread_area(struct user_desc __user *u_info) -{ - struct thread_struct *t = &current->thread; - struct user_desc info; - struct desc_struct *desc; - int cpu, idx; - - if (copy_from_user(&info, u_info, sizeof(info))) - return -EFAULT; - idx = info.entry_number; - - /* - * index -1 means the kernel should try to find and - * allocate an empty descriptor: - */ - if (idx == -1) { - idx = get_free_idx(); - if (idx < 0) - return idx; - if (put_user(idx, &u_info->entry_number)) - return -EFAULT; - } - - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN; - - /* - * We must not get preempted while modifying the TLS. 
- */ - cpu = get_cpu(); - - if (LDT_empty(&info)) { - desc->a = 0; - desc->b = 0; - } else { - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - } - load_TLS(t, cpu); - - put_cpu(); - - return 0; -} - -/* - * Get the current Thread-Local Storage area: - */ - -#define GET_BASE(desc) ( \ - (((desc)->a >> 16) & 0x0000ffff) | \ - (((desc)->b << 16) & 0x00ff0000) | \ - ( (desc)->b & 0xff000000) ) - -#define GET_LIMIT(desc) ( \ - ((desc)->a & 0x0ffff) | \ - ((desc)->b & 0xf0000) ) - -#define GET_32BIT(desc) (((desc)->b >> 22) & 1) -#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) -#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) -#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) -#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) -#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) - -asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) -{ - struct user_desc info; - struct desc_struct *desc; - int idx; - - if (get_user(idx, &u_info->entry_number)) - return -EFAULT; - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - memset(&info, 0, sizeof(info)); - - desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - - info.entry_number = idx; - info.base_addr = GET_BASE(desc); - info.limit = GET_LIMIT(desc); - info.seg_32bit = GET_32BIT(desc); - info.contents = GET_CONTENTS(desc); - info.read_exec_only = !GET_WRITABLE(desc); - info.limit_in_pages = GET_LIMIT_PAGES(desc); - info.seg_not_present = !GET_PRESENT(desc); - info.useable = GET_USEABLE(desc); - - if (copy_to_user(u_info, &info, sizeof(info))) - return -EFAULT; - return 0; -} - -unsigned long arch_align_stack(unsigned long sp) -{ - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; -} diff --git a/arch/i386/kernel/ptrace_32.c b/arch/i386/kernel/ptrace_32.c deleted file mode 100644 index 7c1b92522e9..00000000000 --- a/arch/i386/kernel/ptrace_32.c +++ /dev/null @@ -1,723 +0,0 @@ -/* 
ptrace.c */ -/* By Ross Biro 1/23/92 */ -/* - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * does not yet catch signals sent when the child dies. - * in exit.c or in signal.c. - */ - -/* - * Determines which flags the user has access to [1 = access, 0 = no access]. - * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), NT(14), IOPL(12-13), IF(9). - * Also masks reserved bits (31-22, 15, 5, 3, 1). - */ -#define FLAG_MASK 0x00050dd5 - -/* set's the trap flag. */ -#define TRAP_FLAG 0x100 - -/* - * Offset of eflags on child stack.. - */ -#define EFL_OFFSET offsetof(struct pt_regs, eflags) - -static inline struct pt_regs *get_child_regs(struct task_struct *task) -{ - void *stack_top = (void *)task->thread.esp0; - return stack_top - sizeof(struct pt_regs); -} - -/* - * This routine will get a word off of the processes privileged stack. - * the offset is bytes into the pt_regs structure on the stack. - * This routine assumes that all the privileged stacks are in our - * data space. - */ -static inline int get_stack_long(struct task_struct *task, int offset) -{ - unsigned char *stack; - - stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs); - stack += offset; - return (*((int *)stack)); -} - -/* - * This routine will put a word on the processes privileged stack. - * the offset is bytes into the pt_regs structure on the stack. - * This routine assumes that all the privileged stacks are in our - * data space. 
- */ -static inline int put_stack_long(struct task_struct *task, int offset, - unsigned long data) -{ - unsigned char * stack; - - stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs); - stack += offset; - *(unsigned long *) stack = data; - return 0; -} - -static int putreg(struct task_struct *child, - unsigned long regno, unsigned long value) -{ - switch (regno >> 2) { - case GS: - if (value && (value & 3) != 3) - return -EIO; - child->thread.gs = value; - return 0; - case DS: - case ES: - case FS: - if (value && (value & 3) != 3) - return -EIO; - value &= 0xffff; - break; - case SS: - case CS: - if ((value & 3) != 3) - return -EIO; - value &= 0xffff; - break; - case EFL: - value &= FLAG_MASK; - value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; - break; - } - if (regno > FS*4) - regno -= 1*4; - put_stack_long(child, regno, value); - return 0; -} - -static unsigned long getreg(struct task_struct *child, - unsigned long regno) -{ - unsigned long retval = ~0UL; - - switch (regno >> 2) { - case GS: - retval = child->thread.gs; - break; - case DS: - case ES: - case FS: - case SS: - case CS: - retval = 0xffff; - /* fall through */ - default: - if (regno > FS*4) - regno -= 1*4; - retval &= get_stack_long(child, regno); - } - return retval; -} - -#define LDT_SEGMENT 4 - -static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_regs *regs) -{ - unsigned long addr, seg; - - addr = regs->eip; - seg = regs->xcs & 0xffff; - if (regs->eflags & VM_MASK) { - addr = (addr & 0xffff) + (seg << 4); - return addr; - } - - /* - * We'll assume that the code segments in the GDT - * are all zero-based. That is largely true: the - * TLS segments are used for data, and the PNPBIOS - * and APM bios ones we just ignore here. 
- */ - if (seg & LDT_SEGMENT) { - u32 *desc; - unsigned long base; - - seg &= ~7UL; - - down(&child->mm->context.sem); - if (unlikely((seg >> 3) >= child->mm->context.size)) - addr = -1L; /* bogus selector, access would fault */ - else { - desc = child->mm->context.ldt + seg; - base = ((desc[0] >> 16) | - ((desc[1] & 0xff) << 16) | - (desc[1] & 0xff000000)); - - /* 16-bit code segment? */ - if (!((desc[1] >> 22) & 1)) - addr &= 0xffff; - addr += base; - } - up(&child->mm->context.sem); - } - return addr; -} - -static inline int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) -{ - int i, copied; - unsigned char opcode[15]; - unsigned long addr = convert_eip_to_linear(child, regs); - - copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); - for (i = 0; i < copied; i++) { - switch (opcode[i]) { - /* popf and iret */ - case 0x9d: case 0xcf: - return 1; - /* opcode and address size prefixes */ - case 0x66: case 0x67: - continue; - /* irrelevant prefixes (segment overrides and repeats) */ - case 0x26: case 0x2e: - case 0x36: case 0x3e: - case 0x64: case 0x65: - case 0xf0: case 0xf2: case 0xf3: - continue; - - /* - * pushf: NOTE! We should probably not let - * the user see the TF bit being set. But - * it's more pain than it's worth to avoid - * it, and a debugger could emulate this - * all in user space if it _really_ cares. - */ - case 0x9c: - default: - return 0; - } - } - return 0; -} - -static void set_singlestep(struct task_struct *child) -{ - struct pt_regs *regs = get_child_regs(child); - - /* - * Always set TIF_SINGLESTEP - this guarantees that - * we single-step system calls etc.. This will also - * cause us to set TF when returning to user mode. - */ - set_tsk_thread_flag(child, TIF_SINGLESTEP); - - /* - * If TF was already set, don't do anything else - */ - if (regs->eflags & TRAP_FLAG) - return; - - /* Set TF on the kernel stack.. 
*/ - regs->eflags |= TRAP_FLAG; - - /* - * ..but if TF is changed by the instruction we will trace, - * don't mark it as being "us" that set it, so that we - * won't clear it by hand later. - */ - if (is_setting_trap_flag(child, regs)) - return; - - child->ptrace |= PT_DTRACE; -} - -static void clear_singlestep(struct task_struct *child) -{ - /* Always clear TIF_SINGLESTEP... */ - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - - /* But touch TF only if it was set by us.. */ - if (child->ptrace & PT_DTRACE) { - struct pt_regs *regs = get_child_regs(child); - regs->eflags &= ~TRAP_FLAG; - child->ptrace &= ~PT_DTRACE; - } -} - -/* - * Called by kernel/ptrace.c when detaching.. - * - * Make sure the single step bit is not set. - */ -void ptrace_disable(struct task_struct *child) -{ - clear_singlestep(child); - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); -} - -/* - * Perform get_thread_area on behalf of the traced child. - */ -static int -ptrace_get_thread_area(struct task_struct *child, - int idx, struct user_desc __user *user_desc) -{ - struct user_desc info; - struct desc_struct *desc; - -/* - * Get the current Thread-Local Storage area: - */ - -#define GET_BASE(desc) ( \ - (((desc)->a >> 16) & 0x0000ffff) | \ - (((desc)->b << 16) & 0x00ff0000) | \ - ( (desc)->b & 0xff000000) ) - -#define GET_LIMIT(desc) ( \ - ((desc)->a & 0x0ffff) | \ - ((desc)->b & 0xf0000) ) - -#define GET_32BIT(desc) (((desc)->b >> 22) & 1) -#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) -#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) -#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) -#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) -#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) - - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - - info.entry_number = idx; - info.base_addr = GET_BASE(desc); - info.limit = GET_LIMIT(desc); - info.seg_32bit = GET_32BIT(desc); - info.contents = 
GET_CONTENTS(desc); - info.read_exec_only = !GET_WRITABLE(desc); - info.limit_in_pages = GET_LIMIT_PAGES(desc); - info.seg_not_present = !GET_PRESENT(desc); - info.useable = GET_USEABLE(desc); - - if (copy_to_user(user_desc, &info, sizeof(info))) - return -EFAULT; - - return 0; -} - -/* - * Perform set_thread_area on behalf of the traced child. - */ -static int -ptrace_set_thread_area(struct task_struct *child, - int idx, struct user_desc __user *user_desc) -{ - struct user_desc info; - struct desc_struct *desc; - - if (copy_from_user(&info, user_desc, sizeof(info))) - return -EFAULT; - - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - if (LDT_empty(&info)) { - desc->a = 0; - desc->b = 0; - } else { - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - } - - return 0; -} - -long arch_ptrace(struct task_struct *child, long request, long addr, long data) -{ - struct user * dummy = NULL; - int i, ret; - unsigned long __user *datap = (unsigned long __user *)data; - - switch (request) { - /* when I and D space are separate, these will need to be fixed. */ - case PTRACE_PEEKTEXT: /* read word at location addr. */ - case PTRACE_PEEKDATA: - ret = generic_ptrace_peekdata(child, addr, data); - break; - - /* read the word at location addr in the USER area. */ - case PTRACE_PEEKUSR: { - unsigned long tmp; - - ret = -EIO; - if ((addr & 3) || addr < 0 || - addr > sizeof(struct user) - 3) - break; - - tmp = 0; /* Default return condition */ - if(addr < FRAME_SIZE*sizeof(long)) - tmp = getreg(child, addr); - if(addr >= (long) &dummy->u_debugreg[0] && - addr <= (long) &dummy->u_debugreg[7]){ - addr -= (long) &dummy->u_debugreg[0]; - addr = addr >> 2; - tmp = child->thread.debugreg[addr]; - } - ret = put_user(tmp, datap); - break; - } - - /* when I and D space are separate, this will have to be fixed. */ - case PTRACE_POKETEXT: /* write the word at location addr. 
*/ - case PTRACE_POKEDATA: - ret = generic_ptrace_pokedata(child, addr, data); - break; - - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ - ret = -EIO; - if ((addr & 3) || addr < 0 || - addr > sizeof(struct user) - 3) - break; - - if (addr < FRAME_SIZE*sizeof(long)) { - ret = putreg(child, addr, data); - break; - } - /* We need to be very careful here. We implicitly - want to modify a portion of the task_struct, and we - have to be selective about what portions we allow someone - to modify. */ - - ret = -EIO; - if(addr >= (long) &dummy->u_debugreg[0] && - addr <= (long) &dummy->u_debugreg[7]){ - - if(addr == (long) &dummy->u_debugreg[4]) break; - if(addr == (long) &dummy->u_debugreg[5]) break; - if(addr < (long) &dummy->u_debugreg[4] && - ((unsigned long) data) >= TASK_SIZE-3) break; - - /* Sanity-check data. Take one half-byte at once with - * check = (val >> (16 + 4*i)) & 0xf. It contains the - * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits - * 2 and 3 are LENi. Given a list of invalid values, - * we do mask |= 1 << invalid_value, so that - * (mask >> check) & 1 is a correct test for invalid - * values. - * - * R/Wi contains the type of the breakpoint / - * watchpoint, LENi contains the length of the watched - * data in the watchpoint case. - * - * The invalid values are: - * - LENi == 0x10 (undefined), so mask |= 0x0f00. - * - R/Wi == 0x10 (break on I/O reads or writes), so - * mask |= 0x4444. - * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= - * 0x1110. - * - * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. - * - * See the Intel Manual "System Programming Guide", - * 15.2.4 - * - * Note that LENi == 0x10 is defined on x86_64 in long - * mode (i.e. even for 32-bit userspace software, but - * 64-bit kernel), so the x86_64 mask value is 0x5454. - * See the AMD manual no. 
24593 (AMD64 System - * Programming)*/ - - if(addr == (long) &dummy->u_debugreg[7]) { - data &= ~DR_CONTROL_RESERVED; - for(i=0; i<4; i++) - if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1) - goto out_tsk; - if (data) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - } - addr -= (long) &dummy->u_debugreg; - addr = addr >> 2; - child->thread.debugreg[addr] = data; - ret = 0; - } - break; - - case PTRACE_SYSEMU: /* continue and stop at next syscall, which will not be executed */ - case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: /* restart after signal. */ - ret = -EIO; - if (!valid_signal(data)) - break; - if (request == PTRACE_SYSEMU) { - set_tsk_thread_flag(child, TIF_SYSCALL_EMU); - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - } else if (request == PTRACE_SYSCALL) { - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); - } else { - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - } - child->exit_code = data; - /* make sure the single step bit is not set. */ - clear_singlestep(child); - wake_up_process(child); - ret = 0; - break; - -/* - * make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - case PTRACE_KILL: - ret = 0; - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ - clear_singlestep(child); - wake_up_process(child); - break; - - case PTRACE_SYSEMU_SINGLESTEP: /* Same as SYSEMU, but singlestep if not syscall */ - case PTRACE_SINGLESTEP: /* set the trap flag. 
*/ - ret = -EIO; - if (!valid_signal(data)) - break; - - if (request == PTRACE_SYSEMU_SINGLESTEP) - set_tsk_thread_flag(child, TIF_SYSCALL_EMU); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); - - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - set_singlestep(child); - child->exit_code = data; - /* give it a chance to run. */ - wake_up_process(child); - ret = 0; - break; - - case PTRACE_DETACH: - /* detach a process that was attached. */ - ret = ptrace_detach(child, data); - break; - - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ - if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) { - ret = -EIO; - break; - } - for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { - __put_user(getreg(child, i), datap); - datap++; - } - ret = 0; - break; - } - - case PTRACE_SETREGS: { /* Set all gp regs in the child. */ - unsigned long tmp; - if (!access_ok(VERIFY_READ, datap, FRAME_SIZE*sizeof(long))) { - ret = -EIO; - break; - } - for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { - __get_user(tmp, datap); - putreg(child, i, tmp); - datap++; - } - ret = 0; - break; - } - - case PTRACE_GETFPREGS: { /* Get the child FPU state. */ - if (!access_ok(VERIFY_WRITE, datap, - sizeof(struct user_i387_struct))) { - ret = -EIO; - break; - } - ret = 0; - if (!tsk_used_math(child)) - init_fpu(child); - get_fpregs((struct user_i387_struct __user *)data, child); - break; - } - - case PTRACE_SETFPREGS: { /* Set the child FPU state. */ - if (!access_ok(VERIFY_READ, datap, - sizeof(struct user_i387_struct))) { - ret = -EIO; - break; - } - set_stopped_child_used_math(child); - set_fpregs(child, (struct user_i387_struct __user *)data); - ret = 0; - break; - } - - case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. 
*/ - if (!access_ok(VERIFY_WRITE, datap, - sizeof(struct user_fxsr_struct))) { - ret = -EIO; - break; - } - if (!tsk_used_math(child)) - init_fpu(child); - ret = get_fpxregs((struct user_fxsr_struct __user *)data, child); - break; - } - - case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */ - if (!access_ok(VERIFY_READ, datap, - sizeof(struct user_fxsr_struct))) { - ret = -EIO; - break; - } - set_stopped_child_used_math(child); - ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data); - break; - } - - case PTRACE_GET_THREAD_AREA: - ret = ptrace_get_thread_area(child, addr, - (struct user_desc __user *) data); - break; - - case PTRACE_SET_THREAD_AREA: - ret = ptrace_set_thread_area(child, addr, - (struct user_desc __user *) data); - break; - - default: - ret = ptrace_request(child, request, addr, data); - break; - } - out_tsk: - return ret; -} - -void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) -{ - struct siginfo info; - - tsk->thread.trap_no = 1; - tsk->thread.error_code = error_code; - - memset(&info, 0, sizeof(info)); - info.si_signo = SIGTRAP; - info.si_code = TRAP_BRKPT; - - /* User-mode eip? */ - info.si_addr = user_mode_vm(regs) ? 
(void __user *) regs->eip : NULL; - - /* Send us the fakey SIGTRAP */ - force_sig_info(SIGTRAP, &info, tsk); -} - -/* notification of system call entry/exit - * - triggered by current->work.syscall_trace - */ -__attribute__((regparm(3))) -int do_syscall_trace(struct pt_regs *regs, int entryexit) -{ - int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); - /* - * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall - * interception - */ - int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); - int ret = 0; - - /* do the secure computing check first */ - if (!entryexit) - secure_computing(regs->orig_eax); - - if (unlikely(current->audit_context)) { - if (entryexit) - audit_syscall_exit(AUDITSC_RESULT(regs->eax), - regs->eax); - /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only - * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is - * not used, entry.S will call us only on syscall exit, not - * entry; so when TIF_SYSCALL_AUDIT is used we must avoid - * calling send_sigtrap() on syscall entry. - * - * Note that when PTRACE_SYSEMU_SINGLESTEP is used, - * is_singlestep is false, despite his name, so we will still do - * the correct thing. - */ - else if (is_singlestep) - goto out; - } - - if (!(current->ptrace & PT_PTRACED)) - goto out; - - /* If a process stops on the 1st tracepoint with SYSCALL_TRACE - * and then is resumed with SYSEMU_SINGLESTEP, it will come in - * here. We have to check this and return */ - if (is_sysemu && entryexit) - return 0; - - /* Fake a debug trap */ - if (is_singlestep) - send_sigtrap(current, regs, 0); - - if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) - goto out; - - /* the 0x80 provides a way for the tracing parent to distinguish - between a syscall stop and SIGTRAP delivery */ - /* Note that the debugger could change the result of test_thread_flag!*/ - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 
0x80:0)); - - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } - ret = is_sysemu; -out: - if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_eax, - regs->ebx, regs->ecx, regs->edx, regs->esi); - if (ret == 0) - return 0; - - regs->orig_eax = -1; /* force skip of syscall restarting */ - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->eax), regs->eax); - return 1; -} diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c deleted file mode 100644 index 6722469c263..00000000000 --- a/arch/i386/kernel/quirks.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * This file contains work-arounds for x86 and x86_64 platform bugs. - */ -#include -#include - -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) - -static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) -{ - u8 config, rev; - u32 word; - - /* BIOS may enable hardware IRQ balancing for - * E7520/E7320/E7525(revision ID 0x9 and below) - * based platforms. - * Disable SW irqbalance/affinity on those platforms. - */ - pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); - if (rev > 0x9) - return; - - /* enable access to config space*/ - pci_read_config_byte(dev, 0xf4, &config); - pci_write_config_byte(dev, 0xf4, config|0x2); - - /* read xTPR register */ - raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); - - if (!(word & (1 << 13))) { - printk(KERN_INFO "Intel E7520/7320/7525 detected. 
" - "Disabling irq balancing and affinity\n"); -#ifdef CONFIG_IRQBALANCE - irqbalance_disable(""); -#endif - noirqdebug_setup(""); -#ifdef CONFIG_PROC_FS - no_irq_affinity = 1; -#endif - } - - /* put back the original value for config space*/ - if (!(config & 0x2)) - pci_write_config_byte(dev, 0xf4, config); -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); -#endif diff --git a/arch/i386/kernel/reboot_32.c b/arch/i386/kernel/reboot_32.c deleted file mode 100644 index 0d796248866..00000000000 --- a/arch/i386/kernel/reboot_32.c +++ /dev/null @@ -1,413 +0,0 @@ -/* - * linux/arch/i386/kernel/reboot.c - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "mach_reboot.h" -#include -#include - -/* - * Power off function, if any - */ -void (*pm_power_off)(void); -EXPORT_SYMBOL(pm_power_off); - -static int reboot_mode; -static int reboot_thru_bios; - -#ifdef CONFIG_SMP -static int reboot_cpu = -1; -#endif -static int __init reboot_setup(char *str) -{ - while(1) { - switch (*str) { - case 'w': /* "warm" reboot (no memory testing etc) */ - reboot_mode = 0x1234; - break; - case 'c': /* "cold" reboot (with memory testing etc) */ - reboot_mode = 0x0; - break; - case 'b': /* "bios" reboot by jumping through the BIOS */ - reboot_thru_bios = 1; - break; - case 'h': /* "hard" reboot by toggling RESET and/or crashing the CPU */ - reboot_thru_bios = 0; - break; -#ifdef CONFIG_SMP - case 's': /* "smp" reboot by executing reset on BSP or other CPU*/ - if (isdigit(*(str+1))) { - reboot_cpu = (int) (*(str+1) - '0'); - if (isdigit(*(str+2))) - reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0'); - } - /* we will leave sorting out the 
final value - when we are ready to reboot, since we might not - have set up boot_cpu_id or smp_num_cpu */ - break; -#endif - } - if((str = strchr(str,',')) != NULL) - str++; - else - break; - } - return 1; -} - -__setup("reboot=", reboot_setup); - -/* - * Reboot options and system auto-detection code provided by - * Dell Inc. so their systems "just work". :-) - */ - -/* - * Some machines require the "reboot=b" commandline option, this quirk makes that automatic. - */ -static int __init set_bios_reboot(struct dmi_system_id *d) -{ - if (!reboot_thru_bios) { - reboot_thru_bios = 1; - printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident); - } - return 0; -} - -static struct dmi_system_id __initdata reboot_dmi_table[] = { - { /* Handle problems with rebooting on Dell E520's */ - .callback = set_bios_reboot, - .ident = "Dell E520", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Dell DM061"), - }, - }, - { /* Handle problems with rebooting on Dell 1300's */ - .callback = set_bios_reboot, - .ident = "Dell PowerEdge 1300", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"), - }, - }, - { /* Handle problems with rebooting on Dell 300's */ - .callback = set_bios_reboot, - .ident = "Dell PowerEdge 300", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"), - }, - }, - { /* Handle problems with rebooting on Dell Optiplex 745's SFF*/ - .callback = set_bios_reboot, - .ident = "Dell OptiPlex 745", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"), - DMI_MATCH(DMI_BOARD_NAME, "0WF810"), - }, - }, - { /* Handle problems with rebooting on Dell 2400's */ - .callback = set_bios_reboot, - .ident = "Dell PowerEdge 2400", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - 
DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), - }, - }, - { /* Handle problems with rebooting on HP laptops */ - .callback = set_bios_reboot, - .ident = "HP Compaq Laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"), - }, - }, - { } -}; - -static int __init reboot_init(void) -{ - dmi_check_system(reboot_dmi_table); - return 0; -} - -core_initcall(reboot_init); - -/* The following code and data reboots the machine by switching to real - mode and jumping to the BIOS reset entry point, as if the CPU has - really been reset. The previous version asked the keyboard - controller to pulse the CPU reset line, which is more thorough, but - doesn't work with at least one type of 486 motherboard. It is easy - to stop this code working; hence the copious comments. */ - -static unsigned long long -real_mode_gdt_entries [3] = -{ - 0x0000000000000000ULL, /* Null descriptor */ - 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ - 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ -}; - -static struct Xgt_desc_struct -real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, -real_mode_idt = { 0x3ff, 0 }, -no_idt = { 0, 0 }; - - -/* This is 16-bit protected mode code to disable paging and the cache, - switch to real mode and jump to the BIOS reset code. - - The instruction that switches to real mode by writing to CR0 must be - followed immediately by a far jump instruction, which set CS to a - valid value for real mode, and flushes the prefetch queue to avoid - running instructions that have already been decoded in protected - mode. - - Clears all the flags except ET, especially PG (paging), PE - (protected-mode enable) and TS (task switch for coprocessor state - save). Flushes the TLB after paging has been disabled. Sets CD and - NW, to disable the cache on a 486, and invalidates the cache. This - is more like the state of a 486 after reset. 
I don't know if - something else should be done for other chips. - - More could be done here to set up the registers as if a CPU reset had - occurred; hopefully real BIOSs don't assume much. */ - -static unsigned char real_mode_switch [] = -{ - 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ - 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ - 0x66, 0x0d, 0x00, 0x00, 0x00, 0x60, /* orl $0x60000000,%eax */ - 0x66, 0x0f, 0x22, 0xc0, /* movl %eax,%cr0 */ - 0x66, 0x0f, 0x22, 0xd8, /* movl %eax,%cr3 */ - 0x66, 0x0f, 0x20, 0xc3, /* movl %cr0,%ebx */ - 0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60, /* andl $0x60000000,%ebx */ - 0x74, 0x02, /* jz f */ - 0x0f, 0x09, /* wbinvd */ - 0x24, 0x10, /* f: andb $0x10,al */ - 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ -}; -static unsigned char jump_to_bios [] = -{ - 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ -}; - -/* - * Switch to real mode and then execute the code - * specified by the code and length parameters. - * We assume that length will aways be less that 100! - */ -void machine_real_restart(unsigned char *code, int length) -{ - local_irq_disable(); - - /* Write zero to CMOS register number 0x0f, which the BIOS POST - routine will recognize as telling it to do a proper reboot. (Well - that's what this book in front of me says -- it may only apply to - the Phoenix BIOS though, it's not clear). At the same time, - disable NMIs by setting the top bit in the CMOS address register, - as we're about to do peculiar things to the CPU. I'm not sure if - `outb_p' is needed instead of just `outb'. Use it to be on the - safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) - */ - - spin_lock(&rtc_lock); - CMOS_WRITE(0x00, 0x8f); - spin_unlock(&rtc_lock); - - /* Remap the kernel at virtual address zero, as well as offset zero - from the kernel segment. This assumes the kernel segment starts at - virtual address PAGE_OFFSET. 
*/ - - memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); - - /* - * Use `swapper_pg_dir' as our page directory. - */ - load_cr3(swapper_pg_dir); - - /* Write 0x1234 to absolute memory location 0x472. The BIOS reads - this on booting to tell it to "Bypass memory test (also warm - boot)". This seems like a fairly standard thing that gets set by - REBOOT.COM programs, and the previous reset routine did this - too. */ - - *((unsigned short *)0x472) = reboot_mode; - - /* For the switch to real mode, copy some code to low memory. It has - to be in the first 64k because it is running in 16-bit mode, and it - has to have the same physical and virtual address, because it turns - off paging. Copy it near the end of the first page, out of the way - of BIOS variables. */ - - memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100), - real_mode_switch, sizeof (real_mode_switch)); - memcpy ((void *) (0x1000 - 100), code, length); - - /* Set up the IDT for real mode. */ - - load_idt(&real_mode_idt); - - /* Set up a GDT from which we can load segment descriptors for real - mode. The GDT is not used in real mode; it is just needed here to - prepare the descriptors. */ - - load_gdt(&real_mode_gdt); - - /* Load the data segment registers, and thus the descriptors ready for - real mode. The base address of each segment is 0x100, 16 times the - selector value being loaded here. This is so that the segment - registers don't have to be reloaded after switching to real mode: - the values are consistent for real mode operation already. */ - - __asm__ __volatile__ ("movl $0x0010,%%eax\n" - "\tmovl %%eax,%%ds\n" - "\tmovl %%eax,%%es\n" - "\tmovl %%eax,%%fs\n" - "\tmovl %%eax,%%gs\n" - "\tmovl %%eax,%%ss" : : : "eax"); - - /* Jump to the 16-bit code that we copied earlier. It disables paging - and the cache, switches to real mode, and jumps to the BIOS reset - entry point. 
*/ - - __asm__ __volatile__ ("ljmp $0x0008,%0" - : - : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100))); -} -#ifdef CONFIG_APM_MODULE -EXPORT_SYMBOL(machine_real_restart); -#endif - -static void native_machine_shutdown(void) -{ -#ifdef CONFIG_SMP - int reboot_cpu_id; - - /* The boot cpu is always logical cpu 0 */ - reboot_cpu_id = 0; - - /* See if there has been given a command line override */ - if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && - cpu_isset(reboot_cpu, cpu_online_map)) { - reboot_cpu_id = reboot_cpu; - } - - /* Make certain the cpu I'm rebooting on is online */ - if (!cpu_isset(reboot_cpu_id, cpu_online_map)) { - reboot_cpu_id = smp_processor_id(); - } - - /* Make certain I only run on the appropriate processor */ - set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id)); - - /* O.K. Now that I'm on the appropriate processor, stop - * all of the others, and disable their local APICs. - */ - - smp_send_stop(); -#endif /* CONFIG_SMP */ - - lapic_shutdown(); - -#ifdef CONFIG_X86_IO_APIC - disable_IO_APIC(); -#endif -} - -void __attribute__((weak)) mach_reboot_fixups(void) -{ -} - -static void native_machine_emergency_restart(void) -{ - if (!reboot_thru_bios) { - if (efi_enabled) { - efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); - load_idt(&no_idt); - __asm__ __volatile__("int3"); - } - /* rebooting needs to touch the page at absolute addr 0 */ - *((unsigned short *)__va(0x472)) = reboot_mode; - for (;;) { - mach_reboot_fixups(); /* for board specific fixups */ - mach_reboot(); - /* That didn't work - force a triple fault.. 
*/ - load_idt(&no_idt); - __asm__ __volatile__("int3"); - } - } - if (efi_enabled) - efi.reset_system(EFI_RESET_WARM, EFI_SUCCESS, 0, NULL); - - machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); -} - -static void native_machine_restart(char * __unused) -{ - machine_shutdown(); - machine_emergency_restart(); -} - -static void native_machine_halt(void) -{ -} - -static void native_machine_power_off(void) -{ - if (pm_power_off) { - machine_shutdown(); - pm_power_off(); - } -} - - -struct machine_ops machine_ops = { - .power_off = native_machine_power_off, - .shutdown = native_machine_shutdown, - .emergency_restart = native_machine_emergency_restart, - .restart = native_machine_restart, - .halt = native_machine_halt, -}; - -void machine_power_off(void) -{ - machine_ops.power_off(); -} - -void machine_shutdown(void) -{ - machine_ops.shutdown(); -} - -void machine_emergency_restart(void) -{ - machine_ops.emergency_restart(); -} - -void machine_restart(char *cmd) -{ - machine_ops.restart(cmd); -} - -void machine_halt(void) -{ - machine_ops.halt(); -} diff --git a/arch/i386/kernel/reboot_fixups_32.c b/arch/i386/kernel/reboot_fixups_32.c deleted file mode 100644 index 03e1cce58f4..00000000000 --- a/arch/i386/kernel/reboot_fixups_32.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * linux/arch/i386/kernel/reboot_fixups.c - * - * This is a good place to put board specific reboot fixups. 
- * - * List of supported fixups: - * geode-gx1/cs5530a - Jaya Kumar - * geode-gx/lx/cs5536 - Andres Salomon - * - */ - -#include -#include -#include -#include - -static void cs5530a_warm_reset(struct pci_dev *dev) -{ - /* writing 1 to the reset control register, 0x44 causes the - cs5530a to perform a system warm reset */ - pci_write_config_byte(dev, 0x44, 0x1); - udelay(50); /* shouldn't get here but be safe and spin-a-while */ - return; -} - -static void cs5536_warm_reset(struct pci_dev *dev) -{ - /* - * 6.6.2.12 Soft Reset (DIVIL_SOFT_RESET) - * writing 1 to the LSB of this MSR causes a hard reset. - */ - wrmsrl(0x51400017, 1ULL); - udelay(50); /* shouldn't get here but be safe and spin a while */ -} - -struct device_fixup { - unsigned int vendor; - unsigned int device; - void (*reboot_fixup)(struct pci_dev *); -}; - -static struct device_fixup fixups_table[] = { -{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, -{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, -}; - -/* - * we see if any fixup is available for our current hardware. if there - * is a fixup, we call it and we expect to never return from it. if we - * do return, we keep looking and then eventually fall back to the - * standard mach_reboot on return. 
- */ -void mach_reboot_fixups(void) -{ - struct device_fixup *cur; - struct pci_dev *dev; - int i; - - for (i=0; i < ARRAY_SIZE(fixups_table); i++) { - cur = &(fixups_table[i]); - dev = pci_get_device(cur->vendor, cur->device, NULL); - if (!dev) - continue; - - cur->reboot_fixup(dev); - } -} - diff --git a/arch/i386/kernel/relocate_kernel_32.S b/arch/i386/kernel/relocate_kernel_32.S deleted file mode 100644 index f151d6fae46..00000000000 --- a/arch/i386/kernel/relocate_kernel_32.S +++ /dev/null @@ -1,252 +0,0 @@ -/* - * relocate_kernel.S - put the kernel image in place to boot - * Copyright (C) 2002-2004 Eric Biederman - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. - */ - -#include -#include -#include - -/* - * Must be relocatable PIC code callable as a C function - */ - -#define PTR(x) (x << 2) -#define PAGE_ALIGNED (1 << PAGE_SHIFT) -#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ -#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */ - - .text - .align PAGE_ALIGNED - .globl relocate_kernel -relocate_kernel: - movl 8(%esp), %ebp /* list of pages */ - -#ifdef CONFIG_X86_PAE - /* map the control page at its virtual address */ - - movl PTR(VA_PGD)(%ebp), %edi - movl PTR(VA_CONTROL_PAGE)(%ebp), %eax - andl $0xc0000000, %eax - shrl $27, %eax - addl %edi, %eax - - movl PTR(PA_PMD_0)(%ebp), %edx - orl $PAE_PGD_ATTR, %edx - movl %edx, (%eax) - - movl PTR(VA_PMD_0)(%ebp), %edi - movl PTR(VA_CONTROL_PAGE)(%ebp), %eax - andl $0x3fe00000, %eax - shrl $18, %eax - addl %edi, %eax - - movl PTR(PA_PTE_0)(%ebp), %edx - orl $PAGE_ATTR, %edx - movl %edx, (%eax) - - movl PTR(VA_PTE_0)(%ebp), %edi - movl PTR(VA_CONTROL_PAGE)(%ebp), %eax - andl $0x001ff000, %eax - shrl $9, %eax - addl %edi, %eax - - movl PTR(PA_CONTROL_PAGE)(%ebp), %edx - orl $PAGE_ATTR, %edx - movl %edx, (%eax) - - /* identity map the control page at its physical address */ - - movl PTR(VA_PGD)(%ebp), %edi - movl 
PTR(PA_CONTROL_PAGE)(%ebp), %eax - andl $0xc0000000, %eax - shrl $27, %eax - addl %edi, %eax - - movl PTR(PA_PMD_1)(%ebp), %edx - orl $PAE_PGD_ATTR, %edx - movl %edx, (%eax) - - movl PTR(VA_PMD_1)(%ebp), %edi - movl PTR(PA_CONTROL_PAGE)(%ebp), %eax - andl $0x3fe00000, %eax - shrl $18, %eax - addl %edi, %eax - - movl PTR(PA_PTE_1)(%ebp), %edx - orl $PAGE_ATTR, %edx - movl %edx, (%eax) - - movl PTR(VA_PTE_1)(%ebp), %edi - movl PTR(PA_CONTROL_PAGE)(%ebp), %eax - andl $0x001ff000, %eax - shrl $9, %eax - addl %edi, %eax - - movl PTR(PA_CONTROL_PAGE)(%ebp), %edx - orl $PAGE_ATTR, %edx - movl %edx, (%eax) -#else - /* map the control page at its virtual address */ - - movl PTR(VA_PGD)(%ebp), %edi - movl PTR(VA_CONTROL_PAGE)(%ebp), %eax - andl $0xffc00000, %eax - shrl $20, %eax - addl %edi, %eax - - movl PTR(PA_PTE_0)(%ebp), %edx - orl $PAGE_ATTR, %edx - movl %edx, (%eax) - - movl PTR(VA_PTE_0)(%ebp), %edi - movl PTR(VA_CONTROL_PAGE)(%ebp), %eax - andl $0x003ff000, %eax - shrl $10, %eax - addl %edi, %eax - - movl PTR(PA_CONTROL_PAGE)(%ebp), %edx - orl $PAGE_ATTR, %edx - movl %edx, (%eax) - - /* identity map the control page at its physical address */ - - movl PTR(VA_PGD)(%ebp), %edi - movl PTR(PA_CONTROL_PAGE)(%ebp), %eax - andl $0xffc00000, %eax - shrl $20, %eax - addl %edi, %eax - - movl PTR(PA_PTE_1)(%ebp), %edx - orl $PAGE_ATTR, %edx - movl %edx, (%eax) - - movl PTR(VA_PTE_1)(%ebp), %edi - movl PTR(PA_CONTROL_PAGE)(%ebp), %eax - andl $0x003ff000, %eax - shrl $10, %eax - addl %edi, %eax - - movl PTR(PA_CONTROL_PAGE)(%ebp), %edx - orl $PAGE_ATTR, %edx - movl %edx, (%eax) -#endif - -relocate_new_kernel: - /* read the arguments and say goodbye to the stack */ - movl 4(%esp), %ebx /* page_list */ - movl 8(%esp), %ebp /* list of pages */ - movl 12(%esp), %edx /* start address */ - movl 16(%esp), %ecx /* cpu_has_pae */ - - /* zero out flags, and disable interrupts */ - pushl $0 - popfl - - /* get physical address of control page now */ - /* this is impossible after page table 
switch */ - movl PTR(PA_CONTROL_PAGE)(%ebp), %edi - - /* switch to new set of page tables */ - movl PTR(PA_PGD)(%ebp), %eax - movl %eax, %cr3 - - /* setup a new stack at the end of the physical control page */ - lea 4096(%edi), %esp - - /* jump to identity mapped page */ - movl %edi, %eax - addl $(identity_mapped - relocate_kernel), %eax - pushl %eax - ret - -identity_mapped: - /* store the start address on the stack */ - pushl %edx - - /* Set cr0 to a known state: - * 31 0 == Paging disabled - * 18 0 == Alignment check disabled - * 16 0 == Write protect disabled - * 3 0 == No task switch - * 2 0 == Don't do FP software emulation. - * 0 1 == Proctected mode enabled - */ - movl %cr0, %eax - andl $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax - orl $(1<<0), %eax - movl %eax, %cr0 - - /* clear cr4 if applicable */ - testl %ecx, %ecx - jz 1f - /* Set cr4 to a known state: - * Setting everything to zero seems safe. - */ - movl %cr4, %eax - andl $0, %eax - movl %eax, %cr4 - - jmp 1f -1: - - /* Flush the TLB (needed?) */ - xorl %eax, %eax - movl %eax, %cr3 - - /* Do the copies */ - movl %ebx, %ecx - jmp 1f - -0: /* top, read another word from the indirection page */ - movl (%ebx), %ecx - addl $4, %ebx -1: - testl $0x1, %ecx /* is it a destination page */ - jz 2f - movl %ecx, %edi - andl $0xfffff000, %edi - jmp 0b -2: - testl $0x2, %ecx /* is it an indirection page */ - jz 2f - movl %ecx, %ebx - andl $0xfffff000, %ebx - jmp 0b -2: - testl $0x4, %ecx /* is it the done indicator */ - jz 2f - jmp 3f -2: - testl $0x8, %ecx /* is it the source indicator */ - jz 0b /* Ignore it otherwise */ - movl %ecx, %esi /* For every source page do a copy */ - andl $0xfffff000, %esi - - movl $1024, %ecx - rep ; movsl - jmp 0b - -3: - - /* To be certain of avoiding problems with self-modifying code - * I need to execute a serializing instruction here. - * So I flush the TLB, it's handy, and not processor dependent. 
- */ - xorl %eax, %eax - movl %eax, %cr3 - - /* set all of the registers to known values */ - /* leave %esp alone */ - - xorl %eax, %eax - xorl %ebx, %ebx - xorl %ecx, %ecx - xorl %edx, %edx - xorl %esi, %esi - xorl %edi, %edi - xorl %ebp, %ebp - ret diff --git a/arch/i386/kernel/scx200_32.c b/arch/i386/kernel/scx200_32.c deleted file mode 100644 index c7d3df23f58..00000000000 --- a/arch/i386/kernel/scx200_32.c +++ /dev/null @@ -1,131 +0,0 @@ -/* linux/arch/i386/kernel/scx200.c - - Copyright (c) 2001,2002 Christer Weinigel - - National Semiconductor SCx200 support. */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -/* Verify that the configuration block really is there */ -#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base)) - -#define NAME "scx200" - -MODULE_AUTHOR("Christer Weinigel "); -MODULE_DESCRIPTION("NatSemi SCx200 Driver"); -MODULE_LICENSE("GPL"); - -unsigned scx200_gpio_base = 0; -long scx200_gpio_shadow[2]; - -unsigned scx200_cb_base = 0; - -static struct pci_device_id scx200_tbl[] = { - { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) }, - { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) }, - { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_XBUS) }, - { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_XBUS) }, - { }, -}; -MODULE_DEVICE_TABLE(pci,scx200_tbl); - -static int __devinit scx200_probe(struct pci_dev *, const struct pci_device_id *); - -static struct pci_driver scx200_pci_driver = { - .name = "scx200", - .id_table = scx200_tbl, - .probe = scx200_probe, -}; - -static DEFINE_MUTEX(scx200_gpio_config_lock); - -static void __devinit scx200_init_shadow(void) -{ - int bank; - - /* read the current values driven on the GPIO signals */ - for (bank = 0; bank < 2; ++bank) - scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank); -} - -static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent) -{ - unsigned base; - - if 
(pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE || - pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) { - base = pci_resource_start(pdev, 0); - printk(KERN_INFO NAME ": GPIO base 0x%x\n", base); - - if (request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO") == 0) { - printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n"); - return -EBUSY; - } - - scx200_gpio_base = base; - scx200_init_shadow(); - - } else { - /* find the base of the Configuration Block */ - if (scx200_cb_probe(SCx200_CB_BASE_FIXED)) { - scx200_cb_base = SCx200_CB_BASE_FIXED; - } else { - pci_read_config_dword(pdev, SCx200_CBA_SCRATCH, &base); - if (scx200_cb_probe(base)) { - scx200_cb_base = base; - } else { - printk(KERN_WARNING NAME ": Configuration Block not found\n"); - return -ENODEV; - } - } - printk(KERN_INFO NAME ": Configuration Block base 0x%x\n", scx200_cb_base); - } - - return 0; -} - -u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits) -{ - u32 config, new_config; - - mutex_lock(&scx200_gpio_config_lock); - - outl(index, scx200_gpio_base + 0x20); - config = inl(scx200_gpio_base + 0x24); - - new_config = (config & mask) | bits; - outl(new_config, scx200_gpio_base + 0x24); - - mutex_unlock(&scx200_gpio_config_lock); - - return config; -} - -static int __init scx200_init(void) -{ - printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n"); - - return pci_register_driver(&scx200_pci_driver); -} - -static void __exit scx200_cleanup(void) -{ - pci_unregister_driver(&scx200_pci_driver); - release_region(scx200_gpio_base, SCx200_GPIO_SIZE); -} - -module_init(scx200_init); -module_exit(scx200_cleanup); - -EXPORT_SYMBOL(scx200_gpio_base); -EXPORT_SYMBOL(scx200_gpio_shadow); -EXPORT_SYMBOL(scx200_gpio_configure); -EXPORT_SYMBOL(scx200_cb_base); diff --git a/arch/i386/kernel/setup_32.c b/arch/i386/kernel/setup_32.c deleted file mode 100644 index d474cd639bc..00000000000 --- a/arch/i386/kernel/setup_32.c +++ /dev/null @@ -1,653 +0,0 @@ -/* - * linux/arch/i386/kernel/setup.c - * - * 
Copyright (C) 1995 Linus Torvalds - * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - * - * Memory region support - * David Parsons , July-August 1999 - * - * Added E820 sanitization routine (removes overlapping memory regions); - * Brian Moyle , February 2001 - * - * Moved CPU detection code to cpu/${cpu}.c - * Patrick Mochel , March 2002 - * - * Provisions for empty E820 memory regions (reported by certain BIOSes). - * Alex Achenbach , December 2002. - * - */ - -/* - * This file handles the architecture-dependent parts of initialization - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include