From 937f961a6539b0ac5ebf31472b90810bc1f02200 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Oct 2010 10:16:59 +0200 Subject: x86: Move sfi to platform Signed-off-by: Thomas Gleixner Cc: Len Brown Cc: Alan Cox --- arch/x86/kernel/Makefile | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel/Makefile') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 2c833d8c414..d9067d10c6a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -58,7 +58,6 @@ obj-$(CONFIG_INTEL_TXT) += tboot.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ obj-y += acpi/ -obj-$(CONFIG_SFI) += sfi.o obj-y += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o -- cgit v1.2.3-70-g09d2 From b17ed48040d9e8b6ae35bc492015bf0fe1c8bae4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Oct 2010 10:19:54 +0200 Subject: x86: Move efi to platform Signed-off-by: Thomas Gleixner Cc: Huang Ying --- arch/x86/kernel/Makefile | 1 - arch/x86/kernel/efi.c | 613 ------------------------------------ arch/x86/kernel/efi_32.c | 112 ------- arch/x86/kernel/efi_64.c | 114 ------- arch/x86/kernel/efi_stub_32.S | 123 -------- arch/x86/kernel/efi_stub_64.S | 116 ------- arch/x86/platform/Makefile | 1 + arch/x86/platform/efi/Makefile | 1 + arch/x86/platform/efi/efi.c | 613 ++++++++++++++++++++++++++++++++++++ arch/x86/platform/efi/efi_32.c | 112 +++++++ arch/x86/platform/efi/efi_64.c | 114 +++++++ arch/x86/platform/efi/efi_stub_32.S | 123 ++++++++ arch/x86/platform/efi/efi_stub_64.S | 116 +++++++ 13 files changed, 1080 insertions(+), 1079 deletions(-) delete mode 100644 arch/x86/kernel/efi.c delete mode 100644 arch/x86/kernel/efi_32.c delete mode 100644 arch/x86/kernel/efi_64.c delete mode 100644 arch/x86/kernel/efi_stub_32.S delete mode 100644 arch/x86/kernel/efi_stub_64.S create mode 100644 arch/x86/platform/efi/Makefile create mode 100644 arch/x86/platform/efi/efi.c create mode 100644 arch/x86/platform/efi/efi_32.c create mode 100644 arch/x86/platform/efi/efi_64.c create mode 100644 arch/x86/platform/efi/efi_stub_32.S create mode 100644 arch/x86/platform/efi/efi_stub_64.S (limited to 'arch/x86/kernel/Makefile') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d9067d10c6a..b01c7b1ac1b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -81,7 +81,6 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_VM86) += vm86_32.o diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c deleted file mode 100644 index 0fe27d7c625..00000000000 --- a/arch/x86/kernel/efi.c +++ /dev/null @@ -1,613 +0,0 @@ -/* - * Common EFI (Extensible Firmware Interface) support functions - * Based on Extensible Firmware Interface Specification version 1.0 - * - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 1999-2002 Hewlett-Packard Co. - * David Mosberger-Tang - * Stephane Eranian - * Copyright (C) 2005-2008 Intel Co. - * Fenghua Yu - * Bibo Mao - * Chandramouli Narayanan - * Huang Ying - * - * Copied from efi_32.c to eliminate the duplicated code between EFI - * 32/64 support code. --ying 2007-10-26 - * - * All EFI Runtime Services are not implemented yet as EFI only - * supports physical mode addressing on SoftSDV. This is to be fixed - * in a future version. --drummond 1999-07-20 - * - * Implemented EFI runtime services and virtual mode calls. --davidm - * - * Goutham Rao: - * Skip non-WB memory and ignore empty memory ranges. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#define EFI_DEBUG 1 -#define PFX "EFI: " - -int efi_enabled; -EXPORT_SYMBOL(efi_enabled); - -struct efi efi; -EXPORT_SYMBOL(efi); - -struct efi_memory_map memmap; - -static struct efi efi_phys __initdata; -static efi_system_table_t efi_systab __initdata; - -static int __init setup_noefi(char *arg) -{ - efi_enabled = 0; - return 0; -} -early_param("noefi", setup_noefi); - -int add_efi_memmap; -EXPORT_SYMBOL(add_efi_memmap); - -static int __init setup_add_efi_memmap(char *arg) -{ - add_efi_memmap = 1; - return 0; -} -early_param("add_efi_memmap", setup_add_efi_memmap); - - -static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) -{ - return efi_call_virt2(get_time, tm, tc); -} - -static efi_status_t virt_efi_set_time(efi_time_t *tm) -{ - return efi_call_virt1(set_time, tm); -} - -static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled, - efi_bool_t *pending, - efi_time_t *tm) -{ - return efi_call_virt3(get_wakeup_time, - enabled, pending, tm); -} - -static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) -{ - return efi_call_virt2(set_wakeup_time, - enabled, tm); -} - -static efi_status_t virt_efi_get_variable(efi_char16_t *name, - efi_guid_t *vendor, - u32 *attr, - unsigned long *data_size, - void *data) -{ - return efi_call_virt5(get_variable, - name, vendor, attr, - data_size, data); -} - -static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, - efi_char16_t *name, - efi_guid_t *vendor) -{ - return efi_call_virt3(get_next_variable, - name_size, name, vendor); -} - -static efi_status_t virt_efi_set_variable(efi_char16_t *name, - efi_guid_t *vendor, - unsigned long attr, - unsigned long data_size, - void *data) -{ - return efi_call_virt5(set_variable, - name, vendor, attr, - data_size, data); -} - -static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) -{ - return efi_call_virt1(get_next_high_mono_count, count); -} - -static void virt_efi_reset_system(int reset_type, - efi_status_t status, - unsigned long data_size, - efi_char16_t *data) -{ - efi_call_virt4(reset_system, reset_type, status, - data_size, data); -} - -static efi_status_t virt_efi_set_virtual_address_map( - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) -{ - return efi_call_virt4(set_virtual_address_map, - memory_map_size, descriptor_size, - descriptor_version, virtual_map); -} - -static efi_status_t __init phys_efi_set_virtual_address_map( - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) -{ - efi_status_t status; - - efi_call_phys_prelog(); - status = efi_call_phys4(efi_phys.set_virtual_address_map, - memory_map_size, descriptor_size, - descriptor_version, virtual_map); - efi_call_phys_epilog(); - return status; -} - -static efi_status_t __init phys_efi_get_time(efi_time_t *tm, - efi_time_cap_t *tc) -{ - efi_status_t status; - - efi_call_phys_prelog(); - status = efi_call_phys2(efi_phys.get_time, tm, tc); - efi_call_phys_epilog(); - return status; -} - -int efi_set_rtc_mmss(unsigned long nowtime) -{ - int real_seconds, real_minutes; - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; - - status = efi.get_time(&eft, &cap); - if (status != EFI_SUCCESS) { - printk(KERN_ERR "Oops: efitime: can't read time!\n"); - return -1; - } - - real_seconds = nowtime % 60; - real_minutes = nowtime / 60; - if (((abs(real_minutes - eft.minute) + 15)/30) & 1) - real_minutes += 30; - real_minutes %= 60; - eft.minute = real_minutes; - eft.second = real_seconds; - - status = efi.set_time(&eft); - if (status != EFI_SUCCESS) { - printk(KERN_ERR "Oops: efitime: can't write time!\n"); - return -1; - } - return 0; -} - -unsigned long efi_get_time(void) -{ - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; - - status = efi.get_time(&eft, &cap); - if (status != EFI_SUCCESS) - printk(KERN_ERR "Oops: efitime: can't read time!\n"); - - return mktime(eft.year, eft.month, eft.day, eft.hour, - eft.minute, eft.second); -} - -/* - * Tell the kernel about the EFI memory map. This might include - * more than the max 128 entries that can fit in the e820 legacy - * (zeropage) memory map. - */ - -static void __init do_add_efi_memmap(void) -{ - void *p; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - efi_memory_desc_t *md = p; - unsigned long long start = md->phys_addr; - unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; - int e820_type; - - switch (md->type) { - case EFI_LOADER_CODE: - case EFI_LOADER_DATA: - case EFI_BOOT_SERVICES_CODE: - case EFI_BOOT_SERVICES_DATA: - case EFI_CONVENTIONAL_MEMORY: - if (md->attribute & EFI_MEMORY_WB) - e820_type = E820_RAM; - else - e820_type = E820_RESERVED; - break; - case EFI_ACPI_RECLAIM_MEMORY: - e820_type = E820_ACPI; - break; - case EFI_ACPI_MEMORY_NVS: - e820_type = E820_NVS; - break; - case EFI_UNUSABLE_MEMORY: - e820_type = E820_UNUSABLE; - break; - default: - /* - * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE - * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO - * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE - */ - e820_type = E820_RESERVED; - break; - } - e820_add_region(start, size, e820_type); - } - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); -} - -void __init efi_memblock_x86_reserve_range(void) -{ - unsigned long pmap; - -#ifdef CONFIG_X86_32 - pmap = boot_params.efi_info.efi_memmap; -#else - pmap = (boot_params.efi_info.efi_memmap | - ((__u64)boot_params.efi_info.efi_memmap_hi<<32)); -#endif - memmap.phys_map = (void *)pmap; - memmap.nr_map = boot_params.efi_info.efi_memmap_size / - boot_params.efi_info.efi_memdesc_size; - memmap.desc_version = boot_params.efi_info.efi_memdesc_version; - memmap.desc_size = boot_params.efi_info.efi_memdesc_size; - memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size, - "EFI memmap"); -} - -#if EFI_DEBUG -static void __init print_efi_memmap(void) -{ - efi_memory_desc_t *md; - void *p; - int i; - - for (p = memmap.map, i = 0; - p < memmap.map_end; - p += memmap.desc_size, i++) { - md = p; - printk(KERN_INFO PFX "mem%02u: type=%u, attr=0x%llx, " - "range=[0x%016llx-0x%016llx) (%lluMB)\n", - i, md->type, md->attribute, md->phys_addr, - md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), - (md->num_pages >> (20 - EFI_PAGE_SHIFT))); - } -} -#endif /* EFI_DEBUG */ - -void __init efi_init(void) -{ - efi_config_table_t *config_tables; - efi_runtime_services_t *runtime; - efi_char16_t *c16; - char vendor[100] = "unknown"; - int i = 0; - void *tmp; - -#ifdef CONFIG_X86_32 - efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; -#else - efi_phys.systab = (efi_system_table_t *) - (boot_params.efi_info.efi_systab | - ((__u64)boot_params.efi_info.efi_systab_hi<<32)); -#endif - - efi.systab = early_ioremap((unsigned long)efi_phys.systab, - sizeof(efi_system_table_t)); - if (efi.systab == NULL) - printk(KERN_ERR "Couldn't map the EFI system table!\n"); - memcpy(&efi_systab, efi.systab, sizeof(efi_system_table_t)); - early_iounmap(efi.systab, sizeof(efi_system_table_t)); - efi.systab = &efi_systab; - - /* - * Verify the EFI Table - */ - if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) - printk(KERN_ERR "EFI system table signature incorrect!\n"); - if ((efi.systab->hdr.revision >> 16) == 0) - printk(KERN_ERR "Warning: EFI system table version " - "%d.%02d, expected 1.00 or greater!\n", - efi.systab->hdr.revision >> 16, - efi.systab->hdr.revision & 0xffff); - - /* - * Show what we know for posterity - */ - c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); - if (c16) { - for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) - vendor[i] = *c16++; - vendor[i] = '\0'; - } else - printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); - early_iounmap(tmp, 2); - - printk(KERN_INFO "EFI v%u.%.02u by %s\n", - efi.systab->hdr.revision >> 16, - efi.systab->hdr.revision & 0xffff, vendor); - - /* - * Let's see what config tables the firmware passed to us. - */ - config_tables = early_ioremap( - efi.systab->tables, - efi.systab->nr_tables * sizeof(efi_config_table_t)); - if (config_tables == NULL) - printk(KERN_ERR "Could not map EFI Configuration Table!\n"); - - printk(KERN_INFO); - for (i = 0; i < efi.systab->nr_tables; i++) { - if (!efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID)) { - efi.mps = config_tables[i].table; - printk(" MPS=0x%lx ", config_tables[i].table); - } else if (!efi_guidcmp(config_tables[i].guid, - ACPI_20_TABLE_GUID)) { - efi.acpi20 = config_tables[i].table; - printk(" ACPI 2.0=0x%lx ", config_tables[i].table); - } else if (!efi_guidcmp(config_tables[i].guid, - ACPI_TABLE_GUID)) { - efi.acpi = config_tables[i].table; - printk(" ACPI=0x%lx ", config_tables[i].table); - } else if (!efi_guidcmp(config_tables[i].guid, - SMBIOS_TABLE_GUID)) { - efi.smbios = config_tables[i].table; - printk(" SMBIOS=0x%lx ", config_tables[i].table); -#ifdef CONFIG_X86_UV - } else if (!efi_guidcmp(config_tables[i].guid, - UV_SYSTEM_TABLE_GUID)) { - efi.uv_systab = config_tables[i].table; - printk(" UVsystab=0x%lx ", config_tables[i].table); -#endif - } else if (!efi_guidcmp(config_tables[i].guid, - HCDP_TABLE_GUID)) { - efi.hcdp = config_tables[i].table; - printk(" HCDP=0x%lx ", config_tables[i].table); - } else if (!efi_guidcmp(config_tables[i].guid, - UGA_IO_PROTOCOL_GUID)) { - efi.uga = config_tables[i].table; - printk(" UGA=0x%lx ", config_tables[i].table); - } - } - printk("\n"); - early_iounmap(config_tables, - efi.systab->nr_tables * sizeof(efi_config_table_t)); - - /* - * Check out the runtime services table. We need to map - * the runtime services table so that we can grab the physical - * address of several of the EFI runtime functions, needed to - * set the firmware into virtual mode. - */ - runtime = early_ioremap((unsigned long)efi.systab->runtime, - sizeof(efi_runtime_services_t)); - if (runtime != NULL) { - /* - * We will only need *early* access to the following - * two EFI runtime services before set_virtual_address_map - * is invoked. - */ - efi_phys.get_time = (efi_get_time_t *)runtime->get_time; - efi_phys.set_virtual_address_map = - (efi_set_virtual_address_map_t *) - runtime->set_virtual_address_map; - /* - * Make efi_get_time can be called before entering - * virtual mode. - */ - efi.get_time = phys_efi_get_time; - } else - printk(KERN_ERR "Could not map the EFI runtime service " - "table!\n"); - early_iounmap(runtime, sizeof(efi_runtime_services_t)); - - /* Map the EFI memory map */ - memmap.map = early_ioremap((unsigned long)memmap.phys_map, - memmap.nr_map * memmap.desc_size); - if (memmap.map == NULL) - printk(KERN_ERR "Could not map the EFI memory map!\n"); - memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); - - if (memmap.desc_size != sizeof(efi_memory_desc_t)) - printk(KERN_WARNING - "Kernel-defined memdesc doesn't match the one from EFI!\n"); - - if (add_efi_memmap) - do_add_efi_memmap(); - -#ifdef CONFIG_X86_32 - x86_platform.get_wallclock = efi_get_time; - x86_platform.set_wallclock = efi_set_rtc_mmss; -#endif - - /* Setup for EFI runtime service */ - reboot_type = BOOT_EFI; - -#if EFI_DEBUG - print_efi_memmap(); -#endif -} - -static void __init runtime_code_page_mkexec(void) -{ - efi_memory_desc_t *md; - void *p; - u64 addr, npages; - - /* Make EFI runtime service code area executable */ - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - - if (md->type != EFI_RUNTIME_SERVICES_CODE) - continue; - - addr = md->virt_addr; - npages = md->num_pages; - memrange_efi_to_native(&addr, &npages); - set_memory_x(addr, npages); - } -} - -/* - * This function will switch the EFI runtime services to virtual mode. - * Essentially, look through the EFI memmap and map every region that - * has the runtime attribute bit set in its memory descriptor and update - * that memory descriptor with the virtual address obtained from ioremap(). - * This enables the runtime services to be called without having to - * thunk back into physical mode for every invocation. - */ -void __init efi_enter_virtual_mode(void) -{ - efi_memory_desc_t *md; - efi_status_t status; - unsigned long size; - u64 end, systab, addr, npages, end_pfn; - void *p, *va; - - efi.systab = NULL; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - - size = md->num_pages << EFI_PAGE_SHIFT; - end = md->phys_addr + size; - - end_pfn = PFN_UP(end); - if (end_pfn <= max_low_pfn_mapped - || (end_pfn > (1UL << (32 - PAGE_SHIFT)) - && end_pfn <= max_pfn_mapped)) - va = __va(md->phys_addr); - else - va = efi_ioremap(md->phys_addr, size, md->type); - - md->virt_addr = (u64) (unsigned long) va; - - if (!va) { - printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n", - (unsigned long long)md->phys_addr); - continue; - } - - if (!(md->attribute & EFI_MEMORY_WB)) { - addr = md->virt_addr; - npages = md->num_pages; - memrange_efi_to_native(&addr, &npages); - set_memory_uc(addr, npages); - } - - systab = (u64) (unsigned long) efi_phys.systab; - if (md->phys_addr <= systab && systab < end) { - systab += md->virt_addr - md->phys_addr; - efi.systab = (efi_system_table_t *) (unsigned long) systab; - } - } - - BUG_ON(!efi.systab); - - status = phys_efi_set_virtual_address_map( - memmap.desc_size * memmap.nr_map, - memmap.desc_size, - memmap.desc_version, - memmap.phys_map); - - if (status != EFI_SUCCESS) { - printk(KERN_ALERT "Unable to switch EFI into virtual mode " - "(status=%lx)!\n", status); - panic("EFI call to SetVirtualAddressMap() failed!"); - } - - /* - * Now that EFI is in virtual mode, update the function - * pointers in the runtime service table to the new virtual addresses. - * - * Call EFI services through wrapper functions. - */ - efi.get_time = virt_efi_get_time; - efi.set_time = virt_efi_set_time; - efi.get_wakeup_time = virt_efi_get_wakeup_time; - efi.set_wakeup_time = virt_efi_set_wakeup_time; - efi.get_variable = virt_efi_get_variable; - efi.get_next_variable = virt_efi_get_next_variable; - efi.set_variable = virt_efi_set_variable; - efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; - efi.reset_system = virt_efi_reset_system; - efi.set_virtual_address_map = virt_efi_set_virtual_address_map; - if (__supported_pte_mask & _PAGE_NX) - runtime_code_page_mkexec(); - early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); - memmap.map = NULL; -} - -/* - * Convenience functions to obtain memory types and attributes - */ -u32 efi_mem_type(unsigned long phys_addr) -{ - efi_memory_desc_t *md; - void *p; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - if ((md->phys_addr <= phys_addr) && - (phys_addr < (md->phys_addr + - (md->num_pages << EFI_PAGE_SHIFT)))) - return md->type; - } - return 0; -} - -u64 efi_mem_attributes(unsigned long phys_addr) -{ - efi_memory_desc_t *md; - void *p; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - if ((md->phys_addr <= phys_addr) && - (phys_addr < (md->phys_addr + - (md->num_pages << EFI_PAGE_SHIFT)))) - return md->attribute; - } - return 0; -} diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c deleted file mode 100644 index 5cab48ee61a..00000000000 --- a/arch/x86/kernel/efi_32.c +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Extensible Firmware Interface - * - * Based on Extensible Firmware Interface Specification version 1.0 - * - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 1999-2002 Hewlett-Packard Co. - * David Mosberger-Tang - * Stephane Eranian - * - * All EFI Runtime Services are not implemented yet as EFI only - * supports physical mode addressing on SoftSDV. This is to be fixed - * in a future version. --drummond 1999-07-20 - * - * Implemented EFI runtime services and virtual mode calls. --davidm - * - * Goutham Rao: - * Skip non-WB memory and ignore empty memory ranges. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -/* - * To make EFI call EFI runtime service in physical addressing mode we need - * prelog/epilog before/after the invocation to disable interrupt, to - * claim EFI runtime service handler exclusively and to duplicate a memory in - * low memory space say 0 - 3G. - */ - -static unsigned long efi_rt_eflags; -static pgd_t efi_bak_pg_dir_pointer[2]; - -void efi_call_phys_prelog(void) -{ - unsigned long cr4; - unsigned long temp; - struct desc_ptr gdt_descr; - - local_irq_save(efi_rt_eflags); - - /* - * If I don't have PAE, I should just duplicate two entries in page - * directory. If I have PAE, I just need to duplicate one entry in - * page directory. - */ - cr4 = read_cr4_safe(); - - if (cr4 & X86_CR4_PAE) { - efi_bak_pg_dir_pointer[0].pgd = - swapper_pg_dir[pgd_index(0)].pgd; - swapper_pg_dir[0].pgd = - swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; - } else { - efi_bak_pg_dir_pointer[0].pgd = - swapper_pg_dir[pgd_index(0)].pgd; - efi_bak_pg_dir_pointer[1].pgd = - swapper_pg_dir[pgd_index(0x400000)].pgd; - swapper_pg_dir[pgd_index(0)].pgd = - swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; - temp = PAGE_OFFSET + 0x400000; - swapper_pg_dir[pgd_index(0x400000)].pgd = - swapper_pg_dir[pgd_index(temp)].pgd; - } - - /* - * After the lock is released, the original page table is restored. - */ - __flush_tlb_all(); - - gdt_descr.address = __pa(get_cpu_gdt_table(0)); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); -} - -void efi_call_phys_epilog(void) -{ - unsigned long cr4; - struct desc_ptr gdt_descr; - - gdt_descr.address = (unsigned long)get_cpu_gdt_table(0); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); - - cr4 = read_cr4_safe(); - - if (cr4 & X86_CR4_PAE) { - swapper_pg_dir[pgd_index(0)].pgd = - efi_bak_pg_dir_pointer[0].pgd; - } else { - swapper_pg_dir[pgd_index(0)].pgd = - efi_bak_pg_dir_pointer[0].pgd; - swapper_pg_dir[pgd_index(0x400000)].pgd = - efi_bak_pg_dir_pointer[1].pgd; - } - - /* - * After the lock is released, the original page table is restored. - */ - __flush_tlb_all(); - - local_irq_restore(efi_rt_eflags); -} diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c deleted file mode 100644 index ac0621a7ac3..00000000000 --- a/arch/x86/kernel/efi_64.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * x86_64 specific EFI support functions - * Based on Extensible Firmware Interface Specification version 1.0 - * - * Copyright (C) 2005-2008 Intel Co. - * Fenghua Yu - * Bibo Mao - * Chandramouli Narayanan - * Huang Ying - * - * Code to convert EFI to E820 map has been implemented in elilo bootloader - * based on a EFI patch by Edgar Hucek. Based on the E820 map, the page table - * is setup appropriately for EFI runtime code. - * - mouli 06/14/2007. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static pgd_t save_pgd __initdata; -static unsigned long efi_flags __initdata; - -static void __init early_mapping_set_exec(unsigned long start, - unsigned long end, - int executable) -{ - unsigned long num_pages; - - start &= PMD_MASK; - end = (end + PMD_SIZE - 1) & PMD_MASK; - num_pages = (end - start) >> PAGE_SHIFT; - if (executable) - set_memory_x((unsigned long)__va(start), num_pages); - else - set_memory_nx((unsigned long)__va(start), num_pages); -} - -static void __init early_runtime_code_mapping_set_exec(int executable) -{ - efi_memory_desc_t *md; - void *p; - - if (!(__supported_pte_mask & _PAGE_NX)) - return; - - /* Make EFI runtime service code area executable */ - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - if (md->type == EFI_RUNTIME_SERVICES_CODE) { - unsigned long end; - end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); - early_mapping_set_exec(md->phys_addr, end, executable); - } - } -} - -void __init efi_call_phys_prelog(void) -{ - unsigned long vaddress; - - early_runtime_code_mapping_set_exec(1); - local_irq_save(efi_flags); - vaddress = (unsigned long)__va(0x0UL); - save_pgd = *pgd_offset_k(0x0UL); - set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress)); - __flush_tlb_all(); -} - -void __init efi_call_phys_epilog(void) -{ - /* - * After the lock is released, the original page table is restored. - */ - set_pgd(pgd_offset_k(0x0UL), save_pgd); - __flush_tlb_all(); - local_irq_restore(efi_flags); - early_runtime_code_mapping_set_exec(0); -} - -void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, - u32 type) -{ - unsigned long last_map_pfn; - - if (type == EFI_MEMORY_MAPPED_IO) - return ioremap(phys_addr, size); - - last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); - if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) - return NULL; - - return (void __iomem *)__va(phys_addr); -} diff --git a/arch/x86/kernel/efi_stub_32.S b/arch/x86/kernel/efi_stub_32.S deleted file mode 100644 index fbe66e626c0..00000000000 --- a/arch/x86/kernel/efi_stub_32.S +++ /dev/null @@ -1,123 +0,0 @@ -/* - * EFI call stub for IA32. - * - * This stub allows us to make EFI calls in physical mode with interrupts - * turned off. - */ - -#include -#include - -/* - * efi_call_phys(void *, ...) is a function with variable parameters. - * All the callers of this function assure that all the parameters are 4-bytes. - */ - -/* - * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. - * So we'd better save all of them at the beginning of this function and restore - * at the end no matter how many we use, because we can not assure EFI runtime - * service functions will comply with gcc calling convention, too. - */ - -.text -ENTRY(efi_call_phys) - /* - * 0. The function can only be called in Linux kernel. So CS has been - * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found - * the values of these registers are the same. And, the corresponding - * GDT entries are identical. So I will do nothing about segment reg - * and GDT, but change GDT base register in prelog and epilog. - */ - - /* - * 1. Now I am running with EIP = + PAGE_OFFSET. - * But to make it smoothly switch from virtual mode to flat mode. - * The mapping of lower virtual memory has been created in prelog and - * epilog. - */ - movl $1f, %edx - subl $__PAGE_OFFSET, %edx - jmp *%edx -1: - - /* - * 2. Now on the top of stack is the return - * address in the caller of efi_call_phys(), then parameter 1, - * parameter 2, ..., param n. To make things easy, we save the return - * address of efi_call_phys in a global variable. - */ - popl %edx - movl %edx, saved_return_addr - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr - movl $2f, %edx - subl $__PAGE_OFFSET, %edx - pushl %edx - - /* - * 3. Clear PG bit in %CR0. - */ - movl %cr0, %edx - andl $0x7fffffff, %edx - movl %edx, %cr0 - jmp 1f -1: - - /* - * 4. Adjust stack pointer. - */ - subl $__PAGE_OFFSET, %esp - - /* - * 5. Call the physical function. - */ - jmp *%ecx - -2: - /* - * 6. After EFI runtime service returns, control will return to - * following instruction. We'd better readjust stack pointer first. - */ - addl $__PAGE_OFFSET, %esp - - /* - * 7. Restore PG bit - */ - movl %cr0, %edx - orl $0x80000000, %edx - movl %edx, %cr0 - jmp 1f -1: - /* - * 8. Now restore the virtual mode from flat mode by - * adding EIP with PAGE_OFFSET. - */ - movl $1f, %edx - jmp *%edx -1: - - /* - * 9. Balance the stack. And because EAX contain the return value, - * we'd better not clobber it. - */ - leal efi_rt_function_ptr, %edx - movl (%edx), %ecx - pushl %ecx - - /* - * 10. Push the saved return address onto the stack and return. - */ - leal saved_return_addr, %edx - movl (%edx), %ecx - pushl %ecx - ret -ENDPROC(efi_call_phys) -.previous - -.data -saved_return_addr: - .long 0 -efi_rt_function_ptr: - .long 0 diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/kernel/efi_stub_64.S deleted file mode 100644 index 4c07ccab814..00000000000 --- a/arch/x86/kernel/efi_stub_64.S +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Function calling ABI conversion from Linux to EFI for x86_64 - * - * Copyright (C) 2007 Intel Corp - * Bibo Mao - * Huang Ying - */ - -#include - -#define SAVE_XMM \ - mov %rsp, %rax; \ - subq $0x70, %rsp; \ - and $~0xf, %rsp; \ - mov %rax, (%rsp); \ - mov %cr0, %rax; \ - clts; \ - mov %rax, 0x8(%rsp); \ - movaps %xmm0, 0x60(%rsp); \ - movaps %xmm1, 0x50(%rsp); \ - movaps %xmm2, 0x40(%rsp); \ - movaps %xmm3, 0x30(%rsp); \ - movaps %xmm4, 0x20(%rsp); \ - movaps %xmm5, 0x10(%rsp) - -#define RESTORE_XMM \ - movaps 0x60(%rsp), %xmm0; \ - movaps 0x50(%rsp), %xmm1; \ - movaps 0x40(%rsp), %xmm2; \ - movaps 0x30(%rsp), %xmm3; \ - movaps 0x20(%rsp), %xmm4; \ - movaps 0x10(%rsp), %xmm5; \ - mov 0x8(%rsp), %rsi; \ - mov %rsi, %cr0; \ - mov (%rsp), %rsp - -ENTRY(efi_call0) - SAVE_XMM - subq $32, %rsp - call *%rdi - addq $32, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call0) - -ENTRY(efi_call1) - SAVE_XMM - subq $32, %rsp - mov %rsi, %rcx - call *%rdi - addq $32, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call1) - -ENTRY(efi_call2) - SAVE_XMM - subq $32, %rsp - mov %rsi, %rcx - call *%rdi - addq $32, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call2) - -ENTRY(efi_call3) - SAVE_XMM - subq $32, %rsp - mov %rcx, %r8 - mov %rsi, %rcx - call *%rdi - addq $32, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call3) - -ENTRY(efi_call4) - SAVE_XMM - subq $32, %rsp - mov %r8, %r9 - mov %rcx, %r8 - mov %rsi, %rcx - call *%rdi - addq $32, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call4) - -ENTRY(efi_call5) - SAVE_XMM - subq $48, %rsp - mov %r9, 32(%rsp) - mov %r8, %r9 - mov %rcx, %r8 - mov %rsi, %rcx - call *%rdi - addq $48, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call5) - -ENTRY(efi_call6) - SAVE_XMM - mov (%rsp), %rax - mov 8(%rax), %rax - subq $48, %rsp - mov %r9, 32(%rsp) - mov %rax, 40(%rsp) - mov %r8, %r9 - mov %rcx, %r8 - mov %rsi, %rcx - call *%rdi - addq $48, %rsp - RESTORE_XMM - ret -ENDPROC(efi_call6) diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index a964fa3c086..99e95b32ae7 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile @@ -1,2 +1,3 @@ # Platform specific code goes here +obj-y += efi/ obj-y += sfi/ diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile new file mode 100644 index 00000000000..73b8be0f367 --- /dev/null +++ b/arch/x86/platform/efi/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c new file mode 100644 index 00000000000..0fe27d7c625 --- /dev/null +++ b/arch/x86/platform/efi/efi.c @@ -0,0 +1,613 @@ +/* + * Common EFI (Extensible Firmware Interface) support functions + * Based on Extensible Firmware Interface Specification version 1.0 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999-2002 Hewlett-Packard Co. + * David Mosberger-Tang + * Stephane Eranian + * Copyright (C) 2005-2008 Intel Co. + * Fenghua Yu + * Bibo Mao + * Chandramouli Narayanan + * Huang Ying + * + * Copied from efi_32.c to eliminate the duplicated code between EFI + * 32/64 support code. --ying 2007-10-26 + * + * All EFI Runtime Services are not implemented yet as EFI only + * supports physical mode addressing on SoftSDV. This is to be fixed + * in a future version. --drummond 1999-07-20 + * + * Implemented EFI runtime services and virtual mode calls. --davidm + * + * Goutham Rao: + * Skip non-WB memory and ignore empty memory ranges. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define EFI_DEBUG 1 +#define PFX "EFI: " + +int efi_enabled; +EXPORT_SYMBOL(efi_enabled); + +struct efi efi; +EXPORT_SYMBOL(efi); + +struct efi_memory_map memmap; + +static struct efi efi_phys __initdata; +static efi_system_table_t efi_systab __initdata; + +static int __init setup_noefi(char *arg) +{ + efi_enabled = 0; + return 0; +} +early_param("noefi", setup_noefi); + +int add_efi_memmap; +EXPORT_SYMBOL(add_efi_memmap); + +static int __init setup_add_efi_memmap(char *arg) +{ + add_efi_memmap = 1; + return 0; +} +early_param("add_efi_memmap", setup_add_efi_memmap); + + +static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) +{ + return efi_call_virt2(get_time, tm, tc); +} + +static efi_status_t virt_efi_set_time(efi_time_t *tm) +{ + return efi_call_virt1(set_time, tm); +} + +static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled, + efi_bool_t *pending, + efi_time_t *tm) +{ + return efi_call_virt3(get_wakeup_time, + enabled, pending, tm); +} + +static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) +{ + return efi_call_virt2(set_wakeup_time, + enabled, tm); +} + +static efi_status_t virt_efi_get_variable(efi_char16_t *name, + efi_guid_t *vendor, + u32 *attr, + unsigned long *data_size, + void *data) +{ + return efi_call_virt5(get_variable, + name, vendor, attr, + data_size, data); +} + +static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, + efi_char16_t *name, + efi_guid_t *vendor) +{ + return efi_call_virt3(get_next_variable, + name_size, name, vendor); +} + +static efi_status_t virt_efi_set_variable(efi_char16_t *name, + efi_guid_t *vendor, + unsigned long attr, + unsigned long data_size, + void *data) +{ + return efi_call_virt5(set_variable, + name, vendor, attr, + data_size, data); +} + +static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) +{ + return efi_call_virt1(get_next_high_mono_count, count); +} + +static void virt_efi_reset_system(int reset_type, + efi_status_t status, + unsigned long data_size, + efi_char16_t *data) +{ + efi_call_virt4(reset_system, reset_type, status, + data_size, data); +} + +static efi_status_t virt_efi_set_virtual_address_map( + unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) +{ + return efi_call_virt4(set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); +} + +static efi_status_t __init phys_efi_set_virtual_address_map( + unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) +{ + efi_status_t status; + + efi_call_phys_prelog(); + status = efi_call_phys4(efi_phys.set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); + efi_call_phys_epilog(); + return status; +} + +static efi_status_t __init phys_efi_get_time(efi_time_t *tm, + efi_time_cap_t *tc) +{ + efi_status_t status; + + efi_call_phys_prelog(); + status = efi_call_phys2(efi_phys.get_time, tm, tc); + efi_call_phys_epilog(); + return status; +} + +int efi_set_rtc_mmss(unsigned long nowtime) +{ + int real_seconds, real_minutes; + efi_status_t status; + efi_time_t eft; + efi_time_cap_t cap; + + status = efi.get_time(&eft, &cap); + if (status != EFI_SUCCESS) { + printk(KERN_ERR "Oops: efitime: can't read time!\n"); + return -1; + } + + real_seconds = nowtime % 60; + real_minutes = nowtime / 60; + if (((abs(real_minutes - eft.minute) + 15)/30) & 1) + real_minutes += 30; + real_minutes %= 60; + eft.minute = real_minutes; + eft.second = real_seconds; + + status = efi.set_time(&eft); + if (status != EFI_SUCCESS) { + printk(KERN_ERR "Oops: efitime: can't write time!\n"); + return -1; + } + return 0; +} + +unsigned long efi_get_time(void) +{ + efi_status_t status; + efi_time_t eft; + efi_time_cap_t cap; + + status = efi.get_time(&eft, &cap); + if (status != EFI_SUCCESS) + printk(KERN_ERR "Oops: efitime: can't read time!\n"); + + return mktime(eft.year, eft.month, eft.day, eft.hour, + eft.minute, eft.second); +} + +/* + * Tell the kernel about the EFI memory map. This might include + * more than the max 128 entries that can fit in the e820 legacy + * (zeropage) memory map. + */ + +static void __init do_add_efi_memmap(void) +{ + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + int e820_type; + + switch (md->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + if (md->attribute & EFI_MEMORY_WB) + e820_type = E820_RAM; + else + e820_type = E820_RESERVED; + break; + case EFI_ACPI_RECLAIM_MEMORY: + e820_type = E820_ACPI; + break; + case EFI_ACPI_MEMORY_NVS: + e820_type = E820_NVS; + break; + case EFI_UNUSABLE_MEMORY: + e820_type = E820_UNUSABLE; + break; + default: + /* + * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE + * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO + * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE + */ + e820_type = E820_RESERVED; + break; + } + e820_add_region(start, size, e820_type); + } + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); +} + +void __init efi_memblock_x86_reserve_range(void) +{ + unsigned long pmap; + +#ifdef CONFIG_X86_32 + pmap = boot_params.efi_info.efi_memmap; +#else + pmap = (boot_params.efi_info.efi_memmap | + ((__u64)boot_params.efi_info.efi_memmap_hi<<32)); +#endif + memmap.phys_map = (void *)pmap; + memmap.nr_map = boot_params.efi_info.efi_memmap_size / + boot_params.efi_info.efi_memdesc_size; + memmap.desc_version = boot_params.efi_info.efi_memdesc_version; + memmap.desc_size = boot_params.efi_info.efi_memdesc_size; + memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size, + "EFI memmap"); +} + +#if EFI_DEBUG +static void __init print_efi_memmap(void) +{ + efi_memory_desc_t *md; + void *p; + int i; + + for (p = memmap.map, i = 0; + p < memmap.map_end; + p += memmap.desc_size, i++) { + md = p; + printk(KERN_INFO PFX "mem%02u: type=%u, attr=0x%llx, " + "range=[0x%016llx-0x%016llx) (%lluMB)\n", + i, md->type, md->attribute, md->phys_addr, + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), + (md->num_pages >> (20 - EFI_PAGE_SHIFT))); + } +} +#endif /* EFI_DEBUG */ + +void __init efi_init(void) +{ + efi_config_table_t *config_tables; + efi_runtime_services_t *runtime; + efi_char16_t *c16; + char vendor[100] = "unknown"; + int i = 0; + void *tmp; + +#ifdef CONFIG_X86_32 + efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; +#else + efi_phys.systab = (efi_system_table_t *) + (boot_params.efi_info.efi_systab | + ((__u64)boot_params.efi_info.efi_systab_hi<<32)); +#endif + + efi.systab = early_ioremap((unsigned long)efi_phys.systab, + sizeof(efi_system_table_t)); + if (efi.systab == NULL) + printk(KERN_ERR "Couldn't map the EFI system table!\n"); + memcpy(&efi_systab, efi.systab, sizeof(efi_system_table_t)); + early_iounmap(efi.systab, sizeof(efi_system_table_t)); + efi.systab = &efi_systab; + + /* + * Verify the EFI Table + */ + if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) + printk(KERN_ERR "EFI system table signature incorrect!\n"); + if ((efi.systab->hdr.revision >> 16) == 0) + printk(KERN_ERR "Warning: EFI system table version " + "%d.%02d, expected 1.00 or greater!\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff); + + /* + * Show what we know for posterity + */ + c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); + if (c16) { + for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) + vendor[i] = *c16++; + vendor[i] = '\0'; + } else + printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); + early_iounmap(tmp, 2); + + printk(KERN_INFO "EFI v%u.%.02u by %s\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff, vendor); + + /* + * Let's see what config tables the firmware passed to us. + */ + config_tables = early_ioremap( + efi.systab->tables, + efi.systab->nr_tables * sizeof(efi_config_table_t)); + if (config_tables == NULL) + printk(KERN_ERR "Could not map EFI Configuration Table!\n"); + + printk(KERN_INFO); + for (i = 0; i < efi.systab->nr_tables; i++) { + if (!efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID)) { + efi.mps = config_tables[i].table; + printk(" MPS=0x%lx ", config_tables[i].table); + } else if (!efi_guidcmp(config_tables[i].guid, + ACPI_20_TABLE_GUID)) { + efi.acpi20 = config_tables[i].table; + printk(" ACPI 2.0=0x%lx ", config_tables[i].table); + } else if (!efi_guidcmp(config_tables[i].guid, + ACPI_TABLE_GUID)) { + efi.acpi = config_tables[i].table; + printk(" ACPI=0x%lx ", config_tables[i].table); + } else if (!efi_guidcmp(config_tables[i].guid, + SMBIOS_TABLE_GUID)) { + efi.smbios = config_tables[i].table; + printk(" SMBIOS=0x%lx ", config_tables[i].table); +#ifdef CONFIG_X86_UV + } else if (!efi_guidcmp(config_tables[i].guid, + UV_SYSTEM_TABLE_GUID)) { + efi.uv_systab = config_tables[i].table; + printk(" UVsystab=0x%lx ", config_tables[i].table); +#endif + } else if (!efi_guidcmp(config_tables[i].guid, + HCDP_TABLE_GUID)) { + efi.hcdp = config_tables[i].table; + printk(" HCDP=0x%lx ", config_tables[i].table); + } else if (!efi_guidcmp(config_tables[i].guid, + UGA_IO_PROTOCOL_GUID)) { + efi.uga = config_tables[i].table; + printk(" UGA=0x%lx ", config_tables[i].table); + } + } + printk("\n"); + early_iounmap(config_tables, + efi.systab->nr_tables * sizeof(efi_config_table_t)); + + /* + * Check out the runtime services table. We need to map + * the runtime services table so that we can grab the physical + * address of several of the EFI runtime functions, needed to + * set the firmware into virtual mode. + */ + runtime = early_ioremap((unsigned long)efi.systab->runtime, + sizeof(efi_runtime_services_t)); + if (runtime != NULL) { + /* + * We will only need *early* access to the following + * two EFI runtime services before set_virtual_address_map + * is invoked. + */ + efi_phys.get_time = (efi_get_time_t *)runtime->get_time; + efi_phys.set_virtual_address_map = + (efi_set_virtual_address_map_t *) + runtime->set_virtual_address_map; + /* + * Make efi_get_time can be called before entering + * virtual mode. + */ + efi.get_time = phys_efi_get_time; + } else + printk(KERN_ERR "Could not map the EFI runtime service " + "table!\n"); + early_iounmap(runtime, sizeof(efi_runtime_services_t)); + + /* Map the EFI memory map */ + memmap.map = early_ioremap((unsigned long)memmap.phys_map, + memmap.nr_map * memmap.desc_size); + if (memmap.map == NULL) + printk(KERN_ERR "Could not map the EFI memory map!\n"); + memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); + + if (memmap.desc_size != sizeof(efi_memory_desc_t)) + printk(KERN_WARNING + "Kernel-defined memdesc doesn't match the one from EFI!\n"); + + if (add_efi_memmap) + do_add_efi_memmap(); + +#ifdef CONFIG_X86_32 + x86_platform.get_wallclock = efi_get_time; + x86_platform.set_wallclock = efi_set_rtc_mmss; +#endif + + /* Setup for EFI runtime service */ + reboot_type = BOOT_EFI; + +#if EFI_DEBUG + print_efi_memmap(); +#endif +} + +static void __init runtime_code_page_mkexec(void) +{ + efi_memory_desc_t *md; + void *p; + u64 addr, npages; + + /* Make EFI runtime service code area executable */ + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + + if (md->type != EFI_RUNTIME_SERVICES_CODE) + continue; + + addr = md->virt_addr; + npages = md->num_pages; + memrange_efi_to_native(&addr, &npages); + set_memory_x(addr, npages); + } +} + +/* + * This function will switch the EFI runtime services to virtual mode. + * Essentially, look through the EFI memmap and map every region that + * has the runtime attribute bit set in its memory descriptor and update + * that memory descriptor with the virtual address obtained from ioremap(). + * This enables the runtime services to be called without having to + * thunk back into physical mode for every invocation. + */ +void __init efi_enter_virtual_mode(void) +{ + efi_memory_desc_t *md; + efi_status_t status; + unsigned long size; + u64 end, systab, addr, npages, end_pfn; + void *p, *va; + + efi.systab = NULL; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + + size = md->num_pages << EFI_PAGE_SHIFT; + end = md->phys_addr + size; + + end_pfn = PFN_UP(end); + if (end_pfn <= max_low_pfn_mapped + || (end_pfn > (1UL << (32 - PAGE_SHIFT)) + && end_pfn <= max_pfn_mapped)) + va = __va(md->phys_addr); + else + va = efi_ioremap(md->phys_addr, size, md->type); + + md->virt_addr = (u64) (unsigned long) va; + + if (!va) { + printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n", + (unsigned long long)md->phys_addr); + continue; + } + + if (!(md->attribute & EFI_MEMORY_WB)) { + addr = md->virt_addr; + npages = md->num_pages; + memrange_efi_to_native(&addr, &npages); + set_memory_uc(addr, npages); + } + + systab = (u64) (unsigned long) efi_phys.systab; + if (md->phys_addr <= systab && systab < end) { + systab += md->virt_addr - md->phys_addr; + efi.systab = (efi_system_table_t *) (unsigned long) systab; + } + } + + BUG_ON(!efi.systab); + + status = phys_efi_set_virtual_address_map( + memmap.desc_size * memmap.nr_map, + memmap.desc_size, + memmap.desc_version, + memmap.phys_map); + + if (status != EFI_SUCCESS) { + printk(KERN_ALERT "Unable to switch EFI into virtual mode " + "(status=%lx)!\n", status); + panic("EFI call to SetVirtualAddressMap() failed!"); + } + + /* + * Now that EFI is in virtual mode, update the function + * pointers in the runtime service table to the new virtual addresses. + * + * Call EFI services through wrapper functions. + */ + efi.get_time = virt_efi_get_time; + efi.set_time = virt_efi_set_time; + efi.get_wakeup_time = virt_efi_get_wakeup_time; + efi.set_wakeup_time = virt_efi_set_wakeup_time; + efi.get_variable = virt_efi_get_variable; + efi.get_next_variable = virt_efi_get_next_variable; + efi.set_variable = virt_efi_set_variable; + efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; + efi.reset_system = virt_efi_reset_system; + efi.set_virtual_address_map = virt_efi_set_virtual_address_map; + if (__supported_pte_mask & _PAGE_NX) + runtime_code_page_mkexec(); + early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); + memmap.map = NULL; +} + +/* + * Convenience functions to obtain memory types and attributes + */ +u32 efi_mem_type(unsigned long phys_addr) +{ + efi_memory_desc_t *md; + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + if ((md->phys_addr <= phys_addr) && + (phys_addr < (md->phys_addr + + (md->num_pages << EFI_PAGE_SHIFT)))) + return md->type; + } + return 0; +} + +u64 efi_mem_attributes(unsigned long phys_addr) +{ + efi_memory_desc_t *md; + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + if ((md->phys_addr <= phys_addr) && + (phys_addr < (md->phys_addr + + (md->num_pages << EFI_PAGE_SHIFT)))) + return md->attribute; + } + return 0; +} diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c new file mode 100644 index 00000000000..5cab48ee61a --- /dev/null +++ b/arch/x86/platform/efi/efi_32.c @@ -0,0 +1,112 @@ +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 1.0 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999-2002 Hewlett-Packard Co. + * David Mosberger-Tang + * Stephane Eranian + * + * All EFI Runtime Services are not implemented yet as EFI only + * supports physical mode addressing on SoftSDV. This is to be fixed + * in a future version. --drummond 1999-07-20 + * + * Implemented EFI runtime services and virtual mode calls. --davidm + * + * Goutham Rao: + * Skip non-WB memory and ignore empty memory ranges. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* + * To make EFI call EFI runtime service in physical addressing mode we need + * prelog/epilog before/after the invocation to disable interrupt, to + * claim EFI runtime service handler exclusively and to duplicate a memory in + * low memory space say 0 - 3G. + */ + +static unsigned long efi_rt_eflags; +static pgd_t efi_bak_pg_dir_pointer[2]; + +void efi_call_phys_prelog(void) +{ + unsigned long cr4; + unsigned long temp; + struct desc_ptr gdt_descr; + + local_irq_save(efi_rt_eflags); + + /* + * If I don't have PAE, I should just duplicate two entries in page + * directory. If I have PAE, I just need to duplicate one entry in + * page directory. + */ + cr4 = read_cr4_safe(); + + if (cr4 & X86_CR4_PAE) { + efi_bak_pg_dir_pointer[0].pgd = + swapper_pg_dir[pgd_index(0)].pgd; + swapper_pg_dir[0].pgd = + swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; + } else { + efi_bak_pg_dir_pointer[0].pgd = + swapper_pg_dir[pgd_index(0)].pgd; + efi_bak_pg_dir_pointer[1].pgd = + swapper_pg_dir[pgd_index(0x400000)].pgd; + swapper_pg_dir[pgd_index(0)].pgd = + swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; + temp = PAGE_OFFSET + 0x400000; + swapper_pg_dir[pgd_index(0x400000)].pgd = + swapper_pg_dir[pgd_index(temp)].pgd; + } + + /* + * After the lock is released, the original page table is restored. + */ + __flush_tlb_all(); + + gdt_descr.address = __pa(get_cpu_gdt_table(0)); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); +} + +void efi_call_phys_epilog(void) +{ + unsigned long cr4; + struct desc_ptr gdt_descr; + + gdt_descr.address = (unsigned long)get_cpu_gdt_table(0); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + + cr4 = read_cr4_safe(); + + if (cr4 & X86_CR4_PAE) { + swapper_pg_dir[pgd_index(0)].pgd = + efi_bak_pg_dir_pointer[0].pgd; + } else { + swapper_pg_dir[pgd_index(0)].pgd = + efi_bak_pg_dir_pointer[0].pgd; + swapper_pg_dir[pgd_index(0x400000)].pgd = + efi_bak_pg_dir_pointer[1].pgd; + } + + /* + * After the lock is released, the original page table is restored. + */ + __flush_tlb_all(); + + local_irq_restore(efi_rt_eflags); +} diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c new file mode 100644 index 00000000000..ac0621a7ac3 --- /dev/null +++ b/arch/x86/platform/efi/efi_64.c @@ -0,0 +1,114 @@ +/* + * x86_64 specific EFI support functions + * Based on Extensible Firmware Interface Specification version 1.0 + * + * Copyright (C) 2005-2008 Intel Co. + * Fenghua Yu + * Bibo Mao + * Chandramouli Narayanan + * Huang Ying + * + * Code to convert EFI to E820 map has been implemented in elilo bootloader + * based on a EFI patch by Edgar Hucek. Based on the E820 map, the page table + * is setup appropriately for EFI runtime code. + * - mouli 06/14/2007. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static pgd_t save_pgd __initdata; +static unsigned long efi_flags __initdata; + +static void __init early_mapping_set_exec(unsigned long start, + unsigned long end, + int executable) +{ + unsigned long num_pages; + + start &= PMD_MASK; + end = (end + PMD_SIZE - 1) & PMD_MASK; + num_pages = (end - start) >> PAGE_SHIFT; + if (executable) + set_memory_x((unsigned long)__va(start), num_pages); + else + set_memory_nx((unsigned long)__va(start), num_pages); +} + +static void __init early_runtime_code_mapping_set_exec(int executable) +{ + efi_memory_desc_t *md; + void *p; + + if (!(__supported_pte_mask & _PAGE_NX)) + return; + + /* Make EFI runtime service code area executable */ + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + if (md->type == EFI_RUNTIME_SERVICES_CODE) { + unsigned long end; + end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); + early_mapping_set_exec(md->phys_addr, end, executable); + } + } +} + +void __init efi_call_phys_prelog(void) +{ + unsigned long vaddress; + + early_runtime_code_mapping_set_exec(1); + local_irq_save(efi_flags); + vaddress = (unsigned long)__va(0x0UL); + save_pgd = *pgd_offset_k(0x0UL); + set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress)); + __flush_tlb_all(); +} + +void __init efi_call_phys_epilog(void) +{ + /* + * After the lock is released, the original page table is restored. + */ + set_pgd(pgd_offset_k(0x0UL), save_pgd); + __flush_tlb_all(); + local_irq_restore(efi_flags); + early_runtime_code_mapping_set_exec(0); +} + +void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, + u32 type) +{ + unsigned long last_map_pfn; + + if (type == EFI_MEMORY_MAPPED_IO) + return ioremap(phys_addr, size); + + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); + if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) + return NULL; + + return (void __iomem *)__va(phys_addr); +} diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S new file mode 100644 index 00000000000..fbe66e626c0 --- /dev/null +++ b/arch/x86/platform/efi/efi_stub_32.S @@ -0,0 +1,123 @@ +/* + * EFI call stub for IA32. + * + * This stub allows us to make EFI calls in physical mode with interrupts + * turned off. + */ + +#include +#include + +/* + * efi_call_phys(void *, ...) is a function with variable parameters. + * All the callers of this function assure that all the parameters are 4-bytes. + */ + +/* + * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. + * So we'd better save all of them at the beginning of this function and restore + * at the end no matter how many we use, because we can not assure EFI runtime + * service functions will comply with gcc calling convention, too. + */ + +.text +ENTRY(efi_call_phys) + /* + * 0. The function can only be called in Linux kernel. So CS has been + * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found + * the values of these registers are the same. And, the corresponding + * GDT entries are identical. So I will do nothing about segment reg + * and GDT, but change GDT base register in prelog and epilog. + */ + + /* + * 1. Now I am running with EIP = + PAGE_OFFSET. + * But to make it smoothly switch from virtual mode to flat mode. + * The mapping of lower virtual memory has been created in prelog and + * epilog. + */ + movl $1f, %edx + subl $__PAGE_OFFSET, %edx + jmp *%edx +1: + + /* + * 2. Now on the top of stack is the return + * address in the caller of efi_call_phys(), then parameter 1, + * parameter 2, ..., param n. To make things easy, we save the return + * address of efi_call_phys in a global variable. + */ + popl %edx + movl %edx, saved_return_addr + /* get the function pointer into ECX*/ + popl %ecx + movl %ecx, efi_rt_function_ptr + movl $2f, %edx + subl $__PAGE_OFFSET, %edx + pushl %edx + + /* + * 3. Clear PG bit in %CR0. + */ + movl %cr0, %edx + andl $0x7fffffff, %edx + movl %edx, %cr0 + jmp 1f +1: + + /* + * 4. Adjust stack pointer. + */ + subl $__PAGE_OFFSET, %esp + + /* + * 5. Call the physical function. + */ + jmp *%ecx + +2: + /* + * 6. After EFI runtime service returns, control will return to + * following instruction. We'd better readjust stack pointer first. + */ + addl $__PAGE_OFFSET, %esp + + /* + * 7. Restore PG bit + */ + movl %cr0, %edx + orl $0x80000000, %edx + movl %edx, %cr0 + jmp 1f +1: + /* + * 8. Now restore the virtual mode from flat mode by + * adding EIP with PAGE_OFFSET. + */ + movl $1f, %edx + jmp *%edx +1: + + /* + * 9. Balance the stack. And because EAX contain the return value, + * we'd better not clobber it. + */ + leal efi_rt_function_ptr, %edx + movl (%edx), %ecx + pushl %ecx + + /* + * 10. Push the saved return address onto the stack and return. + */ + leal saved_return_addr, %edx + movl (%edx), %ecx + pushl %ecx + ret +ENDPROC(efi_call_phys) +.previous + +.data +saved_return_addr: + .long 0 +efi_rt_function_ptr: + .long 0 diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S new file mode 100644 index 00000000000..4c07ccab814 --- /dev/null +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -0,0 +1,116 @@ +/* + * Function calling ABI conversion from Linux to EFI for x86_64 + * + * Copyright (C) 2007 Intel Corp + * Bibo Mao + * Huang Ying + */ + +#include + +#define SAVE_XMM \ + mov %rsp, %rax; \ + subq $0x70, %rsp; \ + and $~0xf, %rsp; \ + mov %rax, (%rsp); \ + mov %cr0, %rax; \ + clts; \ + mov %rax, 0x8(%rsp); \ + movaps %xmm0, 0x60(%rsp); \ + movaps %xmm1, 0x50(%rsp); \ + movaps %xmm2, 0x40(%rsp); \ + movaps %xmm3, 0x30(%rsp); \ + movaps %xmm4, 0x20(%rsp); \ + movaps %xmm5, 0x10(%rsp) + +#define RESTORE_XMM \ + movaps 0x60(%rsp), %xmm0; \ + movaps 0x50(%rsp), %xmm1; \ + movaps 0x40(%rsp), %xmm2; \ + movaps 0x30(%rsp), %xmm3; \ + movaps 0x20(%rsp), %xmm4; \ + movaps 0x10(%rsp), %xmm5; \ + mov 0x8(%rsp), %rsi; \ + mov %rsi, %cr0; \ + mov (%rsp), %rsp + +ENTRY(efi_call0) + SAVE_XMM + subq $32, %rsp + call *%rdi + addq $32, %rsp + RESTORE_XMM + ret +ENDPROC(efi_call0) + +ENTRY(efi_call1) + SAVE_XMM + subq $32, %rsp + mov %rsi, %rcx + call *%rdi + addq $32, %rsp + RESTORE_XMM + ret +ENDPROC(efi_call1) + +ENTRY(efi_call2) + SAVE_XMM + subq $32, %rsp + mov %rsi, %rcx + call *%rdi + addq $32, %rsp + RESTORE_XMM + ret +ENDPROC(efi_call2) + +ENTRY(efi_call3) + SAVE_XMM + subq $32, %rsp + mov %rcx, %r8 + mov %rsi, %rcx + call *%rdi + addq $32, %rsp + RESTORE_XMM + ret +ENDPROC(efi_call3) + +ENTRY(efi_call4) + SAVE_XMM + subq $32, %rsp + mov %r8, %r9 + mov %rcx, %r8 + mov %rsi, %rcx + call *%rdi + addq $32, %rsp + RESTORE_XMM + ret +ENDPROC(efi_call4) + +ENTRY(efi_call5) + SAVE_XMM + subq $48, %rsp + mov %r9, 32(%rsp) + mov %r8, %r9 + mov %rcx, %r8 + mov %rsi, %rcx + call *%rdi + addq $48, %rsp + RESTORE_XMM + ret +ENDPROC(efi_call5) + +ENTRY(efi_call6) + SAVE_XMM + mov (%rsp), %rax + mov 8(%rax), %rax + subq $48, %rsp + mov %r9, 32(%rsp) + mov %rax, 40(%rsp) + mov %r8, %r9 + mov %rcx, %r8 + mov %rsi, %rcx + call *%rdi + addq $48, %rsp + RESTORE_XMM + ret +ENDPROC(efi_call6) -- cgit v1.2.3-70-g09d2 From c4e72ad6bbbbbf1f826df3a5d3e3c4af2f4d48c9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Oct 2010 10:33:09 +0200 Subject: x86: Move visws to platform Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 1 - arch/x86/kernel/visws_quirks.c | 614 --------------------------------- arch/x86/platform/Makefile | 1 + arch/x86/platform/visws/Makefile | 1 + arch/x86/platform/visws/visws_quirks.c | 614 +++++++++++++++++++++++++++++++++ 5 files changed, 616 insertions(+), 615 deletions(-) delete mode 100644 arch/x86/kernel/visws_quirks.c create mode 100644 arch/x86/platform/visws/Makefile create mode 100644 arch/x86/platform/visws/visws_quirks.c (limited to 'arch/x86/kernel/Makefile') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b01c7b1ac1b..28c4f3f2e97 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -36,7 +36,6 @@ obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time.o ioport.o ldt.o dumpstack.o obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o obj-$(CONFIG_IRQ_WORK) += irq_work.o -obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c deleted file mode 100644 index 3371bd053b8..00000000000 --- a/arch/x86/kernel/visws_quirks.c +++ /dev/null @@ -1,614 +0,0 @@ -/* - * SGI Visual Workstation support and quirks, unmaintained. - * - * Split out from setup.c by davej@suse.de - * - * Copyright (C) 1999 Bent Hagemark, Ingo Molnar - * - * SGI Visual Workstation interrupt controller - * - * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC - * which serves as the main interrupt controller in the system. Non-legacy - * hardware in the system uses this controller directly. Legacy devices - * are connected to the PIIX4 which in turn has its 8259(s) connected to - * a of the Cobalt APIC entry. - * - * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com - * - * 25/11/2002 - Updated for 2.5 by Andrey Panin - */ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include -#include -#include - -extern int no_broadcast; - -char visws_board_type = -1; -char visws_board_rev = -1; - -static void __init visws_time_init(void) -{ - printk(KERN_INFO "Starting Cobalt Timer system clock\n"); - - /* Set the countdown value */ - co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ); - - /* Start the timer */ - co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN); - - /* Enable (unmask) the timer interrupt */ - co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK); - - setup_default_timer_irq(); -} - -/* Replaces the default init_ISA_irqs in the generic setup */ -static void __init visws_pre_intr_init(void); - -/* Quirk for machine specific memory setup. */ - -#define MB (1024 * 1024) - -unsigned long sgivwfb_mem_phys; -unsigned long sgivwfb_mem_size; -EXPORT_SYMBOL(sgivwfb_mem_phys); -EXPORT_SYMBOL(sgivwfb_mem_size); - -long long mem_size __initdata = 0; - -static char * __init visws_memory_setup(void) -{ - long long gfx_mem_size = 8 * MB; - - mem_size = boot_params.alt_mem_k; - - if (!mem_size) { - printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n"); - mem_size = 128 * MB; - } - - /* - * this hardcodes the graphics memory to 8 MB - * it really should be sized dynamically (or at least - * set as a boot param) - */ - if (!sgivwfb_mem_size) { - printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n"); - sgivwfb_mem_size = 8 * MB; - } - - /* - * Trim to nearest MB - */ - sgivwfb_mem_size &= ~((1 << 20) - 1); - sgivwfb_mem_phys = mem_size - gfx_mem_size; - - e820_add_region(0, LOWMEMSIZE(), E820_RAM); - e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM); - e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED); - - return "PROM"; -} - -static void visws_machine_emergency_restart(void) -{ - /* - * Visual Workstations restart after this - * register is poked on the PIIX4 - */ - outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT); -} - -static void visws_machine_power_off(void) -{ - unsigned short pm_status; -/* extern unsigned int pci_bus0; */ - - while ((pm_status = inw(PMSTS_PORT)) & 0x100) - outw(pm_status, PMSTS_PORT); - - outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT); - - mdelay(10); - -#define PCI_CONF1_ADDRESS(bus, devfn, reg) \ - (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) - -/* outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */ - outl(PIIX_SPECIAL_STOP, 0xCFC); -} - -static void __init visws_get_smp_config(unsigned int early) -{ -} - -/* - * The Visual Workstation is Intel MP compliant in the hardware - * sense, but it doesn't have a BIOS(-configuration table). - * No problem for Linux. - */ - -static void __init MP_processor_info(struct mpc_cpu *m) -{ - int ver, logical_apicid; - physid_mask_t apic_cpus; - - if (!(m->cpuflag & CPU_ENABLED)) - return; - - logical_apicid = m->apicid; - printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n", - m->cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "", - m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, - (m->cpufeature & CPU_MODEL_MASK) >> 4, m->apicver); - - if (m->cpuflag & CPU_BOOTPROCESSOR) - boot_cpu_physical_apicid = m->apicid; - - ver = m->apicver; - if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) { - printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", - m->apicid, MAX_APICS); - return; - } - - apic->apicid_to_cpu_present(m->apicid, &apic_cpus); - physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); - /* - * Validate version - */ - if (ver == 0x0) { - printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - m->apicid); - ver = 0x10; - } - apic_version[m->apicid] = ver; -} - -static void __init visws_find_smp_config(void) -{ - struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); - unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); - - if (ncpus > CO_CPU_MAX) { - printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n", - ncpus, mp); - - ncpus = CO_CPU_MAX; - } - - if (ncpus > setup_max_cpus) - ncpus = setup_max_cpus; - -#ifdef CONFIG_X86_LOCAL_APIC - smp_found_config = 1; -#endif - while (ncpus--) - MP_processor_info(mp++); - - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; -} - -static void visws_trap_init(void); - -void __init visws_early_detect(void) -{ - int raw; - - visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG) - >> PIIX_GPI_BD_SHIFT; - - if (visws_board_type < 0) - return; - - /* - * Override the default platform setup functions - */ - x86_init.resources.memory_setup = visws_memory_setup; - x86_init.mpparse.get_smp_config = visws_get_smp_config; - x86_init.mpparse.find_smp_config = visws_find_smp_config; - x86_init.irqs.pre_vector_init = visws_pre_intr_init; - x86_init.irqs.trap_init = visws_trap_init; - x86_init.timers.timer_init = visws_time_init; - x86_init.pci.init = pci_visws_init; - x86_init.pci.init_irq = x86_init_noop; - - /* - * Install reboot quirks: - */ - pm_power_off = visws_machine_power_off; - machine_ops.emergency_restart = visws_machine_emergency_restart; - - /* - * Do not use broadcast IPIs: - */ - no_broadcast = 0; - -#ifdef CONFIG_X86_IO_APIC - /* - * Turn off IO-APIC detection and initialization: - */ - skip_ioapic_setup = 1; -#endif - - /* - * Get Board rev. - * First, we have to initialize the 307 part to allow us access - * to the GPIO registers. Let's map them at 0x0fc0 which is right - * after the PIIX4 PM section. - */ - outb_p(SIO_DEV_SEL, SIO_INDEX); - outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */ - - outb_p(SIO_DEV_MSB, SIO_INDEX); - outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */ - - outb_p(SIO_DEV_LSB, SIO_INDEX); - outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */ - - outb_p(SIO_DEV_ENB, SIO_INDEX); - outb_p(1, SIO_DATA); /* Enable GPIO registers. */ - - /* - * Now, we have to map the power management section to write - * a bit which enables access to the GPIO registers. - * What lunatic came up with this shit? - */ - outb_p(SIO_DEV_SEL, SIO_INDEX); - outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */ - - outb_p(SIO_DEV_MSB, SIO_INDEX); - outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */ - - outb_p(SIO_DEV_LSB, SIO_INDEX); - outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */ - - outb_p(SIO_DEV_ENB, SIO_INDEX); - outb_p(1, SIO_DATA); /* Enable PM registers. */ - - /* - * Now, write the PM register which enables the GPIO registers. - */ - outb_p(SIO_PM_FER2, SIO_PM_INDEX); - outb_p(SIO_PM_GP_EN, SIO_PM_DATA); - - /* - * Now, initialize the GPIO registers. - * We want them all to be inputs which is the - * power on default, so let's leave them alone. - * So, let's just read the board rev! - */ - raw = inb_p(SIO_GP_DATA1); - raw &= 0x7f; /* 7 bits of valid board revision ID. */ - - if (visws_board_type == VISWS_320) { - if (raw < 0x6) { - visws_board_rev = 4; - } else if (raw < 0xc) { - visws_board_rev = 5; - } else { - visws_board_rev = 6; - } - } else if (visws_board_type == VISWS_540) { - visws_board_rev = 2; - } else { - visws_board_rev = raw; - } - - printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n", - (visws_board_type == VISWS_320 ? "320" : - (visws_board_type == VISWS_540 ? "540" : - "unknown")), visws_board_rev); -} - -#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4) -#define BCD (LI_INTB | LI_INTC | LI_INTD) -#define ALLDEVS (A01234 | BCD) - -static __init void lithium_init(void) -{ - set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS); - set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS); - - if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || - (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { - printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A'); -/* panic("This machine is not SGI Visual Workstation 320/540"); */ - } - - if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || - (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { - printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B'); -/* panic("This machine is not SGI Visual Workstation 320/540"); */ - } - - li_pcia_write16(LI_PCI_INTEN, ALLDEVS); - li_pcib_write16(LI_PCI_INTEN, ALLDEVS); -} - -static __init void cobalt_init(void) -{ - /* - * On normal SMP PC this is used only with SMP, but we have to - * use it and set it up here to start the Cobalt clock - */ - set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); - setup_local_APIC(); - printk(KERN_INFO "Local APIC Version %#x, ID %#x\n", - (unsigned int)apic_read(APIC_LVR), - (unsigned int)apic_read(APIC_ID)); - - set_fixmap(FIX_CO_CPU, CO_CPU_PHYS); - set_fixmap(FIX_CO_APIC, CO_APIC_PHYS); - printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n", - co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID)); - - /* Enable Cobalt APIC being careful to NOT change the ID! */ - co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE); - - printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n", - co_apic_read(CO_APIC_ID)); -} - -static void __init visws_trap_init(void) -{ - lithium_init(); - cobalt_init(); -} - -/* - * IRQ controller / APIC support: - */ - -static DEFINE_SPINLOCK(cobalt_lock); - -/* - * Set the given Cobalt APIC Redirection Table entry to point - * to the given IDT vector/index. - */ -static inline void co_apic_set(int entry, int irq) -{ - co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR)); - co_apic_write(CO_APIC_HI(entry), 0); -} - -/* - * Cobalt (IO)-APIC functions to handle PCI devices. - */ -static inline int co_apic_ide0_hack(void) -{ - extern char visws_board_type; - extern char visws_board_rev; - - if (visws_board_type == VISWS_320 && visws_board_rev == 5) - return 5; - return CO_APIC_IDE0; -} - -static int is_co_apic(unsigned int irq) -{ - if (IS_CO_APIC(irq)) - return CO_APIC(irq); - - switch (irq) { - case 0: return CO_APIC_CPU; - case CO_IRQ_IDE0: return co_apic_ide0_hack(); - case CO_IRQ_IDE1: return CO_APIC_IDE1; - default: return -1; - } -} - - -/* - * This is the SGI Cobalt (IO-)APIC: - */ -static void enable_cobalt_irq(struct irq_data *data) -{ - co_apic_set(is_co_apic(data->irq), data->irq); -} - -static void disable_cobalt_irq(struct irq_data *data) -{ - int entry = is_co_apic(data->irq); - - co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK); - co_apic_read(CO_APIC_LO(entry)); -} - -static void ack_cobalt_irq(struct irq_data *data) -{ - unsigned long flags; - - spin_lock_irqsave(&cobalt_lock, flags); - disable_cobalt_irq(data); - apic_write(APIC_EOI, APIC_EIO_ACK); - spin_unlock_irqrestore(&cobalt_lock, flags); -} - -static struct irq_chip cobalt_irq_type = { - .name = "Cobalt-APIC", - .irq_enable = enable_cobalt_irq, - .irq_disable = disable_cobalt_irq, - .irq_ack = ack_cobalt_irq, -}; - - -/* - * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt - * -- not the manner expected by the code in i8259.c. - * - * there is a 'master' physical interrupt source that gets sent to - * the CPU. But in the chipset there are various 'virtual' interrupts - * waiting to be handled. We represent this to Linux through a 'master' - * interrupt controller type, and through a special virtual interrupt- - * controller. Device drivers only see the virtual interrupt sources. - */ -static unsigned int startup_piix4_master_irq(struct irq_data *data) -{ - legacy_pic->init(0); - enable_cobalt_irq(data); -} - -static void end_piix4_master_irq(struct irq_data *data) -{ - unsigned long flags; - - spin_lock_irqsave(&cobalt_lock, flags); - enable_cobalt_irq(data); - spin_unlock_irqrestore(&cobalt_lock, flags); -} - -static struct irq_chip piix4_master_irq_type = { - .name = "PIIX4-master", - .irq_startup = startup_piix4_master_irq, - .irq_ack = ack_cobalt_irq, -}; - -static void pii4_mask(struct irq_data *data) { } - -static struct irq_chip piix4_virtual_irq_type = { - .name = "PIIX4-virtual", - .mask = pii4_mask, -}; - -/* - * PIIX4-8259 master/virtual functions to handle interrupt requests - * from legacy devices: floppy, parallel, serial, rtc. - * - * None of these get Cobalt APIC entries, neither do they have IDT - * entries. These interrupts are purely virtual and distributed from - * the 'master' interrupt source: CO_IRQ_8259. - * - * When the 8259 interrupts its handler figures out which of these - * devices is interrupting and dispatches to its handler. - * - * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/ - * enable_irq gets the right irq. This 'master' irq is never directly - * manipulated by any driver. - */ -static irqreturn_t piix4_master_intr(int irq, void *dev_id) -{ - unsigned long flags; - int realirq; - - raw_spin_lock_irqsave(&i8259A_lock, flags); - - /* Find out what's interrupting in the PIIX4 master 8259 */ - outb(0x0c, 0x20); /* OCW3 Poll command */ - realirq = inb(0x20); - - /* - * Bit 7 == 0 means invalid/spurious - */ - if (unlikely(!(realirq & 0x80))) - goto out_unlock; - - realirq &= 7; - - if (unlikely(realirq == 2)) { - outb(0x0c, 0xa0); - realirq = inb(0xa0); - - if (unlikely(!(realirq & 0x80))) - goto out_unlock; - - realirq = (realirq & 7) + 8; - } - - /* mask and ack interrupt */ - cached_irq_mask |= 1 << realirq; - if (unlikely(realirq > 7)) { - inb(0xa1); - outb(cached_slave_mask, 0xa1); - outb(0x60 + (realirq & 7), 0xa0); - outb(0x60 + 2, 0x20); - } else { - inb(0x21); - outb(cached_master_mask, 0x21); - outb(0x60 + realirq, 0x20); - } - - raw_spin_unlock_irqrestore(&i8259A_lock, flags); - - /* - * handle this 'virtual interrupt' as a Cobalt one now. - */ - generic_handle_irq(realirq); - - return IRQ_HANDLED; - -out_unlock: - raw_spin_unlock_irqrestore(&i8259A_lock, flags); - return IRQ_NONE; -} - -static struct irqaction master_action = { - .handler = piix4_master_intr, - .name = "PIIX4-8259", -}; - -static struct irqaction cascade_action = { - .handler = no_action, - .name = "cascade", -}; - -static inline void set_piix4_virtual_irq_type(void) -{ - piix4_virtual_irq_type.enable = i8259A_chip.unmask; - piix4_virtual_irq_type.disable = i8259A_chip.mask; - piix4_virtual_irq_type.unmask = i8259A_chip.unmask; -} - -static void __init visws_pre_intr_init(void) -{ - int i; - - set_piix4_virtual_irq_type(); - - for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { - struct irq_chip *chip = NULL; - - if (i == 0) - chip = &cobalt_irq_type; - else if (i == CO_IRQ_IDE0) - chip = &cobalt_irq_type; - else if (i == CO_IRQ_IDE1) - >chip = &cobalt_irq_type; - else if (i == CO_IRQ_8259) - chip = &piix4_master_irq_type; - else if (i < CO_IRQ_APIC0) - chip = &piix4_virtual_irq_type; - else if (IS_CO_APIC(i)) - chip = &cobalt_irq_type; - - if (chip) - set_irq_chip(i, chip); - } - - setup_irq(CO_IRQ_8259, &master_action); - setup_irq(2, &cascade_action); -} diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index 99e95b32ae7..e629d7a428c 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile @@ -1,3 +1,4 @@ # Platform specific code goes here obj-y += efi/ obj-y += sfi/ +obj-y += visws/ diff --git a/arch/x86/platform/visws/Makefile b/arch/x86/platform/visws/Makefile new file mode 100644 index 00000000000..91bc17ab2fd --- /dev/null +++ b/arch/x86/platform/visws/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_X86_VISWS) += visws_quirks.o diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c new file mode 100644 index 00000000000..3371bd053b8 --- /dev/null +++ b/arch/x86/platform/visws/visws_quirks.c @@ -0,0 +1,614 @@ +/* + * SGI Visual Workstation support and quirks, unmaintained. + * + * Split out from setup.c by davej@suse.de + * + * Copyright (C) 1999 Bent Hagemark, Ingo Molnar + * + * SGI Visual Workstation interrupt controller + * + * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC + * which serves as the main interrupt controller in the system. Non-legacy + * hardware in the system uses this controller directly. Legacy devices + * are connected to the PIIX4 which in turn has its 8259(s) connected to + * a of the Cobalt APIC entry. + * + * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com + * + * 25/11/2002 - Updated for 2.5 by Andrey Panin + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include + +extern int no_broadcast; + +char visws_board_type = -1; +char visws_board_rev = -1; + +static void __init visws_time_init(void) +{ + printk(KERN_INFO "Starting Cobalt Timer system clock\n"); + + /* Set the countdown value */ + co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ); + + /* Start the timer */ + co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN); + + /* Enable (unmask) the timer interrupt */ + co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK); + + setup_default_timer_irq(); +} + +/* Replaces the default init_ISA_irqs in the generic setup */ +static void __init visws_pre_intr_init(void); + +/* Quirk for machine specific memory setup. */ + +#define MB (1024 * 1024) + +unsigned long sgivwfb_mem_phys; +unsigned long sgivwfb_mem_size; +EXPORT_SYMBOL(sgivwfb_mem_phys); +EXPORT_SYMBOL(sgivwfb_mem_size); + +long long mem_size __initdata = 0; + +static char * __init visws_memory_setup(void) +{ + long long gfx_mem_size = 8 * MB; + + mem_size = boot_params.alt_mem_k; + + if (!mem_size) { + printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n"); + mem_size = 128 * MB; + } + + /* + * this hardcodes the graphics memory to 8 MB + * it really should be sized dynamically (or at least + * set as a boot param) + */ + if (!sgivwfb_mem_size) { + printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n"); + sgivwfb_mem_size = 8 * MB; + } + + /* + * Trim to nearest MB + */ + sgivwfb_mem_size &= ~((1 << 20) - 1); + sgivwfb_mem_phys = mem_size - gfx_mem_size; + + e820_add_region(0, LOWMEMSIZE(), E820_RAM); + e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM); + e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED); + + return "PROM"; +} + +static void visws_machine_emergency_restart(void) +{ + /* + * Visual Workstations restart after this + * register is poked on the PIIX4 + */ + outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT); +} + +static void visws_machine_power_off(void) +{ + unsigned short pm_status; +/* extern unsigned int pci_bus0; */ + + while ((pm_status = inw(PMSTS_PORT)) & 0x100) + outw(pm_status, PMSTS_PORT); + + outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT); + + mdelay(10); + +#define PCI_CONF1_ADDRESS(bus, devfn, reg) \ + (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) + +/* outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */ + outl(PIIX_SPECIAL_STOP, 0xCFC); +} + +static void __init visws_get_smp_config(unsigned int early) +{ +} + +/* + * The Visual Workstation is Intel MP compliant in the hardware + * sense, but it doesn't have a BIOS(-configuration table). + * No problem for Linux. + */ + +static void __init MP_processor_info(struct mpc_cpu *m) +{ + int ver, logical_apicid; + physid_mask_t apic_cpus; + + if (!(m->cpuflag & CPU_ENABLED)) + return; + + logical_apicid = m->apicid; + printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n", + m->cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "", + m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, + (m->cpufeature & CPU_MODEL_MASK) >> 4, m->apicver); + + if (m->cpuflag & CPU_BOOTPROCESSOR) + boot_cpu_physical_apicid = m->apicid; + + ver = m->apicver; + if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) { + printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", + m->apicid, MAX_APICS); + return; + } + + apic->apicid_to_cpu_present(m->apicid, &apic_cpus); + physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); + /* + * Validate version + */ + if (ver == 0x0) { + printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! " + "fixing up to 0x10. (tell your hw vendor)\n", + m->apicid); + ver = 0x10; + } + apic_version[m->apicid] = ver; +} + +static void __init visws_find_smp_config(void) +{ + struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); + unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); + + if (ncpus > CO_CPU_MAX) { + printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n", + ncpus, mp); + + ncpus = CO_CPU_MAX; + } + + if (ncpus > setup_max_cpus) + ncpus = setup_max_cpus; + +#ifdef CONFIG_X86_LOCAL_APIC + smp_found_config = 1; +#endif + while (ncpus--) + MP_processor_info(mp++); + + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; +} + +static void visws_trap_init(void); + +void __init visws_early_detect(void) +{ + int raw; + + visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG) + >> PIIX_GPI_BD_SHIFT; + + if (visws_board_type < 0) + return; + + /* + * Override the default platform setup functions + */ + x86_init.resources.memory_setup = visws_memory_setup; + x86_init.mpparse.get_smp_config = visws_get_smp_config; + x86_init.mpparse.find_smp_config = visws_find_smp_config; + x86_init.irqs.pre_vector_init = visws_pre_intr_init; + x86_init.irqs.trap_init = visws_trap_init; + x86_init.timers.timer_init = visws_time_init; + x86_init.pci.init = pci_visws_init; + x86_init.pci.init_irq = x86_init_noop; + + /* + * Install reboot quirks: + */ + pm_power_off = visws_machine_power_off; + machine_ops.emergency_restart = visws_machine_emergency_restart; + + /* + * Do not use broadcast IPIs: + */ + no_broadcast = 0; + +#ifdef CONFIG_X86_IO_APIC + /* + * Turn off IO-APIC detection and initialization: + */ + skip_ioapic_setup = 1; +#endif + + /* + * Get Board rev. + * First, we have to initialize the 307 part to allow us access + * to the GPIO registers. Let's map them at 0x0fc0 which is right + * after the PIIX4 PM section. + */ + outb_p(SIO_DEV_SEL, SIO_INDEX); + outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */ + + outb_p(SIO_DEV_MSB, SIO_INDEX); + outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */ + + outb_p(SIO_DEV_LSB, SIO_INDEX); + outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */ + + outb_p(SIO_DEV_ENB, SIO_INDEX); + outb_p(1, SIO_DATA); /* Enable GPIO registers. */ + + /* + * Now, we have to map the power management section to write + * a bit which enables access to the GPIO registers. + * What lunatic came up with this shit? + */ + outb_p(SIO_DEV_SEL, SIO_INDEX); + outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */ + + outb_p(SIO_DEV_MSB, SIO_INDEX); + outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */ + + outb_p(SIO_DEV_LSB, SIO_INDEX); + outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */ + + outb_p(SIO_DEV_ENB, SIO_INDEX); + outb_p(1, SIO_DATA); /* Enable PM registers. */ + + /* + * Now, write the PM register which enables the GPIO registers. + */ + outb_p(SIO_PM_FER2, SIO_PM_INDEX); + outb_p(SIO_PM_GP_EN, SIO_PM_DATA); + + /* + * Now, initialize the GPIO registers. + * We want them all to be inputs which is the + * power on default, so let's leave them alone. + * So, let's just read the board rev! + */ + raw = inb_p(SIO_GP_DATA1); + raw &= 0x7f; /* 7 bits of valid board revision ID. */ + + if (visws_board_type == VISWS_320) { + if (raw < 0x6) { + visws_board_rev = 4; + } else if (raw < 0xc) { + visws_board_rev = 5; + } else { + visws_board_rev = 6; + } + } else if (visws_board_type == VISWS_540) { + visws_board_rev = 2; + } else { + visws_board_rev = raw; + } + + printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n", + (visws_board_type == VISWS_320 ? "320" : + (visws_board_type == VISWS_540 ? "540" : + "unknown")), visws_board_rev); +} + +#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4) +#define BCD (LI_INTB | LI_INTC | LI_INTD) +#define ALLDEVS (A01234 | BCD) + +static __init void lithium_init(void) +{ + set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS); + set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS); + + if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || + (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { + printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A'); +/* panic("This machine is not SGI Visual Workstation 320/540"); */ + } + + if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || + (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { + printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B'); +/* panic("This machine is not SGI Visual Workstation 320/540"); */ + } + + li_pcia_write16(LI_PCI_INTEN, ALLDEVS); + li_pcib_write16(LI_PCI_INTEN, ALLDEVS); +} + +static __init void cobalt_init(void) +{ + /* + * On normal SMP PC this is used only with SMP, but we have to + * use it and set it up here to start the Cobalt clock + */ + set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); + setup_local_APIC(); + printk(KERN_INFO "Local APIC Version %#x, ID %#x\n", + (unsigned int)apic_read(APIC_LVR), + (unsigned int)apic_read(APIC_ID)); + + set_fixmap(FIX_CO_CPU, CO_CPU_PHYS); + set_fixmap(FIX_CO_APIC, CO_APIC_PHYS); + printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n", + co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID)); + + /* Enable Cobalt APIC being careful to NOT change the ID! */ + co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE); + + printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n", + co_apic_read(CO_APIC_ID)); +} + +static void __init visws_trap_init(void) +{ + lithium_init(); + cobalt_init(); +} + +/* + * IRQ controller / APIC support: + */ + +static DEFINE_SPINLOCK(cobalt_lock); + +/* + * Set the given Cobalt APIC Redirection Table entry to point + * to the given IDT vector/index. + */ +static inline void co_apic_set(int entry, int irq) +{ + co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR)); + co_apic_write(CO_APIC_HI(entry), 0); +} + +/* + * Cobalt (IO)-APIC functions to handle PCI devices. + */ +static inline int co_apic_ide0_hack(void) +{ + extern char visws_board_type; + extern char visws_board_rev; + + if (visws_board_type == VISWS_320 && visws_board_rev == 5) + return 5; + return CO_APIC_IDE0; +} + +static int is_co_apic(unsigned int irq) +{ + if (IS_CO_APIC(irq)) + return CO_APIC(irq); + + switch (irq) { + case 0: return CO_APIC_CPU; + case CO_IRQ_IDE0: return co_apic_ide0_hack(); + case CO_IRQ_IDE1: return CO_APIC_IDE1; + default: return -1; + } +} + + +/* + * This is the SGI Cobalt (IO-)APIC: + */ +static void enable_cobalt_irq(struct irq_data *data) +{ + co_apic_set(is_co_apic(data->irq), data->irq); +} + +static void disable_cobalt_irq(struct irq_data *data) +{ + int entry = is_co_apic(data->irq); + + co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK); + co_apic_read(CO_APIC_LO(entry)); +} + +static void ack_cobalt_irq(struct irq_data *data) +{ + unsigned long flags; + + spin_lock_irqsave(&cobalt_lock, flags); + disable_cobalt_irq(data); + apic_write(APIC_EOI, APIC_EIO_ACK); + spin_unlock_irqrestore(&cobalt_lock, flags); +} + +static struct irq_chip cobalt_irq_type = { + .name = "Cobalt-APIC", + .irq_enable = enable_cobalt_irq, + .irq_disable = disable_cobalt_irq, + .irq_ack = ack_cobalt_irq, +}; + + +/* + * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt + * -- not the manner expected by the code in i8259.c. + * + * there is a 'master' physical interrupt source that gets sent to + * the CPU. But in the chipset there are various 'virtual' interrupts + * waiting to be handled. We represent this to Linux through a 'master' + * interrupt controller type, and through a special virtual interrupt- + * controller. Device drivers only see the virtual interrupt sources. + */ +static unsigned int startup_piix4_master_irq(struct irq_data *data) +{ + legacy_pic->init(0); + enable_cobalt_irq(data); +} + +static void end_piix4_master_irq(struct irq_data *data) +{ + unsigned long flags; + + spin_lock_irqsave(&cobalt_lock, flags); + enable_cobalt_irq(data); + spin_unlock_irqrestore(&cobalt_lock, flags); +} + +static struct irq_chip piix4_master_irq_type = { + .name = "PIIX4-master", + .irq_startup = startup_piix4_master_irq, + .irq_ack = ack_cobalt_irq, +}; + +static void pii4_mask(struct irq_data *data) { } + +static struct irq_chip piix4_virtual_irq_type = { + .name = "PIIX4-virtual", + .mask = pii4_mask, +}; + +/* + * PIIX4-8259 master/virtual functions to handle interrupt requests + * from legacy devices: floppy, parallel, serial, rtc. + * + * None of these get Cobalt APIC entries, neither do they have IDT + * entries. These interrupts are purely virtual and distributed from + * the 'master' interrupt source: CO_IRQ_8259. + * + * When the 8259 interrupts its handler figures out which of these + * devices is interrupting and dispatches to its handler. + * + * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/ + * enable_irq gets the right irq. This 'master' irq is never directly + * manipulated by any driver. + */ +static irqreturn_t piix4_master_intr(int irq, void *dev_id) +{ + unsigned long flags; + int realirq; + + raw_spin_lock_irqsave(&i8259A_lock, flags); + + /* Find out what's interrupting in the PIIX4 master 8259 */ + outb(0x0c, 0x20); /* OCW3 Poll command */ + realirq = inb(0x20); + + /* + * Bit 7 == 0 means invalid/spurious + */ + if (unlikely(!(realirq & 0x80))) + goto out_unlock; + + realirq &= 7; + + if (unlikely(realirq == 2)) { + outb(0x0c, 0xa0); + realirq = inb(0xa0); + + if (unlikely(!(realirq & 0x80))) + goto out_unlock; + + realirq = (realirq & 7) + 8; + } + + /* mask and ack interrupt */ + cached_irq_mask |= 1 << realirq; + if (unlikely(realirq > 7)) { + inb(0xa1); + outb(cached_slave_mask, 0xa1); + outb(0x60 + (realirq & 7), 0xa0); + outb(0x60 + 2, 0x20); + } else { + inb(0x21); + outb(cached_master_mask, 0x21); + outb(0x60 + realirq, 0x20); + } + + raw_spin_unlock_irqrestore(&i8259A_lock, flags); + + /* + * handle this 'virtual interrupt' as a Cobalt one now. + */ + generic_handle_irq(realirq); + + return IRQ_HANDLED; + +out_unlock: + raw_spin_unlock_irqrestore(&i8259A_lock, flags); + return IRQ_NONE; +} + +static struct irqaction master_action = { + .handler = piix4_master_intr, + .name = "PIIX4-8259", +}; + +static struct irqaction cascade_action = { + .handler = no_action, + .name = "cascade", +}; + +static inline void set_piix4_virtual_irq_type(void) +{ + piix4_virtual_irq_type.enable = i8259A_chip.unmask; + piix4_virtual_irq_type.disable = i8259A_chip.mask; + piix4_virtual_irq_type.unmask = i8259A_chip.unmask; +} + +static void __init visws_pre_intr_init(void) +{ + int i; + + set_piix4_virtual_irq_type(); + + for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { + struct irq_chip *chip = NULL; + + if (i == 0) + chip = &cobalt_irq_type; + else if (i == CO_IRQ_IDE0) + chip = &cobalt_irq_type; + else if (i == CO_IRQ_IDE1) + >chip = &cobalt_irq_type; + else if (i == CO_IRQ_8259) + chip = &piix4_master_irq_type; + else if (i < CO_IRQ_APIC0) + chip = &piix4_virtual_irq_type; + else if (IS_CO_APIC(i)) + chip = &cobalt_irq_type; + + if (chip) + set_irq_chip(i, chip); + } + + setup_irq(CO_IRQ_8259, &master_action); + setup_irq(2, &cascade_action); +} -- cgit v1.2.3-70-g09d2 From 3b3da9d25ae9d8cac99302ad66834499cf324d08 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Oct 2010 10:35:51 +0200 Subject: x86: Move scx200 to platform Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 3 - arch/x86/kernel/scx200_32.c | 131 ----------------------------------- arch/x86/platform/Makefile | 1 + arch/x86/platform/scx200/Makefile | 2 + arch/x86/platform/scx200/scx200_32.c | 131 +++++++++++++++++++++++++++++++++++ 5 files changed, 134 insertions(+), 134 deletions(-) delete mode 100644 arch/x86/kernel/scx200_32.c create mode 100644 arch/x86/platform/scx200/Makefile create mode 100644 arch/x86/platform/scx200/scx200_32.c (limited to 'arch/x86/kernel/Makefile') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 28c4f3f2e97..f57eeea4bc1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -101,9 +101,6 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o -obj-$(CONFIG_SCx200) += scx200.o -scx200-y += scx200_32.o - obj-$(CONFIG_OLPC) += olpc.o obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o diff --git a/arch/x86/kernel/scx200_32.c b/arch/x86/kernel/scx200_32.c deleted file mode 100644 index 7e004acbe52..00000000000 --- a/arch/x86/kernel/scx200_32.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2001,2002 Christer Weinigel - * - * National Semiconductor SCx200 support. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -/* Verify that the configuration block really is there */ -#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base)) - -#define NAME "scx200" - -MODULE_AUTHOR("Christer Weinigel "); -MODULE_DESCRIPTION("NatSemi SCx200 Driver"); -MODULE_LICENSE("GPL"); - -unsigned scx200_gpio_base = 0; -unsigned long scx200_gpio_shadow[2]; - -unsigned scx200_cb_base = 0; - -static struct pci_device_id scx200_tbl[] = { - { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) }, - { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) }, - { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_XBUS) }, - { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_XBUS) }, - { }, -}; -MODULE_DEVICE_TABLE(pci,scx200_tbl); - -static int __devinit scx200_probe(struct pci_dev *, const struct pci_device_id *); - -static struct pci_driver scx200_pci_driver = { - .name = "scx200", - .id_table = scx200_tbl, - .probe = scx200_probe, -}; - -static DEFINE_MUTEX(scx200_gpio_config_lock); - -static void __devinit scx200_init_shadow(void) -{ - int bank; - - /* read the current values driven on the GPIO signals */ - for (bank = 0; bank < 2; ++bank) - scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank); -} - -static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent) -{ - unsigned base; - - if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE || - pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) { - base = pci_resource_start(pdev, 0); - printk(KERN_INFO NAME ": GPIO base 0x%x\n", base); - - if (!request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO")) { - printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n"); - return -EBUSY; - } - - scx200_gpio_base = base; - scx200_init_shadow(); - - } else { - /* find the base of the Configuration Block */ - if (scx200_cb_probe(SCx200_CB_BASE_FIXED)) { - scx200_cb_base = SCx200_CB_BASE_FIXED; - } else { - pci_read_config_dword(pdev, SCx200_CBA_SCRATCH, &base); - if (scx200_cb_probe(base)) { - scx200_cb_base = base; - } else { - printk(KERN_WARNING NAME ": Configuration Block not found\n"); - return -ENODEV; - } - } - printk(KERN_INFO NAME ": Configuration Block base 0x%x\n", scx200_cb_base); - } - - return 0; -} - -u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits) -{ - u32 config, new_config; - - mutex_lock(&scx200_gpio_config_lock); - - outl(index, scx200_gpio_base + 0x20); - config = inl(scx200_gpio_base + 0x24); - - new_config = (config & mask) | bits; - outl(new_config, scx200_gpio_base + 0x24); - - mutex_unlock(&scx200_gpio_config_lock); - - return config; -} - -static int __init scx200_init(void) -{ - printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n"); - - return pci_register_driver(&scx200_pci_driver); -} - -static void __exit scx200_cleanup(void) -{ - pci_unregister_driver(&scx200_pci_driver); - release_region(scx200_gpio_base, SCx200_GPIO_SIZE); -} - -module_init(scx200_init); -module_exit(scx200_cleanup); - -EXPORT_SYMBOL(scx200_gpio_base); -EXPORT_SYMBOL(scx200_gpio_shadow); -EXPORT_SYMBOL(scx200_gpio_configure); -EXPORT_SYMBOL(scx200_cb_base); diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index e629d7a428c..1191989e9f0 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile @@ -1,4 +1,5 @@ # Platform specific code goes here obj-y += efi/ +obj-y += scx200/ obj-y += sfi/ obj-y += visws/ diff --git a/arch/x86/platform/scx200/Makefile b/arch/x86/platform/scx200/Makefile new file mode 100644 index 00000000000..762b4c7f431 --- /dev/null +++ b/arch/x86/platform/scx200/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_SCx200) += scx200.o +scx200-y += scx200_32.o diff --git a/arch/x86/platform/scx200/scx200_32.c b/arch/x86/platform/scx200/scx200_32.c new file mode 100644 index 00000000000..7e004acbe52 --- /dev/null +++ b/arch/x86/platform/scx200/scx200_32.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2001,2002 Christer Weinigel + * + * National Semiconductor SCx200 support. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +/* Verify that the configuration block really is there */ +#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base)) + +#define NAME "scx200" + +MODULE_AUTHOR("Christer Weinigel "); +MODULE_DESCRIPTION("NatSemi SCx200 Driver"); +MODULE_LICENSE("GPL"); + +unsigned scx200_gpio_base = 0; +unsigned long scx200_gpio_shadow[2]; + +unsigned scx200_cb_base = 0; + +static struct pci_device_id scx200_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) }, + { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) }, + { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_XBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_XBUS) }, + { }, +}; +MODULE_DEVICE_TABLE(pci,scx200_tbl); + +static int __devinit scx200_probe(struct pci_dev *, const struct pci_device_id *); + +static struct pci_driver scx200_pci_driver = { + .name = "scx200", + .id_table = scx200_tbl, + .probe = scx200_probe, +}; + +static DEFINE_MUTEX(scx200_gpio_config_lock); + +static void __devinit scx200_init_shadow(void) +{ + int bank; + + /* read the current values driven on the GPIO signals */ + for (bank = 0; bank < 2; ++bank) + scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank); +} + +static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + unsigned base; + + if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE || + pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) { + base = pci_resource_start(pdev, 0); + printk(KERN_INFO NAME ": GPIO base 0x%x\n", base); + + if (!request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO")) { + printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n"); + return -EBUSY; + } + + scx200_gpio_base = base; + scx200_init_shadow(); + + } else { + /* find the base of the Configuration Block */ + if (scx200_cb_probe(SCx200_CB_BASE_FIXED)) { + scx200_cb_base = SCx200_CB_BASE_FIXED; + } else { + pci_read_config_dword(pdev, SCx200_CBA_SCRATCH, &base); + if (scx200_cb_probe(base)) { + scx200_cb_base = base; + } else { + printk(KERN_WARNING NAME ": Configuration Block not found\n"); + return -ENODEV; + } + } + printk(KERN_INFO NAME ": Configuration Block base 0x%x\n", scx200_cb_base); + } + + return 0; +} + +u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits) +{ + u32 config, new_config; + + mutex_lock(&scx200_gpio_config_lock); + + outl(index, scx200_gpio_base + 0x20); + config = inl(scx200_gpio_base + 0x24); + + new_config = (config & mask) | bits; + outl(new_config, scx200_gpio_base + 0x24); + + mutex_unlock(&scx200_gpio_config_lock); + + return config; +} + +static int __init scx200_init(void) +{ + printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n"); + + return pci_register_driver(&scx200_pci_driver); +} + +static void __exit scx200_cleanup(void) +{ + pci_unregister_driver(&scx200_pci_driver); + release_region(scx200_gpio_base, SCx200_GPIO_SIZE); +} + +module_init(scx200_init); +module_exit(scx200_cleanup); + +EXPORT_SYMBOL(scx200_gpio_base); +EXPORT_SYMBOL(scx200_gpio_shadow); +EXPORT_SYMBOL(scx200_gpio_configure); +EXPORT_SYMBOL(scx200_cb_base); -- cgit v1.2.3-70-g09d2 From 9694d4afc1ebe1e46cacfb78b107cd8f9fb550ba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Oct 2010 10:38:13 +0200 Subject: x86: Move mrst to platform Signed-off-by: Thomas Gleixner Cc: Jacob Pan Cc: Alan Cox --- arch/x86/kernel/Makefile | 1 - arch/x86/kernel/mrst.c | 311 ---------------------------------------- arch/x86/platform/Makefile | 1 + arch/x86/platform/mrst/Makefile | 1 + arch/x86/platform/mrst/mrst.c | 311 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 313 insertions(+), 312 deletions(-) delete mode 100644 arch/x86/kernel/mrst.c create mode 100644 arch/x86/platform/mrst/Makefile create mode 100644 arch/x86/platform/mrst/mrst.c (limited to 'arch/x86/kernel/Makefile') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f57eeea4bc1..4e1f862dd68 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -104,7 +104,6 @@ obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o obj-$(CONFIG_OLPC) += olpc.o obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o -obj-$(CONFIG_X86_MRST) += mrst.o microcode-y := microcode_core.o microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c deleted file mode 100644 index 79ae68154e8..00000000000 --- a/arch/x86/kernel/mrst.c +++ /dev/null @@ -1,311 +0,0 @@ -/* - * mrst.c: Intel Moorestown platform specific setup code - * - * (C) Copyright 2008 Intel Corporation - * Author: Jacob Pan (jacob.jun.pan@intel.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, - * cmdline option x86_mrst_timer can be used to override the configuration - * to prefer one or the other. - * at runtime, there are basically three timer configurations: - * 1. per cpu apbt clock only - * 2. per cpu always-on lapic clocks only, this is Penwell/Medfield only - * 3. per cpu lapic clock (C3STOP) and one apbt clock, with broadcast. - * - * by default (without cmdline option), platform code first detects cpu type - * to see if we are on lincroft or penwell, then set up both lapic or apbt - * clocks accordingly. - * i.e. by default, medfield uses configuration #2, moorestown uses #1. - * config #3 is supported but not recommended on medfield. - * - * rating and feature summary: - * lapic (with C3STOP) --------- 100 - * apbt (always-on) ------------ 110 - * lapic (always-on,ARAT) ------ 150 - */ - -__cpuinitdata enum mrst_timer_options mrst_timer_options; - -static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM]; -static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM]; -enum mrst_cpu_type __mrst_cpu_chip; -EXPORT_SYMBOL_GPL(__mrst_cpu_chip); - -int sfi_mtimer_num; - -struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX]; -EXPORT_SYMBOL_GPL(sfi_mrtc_array); -int sfi_mrtc_num; - -static inline void assign_to_mp_irq(struct mpc_intsrc *m, - struct mpc_intsrc *mp_irq) -{ - memcpy(mp_irq, m, sizeof(struct mpc_intsrc)); -} - -static inline int mp_irq_cmp(struct mpc_intsrc *mp_irq, - struct mpc_intsrc *m) -{ - return memcmp(mp_irq, m, sizeof(struct mpc_intsrc)); -} - -static void save_mp_irq(struct mpc_intsrc *m) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - if (!mp_irq_cmp(&mp_irqs[i], m)) - return; - } - - assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]); - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!!\n"); -} - -/* parse all the mtimer info to a static mtimer array */ -static int __init sfi_parse_mtmr(struct sfi_table_header *table) -{ - struct sfi_table_simple *sb; - struct sfi_timer_table_entry *pentry; - struct mpc_intsrc mp_irq; - int totallen; - - sb = (struct sfi_table_simple *)table; - if (!sfi_mtimer_num) { - sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb, - struct sfi_timer_table_entry); - pentry = (struct sfi_timer_table_entry *) sb->pentry; - totallen = sfi_mtimer_num * sizeof(*pentry); - memcpy(sfi_mtimer_array, pentry, totallen); - } - - printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num); - pentry = sfi_mtimer_array; - for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) { - printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz," - " irq = %d\n", totallen, (u32)pentry->phys_addr, - pentry->freq_hz, pentry->irq); - if (!pentry->irq) - continue; - mp_irq.type = MP_IOAPIC; - mp_irq.irqtype = mp_INT; -/* triggering mode edge bit 2-3, active high polarity bit 0-1 */ - mp_irq.irqflag = 5; - mp_irq.srcbus = 0; - mp_irq.srcbusirq = pentry->irq; /* IRQ */ - mp_irq.dstapic = MP_APIC_ALL; - mp_irq.dstirq = pentry->irq; - save_mp_irq(&mp_irq); - } - - return 0; -} - -struct sfi_timer_table_entry *sfi_get_mtmr(int hint) -{ - int i; - if (hint < sfi_mtimer_num) { - if (!sfi_mtimer_usage[hint]) { - pr_debug("hint taken for timer %d irq %d\n",\ - hint, sfi_mtimer_array[hint].irq); - sfi_mtimer_usage[hint] = 1; - return &sfi_mtimer_array[hint]; - } - } - /* take the first timer available */ - for (i = 0; i < sfi_mtimer_num;) { - if (!sfi_mtimer_usage[i]) { - sfi_mtimer_usage[i] = 1; - return &sfi_mtimer_array[i]; - } - i++; - } - return NULL; -} - -void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr) -{ - int i; - for (i = 0; i < sfi_mtimer_num;) { - if (mtmr->irq == sfi_mtimer_array[i].irq) { - sfi_mtimer_usage[i] = 0; - return; - } - i++; - } -} - -/* parse all the mrtc info to a global mrtc array */ -int __init sfi_parse_mrtc(struct sfi_table_header *table) -{ - struct sfi_table_simple *sb; - struct sfi_rtc_table_entry *pentry; - struct mpc_intsrc mp_irq; - - int totallen; - - sb = (struct sfi_table_simple *)table; - if (!sfi_mrtc_num) { - sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb, - struct sfi_rtc_table_entry); - pentry = (struct sfi_rtc_table_entry *)sb->pentry; - totallen = sfi_mrtc_num * sizeof(*pentry); - memcpy(sfi_mrtc_array, pentry, totallen); - } - - printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num); - pentry = sfi_mrtc_array; - for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { - printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n", - totallen, (u32)pentry->phys_addr, pentry->irq); - mp_irq.type = MP_IOAPIC; - mp_irq.irqtype = mp_INT; - mp_irq.irqflag = 0; - mp_irq.srcbus = 0; - mp_irq.srcbusirq = pentry->irq; /* IRQ */ - mp_irq.dstapic = MP_APIC_ALL; - mp_irq.dstirq = pentry->irq; - save_mp_irq(&mp_irq); - } - return 0; -} - -static unsigned long __init mrst_calibrate_tsc(void) -{ - unsigned long flags, fast_calibrate; - - local_irq_save(flags); - fast_calibrate = apbt_quick_calibrate(); - local_irq_restore(flags); - - if (fast_calibrate) - return fast_calibrate; - - return 0; -} - -void __init mrst_time_init(void) -{ - switch (mrst_timer_options) { - case MRST_TIMER_APBT_ONLY: - break; - case MRST_TIMER_LAPIC_APBT: - x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; - x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; - break; - default: - if (!boot_cpu_has(X86_FEATURE_ARAT)) - break; - x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; - x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; - return; - } - /* we need at least one APB timer */ - sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); - pre_init_apic_IRQ0(); - apbt_time_init(); -} - -void __init mrst_rtc_init(void) -{ - sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); -} - -void __cpuinit mrst_arch_setup(void) -{ - if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) - __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; - else if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x26) - __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT; - else { - pr_err("Unknown Moorestown CPU (%d:%d), default to Lincroft\n", - boot_cpu_data.x86, boot_cpu_data.x86_model); - __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT; - } - pr_debug("Moorestown CPU %s identified\n", - (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) ? - "Lincroft" : "Penwell"); -} - -/* MID systems don't have i8042 controller */ -static int mrst_i8042_detect(void) -{ - return 0; -} - -/* - * Moorestown specific x86_init function overrides and early setup - * calls. - */ -void __init x86_mrst_early_setup(void) -{ - x86_init.resources.probe_roms = x86_init_noop; - x86_init.resources.reserve_resources = x86_init_noop; - - x86_init.timers.timer_init = mrst_time_init; - x86_init.timers.setup_percpu_clockev = x86_init_noop; - - x86_init.irqs.pre_vector_init = x86_init_noop; - - x86_init.oem.arch_setup = mrst_arch_setup; - - x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock; - - x86_platform.calibrate_tsc = mrst_calibrate_tsc; - x86_platform.i8042_detect = mrst_i8042_detect; - x86_init.pci.init = pci_mrst_init; - x86_init.pci.fixup_irqs = x86_init_noop; - - legacy_pic = &null_legacy_pic; - - /* Avoid searching for BIOS MP tables */ - x86_init.mpparse.find_smp_config = x86_init_noop; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; - -} - -/* - * if user does not want to use per CPU apb timer, just give it a lower rating - * than local apic timer and skip the late per cpu timer init. - */ -static inline int __init setup_x86_mrst_timer(char *arg) -{ - if (!arg) - return -EINVAL; - - if (strcmp("apbt_only", arg) == 0) - mrst_timer_options = MRST_TIMER_APBT_ONLY; - else if (strcmp("lapic_and_apbt", arg) == 0) - mrst_timer_options = MRST_TIMER_LAPIC_APBT; - else { - pr_warning("X86 MRST timer option %s not recognised" - " use x86_mrst_timer=apbt_only or lapic_and_apbt\n", - arg); - return -EINVAL; - } - return 0; -} -__setup("x86_mrst_timer=", setup_x86_mrst_timer); diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index 1191989e9f0..06761ed5374 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile @@ -1,5 +1,6 @@ # Platform specific code goes here obj-y += efi/ +obj-y += mrst/ obj-y += scx200/ obj-y += sfi/ obj-y += visws/ diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile new file mode 100644 index 00000000000..efbbc552fa9 --- /dev/null +++ b/arch/x86/platform/mrst/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_X86_MRST) += mrst.o diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c new file mode 100644 index 00000000000..79ae68154e8 --- /dev/null +++ b/arch/x86/platform/mrst/mrst.c @@ -0,0 +1,311 @@ +/* + * mrst.c: Intel Moorestown platform specific setup code + * + * (C) Copyright 2008 Intel Corporation + * Author: Jacob Pan (jacob.jun.pan@intel.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, + * cmdline option x86_mrst_timer can be used to override the configuration + * to prefer one or the other. + * at runtime, there are basically three timer configurations: + * 1. per cpu apbt clock only + * 2. per cpu always-on lapic clocks only, this is Penwell/Medfield only + * 3. per cpu lapic clock (C3STOP) and one apbt clock, with broadcast. + * + * by default (without cmdline option), platform code first detects cpu type + * to see if we are on lincroft or penwell, then set up both lapic or apbt + * clocks accordingly. + * i.e. by default, medfield uses configuration #2, moorestown uses #1. + * config #3 is supported but not recommended on medfield. + * + * rating and feature summary: + * lapic (with C3STOP) --------- 100 + * apbt (always-on) ------------ 110 + * lapic (always-on,ARAT) ------ 150 + */ + +__cpuinitdata enum mrst_timer_options mrst_timer_options; + +static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM]; +static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM]; +enum mrst_cpu_type __mrst_cpu_chip; +EXPORT_SYMBOL_GPL(__mrst_cpu_chip); + +int sfi_mtimer_num; + +struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX]; +EXPORT_SYMBOL_GPL(sfi_mrtc_array); +int sfi_mrtc_num; + +static inline void assign_to_mp_irq(struct mpc_intsrc *m, + struct mpc_intsrc *mp_irq) +{ + memcpy(mp_irq, m, sizeof(struct mpc_intsrc)); +} + +static inline int mp_irq_cmp(struct mpc_intsrc *mp_irq, + struct mpc_intsrc *m) +{ + return memcmp(mp_irq, m, sizeof(struct mpc_intsrc)); +} + +static void save_mp_irq(struct mpc_intsrc *m) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) { + if (!mp_irq_cmp(&mp_irqs[i], m)) + return; + } + + assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]); + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!!\n"); +} + +/* parse all the mtimer info to a static mtimer array */ +static int __init sfi_parse_mtmr(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_timer_table_entry *pentry; + struct mpc_intsrc mp_irq; + int totallen; + + sb = (struct sfi_table_simple *)table; + if (!sfi_mtimer_num) { + sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb, + struct sfi_timer_table_entry); + pentry = (struct sfi_timer_table_entry *) sb->pentry; + totallen = sfi_mtimer_num * sizeof(*pentry); + memcpy(sfi_mtimer_array, pentry, totallen); + } + + printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num); + pentry = sfi_mtimer_array; + for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) { + printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz," + " irq = %d\n", totallen, (u32)pentry->phys_addr, + pentry->freq_hz, pentry->irq); + if (!pentry->irq) + continue; + mp_irq.type = MP_IOAPIC; + mp_irq.irqtype = mp_INT; +/* triggering mode edge bit 2-3, active high polarity bit 0-1 */ + mp_irq.irqflag = 5; + mp_irq.srcbus = 0; + mp_irq.srcbusirq = pentry->irq; /* IRQ */ + mp_irq.dstapic = MP_APIC_ALL; + mp_irq.dstirq = pentry->irq; + save_mp_irq(&mp_irq); + } + + return 0; +} + +struct sfi_timer_table_entry *sfi_get_mtmr(int hint) +{ + int i; + if (hint < sfi_mtimer_num) { + if (!sfi_mtimer_usage[hint]) { + pr_debug("hint taken for timer %d irq %d\n",\ + hint, sfi_mtimer_array[hint].irq); + sfi_mtimer_usage[hint] = 1; + return &sfi_mtimer_array[hint]; + } + } + /* take the first timer available */ + for (i = 0; i < sfi_mtimer_num;) { + if (!sfi_mtimer_usage[i]) { + sfi_mtimer_usage[i] = 1; + return &sfi_mtimer_array[i]; + } + i++; + } + return NULL; +} + +void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr) +{ + int i; + for (i = 0; i < sfi_mtimer_num;) { + if (mtmr->irq == sfi_mtimer_array[i].irq) { + sfi_mtimer_usage[i] = 0; + return; + } + i++; + } +} + +/* parse all the mrtc info to a global mrtc array */ +int __init sfi_parse_mrtc(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + struct sfi_rtc_table_entry *pentry; + struct mpc_intsrc mp_irq; + + int totallen; + + sb = (struct sfi_table_simple *)table; + if (!sfi_mrtc_num) { + sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb, + struct sfi_rtc_table_entry); + pentry = (struct sfi_rtc_table_entry *)sb->pentry; + totallen = sfi_mrtc_num * sizeof(*pentry); + memcpy(sfi_mrtc_array, pentry, totallen); + } + + printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num); + pentry = sfi_mrtc_array; + for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { + printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n", + totallen, (u32)pentry->phys_addr, pentry->irq); + mp_irq.type = MP_IOAPIC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = 0; + mp_irq.srcbus = 0; + mp_irq.srcbusirq = pentry->irq; /* IRQ */ + mp_irq.dstapic = MP_APIC_ALL; + mp_irq.dstirq = pentry->irq; + save_mp_irq(&mp_irq); + } + return 0; +} + +static unsigned long __init mrst_calibrate_tsc(void) +{ + unsigned long flags, fast_calibrate; + + local_irq_save(flags); + fast_calibrate = apbt_quick_calibrate(); + local_irq_restore(flags); + + if (fast_calibrate) + return fast_calibrate; + + return 0; +} + +void __init mrst_time_init(void) +{ + switch (mrst_timer_options) { + case MRST_TIMER_APBT_ONLY: + break; + case MRST_TIMER_LAPIC_APBT: + x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; + x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; + break; + default: + if (!boot_cpu_has(X86_FEATURE_ARAT)) + break; + x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock; + x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock; + return; + } + /* we need at least one APB timer */ + sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); + pre_init_apic_IRQ0(); + apbt_time_init(); +} + +void __init mrst_rtc_init(void) +{ + sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); +} + +void __cpuinit mrst_arch_setup(void) +{ + if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) + __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL; + else if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x26) + __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT; + else { + pr_err("Unknown Moorestown CPU (%d:%d), default to Lincroft\n", + boot_cpu_data.x86, boot_cpu_data.x86_model); + __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT; + } + pr_debug("Moorestown CPU %s identified\n", + (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) ? + "Lincroft" : "Penwell"); +} + +/* MID systems don't have i8042 controller */ +static int mrst_i8042_detect(void) +{ + return 0; +} + +/* + * Moorestown specific x86_init function overrides and early setup + * calls. + */ +void __init x86_mrst_early_setup(void) +{ + x86_init.resources.probe_roms = x86_init_noop; + x86_init.resources.reserve_resources = x86_init_noop; + + x86_init.timers.timer_init = mrst_time_init; + x86_init.timers.setup_percpu_clockev = x86_init_noop; + + x86_init.irqs.pre_vector_init = x86_init_noop; + + x86_init.oem.arch_setup = mrst_arch_setup; + + x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock; + + x86_platform.calibrate_tsc = mrst_calibrate_tsc; + x86_platform.i8042_detect = mrst_i8042_detect; + x86_init.pci.init = pci_mrst_init; + x86_init.pci.fixup_irqs = x86_init_noop; + + legacy_pic = &null_legacy_pic; + + /* Avoid searching for BIOS MP tables */ + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.get_smp_config = x86_init_uint_noop; + +} + +/* + * if user does not want to use per CPU apb timer, just give it a lower rating + * than local apic timer and skip the late per cpu timer init. + */ +static inline int __init setup_x86_mrst_timer(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strcmp("apbt_only", arg) == 0) + mrst_timer_options = MRST_TIMER_APBT_ONLY; + else if (strcmp("lapic_and_apbt", arg) == 0) + mrst_timer_options = MRST_TIMER_LAPIC_APBT; + else { + pr_warning("X86 MRST timer option %s not recognised" + " use x86_mrst_timer=apbt_only or lapic_and_apbt\n", + arg); + return -EINVAL; + } + return 0; +} +__setup("x86_mrst_timer=", setup_x86_mrst_timer); -- cgit v1.2.3-70-g09d2 From 329b84e42e3ee348b114fd0bfe4b2421e6139257 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 23 Oct 2010 11:23:37 +0200 Subject: x86: Move uv to platform Signed-off-by: Thomas Gleixner Cc: Mike Travis --- arch/x86/kernel/Makefile | 1 - arch/x86/kernel/bios_uv.c | 215 ----- arch/x86/kernel/tlb_uv.c | 1661 --------------------------------------- arch/x86/kernel/uv_irq.c | 285 ------- arch/x86/kernel/uv_sysfs.c | 76 -- arch/x86/kernel/uv_time.c | 423 ---------- arch/x86/platform/Makefile | 1 + arch/x86/platform/uv/Makefile | 1 + arch/x86/platform/uv/bios_uv.c | 215 +++++ arch/x86/platform/uv/tlb_uv.c | 1661 +++++++++++++++++++++++++++++++++++++++ arch/x86/platform/uv/uv_irq.c | 285 +++++++ arch/x86/platform/uv/uv_sysfs.c | 76 ++ arch/x86/platform/uv/uv_time.c | 423 ++++++++++ 13 files changed, 2662 insertions(+), 2661 deletions(-) delete mode 100644 arch/x86/kernel/bios_uv.c delete mode 100644 arch/x86/kernel/tlb_uv.c delete mode 100644 arch/x86/kernel/uv_irq.c delete mode 100644 arch/x86/kernel/uv_sysfs.c delete mode 100644 arch/x86/kernel/uv_time.c create mode 100644 arch/x86/platform/uv/Makefile create mode 100644 arch/x86/platform/uv/bios_uv.c create mode 100644 arch/x86/platform/uv/tlb_uv.c create mode 100644 arch/x86/platform/uv/uv_irq.c create mode 100644 arch/x86/platform/uv/uv_sysfs.c create mode 100644 arch/x86/platform/uv/uv_time.c (limited to 'arch/x86/kernel/Makefile') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4e1f862dd68..08e2e4bf839 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -117,7 +117,6 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) - obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o obj-$(CONFIG_AUDIT) += audit_64.o obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c deleted file mode 100644 index 8bc57baaa9a..00000000000 --- a/arch/x86/kernel/bios_uv.c +++ /dev/null @@ -1,215 +0,0 @@ -/* - * BIOS run time interface routines. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. - * Copyright (c) Russ Anderson - */ - -#include -#include -#include -#include -#include - -static struct uv_systab uv_systab; - -s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) -{ - struct uv_systab *tab = &uv_systab; - s64 ret; - - if (!tab->function) - /* - * BIOS does not support UV systab - */ - return BIOS_STATUS_UNIMPLEMENTED; - - ret = efi_call6((void *)__va(tab->function), (u64)which, - a1, a2, a3, a4, a5); - return ret; -} -EXPORT_SYMBOL_GPL(uv_bios_call); - -s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, - u64 a4, u64 a5) -{ - unsigned long bios_flags; - s64 ret; - - local_irq_save(bios_flags); - ret = uv_bios_call(which, a1, a2, a3, a4, a5); - local_irq_restore(bios_flags); - - return ret; -} - -s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, - u64 a4, u64 a5) -{ - s64 ret; - - preempt_disable(); - ret = uv_bios_call(which, a1, a2, a3, a4, a5); - preempt_enable(); - - return ret; -} - - -long sn_partition_id; -EXPORT_SYMBOL_GPL(sn_partition_id); -long sn_coherency_id; -EXPORT_SYMBOL_GPL(sn_coherency_id); -long sn_region_size; -EXPORT_SYMBOL_GPL(sn_region_size); -long system_serial_number; -EXPORT_SYMBOL_GPL(system_serial_number); -int uv_type; -EXPORT_SYMBOL_GPL(uv_type); - - -s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, - long *region, long *ssn) -{ - s64 ret; - u64 v0, v1; - union partition_info_u part; - - ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc, - (u64)(&v0), (u64)(&v1), 0, 0); - if (ret != BIOS_STATUS_SUCCESS) - return ret; - - part.val = v0; - if (uvtype) - *uvtype = part.hub_version; - if (partid) - *partid = part.partition_id; - if (coher) - *coher = part.coherence_id; - if (region) - *region = part.region_size; - if (ssn) - *ssn = v1; - return ret; -} -EXPORT_SYMBOL_GPL(uv_bios_get_sn_info); - -int -uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size, - unsigned long *intr_mmr_offset) -{ - u64 watchlist; - s64 ret; - - /* - * bios returns watchlist number or negative error number. - */ - ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr, - mq_size, (u64)intr_mmr_offset, - (u64)&watchlist, 0); - if (ret < BIOS_STATUS_SUCCESS) - return ret; - - return watchlist; -} -EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc); - -int -uv_bios_mq_watchlist_free(int blade, int watchlist_num) -{ - return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE, - blade, watchlist_num, 0, 0, 0); -} -EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free); - -s64 -uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms) -{ - return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len, - perms, 0, 0); -} -EXPORT_SYMBOL_GPL(uv_bios_change_memprotect); - -s64 -uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len) -{ - s64 ret; - - ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie, - (u64)addr, buf, (u64)len, 0); - return ret; -} -EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa); - -s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) -{ - return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, - (u64)ticks_per_second, 0, 0, 0); -} -EXPORT_SYMBOL_GPL(uv_bios_freq_base); - -/* - * uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target - * @decode: true to enable target, false to disable target - * @domain: PCI domain number - * @bus: PCI bus number - * - * Returns: - * 0: Success - * -EINVAL: Invalid domain or bus number - * -ENOSYS: Capability not available - * -EBUSY: Legacy VGA I/O cannot be retargeted at this time - */ -int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus) -{ - return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET, - (u64)decode, (u64)domain, (u64)bus, 0, 0); -} -EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target); - - -#ifdef CONFIG_EFI -void uv_bios_init(void) -{ - struct uv_systab *tab; - - if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || - (efi.uv_systab == (unsigned long)NULL)) { - printk(KERN_CRIT "No EFI UV System Table.\n"); - uv_systab.function = (unsigned long)NULL; - return; - } - - tab = (struct uv_systab *)ioremap(efi.uv_systab, - sizeof(struct uv_systab)); - if (strncmp(tab->signature, "UVST", 4) != 0) - printk(KERN_ERR "bad signature in UV system table!"); - - /* - * Copy table to permanent spot for later use. - */ - memcpy(&uv_systab, tab, sizeof(struct uv_systab)); - iounmap(tab); - - printk(KERN_INFO "EFI UV System Table Revision %d\n", - uv_systab.revision); -} -#else /* !CONFIG_EFI */ - -void uv_bios_init(void) { } -#endif diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c deleted file mode 100644 index 20ea20a39e2..00000000000 --- a/arch/x86/kernel/tlb_uv.c +++ /dev/null @@ -1,1661 +0,0 @@ -/* - * SGI UltraViolet TLB flush routines. - * - * (c) 2008-2010 Cliff Wickman , SGI. - * - * This code is released under the GNU General Public License version 2 or - * later. - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */ -static int timeout_base_ns[] = { - 20, - 160, - 1280, - 10240, - 81920, - 655360, - 5242880, - 167772160 -}; -static int timeout_us; -static int nobau; -static int baudisabled; -static spinlock_t disable_lock; -static cycles_t congested_cycles; - -/* tunables: */ -static int max_bau_concurrent = MAX_BAU_CONCURRENT; -static int max_bau_concurrent_constant = MAX_BAU_CONCURRENT; -static int plugged_delay = PLUGGED_DELAY; -static int plugsb4reset = PLUGSB4RESET; -static int timeoutsb4reset = TIMEOUTSB4RESET; -static int ipi_reset_limit = IPI_RESET_LIMIT; -static int complete_threshold = COMPLETE_THRESHOLD; -static int congested_response_us = CONGESTED_RESPONSE_US; -static int congested_reps = CONGESTED_REPS; -static int congested_period = CONGESTED_PERIOD; -static struct dentry *tunables_dir; -static struct dentry *tunables_file; - -static int __init setup_nobau(char *arg) -{ - nobau = 1; - return 0; -} -early_param("nobau", setup_nobau); - -/* base pnode in this partition */ -static int uv_partition_base_pnode __read_mostly; -/* position of pnode (which is nasid>>1): */ -static int uv_nshift __read_mostly; -static unsigned long uv_mmask __read_mostly; - -static DEFINE_PER_CPU(struct ptc_stats, ptcstats); -static DEFINE_PER_CPU(struct bau_control, bau_control); -static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); - -/* - * Determine the first node on a uvhub. 'Nodes' are used for kernel - * memory allocation. - */ -static int __init uvhub_to_first_node(int uvhub) -{ - int node, b; - - for_each_online_node(node) { - b = uv_node_to_blade_id(node); - if (uvhub == b) - return node; - } - return -1; -} - -/* - * Determine the apicid of the first cpu on a uvhub. - */ -static int __init uvhub_to_first_apicid(int uvhub) -{ - int cpu; - - for_each_present_cpu(cpu) - if (uvhub == uv_cpu_to_blade_id(cpu)) - return per_cpu(x86_cpu_to_apicid, cpu); - return -1; -} - -/* - * Free a software acknowledge hardware resource by clearing its Pending - * bit. This will return a reply to the sender. - * If the message has timed out, a reply has already been sent by the - * hardware but the resource has not been released. In that case our - * clear of the Timeout bit (as well) will free the resource. No reply will - * be sent (the hardware will only do one reply per message). - */ -static inline void uv_reply_to_message(struct msg_desc *mdp, - struct bau_control *bcp) -{ - unsigned long dw; - struct bau_payload_queue_entry *msg; - - msg = mdp->msg; - if (!msg->canceled) { - dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) | - msg->sw_ack_vector; - uv_write_local_mmr( - UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); - } - msg->replied_to = 1; - msg->sw_ack_vector = 0; -} - -/* - * Process the receipt of a RETRY message - */ -static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, - struct bau_control *bcp) -{ - int i; - int cancel_count = 0; - int slot2; - unsigned long msg_res; - unsigned long mmr = 0; - struct bau_payload_queue_entry *msg; - struct bau_payload_queue_entry *msg2; - struct ptc_stats *stat; - - msg = mdp->msg; - stat = bcp->statp; - stat->d_retries++; - /* - * cancel any message from msg+1 to the retry itself - */ - for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) { - if (msg2 > mdp->va_queue_last) - msg2 = mdp->va_queue_first; - if (msg2 == msg) - break; - - /* same conditions for cancellation as uv_do_reset */ - if ((msg2->replied_to == 0) && (msg2->canceled == 0) && - (msg2->sw_ack_vector) && ((msg2->sw_ack_vector & - msg->sw_ack_vector) == 0) && - (msg2->sending_cpu == msg->sending_cpu) && - (msg2->msg_type != MSG_NOOP)) { - slot2 = msg2 - mdp->va_queue_first; - mmr = uv_read_local_mmr - (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); - msg_res = msg2->sw_ack_vector; - /* - * This is a message retry; clear the resources held - * by the previous message only if they timed out. - * If it has not timed out we have an unexpected - * situation to report. - */ - if (mmr & (msg_res << UV_SW_ACK_NPENDING)) { - /* - * is the resource timed out? - * make everyone ignore the cancelled message. - */ - msg2->canceled = 1; - stat->d_canceled++; - cancel_count++; - uv_write_local_mmr( - UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, - (msg_res << UV_SW_ACK_NPENDING) | - msg_res); - } - } - } - if (!cancel_count) - stat->d_nocanceled++; -} - -/* - * Do all the things a cpu should do for a TLB shootdown message. - * Other cpu's may come here at the same time for this message. - */ -static void uv_bau_process_message(struct msg_desc *mdp, - struct bau_control *bcp) -{ - int msg_ack_count; - short socket_ack_count = 0; - struct ptc_stats *stat; - struct bau_payload_queue_entry *msg; - struct bau_control *smaster = bcp->socket_master; - - /* - * This must be a normal message, or retry of a normal message - */ - msg = mdp->msg; - stat = bcp->statp; - if (msg->address == TLB_FLUSH_ALL) { - local_flush_tlb(); - stat->d_alltlb++; - } else { - __flush_tlb_one(msg->address); - stat->d_onetlb++; - } - stat->d_requestee++; - - /* - * One cpu on each uvhub has the additional job on a RETRY - * of releasing the resource held by the message that is - * being retried. That message is identified by sending - * cpu number. - */ - if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master) - uv_bau_process_retry_msg(mdp, bcp); - - /* - * This is a sw_ack message, so we have to reply to it. - * Count each responding cpu on the socket. This avoids - * pinging the count's cache line back and forth between - * the sockets. - */ - socket_ack_count = atomic_add_short_return(1, (struct atomic_short *) - &smaster->socket_acknowledge_count[mdp->msg_slot]); - if (socket_ack_count == bcp->cpus_in_socket) { - /* - * Both sockets dump their completed count total into - * the message's count. - */ - smaster->socket_acknowledge_count[mdp->msg_slot] = 0; - msg_ack_count = atomic_add_short_return(socket_ack_count, - (struct atomic_short *)&msg->acknowledge_count); - - if (msg_ack_count == bcp->cpus_in_uvhub) { - /* - * All cpus in uvhub saw it; reply - */ - uv_reply_to_message(mdp, bcp); - } - } - - return; -} - -/* - * Determine the first cpu on a uvhub. - */ -static int uvhub_to_first_cpu(int uvhub) -{ - int cpu; - for_each_present_cpu(cpu) - if (uvhub == uv_cpu_to_blade_id(cpu)) - return cpu; - return -1; -} - -/* - * Last resort when we get a large number of destination timeouts is - * to clear resources held by a given cpu. - * Do this with IPI so that all messages in the BAU message queue - * can be identified by their nonzero sw_ack_vector field. - * - * This is entered for a single cpu on the uvhub. - * The sender want's this uvhub to free a specific message's - * sw_ack resources. - */ -static void -uv_do_reset(void *ptr) -{ - int i; - int slot; - int count = 0; - unsigned long mmr; - unsigned long msg_res; - struct bau_control *bcp; - struct reset_args *rap; - struct bau_payload_queue_entry *msg; - struct ptc_stats *stat; - - bcp = &per_cpu(bau_control, smp_processor_id()); - rap = (struct reset_args *)ptr; - stat = bcp->statp; - stat->d_resets++; - - /* - * We're looking for the given sender, and - * will free its sw_ack resource. - * If all cpu's finally responded after the timeout, its - * message 'replied_to' was set. - */ - for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { - /* uv_do_reset: same conditions for cancellation as - uv_bau_process_retry_msg() */ - if ((msg->replied_to == 0) && - (msg->canceled == 0) && - (msg->sending_cpu == rap->sender) && - (msg->sw_ack_vector) && - (msg->msg_type != MSG_NOOP)) { - /* - * make everyone else ignore this message - */ - msg->canceled = 1; - slot = msg - bcp->va_queue_first; - count++; - /* - * only reset the resource if it is still pending - */ - mmr = uv_read_local_mmr - (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); - msg_res = msg->sw_ack_vector; - if (mmr & msg_res) { - stat->d_rcanceled++; - uv_write_local_mmr( - UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, - (msg_res << UV_SW_ACK_NPENDING) | - msg_res); - } - } - } - return; -} - -/* - * Use IPI to get all target uvhubs to release resources held by - * a given sending cpu number. - */ -static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution, - int sender) -{ - int uvhub; - int cpu; - cpumask_t mask; - struct reset_args reset_args; - - reset_args.sender = sender; - - cpus_clear(mask); - /* find a single cpu for each uvhub in this distribution mask */ - for (uvhub = 0; - uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE; - uvhub++) { - if (!bau_uvhub_isset(uvhub, distribution)) - continue; - /* find a cpu for this uvhub */ - cpu = uvhub_to_first_cpu(uvhub); - cpu_set(cpu, mask); - } - /* IPI all cpus; Preemption is already disabled */ - smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1); - return; -} - -static inline unsigned long -cycles_2_us(unsigned long long cyc) -{ - unsigned long long ns; - unsigned long us; - ns = (cyc * per_cpu(cyc2ns, smp_processor_id())) - >> CYC2NS_SCALE_FACTOR; - us = ns / 1000; - return us; -} - -/* - * wait for all cpus on this hub to finish their sends and go quiet - * leaves uvhub_quiesce set so that no new broadcasts are started by - * bau_flush_send_and_wait() - */ -static inline void -quiesce_local_uvhub(struct bau_control *hmaster) -{ - atomic_add_short_return(1, (struct atomic_short *) - &hmaster->uvhub_quiesce); -} - -/* - * mark this quiet-requestor as done - */ -static inline void -end_uvhub_quiesce(struct bau_control *hmaster) -{ - atomic_add_short_return(-1, (struct atomic_short *) - &hmaster->uvhub_quiesce); -} - -/* - * Wait for completion of a broadcast software ack message - * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP - */ -static int uv_wait_completion(struct bau_desc *bau_desc, - unsigned long mmr_offset, int right_shift, int this_cpu, - struct bau_control *bcp, struct bau_control *smaster, long try) -{ - unsigned long descriptor_status; - cycles_t ttime; - struct ptc_stats *stat = bcp->statp; - struct bau_control *hmaster; - - hmaster = bcp->uvhub_master; - - /* spin on the status MMR, waiting for it to go idle */ - while ((descriptor_status = (((unsigned long) - uv_read_local_mmr(mmr_offset) >> - right_shift) & UV_ACT_STATUS_MASK)) != - DESC_STATUS_IDLE) { - /* - * Our software ack messages may be blocked because there are - * no swack resources available. As long as none of them - * has timed out hardware will NACK our message and its - * state will stay IDLE. - */ - if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { - stat->s_stimeout++; - return FLUSH_GIVEUP; - } else if (descriptor_status == - DESC_STATUS_DESTINATION_TIMEOUT) { - stat->s_dtimeout++; - ttime = get_cycles(); - - /* - * Our retries may be blocked by all destination - * swack resources being consumed, and a timeout - * pending. In that case hardware returns the - * ERROR that looks like a destination timeout. - */ - if (cycles_2_us(ttime - bcp->send_message) < - timeout_us) { - bcp->conseccompletes = 0; - return FLUSH_RETRY_PLUGGED; - } - - bcp->conseccompletes = 0; - return FLUSH_RETRY_TIMEOUT; - } else { - /* - * descriptor_status is still BUSY - */ - cpu_relax(); - } - } - bcp->conseccompletes++; - return FLUSH_COMPLETE; -} - -static inline cycles_t -sec_2_cycles(unsigned long sec) -{ - unsigned long ns; - cycles_t cyc; - - ns = sec * 1000000000; - cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); - return cyc; -} - -/* - * conditionally add 1 to *v, unless *v is >= u - * return 0 if we cannot add 1 to *v because it is >= u - * return 1 if we can add 1 to *v because it is < u - * the add is atomic - * - * This is close to atomic_add_unless(), but this allows the 'u' value - * to be lowered below the current 'v'. atomic_add_unless can only stop - * on equal. - */ -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) -{ - spin_lock(lock); - if (atomic_read(v) >= u) { - spin_unlock(lock); - return 0; - } - atomic_inc(v); - spin_unlock(lock); - return 1; -} - -/* - * Our retries are blocked by all destination swack resources being - * in use, and a timeout is pending. In that case hardware immediately - * returns the ERROR that looks like a destination timeout. - */ -static void -destination_plugged(struct bau_desc *bau_desc, struct bau_control *bcp, - struct bau_control *hmaster, struct ptc_stats *stat) -{ - udelay(bcp->plugged_delay); - bcp->plugged_tries++; - if (bcp->plugged_tries >= bcp->plugsb4reset) { - bcp->plugged_tries = 0; - quiesce_local_uvhub(hmaster); - spin_lock(&hmaster->queue_lock); - uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); - spin_unlock(&hmaster->queue_lock); - end_uvhub_quiesce(hmaster); - bcp->ipi_attempts++; - stat->s_resets_plug++; - } -} - -static void -destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp, - struct bau_control *hmaster, struct ptc_stats *stat) -{ - hmaster->max_bau_concurrent = 1; - bcp->timeout_tries++; - if (bcp->timeout_tries >= bcp->timeoutsb4reset) { - bcp->timeout_tries = 0; - quiesce_local_uvhub(hmaster); - spin_lock(&hmaster->queue_lock); - uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); - spin_unlock(&hmaster->queue_lock); - end_uvhub_quiesce(hmaster); - bcp->ipi_attempts++; - stat->s_resets_timeout++; - } -} - -/* - * Completions are taking a very long time due to a congested numalink - * network. - */ -static void -disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat) -{ - int tcpu; - struct bau_control *tbcp; - - /* let only one cpu do this disabling */ - spin_lock(&disable_lock); - if (!baudisabled && bcp->period_requests && - ((bcp->period_time / bcp->period_requests) > congested_cycles)) { - /* it becomes this cpu's job to turn on the use of the - BAU again */ - baudisabled = 1; - bcp->set_bau_off = 1; - bcp->set_bau_on_time = get_cycles() + - sec_2_cycles(bcp->congested_period); - stat->s_bau_disabled++; - for_each_present_cpu(tcpu) { - tbcp = &per_cpu(bau_control, tcpu); - tbcp->baudisabled = 1; - } - } - spin_unlock(&disable_lock); -} - -/** - * uv_flush_send_and_wait - * - * Send a broadcast and wait for it to complete. - * - * The flush_mask contains the cpus the broadcast is to be sent to including - * cpus that are on the local uvhub. - * - * Returns 0 if all flushing represented in the mask was done. - * Returns 1 if it gives up entirely and the original cpu mask is to be - * returned to the kernel. - */ -int uv_flush_send_and_wait(struct bau_desc *bau_desc, - struct cpumask *flush_mask, struct bau_control *bcp) -{ - int right_shift; - int completion_status = 0; - int seq_number = 0; - long try = 0; - int cpu = bcp->uvhub_cpu; - int this_cpu = bcp->cpu; - unsigned long mmr_offset; - unsigned long index; - cycles_t time1; - cycles_t time2; - cycles_t elapsed; - struct ptc_stats *stat = bcp->statp; - struct bau_control *smaster = bcp->socket_master; - struct bau_control *hmaster = bcp->uvhub_master; - - if (!atomic_inc_unless_ge(&hmaster->uvhub_lock, - &hmaster->active_descriptor_count, - hmaster->max_bau_concurrent)) { - stat->s_throttles++; - do { - cpu_relax(); - } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock, - &hmaster->active_descriptor_count, - hmaster->max_bau_concurrent)); - } - while (hmaster->uvhub_quiesce) - cpu_relax(); - - if (cpu < UV_CPUS_PER_ACT_STATUS) { - mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; - right_shift = cpu * UV_ACT_STATUS_SIZE; - } else { - mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; - right_shift = - ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE); - } - time1 = get_cycles(); - do { - if (try == 0) { - bau_desc->header.msg_type = MSG_REGULAR; - seq_number = bcp->message_number++; - } else { - bau_desc->header.msg_type = MSG_RETRY; - stat->s_retry_messages++; - } - bau_desc->header.sequence = seq_number; - index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | - bcp->uvhub_cpu; - bcp->send_message = get_cycles(); - uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); - try++; - completion_status = uv_wait_completion(bau_desc, mmr_offset, - right_shift, this_cpu, bcp, smaster, try); - - if (completion_status == FLUSH_RETRY_PLUGGED) { - destination_plugged(bau_desc, bcp, hmaster, stat); - } else if (completion_status == FLUSH_RETRY_TIMEOUT) { - destination_timeout(bau_desc, bcp, hmaster, stat); - } - if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { - bcp->ipi_attempts = 0; - completion_status = FLUSH_GIVEUP; - break; - } - cpu_relax(); - } while ((completion_status == FLUSH_RETRY_PLUGGED) || - (completion_status == FLUSH_RETRY_TIMEOUT)); - time2 = get_cycles(); - bcp->plugged_tries = 0; - bcp->timeout_tries = 0; - if ((completion_status == FLUSH_COMPLETE) && - (bcp->conseccompletes > bcp->complete_threshold) && - (hmaster->max_bau_concurrent < - hmaster->max_bau_concurrent_constant)) - hmaster->max_bau_concurrent++; - while (hmaster->uvhub_quiesce) - cpu_relax(); - atomic_dec(&hmaster->active_descriptor_count); - if (time2 > time1) { - elapsed = time2 - time1; - stat->s_time += elapsed; - if ((completion_status == FLUSH_COMPLETE) && (try == 1)) { - bcp->period_requests++; - bcp->period_time += elapsed; - if ((elapsed > congested_cycles) && - (bcp->period_requests > bcp->congested_reps)) { - disable_for_congestion(bcp, stat); - } - } - } else - stat->s_requestor--; - if (completion_status == FLUSH_COMPLETE && try > 1) - stat->s_retriesok++; - else if (completion_status == FLUSH_GIVEUP) { - stat->s_giveup++; - return 1; - } - return 0; -} - -/** - * uv_flush_tlb_others - globally purge translation cache of a virtual - * address or all TLB's - * @cpumask: mask of all cpu's in which the address is to be removed - * @mm: mm_struct containing virtual address range - * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) - * @cpu: the current cpu - * - * This is the entry point for initiating any UV global TLB shootdown. - * - * Purges the translation caches of all specified processors of the given - * virtual address, or purges all TLB's on specified processors. - * - * The caller has derived the cpumask from the mm_struct. This function - * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) - * - * The cpumask is converted into a uvhubmask of the uvhubs containing - * those cpus. - * - * Note that this function should be called with preemption disabled. - * - * Returns NULL if all remote flushing was done. - * Returns pointer to cpumask if some remote flushing remains to be - * done. The returned pointer is valid till preemption is re-enabled. - */ -const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, - unsigned long va, unsigned int cpu) -{ - int tcpu; - int uvhub; - int locals = 0; - int remotes = 0; - int hubs = 0; - struct bau_desc *bau_desc; - struct cpumask *flush_mask; - struct ptc_stats *stat; - struct bau_control *bcp; - struct bau_control *tbcp; - - /* kernel was booted 'nobau' */ - if (nobau) - return cpumask; - - bcp = &per_cpu(bau_control, cpu); - stat = bcp->statp; - - /* bau was disabled due to slow response */ - if (bcp->baudisabled) { - /* the cpu that disabled it must re-enable it */ - if (bcp->set_bau_off) { - if (get_cycles() >= bcp->set_bau_on_time) { - stat->s_bau_reenabled++; - baudisabled = 0; - for_each_present_cpu(tcpu) { - tbcp = &per_cpu(bau_control, tcpu); - tbcp->baudisabled = 0; - tbcp->period_requests = 0; - tbcp->period_time = 0; - } - } - } - return cpumask; - } - - /* - * Each sending cpu has a per-cpu mask which it fills from the caller's - * cpu mask. All cpus are converted to uvhubs and copied to the - * activation descriptor. - */ - flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); - /* don't actually do a shootdown of the local cpu */ - cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); - if (cpu_isset(cpu, *cpumask)) - stat->s_ntargself++; - - bau_desc = bcp->descriptor_base; - bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; - bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); - - /* cpu statistics */ - for_each_cpu(tcpu, flush_mask) { - uvhub = uv_cpu_to_blade_id(tcpu); - bau_uvhub_set(uvhub, &bau_desc->distribution); - if (uvhub == bcp->uvhub) - locals++; - else - remotes++; - } - if ((locals + remotes) == 0) - return NULL; - stat->s_requestor++; - stat->s_ntargcpu += remotes + locals; - stat->s_ntargremotes += remotes; - stat->s_ntarglocals += locals; - remotes = bau_uvhub_weight(&bau_desc->distribution); - - /* uvhub statistics */ - hubs = bau_uvhub_weight(&bau_desc->distribution); - if (locals) { - stat->s_ntarglocaluvhub++; - stat->s_ntargremoteuvhub += (hubs - 1); - } else - stat->s_ntargremoteuvhub += hubs; - stat->s_ntarguvhub += hubs; - if (hubs >= 16) - stat->s_ntarguvhub16++; - else if (hubs >= 8) - stat->s_ntarguvhub8++; - else if (hubs >= 4) - stat->s_ntarguvhub4++; - else if (hubs >= 2) - stat->s_ntarguvhub2++; - else - stat->s_ntarguvhub1++; - - bau_desc->payload.address = va; - bau_desc->payload.sending_cpu = cpu; - - /* - * uv_flush_send_and_wait returns 0 if all cpu's were messaged, - * or 1 if it gave up and the original cpumask should be returned. - */ - if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp)) - return NULL; - else - return cpumask; -} - -/* - * The BAU message interrupt comes here. (registered by set_intr_gate) - * See entry_64.S - * - * We received a broadcast assist message. - * - * Interrupts are disabled; this interrupt could represent - * the receipt of several messages. - * - * All cores/threads on this hub get this interrupt. - * The last one to see it does the software ack. - * (the resource will not be freed until noninterruptable cpus see this - * interrupt; hardware may timeout the s/w ack and reply ERROR) - */ -void uv_bau_message_interrupt(struct pt_regs *regs) -{ - int count = 0; - cycles_t time_start; - struct bau_payload_queue_entry *msg; - struct bau_control *bcp; - struct ptc_stats *stat; - struct msg_desc msgdesc; - - time_start = get_cycles(); - bcp = &per_cpu(bau_control, smp_processor_id()); - stat = bcp->statp; - msgdesc.va_queue_first = bcp->va_queue_first; - msgdesc.va_queue_last = bcp->va_queue_last; - msg = bcp->bau_msg_head; - while (msg->sw_ack_vector) { - count++; - msgdesc.msg_slot = msg - msgdesc.va_queue_first; - msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1; - msgdesc.msg = msg; - uv_bau_process_message(&msgdesc, bcp); - msg++; - if (msg > msgdesc.va_queue_last) - msg = msgdesc.va_queue_first; - bcp->bau_msg_head = msg; - } - stat->d_time += (get_cycles() - time_start); - if (!count) - stat->d_nomsg++; - else if (count > 1) - stat->d_multmsg++; - ack_APIC_irq(); -} - -/* - * uv_enable_timeouts - * - * Each target uvhub (i.e. a uvhub that has no cpu's) needs to have - * shootdown message timeouts enabled. The timeout does not cause - * an interrupt, but causes an error message to be returned to - * the sender. - */ -static void uv_enable_timeouts(void) -{ - int uvhub; - int nuvhubs; - int pnode; - unsigned long mmr_image; - - nuvhubs = uv_num_possible_blades(); - - for (uvhub = 0; uvhub < nuvhubs; uvhub++) { - if (!uv_blade_nr_possible_cpus(uvhub)) - continue; - - pnode = uv_blade_to_pnode(uvhub); - mmr_image = - uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); - /* - * Set the timeout period and then lock it in, in three - * steps; captures and locks in the period. - * - * To program the period, the SOFT_ACK_MODE must be off. - */ - mmr_image &= ~((unsigned long)1 << - UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); - uv_write_global_mmr64 - (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); - /* - * Set the 4-bit period. - */ - mmr_image &= ~((unsigned long)0xf << - UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); - mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << - UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); - uv_write_global_mmr64 - (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); - /* - * Subsequent reversals of the timebase bit (3) cause an - * immediate timeout of one or all INTD resources as - * indicated in bits 2:0 (7 causes all of them to timeout). - */ - mmr_image |= ((unsigned long)1 << - UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); - uv_write_global_mmr64 - (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); - } -} - -static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset) -{ - if (*offset < num_possible_cpus()) - return offset; - return NULL; -} - -static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) -{ - (*offset)++; - if (*offset < num_possible_cpus()) - return offset; - return NULL; -} - -static void uv_ptc_seq_stop(struct seq_file *file, void *data) -{ -} - -static inline unsigned long long -microsec_2_cycles(unsigned long microsec) -{ - unsigned long ns; - unsigned long long cyc; - - ns = microsec * 1000; - cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); - return cyc; -} - -/* - * Display the statistics thru /proc. - * 'data' points to the cpu number - */ -static int uv_ptc_seq_show(struct seq_file *file, void *data) -{ - struct ptc_stats *stat; - int cpu; - - cpu = *(loff_t *)data; - - if (!cpu) { - seq_printf(file, - "# cpu sent stime self locals remotes ncpus localhub "); - seq_printf(file, - "remotehub numuvhubs numuvhubs16 numuvhubs8 "); - seq_printf(file, - "numuvhubs4 numuvhubs2 numuvhubs1 dto "); - seq_printf(file, - "retries rok resetp resett giveup sto bz throt "); - seq_printf(file, - "sw_ack recv rtime all "); - seq_printf(file, - "one mult none retry canc nocan reset rcan "); - seq_printf(file, - "disable enable\n"); - } - if (cpu < num_possible_cpus() && cpu_online(cpu)) { - stat = &per_cpu(ptcstats, cpu); - /* source side statistics */ - seq_printf(file, - "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", - cpu, stat->s_requestor, cycles_2_us(stat->s_time), - stat->s_ntargself, stat->s_ntarglocals, - stat->s_ntargremotes, stat->s_ntargcpu, - stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub, - stat->s_ntarguvhub, stat->s_ntarguvhub16); - seq_printf(file, "%ld %ld %ld %ld %ld ", - stat->s_ntarguvhub8, stat->s_ntarguvhub4, - stat->s_ntarguvhub2, stat->s_ntarguvhub1, - stat->s_dtimeout); - seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ", - stat->s_retry_messages, stat->s_retriesok, - stat->s_resets_plug, stat->s_resets_timeout, - stat->s_giveup, stat->s_stimeout, - stat->s_busy, stat->s_throttles); - - /* destination side statistics */ - seq_printf(file, - "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", - uv_read_global_mmr64(uv_cpu_to_pnode(cpu), - UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), - stat->d_requestee, cycles_2_us(stat->d_time), - stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, - stat->d_nomsg, stat->d_retries, stat->d_canceled, - stat->d_nocanceled, stat->d_resets, - stat->d_rcanceled); - seq_printf(file, "%ld %ld\n", - stat->s_bau_disabled, stat->s_bau_reenabled); - } - - return 0; -} - -/* - * Display the tunables thru debugfs - */ -static ssize_t tunables_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - char *buf; - int ret; - - buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", - "max_bau_concurrent plugged_delay plugsb4reset", - "timeoutsb4reset ipi_reset_limit complete_threshold", - "congested_response_us congested_reps congested_period", - max_bau_concurrent, plugged_delay, plugsb4reset, - timeoutsb4reset, ipi_reset_limit, complete_threshold, - congested_response_us, congested_reps, congested_period); - - if (!buf) - return -ENOMEM; - - ret = simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf)); - kfree(buf); - return ret; -} - -/* - * -1: resetf the statistics - * 0: display meaning of the statistics - */ -static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, - size_t count, loff_t *data) -{ - int cpu; - long input_arg; - char optstr[64]; - struct ptc_stats *stat; - - if (count == 0 || count > sizeof(optstr)) - return -EINVAL; - if (copy_from_user(optstr, user, count)) - return -EFAULT; - optstr[count - 1] = '\0'; - if (strict_strtol(optstr, 10, &input_arg) < 0) { - printk(KERN_DEBUG "%s is invalid\n", optstr); - return -EINVAL; - } - - if (input_arg == 0) { - printk(KERN_DEBUG "# cpu: cpu number\n"); - printk(KERN_DEBUG "Sender statistics:\n"); - printk(KERN_DEBUG - "sent: number of shootdown messages sent\n"); - printk(KERN_DEBUG - "stime: time spent sending messages\n"); - printk(KERN_DEBUG - "numuvhubs: number of hubs targeted with shootdown\n"); - printk(KERN_DEBUG - "numuvhubs16: number times 16 or more hubs targeted\n"); - printk(KERN_DEBUG - "numuvhubs8: number times 8 or more hubs targeted\n"); - printk(KERN_DEBUG - "numuvhubs4: number times 4 or more hubs targeted\n"); - printk(KERN_DEBUG - "numuvhubs2: number times 2 or more hubs targeted\n"); - printk(KERN_DEBUG - "numuvhubs1: number times 1 hub targeted\n"); - printk(KERN_DEBUG - "numcpus: number of cpus targeted with shootdown\n"); - printk(KERN_DEBUG - "dto: number of destination timeouts\n"); - printk(KERN_DEBUG - "retries: destination timeout retries sent\n"); - printk(KERN_DEBUG - "rok: : destination timeouts successfully retried\n"); - printk(KERN_DEBUG - "resetp: ipi-style resource resets for plugs\n"); - printk(KERN_DEBUG - "resett: ipi-style resource resets for timeouts\n"); - printk(KERN_DEBUG - "giveup: fall-backs to ipi-style shootdowns\n"); - printk(KERN_DEBUG - "sto: number of source timeouts\n"); - printk(KERN_DEBUG - "bz: number of stay-busy's\n"); - printk(KERN_DEBUG - "throt: number times spun in throttle\n"); - printk(KERN_DEBUG "Destination side statistics:\n"); - printk(KERN_DEBUG - "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); - printk(KERN_DEBUG - "recv: shootdown messages received\n"); - printk(KERN_DEBUG - "rtime: time spent processing messages\n"); - printk(KERN_DEBUG - "all: shootdown all-tlb messages\n"); - printk(KERN_DEBUG - "one: shootdown one-tlb messages\n"); - printk(KERN_DEBUG - "mult: interrupts that found multiple messages\n"); - printk(KERN_DEBUG - "none: interrupts that found no messages\n"); - printk(KERN_DEBUG - "retry: number of retry messages processed\n"); - printk(KERN_DEBUG - "canc: number messages canceled by retries\n"); - printk(KERN_DEBUG - "nocan: number retries that found nothing to cancel\n"); - printk(KERN_DEBUG - "reset: number of ipi-style reset requests processed\n"); - printk(KERN_DEBUG - "rcan: number messages canceled by reset requests\n"); - printk(KERN_DEBUG - "disable: number times use of the BAU was disabled\n"); - printk(KERN_DEBUG - "enable: number times use of the BAU was re-enabled\n"); - } else if (input_arg == -1) { - for_each_present_cpu(cpu) { - stat = &per_cpu(ptcstats, cpu); - memset(stat, 0, sizeof(struct ptc_stats)); - } - } - - return count; -} - -static int local_atoi(const char *name) -{ - int val = 0; - - for (;; name++) { - switch (*name) { - case '0' ... '9': - val = 10*val+(*name-'0'); - break; - default: - return val; - } - } -} - -/* - * set the tunables - * 0 values reset them to defaults - */ -static ssize_t tunables_write(struct file *file, const char __user *user, - size_t count, loff_t *data) -{ - int cpu; - int cnt = 0; - int val; - char *p; - char *q; - char instr[64]; - struct bau_control *bcp; - - if (count == 0 || count > sizeof(instr)-1) - return -EINVAL; - if (copy_from_user(instr, user, count)) - return -EFAULT; - - instr[count] = '\0'; - /* count the fields */ - p = instr + strspn(instr, WHITESPACE); - q = p; - for (; *p; p = q + strspn(q, WHITESPACE)) { - q = p + strcspn(p, WHITESPACE); - cnt++; - if (q == p) - break; - } - if (cnt != 9) { - printk(KERN_INFO "bau tunable error: should be 9 numbers\n"); - return -EINVAL; - } - - p = instr + strspn(instr, WHITESPACE); - q = p; - for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) { - q = p + strcspn(p, WHITESPACE); - val = local_atoi(p); - switch (cnt) { - case 0: - if (val == 0) { - max_bau_concurrent = MAX_BAU_CONCURRENT; - max_bau_concurrent_constant = - MAX_BAU_CONCURRENT; - continue; - } - bcp = &per_cpu(bau_control, smp_processor_id()); - if (val < 1 || val > bcp->cpus_in_uvhub) { - printk(KERN_DEBUG - "Error: BAU max concurrent %d is invalid\n", - val); - return -EINVAL; - } - max_bau_concurrent = val; - max_bau_concurrent_constant = val; - continue; - case 1: - if (val == 0) - plugged_delay = PLUGGED_DELAY; - else - plugged_delay = val; - continue; - case 2: - if (val == 0) - plugsb4reset = PLUGSB4RESET; - else - plugsb4reset = val; - continue; - case 3: - if (val == 0) - timeoutsb4reset = TIMEOUTSB4RESET; - else - timeoutsb4reset = val; - continue; - case 4: - if (val == 0) - ipi_reset_limit = IPI_RESET_LIMIT; - else - ipi_reset_limit = val; - continue; - case 5: - if (val == 0) - complete_threshold = COMPLETE_THRESHOLD; - else - complete_threshold = val; - continue; - case 6: - if (val == 0) - congested_response_us = CONGESTED_RESPONSE_US; - else - congested_response_us = val; - continue; - case 7: - if (val == 0) - congested_reps = CONGESTED_REPS; - else - congested_reps = val; - continue; - case 8: - if (val == 0) - congested_period = CONGESTED_PERIOD; - else - congested_period = val; - continue; - } - if (q == p) - break; - } - for_each_present_cpu(cpu) { - bcp = &per_cpu(bau_control, cpu); - bcp->max_bau_concurrent = max_bau_concurrent; - bcp->max_bau_concurrent_constant = max_bau_concurrent; - bcp->plugged_delay = plugged_delay; - bcp->plugsb4reset = plugsb4reset; - bcp->timeoutsb4reset = timeoutsb4reset; - bcp->ipi_reset_limit = ipi_reset_limit; - bcp->complete_threshold = complete_threshold; - bcp->congested_response_us = congested_response_us; - bcp->congested_reps = congested_reps; - bcp->congested_period = congested_period; - } - return count; -} - -static const struct seq_operations uv_ptc_seq_ops = { - .start = uv_ptc_seq_start, - .next = uv_ptc_seq_next, - .stop = uv_ptc_seq_stop, - .show = uv_ptc_seq_show -}; - -static int uv_ptc_proc_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &uv_ptc_seq_ops); -} - -static int tunables_open(struct inode *inode, struct file *file) -{ - return 0; -} - -static const struct file_operations proc_uv_ptc_operations = { - .open = uv_ptc_proc_open, - .read = seq_read, - .write = uv_ptc_proc_write, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations tunables_fops = { - .open = tunables_open, - .read = tunables_read, - .write = tunables_write, - .llseek = default_llseek, -}; - -static int __init uv_ptc_init(void) -{ - struct proc_dir_entry *proc_uv_ptc; - - if (!is_uv_system()) - return 0; - - proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL, - &proc_uv_ptc_operations); - if (!proc_uv_ptc) { - printk(KERN_ERR "unable to create %s proc entry\n", - UV_PTC_BASENAME); - return -EINVAL; - } - - tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL); - if (!tunables_dir) { - printk(KERN_ERR "unable to create debugfs directory %s\n", - UV_BAU_TUNABLES_DIR); - return -EINVAL; - } - tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, - tunables_dir, NULL, &tunables_fops); - if (!tunables_file) { - printk(KERN_ERR "unable to create debugfs file %s\n", - UV_BAU_TUNABLES_FILE); - return -EINVAL; - } - return 0; -} - -/* - * initialize the sending side's sending buffers - */ -static void -uv_activation_descriptor_init(int node, int pnode) -{ - int i; - int cpu; - unsigned long pa; - unsigned long m; - unsigned long n; - struct bau_desc *bau_desc; - struct bau_desc *bd2; - struct bau_control *bcp; - - /* - * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) - * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub - */ - bau_desc = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)* - UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); - BUG_ON(!bau_desc); - - pa = uv_gpa(bau_desc); /* need the real nasid*/ - n = pa >> uv_nshift; - m = pa & uv_mmask; - - uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, - (n << UV_DESC_BASE_PNODE_SHIFT | m)); - - /* - * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each - * cpu even though we only use the first one; one descriptor can - * describe a broadcast to 256 uv hubs. - */ - for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); - i++, bd2++) { - memset(bd2, 0, sizeof(struct bau_desc)); - bd2->header.sw_ack_flag = 1; - /* - * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub - * in the partition. The bit map will indicate uvhub numbers, - * which are 0-N in a partition. Pnodes are unique system-wide. - */ - bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1; - bd2->header.dest_subnodeid = 0x10; /* the LB */ - bd2->header.command = UV_NET_ENDPOINT_INTD; - bd2->header.int_both = 1; - /* - * all others need to be set to zero: - * fairness chaining multilevel count replied_to - */ - } - for_each_present_cpu(cpu) { - if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu))) - continue; - bcp = &per_cpu(bau_control, cpu); - bcp->descriptor_base = bau_desc; - } -} - -/* - * initialize the destination side's receiving buffers - * entered for each uvhub in the partition - * - node is first node (kernel memory notion) on the uvhub - * - pnode is the uvhub's physical identifier - */ -static void -uv_payload_queue_init(int node, int pnode) -{ - int pn; - int cpu; - char *cp; - unsigned long pa; - struct bau_payload_queue_entry *pqp; - struct bau_payload_queue_entry *pqp_malloc; - struct bau_control *bcp; - - pqp = (struct bau_payload_queue_entry *) kmalloc_node( - (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), - GFP_KERNEL, node); - BUG_ON(!pqp); - pqp_malloc = pqp; - - cp = (char *)pqp + 31; - pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); - - for_each_present_cpu(cpu) { - if (pnode != uv_cpu_to_pnode(cpu)) - continue; - /* for every cpu on this pnode: */ - bcp = &per_cpu(bau_control, cpu); - bcp->va_queue_first = pqp; - bcp->bau_msg_head = pqp; - bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1); - } - /* - * need the pnode of where the memory was really allocated - */ - pa = uv_gpa(pqp); - pn = pa >> uv_nshift; - uv_write_global_mmr64(pnode, - UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, - ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | - uv_physnodeaddr(pqp)); - uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, - uv_physnodeaddr(pqp)); - uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, - (unsigned long) - uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1))); - /* in effect, all msg_type's are set to MSG_NOOP */ - memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); -} - -/* - * Initialization of each UV hub's structures - */ -static void __init uv_init_uvhub(int uvhub, int vector) -{ - int node; - int pnode; - unsigned long apicid; - - node = uvhub_to_first_node(uvhub); - pnode = uv_blade_to_pnode(uvhub); - uv_activation_descriptor_init(node, pnode); - uv_payload_queue_init(node, pnode); - /* - * the below initialization can't be in firmware because the - * messaging IRQ will be determined by the OS - */ - apicid = uvhub_to_first_apicid(uvhub); - uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, - ((apicid << 32) | vector)); -} - -/* - * We will set BAU_MISC_CONTROL with a timeout period. - * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT. - * So the destination timeout period has be be calculated from them. - */ -static int -calculate_destination_timeout(void) -{ - unsigned long mmr_image; - int mult1; - int mult2; - int index; - int base; - int ret; - unsigned long ts_ns; - - mult1 = UV_INTD_SOFT_ACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK; - mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); - index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; - mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); - mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; - base = timeout_base_ns[index]; - ts_ns = base * mult1 * mult2; - ret = ts_ns / 1000; - return ret; -} - -/* - * initialize the bau_control structure for each cpu - */ -static void __init uv_init_per_cpu(int nuvhubs) -{ - int i; - int cpu; - int pnode; - int uvhub; - int have_hmaster; - short socket = 0; - unsigned short socket_mask; - unsigned char *uvhub_mask; - struct bau_control *bcp; - struct uvhub_desc *bdp; - struct socket_desc *sdp; - struct bau_control *hmaster = NULL; - struct bau_control *smaster = NULL; - struct socket_desc { - short num_cpus; - short cpu_number[16]; - }; - struct uvhub_desc { - unsigned short socket_mask; - short num_cpus; - short uvhub; - short pnode; - struct socket_desc socket[2]; - }; - struct uvhub_desc *uvhub_descs; - - timeout_us = calculate_destination_timeout(); - - uvhub_descs = (struct uvhub_desc *) - kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); - memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); - uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); - for_each_present_cpu(cpu) { - bcp = &per_cpu(bau_control, cpu); - memset(bcp, 0, sizeof(struct bau_control)); - pnode = uv_cpu_hub_info(cpu)->pnode; - uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; - *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); - bdp = &uvhub_descs[uvhub]; - bdp->num_cpus++; - bdp->uvhub = uvhub; - bdp->pnode = pnode; - /* kludge: 'assuming' one node per socket, and assuming that - disabling a socket just leaves a gap in node numbers */ - socket = (cpu_to_node(cpu) & 1); - bdp->socket_mask |= (1 << socket); - sdp = &bdp->socket[socket]; - sdp->cpu_number[sdp->num_cpus] = cpu; - sdp->num_cpus++; - } - for (uvhub = 0; uvhub < nuvhubs; uvhub++) { - if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) - continue; - have_hmaster = 0; - bdp = &uvhub_descs[uvhub]; - socket_mask = bdp->socket_mask; - socket = 0; - while (socket_mask) { - if (!(socket_mask & 1)) - goto nextsocket; - sdp = &bdp->socket[socket]; - for (i = 0; i < sdp->num_cpus; i++) { - cpu = sdp->cpu_number[i]; - bcp = &per_cpu(bau_control, cpu); - bcp->cpu = cpu; - if (i == 0) { - smaster = bcp; - if (!have_hmaster) { - have_hmaster++; - hmaster = bcp; - } - } - bcp->cpus_in_uvhub = bdp->num_cpus; - bcp->cpus_in_socket = sdp->num_cpus; - bcp->socket_master = smaster; - bcp->uvhub = bdp->uvhub; - bcp->uvhub_master = hmaster; - bcp->uvhub_cpu = uv_cpu_hub_info(cpu)-> - blade_processor_id; - } -nextsocket: - socket++; - socket_mask = (socket_mask >> 1); - } - } - kfree(uvhub_descs); - kfree(uvhub_mask); - for_each_present_cpu(cpu) { - bcp = &per_cpu(bau_control, cpu); - bcp->baudisabled = 0; - bcp->statp = &per_cpu(ptcstats, cpu); - /* time interval to catch a hardware stay-busy bug */ - bcp->timeout_interval = microsec_2_cycles(2*timeout_us); - bcp->max_bau_concurrent = max_bau_concurrent; - bcp->max_bau_concurrent_constant = max_bau_concurrent; - bcp->plugged_delay = plugged_delay; - bcp->plugsb4reset = plugsb4reset; - bcp->timeoutsb4reset = timeoutsb4reset; - bcp->ipi_reset_limit = ipi_reset_limit; - bcp->complete_threshold = complete_threshold; - bcp->congested_response_us = congested_response_us; - bcp->congested_reps = congested_reps; - bcp->congested_period = congested_period; - } -} - -/* - * Initialization of BAU-related structures - */ -static int __init uv_bau_init(void) -{ - int uvhub; - int pnode; - int nuvhubs; - int cur_cpu; - int vector; - unsigned long mmr; - - if (!is_uv_system()) - return 0; - - if (nobau) - return 0; - - for_each_possible_cpu(cur_cpu) - zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), - GFP_KERNEL, cpu_to_node(cur_cpu)); - - uv_nshift = uv_hub_info->m_val; - uv_mmask = (1UL << uv_hub_info->m_val) - 1; - nuvhubs = uv_num_possible_blades(); - spin_lock_init(&disable_lock); - congested_cycles = microsec_2_cycles(congested_response_us); - - uv_init_per_cpu(nuvhubs); - - uv_partition_base_pnode = 0x7fffffff; - for (uvhub = 0; uvhub < nuvhubs; uvhub++) - if (uv_blade_nr_possible_cpus(uvhub) && - (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) - uv_partition_base_pnode = uv_blade_to_pnode(uvhub); - - vector = UV_BAU_MESSAGE; - for_each_possible_blade(uvhub) - if (uv_blade_nr_possible_cpus(uvhub)) - uv_init_uvhub(uvhub, vector); - - uv_enable_timeouts(); - alloc_intr_gate(vector, uv_bau_message_intr1); - - for_each_possible_blade(uvhub) { - if (uv_blade_nr_possible_cpus(uvhub)) { - pnode = uv_blade_to_pnode(uvhub); - /* INIT the bau */ - uv_write_global_mmr64(pnode, - UVH_LB_BAU_SB_ACTIVATION_CONTROL, - ((unsigned long)1 << 63)); - mmr = 1; /* should be 1 to broadcast to both sockets */ - uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, - mmr); - } - } - - return 0; -} -core_initcall(uv_bau_init); -fs_initcall(uv_ptc_init); diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c deleted file mode 100644 index 7b24460917d..00000000000 --- a/arch/x86/kernel/uv_irq.c +++ /dev/null @@ -1,285 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * SGI UV IRQ functions - * - * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. - */ - -#include -#include -#include -#include - -#include -#include -#include - -/* MMR offset and pnode of hub sourcing interrupts for a given irq */ -struct uv_irq_2_mmr_pnode{ - struct rb_node list; - unsigned long offset; - int pnode; - int irq; -}; - -static spinlock_t uv_irq_lock; -static struct rb_root uv_irq_root; - -static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool); - -static void uv_noop(struct irq_data *data) { } - -static void uv_ack_apic(struct irq_data *data) -{ - ack_APIC_irq(); -} - -static struct irq_chip uv_irq_chip = { - .name = "UV-CORE", - .irq_mask = uv_noop, - .irq_unmask = uv_noop, - .irq_eoi = uv_ack_apic, - .irq_set_affinity = uv_set_irq_affinity, -}; - -/* - * Add offset and pnode information of the hub sourcing interrupts to the - * rb tree for a specific irq. - */ -static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) -{ - struct rb_node **link = &uv_irq_root.rb_node; - struct rb_node *parent = NULL; - struct uv_irq_2_mmr_pnode *n; - struct uv_irq_2_mmr_pnode *e; - unsigned long irqflags; - - n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, - uv_blade_to_memory_nid(blade)); - if (!n) - return -ENOMEM; - - n->irq = irq; - n->offset = offset; - n->pnode = uv_blade_to_pnode(blade); - spin_lock_irqsave(&uv_irq_lock, irqflags); - /* Find the right place in the rbtree: */ - while (*link) { - parent = *link; - e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); - - if (unlikely(irq == e->irq)) { - /* irq entry exists */ - e->pnode = uv_blade_to_pnode(blade); - e->offset = offset; - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - kfree(n); - return 0; - } - - if (irq < e->irq) - link = &(*link)->rb_left; - else - link = &(*link)->rb_right; - } - - /* Insert the node into the rbtree. */ - rb_link_node(&n->list, parent, link); - rb_insert_color(&n->list, &uv_irq_root); - - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - return 0; -} - -/* Retrieve offset and pnode information from the rb tree for a specific irq */ -int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) -{ - struct uv_irq_2_mmr_pnode *e; - struct rb_node *n; - unsigned long irqflags; - - spin_lock_irqsave(&uv_irq_lock, irqflags); - n = uv_irq_root.rb_node; - while (n) { - e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); - - if (e->irq == irq) { - *offset = e->offset; - *pnode = e->pnode; - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - return 0; - } - - if (irq < e->irq) - n = n->rb_left; - else - n = n->rb_right; - } - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - return -1; -} - -/* - * Re-target the irq to the specified CPU and enable the specified MMR located - * on the specified blade to allow the sending of MSIs to the specified CPU. - */ -static int -arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, - unsigned long mmr_offset, int limit) -{ - const struct cpumask *eligible_cpu = cpumask_of(cpu); - struct irq_cfg *cfg = get_irq_chip_data(irq); - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - int mmr_pnode, err; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != - sizeof(unsigned long)); - - err = assign_irq_vector(irq, cfg, eligible_cpu); - if (err != 0) - return err; - - if (limit == UV_AFFINITY_CPU) - irq_set_status_flags(irq, IRQ_NO_BALANCING); - else - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - - set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, - irq_name); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); - - mmr_pnode = uv_blade_to_pnode(mmr_blade); - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return irq; -} - -/* - * Disable the specified MMR located on the specified blade so that MSIs are - * longer allowed to be sent. - */ -static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) -{ - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != - sizeof(unsigned long)); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->mask = 1; - - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); -} - -static int -uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int dest; - unsigned long mmr_value, mmr_offset; - struct uv_IO_APIC_route_entry *entry; - int mmr_pnode; - - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = dest; - - /* Get previously stored MMR and pnode of hub sourcing interrupts */ - if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode)) - return -1; - - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return 0; -} - -/* - * Set up a mapping of an available irq and vector, and enable the specified - * MMR that defines the MSI that is to be sent to the specified CPU when an - * interrupt is raised. - */ -int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, - unsigned long mmr_offset, int limit) -{ - int irq, ret; - - irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade)); - - if (irq <= 0) - return -EBUSY; - - ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, - limit); - if (ret == irq) - uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); - else - destroy_irq(irq); - - return ret; -} -EXPORT_SYMBOL_GPL(uv_setup_irq); - -/* - * Tear down a mapping of an irq and vector, and disable the specified MMR that - * defined the MSI that was to be sent to the specified CPU when an interrupt - * was raised. - * - * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). - */ -void uv_teardown_irq(unsigned int irq) -{ - struct uv_irq_2_mmr_pnode *e; - struct rb_node *n; - unsigned long irqflags; - - spin_lock_irqsave(&uv_irq_lock, irqflags); - n = uv_irq_root.rb_node; - while (n) { - e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); - if (e->irq == irq) { - arch_disable_uv_irq(e->pnode, e->offset); - rb_erase(n, &uv_irq_root); - kfree(e); - break; - } - if (irq < e->irq) - n = n->rb_left; - else - n = n->rb_right; - } - spin_unlock_irqrestore(&uv_irq_lock, irqflags); - destroy_irq(irq); -} -EXPORT_SYMBOL_GPL(uv_teardown_irq); diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c deleted file mode 100644 index 309c70fb775..00000000000 --- a/arch/x86/kernel/uv_sysfs.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. - * Copyright (c) Russ Anderson - */ - -#include -#include -#include - -struct kobject *sgi_uv_kobj; - -static ssize_t partition_id_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id); -} - -static ssize_t coherence_id_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id()); -} - -static struct kobj_attribute partition_id_attr = - __ATTR(partition_id, S_IRUGO, partition_id_show, NULL); - -static struct kobj_attribute coherence_id_attr = - __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL); - - -static int __init sgi_uv_sysfs_init(void) -{ - unsigned long ret; - - if (!is_uv_system()) - return -ENODEV; - - if (!sgi_uv_kobj) - sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); - if (!sgi_uv_kobj) { - printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n"); - return -EINVAL; - } - - ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); - if (ret) { - printk(KERN_WARNING "sysfs_create_file partition_id failed\n"); - return ret; - } - - ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); - if (ret) { - printk(KERN_WARNING "sysfs_create_file coherence_id failed\n"); - return ret; - } - - return 0; -} - -device_initcall(sgi_uv_sysfs_init); diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c deleted file mode 100644 index 56e421bc379..00000000000 --- a/arch/x86/kernel/uv_time.c +++ /dev/null @@ -1,423 +0,0 @@ -/* - * SGI RTC clock/timer routines. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved. - * Copyright (c) Dimitri Sivanich - */ -#include -#include - -#include -#include -#include -#include -#include -#include - -#define RTC_NAME "sgi_rtc" - -static cycle_t uv_read_rtc(struct clocksource *cs); -static int uv_rtc_next_event(unsigned long, struct clock_event_device *); -static void uv_rtc_timer_setup(enum clock_event_mode, - struct clock_event_device *); - -static struct clocksource clocksource_uv = { - .name = RTC_NAME, - .rating = 400, - .read = uv_read_rtc, - .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, - .shift = 10, - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; - -static struct clock_event_device clock_event_device_uv = { - .name = RTC_NAME, - .features = CLOCK_EVT_FEAT_ONESHOT, - .shift = 20, - .rating = 400, - .irq = -1, - .set_next_event = uv_rtc_next_event, - .set_mode = uv_rtc_timer_setup, - .event_handler = NULL, -}; - -static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); - -/* There is one of these allocated per node */ -struct uv_rtc_timer_head { - spinlock_t lock; - /* next cpu waiting for timer, local node relative: */ - int next_cpu; - /* number of cpus on this node: */ - int ncpus; - struct { - int lcpu; /* systemwide logical cpu number */ - u64 expires; /* next timer expiration for this cpu */ - } cpu[1]; -}; - -/* - * Access to uv_rtc_timer_head via blade id. - */ -static struct uv_rtc_timer_head **blade_info __read_mostly; - -static int uv_rtc_evt_enable; - -/* - * Hardware interface routines - */ - -/* Send IPIs to another node */ -static void uv_rtc_send_IPI(int cpu) -{ - unsigned long apicid, val; - int pnode; - - apicid = cpu_physical_id(cpu); - pnode = uv_apicid_to_pnode(apicid); - val = (1UL << UVH_IPI_INT_SEND_SHFT) | - (apicid << UVH_IPI_INT_APIC_ID_SHFT) | - (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); - - uv_write_global_mmr64(pnode, UVH_IPI_INT, val); -} - -/* Check for an RTC interrupt pending */ -static int uv_intr_pending(int pnode) -{ - return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) & - UVH_EVENT_OCCURRED0_RTC1_MASK; -} - -/* Setup interrupt and return non-zero if early expiration occurred. */ -static int uv_setup_intr(int cpu, u64 expires) -{ - u64 val; - int pnode = uv_cpu_to_pnode(cpu); - - uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, - UVH_RTC1_INT_CONFIG_M_MASK); - uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L); - - uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, - UVH_EVENT_OCCURRED0_RTC1_MASK); - - val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | - ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); - - /* Set configuration */ - uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val); - /* Initialize comparator value */ - uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires); - - if (uv_read_rtc(NULL) <= expires) - return 0; - - return !uv_intr_pending(pnode); -} - -/* - * Per-cpu timer tracking routines - */ - -static __init void uv_rtc_deallocate_timers(void) -{ - int bid; - - for_each_possible_blade(bid) { - kfree(blade_info[bid]); - } - kfree(blade_info); -} - -/* Allocate per-node list of cpu timer expiration times. */ -static __init int uv_rtc_allocate_timers(void) -{ - int cpu; - - blade_info = kmalloc(uv_possible_blades * sizeof(void *), GFP_KERNEL); - if (!blade_info) - return -ENOMEM; - memset(blade_info, 0, uv_possible_blades * sizeof(void *)); - - for_each_present_cpu(cpu) { - int nid = cpu_to_node(cpu); - int bid = uv_cpu_to_blade_id(cpu); - int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; - struct uv_rtc_timer_head *head = blade_info[bid]; - - if (!head) { - head = kmalloc_node(sizeof(struct uv_rtc_timer_head) + - (uv_blade_nr_possible_cpus(bid) * - 2 * sizeof(u64)), - GFP_KERNEL, nid); - if (!head) { - uv_rtc_deallocate_timers(); - return -ENOMEM; - } - spin_lock_init(&head->lock); - head->ncpus = uv_blade_nr_possible_cpus(bid); - head->next_cpu = -1; - blade_info[bid] = head; - } - - head->cpu[bcpu].lcpu = cpu; - head->cpu[bcpu].expires = ULLONG_MAX; - } - - return 0; -} - -/* Find and set the next expiring timer. */ -static void uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode) -{ - u64 lowest = ULLONG_MAX; - int c, bcpu = -1; - - head->next_cpu = -1; - for (c = 0; c < head->ncpus; c++) { - u64 exp = head->cpu[c].expires; - if (exp < lowest) { - bcpu = c; - lowest = exp; - } - } - if (bcpu >= 0) { - head->next_cpu = bcpu; - c = head->cpu[bcpu].lcpu; - if (uv_setup_intr(c, lowest)) - /* If we didn't set it up in time, trigger */ - uv_rtc_send_IPI(c); - } else { - uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, - UVH_RTC1_INT_CONFIG_M_MASK); - } -} - -/* - * Set expiration time for current cpu. - * - * Returns 1 if we missed the expiration time. - */ -static int uv_rtc_set_timer(int cpu, u64 expires) -{ - int pnode = uv_cpu_to_pnode(cpu); - int bid = uv_cpu_to_blade_id(cpu); - struct uv_rtc_timer_head *head = blade_info[bid]; - int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; - u64 *t = &head->cpu[bcpu].expires; - unsigned long flags; - int next_cpu; - - spin_lock_irqsave(&head->lock, flags); - - next_cpu = head->next_cpu; - *t = expires; - - /* Will this one be next to go off? */ - if (next_cpu < 0 || bcpu == next_cpu || - expires < head->cpu[next_cpu].expires) { - head->next_cpu = bcpu; - if (uv_setup_intr(cpu, expires)) { - *t = ULLONG_MAX; - uv_rtc_find_next_timer(head, pnode); - spin_unlock_irqrestore(&head->lock, flags); - return -ETIME; - } - } - - spin_unlock_irqrestore(&head->lock, flags); - return 0; -} - -/* - * Unset expiration time for current cpu. - * - * Returns 1 if this timer was pending. - */ -static int uv_rtc_unset_timer(int cpu, int force) -{ - int pnode = uv_cpu_to_pnode(cpu); - int bid = uv_cpu_to_blade_id(cpu); - struct uv_rtc_timer_head *head = blade_info[bid]; - int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; - u64 *t = &head->cpu[bcpu].expires; - unsigned long flags; - int rc = 0; - - spin_lock_irqsave(&head->lock, flags); - - if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) - rc = 1; - - if (rc) { - *t = ULLONG_MAX; - /* Was the hardware setup for this timer? */ - if (head->next_cpu == bcpu) - uv_rtc_find_next_timer(head, pnode); - } - - spin_unlock_irqrestore(&head->lock, flags); - - return rc; -} - - -/* - * Kernel interface routines. - */ - -/* - * Read the RTC. - * - * Starting with HUB rev 2.0, the UV RTC register is replicated across all - * cachelines of it's own page. This allows faster simultaneous reads - * from a given socket. - */ -static cycle_t uv_read_rtc(struct clocksource *cs) -{ - unsigned long offset; - - if (uv_get_min_hub_revision_id() == 1) - offset = 0; - else - offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; - - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); -} - -/* - * Program the next event, relative to now - */ -static int uv_rtc_next_event(unsigned long delta, - struct clock_event_device *ced) -{ - int ced_cpu = cpumask_first(ced->cpumask); - - return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc(NULL)); -} - -/* - * Setup the RTC timer in oneshot mode - */ -static void uv_rtc_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - int ced_cpu = cpumask_first(evt->cpumask); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - case CLOCK_EVT_MODE_ONESHOT: - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here yet */ - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - uv_rtc_unset_timer(ced_cpu, 1); - break; - } -} - -static void uv_rtc_interrupt(void) -{ - int cpu = smp_processor_id(); - struct clock_event_device *ced = &per_cpu(cpu_ced, cpu); - - if (!ced || !ced->event_handler) - return; - - if (uv_rtc_unset_timer(cpu, 0) != 1) - return; - - ced->event_handler(ced); -} - -static int __init uv_enable_evt_rtc(char *str) -{ - uv_rtc_evt_enable = 1; - - return 1; -} -__setup("uvrtcevt", uv_enable_evt_rtc); - -static __init void uv_rtc_register_clockevents(struct work_struct *dummy) -{ - struct clock_event_device *ced = &__get_cpu_var(cpu_ced); - - *ced = clock_event_device_uv; - ced->cpumask = cpumask_of(smp_processor_id()); - clockevents_register_device(ced); -} - -static __init int uv_rtc_setup_clock(void) -{ - int rc; - - if (!is_uv_system()) - return -ENODEV; - - clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, - clocksource_uv.shift); - - /* If single blade, prefer tsc */ - if (uv_num_possible_blades() == 1) - clocksource_uv.rating = 250; - - rc = clocksource_register(&clocksource_uv); - if (rc) - printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); - else - printk(KERN_INFO "UV RTC clocksource registered freq %lu MHz\n", - sn_rtc_cycles_per_second/(unsigned long)1E6); - - if (rc || !uv_rtc_evt_enable || x86_platform_ipi_callback) - return rc; - - /* Setup and register clockevents */ - rc = uv_rtc_allocate_timers(); - if (rc) - goto error; - - x86_platform_ipi_callback = uv_rtc_interrupt; - - clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second, - NSEC_PER_SEC, clock_event_device_uv.shift); - - clock_event_device_uv.min_delta_ns = NSEC_PER_SEC / - sn_rtc_cycles_per_second; - - clock_event_device_uv.max_delta_ns = clocksource_uv.mask * - (NSEC_PER_SEC / sn_rtc_cycles_per_second); - - rc = schedule_on_each_cpu(uv_rtc_register_clockevents); - if (rc) { - x86_platform_ipi_callback = NULL; - uv_rtc_deallocate_timers(); - goto error; - } - - printk(KERN_INFO "UV RTC clockevents registered\n"); - - return 0; - -error: - clocksource_unregister(&clocksource_uv); - printk(KERN_INFO "UV RTC clockevents failed rc %d\n", rc); - - return rc; -} -arch_initcall(uv_rtc_setup_clock); diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index 06761ed5374..8519b01f1ac 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile @@ -4,3 +4,4 @@ obj-y += mrst/ obj-y += scx200/ obj-y += sfi/ obj-y += visws/ +obj-y += uv/ diff --git a/arch/x86/platform/uv/Makefile b/arch/x86/platform/uv/Makefile new file mode 100644 index 00000000000..6c40995fefb --- /dev/null +++ b/arch/x86/platform/uv/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c new file mode 100644 index 00000000000..8bc57baaa9a --- /dev/null +++ b/arch/x86/platform/uv/bios_uv.c @@ -0,0 +1,215 @@ +/* + * BIOS run time interface routines. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) Russ Anderson + */ + +#include +#include +#include +#include +#include + +static struct uv_systab uv_systab; + +s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) +{ + struct uv_systab *tab = &uv_systab; + s64 ret; + + if (!tab->function) + /* + * BIOS does not support UV systab + */ + return BIOS_STATUS_UNIMPLEMENTED; + + ret = efi_call6((void *)__va(tab->function), (u64)which, + a1, a2, a3, a4, a5); + return ret; +} +EXPORT_SYMBOL_GPL(uv_bios_call); + +s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, + u64 a4, u64 a5) +{ + unsigned long bios_flags; + s64 ret; + + local_irq_save(bios_flags); + ret = uv_bios_call(which, a1, a2, a3, a4, a5); + local_irq_restore(bios_flags); + + return ret; +} + +s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, + u64 a4, u64 a5) +{ + s64 ret; + + preempt_disable(); + ret = uv_bios_call(which, a1, a2, a3, a4, a5); + preempt_enable(); + + return ret; +} + + +long sn_partition_id; +EXPORT_SYMBOL_GPL(sn_partition_id); +long sn_coherency_id; +EXPORT_SYMBOL_GPL(sn_coherency_id); +long sn_region_size; +EXPORT_SYMBOL_GPL(sn_region_size); +long system_serial_number; +EXPORT_SYMBOL_GPL(system_serial_number); +int uv_type; +EXPORT_SYMBOL_GPL(uv_type); + + +s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, + long *region, long *ssn) +{ + s64 ret; + u64 v0, v1; + union partition_info_u part; + + ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc, + (u64)(&v0), (u64)(&v1), 0, 0); + if (ret != BIOS_STATUS_SUCCESS) + return ret; + + part.val = v0; + if (uvtype) + *uvtype = part.hub_version; + if (partid) + *partid = part.partition_id; + if (coher) + *coher = part.coherence_id; + if (region) + *region = part.region_size; + if (ssn) + *ssn = v1; + return ret; +} +EXPORT_SYMBOL_GPL(uv_bios_get_sn_info); + +int +uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size, + unsigned long *intr_mmr_offset) +{ + u64 watchlist; + s64 ret; + + /* + * bios returns watchlist number or negative error number. + */ + ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr, + mq_size, (u64)intr_mmr_offset, + (u64)&watchlist, 0); + if (ret < BIOS_STATUS_SUCCESS) + return ret; + + return watchlist; +} +EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc); + +int +uv_bios_mq_watchlist_free(int blade, int watchlist_num) +{ + return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE, + blade, watchlist_num, 0, 0, 0); +} +EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free); + +s64 +uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms) +{ + return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len, + perms, 0, 0); +} +EXPORT_SYMBOL_GPL(uv_bios_change_memprotect); + +s64 +uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len) +{ + s64 ret; + + ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie, + (u64)addr, buf, (u64)len, 0); + return ret; +} +EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa); + +s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) +{ + return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, + (u64)ticks_per_second, 0, 0, 0); +} +EXPORT_SYMBOL_GPL(uv_bios_freq_base); + +/* + * uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target + * @decode: true to enable target, false to disable target + * @domain: PCI domain number + * @bus: PCI bus number + * + * Returns: + * 0: Success + * -EINVAL: Invalid domain or bus number + * -ENOSYS: Capability not available + * -EBUSY: Legacy VGA I/O cannot be retargeted at this time + */ +int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus) +{ + return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET, + (u64)decode, (u64)domain, (u64)bus, 0, 0); +} +EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target); + + +#ifdef CONFIG_EFI +void uv_bios_init(void) +{ + struct uv_systab *tab; + + if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || + (efi.uv_systab == (unsigned long)NULL)) { + printk(KERN_CRIT "No EFI UV System Table.\n"); + uv_systab.function = (unsigned long)NULL; + return; + } + + tab = (struct uv_systab *)ioremap(efi.uv_systab, + sizeof(struct uv_systab)); + if (strncmp(tab->signature, "UVST", 4) != 0) + printk(KERN_ERR "bad signature in UV system table!"); + + /* + * Copy table to permanent spot for later use. + */ + memcpy(&uv_systab, tab, sizeof(struct uv_systab)); + iounmap(tab); + + printk(KERN_INFO "EFI UV System Table Revision %d\n", + uv_systab.revision); +} +#else /* !CONFIG_EFI */ + +void uv_bios_init(void) { } +#endif diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c new file mode 100644 index 00000000000..20ea20a39e2 --- /dev/null +++ b/arch/x86/platform/uv/tlb_uv.c @@ -0,0 +1,1661 @@ +/* + * SGI UltraViolet TLB flush routines. + * + * (c) 2008-2010 Cliff Wickman , SGI. + * + * This code is released under the GNU General Public License version 2 or + * later. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */ +static int timeout_base_ns[] = { + 20, + 160, + 1280, + 10240, + 81920, + 655360, + 5242880, + 167772160 +}; +static int timeout_us; +static int nobau; +static int baudisabled; +static spinlock_t disable_lock; +static cycles_t congested_cycles; + +/* tunables: */ +static int max_bau_concurrent = MAX_BAU_CONCURRENT; +static int max_bau_concurrent_constant = MAX_BAU_CONCURRENT; +static int plugged_delay = PLUGGED_DELAY; +static int plugsb4reset = PLUGSB4RESET; +static int timeoutsb4reset = TIMEOUTSB4RESET; +static int ipi_reset_limit = IPI_RESET_LIMIT; +static int complete_threshold = COMPLETE_THRESHOLD; +static int congested_response_us = CONGESTED_RESPONSE_US; +static int congested_reps = CONGESTED_REPS; +static int congested_period = CONGESTED_PERIOD; +static struct dentry *tunables_dir; +static struct dentry *tunables_file; + +static int __init setup_nobau(char *arg) +{ + nobau = 1; + return 0; +} +early_param("nobau", setup_nobau); + +/* base pnode in this partition */ +static int uv_partition_base_pnode __read_mostly; +/* position of pnode (which is nasid>>1): */ +static int uv_nshift __read_mostly; +static unsigned long uv_mmask __read_mostly; + +static DEFINE_PER_CPU(struct ptc_stats, ptcstats); +static DEFINE_PER_CPU(struct bau_control, bau_control); +static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); + +/* + * Determine the first node on a uvhub. 'Nodes' are used for kernel + * memory allocation. + */ +static int __init uvhub_to_first_node(int uvhub) +{ + int node, b; + + for_each_online_node(node) { + b = uv_node_to_blade_id(node); + if (uvhub == b) + return node; + } + return -1; +} + +/* + * Determine the apicid of the first cpu on a uvhub. + */ +static int __init uvhub_to_first_apicid(int uvhub) +{ + int cpu; + + for_each_present_cpu(cpu) + if (uvhub == uv_cpu_to_blade_id(cpu)) + return per_cpu(x86_cpu_to_apicid, cpu); + return -1; +} + +/* + * Free a software acknowledge hardware resource by clearing its Pending + * bit. This will return a reply to the sender. + * If the message has timed out, a reply has already been sent by the + * hardware but the resource has not been released. In that case our + * clear of the Timeout bit (as well) will free the resource. No reply will + * be sent (the hardware will only do one reply per message). + */ +static inline void uv_reply_to_message(struct msg_desc *mdp, + struct bau_control *bcp) +{ + unsigned long dw; + struct bau_payload_queue_entry *msg; + + msg = mdp->msg; + if (!msg->canceled) { + dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) | + msg->sw_ack_vector; + uv_write_local_mmr( + UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); + } + msg->replied_to = 1; + msg->sw_ack_vector = 0; +} + +/* + * Process the receipt of a RETRY message + */ +static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, + struct bau_control *bcp) +{ + int i; + int cancel_count = 0; + int slot2; + unsigned long msg_res; + unsigned long mmr = 0; + struct bau_payload_queue_entry *msg; + struct bau_payload_queue_entry *msg2; + struct ptc_stats *stat; + + msg = mdp->msg; + stat = bcp->statp; + stat->d_retries++; + /* + * cancel any message from msg+1 to the retry itself + */ + for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) { + if (msg2 > mdp->va_queue_last) + msg2 = mdp->va_queue_first; + if (msg2 == msg) + break; + + /* same conditions for cancellation as uv_do_reset */ + if ((msg2->replied_to == 0) && (msg2->canceled == 0) && + (msg2->sw_ack_vector) && ((msg2->sw_ack_vector & + msg->sw_ack_vector) == 0) && + (msg2->sending_cpu == msg->sending_cpu) && + (msg2->msg_type != MSG_NOOP)) { + slot2 = msg2 - mdp->va_queue_first; + mmr = uv_read_local_mmr + (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); + msg_res = msg2->sw_ack_vector; + /* + * This is a message retry; clear the resources held + * by the previous message only if they timed out. + * If it has not timed out we have an unexpected + * situation to report. + */ + if (mmr & (msg_res << UV_SW_ACK_NPENDING)) { + /* + * is the resource timed out? + * make everyone ignore the cancelled message. + */ + msg2->canceled = 1; + stat->d_canceled++; + cancel_count++; + uv_write_local_mmr( + UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, + (msg_res << UV_SW_ACK_NPENDING) | + msg_res); + } + } + } + if (!cancel_count) + stat->d_nocanceled++; +} + +/* + * Do all the things a cpu should do for a TLB shootdown message. + * Other cpu's may come here at the same time for this message. + */ +static void uv_bau_process_message(struct msg_desc *mdp, + struct bau_control *bcp) +{ + int msg_ack_count; + short socket_ack_count = 0; + struct ptc_stats *stat; + struct bau_payload_queue_entry *msg; + struct bau_control *smaster = bcp->socket_master; + + /* + * This must be a normal message, or retry of a normal message + */ + msg = mdp->msg; + stat = bcp->statp; + if (msg->address == TLB_FLUSH_ALL) { + local_flush_tlb(); + stat->d_alltlb++; + } else { + __flush_tlb_one(msg->address); + stat->d_onetlb++; + } + stat->d_requestee++; + + /* + * One cpu on each uvhub has the additional job on a RETRY + * of releasing the resource held by the message that is + * being retried. That message is identified by sending + * cpu number. + */ + if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master) + uv_bau_process_retry_msg(mdp, bcp); + + /* + * This is a sw_ack message, so we have to reply to it. + * Count each responding cpu on the socket. This avoids + * pinging the count's cache line back and forth between + * the sockets. + */ + socket_ack_count = atomic_add_short_return(1, (struct atomic_short *) + &smaster->socket_acknowledge_count[mdp->msg_slot]); + if (socket_ack_count == bcp->cpus_in_socket) { + /* + * Both sockets dump their completed count total into + * the message's count. + */ + smaster->socket_acknowledge_count[mdp->msg_slot] = 0; + msg_ack_count = atomic_add_short_return(socket_ack_count, + (struct atomic_short *)&msg->acknowledge_count); + + if (msg_ack_count == bcp->cpus_in_uvhub) { + /* + * All cpus in uvhub saw it; reply + */ + uv_reply_to_message(mdp, bcp); + } + } + + return; +} + +/* + * Determine the first cpu on a uvhub. + */ +static int uvhub_to_first_cpu(int uvhub) +{ + int cpu; + for_each_present_cpu(cpu) + if (uvhub == uv_cpu_to_blade_id(cpu)) + return cpu; + return -1; +} + +/* + * Last resort when we get a large number of destination timeouts is + * to clear resources held by a given cpu. + * Do this with IPI so that all messages in the BAU message queue + * can be identified by their nonzero sw_ack_vector field. + * + * This is entered for a single cpu on the uvhub. + * The sender want's this uvhub to free a specific message's + * sw_ack resources. + */ +static void +uv_do_reset(void *ptr) +{ + int i; + int slot; + int count = 0; + unsigned long mmr; + unsigned long msg_res; + struct bau_control *bcp; + struct reset_args *rap; + struct bau_payload_queue_entry *msg; + struct ptc_stats *stat; + + bcp = &per_cpu(bau_control, smp_processor_id()); + rap = (struct reset_args *)ptr; + stat = bcp->statp; + stat->d_resets++; + + /* + * We're looking for the given sender, and + * will free its sw_ack resource. + * If all cpu's finally responded after the timeout, its + * message 'replied_to' was set. + */ + for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { + /* uv_do_reset: same conditions for cancellation as + uv_bau_process_retry_msg() */ + if ((msg->replied_to == 0) && + (msg->canceled == 0) && + (msg->sending_cpu == rap->sender) && + (msg->sw_ack_vector) && + (msg->msg_type != MSG_NOOP)) { + /* + * make everyone else ignore this message + */ + msg->canceled = 1; + slot = msg - bcp->va_queue_first; + count++; + /* + * only reset the resource if it is still pending + */ + mmr = uv_read_local_mmr + (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); + msg_res = msg->sw_ack_vector; + if (mmr & msg_res) { + stat->d_rcanceled++; + uv_write_local_mmr( + UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, + (msg_res << UV_SW_ACK_NPENDING) | + msg_res); + } + } + } + return; +} + +/* + * Use IPI to get all target uvhubs to release resources held by + * a given sending cpu number. + */ +static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution, + int sender) +{ + int uvhub; + int cpu; + cpumask_t mask; + struct reset_args reset_args; + + reset_args.sender = sender; + + cpus_clear(mask); + /* find a single cpu for each uvhub in this distribution mask */ + for (uvhub = 0; + uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE; + uvhub++) { + if (!bau_uvhub_isset(uvhub, distribution)) + continue; + /* find a cpu for this uvhub */ + cpu = uvhub_to_first_cpu(uvhub); + cpu_set(cpu, mask); + } + /* IPI all cpus; Preemption is already disabled */ + smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1); + return; +} + +static inline unsigned long +cycles_2_us(unsigned long long cyc) +{ + unsigned long long ns; + unsigned long us; + ns = (cyc * per_cpu(cyc2ns, smp_processor_id())) + >> CYC2NS_SCALE_FACTOR; + us = ns / 1000; + return us; +} + +/* + * wait for all cpus on this hub to finish their sends and go quiet + * leaves uvhub_quiesce set so that no new broadcasts are started by + * bau_flush_send_and_wait() + */ +static inline void +quiesce_local_uvhub(struct bau_control *hmaster) +{ + atomic_add_short_return(1, (struct atomic_short *) + &hmaster->uvhub_quiesce); +} + +/* + * mark this quiet-requestor as done + */ +static inline void +end_uvhub_quiesce(struct bau_control *hmaster) +{ + atomic_add_short_return(-1, (struct atomic_short *) + &hmaster->uvhub_quiesce); +} + +/* + * Wait for completion of a broadcast software ack message + * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP + */ +static int uv_wait_completion(struct bau_desc *bau_desc, + unsigned long mmr_offset, int right_shift, int this_cpu, + struct bau_control *bcp, struct bau_control *smaster, long try) +{ + unsigned long descriptor_status; + cycles_t ttime; + struct ptc_stats *stat = bcp->statp; + struct bau_control *hmaster; + + hmaster = bcp->uvhub_master; + + /* spin on the status MMR, waiting for it to go idle */ + while ((descriptor_status = (((unsigned long) + uv_read_local_mmr(mmr_offset) >> + right_shift) & UV_ACT_STATUS_MASK)) != + DESC_STATUS_IDLE) { + /* + * Our software ack messages may be blocked because there are + * no swack resources available. As long as none of them + * has timed out hardware will NACK our message and its + * state will stay IDLE. + */ + if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { + stat->s_stimeout++; + return FLUSH_GIVEUP; + } else if (descriptor_status == + DESC_STATUS_DESTINATION_TIMEOUT) { + stat->s_dtimeout++; + ttime = get_cycles(); + + /* + * Our retries may be blocked by all destination + * swack resources being consumed, and a timeout + * pending. In that case hardware returns the + * ERROR that looks like a destination timeout. + */ + if (cycles_2_us(ttime - bcp->send_message) < + timeout_us) { + bcp->conseccompletes = 0; + return FLUSH_RETRY_PLUGGED; + } + + bcp->conseccompletes = 0; + return FLUSH_RETRY_TIMEOUT; + } else { + /* + * descriptor_status is still BUSY + */ + cpu_relax(); + } + } + bcp->conseccompletes++; + return FLUSH_COMPLETE; +} + +static inline cycles_t +sec_2_cycles(unsigned long sec) +{ + unsigned long ns; + cycles_t cyc; + + ns = sec * 1000000000; + cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); + return cyc; +} + +/* + * conditionally add 1 to *v, unless *v is >= u + * return 0 if we cannot add 1 to *v because it is >= u + * return 1 if we can add 1 to *v because it is < u + * the add is atomic + * + * This is close to atomic_add_unless(), but this allows the 'u' value + * to be lowered below the current 'v'. atomic_add_unless can only stop + * on equal. + */ +static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) +{ + spin_lock(lock); + if (atomic_read(v) >= u) { + spin_unlock(lock); + return 0; + } + atomic_inc(v); + spin_unlock(lock); + return 1; +} + +/* + * Our retries are blocked by all destination swack resources being + * in use, and a timeout is pending. In that case hardware immediately + * returns the ERROR that looks like a destination timeout. + */ +static void +destination_plugged(struct bau_desc *bau_desc, struct bau_control *bcp, + struct bau_control *hmaster, struct ptc_stats *stat) +{ + udelay(bcp->plugged_delay); + bcp->plugged_tries++; + if (bcp->plugged_tries >= bcp->plugsb4reset) { + bcp->plugged_tries = 0; + quiesce_local_uvhub(hmaster); + spin_lock(&hmaster->queue_lock); + uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); + spin_unlock(&hmaster->queue_lock); + end_uvhub_quiesce(hmaster); + bcp->ipi_attempts++; + stat->s_resets_plug++; + } +} + +static void +destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp, + struct bau_control *hmaster, struct ptc_stats *stat) +{ + hmaster->max_bau_concurrent = 1; + bcp->timeout_tries++; + if (bcp->timeout_tries >= bcp->timeoutsb4reset) { + bcp->timeout_tries = 0; + quiesce_local_uvhub(hmaster); + spin_lock(&hmaster->queue_lock); + uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); + spin_unlock(&hmaster->queue_lock); + end_uvhub_quiesce(hmaster); + bcp->ipi_attempts++; + stat->s_resets_timeout++; + } +} + +/* + * Completions are taking a very long time due to a congested numalink + * network. + */ +static void +disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat) +{ + int tcpu; + struct bau_control *tbcp; + + /* let only one cpu do this disabling */ + spin_lock(&disable_lock); + if (!baudisabled && bcp->period_requests && + ((bcp->period_time / bcp->period_requests) > congested_cycles)) { + /* it becomes this cpu's job to turn on the use of the + BAU again */ + baudisabled = 1; + bcp->set_bau_off = 1; + bcp->set_bau_on_time = get_cycles() + + sec_2_cycles(bcp->congested_period); + stat->s_bau_disabled++; + for_each_present_cpu(tcpu) { + tbcp = &per_cpu(bau_control, tcpu); + tbcp->baudisabled = 1; + } + } + spin_unlock(&disable_lock); +} + +/** + * uv_flush_send_and_wait + * + * Send a broadcast and wait for it to complete. + * + * The flush_mask contains the cpus the broadcast is to be sent to including + * cpus that are on the local uvhub. + * + * Returns 0 if all flushing represented in the mask was done. + * Returns 1 if it gives up entirely and the original cpu mask is to be + * returned to the kernel. + */ +int uv_flush_send_and_wait(struct bau_desc *bau_desc, + struct cpumask *flush_mask, struct bau_control *bcp) +{ + int right_shift; + int completion_status = 0; + int seq_number = 0; + long try = 0; + int cpu = bcp->uvhub_cpu; + int this_cpu = bcp->cpu; + unsigned long mmr_offset; + unsigned long index; + cycles_t time1; + cycles_t time2; + cycles_t elapsed; + struct ptc_stats *stat = bcp->statp; + struct bau_control *smaster = bcp->socket_master; + struct bau_control *hmaster = bcp->uvhub_master; + + if (!atomic_inc_unless_ge(&hmaster->uvhub_lock, + &hmaster->active_descriptor_count, + hmaster->max_bau_concurrent)) { + stat->s_throttles++; + do { + cpu_relax(); + } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock, + &hmaster->active_descriptor_count, + hmaster->max_bau_concurrent)); + } + while (hmaster->uvhub_quiesce) + cpu_relax(); + + if (cpu < UV_CPUS_PER_ACT_STATUS) { + mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; + right_shift = cpu * UV_ACT_STATUS_SIZE; + } else { + mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; + right_shift = + ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE); + } + time1 = get_cycles(); + do { + if (try == 0) { + bau_desc->header.msg_type = MSG_REGULAR; + seq_number = bcp->message_number++; + } else { + bau_desc->header.msg_type = MSG_RETRY; + stat->s_retry_messages++; + } + bau_desc->header.sequence = seq_number; + index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | + bcp->uvhub_cpu; + bcp->send_message = get_cycles(); + uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); + try++; + completion_status = uv_wait_completion(bau_desc, mmr_offset, + right_shift, this_cpu, bcp, smaster, try); + + if (completion_status == FLUSH_RETRY_PLUGGED) { + destination_plugged(bau_desc, bcp, hmaster, stat); + } else if (completion_status == FLUSH_RETRY_TIMEOUT) { + destination_timeout(bau_desc, bcp, hmaster, stat); + } + if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { + bcp->ipi_attempts = 0; + completion_status = FLUSH_GIVEUP; + break; + } + cpu_relax(); + } while ((completion_status == FLUSH_RETRY_PLUGGED) || + (completion_status == FLUSH_RETRY_TIMEOUT)); + time2 = get_cycles(); + bcp->plugged_tries = 0; + bcp->timeout_tries = 0; + if ((completion_status == FLUSH_COMPLETE) && + (bcp->conseccompletes > bcp->complete_threshold) && + (hmaster->max_bau_concurrent < + hmaster->max_bau_concurrent_constant)) + hmaster->max_bau_concurrent++; + while (hmaster->uvhub_quiesce) + cpu_relax(); + atomic_dec(&hmaster->active_descriptor_count); + if (time2 > time1) { + elapsed = time2 - time1; + stat->s_time += elapsed; + if ((completion_status == FLUSH_COMPLETE) && (try == 1)) { + bcp->period_requests++; + bcp->period_time += elapsed; + if ((elapsed > congested_cycles) && + (bcp->period_requests > bcp->congested_reps)) { + disable_for_congestion(bcp, stat); + } + } + } else + stat->s_requestor--; + if (completion_status == FLUSH_COMPLETE && try > 1) + stat->s_retriesok++; + else if (completion_status == FLUSH_GIVEUP) { + stat->s_giveup++; + return 1; + } + return 0; +} + +/** + * uv_flush_tlb_others - globally purge translation cache of a virtual + * address or all TLB's + * @cpumask: mask of all cpu's in which the address is to be removed + * @mm: mm_struct containing virtual address range + * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) + * @cpu: the current cpu + * + * This is the entry point for initiating any UV global TLB shootdown. + * + * Purges the translation caches of all specified processors of the given + * virtual address, or purges all TLB's on specified processors. + * + * The caller has derived the cpumask from the mm_struct. This function + * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) + * + * The cpumask is converted into a uvhubmask of the uvhubs containing + * those cpus. + * + * Note that this function should be called with preemption disabled. + * + * Returns NULL if all remote flushing was done. + * Returns pointer to cpumask if some remote flushing remains to be + * done. The returned pointer is valid till preemption is re-enabled. + */ +const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, + unsigned long va, unsigned int cpu) +{ + int tcpu; + int uvhub; + int locals = 0; + int remotes = 0; + int hubs = 0; + struct bau_desc *bau_desc; + struct cpumask *flush_mask; + struct ptc_stats *stat; + struct bau_control *bcp; + struct bau_control *tbcp; + + /* kernel was booted 'nobau' */ + if (nobau) + return cpumask; + + bcp = &per_cpu(bau_control, cpu); + stat = bcp->statp; + + /* bau was disabled due to slow response */ + if (bcp->baudisabled) { + /* the cpu that disabled it must re-enable it */ + if (bcp->set_bau_off) { + if (get_cycles() >= bcp->set_bau_on_time) { + stat->s_bau_reenabled++; + baudisabled = 0; + for_each_present_cpu(tcpu) { + tbcp = &per_cpu(bau_control, tcpu); + tbcp->baudisabled = 0; + tbcp->period_requests = 0; + tbcp->period_time = 0; + } + } + } + return cpumask; + } + + /* + * Each sending cpu has a per-cpu mask which it fills from the caller's + * cpu mask. All cpus are converted to uvhubs and copied to the + * activation descriptor. + */ + flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); + /* don't actually do a shootdown of the local cpu */ + cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); + if (cpu_isset(cpu, *cpumask)) + stat->s_ntargself++; + + bau_desc = bcp->descriptor_base; + bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; + bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); + + /* cpu statistics */ + for_each_cpu(tcpu, flush_mask) { + uvhub = uv_cpu_to_blade_id(tcpu); + bau_uvhub_set(uvhub, &bau_desc->distribution); + if (uvhub == bcp->uvhub) + locals++; + else + remotes++; + } + if ((locals + remotes) == 0) + return NULL; + stat->s_requestor++; + stat->s_ntargcpu += remotes + locals; + stat->s_ntargremotes += remotes; + stat->s_ntarglocals += locals; + remotes = bau_uvhub_weight(&bau_desc->distribution); + + /* uvhub statistics */ + hubs = bau_uvhub_weight(&bau_desc->distribution); + if (locals) { + stat->s_ntarglocaluvhub++; + stat->s_ntargremoteuvhub += (hubs - 1); + } else + stat->s_ntargremoteuvhub += hubs; + stat->s_ntarguvhub += hubs; + if (hubs >= 16) + stat->s_ntarguvhub16++; + else if (hubs >= 8) + stat->s_ntarguvhub8++; + else if (hubs >= 4) + stat->s_ntarguvhub4++; + else if (hubs >= 2) + stat->s_ntarguvhub2++; + else + stat->s_ntarguvhub1++; + + bau_desc->payload.address = va; + bau_desc->payload.sending_cpu = cpu; + + /* + * uv_flush_send_and_wait returns 0 if all cpu's were messaged, + * or 1 if it gave up and the original cpumask should be returned. + */ + if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp)) + return NULL; + else + return cpumask; +} + +/* + * The BAU message interrupt comes here. (registered by set_intr_gate) + * See entry_64.S + * + * We received a broadcast assist message. + * + * Interrupts are disabled; this interrupt could represent + * the receipt of several messages. + * + * All cores/threads on this hub get this interrupt. + * The last one to see it does the software ack. + * (the resource will not be freed until noninterruptable cpus see this + * interrupt; hardware may timeout the s/w ack and reply ERROR) + */ +void uv_bau_message_interrupt(struct pt_regs *regs) +{ + int count = 0; + cycles_t time_start; + struct bau_payload_queue_entry *msg; + struct bau_control *bcp; + struct ptc_stats *stat; + struct msg_desc msgdesc; + + time_start = get_cycles(); + bcp = &per_cpu(bau_control, smp_processor_id()); + stat = bcp->statp; + msgdesc.va_queue_first = bcp->va_queue_first; + msgdesc.va_queue_last = bcp->va_queue_last; + msg = bcp->bau_msg_head; + while (msg->sw_ack_vector) { + count++; + msgdesc.msg_slot = msg - msgdesc.va_queue_first; + msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1; + msgdesc.msg = msg; + uv_bau_process_message(&msgdesc, bcp); + msg++; + if (msg > msgdesc.va_queue_last) + msg = msgdesc.va_queue_first; + bcp->bau_msg_head = msg; + } + stat->d_time += (get_cycles() - time_start); + if (!count) + stat->d_nomsg++; + else if (count > 1) + stat->d_multmsg++; + ack_APIC_irq(); +} + +/* + * uv_enable_timeouts + * + * Each target uvhub (i.e. a uvhub that has no cpu's) needs to have + * shootdown message timeouts enabled. The timeout does not cause + * an interrupt, but causes an error message to be returned to + * the sender. + */ +static void uv_enable_timeouts(void) +{ + int uvhub; + int nuvhubs; + int pnode; + unsigned long mmr_image; + + nuvhubs = uv_num_possible_blades(); + + for (uvhub = 0; uvhub < nuvhubs; uvhub++) { + if (!uv_blade_nr_possible_cpus(uvhub)) + continue; + + pnode = uv_blade_to_pnode(uvhub); + mmr_image = + uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); + /* + * Set the timeout period and then lock it in, in three + * steps; captures and locks in the period. + * + * To program the period, the SOFT_ACK_MODE must be off. + */ + mmr_image &= ~((unsigned long)1 << + UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); + uv_write_global_mmr64 + (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); + /* + * Set the 4-bit period. + */ + mmr_image &= ~((unsigned long)0xf << + UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); + mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << + UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); + uv_write_global_mmr64 + (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); + /* + * Subsequent reversals of the timebase bit (3) cause an + * immediate timeout of one or all INTD resources as + * indicated in bits 2:0 (7 causes all of them to timeout). + */ + mmr_image |= ((unsigned long)1 << + UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); + uv_write_global_mmr64 + (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); + } +} + +static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset) +{ + if (*offset < num_possible_cpus()) + return offset; + return NULL; +} + +static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) +{ + (*offset)++; + if (*offset < num_possible_cpus()) + return offset; + return NULL; +} + +static void uv_ptc_seq_stop(struct seq_file *file, void *data) +{ +} + +static inline unsigned long long +microsec_2_cycles(unsigned long microsec) +{ + unsigned long ns; + unsigned long long cyc; + + ns = microsec * 1000; + cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); + return cyc; +} + +/* + * Display the statistics thru /proc. + * 'data' points to the cpu number + */ +static int uv_ptc_seq_show(struct seq_file *file, void *data) +{ + struct ptc_stats *stat; + int cpu; + + cpu = *(loff_t *)data; + + if (!cpu) { + seq_printf(file, + "# cpu sent stime self locals remotes ncpus localhub "); + seq_printf(file, + "remotehub numuvhubs numuvhubs16 numuvhubs8 "); + seq_printf(file, + "numuvhubs4 numuvhubs2 numuvhubs1 dto "); + seq_printf(file, + "retries rok resetp resett giveup sto bz throt "); + seq_printf(file, + "sw_ack recv rtime all "); + seq_printf(file, + "one mult none retry canc nocan reset rcan "); + seq_printf(file, + "disable enable\n"); + } + if (cpu < num_possible_cpus() && cpu_online(cpu)) { + stat = &per_cpu(ptcstats, cpu); + /* source side statistics */ + seq_printf(file, + "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", + cpu, stat->s_requestor, cycles_2_us(stat->s_time), + stat->s_ntargself, stat->s_ntarglocals, + stat->s_ntargremotes, stat->s_ntargcpu, + stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub, + stat->s_ntarguvhub, stat->s_ntarguvhub16); + seq_printf(file, "%ld %ld %ld %ld %ld ", + stat->s_ntarguvhub8, stat->s_ntarguvhub4, + stat->s_ntarguvhub2, stat->s_ntarguvhub1, + stat->s_dtimeout); + seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ", + stat->s_retry_messages, stat->s_retriesok, + stat->s_resets_plug, stat->s_resets_timeout, + stat->s_giveup, stat->s_stimeout, + stat->s_busy, stat->s_throttles); + + /* destination side statistics */ + seq_printf(file, + "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", + uv_read_global_mmr64(uv_cpu_to_pnode(cpu), + UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), + stat->d_requestee, cycles_2_us(stat->d_time), + stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, + stat->d_nomsg, stat->d_retries, stat->d_canceled, + stat->d_nocanceled, stat->d_resets, + stat->d_rcanceled); + seq_printf(file, "%ld %ld\n", + stat->s_bau_disabled, stat->s_bau_reenabled); + } + + return 0; +} + +/* + * Display the tunables thru debugfs + */ +static ssize_t tunables_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + char *buf; + int ret; + + buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", + "max_bau_concurrent plugged_delay plugsb4reset", + "timeoutsb4reset ipi_reset_limit complete_threshold", + "congested_response_us congested_reps congested_period", + max_bau_concurrent, plugged_delay, plugsb4reset, + timeoutsb4reset, ipi_reset_limit, complete_threshold, + congested_response_us, congested_reps, congested_period); + + if (!buf) + return -ENOMEM; + + ret = simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf)); + kfree(buf); + return ret; +} + +/* + * -1: resetf the statistics + * 0: display meaning of the statistics + */ +static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, + size_t count, loff_t *data) +{ + int cpu; + long input_arg; + char optstr[64]; + struct ptc_stats *stat; + + if (count == 0 || count > sizeof(optstr)) + return -EINVAL; + if (copy_from_user(optstr, user, count)) + return -EFAULT; + optstr[count - 1] = '\0'; + if (strict_strtol(optstr, 10, &input_arg) < 0) { + printk(KERN_DEBUG "%s is invalid\n", optstr); + return -EINVAL; + } + + if (input_arg == 0) { + printk(KERN_DEBUG "# cpu: cpu number\n"); + printk(KERN_DEBUG "Sender statistics:\n"); + printk(KERN_DEBUG + "sent: number of shootdown messages sent\n"); + printk(KERN_DEBUG + "stime: time spent sending messages\n"); + printk(KERN_DEBUG + "numuvhubs: number of hubs targeted with shootdown\n"); + printk(KERN_DEBUG + "numuvhubs16: number times 16 or more hubs targeted\n"); + printk(KERN_DEBUG + "numuvhubs8: number times 8 or more hubs targeted\n"); + printk(KERN_DEBUG + "numuvhubs4: number times 4 or more hubs targeted\n"); + printk(KERN_DEBUG + "numuvhubs2: number times 2 or more hubs targeted\n"); + printk(KERN_DEBUG + "numuvhubs1: number times 1 hub targeted\n"); + printk(KERN_DEBUG + "numcpus: number of cpus targeted with shootdown\n"); + printk(KERN_DEBUG + "dto: number of destination timeouts\n"); + printk(KERN_DEBUG + "retries: destination timeout retries sent\n"); + printk(KERN_DEBUG + "rok: : destination timeouts successfully retried\n"); + printk(KERN_DEBUG + "resetp: ipi-style resource resets for plugs\n"); + printk(KERN_DEBUG + "resett: ipi-style resource resets for timeouts\n"); + printk(KERN_DEBUG + "giveup: fall-backs to ipi-style shootdowns\n"); + printk(KERN_DEBUG + "sto: number of source timeouts\n"); + printk(KERN_DEBUG + "bz: number of stay-busy's\n"); + printk(KERN_DEBUG + "throt: number times spun in throttle\n"); + printk(KERN_DEBUG "Destination side statistics:\n"); + printk(KERN_DEBUG + "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); + printk(KERN_DEBUG + "recv: shootdown messages received\n"); + printk(KERN_DEBUG + "rtime: time spent processing messages\n"); + printk(KERN_DEBUG + "all: shootdown all-tlb messages\n"); + printk(KERN_DEBUG + "one: shootdown one-tlb messages\n"); + printk(KERN_DEBUG + "mult: interrupts that found multiple messages\n"); + printk(KERN_DEBUG + "none: interrupts that found no messages\n"); + printk(KERN_DEBUG + "retry: number of retry messages processed\n"); + printk(KERN_DEBUG + "canc: number messages canceled by retries\n"); + printk(KERN_DEBUG + "nocan: number retries that found nothing to cancel\n"); + printk(KERN_DEBUG + "reset: number of ipi-style reset requests processed\n"); + printk(KERN_DEBUG + "rcan: number messages canceled by reset requests\n"); + printk(KERN_DEBUG + "disable: number times use of the BAU was disabled\n"); + printk(KERN_DEBUG + "enable: number times use of the BAU was re-enabled\n"); + } else if (input_arg == -1) { + for_each_present_cpu(cpu) { + stat = &per_cpu(ptcstats, cpu); + memset(stat, 0, sizeof(struct ptc_stats)); + } + } + + return count; +} + +static int local_atoi(const char *name) +{ + int val = 0; + + for (;; name++) { + switch (*name) { + case '0' ... '9': + val = 10*val+(*name-'0'); + break; + default: + return val; + } + } +} + +/* + * set the tunables + * 0 values reset them to defaults + */ +static ssize_t tunables_write(struct file *file, const char __user *user, + size_t count, loff_t *data) +{ + int cpu; + int cnt = 0; + int val; + char *p; + char *q; + char instr[64]; + struct bau_control *bcp; + + if (count == 0 || count > sizeof(instr)-1) + return -EINVAL; + if (copy_from_user(instr, user, count)) + return -EFAULT; + + instr[count] = '\0'; + /* count the fields */ + p = instr + strspn(instr, WHITESPACE); + q = p; + for (; *p; p = q + strspn(q, WHITESPACE)) { + q = p + strcspn(p, WHITESPACE); + cnt++; + if (q == p) + break; + } + if (cnt != 9) { + printk(KERN_INFO "bau tunable error: should be 9 numbers\n"); + return -EINVAL; + } + + p = instr + strspn(instr, WHITESPACE); + q = p; + for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) { + q = p + strcspn(p, WHITESPACE); + val = local_atoi(p); + switch (cnt) { + case 0: + if (val == 0) { + max_bau_concurrent = MAX_BAU_CONCURRENT; + max_bau_concurrent_constant = + MAX_BAU_CONCURRENT; + continue; + } + bcp = &per_cpu(bau_control, smp_processor_id()); + if (val < 1 || val > bcp->cpus_in_uvhub) { + printk(KERN_DEBUG + "Error: BAU max concurrent %d is invalid\n", + val); + return -EINVAL; + } + max_bau_concurrent = val; + max_bau_concurrent_constant = val; + continue; + case 1: + if (val == 0) + plugged_delay = PLUGGED_DELAY; + else + plugged_delay = val; + continue; + case 2: + if (val == 0) + plugsb4reset = PLUGSB4RESET; + else + plugsb4reset = val; + continue; + case 3: + if (val == 0) + timeoutsb4reset = TIMEOUTSB4RESET; + else + timeoutsb4reset = val; + continue; + case 4: + if (val == 0) + ipi_reset_limit = IPI_RESET_LIMIT; + else + ipi_reset_limit = val; + continue; + case 5: + if (val == 0) + complete_threshold = COMPLETE_THRESHOLD; + else + complete_threshold = val; + continue; + case 6: + if (val == 0) + congested_response_us = CONGESTED_RESPONSE_US; + else + congested_response_us = val; + continue; + case 7: + if (val == 0) + congested_reps = CONGESTED_REPS; + else + congested_reps = val; + continue; + case 8: + if (val == 0) + congested_period = CONGESTED_PERIOD; + else + congested_period = val; + continue; + } + if (q == p) + break; + } + for_each_present_cpu(cpu) { + bcp = &per_cpu(bau_control, cpu); + bcp->max_bau_concurrent = max_bau_concurrent; + bcp->max_bau_concurrent_constant = max_bau_concurrent; + bcp->plugged_delay = plugged_delay; + bcp->plugsb4reset = plugsb4reset; + bcp->timeoutsb4reset = timeoutsb4reset; + bcp->ipi_reset_limit = ipi_reset_limit; + bcp->complete_threshold = complete_threshold; + bcp->congested_response_us = congested_response_us; + bcp->congested_reps = congested_reps; + bcp->congested_period = congested_period; + } + return count; +} + +static const struct seq_operations uv_ptc_seq_ops = { + .start = uv_ptc_seq_start, + .next = uv_ptc_seq_next, + .stop = uv_ptc_seq_stop, + .show = uv_ptc_seq_show +}; + +static int uv_ptc_proc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &uv_ptc_seq_ops); +} + +static int tunables_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static const struct file_operations proc_uv_ptc_operations = { + .open = uv_ptc_proc_open, + .read = seq_read, + .write = uv_ptc_proc_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +static const struct file_operations tunables_fops = { + .open = tunables_open, + .read = tunables_read, + .write = tunables_write, + .llseek = default_llseek, +}; + +static int __init uv_ptc_init(void) +{ + struct proc_dir_entry *proc_uv_ptc; + + if (!is_uv_system()) + return 0; + + proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL, + &proc_uv_ptc_operations); + if (!proc_uv_ptc) { + printk(KERN_ERR "unable to create %s proc entry\n", + UV_PTC_BASENAME); + return -EINVAL; + } + + tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL); + if (!tunables_dir) { + printk(KERN_ERR "unable to create debugfs directory %s\n", + UV_BAU_TUNABLES_DIR); + return -EINVAL; + } + tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, + tunables_dir, NULL, &tunables_fops); + if (!tunables_file) { + printk(KERN_ERR "unable to create debugfs file %s\n", + UV_BAU_TUNABLES_FILE); + return -EINVAL; + } + return 0; +} + +/* + * initialize the sending side's sending buffers + */ +static void +uv_activation_descriptor_init(int node, int pnode) +{ + int i; + int cpu; + unsigned long pa; + unsigned long m; + unsigned long n; + struct bau_desc *bau_desc; + struct bau_desc *bd2; + struct bau_control *bcp; + + /* + * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) + * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub + */ + bau_desc = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)* + UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); + BUG_ON(!bau_desc); + + pa = uv_gpa(bau_desc); /* need the real nasid*/ + n = pa >> uv_nshift; + m = pa & uv_mmask; + + uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, + (n << UV_DESC_BASE_PNODE_SHIFT | m)); + + /* + * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each + * cpu even though we only use the first one; one descriptor can + * describe a broadcast to 256 uv hubs. + */ + for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); + i++, bd2++) { + memset(bd2, 0, sizeof(struct bau_desc)); + bd2->header.sw_ack_flag = 1; + /* + * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub + * in the partition. The bit map will indicate uvhub numbers, + * which are 0-N in a partition. Pnodes are unique system-wide. + */ + bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1; + bd2->header.dest_subnodeid = 0x10; /* the LB */ + bd2->header.command = UV_NET_ENDPOINT_INTD; + bd2->header.int_both = 1; + /* + * all others need to be set to zero: + * fairness chaining multilevel count replied_to + */ + } + for_each_present_cpu(cpu) { + if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu))) + continue; + bcp = &per_cpu(bau_control, cpu); + bcp->descriptor_base = bau_desc; + } +} + +/* + * initialize the destination side's receiving buffers + * entered for each uvhub in the partition + * - node is first node (kernel memory notion) on the uvhub + * - pnode is the uvhub's physical identifier + */ +static void +uv_payload_queue_init(int node, int pnode) +{ + int pn; + int cpu; + char *cp; + unsigned long pa; + struct bau_payload_queue_entry *pqp; + struct bau_payload_queue_entry *pqp_malloc; + struct bau_control *bcp; + + pqp = (struct bau_payload_queue_entry *) kmalloc_node( + (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), + GFP_KERNEL, node); + BUG_ON(!pqp); + pqp_malloc = pqp; + + cp = (char *)pqp + 31; + pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); + + for_each_present_cpu(cpu) { + if (pnode != uv_cpu_to_pnode(cpu)) + continue; + /* for every cpu on this pnode: */ + bcp = &per_cpu(bau_control, cpu); + bcp->va_queue_first = pqp; + bcp->bau_msg_head = pqp; + bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1); + } + /* + * need the pnode of where the memory was really allocated + */ + pa = uv_gpa(pqp); + pn = pa >> uv_nshift; + uv_write_global_mmr64(pnode, + UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, + ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | + uv_physnodeaddr(pqp)); + uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, + uv_physnodeaddr(pqp)); + uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, + (unsigned long) + uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1))); + /* in effect, all msg_type's are set to MSG_NOOP */ + memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); +} + +/* + * Initialization of each UV hub's structures + */ +static void __init uv_init_uvhub(int uvhub, int vector) +{ + int node; + int pnode; + unsigned long apicid; + + node = uvhub_to_first_node(uvhub); + pnode = uv_blade_to_pnode(uvhub); + uv_activation_descriptor_init(node, pnode); + uv_payload_queue_init(node, pnode); + /* + * the below initialization can't be in firmware because the + * messaging IRQ will be determined by the OS + */ + apicid = uvhub_to_first_apicid(uvhub); + uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, + ((apicid << 32) | vector)); +} + +/* + * We will set BAU_MISC_CONTROL with a timeout period. + * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT. + * So the destination timeout period has be be calculated from them. + */ +static int +calculate_destination_timeout(void) +{ + unsigned long mmr_image; + int mult1; + int mult2; + int index; + int base; + int ret; + unsigned long ts_ns; + + mult1 = UV_INTD_SOFT_ACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK; + mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); + index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; + mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); + mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; + base = timeout_base_ns[index]; + ts_ns = base * mult1 * mult2; + ret = ts_ns / 1000; + return ret; +} + +/* + * initialize the bau_control structure for each cpu + */ +static void __init uv_init_per_cpu(int nuvhubs) +{ + int i; + int cpu; + int pnode; + int uvhub; + int have_hmaster; + short socket = 0; + unsigned short socket_mask; + unsigned char *uvhub_mask; + struct bau_control *bcp; + struct uvhub_desc *bdp; + struct socket_desc *sdp; + struct bau_control *hmaster = NULL; + struct bau_control *smaster = NULL; + struct socket_desc { + short num_cpus; + short cpu_number[16]; + }; + struct uvhub_desc { + unsigned short socket_mask; + short num_cpus; + short uvhub; + short pnode; + struct socket_desc socket[2]; + }; + struct uvhub_desc *uvhub_descs; + + timeout_us = calculate_destination_timeout(); + + uvhub_descs = (struct uvhub_desc *) + kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); + memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); + uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); + for_each_present_cpu(cpu) { + bcp = &per_cpu(bau_control, cpu); + memset(bcp, 0, sizeof(struct bau_control)); + pnode = uv_cpu_hub_info(cpu)->pnode; + uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; + *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); + bdp = &uvhub_descs[uvhub]; + bdp->num_cpus++; + bdp->uvhub = uvhub; + bdp->pnode = pnode; + /* kludge: 'assuming' one node per socket, and assuming that + disabling a socket just leaves a gap in node numbers */ + socket = (cpu_to_node(cpu) & 1); + bdp->socket_mask |= (1 << socket); + sdp = &bdp->socket[socket]; + sdp->cpu_number[sdp->num_cpus] = cpu; + sdp->num_cpus++; + } + for (uvhub = 0; uvhub < nuvhubs; uvhub++) { + if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) + continue; + have_hmaster = 0; + bdp = &uvhub_descs[uvhub]; + socket_mask = bdp->socket_mask; + socket = 0; + while (socket_mask) { + if (!(socket_mask & 1)) + goto nextsocket; + sdp = &bdp->socket[socket]; + for (i = 0; i < sdp->num_cpus; i++) { + cpu = sdp->cpu_number[i]; + bcp = &per_cpu(bau_control, cpu); + bcp->cpu = cpu; + if (i == 0) { + smaster = bcp; + if (!have_hmaster) { + have_hmaster++; + hmaster = bcp; + } + } + bcp->cpus_in_uvhub = bdp->num_cpus; + bcp->cpus_in_socket = sdp->num_cpus; + bcp->socket_master = smaster; + bcp->uvhub = bdp->uvhub; + bcp->uvhub_master = hmaster; + bcp->uvhub_cpu = uv_cpu_hub_info(cpu)-> + blade_processor_id; + } +nextsocket: + socket++; + socket_mask = (socket_mask >> 1); + } + } + kfree(uvhub_descs); + kfree(uvhub_mask); + for_each_present_cpu(cpu) { + bcp = &per_cpu(bau_control, cpu); + bcp->baudisabled = 0; + bcp->statp = &per_cpu(ptcstats, cpu); + /* time interval to catch a hardware stay-busy bug */ + bcp->timeout_interval = microsec_2_cycles(2*timeout_us); + bcp->max_bau_concurrent = max_bau_concurrent; + bcp->max_bau_concurrent_constant = max_bau_concurrent; + bcp->plugged_delay = plugged_delay; + bcp->plugsb4reset = plugsb4reset; + bcp->timeoutsb4reset = timeoutsb4reset; + bcp->ipi_reset_limit = ipi_reset_limit; + bcp->complete_threshold = complete_threshold; + bcp->congested_response_us = congested_response_us; + bcp->congested_reps = congested_reps; + bcp->congested_period = congested_period; + } +} + +/* + * Initialization of BAU-related structures + */ +static int __init uv_bau_init(void) +{ + int uvhub; + int pnode; + int nuvhubs; + int cur_cpu; + int vector; + unsigned long mmr; + + if (!is_uv_system()) + return 0; + + if (nobau) + return 0; + + for_each_possible_cpu(cur_cpu) + zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), + GFP_KERNEL, cpu_to_node(cur_cpu)); + + uv_nshift = uv_hub_info->m_val; + uv_mmask = (1UL << uv_hub_info->m_val) - 1; + nuvhubs = uv_num_possible_blades(); + spin_lock_init(&disable_lock); + congested_cycles = microsec_2_cycles(congested_response_us); + + uv_init_per_cpu(nuvhubs); + + uv_partition_base_pnode = 0x7fffffff; + for (uvhub = 0; uvhub < nuvhubs; uvhub++) + if (uv_blade_nr_possible_cpus(uvhub) && + (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) + uv_partition_base_pnode = uv_blade_to_pnode(uvhub); + + vector = UV_BAU_MESSAGE; + for_each_possible_blade(uvhub) + if (uv_blade_nr_possible_cpus(uvhub)) + uv_init_uvhub(uvhub, vector); + + uv_enable_timeouts(); + alloc_intr_gate(vector, uv_bau_message_intr1); + + for_each_possible_blade(uvhub) { + if (uv_blade_nr_possible_cpus(uvhub)) { + pnode = uv_blade_to_pnode(uvhub); + /* INIT the bau */ + uv_write_global_mmr64(pnode, + UVH_LB_BAU_SB_ACTIVATION_CONTROL, + ((unsigned long)1 << 63)); + mmr = 1; /* should be 1 to broadcast to both sockets */ + uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, + mmr); + } + } + + return 0; +} +core_initcall(uv_bau_init); +fs_initcall(uv_ptc_init); diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c new file mode 100644 index 00000000000..7b24460917d --- /dev/null +++ b/arch/x86/platform/uv/uv_irq.c @@ -0,0 +1,285 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV IRQ functions + * + * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. + */ + +#include +#include +#include +#include + +#include +#include +#include + +/* MMR offset and pnode of hub sourcing interrupts for a given irq */ +struct uv_irq_2_mmr_pnode{ + struct rb_node list; + unsigned long offset; + int pnode; + int irq; +}; + +static spinlock_t uv_irq_lock; +static struct rb_root uv_irq_root; + +static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool); + +static void uv_noop(struct irq_data *data) { } + +static void uv_ack_apic(struct irq_data *data) +{ + ack_APIC_irq(); +} + +static struct irq_chip uv_irq_chip = { + .name = "UV-CORE", + .irq_mask = uv_noop, + .irq_unmask = uv_noop, + .irq_eoi = uv_ack_apic, + .irq_set_affinity = uv_set_irq_affinity, +}; + +/* + * Add offset and pnode information of the hub sourcing interrupts to the + * rb tree for a specific irq. + */ +static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) +{ + struct rb_node **link = &uv_irq_root.rb_node; + struct rb_node *parent = NULL; + struct uv_irq_2_mmr_pnode *n; + struct uv_irq_2_mmr_pnode *e; + unsigned long irqflags; + + n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, + uv_blade_to_memory_nid(blade)); + if (!n) + return -ENOMEM; + + n->irq = irq; + n->offset = offset; + n->pnode = uv_blade_to_pnode(blade); + spin_lock_irqsave(&uv_irq_lock, irqflags); + /* Find the right place in the rbtree: */ + while (*link) { + parent = *link; + e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); + + if (unlikely(irq == e->irq)) { + /* irq entry exists */ + e->pnode = uv_blade_to_pnode(blade); + e->offset = offset; + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + kfree(n); + return 0; + } + + if (irq < e->irq) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + /* Insert the node into the rbtree. */ + rb_link_node(&n->list, parent, link); + rb_insert_color(&n->list, &uv_irq_root); + + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return 0; +} + +/* Retrieve offset and pnode information from the rb tree for a specific irq */ +int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) +{ + struct uv_irq_2_mmr_pnode *e; + struct rb_node *n; + unsigned long irqflags; + + spin_lock_irqsave(&uv_irq_lock, irqflags); + n = uv_irq_root.rb_node; + while (n) { + e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); + + if (e->irq == irq) { + *offset = e->offset; + *pnode = e->pnode; + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return 0; + } + + if (irq < e->irq) + n = n->rb_left; + else + n = n->rb_right; + } + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return -1; +} + +/* + * Re-target the irq to the specified CPU and enable the specified MMR located + * on the specified blade to allow the sending of MSIs to the specified CPU. + */ +static int +arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, + unsigned long mmr_offset, int limit) +{ + const struct cpumask *eligible_cpu = cpumask_of(cpu); + struct irq_cfg *cfg = get_irq_chip_data(irq); + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + int mmr_pnode, err; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + err = assign_irq_vector(irq, cfg, eligible_cpu); + if (err != 0) + return err; + + if (limit == UV_AFFINITY_CPU) + irq_set_status_flags(irq, IRQ_NO_BALANCING); + else + irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); + + set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, + irq_name); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); + + mmr_pnode = uv_blade_to_pnode(mmr_blade); + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return irq; +} + +/* + * Disable the specified MMR located on the specified blade so that MSIs are + * longer allowed to be sent. + */ +static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) +{ + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->mask = 1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +} + +static int +uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_cfg *cfg = data->chip_data; + unsigned int dest; + unsigned long mmr_value, mmr_offset; + struct uv_IO_APIC_route_entry *entry; + int mmr_pnode; + + if (__ioapic_set_affinity(data, mask, &dest)) + return -1; + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = dest; + + /* Get previously stored MMR and pnode of hub sourcing interrupts */ + if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode)) + return -1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return 0; +} + +/* + * Set up a mapping of an available irq and vector, and enable the specified + * MMR that defines the MSI that is to be sent to the specified CPU when an + * interrupt is raised. + */ +int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, + unsigned long mmr_offset, int limit) +{ + int irq, ret; + + irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade)); + + if (irq <= 0) + return -EBUSY; + + ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, + limit); + if (ret == irq) + uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); + else + destroy_irq(irq); + + return ret; +} +EXPORT_SYMBOL_GPL(uv_setup_irq); + +/* + * Tear down a mapping of an irq and vector, and disable the specified MMR that + * defined the MSI that was to be sent to the specified CPU when an interrupt + * was raised. + * + * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). + */ +void uv_teardown_irq(unsigned int irq) +{ + struct uv_irq_2_mmr_pnode *e; + struct rb_node *n; + unsigned long irqflags; + + spin_lock_irqsave(&uv_irq_lock, irqflags); + n = uv_irq_root.rb_node; + while (n) { + e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); + if (e->irq == irq) { + arch_disable_uv_irq(e->pnode, e->offset); + rb_erase(n, &uv_irq_root); + kfree(e); + break; + } + if (irq < e->irq) + n = n->rb_left; + else + n = n->rb_right; + } + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + destroy_irq(irq); +} +EXPORT_SYMBOL_GPL(uv_teardown_irq); diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c new file mode 100644 index 00000000000..309c70fb775 --- /dev/null +++ b/arch/x86/platform/uv/uv_sysfs.c @@ -0,0 +1,76 @@ +/* + * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) Russ Anderson + */ + +#include +#include +#include + +struct kobject *sgi_uv_kobj; + +static ssize_t partition_id_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id); +} + +static ssize_t coherence_id_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id()); +} + +static struct kobj_attribute partition_id_attr = + __ATTR(partition_id, S_IRUGO, partition_id_show, NULL); + +static struct kobj_attribute coherence_id_attr = + __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL); + + +static int __init sgi_uv_sysfs_init(void) +{ + unsigned long ret; + + if (!is_uv_system()) + return -ENODEV; + + if (!sgi_uv_kobj) + sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); + if (!sgi_uv_kobj) { + printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n"); + return -EINVAL; + } + + ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); + if (ret) { + printk(KERN_WARNING "sysfs_create_file partition_id failed\n"); + return ret; + } + + ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); + if (ret) { + printk(KERN_WARNING "sysfs_create_file coherence_id failed\n"); + return ret; + } + + return 0; +} + +device_initcall(sgi_uv_sysfs_init); diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c new file mode 100644 index 00000000000..56e421bc379 --- /dev/null +++ b/arch/x86/platform/uv/uv_time.c @@ -0,0 +1,423 @@ +/* + * SGI RTC clock/timer routines. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) Dimitri Sivanich + */ +#include +#include + +#include +#include +#include +#include +#include +#include + +#define RTC_NAME "sgi_rtc" + +static cycle_t uv_read_rtc(struct clocksource *cs); +static int uv_rtc_next_event(unsigned long, struct clock_event_device *); +static void uv_rtc_timer_setup(enum clock_event_mode, + struct clock_event_device *); + +static struct clocksource clocksource_uv = { + .name = RTC_NAME, + .rating = 400, + .read = uv_read_rtc, + .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, + .shift = 10, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static struct clock_event_device clock_event_device_uv = { + .name = RTC_NAME, + .features = CLOCK_EVT_FEAT_ONESHOT, + .shift = 20, + .rating = 400, + .irq = -1, + .set_next_event = uv_rtc_next_event, + .set_mode = uv_rtc_timer_setup, + .event_handler = NULL, +}; + +static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); + +/* There is one of these allocated per node */ +struct uv_rtc_timer_head { + spinlock_t lock; + /* next cpu waiting for timer, local node relative: */ + int next_cpu; + /* number of cpus on this node: */ + int ncpus; + struct { + int lcpu; /* systemwide logical cpu number */ + u64 expires; /* next timer expiration for this cpu */ + } cpu[1]; +}; + +/* + * Access to uv_rtc_timer_head via blade id. + */ +static struct uv_rtc_timer_head **blade_info __read_mostly; + +static int uv_rtc_evt_enable; + +/* + * Hardware interface routines + */ + +/* Send IPIs to another node */ +static void uv_rtc_send_IPI(int cpu) +{ + unsigned long apicid, val; + int pnode; + + apicid = cpu_physical_id(cpu); + pnode = uv_apicid_to_pnode(apicid); + val = (1UL << UVH_IPI_INT_SEND_SHFT) | + (apicid << UVH_IPI_INT_APIC_ID_SHFT) | + (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); + + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); +} + +/* Check for an RTC interrupt pending */ +static int uv_intr_pending(int pnode) +{ + return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) & + UVH_EVENT_OCCURRED0_RTC1_MASK; +} + +/* Setup interrupt and return non-zero if early expiration occurred. */ +static int uv_setup_intr(int cpu, u64 expires) +{ + u64 val; + int pnode = uv_cpu_to_pnode(cpu); + + uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, + UVH_RTC1_INT_CONFIG_M_MASK); + uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L); + + uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, + UVH_EVENT_OCCURRED0_RTC1_MASK); + + val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | + ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); + + /* Set configuration */ + uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val); + /* Initialize comparator value */ + uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires); + + if (uv_read_rtc(NULL) <= expires) + return 0; + + return !uv_intr_pending(pnode); +} + +/* + * Per-cpu timer tracking routines + */ + +static __init void uv_rtc_deallocate_timers(void) +{ + int bid; + + for_each_possible_blade(bid) { + kfree(blade_info[bid]); + } + kfree(blade_info); +} + +/* Allocate per-node list of cpu timer expiration times. */ +static __init int uv_rtc_allocate_timers(void) +{ + int cpu; + + blade_info = kmalloc(uv_possible_blades * sizeof(void *), GFP_KERNEL); + if (!blade_info) + return -ENOMEM; + memset(blade_info, 0, uv_possible_blades * sizeof(void *)); + + for_each_present_cpu(cpu) { + int nid = cpu_to_node(cpu); + int bid = uv_cpu_to_blade_id(cpu); + int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; + struct uv_rtc_timer_head *head = blade_info[bid]; + + if (!head) { + head = kmalloc_node(sizeof(struct uv_rtc_timer_head) + + (uv_blade_nr_possible_cpus(bid) * + 2 * sizeof(u64)), + GFP_KERNEL, nid); + if (!head) { + uv_rtc_deallocate_timers(); + return -ENOMEM; + } + spin_lock_init(&head->lock); + head->ncpus = uv_blade_nr_possible_cpus(bid); + head->next_cpu = -1; + blade_info[bid] = head; + } + + head->cpu[bcpu].lcpu = cpu; + head->cpu[bcpu].expires = ULLONG_MAX; + } + + return 0; +} + +/* Find and set the next expiring timer. */ +static void uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode) +{ + u64 lowest = ULLONG_MAX; + int c, bcpu = -1; + + head->next_cpu = -1; + for (c = 0; c < head->ncpus; c++) { + u64 exp = head->cpu[c].expires; + if (exp < lowest) { + bcpu = c; + lowest = exp; + } + } + if (bcpu >= 0) { + head->next_cpu = bcpu; + c = head->cpu[bcpu].lcpu; + if (uv_setup_intr(c, lowest)) + /* If we didn't set it up in time, trigger */ + uv_rtc_send_IPI(c); + } else { + uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, + UVH_RTC1_INT_CONFIG_M_MASK); + } +} + +/* + * Set expiration time for current cpu. + * + * Returns 1 if we missed the expiration time. + */ +static int uv_rtc_set_timer(int cpu, u64 expires) +{ + int pnode = uv_cpu_to_pnode(cpu); + int bid = uv_cpu_to_blade_id(cpu); + struct uv_rtc_timer_head *head = blade_info[bid]; + int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; + u64 *t = &head->cpu[bcpu].expires; + unsigned long flags; + int next_cpu; + + spin_lock_irqsave(&head->lock, flags); + + next_cpu = head->next_cpu; + *t = expires; + + /* Will this one be next to go off? */ + if (next_cpu < 0 || bcpu == next_cpu || + expires < head->cpu[next_cpu].expires) { + head->next_cpu = bcpu; + if (uv_setup_intr(cpu, expires)) { + *t = ULLONG_MAX; + uv_rtc_find_next_timer(head, pnode); + spin_unlock_irqrestore(&head->lock, flags); + return -ETIME; + } + } + + spin_unlock_irqrestore(&head->lock, flags); + return 0; +} + +/* + * Unset expiration time for current cpu. + * + * Returns 1 if this timer was pending. + */ +static int uv_rtc_unset_timer(int cpu, int force) +{ + int pnode = uv_cpu_to_pnode(cpu); + int bid = uv_cpu_to_blade_id(cpu); + struct uv_rtc_timer_head *head = blade_info[bid]; + int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; + u64 *t = &head->cpu[bcpu].expires; + unsigned long flags; + int rc = 0; + + spin_lock_irqsave(&head->lock, flags); + + if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) + rc = 1; + + if (rc) { + *t = ULLONG_MAX; + /* Was the hardware setup for this timer? */ + if (head->next_cpu == bcpu) + uv_rtc_find_next_timer(head, pnode); + } + + spin_unlock_irqrestore(&head->lock, flags); + + return rc; +} + + +/* + * Kernel interface routines. + */ + +/* + * Read the RTC. + * + * Starting with HUB rev 2.0, the UV RTC register is replicated across all + * cachelines of it's own page. This allows faster simultaneous reads + * from a given socket. + */ +static cycle_t uv_read_rtc(struct clocksource *cs) +{ + unsigned long offset; + + if (uv_get_min_hub_revision_id() == 1) + offset = 0; + else + offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; + + return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); +} + +/* + * Program the next event, relative to now + */ +static int uv_rtc_next_event(unsigned long delta, + struct clock_event_device *ced) +{ + int ced_cpu = cpumask_first(ced->cpumask); + + return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc(NULL)); +} + +/* + * Setup the RTC timer in oneshot mode + */ +static void uv_rtc_timer_setup(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + int ced_cpu = cpumask_first(evt->cpumask); + + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_MODE_ONESHOT: + case CLOCK_EVT_MODE_RESUME: + /* Nothing to do here yet */ + break; + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + uv_rtc_unset_timer(ced_cpu, 1); + break; + } +} + +static void uv_rtc_interrupt(void) +{ + int cpu = smp_processor_id(); + struct clock_event_device *ced = &per_cpu(cpu_ced, cpu); + + if (!ced || !ced->event_handler) + return; + + if (uv_rtc_unset_timer(cpu, 0) != 1) + return; + + ced->event_handler(ced); +} + +static int __init uv_enable_evt_rtc(char *str) +{ + uv_rtc_evt_enable = 1; + + return 1; +} +__setup("uvrtcevt", uv_enable_evt_rtc); + +static __init void uv_rtc_register_clockevents(struct work_struct *dummy) +{ + struct clock_event_device *ced = &__get_cpu_var(cpu_ced); + + *ced = clock_event_device_uv; + ced->cpumask = cpumask_of(smp_processor_id()); + clockevents_register_device(ced); +} + +static __init int uv_rtc_setup_clock(void) +{ + int rc; + + if (!is_uv_system()) + return -ENODEV; + + clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, + clocksource_uv.shift); + + /* If single blade, prefer tsc */ + if (uv_num_possible_blades() == 1) + clocksource_uv.rating = 250; + + rc = clocksource_register(&clocksource_uv); + if (rc) + printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); + else + printk(KERN_INFO "UV RTC clocksource registered freq %lu MHz\n", + sn_rtc_cycles_per_second/(unsigned long)1E6); + + if (rc || !uv_rtc_evt_enable || x86_platform_ipi_callback) + return rc; + + /* Setup and register clockevents */ + rc = uv_rtc_allocate_timers(); + if (rc) + goto error; + + x86_platform_ipi_callback = uv_rtc_interrupt; + + clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second, + NSEC_PER_SEC, clock_event_device_uv.shift); + + clock_event_device_uv.min_delta_ns = NSEC_PER_SEC / + sn_rtc_cycles_per_second; + + clock_event_device_uv.max_delta_ns = clocksource_uv.mask * + (NSEC_PER_SEC / sn_rtc_cycles_per_second); + + rc = schedule_on_each_cpu(uv_rtc_register_clockevents); + if (rc) { + x86_platform_ipi_callback = NULL; + uv_rtc_deallocate_timers(); + goto error; + } + + printk(KERN_INFO "UV RTC clockevents registered\n"); + + return 0; + +error: + clocksource_unregister(&clocksource_uv); + printk(KERN_INFO "UV RTC clockevents failed rc %d\n", rc); + + return rc; +} +arch_initcall(uv_rtc_setup_clock); -- cgit v1.2.3-70-g09d2 From 8654b1c2de1465120974899fc1c8aa00e91d4b7e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 23 Oct 2010 11:28:42 +0200 Subject: x86: Move olpc to platform Signed-off-by: Thomas Gleixner Cc: Andres Salomon --- arch/x86/kernel/Makefile | 4 - arch/x86/kernel/olpc-xo1.c | 140 ------------------- arch/x86/kernel/olpc.c | 281 -------------------------------------- arch/x86/kernel/olpc_ofw.c | 112 --------------- arch/x86/platform/Makefile | 1 + arch/x86/platform/olpc/Makefile | 3 + arch/x86/platform/olpc/olpc-xo1.c | 140 +++++++++++++++++++ arch/x86/platform/olpc/olpc.c | 281 ++++++++++++++++++++++++++++++++++++++ arch/x86/platform/olpc/olpc_ofw.c | 112 +++++++++++++++ 9 files changed, 537 insertions(+), 537 deletions(-) delete mode 100644 arch/x86/kernel/olpc-xo1.c delete mode 100644 arch/x86/kernel/olpc.c delete mode 100644 arch/x86/kernel/olpc_ofw.c create mode 100644 arch/x86/platform/olpc/Makefile create mode 100644 arch/x86/platform/olpc/olpc-xo1.c create mode 100644 arch/x86/platform/olpc/olpc.c create mode 100644 arch/x86/platform/olpc/olpc_ofw.c (limited to 'arch/x86/kernel/Makefile') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 08e2e4bf839..9e13763b609 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -101,10 +101,6 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o -obj-$(CONFIG_OLPC) += olpc.o -obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o -obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o - microcode-y := microcode_core.o microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o diff --git a/arch/x86/kernel/olpc-xo1.c b/arch/x86/kernel/olpc-xo1.c deleted file mode 100644 index f5442c03abc..00000000000 --- a/arch/x86/kernel/olpc-xo1.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Support for features of the OLPC XO-1 laptop - * - * Copyright (C) 2010 One Laptop per Child - * Copyright (C) 2006 Red Hat, Inc. - * Copyright (C) 2006 Advanced Micro Devices, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include -#include -#include -#include -#include - -#include -#include - -#define DRV_NAME "olpc-xo1" - -#define PMS_BAR 4 -#define ACPI_BAR 5 - -/* PMC registers (PMS block) */ -#define PM_SCLK 0x10 -#define PM_IN_SLPCTL 0x20 -#define PM_WKXD 0x34 -#define PM_WKD 0x30 -#define PM_SSC 0x54 - -/* PM registers (ACPI block) */ -#define PM1_CNT 0x08 -#define PM_GPE0_STS 0x18 - -static unsigned long acpi_base; -static unsigned long pms_base; - -static void xo1_power_off(void) -{ - printk(KERN_INFO "OLPC XO-1 power off sequence...\n"); - - /* Enable all of these controls with 0 delay */ - outl(0x40000000, pms_base + PM_SCLK); - outl(0x40000000, pms_base + PM_IN_SLPCTL); - outl(0x40000000, pms_base + PM_WKXD); - outl(0x40000000, pms_base + PM_WKD); - - /* Clear status bits (possibly unnecessary) */ - outl(0x0002ffff, pms_base + PM_SSC); - outl(0xffffffff, acpi_base + PM_GPE0_STS); - - /* Write SLP_EN bit to start the machinery */ - outl(0x00002000, acpi_base + PM1_CNT); -} - -/* Read the base addresses from the PCI BAR info */ -static int __devinit setup_bases(struct pci_dev *pdev) -{ - int r; - - r = pci_enable_device_io(pdev); - if (r) { - dev_err(&pdev->dev, "can't enable device IO\n"); - return r; - } - - r = pci_request_region(pdev, ACPI_BAR, DRV_NAME); - if (r) { - dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", ACPI_BAR); - return r; - } - - r = pci_request_region(pdev, PMS_BAR, DRV_NAME); - if (r) { - dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", PMS_BAR); - pci_release_region(pdev, ACPI_BAR); - return r; - } - - acpi_base = pci_resource_start(pdev, ACPI_BAR); - pms_base = pci_resource_start(pdev, PMS_BAR); - - return 0; -} - -static int __devinit olpc_xo1_probe(struct platform_device *pdev) -{ - struct pci_dev *pcidev; - int r; - - pcidev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, - NULL); - if (!pdev) - return -ENODEV; - - r = setup_bases(pcidev); - if (r) - return r; - - pm_power_off = xo1_power_off; - - printk(KERN_INFO "OLPC XO-1 support registered\n"); - return 0; -} - -static int __devexit olpc_xo1_remove(struct platform_device *pdev) -{ - pm_power_off = NULL; - return 0; -} - -static struct platform_driver olpc_xo1_driver = { - .driver = { - .name = DRV_NAME, - .owner = THIS_MODULE, - }, - .probe = olpc_xo1_probe, - .remove = __devexit_p(olpc_xo1_remove), -}; - -static int __init olpc_xo1_init(void) -{ - return platform_driver_register(&olpc_xo1_driver); -} - -static void __exit olpc_xo1_exit(void) -{ - platform_driver_unregister(&olpc_xo1_driver); -} - -MODULE_AUTHOR("Daniel Drake "); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:olpc-xo1"); - -module_init(olpc_xo1_init); -module_exit(olpc_xo1_exit); diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c deleted file mode 100644 index edaf3fe8dc5..00000000000 --- a/arch/x86/kernel/olpc.c +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Support for the OLPC DCON and OLPC EC access - * - * Copyright © 2006 Advanced Micro Devices, Inc. - * Copyright © 2007-2008 Andres Salomon - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -struct olpc_platform_t olpc_platform_info; -EXPORT_SYMBOL_GPL(olpc_platform_info); - -static DEFINE_SPINLOCK(ec_lock); - -/* what the timeout *should* be (in ms) */ -#define EC_BASE_TIMEOUT 20 - -/* the timeout that bugs in the EC might force us to actually use */ -static int ec_timeout = EC_BASE_TIMEOUT; - -static int __init olpc_ec_timeout_set(char *str) -{ - if (get_option(&str, &ec_timeout) != 1) { - ec_timeout = EC_BASE_TIMEOUT; - printk(KERN_ERR "olpc-ec: invalid argument to " - "'olpc_ec_timeout=', ignoring!\n"); - } - printk(KERN_DEBUG "olpc-ec: using %d ms delay for EC commands.\n", - ec_timeout); - return 1; -} -__setup("olpc_ec_timeout=", olpc_ec_timeout_set); - -/* - * These {i,o}bf_status functions return whether the buffers are full or not. - */ - -static inline unsigned int ibf_status(unsigned int port) -{ - return !!(inb(port) & 0x02); -} - -static inline unsigned int obf_status(unsigned int port) -{ - return inb(port) & 0x01; -} - -#define wait_on_ibf(p, d) __wait_on_ibf(__LINE__, (p), (d)) -static int __wait_on_ibf(unsigned int line, unsigned int port, int desired) -{ - unsigned int timeo; - int state = ibf_status(port); - - for (timeo = ec_timeout; state != desired && timeo; timeo--) { - mdelay(1); - state = ibf_status(port); - } - - if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) && - timeo < (ec_timeout - EC_BASE_TIMEOUT)) { - printk(KERN_WARNING "olpc-ec: %d: waited %u ms for IBF!\n", - line, ec_timeout - timeo); - } - - return !(state == desired); -} - -#define wait_on_obf(p, d) __wait_on_obf(__LINE__, (p), (d)) -static int __wait_on_obf(unsigned int line, unsigned int port, int desired) -{ - unsigned int timeo; - int state = obf_status(port); - - for (timeo = ec_timeout; state != desired && timeo; timeo--) { - mdelay(1); - state = obf_status(port); - } - - if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) && - timeo < (ec_timeout - EC_BASE_TIMEOUT)) { - printk(KERN_WARNING "olpc-ec: %d: waited %u ms for OBF!\n", - line, ec_timeout - timeo); - } - - return !(state == desired); -} - -/* - * This allows the kernel to run Embedded Controller commands. The EC is - * documented at , and the - * available EC commands are here: - * . Unfortunately, while - * OpenFirmware's source is available, the EC's is not. - */ -int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen, - unsigned char *outbuf, size_t outlen) -{ - unsigned long flags; - int ret = -EIO; - int i; - int restarts = 0; - - spin_lock_irqsave(&ec_lock, flags); - - /* Clear OBF */ - for (i = 0; i < 10 && (obf_status(0x6c) == 1); i++) - inb(0x68); - if (i == 10) { - printk(KERN_ERR "olpc-ec: timeout while attempting to " - "clear OBF flag!\n"); - goto err; - } - - if (wait_on_ibf(0x6c, 0)) { - printk(KERN_ERR "olpc-ec: timeout waiting for EC to " - "quiesce!\n"); - goto err; - } - -restart: - /* - * Note that if we time out during any IBF checks, that's a failure; - * we have to return. There's no way for the kernel to clear that. - * - * If we time out during an OBF check, we can restart the command; - * reissuing it will clear the OBF flag, and we should be alright. - * The OBF flag will sometimes misbehave due to what we believe - * is a hardware quirk.. - */ - pr_devel("olpc-ec: running cmd 0x%x\n", cmd); - outb(cmd, 0x6c); - - if (wait_on_ibf(0x6c, 0)) { - printk(KERN_ERR "olpc-ec: timeout waiting for EC to read " - "command!\n"); - goto err; - } - - if (inbuf && inlen) { - /* write data to EC */ - for (i = 0; i < inlen; i++) { - if (wait_on_ibf(0x6c, 0)) { - printk(KERN_ERR "olpc-ec: timeout waiting for" - " EC accept data!\n"); - goto err; - } - pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]); - outb(inbuf[i], 0x68); - } - } - if (outbuf && outlen) { - /* read data from EC */ - for (i = 0; i < outlen; i++) { - if (wait_on_obf(0x6c, 1)) { - printk(KERN_ERR "olpc-ec: timeout waiting for" - " EC to provide data!\n"); - if (restarts++ < 10) - goto restart; - goto err; - } - outbuf[i] = inb(0x68); - pr_devel("olpc-ec: received 0x%x\n", outbuf[i]); - } - } - - ret = 0; -err: - spin_unlock_irqrestore(&ec_lock, flags); - return ret; -} -EXPORT_SYMBOL_GPL(olpc_ec_cmd); - -static bool __init check_ofw_architecture(void) -{ - size_t propsize; - char olpc_arch[5]; - const void *args[] = { NULL, "architecture", olpc_arch, (void *)5 }; - void *res[] = { &propsize }; - - if (olpc_ofw("getprop", args, res)) { - printk(KERN_ERR "ofw: getprop call failed!\n"); - return false; - } - return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0; -} - -static u32 __init get_board_revision(void) -{ - size_t propsize; - __be32 rev; - const void *args[] = { NULL, "board-revision-int", &rev, (void *)4 }; - void *res[] = { &propsize }; - - if (olpc_ofw("getprop", args, res) || propsize != 4) { - printk(KERN_ERR "ofw: getprop call failed!\n"); - return cpu_to_be32(0); - } - return be32_to_cpu(rev); -} - -static bool __init platform_detect(void) -{ - if (!check_ofw_architecture()) - return false; - olpc_platform_info.flags |= OLPC_F_PRESENT; - olpc_platform_info.boardrev = get_board_revision(); - return true; -} - -static int __init add_xo1_platform_devices(void) -{ - struct platform_device *pdev; - - pdev = platform_device_register_simple("xo1-rfkill", -1, NULL, 0); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); - - pdev = platform_device_register_simple("olpc-xo1", -1, NULL, 0); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); - - return 0; -} - -static int __init olpc_init(void) -{ - int r = 0; - - if (!olpc_ofw_present() || !platform_detect()) - return 0; - - spin_lock_init(&ec_lock); - - /* assume B1 and above models always have a DCON */ - if (olpc_board_at_least(olpc_board(0xb1))) - olpc_platform_info.flags |= OLPC_F_DCON; - - /* get the EC revision */ - olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, - (unsigned char *) &olpc_platform_info.ecver, 1); - -#ifdef CONFIG_PCI_OLPC - /* If the VSA exists let it emulate PCI, if not emulate in kernel. - * XO-1 only. */ - if (olpc_platform_info.boardrev < olpc_board_pre(0xd0) && - !cs5535_has_vsa2()) - x86_init.pci.arch_init = pci_olpc_init; -#endif - - printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", - ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", - olpc_platform_info.boardrev >> 4, - olpc_platform_info.ecver); - - if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) { /* XO-1 */ - r = add_xo1_platform_devices(); - if (r) - return r; - } - - return 0; -} - -postcore_initcall(olpc_init); diff --git a/arch/x86/kernel/olpc_ofw.c b/arch/x86/kernel/olpc_ofw.c deleted file mode 100644 index 78732046437..00000000000 --- a/arch/x86/kernel/olpc_ofw.c +++ /dev/null @@ -1,112 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -/* address of OFW callback interface; will be NULL if OFW isn't found */ -static int (*olpc_ofw_cif)(int *); - -/* page dir entry containing OFW's pgdir table; filled in by head_32.S */ -u32 olpc_ofw_pgd __initdata; - -static DEFINE_SPINLOCK(ofw_lock); - -#define MAXARGS 10 - -void __init setup_olpc_ofw_pgd(void) -{ - pgd_t *base, *ofw_pde; - - if (!olpc_ofw_cif) - return; - - /* fetch OFW's PDE */ - base = early_ioremap(olpc_ofw_pgd, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD); - if (!base) { - printk(KERN_ERR "failed to remap OFW's pgd - disabling OFW!\n"); - olpc_ofw_cif = NULL; - return; - } - ofw_pde = &base[OLPC_OFW_PDE_NR]; - - /* install OFW's PDE permanently into the kernel's pgtable */ - set_pgd(&swapper_pg_dir[OLPC_OFW_PDE_NR], *ofw_pde); - /* implicit optimization barrier here due to uninline function return */ - - early_iounmap(base, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD); -} - -int __olpc_ofw(const char *name, int nr_args, const void **args, int nr_res, - void **res) -{ - int ofw_args[MAXARGS + 3]; - unsigned long flags; - int ret, i, *p; - - BUG_ON(nr_args + nr_res > MAXARGS); - - if (!olpc_ofw_cif) - return -EIO; - - ofw_args[0] = (int)name; - ofw_args[1] = nr_args; - ofw_args[2] = nr_res; - - p = &ofw_args[3]; - for (i = 0; i < nr_args; i++, p++) - *p = (int)args[i]; - - /* call into ofw */ - spin_lock_irqsave(&ofw_lock, flags); - ret = olpc_ofw_cif(ofw_args); - spin_unlock_irqrestore(&ofw_lock, flags); - - if (!ret) { - for (i = 0; i < nr_res; i++, p++) - *((int *)res[i]) = *p; - } - - return ret; -} -EXPORT_SYMBOL_GPL(__olpc_ofw); - -bool olpc_ofw_present(void) -{ - return olpc_ofw_cif != NULL; -} -EXPORT_SYMBOL_GPL(olpc_ofw_present); - -/* OFW cif _should_ be above this address */ -#define OFW_MIN 0xff000000 - -/* OFW starts on a 1MB boundary */ -#define OFW_BOUND (1<<20) - -void __init olpc_ofw_detect(void) -{ - struct olpc_ofw_header *hdr = &boot_params.olpc_ofw_header; - unsigned long start; - - /* ensure OFW booted us by checking for "OFW " string */ - if (hdr->ofw_magic != OLPC_OFW_SIG) - return; - - olpc_ofw_cif = (int (*)(int *))hdr->cif_handler; - - if ((unsigned long)olpc_ofw_cif < OFW_MIN) { - printk(KERN_ERR "OFW detected, but cif has invalid address 0x%lx - disabling.\n", - (unsigned long)olpc_ofw_cif); - olpc_ofw_cif = NULL; - return; - } - - /* determine where OFW starts in memory */ - start = round_down((unsigned long)olpc_ofw_cif, OFW_BOUND); - printk(KERN_INFO "OFW detected in memory, cif @ 0x%lx (reserving top %ldMB)\n", - (unsigned long)olpc_ofw_cif, (-start) >> 20); - reserve_top_address(-start); -} diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index 8519b01f1ac..7bf70b812fa 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile @@ -1,6 +1,7 @@ # Platform specific code goes here obj-y += efi/ obj-y += mrst/ +obj-y += olpc/ obj-y += scx200/ obj-y += sfi/ obj-y += visws/ diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile new file mode 100644 index 00000000000..c31b8fcb5a8 --- /dev/null +++ b/arch/x86/platform/olpc/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_OLPC) += olpc.o +obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o +obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o diff --git a/arch/x86/platform/olpc/olpc-xo1.c b/arch/x86/platform/olpc/olpc-xo1.c new file mode 100644 index 00000000000..f5442c03abc --- /dev/null +++ b/arch/x86/platform/olpc/olpc-xo1.c @@ -0,0 +1,140 @@ +/* + * Support for features of the OLPC XO-1 laptop + * + * Copyright (C) 2010 One Laptop per Child + * Copyright (C) 2006 Red Hat, Inc. + * Copyright (C) 2006 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#define DRV_NAME "olpc-xo1" + +#define PMS_BAR 4 +#define ACPI_BAR 5 + +/* PMC registers (PMS block) */ +#define PM_SCLK 0x10 +#define PM_IN_SLPCTL 0x20 +#define PM_WKXD 0x34 +#define PM_WKD 0x30 +#define PM_SSC 0x54 + +/* PM registers (ACPI block) */ +#define PM1_CNT 0x08 +#define PM_GPE0_STS 0x18 + +static unsigned long acpi_base; +static unsigned long pms_base; + +static void xo1_power_off(void) +{ + printk(KERN_INFO "OLPC XO-1 power off sequence...\n"); + + /* Enable all of these controls with 0 delay */ + outl(0x40000000, pms_base + PM_SCLK); + outl(0x40000000, pms_base + PM_IN_SLPCTL); + outl(0x40000000, pms_base + PM_WKXD); + outl(0x40000000, pms_base + PM_WKD); + + /* Clear status bits (possibly unnecessary) */ + outl(0x0002ffff, pms_base + PM_SSC); + outl(0xffffffff, acpi_base + PM_GPE0_STS); + + /* Write SLP_EN bit to start the machinery */ + outl(0x00002000, acpi_base + PM1_CNT); +} + +/* Read the base addresses from the PCI BAR info */ +static int __devinit setup_bases(struct pci_dev *pdev) +{ + int r; + + r = pci_enable_device_io(pdev); + if (r) { + dev_err(&pdev->dev, "can't enable device IO\n"); + return r; + } + + r = pci_request_region(pdev, ACPI_BAR, DRV_NAME); + if (r) { + dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", ACPI_BAR); + return r; + } + + r = pci_request_region(pdev, PMS_BAR, DRV_NAME); + if (r) { + dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", PMS_BAR); + pci_release_region(pdev, ACPI_BAR); + return r; + } + + acpi_base = pci_resource_start(pdev, ACPI_BAR); + pms_base = pci_resource_start(pdev, PMS_BAR); + + return 0; +} + +static int __devinit olpc_xo1_probe(struct platform_device *pdev) +{ + struct pci_dev *pcidev; + int r; + + pcidev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, + NULL); + if (!pdev) + return -ENODEV; + + r = setup_bases(pcidev); + if (r) + return r; + + pm_power_off = xo1_power_off; + + printk(KERN_INFO "OLPC XO-1 support registered\n"); + return 0; +} + +static int __devexit olpc_xo1_remove(struct platform_device *pdev) +{ + pm_power_off = NULL; + return 0; +} + +static struct platform_driver olpc_xo1_driver = { + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + }, + .probe = olpc_xo1_probe, + .remove = __devexit_p(olpc_xo1_remove), +}; + +static int __init olpc_xo1_init(void) +{ + return platform_driver_register(&olpc_xo1_driver); +} + +static void __exit olpc_xo1_exit(void) +{ + platform_driver_unregister(&olpc_xo1_driver); +} + +MODULE_AUTHOR("Daniel Drake "); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:olpc-xo1"); + +module_init(olpc_xo1_init); +module_exit(olpc_xo1_exit); diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c new file mode 100644 index 00000000000..edaf3fe8dc5 --- /dev/null +++ b/arch/x86/platform/olpc/olpc.c @@ -0,0 +1,281 @@ +/* + * Support for the OLPC DCON and OLPC EC access + * + * Copyright © 2006 Advanced Micro Devices, Inc. + * Copyright © 2007-2008 Andres Salomon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +struct olpc_platform_t olpc_platform_info; +EXPORT_SYMBOL_GPL(olpc_platform_info); + +static DEFINE_SPINLOCK(ec_lock); + +/* what the timeout *should* be (in ms) */ +#define EC_BASE_TIMEOUT 20 + +/* the timeout that bugs in the EC might force us to actually use */ +static int ec_timeout = EC_BASE_TIMEOUT; + +static int __init olpc_ec_timeout_set(char *str) +{ + if (get_option(&str, &ec_timeout) != 1) { + ec_timeout = EC_BASE_TIMEOUT; + printk(KERN_ERR "olpc-ec: invalid argument to " + "'olpc_ec_timeout=', ignoring!\n"); + } + printk(KERN_DEBUG "olpc-ec: using %d ms delay for EC commands.\n", + ec_timeout); + return 1; +} +__setup("olpc_ec_timeout=", olpc_ec_timeout_set); + +/* + * These {i,o}bf_status functions return whether the buffers are full or not. + */ + +static inline unsigned int ibf_status(unsigned int port) +{ + return !!(inb(port) & 0x02); +} + +static inline unsigned int obf_status(unsigned int port) +{ + return inb(port) & 0x01; +} + +#define wait_on_ibf(p, d) __wait_on_ibf(__LINE__, (p), (d)) +static int __wait_on_ibf(unsigned int line, unsigned int port, int desired) +{ + unsigned int timeo; + int state = ibf_status(port); + + for (timeo = ec_timeout; state != desired && timeo; timeo--) { + mdelay(1); + state = ibf_status(port); + } + + if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) && + timeo < (ec_timeout - EC_BASE_TIMEOUT)) { + printk(KERN_WARNING "olpc-ec: %d: waited %u ms for IBF!\n", + line, ec_timeout - timeo); + } + + return !(state == desired); +} + +#define wait_on_obf(p, d) __wait_on_obf(__LINE__, (p), (d)) +static int __wait_on_obf(unsigned int line, unsigned int port, int desired) +{ + unsigned int timeo; + int state = obf_status(port); + + for (timeo = ec_timeout; state != desired && timeo; timeo--) { + mdelay(1); + state = obf_status(port); + } + + if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) && + timeo < (ec_timeout - EC_BASE_TIMEOUT)) { + printk(KERN_WARNING "olpc-ec: %d: waited %u ms for OBF!\n", + line, ec_timeout - timeo); + } + + return !(state == desired); +} + +/* + * This allows the kernel to run Embedded Controller commands. The EC is + * documented at , and the + * available EC commands are here: + * . Unfortunately, while + * OpenFirmware's source is available, the EC's is not. + */ +int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen, + unsigned char *outbuf, size_t outlen) +{ + unsigned long flags; + int ret = -EIO; + int i; + int restarts = 0; + + spin_lock_irqsave(&ec_lock, flags); + + /* Clear OBF */ + for (i = 0; i < 10 && (obf_status(0x6c) == 1); i++) + inb(0x68); + if (i == 10) { + printk(KERN_ERR "olpc-ec: timeout while attempting to " + "clear OBF flag!\n"); + goto err; + } + + if (wait_on_ibf(0x6c, 0)) { + printk(KERN_ERR "olpc-ec: timeout waiting for EC to " + "quiesce!\n"); + goto err; + } + +restart: + /* + * Note that if we time out during any IBF checks, that's a failure; + * we have to return. There's no way for the kernel to clear that. + * + * If we time out during an OBF check, we can restart the command; + * reissuing it will clear the OBF flag, and we should be alright. + * The OBF flag will sometimes misbehave due to what we believe + * is a hardware quirk.. + */ + pr_devel("olpc-ec: running cmd 0x%x\n", cmd); + outb(cmd, 0x6c); + + if (wait_on_ibf(0x6c, 0)) { + printk(KERN_ERR "olpc-ec: timeout waiting for EC to read " + "command!\n"); + goto err; + } + + if (inbuf && inlen) { + /* write data to EC */ + for (i = 0; i < inlen; i++) { + if (wait_on_ibf(0x6c, 0)) { + printk(KERN_ERR "olpc-ec: timeout waiting for" + " EC accept data!\n"); + goto err; + } + pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]); + outb(inbuf[i], 0x68); + } + } + if (outbuf && outlen) { + /* read data from EC */ + for (i = 0; i < outlen; i++) { + if (wait_on_obf(0x6c, 1)) { + printk(KERN_ERR "olpc-ec: timeout waiting for" + " EC to provide data!\n"); + if (restarts++ < 10) + goto restart; + goto err; + } + outbuf[i] = inb(0x68); + pr_devel("olpc-ec: received 0x%x\n", outbuf[i]); + } + } + + ret = 0; +err: + spin_unlock_irqrestore(&ec_lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(olpc_ec_cmd); + +static bool __init check_ofw_architecture(void) +{ + size_t propsize; + char olpc_arch[5]; + const void *args[] = { NULL, "architecture", olpc_arch, (void *)5 }; + void *res[] = { &propsize }; + + if (olpc_ofw("getprop", args, res)) { + printk(KERN_ERR "ofw: getprop call failed!\n"); + return false; + } + return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0; +} + +static u32 __init get_board_revision(void) +{ + size_t propsize; + __be32 rev; + const void *args[] = { NULL, "board-revision-int", &rev, (void *)4 }; + void *res[] = { &propsize }; + + if (olpc_ofw("getprop", args, res) || propsize != 4) { + printk(KERN_ERR "ofw: getprop call failed!\n"); + return cpu_to_be32(0); + } + return be32_to_cpu(rev); +} + +static bool __init platform_detect(void) +{ + if (!check_ofw_architecture()) + return false; + olpc_platform_info.flags |= OLPC_F_PRESENT; + olpc_platform_info.boardrev = get_board_revision(); + return true; +} + +static int __init add_xo1_platform_devices(void) +{ + struct platform_device *pdev; + + pdev = platform_device_register_simple("xo1-rfkill", -1, NULL, 0); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + pdev = platform_device_register_simple("olpc-xo1", -1, NULL, 0); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + return 0; +} + +static int __init olpc_init(void) +{ + int r = 0; + + if (!olpc_ofw_present() || !platform_detect()) + return 0; + + spin_lock_init(&ec_lock); + + /* assume B1 and above models always have a DCON */ + if (olpc_board_at_least(olpc_board(0xb1))) + olpc_platform_info.flags |= OLPC_F_DCON; + + /* get the EC revision */ + olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, + (unsigned char *) &olpc_platform_info.ecver, 1); + +#ifdef CONFIG_PCI_OLPC + /* If the VSA exists let it emulate PCI, if not emulate in kernel. + * XO-1 only. */ + if (olpc_platform_info.boardrev < olpc_board_pre(0xd0) && + !cs5535_has_vsa2()) + x86_init.pci.arch_init = pci_olpc_init; +#endif + + printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", + ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", + olpc_platform_info.boardrev >> 4, + olpc_platform_info.ecver); + + if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) { /* XO-1 */ + r = add_xo1_platform_devices(); + if (r) + return r; + } + + return 0; +} + +postcore_initcall(olpc_init); diff --git a/arch/x86/platform/olpc/olpc_ofw.c b/arch/x86/platform/olpc/olpc_ofw.c new file mode 100644 index 00000000000..78732046437 --- /dev/null +++ b/arch/x86/platform/olpc/olpc_ofw.c @@ -0,0 +1,112 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +/* address of OFW callback interface; will be NULL if OFW isn't found */ +static int (*olpc_ofw_cif)(int *); + +/* page dir entry containing OFW's pgdir table; filled in by head_32.S */ +u32 olpc_ofw_pgd __initdata; + +static DEFINE_SPINLOCK(ofw_lock); + +#define MAXARGS 10 + +void __init setup_olpc_ofw_pgd(void) +{ + pgd_t *base, *ofw_pde; + + if (!olpc_ofw_cif) + return; + + /* fetch OFW's PDE */ + base = early_ioremap(olpc_ofw_pgd, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD); + if (!base) { + printk(KERN_ERR "failed to remap OFW's pgd - disabling OFW!\n"); + olpc_ofw_cif = NULL; + return; + } + ofw_pde = &base[OLPC_OFW_PDE_NR]; + + /* install OFW's PDE permanently into the kernel's pgtable */ + set_pgd(&swapper_pg_dir[OLPC_OFW_PDE_NR], *ofw_pde); + /* implicit optimization barrier here due to uninline function return */ + + early_iounmap(base, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD); +} + +int __olpc_ofw(const char *name, int nr_args, const void **args, int nr_res, + void **res) +{ + int ofw_args[MAXARGS + 3]; + unsigned long flags; + int ret, i, *p; + + BUG_ON(nr_args + nr_res > MAXARGS); + + if (!olpc_ofw_cif) + return -EIO; + + ofw_args[0] = (int)name; + ofw_args[1] = nr_args; + ofw_args[2] = nr_res; + + p = &ofw_args[3]; + for (i = 0; i < nr_args; i++, p++) + *p = (int)args[i]; + + /* call into ofw */ + spin_lock_irqsave(&ofw_lock, flags); + ret = olpc_ofw_cif(ofw_args); + spin_unlock_irqrestore(&ofw_lock, flags); + + if (!ret) { + for (i = 0; i < nr_res; i++, p++) + *((int *)res[i]) = *p; + } + + return ret; +} +EXPORT_SYMBOL_GPL(__olpc_ofw); + +bool olpc_ofw_present(void) +{ + return olpc_ofw_cif != NULL; +} +EXPORT_SYMBOL_GPL(olpc_ofw_present); + +/* OFW cif _should_ be above this address */ +#define OFW_MIN 0xff000000 + +/* OFW starts on a 1MB boundary */ +#define OFW_BOUND (1<<20) + +void __init olpc_ofw_detect(void) +{ + struct olpc_ofw_header *hdr = &boot_params.olpc_ofw_header; + unsigned long start; + + /* ensure OFW booted us by checking for "OFW " string */ + if (hdr->ofw_magic != OLPC_OFW_SIG) + return; + + olpc_ofw_cif = (int (*)(int *))hdr->cif_handler; + + if ((unsigned long)olpc_ofw_cif < OFW_MIN) { + printk(KERN_ERR "OFW detected, but cif has invalid address 0x%lx - disabling.\n", + (unsigned long)olpc_ofw_cif); + olpc_ofw_cif = NULL; + return; + } + + /* determine where OFW starts in memory */ + start = round_down((unsigned long)olpc_ofw_cif, OFW_BOUND); + printk(KERN_INFO "OFW detected in memory, cif @ 0x%lx (reserving top %ldMB)\n", + (unsigned long)olpc_ofw_cif, (-start) >> 20); + reserve_top_address(-start); +} -- cgit v1.2.3-70-g09d2