diff options
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r-- | arch/ia64/kernel/Makefile | 3 | ||||
-rw-r--r-- | arch/ia64/kernel/acpi.c | 55 | ||||
-rw-r--r-- | arch/ia64/kernel/efi.c | 62 | ||||
-rw-r--r-- | arch/ia64/kernel/entry.S | 15 | ||||
-rw-r--r-- | arch/ia64/kernel/gate.lds.S | 1 | ||||
-rw-r--r-- | arch/ia64/kernel/iosapic.c | 271 | ||||
-rw-r--r-- | arch/ia64/kernel/irq.c | 13 | ||||
-rw-r--r-- | arch/ia64/kernel/ivt.S | 1 | ||||
-rw-r--r-- | arch/ia64/kernel/kprobes.c | 51 | ||||
-rw-r--r-- | arch/ia64/kernel/machvec.c | 19 | ||||
-rw-r--r-- | arch/ia64/kernel/mca.c | 197 | ||||
-rw-r--r-- | arch/ia64/kernel/mca_drv.c | 22 | ||||
-rw-r--r-- | arch/ia64/kernel/mca_drv.h | 7 | ||||
-rw-r--r-- | arch/ia64/kernel/mca_drv_asm.S | 13 | ||||
-rw-r--r-- | arch/ia64/kernel/numa.c | 2 | ||||
-rw-r--r-- | arch/ia64/kernel/patch.c | 8 | ||||
-rw-r--r-- | arch/ia64/kernel/perfmon.c | 5 | ||||
-rw-r--r-- | arch/ia64/kernel/process.c | 8 | ||||
-rw-r--r-- | arch/ia64/kernel/ptrace.c | 10 | ||||
-rw-r--r-- | arch/ia64/kernel/setup.c | 71 | ||||
-rw-r--r-- | arch/ia64/kernel/signal.c | 101 | ||||
-rw-r--r-- | arch/ia64/kernel/smpboot.c | 221 | ||||
-rw-r--r-- | arch/ia64/kernel/time.c | 9 | ||||
-rw-r--r-- | arch/ia64/kernel/topology.c | 2 | ||||
-rw-r--r-- | arch/ia64/kernel/traps.c | 6 | ||||
-rw-r--r-- | arch/ia64/kernel/vmlinux.lds.S | 54 |
26 files changed, 693 insertions, 534 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 09a0dbc17fb..59e871dae74 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \ salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ - unwind.o mca.o mca_asm.o topology.o + unwind.o mca.o mca_asm.o topology.o dmi_scan.o obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o @@ -30,6 +30,7 @@ obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o mca_recovery-y += mca_drv.o mca_drv_asm.o +dmi_scan-y += ../../i386/kernel/dmi_scan.o # The gate DSO image is built using a special linker script. targets += gate.so gate-syms.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index ecd44bdc839..58c93a30348 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -284,19 +284,24 @@ acpi_parse_plat_int_src(acpi_table_entry_header * header, return 0; } +#ifdef CONFIG_HOTPLUG_CPU unsigned int can_cpei_retarget(void) { extern int cpe_vector; + extern unsigned int force_cpei_retarget; /* * Only if CPEI is supported and the override flag * is present, otherwise return that its re-targettable * if we are in polling mode. */ - if (cpe_vector > 0 && !acpi_cpei_override) - return 0; - else - return 1; + if (cpe_vector > 0) { + if (acpi_cpei_override || force_cpei_retarget) + return 1; + else + return 0; + } + return 1; } unsigned int is_cpu_cpei_target(unsigned int cpu) @@ -315,6 +320,7 @@ void set_cpei_target_cpu(unsigned int cpu) { acpi_cpei_phys_cpuid = cpu_physical_id(cpu); } +#endif unsigned int get_cpei_target_cpu(void) { @@ -414,6 +420,26 @@ int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS]; int __initdata nid_to_pxm_map[MAX_NUMNODES]; static struct acpi_table_slit __initdata *slit_table; +static int get_processor_proximity_domain(struct acpi_table_processor_affinity *pa) +{ + int pxm; + + pxm = pa->proximity_domain; + if (ia64_platform_is("sn2")) + pxm += pa->reserved[0] << 8; + return pxm; +} + +static int get_memory_proximity_domain(struct acpi_table_memory_affinity *ma) +{ + int pxm; + + pxm = ma->proximity_domain; + if (ia64_platform_is("sn2")) + pxm += ma->reserved1[0] << 8; + return pxm; +} + /* * ACPI 2.0 SLIT (System Locality Information Table) * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf @@ -437,13 +463,20 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit) void __init acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa) { + int pxm; + + if (!pa->flags.enabled) + return; + + pxm = get_processor_proximity_domain(pa); + /* record this node in proximity bitmap */ - pxm_bit_set(pa->proximity_domain); + pxm_bit_set(pxm); node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | (pa->lsapic_eid); /* nid should be overridden as logical node id later */ - node_cpuid[srat_num_cpus].nid = pa->proximity_domain; + node_cpuid[srat_num_cpus].nid = pxm; srat_num_cpus++; } @@ -451,10 +484,10 @@ void __init acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) { unsigned long paddr, size; - u8 pxm; + int pxm; struct node_memblk_s *p, *q, *pend; - pxm = ma->proximity_domain; + pxm = get_memory_proximity_domain(ma); /* fill node memory chunk structure */ paddr = ma->base_addr_hi; @@ -618,9 +651,9 @@ unsigned long __init acpi_find_rsdp(void) { unsigned long rsdp_phys = 0; - if (efi.acpi20) - rsdp_phys = __pa(efi.acpi20); - else if (efi.acpi) + if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) + rsdp_phys = efi.acpi20; + else if (efi.acpi != EFI_INVALID_TABLE_ADDR) printk(KERN_WARNING PREFIX "v1.0/r0.71 tables no longer supported\n"); return rsdp_phys; diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 9990320b6f9..12cfedce73b 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -458,24 +458,33 @@ efi_init (void) printk(KERN_INFO "EFI v%u.%.02u by %s:", efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); + efi.mps = EFI_INVALID_TABLE_ADDR; + efi.acpi = EFI_INVALID_TABLE_ADDR; + efi.acpi20 = EFI_INVALID_TABLE_ADDR; + efi.smbios = EFI_INVALID_TABLE_ADDR; + efi.sal_systab = EFI_INVALID_TABLE_ADDR; + efi.boot_info = EFI_INVALID_TABLE_ADDR; + efi.hcdp = EFI_INVALID_TABLE_ADDR; + efi.uga = EFI_INVALID_TABLE_ADDR; + for (i = 0; i < (int) efi.systab->nr_tables; i++) { if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { - efi.mps = __va(config_tables[i].table); + efi.mps = config_tables[i].table; printk(" MPS=0x%lx", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { - efi.acpi20 = __va(config_tables[i].table); + efi.acpi20 = config_tables[i].table; printk(" ACPI 2.0=0x%lx", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { - efi.acpi = __va(config_tables[i].table); + efi.acpi = config_tables[i].table; printk(" ACPI=0x%lx", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { - efi.smbios = __va(config_tables[i].table); + efi.smbios = config_tables[i].table; printk(" SMBIOS=0x%lx", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) { - efi.sal_systab = __va(config_tables[i].table); + efi.sal_systab = config_tables[i].table; printk(" SALsystab=0x%lx", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { - efi.hcdp = __va(config_tables[i].table); + efi.hcdp = config_tables[i].table; printk(" HCDP=0x%lx", config_tables[i].table); } } @@ -677,27 +686,34 @@ EXPORT_SYMBOL(efi_mem_attributes); /* * Determines whether the memory at phys_addr supports the desired * attribute (WB, UC, etc). If this returns 1, the caller can safely - * access *size bytes at phys_addr with the specified attribute. + * access size bytes at phys_addr with the specified attribute. */ -static int -efi_mem_attribute_range (unsigned long phys_addr, unsigned long *size, u64 attr) +int +efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, u64 attr) { + unsigned long end = phys_addr + size; efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); - unsigned long md_end; - if (!md || (md->attribute & attr) != attr) + /* + * Some firmware doesn't report MMIO regions in the EFI memory + * map. The Intel BigSur (a.k.a. HP i2000) has this problem. + * On those platforms, we have to assume UC is valid everywhere. + */ + if (!md || (md->attribute & attr) != attr) { + if (attr == EFI_MEMORY_UC && !efi_memmap_has_mmio()) + return 1; return 0; + } do { - md_end = efi_md_end(md); - if (phys_addr + *size <= md_end) + unsigned long md_end = efi_md_end(md); + + if (end <= md_end) return 1; md = efi_memory_descriptor(md_end); - if (!md || (md->attribute & attr) != attr) { - *size = md_end - phys_addr; - return 1; - } + if (!md || (md->attribute & attr) != attr) + return 0; } while (md); return 0; } @@ -708,7 +724,7 @@ efi_mem_attribute_range (unsigned long phys_addr, unsigned long *size, u64 attr) * control access size. */ int -valid_phys_addr_range (unsigned long phys_addr, unsigned long *size) +valid_phys_addr_range (unsigned long phys_addr, unsigned long size) { return efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB); } @@ -723,7 +739,7 @@ valid_phys_addr_range (unsigned long phys_addr, unsigned long *size) * because that doesn't appear in the boot-time EFI memory map. */ int -valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long *size) +valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long size) { if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB)) return 1; @@ -731,14 +747,6 @@ valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long *size) if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_UC)) return 1; - /* - * Some firmware doesn't report MMIO regions in the EFI memory map. - * The Intel BigSur (a.k.a. HP i2000) has this problem. In this - * case, we can't use the EFI memory map to validate mmap requests. - */ - if (!efi_memmap_has_mmio()) - return 1; - return 0; } diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 930fdfca6dd..750e8e7fbdc 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1102,9 +1102,6 @@ skip_rbs_switch: st8 [r2]=r8 st8 [r3]=r10 .work_pending: - tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context? -(p6) br.cond.sptk.few .sigdelayed - ;; tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0? (p6) br.cond.sptk.few .notify #ifdef CONFIG_PREEMPT @@ -1131,17 +1128,6 @@ skip_rbs_switch: (pLvSys)br.cond.sptk.few .work_pending_syscall_end br.cond.sptk.many .work_processed_kernel // don't re-check -// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where -// it could not be delivered. Deliver it now. The signal might be for us and -// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed -// signal. - -.sigdelayed: - br.call.sptk.many rp=do_sigdelayed - cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check -(pLvSys)br.cond.sptk.few .work_pending_syscall_end - br.cond.sptk.many .work_processed_kernel // re-check - .work_pending_syscall_end: adds r2=PT(R8)+16,r12 adds r3=PT(R10)+16,r12 @@ -1619,5 +1605,6 @@ sys_call_table: data8 sys_ni_syscall // reserved for pselect data8 sys_ni_syscall // 1295 reserved for ppoll data8 sys_unshare + data8 sys_splice .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S index e1e4aba9ecd..7c99e6ec3da 100644 --- a/arch/ia64/kernel/gate.lds.S +++ b/arch/ia64/kernel/gate.lds.S @@ -59,6 +59,7 @@ SECTIONS *(.dynbss) *(.bss .bss.* .gnu.linkonce.b.*) *(__ex_table) + *(__mca_table) } } diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 574084f343f..7956eb9058f 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -9,54 +9,65 @@ * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> * - * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O APIC code. - * In particular, we now have separate handlers for edge - * and level triggered interrupts. - * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation - * PCI to vector mapping, shared PCI interrupts. - * 00/10/27 D. Mosberger Document things a bit more to make them more understandable. - * Clean up much of the old IOSAPIC cruft. - * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts and fixes for - * ACPI S5(SoftOff) support. + * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O + * APIC code. In particular, we now have separate + * handlers for edge and level triggered + * interrupts. + * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector + * allocation PCI to vector mapping, shared PCI + * interrupts. + * 00/10/27 D. Mosberger Document things a bit more to make them more + * understandable. Clean up much of the old + * IOSAPIC cruft. + * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts + * and fixes for ACPI S5(SoftOff) support. * 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT - * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt vectors in - * iosapic_set_affinity(), initializations for - * /proc/irq/#/smp_affinity + * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt + * vectors in iosapic_set_affinity(), + * initializations for /proc/irq/#/smp_affinity * 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing. * 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq - * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping - * error + * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to + * IOSAPIC mapping error * 02/07/29 T. Kochi Allocate interrupt vectors dynamically - * 02/08/04 T. Kochi Cleaned up terminology (irq, global system interrupt, vector, etc.) - * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's pci_irq code. + * 02/08/04 T. Kochi Cleaned up terminology (irq, global system + * interrupt, vector, etc.) + * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's + * pci_irq code. * 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC. - * Remove iosapic_address & gsi_base from external interfaces. - * Rationalize __init/__devinit attributes. + * Remove iosapic_address & gsi_base from + * external interfaces. Rationalize + * __init/__devinit attributes. * 04/12/04 Ashok Raj <ashok.raj@intel.com> Intel Corporation 2004 - * Updated to work with irq migration necessary for CPU Hotplug + * Updated to work with irq migration necessary + * for CPU Hotplug */ /* - * Here is what the interrupt logic between a PCI device and the kernel looks like: + * Here is what the interrupt logic between a PCI device and the kernel looks + * like: * - * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD). The - * device is uniquely identified by its bus--, and slot-number (the function - * number does not matter here because all functions share the same interrupt - * lines). + * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, + * INTD). The device is uniquely identified by its bus-, and slot-number + * (the function number does not matter here because all functions share + * the same interrupt lines). * - * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC controller. - * Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level - * triggered and use the same polarity). Each interrupt line has a unique Global - * System Interrupt (GSI) number which can be calculated as the sum of the controller's - * base GSI number and the IOSAPIC pin number to which the line connects. + * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC + * controller. Multiple interrupt lines may have to share the same + * IOSAPIC pin (if they're level triggered and use the same polarity). + * Each interrupt line has a unique Global System Interrupt (GSI) number + * which can be calculated as the sum of the controller's base GSI number + * and the IOSAPIC pin number to which the line connects. * - * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the IOSAPIC pin - * into the IA-64 interrupt vector. This interrupt vector is then sent to the CPU. + * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the + * IOSAPIC pin into the IA-64 interrupt vector. This interrupt vector is then + * sent to the CPU. * - * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is used as - * architecture-independent interrupt handling mechanism in Linux. As an - * IRQ is a number, we have to have IA-64 interrupt vector number <-> IRQ number - * mapping. On smaller systems, we use one-to-one mapping between IA-64 vector and - * IRQ. A platform can implement platform_irq_to_vector(irq) and + * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is + * used as architecture-independent interrupt handling mechanism in Linux. + * As an IRQ is a number, we have to have + * IA-64 interrupt vector number <-> IRQ number mapping. On smaller + * systems, we use one-to-one mapping between IA-64 vector and IRQ. A + * platform can implement platform_irq_to_vector(irq) and * platform_local_vector_to_irq(vector) APIs to differentiate the mapping. * Please see also include/asm-ia64/hw_irq.h for those APIs. * @@ -64,9 +75,9 @@ * * PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ * - * Note: The term "IRQ" is loosely used everywhere in Linux kernel to describe interrupts. - * Now we use "IRQ" only for Linux IRQ's. ISA IRQ (isa_irq) is the only exception in this - * source code. + * Note: The term "IRQ" is loosely used everywhere in Linux kernel to + * describeinterrupts. Now we use "IRQ" only for Linux IRQ's. ISA IRQ + * (isa_irq) is the only exception in this source code. */ #include <linux/config.h> @@ -90,7 +101,6 @@ #include <asm/ptrace.h> #include <asm/system.h> - #undef DEBUG_INTERRUPT_ROUTING #ifdef DEBUG_INTERRUPT_ROUTING @@ -99,36 +109,46 @@ #define DBG(fmt...) #endif -#define NR_PREALLOCATE_RTE_ENTRIES (PAGE_SIZE / sizeof(struct iosapic_rte_info)) +#define NR_PREALLOCATE_RTE_ENTRIES \ + (PAGE_SIZE / sizeof(struct iosapic_rte_info)) #define RTE_PREALLOCATED (1) static DEFINE_SPINLOCK(iosapic_lock); -/* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. */ +/* + * These tables map IA-64 vectors to the IOSAPIC pin that generates this + * vector. + */ struct iosapic_rte_info { - struct list_head rte_list; /* node in list of RTEs sharing the same vector */ + struct list_head rte_list; /* node in list of RTEs sharing the + * same vector */ char __iomem *addr; /* base address of IOSAPIC */ - unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */ + unsigned int gsi_base; /* first GSI assigned to this + * IOSAPIC */ char rte_index; /* IOSAPIC RTE index */ int refcnt; /* reference counter */ unsigned int flags; /* flags */ } ____cacheline_aligned; static struct iosapic_intr_info { - struct list_head rtes; /* RTEs using this vector (empty => not an IOSAPIC interrupt) */ + struct list_head rtes; /* RTEs using this vector (empty => + * not an IOSAPIC interrupt) */ int count; /* # of RTEs that shares this vector */ - u32 low32; /* current value of low word of Redirection table entry */ + u32 low32; /* current value of low word of + * Redirection table entry */ unsigned int dest; /* destination CPU physical ID */ unsigned char dmode : 3; /* delivery mode (see iosapic.h) */ - unsigned char polarity: 1; /* interrupt polarity (see iosapic.h) */ + unsigned char polarity: 1; /* interrupt polarity + * (see iosapic.h) */ unsigned char trigger : 1; /* trigger mode (see iosapic.h) */ } iosapic_intr_info[IA64_NUM_VECTORS]; static struct iosapic { char __iomem *addr; /* base address of IOSAPIC */ - unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */ - unsigned short num_rte; /* number of RTE in this IOSAPIC */ + unsigned int gsi_base; /* first GSI assigned to this + * IOSAPIC */ + unsigned short num_rte; /* # of RTEs on this IOSAPIC */ int rtes_inuse; /* # of RTEs in use on this IOSAPIC */ #ifdef CONFIG_NUMA unsigned short node; /* numa node association via pxm */ @@ -149,7 +169,8 @@ find_iosapic (unsigned int gsi) int i; for (i = 0; i < NR_IOSAPICS; i++) { - if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < iosapic_lists[i].num_rte) + if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < + iosapic_lists[i].num_rte) return i; } @@ -162,7 +183,8 @@ _gsi_to_vector (unsigned int gsi) struct iosapic_intr_info *info; struct iosapic_rte_info *rte; - for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info) + for (info = iosapic_intr_info; info < + iosapic_intr_info + IA64_NUM_VECTORS; ++info) list_for_each_entry(rte, &info->rtes, rte_list) if (rte->gsi_base + rte->rte_index == gsi) return info - iosapic_intr_info; @@ -185,8 +207,8 @@ gsi_to_irq (unsigned int gsi) unsigned long flags; int irq; /* - * XXX fix me: this assumes an identity mapping vetween IA-64 vector and Linux irq - * numbers... + * XXX fix me: this assumes an identity mapping between IA-64 vector + * and Linux irq numbers... */ spin_lock_irqsave(&iosapic_lock, flags); { @@ -197,7 +219,8 @@ gsi_to_irq (unsigned int gsi) return irq; } -static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, unsigned int vec) +static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, + unsigned int vec) { struct iosapic_rte_info *rte; @@ -237,7 +260,9 @@ set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask) for (irq = 0; irq < NR_IRQS; ++irq) if (irq_to_vector(irq) == vector) { - set_irq_affinity_info(irq, (int)(dest & 0xffff), redir); + set_irq_affinity_info(irq, + (int)(dest & 0xffff), + redir); break; } } @@ -259,7 +284,7 @@ set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask) } static void -nop (unsigned int vector) +nop (unsigned int irq) { /* do nothing... */ } @@ -281,7 +306,8 @@ mask_irq (unsigned int irq) { /* set only the mask bit */ low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) { + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, + rte_list) { addr = rte->addr; rte_index = rte->rte_index; iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); @@ -306,7 +332,8 @@ unmask_irq (unsigned int irq) spin_lock_irqsave(&iosapic_lock, flags); { low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) { + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, + rte_list) { addr = rte->addr; rte_index = rte->rte_index; iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); @@ -346,21 +373,25 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) spin_lock_irqsave(&iosapic_lock, flags); { - low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT); + low32 = iosapic_intr_info[vec].low32 & + ~(7 << IOSAPIC_DELIVERY_SHIFT); if (redir) /* change delivery mode to lowest priority */ - low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); + low32 |= (IOSAPIC_LOWEST_PRIORITY << + IOSAPIC_DELIVERY_SHIFT); else /* change delivery mode to fixed */ low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); iosapic_intr_info[vec].low32 = low32; iosapic_intr_info[vec].dest = dest; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) { + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, + rte_list) { addr = rte->addr; rte_index = rte->rte_index; - iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32); + iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), + high32); iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); } } @@ -433,7 +464,8 @@ iosapic_ack_edge_irq (unsigned int irq) * interrupt for real. This prevents IRQ storms from unhandled * devices. */ - if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED)) + if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == + (IRQ_PENDING|IRQ_DISABLED)) mask_irq(irq); } @@ -467,7 +499,8 @@ iosapic_version (char __iomem *addr) return iosapic_read(addr, IOSAPIC_VERSION); } -static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long pol) +static int iosapic_find_sharable_vector (unsigned long trigger, + unsigned long pol) { int i, vector = -1, min_count = -1; struct iosapic_intr_info *info; @@ -482,7 +515,8 @@ static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long po for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) { info = &iosapic_intr_info[i]; if (info->trigger == trigger && info->polarity == pol && - (info->dmode == IOSAPIC_FIXED || info->dmode == IOSAPIC_LOWEST_PRIORITY)) { + (info->dmode == IOSAPIC_FIXED || info->dmode == + IOSAPIC_LOWEST_PRIORITY)) { if (min_count == -1 || info->count < min_count) { vector = i; min_count = info->count; @@ -506,12 +540,15 @@ iosapic_reassign_vector (int vector) new_vector = assign_irq_vector(AUTO_ASSIGN); if (new_vector < 0) panic("%s: out of interrupt vectors!\n", __FUNCTION__); - printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector); + printk(KERN_INFO "Reassigning vector %d to %d\n", + vector, new_vector); memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector], sizeof(struct iosapic_intr_info)); INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes); - list_move(iosapic_intr_info[vector].rtes.next, &iosapic_intr_info[new_vector].rtes); - memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info)); + list_move(iosapic_intr_info[vector].rtes.next, + &iosapic_intr_info[new_vector].rtes); + memset(&iosapic_intr_info[vector], 0, + sizeof(struct iosapic_intr_info)); iosapic_intr_info[vector].low32 = IOSAPIC_MASK; INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); } @@ -524,7 +561,8 @@ static struct iosapic_rte_info *iosapic_alloc_rte (void) int preallocated = 0; if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) { - rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * NR_PREALLOCATE_RTE_ENTRIES); + rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * + NR_PREALLOCATE_RTE_ENTRIES); if (!rte) return NULL; for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++) @@ -532,7 +570,8 @@ static struct iosapic_rte_info *iosapic_alloc_rte (void) } if (!list_empty(&free_rte_list)) { - rte = list_entry(free_rte_list.next, struct iosapic_rte_info, rte_list); + rte = list_entry(free_rte_list.next, struct iosapic_rte_info, + rte_list); list_del(&rte->rte_list); preallocated++; } else { @@ -575,7 +614,8 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, index = find_iosapic(gsi); if (index < 0) { - printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", __FUNCTION__, gsi); + printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", + __FUNCTION__, gsi); return -ENODEV; } @@ -586,7 +626,8 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, if (!rte) { rte = iosapic_alloc_rte(); if (!rte) { - printk(KERN_WARNING "%s: cannot allocate memory\n", __FUNCTION__); + printk(KERN_WARNING "%s: cannot allocate memory\n", + __FUNCTION__); return -ENOMEM; } @@ -602,7 +643,9 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, else if (vector_is_shared(vector)) { struct iosapic_intr_info *info = &iosapic_intr_info[vector]; if (info->trigger != trigger || info->polarity != polarity) { - printk (KERN_WARNING "%s: cannot override the interrupt\n", __FUNCTION__); + printk (KERN_WARNING + "%s: cannot override the interrupt\n", + __FUNCTION__); return -EINVAL; } } @@ -619,8 +662,10 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, idesc = irq_descp(vector); if (idesc->handler != irq_type) { if (idesc->handler != &no_irq_type) - printk(KERN_WARNING "%s: changing vector %d from %s to %s\n", - __FUNCTION__, vector, idesc->handler->typename, irq_type->typename); + printk(KERN_WARNING + "%s: changing vector %d from %s to %s\n", + __FUNCTION__, vector, + idesc->handler->typename, irq_type->typename); idesc->handler = irq_type; } return 0; @@ -631,6 +676,7 @@ get_target_cpu (unsigned int gsi, int vector) { #ifdef CONFIG_SMP static int cpu = -1; + extern int cpe_vector; /* * In case of vector shared by multiple RTEs, all RTEs that @@ -653,6 +699,11 @@ get_target_cpu (unsigned int gsi, int vector) if (!cpu_online(smp_processor_id())) return cpu_physical_id(smp_processor_id()); +#ifdef CONFIG_ACPI + if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR) + return get_cpei_target_cpu(); +#endif + #ifdef CONFIG_NUMA { int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; @@ -675,7 +726,7 @@ get_target_cpu (unsigned int gsi, int vector) if (!num_cpus) goto skip_numa_setup; - /* Use vector assigment to distribute across cpus in node */ + /* Use vector assignment to distribute across cpus in node */ cpu_index = vector % num_cpus; for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++) @@ -697,7 +748,7 @@ skip_numa_setup: } while (!cpu_online(cpu)); return cpu_physical_id(cpu); -#else +#else /* CONFIG_SMP */ return cpu_physical_id(smp_processor_id()); #endif } @@ -749,7 +800,8 @@ again: if (list_empty(&iosapic_intr_info[vector].rtes)) free_irq_vector(vector); spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&irq_descp(vector)->lock, flags); + spin_unlock_irqrestore(&irq_descp(vector)->lock, + flags); goto again; } @@ -758,7 +810,8 @@ again: polarity, trigger); if (err < 0) { spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&irq_descp(vector)->lock, flags); + spin_unlock_irqrestore(&irq_descp(vector)->lock, + flags); return err; } @@ -800,7 +853,8 @@ iosapic_unregister_intr (unsigned int gsi) */ irq = gsi_to_irq(gsi); if (irq < 0) { - printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi); + printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", + gsi); WARN_ON(1); return; } @@ -811,7 +865,9 @@ iosapic_unregister_intr (unsigned int gsi) spin_lock(&iosapic_lock); { if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) { - printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi); + printk(KERN_ERR + "iosapic_unregister_intr(%u) unbalanced\n", + gsi); WARN_ON(1); goto out; } @@ -821,7 +877,8 @@ iosapic_unregister_intr (unsigned int gsi) /* Mask the interrupt */ low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK; - iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), low32); + iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), + low32); /* Remove the rte entry from the list */ list_del(&rte->rte_list); @@ -834,7 +891,9 @@ iosapic_unregister_intr (unsigned int gsi) trigger = iosapic_intr_info[vector].trigger; polarity = iosapic_intr_info[vector].polarity; dest = iosapic_intr_info[vector].dest; - printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n", + printk(KERN_INFO + "GSI %u (%s, %s) -> CPU %d (0x%04x)" + " vector %d unregistered\n", gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), cpu_logical_id(dest), dest, vector); @@ -847,12 +906,15 @@ iosapic_unregister_intr (unsigned int gsi) idesc->handler = &no_irq_type; /* Clear the interrupt information */ - memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info)); + memset(&iosapic_intr_info[vector], 0, + sizeof(struct iosapic_intr_info)); iosapic_intr_info[vector].low32 |= IOSAPIC_MASK; INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); if (idesc->action) { - printk(KERN_ERR "interrupt handlers still exist on IRQ %u\n", irq); + printk(KERN_ERR + "interrupt handlers still exist on" + "IRQ %u\n", irq); WARN_ON(1); } @@ -867,7 +929,6 @@ iosapic_unregister_intr (unsigned int gsi) /* * ACPI calls this when it finds an entry for a platform interrupt. - * Note that the irq_base and IOSAPIC address must be set in iosapic_init(). */ int __init iosapic_register_platform_intr (u32 int_type, unsigned int gsi, @@ -901,13 +962,16 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, mask = 1; break; default: - printk(KERN_ERR "iosapic_register_platform_irq(): invalid int type 0x%x\n", int_type); + printk(KERN_ERR "%s: invalid int type 0x%x\n", __FUNCTION__, + int_type); return -1; } register_intr(gsi, vector, delivery, polarity, trigger); - printk(KERN_INFO "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n", + printk(KERN_INFO + "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)" + " vector %d\n", int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown", int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), @@ -917,10 +981,8 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, return vector; } - /* * ACPI calls this when it finds an entry for a legacy ISA IRQ override. - * Note that the gsi_base and IOSAPIC address must be set in iosapic_init(). */ void __init iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, @@ -949,16 +1011,19 @@ iosapic_system_init (int system_pcat_compat) for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) { iosapic_intr_info[vector].low32 = IOSAPIC_MASK; - INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); /* mark as unused */ + /* mark as unused */ + INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); } pcat_compat = system_pcat_compat; if (pcat_compat) { /* - * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support - * enabled. + * Disable the compatibility mode interrupts (8259 style), + * needs IN/OUT support enabled. */ - printk(KERN_INFO "%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__); + printk(KERN_INFO + "%s: Disabling PC-AT compatible 8259 interrupts\n", + __FUNCTION__); outb(0xff, 0xA1); outb(0xff, 0x21); } @@ -998,10 +1063,7 @@ iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver) base = iosapic_lists[index].gsi_base; end = base + iosapic_lists[index].num_rte - 1; - if (gsi_base < base && gsi_end < base) - continue;/* OK */ - - if (gsi_base > end && gsi_end > end) + if (gsi_end < base || end < gsi_base) continue; /* OK */ return -EBUSY; @@ -1047,12 +1109,14 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base) if ((gsi_base == 0) && pcat_compat) { /* - * Map the legacy ISA devices into the IOSAPIC data. Some of these may - * get reprogrammed later on with data from the ACPI Interrupt Source - * Override table. + * Map the legacy ISA devices into the IOSAPIC data. Some of + * these may get reprogrammed later on with data from the ACPI + * Interrupt Source Override table. */ for (isa_irq = 0; isa_irq < 16; ++isa_irq) - iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE); + iosapic_override_isa_irq(isa_irq, isa_irq, + IOSAPIC_POL_HIGH, + IOSAPIC_EDGE); } return 0; } @@ -1075,7 +1139,8 @@ iosapic_remove (unsigned int gsi_base) if (iosapic_lists[index].rtes_inuse) { err = -EBUSY; - printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n", + printk(KERN_WARNING + "%s: IOSAPIC for GSI base %u is busy\n", __FUNCTION__, gsi_base); goto out; } diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index d33244c3275..5ce908ef9c9 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -163,8 +163,19 @@ void fixup_irqs(void) { unsigned int irq; extern void ia64_process_pending_intr(void); + extern void ia64_disable_timer(void); + extern volatile int time_keeper_id; + + ia64_disable_timer(); + + /* + * Find a new timesync master + */ + if (smp_processor_id() == time_keeper_id) { + time_keeper_id = first_cpu(cpu_online_map); + printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id); + } - ia64_set_itv(1<<16); /* * Phase 1: Locate irq's bound to this cpu and * relocate them for cpu removal. diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index dcd906fe574..829a43cab79 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -865,6 +865,7 @@ ENTRY(interrupt) ;; SAVE_REST ;; + MCA_RECOVER_RANGE(interrupt) alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group mov out0=cr.ivr // pass cr.ivr as first arg add out1=16,sp // pass pointer to pt_regs as second arg diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 50ae8c7d453..789881ca83d 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -34,6 +34,7 @@ #include <asm/pgtable.h> #include <asm/kdebug.h> #include <asm/sections.h> +#include <asm/uaccess.h> extern void jprobe_inst_return(void); @@ -722,13 +723,50 @@ static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr) struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - if (kcb->kprobe_status & KPROBE_HIT_SS) { - resume_execution(cur, regs); - reset_current_kprobe(); + switch(kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the instruction pointer points back to + * the probe address and allow the page fault handler + * to continue as a normal page fault. + */ + regs->cr_iip = ((unsigned long)cur->addr) & ~0xFULL; + ia64_psr(regs)->ri = ((unsigned long)cur->addr) & 0xf; + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accouting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + /* + * Let ia64_do_page_fault() fix it. + */ + break; + default: + break; } return 0; @@ -740,6 +778,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; + if (args->regs && user_mode(args->regs)) + return ret; + switch(val) { case DIE_BREAK: /* err is break number from ia64_bad_break() */ diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c index c3a04ee7f4f..4b0b71d5aef 100644 --- a/arch/ia64/kernel/machvec.c +++ b/arch/ia64/kernel/machvec.c @@ -14,7 +14,15 @@ struct ia64_machine_vector ia64_mv; EXPORT_SYMBOL(ia64_mv); -static struct ia64_machine_vector * +static __initdata const char *mvec_name; +static __init int setup_mvec(char *s) +{ + mvec_name = s; + return 0; +} +early_param("machvec", setup_mvec); + +static struct ia64_machine_vector * __init lookup_machvec (const char *name) { extern struct ia64_machine_vector machvec_start[]; @@ -33,10 +41,13 @@ machvec_init (const char *name) { struct ia64_machine_vector *mv; + if (!name) + name = mvec_name ? mvec_name : acpi_get_sysname(); mv = lookup_machvec(name); - if (!mv) { - panic("generic kernel failed to find machine vector for platform %s!", name); - } + if (!mv) + panic("generic kernel failed to find machine vector for" + " platform %s!", name); + ia64_mv = *mv; printk(KERN_INFO "booting generic kernel on platform %s\n", name); } diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index ee7eec9ee57..8963171788d 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -69,6 +69,7 @@ #include <linux/kernel.h> #include <linux/smp.h> #include <linux/workqueue.h> +#include <linux/cpumask.h> #include <asm/delay.h> #include <asm/kdebug.h> @@ -83,6 +84,7 @@ #include <asm/irq.h> #include <asm/hw_irq.h> +#include "mca_drv.h" #include "entry.h" #if defined(IA64_MCA_DEBUG_INFO) @@ -133,7 +135,7 @@ static int cpe_poll_enabled = 1; extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); -static int mca_init; +static int mca_init __initdata; static void inline @@ -184,7 +186,7 @@ static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES]; * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) * Outputs : None */ -static void +static void __init ia64_log_init(int sal_info_type) { u64 max_size = 0; @@ -282,13 +284,53 @@ ia64_mca_log_sal_error_record(int sal_info_type) } /* - * platform dependent error handling + * search_mca_table + * See if the MCA surfaced in an instruction range + * that has been tagged as recoverable. + * + * Inputs + * first First address range to check + * last Last address range to check + * ip Instruction pointer, address we are looking for + * + * Return value: + * 1 on Success (in the table)/ 0 on Failure (not in the table) */ -#ifndef PLATFORM_MCA_HANDLERS +int +search_mca_table (const struct mca_table_entry *first, + const struct mca_table_entry *last, + unsigned long ip) +{ + const struct mca_table_entry *curr; + u64 curr_start, curr_end; + + curr = first; + while (curr <= last) { + curr_start = (u64) &curr->start_addr + curr->start_addr; + curr_end = (u64) &curr->end_addr + curr->end_addr; + + if ((ip >= curr_start) && (ip <= curr_end)) { + return 1; + } + curr++; + } + return 0; +} + +/* Given an address, look for it in the mca tables. */ +int mca_recover_range(unsigned long addr) +{ + extern struct mca_table_entry __start___mca_table[]; + extern struct mca_table_entry __stop___mca_table[]; + + return search_mca_table(__start___mca_table, __stop___mca_table-1, addr); +} +EXPORT_SYMBOL_GPL(mca_recover_range); #ifdef CONFIG_ACPI int cpe_vector = -1; +int ia64_cpe_irq = -1; static irqreturn_t ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) @@ -359,7 +401,7 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) * Outputs * None */ -static void +static void __init ia64_mca_register_cpev (int cpev) { /* Register the CPE interrupt vector with SAL */ @@ -377,8 +419,6 @@ ia64_mca_register_cpev (int cpev) } #endif /* CONFIG_ACPI */ -#endif /* PLATFORM_MCA_HANDLERS */ - /* * ia64_mca_cmc_vector_setup * @@ -392,7 +432,7 @@ ia64_mca_register_cpev (int cpev) * Outputs * None */ -void +void __cpuinit ia64_mca_cmc_vector_setup (void) { cmcv_reg_t cmcv; @@ -630,6 +670,32 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat) *tnat |= (nat << tslot); } +/* Change the comm field on the MCA/INT task to include the pid that + * was interrupted, it makes for easier debugging. If that pid was 0 + * (swapper or nested MCA/INIT) then use the start of the previous comm + * field suffixed with its cpu. + */ + +static void +ia64_mca_modify_comm(const task_t *previous_current) +{ + char *p, comm[sizeof(current->comm)]; + if (previous_current->pid) + snprintf(comm, sizeof(comm), "%s %d", + current->comm, previous_current->pid); + else { + int l; + if ((p = strchr(previous_current->comm, ' '))) + l = p - previous_current->comm; + else + l = strlen(previous_current->comm); + snprintf(comm, sizeof(comm), "%s %*s %d", + current->comm, l, previous_current->comm, + task_thread_info(previous_current)->cpu); + } + memcpy(current->comm, comm, sizeof(current->comm)); +} + /* On entry to this routine, we are running on the per cpu stack, see * mca_asm.h. The original stack has not been touched by this event. Some of * the original stack's registers will be in the RBS on this stack. This stack @@ -648,7 +714,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, struct ia64_sal_os_state *sos, const char *type) { - char *p, comm[sizeof(current->comm)]; + char *p; ia64_va va; extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ const pal_min_state_area_t *ms = sos->pal_min_state; @@ -721,54 +787,43 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, /* Verify the previous stack state before we change it */ if (user_mode(regs)) { msg = "occurred in user space"; - goto no_mod; - } - if (r13 != sos->prev_IA64_KR_CURRENT) { - msg = "inconsistent previous current and r13"; - goto no_mod; - } - if ((r12 - r13) >= KERNEL_STACK_SIZE) { - msg = "inconsistent r12 and r13"; - goto no_mod; - } - if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) { - msg = "inconsistent ar.bspstore and r13"; - goto no_mod; - } - va.p = old_bspstore; - if (va.f.reg < 5) { - msg = "old_bspstore is in the wrong region"; - goto no_mod; - } - if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) { - msg = "inconsistent ar.bsp and r13"; - goto no_mod; - } - size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8; - if (ar_bspstore + size > r12) { - msg = "no room for blocked state"; + /* previous_current is guaranteed to be valid when the task was + * in user space, so ... + */ + ia64_mca_modify_comm(previous_current); goto no_mod; } - /* Change the comm field on the MCA/INT task to include the pid that - * was interrupted, it makes for easier debugging. If that pid was 0 - * (swapper or nested MCA/INIT) then use the start of the previous comm - * field suffixed with its cpu. - */ - if (previous_current->pid) - snprintf(comm, sizeof(comm), "%s %d", - current->comm, previous_current->pid); - else { - int l; - if ((p = strchr(previous_current->comm, ' '))) - l = p - previous_current->comm; - else - l = strlen(previous_current->comm); - snprintf(comm, sizeof(comm), "%s %*s %d", - current->comm, l, previous_current->comm, - task_thread_info(previous_current)->cpu); + if (!mca_recover_range(ms->pmsa_iip)) { + if (r13 != sos->prev_IA64_KR_CURRENT) { + msg = "inconsistent previous current and r13"; + goto no_mod; + } + if ((r12 - r13) >= KERNEL_STACK_SIZE) { + msg = "inconsistent r12 and r13"; + goto no_mod; + } + if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) { + msg = "inconsistent ar.bspstore and r13"; + goto no_mod; + } + va.p = old_bspstore; + if (va.f.reg < 5) { + msg = "old_bspstore is in the wrong region"; + goto no_mod; + } + if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) { + msg = "inconsistent ar.bsp and r13"; + goto no_mod; + } + size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8; + if (ar_bspstore + size > r12) { + msg = "no room for blocked state"; + goto no_mod; + } } - memcpy(current->comm, comm, sizeof(current->comm)); + + ia64_mca_modify_comm(previous_current); /* Make the original task look blocked. First stack a struct pt_regs, * describing the state at the time of interrupt. mca_asm.S built a @@ -908,7 +963,7 @@ no_mod: static void ia64_wait_for_slaves(int monarch) { - int c, wait = 0; + int c, wait = 0, missing = 0; for_each_online_cpu(c) { if (c == monarch) continue; @@ -919,15 +974,32 @@ ia64_wait_for_slaves(int monarch) } } if (!wait) - return; + goto all_in; for_each_online_cpu(c) { if (c == monarch) continue; if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) { udelay(5*1000000); /* wait 5 seconds for slaves (arbitrary) */ + if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) + missing = 1; break; } } + if (!missing) + goto all_in; + printk(KERN_INFO "OS MCA slave did not rendezvous on cpu"); + for_each_online_cpu(c) { + if (c == monarch) + continue; + if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) + printk(" %d", c); + } + printk("\n"); + return; + +all_in: + printk(KERN_INFO "All OS MCA slaves have reached rendezvous\n"); + return; } /* @@ -953,6 +1025,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, task_t *previous_current; oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ + console_loglevel = 15; /* make sure printks make it to console */ + printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n", + sos->proc_state_param, cpu, sos->monarch); + previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); monarch_cpu = cpu; if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0) @@ -1416,7 +1492,7 @@ static struct irqaction mca_cpep_irqaction = { * format most of the fields. */ -static void +static void __cpuinit format_mca_init_stack(void *mca_data, unsigned long offset, const char *type, int cpu) { @@ -1430,7 +1506,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset, ti->cpu = cpu; p->thread_info = ti; p->state = TASK_UNINTERRUPTIBLE; - __set_bit(cpu, &p->cpus_allowed); + cpu_set(cpu, p->cpus_allowed); INIT_LIST_HEAD(&p->tasks); p->parent = p->real_parent = p->group_leader = p; INIT_LIST_HEAD(&p->children); @@ -1440,15 +1516,17 @@ format_mca_init_stack(void *mca_data, unsigned long offset, /* Do per-CPU MCA-related initialization. */ -void __devinit +void __cpuinit ia64_mca_cpu_init(void *cpu_data) { void *pal_vaddr; + static int first_time = 1; - if (smp_processor_id() == 0) { + if (first_time) { void *mca_data; int cpu; + first_time = 0; mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu) * NR_CPUS + KERNEL_STACK_SIZE); mca_data = (void *)(((unsigned long)mca_data + @@ -1704,6 +1782,7 @@ ia64_mca_late_init(void) desc = irq_descp(irq); desc->status |= IRQ_PER_CPU; setup_irq(irq, &mca_cpe_irqaction); + ia64_cpe_irq = irq; } ia64_mca_register_cpev(cpe_vector); IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__); diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index e883d85906d..37c88eb5587 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -6,6 +6,7 @@ * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com) * Copyright (C) 2005 Silicon Graphics, Inc * Copyright (C) 2005 Keith Owens <kaos@sgi.com> + * Copyright (C) 2006 Russ Anderson <rja@sgi.com> */ #include <linux/config.h> #include <linux/types.h> @@ -121,11 +122,12 @@ mca_page_isolate(unsigned long paddr) */ void -mca_handler_bh(unsigned long paddr) +mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) { - printk(KERN_ERR - "OS_MCA: process [pid: %d](%s) encounters MCA (paddr=%lx)\n", - current->pid, current->comm, paddr); + printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " + "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", + raw_smp_processor_id(), current->pid, current->uid, + iip, ipsr, paddr, current->comm); spin_lock(&mca_bh_lock); switch (mca_page_isolate(paddr)) { @@ -442,21 +444,26 @@ recover_from_read_error(slidx_table_t *slidx, if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) return 0; psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); + psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); /* * Check the privilege level of interrupted context. * If it is user-mode, then terminate affected process. */ - if (psr1->cpl != 0) { + + pmsa = sos->pal_min_state; + if (psr1->cpl != 0 || + ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) { smei = peidx_bus_check(peidx, 0); if (smei->valid.target_identifier) { /* * setup for resume to bottom half of MCA, * "mca_handler_bhhook" */ - pmsa = sos->pal_min_state; - /* pass to bhhook as 1st argument (gr8) */ + /* pass to bhhook as argument (gr8, ...) */ pmsa->pmsa_gr[8-1] = smei->target_identifier; + pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip; + pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr; /* set interrupted return address (but no use) */ pmsa->pmsa_br0 = pmsa->pmsa_iip; /* change resume address to bottom half */ @@ -466,6 +473,7 @@ recover_from_read_error(slidx_table_t *slidx, psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; psr2->cpl = 0; psr2->ri = 0; + psr2->bn = 1; psr2->i = 0; return 1; diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h index e2f6fa1e0ef..31a2e52bb16 100644 --- a/arch/ia64/kernel/mca_drv.h +++ b/arch/ia64/kernel/mca_drv.h @@ -111,3 +111,10 @@ typedef struct slidx_table { slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\ __count; }) +struct mca_table_entry { + int start_addr; /* location-relative starting address of MCA recoverable range */ + int end_addr; /* location-relative ending address of MCA recoverable range */ +}; + +extern const struct mca_table_entry *search_mca_tables (unsigned long addr); +extern int mca_recover_range(unsigned long); diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S index 3f298ee4d00..e6a580d354b 100644 --- a/arch/ia64/kernel/mca_drv_asm.S +++ b/arch/ia64/kernel/mca_drv_asm.S @@ -14,15 +14,12 @@ GLOBAL_ENTRY(mca_handler_bhhook) invala // clear RSE ? - ;; cover ;; clrrrb ;; - alloc r16=ar.pfs,0,2,1,0 // make a new frame - ;; + alloc r16=ar.pfs,0,2,3,0 // make a new frame mov ar.rsc=0 - ;; mov r13=IA64_KR(CURRENT) // current task pointer ;; mov r2=r13 @@ -30,7 +27,6 @@ GLOBAL_ENTRY(mca_handler_bhhook) addl r22=IA64_RBS_OFFSET,r2 ;; mov ar.bspstore=r22 - ;; addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 ;; adds r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 @@ -40,12 +36,12 @@ GLOBAL_ENTRY(mca_handler_bhhook) movl loc1=mca_handler_bh // recovery C function ;; mov out0=r8 // poisoned address + mov out1=r9 // iip + mov out2=r10 // psr mov b6=loc1 ;; mov loc1=rp - ;; - ssm psr.i - ;; + ssm psr.i | psr.ic br.call.sptk.many rp=b6 // does not return ... ;; mov ar.pfs=loc0 @@ -53,5 +49,4 @@ GLOBAL_ENTRY(mca_handler_bhhook) ;; mov r8=r0 br.ret.sptk.many rp - ;; END(mca_handler_bhhook) diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c index a68ce667809..0766493d4d0 100644 --- a/arch/ia64/kernel/numa.c +++ b/arch/ia64/kernel/numa.c @@ -25,7 +25,7 @@ #include <asm/processor.h> #include <asm/smp.h> -u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned; +u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_to_node_map); cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c index 6a4ac7d70b3..bc11bb096f5 100644 --- a/arch/ia64/kernel/patch.c +++ b/arch/ia64/kernel/patch.c @@ -115,7 +115,7 @@ ia64_patch_vtop (unsigned long start, unsigned long end) ia64_srlz_i(); } -void +void __init ia64_patch_mckinley_e9 (unsigned long start, unsigned long end) { static int first_time = 1; @@ -149,7 +149,7 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end) ia64_srlz_i(); } -static void +static void __init patch_fsyscall_table (unsigned long start, unsigned long end) { extern unsigned long fsyscall_table[NR_syscalls]; @@ -166,7 +166,7 @@ patch_fsyscall_table (unsigned long start, unsigned long end) ia64_srlz_i(); } -static void +static void __init patch_brl_fsys_bubble_down (unsigned long start, unsigned long end) { extern char fsys_bubble_down[]; @@ -184,7 +184,7 @@ patch_brl_fsys_bubble_down (unsigned long start, unsigned long end) ia64_srlz_i(); } -void +void __init ia64_patch_gate (void) { # define START(name) ((unsigned long) __start_gate_##name##_patchlist) diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 9c5194b385d..077f21216b6 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -6722,6 +6722,7 @@ __initcall(pfm_init); void pfm_init_percpu (void) { + static int first_time=1; /* * make sure no measurement is active * (may inherit programmed PMCs from EFI). @@ -6734,8 +6735,10 @@ pfm_init_percpu (void) */ pfm_unfreeze_pmu(); - if (smp_processor_id() == 0) + if (first_time) { register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); + first_time=0; + } ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); ia64_srlz_d(); diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 309d59658e5..355d57970ba 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -30,7 +30,6 @@ #include <linux/efi.h> #include <linux/interrupt.h> #include <linux/delay.h> -#include <linux/kprobes.h> #include <asm/cpu.h> #include <asm/delay.h> @@ -738,13 +737,6 @@ void exit_thread (void) { - /* - * Remove function-return probe instances associated with this task - * and put them back on the free list. Do not insert an exit probe for - * this function, it will be disabled by kprobe_flush_task if you do. - */ - kprobe_flush_task(current); - ia64_drop_fpu(current); #ifdef CONFIG_PERFMON /* if needed, stop monitoring and flush state to perfmon context */ diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index eaed14aac6a..9887c8787e7 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1656,8 +1656,14 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, long arg6, long arg7, struct pt_regs regs) { - if (unlikely(current->audit_context)) - audit_syscall_exit(current, AUDITSC_RESULT(regs.r10), regs.r8); + if (unlikely(current->audit_context)) { + int success = AUDITSC_RESULT(regs.r10); + long result = regs.r8; + + if (success != AUDITSC_SUCCESS) + result = -result; + audit_syscall_exit(current, success, result); + } if (test_thread_flag(TIF_SYSCALL_TRACE) && (current->ptrace & PT_PTRACED)) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 3258e09278d..e4dfda1eb7d 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -37,11 +37,11 @@ #include <linux/string.h> #include <linux/threads.h> #include <linux/tty.h> +#include <linux/dmi.h> #include <linux/serial.h> #include <linux/serial_core.h> #include <linux/efi.h> #include <linux/initrd.h> -#include <linux/platform.h> #include <linux/pm.h> #include <linux/cpufreq.h> @@ -131,8 +131,8 @@ EXPORT_SYMBOL(ia64_max_iommu_merge_mask); /* * We use a special marker for the end of memory and it uses the extra (+1) slot */ -struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1]; -int num_rsvd_regions; +struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata; +int num_rsvd_regions __initdata; /* @@ -141,7 +141,7 @@ int num_rsvd_regions; * caller-specified function is called with the memory ranges that remain after filtering. * This routine does not assume the incoming segments are sorted. */ -int +int __init filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) { unsigned long range_start, range_end, prev_start; @@ -177,7 +177,7 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) return 0; } -static void +static void __init sort_regions (struct rsvd_region *rsvd_region, int max) { int j; @@ -218,7 +218,7 @@ __initcall(register_memory); * initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined, * see include/asm-ia64/meminit.h if you need to define more. */ -void +void __init reserve_memory (void) { int n = 0; @@ -270,7 +270,7 @@ reserve_memory (void) * Grab the initrd start and end from the boot parameter struct given us by * the boot loader. */ -void +void __init find_initrd (void) { #ifdef CONFIG_BLK_DEV_INITRD @@ -362,7 +362,7 @@ mark_bsp_online (void) } #ifdef CONFIG_SMP -static void +static void __init check_for_logical_procs (void) { pal_logical_to_physical_t info; @@ -389,6 +389,14 @@ check_for_logical_procs (void) } #endif +static __initdata int nomca; +static __init int setup_nomca(char *s) +{ + nomca = 1; + return 0; +} +early_param("nomca", setup_nomca); + void __init setup_arch (char **cmdline_p) { @@ -402,35 +410,15 @@ setup_arch (char **cmdline_p) efi_init(); io_port_init(); + parse_early_param(); + #ifdef CONFIG_IA64_GENERIC - { - const char *mvec_name = strstr (*cmdline_p, "machvec="); - char str[64]; - - if (mvec_name) { - const char *end; - size_t len; - - mvec_name += 8; - end = strchr (mvec_name, ' '); - if (end) - len = end - mvec_name; - else - len = strlen (mvec_name); - len = min(len, sizeof (str) - 1); - strncpy (str, mvec_name, len); - str[len] = '\0'; - mvec_name = str; - } else - mvec_name = acpi_get_sysname(); - machvec_init(mvec_name); - } + machvec_init(NULL); #endif if (early_console_setup(*cmdline_p) == 0) mark_bsp_online(); - parse_early_param(); #ifdef CONFIG_ACPI /* Initialize the ACPI boot-time table parser */ acpi_table_init(); @@ -446,7 +434,7 @@ setup_arch (char **cmdline_p) find_memory(); /* process SAL system table: */ - ia64_sal_init(efi.sal_systab); + ia64_sal_init(__va(efi.sal_systab)); ia64_setup_printk_clock(); @@ -493,7 +481,7 @@ setup_arch (char **cmdline_p) #endif /* enable IA-64 Machine Check Abort Handling unless disabled */ - if (!strstr(saved_command_line, "nomca")) + if (!nomca) ia64_mca_init(); platform_setup(cmdline_p); @@ -623,7 +611,7 @@ struct seq_operations cpuinfo_op = { .show = show_cpuinfo }; -void +static void __cpuinit identify_cpu (struct cpuinfo_ia64 *c) { union { @@ -700,7 +688,7 @@ setup_per_cpu_areas (void) * In addition, the minimum of the i-cache stride sizes is calculated for * "flush_icache_range()". */ -static void +static void __cpuinit get_max_cacheline_size (void) { unsigned long line_size, max = 1; @@ -763,10 +751,10 @@ get_max_cacheline_size (void) * cpu_init() initializes state that is per-CPU. This function acts * as a 'CPU state barrier', nothing should get across. */ -void +void __cpuinit cpu_init (void) { - extern void __devinit ia64_mmu_init (void *); + extern void __cpuinit ia64_mmu_init (void *); unsigned long num_phys_stacked; pal_vm_info_2_u_t vmi; unsigned int max_ctx; @@ -894,9 +882,16 @@ void sched_cacheflush(void) ia64_sal_cache_flush(3); } -void +void __init check_bugs (void) { ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles, (unsigned long) __end___mckinley_e9_bundles); } + +static int __init run_dmi_scan(void) +{ + dmi_scan_machine(); + return 0; +} +core_initcall(run_dmi_scan); diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 463f6bb44d0..1d7903ee212 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -588,104 +588,3 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) } return 0; } - -/* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it - * could not be delivered. It is important that the target process is not - * allowed to do any more work in user space. Possible cases for the target - * process: - * - * - It is sleeping and will wake up soon. Store the data in the current task, - * the signal will be sent when the current task returns from the next - * interrupt. - * - * - It is running in user context. Store the data in the current task, the - * signal will be sent when the current task returns from the next interrupt. - * - * - It is running in kernel context on this or another cpu and will return to - * user context. Store the data in the target task, the signal will be sent - * to itself when the target task returns to user space. - * - * - It is running in kernel context on this cpu and will sleep before - * returning to user context. Because this is also the current task, the - * signal will not get delivered and the task could sleep indefinitely. - * Store the data in the idle task for this cpu, the signal will be sent - * after the idle task processes its next interrupt. - * - * To cover all cases, store the data in the target task, the current task and - * the idle task on this cpu. Whatever happens, the signal will be delivered - * to the target task before it can do any useful user space work. Multiple - * deliveries have no unwanted side effects. - * - * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts - * disabled. It must not take any locks nor use kernel structures or services - * that require locks. - */ - -/* To ensure that we get the right pid, check its start time. To avoid extra - * include files in thread_info.h, convert the task start_time to unsigned long, - * giving us a cycle time of > 580 years. - */ -static inline unsigned long -start_time_ul(const struct task_struct *t) -{ - return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec; -} - -void -set_sigdelayed(pid_t pid, int signo, int code, void __user *addr) -{ - struct task_struct *t; - unsigned long start_time = 0; - int i; - - for (i = 1; i <= 3; ++i) { - switch (i) { - case 1: - t = find_task_by_pid(pid); - if (t) - start_time = start_time_ul(t); - break; - case 2: - t = current; - break; - default: - t = idle_task(smp_processor_id()); - break; - } - - if (!t) - return; - task_thread_info(t)->sigdelayed.signo = signo; - task_thread_info(t)->sigdelayed.code = code; - task_thread_info(t)->sigdelayed.addr = addr; - task_thread_info(t)->sigdelayed.start_time = start_time; - task_thread_info(t)->sigdelayed.pid = pid; - wmb(); - set_tsk_thread_flag(t, TIF_SIGDELAYED); - } -} - -/* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that - * was detected in MCA/INIT/NMI/PMI context where it could not be delivered. - */ - -void -do_sigdelayed(void) -{ - struct siginfo siginfo; - pid_t pid; - struct task_struct *t; - - clear_thread_flag(TIF_SIGDELAYED); - memset(&siginfo, 0, sizeof(siginfo)); - siginfo.si_signo = current_thread_info()->sigdelayed.signo; - siginfo.si_code = current_thread_info()->sigdelayed.code; - siginfo.si_addr = current_thread_info()->sigdelayed.addr; - pid = current_thread_info()->sigdelayed.pid; - t = find_task_by_pid(pid); - if (!t) - return; - if (current_thread_info()->sigdelayed.start_time != start_time_ul(t)) - return; - force_sig_info(siginfo.si_signo, &siginfo, t); -} diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index b681ef34a86..44e9547878a 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -70,6 +70,12 @@ #endif #ifdef CONFIG_HOTPLUG_CPU +#ifdef CONFIG_PERMIT_BSP_REMOVE +#define bsp_remove_ok 1 +#else +#define bsp_remove_ok 0 +#endif + /* * Store all idle threads, this can be reused instead of creating * a new thread. Also avoids complicated thread destroy functionality @@ -104,7 +110,7 @@ struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0]; /* * ITC synchronization related stuff: */ -#define MASTER 0 +#define MASTER (0) #define SLAVE (SMP_CACHE_BYTES/8) #define NUM_ROUNDS 64 /* magic value */ @@ -151,6 +157,27 @@ char __initdata no_int_routing; unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */ +#ifdef CONFIG_FORCE_CPEI_RETARGET +#define CPEI_OVERRIDE_DEFAULT (1) +#else +#define CPEI_OVERRIDE_DEFAULT (0) +#endif + +unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT; + +static int __init +cmdl_force_cpei(char *str) +{ + int value=0; + + get_option (&str, &value); + force_cpei_retarget = value; + + return 1; +} + +__setup("force_cpei=", cmdl_force_cpei); + static int __init nointroute (char *str) { @@ -161,6 +188,27 @@ nointroute (char *str) __setup("nointroute", nointroute); +static void fix_b0_for_bsp(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + int cpuid; + static int fix_bsp_b0 = 1; + + cpuid = smp_processor_id(); + + /* + * Cache the b0 value on the first AP that comes up + */ + if (!(fix_bsp_b0 && cpuid)) + return; + + sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0]; + printk ("Fixed BSP b0 value from CPU %d\n", cpuid); + + fix_bsp_b0 = 0; +#endif +} + void sync_master (void *arg) { @@ -327,8 +375,9 @@ smp_setup_percpu_timer (void) static void __devinit smp_callin (void) { - int cpuid, phys_id; + int cpuid, phys_id, itc_master; extern void ia64_init_itm(void); + extern volatile int time_keeper_id; #ifdef CONFIG_PERFMON extern void pfm_init_percpu(void); @@ -336,6 +385,7 @@ smp_callin (void) cpuid = smp_processor_id(); phys_id = hard_smp_processor_id(); + itc_master = time_keeper_id; if (cpu_online(cpuid)) { printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n", @@ -343,6 +393,8 @@ smp_callin (void) BUG(); } + fix_b0_for_bsp(); + lock_ipi_calllock(); cpu_set(cpuid, cpu_online_map); unlock_ipi_calllock(); @@ -365,8 +417,8 @@ smp_callin (void) * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls * local_bh_enable(), which bugs out if irqs are not enabled... */ - Dprintk("Going to syncup ITC with BP.\n"); - ia64_sync_itc(0); + Dprintk("Going to syncup ITC with ITC Master.\n"); + ia64_sync_itc(itc_master); } /* @@ -572,32 +624,8 @@ void __devinit smp_prepare_boot_cpu(void) per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; } -/* - * mt_info[] is a temporary store for all info returned by - * PAL_LOGICAL_TO_PHYSICAL, to be copied into cpuinfo_ia64 when the - * specific cpu comes. - */ -static struct { - __u32 socket_id; - __u16 core_id; - __u16 thread_id; - __u16 proc_fixed_addr; - __u8 valid; -} mt_info[NR_CPUS] __devinitdata; - #ifdef CONFIG_HOTPLUG_CPU static inline void -remove_from_mtinfo(int cpu) -{ - int i; - - for_each_cpu(i) - if (mt_info[i].valid && mt_info[i].socket_id == - cpu_data(cpu)->socket_id) - mt_info[i].valid = 0; -} - -static inline void clear_cpu_sibling_map(int cpu) { int i; @@ -626,15 +654,50 @@ remove_siblinginfo(int cpu) /* remove it from all sibling map's */ clear_cpu_sibling_map(cpu); +} + +extern void fixup_irqs(void); - /* if this cpu is the last in the core group, remove all its info - * from mt_info structure +int migrate_platform_irqs(unsigned int cpu) +{ + int new_cpei_cpu; + irq_desc_t *desc = NULL; + cpumask_t mask; + int retval = 0; + + /* + * dont permit CPEI target to removed. */ - if (last) - remove_from_mtinfo(cpu); + if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) { + printk ("CPU (%d) is CPEI Target\n", cpu); + if (can_cpei_retarget()) { + /* + * Now re-target the CPEI to a different processor + */ + new_cpei_cpu = any_online_cpu(cpu_online_map); + mask = cpumask_of_cpu(new_cpei_cpu); + set_cpei_target_cpu(new_cpei_cpu); + desc = irq_descp(ia64_cpe_irq); + /* + * Switch for now, immediatly, we need to do fake intr + * as other interrupts, but need to study CPEI behaviour with + * polling before making changes. + */ + if (desc) { + desc->handler->disable(ia64_cpe_irq); + desc->handler->set_affinity(ia64_cpe_irq, mask); + desc->handler->enable(ia64_cpe_irq); + printk ("Re-targetting CPEI to cpu %d\n", new_cpei_cpu); + } + } + if (!desc) { + printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu); + retval = -EBUSY; + } + } + return retval; } -extern void fixup_irqs(void); /* must be called with cpucontrol mutex held */ int __cpu_disable(void) { @@ -643,8 +706,17 @@ int __cpu_disable(void) /* * dont permit boot processor for now */ - if (cpu == 0) - return -EBUSY; + if (cpu == 0 && !bsp_remove_ok) { + printk ("Your platform does not support removal of BSP\n"); + return (-EBUSY); + } + + cpu_clear(cpu, cpu_online_map); + + if (migrate_platform_irqs(cpu)) { + cpu_set(cpu, cpu_online_map); + return (-EBUSY); + } remove_siblinginfo(cpu); cpu_clear(cpu, cpu_online_map); @@ -776,40 +848,6 @@ init_smp_config(void) ia64_sal_strerror(sal_ret)); } -static inline int __devinit -check_for_mtinfo_index(void) -{ - int i; - - for_each_cpu(i) - if (!mt_info[i].valid) - return i; - - return -1; -} - -/* - * Search the mt_info to find out if this socket's cid/tid information is - * cached or not. If the socket exists, fill in the core_id and thread_id - * in cpuinfo - */ -static int __devinit -check_for_new_socket(__u16 logical_address, struct cpuinfo_ia64 *c) -{ - int i; - __u32 sid = c->socket_id; - - for_each_cpu(i) { - if (mt_info[i].valid && mt_info[i].proc_fixed_addr == logical_address - && mt_info[i].socket_id == sid) { - c->core_id = mt_info[i].core_id; - c->thread_id = mt_info[i].thread_id; - return 1; /* not a new socket */ - } - } - return 0; -} - /* * identify_siblings(cpu) gets called from identify_cpu. This populates the * information related to logical execution units in per_cpu_data structure. @@ -819,14 +857,12 @@ identify_siblings(struct cpuinfo_ia64 *c) { s64 status; u16 pltid; - u64 proc_fixed_addr; - int count, i; pal_logical_to_physical_t info; if (smp_num_cpucores == 1 && smp_num_siblings == 1) return; - if ((status = ia64_pal_logical_to_phys(0, &info)) != PAL_STATUS_SUCCESS) { + if ((status = ia64_pal_logical_to_phys(-1, &info)) != PAL_STATUS_SUCCESS) { printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n", status); return; @@ -835,47 +871,12 @@ identify_siblings(struct cpuinfo_ia64 *c) printk(KERN_ERR "ia64_sal_pltid failed with %ld\n", status); return; } - if ((status = ia64_pal_fixed_addr(&proc_fixed_addr)) != PAL_STATUS_SUCCESS) { - printk(KERN_ERR "ia64_pal_fixed_addr failed with %ld\n", status); - return; - } c->socket_id = (pltid << 8) | info.overview_ppid; c->cores_per_socket = info.overview_cpp; c->threads_per_core = info.overview_tpc; - count = c->num_log = info.overview_num_log; - - /* If the thread and core id information is already cached, then - * we will simply update cpu_info and return. Otherwise, we will - * do the PAL calls and cache core and thread id's of all the siblings. - */ - if (check_for_new_socket(proc_fixed_addr, c)) - return; - - for (i = 0; i < count; i++) { - int index; - - if (i && (status = ia64_pal_logical_to_phys(i, &info)) - != PAL_STATUS_SUCCESS) { - printk(KERN_ERR "ia64_pal_logical_to_phys failed" - " with %ld\n", status); - return; - } - if (info.log2_la == proc_fixed_addr) { - c->core_id = info.log1_cid; - c->thread_id = info.log1_tid; - } + c->num_log = info.overview_num_log; - index = check_for_mtinfo_index(); - /* We will not do the mt_info caching optimization in this case. - */ - if (index < 0) - continue; - - mt_info[index].valid = 1; - mt_info[index].socket_id = c->socket_id; - mt_info[index].core_id = info.log1_cid; - mt_info[index].thread_id = info.log1_tid; - mt_info[index].proc_fixed_addr = info.log2_la; - } + c->core_id = info.log1_cid; + c->thread_id = info.log1_tid; } diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 307d01e15b2..ac167436e93 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -32,7 +32,7 @@ extern unsigned long wall_jiffies; -#define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */ +volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */ #ifdef CONFIG_IA64_DEBUG_IRQ @@ -71,7 +71,7 @@ timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) new_itm += local_cpu_data->itm_delta; - if (smp_processor_id() == TIME_KEEPER_ID) { + if (smp_processor_id() == time_keeper_id) { /* * Here we are in the timer irq handler. We have irqs locally * disabled, but we don't know if the timer_bh is running on @@ -236,6 +236,11 @@ static struct irqaction timer_irqaction = { .name = "timer" }; +void __devinit ia64_disable_timer(void) +{ + ia64_set_itv(1 << 16); +} + void __init time_init (void) { diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 6e5eea19fa6..3b6fd798c4d 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -36,7 +36,7 @@ int arch_register_cpu(int num) parent = &sysfs_nodes[cpu_to_node(num)]; #endif /* CONFIG_NUMA */ -#ifdef CONFIG_ACPI +#if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU) /* * If CPEI cannot be re-targetted, and this is * CPEI target, then dont create the control file diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index dabd6c32641..7c1ddc8ac44 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -30,19 +30,19 @@ extern spinlock_t timerlist_lock; fpswa_interface_t *fpswa_interface; EXPORT_SYMBOL(fpswa_interface); -struct notifier_block *ia64die_chain; +ATOMIC_NOTIFIER_HEAD(ia64die_chain); int register_die_notifier(struct notifier_block *nb) { - return notifier_chain_register(&ia64die_chain, nb); + return atomic_notifier_chain_register(&ia64die_chain, nb); } EXPORT_SYMBOL_GPL(register_die_notifier); int unregister_die_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&ia64die_chain, nb); + return atomic_notifier_chain_unregister(&ia64die_chain, nb); } EXPORT_SYMBOL_GPL(unregister_die_notifier); diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 73af6267d2e..783600fe52b 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -70,34 +70,18 @@ SECTIONS __stop___ex_table = .; } - .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET) - { - __start___vtop_patchlist = .; - *(.data.patch.vtop) - __end___vtop_patchlist = .; - } - - .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET) + /* MCA table */ + . = ALIGN(16); + __mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET) { - __start___mckinley_e9_bundles = .; - *(.data.patch.mckinley_e9) - __end___mckinley_e9_bundles = .; + __start___mca_table = .; + *(__mca_table) + __stop___mca_table = .; } /* Global data */ _data = .; -#if defined(CONFIG_IA64_GENERIC) - /* Machine Vector */ - . = ALIGN(16); - .machvec : AT(ADDR(.machvec) - LOAD_OFFSET) - { - machvec_start = .; - *(.machvec) - machvec_end = .; - } -#endif - /* Unwind info & table: */ . = ALIGN(8); .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) @@ -154,6 +138,32 @@ SECTIONS *(.initcall7.init) __initcall_end = .; } + + .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET) + { + __start___vtop_patchlist = .; + *(.data.patch.vtop) + __end___vtop_patchlist = .; + } + + .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET) + { + __start___mckinley_e9_bundles = .; + *(.data.patch.mckinley_e9) + __end___mckinley_e9_bundles = .; + } + +#if defined(CONFIG_IA64_GENERIC) + /* Machine Vector */ + . = ALIGN(16); + .machvec : AT(ADDR(.machvec) - LOAD_OFFSET) + { + machvec_start = .; + *(.machvec) + machvec_end = .; + } +#endif + __con_initcall_start = .; .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { *(.con_initcall.init) } |