diff options
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r-- | arch/ia64/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/ia64/kernel/acpi.c | 3 | ||||
-rw-r--r-- | arch/ia64/kernel/cpufreq/acpi-cpufreq.c | 15 | ||||
-rw-r--r-- | arch/ia64/kernel/crash.c | 223 | ||||
-rw-r--r-- | arch/ia64/kernel/crash_dump.c | 48 | ||||
-rw-r--r-- | arch/ia64/kernel/efi.c | 71 | ||||
-rw-r--r-- | arch/ia64/kernel/entry.S | 2 | ||||
-rw-r--r-- | arch/ia64/kernel/ia64_ksyms.c | 1 | ||||
-rw-r--r-- | arch/ia64/kernel/iosapic.c | 21 | ||||
-rw-r--r-- | arch/ia64/kernel/jprobes.S | 3 | ||||
-rw-r--r-- | arch/ia64/kernel/kprobes.c | 230 | ||||
-rw-r--r-- | arch/ia64/kernel/machine_kexec.c | 136 | ||||
-rw-r--r-- | arch/ia64/kernel/mca.c | 13 | ||||
-rw-r--r-- | arch/ia64/kernel/palinfo.c | 24 | ||||
-rw-r--r-- | arch/ia64/kernel/perfmon.c | 15 | ||||
-rw-r--r-- | arch/ia64/kernel/perfmon_montecito.h | 12 | ||||
-rw-r--r-- | arch/ia64/kernel/process.c | 10 | ||||
-rw-r--r-- | arch/ia64/kernel/relocate_kernel.S | 334 | ||||
-rw-r--r-- | arch/ia64/kernel/salinfo.c | 8 | ||||
-rw-r--r-- | arch/ia64/kernel/setup.c | 63 | ||||
-rw-r--r-- | arch/ia64/kernel/smp.c | 28 | ||||
-rw-r--r-- | arch/ia64/kernel/smpboot.c | 12 | ||||
-rw-r--r-- | arch/ia64/kernel/topology.c | 8 | ||||
-rw-r--r-- | arch/ia64/kernel/traps.c | 50 |
24 files changed, 1193 insertions, 139 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index cfa099b04cd..098ee605bf5 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -28,6 +28,8 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_PCI_MSI) += msi_ia64.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 73ef4a85b86..ef2fe474f10 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -590,6 +590,9 @@ void __init acpi_numa_arch_fixup(void) */ int acpi_register_gsi(u32 gsi, int triggering, int polarity) { + if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM) + return gsi; + if (has_8259 && gsi < 16) return isa_irq_to_vector(gsi); diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c index 86faf221a07..15c08d52f09 100644 --- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c +++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c @@ -68,7 +68,8 @@ processor_get_pstate ( dprintk("processor_get_pstate\n"); - retval = ia64_pal_get_pstate(&pstate_index); + retval = ia64_pal_get_pstate(&pstate_index, + PAL_GET_PSTATE_TYPE_INSTANT); *value = (u32) pstate_index; if (retval) @@ -91,7 +92,7 @@ extract_clock ( dprintk("extract_clock\n"); for (i = 0; i < data->acpi_data.state_count; i++) { - if (value >= data->acpi_data.states[i].control) + if (value == data->acpi_data.states[i].status) return data->acpi_data.states[i].core_frequency; } return data->acpi_data.states[i-1].core_frequency; @@ -117,11 +118,7 @@ processor_get_freq ( goto migrate_end; } - /* - * processor_get_pstate gets the average frequency since the - * last get. So, do two PAL_get_freq()... - */ - ret = processor_get_pstate(&value); + /* processor_get_pstate gets the instantaneous frequency */ ret = processor_get_pstate(&value); if (ret) { @@ -279,12 +276,10 @@ acpi_cpufreq_cpu_init ( dprintk("acpi_cpufreq_cpu_init\n"); - data = kmalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); + data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); if (!data) return (-ENOMEM); - memset(data, 0, sizeof(struct cpufreq_acpi_io)); - acpi_io_data[cpu] = data; result = acpi_processor_register_performance(&data->acpi_data, cpu); diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c new file mode 100644 index 00000000000..bc2f64d7224 --- /dev/null +++ b/arch/ia64/kernel/crash.c @@ -0,0 +1,223 @@ +/* + * arch/ia64/kernel/crash.c + * + * Architecture specific (ia64) functions for kexec based crash dumps. + * + * Created by: Khalid Aziz <khalid.aziz@hp.com> + * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. + * Copyright (C) 2005 Intel Corp Zou Nan hai <nanhai.zou@intel.com> + * + */ +#include <linux/smp.h> +#include <linux/delay.h> +#include <linux/crash_dump.h> +#include <linux/bootmem.h> +#include <linux/kexec.h> +#include <linux/elfcore.h> +#include <linux/sysctl.h> +#include <linux/init.h> + +#include <asm/kdebug.h> +#include <asm/mca.h> + +int kdump_status[NR_CPUS]; +atomic_t kdump_cpu_freezed; +atomic_t kdump_in_progress; +int kdump_on_init = 1; + +static inline Elf64_Word +*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data, + size_t data_len) +{ + struct elf_note *note = (struct elf_note *)buf; + note->n_namesz = strlen(name) + 1; + note->n_descsz = data_len; + note->n_type = type; + buf += (sizeof(*note) + 3)/4; + memcpy(buf, name, note->n_namesz); + buf += (note->n_namesz + 3)/4; + memcpy(buf, data, data_len); + buf += (data_len + 3)/4; + return buf; +} + +static void +final_note(void *buf) +{ + memset(buf, 0, sizeof(struct elf_note)); +} + +extern void ia64_dump_cpu_regs(void *); + +static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus); + +void +crash_save_this_cpu() +{ + void *buf; + unsigned long cfm, sof, sol; + + int cpu = smp_processor_id(); + struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu); + + elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg); + memset(prstatus, 0, sizeof(*prstatus)); + prstatus->pr_pid = current->pid; + + ia64_dump_cpu_regs(dst); + cfm = dst[43]; + sol = (cfm >> 7) & 0x7f; + sof = cfm & 0x7f; + dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46], + sof - sol); + + buf = (u64 *) per_cpu_ptr(crash_notes, cpu); + if (!buf) + return; + buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus, + sizeof(*prstatus)); + final_note(buf); +} + +static int +kdump_wait_cpu_freeze(void) +{ + int cpu_num = num_online_cpus() - 1; + int timeout = 1000; + while(timeout-- > 0) { + if (atomic_read(&kdump_cpu_freezed) == cpu_num) + return 0; + udelay(1000); + } + return 1; +} + +void +machine_crash_shutdown(struct pt_regs *pt) +{ + /* This function is only called after the system + * has paniced or is otherwise in a critical state. + * The minimum amount of code to allow a kexec'd kernel + * to run successfully needs to happen here. + * + * In practice this means shooting down the other cpus in + * an SMP system. + */ + kexec_disable_iosapic(); +#ifdef CONFIG_SMP + kdump_smp_send_stop(); + if (kdump_wait_cpu_freeze() && kdump_on_init) { + //not all cpu response to IPI, send INIT to freeze them + kdump_smp_send_init(); + } +#endif +} + +static void +machine_kdump_on_init(void) +{ + local_irq_disable(); + kexec_disable_iosapic(); + machine_kexec(ia64_kimage); +} + +void +kdump_cpu_freeze(struct unw_frame_info *info, void *arg) +{ + int cpuid; + local_irq_disable(); + cpuid = smp_processor_id(); + crash_save_this_cpu(); + current->thread.ksp = (__u64)info->sw - 16; + atomic_inc(&kdump_cpu_freezed); + kdump_status[cpuid] = 1; + mb(); + if (cpuid == 0) { + for (;;) + cpu_relax(); + } else + ia64_jump_to_sal(&sal_boot_rendez_state[cpuid]); +} + +static int +kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) +{ + struct ia64_mca_notify_die *nd; + struct die_args *args = data; + + if (!kdump_on_init) + return NOTIFY_DONE; + + if (val != DIE_INIT_MONARCH_ENTER && + val != DIE_INIT_SLAVE_ENTER && + val != DIE_MCA_RENDZVOUS_LEAVE && + val != DIE_MCA_MONARCH_LEAVE) + return NOTIFY_DONE; + + nd = (struct ia64_mca_notify_die *)args->err; + /* Reason code 1 means machine check rendezous*/ + if ((val == DIE_INIT_MONARCH_ENTER || DIE_INIT_SLAVE_ENTER) && + nd->sos->rv_rc == 1) + return NOTIFY_DONE; + + switch (val) { + case DIE_INIT_MONARCH_ENTER: + machine_kdump_on_init(); + break; + case DIE_INIT_SLAVE_ENTER: + unw_init_running(kdump_cpu_freeze, NULL); + break; + case DIE_MCA_RENDZVOUS_LEAVE: + if (atomic_read(&kdump_in_progress)) + unw_init_running(kdump_cpu_freeze, NULL); + break; + case DIE_MCA_MONARCH_LEAVE: + /* die_register->signr indicate if MCA is recoverable */ + if (!args->signr) + machine_kdump_on_init(); + break; + } + return NOTIFY_DONE; +} + +#ifdef CONFIG_SYSCTL +static ctl_table kdump_on_init_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "kdump_on_init", + .data = &kdump_on_init, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0 } +}; + +static ctl_table sys_table[] = { + { + .ctl_name = CTL_KERN, + .procname = "kernel", + .mode = 0555, + .child = kdump_on_init_table, + }, + { .ctl_name = 0 } +}; +#endif + +static int +machine_crash_setup(void) +{ + static struct notifier_block kdump_init_notifier_nb = { + .notifier_call = kdump_init_notifier, + }; + int ret; + if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0) + return ret; +#ifdef CONFIG_SYSCTL + register_sysctl_table(sys_table, 0); +#endif + return 0; +} + +__initcall(machine_crash_setup); + diff --git a/arch/ia64/kernel/crash_dump.c b/arch/ia64/kernel/crash_dump.c new file mode 100644 index 00000000000..83b8c91c140 --- /dev/null +++ b/arch/ia64/kernel/crash_dump.c @@ -0,0 +1,48 @@ +/* + * kernel/crash_dump.c - Memory preserving reboot related code. + * + * Created by: Simon Horman <horms@verge.net.au> + * Original code moved from kernel/crash.c + * Original code comment copied from the i386 version of this file + */ + +#include <linux/errno.h> +#include <linux/types.h> + +#include <linux/uaccess.h> + +/** + * copy_oldmem_page - copy one page from "oldmem" + * @pfn: page frame number to be copied + * @buf: target memory address for the copy; this can be in kernel address + * space or user address space (see @userbuf) + * @csize: number of bytes to copy + * @offset: offset in bytes into the page (based on pfn) to begin the copy + * @userbuf: if set, @buf is in user address space, use copy_to_user(), + * otherwise @buf is in kernel address space, use memcpy(). + * + * Copy a page from "oldmem". For this page, there is no pte mapped + * in the current kernel. We stitch up a pte, similar to kmap_atomic. + * + * Calling copy_to_user() in atomic context is not desirable. Hence first + * copying the data to a pre-allocated kernel page and then copying to user + * space in non-atomic context. + */ +ssize_t +copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + vaddr = __va(pfn<<PAGE_SHIFT); + if (userbuf) { + if (copy_to_user(buf, (vaddr + offset), csize)) { + return -EFAULT; + } + } else + memcpy(buf, (vaddr + offset), csize); + return csize; +} + diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index bb8770a177b..0b25a7d4e1e 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -26,6 +26,7 @@ #include <linux/types.h> #include <linux/time.h> #include <linux/efi.h> +#include <linux/kexec.h> #include <asm/io.h> #include <asm/kregs.h> @@ -41,7 +42,7 @@ extern efi_status_t efi_call_phys (void *, ...); struct efi efi; EXPORT_SYMBOL(efi); static efi_runtime_services_t *runtime; -static unsigned long mem_limit = ~0UL, max_addr = ~0UL; +static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL; #define efi_call_virt(f, args...) (*(f))(args) @@ -224,7 +225,7 @@ efi_gettimeofday (struct timespec *ts) } static int -is_available_memory (efi_memory_desc_t *md) +is_memory_available (efi_memory_desc_t *md) { if (!(md->attribute & EFI_MEMORY_WB)) return 0; @@ -421,6 +422,8 @@ efi_init (void) mem_limit = memparse(cp + 4, &cp); } else if (memcmp(cp, "max_addr=", 9) == 0) { max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp)); + } else if (memcmp(cp, "min_addr=", 9) == 0) { + min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp)); } else { while (*cp != ' ' && *cp) ++cp; @@ -428,6 +431,8 @@ efi_init (void) ++cp; } } + if (min_addr != 0UL) + printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20); if (max_addr != ~0UL) printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20); @@ -887,14 +892,15 @@ find_memmap_space (void) } contig_high = GRANULEROUNDDOWN(contig_high); } - if (!is_available_memory(md) || md->type == EFI_LOADER_DATA) + if (!is_memory_available(md) || md->type == EFI_LOADER_DATA) continue; /* Round ends inward to granule boundaries */ as = max(contig_low, md->phys_addr); ae = min(contig_high, efi_md_end(md)); - /* keep within max_addr= command line arg */ + /* keep within max_addr= and min_addr= command line arg */ + as = max(as, min_addr); ae = min(ae, max_addr); if (ae <= as) continue; @@ -962,7 +968,7 @@ efi_memmap_init(unsigned long *s, unsigned long *e) } contig_high = GRANULEROUNDDOWN(contig_high); } - if (!is_available_memory(md)) + if (!is_memory_available(md)) continue; /* @@ -1004,7 +1010,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e) } else ae = efi_md_end(md); - /* keep within max_addr= command line arg */ + /* keep within max_addr= and min_addr= command line arg */ + as = max(as, min_addr); ae = min(ae, max_addr); if (ae <= as) continue; @@ -1116,6 +1123,58 @@ efi_initialize_iomem_resources(struct resource *code_resource, */ insert_resource(res, code_resource); insert_resource(res, data_resource); +#ifdef CONFIG_KEXEC + insert_resource(res, &efi_memmap_res); + insert_resource(res, &boot_param_res); + if (crashk_res.end > crashk_res.start) + insert_resource(res, &crashk_res); +#endif } } } + +#ifdef CONFIG_KEXEC +/* find a block of memory aligned to 64M exclude reserved regions + rsvd_regions are sorted + */ +unsigned long +kdump_find_rsvd_region (unsigned long size, + struct rsvd_region *r, int n) +{ + int i; + u64 start, end; + u64 alignment = 1UL << _PAGE_SIZE_64M; + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (!efi_wb(md)) + continue; + start = ALIGN(md->phys_addr, alignment); + end = efi_md_end(md); + for (i = 0; i < n; i++) { + if (__pa(r[i].start) >= start && __pa(r[i].end) < end) { + if (__pa(r[i].start) > start + size) + return start; + start = ALIGN(__pa(r[i].end), alignment); + if (i < n-1 && __pa(r[i+1].start) < start + size) + continue; + else + break; + } + } + if (end > start + size) + return start; + } + + printk(KERN_WARNING "Cannot reserve 0x%lx byte of memory for crashdump\n", + size); + return ~0UL; +} +#endif diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 3390b7c5a63..15234ed3a34 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1575,7 +1575,7 @@ sys_call_table: data8 sys_mq_timedreceive // 1265 data8 sys_mq_notify data8 sys_mq_getsetattr - data8 sys_ni_syscall // reserved for kexec_load + data8 sys_kexec_load data8 sys_ni_syscall // reserved for vserver data8 sys_waitid // 1270 data8 sys_add_key diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 879c1817bd1..bd17190bebb 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -14,6 +14,7 @@ EXPORT_SYMBOL(strlen); #include <asm/checksum.h> EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */ +EXPORT_SYMBOL(csum_ipv6_magic); #include <asm/semaphore.h> EXPORT_SYMBOL(__down); diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 60d64950e3c..0fc5fb7865c 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -288,6 +288,27 @@ nop (unsigned int irq) /* do nothing... */ } + +#ifdef CONFIG_KEXEC +void +kexec_disable_iosapic(void) +{ + struct iosapic_intr_info *info; + struct iosapic_rte_info *rte; + u8 vec = 0; + for (info = iosapic_intr_info; info < + iosapic_intr_info + IA64_NUM_VECTORS; ++info, ++vec) { + list_for_each_entry(rte, &info->rtes, + rte_list) { + iosapic_write(rte->addr, + IOSAPIC_RTE_LOW(rte->rte_index), + IOSAPIC_MASK|vec); + iosapic_eoi(rte->addr, vec); + } + } +} +#endif + static void mask_irq (unsigned int irq) { diff --git a/arch/ia64/kernel/jprobes.S b/arch/ia64/kernel/jprobes.S index 5cd6226f44f..621630256c4 100644 --- a/arch/ia64/kernel/jprobes.S +++ b/arch/ia64/kernel/jprobes.S @@ -45,13 +45,14 @@ * to the correct location. */ #include <asm/asmmacro.h> +#include <asm-ia64/break.h> /* * void jprobe_break(void) */ .section .kprobes.text, "ax" ENTRY(jprobe_break) - break.m 0x80300 + break.m __IA64_BREAK_JPROBE END(jprobe_break) /* diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 51217d63285..6cb56dd4056 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -88,6 +88,7 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint slot, { p->ainsn.inst_flag = 0; p->ainsn.target_br_reg = 0; + p->ainsn.slot = slot; /* Check for Break instruction * Bits 37:40 Major opcode to be zero @@ -129,48 +130,6 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint slot, /* * In this function we check to see if the instruction - * on which we are inserting kprobe is supported. - * Returns 0 if supported - * Returns -EINVAL if unsupported - */ -static int __kprobes unsupported_inst(uint template, uint slot, - uint major_opcode, - unsigned long kprobe_inst, - unsigned long addr) -{ - if (bundle_encoding[template][slot] == I) { - switch (major_opcode) { - case 0x0: //I_UNIT_MISC_OPCODE: - /* - * Check for Integer speculation instruction - * - Bit 33-35 to be equal to 0x1 - */ - if (((kprobe_inst >> 33) & 0x7) == 1) { - printk(KERN_WARNING - "Kprobes on speculation inst at <0x%lx> not supported\n", - addr); - return -EINVAL; - } - - /* - * IP relative mov instruction - * - Bit 27-35 to be equal to 0x30 - */ - if (((kprobe_inst >> 27) & 0x1FF) == 0x30) { - printk(KERN_WARNING - "Kprobes on \"mov r1=ip\" at <0x%lx> not supported\n", - addr); - return -EINVAL; - - } - } - } - return 0; -} - - -/* - * In this function we check to see if the instruction * (qp) cmpx.crel.ctype p1,p2=r2,r3 * on which we are inserting kprobe is cmp instruction * with ctype as unc. @@ -206,26 +165,136 @@ out: } /* + * In this function we check to see if the instruction + * on which we are inserting kprobe is supported. + * Returns qp value if supported + * Returns -EINVAL if unsupported + */ +static int __kprobes unsupported_inst(uint template, uint slot, + uint major_opcode, + unsigned long kprobe_inst, + unsigned long addr) +{ + int qp; + + qp = kprobe_inst & 0x3f; + if (is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst)) { + if (slot == 1 && qp) { + printk(KERN_WARNING "Kprobes on cmp unc" + "instruction on slot 1 at <0x%lx>" + "is not supported\n", addr); + return -EINVAL; + + } + qp = 0; + } + else if (bundle_encoding[template][slot] == I) { + if (major_opcode == 0) { + /* + * Check for Integer speculation instruction + * - Bit 33-35 to be equal to 0x1 + */ + if (((kprobe_inst >> 33) & 0x7) == 1) { + printk(KERN_WARNING + "Kprobes on speculation inst at <0x%lx> not supported\n", + addr); + return -EINVAL; + } + /* + * IP relative mov instruction + * - Bit 27-35 to be equal to 0x30 + */ + if (((kprobe_inst >> 27) & 0x1FF) == 0x30) { + printk(KERN_WARNING + "Kprobes on \"mov r1=ip\" at <0x%lx> not supported\n", + addr); + return -EINVAL; + + } + } + else if ((major_opcode == 5) && !(kprobe_inst & (0xFUl << 33)) && + (kprobe_inst & (0x1UL << 12))) { + /* test bit instructions, tbit,tnat,tf + * bit 33-36 to be equal to 0 + * bit 12 to be equal to 1 + */ + if (slot == 1 && qp) { + printk(KERN_WARNING "Kprobes on test bit" + "instruction on slot at <0x%lx>" + "is not supported\n", addr); + return -EINVAL; + } + qp = 0; + } + } + else if (bundle_encoding[template][slot] == B) { + if (major_opcode == 7) { + /* IP-Relative Predict major code is 7 */ + printk(KERN_WARNING "Kprobes on IP-Relative" + "Predict is not supported\n"); + return -EINVAL; + } + else if (major_opcode == 2) { + /* Indirect Predict, major code is 2 + * bit 27-32 to be equal to 10 or 11 + */ + int x6=(kprobe_inst >> 27) & 0x3F; + if ((x6 == 0x10) || (x6 == 0x11)) { + printk(KERN_WARNING "Kprobes on" + "Indirect Predict is not supported\n"); + return -EINVAL; + } + } + } + /* kernel does not use float instruction, here for safety kprobe + * will judge whether it is fcmp/flass/float approximation instruction + */ + else if (unlikely(bundle_encoding[template][slot] == F)) { + if ((major_opcode == 4 || major_opcode == 5) && + (kprobe_inst & (0x1 << 12))) { + /* fcmp/fclass unc instruction */ + if (slot == 1 && qp) { + printk(KERN_WARNING "Kprobes on fcmp/fclass " + "instruction on slot at <0x%lx> " + "is not supported\n", addr); + return -EINVAL; + + } + qp = 0; + } + if ((major_opcode == 0 || major_opcode == 1) && + (kprobe_inst & (0x1UL << 33))) { + /* float Approximation instruction */ + if (slot == 1 && qp) { + printk(KERN_WARNING "Kprobes on float Approx " + "instr at <0x%lx> is not supported\n", + addr); + return -EINVAL; + } + qp = 0; + } + } + return qp; +} + +/* * In this function we override the bundle with * the break instruction at the given slot. */ static void __kprobes prepare_break_inst(uint template, uint slot, uint major_opcode, unsigned long kprobe_inst, - struct kprobe *p) + struct kprobe *p, + int qp) { unsigned long break_inst = BREAK_INST; bundle_t *bundle = &p->opcode.bundle; /* * Copy the original kprobe_inst qualifying predicate(qp) - * to the break instruction iff !is_cmp_ctype_unc_inst - * because for cmp instruction with ctype equal to unc, - * which is a special instruction always needs to be - * executed regradless of qp + * to the break instruction */ - if (!is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst)) - break_inst |= (0x3f & kprobe_inst); + break_inst |= qp; switch (slot) { case 0: @@ -296,12 +365,6 @@ static int __kprobes valid_kprobe_addr(int template, int slot, return -EINVAL; } - if (slot == 1 && bundle_encoding[template][1] != L) { - printk(KERN_WARNING "Inserting kprobes on slot #1 " - "is not supported\n"); - return -EINVAL; - } - return 0; } @@ -427,6 +490,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) unsigned long kprobe_inst=0; unsigned int slot = addr & 0xf, template, major_opcode = 0; bundle_t *bundle; + int qp; bundle = &((kprobe_opcode_t *)kprobe_addr)->bundle; template = bundle->quad0.template; @@ -441,9 +505,9 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) /* Get kprobe_inst and major_opcode from the bundle */ get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); - if (unsupported_inst(template, slot, major_opcode, kprobe_inst, addr)) - return -EINVAL; - + qp = unsupported_inst(template, slot, major_opcode, kprobe_inst, addr); + if (qp < 0) + return -EINVAL; p->ainsn.insn = get_insn_slot(); if (!p->ainsn.insn) @@ -451,37 +515,63 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t)); memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t)); - prepare_break_inst(template, slot, major_opcode, kprobe_inst, p); + prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp); return 0; } void __kprobes arch_arm_kprobe(struct kprobe *p) { - unsigned long addr = (unsigned long)p->addr; - unsigned long arm_addr = addr & ~0xFULL; + unsigned long arm_addr; + bundle_t *src, *dest; + + arm_addr = ((unsigned long)p->addr) & ~0xFUL; + dest = &((kprobe_opcode_t *)arm_addr)->bundle; + src = &p->opcode.bundle; flush_icache_range((unsigned long)p->ainsn.insn, (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t)); - memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t)); + switch (p->ainsn.slot) { + case 0: + dest->quad0.slot0 = src->quad0.slot0; + break; + case 1: + dest->quad1.slot1_p1 = src->quad1.slot1_p1; + break; + case 2: + dest->quad1.slot2 = src->quad1.slot2; + break; + } flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t)); } void __kprobes arch_disarm_kprobe(struct kprobe *p) { - unsigned long addr = (unsigned long)p->addr; - unsigned long arm_addr = addr & ~0xFULL; + unsigned long arm_addr; + bundle_t *src, *dest; + arm_addr = ((unsigned long)p->addr) & ~0xFUL; + dest = &((kprobe_opcode_t *)arm_addr)->bundle; /* p->ainsn.insn contains the original unaltered kprobe_opcode_t */ - memcpy((char *) arm_addr, (char *) p->ainsn.insn, - sizeof(kprobe_opcode_t)); + src = &p->ainsn.insn->bundle; + switch (p->ainsn.slot) { + case 0: + dest->quad0.slot0 = src->quad0.slot0; + break; + case 1: + dest->quad1.slot1_p1 = src->quad1.slot1_p1; + break; + case 2: + dest->quad1.slot2 = src->quad1.slot2; + break; + } flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t)); } void __kprobes arch_remove_kprobe(struct kprobe *p) { mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn); + free_insn_slot(p->ainsn.insn, 0); mutex_unlock(&kprobe_mutex); } /* @@ -807,7 +897,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, switch(val) { case DIE_BREAK: /* err is break number from ia64_bad_break() */ - if (args->err == 0x80200 || args->err == 0x80300 || args->err == 0) + if ((args->err >> 12) == (__IA64_BREAK_KPROBE >> 12) + || args->err == __IA64_BREAK_JPROBE + || args->err == 0) if (pre_kprobes_handler(args)) ret = NOTIFY_STOP; break; @@ -851,7 +943,7 @@ static void ia64_get_bsp_cfm(struct unw_frame_info *info, void *arg) return; } } while (unw_unwind(info) >= 0); - lp->bsp = 0; + lp->bsp = NULL; lp->cfm = 0; return; } diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c new file mode 100644 index 00000000000..e2ccc9f660c --- /dev/null +++ b/arch/ia64/kernel/machine_kexec.c @@ -0,0 +1,136 @@ +/* + * arch/ia64/kernel/machine_kexec.c + * + * Handle transition of Linux booting another kernel + * Copyright (C) 2005 Hewlett-Packard Development Comapny, L.P. + * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com> + * Copyright (C) 2006 Intel Corp, Zou Nan hai <nanhai.zou@intel.com> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/mm.h> +#include <linux/kexec.h> +#include <linux/cpu.h> +#include <linux/irq.h> +#include <asm/mmu_context.h> +#include <asm/setup.h> +#include <asm/delay.h> +#include <asm/meminit.h> + +typedef NORET_TYPE void (*relocate_new_kernel_t)( + unsigned long indirection_page, + unsigned long start_address, + struct ia64_boot_param *boot_param, + unsigned long pal_addr) ATTRIB_NORET; + +struct kimage *ia64_kimage; + +struct resource efi_memmap_res = { + .name = "EFI Memory Map", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +struct resource boot_param_res = { + .name = "Boot parameter", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + + +/* + * Do what every setup is needed on image and the + * reboot code buffer to allow us to avoid allocations + * later. + */ +int machine_kexec_prepare(struct kimage *image) +{ + void *control_code_buffer; + const unsigned long *func; + + func = (unsigned long *)&relocate_new_kernel; + /* Pre-load control code buffer to minimize work in kexec path */ + control_code_buffer = page_address(image->control_code_page); + memcpy((void *)control_code_buffer, (const void *)func[0], + relocate_new_kernel_size); + flush_icache_range((unsigned long)control_code_buffer, + (unsigned long)control_code_buffer + relocate_new_kernel_size); + ia64_kimage = image; + + return 0; +} + +void machine_kexec_cleanup(struct kimage *image) +{ +} + +void machine_shutdown(void) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + cpu_down(cpu); + } + kexec_disable_iosapic(); +} + +/* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. + */ +extern void *efi_get_pal_addr(void); +static void ia64_machine_kexec(struct unw_frame_info *info, void *arg) +{ + struct kimage *image = arg; + relocate_new_kernel_t rnk; + void *pal_addr = efi_get_pal_addr(); + unsigned long code_addr = (unsigned long)page_address(image->control_code_page); + unsigned long vector; + int ii; + + if (image->type == KEXEC_TYPE_CRASH) { + crash_save_this_cpu(); + current->thread.ksp = (__u64)info->sw - 16; + } + + /* Interrupts aren't acceptable while we reboot */ + local_irq_disable(); + + /* Mask CMC and Performance Monitor interrupts */ + ia64_setreg(_IA64_REG_CR_PMV, 1 << 16); + ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16); + + /* Mask ITV and Local Redirect Registers */ + ia64_set_itv(1 << 16); + ia64_set_lrr0(1 << 16); + ia64_set_lrr1(1 << 16); + + /* terminate possible nested in-service interrupts */ + for (ii = 0; ii < 16; ii++) + ia64_eoi(); + + /* unmask TPR and clear any pending interrupts */ + ia64_setreg(_IA64_REG_CR_TPR, 0); + ia64_srlz_d(); + vector = ia64_get_ivr(); + while (vector != IA64_SPURIOUS_INT_VECTOR) { + ia64_eoi(); + vector = ia64_get_ivr(); + } + platform_kernel_launch_event(); + rnk = (relocate_new_kernel_t)&code_addr; + (*rnk)(image->head, image->start, ia64_boot_param, + GRANULEROUNDDOWN((unsigned long) pal_addr)); + BUG(); +} + +void machine_kexec(struct kimage *image) +{ + unw_init_running(ia64_machine_kexec, image); + for(;;); +} diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 7cfa63a98cb..a76add3e76a 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -82,6 +82,7 @@ #include <asm/system.h> #include <asm/sal.h> #include <asm/mca.h> +#include <asm/kexec.h> #include <asm/irq.h> #include <asm/hw_irq.h> @@ -678,7 +679,7 @@ ia64_mca_cmc_vector_enable (void *dummy) * disable the cmc interrupt vector. */ static void -ia64_mca_cmc_vector_disable_keventd(void *unused) +ia64_mca_cmc_vector_disable_keventd(struct work_struct *unused) { on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 1, 0); } @@ -690,7 +691,7 @@ ia64_mca_cmc_vector_disable_keventd(void *unused) * enable the cmc interrupt vector. */ static void -ia64_mca_cmc_vector_enable_keventd(void *unused) +ia64_mca_cmc_vector_enable_keventd(struct work_struct *unused) { on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 1, 0); } @@ -1238,6 +1239,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, } else { /* Dump buffered message to console */ ia64_mlogbuf_finish(1); +#ifdef CONFIG_KEXEC + atomic_set(&kdump_in_progress, 1); + monarch_cpu = -1; +#endif } if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) == NOTIFY_STOP) @@ -1247,8 +1252,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, monarch_cpu = -1; } -static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd, NULL); -static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd, NULL); +static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd); +static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd); /* * ia64_mca_cmc_int_handler diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index 0b546e2b36a..a71df9ae039 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -16,6 +16,7 @@ * 02/05/2001 S.Eranian fixed module support * 10/23/2001 S.Eranian updated pal_perf_mon_info bug fixes * 03/24/2004 Ashok Raj updated to work with CPU Hotplug + * 10/26/2006 Russ Anderson updated processor features to rev 2.2 spec */ #include <linux/types.h> #include <linux/errno.h> @@ -314,13 +315,20 @@ vm_info(char *page) "Protection Key Registers(PKR) : %d\n" "Implemented bits in PKR.key : %d\n" "Hash Tag ID : 0x%x\n" - "Size of RR.rid : %d\n", + "Size of RR.rid : %d\n" + "Max Purges : ", vm_info_1.pal_vm_info_1_s.phys_add_size, vm_info_2.pal_vm_info_2_s.impl_va_msb+1, vm_info_1.pal_vm_info_1_s.max_pkr+1, vm_info_1.pal_vm_info_1_s.key_size, vm_info_1.pal_vm_info_1_s.hash_tag_id, vm_info_2.pal_vm_info_2_s.rid_size); + if (vm_info_2.pal_vm_info_2_s.max_purges == PAL_MAX_PURGES) + p += sprintf(p, "unlimited\n"); + else + p += sprintf(p, "%d\n", + vm_info_2.pal_vm_info_2_s.max_purges ? + vm_info_2.pal_vm_info_2_s.max_purges : 1); } if (ia64_pal_mem_attrib(&attrib) == 0) { @@ -467,7 +475,11 @@ static const char *proc_features[]={ NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL, NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL, - NULL,NULL,NULL,NULL,NULL, + "Unimplemented instruction address fault", + "INIT, PMI, and LINT pins", + "Simple unimplemented instr addresses", + "Variable P-state performance", + "Virtual machine features implemented", "XIP,XPSR,XFS implemented", "XR1-XR3 implemented", "Disable dynamic predicate prediction", @@ -475,7 +487,11 @@ static const char *proc_features[]={ "Disable dynamic data cache prefetch", "Disable dynamic inst cache prefetch", "Disable dynamic branch prediction", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "Disable P-states", + "Enable MCA on Data Poisoning", + "Enable vmsw instruction", + "Enable extern environmental notification", "Disable BINIT on processor time-out", "Disable dynamic power management (DPM)", "Disable coherency", @@ -952,7 +968,6 @@ remove_palinfo_proc_entries(unsigned int hcpu) } } -#ifdef CONFIG_HOTPLUG_CPU static int palinfo_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -974,7 +989,6 @@ static struct notifier_block palinfo_cpu_notifier = .notifier_call = palinfo_cpu_callback, .priority = 0, }; -#endif static int __init palinfo_init(void) diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 3aaede0d698..aa94f60fa8e 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -853,9 +853,8 @@ pfm_context_alloc(void) * allocate context descriptor * must be able to free with interrupts disabled */ - ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL); + ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL); if (ctx) { - memset(ctx, 0, sizeof(pfm_context_t)); DPRINT(("alloc ctx @%p\n", ctx)); } return ctx; @@ -2189,13 +2188,13 @@ pfm_alloc_fd(struct file **cfile) /* * allocate a new dcache entry */ - file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); - if (!file->f_dentry) goto out; + file->f_path.dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); + if (!file->f_path.dentry) goto out; - file->f_dentry->d_op = &pfmfs_dentry_operations; + file->f_path.dentry->d_op = &pfmfs_dentry_operations; - d_add(file->f_dentry, inode); - file->f_vfsmnt = mntget(pfmfs_mnt); + d_add(file->f_path.dentry, inode); + file->f_path.mnt = mntget(pfmfs_mnt); file->f_mapping = inode->i_mapping; file->f_op = &pfm_file_ops; @@ -2302,7 +2301,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon DPRINT(("smpl_buf @%p\n", smpl_buf)); /* allocate vma */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!vma) { DPRINT(("Cannot allocate vma\n")); goto error_kmem; diff --git a/arch/ia64/kernel/perfmon_montecito.h b/arch/ia64/kernel/perfmon_montecito.h index cd06ac6a686..7f8da4c7ca6 100644 --- a/arch/ia64/kernel/perfmon_montecito.h +++ b/arch/ia64/kernel/perfmon_montecito.h @@ -45,16 +45,16 @@ static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={ /* pmc29 */ { PFM_REG_NOTIMPL, }, /* pmc30 */ { PFM_REG_NOTIMPL, }, /* pmc31 */ { PFM_REG_NOTIMPL, }, -/* pmc32 */ { PFM_REG_CONFIG, 0, 0x30f01ffffffffff, 0x30f01ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc33 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc34 */ { PFM_REG_CONFIG, 0, 0xf01ffffffffff, 0xf01ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc35 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc32 */ { PFM_REG_CONFIG, 0, 0x30f01ffffffffffUL, 0x30f01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc33 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc34 */ { PFM_REG_CONFIG, 0, 0xf01ffffffffffUL, 0xf01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc35 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, /* pmc36 */ { PFM_REG_CONFIG, 0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, /* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, {RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}}, /* pmc38 */ { PFM_REG_CONFIG, 0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, /* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}}, /* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}}, -/* pmc41 */ { PFM_REG_CONFIG, 0, 0x00002078fefefefe, 0x1e00018181818, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc41 */ { PFM_REG_CONFIG, 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, /* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}}, { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */ }; @@ -185,7 +185,7 @@ pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cn DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded)); if (cnum == 41 && is_loaded - && (tmpval & 0x1e00000000000) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { + && (tmpval & 0x1e00000000000UL) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval)); diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 51922b98086..17685abaf49 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -268,10 +268,16 @@ cpu_idle (void) /* endless idle loop with no priority at all */ while (1) { - if (can_do_pal_halt) + if (can_do_pal_halt) { current_thread_info()->status &= ~TS_POLLING; - else + /* + * TS_POLLING-cleared state must be visible before we + * test NEED_RESCHED: + */ + smp_mb(); + } else { current_thread_info()->status |= TS_POLLING; + } if (!need_resched()) { void (*idle)(void); diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S new file mode 100644 index 00000000000..ae473e3f2a0 --- /dev/null +++ b/arch/ia64/kernel/relocate_kernel.S @@ -0,0 +1,334 @@ +/* + * arch/ia64/kernel/relocate_kernel.S + * + * Relocate kexec'able kernel and start it + * + * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. + * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com> + * Copyright (C) 2005 Intel Corp, Zou Nan hai <nanhai.zou@intel.com> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ +#include <asm/asmmacro.h> +#include <asm/kregs.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/mca_asm.h> + + /* Must be relocatable PIC code callable as a C function + */ +GLOBAL_ENTRY(relocate_new_kernel) + .prologue + alloc r31=ar.pfs,4,0,0,0 + .body +.reloc_entry: +{ + rsm psr.i| psr.ic + mov r2=ip +} + ;; +{ + flushrs // must be first insn in group + srlz.i +} + ;; + dep r2=0,r2,61,3 //to physical address + ;; + //first switch to physical mode + add r3=1f-.reloc_entry, r2 + movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC + mov ar.rsc=0 // put RSE in enforced lazy mode + ;; + add sp=(memory_stack_end - 16 - .reloc_entry),r2 + add r8=(register_stack - .reloc_entry),r2 + ;; + mov r18=ar.rnat + mov ar.bspstore=r8 + ;; + mov cr.ipsr=r16 + mov cr.iip=r3 + mov cr.ifs=r0 + srlz.i + ;; + mov ar.rnat=r18 + rfi + ;; +1: + //physical mode code begin + mov b6=in1 + dep r28=0,in2,61,3 //to physical address + + // purge all TC entries +#define O(member) IA64_CPUINFO_##member##_OFFSET + GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2 + ;; + addl r17=O(PTCE_STRIDE),r2 + addl r2=O(PTCE_BASE),r2 + ;; + ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base + ld4 r19=[r2],4 // r19=ptce_count[0] + ld4 r21=[r17],4 // r21=ptce_stride[0] + ;; + ld4 r20=[r2] // r20=ptce_count[1] + ld4 r22=[r17] // r22=ptce_stride[1] + mov r24=r0 + ;; + adds r20=-1,r20 + ;; +#undef O +2: + cmp.ltu p6,p7=r24,r19 +(p7) br.cond.dpnt.few 4f + mov ar.lc=r20 +3: + ptc.e r18 + ;; + add r18=r22,r18 + br.cloop.sptk.few 3b + ;; + add r18=r21,r18 + add r24=1,r24 + ;; + br.sptk.few 2b +4: + srlz.i + ;; + //purge TR entry for kernel text and data + movl r16=KERNEL_START + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + ;; + ptr.i r16, r18 + ptr.d r16, r18 + ;; + srlz.i + ;; + + // purge TR entry for percpu data + movl r16=PERCPU_ADDR + mov r18=PERCPU_PAGE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.d + ;; + + // purge TR entry for pal code + mov r16=in3 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.i r16,r18 + ;; + srlz.i + ;; + + // purge TR entry for stack + mov r16=IA64_KR(CURRENT_STACK) + ;; + shl r16=r16,IA64_GRANULE_SHIFT + movl r19=PAGE_OFFSET + ;; + add r16=r19,r16 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.i + ;; + + //copy segments + movl r16=PAGE_MASK + mov r30=in0 // in0 is page_list + br.sptk.few .dest_page + ;; +.loop: + ld8 r30=[in0], 8;; +.dest_page: + tbit.z p0, p6=r30, 0;; // 0x1 dest page +(p6) and r17=r30, r16 +(p6) br.cond.sptk.few .loop;; + + tbit.z p0, p6=r30, 1;; // 0x2 indirect page +(p6) and in0=r30, r16 +(p6) br.cond.sptk.few .loop;; + + tbit.z p0, p6=r30, 2;; // 0x4 end flag +(p6) br.cond.sptk.few .end_loop;; + + tbit.z p6, p0=r30, 3;; // 0x8 source page +(p6) br.cond.sptk.few .loop + + and r18=r30, r16 + + // simple copy page, may optimize later + movl r14=PAGE_SIZE/8 - 1;; + mov ar.lc=r14;; +1: + ld8 r14=[r18], 8;; + st8 [r17]=r14;; + fc.i r17 + add r17=8, r17 + br.ctop.sptk.few 1b + br.sptk.few .loop + ;; + +.end_loop: + sync.i // for fc.i + ;; + srlz.i + ;; + srlz.d + ;; + br.call.sptk.many b0=b6;; + +.align 32 +memory_stack: + .fill 8192, 1, 0 +memory_stack_end: +register_stack: + .fill 8192, 1, 0 +register_stack_end: +relocate_new_kernel_end: +END(relocate_new_kernel) + +.global relocate_new_kernel_size +relocate_new_kernel_size: + data8 relocate_new_kernel_end - relocate_new_kernel + +GLOBAL_ENTRY(ia64_dump_cpu_regs) + .prologue + alloc loc0=ar.pfs,1,2,0,0 + .body + mov ar.rsc=0 // put RSE in enforced lazy mode + add loc1=4*8, in0 // save r4 and r5 first + ;; +{ + flushrs // flush dirty regs to backing store + srlz.i +} + st8 [loc1]=r4, 8 + ;; + st8 [loc1]=r5, 8 + ;; + add loc1=32*8, in0 + mov r4=ar.rnat + ;; + st8 [in0]=r0, 8 // r0 + st8 [loc1]=r4, 8 // rnat + mov r5=pr + ;; + st8 [in0]=r1, 8 // r1 + st8 [loc1]=r5, 8 // pr + mov r4=b0 + ;; + st8 [in0]=r2, 8 // r2 + st8 [loc1]=r4, 8 // b0 + mov r5=b1; + ;; + st8 [in0]=r3, 24 // r3 + st8 [loc1]=r5, 8 // b1 + mov r4=b2 + ;; + st8 [in0]=r6, 8 // r6 + st8 [loc1]=r4, 8 // b2 + mov r5=b3 + ;; + st8 [in0]=r7, 8 // r7 + st8 [loc1]=r5, 8 // b3 + mov r4=b4 + ;; + st8 [in0]=r8, 8 // r8 + st8 [loc1]=r4, 8 // b4 + mov r5=b5 + ;; + st8 [in0]=r9, 8 // r9 + st8 [loc1]=r5, 8 // b5 + mov r4=b6 + ;; + st8 [in0]=r10, 8 // r10 + st8 [loc1]=r5, 8 // b6 + mov r5=b7 + ;; + st8 [in0]=r11, 8 // r11 + st8 [loc1]=r5, 8 // b7 + mov r4=b0 + ;; + st8 [in0]=r12, 8 // r12 + st8 [loc1]=r4, 8 // ip + mov r5=loc0 + ;; + st8 [in0]=r13, 8 // r13 + extr.u r5=r5, 0, 38 // ar.pfs.pfm + mov r4=r0 // user mask + ;; + st8 [in0]=r14, 8 // r14 + st8 [loc1]=r5, 8 // cfm + ;; + st8 [in0]=r15, 8 // r15 + st8 [loc1]=r4, 8 // user mask + mov r5=ar.rsc + ;; + st8 [in0]=r16, 8 // r16 + st8 [loc1]=r5, 8 // ar.rsc + mov r4=ar.bsp + ;; + st8 [in0]=r17, 8 // r17 + st8 [loc1]=r4, 8 // ar.bsp + mov r5=ar.bspstore + ;; + st8 [in0]=r18, 8 // r18 + st8 [loc1]=r5, 8 // ar.bspstore + mov r4=ar.rnat + ;; + st8 [in0]=r19, 8 // r19 + st8 [loc1]=r4, 8 // ar.rnat + mov r5=ar.ccv + ;; + st8 [in0]=r20, 8 // r20 + st8 [loc1]=r5, 8 // ar.ccv + mov r4=ar.unat + ;; + st8 [in0]=r21, 8 // r21 + st8 [loc1]=r4, 8 // ar.unat + mov r5 = ar.fpsr + ;; + st8 [in0]=r22, 8 // r22 + st8 [loc1]=r5, 8 // ar.fpsr + mov r4 = ar.unat + ;; + st8 [in0]=r23, 8 // r23 + st8 [loc1]=r4, 8 // unat + mov r5 = ar.fpsr + ;; + st8 [in0]=r24, 8 // r24 + st8 [loc1]=r5, 8 // fpsr + mov r4 = ar.pfs + ;; + st8 [in0]=r25, 8 // r25 + st8 [loc1]=r4, 8 // ar.pfs + mov r5 = ar.lc + ;; + st8 [in0]=r26, 8 // r26 + st8 [loc1]=r5, 8 // ar.lc + mov r4 = ar.ec + ;; + st8 [in0]=r27, 8 // r27 + st8 [loc1]=r4, 8 // ar.ec + mov r5 = ar.csd + ;; + st8 [in0]=r28, 8 // r28 + st8 [loc1]=r5, 8 // ar.csd + mov r4 = ar.ssd + ;; + st8 [in0]=r29, 8 // r29 + st8 [loc1]=r4, 8 // ar.ssd + ;; + st8 [in0]=r30, 8 // r30 + ;; + st8 [in0]=r31, 8 // r31 + mov ar.pfs=loc0 + ;; + br.ret.sptk.many rp +END(ia64_dump_cpu_regs) + + diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index e63b8ca5344..e375a2f0f2c 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -302,7 +302,7 @@ salinfo_event_open(struct inode *inode, struct file *file) static ssize_t salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file->f_path.dentry->d_inode; struct proc_dir_entry *entry = PDE(inode); struct salinfo_data *data = entry->data; char cmd[32]; @@ -464,7 +464,7 @@ retry: static ssize_t salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file->f_path.dentry->d_inode; struct proc_dir_entry *entry = PDE(inode); struct salinfo_data *data = entry->data; u8 *buf; @@ -525,7 +525,7 @@ salinfo_log_clear(struct salinfo_data *data, int cpu) static ssize_t salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file->f_path.dentry->d_inode; struct proc_dir_entry *entry = PDE(inode); struct salinfo_data *data = entry->data; char cmd[32]; @@ -575,7 +575,6 @@ static struct file_operations salinfo_data_fops = { .write = salinfo_log_write, }; -#ifdef CONFIG_HOTPLUG_CPU static int __devinit salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) { @@ -620,7 +619,6 @@ static struct notifier_block salinfo_cpu_notifier = .notifier_call = salinfo_cpu_callback, .priority = 0, }; -#endif /* CONFIG_HOTPLUG_CPU */ static int __init salinfo_init(void) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index d10404a4175..ad567b8d432 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -43,6 +43,8 @@ #include <linux/initrd.h> #include <linux/pm.h> #include <linux/cpufreq.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> #include <asm/ia32.h> #include <asm/machvec.h> @@ -252,6 +254,47 @@ reserve_memory (void) efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end); n++; +#ifdef CONFIG_KEXEC + /* crashkernel=size@offset specifies the size to reserve for a crash + * kernel. If offset is 0, then it is determined automatically. + * By reserving this memory we guarantee that linux never set's it + * up as a DMA target.Useful for holding code to do something + * appropriate after a kernel panic. + */ + { + char *from = strstr(saved_command_line, "crashkernel="); + unsigned long base, size; + if (from) { + size = memparse(from + 12, &from); + if (*from == '@') + base = memparse(from+1, &from); + else + base = 0; + if (size) { + if (!base) { + sort_regions(rsvd_region, n); + base = kdump_find_rsvd_region(size, + rsvd_region, n); + } + if (base != ~0UL) { + rsvd_region[n].start = + (unsigned long)__va(base); + rsvd_region[n].end = + (unsigned long)__va(base + size); + n++; + crashk_res.start = base; + crashk_res.end = base + size - 1; + } + } + } + efi_memmap_res.start = ia64_boot_param->efi_memmap; + efi_memmap_res.end = efi_memmap_res.start + + ia64_boot_param->efi_memmap_size; + boot_param_res.start = __pa(ia64_boot_param); + boot_param_res.end = boot_param_res.start + + sizeof(*ia64_boot_param); + } +#endif /* end of memory marker */ rsvd_region[n].start = ~0UL; rsvd_region[n].end = ~0UL; @@ -263,6 +306,7 @@ reserve_memory (void) sort_regions(rsvd_region, num_rsvd_regions); } + /** * find_initrd - get initrd parameters from the boot parameter structure * @@ -396,6 +440,21 @@ static __init int setup_nomca(char *s) } early_param("nomca", setup_nomca); +#ifdef CONFIG_PROC_VMCORE +/* elfcorehdr= specifies the location of elf core header + * stored by the crashed kernel. + */ +static int __init parse_elfcorehdr(char *arg) +{ + if (!arg) + return -EINVAL; + + elfcorehdr_addr = memparse(arg, &arg); + return 0; +} +early_param("elfcorehdr", parse_elfcorehdr); +#endif /* CONFIG_PROC_VMCORE */ + void __init setup_arch (char **cmdline_p) { @@ -615,6 +674,7 @@ get_model_name(__u8 family, __u8 model) { char brand[128]; + memcpy(brand, "Unknown", 8); if (ia64_pal_get_brand_info(brand)) { if (family == 0x7) memcpy(brand, "Merced", 7); @@ -622,8 +682,7 @@ get_model_name(__u8 family, __u8 model) case 0: memcpy(brand, "McKinley", 9); break; case 1: memcpy(brand, "Madison", 8); break; case 2: memcpy(brand, "Madison up to 9M cache", 23); break; - } else - memcpy(brand, "Unknown", 8); + } } if (brandname[0] == '\0') return strcpy(brandname, brand); diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 6ab95ceaf9d..f4c7f7769cf 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -30,6 +30,7 @@ #include <linux/delay.h> #include <linux/efi.h> #include <linux/bitops.h> +#include <linux/kexec.h> #include <asm/atomic.h> #include <asm/current.h> @@ -66,6 +67,7 @@ static volatile struct call_data_struct *call_data; #define IPI_CALL_FUNC 0 #define IPI_CPU_STOP 1 +#define IPI_KDUMP_CPU_STOP 3 /* This needs to be cacheline aligned because it is written to by *other* CPUs. */ static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned; @@ -155,7 +157,11 @@ handle_IPI (int irq, void *dev_id) case IPI_CPU_STOP: stop_this_cpu(); break; - +#ifdef CONFIG_KEXEC + case IPI_KDUMP_CPU_STOP: + unw_init_running(kdump_cpu_freeze, NULL); + break; +#endif default: printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which); break; @@ -213,6 +219,26 @@ send_IPI_self (int op) send_IPI_single(smp_processor_id(), op); } +#ifdef CONFIG_KEXEC +void +kdump_smp_send_stop() +{ + send_IPI_allbutself(IPI_KDUMP_CPU_STOP); +} + +void +kdump_smp_send_init() +{ + unsigned int cpu, self_cpu; + self_cpu = smp_processor_id(); + for_each_online_cpu(cpu) { + if (cpu != self_cpu) { + if(kdump_status[cpu] == 0) + platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0); + } + } +} +#endif /* * Called with preeemption disabled. */ diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index f7d7f566814..b21ddecea94 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -463,15 +463,17 @@ struct pt_regs * __devinit idle_regs(struct pt_regs *regs) } struct create_idle { + struct work_struct work; struct task_struct *idle; struct completion done; int cpu; }; void -do_fork_idle(void *_c_idle) +do_fork_idle(struct work_struct *work) { - struct create_idle *c_idle = _c_idle; + struct create_idle *c_idle = + container_of(work, struct create_idle, work); c_idle->idle = fork_idle(c_idle->cpu); complete(&c_idle->done); @@ -482,10 +484,10 @@ do_boot_cpu (int sapicid, int cpu) { int timeout; struct create_idle c_idle = { + .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle), .cpu = cpu, .done = COMPLETION_INITIALIZER(c_idle.done), }; - DECLARE_WORK(work, do_fork_idle, &c_idle); c_idle.idle = get_idle_for_cpu(cpu); if (c_idle.idle) { @@ -497,9 +499,9 @@ do_boot_cpu (int sapicid, int cpu) * We can't use kernel_thread since we must avoid to reschedule the child. */ if (!keventd_up() || current_is_keventd()) - work.func(work.data); + c_idle.work.func(&c_idle.work); else { - schedule_work(&work); + schedule_work(&c_idle.work); wait_for_completion(&c_idle.done); } diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 5629b45e89c..687500ddb4b 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -31,11 +31,11 @@ int arch_register_cpu(int num) { #if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU) /* - * If CPEI cannot be re-targetted, and this is - * CPEI target, then dont create the control file + * If CPEI can be re-targetted or if this is not + * CPEI target, then it is hotpluggable */ - if (!can_cpei_retarget() && is_cpu_cpei_target(num)) - sysfs_cpus[num].cpu.no_control = 1; + if (can_cpei_retarget() || !is_cpu_cpei_target(num)) + sysfs_cpus[num].cpu.hotpluggable = 1; map_cpu_to_node(num, node_cpuid[num].nid); #endif diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index fffa9e0826b..ab684747036 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -307,6 +307,15 @@ fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long return ret.status; } +struct fpu_swa_msg { + unsigned long count; + unsigned long time; +}; +static DEFINE_PER_CPU(struct fpu_swa_msg, cpulast); +DECLARE_PER_CPU(struct fpu_swa_msg, cpulast); +static struct fpu_swa_msg last __cacheline_aligned; + + /* * Handle floating-point assist faults and traps. */ @@ -316,8 +325,6 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) long exception, bundle[2]; unsigned long fault_ip; struct siginfo siginfo; - static int fpu_swa_count = 0; - static unsigned long last_time; fault_ip = regs->cr_iip; if (!fp_fault && (ia64_psr(regs)->ri == 0)) @@ -325,14 +332,37 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) if (copy_from_user(bundle, (void __user *) fault_ip, sizeof(bundle))) return -1; - if (jiffies - last_time > 5*HZ) - fpu_swa_count = 0; - if ((fpu_swa_count < 4) && !(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) { - last_time = jiffies; - ++fpu_swa_count; - printk(KERN_WARNING - "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n", - current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr); + if (!(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) { + unsigned long count, current_jiffies = jiffies; + struct fpu_swa_msg *cp = &__get_cpu_var(cpulast); + + if (unlikely(current_jiffies > cp->time)) + cp->count = 0; + if (unlikely(cp->count < 5)) { + cp->count++; + cp->time = current_jiffies + 5 * HZ; + + /* minimize races by grabbing a copy of count BEFORE checking last.time. */ + count = last.count; + barrier(); + + /* + * Lower 4 bits are used as a count. Upper bits are a sequence + * number that is updated when count is reset. The cmpxchg will + * fail is seqno has changed. This minimizes mutiple cpus + * reseting the count. + */ + if (current_jiffies > last.time) + (void) cmpxchg_acq(&last.count, count, 16 + (count & ~15)); + + /* used fetchadd to atomically update the count */ + if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) { + last.time = current_jiffies + 5 * HZ; + printk(KERN_WARNING + "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n", + current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr); + } + } } exception = fp_emulate(fp_fault, bundle, ®s->cr_ipsr, ®s->ar_fpsr, &isr, ®s->pr, |