diff options
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r-- | arch/ia64/kernel/acpi.c | 14 | ||||
-rw-r--r-- | arch/ia64/kernel/entry.S | 14 | ||||
-rw-r--r-- | arch/ia64/kernel/iosapic.c | 6 | ||||
-rw-r--r-- | arch/ia64/kernel/irq.c | 13 | ||||
-rw-r--r-- | arch/ia64/kernel/mca.c | 90 | ||||
-rw-r--r-- | arch/ia64/kernel/perfmon.c | 5 | ||||
-rw-r--r-- | arch/ia64/kernel/signal.c | 101 | ||||
-rw-r--r-- | arch/ia64/kernel/smpboot.c | 114 | ||||
-rw-r--r-- | arch/ia64/kernel/time.c | 9 | ||||
-rw-r--r-- | arch/ia64/kernel/topology.c | 2 |
10 files changed, 208 insertions, 160 deletions
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index ecd44bdc839..4722ec51c70 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -284,19 +284,24 @@ acpi_parse_plat_int_src(acpi_table_entry_header * header, return 0; } +#ifdef CONFIG_HOTPLUG_CPU unsigned int can_cpei_retarget(void) { extern int cpe_vector; + extern unsigned int force_cpei_retarget; /* * Only if CPEI is supported and the override flag * is present, otherwise return that its re-targettable * if we are in polling mode. */ - if (cpe_vector > 0 && !acpi_cpei_override) - return 0; - else - return 1; + if (cpe_vector > 0) { + if (acpi_cpei_override || force_cpei_retarget) + return 1; + else + return 0; + } + return 1; } unsigned int is_cpu_cpei_target(unsigned int cpu) @@ -315,6 +320,7 @@ void set_cpei_target_cpu(unsigned int cpu) { acpi_cpei_phys_cpuid = cpu_physical_id(cpu); } +#endif unsigned int get_cpei_target_cpu(void) { diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 930fdfca6dd..0e3eda99e54 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1102,9 +1102,6 @@ skip_rbs_switch: st8 [r2]=r8 st8 [r3]=r10 .work_pending: - tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context? -(p6) br.cond.sptk.few .sigdelayed - ;; tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0? (p6) br.cond.sptk.few .notify #ifdef CONFIG_PREEMPT @@ -1131,17 +1128,6 @@ skip_rbs_switch: (pLvSys)br.cond.sptk.few .work_pending_syscall_end br.cond.sptk.many .work_processed_kernel // don't re-check -// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where -// it could not be delivered. Deliver it now. The signal might be for us and -// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed -// signal. - -.sigdelayed: - br.call.sptk.many rp=do_sigdelayed - cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check -(pLvSys)br.cond.sptk.few .work_pending_syscall_end - br.cond.sptk.many .work_processed_kernel // re-check - .work_pending_syscall_end: adds r2=PT(R8)+16,r12 adds r3=PT(R10)+16,r12 diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 574084f343f..8832c553230 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -631,6 +631,7 @@ get_target_cpu (unsigned int gsi, int vector) { #ifdef CONFIG_SMP static int cpu = -1; + extern int cpe_vector; /* * In case of vector shared by multiple RTEs, all RTEs that @@ -653,6 +654,11 @@ get_target_cpu (unsigned int gsi, int vector) if (!cpu_online(smp_processor_id())) return cpu_physical_id(smp_processor_id()); +#ifdef CONFIG_ACPI + if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR) + return get_cpei_target_cpu(); +#endif + #ifdef CONFIG_NUMA { int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index d33244c3275..5ce908ef9c9 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -163,8 +163,19 @@ void fixup_irqs(void) { unsigned int irq; extern void ia64_process_pending_intr(void); + extern void ia64_disable_timer(void); + extern volatile int time_keeper_id; + + ia64_disable_timer(); + + /* + * Find a new timesync master + */ + if (smp_processor_id() == time_keeper_id) { + time_keeper_id = first_cpu(cpu_online_map); + printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id); + } - ia64_set_itv(1<<16); /* * Phase 1: Locate irq's bound to this cpu and * relocate them for cpu removal. diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index ee7eec9ee57..b57e723f194 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -281,14 +281,10 @@ ia64_mca_log_sal_error_record(int sal_info_type) ia64_sal_clear_state_info(sal_info_type); } -/* - * platform dependent error handling - */ -#ifndef PLATFORM_MCA_HANDLERS - #ifdef CONFIG_ACPI int cpe_vector = -1; +int ia64_cpe_irq = -1; static irqreturn_t ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) @@ -377,8 +373,6 @@ ia64_mca_register_cpev (int cpev) } #endif /* CONFIG_ACPI */ -#endif /* PLATFORM_MCA_HANDLERS */ - /* * ia64_mca_cmc_vector_setup * @@ -630,6 +624,32 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat) *tnat |= (nat << tslot); } +/* Change the comm field on the MCA/INT task to include the pid that + * was interrupted, it makes for easier debugging. If that pid was 0 + * (swapper or nested MCA/INIT) then use the start of the previous comm + * field suffixed with its cpu. + */ + +static void +ia64_mca_modify_comm(const task_t *previous_current) +{ + char *p, comm[sizeof(current->comm)]; + if (previous_current->pid) + snprintf(comm, sizeof(comm), "%s %d", + current->comm, previous_current->pid); + else { + int l; + if ((p = strchr(previous_current->comm, ' '))) + l = p - previous_current->comm; + else + l = strlen(previous_current->comm); + snprintf(comm, sizeof(comm), "%s %*s %d", + current->comm, l, previous_current->comm, + task_thread_info(previous_current)->cpu); + } + memcpy(current->comm, comm, sizeof(current->comm)); +} + /* On entry to this routine, we are running on the per cpu stack, see * mca_asm.h. The original stack has not been touched by this event. Some of * the original stack's registers will be in the RBS on this stack. This stack @@ -648,7 +668,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, struct ia64_sal_os_state *sos, const char *type) { - char *p, comm[sizeof(current->comm)]; + char *p; ia64_va va; extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ const pal_min_state_area_t *ms = sos->pal_min_state; @@ -721,6 +741,10 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, /* Verify the previous stack state before we change it */ if (user_mode(regs)) { msg = "occurred in user space"; + /* previous_current is guaranteed to be valid when the task was + * in user space, so ... + */ + ia64_mca_modify_comm(previous_current); goto no_mod; } if (r13 != sos->prev_IA64_KR_CURRENT) { @@ -750,25 +774,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, goto no_mod; } - /* Change the comm field on the MCA/INT task to include the pid that - * was interrupted, it makes for easier debugging. If that pid was 0 - * (swapper or nested MCA/INIT) then use the start of the previous comm - * field suffixed with its cpu. - */ - if (previous_current->pid) - snprintf(comm, sizeof(comm), "%s %d", - current->comm, previous_current->pid); - else { - int l; - if ((p = strchr(previous_current->comm, ' '))) - l = p - previous_current->comm; - else - l = strlen(previous_current->comm); - snprintf(comm, sizeof(comm), "%s %*s %d", - current->comm, l, previous_current->comm, - task_thread_info(previous_current)->cpu); - } - memcpy(current->comm, comm, sizeof(current->comm)); + ia64_mca_modify_comm(previous_current); /* Make the original task look blocked. First stack a struct pt_regs, * describing the state at the time of interrupt. mca_asm.S built a @@ -908,7 +914,7 @@ no_mod: static void ia64_wait_for_slaves(int monarch) { - int c, wait = 0; + int c, wait = 0, missing = 0; for_each_online_cpu(c) { if (c == monarch) continue; @@ -919,15 +925,32 @@ ia64_wait_for_slaves(int monarch) } } if (!wait) - return; + goto all_in; for_each_online_cpu(c) { if (c == monarch) continue; if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) { udelay(5*1000000); /* wait 5 seconds for slaves (arbitrary) */ + if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) + missing = 1; break; } } + if (!missing) + goto all_in; + printk(KERN_INFO "OS MCA slave did not rendezvous on cpu"); + for_each_online_cpu(c) { + if (c == monarch) + continue; + if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) + printk(" %d", c); + } + printk("\n"); + return; + +all_in: + printk(KERN_INFO "All OS MCA slaves have reached rendezvous\n"); + return; } /* @@ -953,6 +976,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, task_t *previous_current; oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ + console_loglevel = 15; /* make sure printks make it to console */ + printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n", + sos->proc_state_param, cpu, sos->monarch); + previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); monarch_cpu = cpu; if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0) @@ -1444,11 +1471,13 @@ void __devinit ia64_mca_cpu_init(void *cpu_data) { void *pal_vaddr; + static int first_time = 1; - if (smp_processor_id() == 0) { + if (first_time) { void *mca_data; int cpu; + first_time = 0; mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu) * NR_CPUS + KERNEL_STACK_SIZE); mca_data = (void *)(((unsigned long)mca_data + @@ -1704,6 +1733,7 @@ ia64_mca_late_init(void) desc = irq_descp(irq); desc->status |= IRQ_PER_CPU; setup_irq(irq, &mca_cpe_irqaction); + ia64_cpe_irq = irq; } ia64_mca_register_cpev(cpe_vector); IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__); diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 9c5194b385d..077f21216b6 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -6722,6 +6722,7 @@ __initcall(pfm_init); void pfm_init_percpu (void) { + static int first_time=1; /* * make sure no measurement is active * (may inherit programmed PMCs from EFI). @@ -6734,8 +6735,10 @@ pfm_init_percpu (void) */ pfm_unfreeze_pmu(); - if (smp_processor_id() == 0) + if (first_time) { register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); + first_time=0; + } ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); ia64_srlz_d(); diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 463f6bb44d0..1d7903ee212 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -588,104 +588,3 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) } return 0; } - -/* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it - * could not be delivered. It is important that the target process is not - * allowed to do any more work in user space. Possible cases for the target - * process: - * - * - It is sleeping and will wake up soon. Store the data in the current task, - * the signal will be sent when the current task returns from the next - * interrupt. - * - * - It is running in user context. Store the data in the current task, the - * signal will be sent when the current task returns from the next interrupt. - * - * - It is running in kernel context on this or another cpu and will return to - * user context. Store the data in the target task, the signal will be sent - * to itself when the target task returns to user space. - * - * - It is running in kernel context on this cpu and will sleep before - * returning to user context. Because this is also the current task, the - * signal will not get delivered and the task could sleep indefinitely. - * Store the data in the idle task for this cpu, the signal will be sent - * after the idle task processes its next interrupt. - * - * To cover all cases, store the data in the target task, the current task and - * the idle task on this cpu. Whatever happens, the signal will be delivered - * to the target task before it can do any useful user space work. Multiple - * deliveries have no unwanted side effects. - * - * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts - * disabled. It must not take any locks nor use kernel structures or services - * that require locks. - */ - -/* To ensure that we get the right pid, check its start time. To avoid extra - * include files in thread_info.h, convert the task start_time to unsigned long, - * giving us a cycle time of > 580 years. - */ -static inline unsigned long -start_time_ul(const struct task_struct *t) -{ - return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec; -} - -void -set_sigdelayed(pid_t pid, int signo, int code, void __user *addr) -{ - struct task_struct *t; - unsigned long start_time = 0; - int i; - - for (i = 1; i <= 3; ++i) { - switch (i) { - case 1: - t = find_task_by_pid(pid); - if (t) - start_time = start_time_ul(t); - break; - case 2: - t = current; - break; - default: - t = idle_task(smp_processor_id()); - break; - } - - if (!t) - return; - task_thread_info(t)->sigdelayed.signo = signo; - task_thread_info(t)->sigdelayed.code = code; - task_thread_info(t)->sigdelayed.addr = addr; - task_thread_info(t)->sigdelayed.start_time = start_time; - task_thread_info(t)->sigdelayed.pid = pid; - wmb(); - set_tsk_thread_flag(t, TIF_SIGDELAYED); - } -} - -/* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that - * was detected in MCA/INIT/NMI/PMI context where it could not be delivered. - */ - -void -do_sigdelayed(void) -{ - struct siginfo siginfo; - pid_t pid; - struct task_struct *t; - - clear_thread_flag(TIF_SIGDELAYED); - memset(&siginfo, 0, sizeof(siginfo)); - siginfo.si_signo = current_thread_info()->sigdelayed.signo; - siginfo.si_code = current_thread_info()->sigdelayed.code; - siginfo.si_addr = current_thread_info()->sigdelayed.addr; - pid = current_thread_info()->sigdelayed.pid; - t = find_task_by_pid(pid); - if (!t) - return; - if (current_thread_info()->sigdelayed.start_time != start_time_ul(t)) - return; - force_sig_info(siginfo.si_signo, &siginfo, t); -} diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index b681ef34a86..c4b633b36da 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -70,6 +70,12 @@ #endif #ifdef CONFIG_HOTPLUG_CPU +#ifdef CONFIG_PERMIT_BSP_REMOVE +#define bsp_remove_ok 1 +#else +#define bsp_remove_ok 0 +#endif + /* * Store all idle threads, this can be reused instead of creating * a new thread. Also avoids complicated thread destroy functionality @@ -104,7 +110,7 @@ struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0]; /* * ITC synchronization related stuff: */ -#define MASTER 0 +#define MASTER (0) #define SLAVE (SMP_CACHE_BYTES/8) #define NUM_ROUNDS 64 /* magic value */ @@ -151,6 +157,27 @@ char __initdata no_int_routing; unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */ +#ifdef CONFIG_FORCE_CPEI_RETARGET +#define CPEI_OVERRIDE_DEFAULT (1) +#else +#define CPEI_OVERRIDE_DEFAULT (0) +#endif + +unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT; + +static int __init +cmdl_force_cpei(char *str) +{ + int value=0; + + get_option (&str, &value); + force_cpei_retarget = value; + + return 1; +} + +__setup("force_cpei=", cmdl_force_cpei); + static int __init nointroute (char *str) { @@ -161,6 +188,27 @@ nointroute (char *str) __setup("nointroute", nointroute); +static void fix_b0_for_bsp(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + int cpuid; + static int fix_bsp_b0 = 1; + + cpuid = smp_processor_id(); + + /* + * Cache the b0 value on the first AP that comes up + */ + if (!(fix_bsp_b0 && cpuid)) + return; + + sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0]; + printk ("Fixed BSP b0 value from CPU %d\n", cpuid); + + fix_bsp_b0 = 0; +#endif +} + void sync_master (void *arg) { @@ -327,8 +375,9 @@ smp_setup_percpu_timer (void) static void __devinit smp_callin (void) { - int cpuid, phys_id; + int cpuid, phys_id, itc_master; extern void ia64_init_itm(void); + extern volatile int time_keeper_id; #ifdef CONFIG_PERFMON extern void pfm_init_percpu(void); @@ -336,6 +385,7 @@ smp_callin (void) cpuid = smp_processor_id(); phys_id = hard_smp_processor_id(); + itc_master = time_keeper_id; if (cpu_online(cpuid)) { printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n", @@ -343,6 +393,8 @@ smp_callin (void) BUG(); } + fix_b0_for_bsp(); + lock_ipi_calllock(); cpu_set(cpuid, cpu_online_map); unlock_ipi_calllock(); @@ -365,8 +417,8 @@ smp_callin (void) * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls * local_bh_enable(), which bugs out if irqs are not enabled... */ - Dprintk("Going to syncup ITC with BP.\n"); - ia64_sync_itc(0); + Dprintk("Going to syncup ITC with ITC Master.\n"); + ia64_sync_itc(itc_master); } /* @@ -635,6 +687,47 @@ remove_siblinginfo(int cpu) } extern void fixup_irqs(void); + +int migrate_platform_irqs(unsigned int cpu) +{ + int new_cpei_cpu; + irq_desc_t *desc = NULL; + cpumask_t mask; + int retval = 0; + + /* + * dont permit CPEI target to removed. + */ + if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) { + printk ("CPU (%d) is CPEI Target\n", cpu); + if (can_cpei_retarget()) { + /* + * Now re-target the CPEI to a different processor + */ + new_cpei_cpu = any_online_cpu(cpu_online_map); + mask = cpumask_of_cpu(new_cpei_cpu); + set_cpei_target_cpu(new_cpei_cpu); + desc = irq_descp(ia64_cpe_irq); + /* + * Switch for now, immediatly, we need to do fake intr + * as other interrupts, but need to study CPEI behaviour with + * polling before making changes. + */ + if (desc) { + desc->handler->disable(ia64_cpe_irq); + desc->handler->set_affinity(ia64_cpe_irq, mask); + desc->handler->enable(ia64_cpe_irq); + printk ("Re-targetting CPEI to cpu %d\n", new_cpei_cpu); + } + } + if (!desc) { + printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu); + retval = -EBUSY; + } + } + return retval; +} + /* must be called with cpucontrol mutex held */ int __cpu_disable(void) { @@ -643,8 +736,17 @@ int __cpu_disable(void) /* * dont permit boot processor for now */ - if (cpu == 0) - return -EBUSY; + if (cpu == 0 && !bsp_remove_ok) { + printk ("Your platform does not support removal of BSP\n"); + return (-EBUSY); + } + + cpu_clear(cpu, cpu_online_map); + + if (migrate_platform_irqs(cpu)) { + cpu_set(cpu, cpu_online_map); + return (-EBUSY); + } remove_siblinginfo(cpu); cpu_clear(cpu, cpu_online_map); diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 307d01e15b2..ac167436e93 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -32,7 +32,7 @@ extern unsigned long wall_jiffies; -#define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */ +volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */ #ifdef CONFIG_IA64_DEBUG_IRQ @@ -71,7 +71,7 @@ timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) new_itm += local_cpu_data->itm_delta; - if (smp_processor_id() == TIME_KEEPER_ID) { + if (smp_processor_id() == time_keeper_id) { /* * Here we are in the timer irq handler. We have irqs locally * disabled, but we don't know if the timer_bh is running on @@ -236,6 +236,11 @@ static struct irqaction timer_irqaction = { .name = "timer" }; +void __devinit ia64_disable_timer(void) +{ + ia64_set_itv(1 << 16); +} + void __init time_init (void) { diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 6e5eea19fa6..3b6fd798c4d 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -36,7 +36,7 @@ int arch_register_cpu(int num) parent = &sysfs_nodes[cpu_to_node(num)]; #endif /* CONFIG_NUMA */ -#ifdef CONFIG_ACPI +#if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU) /* * If CPEI cannot be re-targetted, and this is * CPEI target, then dont create the control file |