diff options
Diffstat (limited to 'arch/x86_64/kernel/process.c')
-rw-r--r-- | arch/x86_64/kernel/process.c | 110 |
1 files changed, 71 insertions, 39 deletions
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index bb6745d13b8..458006ae19f 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -80,25 +80,25 @@ void idle_notifier_unregister(struct notifier_block *n) } EXPORT_SYMBOL(idle_notifier_unregister); -enum idle_state { CPU_IDLE, CPU_NOT_IDLE }; -static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE; - void enter_idle(void) { - __get_cpu_var(idle_state) = CPU_IDLE; + write_pda(isidle, 1); atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); } static void __exit_idle(void) { - __get_cpu_var(idle_state) = CPU_NOT_IDLE; + if (read_pda(isidle) == 0) + return; + write_pda(isidle, 0); atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); } /* Called from interrupts to signify idle end */ void exit_idle(void) { - if (current->pid | read_pda(irqcount)) + /* idle loop has pid 0 */ + if (current->pid) return; __exit_idle(); } @@ -220,6 +220,9 @@ void cpu_idle (void) play_dead(); enter_idle(); idle(); + /* In many cases the interrupt that ended idle + has already called exit_idle. But some idle + loops can be woken up without interrupt. */ __exit_idle(); } @@ -350,6 +353,7 @@ void exit_thread(void) kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; + clear_thread_flag(TIF_IO_BITMAP); /* * Careful, clear this in the TSS too: */ @@ -369,6 +373,7 @@ void flush_thread(void) if (t->flags & _TIF_IA32) current_thread_info()->status |= TS_COMPAT; } + t->flags &= ~_TIF_DEBUG; tsk->thread.debugreg0 = 0; tsk->thread.debugreg1 = 0; @@ -461,7 +466,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); - if (unlikely(me->thread.io_bitmap_ptr != NULL)) { + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { p->thread.io_bitmap_max = 0; @@ -469,6 +474,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, } memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES); + set_tsk_thread_flag(p, TIF_IO_BITMAP); } /* @@ -498,6 +504,40 @@ out: */ #define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) +static inline void __switch_to_xtra(struct task_struct *prev_p, + struct task_struct *next_p, + struct tss_struct *tss) +{ + struct thread_struct *prev, *next; + + prev = &prev_p->thread, + next = &next_p->thread; + + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { + loaddebug(next, 0); + loaddebug(next, 1); + loaddebug(next, 2); + loaddebug(next, 3); + /* no 4 and 5 */ + loaddebug(next, 6); + loaddebug(next, 7); + } + + if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { + /* + * Copy the relevant range of the IO bitmap. + * Normally this is 128 bytes or less: + */ + memcpy(tss->io_bitmap, next->io_bitmap_ptr, + max(prev->io_bitmap_max, next->io_bitmap_max)); + } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { + /* + * Clear any possible leftover bits: + */ + memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); + } +} + /* * switch_to(x,y) should switch tasks from x to y. * @@ -515,6 +555,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); + /* we're going to use this soon, after a few expensive things */ + if (next_p->fpu_counter>5) + prefetch(&next->i387.fxsave); + /* * Reload esp0, LDT and the page table pointer: */ @@ -583,41 +627,29 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) And the AMD workaround requires it to be after DS reload. */ unlazy_fpu(prev_p); write_pda(kernelstack, - task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); - + (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); +#ifdef CONFIG_CC_STACKPROTECTOR + write_pda(stack_canary, next_p->stack_canary); /* - * Now maybe reload the debug registers + * Build time only check to make sure the stack_canary is at + * offset 40 in the pda; this is a gcc ABI requirement */ - if (unlikely(next->debugreg7)) { - loaddebug(next, 0); - loaddebug(next, 1); - loaddebug(next, 2); - loaddebug(next, 3); - /* no 4 and 5 */ - loaddebug(next, 6); - loaddebug(next, 7); - } - + BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); +#endif - /* - * Handle the IO bitmap - */ - if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) { - if (next->io_bitmap_ptr) - /* - * Copy the relevant range of the IO bitmap. - * Normally this is 128 bytes or less: - */ - memcpy(tss->io_bitmap, next->io_bitmap_ptr, - max(prev->io_bitmap_max, next->io_bitmap_max)); - else { - /* - * Clear any possible leftover bits: - */ - memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); - } - } + /* + * Now maybe reload the debug registers and handle I/O bitmaps + */ + if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) + || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) + __switch_to_xtra(prev_p, next_p, tss); + /* If the task has used fpu the last 5 timeslices, just do a full + * restore of the math state immediately to avoid the trap; the + * chances of needing FPU soon are obviously high now + */ + if (next_p->fpu_counter>5) + math_state_restore(); return prev_p; } @@ -834,7 +866,7 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) unsigned long arch_align_stack(unsigned long sp) { - if (randomize_va_space) + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) sp -= get_random_int() % 8192; return sp & ~0xf; } |