From 4595f9620cda8a1e973588e743cf5f8436dd20c6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 Jan 2009 21:58:09 -0800 Subject: x86: change flush_tlb_others to take a const struct cpumask Impact: reduce stack usage, use new cpumask API. This is made a little more tricky by uv_flush_tlb_others which actually alters its argument, for an IPI to be sent to the remaining cpus in the mask. I solve this by allocating a cpumask_var_t for this case and falling back to IPI should this fail. To eliminate temporaries in the caller, all flush_tlb_others implementations now do the this-cpu-elimination step themselves. Note also the curious "cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask)" which has been there since pre-git and yet f->flush_cpumask is always zero at this point. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- arch/x86/xen/enlighten.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bea215230b2..965539ec425 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -634,35 +634,27 @@ static void xen_flush_tlb_single(unsigned long addr) preempt_enable(); } -static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, - unsigned long va) +static void xen_flush_tlb_others(const struct cpumask *cpus, + struct mm_struct *mm, unsigned long va) { struct { struct mmuext_op op; - cpumask_t mask; + DECLARE_BITMAP(mask, NR_CPUS); } *args; - cpumask_t cpumask = *cpus; struct multicall_space mcs; - /* - * A couple of (to be removed) sanity checks: - * - * - current CPU must not be in mask - * - mask must exist :) - */ - BUG_ON(cpus_empty(cpumask)); - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); + BUG_ON(cpumask_empty(cpus)); BUG_ON(!mm); - /* If a CPU which we ran on has gone down, OK. */ - cpus_and(cpumask, cpumask, cpu_online_map); - if (cpus_empty(cpumask)) - return; - mcs = xen_mc_entry(sizeof(*args)); args = mcs.args; - args->mask = cpumask; - args->op.arg2.vcpumask = &args->mask; + args->op.arg2.vcpumask = to_cpumask(args->mask); + + /* Remove us, and any offline CPUS. */ + cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); + if (unlikely(cpumask_empty(to_cpumask(args->mask)))) + goto issue; if (va == TLB_FLUSH_ALL) { args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; @@ -673,6 +665,7 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); +issue: xen_mc_issue(PARAVIRT_LAZY_MMU); } -- cgit v1.2.3-70-g09d2 From 1a51e3a0aed18767cf2762e95456ecfeb0bca5e6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: fold pda into percpu area on SMP [ Based on original patch from Christoph Lameter and Mike Travis. ] Currently pdas and percpu areas are allocated separately. %gs points to local pda and percpu area can be reached using pda->data_offset. This patch folds pda into percpu area. Due to strange gcc requirement, pda needs to be at the beginning of the percpu area so that pda->stack_canary is at %gs:40. To achieve this, a new percpu output section macro - PERCPU_VADDR_PREALLOC() - is added and used to reserve pda sized chunk at the start of the percpu area. After this change, for boot cpu, %gs first points to pda in the data.init area and later during setup_per_cpu_areas() gets updated to point to the actual pda. This means that setup_per_cpu_areas() need to reload %gs for CPU0 while clearing pda area for other cpus as cpu0 already has modified it when control reaches setup_per_cpu_areas(). This patch also removes now unnecessary get_local_pda() and its call sites. A lot of this patch is taken from Mike Travis' "x86_64: Fold pda into per cpu area" patch. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/percpu.h | 8 +++ arch/x86/include/asm/smp.h | 2 - arch/x86/kernel/asm-offsets_64.c | 1 + arch/x86/kernel/cpu/common.c | 6 +-- arch/x86/kernel/head64.c | 8 ++- arch/x86/kernel/head_64.S | 15 ++++-- arch/x86/kernel/setup_percpu.c | 107 ++++++++++++++++---------------------- arch/x86/kernel/smpboot.c | 60 +-------------------- arch/x86/kernel/vmlinux_64.lds.S | 6 ++- arch/x86/xen/smp.c | 10 ---- include/asm-generic/vmlinux.lds.h | 25 ++++++++- 11 files changed, 104 insertions(+), 144 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index df644f3e53e..0ed77cf33f7 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -1,6 +1,14 @@ #ifndef _ASM_X86_PERCPU_H #define _ASM_X86_PERCPU_H +#ifndef __ASSEMBLY__ +#ifdef CONFIG_X86_64 +extern void load_pda_offset(int cpu); +#else +static inline void load_pda_offset(int cpu) { } +#endif +#endif + #ifdef CONFIG_X86_64 #include diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index a8cea7b0943..127415402ea 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -19,8 +19,6 @@ #include #include -extern int __cpuinit get_local_pda(int cpu); - extern int smp_num_siblings; extern unsigned int num_processors; diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 1d41d3f1edb..f8d1b047ef4 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -56,6 +56,7 @@ int main(void) ENTRY(cpunumber); ENTRY(irqstackptr); ENTRY(data_offset); + DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY #ifdef CONFIG_PARAVIRT diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c116c599326..7041acdf557 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -893,10 +893,8 @@ void __cpuinit pda_init(int cpu) /* Setup up data that may be needed in __get_free_pages early */ loadsegment(fs, 0); loadsegment(gs, 0); - /* Memory clobbers used to order PDA accessed */ - mb(); - wrmsrl(MSR_GS_BASE, pda); - mb(); + + load_pda_offset(cpu); pda->cpunumber = cpu; pda->irqcount = -1; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 462d0beccb6..1a311293f73 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,12 +26,18 @@ #include #include -/* boot cpu pda, referenced by head_64.S to initialize %gs for boot CPU */ +#ifndef CONFIG_SMP +/* boot cpu pda, referenced by head_64.S to initialize %gs on UP */ struct x8664_pda _boot_cpu_pda; +#endif void __init x86_64_init_pda(void) { +#ifdef CONFIG_SMP + cpu_pda(0) = (void *)__per_cpu_load; +#else cpu_pda(0) = &_boot_cpu_pda; +#endif cpu_pda(0)->data_offset = (unsigned long)(__per_cpu_load - __per_cpu_start); pda_init(0); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 2f0ab008988..7a995d0e9f7 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -245,10 +245,13 @@ ENTRY(secondary_startup_64) /* Set up %gs. * - * %gs should point to the pda. For initial boot, make %gs point - * to the _boot_cpu_pda in data section. For a secondary CPU, - * initial_gs should be set to its pda address before the CPU runs - * this code. + * On SMP, %gs should point to the per-cpu area. For initial + * boot, make %gs point to the init data section. For a + * secondary CPU,initial_gs should be set to its pda address + * before the CPU runs this code. + * + * On UP, initial_gs points to _boot_cpu_pda and doesn't + * change. */ movl $MSR_GS_BASE,%ecx movq initial_gs(%rip),%rax @@ -278,7 +281,11 @@ ENTRY(secondary_startup_64) ENTRY(initial_code) .quad x86_64_start_kernel ENTRY(initial_gs) +#ifdef CONFIG_SMP + .quad __per_cpu_load +#else .quad _boot_cpu_pda +#endif __FINITDATA ENTRY(stack_start) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 73ab01b297c..63d46280227 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #ifdef CONFIG_DEBUG_PER_CPU_MAPS @@ -65,6 +66,36 @@ static void __init setup_node_to_cpumask_map(void); static inline void setup_node_to_cpumask_map(void) { } #endif +#ifdef CONFIG_X86_64 +void __cpuinit load_pda_offset(int cpu) +{ + /* Memory clobbers used to order pda/percpu accesses */ + mb(); + wrmsrl(MSR_GS_BASE, cpu_pda(cpu)); + mb(); +} + +#endif /* CONFIG_SMP && CONFIG_X86_64 */ + +#ifdef CONFIG_X86_64 + +/* correctly size the local cpu masks */ +static void setup_cpu_local_masks(void) +{ + alloc_bootmem_cpumask_var(&cpu_initialized_mask); + alloc_bootmem_cpumask_var(&cpu_callin_mask); + alloc_bootmem_cpumask_var(&cpu_callout_mask); + alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); +} + +#else /* CONFIG_X86_32 */ + +static inline void setup_cpu_local_masks(void) +{ +} + +#endif /* CONFIG_X86_32 */ + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA /* * Copy data used in early init routines from the initial arrays to the @@ -101,63 +132,7 @@ static void __init setup_per_cpu_maps(void) */ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); -static inline void setup_cpu_pda_map(void) { } - -#elif !defined(CONFIG_SMP) -static inline void setup_cpu_pda_map(void) { } - -#else /* CONFIG_SMP && CONFIG_X86_64 */ - -/* - * Allocate cpu_pda pointer table and array via alloc_bootmem. - */ -static void __init setup_cpu_pda_map(void) -{ - char *pda; - unsigned long size; - int cpu; - - size = roundup(sizeof(struct x8664_pda), cache_line_size()); - - /* allocate cpu_pda array and pointer table */ - { - unsigned long asize = size * (nr_cpu_ids - 1); - - pda = alloc_bootmem(asize); - } - - /* initialize pointer table to static pda's */ - for_each_possible_cpu(cpu) { - if (cpu == 0) { - /* leave boot cpu pda in place */ - continue; - } - cpu_pda(cpu) = (struct x8664_pda *)pda; - cpu_pda(cpu)->in_bootmem = 1; - pda += size; - } -} - -#endif /* CONFIG_SMP && CONFIG_X86_64 */ - -#ifdef CONFIG_X86_64 - -/* correctly size the local cpu masks */ -static void setup_cpu_local_masks(void) -{ - alloc_bootmem_cpumask_var(&cpu_initialized_mask); - alloc_bootmem_cpumask_var(&cpu_callin_mask); - alloc_bootmem_cpumask_var(&cpu_callout_mask); - alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); -} - -#else /* CONFIG_X86_32 */ - -static inline void setup_cpu_local_masks(void) -{ -} - -#endif /* CONFIG_X86_32 */ +#endif /* * Great future plan: @@ -171,9 +146,6 @@ void __init setup_per_cpu_areas(void) int cpu; unsigned long align = 1; - /* Setup cpu_pda map */ - setup_cpu_pda_map(); - /* Copy section for each CPU (we discard the original) */ old_size = PERCPU_ENOUGH_ROOM; align = max_t(unsigned long, PAGE_SIZE, align); @@ -204,8 +176,21 @@ void __init setup_per_cpu_areas(void) cpu, node, __pa(ptr)); } #endif - per_cpu_offset(cpu) = ptr - __per_cpu_start; + memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); +#ifdef CONFIG_X86_64 + cpu_pda(cpu) = (void *)ptr; + + /* + * CPU0 modified pda in the init data area, reload pda + * offset for CPU0 and clear the area for others. + */ + if (cpu == 0) + load_pda_offset(0); + else + memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu))); +#endif + per_cpu_offset(cpu) = ptr - __per_cpu_start; DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 70d846628bb..f2f77ca494d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -744,52 +744,6 @@ static void __cpuinit do_fork_idle(struct work_struct *work) complete(&c_idle->done); } -#ifdef CONFIG_X86_64 - -/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ -static void __ref free_bootmem_pda(struct x8664_pda *oldpda) -{ - if (!after_bootmem) - free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); -} - -/* - * Allocate node local memory for the AP pda. - * - * Must be called after the _cpu_pda pointer table is initialized. - */ -int __cpuinit get_local_pda(int cpu) -{ - struct x8664_pda *oldpda, *newpda; - unsigned long size = sizeof(struct x8664_pda); - int node = cpu_to_node(cpu); - - if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem) - return 0; - - oldpda = cpu_pda(cpu); - newpda = kmalloc_node(size, GFP_ATOMIC, node); - if (!newpda) { - printk(KERN_ERR "Could not allocate node local PDA " - "for CPU %d on node %d\n", cpu, node); - - if (oldpda) - return 0; /* have a usable pda */ - else - return -1; - } - - if (oldpda) { - memcpy(newpda, oldpda, size); - free_bootmem_pda(oldpda); - } - - newpda->in_bootmem = 0; - cpu_pda(cpu) = newpda; - return 0; -} -#endif /* CONFIG_X86_64 */ - static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad @@ -807,16 +761,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) }; INIT_WORK(&c_idle.work, do_fork_idle); -#ifdef CONFIG_X86_64 - /* Allocate node local memory for AP pdas */ - if (cpu > 0) { - boot_error = get_local_pda(cpu); - if (boot_error) - goto restore_state; - /* if can't get pda memory, can't start cpu */ - } -#endif - alternatives_smp_switch(1); c_idle.idle = get_idle_for_cpu(cpu); @@ -931,9 +875,7 @@ do_rest: inquire_remote_apic(apicid); } } -#ifdef CONFIG_X86_64 -restore_state: -#endif + if (boot_error) { /* Try to put things back the way they were before ... */ numa_remove_cpu(cpu); /* was set by numa_add_cpu */ diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index f50280db0df..962f21f1d4d 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -5,6 +5,7 @@ #define LOAD_OFFSET __START_KERNEL_map #include +#include #include #undef i386 /* in case the preprocessor is a 32bit one */ @@ -215,10 +216,11 @@ SECTIONS /* * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the * output PHDR, so the next output section - __data_nosave - should - * switch it back to data.init. + * switch it back to data.init. Also, pda should be at the head of + * percpu area. Preallocate it. */ . = ALIGN(PAGE_SIZE); - PERCPU_VADDR(0, :percpu) + PERCPU_VADDR_PREALLOC(0, :percpu, pda_size) #else PERCPU(PAGE_SIZE) #endif diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index c44e2069c7c..83fa4236477 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -283,16 +283,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) struct task_struct *idle = idle_task(cpu); int rc; -#ifdef CONFIG_X86_64 - /* Allocate node local memory for AP pdas */ - WARN_ON(cpu == 0); - if (cpu > 0) { - rc = get_local_pda(cpu); - if (rc) - return rc; - } -#endif - #ifdef CONFIG_X86_32 init_gdt(cpu); per_cpu(current_task, cpu) = idle; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index fc2f55f2dcd..e53319cf29c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -441,9 +441,10 @@ . = __per_cpu_load + SIZEOF(.data.percpu); /** - * PERCPU_VADDR - define output section for percpu area + * PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc * @vaddr: explicit base address (optional) * @phdr: destination PHDR (optional) + * @prealloc: the size of prealloc area * * Macro which expands to output section for percpu area. If @vaddr * is not blank, it specifies explicit base address and all percpu @@ -455,11 +456,33 @@ * section in the linker script will go there too. @phdr should have * a leading colon. * + * If @prealloc is non-zero, the specified number of bytes will be + * reserved at the start of percpu area. As the prealloc area is + * likely to break alignment, this macro puts areas in increasing + * alignment order. + * * This macro defines three symbols, __per_cpu_load, __per_cpu_start * and __per_cpu_end. The first one is the vaddr of loaded percpu * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the * end offset. */ +#define PERCPU_VADDR_PREALLOC(vaddr, segment, prealloc) \ + PERCPU_PROLOG(vaddr) \ + . += prealloc; \ + *(.data.percpu) \ + *(.data.percpu.shared_aligned) \ + *(.data.percpu.page_aligned) \ + PERCPU_EPILOG(segment) + +/** + * PERCPU_VADDR - define output section for percpu area + * @vaddr: explicit base address (optional) + * @phdr: destination PHDR (optional) + * + * Macro which expands to output section for percpu area. Mostly + * identical to PERCPU_VADDR_PREALLOC(@vaddr, @phdr, 0) other than + * using slighly different layout. + */ #define PERCPU_VADDR(vaddr, phdr) \ PERCPU_PROLOG(vaddr) \ *(.data.percpu.page_aligned) \ -- cgit v1.2.3-70-g09d2 From 004aa322f855a765741d9437a98dd8fe2e4f32a6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: x86: misc clean up after the percpu update Do the following cleanups: * kill x86_64_init_pda() which now is equivalent to pda_init() * use per_cpu_offset() instead of cpu_pda() when initializing initial_gs Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/setup.h | 1 - arch/x86/kernel/acpi/sleep.c | 2 +- arch/x86/kernel/head64.c | 7 +------ arch/x86/kernel/smpboot.c | 2 +- arch/x86/xen/enlighten.c | 2 +- 5 files changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ebe858cdc8a..536949749bc 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -100,7 +100,6 @@ extern unsigned long init_pg_tables_start; extern unsigned long init_pg_tables_end; #else -void __init x86_64_init_pda(void); void __init x86_64_start_kernel(char *real_mode); void __init x86_64_start_reservations(char *real_mode_data); diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 9ff67f8dc2c..4abff454c55 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -101,7 +101,7 @@ int acpi_save_state_mem(void) stack_start.sp = temp_stack + sizeof(temp_stack); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(smp_processor_id()); - initial_gs = (unsigned long)cpu_pda(smp_processor_id()); + initial_gs = per_cpu_offset(smp_processor_id()); #endif initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 71b6f6ec96a..af67d3227ea 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,11 +26,6 @@ #include #include -void __init x86_64_init_pda(void) -{ - pda_init(0); -} - static void __init zap_identity_mappings(void) { pgd_t *pgd = pgd_offset_k(0UL); @@ -96,7 +91,7 @@ void __init x86_64_start_kernel(char * real_mode_data) if (console_loglevel == 10) early_printk("Kernel alive\n"); - x86_64_init_pda(); + pda_init(0); x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f2f77ca494d..2f0e0f1090f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -798,7 +798,7 @@ do_rest: #else cpu_pda(cpu)->pcurrent = c_idle.idle; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); - initial_gs = (unsigned long)cpu_pda(cpu); + initial_gs = per_cpu_offset(cpu); #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 965539ec425..312414ef936 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1645,7 +1645,7 @@ asmlinkage void __init xen_start_kernel(void) #ifdef CONFIG_X86_64 /* Disable until direct per-cpu data access. */ have_vcpu_info_placement = 0; - x86_64_init_pda(); + pda_init(0); #endif xen_smp_init(); -- cgit v1.2.3-70-g09d2 From 6dbde3530850d4d8bfc1b6bd4006d92786a2787f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Jan 2009 22:15:53 +0900 Subject: percpu: add optimized generic percpu accessors It is an optimization and a cleanup, and adds the following new generic percpu methods: percpu_read() percpu_write() percpu_add() percpu_sub() percpu_and() percpu_or() percpu_xor() and implements support for them on x86. (other architectures will fall back to a default implementation) The advantage is that for example to read a local percpu variable, instead of this sequence: return __get_cpu_var(var); ffffffff8102ca2b: 48 8b 14 fd 80 09 74 mov -0x7e8bf680(,%rdi,8),%rdx ffffffff8102ca32: 81 ffffffff8102ca33: 48 c7 c0 d8 59 00 00 mov $0x59d8,%rax ffffffff8102ca3a: 48 8b 04 10 mov (%rax,%rdx,1),%rax We can get a single instruction by using the optimized variants: return percpu_read(var); ffffffff8102ca3f: 65 48 8b 05 91 8f fd mov %gs:0x7efd8f91(%rip),%rax I also cleaned up the x86-specific APIs and made the x86 code use these new generic percpu primitives. tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out * added percpu_and() for completeness's sake * made generic percpu ops atomic against preemption Signed-off-by: Ingo Molnar Signed-off-by: Tejun Heo --- arch/x86/include/asm/current.h | 2 +- arch/x86/include/asm/irq_regs_32.h | 4 +-- arch/x86/include/asm/mmu_context_32.h | 12 ++++---- arch/x86/include/asm/pda.h | 10 +++---- arch/x86/include/asm/percpu.h | 24 ++++++++-------- arch/x86/include/asm/smp.h | 2 +- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/tlb_32.c | 10 +++---- arch/x86/mach-voyager/voyager_smp.c | 4 +-- arch/x86/xen/enlighten.c | 14 +++++----- arch/x86/xen/irq.c | 8 +++--- arch/x86/xen/mmu.c | 2 +- arch/x86/xen/multicalls.h | 2 +- arch/x86/xen/smp.c | 2 +- include/asm-generic/percpu.h | 52 +++++++++++++++++++++++++++++++++++ 15 files changed, 102 insertions(+), 48 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 0930b4f8d67..0728480f5c5 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -10,7 +10,7 @@ struct task_struct; DECLARE_PER_CPU(struct task_struct *, current_task); static __always_inline struct task_struct *get_current(void) { - return x86_read_percpu(current_task); + return percpu_read(current_task); } #else /* X86_32 */ diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h index 86afd747345..d7ed33ee94e 100644 --- a/arch/x86/include/asm/irq_regs_32.h +++ b/arch/x86/include/asm/irq_regs_32.h @@ -15,7 +15,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs); static inline struct pt_regs *get_irq_regs(void) { - return x86_read_percpu(irq_regs); + return percpu_read(irq_regs); } static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) @@ -23,7 +23,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) struct pt_regs *old_regs; old_regs = get_irq_regs(); - x86_write_percpu(irq_regs, new_regs); + percpu_write(irq_regs, new_regs); return old_regs; } diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h index 7e98ce1d2c0..08b53454f83 100644 --- a/arch/x86/include/asm/mmu_context_32.h +++ b/arch/x86/include/asm/mmu_context_32.h @@ -4,8 +4,8 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { #ifdef CONFIG_SMP - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY); + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); #endif } @@ -19,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev, /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); #ifdef CONFIG_SMP - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); - x86_write_percpu(cpu_tlbstate.active_mm, next); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + percpu_write(cpu_tlbstate.active_mm, next); #endif cpu_set(cpu, next->cpu_vm_mask); @@ -35,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev, } #ifdef CONFIG_SMP else { - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); - BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { /* We were in lazy tlb mode and leave_mm disabled diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index e3d3a081d79..47f274fe695 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -45,11 +45,11 @@ extern void pda_init(int); #define cpu_pda(cpu) (&per_cpu(__pda, cpu)) -#define read_pda(field) x86_read_percpu(__pda.field) -#define write_pda(field, val) x86_write_percpu(__pda.field, val) -#define add_pda(field, val) x86_add_percpu(__pda.field, val) -#define sub_pda(field, val) x86_sub_percpu(__pda.field, val) -#define or_pda(field, val) x86_or_percpu(__pda.field, val) +#define read_pda(field) percpu_read(__pda.field) +#define write_pda(field, val) percpu_write(__pda.field, val) +#define add_pda(field, val) percpu_add(__pda.field, val) +#define sub_pda(field, val) percpu_sub(__pda.field, val) +#define or_pda(field, val) percpu_or(__pda.field, val) /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define test_and_clear_bit_pda(bit, field) \ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 328b31a429d..03aa4b00a1c 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -40,16 +40,11 @@ #ifdef CONFIG_SMP #define __percpu_seg_str "%%"__stringify(__percpu_seg)":" -#define __my_cpu_offset x86_read_percpu(this_cpu_off) +#define __my_cpu_offset percpu_read(this_cpu_off) #else #define __percpu_seg_str #endif -#include - -/* We can use this directly for local CPU (faster). */ -DECLARE_PER_CPU(unsigned long, this_cpu_off); - /* For arch-specific code, we can use direct single-insn ops (they * don't give an lvalue though). */ extern void __bad_percpu_size(void); @@ -115,11 +110,13 @@ do { \ ret__; \ }) -#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) -#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val) -#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) -#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) -#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) +#define percpu_read(var) percpu_from_op("mov", per_cpu__##var) +#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) +#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) +#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) +#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val) +#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) +#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define x86_test_and_clear_bit_percpu(bit, var) \ @@ -131,6 +128,11 @@ do { \ old__; \ }) +#include + +/* We can use this directly for local CPU (faster). */ +DECLARE_PER_CPU(unsigned long, this_cpu_off); + #ifdef CONFIG_X86_64 extern void load_pda_offset(int cpu); #else diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 127415402ea..c7bbbbe65d3 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -160,7 +160,7 @@ extern unsigned disabled_cpus __cpuinitdata; * from the initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case. */ -#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) +#define raw_smp_processor_id() (percpu_read(cpu_number)) extern int safe_smp_processor_id(void); #elif defined(CONFIG_X86_64_SMP) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a546f55c77b..77d546817d9 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -591,7 +591,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (prev->gs | next->gs) loadsegment(gs, next->gs); - x86_write_percpu(current_task, next_p); + percpu_write(current_task, next_p); return prev_p; } diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index ec53818f4e3..e65449d0f7d 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -34,8 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock); */ void leave_mm(int cpu) { - BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK); - cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask); + BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK); + cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); load_cr3(swapper_pg_dir); } EXPORT_SYMBOL_GPL(leave_mm); @@ -103,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs) * BUG(); */ - if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) { - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) { + if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) { + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (flush_va == TLB_FLUSH_ALL) local_flush_tlb(); else @@ -222,7 +222,7 @@ static void do_flush_tlb_all(void *info) unsigned long cpu = smp_processor_id(); __flush_tlb_all(); - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(cpu); } diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 1a48368acb0..96f15b09a4c 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -402,7 +402,7 @@ void __init find_smp_config(void) VOYAGER_SUS_IN_CONTROL_PORT); current_thread_info()->cpu = boot_cpu_id; - x86_write_percpu(cpu_number, boot_cpu_id); + percpu_write(cpu_number, boot_cpu_id); } /* @@ -1782,7 +1782,7 @@ static void __init voyager_smp_cpus_done(unsigned int max_cpus) void __init smp_setup_processor_id(void) { current_thread_info()->cpu = hard_smp_processor_id(); - x86_write_percpu(cpu_number, hard_smp_processor_id()); + percpu_write(cpu_number, hard_smp_processor_id()); } static void voyager_send_call_func(cpumask_t callmask) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 312414ef936..75b94139e1f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -695,17 +695,17 @@ static void xen_write_cr0(unsigned long cr0) static void xen_write_cr2(unsigned long cr2) { - x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; + percpu_read(xen_vcpu)->arch.cr2 = cr2; } static unsigned long xen_read_cr2(void) { - return x86_read_percpu(xen_vcpu)->arch.cr2; + return percpu_read(xen_vcpu)->arch.cr2; } static unsigned long xen_read_cr2_direct(void) { - return x86_read_percpu(xen_vcpu_info.arch.cr2); + return percpu_read(xen_vcpu_info.arch.cr2); } static void xen_write_cr4(unsigned long cr4) @@ -718,12 +718,12 @@ static void xen_write_cr4(unsigned long cr4) static unsigned long xen_read_cr3(void) { - return x86_read_percpu(xen_cr3); + return percpu_read(xen_cr3); } static void set_current_cr3(void *v) { - x86_write_percpu(xen_current_cr3, (unsigned long)v); + percpu_write(xen_current_cr3, (unsigned long)v); } static void __xen_write_cr3(bool kernel, unsigned long cr3) @@ -748,7 +748,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3) MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); if (kernel) { - x86_write_percpu(xen_cr3, cr3); + percpu_write(xen_cr3, cr3); /* Update xen_current_cr3 once the batch has actually been submitted. */ @@ -764,7 +764,7 @@ static void xen_write_cr3(unsigned long cr3) /* Update while interrupts are disabled, so its atomic with respect to ipis */ - x86_write_percpu(xen_cr3, cr3); + percpu_write(xen_cr3, cr3); __xen_write_cr3(true, cr3); diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index bb042608c60..2e8271431e1 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -39,7 +39,7 @@ static unsigned long xen_save_fl(void) struct vcpu_info *vcpu; unsigned long flags; - vcpu = x86_read_percpu(xen_vcpu); + vcpu = percpu_read(xen_vcpu); /* flag has opposite sense of mask */ flags = !vcpu->evtchn_upcall_mask; @@ -62,7 +62,7 @@ static void xen_restore_fl(unsigned long flags) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - vcpu = x86_read_percpu(xen_vcpu); + vcpu = percpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = flags; preempt_enable_no_resched(); @@ -83,7 +83,7 @@ static void xen_irq_disable(void) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; + percpu_read(xen_vcpu)->evtchn_upcall_mask = 1; preempt_enable_no_resched(); } @@ -96,7 +96,7 @@ static void xen_irq_enable(void) the caller is confused and is trying to re-enable interrupts on an indeterminate processor. */ - vcpu = x86_read_percpu(xen_vcpu); + vcpu = percpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = 0; /* Doesn't matter if we get preempted here, because any diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 503c240e26c..7bc7852cc5c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1074,7 +1074,7 @@ static void drop_other_mm_ref(void *info) /* If this cpu still has a stale cr3 reference, then make sure it has been flushed. */ - if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) { + if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) { load_cr3(swapper_pg_dir); arch_flush_lazy_cpu_mode(); } diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 85893824161..e786fa7f261 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -39,7 +39,7 @@ static inline void xen_mc_issue(unsigned mode) xen_mc_flush(); /* restore flags saved in xen_mc_batch */ - local_irq_restore(x86_read_percpu(xen_mc_irq_flags)); + local_irq_restore(percpu_read(xen_mc_irq_flags)); } /* Set up a callback to be called when the current batch is flushed */ diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 83fa4236477..3bfd6dd0b47 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -78,7 +78,7 @@ static __cpuinit void cpu_bringup(void) xen_setup_cpu_clockevents(); cpu_set(cpu, cpu_online_map); - x86_write_percpu(cpu_state, CPU_ONLINE); + percpu_write(cpu_state, CPU_ONLINE); wmb(); /* We can take interrupts now: we're officially "up". */ diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index b0e63c672eb..00f45ff081a 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -80,4 +80,56 @@ extern void setup_per_cpu_areas(void); #define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \ __typeof__(type) per_cpu_var(name) +/* + * Optional methods for optimized non-lvalue per-cpu variable access. + * + * @var can be a percpu variable or a field of it and its size should + * equal char, int or long. percpu_read() evaluates to a lvalue and + * all others to void. + * + * These operations are guaranteed to be atomic w.r.t. preemption. + * The generic versions use plain get/put_cpu_var(). Archs are + * encouraged to implement single-instruction alternatives which don't + * require preemption protection. + */ +#ifndef percpu_read +# define percpu_read(var) \ + ({ \ + typeof(per_cpu_var(var)) __tmp_var__; \ + __tmp_var__ = get_cpu_var(var); \ + put_cpu_var(var); \ + __tmp_var__; \ + }) +#endif + +#define __percpu_generic_to_op(var, val, op) \ +do { \ + get_cpu_var(var) op val; \ + put_cpu_var(var); \ +} while (0) + +#ifndef percpu_write +# define percpu_write(var, val) __percpu_generic_to_op(var, (val), =) +#endif + +#ifndef percpu_add +# define percpu_add(var, val) __percpu_generic_to_op(var, (val), +=) +#endif + +#ifndef percpu_sub +# define percpu_sub(var, val) __percpu_generic_to_op(var, (val), -=) +#endif + +#ifndef percpu_and +# define percpu_and(var, val) __percpu_generic_to_op(var, (val), &=) +#endif + +#ifndef percpu_or +# define percpu_or(var, val) __percpu_generic_to_op(var, (val), |=) +#endif + +#ifndef percpu_xor +# define percpu_xor(var, val) __percpu_generic_to_op(var, (val), ^=) +#endif + #endif /* _ASM_GENERIC_PERCPU_H_ */ -- cgit v1.2.3-70-g09d2 From 1b437c8c73a36daa471dd54a63c426d72af5723d Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:57 +0900 Subject: x86-64: Move irq stats from PDA to per-cpu and consolidate with 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/hardirq_64.h | 24 +++++++++++++++++++----- arch/x86/include/asm/pda.h | 10 ---------- arch/x86/kernel/irq.c | 6 +----- arch/x86/kernel/irq_64.c | 3 +++ arch/x86/kernel/nmi.c | 10 +--------- arch/x86/xen/smp.c | 18 +++--------------- 6 files changed, 27 insertions(+), 44 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h index b5a6b5d5670..a65bab20f6c 100644 --- a/arch/x86/include/asm/hardirq_64.h +++ b/arch/x86/include/asm/hardirq_64.h @@ -3,22 +3,36 @@ #include #include -#include #include +typedef struct { + unsigned int __softirq_pending; + unsigned int __nmi_count; /* arch dependent */ + unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int irq0_irqs; + unsigned int irq_resched_count; + unsigned int irq_call_count; + unsigned int irq_tlb_count; + unsigned int irq_thermal_count; + unsigned int irq_spurious_count; + unsigned int irq_threshold_count; +} ____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU(irq_cpustat_t, irq_stat); + /* We can have at most NR_VECTORS irqs routed to a cpu at a time */ #define MAX_HARDIRQS_PER_CPU NR_VECTORS #define __ARCH_IRQ_STAT 1 -#define inc_irq_stat(member) add_pda(member, 1) +#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) -#define local_softirq_pending() read_pda(__softirq_pending) +#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) #define __ARCH_SET_SOFTIRQ_PENDING 1 -#define set_softirq_pending(x) write_pda(__softirq_pending, (x)) -#define or_softirq_pending(x) or_pda(__softirq_pending, (x)) +#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) extern void ack_bad_irq(unsigned int irq); diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 47f274fe695..69a40757e21 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -25,19 +25,9 @@ struct x8664_pda { char *irqstackptr; short nodenumber; /* number of current node (32k max) */ short in_bootmem; /* pda lives in bootmem */ - unsigned int __softirq_pending; - unsigned int __nmi_count; /* number of NMI on this CPUs */ short mmu_state; short isidle; struct mm_struct *active_mm; - unsigned apic_timer_irqs; - unsigned irq0_irqs; - unsigned irq_resched_count; - unsigned irq_call_count; - unsigned irq_tlb_count; - unsigned irq_thermal_count; - unsigned irq_threshold_count; - unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; DECLARE_PER_CPU(struct x8664_pda, __pda); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3973e2df7f8..8b30d0c2512 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -36,11 +36,7 @@ void ack_bad_irq(unsigned int irq) #endif } -#ifdef CONFIG_X86_32 -# define irq_stats(x) (&per_cpu(irq_stat, x)) -#else -# define irq_stats(x) cpu_pda(x) -#endif +#define irq_stats(x) (&per_cpu(irq_stat, x)) /* * /proc/interrupts printing: */ diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 0b21cb1ea11..1db05247b47 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -19,6 +19,9 @@ #include #include +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + /* * Probabilistic stack overflow check: * diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 7228979f1e7..23b6d9e6e4f 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -61,11 +61,7 @@ static int endflag __initdata; static inline unsigned int get_nmi_count(int cpu) { -#ifdef CONFIG_X86_64 - return cpu_pda(cpu)->__nmi_count; -#else - return nmi_count(cpu); -#endif + return per_cpu(irq_stat, cpu).__nmi_count; } static inline int mce_in_progress(void) @@ -82,12 +78,8 @@ static inline int mce_in_progress(void) */ static inline unsigned int get_timer_irqs(int cpu) { -#ifdef CONFIG_X86_64 - return read_pda(apic_timer_irqs) + read_pda(irq0_irqs); -#else return per_cpu(irq_stat, cpu).apic_timer_irqs + per_cpu(irq_stat, cpu).irq0_irqs; -#endif } #ifdef CONFIG_SMP diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 3bfd6dd0b47..9ff3b0999cf 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -50,11 +50,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); */ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) { -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_resched_count++; -#else - add_pda(irq_resched_count, 1); -#endif + inc_irq_stat(irq_resched_count); return IRQ_HANDLED; } @@ -435,11 +431,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) { irq_enter(); generic_smp_call_function_interrupt(); -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_call_count++; -#else - add_pda(irq_call_count, 1); -#endif + inc_irq_stat(irq_call_count); irq_exit(); return IRQ_HANDLED; @@ -449,11 +441,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) { irq_enter(); generic_smp_call_function_single_interrupt(); -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_call_count++; -#else - add_pda(irq_call_count, 1); -#endif + inc_irq_stat(irq_call_count); irq_exit(); return IRQ_HANDLED; -- cgit v1.2.3-70-g09d2 From 9eb912d1aa6b8106e06a73ea6702ec3dab0d6a1a Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:57 +0900 Subject: x86-64: Move TLB state from PDA to per-cpu and consolidate with 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/mmu_context_64.h | 16 +++++++--------- arch/x86/include/asm/pda.h | 2 -- arch/x86/include/asm/tlbflush.h | 7 ++----- arch/x86/kernel/cpu/common.c | 2 -- arch/x86/kernel/tlb_32.c | 12 ++---------- arch/x86/kernel/tlb_64.c | 13 ++++++++----- arch/x86/xen/mmu.c | 6 +----- 7 files changed, 20 insertions(+), 38 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h index 677d36e9540..c4572505ab3 100644 --- a/arch/x86/include/asm/mmu_context_64.h +++ b/arch/x86/include/asm/mmu_context_64.h @@ -1,13 +1,11 @@ #ifndef _ASM_X86_MMU_CONTEXT_64_H #define _ASM_X86_MMU_CONTEXT_64_H -#include - static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { #ifdef CONFIG_SMP - if (read_pda(mmu_state) == TLBSTATE_OK) - write_pda(mmu_state, TLBSTATE_LAZY); + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); #endif } @@ -19,8 +17,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); #ifdef CONFIG_SMP - write_pda(mmu_state, TLBSTATE_OK); - write_pda(active_mm, next); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + percpu_write(cpu_tlbstate.active_mm, next); #endif cpu_set(cpu, next->cpu_vm_mask); load_cr3(next->pgd); @@ -30,9 +28,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, } #ifdef CONFIG_SMP else { - write_pda(mmu_state, TLBSTATE_OK); - if (read_pda(active_mm) != next) - BUG(); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); + if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { /* We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 69a40757e21..8ee835ed10e 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -25,9 +25,7 @@ struct x8664_pda { char *irqstackptr; short nodenumber; /* number of current node (32k max) */ short in_bootmem; /* pda lives in bootmem */ - short mmu_state; short isidle; - struct mm_struct *active_mm; } ____cacheline_aligned_in_smp; DECLARE_PER_CPU(struct x8664_pda, __pda); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 17feaa9c7e7..d3539f998f8 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -148,20 +148,17 @@ void native_flush_tlb_others(const struct cpumask *cpumask, #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 -#ifdef CONFIG_X86_32 struct tlb_state { struct mm_struct *active_mm; int state; - char __cacheline_padding[L1_CACHE_BYTES-8]; }; DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); -void reset_lazy_tlbstate(void); -#else static inline void reset_lazy_tlbstate(void) { + percpu_write(cpu_tlbstate.state, 0); + percpu_write(cpu_tlbstate.active_mm, &init_mm); } -#endif #endif /* SMP */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c49498d4083..3d0cc6f1711 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -897,8 +897,6 @@ void __cpuinit pda_init(int cpu) pda->irqcount = -1; pda->kernelstack = (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; - pda->active_mm = &init_mm; - pda->mmu_state = 0; if (cpu == 0) { /* others are initialized in smpboot.c */ diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index e65449d0f7d..abf0808d6fc 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -4,8 +4,8 @@ #include -DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) - ____cacheline_aligned = { &init_mm, 0, }; +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) + = { &init_mm, 0, }; /* must come after the send_IPI functions above for inlining */ #include @@ -231,14 +231,6 @@ void flush_tlb_all(void) on_each_cpu(do_flush_tlb_all, NULL, 1); } -void reset_lazy_tlbstate(void) -{ - int cpu = raw_smp_processor_id(); - - per_cpu(cpu_tlbstate, cpu).state = 0; - per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; -} - static int init_flush_cpumask(void) { alloc_cpumask_var(&flush_cpumask, GFP_KERNEL); diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 7f4141d3b66..e64a32c4882 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -18,6 +18,9 @@ #include #include +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) + = { &init_mm, 0, }; + #include /* * Smarter SMP flushing macros. @@ -62,9 +65,9 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state); */ void leave_mm(int cpu) { - if (read_pda(mmu_state) == TLBSTATE_OK) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) BUG(); - cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); + cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); load_cr3(swapper_pg_dir); } EXPORT_SYMBOL_GPL(leave_mm); @@ -142,8 +145,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) * BUG(); */ - if (f->flush_mm == read_pda(active_mm)) { - if (read_pda(mmu_state) == TLBSTATE_OK) { + if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) { + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (f->flush_va == TLB_FLUSH_ALL) local_flush_tlb(); else @@ -281,7 +284,7 @@ static void do_flush_tlb_all(void *info) unsigned long cpu = smp_processor_id(); __flush_tlb_all(); - if (read_pda(mmu_state) == TLBSTATE_LAZY) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(cpu); } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 7bc7852cc5c..98cb9869eb2 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1063,11 +1063,7 @@ static void drop_other_mm_ref(void *info) struct mm_struct *mm = info; struct mm_struct *active_mm; -#ifdef CONFIG_X86_64 - active_mm = read_pda(active_mm); -#else - active_mm = __get_cpu_var(cpu_tlbstate).active_mm; -#endif + active_mm = percpu_read(cpu_tlbstate.active_mm); if (active_mm == mm) leave_mm(smp_processor_id()); -- cgit v1.2.3-70-g09d2 From c6f5e0acd5d12ee23f701f15889872e67b47caa6 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Move current task from PDA to per-cpu and consolidate with 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/current.h | 24 +++--------------------- arch/x86/include/asm/pda.h | 4 ++-- arch/x86/include/asm/system.h | 4 ++-- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 5 +---- arch/x86/kernel/dumpstack_64.c | 2 +- arch/x86/kernel/process_64.c | 5 ++++- arch/x86/kernel/smpboot.c | 3 +-- arch/x86/xen/smp.c | 3 +-- 9 files changed, 15 insertions(+), 36 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 0728480f5c5..c68c361697e 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -1,39 +1,21 @@ #ifndef _ASM_X86_CURRENT_H #define _ASM_X86_CURRENT_H -#ifdef CONFIG_X86_32 #include #include +#ifndef __ASSEMBLY__ struct task_struct; DECLARE_PER_CPU(struct task_struct *, current_task); -static __always_inline struct task_struct *get_current(void) -{ - return percpu_read(current_task); -} - -#else /* X86_32 */ - -#ifndef __ASSEMBLY__ -#include - -struct task_struct; static __always_inline struct task_struct *get_current(void) { - return read_pda(pcurrent); + return percpu_read(current_task); } -#else /* __ASSEMBLY__ */ - -#include -#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg +#define current get_current() #endif /* __ASSEMBLY__ */ -#endif /* X86_32 */ - -#define current get_current() - #endif /* _ASM_X86_CURRENT_H */ diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 668d5a5b6f7..7209302d922 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -11,8 +11,8 @@ /* Per processor datastructure. %gs points to it while the kernel runs */ struct x8664_pda { - struct task_struct *pcurrent; /* 0 Current process */ - unsigned long dummy; + unsigned long unused1; + unsigned long unused2; unsigned long kernelstack; /* 16 top of kernel stack for current */ unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 8e626ea33a1..4399aac680e 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -94,7 +94,7 @@ do { \ "call __switch_to\n\t" \ ".globl thread_return\n" \ "thread_return:\n\t" \ - "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq "__percpu_seg_str"%P[current_task],%%rsi\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ @@ -106,7 +106,7 @@ do { \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ + [current_task] "m" (per_cpu_var(current_task)) \ : "memory", "cc" __EXTRA_CLOBBER) #endif diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index cae6697c099..4f7a210e1e5 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -51,7 +51,6 @@ int main(void) #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) ENTRY(kernelstack); ENTRY(oldrsp); - ENTRY(pcurrent); ENTRY(irqcount); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4221e920886..b50e38d1688 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -903,10 +903,7 @@ void __cpuinit pda_init(int cpu) pda->kernelstack = (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; - if (cpu == 0) { - /* others are initialized in smpboot.c */ - pda->pcurrent = &init_task; - } else { + if (cpu != 0) { if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); } diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 28e26a4315d..d35db5993fd 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -242,7 +242,7 @@ void show_registers(struct pt_regs *regs) int i; unsigned long sp; const int cpu = smp_processor_id(); - struct task_struct *cur = cpu_pda(cpu)->pcurrent; + struct task_struct *cur = current; sp = regs->sp; printk("CPU %d ", cpu); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 416fb9282f4..e00c31a4b3c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -57,6 +57,9 @@ asmlinkage extern void ret_from_fork(void); +DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; +EXPORT_PER_CPU_SYMBOL(current_task); + unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; static ATOMIC_NOTIFIER_HEAD(idle_notifier); @@ -615,7 +618,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ prev->usersp = read_pda(oldrsp); write_pda(oldrsp, next->usersp); - write_pda(pcurrent, next_p); + percpu_write(current_task, next_p); write_pda(kernelstack, (unsigned long)task_stack_page(next_p) + diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2f0e0f1090f..5854be0fb80 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -790,13 +790,12 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) set_idle_for_cpu(cpu, c_idle.idle); do_rest: -#ifdef CONFIG_X86_32 per_cpu(current_task, cpu) = c_idle.idle; +#ifdef CONFIG_X86_32 init_gdt(cpu); /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); #else - cpu_pda(cpu)->pcurrent = c_idle.idle; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); #endif diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 9ff3b0999cf..72c2eb9b64c 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -279,12 +279,11 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) struct task_struct *idle = idle_task(cpu); int rc; + per_cpu(current_task, cpu) = idle; #ifdef CONFIG_X86_32 init_gdt(cpu); - per_cpu(current_task, cpu) = idle; irq_ctx_init(cpu); #else - cpu_pda(cpu)->pcurrent = idle; clear_tsk_thread_flag(idle, TIF_FORK); #endif xen_setup_timer(cpu); -- cgit v1.2.3-70-g09d2 From 9af45651f1f7c89942e016a1a00a7ebddfa727f8 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Move kernelstack from PDA to per-cpu. Also clean up PER_CPU_VAR usage in xen-asm_64.S tj: * remove now unused stack_thread_info() * s/kernelstack/kernel_stack/ * added FIXME comment in xen-asm_64.S Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/ia32/ia32entry.S | 8 ++++---- arch/x86/include/asm/pda.h | 4 +--- arch/x86/include/asm/thread_info.h | 20 ++++++++------------ arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 6 ++++-- arch/x86/kernel/entry_64.S | 4 ++-- arch/x86/kernel/process_64.c | 4 ++-- arch/x86/kernel/smpboot.c | 3 +++ arch/x86/xen/xen-asm_64.S | 23 +++++++++++------------ 9 files changed, 35 insertions(+), 38 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 256b00b6189..9c79b247700 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -112,8 +112,8 @@ ENTRY(ia32_sysenter_target) CFI_DEF_CFA rsp,0 CFI_REGISTER rsp,rbp SWAPGS_UNSAFE_STACK - movq %gs:pda_kernelstack, %rsp - addq $(PDA_STACKOFFSET),%rsp + movq PER_CPU_VAR(kernel_stack), %rsp + addq $(KERNEL_STACK_OFFSET),%rsp /* * No need to follow this irqs on/off section: the syscall * disabled irqs, here we enable it straight after entry: @@ -273,13 +273,13 @@ ENDPROC(ia32_sysenter_target) ENTRY(ia32_cstar_target) CFI_STARTPROC32 simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,PDA_STACKOFFSET + CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 - movq %gs:pda_kernelstack,%rsp + movq PER_CPU_VAR(kernel_stack),%rsp /* * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 7209302d922..4d28ffba6e1 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -13,7 +13,7 @@ struct x8664_pda { unsigned long unused1; unsigned long unused2; - unsigned long kernelstack; /* 16 top of kernel stack for current */ + unsigned long unused3; unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ unsigned int unused6; /* 36 was cpunumber */ @@ -44,6 +44,4 @@ extern void pda_init(int); #endif -#define PDA_STACKOFFSET (5*8) - #endif /* _ASM_X86_PDA_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 98789647baa..b46f8ca007b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -194,25 +194,21 @@ static inline struct thread_info *current_thread_info(void) #else /* X86_32 */ -#include +#include +#define KERNEL_STACK_OFFSET (5*8) /* * macros/functions for gaining access to the thread information structure * preempt_count needs to be 1 initially, until the scheduler is functional. */ #ifndef __ASSEMBLY__ -static inline struct thread_info *current_thread_info(void) -{ - struct thread_info *ti; - ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE); - return ti; -} +DECLARE_PER_CPU(unsigned long, kernel_stack); -/* do not use in interrupt context */ -static inline struct thread_info *stack_thread_info(void) +static inline struct thread_info *current_thread_info(void) { struct thread_info *ti; - asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); + ti = (void *)(percpu_read(kernel_stack) + + KERNEL_STACK_OFFSET - THREAD_SIZE); return ti; } @@ -220,8 +216,8 @@ static inline struct thread_info *stack_thread_info(void) /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg) \ - movq %gs:pda_kernelstack,reg ; \ - subq $(THREAD_SIZE-PDA_STACKOFFSET),reg + movq PER_CPU_VAR(kernel_stack),reg ; \ + subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg #endif diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 4f7a210e1e5..cafff5f4a03 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -49,7 +49,6 @@ int main(void) BLANK(); #undef ENTRY #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - ENTRY(kernelstack); ENTRY(oldrsp); ENTRY(irqcount); DEFINE(pda_size, sizeof(struct x8664_pda)); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b50e38d1688..06b6290088f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -889,6 +889,10 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) = per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64; #endif +DEFINE_PER_CPU(unsigned long, kernel_stack) = + (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; +EXPORT_PER_CPU_SYMBOL(kernel_stack); + void __cpuinit pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); @@ -900,8 +904,6 @@ void __cpuinit pda_init(int cpu) load_pda_offset(cpu); pda->irqcount = -1; - pda->kernelstack = (unsigned long)stack_thread_info() - - PDA_STACKOFFSET + THREAD_SIZE; if (cpu != 0) { if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index d22677a6643..0dd45859a7a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -468,7 +468,7 @@ END(ret_from_fork) ENTRY(system_call) CFI_STARTPROC simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,PDA_STACKOFFSET + CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ SWAPGS_UNSAFE_STACK @@ -480,7 +480,7 @@ ENTRY(system_call) ENTRY(system_call_after_swapgs) movq %rsp,%gs:pda_oldrsp - movq %gs:pda_kernelstack,%rsp + movq PER_CPU_VAR(kernel_stack),%rsp /* * No need to follow this irqs off/on section - it's straight * and short: diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e00c31a4b3c..6c5f5760210 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -620,9 +620,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) write_pda(oldrsp, next->usersp); percpu_write(current_task, next_p); - write_pda(kernelstack, + percpu_write(kernel_stack, (unsigned long)task_stack_page(next_p) + - THREAD_SIZE - PDA_STACKOFFSET); + THREAD_SIZE - KERNEL_STACK_OFFSET); #ifdef CONFIG_CC_STACKPROTECTOR write_pda(stack_canary, next_p->stack_canary); /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5854be0fb80..869b98840fd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -798,6 +798,9 @@ do_rest: #else clear_tsk_thread_flag(c_idle.idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); + per_cpu(kernel_stack, cpu) = + (unsigned long)task_stack_page(c_idle.idle) - + KERNEL_STACK_OFFSET + THREAD_SIZE; #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 05794c566e8..5a23e899367 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -28,12 +29,10 @@ #if 1 /* - x86-64 does not yet support direct access to percpu variables - via a segment override, so we just need to make sure this code - never gets used + FIXME: x86_64 now can support direct access to percpu variables + via a segment override. Update xen accordingly. */ #define BUG ud2a -#define PER_CPU_VAR(var, off) 0xdeadbeef #endif /* @@ -45,14 +44,14 @@ ENTRY(xen_irq_enable_direct) BUG /* Unmask events */ - movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask /* Preempt here doesn't matter because that will deal with any pending interrupts. The pending check may end up being run on the wrong CPU, but that doesn't hurt. */ /* Test for pending */ - testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending jz 1f 2: call check_events @@ -69,7 +68,7 @@ ENDPATCH(xen_irq_enable_direct) ENTRY(xen_irq_disable_direct) BUG - movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask ENDPATCH(xen_irq_disable_direct) ret ENDPROC(xen_irq_disable_direct) @@ -87,7 +86,7 @@ ENDPATCH(xen_irq_disable_direct) ENTRY(xen_save_fl_direct) BUG - testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask setz %ah addb %ah,%ah ENDPATCH(xen_save_fl_direct) @@ -107,13 +106,13 @@ ENTRY(xen_restore_fl_direct) BUG testb $X86_EFLAGS_IF>>8, %ah - setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask /* Preempt here doesn't matter because that will deal with any pending interrupts. The pending check may end up being run on the wrong CPU, but that doesn't hurt. */ /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) + cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending jz 1f 2: call check_events 1: @@ -196,7 +195,7 @@ ENTRY(xen_sysret64) /* We're already on the usermode stack at this point, but still with the kernel gs, so we can easily switch back */ movq %rsp, %gs:pda_oldrsp - movq %gs:pda_kernelstack,%rsp + movq PER_CPU_VAR(kernel_stack),%rsp pushq $__USER_DS pushq %gs:pda_oldrsp @@ -213,7 +212,7 @@ ENTRY(xen_sysret32) /* We're already on the usermode stack at this point, but still with the kernel gs, so we can easily switch back */ movq %rsp, %gs:pda_oldrsp - movq %gs:pda_kernelstack, %rsp + movq PER_CPU_VAR(kernel_stack), %rsp pushq $__USER32_DS pushq %gs:pda_oldrsp -- cgit v1.2.3-70-g09d2 From 3d1e42a7cf945e289d6ba26159aa0e2b0645401b Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Move oldrsp from PDA to per-cpu. tj: * in asm-offsets_64.c, pda.h inclusion shouldn't be removed as pda is still referenced in the file * s/oldrsp/old_rsp/ Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 2 +- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/entry_64.S | 10 +++++----- arch/x86/kernel/process_64.c | 8 +++++--- arch/x86/xen/xen-asm_64.S | 8 ++++---- 5 files changed, 15 insertions(+), 14 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 4d28ffba6e1..ae23deb9955 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -14,7 +14,7 @@ struct x8664_pda { unsigned long unused1; unsigned long unused2; unsigned long unused3; - unsigned long oldrsp; /* 24 user rsp for system call */ + unsigned long unused4; int irqcount; /* 32 Irq nesting counter. Starts -1 */ unsigned int unused6; /* 36 was cpunumber */ #ifdef CONFIG_CC_STACKPROTECTOR diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index cafff5f4a03..afda6deb851 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -49,7 +49,6 @@ int main(void) BLANK(); #undef ENTRY #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - ENTRY(oldrsp); ENTRY(irqcount); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 0dd45859a7a..7c27da407da 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -210,7 +210,7 @@ ENTRY(native_usergs_sysret64) /* %rsp:at FRAMEEND */ .macro FIXUP_TOP_OF_STACK tmp offset=0 - movq %gs:pda_oldrsp,\tmp + movq PER_CPU_VAR(old_rsp),\tmp movq \tmp,RSP+\offset(%rsp) movq $__USER_DS,SS+\offset(%rsp) movq $__USER_CS,CS+\offset(%rsp) @@ -221,7 +221,7 @@ ENTRY(native_usergs_sysret64) .macro RESTORE_TOP_OF_STACK tmp offset=0 movq RSP+\offset(%rsp),\tmp - movq \tmp,%gs:pda_oldrsp + movq \tmp,PER_CPU_VAR(old_rsp) movq EFLAGS+\offset(%rsp),\tmp movq \tmp,R11+\offset(%rsp) .endm @@ -479,7 +479,7 @@ ENTRY(system_call) */ ENTRY(system_call_after_swapgs) - movq %rsp,%gs:pda_oldrsp + movq %rsp,PER_CPU_VAR(old_rsp) movq PER_CPU_VAR(kernel_stack),%rsp /* * No need to follow this irqs off/on section - it's straight @@ -523,7 +523,7 @@ sysret_check: CFI_REGISTER rip,rcx RESTORE_ARGS 0,-ARG_SKIP,1 /*CFI_REGISTER rflags,r11*/ - movq %gs:pda_oldrsp, %rsp + movq PER_CPU_VAR(old_rsp), %rsp USERGS_SYSRET64 CFI_RESTORE_STATE @@ -833,7 +833,7 @@ common_interrupt: XCPT_FRAME addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ interrupt do_IRQ - /* 0(%rsp): oldrsp-ARGOFFSET */ + /* 0(%rsp): old_rsp-ARGOFFSET */ ret_from_intr: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6c5f5760210..48012891892 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -60,6 +60,8 @@ asmlinkage extern void ret_from_fork(void); DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); +DEFINE_PER_CPU(unsigned long, old_rsp); + unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; static ATOMIC_NOTIFIER_HEAD(idle_notifier); @@ -395,7 +397,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) load_gs_index(0); regs->ip = new_ip; regs->sp = new_sp; - write_pda(oldrsp, new_sp); + percpu_write(old_rsp, new_sp); regs->cs = __USER_CS; regs->ss = __USER_DS; regs->flags = 0x200; @@ -616,8 +618,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Switch the PDA and FPU contexts. */ - prev->usersp = read_pda(oldrsp); - write_pda(oldrsp, next->usersp); + prev->usersp = percpu_read(old_rsp); + percpu_write(old_rsp, next->usersp); percpu_write(current_task, next_p); percpu_write(kernel_stack, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 5a23e899367..d6fc51f4ce8 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -194,11 +194,11 @@ RELOC(xen_sysexit, 1b+1) ENTRY(xen_sysret64) /* We're already on the usermode stack at this point, but still with the kernel gs, so we can easily switch back */ - movq %rsp, %gs:pda_oldrsp + movq %rsp, PER_CPU_VAR(old_rsp) movq PER_CPU_VAR(kernel_stack),%rsp pushq $__USER_DS - pushq %gs:pda_oldrsp + pushq PER_CPU_VAR(old_rsp) pushq %r11 pushq $__USER_CS pushq %rcx @@ -211,11 +211,11 @@ RELOC(xen_sysret64, 1b+1) ENTRY(xen_sysret32) /* We're already on the usermode stack at this point, but still with the kernel gs, so we can easily switch back */ - movq %rsp, %gs:pda_oldrsp + movq %rsp, PER_CPU_VAR(old_rsp) movq PER_CPU_VAR(kernel_stack), %rsp pushq $__USER32_DS - pushq %gs:pda_oldrsp + pushq PER_CPU_VAR(old_rsp) pushq %r11 pushq $__USER32_CS pushq %rcx -- cgit v1.2.3-70-g09d2 From 8ce031972b40da58c268caba8c5ea3c0856d7131 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 12:21:27 +0900 Subject: x86: remove pda_init() Impact: cleanup Copy the code to cpu_init() to satisfy the requirement that the cpu be reinitialized. Remove all other calls, since the segments are already initialized in head_64.S. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 1 - arch/x86/kernel/cpu/common.c | 15 +++------------ arch/x86/kernel/head64.c | 2 -- arch/x86/xen/enlighten.c | 1 - 4 files changed, 3 insertions(+), 16 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 4a8c9d382c9..b473e952439 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -24,7 +24,6 @@ struct x8664_pda { } ____cacheline_aligned_in_smp; DECLARE_PER_CPU(struct x8664_pda, __pda); -extern void pda_init(int); #define cpu_pda(cpu) (&per_cpu(__pda, cpu)) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7976a6a0f65..f83a4d6160f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -895,15 +895,6 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack); DEFINE_PER_CPU(unsigned int, irq_count) = -1; -void __cpuinit pda_init(int cpu) -{ - /* Setup up data that may be needed in __get_free_pages early */ - loadsegment(fs, 0); - loadsegment(gs, 0); - - load_pda_offset(cpu); -} - static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) __aligned(PAGE_SIZE); @@ -967,9 +958,9 @@ void __cpuinit cpu_init(void) struct task_struct *me; int i; - /* CPU 0 is initialised in head64.c */ - if (cpu != 0) - pda_init(cpu); + loadsegment(fs, 0); + loadsegment(gs, 0); + load_pda_offset(cpu); #ifdef CONFIG_NUMA if (cpu != 0 && percpu_read(node_number) == 0 && diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index af67d3227ea..f5b27224769 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -91,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data) if (console_loglevel == 10) early_printk("Kernel alive\n"); - pda_init(0); - x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 75b94139e1f..bef941f6145 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1645,7 +1645,6 @@ asmlinkage void __init xen_start_kernel(void) #ifdef CONFIG_X86_64 /* Disable until direct per-cpu data access. */ have_vcpu_info_placement = 0; - pda_init(0); #endif xen_smp_init(); -- cgit v1.2.3-70-g09d2 From ab897d2013128f470240a541b31cf5e636984e71 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 22 Jan 2009 14:24:16 -0800 Subject: x86/pvops: remove pte_flags pvop pte_flags() was introduced as a new pvop in order to extract just the flags portion of a pte, which is a potentially cheaper operation than extracting the page number as well. It turns out this operation is not needed, because simply using a mask to extract the flags from a pte is sufficient for all current users. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page.h | 3 +-- arch/x86/include/asm/paravirt.h | 18 ------------------ arch/x86/kernel/paravirt.c | 1 - arch/x86/xen/enlighten.c | 1 - 4 files changed, 1 insertion(+), 22 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index e9873a2e869..6b9810859da 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -147,7 +147,7 @@ static inline pteval_t native_pte_val(pte_t pte) return pte.pte; } -static inline pteval_t native_pte_flags(pte_t pte) +static inline pteval_t pte_flags(pte_t pte) { return native_pte_val(pte) & PTE_FLAGS_MASK; } @@ -173,7 +173,6 @@ static inline pteval_t native_pte_flags(pte_t pte) #endif #define pte_val(x) native_pte_val(x) -#define pte_flags(x) native_pte_flags(x) #define __pte(x) native_make_pte(x) #endif /* CONFIG_PARAVIRT */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aed..e25c410f3d8 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -279,7 +279,6 @@ struct pv_mmu_ops { pte_t *ptep, pte_t pte); pteval_t (*pte_val)(pte_t); - pteval_t (*pte_flags)(pte_t); pte_t (*make_pte)(pteval_t pte); pgdval_t (*pgd_val)(pgd_t); @@ -1084,23 +1083,6 @@ static inline pteval_t pte_val(pte_t pte) return ret; } -static inline pteval_t pte_flags(pte_t pte) -{ - pteval_t ret; - - if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags, - pte.pte, (u64)pte.pte >> 32); - else - ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags, - pte.pte); - -#ifdef CONFIG_PARAVIRT_DEBUG - BUG_ON(ret & PTE_PFN_MASK); -#endif - return ret; -} - static inline pgd_t __pgd(pgdval_t val) { pgdval_t ret; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e4c8fb60887..202514be592 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -435,7 +435,6 @@ struct pv_mmu_ops pv_mmu_ops = { #endif /* PAGETABLE_LEVELS >= 3 */ .pte_val = native_pte_val, - .pte_flags = native_pte_flags, .pgd_val = native_pgd_val, .make_pte = native_make_pte, diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bea215230b2..6f1bb71aa13 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1314,7 +1314,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .ptep_modify_prot_commit = __ptep_modify_prot_commit, .pte_val = xen_pte_val, - .pte_flags = native_pte_flags, .pgd_val = xen_pgd_val, .make_pte = xen_make_pte, -- cgit v1.2.3-70-g09d2 From 99d0000f710f3432182761f65f9658f1cf0bf455 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 23 Jan 2009 11:09:15 +0100 Subject: x86, xen: fix hardirq.h merge fallout Impact: build fix This build error: arch/x86/xen/suspend.c:22: error: implicit declaration of function 'fix_to_virt' arch/x86/xen/suspend.c:22: error: 'FIX_PARAVIRT_BOOTMAP' undeclared (first use in this function) arch/x86/xen/suspend.c:22: error: (Each undeclared identifier is reported only once arch/x86/xen/suspend.c:22: error: for each function it appears in.) triggers because the hardirq.h unification removed an implicit fixmap.h include - on which arch/x86/xen/suspend.c depended. Add the fixmap.h include explicitly. Signed-off-by: Ingo Molnar --- arch/x86/xen/suspend.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 212ffe012b7..95be7b43472 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -6,6 +6,7 @@ #include #include +#include #include "xen-ops.h" #include "mmu.h" -- cgit v1.2.3-70-g09d2 From b2d2f4312b117a6cc647c8521e2643a88771f757 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: x86: initialize per-cpu GDT segment in per-cpu setup Impact: cleanup Rename init_gdt() to setup_percpu_segment(), and move it to setup_percpu.c. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 1 - arch/x86/kernel/Makefile | 3 +-- arch/x86/kernel/setup_percpu.c | 14 ++++++++++++++ arch/x86/kernel/smpboot.c | 4 ---- arch/x86/kernel/smpcommon.c | 25 ------------------------- arch/x86/mach-voyager/voyager_smp.c | 2 -- arch/x86/xen/smp.c | 1 - 7 files changed, 15 insertions(+), 35 deletions(-) delete mode 100644 arch/x86/kernel/smpcommon.c (limited to 'arch/x86/xen') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 48676b943b9..32c30b02b51 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -778,7 +778,6 @@ extern struct desc_ptr early_gdt_descr; extern void cpu_set_gdt(int); extern void switch_to_new_gdt(void); extern void cpu_init(void); -extern void init_gdt(int cpu); static inline unsigned long get_debugctlmsr(void) { diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 73de055c29c..37fa30bada1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -60,8 +60,7 @@ obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o obj-$(CONFIG_SMP) += setup_percpu.o -obj-$(CONFIG_X86_32_SMP) += smpcommon.o -obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o +obj-$(CONFIG_X86_64_SMP) += tsc_sync.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 599dc1cc1da..bcca3a7b374 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -40,6 +40,19 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { }; EXPORT_SYMBOL(__per_cpu_offset); +static inline void setup_percpu_segment(int cpu) +{ +#ifdef CONFIG_X86_32 + struct desc_struct gdt; + + pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF, + 0x2 | DESCTYPE_S, 0x8); + gdt.s = 1; + write_gdt_entry(get_cpu_gdt_table(cpu), + GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); +#endif +} + /* * Great future plan: * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. @@ -81,6 +94,7 @@ void __init setup_per_cpu_areas(void) per_cpu_offset(cpu) = ptr - __per_cpu_start; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; + setup_percpu_segment(cpu); /* * Copy data used in early init routines from the initial arrays to the * per cpu data areas. These arrays then become expendable and the diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index def770b57b5..f9dbcff4354 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -793,7 +793,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) do_rest: per_cpu(current_task, cpu) = c_idle.idle; #ifdef CONFIG_X86_32 - init_gdt(cpu); /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); #else @@ -1186,9 +1185,6 @@ out: void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); -#ifdef CONFIG_X86_32 - init_gdt(me); -#endif switch_to_new_gdt(); /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c deleted file mode 100644 index 5ec29a1a846..00000000000 --- a/arch/x86/kernel/smpcommon.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * SMP stuff which is common to all sub-architectures. - */ -#include -#include -#include - -#ifdef CONFIG_X86_32 -/* - * Initialize the CPU's GDT. This is either the boot CPU doing itself - * (still using the master per-cpu area), or a CPU doing it for a - * secondary which will soon come up. - */ -__cpuinit void init_gdt(int cpu) -{ - struct desc_struct gdt; - - pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, - 0x2 | DESCTYPE_S, 0x8); - gdt.s = 1; - - write_gdt_entry(get_cpu_gdt_table(cpu), - GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); -} -#endif diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index dd82f2052f3..331cd6d5648 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -530,7 +530,6 @@ static void __init do_boot_cpu(__u8 cpu) /* init_tasks (in sched.c) is indexed logically */ stack_start.sp = (void *)idle->thread.sp; - init_gdt(cpu); per_cpu(current_task, cpu) = idle; early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); irq_ctx_init(cpu); @@ -1747,7 +1746,6 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) static void __cpuinit voyager_smp_prepare_boot_cpu(void) { - init_gdt(smp_processor_id()); switch_to_new_gdt(); cpu_set(smp_processor_id(), cpu_online_map); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 72c2eb9b64c..7735e3dd359 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -281,7 +281,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) per_cpu(current_task, cpu) = idle; #ifdef CONFIG_X86_32 - init_gdt(cpu); irq_ctx_init(cpu); #else clear_tsk_thread_flag(idle, TIF_FORK); -- cgit v1.2.3-70-g09d2 From 319f3ba52c71630865b10ac3b99dd020440d681d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:01 -0800 Subject: xen: move remaining mmu-related stuff into mmu.c Impact: Cleanup Move remaining mmu-related stuff into mmu.c. A general cleanup, and lay the groundwork for later patches. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/xen/enlighten.c | 731 +---------------------------------------------- arch/x86/xen/mmu.c | 700 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/xen/mmu.h | 3 + arch/x86/xen/xen-ops.h | 8 + 4 files changed, 712 insertions(+), 730 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 6b3f7eef57e..0cd2a165f17 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -61,35 +61,6 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); enum xen_domain_type xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); -/* - * Identity map, in addition to plain kernel map. This needs to be - * large enough to allocate page table pages to allocate the rest. - * Each page can map 2MB. - */ -static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; - -#ifdef CONFIG_X86_64 -/* l3 pud for userspace vsyscall mapping */ -static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; -#endif /* CONFIG_X86_64 */ - -/* - * Note about cr3 (pagetable base) values: - * - * xen_cr3 contains the current logical cr3 value; it contains the - * last set cr3. This may not be the current effective cr3, because - * its update may be being lazily deferred. However, a vcpu looking - * at its own cr3 can use this value knowing that it everything will - * be self-consistent. - * - * xen_current_cr3 contains the actual vcpu cr3; it is set once the - * hypercall to set the vcpu cr3 is complete (so it may be a little - * out of date, but it will never be set early). If one vcpu is - * looking at another vcpu's cr3 value, it should use this variable. - */ -DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ -DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ - struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); @@ -237,7 +208,7 @@ static unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } -static void xen_leave_lazy(void) +void xen_leave_lazy(void) { paravirt_leave_lazy(paravirt_get_lazy_mode()); xen_mc_flush(); @@ -598,76 +569,6 @@ static struct apic_ops xen_basic_apic_ops = { #endif -static void xen_flush_tlb(void) -{ - struct mmuext_op *op; - struct multicall_space mcs; - - preempt_disable(); - - mcs = xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = MMUEXT_TLB_FLUSH_LOCAL; - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); -} - -static void xen_flush_tlb_single(unsigned long addr) -{ - struct mmuext_op *op; - struct multicall_space mcs; - - preempt_disable(); - - mcs = xen_mc_entry(sizeof(*op)); - op = mcs.args; - op->cmd = MMUEXT_INVLPG_LOCAL; - op->arg1.linear_addr = addr & PAGE_MASK; - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); -} - -static void xen_flush_tlb_others(const struct cpumask *cpus, - struct mm_struct *mm, unsigned long va) -{ - struct { - struct mmuext_op op; - DECLARE_BITMAP(mask, NR_CPUS); - } *args; - struct multicall_space mcs; - - BUG_ON(cpumask_empty(cpus)); - BUG_ON(!mm); - - mcs = xen_mc_entry(sizeof(*args)); - args = mcs.args; - args->op.arg2.vcpumask = to_cpumask(args->mask); - - /* Remove us, and any offline CPUS. */ - cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); - cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); - if (unlikely(cpumask_empty(to_cpumask(args->mask)))) - goto issue; - - if (va == TLB_FLUSH_ALL) { - args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; - } else { - args->op.cmd = MMUEXT_INVLPG_MULTI; - args->op.arg1.linear_addr = va; - } - - MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); - -issue: - xen_mc_issue(PARAVIRT_LAZY_MMU); -} static void xen_clts(void) { @@ -693,21 +594,6 @@ static void xen_write_cr0(unsigned long cr0) xen_mc_issue(PARAVIRT_LAZY_CPU); } -static void xen_write_cr2(unsigned long cr2) -{ - percpu_read(xen_vcpu)->arch.cr2 = cr2; -} - -static unsigned long xen_read_cr2(void) -{ - return percpu_read(xen_vcpu)->arch.cr2; -} - -static unsigned long xen_read_cr2_direct(void) -{ - return percpu_read(xen_vcpu_info.arch.cr2); -} - static void xen_write_cr4(unsigned long cr4) { cr4 &= ~X86_CR4_PGE; @@ -716,71 +602,6 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4); } -static unsigned long xen_read_cr3(void) -{ - return percpu_read(xen_cr3); -} - -static void set_current_cr3(void *v) -{ - percpu_write(xen_current_cr3, (unsigned long)v); -} - -static void __xen_write_cr3(bool kernel, unsigned long cr3) -{ - struct mmuext_op *op; - struct multicall_space mcs; - unsigned long mfn; - - if (cr3) - mfn = pfn_to_mfn(PFN_DOWN(cr3)); - else - mfn = 0; - - WARN_ON(mfn == 0 && kernel); - - mcs = __xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; - op->arg1.mfn = mfn; - - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - if (kernel) { - percpu_write(xen_cr3, cr3); - - /* Update xen_current_cr3 once the batch has actually - been submitted. */ - xen_mc_callback(set_current_cr3, (void *)cr3); - } -} - -static void xen_write_cr3(unsigned long cr3) -{ - BUG_ON(preemptible()); - - xen_mc_batch(); /* disables interrupts */ - - /* Update while interrupts are disabled, so its atomic with - respect to ipis */ - percpu_write(xen_cr3, cr3); - - __xen_write_cr3(true, cr3); - -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); - if (user_pgd) - __xen_write_cr3(false, __pa(user_pgd)); - else - __xen_write_cr3(false, 0); - } -#endif - - xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ -} - static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; @@ -822,185 +643,6 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) return ret; } -/* Early in boot, while setting up the initial pagetable, assume - everything is pinned. */ -static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) -{ -#ifdef CONFIG_FLATMEM - BUG_ON(mem_map); /* should only be used early */ -#endif - make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); -} - -/* Early release_pte assumes that all pts are pinned, since there's - only init_mm and anything attached to that is pinned. */ -static void xen_release_pte_init(unsigned long pfn) -{ - make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); -} - -static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) -{ - struct mmuext_op op; - op.cmd = cmd; - op.arg1.mfn = pfn_to_mfn(pfn); - if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) - BUG(); -} - -/* This needs to make sure the new pte page is pinned iff its being - attached to a pinned pagetable. */ -static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) -{ - struct page *page = pfn_to_page(pfn); - - if (PagePinned(virt_to_page(mm->pgd))) { - SetPagePinned(page); - - vm_unmap_aliases(); - if (!PageHighMem(page)) { - make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); - if (level == PT_PTE && USE_SPLIT_PTLOCKS) - pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - } else { - /* make sure there are no stray mappings of - this page */ - kmap_flush_unused(); - } - } -} - -static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PTE); -} - -static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PMD); -} - -static int xen_pgd_alloc(struct mm_struct *mm) -{ - pgd_t *pgd = mm->pgd; - int ret = 0; - - BUG_ON(PagePinned(virt_to_page(pgd))); - -#ifdef CONFIG_X86_64 - { - struct page *page = virt_to_page(pgd); - pgd_t *user_pgd; - - BUG_ON(page->private != 0); - - ret = -ENOMEM; - - user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - page->private = (unsigned long)user_pgd; - - if (user_pgd != NULL) { - user_pgd[pgd_index(VSYSCALL_START)] = - __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); - ret = 0; - } - - BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); - } -#endif - - return ret; -} - -static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ -#ifdef CONFIG_X86_64 - pgd_t *user_pgd = xen_get_user_pgd(pgd); - - if (user_pgd) - free_page((unsigned long)user_pgd); -#endif -} - -/* This should never happen until we're OK to use struct page */ -static void xen_release_ptpage(unsigned long pfn, unsigned level) -{ - struct page *page = pfn_to_page(pfn); - - if (PagePinned(page)) { - if (!PageHighMem(page)) { - if (level == PT_PTE && USE_SPLIT_PTLOCKS) - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); - make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); - } - ClearPagePinned(page); - } -} - -static void xen_release_pte(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PTE); -} - -static void xen_release_pmd(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PMD); -} - -#if PAGETABLE_LEVELS == 4 -static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PUD); -} - -static void xen_release_pud(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PUD); -} -#endif - -#ifdef CONFIG_HIGHPTE -static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) -{ - pgprot_t prot = PAGE_KERNEL; - - if (PagePinned(page)) - prot = PAGE_KERNEL_RO; - - if (0 && PageHighMem(page)) - printk("mapping highpte %lx type %d prot %s\n", - page_to_pfn(page), type, - (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ"); - - return kmap_atomic_prot(page, type, prot); -} -#endif - -#ifdef CONFIG_X86_32 -static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) -{ - /* If there's an existing pte, then don't allow _PAGE_RW to be set */ - if (pte_val_ma(*ptep) & _PAGE_PRESENT) - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & - pte_val_ma(pte)); - - return pte; -} - -/* Init-time set_pte while constructing initial pagetables, which - doesn't allow RO pagetable pages to be remapped RW */ -static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) -{ - pte = mask_rw_pte(ptep, pte); - - xen_set_pte(ptep, pte); -} -#endif - -static __init void xen_pagetable_setup_start(pgd_t *base) -{ -} - void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { @@ -1021,37 +663,6 @@ void xen_setup_shared_info(void) xen_setup_mfn_list_list(); } -static __init void xen_pagetable_setup_done(pgd_t *base) -{ - xen_setup_shared_info(); -} - -static __init void xen_post_allocator_init(void) -{ - pv_mmu_ops.set_pte = xen_set_pte; - pv_mmu_ops.set_pmd = xen_set_pmd; - pv_mmu_ops.set_pud = xen_set_pud; -#if PAGETABLE_LEVELS == 4 - pv_mmu_ops.set_pgd = xen_set_pgd; -#endif - - /* This will work as long as patching hasn't happened yet - (which it hasn't) */ - pv_mmu_ops.alloc_pte = xen_alloc_pte; - pv_mmu_ops.alloc_pmd = xen_alloc_pmd; - pv_mmu_ops.release_pte = xen_release_pte; - pv_mmu_ops.release_pmd = xen_release_pmd; -#if PAGETABLE_LEVELS == 4 - pv_mmu_ops.alloc_pud = xen_alloc_pud; - pv_mmu_ops.release_pud = xen_release_pud; -#endif - -#ifdef CONFIG_X86_64 - SetPagePinned(virt_to_page(level3_user_vsyscall)); -#endif - xen_mark_init_mm_pinned(); -} - /* This is called once we have the cpu_possible_map */ void xen_setup_vcpu_info_placement(void) { @@ -1126,49 +737,6 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, return ret; } -static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) -{ - pte_t pte; - - phys >>= PAGE_SHIFT; - - switch (idx) { - case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: -#ifdef CONFIG_X86_F00F_BUG - case FIX_F00F_IDT: -#endif -#ifdef CONFIG_X86_32 - case FIX_WP_TEST: - case FIX_VDSO: -# ifdef CONFIG_HIGHMEM - case FIX_KMAP_BEGIN ... FIX_KMAP_END: -# endif -#else - case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: -#endif -#ifdef CONFIG_X86_LOCAL_APIC - case FIX_APIC_BASE: /* maps dummy local APIC */ -#endif - pte = pfn_pte(phys, prot); - break; - - default: - pte = mfn_pte(phys, prot); - break; - } - - __native_set_fixmap(idx, pte); - -#ifdef CONFIG_X86_64 - /* Replicate changes to map the vsyscall page into the user - pagetable vsyscall mapping. */ - if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { - unsigned long vaddr = __fix_to_virt(idx); - set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); - } -#endif -} - static const struct pv_info xen_info __initdata = { .paravirt_enabled = 1, .shared_kernel_pmd = 0, @@ -1264,86 +832,6 @@ static const struct pv_apic_ops xen_apic_ops __initdata = { #endif }; -static const struct pv_mmu_ops xen_mmu_ops __initdata = { - .pagetable_setup_start = xen_pagetable_setup_start, - .pagetable_setup_done = xen_pagetable_setup_done, - - .read_cr2 = xen_read_cr2, - .write_cr2 = xen_write_cr2, - - .read_cr3 = xen_read_cr3, - .write_cr3 = xen_write_cr3, - - .flush_tlb_user = xen_flush_tlb, - .flush_tlb_kernel = xen_flush_tlb, - .flush_tlb_single = xen_flush_tlb_single, - .flush_tlb_others = xen_flush_tlb_others, - - .pte_update = paravirt_nop, - .pte_update_defer = paravirt_nop, - - .pgd_alloc = xen_pgd_alloc, - .pgd_free = xen_pgd_free, - - .alloc_pte = xen_alloc_pte_init, - .release_pte = xen_release_pte_init, - .alloc_pmd = xen_alloc_pte_init, - .alloc_pmd_clone = paravirt_nop, - .release_pmd = xen_release_pte_init, - -#ifdef CONFIG_HIGHPTE - .kmap_atomic_pte = xen_kmap_atomic_pte, -#endif - -#ifdef CONFIG_X86_64 - .set_pte = xen_set_pte, -#else - .set_pte = xen_set_pte_init, -#endif - .set_pte_at = xen_set_pte_at, - .set_pmd = xen_set_pmd_hyper, - - .ptep_modify_prot_start = __ptep_modify_prot_start, - .ptep_modify_prot_commit = __ptep_modify_prot_commit, - - .pte_val = xen_pte_val, - .pgd_val = xen_pgd_val, - - .make_pte = xen_make_pte, - .make_pgd = xen_make_pgd, - -#ifdef CONFIG_X86_PAE - .set_pte_atomic = xen_set_pte_atomic, - .set_pte_present = xen_set_pte_at, - .pte_clear = xen_pte_clear, - .pmd_clear = xen_pmd_clear, -#endif /* CONFIG_X86_PAE */ - .set_pud = xen_set_pud_hyper, - - .make_pmd = xen_make_pmd, - .pmd_val = xen_pmd_val, - -#if PAGETABLE_LEVELS == 4 - .pud_val = xen_pud_val, - .make_pud = xen_make_pud, - .set_pgd = xen_set_pgd_hyper, - - .alloc_pud = xen_alloc_pte_init, - .release_pud = xen_release_pte_init, -#endif /* PAGETABLE_LEVELS == 4 */ - - .activate_mm = xen_activate_mm, - .dup_mmap = xen_dup_mmap, - .exit_mmap = xen_exit_mmap, - - .lazy_mode = { - .enter = paravirt_enter_lazy_mmu, - .leave = xen_leave_lazy, - }, - - .set_fixmap = xen_set_fixmap, -}; - static void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; @@ -1386,223 +874,6 @@ static const struct machine_ops __initdata xen_machine_ops = { }; -static void __init xen_reserve_top(void) -{ -#ifdef CONFIG_X86_32 - unsigned long top = HYPERVISOR_VIRT_START; - struct xen_platform_parameters pp; - - if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) - top = pp.virt_start; - - reserve_top_address(-top); -#endif /* CONFIG_X86_32 */ -} - -/* - * Like __va(), but returns address in the kernel mapping (which is - * all we have until the physical memory mapping has been set up. - */ -static void *__ka(phys_addr_t paddr) -{ -#ifdef CONFIG_X86_64 - return (void *)(paddr + __START_KERNEL_map); -#else - return __va(paddr); -#endif -} - -/* Convert a machine address to physical address */ -static unsigned long m2p(phys_addr_t maddr) -{ - phys_addr_t paddr; - - maddr &= PTE_PFN_MASK; - paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; - - return paddr; -} - -/* Convert a machine address to kernel virtual */ -static void *m2v(phys_addr_t maddr) -{ - return __ka(m2p(maddr)); -} - -static void set_page_prot(void *addr, pgprot_t prot) -{ - unsigned long pfn = __pa(addr) >> PAGE_SHIFT; - pte_t pte = pfn_pte(pfn, prot); - - if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) - BUG(); -} - -static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) -{ - unsigned pmdidx, pteidx; - unsigned ident_pte; - unsigned long pfn; - - ident_pte = 0; - pfn = 0; - for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { - pte_t *pte_page; - - /* Reuse or allocate a page of ptes */ - if (pmd_present(pmd[pmdidx])) - pte_page = m2v(pmd[pmdidx].pmd); - else { - /* Check for free pte pages */ - if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) - break; - - pte_page = &level1_ident_pgt[ident_pte]; - ident_pte += PTRS_PER_PTE; - - pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); - } - - /* Install mappings */ - for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { - pte_t pte; - - if (pfn > max_pfn_mapped) - max_pfn_mapped = pfn; - - if (!pte_none(pte_page[pteidx])) - continue; - - pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); - pte_page[pteidx] = pte; - } - } - - for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) - set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); - - set_page_prot(pmd, PAGE_KERNEL_RO); -} - -#ifdef CONFIG_X86_64 -static void convert_pfn_mfn(void *v) -{ - pte_t *pte = v; - int i; - - /* All levels are converted the same way, so just treat them - as ptes. */ - for (i = 0; i < PTRS_PER_PTE; i++) - pte[i] = xen_make_pte(pte[i].pte); -} - -/* - * Set up the inital kernel pagetable. - * - * We can construct this by grafting the Xen provided pagetable into - * head_64.S's preconstructed pagetables. We copy the Xen L2's into - * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This - * means that only the kernel has a physical mapping to start with - - * but that's enough to get __va working. We need to fill in the rest - * of the physical mapping once some sort of allocator has been set - * up. - */ -static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) -{ - pud_t *l3; - pmd_t *l2; - - /* Zap identity mapping */ - init_level4_pgt[0] = __pgd(0); - - /* Pre-constructed entries are in pfn, so convert to mfn */ - convert_pfn_mfn(init_level4_pgt); - convert_pfn_mfn(level3_ident_pgt); - convert_pfn_mfn(level3_kernel_pgt); - - l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); - l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); - - memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - - l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); - l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); - memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - - /* Set up identity map */ - xen_map_identity_early(level2_ident_pgt, max_pfn); - - /* Make pagetable pieces RO */ - set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); - set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); - - /* Pin down new L4 */ - pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, - PFN_DOWN(__pa_symbol(init_level4_pgt))); - - /* Unpin Xen-provided one */ - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - - /* Switch over */ - pgd = init_level4_pgt; - - /* - * At this stage there can be no user pgd, and no page - * structure to attach it to, so make sure we just set kernel - * pgd. - */ - xen_mc_batch(); - __xen_write_cr3(true, __pa(pgd)); - xen_mc_issue(PARAVIRT_LAZY_CPU); - - reserve_early(__pa(xen_start_info->pt_base), - __pa(xen_start_info->pt_base + - xen_start_info->nr_pt_frames * PAGE_SIZE), - "XEN PAGETABLES"); - - return pgd; -} -#else /* !CONFIG_X86_64 */ -static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; - -static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) -{ - pmd_t *kernel_pmd; - - init_pg_tables_start = __pa(pgd); - init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; - max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); - - kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); - - xen_map_identity_early(level2_kernel_pgt, max_pfn); - - memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); - set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], - __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); - - set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); - set_page_prot(empty_zero_page, PAGE_KERNEL_RO); - - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - - xen_write_cr3(__pa(swapper_pg_dir)); - - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); - - return swapper_pg_dir; -} -#endif /* CONFIG_X86_64 */ - /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 98cb9869eb2..94e452c0b00 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -55,6 +56,8 @@ #include #include +#include +#include #include "multicalls.h" #include "mmu.h" @@ -114,6 +117,37 @@ static inline void check_zero(void) #endif /* CONFIG_XEN_DEBUG_FS */ + +/* + * Identity map, in addition to plain kernel map. This needs to be + * large enough to allocate page table pages to allocate the rest. + * Each page can map 2MB. + */ +static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; + +#ifdef CONFIG_X86_64 +/* l3 pud for userspace vsyscall mapping */ +static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; +#endif /* CONFIG_X86_64 */ + +/* + * Note about cr3 (pagetable base) values: + * + * xen_cr3 contains the current logical cr3 value; it contains the + * last set cr3. This may not be the current effective cr3, because + * its update may be being lazily deferred. However, a vcpu looking + * at its own cr3 can use this value knowing that it everything will + * be self-consistent. + * + * xen_current_cr3 contains the actual vcpu cr3; it is set once the + * hypercall to set the vcpu cr3 is complete (so it may be a little + * out of date, but it will never be set early). If one vcpu is + * looking at another vcpu's cr3 value, it should use this variable. + */ +DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ +DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ + + /* * Just beyond the highest usermode address. STACK_TOP_MAX has a * redzone above it, so round it up to a PGD boundary. @@ -1152,6 +1186,672 @@ void xen_exit_mmap(struct mm_struct *mm) spin_unlock(&mm->page_table_lock); } +static __init void xen_pagetable_setup_start(pgd_t *base) +{ +} + +static __init void xen_pagetable_setup_done(pgd_t *base) +{ + xen_setup_shared_info(); +} + +static void xen_write_cr2(unsigned long cr2) +{ + percpu_read(xen_vcpu)->arch.cr2 = cr2; +} + +static unsigned long xen_read_cr2(void) +{ + return percpu_read(xen_vcpu)->arch.cr2; +} + +unsigned long xen_read_cr2_direct(void) +{ + return percpu_read(xen_vcpu_info.arch.cr2); +} + +static void xen_flush_tlb(void) +{ + struct mmuext_op *op; + struct multicall_space mcs; + + preempt_disable(); + + mcs = xen_mc_entry(sizeof(*op)); + + op = mcs.args; + op->cmd = MMUEXT_TLB_FLUSH_LOCAL; + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); +} + +static void xen_flush_tlb_single(unsigned long addr) +{ + struct mmuext_op *op; + struct multicall_space mcs; + + preempt_disable(); + + mcs = xen_mc_entry(sizeof(*op)); + op = mcs.args; + op->cmd = MMUEXT_INVLPG_LOCAL; + op->arg1.linear_addr = addr & PAGE_MASK; + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); +} + +static void xen_flush_tlb_others(const struct cpumask *cpus, + struct mm_struct *mm, unsigned long va) +{ + struct { + struct mmuext_op op; + DECLARE_BITMAP(mask, NR_CPUS); + } *args; + struct multicall_space mcs; + + BUG_ON(cpumask_empty(cpus)); + BUG_ON(!mm); + + mcs = xen_mc_entry(sizeof(*args)); + args = mcs.args; + args->op.arg2.vcpumask = to_cpumask(args->mask); + + /* Remove us, and any offline CPUS. */ + cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); + if (unlikely(cpumask_empty(to_cpumask(args->mask)))) + goto issue; + + if (va == TLB_FLUSH_ALL) { + args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; + } else { + args->op.cmd = MMUEXT_INVLPG_MULTI; + args->op.arg1.linear_addr = va; + } + + MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); + +issue: + xen_mc_issue(PARAVIRT_LAZY_MMU); +} + +static unsigned long xen_read_cr3(void) +{ + return percpu_read(xen_cr3); +} + +static void set_current_cr3(void *v) +{ + percpu_write(xen_current_cr3, (unsigned long)v); +} + +static void __xen_write_cr3(bool kernel, unsigned long cr3) +{ + struct mmuext_op *op; + struct multicall_space mcs; + unsigned long mfn; + + if (cr3) + mfn = pfn_to_mfn(PFN_DOWN(cr3)); + else + mfn = 0; + + WARN_ON(mfn == 0 && kernel); + + mcs = __xen_mc_entry(sizeof(*op)); + + op = mcs.args; + op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; + op->arg1.mfn = mfn; + + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + if (kernel) { + percpu_write(xen_cr3, cr3); + + /* Update xen_current_cr3 once the batch has actually + been submitted. */ + xen_mc_callback(set_current_cr3, (void *)cr3); + } +} + +static void xen_write_cr3(unsigned long cr3) +{ + BUG_ON(preemptible()); + + xen_mc_batch(); /* disables interrupts */ + + /* Update while interrupts are disabled, so its atomic with + respect to ipis */ + percpu_write(xen_cr3, cr3); + + __xen_write_cr3(true, cr3); + +#ifdef CONFIG_X86_64 + { + pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); + if (user_pgd) + __xen_write_cr3(false, __pa(user_pgd)); + else + __xen_write_cr3(false, 0); + } +#endif + + xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ +} + +static int xen_pgd_alloc(struct mm_struct *mm) +{ + pgd_t *pgd = mm->pgd; + int ret = 0; + + BUG_ON(PagePinned(virt_to_page(pgd))); + +#ifdef CONFIG_X86_64 + { + struct page *page = virt_to_page(pgd); + pgd_t *user_pgd; + + BUG_ON(page->private != 0); + + ret = -ENOMEM; + + user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + page->private = (unsigned long)user_pgd; + + if (user_pgd != NULL) { + user_pgd[pgd_index(VSYSCALL_START)] = + __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); + ret = 0; + } + + BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); + } +#endif + + return ret; +} + +static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ +#ifdef CONFIG_X86_64 + pgd_t *user_pgd = xen_get_user_pgd(pgd); + + if (user_pgd) + free_page((unsigned long)user_pgd); +#endif +} + + +/* Early in boot, while setting up the initial pagetable, assume + everything is pinned. */ +static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) +{ +#ifdef CONFIG_FLATMEM + BUG_ON(mem_map); /* should only be used early */ +#endif + make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); +} + +/* Early release_pte assumes that all pts are pinned, since there's + only init_mm and anything attached to that is pinned. */ +static void xen_release_pte_init(unsigned long pfn) +{ + make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); +} + +static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) +{ + struct mmuext_op op; + op.cmd = cmd; + op.arg1.mfn = pfn_to_mfn(pfn); + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) + BUG(); +} + +/* This needs to make sure the new pte page is pinned iff its being + attached to a pinned pagetable. */ +static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) +{ + struct page *page = pfn_to_page(pfn); + + if (PagePinned(virt_to_page(mm->pgd))) { + SetPagePinned(page); + + vm_unmap_aliases(); + if (!PageHighMem(page)) { + make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); + if (level == PT_PTE && USE_SPLIT_PTLOCKS) + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); + } else { + /* make sure there are no stray mappings of + this page */ + kmap_flush_unused(); + } + } +} + +static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn) +{ + xen_alloc_ptpage(mm, pfn, PT_PTE); +} + +static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) +{ + xen_alloc_ptpage(mm, pfn, PT_PMD); +} + +/* This should never happen until we're OK to use struct page */ +static void xen_release_ptpage(unsigned long pfn, unsigned level) +{ + struct page *page = pfn_to_page(pfn); + + if (PagePinned(page)) { + if (!PageHighMem(page)) { + if (level == PT_PTE && USE_SPLIT_PTLOCKS) + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); + make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); + } + ClearPagePinned(page); + } +} + +static void xen_release_pte(unsigned long pfn) +{ + xen_release_ptpage(pfn, PT_PTE); +} + +static void xen_release_pmd(unsigned long pfn) +{ + xen_release_ptpage(pfn, PT_PMD); +} + +#if PAGETABLE_LEVELS == 4 +static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) +{ + xen_alloc_ptpage(mm, pfn, PT_PUD); +} + +static void xen_release_pud(unsigned long pfn) +{ + xen_release_ptpage(pfn, PT_PUD); +} +#endif + +void __init xen_reserve_top(void) +{ +#ifdef CONFIG_X86_32 + unsigned long top = HYPERVISOR_VIRT_START; + struct xen_platform_parameters pp; + + if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) + top = pp.virt_start; + + reserve_top_address(-top); +#endif /* CONFIG_X86_32 */ +} + +/* + * Like __va(), but returns address in the kernel mapping (which is + * all we have until the physical memory mapping has been set up. + */ +static void *__ka(phys_addr_t paddr) +{ +#ifdef CONFIG_X86_64 + return (void *)(paddr + __START_KERNEL_map); +#else + return __va(paddr); +#endif +} + +/* Convert a machine address to physical address */ +static unsigned long m2p(phys_addr_t maddr) +{ + phys_addr_t paddr; + + maddr &= PTE_PFN_MASK; + paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; + + return paddr; +} + +/* Convert a machine address to kernel virtual */ +static void *m2v(phys_addr_t maddr) +{ + return __ka(m2p(maddr)); +} + +static void set_page_prot(void *addr, pgprot_t prot) +{ + unsigned long pfn = __pa(addr) >> PAGE_SHIFT; + pte_t pte = pfn_pte(pfn, prot); + + if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) + BUG(); +} + +static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) +{ + unsigned pmdidx, pteidx; + unsigned ident_pte; + unsigned long pfn; + + ident_pte = 0; + pfn = 0; + for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { + pte_t *pte_page; + + /* Reuse or allocate a page of ptes */ + if (pmd_present(pmd[pmdidx])) + pte_page = m2v(pmd[pmdidx].pmd); + else { + /* Check for free pte pages */ + if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) + break; + + pte_page = &level1_ident_pgt[ident_pte]; + ident_pte += PTRS_PER_PTE; + + pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); + } + + /* Install mappings */ + for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { + pte_t pte; + + if (pfn > max_pfn_mapped) + max_pfn_mapped = pfn; + + if (!pte_none(pte_page[pteidx])) + continue; + + pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); + pte_page[pteidx] = pte; + } + } + + for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) + set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); + + set_page_prot(pmd, PAGE_KERNEL_RO); +} + +#ifdef CONFIG_X86_64 +static void convert_pfn_mfn(void *v) +{ + pte_t *pte = v; + int i; + + /* All levels are converted the same way, so just treat them + as ptes. */ + for (i = 0; i < PTRS_PER_PTE; i++) + pte[i] = xen_make_pte(pte[i].pte); +} + +/* + * Set up the inital kernel pagetable. + * + * We can construct this by grafting the Xen provided pagetable into + * head_64.S's preconstructed pagetables. We copy the Xen L2's into + * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This + * means that only the kernel has a physical mapping to start with - + * but that's enough to get __va working. We need to fill in the rest + * of the physical mapping once some sort of allocator has been set + * up. + */ +__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, + unsigned long max_pfn) +{ + pud_t *l3; + pmd_t *l2; + + /* Zap identity mapping */ + init_level4_pgt[0] = __pgd(0); + + /* Pre-constructed entries are in pfn, so convert to mfn */ + convert_pfn_mfn(init_level4_pgt); + convert_pfn_mfn(level3_ident_pgt); + convert_pfn_mfn(level3_kernel_pgt); + + l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); + l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); + + memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + + l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); + l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); + memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + + /* Set up identity map */ + xen_map_identity_early(level2_ident_pgt, max_pfn); + + /* Make pagetable pieces RO */ + set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); + + /* Pin down new L4 */ + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, + PFN_DOWN(__pa_symbol(init_level4_pgt))); + + /* Unpin Xen-provided one */ + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); + + /* Switch over */ + pgd = init_level4_pgt; + + /* + * At this stage there can be no user pgd, and no page + * structure to attach it to, so make sure we just set kernel + * pgd. + */ + xen_mc_batch(); + __xen_write_cr3(true, __pa(pgd)); + xen_mc_issue(PARAVIRT_LAZY_CPU); + + reserve_early(__pa(xen_start_info->pt_base), + __pa(xen_start_info->pt_base + + xen_start_info->nr_pt_frames * PAGE_SIZE), + "XEN PAGETABLES"); + + return pgd; +} +#else /* !CONFIG_X86_64 */ +static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; + +__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, + unsigned long max_pfn) +{ + pmd_t *kernel_pmd; + + init_pg_tables_start = __pa(pgd); + init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; + max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); + + kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); + memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); + + xen_map_identity_early(level2_kernel_pgt, max_pfn); + + memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); + set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], + __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); + + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); + set_page_prot(empty_zero_page, PAGE_KERNEL_RO); + + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); + + xen_write_cr3(__pa(swapper_pg_dir)); + + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); + + return swapper_pg_dir; +} +#endif /* CONFIG_X86_64 */ + +static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) +{ + pte_t pte; + + phys >>= PAGE_SHIFT; + + switch (idx) { + case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: +#ifdef CONFIG_X86_F00F_BUG + case FIX_F00F_IDT: +#endif +#ifdef CONFIG_X86_32 + case FIX_WP_TEST: + case FIX_VDSO: +# ifdef CONFIG_HIGHMEM + case FIX_KMAP_BEGIN ... FIX_KMAP_END: +# endif +#else + case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: +#endif +#ifdef CONFIG_X86_LOCAL_APIC + case FIX_APIC_BASE: /* maps dummy local APIC */ +#endif + pte = pfn_pte(phys, prot); + break; + + default: + pte = mfn_pte(phys, prot); + break; + } + + __native_set_fixmap(idx, pte); + +#ifdef CONFIG_X86_64 + /* Replicate changes to map the vsyscall page into the user + pagetable vsyscall mapping. */ + if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { + unsigned long vaddr = __fix_to_virt(idx); + set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); + } +#endif +} + +__init void xen_post_allocator_init(void) +{ + pv_mmu_ops.set_pte = xen_set_pte; + pv_mmu_ops.set_pmd = xen_set_pmd; + pv_mmu_ops.set_pud = xen_set_pud; +#if PAGETABLE_LEVELS == 4 + pv_mmu_ops.set_pgd = xen_set_pgd; +#endif + + /* This will work as long as patching hasn't happened yet + (which it hasn't) */ + pv_mmu_ops.alloc_pte = xen_alloc_pte; + pv_mmu_ops.alloc_pmd = xen_alloc_pmd; + pv_mmu_ops.release_pte = xen_release_pte; + pv_mmu_ops.release_pmd = xen_release_pmd; +#if PAGETABLE_LEVELS == 4 + pv_mmu_ops.alloc_pud = xen_alloc_pud; + pv_mmu_ops.release_pud = xen_release_pud; +#endif + +#ifdef CONFIG_X86_64 + SetPagePinned(virt_to_page(level3_user_vsyscall)); +#endif + xen_mark_init_mm_pinned(); +} + + +const struct pv_mmu_ops xen_mmu_ops __initdata = { + .pagetable_setup_start = xen_pagetable_setup_start, + .pagetable_setup_done = xen_pagetable_setup_done, + + .read_cr2 = xen_read_cr2, + .write_cr2 = xen_write_cr2, + + .read_cr3 = xen_read_cr3, + .write_cr3 = xen_write_cr3, + + .flush_tlb_user = xen_flush_tlb, + .flush_tlb_kernel = xen_flush_tlb, + .flush_tlb_single = xen_flush_tlb_single, + .flush_tlb_others = xen_flush_tlb_others, + + .pte_update = paravirt_nop, + .pte_update_defer = paravirt_nop, + + .pgd_alloc = xen_pgd_alloc, + .pgd_free = xen_pgd_free, + + .alloc_pte = xen_alloc_pte_init, + .release_pte = xen_release_pte_init, + .alloc_pmd = xen_alloc_pte_init, + .alloc_pmd_clone = paravirt_nop, + .release_pmd = xen_release_pte_init, + +#ifdef CONFIG_HIGHPTE + .kmap_atomic_pte = xen_kmap_atomic_pte, +#endif + +#ifdef CONFIG_X86_64 + .set_pte = xen_set_pte, +#else + .set_pte = xen_set_pte_init, +#endif + .set_pte_at = xen_set_pte_at, + .set_pmd = xen_set_pmd_hyper, + + .ptep_modify_prot_start = __ptep_modify_prot_start, + .ptep_modify_prot_commit = __ptep_modify_prot_commit, + + .pte_val = xen_pte_val, + .pgd_val = xen_pgd_val, + + .make_pte = xen_make_pte, + .make_pgd = xen_make_pgd, + +#ifdef CONFIG_X86_PAE + .set_pte_atomic = xen_set_pte_atomic, + .set_pte_present = xen_set_pte_at, + .pte_clear = xen_pte_clear, + .pmd_clear = xen_pmd_clear, +#endif /* CONFIG_X86_PAE */ + .set_pud = xen_set_pud_hyper, + + .make_pmd = xen_make_pmd, + .pmd_val = xen_pmd_val, + +#if PAGETABLE_LEVELS == 4 + .pud_val = xen_pud_val, + .make_pud = xen_make_pud, + .set_pgd = xen_set_pgd_hyper, + + .alloc_pud = xen_alloc_pte_init, + .release_pud = xen_release_pte_init, +#endif /* PAGETABLE_LEVELS == 4 */ + + .activate_mm = xen_activate_mm, + .dup_mmap = xen_dup_mmap, + .exit_mmap = xen_exit_mmap, + + .lazy_mode = { + .enter = paravirt_enter_lazy_mmu, + .leave = xen_leave_lazy, + }, + + .set_fixmap = xen_set_fixmap, +}; + + #ifdef CONFIG_XEN_DEBUG_FS static struct dentry *d_mmu_debug; diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 98d71659da5..24d1b44a337 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -54,4 +54,7 @@ pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); +unsigned long xen_read_cr2_direct(void); + +extern const struct pv_mmu_ops xen_mmu_ops; #endif /* _XEN_MMU_H */ diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index c1f8faf0a2c..11913fc94c1 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -13,6 +13,7 @@ extern const char xen_failsafe_callback[]; struct trap_info; void xen_copy_trap_info(struct trap_info *traps); +DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info); DECLARE_PER_CPU(unsigned long, xen_cr3); DECLARE_PER_CPU(unsigned long, xen_current_cr3); @@ -22,6 +23,13 @@ extern struct shared_info *HYPERVISOR_shared_info; void xen_setup_mfn_list_list(void); void xen_setup_shared_info(void); +void xen_setup_machphys_mapping(void); +pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); +void xen_ident_map_ISA(void); +void xen_reserve_top(void); + +void xen_leave_lazy(void); +void xen_post_allocator_init(void); char * __init xen_memory_setup(void); void __init xen_arch_setup(void); -- cgit v1.2.3-70-g09d2 From ecb93d1ccd0aac63f03be2db3cac3fa974716f4c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:05 -0800 Subject: x86/paravirt: add register-saving thunks to reduce caller register pressure Impact: Optimization One of the problems with inserting a pile of C calls where previously there were none is that the register pressure is greatly increased. The C calling convention says that the caller must expect a certain set of registers may be trashed by the callee, and that the callee can use those registers without restriction. This includes the function argument registers, and several others. This patch seeks to alleviate this pressure by introducing wrapper thunks that will do the register saving/restoring, so that the callsite doesn't need to worry about it, but the callee function can be conventional compiler-generated code. In many cases (particularly performance-sensitive cases) the callee will be in assembler anyway, and need not use the compiler's calling convention. Standard calling convention is: arguments return scratch x86-32 eax edx ecx eax ? x86-64 rdi rsi rdx rcx rax r8 r9 r10 r11 The thunk preserves all argument and scratch registers. The return register is not preserved, and is available as a scratch register for unwrapped callee code (and of course the return value). Wrapped function pointers are themselves wrapped in a struct paravirt_callee_save structure, in order to get some warning from the compiler when functions with mismatched calling conventions are used. The most common paravirt ops, both statically and dynamically, are interrupt enable/disable/save/restore, so handle them first. This is particularly easy since their calls are handled specially anyway. XXX Deal with VMI. What's their calling convention? Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 126 +++++++++++++++++++++++++++++++--------- arch/x86/kernel/paravirt.c | 8 +-- arch/x86/kernel/vsmp_64.c | 12 ++-- arch/x86/lguest/boot.c | 13 +++-- arch/x86/xen/enlighten.c | 8 +-- arch/x86/xen/irq.c | 14 +++-- 6 files changed, 132 insertions(+), 49 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index dcce961262b..f9107b88631 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -17,6 +17,10 @@ #ifdef CONFIG_X86_32 /* CLBR_ANY should match all regs platform has. For i386, that's just it */ #define CLBR_ANY ((1 << 4) - 1) + +#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) +#define CLBR_RET_REG (CLBR_EAX) +#define CLBR_SCRATCH (0) #else #define CLBR_RAX CLBR_EAX #define CLBR_RCX CLBR_ECX @@ -27,16 +31,19 @@ #define CLBR_R9 (1 << 6) #define CLBR_R10 (1 << 7) #define CLBR_R11 (1 << 8) + #define CLBR_ANY ((1 << 9) - 1) #define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ CLBR_RCX | CLBR_R8 | CLBR_R9) -#define CLBR_RET_REG (CLBR_RAX | CLBR_RDX) +#define CLBR_RET_REG (CLBR_RAX) #define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) #include #endif /* X86_64 */ +#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) + #ifndef __ASSEMBLY__ #include #include @@ -50,6 +57,14 @@ struct tss_struct; struct mm_struct; struct desc_struct; +/* + * Wrapper type for pointers to code which uses the non-standard + * calling convention. See PV_CALL_SAVE_REGS_THUNK below. + */ +struct paravirt_callee_save { + void *func; +}; + /* general info */ struct pv_info { unsigned int kernel_rpl; @@ -199,11 +214,15 @@ struct pv_irq_ops { * expected to use X86_EFLAGS_IF; all other bits * returned from save_fl are undefined, and may be ignored by * restore_fl. + * + * NOTE: These functions callers expect the callee to preserve + * more registers than the standard C calling convention. */ - unsigned long (*save_fl)(void); - void (*restore_fl)(unsigned long); - void (*irq_disable)(void); - void (*irq_enable)(void); + struct paravirt_callee_save save_fl; + struct paravirt_callee_save restore_fl; + struct paravirt_callee_save irq_disable; + struct paravirt_callee_save irq_enable; + void (*safe_halt)(void); void (*halt)(void); @@ -1437,12 +1456,37 @@ extern struct paravirt_patch_site __parainstructions[], __parainstructions_end[]; #ifdef CONFIG_X86_32 -#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;" -#define PV_RESTORE_REGS "popl %%edx; popl %%ecx" +#define PV_SAVE_REGS "pushl %ecx; pushl %edx;" +#define PV_RESTORE_REGS "popl %edx; popl %ecx;" + +/* save and restore all caller-save registers, except return value */ +#define PV_SAVE_ALL_CALLER_REGS PV_SAVE_REGS +#define PV_RESTORE_ALL_CALLER_REGS PV_RESTORE_REGS + #define PV_FLAGS_ARG "0" #define PV_EXTRA_CLOBBERS #define PV_VEXTRA_CLOBBERS #else +/* save and restore all caller-save registers, except return value */ +#define PV_SAVE_ALL_CALLER_REGS \ + "push %rcx;" \ + "push %rdx;" \ + "push %rsi;" \ + "push %rdi;" \ + "push %r8;" \ + "push %r9;" \ + "push %r10;" \ + "push %r11;" +#define PV_RESTORE_ALL_CALLER_REGS \ + "pop %r11;" \ + "pop %r10;" \ + "pop %r9;" \ + "pop %r8;" \ + "pop %rdi;" \ + "pop %rsi;" \ + "pop %rdx;" \ + "pop %rcx;" + /* We save some registers, but all of them, that's too much. We clobber all * caller saved registers but the argument parameter */ #define PV_SAVE_REGS "pushq %%rdi;" @@ -1452,52 +1496,76 @@ extern struct paravirt_patch_site __parainstructions[], #define PV_FLAGS_ARG "D" #endif +/* + * Generate a thunk around a function which saves all caller-save + * registers except for the return value. This allows C functions to + * be called from assembler code where fewer than normal registers are + * available. It may also help code generation around calls from C + * code if the common case doesn't use many registers. + * + * When a callee is wrapped in a thunk, the caller can assume that all + * arg regs and all scratch registers are preserved across the + * call. The return value in rax/eax will not be saved, even for void + * functions. + */ +#define PV_CALLEE_SAVE_REGS_THUNK(func) \ + extern typeof(func) __raw_callee_save_##func; \ + static void *__##func##__ __used = func; \ + \ + asm(".pushsection .text;" \ + "__raw_callee_save_" #func ": " \ + PV_SAVE_ALL_CALLER_REGS \ + "call " #func ";" \ + PV_RESTORE_ALL_CALLER_REGS \ + "ret;" \ + ".popsection") + +/* Get a reference to a callee-save function */ +#define PV_CALLEE_SAVE(func) \ + ((struct paravirt_callee_save) { __raw_callee_save_##func }) + +/* Promise that "func" already uses the right calling convention */ +#define __PV_IS_CALLEE_SAVE(func) \ + ((struct paravirt_callee_save) { func }) + static inline unsigned long __raw_local_save_flags(void) { unsigned long f; - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : "=a"(f) : paravirt_type(pv_irq_ops.save_fl), paravirt_clobber(CLBR_EAX) - : "memory", "cc" PV_VEXTRA_CLOBBERS); + : "memory", "cc"); return f; } static inline void raw_local_irq_restore(unsigned long f) { - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : "=a"(f) : PV_FLAGS_ARG(f), paravirt_type(pv_irq_ops.restore_fl), paravirt_clobber(CLBR_EAX) - : "memory", "cc" PV_EXTRA_CLOBBERS); + : "memory", "cc"); } static inline void raw_local_irq_disable(void) { - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : : paravirt_type(pv_irq_ops.irq_disable), paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); + : "memory", "eax", "cc"); } static inline void raw_local_irq_enable(void) { - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : : paravirt_type(pv_irq_ops.irq_enable), paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); + : "memory", "eax", "cc"); } static inline unsigned long __raw_local_irq_save(void) @@ -1541,9 +1609,9 @@ static inline unsigned long __raw_local_irq_save(void) #define COND_PUSH(set, mask, reg) \ - .if ((~set) & mask); push %reg; .endif + .if ((~(set)) & mask); push %reg; .endif #define COND_POP(set, mask, reg) \ - .if ((~set) & mask); pop %reg; .endif + .if ((~(set)) & mask); pop %reg; .endif #ifdef CONFIG_X86_64 @@ -1594,15 +1662,15 @@ static inline unsigned long __raw_local_irq_save(void) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ - PV_SAVE_REGS(clobbers); \ + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ - PV_RESTORE_REGS(clobbers);) + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ - PV_SAVE_REGS(clobbers); \ + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ - PV_RESTORE_REGS(clobbers);) + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define USERGS_SYSRET32 \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index dd25e2b1593..8adb6b5aa42 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -310,10 +310,10 @@ struct pv_time_ops pv_time_ops = { struct pv_irq_ops pv_irq_ops = { .init_IRQ = native_init_IRQ, - .save_fl = native_save_fl, - .restore_fl = native_restore_fl, - .irq_disable = native_irq_disable, - .irq_enable = native_irq_enable, + .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), + .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), + .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), + .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable), .safe_halt = native_safe_halt, .halt = native_halt, #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index a688f3bfaec..c609205df59 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -37,6 +37,7 @@ static unsigned long vsmp_save_fl(void) flags &= ~X86_EFLAGS_IF; return flags; } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl); static void vsmp_restore_fl(unsigned long flags) { @@ -46,6 +47,7 @@ static void vsmp_restore_fl(unsigned long flags) flags |= X86_EFLAGS_AC; native_restore_fl(flags); } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); static void vsmp_irq_disable(void) { @@ -53,6 +55,7 @@ static void vsmp_irq_disable(void) native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); static void vsmp_irq_enable(void) { @@ -60,6 +63,7 @@ static void vsmp_irq_enable(void) native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable); static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) @@ -90,10 +94,10 @@ static void __init set_vsmp_pv_ops(void) cap, ctl); if (cap & ctl & (1 << 4)) { /* Setup irq ops and turn on vSMP IRQ fastpath handling */ - pv_irq_ops.irq_disable = vsmp_irq_disable; - pv_irq_ops.irq_enable = vsmp_irq_enable; - pv_irq_ops.save_fl = vsmp_save_fl; - pv_irq_ops.restore_fl = vsmp_restore_fl; + pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); + pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable); + pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); + pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); pv_init_ops.patch = vsmp_patch; ctl &= ~(1 << 4); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 92f1c6f3e19..19e33b6cd59 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -173,24 +173,29 @@ static unsigned long save_fl(void) { return lguest_data.irq_enabled; } +PV_CALLEE_SAVE_REGS_THUNK(save_fl); /* restore_flags() just sets the flags back to the value given. */ static void restore_fl(unsigned long flags) { lguest_data.irq_enabled = flags; } +PV_CALLEE_SAVE_REGS_THUNK(restore_fl); /* Interrupts go off... */ static void irq_disable(void) { lguest_data.irq_enabled = 0; } +PV_CALLEE_SAVE_REGS_THUNK(irq_disable); /* Interrupts go on... */ static void irq_enable(void) { lguest_data.irq_enabled = X86_EFLAGS_IF; } +PV_CALLEE_SAVE_REGS_THUNK(irq_enable); + /*:*/ /*M:003 Note that we don't check for outstanding interrupts when we re-enable * them (or when we unmask an interrupt). This seems to work for the moment, @@ -984,10 +989,10 @@ __init void lguest_init(void) /* interrupt-related operations */ pv_irq_ops.init_IRQ = lguest_init_IRQ; - pv_irq_ops.save_fl = save_fl; - pv_irq_ops.restore_fl = restore_fl; - pv_irq_ops.irq_disable = irq_disable; - pv_irq_ops.irq_enable = irq_enable; + pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); + pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl); + pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); + pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable); pv_irq_ops.safe_halt = lguest_safe_halt; /* init-time operations */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0cd2a165f17..ff6d530ccc7 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -676,10 +676,10 @@ void xen_setup_vcpu_info_placement(void) if (have_vcpu_info_placement) { printk(KERN_INFO "Xen: using vcpu_info placement\n"); - pv_irq_ops.save_fl = xen_save_fl_direct; - pv_irq_ops.restore_fl = xen_restore_fl_direct; - pv_irq_ops.irq_disable = xen_irq_disable_direct; - pv_irq_ops.irq_enable = xen_irq_enable_direct; + pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); + pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); + pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); + pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); pv_mmu_ops.read_cr2 = xen_read_cr2_direct; } } diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 2e8271431e1..5a070900ad3 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -50,6 +50,7 @@ static unsigned long xen_save_fl(void) */ return (-flags) & X86_EFLAGS_IF; } +PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl); static void xen_restore_fl(unsigned long flags) { @@ -76,6 +77,7 @@ static void xen_restore_fl(unsigned long flags) xen_force_evtchn_callback(); } } +PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); static void xen_irq_disable(void) { @@ -86,6 +88,7 @@ static void xen_irq_disable(void) percpu_read(xen_vcpu)->evtchn_upcall_mask = 1; preempt_enable_no_resched(); } +PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); static void xen_irq_enable(void) { @@ -106,6 +109,7 @@ static void xen_irq_enable(void) if (unlikely(vcpu->evtchn_upcall_pending)) xen_force_evtchn_callback(); } +PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable); static void xen_safe_halt(void) { @@ -124,10 +128,12 @@ static void xen_halt(void) static const struct pv_irq_ops xen_irq_ops __initdata = { .init_IRQ = __xen_init_IRQ, - .save_fl = xen_save_fl, - .restore_fl = xen_restore_fl, - .irq_disable = xen_irq_disable, - .irq_enable = xen_irq_enable, + + .save_fl = PV_CALLEE_SAVE(xen_save_fl), + .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), + .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), + .irq_enable = PV_CALLEE_SAVE(xen_irq_enable), + .safe_halt = xen_safe_halt, .halt = xen_halt, #ifdef CONFIG_X86_64 -- cgit v1.2.3-70-g09d2 From da5de7c22eb705be709a57e486e7475a6969b994 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:07 -0800 Subject: x86/paravirt: use callee-saved convention for pte_val/make_pte/etc Impact: Optimization In the native case, pte_val, make_pte, etc are all just identity functions, so there's no need to clobber a lot of registers over them. (This changes the 32-bit callee-save calling convention to return both EAX and EDX so functions can return 64-bit values.) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 78 ++++++++++++++++++++--------------------- arch/x86/kernel/paravirt.c | 53 +++++++--------------------- arch/x86/xen/mmu.c | 24 ++++++++----- 3 files changed, 67 insertions(+), 88 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index beb10ecdbe6..2d098b78bc1 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -19,7 +19,7 @@ #define CLBR_ANY ((1 << 4) - 1) #define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) -#define CLBR_RET_REG (CLBR_EAX) +#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) #define CLBR_SCRATCH (0) #else #define CLBR_RAX CLBR_EAX @@ -308,11 +308,11 @@ struct pv_mmu_ops { void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); - pteval_t (*pte_val)(pte_t); - pte_t (*make_pte)(pteval_t pte); + struct paravirt_callee_save pte_val; + struct paravirt_callee_save make_pte; - pgdval_t (*pgd_val)(pgd_t); - pgd_t (*make_pgd)(pgdval_t pgd); + struct paravirt_callee_save pgd_val; + struct paravirt_callee_save make_pgd; #if PAGETABLE_LEVELS >= 3 #ifdef CONFIG_X86_PAE @@ -327,12 +327,12 @@ struct pv_mmu_ops { void (*set_pud)(pud_t *pudp, pud_t pudval); - pmdval_t (*pmd_val)(pmd_t); - pmd_t (*make_pmd)(pmdval_t pmd); + struct paravirt_callee_save pmd_val; + struct paravirt_callee_save make_pmd; #if PAGETABLE_LEVELS == 4 - pudval_t (*pud_val)(pud_t); - pud_t (*make_pud)(pudval_t pud); + struct paravirt_callee_save pud_val; + struct paravirt_callee_save make_pud; void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); #endif /* PAGETABLE_LEVELS == 4 */ @@ -1155,13 +1155,13 @@ static inline pte_t __pte(pteval_t val) pteval_t ret; if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALL2(pteval_t, - pv_mmu_ops.make_pte, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pteval_t, + pv_mmu_ops.make_pte, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pteval_t, - pv_mmu_ops.make_pte, - val); + ret = PVOP_CALLEE1(pteval_t, + pv_mmu_ops.make_pte, + val); return (pte_t) { .pte = ret }; } @@ -1171,11 +1171,11 @@ static inline pteval_t pte_val(pte_t pte) pteval_t ret; if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val, - pte.pte, (u64)pte.pte >> 32); + ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val, + pte.pte, (u64)pte.pte >> 32); else - ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val, - pte.pte); + ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val, + pte.pte); return ret; } @@ -1185,11 +1185,11 @@ static inline pgd_t __pgd(pgdval_t val) pgdval_t ret; if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd, - val); + ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd, + val); return (pgd_t) { ret }; } @@ -1199,11 +1199,11 @@ static inline pgdval_t pgd_val(pgd_t pgd) pgdval_t ret; if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val, - pgd.pgd, (u64)pgd.pgd >> 32); + ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val, + pgd.pgd, (u64)pgd.pgd >> 32); else - ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val, - pgd.pgd); + ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val, + pgd.pgd); return ret; } @@ -1267,11 +1267,11 @@ static inline pmd_t __pmd(pmdval_t val) pmdval_t ret; if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd, - val); + ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd, + val); return (pmd_t) { ret }; } @@ -1281,11 +1281,11 @@ static inline pmdval_t pmd_val(pmd_t pmd) pmdval_t ret; if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val, - pmd.pmd, (u64)pmd.pmd >> 32); + ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val, + pmd.pmd, (u64)pmd.pmd >> 32); else - ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val, - pmd.pmd); + ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val, + pmd.pmd); return ret; } @@ -1307,11 +1307,11 @@ static inline pud_t __pud(pudval_t val) pudval_t ret; if (sizeof(pudval_t) > sizeof(long)) - ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud, - val); + ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud, + val); return (pud_t) { ret }; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 8adb6b5aa42..cea11c8e304 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -391,43 +391,12 @@ struct pv_apic_ops pv_apic_ops = { #endif }; -typedef pte_t make_pte_t(pteval_t); -typedef pmd_t make_pmd_t(pmdval_t); -typedef pud_t make_pud_t(pudval_t); -typedef pgd_t make_pgd_t(pgdval_t); - -typedef pteval_t pte_val_t(pte_t); -typedef pmdval_t pmd_val_t(pmd_t); -typedef pudval_t pud_val_t(pud_t); -typedef pgdval_t pgd_val_t(pgd_t); - - #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) /* 32-bit pagetable entries */ -#define paravirt_native_make_pte (make_pte_t *)_paravirt_ident_32 -#define paravirt_native_pte_val (pte_val_t *)_paravirt_ident_32 - -#define paravirt_native_make_pmd (make_pmd_t *)_paravirt_ident_32 -#define paravirt_native_pmd_val (pmd_val_t *)_paravirt_ident_32 - -#define paravirt_native_make_pud (make_pud_t *)_paravirt_ident_32 -#define paravirt_native_pud_val (pud_val_t *)_paravirt_ident_32 - -#define paravirt_native_make_pgd (make_pgd_t *)_paravirt_ident_32 -#define paravirt_native_pgd_val (pgd_val_t *)_paravirt_ident_32 +#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) #else /* 64-bit pagetable entries */ -#define paravirt_native_make_pte (make_pte_t *)_paravirt_ident_64 -#define paravirt_native_pte_val (pte_val_t *)_paravirt_ident_64 - -#define paravirt_native_make_pmd (make_pmd_t *)_paravirt_ident_64 -#define paravirt_native_pmd_val (pmd_val_t *)_paravirt_ident_64 - -#define paravirt_native_make_pud (make_pud_t *)_paravirt_ident_64 -#define paravirt_native_pud_val (pud_val_t *)_paravirt_ident_64 - -#define paravirt_native_make_pgd (make_pgd_t *)_paravirt_ident_64 -#define paravirt_native_pgd_val (pgd_val_t *)_paravirt_ident_64 +#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) #endif struct pv_mmu_ops pv_mmu_ops = { @@ -481,21 +450,23 @@ struct pv_mmu_ops pv_mmu_ops = { .pmd_clear = native_pmd_clear, #endif .set_pud = native_set_pud, - .pmd_val = paravirt_native_pmd_val, - .make_pmd = paravirt_native_make_pmd, + + .pmd_val = PTE_IDENT, + .make_pmd = PTE_IDENT, #if PAGETABLE_LEVELS == 4 - .pud_val = paravirt_native_pud_val, - .make_pud = paravirt_native_make_pud, + .pud_val = PTE_IDENT, + .make_pud = PTE_IDENT, + .set_pgd = native_set_pgd, #endif #endif /* PAGETABLE_LEVELS >= 3 */ - .pte_val = paravirt_native_pte_val, - .pgd_val = paravirt_native_pgd_val, + .pte_val = PTE_IDENT, + .pgd_val = PTE_IDENT, - .make_pte = paravirt_native_make_pte, - .make_pgd = paravirt_native_make_pgd, + .make_pte = PTE_IDENT, + .make_pgd = PTE_IDENT, .dup_mmap = paravirt_nop, .exit_mmap = paravirt_nop, diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 94e452c0b00..5e41f7fc6cf 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -492,28 +492,33 @@ pteval_t xen_pte_val(pte_t pte) { return pte_mfn_to_pfn(pte.pte); } +PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); pgdval_t xen_pgd_val(pgd_t pgd) { return pte_mfn_to_pfn(pgd.pgd); } +PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val); pte_t xen_make_pte(pteval_t pte) { pte = pte_pfn_to_mfn(pte); return native_make_pte(pte); } +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); pgd_t xen_make_pgd(pgdval_t pgd) { pgd = pte_pfn_to_mfn(pgd); return native_make_pgd(pgd); } +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); pmdval_t xen_pmd_val(pmd_t pmd) { return pte_mfn_to_pfn(pmd.pmd); } +PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val); void xen_set_pud_hyper(pud_t *ptr, pud_t val) { @@ -590,12 +595,14 @@ pmd_t xen_make_pmd(pmdval_t pmd) pmd = pte_pfn_to_mfn(pmd); return native_make_pmd(pmd); } +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); #if PAGETABLE_LEVELS == 4 pudval_t xen_pud_val(pud_t pud) { return pte_mfn_to_pfn(pud.pud); } +PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); pud_t xen_make_pud(pudval_t pud) { @@ -603,6 +610,7 @@ pud_t xen_make_pud(pudval_t pud) return native_make_pud(pud); } +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud); pgd_t *xen_get_user_pgd(pgd_t *pgd) { @@ -1813,11 +1821,11 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .ptep_modify_prot_start = __ptep_modify_prot_start, .ptep_modify_prot_commit = __ptep_modify_prot_commit, - .pte_val = xen_pte_val, - .pgd_val = xen_pgd_val, + .pte_val = PV_CALLEE_SAVE(xen_pte_val), + .pgd_val = PV_CALLEE_SAVE(xen_pgd_val), - .make_pte = xen_make_pte, - .make_pgd = xen_make_pgd, + .make_pte = PV_CALLEE_SAVE(xen_make_pte), + .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), #ifdef CONFIG_X86_PAE .set_pte_atomic = xen_set_pte_atomic, @@ -1827,12 +1835,12 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { #endif /* CONFIG_X86_PAE */ .set_pud = xen_set_pud_hyper, - .make_pmd = xen_make_pmd, - .pmd_val = xen_pmd_val, + .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), + .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), #if PAGETABLE_LEVELS == 4 - .pud_val = xen_pud_val, - .make_pud = xen_make_pud, + .pud_val = PV_CALLEE_SAVE(xen_pud_val), + .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_pgd = xen_set_pgd_hyper, .alloc_pud = xen_alloc_pte_init, -- cgit v1.2.3-70-g09d2 From 795f99b61d20c34cb04d17d8906b32f745a635ec Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 30 Jan 2009 17:47:54 +0900 Subject: xen: setup percpu data pointers Impact: fix xen booting We need to access percpu data fairly early, so set up the percpu registers as soon as possible. We only need to load the appropriate segment register. We already have a GDT, but its hard to change it early because we need to manipulate the pagetable to do so, and that hasn't been set up yet. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Tejun Heo --- arch/x86/xen/enlighten.c | 3 +++ arch/x86/xen/smp.c | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bef941f6145..fe19c88a502 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1647,6 +1647,9 @@ asmlinkage void __init xen_start_kernel(void) have_vcpu_info_placement = 0; #endif + /* setup percpu state */ + load_percpu_segment(0); + xen_smp_init(); /* Get mfn list */ diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 7735e3dd359..88d5d5ec6be 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -170,7 +170,8 @@ static void __init xen_smp_prepare_boot_cpu(void) /* We've switched to the "real" per-cpu gdt, so make sure the old memory can be recycled */ - make_lowmem_page_readwrite(&per_cpu_var(gdt_page)); + make_lowmem_page_readwrite(__per_cpu_load + + (unsigned long)&per_cpu_var(gdt_page)); xen_setup_vcpu_info_placement(); } @@ -235,6 +236,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->user_regs.ss = __KERNEL_DS; #ifdef CONFIG_X86_32 ctxt->user_regs.fs = __KERNEL_PERCPU; +#else + ctxt->gs_base_kernel = per_cpu_offset(cpu); #endif ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ -- cgit v1.2.3-70-g09d2 From 1f4f931501e9270c156d05ee76b7b872de486304 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 2 Feb 2009 13:58:06 -0800 Subject: xen: fix 32-bit build resulting from mmu move Moving the mmu code from enlighten.c to mmu.c inadvertently broke the 32-bit build. Fix it. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/xen/mmu.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 5e41f7fc6cf..d2e8ed1aff3 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1396,6 +1396,43 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) #endif } +#ifdef CONFIG_HIGHPTE +static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) +{ + pgprot_t prot = PAGE_KERNEL; + + if (PagePinned(page)) + prot = PAGE_KERNEL_RO; + + if (0 && PageHighMem(page)) + printk("mapping highpte %lx type %d prot %s\n", + page_to_pfn(page), type, + (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ"); + + return kmap_atomic_prot(page, type, prot); +} +#endif + +#ifdef CONFIG_X86_32 +static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) +{ + /* If there's an existing pte, then don't allow _PAGE_RW to be set */ + if (pte_val_ma(*ptep) & _PAGE_PRESENT) + pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & + pte_val_ma(pte)); + + return pte; +} + +/* Init-time set_pte while constructing initial pagetables, which + doesn't allow RO pagetable pages to be remapped RW */ +static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) +{ + pte = mask_rw_pte(ptep, pte); + + xen_set_pte(ptep, pte); +} +#endif /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ -- cgit v1.2.3-70-g09d2 From 383414322b3b3ced0cbc146801e0cc6c60a6c5f4 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 2 Feb 2009 13:55:31 -0800 Subject: xen: setup percpu data pointers We need to access percpu data fairly early, so set up the percpu registers as soon as possible. We only need to load the appropriate segment register. We already have a GDT, but its hard to change it early because we need to manipulate the pagetable to do so, and that hasn't been set up yet. Also, set the kernel stack when bringing up secondary CPUs. If we don't they all end up sharing the same stack... Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/xen/enlighten.c | 15 ++++++++++++++- arch/x86/xen/smp.c | 6 ++++-- arch/x86/xen/xen-ops.h | 2 ++ 3 files changed, 20 insertions(+), 3 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index cd022c43dfb..aed7ceeb4b6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -66,6 +66,8 @@ EXPORT_SYMBOL_GPL(xen_start_info); struct shared_info xen_dummy_shared_info; +void *xen_initial_gdt; + /* * Point at some empty memory to start with. We map the real shared_info * page as soon as fixmap is up and running. @@ -917,8 +919,19 @@ asmlinkage void __init xen_start_kernel(void) have_vcpu_info_placement = 0; #endif - /* setup percpu state */ +#ifdef CONFIG_X86_64 + /* + * Setup percpu state. We only need to do this for 64-bit + * because 32-bit already has %fs set properly. + */ load_percpu_segment(0); +#endif + /* + * The only reliable way to retain the initial address of the + * percpu gdt_page is to remember it here, so we can go and + * mark it RW later, when the initial percpu area is freed. + */ + xen_initial_gdt = &per_cpu(gdt_page, 0); xen_smp_init(); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 88d5d5ec6be..035582ae815 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -170,8 +170,7 @@ static void __init xen_smp_prepare_boot_cpu(void) /* We've switched to the "real" per-cpu gdt, so make sure the old memory can be recycled */ - make_lowmem_page_readwrite(__per_cpu_load + - (unsigned long)&per_cpu_var(gdt_page)); + make_lowmem_page_readwrite(xen_initial_gdt); xen_setup_vcpu_info_placement(); } @@ -287,6 +286,9 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) irq_ctx_init(cpu); #else clear_tsk_thread_flag(idle, TIF_FORK); + per_cpu(kernel_stack, cpu) = + (unsigned long)task_stack_page(idle) - + KERNEL_STACK_OFFSET + THREAD_SIZE; #endif xen_setup_timer(cpu); xen_init_lock_cpu(cpu); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 11913fc94c1..2f5ef2632ea 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -10,6 +10,8 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; +extern void *xen_initial_gdt; + struct trap_info; void xen_copy_trap_info(struct trap_info *traps); -- cgit v1.2.3-70-g09d2 From 5393744b71ce797f1b1546fafaed127fc50c2b61 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 2 Feb 2009 13:55:42 -0800 Subject: xen: make direct versions of irq_enable/disable/save/restore to common code Now that x86-64 has directly accessible percpu variables, it can also implement the direct versions of these operations, which operate on a vcpu_info structure directly embedded in the percpu area. In fact, the 64-bit versions are more or less identical, and so can be shared. The only two differences are: 1. xen_restore_fl_direct takes its argument in eax on 32-bit, and rdi on 64-bit. Unfortunately it isn't possible to directly refer to the 2nd lsb of rdi directly (as you can with %ah), so the code isn't quite as dense. 2. check_events needs to variants to save different registers. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/xen/Makefile | 3 +- arch/x86/xen/xen-asm.S | 140 ++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/xen/xen-asm.h | 12 ++++ arch/x86/xen/xen-asm_32.S | 111 ++++-------------------------------- arch/x86/xen/xen-asm_64.S | 134 +------------------------------------------- 5 files changed, 169 insertions(+), 231 deletions(-) create mode 100644 arch/x86/xen/xen-asm.S create mode 100644 arch/x86/xen/xen-asm.h (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 6dcefba7836..3b767d03fd6 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -6,7 +6,8 @@ CFLAGS_REMOVE_irq.o = -pg endif obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ - time.o xen-asm_$(BITS).o grant-table.o suspend.o + time.o xen-asm.o xen-asm_$(BITS).o \ + grant-table.o suspend.o obj-$(CONFIG_SMP) += smp.o spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o \ No newline at end of file diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S new file mode 100644 index 00000000000..4c6f9679913 --- /dev/null +++ b/arch/x86/xen/xen-asm.S @@ -0,0 +1,140 @@ +/* + Asm versions of Xen pv-ops, suitable for either direct use or inlining. + The inline versions are the same as the direct-use versions, with the + pre- and post-amble chopped off. + + This code is encoded for size rather than absolute efficiency, + with a view to being able to inline as much as possible. + + We only bother with direct forms (ie, vcpu in percpu data) of + the operations here; the indirect forms are better handled in + C, since they're generally too large to inline anyway. + */ + +#include +#include +#include + +#include "xen-asm.h" + +/* + Enable events. This clears the event mask and tests the pending + event status with one and operation. If there are pending + events, then enter the hypervisor to get them handled. + */ +ENTRY(xen_irq_enable_direct) + /* Unmask events */ + movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask + + /* Preempt here doesn't matter because that will deal with + any pending interrupts. The pending check may end up being + run on the wrong CPU, but that doesn't hurt. */ + + /* Test for pending */ + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending + jz 1f + +2: call check_events +1: +ENDPATCH(xen_irq_enable_direct) + ret + ENDPROC(xen_irq_enable_direct) + RELOC(xen_irq_enable_direct, 2b+1) + + +/* + Disabling events is simply a matter of making the event mask + non-zero. + */ +ENTRY(xen_irq_disable_direct) + movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask +ENDPATCH(xen_irq_disable_direct) + ret + ENDPROC(xen_irq_disable_direct) + RELOC(xen_irq_disable_direct, 0) + +/* + (xen_)save_fl is used to get the current interrupt enable status. + Callers expect the status to be in X86_EFLAGS_IF, and other bits + may be set in the return value. We take advantage of this by + making sure that X86_EFLAGS_IF has the right value (and other bits + in that byte are 0), but other bits in the return value are + undefined. We need to toggle the state of the bit, because + Xen and x86 use opposite senses (mask vs enable). + */ +ENTRY(xen_save_fl_direct) + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask + setz %ah + addb %ah,%ah +ENDPATCH(xen_save_fl_direct) + ret + ENDPROC(xen_save_fl_direct) + RELOC(xen_save_fl_direct, 0) + + +/* + In principle the caller should be passing us a value return + from xen_save_fl_direct, but for robustness sake we test only + the X86_EFLAGS_IF flag rather than the whole byte. After + setting the interrupt mask state, it checks for unmasked + pending events and enters the hypervisor to get them delivered + if so. + */ +ENTRY(xen_restore_fl_direct) +#ifdef CONFIG_X86_64 + testw $X86_EFLAGS_IF, %di +#else + testb $X86_EFLAGS_IF>>8, %ah +#endif + setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask + /* Preempt here doesn't matter because that will deal with + any pending interrupts. The pending check may end up being + run on the wrong CPU, but that doesn't hurt. */ + + /* check for unmasked and pending */ + cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending + jz 1f +2: call check_events +1: +ENDPATCH(xen_restore_fl_direct) + ret + ENDPROC(xen_restore_fl_direct) + RELOC(xen_restore_fl_direct, 2b+1) + + +/* + Force an event check by making a hypercall, + but preserve regs before making the call. + */ +check_events: +#ifdef CONFIG_X86_32 + push %eax + push %ecx + push %edx + call xen_force_evtchn_callback + pop %edx + pop %ecx + pop %eax +#else + push %rax + push %rcx + push %rdx + push %rsi + push %rdi + push %r8 + push %r9 + push %r10 + push %r11 + call xen_force_evtchn_callback + pop %r11 + pop %r10 + pop %r9 + pop %r8 + pop %rdi + pop %rsi + pop %rdx + pop %rcx + pop %rax +#endif + ret + diff --git a/arch/x86/xen/xen-asm.h b/arch/x86/xen/xen-asm.h new file mode 100644 index 00000000000..465276467a4 --- /dev/null +++ b/arch/x86/xen/xen-asm.h @@ -0,0 +1,12 @@ +#ifndef _XEN_XEN_ASM_H +#define _XEN_XEN_ASM_H + +#include + +#define RELOC(x, v) .globl x##_reloc; x##_reloc=v +#define ENDPATCH(x) .globl x##_end; x##_end=. + +/* Pseudo-flag used for virtual NMI, which we don't implement yet */ +#define XEN_EFLAGS_NMI 0x80000000 + +#endif diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 42786f59d9c..082d173caaf 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -11,101 +11,28 @@ generally too large to inline anyway. */ -#include - -#include +//#include #include -#include #include #include #include -#define RELOC(x, v) .globl x##_reloc; x##_reloc=v -#define ENDPATCH(x) .globl x##_end; x##_end=. - -/* Pseudo-flag used for virtual NMI, which we don't implement yet */ -#define XEN_EFLAGS_NMI 0x80000000 - -/* - Enable events. This clears the event mask and tests the pending - event status with one and operation. If there are pending - events, then enter the hypervisor to get them handled. - */ -ENTRY(xen_irq_enable_direct) - /* Unmask events */ - movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* Test for pending */ - testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending - jz 1f - -2: call check_events -1: -ENDPATCH(xen_irq_enable_direct) - ret - ENDPROC(xen_irq_enable_direct) - RELOC(xen_irq_enable_direct, 2b+1) - +#include "xen-asm.h" /* - Disabling events is simply a matter of making the event mask - non-zero. - */ -ENTRY(xen_irq_disable_direct) - movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask -ENDPATCH(xen_irq_disable_direct) - ret - ENDPROC(xen_irq_disable_direct) - RELOC(xen_irq_disable_direct, 0) - -/* - (xen_)save_fl is used to get the current interrupt enable status. - Callers expect the status to be in X86_EFLAGS_IF, and other bits - may be set in the return value. We take advantage of this by - making sure that X86_EFLAGS_IF has the right value (and other bits - in that byte are 0), but other bits in the return value are - undefined. We need to toggle the state of the bit, because - Xen and x86 use opposite senses (mask vs enable). - */ -ENTRY(xen_save_fl_direct) - testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - setz %ah - addb %ah,%ah -ENDPATCH(xen_save_fl_direct) - ret - ENDPROC(xen_save_fl_direct) - RELOC(xen_save_fl_direct, 0) - - -/* - In principle the caller should be passing us a value return - from xen_save_fl_direct, but for robustness sake we test only - the X86_EFLAGS_IF flag rather than the whole byte. After - setting the interrupt mask state, it checks for unmasked - pending events and enters the hypervisor to get them delivered - if so. + Force an event check by making a hypercall, + but preserve regs before making the call. */ -ENTRY(xen_restore_fl_direct) - testb $X86_EFLAGS_IF>>8, %ah - setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending - jz 1f -2: call check_events -1: -ENDPATCH(xen_restore_fl_direct) +check_events: + push %eax + push %ecx + push %edx + call xen_force_evtchn_callback + pop %edx + pop %ecx + pop %eax ret - ENDPROC(xen_restore_fl_direct) - RELOC(xen_restore_fl_direct, 2b+1) /* We can't use sysexit directly, because we're not running in ring0. @@ -289,17 +216,3 @@ ENTRY(xen_iret_crit_fixup) lea 4(%edi),%esp /* point esp to new frame */ 2: jmp xen_do_upcall - -/* - Force an event check by making a hypercall, - but preserve regs before making the call. - */ -check_events: - push %eax - push %ecx - push %edx - call xen_force_evtchn_callback - pop %edx - pop %ecx - pop %eax - ret diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index d6fc51f4ce8..d205a283efe 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -11,142 +11,14 @@ generally too large to inline anyway. */ -#include - -#include -#include #include -#include #include +#include +#include #include -#define RELOC(x, v) .globl x##_reloc; x##_reloc=v -#define ENDPATCH(x) .globl x##_end; x##_end=. - -/* Pseudo-flag used for virtual NMI, which we don't implement yet */ -#define XEN_EFLAGS_NMI 0x80000000 - -#if 1 -/* - FIXME: x86_64 now can support direct access to percpu variables - via a segment override. Update xen accordingly. - */ -#define BUG ud2a -#endif - -/* - Enable events. This clears the event mask and tests the pending - event status with one and operation. If there are pending - events, then enter the hypervisor to get them handled. - */ -ENTRY(xen_irq_enable_direct) - BUG - - /* Unmask events */ - movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* Test for pending */ - testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending - jz 1f - -2: call check_events -1: -ENDPATCH(xen_irq_enable_direct) - ret - ENDPROC(xen_irq_enable_direct) - RELOC(xen_irq_enable_direct, 2b+1) - -/* - Disabling events is simply a matter of making the event mask - non-zero. - */ -ENTRY(xen_irq_disable_direct) - BUG - - movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask -ENDPATCH(xen_irq_disable_direct) - ret - ENDPROC(xen_irq_disable_direct) - RELOC(xen_irq_disable_direct, 0) - -/* - (xen_)save_fl is used to get the current interrupt enable status. - Callers expect the status to be in X86_EFLAGS_IF, and other bits - may be set in the return value. We take advantage of this by - making sure that X86_EFLAGS_IF has the right value (and other bits - in that byte are 0), but other bits in the return value are - undefined. We need to toggle the state of the bit, because - Xen and x86 use opposite senses (mask vs enable). - */ -ENTRY(xen_save_fl_direct) - BUG - - testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - setz %ah - addb %ah,%ah -ENDPATCH(xen_save_fl_direct) - ret - ENDPROC(xen_save_fl_direct) - RELOC(xen_save_fl_direct, 0) - -/* - In principle the caller should be passing us a value return - from xen_save_fl_direct, but for robustness sake we test only - the X86_EFLAGS_IF flag rather than the whole byte. After - setting the interrupt mask state, it checks for unmasked - pending events and enters the hypervisor to get them delivered - if so. - */ -ENTRY(xen_restore_fl_direct) - BUG - - testb $X86_EFLAGS_IF>>8, %ah - setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending - jz 1f -2: call check_events -1: -ENDPATCH(xen_restore_fl_direct) - ret - ENDPROC(xen_restore_fl_direct) - RELOC(xen_restore_fl_direct, 2b+1) - - -/* - Force an event check by making a hypercall, - but preserve regs before making the call. - */ -check_events: - push %rax - push %rcx - push %rdx - push %rsi - push %rdi - push %r8 - push %r9 - push %r10 - push %r11 - call xen_force_evtchn_callback - pop %r11 - pop %r10 - pop %r9 - pop %r8 - pop %rdi - pop %rsi - pop %rdx - pop %rcx - pop %rax - ret +#include "xen-asm.h" ENTRY(xen_adjust_exception_frame) mov 8+0(%rsp),%rcx -- cgit v1.2.3-70-g09d2 From e4d0407185cdbdcfd99fc23bde2e5454bbc46329 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 2 Feb 2009 13:55:54 -0800 Subject: xen: use direct ops on 64-bit Enable the use of the direct vcpu-access operations on 64-bit. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/xen/enlighten.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index aed7ceeb4b6..37230342c2c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -87,14 +87,7 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; * * 0: not available, 1: available */ -static int have_vcpu_info_placement = -#ifdef CONFIG_X86_32 - 1 -#else - 0 -#endif - ; - +static int have_vcpu_info_placement = 1; static void xen_vcpu_setup(int cpu) { @@ -914,11 +907,6 @@ asmlinkage void __init xen_start_kernel(void) machine_ops = xen_machine_ops; -#ifdef CONFIG_X86_64 - /* Disable until direct per-cpu data access. */ - have_vcpu_info_placement = 0; -#endif - #ifdef CONFIG_X86_64 /* * Setup percpu state. We only need to do this for 64-bit -- cgit v1.2.3-70-g09d2 From 130ace11a9dc682541336d1fe5cb3bc7771a149e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Feb 2009 00:57:48 +0900 Subject: x86: style cleanups for xen assemblies Make the following style cleanups: * drop unnecessary //#include from xen-asm_32.S * compulsive adding of space after comma * reformat multiline comments Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/xen/xen-asm.S | 78 +++++++-------- arch/x86/xen/xen-asm_32.S | 238 ++++++++++++++++++++++++---------------------- arch/x86/xen/xen-asm_64.S | 107 +++++++++++---------- 3 files changed, 219 insertions(+), 204 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 4c6f9679913..79d7362ad6d 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -1,14 +1,14 @@ /* - Asm versions of Xen pv-ops, suitable for either direct use or inlining. - The inline versions are the same as the direct-use versions, with the - pre- and post-amble chopped off. - - This code is encoded for size rather than absolute efficiency, - with a view to being able to inline as much as possible. - - We only bother with direct forms (ie, vcpu in percpu data) of - the operations here; the indirect forms are better handled in - C, since they're generally too large to inline anyway. + * Asm versions of Xen pv-ops, suitable for either direct use or + * inlining. The inline versions are the same as the direct-use + * versions, with the pre- and post-amble chopped off. + * + * This code is encoded for size rather than absolute efficiency, with + * a view to being able to inline as much as possible. + * + * We only bother with direct forms (ie, vcpu in percpu data) of the + * operations here; the indirect forms are better handled in C, since + * they're generally too large to inline anyway. */ #include @@ -18,17 +18,19 @@ #include "xen-asm.h" /* - Enable events. This clears the event mask and tests the pending - event status with one and operation. If there are pending - events, then enter the hypervisor to get them handled. + * Enable events. This clears the event mask and tests the pending + * event status with one and operation. If there are pending events, + * then enter the hypervisor to get them handled. */ ENTRY(xen_irq_enable_direct) /* Unmask events */ movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ + /* + * Preempt here doesn't matter because that will deal with any + * pending interrupts. The pending check may end up being run + * on the wrong CPU, but that doesn't hurt. + */ /* Test for pending */ testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending @@ -43,8 +45,8 @@ ENDPATCH(xen_irq_enable_direct) /* - Disabling events is simply a matter of making the event mask - non-zero. + * Disabling events is simply a matter of making the event mask + * non-zero. */ ENTRY(xen_irq_disable_direct) movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask @@ -54,18 +56,18 @@ ENDPATCH(xen_irq_disable_direct) RELOC(xen_irq_disable_direct, 0) /* - (xen_)save_fl is used to get the current interrupt enable status. - Callers expect the status to be in X86_EFLAGS_IF, and other bits - may be set in the return value. We take advantage of this by - making sure that X86_EFLAGS_IF has the right value (and other bits - in that byte are 0), but other bits in the return value are - undefined. We need to toggle the state of the bit, because - Xen and x86 use opposite senses (mask vs enable). + * (xen_)save_fl is used to get the current interrupt enable status. + * Callers expect the status to be in X86_EFLAGS_IF, and other bits + * may be set in the return value. We take advantage of this by + * making sure that X86_EFLAGS_IF has the right value (and other bits + * in that byte are 0), but other bits in the return value are + * undefined. We need to toggle the state of the bit, because Xen and + * x86 use opposite senses (mask vs enable). */ ENTRY(xen_save_fl_direct) testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask setz %ah - addb %ah,%ah + addb %ah, %ah ENDPATCH(xen_save_fl_direct) ret ENDPROC(xen_save_fl_direct) @@ -73,12 +75,11 @@ ENDPATCH(xen_save_fl_direct) /* - In principle the caller should be passing us a value return - from xen_save_fl_direct, but for robustness sake we test only - the X86_EFLAGS_IF flag rather than the whole byte. After - setting the interrupt mask state, it checks for unmasked - pending events and enters the hypervisor to get them delivered - if so. + * In principle the caller should be passing us a value return from + * xen_save_fl_direct, but for robustness sake we test only the + * X86_EFLAGS_IF flag rather than the whole byte. After setting the + * interrupt mask state, it checks for unmasked pending events and + * enters the hypervisor to get them delivered if so. */ ENTRY(xen_restore_fl_direct) #ifdef CONFIG_X86_64 @@ -87,9 +88,11 @@ ENTRY(xen_restore_fl_direct) testb $X86_EFLAGS_IF>>8, %ah #endif setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ + /* + * Preempt here doesn't matter because that will deal with any + * pending interrupts. The pending check may end up being run + * on the wrong CPU, but that doesn't hurt. + */ /* check for unmasked and pending */ cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending @@ -103,8 +106,8 @@ ENDPATCH(xen_restore_fl_direct) /* - Force an event check by making a hypercall, - but preserve regs before making the call. + * Force an event check by making a hypercall, but preserve regs + * before making the call. */ check_events: #ifdef CONFIG_X86_32 @@ -137,4 +140,3 @@ check_events: pop %rax #endif ret - diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 082d173caaf..88e15deb8b8 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -1,17 +1,16 @@ /* - Asm versions of Xen pv-ops, suitable for either direct use or inlining. - The inline versions are the same as the direct-use versions, with the - pre- and post-amble chopped off. - - This code is encoded for size rather than absolute efficiency, - with a view to being able to inline as much as possible. - - We only bother with direct forms (ie, vcpu in pda) of the operations - here; the indirect forms are better handled in C, since they're - generally too large to inline anyway. + * Asm versions of Xen pv-ops, suitable for either direct use or + * inlining. The inline versions are the same as the direct-use + * versions, with the pre- and post-amble chopped off. + * + * This code is encoded for size rather than absolute efficiency, with + * a view to being able to inline as much as possible. + * + * We only bother with direct forms (ie, vcpu in pda) of the + * operations here; the indirect forms are better handled in C, since + * they're generally too large to inline anyway. */ -//#include #include #include #include @@ -21,8 +20,8 @@ #include "xen-asm.h" /* - Force an event check by making a hypercall, - but preserve regs before making the call. + * Force an event check by making a hypercall, but preserve regs + * before making the call. */ check_events: push %eax @@ -35,10 +34,10 @@ check_events: ret /* - We can't use sysexit directly, because we're not running in ring0. - But we can easily fake it up using iret. Assuming xen_sysexit - is jumped to with a standard stack frame, we can just strip it - back to a standard iret frame and use iret. + * We can't use sysexit directly, because we're not running in ring0. + * But we can easily fake it up using iret. Assuming xen_sysexit is + * jumped to with a standard stack frame, we can just strip it back to + * a standard iret frame and use iret. */ ENTRY(xen_sysexit) movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ @@ -49,33 +48,31 @@ ENTRY(xen_sysexit) ENDPROC(xen_sysexit) /* - This is run where a normal iret would be run, with the same stack setup: - 8: eflags - 4: cs - esp-> 0: eip - - This attempts to make sure that any pending events are dealt - with on return to usermode, but there is a small window in - which an event can happen just before entering usermode. If - the nested interrupt ends up setting one of the TIF_WORK_MASK - pending work flags, they will not be tested again before - returning to usermode. This means that a process can end up - with pending work, which will be unprocessed until the process - enters and leaves the kernel again, which could be an - unbounded amount of time. This means that a pending signal or - reschedule event could be indefinitely delayed. - - The fix is to notice a nested interrupt in the critical - window, and if one occurs, then fold the nested interrupt into - the current interrupt stack frame, and re-process it - iteratively rather than recursively. This means that it will - exit via the normal path, and all pending work will be dealt - with appropriately. - - Because the nested interrupt handler needs to deal with the - current stack state in whatever form its in, we keep things - simple by only using a single register which is pushed/popped - on the stack. + * This is run where a normal iret would be run, with the same stack setup: + * 8: eflags + * 4: cs + * esp-> 0: eip + * + * This attempts to make sure that any pending events are dealt with + * on return to usermode, but there is a small window in which an + * event can happen just before entering usermode. If the nested + * interrupt ends up setting one of the TIF_WORK_MASK pending work + * flags, they will not be tested again before returning to + * usermode. This means that a process can end up with pending work, + * which will be unprocessed until the process enters and leaves the + * kernel again, which could be an unbounded amount of time. This + * means that a pending signal or reschedule event could be + * indefinitely delayed. + * + * The fix is to notice a nested interrupt in the critical window, and + * if one occurs, then fold the nested interrupt into the current + * interrupt stack frame, and re-process it iteratively rather than + * recursively. This means that it will exit via the normal path, and + * all pending work will be dealt with appropriately. + * + * Because the nested interrupt handler needs to deal with the current + * stack state in whatever form its in, we keep things simple by only + * using a single register which is pushed/popped on the stack. */ ENTRY(xen_iret) /* test eflags for special cases */ @@ -85,13 +82,15 @@ ENTRY(xen_iret) push %eax ESP_OFFSET=4 # bytes pushed onto stack - /* Store vcpu_info pointer for easy access. Do it this - way to avoid having to reload %fs */ + /* + * Store vcpu_info pointer for easy access. Do it this way to + * avoid having to reload %fs + */ #ifdef CONFIG_SMP GET_THREAD_INFO(%eax) - movl TI_cpu(%eax),%eax - movl __per_cpu_offset(,%eax,4),%eax - mov per_cpu__xen_vcpu(%eax),%eax + movl TI_cpu(%eax), %eax + movl __per_cpu_offset(,%eax,4), %eax + mov per_cpu__xen_vcpu(%eax), %eax #else movl per_cpu__xen_vcpu, %eax #endif @@ -99,37 +98,46 @@ ENTRY(xen_iret) /* check IF state we're restoring */ testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) - /* Maybe enable events. Once this happens we could get a - recursive event, so the critical region starts immediately - afterwards. However, if that happens we don't end up - resuming the code, so we don't have to be worried about - being preempted to another CPU. */ + /* + * Maybe enable events. Once this happens we could get a + * recursive event, so the critical region starts immediately + * afterwards. However, if that happens we don't end up + * resuming the code, so we don't have to be worried about + * being preempted to another CPU. + */ setz XEN_vcpu_info_mask(%eax) xen_iret_start_crit: /* check for unmasked and pending */ cmpw $0x0001, XEN_vcpu_info_pending(%eax) - /* If there's something pending, mask events again so we - can jump back into xen_hypervisor_callback */ + /* + * If there's something pending, mask events again so we can + * jump back into xen_hypervisor_callback + */ sete XEN_vcpu_info_mask(%eax) popl %eax - /* From this point on the registers are restored and the stack - updated, so we don't need to worry about it if we're preempted */ + /* + * From this point on the registers are restored and the stack + * updated, so we don't need to worry about it if we're + * preempted + */ iret_restore_end: - /* Jump to hypervisor_callback after fixing up the stack. - Events are masked, so jumping out of the critical - region is OK. */ + /* + * Jump to hypervisor_callback after fixing up the stack. + * Events are masked, so jumping out of the critical region is + * OK. + */ je xen_hypervisor_callback 1: iret xen_iret_end_crit: -.section __ex_table,"a" +.section __ex_table, "a" .align 4 - .long 1b,iret_exc + .long 1b, iret_exc .previous hyper_iret: @@ -139,55 +147,55 @@ hyper_iret: .globl xen_iret_start_crit, xen_iret_end_crit /* - This is called by xen_hypervisor_callback in entry.S when it sees - that the EIP at the time of interrupt was between xen_iret_start_crit - and xen_iret_end_crit. We're passed the EIP in %eax so we can do - a more refined determination of what to do. - - The stack format at this point is: - ---------------- - ss : (ss/esp may be present if we came from usermode) - esp : - eflags } outer exception info - cs } - eip } - ---------------- <- edi (copy dest) - eax : outer eax if it hasn't been restored - ---------------- - eflags } nested exception info - cs } (no ss/esp because we're nested - eip } from the same ring) - orig_eax }<- esi (copy src) - - - - - - - - - - fs } - es } - ds } SAVE_ALL state - eax } - : : - ebx }<- esp - ---------------- - - In order to deliver the nested exception properly, we need to shift - everything from the return addr up to the error code so it - sits just under the outer exception info. This means that when we - handle the exception, we do it in the context of the outer exception - rather than starting a new one. - - The only caveat is that if the outer eax hasn't been - restored yet (ie, it's still on stack), we need to insert - its value into the SAVE_ALL state before going on, since - it's usermode state which we eventually need to restore. + * This is called by xen_hypervisor_callback in entry.S when it sees + * that the EIP at the time of interrupt was between + * xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in + * %eax so we can do a more refined determination of what to do. + * + * The stack format at this point is: + * ---------------- + * ss : (ss/esp may be present if we came from usermode) + * esp : + * eflags } outer exception info + * cs } + * eip } + * ---------------- <- edi (copy dest) + * eax : outer eax if it hasn't been restored + * ---------------- + * eflags } nested exception info + * cs } (no ss/esp because we're nested + * eip } from the same ring) + * orig_eax }<- esi (copy src) + * - - - - - - - - + * fs } + * es } + * ds } SAVE_ALL state + * eax } + * : : + * ebx }<- esp + * ---------------- + * + * In order to deliver the nested exception properly, we need to shift + * everything from the return addr up to the error code so it sits + * just under the outer exception info. This means that when we + * handle the exception, we do it in the context of the outer + * exception rather than starting a new one. + * + * The only caveat is that if the outer eax hasn't been restored yet + * (ie, it's still on stack), we need to insert its value into the + * SAVE_ALL state before going on, since it's usermode state which we + * eventually need to restore. */ ENTRY(xen_iret_crit_fixup) /* - Paranoia: Make sure we're really coming from kernel space. - One could imagine a case where userspace jumps into the - critical range address, but just before the CPU delivers a GP, - it decides to deliver an interrupt instead. Unlikely? - Definitely. Easy to avoid? Yes. The Intel documents - explicitly say that the reported EIP for a bad jump is the - jump instruction itself, not the destination, but some virtual - environments get this wrong. + * Paranoia: Make sure we're really coming from kernel space. + * One could imagine a case where userspace jumps into the + * critical range address, but just before the CPU delivers a + * GP, it decides to deliver an interrupt instead. Unlikely? + * Definitely. Easy to avoid? Yes. The Intel documents + * explicitly say that the reported EIP for a bad jump is the + * jump instruction itself, not the destination, but some + * virtual environments get this wrong. */ movl PT_CS(%esp), %ecx andl $SEGMENT_RPL_MASK, %ecx @@ -197,15 +205,17 @@ ENTRY(xen_iret_crit_fixup) lea PT_ORIG_EAX(%esp), %esi lea PT_EFLAGS(%esp), %edi - /* If eip is before iret_restore_end then stack - hasn't been restored yet. */ + /* + * If eip is before iret_restore_end then stack + * hasn't been restored yet. + */ cmp $iret_restore_end, %eax jae 1f - movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */ + movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */ movl %eax, PT_EAX(%esp) - lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ + lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */ /* set up the copy */ 1: std @@ -213,6 +223,6 @@ ENTRY(xen_iret_crit_fixup) rep movsl cld - lea 4(%edi),%esp /* point esp to new frame */ + lea 4(%edi), %esp /* point esp to new frame */ 2: jmp xen_do_upcall diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index d205a283efe..02f496a8dba 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -1,14 +1,14 @@ /* - Asm versions of Xen pv-ops, suitable for either direct use or inlining. - The inline versions are the same as the direct-use versions, with the - pre- and post-amble chopped off. - - This code is encoded for size rather than absolute efficiency, - with a view to being able to inline as much as possible. - - We only bother with direct forms (ie, vcpu in pda) of the operations - here; the indirect forms are better handled in C, since they're - generally too large to inline anyway. + * Asm versions of Xen pv-ops, suitable for either direct use or + * inlining. The inline versions are the same as the direct-use + * versions, with the pre- and post-amble chopped off. + * + * This code is encoded for size rather than absolute efficiency, with + * a view to being able to inline as much as possible. + * + * We only bother with direct forms (ie, vcpu in pda) of the + * operations here; the indirect forms are better handled in C, since + * they're generally too large to inline anyway. */ #include @@ -21,25 +21,25 @@ #include "xen-asm.h" ENTRY(xen_adjust_exception_frame) - mov 8+0(%rsp),%rcx - mov 8+8(%rsp),%r11 + mov 8+0(%rsp), %rcx + mov 8+8(%rsp), %r11 ret $16 hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 /* - Xen64 iret frame: - - ss - rsp - rflags - cs - rip <-- standard iret frame - - flags - - rcx } - r11 }<-- pushed by hypercall page -rsp -> rax } + * Xen64 iret frame: + * + * ss + * rsp + * rflags + * cs + * rip <-- standard iret frame + * + * flags + * + * rcx } + * r11 }<-- pushed by hypercall page + * rsp->rax } */ ENTRY(xen_iret) pushq $0 @@ -48,8 +48,8 @@ ENDPATCH(xen_iret) RELOC(xen_iret, 1b+1) /* - sysexit is not used for 64-bit processes, so it's - only ever used to return to 32-bit compat userspace. + * sysexit is not used for 64-bit processes, so it's only ever used to + * return to 32-bit compat userspace. */ ENTRY(xen_sysexit) pushq $__USER32_DS @@ -64,10 +64,12 @@ ENDPATCH(xen_sysexit) RELOC(xen_sysexit, 1b+1) ENTRY(xen_sysret64) - /* We're already on the usermode stack at this point, but still - with the kernel gs, so we can easily switch back */ + /* + * We're already on the usermode stack at this point, but + * still with the kernel gs, so we can easily switch back + */ movq %rsp, PER_CPU_VAR(old_rsp) - movq PER_CPU_VAR(kernel_stack),%rsp + movq PER_CPU_VAR(kernel_stack), %rsp pushq $__USER_DS pushq PER_CPU_VAR(old_rsp) @@ -81,8 +83,10 @@ ENDPATCH(xen_sysret64) RELOC(xen_sysret64, 1b+1) ENTRY(xen_sysret32) - /* We're already on the usermode stack at this point, but still - with the kernel gs, so we can easily switch back */ + /* + * We're already on the usermode stack at this point, but + * still with the kernel gs, so we can easily switch back + */ movq %rsp, PER_CPU_VAR(old_rsp) movq PER_CPU_VAR(kernel_stack), %rsp @@ -98,28 +102,27 @@ ENDPATCH(xen_sysret32) RELOC(xen_sysret32, 1b+1) /* - Xen handles syscall callbacks much like ordinary exceptions, - which means we have: - - kernel gs - - kernel rsp - - an iret-like stack frame on the stack (including rcx and r11): - ss - rsp - rflags - cs - rip - r11 - rsp-> rcx - - In all the entrypoints, we undo all that to make it look - like a CPU-generated syscall/sysenter and jump to the normal - entrypoint. + * Xen handles syscall callbacks much like ordinary exceptions, which + * means we have: + * - kernel gs + * - kernel rsp + * - an iret-like stack frame on the stack (including rcx and r11): + * ss + * rsp + * rflags + * cs + * rip + * r11 + * rsp->rcx + * + * In all the entrypoints, we undo all that to make it look like a + * CPU-generated syscall/sysenter and jump to the normal entrypoint. */ .macro undo_xen_syscall - mov 0*8(%rsp),%rcx - mov 1*8(%rsp),%r11 - mov 5*8(%rsp),%rsp + mov 0*8(%rsp), %rcx + mov 1*8(%rsp), %r11 + mov 5*8(%rsp), %rsp .endm /* Normal 64-bit system call target */ @@ -146,7 +149,7 @@ ENDPROC(xen_sysenter_target) ENTRY(xen_syscall32_target) ENTRY(xen_sysenter_target) - lea 16(%rsp), %rsp /* strip %rcx,%r11 */ + lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $VGCF_in_syscall jmp hypercall_iret -- cgit v1.2.3-70-g09d2 From 792dc4f6cdacf50d3f2b93756d282fc04ee34bd5 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:43 -0800 Subject: xen: use our own eventchannel->irq path Rather than overloading vectors for event channels, take full responsibility for mapping an event channel to irq directly. With this patch Xen has its own irq allocator. When the kernel gets an event channel upcall, it maps the event channel number to an irq and injects it into the normal interrupt path. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/xen/events.h | 6 ------ arch/x86/xen/irq.c | 17 +---------------- drivers/xen/events.c | 22 ++++++++++++++++++++-- 3 files changed, 21 insertions(+), 24 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index 19144184983..1df35417c41 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h @@ -15,10 +15,4 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) return raw_irqs_disabled_flags(regs->flags); } -static inline void xen_do_IRQ(int irq, struct pt_regs *regs) -{ - regs->orig_ax = ~irq; - do_IRQ(regs); -} - #endif /* _ASM_X86_XEN_EVENTS_H */ diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 5a070900ad3..cfd17799bd6 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -19,21 +19,6 @@ void xen_force_evtchn_callback(void) (void)HYPERVISOR_xen_version(0, NULL); } -static void __init __xen_init_IRQ(void) -{ - int i; - - /* Create identity vector->irq map */ - for(i = 0; i < NR_VECTORS; i++) { - int cpu; - - for_each_possible_cpu(cpu) - per_cpu(vector_irq, cpu)[i] = i; - } - - xen_init_IRQ(); -} - static unsigned long xen_save_fl(void) { struct vcpu_info *vcpu; @@ -127,7 +112,7 @@ static void xen_halt(void) } static const struct pv_irq_ops xen_irq_ops __initdata = { - .init_IRQ = __xen_init_IRQ, + .init_IRQ = xen_init_IRQ, .save_fl = PV_CALLEE_SAVE(xen_save_fl), .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 7c3705479ea..2c8d710713f 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -517,6 +518,24 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id) } +static void xen_do_irq(unsigned irq, struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + if (WARN_ON(irq == -1)) + return; + + exit_idle(); + irq_enter(); + + //printk("cpu %d handling irq %d\n", smp_processor_id(), info->irq); + handle_irq(irq, regs); + + irq_exit(); + + set_irq_regs(old_regs); +} + /* * Search the CPUs pending events bitmasks. For each one found, map * the event number to an irq, and feed it into do_IRQ() for @@ -557,8 +576,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) int port = (word_idx * BITS_PER_LONG) + bit_idx; int irq = evtchn_to_irq[port]; - if (irq != -1) - xen_do_IRQ(irq, regs); + xen_do_irq(irq, regs); } } -- cgit v1.2.3-70-g09d2 From ccbeed3a05908d201b47b6c3dd1a373138bba566 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: x86: make lazy %gs optional on x86_32 Impact: pt_regs changed, lazy gs handling made optional, add slight overhead to SAVE_ALL, simplifies error_code path a bit On x86_32, %gs hasn't been used by kernel and handled lazily. pt_regs doesn't have place for it and gs is saved/loaded only when necessary. In preparation for stack protector support, this patch makes lazy %gs handling optional by doing the followings. * Add CONFIG_X86_32_LAZY_GS and place for gs in pt_regs. * Save and restore %gs along with other registers in entry_32.S unless LAZY_GS. Note that this unfortunately adds "pushl $0" on SAVE_ALL even when LAZY_GS. However, it adds no overhead to common exit path and simplifies entry path with error code. * Define different user_gs accessors depending on LAZY_GS and add lazy_save_gs() and lazy_load_gs() which are noop if !LAZY_GS. The lazy_*_gs() ops are used to save, load and clear %gs lazily. * Define ELF_CORE_COPY_KERNEL_REGS() which always read %gs directly. xen and lguest changes need to be verified. Signed-off-by: Tejun Heo Cc: Jeremy Fitzhardinge Cc: Rusty Russell Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ++ arch/x86/include/asm/elf.h | 15 ++++- arch/x86/include/asm/mmu_context.h | 2 +- arch/x86/include/asm/ptrace.h | 4 +- arch/x86/include/asm/system.h | 12 +++- arch/x86/kernel/asm-offsets_32.c | 1 + arch/x86/kernel/entry_32.S | 132 +++++++++++++++++++++++++++++++------ arch/x86/kernel/process_32.c | 4 +- arch/x86/kernel/ptrace.c | 5 +- arch/x86/lguest/boot.c | 2 +- arch/x86/xen/enlighten.c | 17 ++--- 11 files changed, 158 insertions(+), 40 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5c8e353c112..5bcdede71ba 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -207,6 +207,10 @@ config X86_TRAMPOLINE depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP) default y +config X86_32_LAZY_GS + def_bool y + depends on X86_32 + config KTIME_SCALAR def_bool X86_32 source "init/Kconfig" diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 39b0aac1675..83c1bc8d2e8 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -112,7 +112,7 @@ extern unsigned int vdso_enabled; * now struct_user_regs, they are different) */ -#define ELF_CORE_COPY_REGS(pr_reg, regs) \ +#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \ do { \ pr_reg[0] = regs->bx; \ pr_reg[1] = regs->cx; \ @@ -124,7 +124,6 @@ do { \ pr_reg[7] = regs->ds & 0xffff; \ pr_reg[8] = regs->es & 0xffff; \ pr_reg[9] = regs->fs & 0xffff; \ - pr_reg[10] = get_user_gs(regs); \ pr_reg[11] = regs->orig_ax; \ pr_reg[12] = regs->ip; \ pr_reg[13] = regs->cs & 0xffff; \ @@ -133,6 +132,18 @@ do { \ pr_reg[16] = regs->ss & 0xffff; \ } while (0); +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ +do { \ + ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ + pr_reg[10] = get_user_gs(regs); \ +} while (0); + +#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \ +do { \ + ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ + savesegment(gs, pr_reg[10]); \ +} while (0); + #define ELF_PLATFORM (utsname()->machine) #define set_personality_64bit() do { } while (0) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 4955165682c..f923203dc39 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -79,7 +79,7 @@ do { \ #ifdef CONFIG_X86_32 #define deactivate_mm(tsk, mm) \ do { \ - set_user_gs(task_pt_regs(tsk), 0); \ + lazy_load_gs(0); \ } while (0) #else #define deactivate_mm(tsk, mm) \ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 6d34d954c22..e304b66abee 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -28,7 +28,7 @@ struct pt_regs { int xds; int xes; int xfs; - /* int gs; */ + int xgs; long orig_eax; long eip; int xcs; @@ -50,7 +50,7 @@ struct pt_regs { unsigned long ds; unsigned long es; unsigned long fs; - /* int gs; */ + unsigned long gs; unsigned long orig_ax; unsigned long ip; unsigned long cs; diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 70c74b8db87..79b98e5b96f 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -186,10 +186,20 @@ extern void native_load_gs_index(unsigned); * x86_32 user gs accessors. */ #ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_32_LAZY_GS #define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) #define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) #define task_user_gs(tsk) ((tsk)->thread.gs) -#endif +#define lazy_save_gs(v) savesegment(gs, (v)) +#define lazy_load_gs(v) loadsegment(gs, (v)) +#else /* X86_32_LAZY_GS */ +#define get_user_gs(regs) (u16)((regs)->gs) +#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) +#define task_user_gs(tsk) (task_pt_regs(tsk)->gs) +#define lazy_save_gs(v) do { } while (0) +#define lazy_load_gs(v) do { } while (0) +#endif /* X86_32_LAZY_GS */ +#endif /* X86_32 */ static inline unsigned long get_limit(unsigned long segment) { diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index ee4df08feee..fbf2f33e308 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -75,6 +75,7 @@ void foo(void) OFFSET(PT_DS, pt_regs, ds); OFFSET(PT_ES, pt_regs, es); OFFSET(PT_FS, pt_regs, fs); + OFFSET(PT_GS, pt_regs, gs); OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); OFFSET(PT_EIP, pt_regs, ip); OFFSET(PT_CS, pt_regs, cs); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c461925d3b6..82e6868bee4 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -30,12 +30,13 @@ * 1C(%esp) - %ds * 20(%esp) - %es * 24(%esp) - %fs - * 28(%esp) - orig_eax - * 2C(%esp) - %eip - * 30(%esp) - %cs - * 34(%esp) - %eflags - * 38(%esp) - %oldesp - * 3C(%esp) - %oldss + * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS + * 2C(%esp) - orig_eax + * 30(%esp) - %eip + * 34(%esp) - %cs + * 38(%esp) - %eflags + * 3C(%esp) - %oldesp + * 40(%esp) - %oldss * * "current" is in register %ebx during any slow entries. */ @@ -101,8 +102,99 @@ #define resume_userspace_sig resume_userspace #endif +/* + * User gs save/restore + * + * %gs is used for userland TLS and kernel only uses it for stack + * canary which is required to be at %gs:20 by gcc. Read the comment + * at the top of stackprotector.h for more info. + * + * Local labels 98 and 99 are used. + */ +#ifdef CONFIG_X86_32_LAZY_GS + + /* unfortunately push/pop can't be no-op */ +.macro PUSH_GS + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 +.endm +.macro POP_GS pop=0 + addl $(4 + \pop), %esp + CFI_ADJUST_CFA_OFFSET -(4 + \pop) +.endm +.macro POP_GS_EX +.endm + + /* all the rest are no-op */ +.macro PTGS_TO_GS +.endm +.macro PTGS_TO_GS_EX +.endm +.macro GS_TO_REG reg +.endm +.macro REG_TO_PTGS reg +.endm +.macro SET_KERNEL_GS reg +.endm + +#else /* CONFIG_X86_32_LAZY_GS */ + +.macro PUSH_GS + pushl %gs + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET gs, 0*/ +.endm + +.macro POP_GS pop=0 +98: popl %gs + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_RESTORE gs*/ + .if \pop <> 0 + add $\pop, %esp + CFI_ADJUST_CFA_OFFSET -\pop + .endif +.endm +.macro POP_GS_EX +.pushsection .fixup, "ax" +99: movl $0, (%esp) + jmp 98b +.section __ex_table, "a" + .align 4 + .long 98b, 99b +.popsection +.endm + +.macro PTGS_TO_GS +98: mov PT_GS(%esp), %gs +.endm +.macro PTGS_TO_GS_EX +.pushsection .fixup, "ax" +99: movl $0, PT_GS(%esp) + jmp 98b +.section __ex_table, "a" + .align 4 + .long 98b, 99b +.popsection +.endm + +.macro GS_TO_REG reg + movl %gs, \reg + /*CFI_REGISTER gs, \reg*/ +.endm +.macro REG_TO_PTGS reg + movl \reg, PT_GS(%esp) + /*CFI_REL_OFFSET gs, PT_GS*/ +.endm +.macro SET_KERNEL_GS reg + xorl \reg, \reg + movl \reg, %gs +.endm + +#endif /* CONFIG_X86_32_LAZY_GS */ + .macro SAVE_ALL cld + PUSH_GS pushl %fs CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET fs, 0;*/ @@ -138,6 +230,7 @@ movl %edx, %es movl $(__KERNEL_PERCPU), %edx movl %edx, %fs + SET_KERNEL_GS %edx .endm .macro RESTORE_INT_REGS @@ -164,7 +257,7 @@ CFI_RESTORE eax .endm -.macro RESTORE_REGS +.macro RESTORE_REGS pop=0 RESTORE_INT_REGS 1: popl %ds CFI_ADJUST_CFA_OFFSET -4 @@ -175,6 +268,7 @@ 3: popl %fs CFI_ADJUST_CFA_OFFSET -4 /*CFI_RESTORE fs;*/ + POP_GS \pop .pushsection .fixup, "ax" 4: movl $0, (%esp) jmp 1b @@ -188,6 +282,7 @@ .long 2b, 5b .long 3b, 6b .popsection + POP_GS_EX .endm .macro RING0_INT_FRAME @@ -368,6 +463,7 @@ sysenter_exit: xorl %ebp,%ebp TRACE_IRQS_ON 1: mov PT_FS(%esp), %fs + PTGS_TO_GS ENABLE_INTERRUPTS_SYSEXIT #ifdef CONFIG_AUDITSYSCALL @@ -416,6 +512,7 @@ sysexit_audit: .align 4 .long 1b,2b .popsection + PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) # system call handler stub @@ -458,8 +555,7 @@ restore_all: restore_nocheck: TRACE_IRQS_IRET restore_nocheck_notrace: - RESTORE_REGS - addl $4, %esp # skip orig_eax/error_code + RESTORE_REGS 4 # skip orig_eax/error_code CFI_ADJUST_CFA_OFFSET -4 irq_return: INTERRUPT_RETURN @@ -1078,7 +1174,10 @@ ENTRY(page_fault) CFI_ADJUST_CFA_OFFSET 4 ALIGN error_code: - /* the function address is in %fs's slot on the stack */ + /* the function address is in %gs's slot on the stack */ + pushl %fs + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET fs, 0*/ pushl %es CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET es, 0*/ @@ -1107,20 +1206,15 @@ error_code: CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ebx, 0 cld - pushl %fs - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET fs, 0*/ movl $(__KERNEL_PERCPU), %ecx movl %ecx, %fs UNWIND_ESPFIX_STACK - popl %ecx - CFI_ADJUST_CFA_OFFSET -4 - /*CFI_REGISTER es, ecx*/ - movl PT_FS(%esp), %edi # get the function address + GS_TO_REG %ecx + movl PT_GS(%esp), %edi # get the function address movl PT_ORIG_EAX(%esp), %edx # get the error code movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart - mov %ecx, PT_FS(%esp) - /*CFI_REL_OFFSET fs, ES*/ + REG_TO_PTGS %ecx + SET_KERNEL_GS %ecx movl $(__USER_DS), %ecx movl %ecx, %ds movl %ecx, %es diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d58a340e1be..86122fa2a1b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -539,7 +539,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * used %fs or %gs (it does not today), or if the kernel is * running inside of a hypervisor layer. */ - savesegment(gs, prev->gs); + lazy_save_gs(prev->gs); /* * Load the per-thread Thread-Local Storage descriptor. @@ -585,7 +585,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Restore %gs if needed (which is common) */ if (prev->gs | next->gs) - loadsegment(gs, next->gs); + lazy_load_gs(next->gs); percpu_write(current_task, next_p); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 508b6b57d0c..7ec39ab37a2 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value) static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); - regno >>= 2; - if (regno > FS) - --regno; - return ®s->bx + regno; + return ®s->bx + (regno >> 2); } static u16 get_segment_reg(struct task_struct *task, unsigned long offset) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 19e33b6cd59..da2e314f61b 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -283,7 +283,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) /* There's one problem which normal hardware doesn't have: the Host * can't handle us removing entries we're currently using. So we clear * the GS register here: if it's needed it'll be reloaded anyway. */ - loadsegment(gs, 0); + lazy_load_gs(0); lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 37230342c2c..95ff6a0e942 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -323,13 +323,14 @@ static void load_TLS_descriptor(struct thread_struct *t, static void xen_load_tls(struct thread_struct *t, unsigned int cpu) { /* - * XXX sleazy hack: If we're being called in a lazy-cpu zone, - * it means we're in a context switch, and %gs has just been - * saved. This means we can zero it out to prevent faults on - * exit from the hypervisor if the next process has no %gs. - * Either way, it has been saved, and the new value will get - * loaded properly. This will go away as soon as Xen has been - * modified to not save/restore %gs for normal hypercalls. + * XXX sleazy hack: If we're being called in a lazy-cpu zone + * and lazy gs handling is enabled, it means we're in a + * context switch, and %gs has just been saved. This means we + * can zero it out to prevent faults on exit from the + * hypervisor if the next process has no %gs. Either way, it + * has been saved, and the new value will get loaded properly. + * This will go away as soon as Xen has been modified to not + * save/restore %gs for normal hypercalls. * * On x86_64, this hack is not used for %gs, because gs points * to KERNEL_GS_BASE (and uses it for PDA references), so we @@ -341,7 +342,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) */ if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { #ifdef CONFIG_X86_32 - loadsegment(gs, 0); + lazy_load_gs(0); #else loadsegment(fs, 0); #endif -- cgit v1.2.3-70-g09d2 From 694aa960608d2976666d850bd4ef78053bbd0c84 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 12 Feb 2009 16:25:42 -0800 Subject: xen: fix xen_flush_tlb_others The commit commit 4595f9620cda8a1e973588e743cf5f8436dd20c6 Author: Rusty Russell Date: Sat Jan 10 21:58:09 2009 -0800 x86: change flush_tlb_others to take a const struct cpumask causes xen_flush_tlb_others to allocate a multicall and then issue it without initializing it in the case where the cpumask is empty, leading to: [ 8.354898] 1 multicall(s) failed: cpu 1 [ 8.354921] Pid: 2213, comm: bootclean Not tainted 2.6.29-rc3-x86_32p-xenU-tip #135 [ 8.354937] Call Trace: [ 8.354955] [] xen_mc_flush+0x133/0x1b0 [ 8.354971] [] ? xen_force_evtchn_callback+0x1a/0x30 [ 8.354988] [] xen_flush_tlb_others+0xb0/0xd0 [ 8.355003] [] flush_tlb_page+0x53/0xa0 [ 8.355018] [] do_wp_page+0x2a0/0x7c0 [ 8.355034] [] ? notify_remote_via_irq+0x3a/0x70 [ 8.355049] [] handle_mm_fault+0x7b0/0xa50 [ 8.355065] [] ? wake_up_new_task+0x8e/0xb0 [ 8.355079] [] ? do_fork+0xe5/0x320 [ 8.355095] [] do_page_fault+0xe9/0x240 [ 8.355109] [] ? do_page_fault+0x0/0x240 [ 8.355125] [] error_code+0x72/0x78 [ 8.355139] call 1/1: op=2863311530 arg=[aaaaaaaa] result=-38 xen_flush_tlb_others+0x41/0xd0 Since empty cpumasks are rare and undoing an xen_mc_entry() is tricky just issue such requests normally. Signed-off-by: Ian Campbell Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index d2e8ed1aff3..319bd40a57c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1273,8 +1273,6 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, /* Remove us, and any offline CPUS. */ cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); - if (unlikely(cpumask_empty(to_cpumask(args->mask)))) - goto issue; if (va == TLB_FLUSH_ALL) { args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; @@ -1285,7 +1283,6 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); -issue: xen_mc_issue(PARAVIRT_LAZY_MMU); } -- cgit v1.2.3-70-g09d2 From 0341c14da49e7b93d2998926f6ac89a3129e3fa1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 13 Feb 2009 11:14:01 -0800 Subject: x86: use _types.h headers in asm where available In general, the only definitions that assembly files can use are in _types.S headers (where available), so convert them. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/boot/compressed/head_32.S | 2 +- arch/x86/boot/compressed/head_64.S | 4 ++-- arch/x86/boot/header.S | 2 +- arch/x86/kernel/acpi/realmode/wakeup.S | 4 ++-- arch/x86/kernel/acpi/wakeup_32.S | 2 +- arch/x86/kernel/acpi/wakeup_64.S | 4 ++-- arch/x86/kernel/efi_stub_32.S | 2 +- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/entry_64.S | 2 +- arch/x86/kernel/head_32.S | 4 ++-- arch/x86/kernel/relocate_kernel_32.S | 2 +- arch/x86/kernel/relocate_kernel_64.S | 4 ++-- arch/x86/kernel/trampoline_32.S | 2 +- arch/x86/kernel/trampoline_64.S | 4 ++-- arch/x86/kernel/vmlinux_32.lds.S | 2 +- arch/x86/kernel/vmlinux_64.lds.S | 2 +- arch/x86/lib/getuser.S | 2 +- arch/x86/power/hibernate_asm_32.S | 2 +- arch/x86/power/hibernate_asm_64.S | 2 +- arch/x86/xen/xen-head.S | 2 +- 20 files changed, 26 insertions(+), 26 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 29c5fbf0839..112f81abfa6 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 1d5dff4123e..36743525e37 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -26,8 +26,8 @@ #include #include -#include -#include +#include +#include #include #include #include diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b993062e9a5..7ccff4884a2 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include "boot.h" #include "offsets.h" diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index 3355973b12a..580b4e29601 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S @@ -3,8 +3,8 @@ */ #include #include -#include -#include +#include +#include #include .code16 diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index a12e6a9fb65..8ded418b059 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -1,7 +1,7 @@ .section .text.page_aligned #include #include -#include +#include # Copyright 2003, 2008 Pavel Machek , distribute under GPLv2 diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index bcc293423a7..82add8b804b 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -1,8 +1,8 @@ .text #include #include -#include -#include +#include +#include #include #include diff --git a/arch/x86/kernel/efi_stub_32.S b/arch/x86/kernel/efi_stub_32.S index ef00bb77d7e..8b9171b9cbd 100644 --- a/arch/x86/kernel/efi_stub_32.S +++ b/arch/x86/kernel/efi_stub_32.S @@ -6,7 +6,7 @@ */ #include -#include +#include /* * efi_call_phys(void *, ...) is a function with variable parameters. diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index e9920683145..999e827ef9c 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index fbcf96b295f..dcf31283f94 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 2a0aad7718d..c32ca19d591 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -11,8 +11,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index a160f311972..2064d0aa8d2 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -7,7 +7,7 @@ */ #include -#include +#include #include #include diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index b0bbdd4829c..d32cfb27a47 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -7,10 +7,10 @@ */ #include -#include +#include #include #include -#include +#include /* * Must be relocatable PIC code callable as a C function diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S index d8ccc3c6552..66d874e5404 100644 --- a/arch/x86/kernel/trampoline_32.S +++ b/arch/x86/kernel/trampoline_32.S @@ -29,7 +29,7 @@ #include #include -#include +#include /* We can free up trampoline after bootup if cpu hotplug is not supported. */ #ifndef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 95a012a4664..cddfb8d386b 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -25,8 +25,8 @@ */ #include -#include -#include +#include +#include #include #include #include diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 3eba7f7bac0..0d860963f26 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 087a7f2c639..fbfced6f680 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -6,7 +6,7 @@ #include #include -#include +#include #undef i386 /* in case the preprocessor is a 32bit one */ diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index ad374003742..51f1504cddd 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S index d1e9b53f9d3..b641388d828 100644 --- a/arch/x86/power/hibernate_asm_32.S +++ b/arch/x86/power/hibernate_asm_32.S @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 000415947d9..9356547d8c0 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -18,7 +18,7 @@ .text #include #include -#include +#include #include #include diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 63d49a523ed..1a5ff24e29c 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include -- cgit v1.2.3-70-g09d2 From b93d51dc62a41b5c2d6f32a0870709dc0cc55545 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 6 Feb 2009 13:35:57 -0800 Subject: x86, xen: record and display initiator of each multicall when debugging Store the caller for each multicall so we can report it on failure. Signed-off-by: Ian Campbell Signed-off-by: Ingo Molnar --- arch/x86/xen/multicalls.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index c738644b543..82f2513e975 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -39,6 +39,7 @@ struct mc_buffer { struct multicall_entry entries[MC_BATCH]; #if MC_DEBUG struct multicall_entry debug[MC_BATCH]; + void *caller[MC_BATCH]; #endif unsigned char args[MC_ARGS]; struct callback { @@ -154,11 +155,12 @@ void xen_mc_flush(void) ret, smp_processor_id()); dump_stack(); for (i = 0; i < b->mcidx; i++) { - printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\n", + printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\t%pF\n", i+1, b->mcidx, b->debug[i].op, b->debug[i].args[0], - b->entries[i].result); + b->entries[i].result, + b->caller[i]); } } #endif @@ -197,6 +199,9 @@ struct multicall_space __xen_mc_entry(size_t args) } ret.mc = &b->entries[b->mcidx]; +#ifdef MC_DEBUG + b->caller[b->mcidx] = __builtin_return_address(0); +#endif b->mcidx++; ret.args = &b->args[argidx]; b->argidx = argidx + args; -- cgit v1.2.3-70-g09d2 From 3d39e9d07b576ee72f2c94cfad9e618fe21763b2 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 13:38:51 -0800 Subject: x86, xen: degrade BUG to WARN when multicall fails If one of the components of a multicall fails, WARN rather than BUG, to help with debugging. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/xen/multicalls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 82f2513e975..6cffd5532b9 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -179,7 +179,7 @@ void xen_mc_flush(void) } b->cbidx = 0; - BUG_ON(ret); + WARN_ON(ret); } struct multicall_space __xen_mc_entry(size_t args) -- cgit v1.2.3-70-g09d2 From c99608637eac8834d830496c462c054137772122 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 13:38:56 -0800 Subject: x86, xen: do multicall callbacks with interrupts disabled We can't call the callbacks after enabling interrupts, as we may get a nested multicall call, which would cause a great deal of havok. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/xen/multicalls.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 6cffd5532b9..8bff7e7c290 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -170,8 +170,6 @@ void xen_mc_flush(void) } else BUG_ON(b->argidx != 0); - local_irq_restore(flags); - for (i = 0; i < b->cbidx; i++) { struct callback *cb = &b->callbacks[i]; @@ -179,6 +177,8 @@ void xen_mc_flush(void) } b->cbidx = 0; + local_irq_restore(flags); + WARN_ON(ret); } -- cgit v1.2.3-70-g09d2 From c1eeb2de41d7015678bdd412b48a5f071b84e29a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 16 Feb 2009 23:02:14 -0800 Subject: x86: fold apic_ops into genapic Impact: cleanup make it simpler, don't need have one extra struct. v2: fix the sgi_uv build Signed-off-by: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 97 ++++++++++--------------------- arch/x86/include/asm/genapic.h | 61 +++++++++++++++++++ arch/x86/kernel/apic.c | 63 ++------------------ arch/x86/kernel/bigsmp_32.c | 7 +++ arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 2 +- arch/x86/kernel/cpu/perfctr-watchdog.c | 2 +- arch/x86/kernel/es7000_32.c | 7 +++ arch/x86/kernel/genapic_64.c | 2 +- arch/x86/kernel/genapic_flat_64.c | 16 ++++- arch/x86/kernel/genx2apic_cluster.c | 11 +++- arch/x86/kernel/genx2apic_phys.c | 11 +++- arch/x86/kernel/genx2apic_uv_x.c | 9 ++- arch/x86/kernel/ipi.c | 2 +- arch/x86/kernel/irq.c | 2 +- arch/x86/kernel/nmi.c | 2 +- arch/x86/kernel/numaq_32.c | 7 +++ arch/x86/kernel/probe_32.c | 7 +++ arch/x86/kernel/summit_32.c | 7 +++ arch/x86/kernel/uv_irq.c | 2 +- arch/x86/kernel/vmi_32.c | 6 +- arch/x86/kernel/vmiclock_32.c | 2 +- arch/x86/lguest/boot.c | 19 +++--- arch/x86/xen/enlighten.c | 21 +++---- 24 files changed, 205 insertions(+), 162 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index dc1db99cd40..4f56e053d34 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -92,6 +92,12 @@ static inline u32 native_apic_mem_read(u32 reg) return *((volatile u32 *)(APIC_BASE + reg)); } +extern void native_apic_wait_icr_idle(void); +extern u32 native_safe_apic_wait_icr_idle(void); +extern void native_apic_icr_write(u32 low, u32 id); +extern u64 native_apic_icr_read(void); + +#ifdef CONFIG_X86_X2APIC static inline void native_apic_msr_write(u32 reg, u32 v) { if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || @@ -112,7 +118,31 @@ static inline u32 native_apic_msr_read(u32 reg) return low; } -#ifdef CONFIG_X86_X2APIC +static inline void native_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return; +} + +static inline u32 native_safe_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return 0; +} + +static inline void native_x2apic_icr_write(u32 low, u32 id) +{ + wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +} + +static inline u64 native_x2apic_icr_read(void) +{ + unsigned long val; + + rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + return val; +} + extern int x2apic; extern void check_x2apic(void); extern void enable_x2apic(void); @@ -146,47 +176,6 @@ static inline int x2apic_enabled(void) } #endif -struct apic_ops { - u32 (*read)(u32 reg); - void (*write)(u32 reg, u32 v); - u64 (*icr_read)(void); - void (*icr_write)(u32 low, u32 high); - void (*wait_icr_idle)(void); - u32 (*safe_wait_icr_idle)(void); -}; - -extern struct apic_ops *apic_ops; - -static inline u32 apic_read(u32 reg) -{ - return apic_ops->read(reg); -} - -static inline void apic_write(u32 reg, u32 val) -{ - apic_ops->write(reg, val); -} - -static inline u64 apic_icr_read(void) -{ - return apic_ops->icr_read(); -} - -static inline void apic_icr_write(u32 low, u32 high) -{ - apic_ops->icr_write(low, high); -} - -static inline void apic_wait_icr_idle(void) -{ - apic_ops->wait_icr_idle(); -} - -static inline u32 safe_apic_wait_icr_idle(void) -{ - return apic_ops->safe_wait_icr_idle(); -} - extern int get_physical_broadcast(void); #ifdef CONFIG_X86_X2APIC @@ -197,18 +186,6 @@ static inline void ack_x2APIC_irq(void) } #endif - -static inline void ack_APIC_irq(void) -{ - /* - * ack_APIC_irq() actually gets compiled as a single instruction - * ... yummie. - */ - - /* Docs say use 0 for future compatibility */ - apic_write(APIC_EOI, 0); -} - extern int lapic_get_maxlvt(void); extern void clear_local_APIC(void); extern void connect_bsp_APIC(void); @@ -256,18 +233,6 @@ static inline void disable_local_APIC(void) { } #define SET_APIC_ID(x) (apic->set_apic_id(x)) #else -#ifdef CONFIG_X86_LOCAL_APIC -static inline unsigned default_get_apic_id(unsigned long x) -{ - unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - - if (APIC_XAPIC(ver)) - return (x >> 24) & 0xFF; - else - return (x >> 24) & 0x0F; -} -#endif - #endif #endif /* _ASM_X86_APIC_H */ diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 273b99452ae..a6d0b00a544 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -5,6 +5,7 @@ #include #include +#include /* * Copyright 2004 James Cleverdon, IBM. @@ -83,10 +84,70 @@ struct genapic { void (*smp_callin_clear_local_apic)(void); void (*store_NMI_vector)(unsigned short *high, unsigned short *low); void (*inquire_remote_apic)(int apicid); + + /* apic ops */ + u32 (*read)(u32 reg); + void (*write)(u32 reg, u32 v); + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); + void (*wait_icr_idle)(void); + u32 (*safe_wait_icr_idle)(void); }; extern struct genapic *apic; +static inline u32 apic_read(u32 reg) +{ + return apic->read(reg); +} + +static inline void apic_write(u32 reg, u32 val) +{ + apic->write(reg, val); +} + +static inline u64 apic_icr_read(void) +{ + return apic->icr_read(); +} + +static inline void apic_icr_write(u32 low, u32 high) +{ + apic->icr_write(low, high); +} + +static inline void apic_wait_icr_idle(void) +{ + apic->wait_icr_idle(); +} + +static inline u32 safe_apic_wait_icr_idle(void) +{ + return apic->safe_wait_icr_idle(); +} + + +static inline void ack_APIC_irq(void) +{ + /* + * ack_APIC_irq() actually gets compiled as a single instruction + * ... yummie. + */ + + /* Docs say use 0 for future compatibility */ + apic_write(APIC_EOI, 0); +} + +static inline unsigned default_get_apic_id(unsigned long x) +{ + unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + + if (APIC_XAPIC(ver)) + return (x >> 24) & 0xFF; + else + return (x >> 24) & 0x0F; +} + /* * Warm reset vector default position: */ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 004aa1c31e4..af494bad885 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -210,18 +210,13 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } -/* - * Paravirt kernels also might be using these below ops. So we still - * use generic apic_read()/apic_write(), which might be pointing to different - * ops in PARAVIRT case. - */ -void xapic_wait_icr_idle(void) +void native_apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -u32 safe_xapic_wait_icr_idle(void) +u32 native_safe_apic_wait_icr_idle(void) { u32 send_status; int timeout; @@ -237,13 +232,13 @@ u32 safe_xapic_wait_icr_idle(void) return send_status; } -void xapic_icr_write(u32 low, u32 id) +void native_apic_icr_write(u32 low, u32 id) { apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); apic_write(APIC_ICR, low); } -static u64 xapic_icr_read(void) +u64 native_apic_icr_read(void) { u32 icr1, icr2; @@ -253,54 +248,6 @@ static u64 xapic_icr_read(void) return icr1 | ((u64)icr2 << 32); } -static struct apic_ops xapic_ops = { - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = xapic_icr_read, - .icr_write = xapic_icr_write, - .wait_icr_idle = xapic_wait_icr_idle, - .safe_wait_icr_idle = safe_xapic_wait_icr_idle, -}; - -struct apic_ops __read_mostly *apic_ops = &xapic_ops; -EXPORT_SYMBOL_GPL(apic_ops); - -#ifdef CONFIG_X86_X2APIC -static void x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return; -} - -static u32 safe_x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return 0; -} - -void x2apic_icr_write(u32 low, u32 id) -{ - wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); -} - -static u64 x2apic_icr_read(void) -{ - unsigned long val; - - rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); - return val; -} - -static struct apic_ops x2apic_ops = { - .read = native_apic_msr_read, - .write = native_apic_msr_write, - .icr_read = x2apic_icr_read, - .icr_write = x2apic_icr_write, - .wait_icr_idle = x2apic_wait_icr_idle, - .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, -}; -#endif - /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ @@ -1329,7 +1276,6 @@ void check_x2apic(void) if (msr & X2APIC_ENABLE) { pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); x2apic_preenabled = x2apic = 1; - apic_ops = &x2apic_ops; } } @@ -1403,7 +1349,6 @@ void __init enable_IR_x2apic(void) if (!x2apic) { x2apic = 1; - apic_ops = &x2apic_ops; enable_x2apic(); } diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index 47a62f46afd..9eeb714c5de 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -263,4 +263,11 @@ struct genapic apic_bigsmp = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = default_inquire_remote_apic, + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = native_apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 4772e91e824..e22d6ed26e6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 5e8c79e748a..42f090702f0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 9abd48b2267..f6c70a164e3 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include struct nmi_watchdog_ctlblk { diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 55515d73d9c..23f1df4ce18 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -806,4 +806,11 @@ struct genapic apic_es7000 = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = default_inquire_remote_apic, + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = native_apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index cdc4772d9c8..70b616b4c62 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -19,8 +19,8 @@ #include #include -#include #include +#include #include extern struct genapic apic_flat; diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 249d2d3c034..36ee760fd13 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -17,8 +17,8 @@ #include #include #include -#include #include +#include #ifdef CONFIG_ACPI #include @@ -229,6 +229,13 @@ struct genapic apic_flat = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = NULL, + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = native_apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; /* @@ -374,4 +381,11 @@ struct genapic apic_physflat = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = NULL, + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = native_apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 7c87156b641..dd6e8d68542 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -7,8 +7,8 @@ #include #include -#include #include +#include DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); @@ -46,7 +46,7 @@ static void /* * send the IPI. */ - x2apic_icr_write(cfg, apicid); + native_x2apic_icr_write(cfg, apicid); } /* @@ -234,4 +234,11 @@ struct genapic apic_x2apic_cluster = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = NULL, + + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .icr_read = native_x2apic_icr_read, + .icr_write = native_x2apic_icr_write, + .wait_icr_idle = native_x2apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, }; diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 5cbae8aa040..eb1486bb002 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -7,8 +7,8 @@ #include #include -#include #include +#include static int x2apic_phys; @@ -50,7 +50,7 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, /* * send the IPI. */ - x2apic_icr_write(cfg, apicid); + native_x2apic_icr_write(cfg, apicid); } static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) @@ -220,4 +220,11 @@ struct genapic apic_x2apic_phys = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = NULL, + + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .icr_read = native_x2apic_icr_read, + .icr_write = native_x2apic_icr_write, + .wait_icr_idle = native_x2apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, }; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 89b84e004f0..9ae4a92fac8 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -22,8 +22,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -292,6 +292,13 @@ struct genapic apic_x2apic_uv_x = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = NULL, + + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .icr_read = native_x2apic_icr_read, + .icr_write = native_x2apic_icr_write, + .wait_icr_idle = native_x2apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, }; static __cpuinit void set_x2apic_extra_bits(int pnode) diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index dbf5445727a..1326272cae4 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index f13ca1650aa..3957776b193 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index bdfad80c3cf..48b9ca5e088 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -11,7 +11,7 @@ * Mikael Pettersson : PM converted to driver model. Disable/enable API. */ -#include +#include #include #include diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 0cc41a1d255..f0f0c2f0596 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -569,4 +569,11 @@ struct genapic apic_numaq = { .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, .store_NMI_vector = numaq_store_NMI_vector, .inquire_remote_apic = NULL, + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = native_apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index 22337b75de6..1f701caa95b 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -127,6 +127,13 @@ struct genapic apic_default = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = default_inquire_remote_apic, + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = native_apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; extern struct genapic apic_numaq; diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 1e733eff9b3..1cf32c325d1 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -599,4 +599,11 @@ struct genapic apic_summit = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = default_inquire_remote_apic, + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = native_apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index aeef529917e..75eb5ec5dd2 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include static void uv_noop(unsigned int irq) diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index f052c84ecbe..a1c7b71dc0d 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include @@ -798,8 +798,8 @@ static inline int __init activate_vmi(void) #endif #ifdef CONFIG_X86_LOCAL_APIC - para_fill(apic_ops->read, APICRead); - para_fill(apic_ops->write, APICWrite); + para_fill(apic->read, APICRead); + para_fill(apic->write, APICWrite); #endif /* diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index a4791ef412d..2a5e0e6a7c0 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index da2e314f61b..bc9893f2c38 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include #include #include @@ -828,13 +828,14 @@ static u32 lguest_apic_safe_wait_icr_idle(void) return 0; } -static struct apic_ops lguest_basic_apic_ops = { - .read = lguest_apic_read, - .write = lguest_apic_write, - .icr_read = lguest_apic_icr_read, - .icr_write = lguest_apic_icr_write, - .wait_icr_idle = lguest_apic_wait_icr_idle, - .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle, +static void set_lguest_basic_apic_ops(void) +{ + apic->read = lguest_apic_read; + apic->write = lguest_apic_write; + apic->icr_read = lguest_apic_icr_read; + apic->icr_write = lguest_apic_icr_write; + apic->wait_icr_idle = lguest_apic_wait_icr_idle; + apic->safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle; }; #endif @@ -1035,7 +1036,7 @@ __init void lguest_init(void) #ifdef CONFIG_X86_LOCAL_APIC /* apic read/write intercepts */ - apic_ops = &lguest_basic_apic_ops; + set_lguest_basic_apic_ops(); #endif /* time operations */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 95ff6a0e942..e3dd3fb6729 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -36,7 +36,7 @@ #include #include -#include +#include #include #include #include @@ -554,14 +554,15 @@ static u32 xen_safe_apic_wait_icr_idle(void) return 0; } -static struct apic_ops xen_basic_apic_ops = { - .read = xen_apic_read, - .write = xen_apic_write, - .icr_read = xen_apic_icr_read, - .icr_write = xen_apic_icr_write, - .wait_icr_idle = xen_apic_wait_icr_idle, - .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle, -}; +static void set_xen_basic_apic_ops(void) +{ + apic->read = xen_apic_read; + apic->write = xen_apic_write; + apic->icr_read = xen_apic_icr_read; + apic->icr_write = xen_apic_icr_write; + apic->wait_icr_idle = xen_apic_wait_icr_idle; + apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; +} #endif @@ -898,7 +899,7 @@ asmlinkage void __init xen_start_kernel(void) /* * set up the basic apic ops. */ - apic_ops = &xen_basic_apic_ops; + set_xen_basic_apic_ops(); #endif if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { -- cgit v1.2.3-70-g09d2 From 7b6aa335ca1a845c2262ec7a595b4521bca0f79d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 13:58:15 +0100 Subject: x86, apic: remove genapic.h Impact: cleanup Remove genapic.h and remove all references to it. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ipi.h | 2 +- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/bigsmp_32.c | 2 +- arch/x86/kernel/cpu/addon_cpuid_features.c | 2 +- arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 2 +- arch/x86/kernel/cpu/mcheck/p4.c | 2 +- arch/x86/kernel/crash.c | 2 +- arch/x86/kernel/es7000_32.c | 4 ++-- arch/x86/kernel/genapic_64.c | 2 +- arch/x86/kernel/genapic_flat_64.c | 2 +- arch/x86/kernel/genx2apic_cluster.c | 2 +- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 2 +- arch/x86/kernel/io_apic.c | 2 +- arch/x86/kernel/ipi.c | 2 +- arch/x86/kernel/irq.c | 2 +- arch/x86/kernel/irq_32.c | 2 +- arch/x86/kernel/kgdb.c | 2 +- arch/x86/kernel/mpparse.c | 2 +- arch/x86/kernel/nmi.c | 2 +- arch/x86/kernel/numaq_32.c | 4 ++-- arch/x86/kernel/probe_32.c | 8 ++++---- arch/x86/kernel/reboot.c | 2 +- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smp.c | 2 +- arch/x86/kernel/smpboot.c | 4 ++-- arch/x86/kernel/summit_32.c | 2 +- arch/x86/kernel/tlb_uv.c | 4 ++-- arch/x86/kernel/uv_irq.c | 2 +- arch/x86/kernel/visws_quirks.c | 4 ++-- arch/x86/kernel/vmi_32.c | 2 +- arch/x86/kernel/vmiclock_32.c | 2 +- arch/x86/lguest/boot.c | 2 +- arch/x86/mm/srat_64.c | 2 +- arch/x86/mm/tlb.c | 2 +- arch/x86/oprofile/nmi_int.c | 2 +- arch/x86/oprofile/op_model_p4.c | 2 +- arch/x86/oprofile/op_model_ppro.c | 2 +- arch/x86/pci/numaq_32.c | 2 +- arch/x86/xen/enlighten.c | 2 +- 45 files changed, 54 insertions(+), 54 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index 5f2efc5d992..3395c680a97 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -123,7 +123,7 @@ extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector); extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, int vector); -#include +#include extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 956c1dee6fb..42814152c94 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index af494bad885..7db03a9b61d 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -36,7 +36,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index 9eeb714c5de..72f4e534051 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index e48640cfac0..6882a735d9c 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -7,7 +7,7 @@ #include #include -#include +#include struct cpuid_bit { u16 feature; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ff4d7b9e32e..c94ba9311e6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -12,7 +12,7 @@ # include #endif -#include +#include #include "cpu.h" diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4b5d13e472d..41f3788ec9b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -26,8 +26,8 @@ #ifdef CONFIG_X86_LOCAL_APIC #include #include -#include -#include +#include +#include #include #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1f137a87d4b..290f92e2b7c 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -24,7 +24,7 @@ #ifdef CONFIG_X86_LOCAL_APIC #include #include -#include +#include #endif static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index e22d6ed26e6..4772e91e824 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 42f090702f0..5e8c79e748a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index f9c92b66dfb..9b60fce09f7 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index ad7f2a696f4..3340cc0f244 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -28,7 +28,7 @@ #include #include -#include +#include #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 23f1df4ce18..6cdfb3e4dc3 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include /* @@ -387,7 +387,7 @@ void __init es7000_enable_apic_mode(void) #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index ef788635324..91cae6f6e73 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 36ee760fd13..a7d84763648 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_ACPI diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index dd6e8d68542..f5e02cffa26 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index eb1486bb002..11eb4cb7ca3 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include static int x2apic_phys; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 9ae4a92fac8..c1746a198bd 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index a89878e08a4..00e6071cefc 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -62,7 +62,7 @@ #include #include -#include +#include #define __apicdebuginit(type) static type __init diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 1326272cae4..dbf5445727a 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3957776b193..f13ca1650aa 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 4beb9a13873..e4ac7c80e2d 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -212,7 +212,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) } #ifdef CONFIG_HOTPLUG_CPU -#include +#include /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 5c4f5548384..eedfaebe106 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -46,7 +46,7 @@ #include #include -#include +#include /* * Put the error code here just in case the user cares: diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 20076445319..7f4d2586972 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -29,7 +29,7 @@ #include #include -#include +#include /* * Checksum an MP configuration block. */ diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 48b9ca5e088..bdfad80c3cf 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -11,7 +11,7 @@ * Mikael Pettersson : PM converted to driver model. Disable/enable API. */ -#include +#include #include #include diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index f0f0c2f0596..5a2d75d1fd4 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include #include @@ -301,7 +301,7 @@ int __init get_memcfg_numaq(void) #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index b3d5d74e522..be0d554984a 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -17,20 +17,20 @@ #include #include #include -#include +#include #include #include #include #include -#include +#include #include #include #include #include #include #include -#include +#include #include #include @@ -41,7 +41,7 @@ #include #include -#include +#include #ifdef CONFIG_HOTPLUG_CPU #define DEFAULT_SEND_IPI (1) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 32e8f0af292..7c8cd447d5e 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -24,7 +24,7 @@ # include #endif -#include +#include /* * Power off function, if any diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 43d964411c0..deaafd2693e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -97,7 +97,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index eaaffae31cc..13f33ea8cca 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include /* * Some notes on x86 processor bugs affecting SMP operation: * diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b5f2b698973..562a9fc3bc3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -60,12 +60,12 @@ #include #include #include -#include +#include #include #include #include -#include +#include #include #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 1cf32c325d1..eb31ba276bb 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index f396e61bcb3..1a7dfa7cb52 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -15,12 +15,12 @@ #include #include #include -#include +#include #include #include #include -#include +#include static struct bau_control **uv_bau_table_bases __read_mostly; static int uv_bau_retry_limit __read_mostly; diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index 75eb5ec5dd2..aeef529917e 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include static void uv_noop(unsigned int irq) diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 4fd646e6dd4..5264fea6c28 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -32,9 +32,9 @@ #include #include -#include +#include -#include +#include #include diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index a1c7b71dc0d..2cc4a90e2cb 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 2a5e0e6a7c0..a4791ef412d 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index bc9893f2c38..f3a5305b8ad 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 15df1baee10..574c8bc95ef 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include int acpi_numa __initdata; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 14c5af4d11e..b641349fe07 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -14,7 +14,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0, }; -#include +#include /* * Smarter SMP flushing macros. * c/o Linus Torvalds. diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index a32a5c7a8ef..202864ad49a 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include "op_counter.h" #include "op_x86_model.h" diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 09a237bc9ef..4c4a51c90bc 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "op_x86_model.h" diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 5ebd8f605d7..e9f80c744cf 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 5601e829c38..8eb295e116f 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index e3dd3fb6729..86497d5f44c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -36,7 +36,7 @@ #include #include -#include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From 965c7ecaf2e2b083d711a01ab33735a4bdeee1a4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Feb 2009 23:19:12 +0100 Subject: x86: remove the Voyager 32-bit subarch Impact: remove unused/broken code The Voyager subarch last built successfully on the v2.6.26 kernel and has been stale since then and does not build on the v2.6.27, v2.6.28 and v2.6.29-rc5 kernels. No actual users beyond the maintainer reported this breakage. Patches were sent and most of the fixes were accepted but the discussion around how to do a few remaining issues cleanly fizzled out with no resolution and the code remained broken. In the v2.6.30 x86 tree development cycle 32-bit subarch support has been reworked and removed - and the Voyager code, beyond the build problems already known, needs serious and significant changes and probably a rewrite to support it. CONFIG_X86_VOYAGER has been marked BROKEN then. The maintainer has been notified but no patches have been sent so far to fix it. While all other subarchs have been converted to the new scheme, voyager is still broken. We'd prefer to receive patches which clean up the current situation in a constructive way, but even in case of removal there is no obstacle to add that support back after the issues have been sorted out in a mutually acceptable fashion. So remove this inactive code for now. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 14 - arch/x86/boot/Makefile | 1 - arch/x86/boot/a20.c | 6 - arch/x86/boot/boot.h | 3 - arch/x86/boot/main.c | 5 - arch/x86/boot/voyager.c | 40 - arch/x86/configs/i386_defconfig | 1 - arch/x86/configs/x86_64_defconfig | 1 - arch/x86/include/asm/mach-voyager/do_timer.h | 17 - arch/x86/include/asm/mach-voyager/entry_arch.h | 26 - arch/x86/include/asm/mach-voyager/setup_arch.h | 12 - arch/x86/include/asm/vic.h | 61 - arch/x86/include/asm/voyager.h | 571 -------- arch/x86/lguest/Kconfig | 1 - arch/x86/mach-voyager/Makefile | 8 - arch/x86/mach-voyager/setup.c | 119 -- arch/x86/mach-voyager/voyager_basic.c | 317 ----- arch/x86/mach-voyager/voyager_cat.c | 1197 ---------------- arch/x86/mach-voyager/voyager_smp.c | 1805 ------------------------ arch/x86/mach-voyager/voyager_thread.c | 128 -- arch/x86/xen/Kconfig | 2 +- drivers/lguest/Kconfig | 2 +- 22 files changed, 2 insertions(+), 4335 deletions(-) delete mode 100644 arch/x86/boot/voyager.c delete mode 100644 arch/x86/include/asm/mach-voyager/do_timer.h delete mode 100644 arch/x86/include/asm/mach-voyager/entry_arch.h delete mode 100644 arch/x86/include/asm/mach-voyager/setup_arch.h delete mode 100644 arch/x86/include/asm/vic.h delete mode 100644 arch/x86/include/asm/voyager.h delete mode 100644 arch/x86/mach-voyager/Makefile delete mode 100644 arch/x86/mach-voyager/setup.c delete mode 100644 arch/x86/mach-voyager/voyager_basic.c delete mode 100644 arch/x86/mach-voyager/voyager_cat.c delete mode 100644 arch/x86/mach-voyager/voyager_smp.c delete mode 100644 arch/x86/mach-voyager/voyager_thread.c (limited to 'arch/x86/xen') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 35efba546e0..5e2919c0ff9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -302,7 +302,6 @@ config X86_EXTENDED_PLATFORM SGI 320/540 (Visual Workstation) Summit/EXA (IBM x440) Unisys ES7000 IA32 series - Voyager (NCR) If you have one of these systems, or if you want to build a generic distribution kernel, say Y here - otherwise say N. @@ -423,19 +422,6 @@ config X86_ES7000 Support for Unisys ES7000 systems. Say 'Y' here if this kernel is supposed to run on an IA32-based Unisys ES7000 system. -config X86_VOYAGER - bool "Voyager (NCR)" - depends on SMP && !PCI && BROKEN - depends on X86_32_NON_STANDARD - ---help--- - Voyager is an MCA-based 32-way capable SMP architecture proprietary - to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. - - *** WARNING *** - - If you do not specifically know you have a Voyager based machine, - say N here, otherwise the kernel you build will not be bootable. - config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index cd48c721001..c70eff69a1f 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -32,7 +32,6 @@ setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o setup-y += header.o main.o mca.o memory.o pm.o pmjump.o setup-y += printf.o string.o tty.o video.o video-mode.o version.o setup-$(CONFIG_X86_APM_BOOT) += apm.o -setup-$(CONFIG_X86_VOYAGER) += voyager.o # The link order of the video-*.o modules can matter. In particular, # video-vga.o *must* be listed first, followed by video-vesa.o. diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index fba8e9c6a50..7c19ce8c244 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c @@ -126,11 +126,6 @@ static void enable_a20_fast(void) int enable_a20(void) { -#ifdef CONFIG_X86_VOYAGER - /* On Voyager, a20_test() is unsafe? */ - enable_a20_kbc(); - return 0; -#else int loops = A20_ENABLE_LOOPS; int kbc_err; @@ -164,5 +159,4 @@ int enable_a20(void) } return -1; -#endif } diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index cc0ef13fba7..7b2692e897e 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -302,9 +302,6 @@ void probe_cards(int unsafe); /* video-vesa.c */ void vesa_store_edid(void); -/* voyager.c */ -int query_voyager(void); - #endif /* __ASSEMBLY__ */ #endif /* BOOT_BOOT_H */ diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 197421db1af..58f0415d3ae 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -149,11 +149,6 @@ void main(void) /* Query MCA information */ query_mca(); - /* Voyager */ -#ifdef CONFIG_X86_VOYAGER - query_voyager(); -#endif - /* Query Intel SpeedStep (IST) information */ query_ist(); diff --git a/arch/x86/boot/voyager.c b/arch/x86/boot/voyager.c deleted file mode 100644 index 433909d61e5..00000000000 --- a/arch/x86/boot/voyager.c +++ /dev/null @@ -1,40 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * Get the Voyager config information - */ - -#include "boot.h" - -int query_voyager(void) -{ - u8 err; - u16 es, di; - /* Abuse the apm_bios_info area for this */ - u8 *data_ptr = (u8 *)&boot_params.apm_bios_info; - - data_ptr[0] = 0xff; /* Flag on config not found(?) */ - - asm("pushw %%es ; " - "int $0x15 ; " - "setc %0 ; " - "movw %%es, %1 ; " - "popw %%es" - : "=q" (err), "=r" (es), "=D" (di) - : "a" (0xffc0)); - - if (err) - return -1; /* Not Voyager */ - - set_fs(es); - copy_from_fs(data_ptr, di, 7); /* Table is 7 bytes apparently */ - return 0; -} diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 096dd5359cd..5c023f6f652 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -197,7 +197,6 @@ CONFIG_SPARSE_IRQ=y CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y # CONFIG_X86_ELAN is not set -# CONFIG_X86_VOYAGER is not set # CONFIG_X86_GENERICARCH is not set # CONFIG_X86_VSMP is not set # CONFIG_X86_RDC321X is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 2efb5d5063f..4157cc4a2bd 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -199,7 +199,6 @@ CONFIG_SPARSE_IRQ=y CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y # CONFIG_X86_ELAN is not set -# CONFIG_X86_VOYAGER is not set # CONFIG_X86_GENERICARCH is not set # CONFIG_X86_VSMP is not set CONFIG_SCHED_OMIT_FRAME_POINTER=y diff --git a/arch/x86/include/asm/mach-voyager/do_timer.h b/arch/x86/include/asm/mach-voyager/do_timer.h deleted file mode 100644 index 9e5a459fd15..00000000000 --- a/arch/x86/include/asm/mach-voyager/do_timer.h +++ /dev/null @@ -1,17 +0,0 @@ -/* defines for inline arch setup functions */ -#include - -#include -#include - -/** - * do_timer_interrupt_hook - hook into timer tick - * - * Call the pit clock event handler. see asm/i8253.h - **/ -static inline void do_timer_interrupt_hook(void) -{ - global_clock_event->event_handler(global_clock_event); - voyager_timer_interrupt(); -} - diff --git a/arch/x86/include/asm/mach-voyager/entry_arch.h b/arch/x86/include/asm/mach-voyager/entry_arch.h deleted file mode 100644 index ae52624b593..00000000000 --- a/arch/x86/include/asm/mach-voyager/entry_arch.h +++ /dev/null @@ -1,26 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 2002 - * - * Author: James.Bottomley@HansenPartnership.com - * - * linux/arch/i386/voyager/entry_arch.h - * - * This file builds the VIC and QIC CPI gates - */ - -/* initialise the voyager interrupt gates - * - * This uses the macros in irq.h to set up assembly jump gates. The - * calls are then redirected to the same routine with smp_ prefixed */ -BUILD_INTERRUPT(vic_sys_interrupt, VIC_SYS_INT) -BUILD_INTERRUPT(vic_cmn_interrupt, VIC_CMN_INT) -BUILD_INTERRUPT(vic_cpi_interrupt, VIC_CPI_LEVEL0); - -/* do all the QIC interrupts */ -BUILD_INTERRUPT(qic_timer_interrupt, QIC_TIMER_CPI); -BUILD_INTERRUPT(qic_invalidate_interrupt, QIC_INVALIDATE_CPI); -BUILD_INTERRUPT(qic_reschedule_interrupt, QIC_RESCHEDULE_CPI); -BUILD_INTERRUPT(qic_enable_irq_interrupt, QIC_ENABLE_IRQ_CPI); -BUILD_INTERRUPT(qic_call_function_interrupt, QIC_CALL_FUNCTION_CPI); -BUILD_INTERRUPT(qic_call_function_single_interrupt, QIC_CALL_FUNCTION_SINGLE_CPI); diff --git a/arch/x86/include/asm/mach-voyager/setup_arch.h b/arch/x86/include/asm/mach-voyager/setup_arch.h deleted file mode 100644 index 71729ca05cd..00000000000 --- a/arch/x86/include/asm/mach-voyager/setup_arch.h +++ /dev/null @@ -1,12 +0,0 @@ -#include -#include -#define VOYAGER_BIOS_INFO ((struct voyager_bios_info *) \ - (&boot_params.apm_bios_info)) - -/* Hook to call BIOS initialisation function */ - -/* for voyager, pass the voyager BIOS/SUS info area to the detection - * routines */ - -#define ARCH_SETUP voyager_detect(VOYAGER_BIOS_INFO); - diff --git a/arch/x86/include/asm/vic.h b/arch/x86/include/asm/vic.h deleted file mode 100644 index 53100f35361..00000000000 --- a/arch/x86/include/asm/vic.h +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * Standard include definitions for the NCR Voyager Interrupt Controller */ - -/* The eight CPI vectors. To activate a CPI, you write a bit mask - * corresponding to the processor set to be interrupted into the - * relevant register. That set of CPUs will then be interrupted with - * the CPI */ -static const int VIC_CPI_Registers[] = - {0xFC00, 0xFC01, 0xFC08, 0xFC09, - 0xFC10, 0xFC11, 0xFC18, 0xFC19 }; - -#define VIC_PROC_WHO_AM_I 0xfc29 -# define QUAD_IDENTIFIER 0xC0 -# define EIGHT_SLOT_IDENTIFIER 0xE0 -#define QIC_EXTENDED_PROCESSOR_SELECT 0xFC72 -#define VIC_CPI_BASE_REGISTER 0xFC41 -#define VIC_PROCESSOR_ID 0xFC21 -# define VIC_CPU_MASQUERADE_ENABLE 0x8 - -#define VIC_CLAIM_REGISTER_0 0xFC38 -#define VIC_CLAIM_REGISTER_1 0xFC39 -#define VIC_REDIRECT_REGISTER_0 0xFC60 -#define VIC_REDIRECT_REGISTER_1 0xFC61 -#define VIC_PRIORITY_REGISTER 0xFC20 - -#define VIC_PRIMARY_MC_BASE 0xFC48 -#define VIC_SECONDARY_MC_BASE 0xFC49 - -#define QIC_PROCESSOR_ID 0xFC71 -# define QIC_CPUID_ENABLE 0x08 - -#define QIC_VIC_CPI_BASE_REGISTER 0xFC79 -#define QIC_CPI_BASE_REGISTER 0xFC7A - -#define QIC_MASK_REGISTER0 0xFC80 -/* NOTE: these are masked high, enabled low */ -# define QIC_PERF_TIMER 0x01 -# define QIC_LPE 0x02 -# define QIC_SYS_INT 0x04 -# define QIC_CMN_INT 0x08 -/* at the moment, just enable CMN_INT, disable SYS_INT */ -# define QIC_DEFAULT_MASK0 (~(QIC_CMN_INT /* | VIC_SYS_INT */)) -#define QIC_MASK_REGISTER1 0xFC81 -# define QIC_BOOT_CPI_MASK 0xFE -/* Enable CPI's 1-6 inclusive */ -# define QIC_CPI_ENABLE 0x81 - -#define QIC_INTERRUPT_CLEAR0 0xFC8A -#define QIC_INTERRUPT_CLEAR1 0xFC8B - -/* this is where we place the CPI vectors */ -#define VIC_DEFAULT_CPI_BASE 0xC0 -/* this is where we place the QIC CPI vectors */ -#define QIC_DEFAULT_CPI_BASE 0xD0 - -#define VIC_BOOT_INTERRUPT_MASK 0xfe - -extern void smp_vic_timer_interrupt(void); diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h deleted file mode 100644 index c1635d43616..00000000000 --- a/arch/x86/include/asm/voyager.h +++ /dev/null @@ -1,571 +0,0 @@ -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * Standard include definitions for the NCR Voyager system */ - -#undef VOYAGER_DEBUG -#undef VOYAGER_CAT_DEBUG - -#ifdef VOYAGER_DEBUG -#define VDEBUG(x) printk x -#else -#define VDEBUG(x) -#endif - -/* There are three levels of voyager machine: 3,4 and 5. The rule is - * if it's less than 3435 it's a Level 3 except for a 3360 which is - * a level 4. A 3435 or above is a Level 5 */ -#define VOYAGER_LEVEL5_AND_ABOVE 0x3435 -#define VOYAGER_LEVEL4 0x3360 - -/* The L4 DINO ASIC */ -#define VOYAGER_DINO 0x43 - -/* voyager ports in standard I/O space */ -#define VOYAGER_MC_SETUP 0x96 - - -#define VOYAGER_CAT_CONFIG_PORT 0x97 -# define VOYAGER_CAT_DESELECT 0xff -#define VOYAGER_SSPB_RELOCATION_PORT 0x98 - -/* Valid CAT controller commands */ -/* start instruction register cycle */ -#define VOYAGER_CAT_IRCYC 0x01 -/* start data register cycle */ -#define VOYAGER_CAT_DRCYC 0x02 -/* move to execute state */ -#define VOYAGER_CAT_RUN 0x0F -/* end operation */ -#define VOYAGER_CAT_END 0x80 -/* hold in idle state */ -#define VOYAGER_CAT_HOLD 0x90 -/* single step an "intest" vector */ -#define VOYAGER_CAT_STEP 0xE0 -/* return cat controller to CLEMSON mode */ -#define VOYAGER_CAT_CLEMSON 0xFF - -/* the default cat command header */ -#define VOYAGER_CAT_HEADER 0x7F - -/* the range of possible CAT module ids in the system */ -#define VOYAGER_MIN_MODULE 0x10 -#define VOYAGER_MAX_MODULE 0x1f - -/* The voyager registers per asic */ -#define VOYAGER_ASIC_ID_REG 0x00 -#define VOYAGER_ASIC_TYPE_REG 0x01 -/* the sub address registers can be made auto incrementing on reads */ -#define VOYAGER_AUTO_INC_REG 0x02 -# define VOYAGER_AUTO_INC 0x04 -# define VOYAGER_NO_AUTO_INC 0xfb -#define VOYAGER_SUBADDRDATA 0x03 -#define VOYAGER_SCANPATH 0x05 -# define VOYAGER_CONNECT_ASIC 0x01 -# define VOYAGER_DISCONNECT_ASIC 0xfe -#define VOYAGER_SUBADDRLO 0x06 -#define VOYAGER_SUBADDRHI 0x07 -#define VOYAGER_SUBMODSELECT 0x08 -#define VOYAGER_SUBMODPRESENT 0x09 - -#define VOYAGER_SUBADDR_LO 0xff -#define VOYAGER_SUBADDR_HI 0xffff - -/* the maximum size of a scan path -- used to form instructions */ -#define VOYAGER_MAX_SCAN_PATH 0x100 -/* the biggest possible register size (in bytes) */ -#define VOYAGER_MAX_REG_SIZE 4 - -/* Total number of possible modules (including submodules) */ -#define VOYAGER_MAX_MODULES 16 -/* Largest number of asics per module */ -#define VOYAGER_MAX_ASICS_PER_MODULE 7 - -/* the CAT asic of each module is always the first one */ -#define VOYAGER_CAT_ID 0 -#define VOYAGER_PSI 0x1a - -/* voyager instruction operations and registers */ -#define VOYAGER_READ_CONFIG 0x1 -#define VOYAGER_WRITE_CONFIG 0x2 -#define VOYAGER_BYPASS 0xff - -typedef struct voyager_asic { - __u8 asic_addr; /* ASIC address; Level 4 */ - __u8 asic_type; /* ASIC type */ - __u8 asic_id; /* ASIC id */ - __u8 jtag_id[4]; /* JTAG id */ - __u8 asic_location; /* Location within scan path; start w/ 0 */ - __u8 bit_location; /* Location within bit stream; start w/ 0 */ - __u8 ireg_length; /* Instruction register length */ - __u16 subaddr; /* Amount of sub address space */ - struct voyager_asic *next; /* Next asic in linked list */ -} voyager_asic_t; - -typedef struct voyager_module { - __u8 module_addr; /* Module address */ - __u8 scan_path_connected; /* Scan path connected */ - __u16 ee_size; /* Size of the EEPROM */ - __u16 num_asics; /* Number of Asics */ - __u16 inst_bits; /* Instruction bits in the scan path */ - __u16 largest_reg; /* Largest register in the scan path */ - __u16 smallest_reg; /* Smallest register in the scan path */ - voyager_asic_t *asic; /* First ASIC in scan path (CAT_I) */ - struct voyager_module *submodule; /* Submodule pointer */ - struct voyager_module *next; /* Next module in linked list */ -} voyager_module_t; - -typedef struct voyager_eeprom_hdr { - __u8 module_id[4]; - __u8 version_id; - __u8 config_id; - __u16 boundry_id; /* boundary scan id */ - __u16 ee_size; /* size of EEPROM */ - __u8 assembly[11]; /* assembly # */ - __u8 assembly_rev; /* assembly rev */ - __u8 tracer[4]; /* tracer number */ - __u16 assembly_cksum; /* asm checksum */ - __u16 power_consump; /* pwr requirements */ - __u16 num_asics; /* number of asics */ - __u16 bist_time; /* min. bist time */ - __u16 err_log_offset; /* error log offset */ - __u16 scan_path_offset;/* scan path offset */ - __u16 cct_offset; - __u16 log_length; /* length of err log */ - __u16 xsum_end; /* offset to end of - checksum */ - __u8 reserved[4]; - __u8 sflag; /* starting sentinal */ - __u8 part_number[13]; /* prom part number */ - __u8 version[10]; /* version number */ - __u8 signature[8]; - __u16 eeprom_chksum; - __u32 data_stamp_offset; - __u8 eflag ; /* ending sentinal */ -} __attribute__((packed)) voyager_eprom_hdr_t; - - - -#define VOYAGER_EPROM_SIZE_OFFSET \ - ((__u16)(&(((voyager_eprom_hdr_t *)0)->ee_size))) -#define VOYAGER_XSUM_END_OFFSET 0x2a - -/* the following three definitions are for internal table layouts - * in the module EPROMs. We really only care about the IDs and - * offsets */ -typedef struct voyager_sp_table { - __u8 asic_id; - __u8 bypass_flag; - __u16 asic_data_offset; - __u16 config_data_offset; -} __attribute__((packed)) voyager_sp_table_t; - -typedef struct voyager_jtag_table { - __u8 icode[4]; - __u8 runbist[4]; - __u8 intest[4]; - __u8 samp_preld[4]; - __u8 ireg_len; -} __attribute__((packed)) voyager_jtt_t; - -typedef struct voyager_asic_data_table { - __u8 jtag_id[4]; - __u16 length_bsr; - __u16 length_bist_reg; - __u32 bist_clk; - __u16 subaddr_bits; - __u16 seed_bits; - __u16 sig_bits; - __u16 jtag_offset; -} __attribute__((packed)) voyager_at_t; - -/* Voyager Interrupt Controller (VIC) registers */ - -/* Base to add to Cross Processor Interrupts (CPIs) when triggering - * the CPU IRQ line */ -/* register defines for the WCBICs (one per processor) */ -#define VOYAGER_WCBIC0 0x41 /* bus A node P1 processor 0 */ -#define VOYAGER_WCBIC1 0x49 /* bus A node P1 processor 1 */ -#define VOYAGER_WCBIC2 0x51 /* bus A node P2 processor 0 */ -#define VOYAGER_WCBIC3 0x59 /* bus A node P2 processor 1 */ -#define VOYAGER_WCBIC4 0x61 /* bus B node P1 processor 0 */ -#define VOYAGER_WCBIC5 0x69 /* bus B node P1 processor 1 */ -#define VOYAGER_WCBIC6 0x71 /* bus B node P2 processor 0 */ -#define VOYAGER_WCBIC7 0x79 /* bus B node P2 processor 1 */ - - -/* top of memory registers */ -#define VOYAGER_WCBIC_TOM_L 0x4 -#define VOYAGER_WCBIC_TOM_H 0x5 - -/* register defines for Voyager Memory Contol (VMC) - * these are present on L4 machines only */ -#define VOYAGER_VMC1 0x81 -#define VOYAGER_VMC2 0x91 -#define VOYAGER_VMC3 0xa1 -#define VOYAGER_VMC4 0xb1 - -/* VMC Ports */ -#define VOYAGER_VMC_MEMORY_SETUP 0x9 -# define VMC_Interleaving 0x01 -# define VMC_4Way 0x02 -# define VMC_EvenCacheLines 0x04 -# define VMC_HighLine 0x08 -# define VMC_Start0_Enable 0x20 -# define VMC_Start1_Enable 0x40 -# define VMC_Vremap 0x80 -#define VOYAGER_VMC_BANK_DENSITY 0xa -# define VMC_BANK_EMPTY 0 -# define VMC_BANK_4MB 1 -# define VMC_BANK_16MB 2 -# define VMC_BANK_64MB 3 -# define VMC_BANK0_MASK 0x03 -# define VMC_BANK1_MASK 0x0C -# define VMC_BANK2_MASK 0x30 -# define VMC_BANK3_MASK 0xC0 - -/* Magellan Memory Controller (MMC) defines - present on L5 */ -#define VOYAGER_MMC_ASIC_ID 1 -/* the two memory modules corresponding to memory cards in the system */ -#define VOYAGER_MMC_MEMORY0_MODULE 0x14 -#define VOYAGER_MMC_MEMORY1_MODULE 0x15 -/* the Magellan Memory Address (MMA) defines */ -#define VOYAGER_MMA_ASIC_ID 2 - -/* Submodule number for the Quad Baseboard */ -#define VOYAGER_QUAD_BASEBOARD 1 - -/* ASIC defines for the Quad Baseboard */ -#define VOYAGER_QUAD_QDATA0 1 -#define VOYAGER_QUAD_QDATA1 2 -#define VOYAGER_QUAD_QABC 3 - -/* Useful areas in extended CMOS */ -#define VOYAGER_PROCESSOR_PRESENT_MASK 0x88a -#define VOYAGER_MEMORY_CLICKMAP 0xa23 -#define VOYAGER_DUMP_LOCATION 0xb1a - -/* SUS In Control bit - used to tell SUS that we don't need to be - * babysat anymore */ -#define VOYAGER_SUS_IN_CONTROL_PORT 0x3ff -# define VOYAGER_IN_CONTROL_FLAG 0x80 - -/* Voyager PSI defines */ -#define VOYAGER_PSI_STATUS_REG 0x08 -# define PSI_DC_FAIL 0x01 -# define PSI_MON 0x02 -# define PSI_FAULT 0x04 -# define PSI_ALARM 0x08 -# define PSI_CURRENT 0x10 -# define PSI_DVM 0x20 -# define PSI_PSCFAULT 0x40 -# define PSI_STAT_CHG 0x80 - -#define VOYAGER_PSI_SUPPLY_REG 0x8000 - /* read */ -# define PSI_FAIL_DC 0x01 -# define PSI_FAIL_AC 0x02 -# define PSI_MON_INT 0x04 -# define PSI_SWITCH_OFF 0x08 -# define PSI_HX_OFF 0x10 -# define PSI_SECURITY 0x20 -# define PSI_CMOS_BATT_LOW 0x40 -# define PSI_CMOS_BATT_FAIL 0x80 - /* write */ -# define PSI_CLR_SWITCH_OFF 0x13 -# define PSI_CLR_HX_OFF 0x14 -# define PSI_CLR_CMOS_BATT_FAIL 0x17 - -#define VOYAGER_PSI_MASK 0x8001 -# define PSI_MASK_MASK 0x10 - -#define VOYAGER_PSI_AC_FAIL_REG 0x8004 -#define AC_FAIL_STAT_CHANGE 0x80 - -#define VOYAGER_PSI_GENERAL_REG 0x8007 - /* read */ -# define PSI_SWITCH_ON 0x01 -# define PSI_SWITCH_ENABLED 0x02 -# define PSI_ALARM_ENABLED 0x08 -# define PSI_SECURE_ENABLED 0x10 -# define PSI_COLD_RESET 0x20 -# define PSI_COLD_START 0x80 - /* write */ -# define PSI_POWER_DOWN 0x10 -# define PSI_SWITCH_DISABLE 0x01 -# define PSI_SWITCH_ENABLE 0x11 -# define PSI_CLEAR 0x12 -# define PSI_ALARM_DISABLE 0x03 -# define PSI_ALARM_ENABLE 0x13 -# define PSI_CLEAR_COLD_RESET 0x05 -# define PSI_SET_COLD_RESET 0x15 -# define PSI_CLEAR_COLD_START 0x07 -# define PSI_SET_COLD_START 0x17 - - - -struct voyager_bios_info { - __u8 len; - __u8 major; - __u8 minor; - __u8 debug; - __u8 num_classes; - __u8 class_1; - __u8 class_2; -}; - -/* The following structures and definitions are for the Kernel/SUS - * interface these are needed to find out how SUS initialised any Quad - * boards in the system */ - -#define NUMBER_OF_MC_BUSSES 2 -#define SLOTS_PER_MC_BUS 8 -#define MAX_CPUS 16 /* 16 way CPU system */ -#define MAX_PROCESSOR_BOARDS 4 /* 4 processor slot system */ -#define MAX_CACHE_LEVELS 4 /* # of cache levels supported */ -#define MAX_SHARED_CPUS 4 /* # of CPUs that can share a LARC */ -#define NUMBER_OF_POS_REGS 8 - -typedef struct { - __u8 MC_Slot; - __u8 POS_Values[NUMBER_OF_POS_REGS]; -} __attribute__((packed)) MC_SlotInformation_t; - -struct QuadDescription { - __u8 Type; /* for type 0 (DYADIC or MONADIC) all fields - * will be zero except for slot */ - __u8 StructureVersion; - __u32 CPI_BaseAddress; - __u32 LARC_BankSize; - __u32 LocalMemoryStateBits; - __u8 Slot; /* Processor slots 1 - 4 */ -} __attribute__((packed)); - -struct ProcBoardInfo { - __u8 Type; - __u8 StructureVersion; - __u8 NumberOfBoards; - struct QuadDescription QuadData[MAX_PROCESSOR_BOARDS]; -} __attribute__((packed)); - -struct CacheDescription { - __u8 Level; - __u32 TotalSize; - __u16 LineSize; - __u8 Associativity; - __u8 CacheType; - __u8 WriteType; - __u8 Number_CPUs_SharedBy; - __u8 Shared_CPUs_Hardware_IDs[MAX_SHARED_CPUS]; - -} __attribute__((packed)); - -struct CPU_Description { - __u8 CPU_HardwareId; - char *FRU_String; - __u8 NumberOfCacheLevels; - struct CacheDescription CacheLevelData[MAX_CACHE_LEVELS]; -} __attribute__((packed)); - -struct CPU_Info { - __u8 Type; - __u8 StructureVersion; - __u8 NumberOf_CPUs; - struct CPU_Description CPU_Data[MAX_CPUS]; -} __attribute__((packed)); - - -/* - * This structure will be used by SUS and the OS. - * The assumption about this structure is that no blank space is - * packed in it by our friend the compiler. - */ -typedef struct { - __u8 Mailbox_SUS; /* Written to by SUS to give - commands/response to the OS */ - __u8 Mailbox_OS; /* Written to by the OS to give - commands/response to SUS */ - __u8 SUS_MailboxVersion; /* Tells the OS which iteration of the - interface SUS supports */ - __u8 OS_MailboxVersion; /* Tells SUS which iteration of the - interface the OS supports */ - __u32 OS_Flags; /* Flags set by the OS as info for - SUS */ - __u32 SUS_Flags; /* Flags set by SUS as info - for the OS */ - __u32 WatchDogPeriod; /* Watchdog period (in seconds) which - the DP uses to see if the OS - is dead */ - __u32 WatchDogCount; /* Updated by the OS on every tic. */ - __u32 MemoryFor_SUS_ErrorLog; /* Flat 32 bit address which tells SUS - where to stuff the SUS error log - on a dump */ - MC_SlotInformation_t MC_SlotInfo[NUMBER_OF_MC_BUSSES*SLOTS_PER_MC_BUS]; - /* Storage for MCA POS data */ - /* All new SECOND_PASS_INTERFACE fields added from this point */ - struct ProcBoardInfo *BoardData; - struct CPU_Info *CPU_Data; - /* All new fields must be added from this point */ -} Voyager_KernelSUS_Mbox_t; - -/* structure for finding the right memory address to send a QIC CPI to */ -struct voyager_qic_cpi { - /* Each cache line (32 bytes) can trigger a cpi. The cpi - * read/write may occur anywhere in the cache line---pick the - * middle to be safe */ - struct { - __u32 pad1[3]; - __u32 cpi; - __u32 pad2[4]; - } qic_cpi[8]; -}; - -struct voyager_status { - __u32 power_fail:1; - __u32 switch_off:1; - __u32 request_from_kernel:1; -}; - -struct voyager_psi_regs { - __u8 cat_id; - __u8 cat_dev; - __u8 cat_control; - __u8 subaddr; - __u8 dummy4; - __u8 checkbit; - __u8 subaddr_low; - __u8 subaddr_high; - __u8 intstatus; - __u8 stat1; - __u8 stat3; - __u8 fault; - __u8 tms; - __u8 gen; - __u8 sysconf; - __u8 dummy15; -}; - -struct voyager_psi_subregs { - __u8 supply; - __u8 mask; - __u8 present; - __u8 DCfail; - __u8 ACfail; - __u8 fail; - __u8 UPSfail; - __u8 genstatus; -}; - -struct voyager_psi { - struct voyager_psi_regs regs; - struct voyager_psi_subregs subregs; -}; - -struct voyager_SUS { -#define VOYAGER_DUMP_BUTTON_NMI 0x1 -#define VOYAGER_SUS_VALID 0x2 -#define VOYAGER_SYSINT_COMPLETE 0x3 - __u8 SUS_mbox; -#define VOYAGER_NO_COMMAND 0x0 -#define VOYAGER_IGNORE_DUMP 0x1 -#define VOYAGER_DO_DUMP 0x2 -#define VOYAGER_SYSINT_HANDSHAKE 0x3 -#define VOYAGER_DO_MEM_DUMP 0x4 -#define VOYAGER_SYSINT_WAS_RECOVERED 0x5 - __u8 kernel_mbox; -#define VOYAGER_MAILBOX_VERSION 0x10 - __u8 SUS_version; - __u8 kernel_version; -#define VOYAGER_OS_HAS_SYSINT 0x1 -#define VOYAGER_OS_IN_PROGRESS 0x2 -#define VOYAGER_UPDATING_WDPERIOD 0x4 - __u32 kernel_flags; -#define VOYAGER_SUS_BOOTING 0x1 -#define VOYAGER_SUS_IN_PROGRESS 0x2 - __u32 SUS_flags; - __u32 watchdog_period; - __u32 watchdog_count; - __u32 SUS_errorlog; - /* lots of system configuration stuff under here */ -}; - -/* Variables exported by voyager_smp */ -extern __u32 voyager_extended_vic_processors; -extern __u32 voyager_allowed_boot_processors; -extern __u32 voyager_quad_processors; -extern struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS]; -extern struct voyager_SUS *voyager_SUS; - -/* variables exported always */ -extern struct task_struct *voyager_thread; -extern int voyager_level; -extern struct voyager_status voyager_status; - -/* functions exported by the voyager and voyager_smp modules */ -extern int voyager_cat_readb(__u8 module, __u8 asic, int reg); -extern void voyager_cat_init(void); -extern void voyager_detect(struct voyager_bios_info *); -extern void voyager_trap_init(void); -extern void voyager_setup_irqs(void); -extern int voyager_memory_detect(int region, __u32 *addr, __u32 *length); -extern void voyager_smp_intr_init(void); -extern __u8 voyager_extended_cmos_read(__u16 cmos_address); -extern void voyager_smp_dump(void); -extern void voyager_timer_interrupt(void); -extern void smp_local_timer_interrupt(void); -extern void voyager_power_off(void); -extern void smp_voyager_power_off(void *dummy); -extern void voyager_restart(void); -extern void voyager_cat_power_off(void); -extern void voyager_cat_do_common_interrupt(void); -extern void voyager_handle_nmi(void); -extern void voyager_smp_intr_init(void); -/* Commands for the following are */ -#define VOYAGER_PSI_READ 0 -#define VOYAGER_PSI_WRITE 1 -#define VOYAGER_PSI_SUBREAD 2 -#define VOYAGER_PSI_SUBWRITE 3 -extern void voyager_cat_psi(__u8, __u16, __u8 *); - -/* These define the CPIs we use in linux */ -#define VIC_CPI_LEVEL0 0 -#define VIC_CPI_LEVEL1 1 -/* now the fake CPIs */ -#define VIC_TIMER_CPI 2 -#define VIC_INVALIDATE_CPI 3 -#define VIC_RESCHEDULE_CPI 4 -#define VIC_ENABLE_IRQ_CPI 5 -#define VIC_CALL_FUNCTION_CPI 6 -#define VIC_CALL_FUNCTION_SINGLE_CPI 7 - -/* Now the QIC CPIs: Since we don't need the two initial levels, - * these are 2 less than the VIC CPIs */ -#define QIC_CPI_OFFSET 1 -#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) -#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) -#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) -#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) -#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) -#define QIC_CALL_FUNCTION_SINGLE_CPI (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET) - -#define VIC_START_FAKE_CPI VIC_TIMER_CPI -#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_SINGLE_CPI - -/* this is the SYS_INT CPI. */ -#define VIC_SYS_INT 8 -#define VIC_CMN_INT 15 - -/* This is the boot CPI for alternate processors. It gets overwritten - * by the above once the system has activated all available processors */ -#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 -#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) - -extern asmlinkage void vic_cpi_interrupt(void); -extern asmlinkage void vic_sys_interrupt(void); -extern asmlinkage void vic_cmn_interrupt(void); -extern asmlinkage void qic_timer_interrupt(void); -extern asmlinkage void qic_invalidate_interrupt(void); -extern asmlinkage void qic_reschedule_interrupt(void); -extern asmlinkage void qic_enable_irq_interrupt(void); -extern asmlinkage void qic_call_function_interrupt(void); diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig index c70e12b1a63..8dab8f7844d 100644 --- a/arch/x86/lguest/Kconfig +++ b/arch/x86/lguest/Kconfig @@ -3,7 +3,6 @@ config LGUEST_GUEST select PARAVIRT depends on X86_32 depends on !X86_PAE - depends on !X86_VOYAGER select VIRTIO select VIRTIO_RING select VIRTIO_CONSOLE diff --git a/arch/x86/mach-voyager/Makefile b/arch/x86/mach-voyager/Makefile deleted file mode 100644 index 15c250b371d..00000000000 --- a/arch/x86/mach-voyager/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# -# Makefile for the linux kernel. -# - -EXTRA_CFLAGS := -Iarch/x86/kernel -obj-y := setup.o voyager_basic.o voyager_thread.o - -obj-$(CONFIG_SMP) += voyager_smp.o voyager_cat.o diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c deleted file mode 100644 index 88c3c555634..00000000000 --- a/arch/x86/mach-voyager/setup.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Machine specific setup for generic - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -void __init pre_intr_init_hook(void) -{ - init_ISA_irqs(); -} - -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ -static struct irqaction irq2 = { - .handler = no_action, - .mask = CPU_MASK_NONE, - .name = "cascade", -}; - -void __init intr_init_hook(void) -{ -#ifdef CONFIG_SMP - voyager_smp_intr_init(); -#endif - - setup_irq(2, &irq2); -} - -static void voyager_disable_tsc(void) -{ - /* Voyagers run their CPUs from independent clocks, so disable - * the TSC code because we can't sync them */ - setup_clear_cpu_cap(X86_FEATURE_TSC); -} - -void __init pre_setup_arch_hook(void) -{ - voyager_disable_tsc(); -} - -void __init pre_time_init_hook(void) -{ - voyager_disable_tsc(); -} - -void __init trap_init_hook(void) -{ -} - -static struct irqaction irq0 = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, - .mask = CPU_MASK_NONE, - .name = "timer" -}; - -void __init time_init_hook(void) -{ - irq0.mask = cpumask_of_cpu(safe_smp_processor_id()); - setup_irq(0, &irq0); -} - -/* Hook for machine specific memory setup. */ - -char *__init machine_specific_memory_setup(void) -{ - char *who; - int new_nr; - - who = "NOT VOYAGER"; - - if (voyager_level == 5) { - __u32 addr, length; - int i; - - who = "Voyager-SUS"; - - e820.nr_map = 0; - for (i = 0; voyager_memory_detect(i, &addr, &length); i++) { - e820_add_region(addr, length, E820_RAM); - } - return who; - } else if (voyager_level == 4) { - __u32 tom; - __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT) << 8; - /* select the DINO config space */ - outb(VOYAGER_DINO, VOYAGER_CAT_CONFIG_PORT); - /* Read DINO top of memory register */ - tom = ((inb(catbase + 0x4) & 0xf0) << 16) - + ((inb(catbase + 0x5) & 0x7f) << 24); - - if (inb(catbase) != VOYAGER_DINO) { - printk(KERN_ERR - "Voyager: Failed to get DINO for L4, setting tom to EXT_MEM_K\n"); - tom = (boot_params.screen_info.ext_mem_k) << 10; - } - who = "Voyager-TOM"; - e820_add_region(0, 0x9f000, E820_RAM); - /* map from 1M to top of memory */ - e820_add_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024, - E820_RAM); - /* FIXME: Should check the ASICs to see if I need to - * take out the 8M window. Just do it at the moment - * */ - e820_add_region(8 * 1024 * 1024, 8 * 1024 * 1024, - E820_RESERVED); - return who; - } - - return default_machine_specific_memory_setup(); -} diff --git a/arch/x86/mach-voyager/voyager_basic.c b/arch/x86/mach-voyager/voyager_basic.c deleted file mode 100644 index 46d6f806769..00000000000 --- a/arch/x86/mach-voyager/voyager_basic.c +++ /dev/null @@ -1,317 +0,0 @@ -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * This file contains all the voyager specific routines for getting - * initialisation of the architecture to function. For additional - * features see: - * - * voyager_cat.c - Voyager CAT bus interface - * voyager_smp.c - Voyager SMP hal (emulates linux smp.c) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Power off function, if any - */ -void (*pm_power_off) (void); -EXPORT_SYMBOL(pm_power_off); - -int voyager_level = 0; - -struct voyager_SUS *voyager_SUS = NULL; - -#ifdef CONFIG_SMP -static void voyager_dump(int dummy1, struct tty_struct *dummy3) -{ - /* get here via a sysrq */ - voyager_smp_dump(); -} - -static struct sysrq_key_op sysrq_voyager_dump_op = { - .handler = voyager_dump, - .help_msg = "Voyager", - .action_msg = "Dump Voyager Status", -}; -#endif - -void voyager_detect(struct voyager_bios_info *bios) -{ - if (bios->len != 0xff) { - int class = (bios->class_1 << 8) - | (bios->class_2 & 0xff); - - printk("Voyager System detected.\n" - " Class %x, Revision %d.%d\n", - class, bios->major, bios->minor); - if (class == VOYAGER_LEVEL4) - voyager_level = 4; - else if (class < VOYAGER_LEVEL5_AND_ABOVE) - voyager_level = 3; - else - voyager_level = 5; - printk(" Architecture Level %d\n", voyager_level); - if (voyager_level < 4) - printk - ("\n**WARNING**: Voyager HAL only supports Levels 4 and 5 Architectures at the moment\n\n"); - /* install the power off handler */ - pm_power_off = voyager_power_off; -#ifdef CONFIG_SMP - register_sysrq_key('v', &sysrq_voyager_dump_op); -#endif - } else { - printk("\n\n**WARNING**: No Voyager Subsystem Found\n"); - } -} - -void voyager_system_interrupt(int cpl, void *dev_id) -{ - printk("Voyager: detected system interrupt\n"); -} - -/* Routine to read information from the extended CMOS area */ -__u8 voyager_extended_cmos_read(__u16 addr) -{ - outb(addr & 0xff, 0x74); - outb((addr >> 8) & 0xff, 0x75); - return inb(0x76); -} - -/* internal definitions for the SUS Click Map of memory */ - -#define CLICK_ENTRIES 16 -#define CLICK_SIZE 4096 /* click to byte conversion for Length */ - -typedef struct ClickMap { - struct Entry { - __u32 Address; - __u32 Length; - } Entry[CLICK_ENTRIES]; -} ClickMap_t; - -/* This routine is pretty much an awful hack to read the bios clickmap by - * mapping it into page 0. There are usually three regions in the map: - * Base Memory - * Extended Memory - * zero length marker for end of map - * - * Returns are 0 for failure and 1 for success on extracting region. - */ -int __init voyager_memory_detect(int region, __u32 * start, __u32 * length) -{ - int i; - int retval = 0; - __u8 cmos[4]; - ClickMap_t *map; - unsigned long map_addr; - unsigned long old; - - if (region >= CLICK_ENTRIES) { - printk("Voyager: Illegal ClickMap region %d\n", region); - return 0; - } - - for (i = 0; i < sizeof(cmos); i++) - cmos[i] = - voyager_extended_cmos_read(VOYAGER_MEMORY_CLICKMAP + i); - - map_addr = *(unsigned long *)cmos; - - /* steal page 0 for this */ - old = pg0[0]; - pg0[0] = ((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT); - local_flush_tlb(); - /* now clear everything out but page 0 */ - map = (ClickMap_t *) (map_addr & (~PAGE_MASK)); - - /* zero length is the end of the clickmap */ - if (map->Entry[region].Length != 0) { - *length = map->Entry[region].Length * CLICK_SIZE; - *start = map->Entry[region].Address; - retval = 1; - } - - /* replace the mapping */ - pg0[0] = old; - local_flush_tlb(); - return retval; -} - -/* voyager specific handling code for timer interrupts. Used to hand - * off the timer tick to the SMP code, since the VIC doesn't have an - * internal timer (The QIC does, but that's another story). */ -void voyager_timer_interrupt(void) -{ - if ((jiffies & 0x3ff) == 0) { - - /* There seems to be something flaky in either - * hardware or software that is resetting the timer 0 - * count to something much higher than it should be - * This seems to occur in the boot sequence, just - * before root is mounted. Therefore, every 10 - * seconds or so, we sanity check the timer zero count - * and kick it back to where it should be. - * - * FIXME: This is the most awful hack yet seen. I - * should work out exactly what is interfering with - * the timer count settings early in the boot sequence - * and swiftly introduce it to something sharp and - * pointy. */ - __u16 val; - - spin_lock(&i8253_lock); - - outb_p(0x00, 0x43); - val = inb_p(0x40); - val |= inb(0x40) << 8; - spin_unlock(&i8253_lock); - - if (val > LATCH) { - printk - ("\nVOYAGER: countdown timer value too high (%d), resetting\n\n", - val); - spin_lock(&i8253_lock); - outb(0x34, 0x43); - outb_p(LATCH & 0xff, 0x40); /* LSB */ - outb(LATCH >> 8, 0x40); /* MSB */ - spin_unlock(&i8253_lock); - } - } -#ifdef CONFIG_SMP - smp_vic_timer_interrupt(); -#endif -} - -void voyager_power_off(void) -{ - printk("VOYAGER Power Off\n"); - - if (voyager_level == 5) { - voyager_cat_power_off(); - } else if (voyager_level == 4) { - /* This doesn't apparently work on most L4 machines, - * but the specs say to do this to get automatic power - * off. Unfortunately, if it doesn't power off the - * machine, it ends up doing a cold restart, which - * isn't really intended, so comment out the code */ -#if 0 - int port; - - /* enable the voyager Configuration Space */ - outb((inb(VOYAGER_MC_SETUP) & 0xf0) | 0x8, VOYAGER_MC_SETUP); - /* the port for the power off flag is an offset from the - floating base */ - port = (inb(VOYAGER_SSPB_RELOCATION_PORT) << 8) + 0x21; - /* set the power off flag */ - outb(inb(port) | 0x1, port); -#endif - } - /* and wait for it to happen */ - local_irq_disable(); - for (;;) - halt(); -} - -/* copied from process.c */ -static inline void kb_wait(void) -{ - int i; - - for (i = 0; i < 0x10000; i++) - if ((inb_p(0x64) & 0x02) == 0) - break; -} - -void machine_shutdown(void) -{ - /* Architecture specific shutdown needed before a kexec */ -} - -void machine_restart(char *cmd) -{ - printk("Voyager Warm Restart\n"); - kb_wait(); - - if (voyager_level == 5) { - /* write magic values to the RTC to inform system that - * shutdown is beginning */ - outb(0x8f, 0x70); - outb(0x5, 0x71); - - udelay(50); - outb(0xfe, 0x64); /* pull reset low */ - } else if (voyager_level == 4) { - __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT) << 8; - __u8 basebd = inb(VOYAGER_MC_SETUP); - - outb(basebd | 0x08, VOYAGER_MC_SETUP); - outb(0x02, catbase + 0x21); - } - local_irq_disable(); - for (;;) - halt(); -} - -void machine_emergency_restart(void) -{ - /*for now, just hook this to a warm restart */ - machine_restart(NULL); -} - -void mca_nmi_hook(void) -{ - __u8 dumpval __maybe_unused = inb(0xf823); - __u8 swnmi __maybe_unused = inb(0xf813); - - /* FIXME: assume dump switch pressed */ - /* check to see if the dump switch was pressed */ - VDEBUG(("VOYAGER: dumpval = 0x%x, swnmi = 0x%x\n", dumpval, swnmi)); - /* clear swnmi */ - outb(0xff, 0xf813); - /* tell SUS to ignore dump */ - if (voyager_level == 5 && voyager_SUS != NULL) { - if (voyager_SUS->SUS_mbox == VOYAGER_DUMP_BUTTON_NMI) { - voyager_SUS->kernel_mbox = VOYAGER_NO_COMMAND; - voyager_SUS->kernel_flags |= VOYAGER_OS_IN_PROGRESS; - udelay(1000); - voyager_SUS->kernel_mbox = VOYAGER_IGNORE_DUMP; - voyager_SUS->kernel_flags &= ~VOYAGER_OS_IN_PROGRESS; - } - } - printk(KERN_ERR - "VOYAGER: Dump switch pressed, printing CPU%d tracebacks\n", - smp_processor_id()); - show_stack(NULL, NULL); - show_state(); -} - -void machine_halt(void) -{ - /* treat a halt like a power off */ - machine_power_off(); -} - -void machine_power_off(void) -{ - if (pm_power_off) - pm_power_off(); -} diff --git a/arch/x86/mach-voyager/voyager_cat.c b/arch/x86/mach-voyager/voyager_cat.c deleted file mode 100644 index 2ad598c104a..00000000000 --- a/arch/x86/mach-voyager/voyager_cat.c +++ /dev/null @@ -1,1197 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * This file contains all the logic for manipulating the CAT bus - * in a level 5 machine. - * - * The CAT bus is a serial configuration and test bus. Its primary - * uses are to probe the initial configuration of the system and to - * diagnose error conditions when a system interrupt occurs. The low - * level interface is fairly primitive, so most of this file consists - * of bit shift manipulations to send and receive packets on the - * serial bus */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef VOYAGER_CAT_DEBUG -#define CDEBUG(x) printk x -#else -#define CDEBUG(x) -#endif - -/* the CAT command port */ -#define CAT_CMD (sspb + 0xe) -/* the CAT data port */ -#define CAT_DATA (sspb + 0xd) - -/* the internal cat functions */ -static void cat_pack(__u8 * msg, __u16 start_bit, __u8 * data, __u16 num_bits); -static void cat_unpack(__u8 * msg, __u16 start_bit, __u8 * data, - __u16 num_bits); -static void cat_build_header(__u8 * header, const __u16 len, - const __u16 smallest_reg_bits, - const __u16 longest_reg_bits); -static int cat_sendinst(voyager_module_t * modp, voyager_asic_t * asicp, - __u8 reg, __u8 op); -static int cat_getdata(voyager_module_t * modp, voyager_asic_t * asicp, - __u8 reg, __u8 * value); -static int cat_shiftout(__u8 * data, __u16 data_bytes, __u16 header_bytes, - __u8 pad_bits); -static int cat_write(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, - __u8 value); -static int cat_read(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, - __u8 * value); -static int cat_subread(voyager_module_t * modp, voyager_asic_t * asicp, - __u16 offset, __u16 len, void *buf); -static int cat_senddata(voyager_module_t * modp, voyager_asic_t * asicp, - __u8 reg, __u8 value); -static int cat_disconnect(voyager_module_t * modp, voyager_asic_t * asicp); -static int cat_connect(voyager_module_t * modp, voyager_asic_t * asicp); - -static inline const char *cat_module_name(int module_id) -{ - switch (module_id) { - case 0x10: - return "Processor Slot 0"; - case 0x11: - return "Processor Slot 1"; - case 0x12: - return "Processor Slot 2"; - case 0x13: - return "Processor Slot 4"; - case 0x14: - return "Memory Slot 0"; - case 0x15: - return "Memory Slot 1"; - case 0x18: - return "Primary Microchannel"; - case 0x19: - return "Secondary Microchannel"; - case 0x1a: - return "Power Supply Interface"; - case 0x1c: - return "Processor Slot 5"; - case 0x1d: - return "Processor Slot 6"; - case 0x1e: - return "Processor Slot 7"; - case 0x1f: - return "Processor Slot 8"; - default: - return "Unknown Module"; - } -} - -static int sspb = 0; /* stores the super port location */ -int voyager_8slot = 0; /* set to true if a 51xx monster */ - -voyager_module_t *voyager_cat_list; - -/* the I/O port assignments for the VIC and QIC */ -static struct resource vic_res = { - .name = "Voyager Interrupt Controller", - .start = 0xFC00, - .end = 0xFC6F -}; -static struct resource qic_res = { - .name = "Quad Interrupt Controller", - .start = 0xFC70, - .end = 0xFCFF -}; - -/* This function is used to pack a data bit stream inside a message. - * It writes num_bits of the data buffer in msg starting at start_bit. - * Note: This function assumes that any unused bit in the data stream - * is set to zero so that the ors will work correctly */ -static void -cat_pack(__u8 * msg, const __u16 start_bit, __u8 * data, const __u16 num_bits) -{ - /* compute initial shift needed */ - const __u16 offset = start_bit % BITS_PER_BYTE; - __u16 len = num_bits / BITS_PER_BYTE; - __u16 byte = start_bit / BITS_PER_BYTE; - __u16 residue = (num_bits % BITS_PER_BYTE) + offset; - int i; - - /* adjust if we have more than a byte of residue */ - if (residue >= BITS_PER_BYTE) { - residue -= BITS_PER_BYTE; - len++; - } - - /* clear out the bits. We assume here that if len==0 then - * residue >= offset. This is always true for the catbus - * operations */ - msg[byte] &= 0xff << (BITS_PER_BYTE - offset); - msg[byte++] |= data[0] >> offset; - if (len == 0) - return; - for (i = 1; i < len; i++) - msg[byte++] = (data[i - 1] << (BITS_PER_BYTE - offset)) - | (data[i] >> offset); - if (residue != 0) { - __u8 mask = 0xff >> residue; - __u8 last_byte = data[i - 1] << (BITS_PER_BYTE - offset) - | (data[i] >> offset); - - last_byte &= ~mask; - msg[byte] &= mask; - msg[byte] |= last_byte; - } - return; -} - -/* unpack the data again (same arguments as cat_pack()). data buffer - * must be zero populated. - * - * Function: given a message string move to start_bit and copy num_bits into - * data (starting at bit 0 in data). - */ -static void -cat_unpack(__u8 * msg, const __u16 start_bit, __u8 * data, const __u16 num_bits) -{ - /* compute initial shift needed */ - const __u16 offset = start_bit % BITS_PER_BYTE; - __u16 len = num_bits / BITS_PER_BYTE; - const __u8 last_bits = num_bits % BITS_PER_BYTE; - __u16 byte = start_bit / BITS_PER_BYTE; - int i; - - if (last_bits != 0) - len++; - - /* special case: want < 8 bits from msg and we can get it from - * a single byte of the msg */ - if (len == 0 && BITS_PER_BYTE - offset >= num_bits) { - data[0] = msg[byte] << offset; - data[0] &= 0xff >> (BITS_PER_BYTE - num_bits); - return; - } - for (i = 0; i < len; i++) { - /* this annoying if has to be done just in case a read of - * msg one beyond the array causes a panic */ - if (offset != 0) { - data[i] = msg[byte++] << offset; - data[i] |= msg[byte] >> (BITS_PER_BYTE - offset); - } else { - data[i] = msg[byte++]; - } - } - /* do we need to truncate the final byte */ - if (last_bits != 0) { - data[i - 1] &= 0xff << (BITS_PER_BYTE - last_bits); - } - return; -} - -static void -cat_build_header(__u8 * header, const __u16 len, const __u16 smallest_reg_bits, - const __u16 longest_reg_bits) -{ - int i; - __u16 start_bit = (smallest_reg_bits - 1) % BITS_PER_BYTE; - __u8 *last_byte = &header[len - 1]; - - if (start_bit == 0) - start_bit = 1; /* must have at least one bit in the hdr */ - - for (i = 0; i < len; i++) - header[i] = 0; - - for (i = start_bit; i > 0; i--) - *last_byte = ((*last_byte) << 1) + 1; - -} - -static int -cat_sendinst(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, __u8 op) -{ - __u8 parity, inst, inst_buf[4] = { 0 }; - __u8 iseq[VOYAGER_MAX_SCAN_PATH], hseq[VOYAGER_MAX_REG_SIZE]; - __u16 ibytes, hbytes, padbits; - int i; - - /* - * Parity is the parity of the register number + 1 (READ_REGISTER - * and WRITE_REGISTER always add '1' to the number of bits == 1) - */ - parity = (__u8) (1 + (reg & 0x01) + - ((__u8) (reg & 0x02) >> 1) + - ((__u8) (reg & 0x04) >> 2) + - ((__u8) (reg & 0x08) >> 3)) % 2; - - inst = ((parity << 7) | (reg << 2) | op); - - outb(VOYAGER_CAT_IRCYC, CAT_CMD); - if (!modp->scan_path_connected) { - if (asicp->asic_id != VOYAGER_CAT_ID) { - printk - ("**WARNING***: cat_sendinst has disconnected scan path not to CAT asic\n"); - return 1; - } - outb(VOYAGER_CAT_HEADER, CAT_DATA); - outb(inst, CAT_DATA); - if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) { - CDEBUG(("VOYAGER CAT: cat_sendinst failed to get CAT_HEADER\n")); - return 1; - } - return 0; - } - ibytes = modp->inst_bits / BITS_PER_BYTE; - if ((padbits = modp->inst_bits % BITS_PER_BYTE) != 0) { - padbits = BITS_PER_BYTE - padbits; - ibytes++; - } - hbytes = modp->largest_reg / BITS_PER_BYTE; - if (modp->largest_reg % BITS_PER_BYTE) - hbytes++; - CDEBUG(("cat_sendinst: ibytes=%d, hbytes=%d\n", ibytes, hbytes)); - /* initialise the instruction sequence to 0xff */ - for (i = 0; i < ibytes + hbytes; i++) - iseq[i] = 0xff; - cat_build_header(hseq, hbytes, modp->smallest_reg, modp->largest_reg); - cat_pack(iseq, modp->inst_bits, hseq, hbytes * BITS_PER_BYTE); - inst_buf[0] = inst; - inst_buf[1] = 0xFF >> (modp->largest_reg % BITS_PER_BYTE); - cat_pack(iseq, asicp->bit_location, inst_buf, asicp->ireg_length); -#ifdef VOYAGER_CAT_DEBUG - printk("ins = 0x%x, iseq: ", inst); - for (i = 0; i < ibytes + hbytes; i++) - printk("0x%x ", iseq[i]); - printk("\n"); -#endif - if (cat_shiftout(iseq, ibytes, hbytes, padbits)) { - CDEBUG(("VOYAGER CAT: cat_sendinst: cat_shiftout failed\n")); - return 1; - } - CDEBUG(("CAT SHIFTOUT DONE\n")); - return 0; -} - -static int -cat_getdata(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, - __u8 * value) -{ - if (!modp->scan_path_connected) { - if (asicp->asic_id != VOYAGER_CAT_ID) { - CDEBUG(("VOYAGER CAT: ERROR: cat_getdata to CAT asic with scan path connected\n")); - return 1; - } - if (reg > VOYAGER_SUBADDRHI) - outb(VOYAGER_CAT_RUN, CAT_CMD); - outb(VOYAGER_CAT_DRCYC, CAT_CMD); - outb(VOYAGER_CAT_HEADER, CAT_DATA); - *value = inb(CAT_DATA); - outb(0xAA, CAT_DATA); - if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) { - CDEBUG(("cat_getdata: failed to get VOYAGER_CAT_HEADER\n")); - return 1; - } - return 0; - } else { - __u16 sbits = modp->num_asics - 1 + asicp->ireg_length; - __u16 sbytes = sbits / BITS_PER_BYTE; - __u16 tbytes; - __u8 string[VOYAGER_MAX_SCAN_PATH], - trailer[VOYAGER_MAX_REG_SIZE]; - __u8 padbits; - int i; - - outb(VOYAGER_CAT_DRCYC, CAT_CMD); - - if ((padbits = sbits % BITS_PER_BYTE) != 0) { - padbits = BITS_PER_BYTE - padbits; - sbytes++; - } - tbytes = asicp->ireg_length / BITS_PER_BYTE; - if (asicp->ireg_length % BITS_PER_BYTE) - tbytes++; - CDEBUG(("cat_getdata: tbytes = %d, sbytes = %d, padbits = %d\n", - tbytes, sbytes, padbits)); - cat_build_header(trailer, tbytes, 1, asicp->ireg_length); - - for (i = tbytes - 1; i >= 0; i--) { - outb(trailer[i], CAT_DATA); - string[sbytes + i] = inb(CAT_DATA); - } - - for (i = sbytes - 1; i >= 0; i--) { - outb(0xaa, CAT_DATA); - string[i] = inb(CAT_DATA); - } - *value = 0; - cat_unpack(string, - padbits + (tbytes * BITS_PER_BYTE) + - asicp->asic_location, value, asicp->ireg_length); -#ifdef VOYAGER_CAT_DEBUG - printk("value=0x%x, string: ", *value); - for (i = 0; i < tbytes + sbytes; i++) - printk("0x%x ", string[i]); - printk("\n"); -#endif - - /* sanity check the rest of the return */ - for (i = 0; i < tbytes; i++) { - __u8 input = 0; - - cat_unpack(string, padbits + (i * BITS_PER_BYTE), - &input, BITS_PER_BYTE); - if (trailer[i] != input) { - CDEBUG(("cat_getdata: failed to sanity check rest of ret(%d) 0x%x != 0x%x\n", i, input, trailer[i])); - return 1; - } - } - CDEBUG(("cat_getdata DONE\n")); - return 0; - } -} - -static int -cat_shiftout(__u8 * data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits) -{ - int i; - - for (i = data_bytes + header_bytes - 1; i >= header_bytes; i--) - outb(data[i], CAT_DATA); - - for (i = header_bytes - 1; i >= 0; i--) { - __u8 header = 0; - __u8 input; - - outb(data[i], CAT_DATA); - input = inb(CAT_DATA); - CDEBUG(("cat_shiftout: returned 0x%x\n", input)); - cat_unpack(data, ((data_bytes + i) * BITS_PER_BYTE) - pad_bits, - &header, BITS_PER_BYTE); - if (input != header) { - CDEBUG(("VOYAGER CAT: cat_shiftout failed to return header 0x%x != 0x%x\n", input, header)); - return 1; - } - } - return 0; -} - -static int -cat_senddata(voyager_module_t * modp, voyager_asic_t * asicp, - __u8 reg, __u8 value) -{ - outb(VOYAGER_CAT_DRCYC, CAT_CMD); - if (!modp->scan_path_connected) { - if (asicp->asic_id != VOYAGER_CAT_ID) { - CDEBUG(("VOYAGER CAT: ERROR: scan path disconnected when asic != CAT\n")); - return 1; - } - outb(VOYAGER_CAT_HEADER, CAT_DATA); - outb(value, CAT_DATA); - if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) { - CDEBUG(("cat_senddata: failed to get correct header response to sent data\n")); - return 1; - } - if (reg > VOYAGER_SUBADDRHI) { - outb(VOYAGER_CAT_RUN, CAT_CMD); - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - } - - return 0; - } else { - __u16 hbytes = asicp->ireg_length / BITS_PER_BYTE; - __u16 dbytes = - (modp->num_asics - 1 + asicp->ireg_length) / BITS_PER_BYTE; - __u8 padbits, dseq[VOYAGER_MAX_SCAN_PATH], - hseq[VOYAGER_MAX_REG_SIZE]; - int i; - - if ((padbits = (modp->num_asics - 1 - + asicp->ireg_length) % BITS_PER_BYTE) != 0) { - padbits = BITS_PER_BYTE - padbits; - dbytes++; - } - if (asicp->ireg_length % BITS_PER_BYTE) - hbytes++; - - cat_build_header(hseq, hbytes, 1, asicp->ireg_length); - - for (i = 0; i < dbytes + hbytes; i++) - dseq[i] = 0xff; - CDEBUG(("cat_senddata: dbytes=%d, hbytes=%d, padbits=%d\n", - dbytes, hbytes, padbits)); - cat_pack(dseq, modp->num_asics - 1 + asicp->ireg_length, - hseq, hbytes * BITS_PER_BYTE); - cat_pack(dseq, asicp->asic_location, &value, - asicp->ireg_length); -#ifdef VOYAGER_CAT_DEBUG - printk("dseq "); - for (i = 0; i < hbytes + dbytes; i++) { - printk("0x%x ", dseq[i]); - } - printk("\n"); -#endif - return cat_shiftout(dseq, dbytes, hbytes, padbits); - } -} - -static int -cat_write(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, __u8 value) -{ - if (cat_sendinst(modp, asicp, reg, VOYAGER_WRITE_CONFIG)) - return 1; - return cat_senddata(modp, asicp, reg, value); -} - -static int -cat_read(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, - __u8 * value) -{ - if (cat_sendinst(modp, asicp, reg, VOYAGER_READ_CONFIG)) - return 1; - return cat_getdata(modp, asicp, reg, value); -} - -static int -cat_subaddrsetup(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset, - __u16 len) -{ - __u8 val; - - if (len > 1) { - /* set auto increment */ - __u8 newval; - - if (cat_read(modp, asicp, VOYAGER_AUTO_INC_REG, &val)) { - CDEBUG(("cat_subaddrsetup: read of VOYAGER_AUTO_INC_REG failed\n")); - return 1; - } - CDEBUG(("cat_subaddrsetup: VOYAGER_AUTO_INC_REG = 0x%x\n", - val)); - newval = val | VOYAGER_AUTO_INC; - if (newval != val) { - if (cat_write(modp, asicp, VOYAGER_AUTO_INC_REG, val)) { - CDEBUG(("cat_subaddrsetup: write to VOYAGER_AUTO_INC_REG failed\n")); - return 1; - } - } - } - if (cat_write(modp, asicp, VOYAGER_SUBADDRLO, (__u8) (offset & 0xff))) { - CDEBUG(("cat_subaddrsetup: write to SUBADDRLO failed\n")); - return 1; - } - if (asicp->subaddr > VOYAGER_SUBADDR_LO) { - if (cat_write - (modp, asicp, VOYAGER_SUBADDRHI, (__u8) (offset >> 8))) { - CDEBUG(("cat_subaddrsetup: write to SUBADDRHI failed\n")); - return 1; - } - cat_read(modp, asicp, VOYAGER_SUBADDRHI, &val); - CDEBUG(("cat_subaddrsetup: offset = %d, hi = %d\n", offset, - val)); - } - cat_read(modp, asicp, VOYAGER_SUBADDRLO, &val); - CDEBUG(("cat_subaddrsetup: offset = %d, lo = %d\n", offset, val)); - return 0; -} - -static int -cat_subwrite(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset, - __u16 len, void *buf) -{ - int i, retval; - - /* FIXME: need special actions for VOYAGER_CAT_ID here */ - if (asicp->asic_id == VOYAGER_CAT_ID) { - CDEBUG(("cat_subwrite: ATTEMPT TO WRITE TO CAT ASIC\n")); - /* FIXME -- This is supposed to be handled better - * There is a problem writing to the cat asic in the - * PSI. The 30us delay seems to work, though */ - udelay(30); - } - - if ((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) { - printk("cat_subwrite: cat_subaddrsetup FAILED\n"); - return retval; - } - - if (cat_sendinst - (modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_WRITE_CONFIG)) { - printk("cat_subwrite: cat_sendinst FAILED\n"); - return 1; - } - for (i = 0; i < len; i++) { - if (cat_senddata(modp, asicp, 0xFF, ((__u8 *) buf)[i])) { - printk - ("cat_subwrite: cat_sendata element at %d FAILED\n", - i); - return 1; - } - } - return 0; -} -static int -cat_subread(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset, - __u16 len, void *buf) -{ - int i, retval; - - if ((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) { - CDEBUG(("cat_subread: cat_subaddrsetup FAILED\n")); - return retval; - } - - if (cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_READ_CONFIG)) { - CDEBUG(("cat_subread: cat_sendinst failed\n")); - return 1; - } - for (i = 0; i < len; i++) { - if (cat_getdata(modp, asicp, 0xFF, &((__u8 *) buf)[i])) { - CDEBUG(("cat_subread: cat_getdata element %d failed\n", - i)); - return 1; - } - } - return 0; -} - -/* buffer for storing EPROM data read in during initialisation */ -static __initdata __u8 eprom_buf[0xFFFF]; -static voyager_module_t *voyager_initial_module; - -/* Initialise the cat bus components. We assume this is called by the - * boot cpu *after* all memory initialisation has been done (so we can - * use kmalloc) but before smp initialisation, so we can probe the SMP - * configuration and pick up necessary information. */ -void __init voyager_cat_init(void) -{ - voyager_module_t **modpp = &voyager_initial_module; - voyager_asic_t **asicpp; - voyager_asic_t *qabc_asic = NULL; - int i, j; - unsigned long qic_addr = 0; - __u8 qabc_data[0x20]; - __u8 num_submodules, val; - voyager_eprom_hdr_t *eprom_hdr = (voyager_eprom_hdr_t *) & eprom_buf[0]; - - __u8 cmos[4]; - unsigned long addr; - - /* initiallise the SUS mailbox */ - for (i = 0; i < sizeof(cmos); i++) - cmos[i] = voyager_extended_cmos_read(VOYAGER_DUMP_LOCATION + i); - addr = *(unsigned long *)cmos; - if ((addr & 0xff000000) != 0xff000000) { - printk(KERN_ERR - "Voyager failed to get SUS mailbox (addr = 0x%lx\n", - addr); - } else { - static struct resource res; - - res.name = "voyager SUS"; - res.start = addr; - res.end = addr + 0x3ff; - - request_resource(&iomem_resource, &res); - voyager_SUS = (struct voyager_SUS *) - ioremap(addr, 0x400); - printk(KERN_NOTICE "Voyager SUS mailbox version 0x%x\n", - voyager_SUS->SUS_version); - voyager_SUS->kernel_version = VOYAGER_MAILBOX_VERSION; - voyager_SUS->kernel_flags = VOYAGER_OS_HAS_SYSINT; - } - - /* clear the processor counts */ - voyager_extended_vic_processors = 0; - voyager_quad_processors = 0; - - printk("VOYAGER: beginning CAT bus probe\n"); - /* set up the SuperSet Port Block which tells us where the - * CAT communication port is */ - sspb = inb(VOYAGER_SSPB_RELOCATION_PORT) * 0x100; - VDEBUG(("VOYAGER DEBUG: sspb = 0x%x\n", sspb)); - - /* now find out if were 8 slot or normal */ - if ((inb(VIC_PROC_WHO_AM_I) & EIGHT_SLOT_IDENTIFIER) - == EIGHT_SLOT_IDENTIFIER) { - voyager_8slot = 1; - printk(KERN_NOTICE - "Voyager: Eight slot 51xx configuration detected\n"); - } - - for (i = VOYAGER_MIN_MODULE; i <= VOYAGER_MAX_MODULE; i++) { - __u8 input; - int asic; - __u16 eprom_size; - __u16 sp_offset; - - outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); - outb(i, VOYAGER_CAT_CONFIG_PORT); - - /* check the presence of the module */ - outb(VOYAGER_CAT_RUN, CAT_CMD); - outb(VOYAGER_CAT_IRCYC, CAT_CMD); - outb(VOYAGER_CAT_HEADER, CAT_DATA); - /* stream series of alternating 1's and 0's to stimulate - * response */ - outb(0xAA, CAT_DATA); - input = inb(CAT_DATA); - outb(VOYAGER_CAT_END, CAT_CMD); - if (input != VOYAGER_CAT_HEADER) { - continue; - } - CDEBUG(("VOYAGER DEBUG: found module id 0x%x, %s\n", i, - cat_module_name(i))); - *modpp = kmalloc(sizeof(voyager_module_t), GFP_KERNEL); /*&voyager_module_storage[cat_count++]; */ - if (*modpp == NULL) { - printk("**WARNING** kmalloc failure in cat_init\n"); - continue; - } - memset(*modpp, 0, sizeof(voyager_module_t)); - /* need temporary asic for cat_subread. It will be - * filled in correctly later */ - (*modpp)->asic = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count]; */ - if ((*modpp)->asic == NULL) { - printk("**WARNING** kmalloc failure in cat_init\n"); - continue; - } - memset((*modpp)->asic, 0, sizeof(voyager_asic_t)); - (*modpp)->asic->asic_id = VOYAGER_CAT_ID; - (*modpp)->asic->subaddr = VOYAGER_SUBADDR_HI; - (*modpp)->module_addr = i; - (*modpp)->scan_path_connected = 0; - if (i == VOYAGER_PSI) { - /* Exception leg for modules with no EEPROM */ - printk("Module \"%s\"\n", cat_module_name(i)); - continue; - } - - CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET)); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(*modpp, (*modpp)->asic); - if (cat_subread(*modpp, (*modpp)->asic, - VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size), - &eprom_size)) { - printk - ("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", - i); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - if (eprom_size > sizeof(eprom_buf)) { - printk - ("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n", - i, eprom_size); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, - eprom_size)); - if (cat_subread - (*modpp, (*modpp)->asic, 0, eprom_size, eprom_buf)) { - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - outb(VOYAGER_CAT_END, CAT_CMD); - printk("Module \"%s\", version 0x%x, tracer 0x%x, asics %d\n", - cat_module_name(i), eprom_hdr->version_id, - *((__u32 *) eprom_hdr->tracer), eprom_hdr->num_asics); - (*modpp)->ee_size = eprom_hdr->ee_size; - (*modpp)->num_asics = eprom_hdr->num_asics; - asicpp = &((*modpp)->asic); - sp_offset = eprom_hdr->scan_path_offset; - /* All we really care about are the Quad cards. We - * identify them because they are in a processor slot - * and have only four asics */ - if ((i < 0x10 || (i >= 0x14 && i < 0x1c) || i > 0x1f)) { - modpp = &((*modpp)->next); - continue; - } - /* Now we know it's in a processor slot, does it have - * a quad baseboard submodule */ - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODPRESENT, - &num_submodules); - /* lowest two bits, active low */ - num_submodules = ~(0xfc | num_submodules); - CDEBUG(("VOYAGER CAT: %d submodules present\n", - num_submodules)); - if (num_submodules == 0) { - /* fill in the dyadic extended processors */ - __u8 cpu = i & 0x07; - - printk("Module \"%s\": Dyadic Processor Card\n", - cat_module_name(i)); - voyager_extended_vic_processors |= (1 << cpu); - cpu += 4; - voyager_extended_vic_processors |= (1 << cpu); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - - /* now we want to read the asics on the first submodule, - * which should be the quad base board */ - - cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, &val); - CDEBUG(("cat_init: SUBMODSELECT value = 0x%x\n", val)); - val = (val & 0x7c) | VOYAGER_QUAD_BASEBOARD; - cat_write(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, val); - - outb(VOYAGER_CAT_END, CAT_CMD); - - CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET)); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(*modpp, (*modpp)->asic); - if (cat_subread(*modpp, (*modpp)->asic, - VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size), - &eprom_size)) { - printk - ("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", - i); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - if (eprom_size > sizeof(eprom_buf)) { - printk - ("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n", - i, eprom_size); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, - eprom_size)); - if (cat_subread - (*modpp, (*modpp)->asic, 0, eprom_size, eprom_buf)) { - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - outb(VOYAGER_CAT_END, CAT_CMD); - /* Now do everything for the QBB submodule 1 */ - (*modpp)->ee_size = eprom_hdr->ee_size; - (*modpp)->num_asics = eprom_hdr->num_asics; - asicpp = &((*modpp)->asic); - sp_offset = eprom_hdr->scan_path_offset; - /* get rid of the dummy CAT asic and read the real one */ - kfree((*modpp)->asic); - for (asic = 0; asic < (*modpp)->num_asics; asic++) { - int j; - voyager_asic_t *asicp = *asicpp = kzalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++]; */ - voyager_sp_table_t *sp_table; - voyager_at_t *asic_table; - voyager_jtt_t *jtag_table; - - if (asicp == NULL) { - printk - ("**WARNING** kmalloc failure in cat_init\n"); - continue; - } - asicpp = &(asicp->next); - asicp->asic_location = asic; - sp_table = - (voyager_sp_table_t *) (eprom_buf + sp_offset); - asicp->asic_id = sp_table->asic_id; - asic_table = - (voyager_at_t *) (eprom_buf + - sp_table->asic_data_offset); - for (j = 0; j < 4; j++) - asicp->jtag_id[j] = asic_table->jtag_id[j]; - jtag_table = - (voyager_jtt_t *) (eprom_buf + - asic_table->jtag_offset); - asicp->ireg_length = jtag_table->ireg_len; - asicp->bit_location = (*modpp)->inst_bits; - (*modpp)->inst_bits += asicp->ireg_length; - if (asicp->ireg_length > (*modpp)->largest_reg) - (*modpp)->largest_reg = asicp->ireg_length; - if (asicp->ireg_length < (*modpp)->smallest_reg || - (*modpp)->smallest_reg == 0) - (*modpp)->smallest_reg = asicp->ireg_length; - CDEBUG(("asic 0x%x, ireg_length=%d, bit_location=%d\n", - asicp->asic_id, asicp->ireg_length, - asicp->bit_location)); - if (asicp->asic_id == VOYAGER_QUAD_QABC) { - CDEBUG(("VOYAGER CAT: QABC ASIC found\n")); - qabc_asic = asicp; - } - sp_offset += sizeof(voyager_sp_table_t); - } - CDEBUG(("Module inst_bits = %d, largest_reg = %d, smallest_reg=%d\n", (*modpp)->inst_bits, (*modpp)->largest_reg, (*modpp)->smallest_reg)); - /* OK, now we have the QUAD ASICs set up, use them. - * we need to: - * - * 1. Find the Memory area for the Quad CPIs. - * 2. Find the Extended VIC processor - * 3. Configure a second extended VIC processor (This - * cannot be done for the 51xx. - * */ - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_connect(*modpp, (*modpp)->asic); - CDEBUG(("CAT CONNECTED!!\n")); - cat_subread(*modpp, qabc_asic, 0, sizeof(qabc_data), qabc_data); - qic_addr = qabc_data[5] << 8; - qic_addr = (qic_addr | qabc_data[6]) << 8; - qic_addr = (qic_addr | qabc_data[7]) << 8; - printk - ("Module \"%s\": Quad Processor Card; CPI 0x%lx, SET=0x%x\n", - cat_module_name(i), qic_addr, qabc_data[8]); -#if 0 /* plumbing fails---FIXME */ - if ((qabc_data[8] & 0xf0) == 0) { - /* FIXME: 32 way 8 CPU slot monster cannot be - * plumbed this way---need to check for it */ - - printk("Plumbing second Extended Quad Processor\n"); - /* second VIC line hardwired to Quad CPU 1 */ - qabc_data[8] |= 0x20; - cat_subwrite(*modpp, qabc_asic, 8, 1, &qabc_data[8]); -#ifdef VOYAGER_CAT_DEBUG - /* verify plumbing */ - cat_subread(*modpp, qabc_asic, 8, 1, &qabc_data[8]); - if ((qabc_data[8] & 0xf0) == 0) { - CDEBUG(("PLUMBING FAILED: 0x%x\n", - qabc_data[8])); - } -#endif - } -#endif - - { - struct resource *res = - kzalloc(sizeof(struct resource), GFP_KERNEL); - res->name = kmalloc(128, GFP_KERNEL); - sprintf((char *)res->name, "Voyager %s Quad CPI", - cat_module_name(i)); - res->start = qic_addr; - res->end = qic_addr + 0x3ff; - request_resource(&iomem_resource, res); - } - - qic_addr = (unsigned long)ioremap_cache(qic_addr, 0x400); - - for (j = 0; j < 4; j++) { - __u8 cpu; - - if (voyager_8slot) { - /* 8 slot has a different mapping, - * each slot has only one vic line, so - * 1 cpu in each slot must be < 8 */ - cpu = (i & 0x07) + j * 8; - } else { - cpu = (i & 0x03) + j * 4; - } - if ((qabc_data[8] & (1 << j))) { - voyager_extended_vic_processors |= (1 << cpu); - } - if (qabc_data[8] & (1 << (j + 4))) { - /* Second SET register plumbed: Quad - * card has two VIC connected CPUs. - * Secondary cannot be booted as a VIC - * CPU */ - voyager_extended_vic_processors |= (1 << cpu); - voyager_allowed_boot_processors &= - (~(1 << cpu)); - } - - voyager_quad_processors |= (1 << cpu); - voyager_quad_cpi_addr[cpu] = (struct voyager_qic_cpi *) - (qic_addr + (j << 8)); - CDEBUG(("CPU%d: CPI address 0x%lx\n", cpu, - (unsigned long)voyager_quad_cpi_addr[cpu])); - } - outb(VOYAGER_CAT_END, CAT_CMD); - - *asicpp = NULL; - modpp = &((*modpp)->next); - } - *modpp = NULL; - printk - ("CAT Bus Initialisation finished: extended procs 0x%x, quad procs 0x%x, allowed vic boot = 0x%x\n", - voyager_extended_vic_processors, voyager_quad_processors, - voyager_allowed_boot_processors); - request_resource(&ioport_resource, &vic_res); - if (voyager_quad_processors) - request_resource(&ioport_resource, &qic_res); - /* set up the front power switch */ -} - -int voyager_cat_readb(__u8 module, __u8 asic, int reg) -{ - return 0; -} - -static int cat_disconnect(voyager_module_t * modp, voyager_asic_t * asicp) -{ - __u8 val; - int err = 0; - - if (!modp->scan_path_connected) - return 0; - if (asicp->asic_id != VOYAGER_CAT_ID) { - CDEBUG(("cat_disconnect: ASIC is not CAT\n")); - return 1; - } - err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val); - if (err) { - CDEBUG(("cat_disconnect: failed to read SCANPATH\n")); - return err; - } - val &= VOYAGER_DISCONNECT_ASIC; - err = cat_write(modp, asicp, VOYAGER_SCANPATH, val); - if (err) { - CDEBUG(("cat_disconnect: failed to write SCANPATH\n")); - return err; - } - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - modp->scan_path_connected = 0; - - return 0; -} - -static int cat_connect(voyager_module_t * modp, voyager_asic_t * asicp) -{ - __u8 val; - int err = 0; - - if (modp->scan_path_connected) - return 0; - if (asicp->asic_id != VOYAGER_CAT_ID) { - CDEBUG(("cat_connect: ASIC is not CAT\n")); - return 1; - } - - err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val); - if (err) { - CDEBUG(("cat_connect: failed to read SCANPATH\n")); - return err; - } - val |= VOYAGER_CONNECT_ASIC; - err = cat_write(modp, asicp, VOYAGER_SCANPATH, val); - if (err) { - CDEBUG(("cat_connect: failed to write SCANPATH\n")); - return err; - } - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - modp->scan_path_connected = 1; - - return 0; -} - -void voyager_cat_power_off(void) -{ - /* Power the machine off by writing to the PSI over the CAT - * bus */ - __u8 data; - voyager_module_t psi = { 0 }; - voyager_asic_t psi_asic = { 0 }; - - psi.asic = &psi_asic; - psi.asic->asic_id = VOYAGER_CAT_ID; - psi.asic->subaddr = VOYAGER_SUBADDR_HI; - psi.module_addr = VOYAGER_PSI; - psi.scan_path_connected = 0; - - outb(VOYAGER_CAT_END, CAT_CMD); - /* Connect the PSI to the CAT Bus */ - outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(&psi, &psi_asic); - /* Read the status */ - cat_subread(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - CDEBUG(("PSI STATUS 0x%x\n", data)); - /* These two writes are power off prep and perform */ - data = PSI_CLEAR; - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - data = PSI_POWER_DOWN; - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); -} - -struct voyager_status voyager_status = { 0 }; - -void voyager_cat_psi(__u8 cmd, __u16 reg, __u8 * data) -{ - voyager_module_t psi = { 0 }; - voyager_asic_t psi_asic = { 0 }; - - psi.asic = &psi_asic; - psi.asic->asic_id = VOYAGER_CAT_ID; - psi.asic->subaddr = VOYAGER_SUBADDR_HI; - psi.module_addr = VOYAGER_PSI; - psi.scan_path_connected = 0; - - outb(VOYAGER_CAT_END, CAT_CMD); - /* Connect the PSI to the CAT Bus */ - outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(&psi, &psi_asic); - switch (cmd) { - case VOYAGER_PSI_READ: - cat_read(&psi, &psi_asic, reg, data); - break; - case VOYAGER_PSI_WRITE: - cat_write(&psi, &psi_asic, reg, *data); - break; - case VOYAGER_PSI_SUBREAD: - cat_subread(&psi, &psi_asic, reg, 1, data); - break; - case VOYAGER_PSI_SUBWRITE: - cat_subwrite(&psi, &psi_asic, reg, 1, data); - break; - default: - printk(KERN_ERR "Voyager PSI, unrecognised command %d\n", cmd); - break; - } - outb(VOYAGER_CAT_END, CAT_CMD); -} - -void voyager_cat_do_common_interrupt(void) -{ - /* This is caused either by a memory parity error or something - * in the PSI */ - __u8 data; - voyager_module_t psi = { 0 }; - voyager_asic_t psi_asic = { 0 }; - struct voyager_psi psi_reg; - int i; - re_read: - psi.asic = &psi_asic; - psi.asic->asic_id = VOYAGER_CAT_ID; - psi.asic->subaddr = VOYAGER_SUBADDR_HI; - psi.module_addr = VOYAGER_PSI; - psi.scan_path_connected = 0; - - outb(VOYAGER_CAT_END, CAT_CMD); - /* Connect the PSI to the CAT Bus */ - outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(&psi, &psi_asic); - /* Read the status. NOTE: Need to read *all* the PSI regs here - * otherwise the cmn int will be reasserted */ - for (i = 0; i < sizeof(psi_reg.regs); i++) { - cat_read(&psi, &psi_asic, i, &((__u8 *) & psi_reg.regs)[i]); - } - outb(VOYAGER_CAT_END, CAT_CMD); - if ((psi_reg.regs.checkbit & 0x02) == 0) { - psi_reg.regs.checkbit |= 0x02; - cat_write(&psi, &psi_asic, 5, psi_reg.regs.checkbit); - printk("VOYAGER RE-READ PSI\n"); - goto re_read; - } - outb(VOYAGER_CAT_RUN, CAT_CMD); - for (i = 0; i < sizeof(psi_reg.subregs); i++) { - /* This looks strange, but the PSI doesn't do auto increment - * correctly */ - cat_subread(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG + i, - 1, &((__u8 *) & psi_reg.subregs)[i]); - } - outb(VOYAGER_CAT_END, CAT_CMD); -#ifdef VOYAGER_CAT_DEBUG - printk("VOYAGER PSI: "); - for (i = 0; i < sizeof(psi_reg.regs); i++) - printk("%02x ", ((__u8 *) & psi_reg.regs)[i]); - printk("\n "); - for (i = 0; i < sizeof(psi_reg.subregs); i++) - printk("%02x ", ((__u8 *) & psi_reg.subregs)[i]); - printk("\n"); -#endif - if (psi_reg.regs.intstatus & PSI_MON) { - /* switch off or power fail */ - - if (psi_reg.subregs.supply & PSI_SWITCH_OFF) { - if (voyager_status.switch_off) { - printk(KERN_ERR - "Voyager front panel switch turned off again---Immediate power off!\n"); - voyager_cat_power_off(); - /* not reached */ - } else { - printk(KERN_ERR - "Voyager front panel switch turned off\n"); - voyager_status.switch_off = 1; - voyager_status.request_from_kernel = 1; - wake_up_process(voyager_thread); - } - /* Tell the hardware we're taking care of the - * shutdown, otherwise it will power the box off - * within 3 seconds of the switch being pressed and, - * which is much more important to us, continue to - * assert the common interrupt */ - data = PSI_CLR_SWITCH_OFF; - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG, - 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - } else { - - VDEBUG(("Voyager ac fail reg 0x%x\n", - psi_reg.subregs.ACfail)); - if ((psi_reg.subregs.ACfail & AC_FAIL_STAT_CHANGE) == 0) { - /* No further update */ - return; - } -#if 0 - /* Don't bother trying to find out who failed. - * FIXME: This probably makes the code incorrect on - * anything other than a 345x */ - for (i = 0; i < 5; i++) { - if (psi_reg.subregs.ACfail & (1 << i)) { - break; - } - } - printk(KERN_NOTICE "AC FAIL IN SUPPLY %d\n", i); -#endif - /* DON'T do this: it shuts down the AC PSI - outb(VOYAGER_CAT_RUN, CAT_CMD); - data = PSI_MASK_MASK | i; - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_MASK, - 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - */ - printk(KERN_ERR "Voyager AC power failure\n"); - outb(VOYAGER_CAT_RUN, CAT_CMD); - data = PSI_COLD_START; - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, - 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - voyager_status.power_fail = 1; - voyager_status.request_from_kernel = 1; - wake_up_process(voyager_thread); - } - - } else if (psi_reg.regs.intstatus & PSI_FAULT) { - /* Major fault! */ - printk(KERN_ERR - "Voyager PSI Detected major fault, immediate power off!\n"); - voyager_cat_power_off(); - /* not reached */ - } else if (psi_reg.regs.intstatus & (PSI_DC_FAIL | PSI_ALARM - | PSI_CURRENT | PSI_DVM - | PSI_PSCFAULT | PSI_STAT_CHG)) { - /* other psi fault */ - - printk(KERN_WARNING "Voyager PSI status 0x%x\n", data); - /* clear the PSI fault */ - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_write(&psi, &psi_asic, VOYAGER_PSI_STATUS_REG, 0); - outb(VOYAGER_CAT_END, CAT_CMD); - } -} diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c deleted file mode 100644 index 98e3c2bc756..00000000000 --- a/arch/x86/mach-voyager/voyager_smp.c +++ /dev/null @@ -1,1805 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * This file provides all the same external entries as smp.c but uses - * the voyager hal to provide the functionality - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* TLB state -- visible externally, indexed physically */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0 }; - -/* CPU IRQ affinity -- set to all ones initially */ -static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned = - {[0 ... NR_CPUS-1] = ~0UL }; - -/* per CPU data structure (for /proc/cpuinfo et al), visible externally - * indexed physically */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); -EXPORT_PER_CPU_SYMBOL(cpu_info); - -/* physical ID of the CPU used to boot the system */ -unsigned char boot_cpu_id; - -/* The memory line addresses for the Quad CPIs */ -struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS] __cacheline_aligned; - -/* The masks for the Extended VIC processors, filled in by cat_init */ -__u32 voyager_extended_vic_processors = 0; - -/* Masks for the extended Quad processors which cannot be VIC booted */ -__u32 voyager_allowed_boot_processors = 0; - -/* The mask for the Quad Processors (both extended and non-extended) */ -__u32 voyager_quad_processors = 0; - -/* Total count of live CPUs, used in process.c to display - * the CPU information and in irq.c for the per CPU irq - * activity count. Finally exported by i386_ksyms.c */ -static int voyager_extended_cpus = 1; - -/* Used for the invalidate map that's also checked in the spinlock */ -static volatile unsigned long smp_invalidate_needed; - -/* Bitmask of CPUs present in the system - exported by i386_syms.c, used - * by scheduler but indexed physically */ -cpumask_t phys_cpu_present_map = CPU_MASK_NONE; - -/* The internal functions */ -static void send_CPI(__u32 cpuset, __u8 cpi); -static void ack_CPI(__u8 cpi); -static int ack_QIC_CPI(__u8 cpi); -static void ack_special_QIC_CPI(__u8 cpi); -static void ack_VIC_CPI(__u8 cpi); -static void send_CPI_allbutself(__u8 cpi); -static void mask_vic_irq(unsigned int irq); -static void unmask_vic_irq(unsigned int irq); -static unsigned int startup_vic_irq(unsigned int irq); -static void enable_local_vic_irq(unsigned int irq); -static void disable_local_vic_irq(unsigned int irq); -static void before_handle_vic_irq(unsigned int irq); -static void after_handle_vic_irq(unsigned int irq); -static void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask); -static void ack_vic_irq(unsigned int irq); -static void vic_enable_cpi(void); -static void do_boot_cpu(__u8 cpuid); -static void do_quad_bootstrap(void); -static void initialize_secondary(void); - -int hard_smp_processor_id(void); -int safe_smp_processor_id(void); - -/* Inline functions */ -static inline void send_one_QIC_CPI(__u8 cpu, __u8 cpi) -{ - voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi = - (smp_processor_id() << 16) + cpi; -} - -static inline void send_QIC_CPI(__u32 cpuset, __u8 cpi) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (cpuset & (1 << cpu)) { -#ifdef VOYAGER_DEBUG - if (!cpu_online(cpu)) - VDEBUG(("CPU%d sending cpi %d to CPU%d not in " - "cpu_online_map\n", - hard_smp_processor_id(), cpi, cpu)); -#endif - send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET); - } - } -} - -static inline void wrapper_smp_local_timer_interrupt(void) -{ - irq_enter(); - smp_local_timer_interrupt(); - irq_exit(); -} - -static inline void send_one_CPI(__u8 cpu, __u8 cpi) -{ - if (voyager_quad_processors & (1 << cpu)) - send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET); - else - send_CPI(1 << cpu, cpi); -} - -static inline void send_CPI_allbutself(__u8 cpi) -{ - __u8 cpu = smp_processor_id(); - __u32 mask = cpus_addr(cpu_online_map)[0] & ~(1 << cpu); - send_CPI(mask, cpi); -} - -static inline int is_cpu_quad(void) -{ - __u8 cpumask = inb(VIC_PROC_WHO_AM_I); - return ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER); -} - -static inline int is_cpu_extended(void) -{ - __u8 cpu = hard_smp_processor_id(); - - return (voyager_extended_vic_processors & (1 << cpu)); -} - -static inline int is_cpu_vic_boot(void) -{ - __u8 cpu = hard_smp_processor_id(); - - return (voyager_extended_vic_processors - & voyager_allowed_boot_processors & (1 << cpu)); -} - -static inline void ack_CPI(__u8 cpi) -{ - switch (cpi) { - case VIC_CPU_BOOT_CPI: - if (is_cpu_quad() && !is_cpu_vic_boot()) - ack_QIC_CPI(cpi); - else - ack_VIC_CPI(cpi); - break; - case VIC_SYS_INT: - case VIC_CMN_INT: - /* These are slightly strange. Even on the Quad card, - * They are vectored as VIC CPIs */ - if (is_cpu_quad()) - ack_special_QIC_CPI(cpi); - else - ack_VIC_CPI(cpi); - break; - default: - printk("VOYAGER ERROR: CPI%d is in common CPI code\n", cpi); - break; - } -} - -/* local variables */ - -/* The VIC IRQ descriptors -- these look almost identical to the - * 8259 IRQs except that masks and things must be kept per processor - */ -static struct irq_chip vic_chip = { - .name = "VIC", - .startup = startup_vic_irq, - .mask = mask_vic_irq, - .unmask = unmask_vic_irq, - .set_affinity = set_vic_irq_affinity, -}; - -/* used to count up as CPUs are brought on line (starts at 0) */ -static int cpucount = 0; - -/* The per cpu profile stuff - used in smp_local_timer_interrupt */ -static DEFINE_PER_CPU(int, prof_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_counter) = 1; - -/* the map used to check if a CPU has booted */ -static __u32 cpu_booted_map; - -/* the synchronize flag used to hold all secondary CPUs spinning in - * a tight loop until the boot sequence is ready for them */ -static cpumask_t smp_commenced_mask = CPU_MASK_NONE; - -/* This is for the new dynamic CPU boot code */ - -/* The per processor IRQ masks (these are usually kept in sync) */ -static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned; - -/* the list of IRQs to be enabled by the VIC_ENABLE_IRQ_CPI */ -static __u16 vic_irq_enable_mask[NR_CPUS] __cacheline_aligned = { 0 }; - -/* Lock for enable/disable of VIC interrupts */ -static __cacheline_aligned DEFINE_SPINLOCK(vic_irq_lock); - -/* The boot processor is correctly set up in PC mode when it - * comes up, but the secondaries need their master/slave 8259 - * pairs initializing correctly */ - -/* Interrupt counters (per cpu) and total - used to try to - * even up the interrupt handling routines */ -static long vic_intr_total = 0; -static long vic_intr_count[NR_CPUS] __cacheline_aligned = { 0 }; -static unsigned long vic_tick[NR_CPUS] __cacheline_aligned = { 0 }; - -/* Since we can only use CPI0, we fake all the other CPIs */ -static unsigned long vic_cpi_mailbox[NR_CPUS] __cacheline_aligned; - -/* debugging routine to read the isr of the cpu's pic */ -static inline __u16 vic_read_isr(void) -{ - __u16 isr; - - outb(0x0b, 0xa0); - isr = inb(0xa0) << 8; - outb(0x0b, 0x20); - isr |= inb(0x20); - - return isr; -} - -static __init void qic_setup(void) -{ - if (!is_cpu_quad()) { - /* not a quad, no setup */ - return; - } - outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0); - outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1); - - if (is_cpu_extended()) { - /* the QIC duplicate of the VIC base register */ - outb(VIC_DEFAULT_CPI_BASE, QIC_VIC_CPI_BASE_REGISTER); - outb(QIC_DEFAULT_CPI_BASE, QIC_CPI_BASE_REGISTER); - - /* FIXME: should set up the QIC timer and memory parity - * error vectors here */ - } -} - -static __init void vic_setup_pic(void) -{ - outb(1, VIC_REDIRECT_REGISTER_1); - /* clear the claim registers for dynamic routing */ - outb(0, VIC_CLAIM_REGISTER_0); - outb(0, VIC_CLAIM_REGISTER_1); - - outb(0, VIC_PRIORITY_REGISTER); - /* Set the Primary and Secondary Microchannel vector - * bases to be the same as the ordinary interrupts - * - * FIXME: This would be more efficient using separate - * vectors. */ - outb(FIRST_EXTERNAL_VECTOR, VIC_PRIMARY_MC_BASE); - outb(FIRST_EXTERNAL_VECTOR, VIC_SECONDARY_MC_BASE); - /* Now initiallise the master PIC belonging to this CPU by - * sending the four ICWs */ - - /* ICW1: level triggered, ICW4 needed */ - outb(0x19, 0x20); - - /* ICW2: vector base */ - outb(FIRST_EXTERNAL_VECTOR, 0x21); - - /* ICW3: slave at line 2 */ - outb(0x04, 0x21); - - /* ICW4: 8086 mode */ - outb(0x01, 0x21); - - /* now the same for the slave PIC */ - - /* ICW1: level trigger, ICW4 needed */ - outb(0x19, 0xA0); - - /* ICW2: slave vector base */ - outb(FIRST_EXTERNAL_VECTOR + 8, 0xA1); - - /* ICW3: slave ID */ - outb(0x02, 0xA1); - - /* ICW4: 8086 mode */ - outb(0x01, 0xA1); -} - -static void do_quad_bootstrap(void) -{ - if (is_cpu_quad() && is_cpu_vic_boot()) { - int i; - unsigned long flags; - __u8 cpuid = hard_smp_processor_id(); - - local_irq_save(flags); - - for (i = 0; i < 4; i++) { - /* FIXME: this would be >>3 &0x7 on the 32 way */ - if (((cpuid >> 2) & 0x03) == i) - /* don't lower our own mask! */ - continue; - - /* masquerade as local Quad CPU */ - outb(QIC_CPUID_ENABLE | i, QIC_PROCESSOR_ID); - /* enable the startup CPI */ - outb(QIC_BOOT_CPI_MASK, QIC_MASK_REGISTER1); - /* restore cpu id */ - outb(0, QIC_PROCESSOR_ID); - } - local_irq_restore(flags); - } -} - -void prefill_possible_map(void) -{ - /* This is empty on voyager because we need a much - * earlier detection which is done in find_smp_config */ -} - -/* Set up all the basic stuff: read the SMP config and make all the - * SMP information reflect only the boot cpu. All others will be - * brought on-line later. */ -void __init find_smp_config(void) -{ - int i; - - boot_cpu_id = hard_smp_processor_id(); - - printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id); - - /* initialize the CPU structures (moved from smp_boot_cpus) */ - for (i = 0; i < nr_cpu_ids; i++) - cpu_irq_affinity[i] = ~0; - cpu_online_map = cpumask_of_cpu(boot_cpu_id); - - /* The boot CPU must be extended */ - voyager_extended_vic_processors = 1 << boot_cpu_id; - /* initially, all of the first 8 CPUs can boot */ - voyager_allowed_boot_processors = 0xff; - /* set up everything for just this CPU, we can alter - * this as we start the other CPUs later */ - /* now get the CPU disposition from the extended CMOS */ - cpus_addr(phys_cpu_present_map)[0] = - voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK); - cpus_addr(phys_cpu_present_map)[0] |= - voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 1) << 8; - cpus_addr(phys_cpu_present_map)[0] |= - voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + - 2) << 16; - cpus_addr(phys_cpu_present_map)[0] |= - voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + - 3) << 24; - init_cpu_possible(&phys_cpu_present_map); - printk("VOYAGER SMP: phys_cpu_present_map = 0x%lx\n", - cpus_addr(phys_cpu_present_map)[0]); - /* Here we set up the VIC to enable SMP */ - /* enable the CPIs by writing the base vector to their register */ - outb(VIC_DEFAULT_CPI_BASE, VIC_CPI_BASE_REGISTER); - outb(1, VIC_REDIRECT_REGISTER_1); - /* set the claim registers for static routing --- Boot CPU gets - * all interrupts untill all other CPUs started */ - outb(0xff, VIC_CLAIM_REGISTER_0); - outb(0xff, VIC_CLAIM_REGISTER_1); - /* Set the Primary and Secondary Microchannel vector - * bases to be the same as the ordinary interrupts - * - * FIXME: This would be more efficient using separate - * vectors. */ - outb(FIRST_EXTERNAL_VECTOR, VIC_PRIMARY_MC_BASE); - outb(FIRST_EXTERNAL_VECTOR, VIC_SECONDARY_MC_BASE); - - /* Finally tell the firmware that we're driving */ - outb(inb(VOYAGER_SUS_IN_CONTROL_PORT) | VOYAGER_IN_CONTROL_FLAG, - VOYAGER_SUS_IN_CONTROL_PORT); - - current_thread_info()->cpu = boot_cpu_id; - percpu_write(cpu_number, boot_cpu_id); -} - -/* - * The bootstrap kernel entry code has set these up. Save them - * for a given CPU, id is physical */ -void __init smp_store_cpu_info(int id) -{ - struct cpuinfo_x86 *c = &cpu_data(id); - - *c = boot_cpu_data; - c->cpu_index = id; - - identify_secondary_cpu(c); -} - -/* Routine initially called when a non-boot CPU is brought online */ -static void __init start_secondary(void *unused) -{ - __u8 cpuid = hard_smp_processor_id(); - - cpu_init(); - - /* OK, we're in the routine */ - ack_CPI(VIC_CPU_BOOT_CPI); - - /* setup the 8259 master slave pair belonging to this CPU --- - * we won't actually receive any until the boot CPU - * relinquishes it's static routing mask */ - vic_setup_pic(); - - qic_setup(); - - if (is_cpu_quad() && !is_cpu_vic_boot()) { - /* clear the boot CPI */ - __u8 dummy; - - dummy = - voyager_quad_cpi_addr[cpuid]->qic_cpi[VIC_CPU_BOOT_CPI].cpi; - printk("read dummy %d\n", dummy); - } - - /* lower the mask to receive CPIs */ - vic_enable_cpi(); - - VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid)); - - notify_cpu_starting(cpuid); - - /* enable interrupts */ - local_irq_enable(); - - /* get our bogomips */ - calibrate_delay(); - - /* save our processor parameters */ - smp_store_cpu_info(cpuid); - - /* if we're a quad, we may need to bootstrap other CPUs */ - do_quad_bootstrap(); - - /* FIXME: this is rather a poor hack to prevent the CPU - * activating softirqs while it's supposed to be waiting for - * permission to proceed. Without this, the new per CPU stuff - * in the softirqs will fail */ - local_irq_disable(); - cpu_set(cpuid, cpu_callin_map); - - /* signal that we're done */ - cpu_booted_map = 1; - - while (!cpu_isset(cpuid, smp_commenced_mask)) - rep_nop(); - local_irq_enable(); - - local_flush_tlb(); - - cpu_set(cpuid, cpu_online_map); - wmb(); - cpu_idle(); -} - -/* Routine to kick start the given CPU and wait for it to report ready - * (or timeout in startup). When this routine returns, the requested - * CPU is either fully running and configured or known to be dead. - * - * We call this routine sequentially 1 CPU at a time, so no need for - * locking */ - -static void __init do_boot_cpu(__u8 cpu) -{ - struct task_struct *idle; - int timeout; - unsigned long flags; - int quad_boot = (1 << cpu) & voyager_quad_processors - & ~(voyager_extended_vic_processors - & voyager_allowed_boot_processors); - - /* This is the format of the CPI IDT gate (in real mode) which - * we're hijacking to boot the CPU */ - union IDTFormat { - struct seg { - __u16 Offset; - __u16 Segment; - } idt; - __u32 val; - } hijack_source; - - __u32 *hijack_vector; - __u32 start_phys_address = setup_trampoline(); - - /* There's a clever trick to this: The linux trampoline is - * compiled to begin at absolute location zero, so make the - * address zero but have the data segment selector compensate - * for the actual address */ - hijack_source.idt.Offset = start_phys_address & 0x000F; - hijack_source.idt.Segment = (start_phys_address >> 4) & 0xFFFF; - - cpucount++; - alternatives_smp_switch(1); - - idle = fork_idle(cpu); - if (IS_ERR(idle)) - panic("failed fork for CPU%d", cpu); - idle->thread.ip = (unsigned long)start_secondary; - /* init_tasks (in sched.c) is indexed logically */ - stack_start.sp = (void *)idle->thread.sp; - - per_cpu(current_task, cpu) = idle; - early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); - irq_ctx_init(cpu); - - /* Note: Don't modify initial ss override */ - VDEBUG(("VOYAGER SMP: Booting CPU%d at 0x%lx[%x:%x], stack %p\n", cpu, - (unsigned long)hijack_source.val, hijack_source.idt.Segment, - hijack_source.idt.Offset, stack_start.sp)); - - /* init lowmem identity mapping */ - clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); - flush_tlb_all(); - - if (quad_boot) { - printk("CPU %d: non extended Quad boot\n", cpu); - hijack_vector = - (__u32 *) - phys_to_virt((VIC_CPU_BOOT_CPI + QIC_DEFAULT_CPI_BASE) * 4); - *hijack_vector = hijack_source.val; - } else { - printk("CPU%d: extended VIC boot\n", cpu); - hijack_vector = - (__u32 *) - phys_to_virt((VIC_CPU_BOOT_CPI + VIC_DEFAULT_CPI_BASE) * 4); - *hijack_vector = hijack_source.val; - /* VIC errata, may also receive interrupt at this address */ - hijack_vector = - (__u32 *) - phys_to_virt((VIC_CPU_BOOT_ERRATA_CPI + - VIC_DEFAULT_CPI_BASE) * 4); - *hijack_vector = hijack_source.val; - } - /* All non-boot CPUs start with interrupts fully masked. Need - * to lower the mask of the CPI we're about to send. We do - * this in the VIC by masquerading as the processor we're - * about to boot and lowering its interrupt mask */ - local_irq_save(flags); - if (quad_boot) { - send_one_QIC_CPI(cpu, VIC_CPU_BOOT_CPI); - } else { - outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID); - /* here we're altering registers belonging to `cpu' */ - - outb(VIC_BOOT_INTERRUPT_MASK, 0x21); - /* now go back to our original identity */ - outb(boot_cpu_id, VIC_PROCESSOR_ID); - - /* and boot the CPU */ - - send_CPI((1 << cpu), VIC_CPU_BOOT_CPI); - } - cpu_booted_map = 0; - local_irq_restore(flags); - - /* now wait for it to become ready (or timeout) */ - for (timeout = 0; timeout < 50000; timeout++) { - if (cpu_booted_map) - break; - udelay(100); - } - /* reset the page table */ - zap_low_mappings(); - - if (cpu_booted_map) { - VDEBUG(("CPU%d: Booted successfully, back in CPU %d\n", - cpu, smp_processor_id())); - - printk("CPU%d: ", cpu); - print_cpu_info(&cpu_data(cpu)); - wmb(); - cpu_set(cpu, cpu_callout_map); - cpu_set(cpu, cpu_present_map); - } else { - printk("CPU%d FAILED TO BOOT: ", cpu); - if (* - ((volatile unsigned char *)phys_to_virt(start_phys_address)) - == 0xA5) - printk("Stuck.\n"); - else - printk("Not responding.\n"); - - cpucount--; - } -} - -void __init smp_boot_cpus(void) -{ - int i; - - /* CAT BUS initialisation must be done after the memory */ - /* FIXME: The L4 has a catbus too, it just needs to be - * accessed in a totally different way */ - if (voyager_level == 5) { - voyager_cat_init(); - - /* now that the cat has probed the Voyager System Bus, sanity - * check the cpu map */ - if (((voyager_quad_processors | voyager_extended_vic_processors) - & cpus_addr(phys_cpu_present_map)[0]) != - cpus_addr(phys_cpu_present_map)[0]) { - /* should panic */ - printk("\n\n***WARNING*** " - "Sanity check of CPU present map FAILED\n"); - } - } else if (voyager_level == 4) - voyager_extended_vic_processors = - cpus_addr(phys_cpu_present_map)[0]; - - /* this sets up the idle task to run on the current cpu */ - voyager_extended_cpus = 1; - /* Remove the global_irq_holder setting, it triggers a BUG() on - * schedule at the moment */ - //global_irq_holder = boot_cpu_id; - - /* FIXME: Need to do something about this but currently only works - * on CPUs with a tsc which none of mine have. - smp_tune_scheduling(); - */ - smp_store_cpu_info(boot_cpu_id); - /* setup the jump vector */ - initial_code = (unsigned long)initialize_secondary; - printk("CPU%d: ", boot_cpu_id); - print_cpu_info(&cpu_data(boot_cpu_id)); - - if (is_cpu_quad()) { - /* booting on a Quad CPU */ - printk("VOYAGER SMP: Boot CPU is Quad\n"); - qic_setup(); - do_quad_bootstrap(); - } - - /* enable our own CPIs */ - vic_enable_cpi(); - - cpu_set(boot_cpu_id, cpu_online_map); - cpu_set(boot_cpu_id, cpu_callout_map); - - /* loop over all the extended VIC CPUs and boot them. The - * Quad CPUs must be bootstrapped by their extended VIC cpu */ - for (i = 0; i < nr_cpu_ids; i++) { - if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) - continue; - do_boot_cpu(i); - /* This udelay seems to be needed for the Quad boots - * don't remove unless you know what you're doing */ - udelay(1000); - } - /* we could compute the total bogomips here, but why bother?, - * Code added from smpboot.c */ - { - unsigned long bogosum = 0; - - for_each_online_cpu(i) - bogosum += cpu_data(i).loops_per_jiffy; - printk(KERN_INFO "Total of %d processors activated " - "(%lu.%02lu BogoMIPS).\n", - cpucount + 1, bogosum / (500000 / HZ), - (bogosum / (5000 / HZ)) % 100); - } - voyager_extended_cpus = hweight32(voyager_extended_vic_processors); - printk("VOYAGER: Extended (interrupt handling CPUs): " - "%d, non-extended: %d\n", voyager_extended_cpus, - num_booting_cpus() - voyager_extended_cpus); - /* that's it, switch to symmetric mode */ - outb(0, VIC_PRIORITY_REGISTER); - outb(0, VIC_CLAIM_REGISTER_0); - outb(0, VIC_CLAIM_REGISTER_1); - - VDEBUG(("VOYAGER SMP: Booted with %d CPUs\n", num_booting_cpus())); -} - -/* Reload the secondary CPUs task structure (this function does not - * return ) */ -static void __init initialize_secondary(void) -{ -#if 0 - // AC kernels only - set_current(hard_get_current()); -#endif - - /* - * We don't actually need to load the full TSS, - * basically just the stack pointer and the eip. - */ - - asm volatile ("movl %0,%%esp\n\t" - "jmp *%1"::"r" (current->thread.sp), - "r"(current->thread.ip)); -} - -/* handle a Voyager SYS_INT -- If we don't, the base board will - * panic the system. - * - * System interrupts occur because some problem was detected on the - * various busses. To find out what you have to probe all the - * hardware via the CAT bus. FIXME: At the moment we do nothing. */ -void smp_vic_sys_interrupt(struct pt_regs *regs) -{ - ack_CPI(VIC_SYS_INT); - printk("Voyager SYSTEM INTERRUPT\n"); -} - -/* Handle a voyager CMN_INT; These interrupts occur either because of - * a system status change or because a single bit memory error - * occurred. FIXME: At the moment, ignore all this. */ -void smp_vic_cmn_interrupt(struct pt_regs *regs) -{ - static __u8 in_cmn_int = 0; - static DEFINE_SPINLOCK(cmn_int_lock); - - /* common ints are broadcast, so make sure we only do this once */ - _raw_spin_lock(&cmn_int_lock); - if (in_cmn_int) - goto unlock_end; - - in_cmn_int++; - _raw_spin_unlock(&cmn_int_lock); - - VDEBUG(("Voyager COMMON INTERRUPT\n")); - - if (voyager_level == 5) - voyager_cat_do_common_interrupt(); - - _raw_spin_lock(&cmn_int_lock); - in_cmn_int = 0; - unlock_end: - _raw_spin_unlock(&cmn_int_lock); - ack_CPI(VIC_CMN_INT); -} - -/* - * Reschedule call back. Nothing to do, all the work is done - * automatically when we return from the interrupt. */ -static void smp_reschedule_interrupt(void) -{ - /* do nothing */ -} - -static struct mm_struct *flush_mm; -static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); - -/* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. - * - * We need to reload %cr3 since the page tables may be going - * away from under us.. - */ -static inline void voyager_leave_mm(unsigned long cpu) -{ - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) - BUG(); - cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); - load_cr3(swapper_pg_dir); -} - -/* - * Invalidate call-back - */ -static void smp_invalidate_interrupt(void) -{ - __u8 cpu = smp_processor_id(); - - if (!test_bit(cpu, &smp_invalidate_needed)) - return; - /* This will flood messages. Don't uncomment unless you see - * Problems with cross cpu invalidation - VDEBUG(("VOYAGER SMP: CPU%d received INVALIDATE_CPI\n", - smp_processor_id())); - */ - - if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { - if (flush_va == TLB_FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(flush_va); - } else - voyager_leave_mm(cpu); - } - smp_mb__before_clear_bit(); - clear_bit(cpu, &smp_invalidate_needed); - smp_mb__after_clear_bit(); -} - -/* All the new flush operations for 2.4 */ - -/* This routine is called with a physical cpu mask */ -static void -voyager_flush_tlb_others(unsigned long cpumask, struct mm_struct *mm, - unsigned long va) -{ - int stuck = 50000; - - if (!cpumask) - BUG(); - if ((cpumask & cpus_addr(cpu_online_map)[0]) != cpumask) - BUG(); - if (cpumask & (1 << smp_processor_id())) - BUG(); - if (!mm) - BUG(); - - spin_lock(&tlbstate_lock); - - flush_mm = mm; - flush_va = va; - atomic_set_mask(cpumask, &smp_invalidate_needed); - /* - * We have to send the CPI only to - * CPUs affected. - */ - send_CPI(cpumask, VIC_INVALIDATE_CPI); - - while (smp_invalidate_needed) { - mb(); - if (--stuck == 0) { - printk("***WARNING*** Stuck doing invalidate CPI " - "(CPU%d)\n", smp_processor_id()); - break; - } - } - - /* Uncomment only to debug invalidation problems - VDEBUG(("VOYAGER SMP: Completed invalidate CPI (CPU%d)\n", cpu)); - */ - - flush_mm = NULL; - flush_va = 0; - spin_unlock(&tlbstate_lock); -} - -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - unsigned long cpu_mask; - - preempt_disable(); - - cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); - local_flush_tlb(); - if (cpu_mask) - voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_mm(struct mm_struct *mm) -{ - unsigned long cpu_mask; - - preempt_disable(); - - cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); - - if (current->active_mm == mm) { - if (current->mm) - local_flush_tlb(); - else - voyager_leave_mm(smp_processor_id()); - } - if (cpu_mask) - voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) -{ - struct mm_struct *mm = vma->vm_mm; - unsigned long cpu_mask; - - preempt_disable(); - - cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); - if (current->active_mm == mm) { - if (current->mm) - __flush_tlb_one(va); - else - voyager_leave_mm(smp_processor_id()); - } - - if (cpu_mask) - voyager_flush_tlb_others(cpu_mask, mm, va); - - preempt_enable(); -} - -EXPORT_SYMBOL(flush_tlb_page); - -/* enable the requested IRQs */ -static void smp_enable_irq_interrupt(void) -{ - __u8 irq; - __u8 cpu = get_cpu(); - - VDEBUG(("VOYAGER SMP: CPU%d enabling irq mask 0x%x\n", cpu, - vic_irq_enable_mask[cpu])); - - spin_lock(&vic_irq_lock); - for (irq = 0; irq < 16; irq++) { - if (vic_irq_enable_mask[cpu] & (1 << irq)) - enable_local_vic_irq(irq); - } - vic_irq_enable_mask[cpu] = 0; - spin_unlock(&vic_irq_lock); - - put_cpu_no_resched(); -} - -/* - * CPU halt call-back - */ -static void smp_stop_cpu_function(void *dummy) -{ - VDEBUG(("VOYAGER SMP: CPU%d is STOPPING\n", smp_processor_id())); - cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_disable(); - for (;;) - halt(); -} - -/* execute a thread on a new CPU. The function to be called must be - * previously set up. This is used to schedule a function for - * execution on all CPUs - set up the function then broadcast a - * function_interrupt CPI to come here on each CPU */ -static void smp_call_function_interrupt(void) -{ - irq_enter(); - generic_smp_call_function_interrupt(); - __get_cpu_var(irq_stat).irq_call_count++; - irq_exit(); -} - -static void smp_call_function_single_interrupt(void) -{ - irq_enter(); - generic_smp_call_function_single_interrupt(); - __get_cpu_var(irq_stat).irq_call_count++; - irq_exit(); -} - -/* Sorry about the name. In an APIC based system, the APICs - * themselves are programmed to send a timer interrupt. This is used - * by linux to reschedule the processor. Voyager doesn't have this, - * so we use the system clock to interrupt one processor, which in - * turn, broadcasts a timer CPI to all the others --- we receive that - * CPI here. We don't use this actually for counting so losing - * ticks doesn't matter - * - * FIXME: For those CPUs which actually have a local APIC, we could - * try to use it to trigger this interrupt instead of having to - * broadcast the timer tick. Unfortunately, all my pentium DYADs have - * no local APIC, so I can't do this - * - * This function is currently a placeholder and is unused in the code */ -void smp_apic_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - wrapper_smp_local_timer_interrupt(); - set_irq_regs(old_regs); -} - -/* All of the QUAD interrupt GATES */ -void smp_qic_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - ack_QIC_CPI(QIC_TIMER_CPI); - wrapper_smp_local_timer_interrupt(); - set_irq_regs(old_regs); -} - -void smp_qic_invalidate_interrupt(struct pt_regs *regs) -{ - ack_QIC_CPI(QIC_INVALIDATE_CPI); - smp_invalidate_interrupt(); -} - -void smp_qic_reschedule_interrupt(struct pt_regs *regs) -{ - ack_QIC_CPI(QIC_RESCHEDULE_CPI); - smp_reschedule_interrupt(); -} - -void smp_qic_enable_irq_interrupt(struct pt_regs *regs) -{ - ack_QIC_CPI(QIC_ENABLE_IRQ_CPI); - smp_enable_irq_interrupt(); -} - -void smp_qic_call_function_interrupt(struct pt_regs *regs) -{ - ack_QIC_CPI(QIC_CALL_FUNCTION_CPI); - smp_call_function_interrupt(); -} - -void smp_qic_call_function_single_interrupt(struct pt_regs *regs) -{ - ack_QIC_CPI(QIC_CALL_FUNCTION_SINGLE_CPI); - smp_call_function_single_interrupt(); -} - -void smp_vic_cpi_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - __u8 cpu = smp_processor_id(); - - if (is_cpu_quad()) - ack_QIC_CPI(VIC_CPI_LEVEL0); - else - ack_VIC_CPI(VIC_CPI_LEVEL0); - - if (test_and_clear_bit(VIC_TIMER_CPI, &vic_cpi_mailbox[cpu])) - wrapper_smp_local_timer_interrupt(); - if (test_and_clear_bit(VIC_INVALIDATE_CPI, &vic_cpi_mailbox[cpu])) - smp_invalidate_interrupt(); - if (test_and_clear_bit(VIC_RESCHEDULE_CPI, &vic_cpi_mailbox[cpu])) - smp_reschedule_interrupt(); - if (test_and_clear_bit(VIC_ENABLE_IRQ_CPI, &vic_cpi_mailbox[cpu])) - smp_enable_irq_interrupt(); - if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu])) - smp_call_function_interrupt(); - if (test_and_clear_bit(VIC_CALL_FUNCTION_SINGLE_CPI, &vic_cpi_mailbox[cpu])) - smp_call_function_single_interrupt(); - set_irq_regs(old_regs); -} - -static void do_flush_tlb_all(void *info) -{ - unsigned long cpu = smp_processor_id(); - - __flush_tlb_all(); - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) - voyager_leave_mm(cpu); -} - -/* flush the TLB of every active CPU in the system */ -void flush_tlb_all(void) -{ - on_each_cpu(do_flush_tlb_all, 0, 1); -} - -/* send a reschedule CPI to one CPU by physical CPU number*/ -static void voyager_smp_send_reschedule(int cpu) -{ - send_one_CPI(cpu, VIC_RESCHEDULE_CPI); -} - -int hard_smp_processor_id(void) -{ - __u8 i; - __u8 cpumask = inb(VIC_PROC_WHO_AM_I); - if ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER) - return cpumask & 0x1F; - - for (i = 0; i < 8; i++) { - if (cpumask & (1 << i)) - return i; - } - printk("** WARNING ** Illegal cpuid returned by VIC: %d", cpumask); - return 0; -} - -int safe_smp_processor_id(void) -{ - return hard_smp_processor_id(); -} - -/* broadcast a halt to all other CPUs */ -static void voyager_smp_send_stop(void) -{ - smp_call_function(smp_stop_cpu_function, NULL, 1); -} - -/* this function is triggered in time.c when a clock tick fires - * we need to re-broadcast the tick to all CPUs */ -void smp_vic_timer_interrupt(void) -{ - send_CPI_allbutself(VIC_TIMER_CPI); - smp_local_timer_interrupt(); -} - -/* local (per CPU) timer interrupt. It does both profiling and - * process statistics/rescheduling. - * - * We do profiling in every local tick, statistics/rescheduling - * happen only every 'profiling multiplier' ticks. The default - * multiplier is 1 and it can be changed by writing the new multiplier - * value into /proc/profile. - */ -void smp_local_timer_interrupt(void) -{ - int cpu = smp_processor_id(); - long weight; - - profile_tick(CPU_PROFILING); - if (--per_cpu(prof_counter, cpu) <= 0) { - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); - if (per_cpu(prof_counter, cpu) != - per_cpu(prof_old_multiplier, cpu)) { - /* FIXME: need to update the vic timer tick here */ - per_cpu(prof_old_multiplier, cpu) = - per_cpu(prof_counter, cpu); - } - - update_process_times(user_mode_vm(get_irq_regs())); - } - - if (((1 << cpu) & voyager_extended_vic_processors) == 0) - /* only extended VIC processors participate in - * interrupt distribution */ - return; - - /* - * We take the 'long' return path, and there every subsystem - * grabs the appropriate locks (kernel lock/ irq lock). - * - * we might want to decouple profiling from the 'long path', - * and do the profiling totally in assembly. - * - * Currently this isn't too much of an issue (performance wise), - * we can take more than 100K local irqs per second on a 100 MHz P5. - */ - - if ((++vic_tick[cpu] & 0x7) != 0) - return; - /* get here every 16 ticks (about every 1/6 of a second) */ - - /* Change our priority to give someone else a chance at getting - * the IRQ. The algorithm goes like this: - * - * In the VIC, the dynamically routed interrupt is always - * handled by the lowest priority eligible (i.e. receiving - * interrupts) CPU. If >1 eligible CPUs are equal lowest, the - * lowest processor number gets it. - * - * The priority of a CPU is controlled by a special per-CPU - * VIC priority register which is 3 bits wide 0 being lowest - * and 7 highest priority.. - * - * Therefore we subtract the average number of interrupts from - * the number we've fielded. If this number is negative, we - * lower the activity count and if it is positive, we raise - * it. - * - * I'm afraid this still leads to odd looking interrupt counts: - * the totals are all roughly equal, but the individual ones - * look rather skewed. - * - * FIXME: This algorithm is total crap when mixed with SMP - * affinity code since we now try to even up the interrupt - * counts when an affinity binding is keeping them on a - * particular CPU*/ - weight = (vic_intr_count[cpu] * voyager_extended_cpus - - vic_intr_total) >> 4; - weight += 4; - if (weight > 7) - weight = 7; - if (weight < 0) - weight = 0; - - outb((__u8) weight, VIC_PRIORITY_REGISTER); - -#ifdef VOYAGER_DEBUG - if ((vic_tick[cpu] & 0xFFF) == 0) { - /* print this message roughly every 25 secs */ - printk("VOYAGER SMP: vic_tick[%d] = %lu, weight = %ld\n", - cpu, vic_tick[cpu], weight); - } -#endif -} - -/* setup the profiling timer */ -int setup_profiling_timer(unsigned int multiplier) -{ - int i; - - if ((!multiplier)) - return -EINVAL; - - /* - * Set the new multiplier for each CPU. CPUs don't start using the - * new values until the next timer interrupt in which they do process - * accounting. - */ - for (i = 0; i < nr_cpu_ids; ++i) - per_cpu(prof_multiplier, i) = multiplier; - - return 0; -} - -/* This is a bit of a mess, but forced on us by the genirq changes - * there's no genirq handler that really does what voyager wants - * so hack it up with the simple IRQ handler */ -static void handle_vic_irq(unsigned int irq, struct irq_desc *desc) -{ - before_handle_vic_irq(irq); - handle_simple_irq(irq, desc); - after_handle_vic_irq(irq); -} - -/* The CPIs are handled in the per cpu 8259s, so they must be - * enabled to be received: FIX: enabling the CPIs in the early - * boot sequence interferes with bug checking; enable them later - * on in smp_init */ -#define VIC_SET_GATE(cpi, vector) \ - set_intr_gate((cpi) + VIC_DEFAULT_CPI_BASE, (vector)) -#define QIC_SET_GATE(cpi, vector) \ - set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector)) - -void __init voyager_smp_intr_init(void) -{ - int i; - - /* initialize the per cpu irq mask to all disabled */ - for (i = 0; i < nr_cpu_ids; i++) - vic_irq_mask[i] = 0xFFFF; - - VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt); - - VIC_SET_GATE(VIC_SYS_INT, vic_sys_interrupt); - VIC_SET_GATE(VIC_CMN_INT, vic_cmn_interrupt); - - QIC_SET_GATE(QIC_TIMER_CPI, qic_timer_interrupt); - QIC_SET_GATE(QIC_INVALIDATE_CPI, qic_invalidate_interrupt); - QIC_SET_GATE(QIC_RESCHEDULE_CPI, qic_reschedule_interrupt); - QIC_SET_GATE(QIC_ENABLE_IRQ_CPI, qic_enable_irq_interrupt); - QIC_SET_GATE(QIC_CALL_FUNCTION_CPI, qic_call_function_interrupt); - - /* now put the VIC descriptor into the first 48 IRQs - * - * This is for later: first 16 correspond to PC IRQs; next 16 - * are Primary MC IRQs and final 16 are Secondary MC IRQs */ - for (i = 0; i < 48; i++) - set_irq_chip_and_handler(i, &vic_chip, handle_vic_irq); -} - -/* send a CPI at level cpi to a set of cpus in cpuset (set 1 bit per - * processor to receive CPI */ -static void send_CPI(__u32 cpuset, __u8 cpi) -{ - int cpu; - __u32 quad_cpuset = (cpuset & voyager_quad_processors); - - if (cpi < VIC_START_FAKE_CPI) { - /* fake CPI are only used for booting, so send to the - * extended quads as well---Quads must be VIC booted */ - outb((__u8) (cpuset), VIC_CPI_Registers[cpi]); - return; - } - if (quad_cpuset) - send_QIC_CPI(quad_cpuset, cpi); - cpuset &= ~quad_cpuset; - cpuset &= 0xff; /* only first 8 CPUs vaild for VIC CPI */ - if (cpuset == 0) - return; - for_each_online_cpu(cpu) { - if (cpuset & (1 << cpu)) - set_bit(cpi, &vic_cpi_mailbox[cpu]); - } - if (cpuset) - outb((__u8) cpuset, VIC_CPI_Registers[VIC_CPI_LEVEL0]); -} - -/* Acknowledge receipt of CPI in the QIC, clear in QIC hardware and - * set the cache line to shared by reading it. - * - * DON'T make this inline otherwise the cache line read will be - * optimised away - * */ -static int ack_QIC_CPI(__u8 cpi) -{ - __u8 cpu = hard_smp_processor_id(); - - cpi &= 7; - - outb(1 << cpi, QIC_INTERRUPT_CLEAR1); - return voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi; -} - -static void ack_special_QIC_CPI(__u8 cpi) -{ - switch (cpi) { - case VIC_CMN_INT: - outb(QIC_CMN_INT, QIC_INTERRUPT_CLEAR0); - break; - case VIC_SYS_INT: - outb(QIC_SYS_INT, QIC_INTERRUPT_CLEAR0); - break; - } - /* also clear at the VIC, just in case (nop for non-extended proc) */ - ack_VIC_CPI(cpi); -} - -/* Acknowledge receipt of CPI in the VIC (essentially an EOI) */ -static void ack_VIC_CPI(__u8 cpi) -{ -#ifdef VOYAGER_DEBUG - unsigned long flags; - __u16 isr; - __u8 cpu = smp_processor_id(); - - local_irq_save(flags); - isr = vic_read_isr(); - if ((isr & (1 << (cpi & 7))) == 0) { - printk("VOYAGER SMP: CPU%d lost CPI%d\n", cpu, cpi); - } -#endif - /* send specific EOI; the two system interrupts have - * bit 4 set for a separate vector but behave as the - * corresponding 3 bit intr */ - outb_p(0x60 | (cpi & 7), 0x20); - -#ifdef VOYAGER_DEBUG - if ((vic_read_isr() & (1 << (cpi & 7))) != 0) { - printk("VOYAGER SMP: CPU%d still asserting CPI%d\n", cpu, cpi); - } - local_irq_restore(flags); -#endif -} - -/* cribbed with thanks from irq.c */ -#define __byte(x,y) (((unsigned char *)&(y))[x]) -#define cached_21(cpu) (__byte(0,vic_irq_mask[cpu])) -#define cached_A1(cpu) (__byte(1,vic_irq_mask[cpu])) - -static unsigned int startup_vic_irq(unsigned int irq) -{ - unmask_vic_irq(irq); - - return 0; -} - -/* The enable and disable routines. This is where we run into - * conflicting architectural philosophy. Fundamentally, the voyager - * architecture does not expect to have to disable interrupts globally - * (the IRQ controllers belong to each CPU). The processor masquerade - * which is used to start the system shouldn't be used in a running OS - * since it will cause great confusion if two separate CPUs drive to - * the same IRQ controller (I know, I've tried it). - * - * The solution is a variant on the NCR lazy SPL design: - * - * 1) To disable an interrupt, do nothing (other than set the - * IRQ_DISABLED flag). This dares the interrupt actually to arrive. - * - * 2) If the interrupt dares to come in, raise the local mask against - * it (this will result in all the CPU masks being raised - * eventually). - * - * 3) To enable the interrupt, lower the mask on the local CPU and - * broadcast an Interrupt enable CPI which causes all other CPUs to - * adjust their masks accordingly. */ - -static void unmask_vic_irq(unsigned int irq) -{ - /* linux doesn't to processor-irq affinity, so enable on - * all CPUs we know about */ - int cpu = smp_processor_id(), real_cpu; - __u16 mask = (1 << irq); - __u32 processorList = 0; - unsigned long flags; - - VDEBUG(("VOYAGER: unmask_vic_irq(%d) CPU%d affinity 0x%lx\n", - irq, cpu, cpu_irq_affinity[cpu])); - spin_lock_irqsave(&vic_irq_lock, flags); - for_each_online_cpu(real_cpu) { - if (!(voyager_extended_vic_processors & (1 << real_cpu))) - continue; - if (!(cpu_irq_affinity[real_cpu] & mask)) { - /* irq has no affinity for this CPU, ignore */ - continue; - } - if (real_cpu == cpu) { - enable_local_vic_irq(irq); - } else if (vic_irq_mask[real_cpu] & mask) { - vic_irq_enable_mask[real_cpu] |= mask; - processorList |= (1 << real_cpu); - } - } - spin_unlock_irqrestore(&vic_irq_lock, flags); - if (processorList) - send_CPI(processorList, VIC_ENABLE_IRQ_CPI); -} - -static void mask_vic_irq(unsigned int irq) -{ - /* lazy disable, do nothing */ -} - -static void enable_local_vic_irq(unsigned int irq) -{ - __u8 cpu = smp_processor_id(); - __u16 mask = ~(1 << irq); - __u16 old_mask = vic_irq_mask[cpu]; - - vic_irq_mask[cpu] &= mask; - if (vic_irq_mask[cpu] == old_mask) - return; - - VDEBUG(("VOYAGER DEBUG: Enabling irq %d in hardware on CPU %d\n", - irq, cpu)); - - if (irq & 8) { - outb_p(cached_A1(cpu), 0xA1); - (void)inb_p(0xA1); - } else { - outb_p(cached_21(cpu), 0x21); - (void)inb_p(0x21); - } -} - -static void disable_local_vic_irq(unsigned int irq) -{ - __u8 cpu = smp_processor_id(); - __u16 mask = (1 << irq); - __u16 old_mask = vic_irq_mask[cpu]; - - if (irq == 7) - return; - - vic_irq_mask[cpu] |= mask; - if (old_mask == vic_irq_mask[cpu]) - return; - - VDEBUG(("VOYAGER DEBUG: Disabling irq %d in hardware on CPU %d\n", - irq, cpu)); - - if (irq & 8) { - outb_p(cached_A1(cpu), 0xA1); - (void)inb_p(0xA1); - } else { - outb_p(cached_21(cpu), 0x21); - (void)inb_p(0x21); - } -} - -/* The VIC is level triggered, so the ack can only be issued after the - * interrupt completes. However, we do Voyager lazy interrupt - * handling here: It is an extremely expensive operation to mask an - * interrupt in the vic, so we merely set a flag (IRQ_DISABLED). If - * this interrupt actually comes in, then we mask and ack here to push - * the interrupt off to another CPU */ -static void before_handle_vic_irq(unsigned int irq) -{ - irq_desc_t *desc = irq_to_desc(irq); - __u8 cpu = smp_processor_id(); - - _raw_spin_lock(&vic_irq_lock); - vic_intr_total++; - vic_intr_count[cpu]++; - - if (!(cpu_irq_affinity[cpu] & (1 << irq))) { - /* The irq is not in our affinity mask, push it off - * onto another CPU */ - VDEBUG(("VOYAGER DEBUG: affinity triggered disable of irq %d " - "on cpu %d\n", irq, cpu)); - disable_local_vic_irq(irq); - /* set IRQ_INPROGRESS to prevent the handler in irq.c from - * actually calling the interrupt routine */ - desc->status |= IRQ_REPLAY | IRQ_INPROGRESS; - } else if (desc->status & IRQ_DISABLED) { - /* Damn, the interrupt actually arrived, do the lazy - * disable thing. The interrupt routine in irq.c will - * not handle a IRQ_DISABLED interrupt, so nothing more - * need be done here */ - VDEBUG(("VOYAGER DEBUG: lazy disable of irq %d on CPU %d\n", - irq, cpu)); - disable_local_vic_irq(irq); - desc->status |= IRQ_REPLAY; - } else { - desc->status &= ~IRQ_REPLAY; - } - - _raw_spin_unlock(&vic_irq_lock); -} - -/* Finish the VIC interrupt: basically mask */ -static void after_handle_vic_irq(unsigned int irq) -{ - irq_desc_t *desc = irq_to_desc(irq); - - _raw_spin_lock(&vic_irq_lock); - { - unsigned int status = desc->status & ~IRQ_INPROGRESS; -#ifdef VOYAGER_DEBUG - __u16 isr; -#endif - - desc->status = status; - if ((status & IRQ_DISABLED)) - disable_local_vic_irq(irq); -#ifdef VOYAGER_DEBUG - /* DEBUG: before we ack, check what's in progress */ - isr = vic_read_isr(); - if ((isr & (1 << irq) && !(status & IRQ_REPLAY)) == 0) { - int i; - __u8 cpu = smp_processor_id(); - __u8 real_cpu; - int mask; /* Um... initialize me??? --RR */ - - printk("VOYAGER SMP: CPU%d lost interrupt %d\n", - cpu, irq); - for_each_possible_cpu(real_cpu, mask) { - - outb(VIC_CPU_MASQUERADE_ENABLE | real_cpu, - VIC_PROCESSOR_ID); - isr = vic_read_isr(); - if (isr & (1 << irq)) { - printk - ("VOYAGER SMP: CPU%d ack irq %d\n", - real_cpu, irq); - ack_vic_irq(irq); - } - outb(cpu, VIC_PROCESSOR_ID); - } - } -#endif /* VOYAGER_DEBUG */ - /* as soon as we ack, the interrupt is eligible for - * receipt by another CPU so everything must be in - * order here */ - ack_vic_irq(irq); - if (status & IRQ_REPLAY) { - /* replay is set if we disable the interrupt - * in the before_handle_vic_irq() routine, so - * clear the in progress bit here to allow the - * next CPU to handle this correctly */ - desc->status &= ~(IRQ_REPLAY | IRQ_INPROGRESS); - } -#ifdef VOYAGER_DEBUG - isr = vic_read_isr(); - if ((isr & (1 << irq)) != 0) - printk("VOYAGER SMP: after_handle_vic_irq() after " - "ack irq=%d, isr=0x%x\n", irq, isr); -#endif /* VOYAGER_DEBUG */ - } - _raw_spin_unlock(&vic_irq_lock); - - /* All code after this point is out of the main path - the IRQ - * may be intercepted by another CPU if reasserted */ -} - -/* Linux processor - interrupt affinity manipulations. - * - * For each processor, we maintain a 32 bit irq affinity mask. - * Initially it is set to all 1's so every processor accepts every - * interrupt. In this call, we change the processor's affinity mask: - * - * Change from enable to disable: - * - * If the interrupt ever comes in to the processor, we will disable it - * and ack it to push it off to another CPU, so just accept the mask here. - * - * Change from disable to enable: - * - * change the mask and then do an interrupt enable CPI to re-enable on - * the selected processors */ - -void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask) -{ - /* Only extended processors handle interrupts */ - unsigned long real_mask; - unsigned long irq_mask = 1 << irq; - int cpu; - - real_mask = cpus_addr(*mask)[0] & voyager_extended_vic_processors; - - if (cpus_addr(*mask)[0] == 0) - /* can't have no CPUs to accept the interrupt -- extremely - * bad things will happen */ - return; - - if (irq == 0) - /* can't change the affinity of the timer IRQ. This - * is due to the constraint in the voyager - * architecture that the CPI also comes in on and IRQ - * line and we have chosen IRQ0 for this. If you - * raise the mask on this interrupt, the processor - * will no-longer be able to accept VIC CPIs */ - return; - - if (irq >= 32) - /* You can only have 32 interrupts in a voyager system - * (and 32 only if you have a secondary microchannel - * bus) */ - return; - - for_each_online_cpu(cpu) { - unsigned long cpu_mask = 1 << cpu; - - if (cpu_mask & real_mask) { - /* enable the interrupt for this cpu */ - cpu_irq_affinity[cpu] |= irq_mask; - } else { - /* disable the interrupt for this cpu */ - cpu_irq_affinity[cpu] &= ~irq_mask; - } - } - /* this is magic, we now have the correct affinity maps, so - * enable the interrupt. This will send an enable CPI to - * those CPUs who need to enable it in their local masks, - * causing them to correct for the new affinity . If the - * interrupt is currently globally disabled, it will simply be - * disabled again as it comes in (voyager lazy disable). If - * the affinity map is tightened to disable the interrupt on a - * cpu, it will be pushed off when it comes in */ - unmask_vic_irq(irq); -} - -static void ack_vic_irq(unsigned int irq) -{ - if (irq & 8) { - outb(0x62, 0x20); /* Specific EOI to cascade */ - outb(0x60 | (irq & 7), 0xA0); - } else { - outb(0x60 | (irq & 7), 0x20); - } -} - -/* enable the CPIs. In the VIC, the CPIs are delivered by the 8259 - * but are not vectored by it. This means that the 8259 mask must be - * lowered to receive them */ -static __init void vic_enable_cpi(void) -{ - __u8 cpu = smp_processor_id(); - - /* just take a copy of the current mask (nop for boot cpu) */ - vic_irq_mask[cpu] = vic_irq_mask[boot_cpu_id]; - - enable_local_vic_irq(VIC_CPI_LEVEL0); - enable_local_vic_irq(VIC_CPI_LEVEL1); - /* for sys int and cmn int */ - enable_local_vic_irq(7); - - if (is_cpu_quad()) { - outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0); - outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1); - VDEBUG(("VOYAGER SMP: QIC ENABLE CPI: CPU%d: MASK 0x%x\n", - cpu, QIC_CPI_ENABLE)); - } - - VDEBUG(("VOYAGER SMP: ENABLE CPI: CPU%d: MASK 0x%x\n", - cpu, vic_irq_mask[cpu])); -} - -void voyager_smp_dump() -{ - int old_cpu = smp_processor_id(), cpu; - - /* dump the interrupt masks of each processor */ - for_each_online_cpu(cpu) { - __u16 imr, isr, irr; - unsigned long flags; - - local_irq_save(flags); - outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID); - imr = (inb(0xa1) << 8) | inb(0x21); - outb(0x0a, 0xa0); - irr = inb(0xa0) << 8; - outb(0x0a, 0x20); - irr |= inb(0x20); - outb(0x0b, 0xa0); - isr = inb(0xa0) << 8; - outb(0x0b, 0x20); - isr |= inb(0x20); - outb(old_cpu, VIC_PROCESSOR_ID); - local_irq_restore(flags); - printk("\tCPU%d: mask=0x%x, IMR=0x%x, IRR=0x%x, ISR=0x%x\n", - cpu, vic_irq_mask[cpu], imr, irr, isr); -#if 0 - /* These lines are put in to try to unstick an un ack'd irq */ - if (isr != 0) { - int irq; - for (irq = 0; irq < 16; irq++) { - if (isr & (1 << irq)) { - printk("\tCPU%d: ack irq %d\n", - cpu, irq); - local_irq_save(flags); - outb(VIC_CPU_MASQUERADE_ENABLE | cpu, - VIC_PROCESSOR_ID); - ack_vic_irq(irq); - outb(old_cpu, VIC_PROCESSOR_ID); - local_irq_restore(flags); - } - } - } -#endif - } -} - -void smp_voyager_power_off(void *dummy) -{ - if (smp_processor_id() == boot_cpu_id) - voyager_power_off(); - else - smp_stop_cpu_function(NULL); -} - -static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) -{ - /* FIXME: ignore max_cpus for now */ - smp_boot_cpus(); -} - -static void __cpuinit voyager_smp_prepare_boot_cpu(void) -{ - int cpu = smp_processor_id(); - switch_to_new_gdt(cpu); - - cpu_set(cpu, cpu_online_map); - cpu_set(cpu, cpu_callout_map); - cpu_set(cpu, cpu_possible_map); - cpu_set(cpu, cpu_present_map); - -} - -static int __cpuinit voyager_cpu_up(unsigned int cpu) -{ - /* This only works at boot for x86. See "rewrite" above. */ - if (cpu_isset(cpu, smp_commenced_mask)) - return -ENOSYS; - - /* In case one didn't come up */ - if (!cpu_isset(cpu, cpu_callin_map)) - return -EIO; - /* Unleash the CPU! */ - cpu_set(cpu, smp_commenced_mask); - while (!cpu_online(cpu)) - mb(); - return 0; -} - -static void __init voyager_smp_cpus_done(unsigned int max_cpus) -{ - zap_low_mappings(); -} - -void __init smp_setup_processor_id(void) -{ - current_thread_info()->cpu = hard_smp_processor_id(); -} - -static void voyager_send_call_func(const struct cpumask *callmask) -{ - __u32 mask = cpus_addr(*callmask)[0] & ~(1 << smp_processor_id()); - send_CPI(mask, VIC_CALL_FUNCTION_CPI); -} - -static void voyager_send_call_func_single(int cpu) -{ - send_CPI(1 << cpu, VIC_CALL_FUNCTION_SINGLE_CPI); -} - -struct smp_ops smp_ops = { - .smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu, - .smp_prepare_cpus = voyager_smp_prepare_cpus, - .cpu_up = voyager_cpu_up, - .smp_cpus_done = voyager_smp_cpus_done, - - .smp_send_stop = voyager_smp_send_stop, - .smp_send_reschedule = voyager_smp_send_reschedule, - - .send_call_func_ipi = voyager_send_call_func, - .send_call_func_single_ipi = voyager_send_call_func_single, -}; diff --git a/arch/x86/mach-voyager/voyager_thread.c b/arch/x86/mach-voyager/voyager_thread.c deleted file mode 100644 index 15464a20fb3..00000000000 --- a/arch/x86/mach-voyager/voyager_thread.c +++ /dev/null @@ -1,128 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * This module provides the machine status monitor thread for the - * voyager architecture. This allows us to monitor the machine - * environment (temp, voltage, fan function) and the front panel and - * internal UPS. If a fault is detected, this thread takes corrective - * action (usually just informing init) - * */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct task_struct *voyager_thread; -static __u8 set_timeout; - -static int execute(const char *string) -{ - int ret; - - char *envp[] = { - "HOME=/", - "TERM=linux", - "PATH=/sbin:/usr/sbin:/bin:/usr/bin", - NULL, - }; - char *argv[] = { - "/bin/bash", - "-c", - (char *)string, - NULL, - }; - - if ((ret = - call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) { - printk(KERN_ERR "Voyager failed to run \"%s\": %i\n", string, - ret); - } - return ret; -} - -static void check_from_kernel(void) -{ - if (voyager_status.switch_off) { - - /* FIXME: This should be configurable via proc */ - execute("umask 600; echo 0 > /etc/initrunlvl; kill -HUP 1"); - } else if (voyager_status.power_fail) { - VDEBUG(("Voyager daemon detected AC power failure\n")); - - /* FIXME: This should be configureable via proc */ - execute("umask 600; echo F > /etc/powerstatus; kill -PWR 1"); - set_timeout = 1; - } -} - -static void check_continuing_condition(void) -{ - if (voyager_status.power_fail) { - __u8 data; - voyager_cat_psi(VOYAGER_PSI_SUBREAD, - VOYAGER_PSI_AC_FAIL_REG, &data); - if ((data & 0x1f) == 0) { - /* all power restored */ - printk(KERN_NOTICE - "VOYAGER AC power restored, cancelling shutdown\n"); - /* FIXME: should be user configureable */ - execute - ("umask 600; echo O > /etc/powerstatus; kill -PWR 1"); - set_timeout = 0; - } - } -} - -static int thread(void *unused) -{ - printk(KERN_NOTICE "Voyager starting monitor thread\n"); - - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(set_timeout ? HZ : MAX_SCHEDULE_TIMEOUT); - - VDEBUG(("Voyager Daemon awoken\n")); - if (voyager_status.request_from_kernel == 0) { - /* probably awoken from timeout */ - check_continuing_condition(); - } else { - check_from_kernel(); - voyager_status.request_from_kernel = 0; - } - } -} - -static int __init voyager_thread_start(void) -{ - voyager_thread = kthread_run(thread, NULL, "kvoyagerd"); - if (IS_ERR(voyager_thread)) { - printk(KERN_ERR - "Voyager: Failed to create system monitor thread.\n"); - return PTR_ERR(voyager_thread); - } - return 0; -} - -static void __exit voyager_thread_stop(void) -{ - kthread_stop(voyager_thread); -} - -module_init(voyager_thread_start); -module_exit(voyager_thread_stop); diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 87b9ab16642..b83e119fbeb 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -6,7 +6,7 @@ config XEN bool "Xen guest support" select PARAVIRT select PARAVIRT_CLOCK - depends on X86_64 || (X86_32 && X86_PAE && !(X86_VISWS || X86_VOYAGER)) + depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS) depends on X86_CMPXCHG && X86_TSC help This is the Linux Xen port. Enabling this will allow the diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig index 76f2b36881c..a3d3cbab359 100644 --- a/drivers/lguest/Kconfig +++ b/drivers/lguest/Kconfig @@ -1,6 +1,6 @@ config LGUEST tristate "Linux hypervisor example code" - depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX && !X86_VOYAGER + depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX select HVC_DRIVER ---help--- This is a very simple module which allows you to run -- cgit v1.2.3-70-g09d2 From 9976b39b5031bbf76f715893cf080b6a17683881 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 27 Feb 2009 09:19:26 -0800 Subject: xen: deal with virtually mapped percpu data The virtually mapped percpu space causes us two problems: - for hypercalls which take an mfn, we need to do a full pagetable walk to convert the percpu va into an mfn, and - when a hypercall requires a page to be mapped RO via all its aliases, we need to make sure its RO in both the percpu mapping and in the linear mapping This primarily affects the gdt and the vcpu info structure. Signed-off-by: Jeremy Fitzhardinge Cc: Xen-devel Cc: Gerd Hoffmann Cc: Rusty Russell Cc: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/xen/page.h | 1 + arch/x86/xen/enlighten.c | 10 ++++++---- arch/x86/xen/mmu.c | 7 +++++++ arch/x86/xen/smp.c | 6 +++++- 4 files changed, 19 insertions(+), 5 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 4bd990ee43d..1a918dde46b 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -164,6 +164,7 @@ static inline pte_t __pte_ma(pteval_t x) xmaddr_t arbitrary_virt_to_machine(void *address); +unsigned long arbitrary_virt_to_mfn(void *vaddr); void make_lowmem_page_readonly(void *vaddr); void make_lowmem_page_readwrite(void *vaddr); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 86497d5f44c..352ea683065 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -103,7 +103,7 @@ static void xen_vcpu_setup(int cpu) vcpup = &per_cpu(xen_vcpu_info, cpu); - info.mfn = virt_to_mfn(vcpup); + info.mfn = arbitrary_virt_to_mfn(vcpup); info.offset = offset_in_page(vcpup); printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n", @@ -301,8 +301,10 @@ static void xen_load_gdt(const struct desc_ptr *dtr) frames = mcs.args; for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { - frames[f] = virt_to_mfn(va); + frames[f] = arbitrary_virt_to_mfn((void *)va); + make_lowmem_page_readonly((void *)va); + make_lowmem_page_readonly(mfn_to_virt(frames[f])); } MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); @@ -314,7 +316,7 @@ static void load_TLS_descriptor(struct thread_struct *t, unsigned int cpu, unsigned int i) { struct desc_struct *gdt = get_cpu_gdt_table(cpu); - xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); + xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); struct multicall_space mc = __xen_mc_entry(0); MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); @@ -488,7 +490,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, break; default: { - xmaddr_t maddr = virt_to_machine(&dt[entry]); + xmaddr_t maddr = arbitrary_virt_to_machine(&dt[entry]); xen_mc_flush(); if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc)) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 319bd40a57c..cb6afa4ec95 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -276,6 +276,13 @@ void set_phys_to_machine(unsigned long pfn, unsigned long mfn) p2m_top[topidx][idx] = mfn; } +unsigned long arbitrary_virt_to_mfn(void *vaddr) +{ + xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); + + return PFN_DOWN(maddr.maddr); +} + xmaddr_t arbitrary_virt_to_machine(void *vaddr) { unsigned long address = (unsigned long)vaddr; diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 035582ae815..8d470562ffc 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -219,6 +219,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) { struct vcpu_guest_context *ctxt; struct desc_struct *gdt; + unsigned long gdt_mfn; if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) return 0; @@ -248,9 +249,12 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->ldt_ents = 0; BUG_ON((unsigned long)gdt & ~PAGE_MASK); + + gdt_mfn = arbitrary_virt_to_mfn(gdt); make_lowmem_page_readonly(gdt); + make_lowmem_page_readonly(mfn_to_virt(gdt_mfn)); - ctxt->gdt_frames[0] = virt_to_mfn(gdt); + ctxt->gdt_frames[0] = gdt_mfn; ctxt->gdt_ents = GDT_ENTRIES; ctxt->user_regs.cs = __KERNEL_CS; -- cgit v1.2.3-70-g09d2