From 26f80bd6a9ab17bc8a60b6092e7c0d05c5927ce5 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Convert irqstacks to per-cpu Move the irqstackptr variable from the PDA to per-cpu. Make the stacks themselves per-cpu, removing some specific allocation code. Add a seperate flag (is_boot_cpu) to simplify the per-cpu boot adjustments. tj: * sprinkle some underbars around. * irq_stack_ptr is not used till traps_init(), no reason to initialize it early. On SMP, just leaving it NULL till proper initialization in setup_per_cpu_areas() works. Dropped is_boot_cpu and early irq_stack_ptr initialization. * do DECLARE/DEFINE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack) instead of (char, irq_stack[IRQ_STACK_SIZE]). Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 091cd8855f2..f511246fa6c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -378,6 +378,9 @@ union thread_xstate { #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); + +DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack); +DECLARE_PER_CPU(char *, irq_stack_ptr); #endif extern void print_cpu_info(struct cpuinfo_x86 *); -- cgit v1.2.3-70-g09d2 From 947e76cdc34c782fc947313d4331380686eebbad Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 12:21:28 +0900 Subject: x86: move stack_canary into irq_stack Impact: x86_64 percpu area layout change, irq_stack now at the beginning Now that the PDA is empty except for the stack canary, it can be removed. The irqstack is moved to the start of the per-cpu section. If the stack protector is enabled, the canary overlaps the bottom 48 bytes of the irqstack. tj: * updated subject * dropped asm relocation of irq_stack_ptr * updated comments a bit * rebased on top of stack canary changes Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 3 --- arch/x86/include/asm/percpu.h | 6 ------ arch/x86/include/asm/processor.h | 23 ++++++++++++++++++++++- arch/x86/include/asm/stackprotector.h | 6 +++--- arch/x86/include/asm/system.h | 4 ++-- arch/x86/kernel/asm-offsets_64.c | 4 ---- arch/x86/kernel/cpu/common.c | 7 ++++--- arch/x86/kernel/head_64.S | 13 +++++-------- arch/x86/kernel/setup_percpu.c | 34 ++++------------------------------ arch/x86/kernel/vmlinux_64.lds.S | 8 ++++++-- 10 files changed, 46 insertions(+), 62 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index b473e952439..ba46416634f 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -17,9 +17,6 @@ struct x8664_pda { unsigned long unused4; int unused5; unsigned int unused6; /* 36 was cpunumber */ - unsigned long stack_canary; /* 40 stack canary value */ - /* gcc-ABI: this canary MUST be at - offset 40!!! */ short in_bootmem; /* pda lives in bootmem */ } ____cacheline_aligned_in_smp; diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 165d5272ece..ce980db5e59 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -133,12 +133,6 @@ do { \ /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU(unsigned long, this_cpu_off); -#ifdef CONFIG_X86_64 -extern void load_pda_offset(int cpu); -#else -static inline void load_pda_offset(int cpu) { } -#endif - #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f511246fa6c..48676b943b9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -379,8 +379,29 @@ union thread_xstate { #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); -DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack); +union irq_stack_union { + char irq_stack[IRQ_STACK_SIZE]; + /* + * GCC hardcodes the stack canary as %gs:40. Since the + * irq_stack is the object at %gs:0, we reserve the bottom + * 48 bytes of the irq stack for the canary. + */ + struct { + char gs_base[40]; + unsigned long stack_canary; + }; +}; + +DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); + +static inline void load_gs_base(int cpu) +{ + /* Memory clobbers used to order pda/percpu accesses */ + mb(); + wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); + mb(); +} #endif extern void print_cpu_info(struct cpuinfo_x86 *); diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 2383e5bb475..36a700acaf2 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -2,7 +2,7 @@ #define _ASM_STACKPROTECTOR_H 1 #include -#include +#include /* * Initialize the stackprotector canary value. @@ -19,7 +19,7 @@ static __always_inline void boot_init_stack_canary(void) * Build time only check to make sure the stack_canary is at * offset 40 in the pda; this is a gcc ABI requirement */ - BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); + BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); /* * We both use the random pool and the current TSC as a source @@ -32,7 +32,7 @@ static __always_inline void boot_init_stack_canary(void) canary += tsc + (tsc << 32UL); current->stack_canary = canary; - write_pda(stack_canary, canary); + percpu_write(irq_stack_union.stack_canary, canary); } #endif diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index b77bd8bd3cc..52eb748a68a 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -89,10 +89,10 @@ do { \ #ifdef CONFIG_CC_STACKPROTECTOR #define __switch_canary \ "movq %P[task_canary](%%rsi),%%r8\n\t" \ - "movq %%r8,%%gs:%P[pda_canary]\n\t" + "movq %%r8,%%gs:%P[gs_canary]\n\t" #define __switch_canary_param \ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) \ - , [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary)) + , [gs_canary] "i" (offsetof(union irq_stack_union, stack_canary)) #else /* CC_STACKPROTECTOR */ #define __switch_canary #define __switch_canary_param diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 64c834a39aa..94f9c8b39d2 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -48,10 +48,6 @@ int main(void) #endif BLANK(); #undef ENTRY -#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - DEFINE(pda_size, sizeof(struct x8664_pda)); - BLANK(); -#undef ENTRY #ifdef CONFIG_PARAVIRT BLANK(); OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f83a4d6160f..098934e72a1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -881,12 +881,13 @@ __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; -DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack); +DEFINE_PER_CPU_FIRST(union irq_stack_union, + irq_stack_union) __aligned(PAGE_SIZE); #ifdef CONFIG_SMP DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */ #else DEFINE_PER_CPU(char *, irq_stack_ptr) = - per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64; + per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; #endif DEFINE_PER_CPU(unsigned long, kernel_stack) = @@ -960,7 +961,7 @@ void __cpuinit cpu_init(void) loadsegment(fs, 0); loadsegment(gs, 0); - load_pda_offset(cpu); + load_gs_base(cpu); #ifdef CONFIG_NUMA if (cpu != 0 && percpu_read(node_number) == 0 && diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 98ea26a2fca..a0a2b5ca9b7 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -242,13 +242,10 @@ ENTRY(secondary_startup_64) /* Set up %gs. * - * On SMP, %gs should point to the per-cpu area. For initial - * boot, make %gs point to the init data section. For a - * secondary CPU,initial_gs should be set to its pda address - * before the CPU runs this code. - * - * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't - * change. + * The base of %gs always points to the bottom of the irqstack + * union. If the stack protector canary is enabled, it is + * located at %gs:40. Note that, on SMP, the boot cpu uses + * init data section till per cpu areas are set up. */ movl $MSR_GS_BASE,%ecx movq initial_gs(%rip),%rax @@ -281,7 +278,7 @@ ENTRY(secondary_startup_64) #ifdef CONFIG_SMP .quad __per_cpu_load #else - .quad PER_CPU_VAR(__pda) + .quad PER_CPU_VAR(irq_stack_union) #endif __FINITDATA diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index efbafbbff58..90b8e154bb5 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -77,30 +77,6 @@ static void __init setup_node_to_cpumask_map(void); static inline void setup_node_to_cpumask_map(void) { } #endif -/* - * Define load_pda_offset() and per-cpu __pda for x86_64. - * load_pda_offset() is responsible for loading the offset of pda into - * %gs. - * - * On SMP, pda offset also duals as percpu base address and thus it - * should be at the start of per-cpu area. To achieve this, it's - * preallocated in vmlinux_64.lds.S directly instead of using - * DEFINE_PER_CPU(). - */ -#ifdef CONFIG_X86_64 -void __cpuinit load_pda_offset(int cpu) -{ - /* Memory clobbers used to order pda/percpu accesses */ - mb(); - wrmsrl(MSR_GS_BASE, cpu_pda(cpu)); - mb(); -} -#ifndef CONFIG_SMP -DEFINE_PER_CPU(struct x8664_pda, __pda); -#endif -EXPORT_PER_CPU_SYMBOL(__pda); -#endif /* CONFIG_SMP && CONFIG_X86_64 */ - #ifdef CONFIG_X86_64 /* correctly size the local cpu masks */ @@ -207,15 +183,13 @@ void __init setup_per_cpu_areas(void) per_cpu(cpu_number, cpu) = cpu; #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = - (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64; + per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; /* - * CPU0 modified pda in the init data area, reload pda - * offset for CPU0 and clear the area for others. + * Up to this point, CPU0 has been using .data.init + * area. Reload %gs offset for CPU0. */ if (cpu == 0) - load_pda_offset(0); - else - memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu))); + load_gs_base(cpu); #endif DBG("PERCPU: cpu %4d %p\n", cpu, ptr); diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index a09abb8fb97..c9740996430 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -220,8 +220,7 @@ SECTIONS * so that it can be accessed as a percpu variable. */ . = ALIGN(PAGE_SIZE); - PERCPU_VADDR_PREALLOC(0, :percpu, pda_size) - per_cpu____pda = __per_cpu_start; + PERCPU_VADDR(0, :percpu) #else PERCPU(PAGE_SIZE) #endif @@ -262,3 +261,8 @@ SECTIONS */ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), "kernel image bigger than KERNEL_IMAGE_SIZE") + +#ifdef CONFIG_SMP +ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); +#endif -- cgit v1.2.3-70-g09d2 From b1882e68d17a93b523dce09c3a181319aace2f0e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 23 Jan 2009 17:18:52 -0800 Subject: x86: clean up stray space in Impact: Whitespace cleanup only Clean up a stray space character in arch/x86/include/asm/processor.h. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 091cd8855f2..ac8fab3b868 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -73,7 +73,7 @@ struct cpuinfo_x86 { char pad0; #else /* Number of 4K pages in DTLB/ITLB combined(in pages): */ - int x86_tlbsize; + int x86_tlbsize; __u8 x86_virt_bits; __u8 x86_phys_bits; #endif -- cgit v1.2.3-70-g09d2 From b2d2f4312b117a6cc647c8521e2643a88771f757 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: x86: initialize per-cpu GDT segment in per-cpu setup Impact: cleanup Rename init_gdt() to setup_percpu_segment(), and move it to setup_percpu.c. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 1 - arch/x86/kernel/Makefile | 3 +-- arch/x86/kernel/setup_percpu.c | 14 ++++++++++++++ arch/x86/kernel/smpboot.c | 4 ---- arch/x86/kernel/smpcommon.c | 25 ------------------------- arch/x86/mach-voyager/voyager_smp.c | 2 -- arch/x86/xen/smp.c | 1 - 7 files changed, 15 insertions(+), 35 deletions(-) delete mode 100644 arch/x86/kernel/smpcommon.c (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 48676b943b9..32c30b02b51 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -778,7 +778,6 @@ extern struct desc_ptr early_gdt_descr; extern void cpu_set_gdt(int); extern void switch_to_new_gdt(void); extern void cpu_init(void); -extern void init_gdt(int cpu); static inline unsigned long get_debugctlmsr(void) { diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 73de055c29c..37fa30bada1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -60,8 +60,7 @@ obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o obj-$(CONFIG_SMP) += setup_percpu.o -obj-$(CONFIG_X86_32_SMP) += smpcommon.o -obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o +obj-$(CONFIG_X86_64_SMP) += tsc_sync.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 599dc1cc1da..bcca3a7b374 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -40,6 +40,19 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { }; EXPORT_SYMBOL(__per_cpu_offset); +static inline void setup_percpu_segment(int cpu) +{ +#ifdef CONFIG_X86_32 + struct desc_struct gdt; + + pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF, + 0x2 | DESCTYPE_S, 0x8); + gdt.s = 1; + write_gdt_entry(get_cpu_gdt_table(cpu), + GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); +#endif +} + /* * Great future plan: * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. @@ -81,6 +94,7 @@ void __init setup_per_cpu_areas(void) per_cpu_offset(cpu) = ptr - __per_cpu_start; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; + setup_percpu_segment(cpu); /* * Copy data used in early init routines from the initial arrays to the * per cpu data areas. These arrays then become expendable and the diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index def770b57b5..f9dbcff4354 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -793,7 +793,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) do_rest: per_cpu(current_task, cpu) = c_idle.idle; #ifdef CONFIG_X86_32 - init_gdt(cpu); /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); #else @@ -1186,9 +1185,6 @@ out: void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); -#ifdef CONFIG_X86_32 - init_gdt(me); -#endif switch_to_new_gdt(); /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c deleted file mode 100644 index 5ec29a1a846..00000000000 --- a/arch/x86/kernel/smpcommon.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * SMP stuff which is common to all sub-architectures. - */ -#include -#include -#include - -#ifdef CONFIG_X86_32 -/* - * Initialize the CPU's GDT. This is either the boot CPU doing itself - * (still using the master per-cpu area), or a CPU doing it for a - * secondary which will soon come up. - */ -__cpuinit void init_gdt(int cpu) -{ - struct desc_struct gdt; - - pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, - 0x2 | DESCTYPE_S, 0x8); - gdt.s = 1; - - write_gdt_entry(get_cpu_gdt_table(cpu), - GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); -} -#endif diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index dd82f2052f3..331cd6d5648 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -530,7 +530,6 @@ static void __init do_boot_cpu(__u8 cpu) /* init_tasks (in sched.c) is indexed logically */ stack_start.sp = (void *)idle->thread.sp; - init_gdt(cpu); per_cpu(current_task, cpu) = idle; early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); irq_ctx_init(cpu); @@ -1747,7 +1746,6 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) static void __cpuinit voyager_smp_prepare_boot_cpu(void) { - init_gdt(smp_processor_id()); switch_to_new_gdt(); cpu_set(smp_processor_id(), cpu_online_map); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 72c2eb9b64c..7735e3dd359 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -281,7 +281,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) per_cpu(current_task, cpu) = idle; #ifdef CONFIG_X86_32 - init_gdt(cpu); irq_ctx_init(cpu); #else clear_tsk_thread_flag(idle, TIF_FORK); -- cgit v1.2.3-70-g09d2 From 1825b8edc2034c012ae48f797d74efd1bd9d4f72 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: x86: remove extra barriers from load_gs_base() Impact: optimization mb() generates an mfence instruction, which is not needed here. Only a compiler barrier is needed, and that is handled by the memory clobber in the wrmsrl function. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 32c30b02b51..794234eba31 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -397,10 +397,7 @@ DECLARE_PER_CPU(char *, irq_stack_ptr); static inline void load_gs_base(int cpu) { - /* Memory clobbers used to order pda/percpu accesses */ - mb(); wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); - mb(); } #endif -- cgit v1.2.3-70-g09d2 From 2697fbd5faf19c84c17441b1752bdcbdcfd1248c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: x86: load new GDT after setting up boot cpu per-cpu area Impact: sync 32 and 64-bit code Merge load_gs_base() into switch_to_new_gdt(). Load the GDT and per-cpu state for the boot cpu when its new area is set up. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 5 ----- arch/x86/kernel/cpu/common.c | 15 +++++++++------ arch/x86/kernel/setup_percpu.c | 6 +++--- 3 files changed, 12 insertions(+), 14 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 794234eba31..befa20b4a68 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -394,11 +394,6 @@ union irq_stack_union { DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); - -static inline void load_gs_base(int cpu) -{ - wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); -} #endif extern void print_cpu_info(struct cpuinfo_x86 *); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 67e30c8a282..0c766b80d91 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -258,12 +258,17 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; void switch_to_new_gdt(void) { struct desc_ptr gdt_descr; + int cpu = smp_processor_id(); - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.address = (long)get_cpu_gdt_table(cpu); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); + /* Reload the per-cpu base */ #ifdef CONFIG_X86_32 - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); + loadsegment(fs, __KERNEL_PERCPU); +#else + loadsegment(gs, 0); + wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); #endif } @@ -968,10 +973,6 @@ void __cpuinit cpu_init(void) struct task_struct *me; int i; - loadsegment(fs, 0); - loadsegment(gs, 0); - load_gs_base(cpu); - #ifdef CONFIG_NUMA if (cpu != 0 && percpu_read(node_number) == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) @@ -993,6 +994,8 @@ void __cpuinit cpu_init(void) */ switch_to_new_gdt(); + loadsegment(fs, 0); + load_idt((const struct desc_ptr *)&idt_descr); memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index bcca3a7b374..4caa78d7cb1 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -112,14 +112,14 @@ void __init setup_per_cpu_areas(void) #ifdef CONFIG_NUMA per_cpu(x86_cpu_to_node_map, cpu) = early_per_cpu_map(x86_cpu_to_node_map, cpu); +#endif #endif /* * Up to this point, the boot CPU has been using .data.init - * area. Reload %gs offset for the boot CPU. + * area. Reload any changed state for the boot CPU. */ if (cpu == boot_cpu_id) - load_gs_base(cpu); -#endif + switch_to_new_gdt(); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } -- cgit v1.2.3-70-g09d2 From 552be871e67ff577ed36beb2f53d078b42304739 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 30 Jan 2009 17:47:53 +0900 Subject: x86: pass in cpu number to switch_to_new_gdt() Impact: cleanup, prepare for xen boot fix. Xen needs to call this function very early to setup the GDT and per-cpu segments. Remove the call to smp_processor_id() and just pass in the cpu number. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 7 +++---- arch/x86/kernel/setup_percpu.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/mach-voyager/voyager_smp.c | 11 ++++++----- 5 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index befa20b4a68..1c25eb69ea8 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -768,7 +768,7 @@ extern int sysenter_setup(void); extern struct desc_ptr early_gdt_descr; extern void cpu_set_gdt(int); -extern void switch_to_new_gdt(void); +extern void switch_to_new_gdt(int); extern void cpu_init(void); static inline unsigned long get_debugctlmsr(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 652fdc9a757..6eacd64b602 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -255,10 +255,9 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, * it's on the real one. */ -void switch_to_new_gdt(void) +void switch_to_new_gdt(int cpu) { struct desc_ptr gdt_descr; - int cpu = smp_processor_id(); gdt_descr.address = (long)get_cpu_gdt_table(cpu); gdt_descr.size = GDT_SIZE - 1; @@ -993,7 +992,7 @@ void __cpuinit cpu_init(void) * and set up the GDT descriptor: */ - switch_to_new_gdt(); + switch_to_new_gdt(cpu); loadsegment(fs, 0); load_idt((const struct desc_ptr *)&idt_descr); @@ -1098,7 +1097,7 @@ void __cpuinit cpu_init(void) clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); load_idt(&idt_descr); - switch_to_new_gdt(); + switch_to_new_gdt(cpu); /* * Set up and load the per-CPU TSS and LDT diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 0d1e7ac439f..ef91747bbed 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -122,7 +122,7 @@ void __init setup_per_cpu_areas(void) * area. Reload any changed state for the boot CPU. */ if (cpu == boot_cpu_id) - switch_to_new_gdt(); + switch_to_new_gdt(cpu); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f9dbcff4354..612d3c74f6a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1185,7 +1185,7 @@ out: void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); - switch_to_new_gdt(); + switch_to_new_gdt(me); /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); per_cpu(cpu_state, me) = CPU_ONLINE; diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 331cd6d5648..58c7cac3440 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -1746,12 +1746,13 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) static void __cpuinit voyager_smp_prepare_boot_cpu(void) { - switch_to_new_gdt(); + int cpu = smp_processor_id(); + switch_to_new_gdt(cpu); - cpu_set(smp_processor_id(), cpu_online_map); - cpu_set(smp_processor_id(), cpu_callout_map); - cpu_set(smp_processor_id(), cpu_possible_map); - cpu_set(smp_processor_id(), cpu_present_map); + cpu_set(cpu, cpu_online_map); + cpu_set(cpu, cpu_callout_map); + cpu_set(cpu, cpu_possible_map); + cpu_set(cpu, cpu_present_map); } static int __cpuinit voyager_cpu_up(unsigned int cpu) -- cgit v1.2.3-70-g09d2 From 11e3a840cd5b731cdd8f6f956dfae78a8046d09c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 30 Jan 2009 17:47:54 +0900 Subject: x86: split loading percpu segments from loading gdt Impact: split out a function, no functional change Xen needs to be able to access percpu data from very early on. For various reasons, it cannot also load the gdt at that time. It does, however, have a pefectly functional gdt at that point, so there's no pressing need to reload the gdt. Split the function to load the segment registers off, so Xen can call it directly. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/common.c | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1c25eb69ea8..656d02ea509 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -769,6 +769,7 @@ extern struct desc_ptr early_gdt_descr; extern void cpu_set_gdt(int); extern void switch_to_new_gdt(int); +extern void load_percpu_segment(int); extern void cpu_init(void); static inline unsigned long get_debugctlmsr(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6eacd64b602..0f73ea42308 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -253,6 +253,16 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; +void load_percpu_segment(int cpu) +{ +#ifdef CONFIG_X86_32 + loadsegment(fs, __KERNEL_PERCPU); +#else + loadsegment(gs, 0); + wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); +#endif +} + /* Current gdt points %fs at the "master" per-cpu area: after this, * it's on the real one. */ void switch_to_new_gdt(int cpu) @@ -263,12 +273,8 @@ void switch_to_new_gdt(int cpu) gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); /* Reload the per-cpu base */ -#ifdef CONFIG_X86_32 - loadsegment(fs, __KERNEL_PERCPU); -#else - loadsegment(gs, 0); - wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); -#endif + + load_percpu_segment(cpu); } static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; -- cgit v1.2.3-70-g09d2 From 2add8e235cbe0dcd672c33fc322754e15500238c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 8 Feb 2009 09:58:39 -0500 Subject: x86: use linker to offset symbols by __per_cpu_load Impact: cleanup and bug fix Use the linker to create symbols for certain per-cpu variables that are offset by __per_cpu_load. This allows the removal of the runtime fixup of the GDT pointer, which fixes a bug with resume reported by Jiri Slaby. Reported-by: Jiri Slaby Signed-off-by: Brian Gerst Acked-by: Jiri Slaby Signed-off-by: Ingo Molnar --- arch/x86/include/asm/percpu.h | 22 ++++++++++++++++++++++ arch/x86/include/asm/processor.h | 2 ++ arch/x86/kernel/cpu/common.c | 6 +----- arch/x86/kernel/head_64.S | 21 ++------------------- arch/x86/kernel/vmlinux_64.lds.S | 8 ++++++++ 5 files changed, 35 insertions(+), 24 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0b64af4f13a..aee103b26d0 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -34,6 +34,12 @@ #define PER_CPU_VAR(var) per_cpu__##var #endif /* SMP */ +#ifdef CONFIG_X86_64_SMP +#define INIT_PER_CPU_VAR(var) init_per_cpu__##var +#else +#define INIT_PER_CPU_VAR(var) per_cpu__##var +#endif + #else /* ...!ASSEMBLY */ #include @@ -45,6 +51,22 @@ #define __percpu_arg(x) "%" #x #endif +/* + * Initialized pointers to per-cpu variables needed for the boot + * processor need to use these macros to get the proper address + * offset from __per_cpu_load on SMP. + * + * There also must be an entry in vmlinux_64.lds.S + */ +#define DECLARE_INIT_PER_CPU(var) \ + extern typeof(per_cpu_var(var)) init_per_cpu_var(var) + +#ifdef CONFIG_X86_64_SMP +#define init_per_cpu_var(var) init_per_cpu__##var +#else +#define init_per_cpu_var(var) per_cpu_var(var) +#endif + /* For arch-specific code, we can use direct single-insn ops (they * don't give an lvalue though). */ extern void __bad_percpu_size(void); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 656d02ea509..373d3f628d2 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -393,6 +393,8 @@ union irq_stack_union { }; DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); +DECLARE_INIT_PER_CPU(irq_stack_union); + DECLARE_PER_CPU(char *, irq_stack_ptr); #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0f73ea42308..41b0de6df87 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -902,12 +902,8 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE); -#ifdef CONFIG_SMP -DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */ -#else DEFINE_PER_CPU(char *, irq_stack_ptr) = - per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; -#endif + init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; DEFINE_PER_CPU(unsigned long, kernel_stack) = (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index a0a2b5ca9b7..2e648e3a5ea 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -205,19 +205,6 @@ ENTRY(secondary_startup_64) pushq $0 popfq -#ifdef CONFIG_SMP - /* - * Fix up static pointers that need __per_cpu_load added. The assembler - * is unable to do this directly. This is only needed for the boot cpu. - * These values are set up with the correct base addresses by C code for - * secondary cpus. - */ - movq initial_gs(%rip), %rax - cmpl $0, per_cpu__cpu_number(%rax) - jne 1f - addq %rax, early_gdt_descr_base(%rip) -1: -#endif /* * We must switch to a new descriptor in kernel space for the GDT * because soon the kernel won't have access anymore to the userspace @@ -275,11 +262,7 @@ ENTRY(secondary_startup_64) ENTRY(initial_code) .quad x86_64_start_kernel ENTRY(initial_gs) -#ifdef CONFIG_SMP - .quad __per_cpu_load -#else - .quad PER_CPU_VAR(irq_stack_union) -#endif + .quad INIT_PER_CPU_VAR(irq_stack_union) __FINITDATA ENTRY(stack_start) @@ -425,7 +408,7 @@ NEXT_PAGE(level2_spare_pgt) early_gdt_descr: .word GDT_ENTRIES*8-1 early_gdt_descr_base: - .quad per_cpu__gdt_page + .quad INIT_PER_CPU_VAR(gdt_page) ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 07f62d287ff..087a7f2c639 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -257,6 +257,14 @@ SECTIONS DWARF_DEBUG } + /* + * Per-cpu symbols which need to be offset from __per_cpu_load + * for the boot processor. + */ +#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load +INIT_PER_CPU(gdt_page); +INIT_PER_CPU(irq_stack_union); + /* * Build-time check on the image size: */ -- cgit v1.2.3-70-g09d2 From 60a5317ff0f42dd313094b88f809f63041568b08 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: x86: implement x86_32 stack protector Impact: stack protector for x86_32 Implement stack protector for x86_32. GDT entry 28 is used for it. It's set to point to stack_canary-20 and have the length of 24 bytes. CONFIG_CC_STACKPROTECTOR turns off CONFIG_X86_32_LAZY_GS and sets %gs to the stack canary segment on entry. As %gs is otherwise unused by the kernel, the canary can be anywhere. It's defined as a percpu variable. x86_32 exception handlers take register frame on stack directly as struct pt_regs. With -fstack-protector turned on, gcc copies the whole structure after the stack canary and (of course) doesn't copy back on return thus losing all changed. For now, -fno-stack-protector is added to all files which contain those functions. We definitely need something better. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 3 +- arch/x86/include/asm/processor.h | 4 ++ arch/x86/include/asm/segment.h | 9 ++- arch/x86/include/asm/stackprotector.h | 91 +++++++++++++++++++++++++++++-- arch/x86/include/asm/system.h | 21 +++++++ arch/x86/kernel/Makefile | 18 ++++++ arch/x86/kernel/cpu/common.c | 17 ++++-- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/head_32.S | 20 ++++++- arch/x86/kernel/process_32.c | 1 + arch/x86/kernel/setup_percpu.c | 2 + scripts/gcc-x86_32-has-stack-protector.sh | 8 +++ 12 files changed, 180 insertions(+), 16 deletions(-) create mode 100644 scripts/gcc-x86_32-has-stack-protector.sh (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5bcdede71ba..f760a22f95d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -209,7 +209,7 @@ config X86_TRAMPOLINE config X86_32_LAZY_GS def_bool y - depends on X86_32 + depends on X86_32 && !CC_STACKPROTECTOR config KTIME_SCALAR def_bool X86_32 @@ -1356,7 +1356,6 @@ config CC_STACKPROTECTOR_ALL config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" - depends on X86_64 select CC_STACKPROTECTOR_ALL help This option turns on the -fstack-protector GCC feature. This diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 9763eb70013..5a947210425 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -396,7 +396,11 @@ DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); +#else /* X86_64 */ +#ifdef CONFIG_CC_STACKPROTECTOR +DECLARE_PER_CPU(unsigned long, stack_canary); #endif +#endif /* X86_64 */ extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int xstate_size; diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 1dc1b51ac62..14e0ed86a6f 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -61,7 +61,7 @@ * * 26 - ESPFIX small SS * 27 - per-cpu [ offset to per-cpu data area ] - * 28 - unused + * 28 - stack_canary-20 [ for stack protector ] * 29 - unused * 30 - unused * 31 - TSS for double fault handler @@ -95,6 +95,13 @@ #define __KERNEL_PERCPU 0 #endif +#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE + 16) +#ifdef CONFIG_CC_STACKPROTECTOR +#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY * 8) +#else +#define __KERNEL_STACK_CANARY 0 +#endif + #define GDT_ENTRY_DOUBLEFAULT_TSS 31 /* diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index ee275e9f48a..fa7e5bd6fbe 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -1,3 +1,35 @@ +/* + * GCC stack protector support. + * + * Stack protector works by putting predefined pattern at the start of + * the stack frame and verifying that it hasn't been overwritten when + * returning from the function. The pattern is called stack canary + * and unfortunately gcc requires it to be at a fixed offset from %gs. + * On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64 + * and x86_32 use segment registers differently and thus handles this + * requirement differently. + * + * On x86_64, %gs is shared by percpu area and stack canary. All + * percpu symbols are zero based and %gs points to the base of percpu + * area. The first occupant of the percpu area is always + * irq_stack_union which contains stack_canary at offset 40. Userland + * %gs is always saved and restored on kernel entry and exit using + * swapgs, so stack protector doesn't add any complexity there. + * + * On x86_32, it's slightly more complicated. As in x86_64, %gs is + * used for userland TLS. Unfortunately, some processors are much + * slower at loading segment registers with different value when + * entering and leaving the kernel, so the kernel uses %fs for percpu + * area and manages %gs lazily so that %gs is switched only when + * necessary, usually during task switch. + * + * As gcc requires the stack canary at %gs:20, %gs can't be managed + * lazily if stack protector is enabled, so the kernel saves and + * restores userland %gs on kernel entry and exit. This behavior is + * controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in + * system.h to hide the details. + */ + #ifndef _ASM_STACKPROTECTOR_H #define _ASM_STACKPROTECTOR_H 1 @@ -6,8 +38,18 @@ #include #include #include +#include +#include #include +/* + * 24 byte read-only segment initializer for stack canary. Linker + * can't handle the address bit shifting. Address will be set in + * head_32 for boot CPU and setup_per_cpu_areas() for others. + */ +#define GDT_STACK_CANARY_INIT \ + [GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } }, + /* * Initialize the stackprotector canary value. * @@ -19,12 +61,9 @@ static __always_inline void boot_init_stack_canary(void) u64 canary; u64 tsc; - /* - * Build time only check to make sure the stack_canary is at - * offset 40 in the pda; this is a gcc ABI requirement - */ +#ifdef CONFIG_X86_64 BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); - +#endif /* * We both use the random pool and the current TSC as a source * of randomness. The TSC only matters for very early init, @@ -36,7 +75,49 @@ static __always_inline void boot_init_stack_canary(void) canary += tsc + (tsc << 32UL); current->stack_canary = canary; +#ifdef CONFIG_X86_64 percpu_write(irq_stack_union.stack_canary, canary); +#else + percpu_write(stack_canary, canary); +#endif +} + +static inline void setup_stack_canary_segment(int cpu) +{ +#ifdef CONFIG_X86_32 + unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu); + struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); + struct desc_struct desc; + + desc = gdt_table[GDT_ENTRY_STACK_CANARY]; + desc.base0 = canary & 0xffff; + desc.base1 = (canary >> 16) & 0xff; + desc.base2 = (canary >> 24) & 0xff; + write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S); +#endif +} + +static inline void load_stack_canary_segment(void) +{ +#ifdef CONFIG_X86_32 + asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory"); +#endif +} + +#else /* CC_STACKPROTECTOR */ + +#define GDT_STACK_CANARY_INIT + +/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */ + +static inline void setup_stack_canary_segment(int cpu) +{ } + +static inline void load_stack_canary_segment(void) +{ +#ifdef CONFIG_X86_32 + asm volatile ("mov %0, %%gs" : : "r" (0)); +#endif } #endif /* CC_STACKPROTECTOR */ diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 79b98e5b96f..2692ee8ef03 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -23,6 +23,22 @@ struct task_struct *__switch_to(struct task_struct *prev, #ifdef CONFIG_X86_32 +#ifdef CONFIG_CC_STACKPROTECTOR +#define __switch_canary \ + "movl "__percpu_arg([current_task])",%%ebx\n\t" \ + "movl %P[task_canary](%%ebx),%%ebx\n\t" \ + "movl %%ebx,"__percpu_arg([stack_canary])"\n\t" +#define __switch_canary_oparam \ + , [stack_canary] "=m" (per_cpu_var(stack_canary)) +#define __switch_canary_iparam \ + , [current_task] "m" (per_cpu_var(current_task)) \ + , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) +#else /* CC_STACKPROTECTOR */ +#define __switch_canary +#define __switch_canary_oparam +#define __switch_canary_iparam +#endif /* CC_STACKPROTECTOR */ + /* * Saving eflags is important. It switches not only IOPL between tasks, * it also protects other tasks from NT leaking through sysenter etc. @@ -46,6 +62,7 @@ do { \ "pushl %[next_ip]\n\t" /* restore EIP */ \ "jmp __switch_to\n" /* regparm call */ \ "1:\t" \ + __switch_canary \ "popl %%ebp\n\t" /* restore EBP */ \ "popfl\n" /* restore flags */ \ \ @@ -58,6 +75,8 @@ do { \ "=b" (ebx), "=c" (ecx), "=d" (edx), \ "=S" (esi), "=D" (edi) \ \ + __switch_canary_oparam \ + \ /* input parameters: */ \ : [next_sp] "m" (next->thread.sp), \ [next_ip] "m" (next->thread.ip), \ @@ -66,6 +85,8 @@ do { \ [prev] "a" (prev), \ [next] "d" (next) \ \ + __switch_canary_iparam \ + \ : /* reloaded segment registers */ \ "memory"); \ } while (0) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 37fa30bada1..b1f8be33300 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -24,6 +24,24 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc.o := $(nostackp) CFLAGS_paravirt.o := $(nostackp) +# +# On x86_32, register frame is passed verbatim on stack as struct +# pt_regs. gcc considers the parameter to belong to the callee and +# with -fstack-protector it copies pt_regs to the callee's stack frame +# to put the structure after the stack canary causing changes made by +# the exception handlers to be lost. Turn off stack protector for all +# files containing functions which take struct pt_regs from register +# frame. +# +# The proper way to fix this is to teach gcc that the argument belongs +# to the caller for these functions, oh well... +# +ifdef CONFIG_X86_32 +CFLAGS_process_32.o := $(nostackp) +CFLAGS_vm86_32.o := $(nostackp) +CFLAGS_signal.o := $(nostackp) +CFLAGS_traps.o := $(nostackp) +endif obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 41b0de6df87..260fe4cb2c8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "cpu.h" @@ -122,6 +123,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, + GDT_STACK_CANARY_INIT #endif } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); @@ -261,6 +263,7 @@ void load_percpu_segment(int cpu) loadsegment(gs, 0); wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); #endif + load_stack_canary_segment(); } /* Current gdt points %fs at the "master" per-cpu area: after this, @@ -946,16 +949,21 @@ unsigned long kernel_eflags; */ DEFINE_PER_CPU(struct orig_ist, orig_ist); -#else +#else /* x86_64 */ + +#ifdef CONFIG_CC_STACKPROTECTOR +DEFINE_PER_CPU(unsigned long, stack_canary); +#endif -/* Make sure %fs is initialized properly in idle threads */ +/* Make sure %fs and %gs are initialized properly in idle threads */ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { memset(regs, 0, sizeof(struct pt_regs)); regs->fs = __KERNEL_PERCPU; + regs->gs = __KERNEL_STACK_CANARY; return regs; } -#endif +#endif /* x86_64 */ /* * cpu_init() initializes state that is per-CPU. Some data is already @@ -1120,9 +1128,6 @@ void __cpuinit cpu_init(void) __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); #endif - /* Clear %gs. */ - asm volatile ("mov %0, %%gs" : : "r" (0)); - /* Clear all 6 debug registers: */ set_debugreg(0, 0); set_debugreg(0, 1); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 82e6868bee4..5f5bd22adcd 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -186,7 +186,7 @@ /*CFI_REL_OFFSET gs, PT_GS*/ .endm .macro SET_KERNEL_GS reg - xorl \reg, \reg + movl $(__KERNEL_STACK_CANARY), \reg movl \reg, %gs .endm diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 24c0e5cd71e..924e31615fb 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -19,6 +19,7 @@ #include #include #include +#include /* Physical address */ #define pa(X) ((X) - __PAGE_OFFSET) @@ -437,8 +438,25 @@ is386: movl $2,%ecx # set MP movl $(__KERNEL_PERCPU), %eax movl %eax,%fs # set this cpu's percpu - xorl %eax,%eax # Clear GS and LDT +#ifdef CONFIG_CC_STACKPROTECTOR + /* + * The linker can't handle this by relocation. Manually set + * base address in stack canary segment descriptor. + */ + cmpb $0,ready + jne 1f + movl $per_cpu__gdt_page,%eax + movl $per_cpu__stack_canary,%ecx + movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) + shrl $16, %ecx + movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) + movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax) +1: +#endif + movl $(__KERNEL_STACK_CANARY),%eax movl %eax,%gs + + xorl %eax,%eax # Clear LDT lldt %ax cld # gcc2 wants the direction flag cleared at all times diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 86122fa2a1b..9a62383e7c3 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -212,6 +212,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.ds = __USER_DS; regs.es = __USER_DS; regs.fs = __KERNEL_PERCPU; + regs.gs = __KERNEL_STACK_CANARY; regs.orig_ax = -1; regs.ip = (unsigned long) kernel_thread_helper; regs.cs = __KERNEL_CS | get_kernel_rpl(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ef91747bbed..d992e6cff73 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_PER_CPU_MAPS # define DBG(x...) printk(KERN_DEBUG x) @@ -95,6 +96,7 @@ void __init setup_per_cpu_areas(void) per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); + setup_stack_canary_segment(cpu); /* * Copy data used in early init routines from the * initial arrays to the per cpu data areas. These diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh new file mode 100644 index 00000000000..4fdf6ce1b06 --- /dev/null +++ b/scripts/gcc-x86_32-has-stack-protector.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs" +if [ "$?" -eq "0" ] ; then + echo y +else + echo n +fi -- cgit v1.2.3-70-g09d2 From 54321d947ae9d6a051b81e3eccaf2d8658aeecc6 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 11 Feb 2009 10:20:05 -0800 Subject: x86: move pte types into pgtable*.h pgtable*.h is intended for definitions relating to actual pagetables and their entries, so move all the definitions for (pte|pmd|pud|pgd)(val)?_t to the appropriate pgtable*.h headers. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/page.h | 22 ------ arch/x86/include/asm/page_32_types.h | 30 -------- arch/x86/include/asm/page_64_types.h | 11 --- arch/x86/include/asm/page_types.h | 97 +------------------------ arch/x86/include/asm/paravirt.h | 2 +- arch/x86/include/asm/pgtable-2level_types.h | 15 ++++ arch/x86/include/asm/pgtable-3level_types.h | 18 +++++ arch/x86/include/asm/pgtable.h | 103 +++++++++++++++----------- arch/x86/include/asm/pgtable_64_types.h | 16 +++++ arch/x86/include/asm/pgtable_types.h | 107 ++++++++++++++++++++++++++-- arch/x86/include/asm/processor.h | 1 + 11 files changed, 215 insertions(+), 207 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 28423609b00..467ce69306b 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -31,28 +31,6 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE -#ifdef CONFIG_PARAVIRT -#include -#else /* !CONFIG_PARAVIRT */ - -#define pgd_val(x) native_pgd_val(x) -#define __pgd(x) native_make_pgd(x) - -#ifndef __PAGETABLE_PUD_FOLDED -#define pud_val(x) native_pud_val(x) -#define __pud(x) native_make_pud(x) -#endif - -#ifndef __PAGETABLE_PMD_FOLDED -#define pmd_val(x) native_pmd_val(x) -#define __pmd(x) native_make_pmd(x) -#endif - -#define pte_val(x) native_pte_val(x) -#define __pte(x) native_make_pte(x) - -#endif /* CONFIG_PARAVIRT */ - #define __pa(x) __phys_addr((unsigned long)(x)) #define __pa_nodebug(x) __phys_addr_nodebug((unsigned long)(x)) /* __pa_symbol should be used for C visible symbols. diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 83f820a2b10..b5486aaf36e 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -43,36 +43,6 @@ #ifndef __ASSEMBLY__ -#include - -#ifdef CONFIG_X86_PAE -typedef u64 pteval_t; -typedef u64 pmdval_t; -typedef u64 pudval_t; -typedef u64 pgdval_t; -typedef u64 pgprotval_t; - -typedef union { - struct { - unsigned long pte_low, pte_high; - }; - pteval_t pte; -} pte_t; -#else /* !CONFIG_X86_PAE */ -typedef unsigned long pteval_t; -typedef unsigned long pmdval_t; -typedef unsigned long pudval_t; -typedef unsigned long pgdval_t; -typedef unsigned long pgprotval_t; - -typedef union { - pteval_t pte; - pteval_t pte_low; -} pte_t; -#endif /* CONFIG_X86_PAE */ - -extern int nx_enabled; - /* * This much address space is reserved for vmalloc() and iomap() * as well as fixmap mappings. diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index d00cd388082..bc73af3eda9 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -71,17 +71,6 @@ extern unsigned long phys_base; extern unsigned long __phys_addr(unsigned long); #define __phys_reloc_hide(x) (x) -/* - * These are used to make use of C type-checking.. - */ -typedef unsigned long pteval_t; -typedef unsigned long pmdval_t; -typedef unsigned long pudval_t; -typedef unsigned long pgdval_t; -typedef unsigned long pgprotval_t; - -typedef struct { pteval_t pte; } pte_t; - #define vmemmap ((struct page *)VMEMMAP_START) extern unsigned long init_memory_mapping(unsigned long start, diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 0733853e7c8..9f0c9596335 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -46,106 +46,15 @@ #ifndef __ASSEMBLY__ -#include - -typedef struct { pgdval_t pgd; } pgd_t; -typedef struct { pgprotval_t pgprot; } pgprot_t; - -static inline pgd_t native_make_pgd(pgdval_t val) -{ - return (pgd_t) { val }; -} - -static inline pgdval_t native_pgd_val(pgd_t pgd) -{ - return pgd.pgd; -} - -static inline pgdval_t pgd_flags(pgd_t pgd) -{ - return native_pgd_val(pgd) & PTE_FLAGS_MASK; -} - -#if PAGETABLE_LEVELS > 3 -typedef struct { pudval_t pud; } pud_t; - -static inline pud_t native_make_pud(pmdval_t val) -{ - return (pud_t) { val }; -} - -static inline pudval_t native_pud_val(pud_t pud) -{ - return pud.pud; -} -#else -#include - -static inline pudval_t native_pud_val(pud_t pud) -{ - return native_pgd_val(pud.pgd); -} -#endif - -#if PAGETABLE_LEVELS > 2 -typedef struct { pmdval_t pmd; } pmd_t; - -static inline pmd_t native_make_pmd(pmdval_t val) -{ - return (pmd_t) { val }; -} - -static inline pmdval_t native_pmd_val(pmd_t pmd) -{ - return pmd.pmd; -} -#else -#include - -static inline pmdval_t native_pmd_val(pmd_t pmd) -{ - return native_pgd_val(pmd.pud.pgd); -} -#endif - -static inline pudval_t pud_flags(pud_t pud) -{ - return native_pud_val(pud) & PTE_FLAGS_MASK; -} - -static inline pmdval_t pmd_flags(pmd_t pmd) -{ - return native_pmd_val(pmd) & PTE_FLAGS_MASK; -} - -static inline pte_t native_make_pte(pteval_t val) -{ - return (pte_t) { .pte = val }; -} - -static inline pteval_t native_pte_val(pte_t pte) -{ - return pte.pte; -} - -static inline pteval_t pte_flags(pte_t pte) -{ - return native_pte_val(pte) & PTE_FLAGS_MASK; -} - -#define pgprot_val(x) ((x).pgprot) -#define __pgprot(x) ((pgprot_t) { (x) } ) - - -typedef struct page *pgtable_t; +struct pgprot; extern int page_is_ram(unsigned long pagenr); extern int pagerange_is_ram(unsigned long start, unsigned long end); extern int devmem_is_allowed(unsigned long pagenr); extern void map_devmem(unsigned long pfn, unsigned long size, - pgprot_t vma_prot); + struct pgprot vma_prot); extern void unmap_devmem(unsigned long pfn, unsigned long size, - pgprot_t vma_prot); + struct pgprot vma_prot); extern unsigned long max_low_pfn_mapped; extern unsigned long max_pfn_mapped; diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index db570f23b22..7ed73ccd74e 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -4,7 +4,7 @@ * para-virtualization: those hooks are defined here. */ #ifdef CONFIG_PARAVIRT -#include +#include #include /* Bitmask of what can be clobbered: usually at least eax. */ diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index d77db8990ea..09ae67efceb 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -1,6 +1,21 @@ #ifndef _ASM_X86_PGTABLE_2LEVEL_DEFS_H #define _ASM_X86_PGTABLE_2LEVEL_DEFS_H +#ifndef __ASSEMBLY__ +#include + +typedef unsigned long pteval_t; +typedef unsigned long pmdval_t; +typedef unsigned long pudval_t; +typedef unsigned long pgdval_t; +typedef unsigned long pgprotval_t; + +typedef union { + pteval_t pte; + pteval_t pte_low; +} pte_t; +#endif /* !__ASSEMBLY__ */ + #define SHARED_KERNEL_PMD 0 /* diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 62561367653..bcc89625ebe 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -1,6 +1,23 @@ #ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H #define _ASM_X86_PGTABLE_3LEVEL_DEFS_H +#ifndef __ASSEMBLY__ +#include + +typedef u64 pteval_t; +typedef u64 pmdval_t; +typedef u64 pudval_t; +typedef u64 pgdval_t; +typedef u64 pgprotval_t; + +typedef union { + struct { + unsigned long pte_low, pte_high; + }; + pteval_t pte; +} pte_t; +#endif /* !__ASSEMBLY__ */ + #ifdef CONFIG_PARAVIRT #define SHARED_KERNEL_PMD (pv_info.shared_kernel_pmd) #else @@ -25,4 +42,5 @@ */ #define PTRS_PER_PTE 512 + #endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 9f508509797..b0d1066ab6a 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -25,6 +25,66 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; extern spinlock_t pgd_lock; extern struct list_head pgd_list; +#ifdef CONFIG_PARAVIRT +#include +#else /* !CONFIG_PARAVIRT */ +#define set_pte(ptep, pte) native_set_pte(ptep, pte) +#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) + +#define set_pte_present(mm, addr, ptep, pte) \ + native_set_pte_present(mm, addr, ptep, pte) +#define set_pte_atomic(ptep, pte) \ + native_set_pte_atomic(ptep, pte) + +#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) + +#ifndef __PAGETABLE_PUD_FOLDED +#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) +#define pgd_clear(pgd) native_pgd_clear(pgd) +#endif + +#ifndef set_pud +# define set_pud(pudp, pud) native_set_pud(pudp, pud) +#endif + +#ifndef __PAGETABLE_PMD_FOLDED +#define pud_clear(pud) native_pud_clear(pud) +#endif + +#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) +#define pmd_clear(pmd) native_pmd_clear(pmd) + +#define pte_update(mm, addr, ptep) do { } while (0) +#define pte_update_defer(mm, addr, ptep) do { } while (0) + +static inline void __init paravirt_pagetable_setup_start(pgd_t *base) +{ + native_pagetable_setup_start(base); +} + +static inline void __init paravirt_pagetable_setup_done(pgd_t *base) +{ + native_pagetable_setup_done(base); +} + +#define pgd_val(x) native_pgd_val(x) +#define __pgd(x) native_make_pgd(x) + +#ifndef __PAGETABLE_PUD_FOLDED +#define pud_val(x) native_pud_val(x) +#define __pud(x) native_make_pud(x) +#endif + +#ifndef __PAGETABLE_PMD_FOLDED +#define pmd_val(x) native_pmd_val(x) +#define __pmd(x) native_make_pmd(x) +#endif + +#define pte_val(x) native_pte_val(x) +#define __pte(x) native_make_pte(x) + +#endif /* CONFIG_PARAVIRT */ + /* * The following only work if pte_present() is true. * Undefined behaviour if not.. @@ -214,49 +274,6 @@ static inline int is_new_memtype_allowed(unsigned long flags, return 1; } -#ifdef CONFIG_PARAVIRT -#include -#else /* !CONFIG_PARAVIRT */ -#define set_pte(ptep, pte) native_set_pte(ptep, pte) -#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) - -#define set_pte_present(mm, addr, ptep, pte) \ - native_set_pte_present(mm, addr, ptep, pte) -#define set_pte_atomic(ptep, pte) \ - native_set_pte_atomic(ptep, pte) - -#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) - -#ifndef __PAGETABLE_PUD_FOLDED -#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) -#define pgd_clear(pgd) native_pgd_clear(pgd) -#endif - -#ifndef set_pud -# define set_pud(pudp, pud) native_set_pud(pudp, pud) -#endif - -#ifndef __PAGETABLE_PMD_FOLDED -#define pud_clear(pud) native_pud_clear(pud) -#endif - -#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) -#define pmd_clear(pmd) native_pmd_clear(pmd) - -#define pte_update(mm, addr, ptep) do { } while (0) -#define pte_update_defer(mm, addr, ptep) do { } while (0) - -static inline void __init paravirt_pagetable_setup_start(pgd_t *base) -{ - native_pagetable_setup_start(base); -} - -static inline void __init paravirt_pagetable_setup_done(pgd_t *base) -{ - native_pagetable_setup_done(base); -} -#endif /* CONFIG_PARAVIRT */ - #endif /* __ASSEMBLY__ */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index ffaf1934068..2f59135c6f2 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -1,6 +1,22 @@ #ifndef _ASM_X86_PGTABLE_64_DEFS_H #define _ASM_X86_PGTABLE_64_DEFS_H +#ifndef __ASSEMBLY__ +#include + +/* + * These are used to make use of C type-checking.. + */ +typedef unsigned long pteval_t; +typedef unsigned long pmdval_t; +typedef unsigned long pudval_t; +typedef unsigned long pgdval_t; +typedef unsigned long pgprotval_t; + +typedef struct { pteval_t pte; } pte_t; + +#endif /* !__ASSEMBLY__ */ + #define SHARED_KERNEL_PMD 0 /* diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index e5d5a8d3577..a7452f10930 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -162,9 +162,110 @@ #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ #endif +#ifdef CONFIG_X86_32 +# include "pgtable_32_types.h" +#else +# include "pgtable_64_types.h" +#endif + #ifndef __ASSEMBLY__ +#include + +typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; + +typedef struct { pgdval_t pgd; } pgd_t; + +static inline pgd_t native_make_pgd(pgdval_t val) +{ + return (pgd_t) { val }; +} + +static inline pgdval_t native_pgd_val(pgd_t pgd) +{ + return pgd.pgd; +} + +static inline pgdval_t pgd_flags(pgd_t pgd) +{ + return native_pgd_val(pgd) & PTE_FLAGS_MASK; +} + +#if PAGETABLE_LEVELS > 3 +typedef struct { pudval_t pud; } pud_t; + +static inline pud_t native_make_pud(pmdval_t val) +{ + return (pud_t) { val }; +} + +static inline pudval_t native_pud_val(pud_t pud) +{ + return pud.pud; +} +#else +#include + +static inline pudval_t native_pud_val(pud_t pud) +{ + return native_pgd_val(pud.pgd); +} +#endif + +#if PAGETABLE_LEVELS > 2 +typedef struct { pmdval_t pmd; } pmd_t; + +static inline pmd_t native_make_pmd(pmdval_t val) +{ + return (pmd_t) { val }; +} + +static inline pmdval_t native_pmd_val(pmd_t pmd) +{ + return pmd.pmd; +} +#else +#include + +static inline pmdval_t native_pmd_val(pmd_t pmd) +{ + return native_pgd_val(pmd.pud.pgd); +} +#endif + +static inline pudval_t pud_flags(pud_t pud) +{ + return native_pud_val(pud) & PTE_FLAGS_MASK; +} + +static inline pmdval_t pmd_flags(pmd_t pmd) +{ + return native_pmd_val(pmd) & PTE_FLAGS_MASK; +} + +static inline pte_t native_make_pte(pteval_t val) +{ + return (pte_t) { .pte = val }; +} + +static inline pteval_t native_pte_val(pte_t pte) +{ + return pte.pte; +} + +static inline pteval_t pte_flags(pte_t pte) +{ + return native_pte_val(pte) & PTE_FLAGS_MASK; +} + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) + + +typedef struct page *pgtable_t; + extern pteval_t __supported_pte_mask; +extern int nx_enabled; #define pgprot_writecombine pgprot_writecombine extern pgprot_t pgprot_writecombine(pgprot_t prot); @@ -217,10 +318,4 @@ extern pte_t *lookup_address(unsigned long address, unsigned int *level); #endif /* !__ASSEMBLY__ */ -#ifdef CONFIG_X86_32 -# include "pgtable_32_types.h" -#else -# include "pgtable_64_types.h" -#endif - #endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 656d02ea509..96302880774 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -16,6 +16,7 @@ struct mm_struct; #include #include #include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From de5483029b8f18e23395d8fd4f4ef6ae15beb809 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 19 Feb 2009 12:20:17 +0530 Subject: x86: include/asm/processor.h remove double declaration of print_cpu_info Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index dabab1a19dd..72914d0315e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -403,7 +403,6 @@ DECLARE_PER_CPU(unsigned long, stack_canary); #endif #endif /* X86_64 */ -extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int xstate_size; extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; -- cgit v1.2.3-70-g09d2 From d951734654f76a370a89b4e531af9b765bd13541 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 23:32:28 +0100 Subject: x86, mm: rename TASK_SIZE64 => TASK_SIZE_MAX Impact: cleanup Rename TASK_SIZE64 to TASK_SIZE_MAX, and provide the define on 32-bit too. (mapped to TASK_SIZE) This allows 32-bit code to make use of the (former-) TASK_SIZE64 symbol as well, in a clean way. Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 9 +++++---- arch/x86/kernel/ptrace.c | 2 +- arch/x86/mm/fault.c | 2 +- arch/x86/vdso/vma.c | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 72914d0315e..c7a98f73821 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -861,6 +861,7 @@ static inline void spin_lock_prefetch(const void *x) * User space process size: 3GB (default). */ #define TASK_SIZE PAGE_OFFSET +#define TASK_SIZE_MAX TASK_SIZE #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX STACK_TOP @@ -920,7 +921,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); /* * User space process size. 47bits minus one guard page. */ -#define TASK_SIZE64 ((1UL << 47) - PAGE_SIZE) +#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE) /* This decides where the kernel will search for a free chunk of vm * space during mmap's. @@ -929,12 +930,12 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); 0xc0000000 : 0xFFFFe000) #define TASK_SIZE (test_thread_flag(TIF_IA32) ? \ - IA32_PAGE_OFFSET : TASK_SIZE64) + IA32_PAGE_OFFSET : TASK_SIZE_MAX) #define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \ - IA32_PAGE_OFFSET : TASK_SIZE64) + IA32_PAGE_OFFSET : TASK_SIZE_MAX) #define STACK_TOP TASK_SIZE -#define STACK_TOP_MAX TASK_SIZE64 +#define STACK_TOP_MAX TASK_SIZE_MAX #define INIT_THREAD { \ .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index d2f7cd5b2c8..fb2159a5c81 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -268,7 +268,7 @@ static unsigned long debugreg_addr_limit(struct task_struct *task) if (test_tsk_thread_flag(task, TIF_IA32)) return IA32_PAGE_OFFSET - 3; #endif - return TASK_SIZE64 - 7; + return TASK_SIZE_MAX - 7; } #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9c2dc5d7953..6fa9f175cba 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -963,7 +963,7 @@ static int fault_in_kernel_space(unsigned long address) #ifdef CONFIG_X86_32 return address >= TASK_SIZE; #else - return address >= TASK_SIZE64; + return address >= TASK_SIZE_MAX; #endif } diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 9c98cc6ba97..7133cdf9098 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -85,8 +85,8 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) unsigned long addr, end; unsigned offset; end = (start + PMD_SIZE - 1) & PMD_MASK; - if (end >= TASK_SIZE64) - end = TASK_SIZE64; + if (end >= TASK_SIZE_MAX) + end = TASK_SIZE_MAX; end -= len; /* This loses some more bits than a modulo, but is cheaper */ offset = get_random_int() & (PTRS_PER_PTE - 1); -- cgit v1.2.3-70-g09d2 From db949bba3c7cf2e664ac12e237c6d4c914f0c69d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 27 Feb 2009 13:25:21 -0800 Subject: x86-32: use non-lazy io bitmap context switching Impact: remove 32-bit optimization to prepare unification x86-32 and -64 differ in the way they context-switch tasks with io permission bitmaps. x86-64 simply copies the next tasks io bitmap into place (if any) on context switch. x86-32 invalidates the bitmap on context switch, so that the next IO instruction will fault; at that point it installs the appropriate IO bitmap. This makes context switching IO-bitmap-using tasks a bit more less expensive, at the cost of making the next IO instruction slower due to the extra fault. This tradeoff only makes sense if IO-bitmap-using processes are relatively common, but they don't actually use IO instructions very often. However, in a typical desktop system, the only process likely to be using IO bitmaps is the X server, and nothing at all on a server. Therefore the lazy context switch doesn't really win all that much, and its just a gratuitious difference from 64-bit code. This patch removes the lazy context switch, with a view to unifying this code in a later change. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 6 ------ arch/x86/kernel/ioport.c | 11 ---------- arch/x86/kernel/process_32.c | 36 ++++++++----------------------- arch/x86/kernel/traps.c | 46 ---------------------------------------- 4 files changed, 9 insertions(+), 90 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c7a98f73821..76139506c3e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -248,7 +248,6 @@ struct x86_hw_tss { #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) #define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) #define INVALID_IO_BITMAP_OFFSET 0x8000 -#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000 struct tss_struct { /* @@ -263,11 +262,6 @@ struct tss_struct { * be within the limit. */ unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; - /* - * Cache the current maximum and the last task that used the bitmap: - */ - unsigned long io_bitmap_max; - struct thread_struct *io_bitmap_owner; /* * .. and then another 0x100 bytes for the emergency kernel stack: diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index e41980a373a..99c4d308f16 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -85,19 +85,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) t->io_bitmap_max = bytes; -#ifdef CONFIG_X86_32 - /* - * Sets the lazy trigger so that the next I/O operation will - * reload the correct bitmap. - * Reset the owner so that a process switch will not set - * tss->io_bitmap_base to IO_BITMAP_OFFSET. - */ - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; - tss->io_bitmap_owner = NULL; -#else /* Update the TSS: */ memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); -#endif put_cpu(); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 646da41a620..a59314e877f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -248,11 +248,8 @@ void exit_thread(void) /* * Careful, clear this in the TSS too: */ - memset(tss->io_bitmap, 0xff, tss->io_bitmap_max); + memset(tss->io_bitmap, 0xff, t->io_bitmap_max); t->io_bitmap_max = 0; - tss->io_bitmap_owner = NULL; - tss->io_bitmap_max = 0; - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } @@ -458,34 +455,19 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, hard_enable_TSC(); } - if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { + if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { /* - * Disable the bitmap via an invalid offset. We still cache - * the previous bitmap owner and the IO bitmap contents: + * Copy the relevant range of the IO bitmap. + * Normally this is 128 bytes or less: */ - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; - return; - } - - if (likely(next == tss->io_bitmap_owner)) { + memcpy(tss->io_bitmap, next->io_bitmap_ptr, + max(prev->io_bitmap_max, next->io_bitmap_max)); + } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { /* - * Previous owner of the bitmap (hence the bitmap content) - * matches the next task, we dont have to do anything but - * to set a valid offset in the TSS: + * Clear any possible leftover bits: */ - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; - return; + memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } - /* - * Lazy TSS's I/O bitmap copy. We set an invalid offset here - * and we let the task to get a GPF in case an I/O instruction - * is performed. The handler of the GPF will verify that the - * faulting task has a valid I/O bitmap and, it true, does the - * real copy and restart the instruction. This will save us - * redundant copies when the currently switched task does not - * perform any I/O during its timeslice. - */ - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; } /* diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c05430ac1b4..a1d288327ff 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -118,47 +118,6 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err) if (!user_mode_vm(regs)) die(str, regs, err); } - -/* - * Perform the lazy TSS's I/O bitmap copy. If the TSS has an - * invalid offset set (the LAZY one) and the faulting thread has - * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS, - * we set the offset field correctly and return 1. - */ -static int lazy_iobitmap_copy(void) -{ - struct thread_struct *thread; - struct tss_struct *tss; - int cpu; - - cpu = get_cpu(); - tss = &per_cpu(init_tss, cpu); - thread = ¤t->thread; - - if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && - thread->io_bitmap_ptr) { - memcpy(tss->io_bitmap, thread->io_bitmap_ptr, - thread->io_bitmap_max); - /* - * If the previously set map was extending to higher ports - * than the current one, pad extra space with 0xff (no access). - */ - if (thread->io_bitmap_max < tss->io_bitmap_max) { - memset((char *) tss->io_bitmap + - thread->io_bitmap_max, 0xff, - tss->io_bitmap_max - thread->io_bitmap_max); - } - tss->io_bitmap_max = thread->io_bitmap_max; - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; - tss->io_bitmap_owner = thread; - put_cpu(); - - return 1; - } - put_cpu(); - - return 0; -} #endif static void __kprobes @@ -309,11 +268,6 @@ do_general_protection(struct pt_regs *regs, long error_code) conditional_sti(regs); #ifdef CONFIG_X86_32 - if (lazy_iobitmap_copy()) { - /* restart the faulting instruction */ - return; - } - if (regs->flags & X86_VM_MASK) goto gp_in_vm86; #endif -- cgit v1.2.3-70-g09d2 From 155dd720d06a219ddf5a56b473cb3325441fc879 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 13 Mar 2009 14:49:53 +1030 Subject: cpumask: convert struct cpuinfo_x86's llc_shared_map to cpumask_var_t Impact: reduce kernel memory usage when CONFIG_CPUMASK_OFFSTACK=y Signed-off-by: Rusty Russell --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/smpboot.c | 33 ++++++++++++++++++++++++++------- 2 files changed, 27 insertions(+), 8 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 76139506c3e..d794d9483c5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -94,7 +94,7 @@ struct cpuinfo_x86 { unsigned long loops_per_jiffy; #ifdef CONFIG_SMP /* cpus sharing the last level cache: */ - cpumask_t llc_shared_map; + cpumask_var_t llc_shared_map; #endif /* cpuid returned max cores value: */ u16 x86_max_cores; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5a58a45ac1e..d6427aa5696 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -329,6 +329,23 @@ notrace static void __cpuinit start_secondary(void *unused) cpu_idle(); } +#ifdef CONFIG_CPUMASK_OFFSTACK +/* In this case, llc_shared_map is a pointer to a cpumask. */ +static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst, + const struct cpuinfo_x86 *src) +{ + struct cpumask *llc = dst->llc_shared_map; + *dst = *src; + dst->llc_shared_map = llc; +} +#else +static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst, + const struct cpuinfo_x86 *src) +{ + *dst = *src; +} +#endif /* CONFIG_CPUMASK_OFFSTACK */ + /* * The bootstrap kernel entry code has set these up. Save them for * a given CPU @@ -338,7 +355,7 @@ void __cpuinit smp_store_cpu_info(int id) { struct cpuinfo_x86 *c = &cpu_data(id); - *c = boot_cpu_data; + copy_cpuinfo_x86(c, &boot_cpu_data); c->cpu_index = id; if (id != 0) identify_secondary_cpu(c); @@ -362,15 +379,15 @@ void __cpuinit set_cpu_sibling_map(int cpu) cpumask_set_cpu(cpu, cpu_sibling_mask(i)); cpumask_set_cpu(i, cpu_core_mask(cpu)); cpumask_set_cpu(cpu, cpu_core_mask(i)); - cpumask_set_cpu(i, &c->llc_shared_map); - cpumask_set_cpu(cpu, &o->llc_shared_map); + cpumask_set_cpu(i, c->llc_shared_map); + cpumask_set_cpu(cpu, o->llc_shared_map); } } } else { cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); } - cpumask_set_cpu(cpu, &c->llc_shared_map); + cpumask_set_cpu(cpu, c->llc_shared_map); if (current_cpu_data.x86_max_cores == 1) { cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); @@ -381,8 +398,8 @@ void __cpuinit set_cpu_sibling_map(int cpu) for_each_cpu(i, cpu_sibling_setup_mask) { if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { - cpumask_set_cpu(i, &c->llc_shared_map); - cpumask_set_cpu(cpu, &cpu_data(i).llc_shared_map); + cpumask_set_cpu(i, c->llc_shared_map); + cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map); } if (c->phys_proc_id == cpu_data(i).phys_proc_id) { cpumask_set_cpu(i, cpu_core_mask(cpu)); @@ -420,7 +437,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu) if (sched_mc_power_savings || sched_smt_power_savings) return cpu_core_mask(cpu); else - return &c->llc_shared_map; + return c->llc_shared_map; } static void impress_friends(void) @@ -1039,8 +1056,10 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) for_each_possible_cpu(i) { alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); + alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); cpumask_clear(per_cpu(cpu_core_map, i)); cpumask_clear(per_cpu(cpu_sibling_map, i)); + cpumask_clear(cpu_data(i).llc_shared_map); } set_cpu_sibling_map(0); -- cgit v1.2.3-70-g09d2 From 30e1e6d1af2b67558bccf322af2b3e0676b209ae Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Mar 2009 14:50:34 +1030 Subject: cpumask: fix CONFIG_CPUMASK_OFFSTACK=y cpu hotunplug crash Impact: Fix cpu offline when CONFIG_MAXSMP=y Changeset bc9b83dd1f66402b870301c3c7117b9c1484abb4 "cpumask: convert c1e_mask in arch/x86/kernel/process.c to cpumask_var_t" contained a bug: c1e_mask is manipulated even if C1E isn't detected (and hence not allocated). This is simply fixed by checking for NULL (which gcc optimizes out anyway of CONFIG_CPUMASK_OFFSTACK=n, since it knows ce1_mask can never be NULL). In addition, fix a leak where select_idle_routine re-allocates (and re-clears) c1e_mask on every cpu init. Reported-by: Ingo Molnar Signed-off-by: Rusty Russell Cc: Mike Travis LKML-Reference: <200903171450.34549.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/process.c | 14 +++++++++++--- 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d794d9483c5..9874dd98a29 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -733,6 +733,7 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); extern void select_idle_routine(const struct cpuinfo_x86 *c); +extern void init_c1e_mask(void); extern unsigned long boot_option_idle_override; extern unsigned long idle_halt; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 82f6cc045ad..d7dd3c294e2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -812,6 +812,7 @@ static void vgetcpu_set_mode(void) void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); + init_c1e_mask(); #ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6638294cec8..78533a519d8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -479,7 +479,8 @@ static int c1e_detected; void c1e_remove_cpu(int cpu) { - cpumask_clear_cpu(cpu, c1e_mask); + if (c1e_mask != NULL) + cpumask_clear_cpu(cpu, c1e_mask); } /* @@ -556,13 +557,20 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) pm_idle = mwait_idle; } else if (check_c1e_idle(c)) { printk(KERN_INFO "using C1E aware idle routine\n"); - alloc_cpumask_var(&c1e_mask, GFP_KERNEL); - cpumask_clear(c1e_mask); pm_idle = c1e_idle; } else pm_idle = default_idle; } +void __init init_c1e_mask(void) +{ + /* If we're using c1e_idle, we need to allocate c1e_mask. */ + if (pm_idle == c1e_idle) { + alloc_cpumask_var(&c1e_mask, GFP_KERNEL); + cpumask_clear(c1e_mask); + } +} + static int __init idle_setup(char *str) { if (!str) -- cgit v1.2.3-70-g09d2