summary | refs | log | tree | commit | diff | stats
path: root/arch/x86
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@elte.hu> 2009-01-20 08:23:45 +0100
committer: Ingo Molnar <mingo@elte.hu> 2009-01-20 08:23:45 +0100
commit: 8f5d36ed5bb6e33024619eaee15b7ce2e3d115b3 (patch)
tree: 1a73934430a84fa926c7a12e1901d7e2639da4ff /arch/x86
parent: 5cdc5e9e69d4dc3a3630ae1fa666401b2a8dcde6 (diff)
parent: 6b7c38d55587f43bcd2cbce3a98b1c0826982090 (diff)
Merge branch 'tj-percpu' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into core/percpu
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/include/asm/pda.h | 45
-rw-r--r-- arch/x86/include/asm/percpu.h | 6
-rw-r--r-- arch/x86/include/asm/pgtable_64.h | 1
-rw-r--r-- arch/x86/include/asm/processor.h | 23
-rw-r--r-- arch/x86/include/asm/smp.h | 1
-rw-r--r-- arch/x86/include/asm/stackprotector.h | 17
-rw-r--r-- arch/x86/include/asm/system.h | 22
-rw-r--r-- arch/x86/kernel/asm-offsets_64.c | 5
-rw-r--r-- arch/x86/kernel/cpu/common.c | 21
-rw-r--r-- arch/x86/kernel/head64.c | 2
-rw-r--r-- arch/x86/kernel/head_64.S | 37
-rw-r--r-- arch/x86/kernel/process_64.c | 8
-rw-r--r-- arch/x86/kernel/setup_percpu.c | 34
-rw-r--r-- arch/x86/kernel/traps.c | 1
-rw-r--r-- arch/x86/kernel/vmlinux_64.lds.S | 8
-rw-r--r-- arch/x86/xen/enlighten.c | 1
16 files changed, 75 insertions, 157 deletions
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
deleted file mode 100644
index 5976cd803e9..00000000000
--- a/arch/x86/include/asm/pda.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef _ASM_X86_PDA_H
-#define _ASM_X86_PDA_H
-
-#ifndef __ASSEMBLY__
-#include <linux/stddef.h>
-#include <linux/types.h>
-#include <linux/cache.h>
-#include <linux/threads.h>
-#include <asm/page.h>
-#include <asm/percpu.h>
-
-/* Per processor datastructure. %gs points to it while the kernel runs */
-struct x8664_pda {
- unsigned long unused1;
- unsigned long unused2;
- unsigned long unused3;
- unsigned long unused4;
- int unused5;
- unsigned int unused6; /* 36 was cpunumber */
- unsigned long stack_canary; /* 40 stack canary value */
- /* gcc-ABI: this canary MUST be at
- offset 40!!! */
- short in_bootmem; /* pda lives in bootmem */
-} ____cacheline_aligned_in_smp;
-
-DECLARE_PER_CPU(struct x8664_pda, __pda);
-extern void pda_init(int);
-
-#define cpu_pda(cpu) (&per_cpu(__pda, cpu))
-
-#define read_pda(field) percpu_read(__pda.field)
-#define write_pda(field, val) percpu_write(__pda.field, val)
-#define add_pda(field, val) percpu_add(__pda.field, val)
-#define sub_pda(field, val) percpu_sub(__pda.field, val)
-#define or_pda(field, val) percpu_or(__pda.field, val)
-
-/* This is not atomic against other CPUs -- CPU preemption needs to be off */
-#define test_and_clear_bit_pda(bit, field) \
- x86_test_and_clear_bit_percpu(bit, __pda.field)
-
-#endif
-
-#define refresh_stack_canary() write_pda(stack_canary, current->stack_canary)
-
-#endif /* _ASM_X86_PDA_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 165d5272ece..ce980db5e59 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -133,12 +133,6 @@ do { \
/* We can use this directly for local CPU (faster). */
DECLARE_PER_CPU(unsigned long, this_cpu_off);
-#ifdef CONFIG_X86_64
-extern void load_pda_offset(int cpu);
-#else
-static inline void load_pda_offset(int cpu) { }
-#endif
-
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index ba09289acca..1df9637dfda 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -11,7 +11,6 @@
#include <asm/processor.h>
#include <linux/bitops.h>
#include <linux/threads.h>
-#include <asm/pda.h>
extern pud_t level3_kernel_pgt[512];
extern pud_t level3_ident_pgt[512];
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f511246fa6c..48676b943b9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -379,8 +379,29 @@ union thread_xstate {
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(struct orig_ist, orig_ist);
-DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack);
+union irq_stack_union {
+ char irq_stack[IRQ_STACK_SIZE];
+ /*
+ * GCC hardcodes the stack canary as %gs:40. Since the
+ * irq_stack is the object at %gs:0, we reserve the bottom
+ * 48 bytes of the irq stack for the canary.
+ */
+ struct {
+ char gs_base[40];
+ unsigned long stack_canary;
+ };
+};
+
+DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
DECLARE_PER_CPU(char *, irq_stack_ptr);
+
+static inline void load_gs_base(int cpu)
+{
+ /* Memory clobbers used to order percpu accesses around the GS base update */
+ mb();
+ wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
+ mb();
+}
#endif
extern void print_cpu_info(struct cpuinfo_x86 *);
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 68636e767a9..45ef8a1b9d7 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -15,7 +15,6 @@
# include <asm/io_apic.h>
# endif
#endif
-#include <asm/pda.h>
#include <asm/thread_info.h>
#include <asm/cpumask.h>
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index c7f0d10bae7..36a700acaf2 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -2,7 +2,7 @@
#define _ASM_STACKPROTECTOR_H 1
#include <asm/tsc.h>
-#include <asm/pda.h>
+#include <asm/processor.h>
/*
* Initialize the stackprotector canary value.
@@ -16,13 +16,12 @@ static __always_inline void boot_init_stack_canary(void)
u64 tsc;
/*
- * If we're the non-boot CPU, nothing set the PDA stack
- * canary up for us - and if we are the boot CPU we have
- * a 0 stack canary. This is a good place for updating
- * it, as we wont ever return from this function (so the
- * invalid canaries already on the stack wont ever
- * trigger).
- *
+ * Build time only check to make sure the stack_canary is at
+ * offset 40 in irq_stack_union (%gs:40); this is a gcc ABI requirement
+ */
+ BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
+
+ /*
* We both use the random pool and the current TSC as a source
* of randomness. The TSC only matters for very early init,
* there it already has some randomness on most systems. Later
@@ -33,7 +32,7 @@ static __always_inline void boot_init_stack_canary(void)
canary += tsc + (tsc << 32UL);
current->stack_canary = canary;
- write_pda(stack_canary, canary);
+ percpu_write(irq_stack_union.stack_canary, canary);
}
#endif
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 8cadfe9b119..52eb748a68a 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -86,17 +86,28 @@ do { \
, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
"r12", "r13", "r14", "r15"
+#ifdef CONFIG_CC_STACKPROTECTOR
+#define __switch_canary \
+ "movq %P[task_canary](%%rsi),%%r8\n\t" \
+ "movq %%r8,%%gs:%P[gs_canary]\n\t"
+#define __switch_canary_param \
+ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) \
+ , [gs_canary] "i" (offsetof(union irq_stack_union, stack_canary))
+#else /* CC_STACKPROTECTOR */
+#define __switch_canary
+#define __switch_canary_param
+#endif /* CC_STACKPROTECTOR */
+
/* Save restore flags to clear handle leaking NT */
#define switch_to(prev, next, last) \
- asm volatile(SAVE_CONTEXT \
+ asm volatile(SAVE_CONTEXT \
"movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
"movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
"call __switch_to\n\t" \
".globl thread_return\n" \
"thread_return:\n\t" \
"movq "__percpu_arg([current_task])",%%rsi\n\t" \
- "movq %P[task_canary](%%rsi),%%r8\n\t" \
- "movq %%r8,%%gs:%P[pda_canary]\n\t" \
+ __switch_canary \
"movq %P[thread_info](%%rsi),%%r8\n\t" \
LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
"movq %%rax,%%rdi\n\t" \
@@ -108,9 +119,8 @@ do { \
[ti_flags] "i" (offsetof(struct thread_info, flags)), \
[tif_fork] "i" (TIF_FORK), \
[thread_info] "i" (offsetof(struct task_struct, stack)), \
- [task_canary] "i" (offsetof(struct task_struct, stack_canary)),\
- [current_task] "m" (per_cpu_var(current_task)), \
- [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))\
+ [current_task] "m" (per_cpu_var(current_task)) \
+ __switch_canary_param \
: "memory", "cc" __EXTRA_CLOBBER)
#endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 64c834a39aa..8793ab33e2c 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -11,7 +11,6 @@
#include <linux/hardirq.h>
#include <linux/suspend.h>
#include <linux/kbuild.h>
-#include <asm/pda.h>
#include <asm/processor.h>
#include <asm/segment.h>
#include <asm/thread_info.h>
@@ -48,10 +47,6 @@ int main(void)
#endif
BLANK();
#undef ENTRY
-#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
- DEFINE(pda_size, sizeof(struct x8664_pda));
- BLANK();
-#undef ENTRY
#ifdef CONFIG_PARAVIRT
BLANK();
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7976a6a0f65..3887fcf6e51 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -30,7 +30,6 @@
#include <asm/genapic.h>
#endif
-#include <asm/pda.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/desc.h>
@@ -881,12 +880,13 @@ __setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
-DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
+DEFINE_PER_CPU_FIRST(union irq_stack_union,
+ irq_stack_union) __aligned(PAGE_SIZE);
#ifdef CONFIG_SMP
DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
#else
DEFINE_PER_CPU(char *, irq_stack_ptr) =
- per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
+ per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
#endif
DEFINE_PER_CPU(unsigned long, kernel_stack) =
@@ -895,15 +895,6 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack);
DEFINE_PER_CPU(unsigned int, irq_count) = -1;
-void __cpuinit pda_init(int cpu)
-{
- /* Setup up data that may be needed in __get_free_pages early */
- loadsegment(fs, 0);
- loadsegment(gs, 0);
-
- load_pda_offset(cpu);
-}
-
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
__aligned(PAGE_SIZE);
@@ -967,9 +958,9 @@ void __cpuinit cpu_init(void)
struct task_struct *me;
int i;
- /* CPU 0 is initialised in head64.c */
- if (cpu != 0)
- pda_init(cpu);
+ loadsegment(fs, 0);
+ loadsegment(gs, 0);
+ load_gs_base(cpu);
#ifdef CONFIG_NUMA
if (cpu != 0 && percpu_read(node_number) == 0 &&
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index af67d3227ea..f5b27224769 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -91,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
if (console_loglevel == 10)
early_printk("Kernel alive\n");
- pda_init(0);
-
x86_64_start_reservations(real_mode_data);
}
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index c8ace880661..a0a2b5ca9b7 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -207,19 +207,15 @@ ENTRY(secondary_startup_64)
#ifdef CONFIG_SMP
/*
- * early_gdt_base should point to the gdt_page in static percpu init
- * data area. Computing this requires two symbols - __per_cpu_load
- * and per_cpu__gdt_page. As linker can't do no such relocation, do
- * it by hand. As early_gdt_descr is manipulated by C code for
- * secondary CPUs, this should be done only once for the boot CPU
- * when early_gdt_descr_base contains zero.
+ * Fix up static pointers that need __per_cpu_load added. The assembler
+ * is unable to do this directly. This is only needed for the boot cpu.
+ * These values are set up with the correct base addresses by C code for
+ * secondary cpus.
*/
- movq early_gdt_descr_base(%rip), %rax
- testq %rax, %rax
- jnz 1f
- movq $__per_cpu_load, %rax
- addq $per_cpu__gdt_page, %rax
- movq %rax, early_gdt_descr_base(%rip)
+ movq initial_gs(%rip), %rax
+ cmpl $0, per_cpu__cpu_number(%rax)
+ jne 1f
+ addq %rax, early_gdt_descr_base(%rip)
1:
#endif
/*
@@ -246,13 +242,10 @@ ENTRY(secondary_startup_64)
/* Set up %gs.
*
- * On SMP, %gs should point to the per-cpu area. For initial
- * boot, make %gs point to the init data section. For a
- * secondary CPU,initial_gs should be set to its pda address
- * before the CPU runs this code.
- *
- * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't
- * change.
+ * The base of %gs always points to the bottom of the irqstack
+ * union. If the stack protector canary is enabled, it is
+ * located at %gs:40. Note that, on SMP, the boot cpu uses
+ * init data section till per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
movq initial_gs(%rip),%rax
@@ -285,7 +278,7 @@ ENTRY(secondary_startup_64)
#ifdef CONFIG_SMP
.quad __per_cpu_load
#else
- .quad PER_CPU_VAR(__pda)
+ .quad PER_CPU_VAR(irq_stack_union)
#endif
__FINITDATA
@@ -431,12 +424,8 @@ NEXT_PAGE(level2_spare_pgt)
.globl early_gdt_descr
early_gdt_descr:
.word GDT_ENTRIES*8-1
-#ifdef CONFIG_SMP
early_gdt_descr_base:
- .quad 0x0000000000000000
-#else
.quad per_cpu__gdt_page
-#endif
ENTRY(phys_base)
/* This must match the first entry in level2_kernel_pgt */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index aa89eabf09e..c422eebb0c5 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -47,7 +47,6 @@
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
-#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
@@ -638,13 +637,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
percpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE - KERNEL_STACK_OFFSET);
-#ifdef CONFIG_CC_STACKPROTECTOR
- /*
- * Build time only check to make sure the stack_canary is at
- * offset 40 in the pda; this is a gcc ABI requirement
- */
- BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
-#endif
/*
* Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index efbafbbff58..90b8e154bb5 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -77,30 +77,6 @@ static void __init setup_node_to_cpumask_map(void);
static inline void setup_node_to_cpumask_map(void) { }
#endif
-/*
- * Define load_pda_offset() and per-cpu __pda for x86_64.
- * load_pda_offset() is responsible for loading the offset of pda into
- * %gs.
- *
- * On SMP, pda offset also duals as percpu base address and thus it
- * should be at the start of per-cpu area. To achieve this, it's
- * preallocated in vmlinux_64.lds.S directly instead of using
- * DEFINE_PER_CPU().
- */
-#ifdef CONFIG_X86_64
-void __cpuinit load_pda_offset(int cpu)
-{
- /* Memory clobbers used to order pda/percpu accesses */
- mb();
- wrmsrl(MSR_GS_BASE, cpu_pda(cpu));
- mb();
-}
-#ifndef CONFIG_SMP
-DEFINE_PER_CPU(struct x8664_pda, __pda);
-#endif
-EXPORT_PER_CPU_SYMBOL(__pda);
-#endif /* CONFIG_SMP && CONFIG_X86_64 */
-
#ifdef CONFIG_X86_64
/* correctly size the local cpu masks */
@@ -207,15 +183,13 @@ void __init setup_per_cpu_areas(void)
per_cpu(cpu_number, cpu) = cpu;
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
- (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64;
+ per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64;
/*
- * CPU0 modified pda in the init data area, reload pda
- * offset for CPU0 and clear the area for others.
+ * Up to this point, CPU0 has been using the .data.init
+ * area. Reload the %gs base for CPU0.
*/
if (cpu == 0)
- load_pda_offset(0);
- else
- memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
+ load_gs_base(cpu);
#endif
DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 98c2d055284..ed5aee5f3fc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -59,7 +59,6 @@
#ifdef CONFIG_X86_64
#include <asm/pgalloc.h>
#include <asm/proto.h>
-#include <asm/pda.h>
#else
#include <asm/processor-flags.h>
#include <asm/arch_hooks.h>
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index a09abb8fb97..c9740996430 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -220,8 +220,7 @@ SECTIONS
* so that it can be accessed as a percpu variable.
*/
. = ALIGN(PAGE_SIZE);
- PERCPU_VADDR_PREALLOC(0, :percpu, pda_size)
- per_cpu____pda = __per_cpu_start;
+ PERCPU_VADDR(0, :percpu)
#else
PERCPU(PAGE_SIZE)
#endif
@@ -262,3 +261,8 @@ SECTIONS
*/
ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE")
+
+#ifdef CONFIG_SMP
+ASSERT((per_cpu__irq_stack_union == 0),
+ "irq_stack_union is not at start of per-cpu area");
+#endif
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 75b94139e1f..bef941f6145 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1645,7 +1645,6 @@ asmlinkage void __init xen_start_kernel(void)
#ifdef CONFIG_X86_64
/* Disable until direct per-cpu data access. */
have_vcpu_info_placement = 0;
- pda_init(0);
#endif
xen_smp_init();