summaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel/smpboot.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386/kernel/smpboot.c')
-rw-r--r--arch/i386/kernel/smpboot.c84
1 files changed, 47 insertions, 37 deletions
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 4bb8b77cd65..b0f84e5778a 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -33,6 +33,11 @@
* Dave Jones : Report invalid combinations of Athlon CPUs.
* Rusty Russell : Hacked into shape for new "hotplug" boot process. */
+
+/* SMP boot always wants to use real time delay to allow sufficient time for
+ * the APs to come online */
+#define USE_REAL_TIME_DELAY
+
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -52,6 +57,8 @@
#include <asm/desc.h>
#include <asm/arch_hooks.h>
#include <asm/nmi.h>
+#include <asm/pda.h>
+#include <asm/genapic.h>
#include <mach_apic.h>
#include <mach_wakecpu.h>
@@ -62,7 +69,7 @@ static int __devinitdata smp_b_stepping;
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
EXPORT_SYMBOL(smp_num_siblings);
#endif
@@ -536,11 +543,11 @@ set_cpu_sibling_map(int cpu)
static void __devinit start_secondary(void *unused)
{
/*
- * Dont put anything before smp_callin(), SMP
+ * Don't put *anything* before secondary_cpu_init(), SMP
* booting is too fragile that we want to limit the
* things done here to the most necessary things.
*/
- cpu_init();
+ secondary_cpu_init();
preempt_disable();
smp_callin();
while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
@@ -599,13 +606,16 @@ void __devinit initialize_secondary(void)
"movl %0,%%esp\n\t"
"jmp *%1"
:
- :"r" (current->thread.esp),"r" (current->thread.eip));
+ :"m" (current->thread.esp),"m" (current->thread.eip));
}
+/* Static state in head.S used to set up a CPU */
extern struct {
void * esp;
unsigned short ss;
} stack_start;
+extern struct i386_pda *start_pda;
+extern struct Xgt_desc_struct cpu_gdt_descr;
#ifdef CONFIG_NUMA
@@ -936,9 +946,6 @@ static int __devinit do_boot_cpu(int apicid, int cpu)
unsigned long start_eip;
unsigned short nmi_high = 0, nmi_low = 0;
- ++cpucount;
- alternatives_smp_switch(1);
-
/*
* We can't use kernel_thread since we must avoid to
* reschedule the child.
@@ -946,15 +953,30 @@ static int __devinit do_boot_cpu(int apicid, int cpu)
idle = alloc_idle_task(cpu);
if (IS_ERR(idle))
panic("failed fork for CPU %d", cpu);
+
+ /* Pre-allocate and initialize the CPU's GDT and PDA so it
+ doesn't have to do any memory allocation during the
+ delicate CPU-bringup phase. */
+ if (!init_gdt(cpu, idle)) {
+ printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu);
+ return -1; /* ? */
+ }
+
idle->thread.eip = (unsigned long) start_secondary;
/* start_eip had better be page-aligned! */
start_eip = setup_trampoline();
+ ++cpucount;
+ alternatives_smp_switch(1);
+
/* So we see what's up */
printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
/* Stack for startup_32 can be just as for start_secondary onwards */
stack_start.esp = (void *) idle->thread.esp;
+ start_pda = cpu_pda(cpu);
+ cpu_gdt_descr = per_cpu(cpu_gdt_descr, cpu);
+
irq_ctx_init(cpu);
x86_cpu_to_apicid[cpu] = apicid;
@@ -1049,13 +1071,15 @@ void cpu_exit_clear(void)
struct warm_boot_cpu_info {
struct completion *complete;
+ struct work_struct task;
int apicid;
int cpu;
};
-static void __cpuinit do_warm_boot_cpu(void *p)
+static void __cpuinit do_warm_boot_cpu(struct work_struct *work)
{
- struct warm_boot_cpu_info *info = p;
+ struct warm_boot_cpu_info *info =
+ container_of(work, struct warm_boot_cpu_info, task);
do_boot_cpu(info->apicid, info->cpu);
complete(info->complete);
}
@@ -1064,7 +1088,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
{
DECLARE_COMPLETION_ONSTACK(done);
struct warm_boot_cpu_info info;
- struct work_struct task;
int apicid, ret;
struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
@@ -1089,15 +1112,15 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
info.complete = &done;
info.apicid = apicid;
info.cpu = cpu;
- INIT_WORK(&task, do_warm_boot_cpu, &info);
+ INIT_WORK(&info.task, do_warm_boot_cpu);
tsc_sync_disabled = 1;
/* init low mem mapping */
clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
- KERNEL_PGD_PTRS);
+ min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
flush_tlb_all();
- schedule_work(&task);
+ schedule_work(&info.task);
wait_for_completion(&done);
tsc_sync_disabled = 0;
@@ -1108,34 +1131,15 @@ exit:
}
#endif
-static void smp_tune_scheduling (void)
+static void smp_tune_scheduling(void)
{
unsigned long cachesize; /* kB */
- unsigned long bandwidth = 350; /* MB/s */
- /*
- * Rough estimation for SMP scheduling, this is the number of
- * cycles it takes for a fully memory-limited process to flush
- * the SMP-local cache.
- *
- * (For a P5 this pretty much means we will choose another idle
- * CPU almost always at wakeup time (this is due to the small
- * L1 cache), on PIIs it's around 50-100 usecs, depending on
- * the cache size)
- */
- if (!cpu_khz) {
- /*
- * this basically disables processor-affinity
- * scheduling on SMP without a TSC.
- */
- return;
- } else {
+ if (cpu_khz) {
cachesize = boot_cpu_data.x86_cache_size;
- if (cachesize == -1) {
- cachesize = 16; /* Pentiums, 2x8kB cache */
- bandwidth = 100;
- }
- max_cache_size = cachesize * 1024;
+
+ if (cachesize > 0)
+ max_cache_size = cachesize * 1024;
}
}
@@ -1461,6 +1465,12 @@ int __devinit __cpu_up(unsigned int cpu)
cpu_set(cpu, smp_commenced_mask);
while (!cpu_isset(cpu, cpu_online_map))
cpu_relax();
+
+#ifdef CONFIG_X86_GENERICARCH
+ if (num_online_cpus() > 8 && genapic == &apic_default)
+ panic("Default flat APIC routing can't be used with > 8 cpus\n");
+#endif
+
return 0;
}