diff options
Diffstat (limited to 'arch/x86/kernel')
47 files changed, 397 insertions, 218 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fa88a1d7129..bfd10fd211c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -97,6 +97,8 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; #warning ACPI uses CMPXCHG, i486 and later hardware #endif +static int acpi_mcfg_64bit_base_addr __initdata = FALSE; + /* -------------------------------------------------------------------------- Boot-time Configuration -------------------------------------------------------------------------- */ @@ -158,6 +160,14 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) struct acpi_mcfg_allocation *pci_mmcfg_config; int pci_mmcfg_config_num; +static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) +{ + if (!strcmp(mcfg->header.oem_id, "SGI")) + acpi_mcfg_64bit_base_addr = TRUE; + + return 0; +} + int __init acpi_parse_mcfg(struct acpi_table_header *header) { struct acpi_table_mcfg *mcfg; @@ -190,8 +200,12 @@ int __init acpi_parse_mcfg(struct acpi_table_header *header) } memcpy(pci_mmcfg_config, &mcfg[1], config_size); + + acpi_mcfg_oem_check(mcfg); + for (i = 0; i < pci_mmcfg_config_num; ++i) { - if (pci_mmcfg_config[i].address > 0xFFFFFFFF) { + if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && + !acpi_mcfg_64bit_base_addr) { printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n"); kfree(pci_mmcfg_config); diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index fa2161d5003..426e5d91b63 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -20,7 +20,7 @@ unsigned long acpi_realmode_flags; /* address in low memory of the wakeup routine. */ static unsigned long acpi_realmode; -#ifdef CONFIG_64BIT +#if defined(CONFIG_SMP) && defined(CONFIG_64BIT) static char temp_stack[10240]; #endif @@ -86,7 +86,7 @@ int acpi_save_state_mem(void) #endif /* !CONFIG_64BIT */ header->pmode_cr0 = read_cr0(); - header->pmode_cr4 = read_cr4(); + header->pmode_cr4 = read_cr4_safe(); header->realmode_flags = acpi_realmode_flags; header->real_magic = 0x12345678; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 22d7d050905..69b4d060b21 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -65,7 +65,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) u8 *target; tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - target = (iommu->cmd_buf + tail); + target = iommu->cmd_buf + tail; memcpy_toio(target, cmd, sizeof(*cmd)); tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); @@ -101,16 +101,13 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) */ static int iommu_completion_wait(struct amd_iommu *iommu) { - int ret; + int ret, ready = 0; + unsigned status = 0; struct iommu_cmd cmd; - volatile u64 ready = 0; - unsigned long ready_phys = virt_to_phys(&ready); unsigned long i = 0; memset(&cmd, 0, sizeof(cmd)); - cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; - cmd.data[1] = upper_32_bits(ready_phys); - cmd.data[2] = 1; /* value written to 'ready' */ + cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); iommu->need_sync = 0; @@ -122,9 +119,15 @@ static int iommu_completion_wait(struct amd_iommu *iommu) while (!ready && (i < EXIT_LOOP_COUNT)) { ++i; - cpu_relax(); + /* wait for the bit to become one */ + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; } + /* set bit back to zero */ + status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; + writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); + if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); @@ -161,7 +164,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, address &= PAGE_MASK; CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); cmd.data[1] |= domid; - cmd.data[2] = LOW_U32(address); + cmd.data[2] = lower_32_bits(address); cmd.data[3] = upper_32_bits(address); if (s) /* size bit - we flush more than one 4kb page */ cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index d9a9da597e7..a69cc0f5204 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -801,6 +801,21 @@ static int __init init_memory_definitions(struct acpi_table_header *table) } /* + * Init the device table to not allow DMA access for devices and + * suppress all page faults + */ +static void init_device_table(void) +{ + u16 devid; + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + set_dev_entry_bit(devid, DEV_ENTRY_VALID); + set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); + set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT); + } +} + +/* * This function finally enables all IOMMUs found in the system after * they have been initialized */ @@ -931,6 +946,9 @@ int __init amd_iommu_init(void) if (amd_iommu_pd_alloc_bitmap == NULL) goto free; + /* init the device table */ + init_device_table(); + /* * let all alias entries point to itself */ @@ -954,15 +972,15 @@ int __init amd_iommu_init(void) if (acpi_table_parse("IVRS", init_memory_definitions) != 0) goto free; - ret = amd_iommu_init_dma_ops(); + ret = sysdev_class_register(&amd_iommu_sysdev_class); if (ret) goto free; - ret = sysdev_class_register(&amd_iommu_sysdev_class); + ret = sysdev_register(&device_amd_iommu); if (ret) goto free; - ret = sysdev_register(&device_amd_iommu); + ret = amd_iommu_init_dma_ops(); if (ret) goto free; diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d6c89835837..f88bd0d982b 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1454,8 +1454,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) } } -unsigned int __cpuinitdata maxcpus = NR_CPUS; - void __cpuinit generic_processor_info(int apicid, int version) { int cpu; @@ -1482,12 +1480,6 @@ void __cpuinit generic_processor_info(int apicid, int version) return; } - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); - return; - } - num_processors++; cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); @@ -1720,15 +1712,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg) } early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); -static int __init apic_set_verbosity(char *str) +static int __init apic_set_verbosity(char *arg) { - if (strcmp("debug", str) == 0) + if (!arg) + return -EINVAL; + + if (strcmp(arg, "debug") == 0) apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) + else if (strcmp(arg, "verbose") == 0) apic_verbosity = APIC_VERBOSE; - return 1; + + return 0; } -__setup("apic=", apic_set_verbosity); +early_param("apic", apic_set_verbosity); static int __init lapic_insert_resource(void) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7f1f030da7e..446c062e831 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -90,7 +90,6 @@ static unsigned long apic_phys; unsigned long mp_lapic_addr; -unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Get the LAPIC version */ @@ -1062,12 +1061,6 @@ void __cpuinit generic_processor_info(int apicid, int version) return; } - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); - return; - } - num_processors++; cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 84a8220a607..a6ef672adbb 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -56,9 +56,22 @@ void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) switch (c->x86_vendor) { case X86_VENDOR_INTEL: - if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) + /* + * There is a known erratum on Pentium III and Core Solo + * and Core Duo CPUs. + * " Page with PAT set to WC while associated MTRR is UC + * may consolidate to UC " + * Because of this erratum, it is better to stick with + * setting WC in MTRR rather than using PAT on these CPUs. + * + * Enable PAT WC only on P4, Core 2 or later CPUs. + */ + if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15)) return; - break; + + pat_disable("PAT WC disabled due to known CPU erratum."); + return; + case X86_VENDOR_AMD: case X86_VENDOR_CENTAUR: case X86_VENDOR_TRANSMETA: diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c9b58a806e8..c8e315f1aa8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -50,6 +50,8 @@ static double __initdata y = 3145727.0; */ static void __init check_fpu(void) { + s32 fdiv_bug; + if (!boot_cpu_data.hard_math) { #ifndef CONFIG_MATH_EMULATION printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); @@ -74,8 +76,10 @@ static void __init check_fpu(void) "fistpl %0\n\t" "fwait\n\t" "fninit" - : "=m" (*&boot_cpu_data.fdiv_bug) + : "=m" (*&fdiv_bug) : "m" (*&x), "m" (*&y)); + + boot_cpu_data.fdiv_bug = fdiv_bug; if (boot_cpu_data.fdiv_bug) printk("Hmm, FPU with FDIV bug.\n"); } diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index cb7a5715596..efae3b22a0f 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -235,9 +235,9 @@ config X86_LONGHAUL If in doubt, say N. config X86_E_POWERSAVER - tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" + tristate "VIA C7 Enhanced PowerSaver" select CPU_FREQ_TABLE - depends on X86_32 && EXPERIMENTAL + depends on X86_32 help This adds the CPUFreq driver for VIA C7 processors. diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index 94619c22f56..e4a4bf870e9 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -44,7 +44,7 @@ struct s_elan_multiplier { * It is important that the frequencies * are listed in ascending order here! */ -struct s_elan_multiplier elan_multiplier[] = { +static struct s_elan_multiplier elan_multiplier[] = { {1000, 0x02, 0x18}, {2000, 0x02, 0x10}, {4000, 0x02, 0x08}, diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index c45ca6d4dce..84bb395038d 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -66,7 +66,6 @@ static u32 find_freq_from_fid(u32 fid) return 800 + (fid * 100); } - /* Return a frequency in KHz, given an input fid */ static u32 find_khz_freq_from_fid(u32 fid) { @@ -78,7 +77,6 @@ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 p return data[pstate].frequency; } - /* Return the vco fid for an input fid * * Each "low" fid has corresponding "high" fid, and you can get to "low" fids @@ -166,7 +164,6 @@ static void fidvid_msr_init(void) wrmsr(MSR_FIDVID_CTL, lo, hi); } - /* write the new fid value along with the other control fields to the msr */ static int write_new_fid(struct powernow_k8_data *data, u32 fid) { diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3fd7a67bb06..e710a21bb6e 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -134,23 +134,6 @@ static void __cpuinit set_cx86_memwb(void) setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); } -static void __cpuinit set_cx86_inc(void) -{ - unsigned char ccr3; - - printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n"); - - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - /* PCR1 -- Performance Control */ - /* Incrementor on, whatever that is */ - setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); - /* PCR0 -- Performance Control */ - /* Incrementor Margin 10 */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ -} - /* * Configure later MediaGX and/or Geode processor. */ @@ -174,7 +157,6 @@ static void __cpuinit geode_configure(void) set_cx86_memwb(); set_cx86_reorder(); - set_cx86_inc(); local_irq_restore(flags); } diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 65a339678ec..726a5fcdf34 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -759,6 +759,7 @@ static struct sysdev_class mce_sysclass = { }; DEFINE_PER_CPU(struct sys_device, device_mce); +void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata; /* Why are there no generic functions for this? */ #define ACCESSOR(name, var, start) \ @@ -883,9 +884,13 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, case CPU_ONLINE: case CPU_ONLINE_FROZEN: mce_create_device(cpu); + if (threshold_cpu_callback) + threshold_cpu_callback(action, cpu); break; case CPU_DEAD: case CPU_DEAD_FROZEN: + if (threshold_cpu_callback) + threshold_cpu_callback(action, cpu); mce_remove_device(cpu); break; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 88736cadbaa..5eb390a4b2e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -628,6 +628,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) deallocate_threshold_block(cpu, bank); free_out: + kobject_del(b->kobj); kobject_put(b->kobj); kfree(b); per_cpu(threshold_banks, cpu)[bank] = NULL; @@ -645,14 +646,11 @@ static void threshold_remove_device(unsigned int cpu) } /* get notified when a cpu comes on/off */ -static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action, + unsigned int cpu) { - /* cpu was unsigned int to begin with */ - unsigned int cpu = (unsigned long)hcpu; - if (cpu >= NR_CPUS) - goto out; + return; switch (action) { case CPU_ONLINE: @@ -666,14 +664,8 @@ static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, default: break; } - out: - return NOTIFY_OK; } -static struct notifier_block threshold_cpu_notifier __cpuinitdata = { - .notifier_call = threshold_cpu_callback, -}; - static __init int threshold_init_device(void) { unsigned lcpu = 0; @@ -684,7 +676,7 @@ static __init int threshold_init_device(void) if (err) return err; } - register_hotcpu_notifier(&threshold_cpu_notifier); + threshold_cpu_callback = amd_64_threshold_cpu_callback; return 0; } diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 509bd3d9eac..cb7d3b6a80e 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -379,6 +379,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, unsigned long *size, mtrr_type *type) { unsigned int mask_lo, mask_hi, base_lo, base_hi; + unsigned int tmp, hi; rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); if ((mask_lo & 0x800) == 0) { @@ -392,8 +393,23 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); /* Work out the shifted address mask. */ - mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) - | mask_lo >> PAGE_SHIFT; + tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; + mask_lo = size_or_mask | tmp; + /* Expand tmp with high bits to all 1s*/ + hi = fls(tmp); + if (hi > 0) { + tmp |= ~((1<<(hi - 1)) - 1); + + if (tmp != mask_lo) { + static int once = 1; + + if (once) { + printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); + once = 0; + } + mask_lo = tmp; + } + } /* This works correctly if size is a power of two, i.e. a contiguous range. */ diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6f23969c8fa..b117d7f8a56 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1496,11 +1496,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) /* kvm/qemu doesn't have mtrr set right, don't trim them all */ if (!highest_pfn) { - if (!kvm_para_available()) { - printk(KERN_WARNING + WARN(!kvm_para_available(), KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n"); - WARN_ON(1); - } return 0; } diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index de7439f82b9..05cc22dbd4f 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -478,7 +478,13 @@ static int setup_p4_watchdog(unsigned nmi_hz) perfctr_msr = MSR_P4_IQ_PERFCTR1; evntsel_msr = MSR_P4_CRU_ESCR0; cccr_msr = MSR_P4_IQ_CCCR1; - cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); + + /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */ + if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4) + cccr_val = P4_CCCR_OVF_PMI0; + else + cccr_val = P4_CCCR_OVF_PMI1; + cccr_val |= P4_CCCR_ESCR_SELECT(4); } evntsel = P4_ESCR_EVENT_SELECT(0x3F) diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index 4b63c8e1f13..5cab48ee61a 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c @@ -53,7 +53,7 @@ void efi_call_phys_prelog(void) * directory. If I have PAE, I just need to duplicate one entry in * page directory. */ - cr4 = read_cr4(); + cr4 = read_cr4_safe(); if (cr4 & X86_CR4_PAE) { efi_bak_pg_dir_pointer[0].pgd = @@ -91,7 +91,7 @@ void efi_call_phys_epilog(void) gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); - cr4 = read_cr4(); + cr4 = read_cr4_safe(); if (cr4 & X86_CR4_PAE) { swapper_pg_dir[pgd_index(0)].pgd = diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 2cfcbded888..bfa837cb16b 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -222,7 +222,7 @@ static __init void map_low_mmrs(void) enum map_type {map_wb, map_uc}; -static void map_high(char *id, unsigned long base, int shift, enum map_type map_type) +static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type) { unsigned long bytes, paddr; @@ -293,7 +293,9 @@ static __init void uv_rtc_init(void) sn_rtc_cycles_per_second = ticks_per_sec; } -static __init void uv_system_init(void) +static bool uv_system_inited; + +void __init uv_system_init(void) { union uvh_si_addr_map_config_u m_n_config; union uvh_node_id_u node_id; @@ -383,6 +385,7 @@ static __init void uv_system_init(void) map_mmr_high(max_pnode); map_config_high(max_pnode); map_mmioh_high(max_pnode); + uv_system_inited = true; } /* @@ -391,8 +394,7 @@ static __init void uv_system_init(void) */ void __cpuinit uv_cpu_init(void) { - if (!uv_node_to_blade) - uv_system_init(); + BUG_ON(!uv_system_inited); uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 1b318e903bf..9bfc4d72fb2 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -88,6 +88,7 @@ void __init x86_64_start_kernel(char * real_mode_data) BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == (__START_KERNEL & PGDIR_MASK))); + BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ad2b15a1334..59fd3b6b130 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -359,6 +359,7 @@ static int hpet_clocksource_register(void) int __init hpet_enable(void) { unsigned long id; + int i; if (!is_hpet_capable()) return 0; @@ -369,6 +370,29 @@ int __init hpet_enable(void) * Read the period and check for a sane value: */ hpet_period = hpet_readl(HPET_PERIOD); + + /* + * AMD SB700 based systems with spread spectrum enabled use a + * SMM based HPET emulation to provide proper frequency + * setting. The SMM code is initialized with the first HPET + * register access and takes some time to complete. During + * this time the config register reads 0xffffffff. We check + * for max. 1000 loops whether the config register reads a non + * 0xffffffff value to make sure that HPET is up and running + * before we go further. A counting loop is safe, as the HPET + * access takes thousands of CPU cycles. On non SB700 based + * machines this check is only done once and has no side + * effects. + */ + for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) { + if (i == 1000) { + printk(KERN_WARNING + "HPET config register value = 0xFFFFFFFF. " + "Disabling HPET\n"); + goto out_nohpet; + } + } + if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) goto out_nohpet; diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index de9aa0e3a9c..09cddb57bec 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -57,7 +57,7 @@ atomic_t irq_mis_count; static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); +DEFINE_SPINLOCK(vector_lock); int timer_through_8259 __initdata; @@ -1209,10 +1209,6 @@ static int assign_irq_vector(int irq) return vector; } -void setup_vector_irq(int cpu) -{ -} - static struct irq_chip ioapic_chip; #define IOAPIC_AUTO -1 diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 8269434d170..61a83b70c18 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -101,7 +101,7 @@ int timer_through_8259 __initdata; static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); -DEFINE_SPINLOCK(vector_lock); +static DEFINE_SPINLOCK(vector_lock); /* * # of IRQ routing registers @@ -697,6 +697,19 @@ static int pin_2_irq(int idx, int apic, int pin) return irq; } +void lock_vector_lock(void) +{ + /* Used to the online set of cpus does not change + * during assign_irq_vector. + */ + spin_lock(&vector_lock); +} + +void unlock_vector_lock(void) +{ + spin_unlock(&vector_lock); +} + static int __assign_irq_vector(int irq, cpumask_t mask) { /* @@ -802,7 +815,7 @@ static void __clear_irq_vector(int irq) cpus_clear(cfg->domain); } -static void __setup_vector_irq(int cpu) +void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ /* This function must be called with vector_lock held */ @@ -825,14 +838,6 @@ static void __setup_vector_irq(int cpu) } } -void setup_vector_irq(int cpu) -{ - spin_lock(&vector_lock); - __setup_vector_irq(smp_processor_id()); - spin_unlock(&vector_lock); -} - - static struct irq_chip ioapic_chip; static void ioapic_register_intr(int irq, unsigned long trigger) diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 9fe478d9840..0732adba05c 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/numa.h> #include <linux/ftrace.h> +#include <linux/suspend.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -78,7 +79,7 @@ static void load_segments(void) /* * A architecture hook called to validate the * proposed image and prepare the control pages - * as needed. The pages for KEXEC_CONTROL_CODE_SIZE + * as needed. The pages for KEXEC_CONTROL_PAGE_SIZE * have been allocated, but the segments have yet * been copied into the kernel. * @@ -113,6 +114,7 @@ void machine_kexec(struct kimage *image) { unsigned long page_list[PAGES_NR]; void *control_page; + int save_ftrace_enabled; asmlinkage unsigned long (*relocate_kernel_ptr)(unsigned long indirection_page, unsigned long control_page, @@ -120,7 +122,12 @@ void machine_kexec(struct kimage *image) unsigned int has_pae, unsigned int preserve_context); - tracer_disable(); +#ifdef CONFIG_KEXEC_JUMP + if (kexec_image->preserve_context) + save_processor_state(); +#endif + + save_ftrace_enabled = __ftrace_enabled_save(); /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); @@ -138,7 +145,7 @@ void machine_kexec(struct kimage *image) } control_page = page_address(image->control_code_page); - memcpy(control_page, relocate_kernel, PAGE_SIZE/2); + memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); relocate_kernel_ptr = control_page; page_list[PA_CONTROL_PAGE] = __pa(control_page); @@ -178,6 +185,13 @@ void machine_kexec(struct kimage *image) (unsigned long)page_list, image->start, cpu_has_pae, image->preserve_context); + +#ifdef CONFIG_KEXEC_JUMP + if (kexec_image->preserve_context) + restore_processor_state(); +#endif + + __ftrace_enabled_restore(save_ftrace_enabled); } void arch_crash_save_vmcoreinfo(void) diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 07c0f828f48..3b599518c32 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c @@ -33,6 +33,8 @@ #include <linux/module.h> #include <asm/geode.h> +#define MFGPT_DEFAULT_IRQ 7 + static struct mfgpt_timer_t { unsigned int avail:1; } mfgpt_timers[MFGPT_MAX_TIMERS]; @@ -157,29 +159,48 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) } EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); -int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) +int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable) { - u32 val, dummy; - int offset; + u32 zsel, lpc, dummy; + int shift; if (timer < 0 || timer >= MFGPT_MAX_TIMERS) return -EIO; - if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) + /* + * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA + * is using the same CMP of the timer's Siamese twin, the IRQ is set to + * 2, and we mustn't use nor change it. + * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the + * IRQ of the 1st. This can only happen if forcing an IRQ, calling this + * with *irq==0 is safe. Currently there _are_ no 2 drivers. + */ + rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); + shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4; + if (((zsel >> shift) & 0xF) == 2) return -EIO; - rdmsr(MSR_PIC_ZSEL_LOW, val, dummy); + /* Choose IRQ: if none supplied, keep IRQ already set or use default */ + if (!*irq) + *irq = (zsel >> shift) & 0xF; + if (!*irq) + *irq = MFGPT_DEFAULT_IRQ; - offset = (timer % 4) * 4; - - val &= ~((0xF << offset) | (0xF << (offset + 16))); + /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */ + if (*irq < 1 || *irq == 2 || *irq > 15) + return -EIO; + rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy); + if (lpc & (1 << *irq)) + return -EIO; + /* All chosen and checked - go for it */ + if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) + return -EIO; if (enable) { - val |= (irq & 0x0F) << (offset); - val |= (irq & 0x0F) << (offset + 16); + zsel = (zsel & ~(0xF << shift)) | (*irq << shift); + wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); } - wrmsr(MSR_PIC_ZSEL_LOW, val, dummy); return 0; } @@ -242,7 +263,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN; static u16 mfgpt_event_clock; -static int irq = 7; +static int irq; static int __init mfgpt_setup(char *str) { get_option(&str, &irq); @@ -346,7 +367,7 @@ int __init mfgpt_timer_setup(void) mfgpt_event_clock = timer; /* Set up the IRQ on the MFGPT side */ - if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, irq)) { + if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) { printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq); return -EIO; } @@ -374,13 +395,14 @@ int __init mfgpt_timer_setup(void) &mfgpt_clockevent); printk(KERN_INFO - "mfgpt-timer: registering the MFGPT timer as a clock event.\n"); + "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n", + timer, irq); clockevents_register_device(&mfgpt_clockevent); return 0; err: - geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, irq); + geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq); printk(KERN_ERR "mfgpt-timer: Unable to set up the MFGPT clock source\n"); return -EIO; diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index fdfdc550b36..efc2f361fe8 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -238,7 +238,7 @@ static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { {} }; -void __init check_enable_amd_mmconf_dmi(void) +void __cpuinit check_enable_amd_mmconf_dmi(void) { dmi_check_system(mmconf_dmi_table); } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 6ae005ccaed..b3fb430725c 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -49,7 +49,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } -static void __cpuinit MP_processor_info(struct mpc_config_processor *m) +static void __init MP_processor_info(struct mpc_config_processor *m) { int apicid; char *bootup_cpu = ""; @@ -83,7 +83,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) if (x86_quirks->mpc_oem_bus_info) x86_quirks->mpc_oem_bus_info(m, str); else - printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); + apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str); #if MAX_MP_BUSSES < 256 if (m->mpc_busid >= MAX_MP_BUSSES) { @@ -154,7 +154,7 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) static void print_MP_intsrc_info(struct mpc_config_intsrc *m) { - printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," + apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, @@ -163,7 +163,7 @@ static void print_MP_intsrc_info(struct mpc_config_intsrc *m) static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) { - printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," + apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, @@ -235,7 +235,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m) static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) { - printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x," + apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC LINT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, @@ -484,7 +484,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) } -static void construct_ioapic_table(int mpc_default_type) +static void __init construct_ioapic_table(int mpc_default_type) { struct mpc_config_ioapic ioapic; struct mpc_config_bus bus; @@ -529,7 +529,7 @@ static void construct_ioapic_table(int mpc_default_type) construct_default_ioirq_mptable(mpc_default_type); } #else -static inline void construct_ioapic_table(int mpc_default_type) { } +static inline void __init construct_ioapic_table(int mpc_default_type) { } #endif static inline void __init construct_default_ISA_mptable(int mpc_default_type) @@ -695,7 +695,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned int *bp = phys_to_virt(base); struct intel_mp_floating *mpf; - printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length); + apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", + bp, length); BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 9fd80955244..e4393808688 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -131,7 +131,7 @@ static int msr_open(struct inode *inode, struct file *file) ret = -EIO; /* MSR not supported */ out: unlock_kernel(); - return 0; + return ret; } /* diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ac6d51222e7..abb78a2cc4a 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -114,6 +114,23 @@ static __init void nmi_cpu_busy(void *data) } #endif +static void report_broken_nmi(int cpu, int *prev_nmi_count) +{ + printk(KERN_CONT "\n"); + + printk(KERN_WARNING + "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", + cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); + + printk(KERN_WARNING + "Please report this to bugzilla.kernel.org,\n"); + printk(KERN_WARNING + "and attach the output of the 'dmesg' command.\n"); + + per_cpu(wd_enabled, cpu) = 0; + atomic_dec(&nmi_active); +} + int __init check_nmi_watchdog(void) { unsigned int *prev_nmi_count; @@ -141,15 +158,8 @@ int __init check_nmi_watchdog(void) for_each_online_cpu(cpu) { if (!per_cpu(wd_enabled, cpu)) continue; - if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { - printk(KERN_WARNING "WARNING: CPU#%d: NMI " - "appears to be stuck (%d->%d)!\n", - cpu, - prev_nmi_count[cpu], - get_nmi_count(cpu)); - per_cpu(wd_enabled, cpu) = 0; - atomic_dec(&nmi_active); - } + if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) + report_broken_nmi(cpu, prev_nmi_count); } endflag = 1; if (!atomic_read(&nmi_active)) { diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index b8c45610b20..eecc8c18f01 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -73,7 +73,7 @@ static void __init smp_dump_qct(void) } -void __init numaq_tsc_disable(void) +void __cpuinit numaq_tsc_disable(void) { if (!found_numaq) return; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 94da4d52d79..300da17e61c 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -471,7 +471,7 @@ struct pv_lock_ops pv_lock_ops = { .spin_unlock = __ticket_spin_unlock, #endif }; -EXPORT_SYMBOL_GPL(pv_lock_ops); +EXPORT_SYMBOL(pv_lock_ops); EXPORT_SYMBOL_GPL(pv_time_ops); EXPORT_SYMBOL (pv_cpu_ops); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index b67a4b1d4ea..dcdac6c826e 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -343,9 +343,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, /* were we called with bad_dma_address? */ badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { - printk(KERN_ERR "Calgary: driver tried unmapping bad DMA " + WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " "address 0x%Lx\n", dma_addr); - WARN_ON(1); return; } @@ -1269,13 +1268,15 @@ static inline int __init determine_tce_table_size(u64 ram) static int __init build_detail_arrays(void) { unsigned long ptr; - int i, scal_detail_size, rio_detail_size; + unsigned numnodes, i; + int scal_detail_size, rio_detail_size; - if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ + numnodes = rio_table_hdr->num_scal_dev; + if (numnodes > MAX_NUMNODES){ printk(KERN_WARNING "Calgary: MAX_NUMNODES too low! Defined as %d, " "but system has %d nodes.\n", - MAX_NUMNODES, rio_table_hdr->num_scal_dev); + MAX_NUMNODES, numnodes); return -ENODEV; } @@ -1296,8 +1297,7 @@ static int __init build_detail_arrays(void) } ptr = ((unsigned long)rio_table_hdr) + 3; - for (i = 0; i < rio_table_hdr->num_scal_dev; - i++, ptr += scal_detail_size) + for (i = 0; i < numnodes; i++, ptr += scal_detail_size) scal_devs[i] = (struct scal_detail *)ptr; for (i = 0; i < rio_table_hdr->num_rio_dev; @@ -1350,7 +1350,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) * Function for kdump case. Get the tce tables from first kernel * by reading the contents of the base adress register of calgary iommu */ -static void get_tce_space_from_tar() +static void __init get_tce_space_from_tar(void) { int bus; void __iomem *target; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 8dbffb846de..87d4d6964ec 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void) pci_swiotlb_init(); } + +unsigned long iommu_num_pages(unsigned long addr, unsigned long len) +{ + unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); + + return size >> PAGE_SHIFT; +} +EXPORT_SYMBOL(iommu_num_pages); #endif /* diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 53bc653ed5c..3b7a1ddcc0b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -95,7 +95,6 @@ static inline void play_dead(void) { /* This must be done before dead CPU ack */ cpu_exit_clear(); - wbinvd(); mb(); /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; @@ -104,8 +103,8 @@ static inline void play_dead(void) * With physical CPU hotplug, we should halt the cpu */ local_irq_disable(); - while (1) - halt(); + /* mask all interrupts, flush any and all caches, and halt */ + wbinvd_halt(); } #else static inline void play_dead(void) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3fb62a7d9a1..71553b664e2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -93,14 +93,13 @@ DECLARE_PER_CPU(int, cpu_state); static inline void play_dead(void) { idle_task_exit(); - wbinvd(); mb(); /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; local_irq_disable(); - while (1) - halt(); + /* mask all interrupts, flush any and all caches, and halt */ + wbinvd_halt(); } #else static inline void play_dead(void) diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index 703310a9902..6f50664b2ba 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -20,10 +20,11 @@ #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) #define PAE_PGD_ATTR (_PAGE_PRESENT) -/* control_page + PAGE_SIZE/2 ~ control_page + PAGE_SIZE * 3/4 are - * used to save some data for jumping back +/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE + * ~ control_page + PAGE_SIZE are used as data storage and stack for + * jumping back */ -#define DATA(offset) (PAGE_SIZE/2+(offset)) +#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) /* Minimal CPU state */ #define ESP DATA(0x0) @@ -376,3 +377,6 @@ swap_pages: popl %ebx popl %ebp ret + + .globl kexec_control_code_size +.set kexec_control_code_size, . - relocate_kernel diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b520dae02bf..362d4e7f2d3 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -445,7 +445,7 @@ static void __init reserve_early_setup_data(void) * @size: Size of the crashkernel memory to reserve. * Returns the base address on success, and -1ULL on failure. */ -unsigned long long find_and_reserve_crashkernel(unsigned long long size) +unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) { const unsigned long long alignment = 16<<20; /* 16M */ unsigned long long start = 0LL; @@ -670,6 +670,14 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); +#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) + /* + * Must be before kernel pagetables are setup + * or fixmap area is touched. + */ + vmi_init(); +#endif + /* after early param, so could get panic from serial */ reserve_early_setup_data(); @@ -788,10 +796,6 @@ void __init setup_arch(char **cmdline_p) initmem_init(0, max_pfn); -#ifdef CONFIG_X86_64 - dma32_reserve_bootmem(); -#endif - #ifdef CONFIG_ACPI_SLEEP /* * Reserve low memory region for sleep support. @@ -806,20 +810,21 @@ void __init setup_arch(char **cmdline_p) #endif reserve_crashkernel(); +#ifdef CONFIG_X86_64 + /* + * dma32_reserve_bootmem() allocates bootmem which may conflict + * with the crashkernel command line, so do that after + * reserve_crashkernel() + */ + dma32_reserve_bootmem(); +#endif + reserve_ibft_region(); #ifdef CONFIG_KVM_CLOCK kvmclock_init(); #endif -#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) - /* - * Must be after max_low_pfn is determined, and before kernel - * pagetables are setup. - */ - vmi_init(); -#endif - paravirt_pagetable_setup_start(swapper_pg_dir); paging_init(); paravirt_pagetable_setup_done(swapper_pg_dir); @@ -856,12 +861,6 @@ void __init setup_arch(char **cmdline_p) init_apic_mappings(); ioapic_init_mappings(); -#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) - if (def_to_bigsmp) - printk(KERN_WARNING "More than 8 CPUs detected and " - "CONFIG_X86_PC cannot handle it.\nUse " - "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); -#endif kvm_guest_init(); e820_reserve_resources(); diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index b45ef8ddd65..ca316b5b742 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -104,7 +104,16 @@ static inline int restore_i387(struct _fpstate __user *buf) clts(); task_thread_info(current)->status |= TS_USEDFPU; } - return restore_fpu_checking((__force struct i387_fxsave_struct *)buf); + err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf); + if (unlikely(err)) { + /* + * Encountered an error while doing the restore from the + * user buffer, clear the fpu state. + */ + clear_fpu(tsk); + clear_used_math(); + } + return err; } /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 332512767f4..7985c5b3f91 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -326,12 +326,16 @@ static void __cpuinit start_secondary(void *unused) * for which cpus receive the IPI. Holding this * lock helps us to not include this cpu in a currently in progress * smp_call_function(). + * + * We need to hold vector_lock so there the set of online cpus + * does not change while we are assigning vectors to cpus. Holding + * this lock ensures we don't half assign or remove an irq from a cpu. */ ipi_call_lock_irq(); -#ifdef CONFIG_X86_IO_APIC - setup_vector_irq(smp_processor_id()); -#endif + lock_vector_lock(); + __setup_vector_irq(smp_processor_id()); cpu_set(smp_processor_id(), cpu_online_map); + unlock_vector_lock(); ipi_call_unlock_irq(); per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; @@ -752,6 +756,14 @@ static void __cpuinit do_fork_idle(struct work_struct *work) } #ifdef CONFIG_X86_64 + +/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ +static void __ref free_bootmem_pda(struct x8664_pda *oldpda) +{ + if (!after_bootmem) + free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); +} + /* * Allocate node local memory for the AP pda. * @@ -780,8 +792,7 @@ int __cpuinit get_local_pda(int cpu) if (oldpda) { memcpy(newpda, oldpda, size); - if (!after_bootmem) - free_bootmem((unsigned long)oldpda, size); + free_bootmem_pda(oldpda); } newpda->in_bootmem = 0; @@ -1044,6 +1055,34 @@ static __init void disable_smp(void) static int __init smp_sanity_check(unsigned max_cpus) { preempt_disable(); + +#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) + if (def_to_bigsmp && nr_cpu_ids > 8) { + unsigned int cpu; + unsigned nr; + + printk(KERN_WARNING + "More than 8 CPUs detected - skipping them.\n" + "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); + + nr = 0; + for_each_present_cpu(cpu) { + if (nr >= 8) + cpu_clear(cpu, cpu_present_map); + nr++; + } + + nr = 0; + for_each_possible_cpu(cpu) { + if (nr >= 8) + cpu_clear(cpu, cpu_possible_map); + nr++; + } + + nr_cpu_ids = 8; + } +#endif + if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { printk(KERN_WARNING "weird, boot CPU (#%d) not listed" "by the BIOS.\n", hard_smp_processor_id()); @@ -1182,6 +1221,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) printk(KERN_INFO "CPU%d: ", 0); print_cpu_info(&cpu_data(0)); setup_boot_clock(); + + if (is_uv_system()) + uv_system_init(); out: preempt_enable(); } @@ -1336,7 +1378,9 @@ int __cpu_disable(void) remove_siblinginfo(cpu); /* It's now safe to remove this processor from the online map */ + lock_vector_lock(); remove_cpu_from_maps(cpu); + unlock_vector_lock(); fixup_irqs(cpu_online_map); return 0; } @@ -1370,17 +1414,3 @@ void __cpu_die(unsigned int cpu) BUG(); } #endif - -/* - * If the BIOS enumerates physical processors before logical, - * maxcpus=N at enumeration-time can be used to disable HT. - */ -static int __init parse_maxcpus(char *arg) -{ - extern unsigned int maxcpus; - - if (arg) - maxcpus = simple_strtoul(arg, NULL, 0); - return 0; -} -early_param("maxcpus", parse_maxcpus); diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 99941b37eca..397e309839d 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -8,18 +8,21 @@ DEFINE_PER_CPU(unsigned long, this_cpu_off); EXPORT_PER_CPU_SYMBOL(this_cpu_off); -/* Initialize the CPU's GDT. This is either the boot CPU doing itself - (still using the master per-cpu area), or a CPU doing it for a - secondary which will soon come up. */ +/* + * Initialize the CPU's GDT. This is either the boot CPU doing itself + * (still using the master per-cpu area), or a CPU doing it for a + * secondary which will soon come up. + */ __cpuinit void init_gdt(int cpu) { - struct desc_struct *gdt = get_cpu_gdt_table(cpu); + struct desc_struct gdt; - pack_descriptor(&gdt[GDT_ENTRY_PERCPU], - __per_cpu_offset[cpu], 0xFFFFF, + pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, 0x2 | DESCTYPE_S, 0x8); + gdt.s = 1; - gdt[GDT_ENTRY_PERCPU].s = 1; + write_gdt_entry(get_cpu_gdt_table(cpu), + GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; per_cpu(cpu_number, cpu) = cpu; diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index d0fbb7712ab..8b8c0d6640f 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -17,6 +17,7 @@ #include <asm/genapic.h> #include <asm/idle.h> #include <asm/tsc.h> +#include <asm/irq_vectors.h> #include <mach_apic.h> @@ -783,7 +784,7 @@ static int __init uv_bau_init(void) uv_init_blade(blade, node, cur_cpu); cur_cpu += uv_blade_nr_possible_cpus(blade); } - set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); + alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); uv_enable_timeouts(); return 0; diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 3f18d73f420..513caaca711 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1131,7 +1131,14 @@ asmlinkage void math_state_restore(void) } clts(); /* Allow maths ops (or we recurse) */ - restore_fpu_checking(&me->thread.xstate->fxsave); + /* + * Paranoid restore. send a SIGSEGV if we fail to restore the state. + */ + if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { + stts(); + force_sig(SIGSEGV, me); + return; + } task_thread_info(me)->status |= TS_USEDFPU; me->fpu_counter++; } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7603c055390..8e786b0d665 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); /* * Read TSC and the reference counters. Take care of SMI disturbance */ -static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) +static u64 tsc_read_refs(u64 *pm, u64 *hpet) { u64 t1, t2; int i; @@ -314,7 +314,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, mark_tsc_unstable("cpufreq changes"); } - set_cyc2ns_scale(tsc_khz_ref, freq->cpu); + set_cyc2ns_scale(tsc_khz, freq->cpu); return 0; } @@ -325,6 +325,10 @@ static struct notifier_block time_cpufreq_notifier_block = { static int __init cpufreq_tsc(void) { + if (!cpu_has_tsc) + return 0; + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + return 0; cpufreq_register_notifier(&time_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); return 0; diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 0577825cf89..9ffb01c31c4 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -88,11 +88,9 @@ static __cpuinit void check_tsc_warp(void) __raw_spin_unlock(&sync_lock); } } - if (!(now-start)) { - printk("Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", + WARN(!(now-start), + "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", now-start, end-start); - WARN_ON(1); - } } /* diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 41e01b145c4..594ef47f0a6 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -184,8 +184,6 @@ static int __init visws_get_smp_config(unsigned int early) return 1; } -extern unsigned int __cpuinitdata maxcpus; - /* * The Visual Workstation is Intel MP compliant in the hardware * sense, but it doesn't have a BIOS(-configuration table). @@ -244,8 +242,8 @@ static int __init visws_find_smp_config(unsigned int reserve) ncpus = CO_CPU_MAX; } - if (ncpus > maxcpus) - ncpus = maxcpus; + if (ncpus > setup_max_cpus) + ncpus = setup_max_cpus; #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 1; diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 0a1b1a9d922..6ca515d6db5 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -37,6 +37,7 @@ #include <asm/timer.h> #include <asm/vmi_time.h> #include <asm/kmap_types.h> +#include <asm/setup.h> /* Convenient for calling VMI functions indirectly in the ROM */ typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); @@ -683,7 +684,7 @@ void vmi_bringup(void) { /* We must establish the lowmem mapping for MMU ops to work */ if (vmi_ops.set_linear_mapping) - vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0); + vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0); } /* diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index cdb2363697d..af5bdad8460 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -209,3 +209,11 @@ SECTIONS DWARF_DEBUG } + +#ifdef CONFIG_KEXEC +/* Link time checks */ +#include <asm/kexec.h> + +ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big") +#endif |