diff options
Diffstat (limited to 'arch/x86/kernel')
52 files changed, 594 insertions, 225 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 488be461a38..c05872aa3ce 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -63,7 +63,6 @@ EXPORT_SYMBOL(acpi_disabled); int acpi_noirq; /* skip ACPI IRQ initialization */ int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */ EXPORT_SYMBOL(acpi_pci_disabled); -int acpi_ht __initdata = 1; /* enable HT */ int acpi_lapic; int acpi_ioapic; @@ -119,7 +118,7 @@ static unsigned int gsi_to_irq(unsigned int gsi) if (gsi >= NR_IRQS_LEGACY) irq = gsi; else - irq = gsi_end + 1 + gsi; + irq = gsi_top + gsi; return irq; } @@ -130,10 +129,10 @@ static u32 irq_to_gsi(int irq) if (irq < NR_IRQS_LEGACY) gsi = isa_irq_to_gsi[irq]; - else if (irq <= gsi_end) + else if (irq < gsi_top) gsi = irq; - else if (irq <= (gsi_end + NR_IRQS_LEGACY)) - gsi = irq - gsi_end; + else if (irq < (gsi_top + NR_IRQS_LEGACY)) + gsi = irq - gsi_top; else gsi = 0xffffffff; @@ -1501,9 +1500,8 @@ void __init acpi_boot_table_init(void) /* * If acpi_disabled, bail out - * One exception: acpi=ht continues far enough to enumerate LAPICs */ - if (acpi_disabled && !acpi_ht) + if (acpi_disabled) return; /* @@ -1534,9 +1532,8 @@ int __init early_acpi_boot_init(void) { /* * If acpi_disabled, bail out - * One exception: acpi=ht continues far enough to enumerate LAPICs */ - if (acpi_disabled && !acpi_ht) + if (acpi_disabled) return 1; /* @@ -1554,9 +1551,8 @@ int __init acpi_boot_init(void) /* * If acpi_disabled, bail out - * One exception: acpi=ht continues far enough to enumerate LAPICs */ - if (acpi_disabled && !acpi_ht) + if (acpi_disabled) return 1; acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); @@ -1591,21 +1587,12 @@ static int __init parse_acpi(char *arg) /* acpi=force to over-ride black-list */ else if (strcmp(arg, "force") == 0) { acpi_force = 1; - acpi_ht = 1; acpi_disabled = 0; } /* acpi=strict disables out-of-spec workarounds */ else if (strcmp(arg, "strict") == 0) { acpi_strict = 1; } - /* Limit ACPI just to boot-time to enable HT */ - else if (strcmp(arg, "ht") == 0) { - if (!acpi_force) { - printk(KERN_WARNING "acpi=ht will be removed in Linux-2.6.35\n"); - disable_acpi(); - } - acpi_ht = 1; - } /* acpi=rsdt use RSDT instead of XSDT */ else if (strcmp(arg, "rsdt") == 0) { acpi_rsdt_forced = 1; diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 2e837f5080f..fb7a5f052e2 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -145,6 +145,15 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, percpu_entry->states[cx->index].eax = cx->address; percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK; } + + /* + * For _CST FFH on Intel, if GAS.access_size bit 1 is cleared, + * then we should skip checking BM_STS for this C-state. + * ref: "Intel Processor Vendor-Specific ACPI Interface Specification" + */ + if ((c->x86_vendor == X86_VENDOR_INTEL) && !(reg->access_size & 0x2)) + cx->bm_sts_skip = 1; + return retval; } EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index f9961034e55..fcc3c61fdec 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -157,13 +157,16 @@ static int __init acpi_sleep_setup(char *str) #ifdef CONFIG_HIBERNATION if (strncmp(str, "s4_nohwsig", 10) == 0) acpi_no_s4_hw_signature(); - if (strncmp(str, "s4_nonvs", 8) == 0) - acpi_s4_no_nvs(); + if (strncmp(str, "s4_nonvs", 8) == 0) { + pr_warning("ACPI: acpi_sleep=s4_nonvs is deprecated, " + "please use acpi_sleep=nonvs instead"); + acpi_nvs_nosave(); + } #endif + if (strncmp(str, "nonvs", 5) == 0) + acpi_nvs_nosave(); if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); - if (strncmp(str, "sci_force_enable", 16) == 0) - acpi_set_sci_en_on_resume(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index 8ded418b059..13ab720573e 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -1,4 +1,4 @@ - .section .text.page_aligned + .section .text..page_aligned #include <linux/linkage.h> #include <asm/segment.h> #include <asm/page_types.h> diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index fa5a1474cd1..0d20286d78c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1487,6 +1487,7 @@ static int __attach_device(struct device *dev, struct protection_domain *domain) { struct iommu_dev_data *dev_data, *alias_data; + int ret; dev_data = get_dev_data(dev); alias_data = get_dev_data(dev_data->alias); @@ -1498,13 +1499,14 @@ static int __attach_device(struct device *dev, spin_lock(&domain->lock); /* Some sanity checks */ + ret = -EBUSY; if (alias_data->domain != NULL && alias_data->domain != domain) - return -EBUSY; + goto out_unlock; if (dev_data->domain != NULL && dev_data->domain != domain) - return -EBUSY; + goto out_unlock; /* Do real assignment */ if (dev_data->alias != dev) { @@ -1520,10 +1522,14 @@ static int __attach_device(struct device *dev, atomic_inc(&dev_data->bind); + ret = 0; + +out_unlock: + /* ready */ spin_unlock(&domain->lock); - return 0; + return ret; } /* @@ -2324,10 +2330,6 @@ int __init amd_iommu_init_dma_ops(void) iommu_detected = 1; swiotlb = 0; -#ifdef CONFIG_GART_IOMMU - gart_iommu_aperture_disabled = 1; - gart_iommu_aperture = 0; -#endif /* Make the driver finally visible to the drivers */ dma_ops = &amd_iommu_dma_ops; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 3bacb4d0844..3cc63e2b8dd 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -287,8 +287,12 @@ static u8 * __init iommu_map_mmio_space(u64 address) { u8 *ret; - if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) + if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) { + pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n", + address); + pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n"); return NULL; + } ret = ioremap_nocache(address, MMIO_REGION_LENGTH); if (ret != NULL) @@ -1314,7 +1318,7 @@ static int __init amd_iommu_init(void) ret = amd_iommu_init_dma_ops(); if (ret) - goto free; + goto free_disable; amd_iommu_init_api(); @@ -1332,9 +1336,10 @@ static int __init amd_iommu_init(void) out: return ret; -free: +free_disable: disable_iommus(); +free: amd_iommu_uninit_devices(); free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, @@ -1353,6 +1358,15 @@ free: free_unity_maps(); +#ifdef CONFIG_GART_IOMMU + /* + * We failed to initialize the AMD IOMMU - try fallback to GART + * if possible. + */ + gart_iommu_init(); + +#endif + goto out; } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index e5a4a1e0161..a96489ee6ca 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -51,6 +51,7 @@ #include <asm/smp.h> #include <asm/mce.h> #include <asm/kvm_para.h> +#include <asm/tsc.h> unsigned int num_processors; @@ -920,7 +921,7 @@ void disable_local_APIC(void) unsigned int value; /* APIC hasn't been mapped yet */ - if (!apic_phys) + if (!x2apic_mode && !apic_phys) return; clear_local_APIC(); @@ -1151,8 +1152,13 @@ static void __cpuinit lapic_setup_esr(void) */ void __cpuinit setup_local_APIC(void) { - unsigned int value; - int i, j; + unsigned int value, queued; + int i, j, acked = 0; + unsigned long long tsc = 0, ntsc; + long long max_loops = cpu_khz; + + if (cpu_has_tsc) + rdtscll(tsc); if (disable_apic) { arch_disable_smp_support(); @@ -1204,13 +1210,32 @@ void __cpuinit setup_local_APIC(void) * the interrupt. Hence a vector might get locked. It was noticed * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. */ - for (i = APIC_ISR_NR - 1; i >= 0; i--) { - value = apic_read(APIC_ISR + i*0x10); - for (j = 31; j >= 0; j--) { - if (value & (1<<j)) - ack_APIC_irq(); + do { + queued = 0; + for (i = APIC_ISR_NR - 1; i >= 0; i--) + queued |= apic_read(APIC_IRR + i*0x10); + + for (i = APIC_ISR_NR - 1; i >= 0; i--) { + value = apic_read(APIC_ISR + i*0x10); + for (j = 31; j >= 0; j--) { + if (value & (1<<j)) { + ack_APIC_irq(); + acked++; + } + } } - } + if (acked > 256) { + printk(KERN_ERR "LAPIC pending interrupts after %d EOI\n", + acked); + break; + } + if (cpu_has_tsc) { + rdtscll(ntsc); + max_loops = (cpu_khz << 10) - (ntsc - tsc); + } else + max_loops--; + } while (queued && max_loops > 0); + WARN_ON(max_loops <= 0); /* * Now that we are all set up, enable the APIC diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 33f3563a2a5..e41ed24ab26 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -89,8 +89,8 @@ int nr_ioapics; /* IO APIC gsi routing info */ struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; -/* The last gsi number used */ -u32 gsi_end; +/* The one past the highest gsi number used */ +u32 gsi_top; /* MP IRQ source entries */ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; @@ -1035,7 +1035,7 @@ static int pin_2_irq(int idx, int apic, int pin) if (gsi >= NR_IRQS_LEGACY) irq = gsi; else - irq = gsi_end + 1 + gsi; + irq = gsi_top + gsi; } #ifdef CONFIG_X86_32 @@ -3853,7 +3853,7 @@ void __init probe_nr_irqs_gsi(void) { int nr; - nr = gsi_end + 1 + NR_IRQS_LEGACY; + nr = gsi_top + NR_IRQS_LEGACY; if (nr > nr_irqs_gsi) nr_irqs_gsi = nr; @@ -4294,8 +4294,8 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) */ nr_ioapic_registers[idx] = entries; - if (mp_gsi_routing[idx].gsi_end > gsi_end) - gsi_end = mp_gsi_routing[idx].gsi_end; + if (mp_gsi_routing[idx].gsi_end >= gsi_top) + gsi_top = mp_gsi_routing[idx].gsi_end + 1; printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cc83a002786..68e4a6f2211 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1121,9 +1121,9 @@ void __cpuinit cpu_init(void) oist = &per_cpu(orig_ist, cpu); #ifdef CONFIG_NUMA - if (cpu != 0 && percpu_read(node_number) == 0 && - cpu_to_node(cpu) != NUMA_NO_NODE) - percpu_write(node_number, cpu_to_node(cpu)); + if (cpu != 0 && percpu_read(numa_node) == 0 && + early_cpu_to_node(cpu) != NUMA_NO_NODE) + set_numa_node(early_cpu_to_node(cpu)); #endif me = current; diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 1d3cddaa40e..246cd3afbb5 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -34,7 +34,6 @@ #include <linux/compiler.h> #include <linux/dmi.h> #include <linux/slab.h> -#include <trace/events/power.h> #include <linux/acpi.h> #include <linux/io.h> @@ -324,8 +323,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, } } - trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency); - switch (data->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; @@ -351,7 +348,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, freqs.old = perf->states[perf->state].core_frequency * 1000; freqs.new = data->freq_table[next_state].frequency; - for_each_cpu(i, cmd.mask) { + for_each_cpu(i, policy->cpus) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } @@ -367,7 +364,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, } } - for_each_cpu(i, cmd.mask) { + for_each_cpu(i, policy->cpus) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c index 16e3483be9e..32974cf8423 100644 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c @@ -169,12 +169,9 @@ static int gx_freq_mult[16] = { * Low Level chipset interface * ****************************************************************/ static struct pci_device_id gx_chipset_tbl[] __initdata = { - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, - PCI_ANY_ID, PCI_ANY_ID }, - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, - PCI_ANY_ID, PCI_ANY_ID }, - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, - PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY), }, + { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), }, + { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), }, { 0, }, }; @@ -199,7 +196,7 @@ static __init struct pci_dev *gx_detect_chipset(void) } /* detect which companion chip is used */ - while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) { + for_each_pci_dev(gx_pci) { if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) return gx_pci; } diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 7e7eea4f826..03162dac627 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -426,7 +426,7 @@ static int guess_fsb(int mult) } -static int __init longhaul_get_ranges(void) +static int __cpuinit longhaul_get_ranges(void) { unsigned int i, j, k = 0; unsigned int ratio; @@ -530,7 +530,7 @@ static int __init longhaul_get_ranges(void) } -static void __init longhaul_setup_voltagescaling(void) +static void __cpuinit longhaul_setup_voltagescaling(void) { union msr_longhaul longhaul; struct mV_pos minvid, maxvid, vid; @@ -784,7 +784,7 @@ static int longhaul_setup_southbridge(void) return 0; } -static int __init longhaul_cpu_init(struct cpufreq_policy *policy) +static int __cpuinit longhaul_cpu_init(struct cpufreq_policy *policy) { struct cpuinfo_x86 *c = &cpu_data(0); char *cpuname = NULL; diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h index e2360a469f7..cbf48fbca88 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.h +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h @@ -56,7 +56,7 @@ union msr_longhaul { /* * VIA C3 Samuel 1 & Samuel 2 (stepping 0) */ -static const int __initdata samuel1_mults[16] = { +static const int __cpuinitdata samuel1_mults[16] = { -1, /* 0000 -> RESERVED */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -75,7 +75,7 @@ static const int __initdata samuel1_mults[16] = { -1, /* 1111 -> RESERVED */ }; -static const int __initdata samuel1_eblcr[16] = { +static const int __cpuinitdata samuel1_eblcr[16] = { 50, /* 0000 -> RESERVED */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -97,7 +97,7 @@ static const int __initdata samuel1_eblcr[16] = { /* * VIA C3 Samuel2 Stepping 1->15 */ -static const int __initdata samuel2_eblcr[16] = { +static const int __cpuinitdata samuel2_eblcr[16] = { 50, /* 0000 -> 5.0x */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -119,7 +119,7 @@ static const int __initdata samuel2_eblcr[16] = { /* * VIA C3 Ezra */ -static const int __initdata ezra_mults[16] = { +static const int __cpuinitdata ezra_mults[16] = { 100, /* 0000 -> 10.0x */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -138,7 +138,7 @@ static const int __initdata ezra_mults[16] = { 120, /* 1111 -> 12.0x */ }; -static const int __initdata ezra_eblcr[16] = { +static const int __cpuinitdata ezra_eblcr[16] = { 50, /* 0000 -> 5.0x */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -160,7 +160,7 @@ static const int __initdata ezra_eblcr[16] = { /* * VIA C3 (Ezra-T) [C5M]. */ -static const int __initdata ezrat_mults[32] = { +static const int __cpuinitdata ezrat_mults[32] = { 100, /* 0000 -> 10.0x */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -196,7 +196,7 @@ static const int __initdata ezrat_mults[32] = { -1, /* 1111 -> RESERVED (12.0x) */ }; -static const int __initdata ezrat_eblcr[32] = { +static const int __cpuinitdata ezrat_eblcr[32] = { 50, /* 0000 -> 5.0x */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -235,7 +235,7 @@ static const int __initdata ezrat_eblcr[32] = { /* * VIA C3 Nehemiah */ -static const int __initdata nehemiah_mults[32] = { +static const int __cpuinitdata nehemiah_mults[32] = { 100, /* 0000 -> 10.0x */ -1, /* 0001 -> 16.0x */ 40, /* 0010 -> 4.0x */ @@ -270,7 +270,7 @@ static const int __initdata nehemiah_mults[32] = { -1, /* 1111 -> 12.0x */ }; -static const int __initdata nehemiah_eblcr[32] = { +static const int __cpuinitdata nehemiah_eblcr[32] = { 50, /* 0000 -> 5.0x */ 160, /* 0001 -> 16.0x */ 40, /* 0010 -> 4.0x */ @@ -315,7 +315,7 @@ struct mV_pos { unsigned short pos; }; -static const struct mV_pos __initdata vrm85_mV[32] = { +static const struct mV_pos __cpuinitdata vrm85_mV[32] = { {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, @@ -326,14 +326,14 @@ static const struct mV_pos __initdata vrm85_mV[32] = { {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} }; -static const unsigned char __initdata mV_vrm85[32] = { +static const unsigned char __cpuinitdata mV_vrm85[32] = { 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 }; -static const struct mV_pos __initdata mobilevrm_mV[32] = { +static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = { {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, @@ -344,7 +344,7 @@ static const struct mV_pos __initdata mobilevrm_mV[32] = { {675, 3}, {650, 2}, {625, 1}, {600, 0} }; -static const unsigned char __initdata mV_mobilevrm[32] = { +static const unsigned char __cpuinitdata mV_mobilevrm[32] = { 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index e7b559d74c5..fc09f142d94 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -165,8 +165,8 @@ static unsigned int longrun_get(unsigned int cpu) * TMTA rules: * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) */ -static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, - unsigned int *high_freq) +static unsigned int __cpuinit longrun_determine_freqs(unsigned int *low_freq, + unsigned int *high_freq) { u32 msr_lo, msr_hi; u32 save_lo, save_hi; @@ -258,7 +258,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, } -static int __init longrun_cpu_init(struct cpufreq_policy *policy) +static int __cpuinit longrun_cpu_init(struct cpufreq_policy *policy) { int result = 0; diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 7b8a8ba67b0..bd1cac747f6 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -178,13 +178,8 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) } } - if (c->x86 != 0xF) { - if (!cpu_has(c, X86_FEATURE_EST)) - printk(KERN_WARNING PFX "Unknown CPU. " - "Please send an e-mail to " - "<cpufreq@vger.kernel.org>\n"); + if (c->x86 != 0xF) return 0; - } /* on P-4s, the TSC runs with constant frequency independent whether * throttling is active or not. */ diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index ce7cde713e7..a36de5bbb62 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c @@ -368,22 +368,16 @@ static int __init pcc_cpufreq_do_osc(acpi_handle *handle) return -ENODEV; out_obj = output.pointer; - if (out_obj->type != ACPI_TYPE_BUFFER) { - ret = -ENODEV; - goto out_free; - } + if (out_obj->type != ACPI_TYPE_BUFFER) + return -ENODEV; errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); - if (errors) { - ret = -ENODEV; - goto out_free; - } + if (errors) + return -ENODEV; supported = *((u32 *)(out_obj->buffer.pointer + 4)); - if (!(supported & 0x1)) { - ret = -ENODEV; - goto out_free; - } + if (!(supported & 0x1)) + return -ENODEV; out_free: kfree(output.pointer); @@ -397,13 +391,17 @@ static int __init pcc_cpufreq_probe(void) struct pcc_memory_resource *mem_resource; struct pcc_register_resource *reg_resource; union acpi_object *out_obj, *member; - acpi_handle handle, osc_handle; + acpi_handle handle, osc_handle, pcch_handle; int ret = 0; status = acpi_get_handle(NULL, "\\_SB", &handle); if (ACPI_FAILURE(status)) return -ENODEV; + status = acpi_get_handle(handle, "PCCH", &pcch_handle); + if (ACPI_FAILURE(status)) + return -ENODEV; + status = acpi_get_handle(handle, "_OSC", &osc_handle); if (ACPI_SUCCESS(status)) { ret = pcc_cpufreq_do_osc(&osc_handle); @@ -543,13 +541,13 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) if (!pcch_virt_addr) { result = -1; - goto pcch_null; + goto out; } result = pcc_get_offset(cpu); if (result) { dprintk("init: PCCP evaluation failed\n"); - goto free; + goto out; } policy->max = policy->cpuinfo.max_freq = @@ -558,14 +556,15 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) ioread32(&pcch_hdr->minimum_frequency) * 1000; policy->cur = pcc_get_freq(cpu); + if (!policy->cur) { + dprintk("init: Unable to get current CPU frequency\n"); + result = -EINVAL; + goto out; + } + dprintk("init: policy->max is %d, policy->min is %d\n", policy->max, policy->min); - - return 0; -free: - pcc_clear_mapping(); - free_percpu(pcc_cpu_info); -pcch_null: +out: return result; } diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 9a97116f89e..4a45fd6e41b 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -569,7 +569,7 @@ static int powernow_verify(struct cpufreq_policy *policy) * We will then get the same kind of behaviour already tested under * the "well-known" other OS. */ -static int __init fixup_sgtc(void) +static int __cpuinit fixup_sgtc(void) { unsigned int sgtc; unsigned int m; @@ -603,7 +603,7 @@ static unsigned int powernow_get(unsigned int cpu) } -static int __init acer_cpufreq_pst(const struct dmi_system_id *d) +static int __cpuinit acer_cpufreq_pst(const struct dmi_system_id *d) { printk(KERN_WARNING PFX "%s laptop with broken PST tables in BIOS detected.\n", @@ -621,7 +621,7 @@ static int __init acer_cpufreq_pst(const struct dmi_system_id *d) * A BIOS update is all that can save them. * Mention this, and disable cpufreq. */ -static struct dmi_system_id __initdata powernow_dmi_table[] = { +static struct dmi_system_id __cpuinitdata powernow_dmi_table[] = { { .callback = acer_cpufreq_pst, .ident = "Acer Aspire", @@ -633,7 +633,7 @@ static struct dmi_system_id __initdata powernow_dmi_table[] = { { } }; -static int __init powernow_cpu_init(struct cpufreq_policy *policy) +static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy) { union msr_fidvidstatus fidvidstatus; int result; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 6f3dc8fbbfd..90cab2d4ac0 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -806,6 +806,8 @@ static int find_psb_table(struct powernow_k8_data *data) * www.amd.com */ printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); + printk(KERN_ERR PFX "Make sure that your BIOS is up to date" + " and Cool'N'Quiet support is enabled in BIOS setup\n"); return -ENODEV; } @@ -910,8 +912,8 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, { int i; u32 hi = 0, lo = 0; - rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); - data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; + rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi); + data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; for (i = 0; i < data->acpi_data.state_count; i++) { u32 index; @@ -1023,13 +1025,12 @@ static int get_transition_latency(struct powernow_k8_data *data) } if (max_latency == 0) { /* - * Fam 11h always returns 0 as transition latency. - * This is intended and means "very fast". While cpufreq core - * and governors currently can handle that gracefully, better - * set it to 1 to avoid problems in the future. - * For all others it's a BIOS bug. + * Fam 11h and later may return 0 as transition latency. This + * is intended and means "very fast". While cpufreq core and + * governors currently can handle that gracefully, better set it + * to 1 to avoid problems in the future. */ - if (boot_cpu_data.x86 != 0x11) + if (boot_cpu_data.x86 < 0x11) printk(KERN_ERR FW_WARN PFX "Invalid zero transition " "latency\n"); max_latency = 1; @@ -1497,8 +1498,8 @@ static struct cpufreq_driver cpufreq_amd64_driver = { * simply keep the boost-disable flag in sync with the current global * state. */ -static int __cpuinit cpb_notify(struct notifier_block *nb, unsigned long action, - void *hcpu) +static int cpb_notify(struct notifier_block *nb, unsigned long action, + void *hcpu) { unsigned cpu = (long)hcpu; u32 lo, hi; @@ -1528,7 +1529,7 @@ static int __cpuinit cpb_notify(struct notifier_block *nb, unsigned long action, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata cpb_nb = { +static struct notifier_block cpb_nb = { .notifier_call = cpb_notify, }; diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 4ac6d48fe11..bb34b03af25 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -7,3 +7,5 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o + +obj-$(CONFIG_ACPI_APEI) += mce-apei.o diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c new file mode 100644 index 00000000000..745b54f9be8 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -0,0 +1,138 @@ +/* + * Bridge between MCE and APEI + * + * On some machine, corrected memory errors are reported via APEI + * generic hardware error source (GHES) instead of corrected Machine + * Check. These corrected memory errors can be reported to user space + * through /dev/mcelog via faking a corrected Machine Check, so that + * the error memory page can be offlined by /sbin/mcelog if the error + * count for one page is beyond the threshold. + * + * For fatal MCE, save MCE record into persistent storage via ERST, so + * that the MCE record can be logged after reboot via ERST. + * + * Copyright 2010 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/acpi.h> +#include <linux/cper.h> +#include <acpi/apei.h> +#include <asm/mce.h> + +#include "mce-internal.h" + +void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) +{ + struct mce m; + + /* Only corrected MC is reported */ + if (!corrected) + return; + + mce_setup(&m); + m.bank = 1; + /* Fake a memory read corrected error with unknown channel */ + m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; + m.addr = mem_err->physical_addr; + mce_log(&m); + mce_notify_irq(); +} +EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); + +#define CPER_CREATOR_MCE \ + UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ + 0x64, 0x90, 0xb8, 0x9d) +#define CPER_SECTION_TYPE_MCE \ + UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ + 0x04, 0x4a, 0x38, 0xfc) + +/* + * CPER specification (in UEFI specification 2.3 appendix N) requires + * byte-packed. + */ +struct cper_mce_record { + struct cper_record_header hdr; + struct cper_section_descriptor sec_hdr; + struct mce mce; +} __packed; + +int apei_write_mce(struct mce *m) +{ + struct cper_mce_record rcd; + + memset(&rcd, 0, sizeof(rcd)); + memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE); + rcd.hdr.revision = CPER_RECORD_REV; + rcd.hdr.signature_end = CPER_SIG_END; + rcd.hdr.section_count = 1; + rcd.hdr.error_severity = CPER_SER_FATAL; + /* timestamp, platform_id, partition_id are all invalid */ + rcd.hdr.validation_bits = 0; + rcd.hdr.record_length = sizeof(rcd); + rcd.hdr.creator_id = CPER_CREATOR_MCE; + rcd.hdr.notification_type = CPER_NOTIFY_MCE; + rcd.hdr.record_id = cper_next_record_id(); + rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR; + + rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd; + rcd.sec_hdr.section_length = sizeof(rcd.mce); + rcd.sec_hdr.revision = CPER_SEC_REV; + /* fru_id and fru_text is invalid */ + rcd.sec_hdr.validation_bits = 0; + rcd.sec_hdr.flags = CPER_SEC_PRIMARY; + rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE; + rcd.sec_hdr.section_severity = CPER_SER_FATAL; + + memcpy(&rcd.mce, m, sizeof(*m)); + + return erst_write(&rcd.hdr); +} + +ssize_t apei_read_mce(struct mce *m, u64 *record_id) +{ + struct cper_mce_record rcd; + ssize_t len; + + len = erst_read_next(&rcd.hdr, sizeof(rcd)); + if (len <= 0) + return len; + /* Can not skip other records in storage via ERST unless clear them */ + else if (len != sizeof(rcd) || + uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) { + if (printk_ratelimit()) + pr_warning( + "MCE-APEI: Can not skip the unknown record in ERST"); + return -EIO; + } + + memcpy(m, &rcd.mce, sizeof(*m)); + *record_id = rcd.hdr.record_id; + + return sizeof(*m); +} + +/* Check whether there is record in ERST */ +int apei_check_mce(void) +{ + return erst_get_record_count(); +} + +int apei_clear_mce(u64 record_id) +{ + return erst_clear(record_id); +} diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 32996f9fab6..fefcc69ee8b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -28,3 +28,26 @@ extern int mce_ser; extern struct mce_bank *mce_banks; +#ifdef CONFIG_ACPI_APEI +int apei_write_mce(struct mce *m); +ssize_t apei_read_mce(struct mce *m, u64 *record_id); +int apei_check_mce(void); +int apei_clear_mce(u64 record_id); +#else +static inline int apei_write_mce(struct mce *m) +{ + return -EINVAL; +} +static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id) +{ + return 0; +} +static inline int apei_check_mce(void) +{ + return 0; +} +static inline int apei_clear_mce(u64 record_id) +{ + return -EINVAL; +} +#endif diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7a355ddcc64..1970ef911c9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -36,6 +36,7 @@ #include <linux/fs.h> #include <linux/mm.h> #include <linux/debugfs.h> +#include <linux/edac_mce.h> #include <asm/processor.h> #include <asm/hw_irq.h> @@ -169,6 +170,15 @@ void mce_log(struct mce *mce) entry = rcu_dereference_check_mce(mcelog.next); for (;;) { /* + * If edac_mce is enabled, it will check the error type + * and will process it, if it is a known error. + * Otherwise, the error will be sent through mcelog + * interface + */ + if (edac_mce_parse(mce)) + return; + + /* * When the buffer fills up discard new entries. * Assume that the earlier errors are the more * interesting ones: @@ -264,7 +274,7 @@ static void wait_for_panic(void) static void mce_panic(char *msg, struct mce *final, char *exp) { - int i; + int i, apei_err = 0; if (!fake_panic) { /* @@ -287,8 +297,11 @@ static void mce_panic(char *msg, struct mce *final, char *exp) struct mce *m = &mcelog.entry[i]; if (!(m->status & MCI_STATUS_VAL)) continue; - if (!(m->status & MCI_STATUS_UC)) + if (!(m->status & MCI_STATUS_UC)) { print_mce(m); + if (!apei_err) + apei_err = apei_write_mce(m); + } } /* Now print uncorrected but with the final one last */ for (i = 0; i < MCE_LOG_LEN; i++) { @@ -297,11 +310,17 @@ static void mce_panic(char *msg, struct mce *final, char *exp) continue; if (!(m->status & MCI_STATUS_UC)) continue; - if (!final || memcmp(m, final, sizeof(struct mce))) + if (!final || memcmp(m, final, sizeof(struct mce))) { print_mce(m); + if (!apei_err) + apei_err = apei_write_mce(m); + } } - if (final) + if (final) { print_mce(final); + if (!apei_err) + apei_err = apei_write_mce(final); + } if (cpu_missing) printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); print_mce_tail(); @@ -581,6 +600,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) */ if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { mce_log(&m); + atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); add_taint(TAINT_MACHINE_CHECK); } @@ -1493,6 +1513,43 @@ static void collect_tscs(void *data) rdtscll(cpu_tsc[smp_processor_id()]); } +static int mce_apei_read_done; + +/* Collect MCE record of previous boot in persistent storage via APEI ERST. */ +static int __mce_read_apei(char __user **ubuf, size_t usize) +{ + int rc; + u64 record_id; + struct mce m; + + if (usize < sizeof(struct mce)) + return -EINVAL; + + rc = apei_read_mce(&m, &record_id); + /* Error or no more MCE record */ + if (rc <= 0) { + mce_apei_read_done = 1; + return rc; + } + rc = -EFAULT; + if (copy_to_user(*ubuf, &m, sizeof(struct mce))) + return rc; + /* + * In fact, we should have cleared the record after that has + * been flushed to the disk or sent to network in + * /sbin/mcelog, but we have no interface to support that now, + * so just clear it to avoid duplication. + */ + rc = apei_clear_mce(record_id); + if (rc) { + mce_apei_read_done = 1; + return rc; + } + *ubuf += sizeof(struct mce); + + return 0; +} + static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off) { @@ -1506,15 +1563,19 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, return -ENOMEM; mutex_lock(&mce_read_mutex); + + if (!mce_apei_read_done) { + err = __mce_read_apei(&buf, usize); + if (err || buf != ubuf) + goto out; + } + next = rcu_dereference_check_mce(mcelog.next); /* Only supports full reads right now */ - if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { - mutex_unlock(&mce_read_mutex); - kfree(cpu_tsc); - - return -EINVAL; - } + err = -EINVAL; + if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) + goto out; err = 0; prev = 0; @@ -1562,10 +1623,15 @@ timeout: memset(&mcelog.entry[i], 0, sizeof(struct mce)); } } + + if (err) + err = -EFAULT; + +out: mutex_unlock(&mce_read_mutex); kfree(cpu_tsc); - return err ? -EFAULT : buf - ubuf; + return err ? err : buf - ubuf; } static unsigned int mce_poll(struct file *file, poll_table *wait) @@ -1573,6 +1639,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait) poll_wait(file, &mce_wait, wait); if (rcu_dereference_check_mce(mcelog.next)) return POLLIN | POLLRDNORM; + if (!mce_apei_read_done && apei_check_mce()) + return POLLIN | POLLRDNORM; return 0; } diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 81c499eceb2..e1a0a3bf971 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -190,7 +190,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, mutex_unlock(&therm_cpu_lock); break; } - return err ? NOTIFY_BAD : NOTIFY_OK; + return notifier_from_errno(err); } static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index fd4db0db370..5db5b7d65a1 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -106,6 +106,7 @@ struct cpu_hw_events { int n_events; int n_added; + int n_txn; int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ u64 tags[X86_PMC_IDX_MAX]; struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ @@ -983,6 +984,7 @@ static int x86_pmu_enable(struct perf_event *event) out: cpuc->n_events = n; cpuc->n_added += n - n0; + cpuc->n_txn += n - n0; return 0; } @@ -1089,6 +1091,14 @@ static void x86_pmu_disable(struct perf_event *event) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int i; + /* + * If we're called during a txn, we don't need to do anything. + * The events never got scheduled and ->cancel_txn will truncate + * the event_list. + */ + if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) + return; + x86_pmu_stop(event); for (i = 0; i < cpuc->n_events; i++) { @@ -1379,6 +1389,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); cpuc->group_flag |= PERF_EVENT_TXN_STARTED; + cpuc->n_txn = 0; } /* @@ -1391,6 +1402,11 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; + /* + * Truncate the collected events. + */ + cpuc->n_added -= cpuc->n_txn; + cpuc->n_events -= cpuc->n_txn; } /* @@ -1419,6 +1435,12 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) */ memcpy(cpuc->assign, assign, n*sizeof(int)); + /* + * Clear out the txn count so that ->cancel_txn() which gets + * run after ->commit_txn() doesn't undo things. + */ + cpuc->n_txn = 0; + return 0; } @@ -1717,7 +1739,11 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski */ regs->bp = rewind_frame_pointer(skip + 1); regs->cs = __KERNEL_CS; - local_save_flags(regs->flags); + /* + * We abuse bit 3 to pass exact information, see perf_misc_flags + * and the comment with PERF_EFLAGS_EXACT. + */ + regs->flags = 0; } unsigned long perf_instruction_pointer(struct pt_regs *regs) diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 611df11ba15..c2897b7b4a3 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -102,8 +102,8 @@ static const u64 amd_perfmon_event_map[] = [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, }; static u64 amd_pmu_event_map(int hw_event) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index fdbc652d3fe..214ac860ebe 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -72,6 +72,7 @@ static struct event_constraint intel_westmere_event_constraints[] = INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ + INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */ EVENT_CONSTRAINT_END }; diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 424fc8de68e..ae85d69644d 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -465,15 +465,21 @@ out: return rc; } -static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) +static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) { - unsigned long dummy; + int overflow = 0; + u32 low, high; - rdmsrl(hwc->config_base + hwc->idx, dummy); - if (dummy & P4_CCCR_OVF) { + rdmsr(hwc->config_base + hwc->idx, low, high); + + /* we need to check high bit for unflagged overflows */ + if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) { + overflow = 1; (void)checking_wrmsrl(hwc->config_base + hwc->idx, - ((u64)dummy) & ~P4_CCCR_OVF); + ((u64)low) & ~P4_CCCR_OVF); } + + return overflow; } static inline void p4_pmu_disable_event(struct perf_event *event) @@ -584,21 +590,15 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) WARN_ON_ONCE(hwc->idx != idx); - /* - * FIXME: Redundant call, actually not needed - * but just to check if we're screwed - */ - p4_pmu_clear_cccr_ovf(hwc); + /* it might be unflagged overflow */ + handled = p4_pmu_clear_cccr_ovf(hwc); val = x86_perf_event_update(event); - if (val & (1ULL << (x86_pmu.cntval_bits - 1))) + if (!handled && (val & (1ULL << (x86_pmu.cntval_bits - 1)))) continue; - /* - * event overflow - */ - handled = 1; - data.period = event->hw.last_period; + /* event overflow for sure */ + data.period = event->hw.last_period; if (!x86_perf_event_set_period(event)) continue; @@ -670,7 +670,7 @@ static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) /* * ESCR address hashing is tricky, ESCRs are not sequential - * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03e0) and + * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03a0) and * the metric between any ESCRs is laid in range [0xa0,0xe1] * * so we make ~70% filled hashtable @@ -735,8 +735,9 @@ static int p4_get_escr_idx(unsigned int addr) { unsigned int idx = P4_ESCR_MSR_IDX(addr); - if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || - !p4_escr_table[idx])) { + if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || + !p4_escr_table[idx] || + p4_escr_table[idx] != addr)) { WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr); return -1; } @@ -762,7 +763,7 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign { unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)]; - int cpu = raw_smp_processor_id(); + int cpu = smp_processor_id(); struct hw_perf_event *hwc; struct p4_event_bind *bind; unsigned int i, thread, num; diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 8b862d5900f..1b7b31ab7d8 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -170,7 +170,7 @@ static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb, cpuid_device_destroy(cpu); break; } - return err ? NOTIFY_BAD : NOTIFY_OK; + return notifier_from_errno(err); } static struct notifier_block __refdata cpuid_class_cpu_notifier = diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 7bca3c6a02f..0d6fc71bedb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -729,7 +729,7 @@ static int __init e820_mark_nvs_memory(void) struct e820entry *ei = &e820.map[i]; if (ei->type == E820_NVS) - hibernate_nvs_register(ei->addr, ei->size); + suspend_nvs_register(ei->addr, ei->size); } return 0; diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index ebdb85cf268..e5cc7e82e60 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -18,6 +18,7 @@ #include <asm/apic.h> #include <asm/iommu.h> #include <asm/gart.h> +#include <asm/hpet.h> static void __init fix_hypertransport_config(int num, int slot, int func) { @@ -191,6 +192,21 @@ static void __init ati_bugs_contd(int num, int slot, int func) } #endif +/* + * Force the read back of the CMP register in hpet_next_event() + * to work around the problem that the CMP register write seems to be + * delayed. See hpet_next_event() for details. + * + * We do this on all SMBUS incarnations for now until we have more + * information about the affected chipsets. + */ +static void __init ati_hpet_bugs(int num, int slot, int func) +{ +#ifdef CONFIG_HPET_TIMER + hpet_readback_cmp = 1; +#endif +} + #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -220,6 +236,8 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, + { PCI_VENDOR_ID_ATI, PCI_ANY_ID, + PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs }, {} }; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 0697ff13983..4db7c4d12ff 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -571,8 +571,8 @@ auditsys: * masked off. */ sysret_audit: - movq %rax,%rsi /* second arg, syscall return value */ - cmpq $0,%rax /* is it < 0? */ + movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ + cmpq $0,%rsi /* is it < 0? */ setl %al /* 1 if so, 0 if not */ movzbl %al,%edi /* zero-extend that into %edi */ inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index a198b7c87a1..ba390d73117 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -964,7 +964,7 @@ fs_initcall(hpet_late_init); void hpet_disable(void) { - if (is_hpet_capable()) { + if (is_hpet_capable() && hpet_virt_address) { unsigned int cfg = hpet_readl(HPET_CFG); if (hpet_legacy_int_enabled) { diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 86cef6b3225..c4444bce846 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -107,7 +107,7 @@ void __cpuinit fpu_init(void) } #endif /* CONFIG_X86_64 */ -static void fpu_finit(struct fpu *fpu) +void fpu_finit(struct fpu *fpu) { #ifdef CONFIG_X86_32 if (!HAVE_HWFP) { @@ -132,6 +132,7 @@ static void fpu_finit(struct fpu *fpu) fp->fos = 0xffff0000u; } } +EXPORT_SYMBOL_GPL(fpu_finit); /* * The _current_ task is using the FPU for the first time diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 7c9f02c130f..cafa7c80ac9 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -276,16 +276,6 @@ static struct sys_device device_i8259A = { .cls = &i8259_sysdev_class, }; -static int __init i8259A_init_sysfs(void) -{ - int error = sysdev_class_register(&i8259_sysdev_class); - if (!error) - error = sysdev_register(&device_i8259A); - return error; -} - -device_initcall(i8259A_init_sysfs); - static void mask_8259A(void) { unsigned long flags; @@ -407,3 +397,18 @@ struct legacy_pic default_legacy_pic = { }; struct legacy_pic *legacy_pic = &default_legacy_pic; + +static int __init i8259A_init_sysfs(void) +{ + int error; + + if (legacy_pic != &default_legacy_pic) + return 0; + + error = sysdev_class_register(&i8259_sysdev_class); + if (!error) + error = sysdev_register(&device_i8259A); + return error; +} + +device_initcall(i8259A_init_sysfs); diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index 3a54dcb9cd0..43e9ccf4494 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -34,7 +34,7 @@ EXPORT_SYMBOL(init_task); /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. The TSS size is kept cacheline-aligned - * so they are allowed to end up in the .data.cacheline_aligned + * so they are allowed to end up in the .data..cacheline_aligned * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 4f4af75b948..01ab17ae2ae 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -572,7 +572,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) return NOTIFY_STOP; } -#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP int kgdb_ll_trap(int cmd, const char *str, struct pt_regs *regs, long err, int trap, int sig) { @@ -590,7 +589,6 @@ int kgdb_ll_trap(int cmd, const char *str, return __kgdb_notify(&args, cmd); } -#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ static int kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) @@ -625,6 +623,12 @@ int kgdb_arch_init(void) return register_die_notifier(&kgdb_notifier); } +static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi, + struct perf_sample_data *data, struct pt_regs *regs) +{ + kgdb_ll_trap(DIE_DEBUG, "debug", regs, 0, 0, SIGTRAP); +} + void kgdb_arch_late(void) { int i, cpu; @@ -655,6 +659,7 @@ void kgdb_arch_late(void) for_each_online_cpu(cpu) { pevent = per_cpu_ptr(breakinfo[i].pev, cpu); pevent[0]->hw.sample_period = 1; + pevent[0]->overflow_handler = kgdb_hw_overflow_handler; if (pevent[0]->destroy != NULL) { pevent[0]->destroy = NULL; release_bp_slot(*pevent); diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 345a4b1fe14..675879b65ce 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -640,8 +640,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) /* Skip cs, ip, orig_ax and gs. */ \ " subl $16, %esp\n" \ " pushl %fs\n" \ - " pushl %ds\n" \ " pushl %es\n" \ + " pushl %ds\n" \ " pushl %eax\n" \ " pushl %ebp\n" \ " pushl %edi\n" \ diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 5ae5d2426ed..d86dbf7e54b 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -123,7 +123,7 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m) printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", m->apicid, m->apicver, m->apicaddr); - mp_register_ioapic(m->apicid, m->apicaddr, gsi_end + 1); + mp_register_ioapic(m->apicid, m->apicaddr, gsi_top); } static void print_MP_intsrc_info(struct mpc_intsrc *m) diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c index e796448f0eb..5915e0b3330 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/kernel/mrst.c @@ -216,6 +216,12 @@ static void __init mrst_setup_boot_clock(void) setup_boot_APIC_clock(); }; +/* MID systems don't have i8042 controller */ +static int mrst_i8042_detect(void) +{ + return 0; +} + /* * Moorestown specific x86_init function overrides and early setup * calls. @@ -233,6 +239,7 @@ void __init x86_mrst_early_setup(void) x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock; x86_platform.calibrate_tsc = mrst_calibrate_tsc; + x86_platform.i8042_detect = mrst_i8042_detect; x86_init.pci.init = pci_mrst_init; x86_init.pci.fixup_irqs = x86_init_noop; diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 4d4468e9f47..7bf2dc4c8f7 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -230,7 +230,7 @@ static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb, msr_device_destroy(cpu); break; } - return err ? NOTIFY_BAD : NOTIFY_OK; + return notifier_from_errno(err); } static struct notifier_block __refdata msr_class_cpu_notifier = { diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index fb99f7edb34..078d4ec1a9d 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -103,11 +103,16 @@ int use_calgary __read_mostly = 0; #define PMR_SOFTSTOPFAULT 0x40000000 #define PMR_HARDSTOP 0x20000000 -#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */ -#define MAX_NUM_CHASSIS 8 /* max number of chassis */ -/* MAX_PHB_BUS_NUM is the maximal possible dev->bus->number */ -#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * MAX_NUM_CHASSIS * 2) -#define PHBS_PER_CALGARY 4 +/* + * The maximum PHB bus number. + * x3950M2 (rare): 8 chassis, 48 PHBs per chassis = 384 + * x3950M2: 4 chassis, 48 PHBs per chassis = 192 + * x3950 (PCIE): 8 chassis, 32 PHBs per chassis = 256 + * x3950 (PCIX): 8 chassis, 16 PHBs per chassis = 128 + */ +#define MAX_PHB_BUS_NUM 256 + +#define PHBS_PER_CALGARY 4 /* register offsets in Calgary's internal register space */ static const unsigned long tar_offsets[] = { @@ -1051,8 +1056,6 @@ static int __init calgary_init_one(struct pci_dev *dev) struct iommu_table *tbl; int ret; - BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM); - bbar = busno_to_bbar(dev->bus->number); ret = calgary_setup_tar(dev, bbar); if (ret) diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 7d2829dde20..a5bc528d432 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -31,8 +31,6 @@ static struct dma_map_ops swiotlb_dma_ops = { .free_coherent = swiotlb_free_coherent, .sync_single_for_cpu = swiotlb_sync_single_for_cpu, .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, - .sync_single_range_for_device = swiotlb_sync_single_range_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device, .map_sg = swiotlb_map_sg_attrs, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e7e35219b32..cbcf013a0ec 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -28,6 +28,7 @@ unsigned long idle_nomwait; EXPORT_SYMBOL(idle_nomwait); struct kmem_cache *task_xstate_cachep; +EXPORT_SYMBOL_GPL(task_xstate_cachep); int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { @@ -371,7 +372,7 @@ static inline int hlt_use_halt(void) void default_idle(void) { if (hlt_use_halt()) { - trace_power_start(POWER_CSTATE, 1); + trace_power_start(POWER_CSTATE, 1, smp_processor_id()); current_thread_info()->status &= ~TS_POLLING; /* * TS_POLLING-cleared state must be visible before we @@ -441,7 +442,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); */ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { - trace_power_start(POWER_CSTATE, (ax>>4)+1); + trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); if (!need_resched()) { if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -457,7 +458,7 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) static void mwait_idle(void) { if (!need_resched()) { - trace_power_start(POWER_CSTATE, 1); + trace_power_start(POWER_CSTATE, 1, smp_processor_id()); if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -478,7 +479,7 @@ static void mwait_idle(void) */ static void poll_idle(void) { - trace_power_start(POWER_CSTATE, 0); + trace_power_start(POWER_CSTATE, 0, smp_processor_id()); local_irq_enable(); while (!need_resched()) cpu_relax(); diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index e72d3fc6547..939b9e98245 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -498,15 +498,10 @@ void force_hpet_resume(void) * See erratum #27 (Misinterpreted MSI Requests May Result in * Corrupted LPC DMA Data) in AMD Publication #46837, * "SB700 Family Product Errata", Rev. 1.0, March 2010. - * - * Also force the read back of the CMP register in hpet_next_event() - * to work around the problem that the CMP register write seems to be - * delayed. See hpet_next_event() for details. */ static void force_disable_hpet_msi(struct pci_dev *unused) { hpet_msi_disable = 1; - hpet_readback_cmp = 1; } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 8e1aac86b50..e3af342fe83 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -228,6 +228,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"), }, }, + { /* Handle problems with rebooting on Dell T7400's */ + .callback = set_bios_reboot, + .ident = "Dell Precision T7400", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T7400"), + }, + }, { /* Handle problems with rebooting on HP laptops */ .callback = set_bios_reboot, .ident = "HP Compaq Laptop", diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e8029896309..b4ae4acbd03 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -676,6 +676,17 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "DG45FC"), }, }, + /* + * The Dell Inspiron Mini 1012 has DMI_BIOS_VENDOR = "Dell Inc.", so + * match on the product name. + */ + { + .callback = dmi_low_memory_corruption, + .ident = "Phoenix BIOS", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"), + }, + }, #endif {} }; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ef6370b00e7..a60df9ae645 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -21,12 +21,6 @@ #include <asm/cpu.h> #include <asm/stackprotector.h> -#ifdef CONFIG_DEBUG_PER_CPU_MAPS -# define DBG(fmt, ...) pr_dbg(fmt, ##__VA_ARGS__) -#else -# define DBG(fmt, ...) do { if (0) pr_dbg(fmt, ##__VA_ARGS__); } while (0) -#endif - DEFINE_PER_CPU(int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); @@ -244,10 +238,19 @@ void __init setup_per_cpu_areas(void) #ifdef CONFIG_NUMA per_cpu(x86_cpu_to_node_map, cpu) = early_per_cpu_map(x86_cpu_to_node_map, cpu); + /* + * Ensure that the boot cpu numa_node is correct when the boot + * cpu is on a node that doesn't have memory installed. + * Also cpu_up() will call cpu_to_node() for APs when + * MEMORY_HOTPLUG is defined, before per_cpu(numa_node) is set + * up later with c_init aka intel_init/amd_init. + * So set them all (boot cpu and all APs). + */ + set_cpu_numa_node(cpu, early_cpu_to_node(cpu)); #endif #endif /* - * Up to this point, the boot CPU has been using .data.init + * Up to this point, the boot CPU has been using .init.data * area. Reload any changed state for the boot CPU. */ if (cpu == boot_cpu_id) @@ -263,14 +266,6 @@ void __init setup_per_cpu_areas(void) early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; #endif -#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) - /* - * make sure boot cpu node_number is right, when boot cpu is on the - * node that doesn't have mem installed - */ - per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id); -#endif - /* Setup node to cpumask map */ setup_node_to_cpumask_map(); diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c index 7ded57896c0..cb22acf3ed0 100644 --- a/arch/x86/kernel/sfi.c +++ b/arch/x86/kernel/sfi.c @@ -93,7 +93,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table) pentry = (struct sfi_apic_table_entry *)sb->pentry; for (i = 0; i < num; i++) { - mp_register_ioapic(i, pentry->phys_addr, gsi_end + 1); + mp_register_ioapic(i, pentry->phys_addr, gsi_top); pentry++; } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 763d815e27a..c4f33b2e77d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -686,7 +686,7 @@ static void __cpuinit do_fork_idle(struct work_struct *work) static void __cpuinit announce_cpu(int cpu, int apicid) { static int current_node = -1; - int node = cpu_to_node(cpu); + int node = early_cpu_to_node(cpu); if (system_state == SYSTEM_BOOTING) { if (node != current_node) { @@ -1215,9 +1215,17 @@ __init void prefill_possible_map(void) if (!num_processors) num_processors = 1; - if (setup_possible_cpus == -1) - possible = num_processors + disabled_cpus; - else + i = setup_max_cpus ?: 1; + if (setup_possible_cpus == -1) { + possible = num_processors; +#ifdef CONFIG_HOTPLUG_CPU + if (setup_max_cpus) + possible += disabled_cpus; +#else + if (possible > i) + possible = i; +#endif + } else possible = setup_possible_cpus; total_cpus = max_t(int, possible, num_processors + disabled_cpus); @@ -1230,11 +1238,23 @@ __init void prefill_possible_map(void) possible = nr_cpu_ids; } +#ifdef CONFIG_HOTPLUG_CPU + if (!setup_max_cpus) +#endif + if (possible > i) { + printk(KERN_WARNING + "%d Processors exceeds max_cpus limit of %u\n", + possible, setup_max_cpus); + possible = i; + } + printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", possible, max_t(int, possible - num_processors, 0)); for (i = 0; i < possible; i++) set_cpu_possible(i, true); + for (; i < NR_CPUS; i++) + set_cpu_possible(i, false); nr_cpu_ids = possible; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 142d70c74b0..725ef4d17cd 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -526,6 +526,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; + int user_icebp = 0; unsigned long dr6; int si_code; @@ -534,6 +535,14 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* Filter out all the reserved bits which are preset to 1 */ dr6 &= ~DR6_RESERVED; + /* + * If dr6 has no reason to give us about the origin of this trap, + * then it's very likely the result of an icebp/int01 trap. + * User wants a sigtrap for that. + */ + if (!dr6 && user_mode(regs)) + user_icebp = 1; + /* Catch kmemcheck conditions first of all! */ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) return; @@ -575,7 +584,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) regs->flags &= ~X86_EFLAGS_TF; } si_code = get_si_code(tsk->thread.debugreg6); - if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) + if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) send_sigtrap(tsk, regs, error_code, si_code); preempt_conditional_cli(regs); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 2cc249718c4..d0bb52296fa 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -97,7 +97,7 @@ SECTIONS HEAD_TEXT #ifdef CONFIG_X86_32 . = ALIGN(PAGE_SIZE); - *(.text.page_aligned) + *(.text..page_aligned) #endif . = ALIGN(8); _stext = .; @@ -305,7 +305,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); .bss : AT(ADDR(.bss) - LOAD_OFFSET) { __bss_start = .; - *(.bss.page_aligned) + *(.bss..page_aligned) *(.bss) . = ALIGN(4); __bss_stop = .; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 61a1e8c7e19..cd6da6bf3ec 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -5,6 +5,7 @@ */ #include <linux/init.h> #include <linux/ioport.h> +#include <linux/module.h> #include <asm/bios_ebda.h> #include <asm/paravirt.h> @@ -85,6 +86,7 @@ struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { }; static void default_nmi_init(void) { }; +static int default_i8042_detect(void) { return 1; }; struct x86_platform_ops x86_platform = { .calibrate_tsc = native_calibrate_tsc, @@ -92,5 +94,8 @@ struct x86_platform_ops x86_platform = { .set_wallclock = mach_set_rtc_mmss, .iommu_shutdown = iommu_shutdown_noop, .is_untracked_pat_range = is_ISA_range, - .nmi_init = default_nmi_init + .nmi_init = default_nmi_init, + .i8042_detect = default_i8042_detect }; + +EXPORT_SYMBOL_GPL(x86_platform); |