summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/acpi/boot.c27
-rw-r--r--arch/x86/kernel/acpi/sleep.c2
-rw-r--r--arch/x86/kernel/acpi/wakeup_32.S2
-rw-r--r--arch/x86/kernel/amd_iommu.c16
-rw-r--r--arch/x86/kernel/amd_iommu_init.c20
-rw-r--r--arch/x86/kernel/apic/apic.c41
-rw-r--r--arch/x86/kernel/apic/io_apic.c12
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c6
-rw-r--r--arch/x86/kernel/cpu/mcheck/Makefile2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-apei.c138
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h23
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c89
-rw-r--r--arch/x86/kernel/cpu/perf_event.c22
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c1
-rw-r--r--arch/x86/kernel/e820.c2
-rw-r--r--arch/x86/kernel/init_task.c2
-rw-r--r--arch/x86/kernel/mpparse.c2
-rw-r--r--arch/x86/kernel/mrst.c7
-rw-r--r--arch/x86/kernel/pci-calgary_64.c17
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/setup.c11
-rw-r--r--arch/x86/kernel/setup_percpu.c8
-rw-r--r--arch/x86/kernel/sfi.c2
-rw-r--r--arch/x86/kernel/smpboot.c28
-rw-r--r--arch/x86/kernel/traps.c11
-rw-r--r--arch/x86/kernel/vmlinux.lds.S4
-rw-r--r--arch/x86/kernel/x86_init.c7
28 files changed, 425 insertions, 89 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 488be461a38..c05872aa3ce 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -63,7 +63,6 @@ EXPORT_SYMBOL(acpi_disabled);
int acpi_noirq; /* skip ACPI IRQ initialization */
int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */
EXPORT_SYMBOL(acpi_pci_disabled);
-int acpi_ht __initdata = 1; /* enable HT */
int acpi_lapic;
int acpi_ioapic;
@@ -119,7 +118,7 @@ static unsigned int gsi_to_irq(unsigned int gsi)
if (gsi >= NR_IRQS_LEGACY)
irq = gsi;
else
- irq = gsi_end + 1 + gsi;
+ irq = gsi_top + gsi;
return irq;
}
@@ -130,10 +129,10 @@ static u32 irq_to_gsi(int irq)
if (irq < NR_IRQS_LEGACY)
gsi = isa_irq_to_gsi[irq];
- else if (irq <= gsi_end)
+ else if (irq < gsi_top)
gsi = irq;
- else if (irq <= (gsi_end + NR_IRQS_LEGACY))
- gsi = irq - gsi_end;
+ else if (irq < (gsi_top + NR_IRQS_LEGACY))
+ gsi = irq - gsi_top;
else
gsi = 0xffffffff;
@@ -1501,9 +1500,8 @@ void __init acpi_boot_table_init(void)
/*
* If acpi_disabled, bail out
- * One exception: acpi=ht continues far enough to enumerate LAPICs
*/
- if (acpi_disabled && !acpi_ht)
+ if (acpi_disabled)
return;
/*
@@ -1534,9 +1532,8 @@ int __init early_acpi_boot_init(void)
{
/*
* If acpi_disabled, bail out
- * One exception: acpi=ht continues far enough to enumerate LAPICs
*/
- if (acpi_disabled && !acpi_ht)
+ if (acpi_disabled)
return 1;
/*
@@ -1554,9 +1551,8 @@ int __init acpi_boot_init(void)
/*
* If acpi_disabled, bail out
- * One exception: acpi=ht continues far enough to enumerate LAPICs
*/
- if (acpi_disabled && !acpi_ht)
+ if (acpi_disabled)
return 1;
acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
@@ -1591,21 +1587,12 @@ static int __init parse_acpi(char *arg)
/* acpi=force to over-ride black-list */
else if (strcmp(arg, "force") == 0) {
acpi_force = 1;
- acpi_ht = 1;
acpi_disabled = 0;
}
/* acpi=strict disables out-of-spec workarounds */
else if (strcmp(arg, "strict") == 0) {
acpi_strict = 1;
}
- /* Limit ACPI just to boot-time to enable HT */
- else if (strcmp(arg, "ht") == 0) {
- if (!acpi_force) {
- printk(KERN_WARNING "acpi=ht will be removed in Linux-2.6.35\n");
- disable_acpi();
- }
- acpi_ht = 1;
- }
/* acpi=rsdt use RSDT instead of XSDT */
else if (strcmp(arg, "rsdt") == 0) {
acpi_rsdt_forced = 1;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index f9961034e55..82e508677b9 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -162,8 +162,6 @@ static int __init acpi_sleep_setup(char *str)
#endif
if (strncmp(str, "old_ordering", 12) == 0)
acpi_old_suspend_ordering();
- if (strncmp(str, "sci_force_enable", 16) == 0)
- acpi_set_sci_en_on_resume();
str = strchr(str, ',');
if (str != NULL)
str += strspn(str, ", \t");
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 8ded418b059..13ab720573e 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -1,4 +1,4 @@
- .section .text.page_aligned
+ .section .text..page_aligned
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page_types.h>
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index fa5a1474cd1..0d20286d78c 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1487,6 +1487,7 @@ static int __attach_device(struct device *dev,
struct protection_domain *domain)
{
struct iommu_dev_data *dev_data, *alias_data;
+ int ret;
dev_data = get_dev_data(dev);
alias_data = get_dev_data(dev_data->alias);
@@ -1498,13 +1499,14 @@ static int __attach_device(struct device *dev,
spin_lock(&domain->lock);
/* Some sanity checks */
+ ret = -EBUSY;
if (alias_data->domain != NULL &&
alias_data->domain != domain)
- return -EBUSY;
+ goto out_unlock;
if (dev_data->domain != NULL &&
dev_data->domain != domain)
- return -EBUSY;
+ goto out_unlock;
/* Do real assignment */
if (dev_data->alias != dev) {
@@ -1520,10 +1522,14 @@ static int __attach_device(struct device *dev,
atomic_inc(&dev_data->bind);
+ ret = 0;
+
+out_unlock:
+
/* ready */
spin_unlock(&domain->lock);
- return 0;
+ return ret;
}
/*
@@ -2324,10 +2330,6 @@ int __init amd_iommu_init_dma_ops(void)
iommu_detected = 1;
swiotlb = 0;
-#ifdef CONFIG_GART_IOMMU
- gart_iommu_aperture_disabled = 1;
- gart_iommu_aperture = 0;
-#endif
/* Make the driver finally visible to the drivers */
dma_ops = &amd_iommu_dma_ops;
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 3bacb4d0844..3cc63e2b8dd 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -287,8 +287,12 @@ static u8 * __init iommu_map_mmio_space(u64 address)
{
u8 *ret;
- if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu"))
+ if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) {
+ pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n",
+ address);
+ pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n");
return NULL;
+ }
ret = ioremap_nocache(address, MMIO_REGION_LENGTH);
if (ret != NULL)
@@ -1314,7 +1318,7 @@ static int __init amd_iommu_init(void)
ret = amd_iommu_init_dma_ops();
if (ret)
- goto free;
+ goto free_disable;
amd_iommu_init_api();
@@ -1332,9 +1336,10 @@ static int __init amd_iommu_init(void)
out:
return ret;
-free:
+free_disable:
disable_iommus();
+free:
amd_iommu_uninit_devices();
free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
@@ -1353,6 +1358,15 @@ free:
free_unity_maps();
+#ifdef CONFIG_GART_IOMMU
+ /*
+ * We failed to initialize the AMD IOMMU - try fallback to GART
+ * if possible.
+ */
+ gart_iommu_init();
+
+#endif
+
goto out;
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e5a4a1e0161..c02cc692985 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -51,6 +51,7 @@
#include <asm/smp.h>
#include <asm/mce.h>
#include <asm/kvm_para.h>
+#include <asm/tsc.h>
unsigned int num_processors;
@@ -1151,8 +1152,13 @@ static void __cpuinit lapic_setup_esr(void)
*/
void __cpuinit setup_local_APIC(void)
{
- unsigned int value;
- int i, j;
+ unsigned int value, queued;
+ int i, j, acked = 0;
+ unsigned long long tsc = 0, ntsc;
+ long long max_loops = cpu_khz;
+
+ if (cpu_has_tsc)
+ rdtscll(tsc);
if (disable_apic) {
arch_disable_smp_support();
@@ -1204,13 +1210,32 @@ void __cpuinit setup_local_APIC(void)
* the interrupt. Hence a vector might get locked. It was noticed
* for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
*/
- for (i = APIC_ISR_NR - 1; i >= 0; i--) {
- value = apic_read(APIC_ISR + i*0x10);
- for (j = 31; j >= 0; j--) {
- if (value & (1<<j))
- ack_APIC_irq();
+ do {
+ queued = 0;
+ for (i = APIC_ISR_NR - 1; i >= 0; i--)
+ queued |= apic_read(APIC_IRR + i*0x10);
+
+ for (i = APIC_ISR_NR - 1; i >= 0; i--) {
+ value = apic_read(APIC_ISR + i*0x10);
+ for (j = 31; j >= 0; j--) {
+ if (value & (1<<j)) {
+ ack_APIC_irq();
+ acked++;
+ }
+ }
}
- }
+ if (acked > 256) {
+ printk(KERN_ERR "LAPIC pending interrupts after %d EOI\n",
+ acked);
+ break;
+ }
+ if (cpu_has_tsc) {
+ rdtscll(ntsc);
+ max_loops = (cpu_khz << 10) - (ntsc - tsc);
+ } else
+ max_loops--;
+ } while (queued && max_loops > 0);
+ WARN_ON(max_loops <= 0);
/*
* Now that we are all set up, enable the APIC
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 33f3563a2a5..e41ed24ab26 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -89,8 +89,8 @@ int nr_ioapics;
/* IO APIC gsi routing info */
struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS];
-/* The last gsi number used */
-u32 gsi_end;
+/* The one past the highest gsi number used */
+u32 gsi_top;
/* MP IRQ source entries */
struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
@@ -1035,7 +1035,7 @@ static int pin_2_irq(int idx, int apic, int pin)
if (gsi >= NR_IRQS_LEGACY)
irq = gsi;
else
- irq = gsi_end + 1 + gsi;
+ irq = gsi_top + gsi;
}
#ifdef CONFIG_X86_32
@@ -3853,7 +3853,7 @@ void __init probe_nr_irqs_gsi(void)
{
int nr;
- nr = gsi_end + 1 + NR_IRQS_LEGACY;
+ nr = gsi_top + NR_IRQS_LEGACY;
if (nr > nr_irqs_gsi)
nr_irqs_gsi = nr;
@@ -4294,8 +4294,8 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
*/
nr_ioapic_registers[idx] = entries;
- if (mp_gsi_routing[idx].gsi_end > gsi_end)
- gsi_end = mp_gsi_routing[idx].gsi_end;
+ if (mp_gsi_routing[idx].gsi_end >= gsi_top)
+ gsi_top = mp_gsi_routing[idx].gsi_end + 1;
printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
"GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 6f3dc8fbbfd..7ec2123838e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1497,8 +1497,8 @@ static struct cpufreq_driver cpufreq_amd64_driver = {
* simply keep the boost-disable flag in sync with the current global
* state.
*/
-static int __cpuinit cpb_notify(struct notifier_block *nb, unsigned long action,
- void *hcpu)
+static int cpb_notify(struct notifier_block *nb, unsigned long action,
+ void *hcpu)
{
unsigned cpu = (long)hcpu;
u32 lo, hi;
@@ -1528,7 +1528,7 @@ static int __cpuinit cpb_notify(struct notifier_block *nb, unsigned long action,
return NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata cpb_nb = {
+static struct notifier_block cpb_nb = {
.notifier_call = cpb_notify,
};
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 4ac6d48fe11..bb34b03af25 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -7,3 +7,5 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
+
+obj-$(CONFIG_ACPI_APEI) += mce-apei.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
new file mode 100644
index 00000000000..745b54f9be8
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -0,0 +1,138 @@
+/*
+ * Bridge between MCE and APEI
+ *
+ * On some machine, corrected memory errors are reported via APEI
+ * generic hardware error source (GHES) instead of corrected Machine
+ * Check. These corrected memory errors can be reported to user space
+ * through /dev/mcelog via faking a corrected Machine Check, so that
+ * the error memory page can be offlined by /sbin/mcelog if the error
+ * count for one page is beyond the threshold.
+ *
+ * For fatal MCE, save MCE record into persistent storage via ERST, so
+ * that the MCE record can be logged after reboot via ERST.
+ *
+ * Copyright 2010 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/acpi.h>
+#include <linux/cper.h>
+#include <acpi/apei.h>
+#include <asm/mce.h>
+
+#include "mce-internal.h"
+
+void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
+{
+ struct mce m;
+
+ /* Only corrected MC is reported */
+ if (!corrected)
+ return;
+
+ mce_setup(&m);
+ m.bank = 1;
+ /* Fake a memory read corrected error with unknown channel */
+ m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;
+ m.addr = mem_err->physical_addr;
+ mce_log(&m);
+ mce_notify_irq();
+}
+EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
+
+#define CPER_CREATOR_MCE \
+ UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
+ 0x64, 0x90, 0xb8, 0x9d)
+#define CPER_SECTION_TYPE_MCE \
+ UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
+ 0x04, 0x4a, 0x38, 0xfc)
+
+/*
+ * CPER specification (in UEFI specification 2.3 appendix N) requires
+ * byte-packed.
+ */
+struct cper_mce_record {
+ struct cper_record_header hdr;
+ struct cper_section_descriptor sec_hdr;
+ struct mce mce;
+} __packed;
+
+int apei_write_mce(struct mce *m)
+{
+ struct cper_mce_record rcd;
+
+ memset(&rcd, 0, sizeof(rcd));
+ memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
+ rcd.hdr.revision = CPER_RECORD_REV;
+ rcd.hdr.signature_end = CPER_SIG_END;
+ rcd.hdr.section_count = 1;
+ rcd.hdr.error_severity = CPER_SER_FATAL;
+ /* timestamp, platform_id, partition_id are all invalid */
+ rcd.hdr.validation_bits = 0;
+ rcd.hdr.record_length = sizeof(rcd);
+ rcd.hdr.creator_id = CPER_CREATOR_MCE;
+ rcd.hdr.notification_type = CPER_NOTIFY_MCE;
+ rcd.hdr.record_id = cper_next_record_id();
+ rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
+
+ rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd;
+ rcd.sec_hdr.section_length = sizeof(rcd.mce);
+ rcd.sec_hdr.revision = CPER_SEC_REV;
+ /* fru_id and fru_text is invalid */
+ rcd.sec_hdr.validation_bits = 0;
+ rcd.sec_hdr.flags = CPER_SEC_PRIMARY;
+ rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
+ rcd.sec_hdr.section_severity = CPER_SER_FATAL;
+
+ memcpy(&rcd.mce, m, sizeof(*m));
+
+ return erst_write(&rcd.hdr);
+}
+
+ssize_t apei_read_mce(struct mce *m, u64 *record_id)
+{
+ struct cper_mce_record rcd;
+ ssize_t len;
+
+ len = erst_read_next(&rcd.hdr, sizeof(rcd));
+ if (len <= 0)
+ return len;
+ /* Can not skip other records in storage via ERST unless clear them */
+ else if (len != sizeof(rcd) ||
+ uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
+ if (printk_ratelimit())
+ pr_warning(
+ "MCE-APEI: Can not skip the unknown record in ERST");
+ return -EIO;
+ }
+
+ memcpy(m, &rcd.mce, sizeof(*m));
+ *record_id = rcd.hdr.record_id;
+
+ return sizeof(*m);
+}
+
+/* Check whether there is record in ERST */
+int apei_check_mce(void)
+{
+ return erst_get_record_count();
+}
+
+int apei_clear_mce(u64 record_id)
+{
+ return erst_clear(record_id);
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 32996f9fab6..fefcc69ee8b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,3 +28,26 @@ extern int mce_ser;
extern struct mce_bank *mce_banks;
+#ifdef CONFIG_ACPI_APEI
+int apei_write_mce(struct mce *m);
+ssize_t apei_read_mce(struct mce *m, u64 *record_id);
+int apei_check_mce(void);
+int apei_clear_mce(u64 record_id);
+#else
+static inline int apei_write_mce(struct mce *m)
+{
+ return -EINVAL;
+}
+static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id)
+{
+ return 0;
+}
+static inline int apei_check_mce(void)
+{
+ return 0;
+}
+static inline int apei_clear_mce(u64 record_id)
+{
+ return -EINVAL;
+}
+#endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 7a355ddcc64..18cc4256225 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -36,6 +36,7 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/debugfs.h>
+#include <linux/edac_mce.h>
#include <asm/processor.h>
#include <asm/hw_irq.h>
@@ -169,6 +170,15 @@ void mce_log(struct mce *mce)
entry = rcu_dereference_check_mce(mcelog.next);
for (;;) {
/*
+ * If edac_mce is enabled, it will check the error type
+ * and will process it, if it is a known error.
+ * Otherwise, the error will be sent through mcelog
+ * interface
+ */
+ if (edac_mce_parse(mce))
+ return;
+
+ /*
* When the buffer fills up discard new entries.
* Assume that the earlier errors are the more
* interesting ones:
@@ -264,7 +274,7 @@ static void wait_for_panic(void)
static void mce_panic(char *msg, struct mce *final, char *exp)
{
- int i;
+ int i, apei_err = 0;
if (!fake_panic) {
/*
@@ -287,8 +297,11 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
struct mce *m = &mcelog.entry[i];
if (!(m->status & MCI_STATUS_VAL))
continue;
- if (!(m->status & MCI_STATUS_UC))
+ if (!(m->status & MCI_STATUS_UC)) {
print_mce(m);
+ if (!apei_err)
+ apei_err = apei_write_mce(m);
+ }
}
/* Now print uncorrected but with the final one last */
for (i = 0; i < MCE_LOG_LEN; i++) {
@@ -297,11 +310,17 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
continue;
if (!(m->status & MCI_STATUS_UC))
continue;
- if (!final || memcmp(m, final, sizeof(struct mce)))
+ if (!final || memcmp(m, final, sizeof(struct mce))) {
print_mce(m);
+ if (!apei_err)
+ apei_err = apei_write_mce(m);
+ }
}
- if (final)
+ if (final) {
print_mce(final);
+ if (!apei_err)
+ apei_err = apei_write_mce(final);
+ }
if (cpu_missing)
printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n");
print_mce_tail();
@@ -1493,6 +1512,43 @@ static void collect_tscs(void *data)
rdtscll(cpu_tsc[smp_processor_id()]);
}
+static int mce_apei_read_done;
+
+/* Collect MCE record of previous boot in persistent storage via APEI ERST. */
+static int __mce_read_apei(char __user **ubuf, size_t usize)
+{
+ int rc;
+ u64 record_id;
+ struct mce m;
+
+ if (usize < sizeof(struct mce))
+ return -EINVAL;
+
+ rc = apei_read_mce(&m, &record_id);
+ /* Error or no more MCE record */
+ if (rc <= 0) {
+ mce_apei_read_done = 1;
+ return rc;
+ }
+ rc = -EFAULT;
+ if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
+ return rc;
+ /*
+ * In fact, we should have cleared the record after that has
+ * been flushed to the disk or sent to network in
+ * /sbin/mcelog, but we have no interface to support that now,
+ * so just clear it to avoid duplication.
+ */
+ rc = apei_clear_mce(record_id);
+ if (rc) {
+ mce_apei_read_done = 1;
+ return rc;
+ }
+ *ubuf += sizeof(struct mce);
+
+ return 0;
+}
+
static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
loff_t *off)
{
@@ -1506,15 +1562,19 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
return -ENOMEM;
mutex_lock(&mce_read_mutex);
+
+ if (!mce_apei_read_done) {
+ err = __mce_read_apei(&buf, usize);
+ if (err || buf != ubuf)
+ goto out;
+ }
+
next = rcu_dereference_check_mce(mcelog.next);
/* Only supports full reads right now */
- if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
- mutex_unlock(&mce_read_mutex);
- kfree(cpu_tsc);
-
- return -EINVAL;
- }
+ err = -EINVAL;
+ if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
+ goto out;
err = 0;
prev = 0;
@@ -1562,10 +1622,15 @@ timeout:
memset(&mcelog.entry[i], 0, sizeof(struct mce));
}
}
+
+ if (err)
+ err = -EFAULT;
+
+out:
mutex_unlock(&mce_read_mutex);
kfree(cpu_tsc);
- return err ? -EFAULT : buf - ubuf;
+ return err ? err : buf - ubuf;
}
static unsigned int mce_poll(struct file *file, poll_table *wait)
@@ -1573,6 +1638,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait)
poll_wait(file, &mce_wait, wait);
if (rcu_dereference_check_mce(mcelog.next))
return POLLIN | POLLRDNORM;
+ if (!mce_apei_read_done && apei_check_mce())
+ return POLLIN | POLLRDNORM;
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c77586061bc..5db5b7d65a1 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -106,6 +106,7 @@ struct cpu_hw_events {
int n_events;
int n_added;
+ int n_txn;
int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
u64 tags[X86_PMC_IDX_MAX];
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
@@ -983,6 +984,7 @@ static int x86_pmu_enable(struct perf_event *event)
out:
cpuc->n_events = n;
cpuc->n_added += n - n0;
+ cpuc->n_txn += n - n0;
return 0;
}
@@ -1089,6 +1091,14 @@ static void x86_pmu_disable(struct perf_event *event)
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int i;
+ /*
+ * If we're called during a txn, we don't need to do anything.
+ * The events never got scheduled and ->cancel_txn will truncate
+ * the event_list.
+ */
+ if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+ return;
+
x86_pmu_stop(event);
for (i = 0; i < cpuc->n_events; i++) {
@@ -1379,6 +1389,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
cpuc->group_flag |= PERF_EVENT_TXN_STARTED;
+ cpuc->n_txn = 0;
}
/*
@@ -1391,6 +1402,11 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED;
+ /*
+ * Truncate the collected events.
+ */
+ cpuc->n_added -= cpuc->n_txn;
+ cpuc->n_events -= cpuc->n_txn;
}
/*
@@ -1419,6 +1435,12 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
*/
memcpy(cpuc->assign, assign, n*sizeof(int));
+ /*
+ * Clear out the txn count so that ->cancel_txn() which gets
+ * run after ->commit_txn() doesn't undo things.
+ */
+ cpuc->n_txn = 0;
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 611df11ba15..c2897b7b4a3 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -102,8 +102,8 @@ static const u64 amd_perfmon_event_map[] =
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
[PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
};
static u64 amd_pmu_event_map(int hw_event)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index fdbc652d3fe..214ac860ebe 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -72,6 +72,7 @@ static struct event_constraint intel_westmere_event_constraints[] =
INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+ INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
EVENT_CONSTRAINT_END
};
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7bca3c6a02f..0d6fc71bedb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -729,7 +729,7 @@ static int __init e820_mark_nvs_memory(void)
struct e820entry *ei = &e820.map[i];
if (ei->type == E820_NVS)
- hibernate_nvs_register(ei->addr, ei->size);
+ suspend_nvs_register(ei->addr, ei->size);
}
return 0;
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index 3a54dcb9cd0..43e9ccf4494 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -34,7 +34,7 @@ EXPORT_SYMBOL(init_task);
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
* no more per-task TSS's. The TSS size is kept cacheline-aligned
- * so they are allowed to end up in the .data.cacheline_aligned
+ * so they are allowed to end up in the .data..cacheline_aligned
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 5ae5d2426ed..d86dbf7e54b 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -123,7 +123,7 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m)
printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
m->apicid, m->apicver, m->apicaddr);
- mp_register_ioapic(m->apicid, m->apicaddr, gsi_end + 1);
+ mp_register_ioapic(m->apicid, m->apicaddr, gsi_top);
}
static void print_MP_intsrc_info(struct mpc_intsrc *m)
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c
index e796448f0eb..5915e0b3330 100644
--- a/arch/x86/kernel/mrst.c
+++ b/arch/x86/kernel/mrst.c
@@ -216,6 +216,12 @@ static void __init mrst_setup_boot_clock(void)
setup_boot_APIC_clock();
};
+/* MID systems don't have i8042 controller */
+static int mrst_i8042_detect(void)
+{
+ return 0;
+}
+
/*
* Moorestown specific x86_init function overrides and early setup
* calls.
@@ -233,6 +239,7 @@ void __init x86_mrst_early_setup(void)
x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock;
x86_platform.calibrate_tsc = mrst_calibrate_tsc;
+ x86_platform.i8042_detect = mrst_i8042_detect;
x86_init.pci.init = pci_mrst_init;
x86_init.pci.fixup_irqs = x86_init_noop;
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index fb99f7edb34..078d4ec1a9d 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -103,11 +103,16 @@ int use_calgary __read_mostly = 0;
#define PMR_SOFTSTOPFAULT 0x40000000
#define PMR_HARDSTOP 0x20000000
-#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */
-#define MAX_NUM_CHASSIS 8 /* max number of chassis */
-/* MAX_PHB_BUS_NUM is the maximal possible dev->bus->number */
-#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * MAX_NUM_CHASSIS * 2)
-#define PHBS_PER_CALGARY 4
+/*
+ * The maximum PHB bus number.
+ * x3950M2 (rare): 8 chassis, 48 PHBs per chassis = 384
+ * x3950M2: 4 chassis, 48 PHBs per chassis = 192
+ * x3950 (PCIE): 8 chassis, 32 PHBs per chassis = 256
+ * x3950 (PCIX): 8 chassis, 16 PHBs per chassis = 128
+ */
+#define MAX_PHB_BUS_NUM 256
+
+#define PHBS_PER_CALGARY 4
/* register offsets in Calgary's internal register space */
static const unsigned long tar_offsets[] = {
@@ -1051,8 +1056,6 @@ static int __init calgary_init_one(struct pci_dev *dev)
struct iommu_table *tbl;
int ret;
- BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM);
-
bbar = busno_to_bbar(dev->bus->number);
ret = calgary_setup_tar(dev, bbar);
if (ret)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 8e1aac86b50..e3af342fe83 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -228,6 +228,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"),
},
},
+ { /* Handle problems with rebooting on Dell T7400's */
+ .callback = set_bios_reboot,
+ .ident = "Dell Precision T7400",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T7400"),
+ },
+ },
{ /* Handle problems with rebooting on HP laptops */
.callback = set_bios_reboot,
.ident = "HP Compaq Laptop",
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index e8029896309..b4ae4acbd03 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -676,6 +676,17 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "DG45FC"),
},
},
+ /*
+ * The Dell Inspiron Mini 1012 has DMI_BIOS_VENDOR = "Dell Inc.", so
+ * match on the product name.
+ */
+ {
+ .callback = dmi_low_memory_corruption,
+ .ident = "Phoenix BIOS",
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"),
+ },
+ },
#endif
{}
};
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index a867940a6df..de3b63ae3da 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -21,12 +21,6 @@
#include <asm/cpu.h>
#include <asm/stackprotector.h>
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-# define DBG(fmt, ...) pr_dbg(fmt, ##__VA_ARGS__)
-#else
-# define DBG(fmt, ...) do { if (0) pr_dbg(fmt, ##__VA_ARGS__); } while (0)
-#endif
-
DEFINE_PER_CPU(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);
@@ -247,7 +241,7 @@ void __init setup_per_cpu_areas(void)
#endif
#endif
/*
- * Up to this point, the boot CPU has been using .data.init
+ * Up to this point, the boot CPU has been using .init.data
* area. Reload any changed state for the boot CPU.
*/
if (cpu == boot_cpu_id)
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c
index 7ded57896c0..cb22acf3ed0 100644
--- a/arch/x86/kernel/sfi.c
+++ b/arch/x86/kernel/sfi.c
@@ -93,7 +93,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table)
pentry = (struct sfi_apic_table_entry *)sb->pentry;
for (i = 0; i < num; i++) {
- mp_register_ioapic(i, pentry->phys_addr, gsi_end + 1);
+ mp_register_ioapic(i, pentry->phys_addr, gsi_top);
pentry++;
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 763d815e27a..c4f33b2e77d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -686,7 +686,7 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
static void __cpuinit announce_cpu(int cpu, int apicid)
{
static int current_node = -1;
- int node = cpu_to_node(cpu);
+ int node = early_cpu_to_node(cpu);
if (system_state == SYSTEM_BOOTING) {
if (node != current_node) {
@@ -1215,9 +1215,17 @@ __init void prefill_possible_map(void)
if (!num_processors)
num_processors = 1;
- if (setup_possible_cpus == -1)
- possible = num_processors + disabled_cpus;
- else
+ i = setup_max_cpus ?: 1;
+ if (setup_possible_cpus == -1) {
+ possible = num_processors;
+#ifdef CONFIG_HOTPLUG_CPU
+ if (setup_max_cpus)
+ possible += disabled_cpus;
+#else
+ if (possible > i)
+ possible = i;
+#endif
+ } else
possible = setup_possible_cpus;
total_cpus = max_t(int, possible, num_processors + disabled_cpus);
@@ -1230,11 +1238,23 @@ __init void prefill_possible_map(void)
possible = nr_cpu_ids;
}
+#ifdef CONFIG_HOTPLUG_CPU
+ if (!setup_max_cpus)
+#endif
+ if (possible > i) {
+ printk(KERN_WARNING
+ "%d Processors exceeds max_cpus limit of %u\n",
+ possible, setup_max_cpus);
+ possible = i;
+ }
+
printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
possible, max_t(int, possible - num_processors, 0));
for (i = 0; i < possible; i++)
set_cpu_possible(i, true);
+ for (; i < NR_CPUS; i++)
+ set_cpu_possible(i, false);
nr_cpu_ids = possible;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 142d70c74b0..725ef4d17cd 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -526,6 +526,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk = current;
+ int user_icebp = 0;
unsigned long dr6;
int si_code;
@@ -534,6 +535,14 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
/* Filter out all the reserved bits which are preset to 1 */
dr6 &= ~DR6_RESERVED;
+ /*
+ * If dr6 has no reason to give us about the origin of this trap,
+ * then it's very likely the result of an icebp/int01 trap.
+ * User wants a sigtrap for that.
+ */
+ if (!dr6 && user_mode(regs))
+ user_icebp = 1;
+
/* Catch kmemcheck conditions first of all! */
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
return;
@@ -575,7 +584,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
regs->flags &= ~X86_EFLAGS_TF;
}
si_code = get_si_code(tsk->thread.debugreg6);
- if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
+ if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(tsk, regs, error_code, si_code);
preempt_conditional_cli(regs);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 2cc249718c4..d0bb52296fa 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -97,7 +97,7 @@ SECTIONS
HEAD_TEXT
#ifdef CONFIG_X86_32
. = ALIGN(PAGE_SIZE);
- *(.text.page_aligned)
+ *(.text..page_aligned)
#endif
. = ALIGN(8);
_stext = .;
@@ -305,7 +305,7 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
__bss_start = .;
- *(.bss.page_aligned)
+ *(.bss..page_aligned)
*(.bss)
. = ALIGN(4);
__bss_stop = .;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 61a1e8c7e19..cd6da6bf3ec 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -5,6 +5,7 @@
*/
#include <linux/init.h>
#include <linux/ioport.h>
+#include <linux/module.h>
#include <asm/bios_ebda.h>
#include <asm/paravirt.h>
@@ -85,6 +86,7 @@ struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
};
static void default_nmi_init(void) { };
+static int default_i8042_detect(void) { return 1; };
struct x86_platform_ops x86_platform = {
.calibrate_tsc = native_calibrate_tsc,
@@ -92,5 +94,8 @@ struct x86_platform_ops x86_platform = {
.set_wallclock = mach_set_rtc_mmss,
.iommu_shutdown = iommu_shutdown_noop,
.is_untracked_pat_range = is_ISA_range,
- .nmi_init = default_nmi_init
+ .nmi_init = default_nmi_init,
+ .i8042_detect = default_i8042_detect
};
+
+EXPORT_SYMBOL_GPL(x86_platform);