summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c6
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c9
-rw-r--r--arch/x86/kernel/cpu/perf_event.c59
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c15
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c2
-rw-r--r--arch/x86/kernel/early-quirks.c18
-rw-r--r--arch/x86/kernel/hpet.c31
-rw-r--r--arch/x86/kernel/trampoline.c3
-rw-r--r--arch/x86/kernel/tsc.c2
10 files changed, 85 insertions, 64 deletions
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 7b598b84c90..f744f54cb24 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -698,9 +698,11 @@ void __init uv_system_init(void)
for (j = 0; j < 64; j++) {
if (!test_bit(j, &present))
continue;
- uv_blade_info[blade].pnode = (i * 64 + j);
+ pnode = (i * 64 + j);
+ uv_blade_info[blade].pnode = pnode;
uv_blade_info[blade].nr_possible_cpus = 0;
uv_blade_info[blade].nr_online_cpus = 0;
+ max_pnode = max(pnode, max_pnode);
blade++;
}
}
@@ -738,7 +740,6 @@ void __init uv_system_init(void)
uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid);
uv_node_to_blade[nid] = blade;
uv_cpu_to_blade[cpu] = blade;
- max_pnode = max(pnode, max_pnode);
}
/* Add blade/pnode info for nodes without cpus */
@@ -750,7 +751,6 @@ void __init uv_system_init(void)
pnode = (paddr >> m_val) & pnode_mask;
blade = boot_pnode_to_blade(pnode);
uv_node_to_blade[nid] = blade;
- max_pnode = max(pnode, max_pnode);
}
map_gru_high(max_pnode);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 224392d8fe8..5e975298fa8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -530,7 +530,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
err = -ENOMEM;
goto out;
}
- if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
+ if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
kfree(b);
err = -ENOMEM;
goto out;
@@ -543,7 +543,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
#ifndef CONFIG_SMP
cpumask_setall(b->cpus);
#else
- cpumask_copy(b->cpus, c->llc_shared_map);
+ cpumask_set_cpu(cpu, b->cpus);
#endif
per_cpu(threshold_banks, cpu)[bank] = b;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index c2a8b26d4fe..d9368eeda30 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -202,10 +202,11 @@ static int therm_throt_process(bool new_event, int event, int level)
#ifdef CONFIG_SYSFS
/* Add/Remove thermal_throttle interface for CPU device: */
-static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
+static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
+ unsigned int cpu)
{
int err;
- struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group);
if (err)
@@ -251,7 +252,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
mutex_lock(&therm_cpu_lock);
- err = thermal_throttle_add_dev(sys_dev);
+ err = thermal_throttle_add_dev(sys_dev, cpu);
mutex_unlock(&therm_cpu_lock);
WARN_ON(err);
break;
@@ -287,7 +288,7 @@ static __init int thermal_throttle_init_device(void)
#endif
/* connect live CPUs to sysfs */
for_each_online_cpu(cpu) {
- err = thermal_throttle_add_dev(get_cpu_sysdev(cpu));
+ err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu);
WARN_ON(err);
}
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index f2da20fda02..3efdf2870a3 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1154,7 +1154,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
/*
* event overflow
*/
- handled = 1;
+ handled++;
data.period = event->hw.last_period;
if (!x86_perf_event_set_period(event))
@@ -1200,12 +1200,20 @@ void perf_events_lapic_init(void)
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
+struct pmu_nmi_state {
+ unsigned int marked;
+ int handled;
+};
+
+static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
+
static int __kprobes
perf_event_nmi_handler(struct notifier_block *self,
unsigned long cmd, void *__args)
{
struct die_args *args = __args;
- struct pt_regs *regs;
+ unsigned int this_nmi;
+ int handled;
if (!atomic_read(&active_events))
return NOTIFY_DONE;
@@ -1214,22 +1222,47 @@ perf_event_nmi_handler(struct notifier_block *self,
case DIE_NMI:
case DIE_NMI_IPI:
break;
-
+ case DIE_NMIUNKNOWN:
+ this_nmi = percpu_read(irq_stat.__nmi_count);
+ if (this_nmi != __get_cpu_var(pmu_nmi).marked)
+ /* let the kernel handle the unknown nmi */
+ return NOTIFY_DONE;
+ /*
+ * This one is a PMU back-to-back nmi. Two events
+ * trigger 'simultaneously' raising two back-to-back
+ * NMIs. If the first NMI handles both, the latter
+ * will be empty and daze the CPU. So, we drop it to
+ * avoid false-positive 'unknown nmi' messages.
+ */
+ return NOTIFY_STOP;
default:
return NOTIFY_DONE;
}
- regs = args->regs;
-
apic_write(APIC_LVTPC, APIC_DM_NMI);
- /*
- * Can't rely on the handled return value to say it was our NMI, two
- * events could trigger 'simultaneously' raising two back-to-back NMIs.
- *
- * If the first NMI handles both, the latter will be empty and daze
- * the CPU.
- */
- x86_pmu.handle_irq(regs);
+
+ handled = x86_pmu.handle_irq(args->regs);
+ if (!handled)
+ return NOTIFY_DONE;
+
+ this_nmi = percpu_read(irq_stat.__nmi_count);
+ if ((handled > 1) ||
+ /* the next nmi could be a back-to-back nmi */
+ ((__get_cpu_var(pmu_nmi).marked == this_nmi) &&
+ (__get_cpu_var(pmu_nmi).handled > 1))) {
+ /*
+ * We could have two subsequent back-to-back nmis: The
+ * first handles more than one counter, the 2nd
+ * handles only one counter and the 3rd handles no
+ * counter.
+ *
+ * This is the 2nd nmi because the previous was
+ * handling more than one counter. We will mark the
+ * next (3rd) and then drop it if unhandled.
+ */
+ __get_cpu_var(pmu_nmi).marked = this_nmi + 1;
+ __get_cpu_var(pmu_nmi).handled = handled;
+ }
return NOTIFY_STOP;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index d8d86d01400..ee05c90012d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -712,7 +712,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
int bit, loops;
- u64 ack, status;
+ u64 status;
+ int handled = 0;
perf_sample_data_init(&data, 0);
@@ -728,6 +729,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
loops = 0;
again:
+ intel_pmu_ack_status(status);
if (++loops > 100) {
WARN_ONCE(1, "perfevents: irq loop stuck!\n");
perf_event_print_debug();
@@ -736,19 +738,22 @@ again:
}
inc_irq_stat(apic_perf_irqs);
- ack = status;
intel_pmu_lbr_read();
/*
* PEBS overflow sets bit 62 in the global status register
*/
- if (__test_and_clear_bit(62, (unsigned long *)&status))
+ if (__test_and_clear_bit(62, (unsigned long *)&status)) {
+ handled++;
x86_pmu.drain_pebs(regs);
+ }
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
+ handled++;
+
if (!test_bit(bit, cpuc->active_mask))
continue;
@@ -761,8 +766,6 @@ again:
x86_pmu_stop(event);
}
- intel_pmu_ack_status(ack);
-
/*
* Repeat if there is more work to be done:
*/
@@ -772,7 +775,7 @@ again:
done:
intel_pmu_enable_all(0);
- return 1;
+ return handled;
}
static struct event_constraint *
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 7e578e9cc58..b560db3305b 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -692,7 +692,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
inc_irq_stat(apic_perf_irqs);
}
- return handled > 0;
+ return handled;
}
/*
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index e5cc7e82e60..ebdb85cf268 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -18,7 +18,6 @@
#include <asm/apic.h>
#include <asm/iommu.h>
#include <asm/gart.h>
-#include <asm/hpet.h>
static void __init fix_hypertransport_config(int num, int slot, int func)
{
@@ -192,21 +191,6 @@ static void __init ati_bugs_contd(int num, int slot, int func)
}
#endif
-/*
- * Force the read back of the CMP register in hpet_next_event()
- * to work around the problem that the CMP register write seems to be
- * delayed. See hpet_next_event() for details.
- *
- * We do this on all SMBUS incarnations for now until we have more
- * information about the affected chipsets.
- */
-static void __init ati_hpet_bugs(int num, int slot, int func)
-{
-#ifdef CONFIG_HPET_TIMER
- hpet_readback_cmp = 1;
-#endif
-}
-
#define QFLAG_APPLY_ONCE 0x1
#define QFLAG_APPLIED 0x2
#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -236,8 +220,6 @@ static struct chipset early_qrk[] __initdata = {
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
{ PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
- { PCI_VENDOR_ID_ATI, PCI_ANY_ID,
- PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs },
{}
};
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 351f9c0fea1..410fdb3f193 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -35,7 +35,6 @@
unsigned long hpet_address;
u8 hpet_blockid; /* OS timer block num */
u8 hpet_msi_disable;
-u8 hpet_readback_cmp;
#ifdef CONFIG_PCI_MSI
static unsigned long hpet_num_timers;
@@ -395,23 +394,27 @@ static int hpet_next_event(unsigned long delta,
* at that point and we would wait for the next hpet interrupt
* forever. We found out that reading the CMP register back
* forces the transfer so we can rely on the comparison with
- * the counter register below.
+ * the counter register below. If the read back from the
+ * compare register does not match the value we programmed
+ * then we might have a real hardware problem. We can not do
+ * much about it here, but at least alert the user/admin with
+ * a prominent warning.
*
- * That works fine on those ATI chipsets, but on newer Intel
- * chipsets (ICH9...) this triggers due to an erratum: Reading
- * the comparator immediately following a write is returning
- * the old value.
+ * An erratum on some chipsets (ICH9,..), results in
+ * comparator read immediately following a write returning old
+ * value. Workaround for this is to read this value second
+ * time, when first read returns old value.
*
- * We restrict the read back to the affected ATI chipsets (set
- * by quirks) and also run it with hpet=verbose for debugging
- * purposes.
+ * In fact the write to the comparator register is delayed up
+ * to two HPET cycles so the workaround we tried to restrict
+ * the readback to those known to be borked ATI chipsets
+ * failed miserably. So we give up on optimizations forever
+ * and penalize all HPET incarnations unconditionally.
*/
- if (hpet_readback_cmp || hpet_verbose) {
- u32 cmp = hpet_readl(HPET_Tn_CMP(timer));
-
- if (cmp != cnt)
+ if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) {
+ if (hpet_readl(HPET_Tn_CMP(timer)) != cnt)
printk_once(KERN_WARNING
- "hpet: compare register read back failed.\n");
+ "hpet: compare register read back failed.\n");
}
return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index a874495b367..e2a59525739 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -45,8 +45,7 @@ void __init setup_trampoline_page_table(void)
/* Copy kernel address range */
clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- min_t(unsigned long, KERNEL_PGD_PTRS,
- KERNEL_PGD_BOUNDARY));
+ KERNEL_PGD_PTRS);
/* Initialize low mappings */
clone_pgd_range(trampoline_pg_dir,
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index d632934cb63..26a863a9c2a 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -655,7 +655,7 @@ void restore_sched_clock_state(void)
local_irq_save(flags);
- get_cpu_var(cyc2ns_offset) = 0;
+ __get_cpu_var(cyc2ns_offset) = 0;
offset = cyc2ns_suspend - sched_clock();
for_each_possible_cpu(cpu)