summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2009-05-23 16:42:19 -0700
committerH. Peter Anvin <hpa@zytor.com>2009-05-23 16:42:19 -0700
commitee0736627d3347be0be2769fa7b26431f9726c9d (patch)
tree203e2204daaec4cf005463fdf2c7bf380d6eef36 /arch/x86/kernel
parentcf9972a921470b0a2da7906104bcd540b20e33bf (diff)
parent0af48f42df15b97080b450d24219dd95db7b929a (diff)
Merge branch 'x86/urgent' into x86/setup
Resolved conflicts: arch/x86/boot/memory.c Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile3
-rw-r--r--arch/x86/kernel/amd_iommu_init.c16
-rw-r--r--arch/x86/kernel/apic/es7000_32.c8
-rw-r--r--arch/x86/kernel/apic/io_apic.c7
-rw-r--r--arch/x86/kernel/apic/nmi.c5
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c31
-rw-r--r--arch/x86/kernel/bios_uv.c3
-rw-r--r--arch/x86/kernel/cpu/common.c9
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c58
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c33
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c10
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c6
-rw-r--r--arch/x86/kernel/cpu/proc.c2
-rw-r--r--arch/x86/kernel/e820.c11
-rw-r--r--arch/x86/kernel/entry_64.S3
-rw-r--r--arch/x86/kernel/ftrace.c2
-rw-r--r--arch/x86/kernel/hpet.c24
-rw-r--r--arch/x86/kernel/i8253.c2
-rw-r--r--arch/x86/kernel/kgdb.c3
-rw-r--r--arch/x86/kernel/kvmclock.c7
-rw-r--r--arch/x86/kernel/machine_kexec_32.c4
-rw-r--r--arch/x86/kernel/machine_kexec_64.c4
-rw-r--r--arch/x86/kernel/microcode_core.c35
-rw-r--r--arch/x86/kernel/mpparse.c7
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86/kernel/quirks.c2
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/tlb_uv.c189
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/uv_sysfs.c4
-rw-r--r--arch/x86/kernel/uv_time.c10
-rw-r--r--arch/x86/kernel/vmiclock_32.c2
-rw-r--r--arch/x86/kernel/xsave.c6
34 files changed, 334 insertions, 186 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 145cce75cda..88d1bfc847d 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -89,7 +89,8 @@ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
obj-$(CONFIG_KVM_GUEST) += kvm.o
obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 42c33cebf00..8c0be0902da 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -49,10 +49,10 @@
#define IVHD_DEV_EXT_SELECT 0x46
#define IVHD_DEV_EXT_SELECT_RANGE 0x47
-#define IVHD_FLAG_HT_TUN_EN 0x00
-#define IVHD_FLAG_PASSPW_EN 0x01
-#define IVHD_FLAG_RESPASSPW_EN 0x02
-#define IVHD_FLAG_ISOC_EN 0x03
+#define IVHD_FLAG_HT_TUN_EN_MASK 0x01
+#define IVHD_FLAG_PASSPW_EN_MASK 0x02
+#define IVHD_FLAG_RESPASSPW_EN_MASK 0x04
+#define IVHD_FLAG_ISOC_EN_MASK 0x08
#define IVMD_FLAG_EXCL_RANGE 0x08
#define IVMD_FLAG_UNITY_MAP 0x01
@@ -569,19 +569,19 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
* First set the recommended feature enable bits from ACPI
* into the IOMMU control registers
*/
- h->flags & IVHD_FLAG_HT_TUN_EN ?
+ h->flags & IVHD_FLAG_HT_TUN_EN_MASK ?
iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
- h->flags & IVHD_FLAG_PASSPW_EN ?
+ h->flags & IVHD_FLAG_PASSPW_EN_MASK ?
iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
- h->flags & IVHD_FLAG_RESPASSPW_EN ?
+ h->flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
- h->flags & IVHD_FLAG_ISOC_EN ?
+ h->flags & IVHD_FLAG_ISOC_EN_MASK ?
iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
iommu_feature_disable(iommu, CONTROL_ISOC_EN);
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 1c11b819f24..30294777557 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -254,7 +254,7 @@ static int parse_unisys_oem(char *oemptr)
}
#ifdef CONFIG_ACPI
-static int find_unisys_acpi_oem_table(unsigned long *oem_addr)
+static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
{
struct acpi_table_header *header = NULL;
struct es7000_oem_table *table;
@@ -285,7 +285,7 @@ static int find_unisys_acpi_oem_table(unsigned long *oem_addr)
return 0;
}
-static void unmap_unisys_acpi_oem_table(unsigned long oem_addr)
+static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
{
if (!oem_addr)
return;
@@ -306,7 +306,7 @@ static int es7000_check_dsdt(void)
static int es7000_acpi_ret;
/* Hook from generic ACPI tables.c */
-static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
unsigned long oem_addr = 0;
int check_dsdt;
@@ -717,7 +717,7 @@ struct apic apic_es7000_cluster = {
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
};
-struct apic apic_es7000 = {
+struct apic __refdata apic_es7000 = {
.name = "es7000",
.probe = probe_es7000,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 767fe7e46d6..30da617d18e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2524,7 +2524,6 @@ static void irq_complete_move(struct irq_desc **descp)
static inline void irq_complete_move(struct irq_desc **descp) {}
#endif
-#ifdef CONFIG_X86_X2APIC
static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
{
int apic, pin;
@@ -2558,6 +2557,7 @@ eoi_ioapic_irq(struct irq_desc *desc)
spin_unlock_irqrestore(&ioapic_lock, flags);
}
+#ifdef CONFIG_X86_X2APIC
static void ack_x2apic_level(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -2634,6 +2634,9 @@ static void ack_apic_level(unsigned int irq)
*/
ack_APIC_irq();
+ if (irq_remapped(irq))
+ eoi_ioapic_irq(desc);
+
/* Now we can move and renable the irq */
if (unlikely(do_unmask_irq)) {
/* Only migrate the irq if the ack has been received.
@@ -3667,12 +3670,14 @@ int arch_setup_hpet_msi(unsigned int irq)
{
int ret;
struct msi_msg msg;
+ struct irq_desc *desc = irq_to_desc(irq);
ret = msi_compose_msg(NULL, irq, &msg);
if (ret < 0)
return ret;
hpet_msi_write(irq, &msg);
+ desc->status |= IRQ_MOVE_PCNTXT;
set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
"edge");
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index d6bd6240715..ce4fbfa315a 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -138,7 +138,7 @@ int __init check_nmi_watchdog(void)
if (!prev_nmi_count)
goto error;
- alloc_cpumask_var(&backtrace_mask, GFP_KERNEL);
+ alloc_cpumask_var(&backtrace_mask, GFP_KERNEL|__GFP_ZERO);
printk(KERN_INFO "Testing NMI watchdog ... ");
#ifdef CONFIG_SMP
@@ -414,7 +414,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
touched = 1;
}
- if (cpumask_test_cpu(cpu, backtrace_mask)) {
+ /* We can be called before check_nmi_watchdog, hence NULL check. */
+ if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) {
static DEFINE_SPINLOCK(lock); /* Serialise the printks */
spin_lock(&lock);
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 1248318436e..2bda6935297 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -19,6 +19,7 @@
#include <linux/timer.h>
#include <linux/cpu.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
@@ -34,6 +35,17 @@ DEFINE_PER_CPU(int, x2apic_extra_bits);
static enum uv_system_type uv_system_type;
+static int early_get_nodeid(void)
+{
+ union uvh_node_id_u node_id;
+ unsigned long *mmr;
+
+ mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr));
+ node_id.v = *mmr;
+ early_iounmap(mmr, sizeof(*mmr));
+ return node_id.s.node_id;
+}
+
static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
if (!strcmp(oem_id, "SGI")) {
@@ -42,6 +54,8 @@ static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
else if (!strcmp(oem_table_id, "UVX"))
uv_system_type = UV_X2APIC;
else if (!strcmp(oem_table_id, "UVH")) {
+ __get_cpu_var(x2apic_extra_bits) =
+ early_get_nodeid() << (UV_APIC_PNODE_SHIFT - 1);
uv_system_type = UV_NON_UNIQUE_APIC;
return 1;
}
@@ -549,7 +563,8 @@ void __init uv_system_init(void)
unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
int max_pnode = 0;
- unsigned long mmr_base, present;
+ unsigned long mmr_base, present, paddr;
+ unsigned short pnode_mask;
map_low_mmrs();
@@ -592,6 +607,7 @@ void __init uv_system_init(void)
}
}
+ pnode_mask = (1 << n_val) - 1;
node_id.v = uv_read_local_mmr(UVH_NODE_ID);
gnode_upper = (((unsigned long)node_id.s.node_id) &
~((1 << n_val) - 1)) << m_val;
@@ -615,7 +631,7 @@ void __init uv_system_init(void)
uv_cpu_hub_info(cpu)->numa_blade_id = blade;
uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
uv_cpu_hub_info(cpu)->pnode = pnode;
- uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) - 1;
+ uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
@@ -631,6 +647,17 @@ void __init uv_system_init(void)
lcpu, blade);
}
+ /* Add blade/pnode info for nodes without cpus */
+ for_each_online_node(nid) {
+ if (uv_node_to_blade[nid] >= 0)
+ continue;
+ paddr = node_start_pfn(nid) << PAGE_SHIFT;
+ paddr = uv_soc_phys_ram_to_gpa(paddr);
+ pnode = (paddr >> m_val) & pnode_mask;
+ blade = boot_pnode_to_blade(pnode);
+ uv_node_to_blade[nid] = blade;
+ }
+
map_gru_high(max_pnode);
map_mmr_high(max_pnode);
map_config_high(max_pnode);
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index f63882728d9..63a88e1f987 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -182,7 +182,8 @@ void uv_bios_init(void)
memcpy(&uv_systab, tab, sizeof(struct uv_systab));
iounmap(tab);
- printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision);
+ printk(KERN_INFO "EFI UV System Table Revision %d\n",
+ uv_systab.revision);
}
#else /* !CONFIG_EFI */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c4f667896c2..77848d9fca6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -114,6 +114,13 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
+static int __init x86_xsave_setup(char *s)
+{
+ setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+ return 1;
+}
+__setup("noxsave", x86_xsave_setup);
+
#ifdef CONFIG_X86_32
static int cachesize_override __cpuinitdata = -1;
static int disable_x86_serial_nr __cpuinitdata = 1;
@@ -1203,6 +1210,8 @@ void __cpuinit cpu_init(void)
load_TR_desc();
load_LDT(&init_mm.context);
+ t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+
#ifdef CONFIG_DOUBLEFAULT
/* Set up doublefault TSS pointer in the GDT */
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 9d3af380c6b..208ecf6643d 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -65,14 +65,18 @@ enum {
struct acpi_cpufreq_data {
struct acpi_processor_performance *acpi_data;
struct cpufreq_frequency_table *freq_table;
- unsigned int max_freq;
unsigned int resume;
unsigned int cpu_feature;
- u64 saved_aperf, saved_mperf;
};
static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
+struct acpi_msr_data {
+ u64 saved_aperf, saved_mperf;
+};
+
+static DEFINE_PER_CPU(struct acpi_msr_data, msr_data);
+
DEFINE_TRACE(power_mark);
/* acpi_perf_data is a pointer to percpu data. */
@@ -153,7 +157,8 @@ struct drv_cmd {
u32 val;
};
-static long do_drv_read(void *_cmd)
+/* Called via smp_call_function_single(), on the target CPU */
+static void do_drv_read(void *_cmd)
{
struct drv_cmd *cmd = _cmd;
u32 h;
@@ -170,10 +175,10 @@ static long do_drv_read(void *_cmd)
default:
break;
}
- return 0;
}
-static long do_drv_write(void *_cmd)
+/* Called via smp_call_function_many(), on the target CPUs */
+static void do_drv_write(void *_cmd)
{
struct drv_cmd *cmd = _cmd;
u32 lo, hi;
@@ -192,23 +197,24 @@ static long do_drv_write(void *_cmd)
default:
break;
}
- return 0;
}
static void drv_read(struct drv_cmd *cmd)
{
cmd->val = 0;
- work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd);
+ smp_call_function_single(cpumask_any(cmd->mask), do_drv_read, cmd, 1);
}
static void drv_write(struct drv_cmd *cmd)
{
- unsigned int i;
+ int this_cpu;
- for_each_cpu(i, cmd->mask) {
- work_on_cpu(i, do_drv_write, cmd);
- }
+ this_cpu = get_cpu();
+ if (cpumask_test_cpu(this_cpu, cmd->mask))
+ do_drv_write(cmd);
+ smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
+ put_cpu();
}
static u32 get_cur_val(const struct cpumask *mask)
@@ -252,15 +258,13 @@ struct perf_pair {
} aperf, mperf;
};
-
-static long read_measured_perf_ctrs(void *_cur)
+/* Called via smp_call_function_single(), on the target CPU */
+static void read_measured_perf_ctrs(void *_cur)
{
struct perf_pair *cur = _cur;
rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi);
rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi);
-
- return 0;
}
/*
@@ -283,15 +287,15 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
unsigned int perf_percent;
unsigned int retval;
- if (!work_on_cpu(cpu, read_measured_perf_ctrs, &readin))
+ if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1))
return 0;
cur.aperf.whole = readin.aperf.whole -
- per_cpu(drv_data, cpu)->saved_aperf;
+ per_cpu(msr_data, cpu).saved_aperf;
cur.mperf.whole = readin.mperf.whole -
- per_cpu(drv_data, cpu)->saved_mperf;
- per_cpu(drv_data, cpu)->saved_aperf = readin.aperf.whole;
- per_cpu(drv_data, cpu)->saved_mperf = readin.mperf.whole;
+ per_cpu(msr_data, cpu).saved_mperf;
+ per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole;
+ per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole;
#ifdef __i386__
/*
@@ -335,7 +339,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
#endif
- retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100;
+ retval = (policy->cpuinfo.max_freq * perf_percent) / 100;
return retval;
}
@@ -688,16 +692,11 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
policy->cpuinfo.transition_latency > 20 * 1000) {
- static int print_once;
policy->cpuinfo.transition_latency = 20 * 1000;
- if (!print_once) {
- print_once = 1;
- printk(KERN_INFO "Capping off P-state tranision latency"
- " at 20 uS\n");
- }
+ printk_once(KERN_INFO "Capping off P-state tranision"
+ " latency at 20 uS\n");
}
- data->max_freq = perf->states[0].core_frequency * 1000;
/* table init */
for (i = 0; i < perf->state_count; i++) {
if (i > 0 && perf->states[i].core_frequency >=
@@ -716,6 +715,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
if (result)
goto err_freqfree;
+ if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
+ printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");
+
switch (perf->control_register.space_id) {
case ACPI_ADR_SPACE_SYSTEM_IO:
/* Current speed is unknown and not detectable by IO port */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 863f89568b1..6fb0b359d2a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -239,9 +239,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
*/
-
- mce_log(&m);
- add_taint(TAINT_MACHINE_CHECK);
+ if (!(flags & MCP_DONTLOG)) {
+ mce_log(&m);
+ add_taint(TAINT_MACHINE_CHECK);
+ }
/*
* Clear state for this bank.
@@ -452,13 +453,14 @@ void mce_log_therm_throt_event(__u64 status)
*/
static int check_interval = 5 * 60; /* 5 minutes */
-static int next_interval; /* in jiffies */
+static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
static void mcheck_timer(unsigned long);
static DEFINE_PER_CPU(struct timer_list, mce_timer);
static void mcheck_timer(unsigned long data)
{
struct timer_list *t = &per_cpu(mce_timer, data);
+ int *n;
WARN_ON(smp_processor_id() != data);
@@ -470,14 +472,14 @@ static void mcheck_timer(unsigned long data)
* Alert userspace if needed. If we logged an MCE, reduce the
* polling interval, otherwise increase the polling interval.
*/
+ n = &__get_cpu_var(next_interval);
if (mce_notify_user()) {
- next_interval = max(next_interval/2, HZ/100);
+ *n = max(*n/2, HZ/100);
} else {
- next_interval = min(next_interval * 2,
- (int)round_jiffies_relative(check_interval*HZ));
+ *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
}
- t->expires = jiffies + next_interval;
+ t->expires = jiffies + *n;
add_timer(t);
}
@@ -584,7 +586,7 @@ static void mce_init(void *dummy)
* Log the machine checks left over from the previous reset.
*/
bitmap_fill(all_banks, MAX_NR_BANKS);
- machine_check_poll(MCP_UC, &all_banks);
+ machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
set_in_cr4(X86_CR4_MCE);
@@ -632,14 +634,13 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
static void mce_init_timer(void)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
+ int *n = &__get_cpu_var(next_interval);
- /* data race harmless because everyone sets to the same value */
- if (!next_interval)
- next_interval = check_interval * HZ;
- if (!next_interval)
+ *n = check_interval * HZ;
+ if (!*n)
return;
setup_timer(t, mcheck_timer, smp_processor_id());
- t->expires = round_jiffies(jiffies + next_interval);
+ t->expires = round_jiffies(jiffies + *n);
add_timer(t);
}
@@ -907,7 +908,6 @@ static void mce_cpu_restart(void *data)
/* Reinit MCEs after user configuration changes */
static void mce_restart(void)
{
- next_interval = check_interval * HZ;
on_each_cpu(mce_cpu_restart, NULL, 1);
}
@@ -1110,7 +1110,8 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- t->expires = round_jiffies(jiffies + next_interval);
+ t->expires = round_jiffies(jiffies +
+ __get_cpu_var(next_interval));
add_timer_on(t, cpu);
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
break;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index d6b72df89d6..cef3ee30744 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -151,10 +151,11 @@ static void print_update(char *type, int *hdr, int num)
static void cmci_discover(int banks, int boot)
{
unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
+ unsigned long flags;
int hdr = 0;
int i;
- spin_lock(&cmci_discover_lock);
+ spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
u64 val;
@@ -184,7 +185,7 @@ static void cmci_discover(int banks, int boot)
WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
}
}
- spin_unlock(&cmci_discover_lock);
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
if (hdr)
printk(KERN_CONT "\n");
}
@@ -211,13 +212,14 @@ void cmci_recheck(void)
*/
void cmci_clear(void)
{
+ unsigned long flags;
int i;
int banks;
u64 val;
if (!cmci_supported(&banks))
return;
- spin_lock(&cmci_discover_lock);
+ spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
continue;
@@ -227,7 +229,7 @@ void cmci_clear(void)
wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
__clear_bit(i, __get_cpu_var(mce_banks_owned));
}
- spin_unlock(&cmci_discover_lock);
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
/*
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 0b776c09aff..d21d4fb161f 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -275,7 +275,11 @@ static void __init print_mtrr_state(void)
}
printk(KERN_DEBUG "MTRR variable ranges %sabled:\n",
mtrr_state.enabled & 2 ? "en" : "dis");
- high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4;
+ if (size_or_mask & 0xffffffffUL)
+ high_width = ffs(size_or_mask & 0xffffffffUL) - 1;
+ else
+ high_width = ffs(size_or_mask>>32) + 32 - 1;
+ high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4;
for (i = 0; i < num_var_ranges; ++i) {
if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n",
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index f93047fed79..d5e30397246 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -14,7 +14,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
if (c->x86_max_cores * smp_num_siblings > 1) {
seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
seq_printf(m, "siblings\t: %d\n",
- cpumask_weight(cpu_sibling_mask(cpu)));
+ cpumask_weight(cpu_core_mask(cpu)));
seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
seq_printf(m, "apicid\t\t: %d\n", c->apicid);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index ef2c3563357..00628130292 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1074,12 +1074,13 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
u64 addr;
u64 start;
- start = startt;
- while (size < sizet && (start + 1))
+ for (start = startt; ; start += size) {
start = find_e820_area_size(start, &size, align);
-
- if (size < sizet)
- return 0;
+ if (!(start + 1))
+ return 0;
+ if (size >= sizet)
+ break;
+ }
#ifdef CONFIG_X86_32
if (start >= MAXMEM)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a331ec38af9..38946c6e843 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1410,7 +1410,10 @@ ENTRY(paranoid_exit)
paranoid_swapgs:
TRACE_IRQS_IRETQ 0
SWAPGS_UNSAFE_STACK
+ RESTORE_ALL 8
+ jmp irq_return
paranoid_restore:
+ TRACE_IRQS_IRETQ 0
RESTORE_ALL 8
jmp irq_return
paranoid_userspace:
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 18dfa30795c..b79c5533c42 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -442,7 +442,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
_ASM_EXTABLE(1b, 4b)
_ASM_EXTABLE(2b, 4b)
- : [old] "=r" (old), [faulted] "=r" (faulted)
+ : [old] "=&r" (old), [faulted] "=r" (faulted)
: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
: "memory"
);
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 648b3a2a3a4..81408b93f88 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -236,6 +236,10 @@ static void hpet_stop_counter(void)
unsigned long cfg = hpet_readl(HPET_CFG);
cfg &= ~HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
+}
+
+static void hpet_reset_counter(void)
+{
hpet_writel(0, HPET_COUNTER);
hpet_writel(0, HPET_COUNTER + 4);
}
@@ -250,6 +254,7 @@ static void hpet_start_counter(void)
static void hpet_restart_counter(void)
{
hpet_stop_counter();
+ hpet_reset_counter();
hpet_start_counter();
}
@@ -309,7 +314,7 @@ static int hpet_setup_msi_irq(unsigned int irq);
static void hpet_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt, int timer)
{
- unsigned long cfg;
+ unsigned long cfg, cmp, now;
uint64_t delta;
switch (mode) {
@@ -317,12 +322,23 @@ static void hpet_set_mode(enum clock_event_mode mode,
hpet_stop_counter();
delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
delta >>= evt->shift;
+ now = hpet_readl(HPET_COUNTER);
+ cmp = now + (unsigned long) delta;
cfg = hpet_readl(HPET_Tn_CFG(timer));
/* Make sure we use edge triggered interrupts */
cfg &= ~HPET_TN_LEVEL;
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
HPET_TN_SETVAL | HPET_TN_32BIT;
hpet_writel(cfg, HPET_Tn_CFG(timer));
+ hpet_writel(cmp, HPET_Tn_CMP(timer));
+ udelay(1);
+ /*
+ * HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL
+ * cleared) to T0_CMP to set the period. The HPET_TN_SETVAL
+ * bit is automatically cleared after the first write.
+ * (See AMD-8111 HyperTransport I/O Hub Data Sheet,
+ * Publication # 24674)
+ */
hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
hpet_start_counter();
hpet_print_config();
@@ -722,7 +738,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n,
/*
* Clock source related code
*/
-static cycle_t read_hpet(void)
+static cycle_t read_hpet(struct clocksource *cs)
{
return (cycle_t)hpet_readl(HPET_COUNTER);
}
@@ -756,7 +772,7 @@ static int hpet_clocksource_register(void)
hpet_restart_counter();
/* Verify whether hpet counter works */
- t1 = read_hpet();
+ t1 = hpet_readl(HPET_COUNTER);
rdtscll(start);
/*
@@ -770,7 +786,7 @@ static int hpet_clocksource_register(void)
rdtscll(now);
} while ((now - start) < 200000UL);
- if (t1 == read_hpet()) {
+ if (t1 == hpet_readl(HPET_COUNTER)) {
printk(KERN_WARNING
"HPET counter not counting. HPET disabled\n");
return -ENODEV;
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 3475440baa5..c2e0bb0890d 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -129,7 +129,7 @@ void __init setup_pit_timer(void)
* to just read by itself. So use jiffies to emulate a free
* running counter:
*/
-static cycle_t pit_read(void)
+static cycle_t pit_read(struct clocksource *cs)
{
static int old_count;
static u32 old_jifs;
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index eedfaebe106..b1f4dffb919 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -88,6 +88,7 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs[GDB_SS] = __KERNEL_DS;
gdb_regs[GDB_FS] = 0xFFFF;
gdb_regs[GDB_GS] = 0xFFFF;
+ gdb_regs[GDB_SP] = (int)&regs->sp;
#else
gdb_regs[GDB_R8] = regs->r8;
gdb_regs[GDB_R9] = regs->r9;
@@ -100,8 +101,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs32[GDB_PS] = regs->flags;
gdb_regs32[GDB_CS] = regs->cs;
gdb_regs32[GDB_SS] = regs->ss;
-#endif
gdb_regs[GDB_SP] = regs->sp;
+#endif
}
/**
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 137f2e8132d..223af43f152 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -77,6 +77,11 @@ static cycle_t kvm_clock_read(void)
return ret;
}
+static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
+{
+ return kvm_clock_read();
+}
+
/*
* If we don't do that, there is the possibility that the guest
* will calibrate under heavy load - thus, getting a lower lpj -
@@ -107,7 +112,7 @@ static void kvm_get_preset_lpj(void)
static struct clocksource kvm_clock = {
.name = "kvm-clock",
- .read = kvm_clock_read,
+ .read = kvm_clock_get_cycles,
.rating = 400,
.mask = CLOCKSOURCE_MASK(64),
.mult = 1 << KVM_SCALE,
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index e7368c1da01..c1c429d0013 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -194,7 +194,7 @@ void machine_kexec(struct kimage *image)
unsigned int preserve_context);
#ifdef CONFIG_KEXEC_JUMP
- if (kexec_image->preserve_context)
+ if (image->preserve_context)
save_processor_state();
#endif
@@ -253,7 +253,7 @@ void machine_kexec(struct kimage *image)
image->preserve_context);
#ifdef CONFIG_KEXEC_JUMP
- if (kexec_image->preserve_context)
+ if (image->preserve_context)
restore_processor_state();
#endif
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 89cea4d4467..84c3bf209e9 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -274,7 +274,7 @@ void machine_kexec(struct kimage *image)
int save_ftrace_enabled;
#ifdef CONFIG_KEXEC_JUMP
- if (kexec_image->preserve_context)
+ if (image->preserve_context)
save_processor_state();
#endif
@@ -333,7 +333,7 @@ void machine_kexec(struct kimage *image)
image->preserve_context);
#ifdef CONFIG_KEXEC_JUMP
- if (kexec_image->preserve_context)
+ if (image->preserve_context)
restore_processor_state();
#endif
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index a0f3851ef31..98c470c069d 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -108,40 +108,29 @@ struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
EXPORT_SYMBOL_GPL(ucode_cpu_info);
#ifdef CONFIG_MICROCODE_OLD_INTERFACE
-struct update_for_cpu {
- const void __user *buf;
- size_t size;
-};
-
-static long update_for_cpu(void *_ufc)
-{
- struct update_for_cpu *ufc = _ufc;
- int error;
-
- error = microcode_ops->request_microcode_user(smp_processor_id(),
- ufc->buf, ufc->size);
- if (error < 0)
- return error;
- if (!error)
- microcode_ops->apply_microcode(smp_processor_id());
- return error;
-}
-
static int do_microcode_update(const void __user *buf, size_t size)
{
+ cpumask_t old;
int error = 0;
int cpu;
- struct update_for_cpu ufc = { .buf = buf, .size = size };
+
+ old = current->cpus_allowed;
for_each_online_cpu(cpu) {
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
if (!uci->valid)
continue;
- error = work_on_cpu(cpu, update_for_cpu, &ufc);
+
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+ error = microcode_ops->request_microcode_user(cpu, buf, size);
if (error < 0)
- break;
+ goto out;
+ if (!error)
+ microcode_ops->apply_microcode(cpu);
}
+out:
+ set_cpus_allowed_ptr(current, &old);
return error;
}
@@ -391,8 +380,6 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
return err;
err = microcode_init_cpu(cpu);
- if (err)
- sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
return err;
}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index dce99dca6cf..70fd7e414c1 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -679,7 +679,7 @@ void __init get_smp_config(void)
__get_smp_config(0);
}
-static void smp_reserve_bootmem(struct mpf_intel *mpf)
+static void __init smp_reserve_bootmem(struct mpf_intel *mpf)
{
unsigned long size = get_mpc_size(mpf->physptr);
#ifdef CONFIG_X86_32
@@ -838,7 +838,7 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
-static void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
+static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
{
int i;
@@ -866,7 +866,8 @@ static void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
}
}
#else /* CONFIG_X86_IO_APIC */
-static inline void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
+static
+inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
#endif /* CONFIG_X86_IO_APIC */
static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length,
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 8e45f446488..9faf43bea33 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -134,7 +134,9 @@ static void *get_call_destination(u8 type)
.pv_irq_ops = pv_irq_ops,
.pv_apic_ops = pv_apic_ops,
.pv_mmu_ops = pv_mmu_ops,
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
.pv_lock_ops = pv_lock_ops,
+#endif
};
return *((void **)&tmpl + type);
}
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 34f12e9996e..221a3853e26 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -50,7 +50,7 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
}
-struct dma_map_ops swiotlb_dma_ops = {
+static struct dma_map_ops swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
.alloc_coherent = x86_swiotlb_alloc_coherent,
.free_coherent = swiotlb_free_coherent,
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index e95022e4f5d..7563b31b4f0 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -261,8 +261,6 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
{
if (hpet_force_user)
old_ich_force_enable_hpet(dev);
- else
- hpet_print_force_info();
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1,
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 1340dad417f..667188e0b5a 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -232,6 +232,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"),
},
},
+ { /* Handle problems with rebooting on Sony VGN-Z540N */
+ .callback = set_bios_reboot,
+ .ident = "Sony VGN-Z540N",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"),
+ },
+ },
{ }
};
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index deb5ebb32c3..ed0c33761e6 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -25,6 +25,8 @@ static int uv_bau_retry_limit __read_mostly;
/* position of pnode (which is nasid>>1): */
static int uv_nshift __read_mostly;
+/* base pnode in this partition */
+static int uv_partition_base_pnode __read_mostly;
static unsigned long uv_mmask __read_mostly;
@@ -32,6 +34,34 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
static DEFINE_PER_CPU(struct bau_control, bau_control);
/*
+ * Determine the first node on a blade.
+ */
+static int __init blade_to_first_node(int blade)
+{
+ int node, b;
+
+ for_each_online_node(node) {
+ b = uv_node_to_blade_id(node);
+ if (blade == b)
+ return node;
+ }
+ return -1; /* shouldn't happen */
+}
+
+/*
+ * Determine the apicid of the first cpu on a blade.
+ */
+static int __init blade_to_first_apicid(int blade)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu)
+ if (blade == uv_cpu_to_blade_id(cpu))
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ return -1;
+}
+
+/*
* Free a software acknowledge hardware resource by clearing its Pending
* bit. This will return a reply to the sender.
* If the message has timed out, a reply has already been sent by the
@@ -67,7 +97,7 @@ static void uv_bau_process_message(struct bau_payload_queue_entry *msg,
msp = __get_cpu_var(bau_control).msg_statuses + msg_slot;
cpu = uv_blade_processor_id();
msg->number_of_cpus =
- uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
+ uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
this_cpu_mask = 1UL << cpu;
if (msp->seen_by.bits & this_cpu_mask)
return;
@@ -215,14 +245,14 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
* Returns @flush_mask if some remote flushing remains to be done. The
* mask will have some bits still set.
*/
-const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
+const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode,
struct bau_desc *bau_desc,
struct cpumask *flush_mask)
{
int completion_status = 0;
int right_shift;
int tries = 0;
- int blade;
+ int pnode;
int bit;
unsigned long mmr_offset;
unsigned long index;
@@ -265,8 +295,8 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
* use the IPI method of shootdown on them.
*/
for_each_cpu(bit, flush_mask) {
- blade = uv_cpu_to_blade_id(bit);
- if (blade == this_blade)
+ pnode = uv_cpu_to_pnode(bit);
+ if (pnode == this_pnode)
continue;
cpumask_clear_cpu(bit, flush_mask);
}
@@ -309,16 +339,16 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask);
int i;
int bit;
- int blade;
+ int pnode;
int uv_cpu;
- int this_blade;
+ int this_pnode;
int locals = 0;
struct bau_desc *bau_desc;
cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
uv_cpu = uv_blade_processor_id();
- this_blade = uv_numa_blade_id();
+ this_pnode = uv_hub_info->pnode;
bau_desc = __get_cpu_var(bau_control).descriptor_base;
bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
@@ -326,13 +356,14 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
i = 0;
for_each_cpu(bit, flush_mask) {
- blade = uv_cpu_to_blade_id(bit);
- BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
- if (blade == this_blade) {
+ pnode = uv_cpu_to_pnode(bit);
+ BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1));
+ if (pnode == this_pnode) {
locals++;
continue;
}
- bau_node_set(blade, &bau_desc->distribution);
+ bau_node_set(pnode - uv_partition_base_pnode,
+ &bau_desc->distribution);
i++;
}
if (i == 0) {
@@ -350,7 +381,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
bau_desc->payload.address = va;
bau_desc->payload.sending_cpu = cpu;
- return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
+ return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask);
}
/*
@@ -418,24 +449,58 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
set_irq_regs(old_regs);
}
+/*
+ * uv_enable_timeouts
+ *
+ * Each target blade (i.e. blades that have cpu's) needs to have
+ * shootdown message timeouts enabled. The timeout does not cause
+ * an interrupt, but causes an error message to be returned to
+ * the sender.
+ */
static void uv_enable_timeouts(void)
{
- int i;
int blade;
- int last_blade;
+ int nblades;
int pnode;
- int cur_cpu = 0;
- unsigned long apicid;
+ unsigned long mmr_image;
- last_blade = -1;
- for_each_online_node(i) {
- blade = uv_node_to_blade_id(i);
- if (blade == last_blade)
+ nblades = uv_num_possible_blades();
+
+ for (blade = 0; blade < nblades; blade++) {
+ if (!uv_blade_nr_possible_cpus(blade))
continue;
- last_blade = blade;
- apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+
pnode = uv_blade_to_pnode(blade);
- cur_cpu += uv_blade_nr_possible_cpus(i);
+ mmr_image =
+ uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
+ /*
+ * Set the timeout period and then lock it in, in three
+ * steps; captures and locks in the period.
+ *
+ * To program the period, the SOFT_ACK_MODE must be off.
+ */
+ mmr_image &= ~((unsigned long)1 <<
+ UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
+ uv_write_global_mmr64
+ (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+ /*
+ * Set the 4-bit period.
+ */
+ mmr_image &= ~((unsigned long)0xf <<
+ UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
+ mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD <<
+ UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
+ uv_write_global_mmr64
+ (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+ /*
+ * Subsequent reversals of the timebase bit (3) cause an
+ * immediate timeout of one or all INTD resources as
+ * indicated in bits 2:0 (7 causes all of them to timeout).
+ */
+ mmr_image |= ((unsigned long)1 <<
+ UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
+ uv_write_global_mmr64
+ (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
}
}
@@ -482,8 +547,7 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
stat->requestee, stat->onetlb, stat->alltlb,
stat->s_retry, stat->d_retry, stat->ptc_i);
seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n",
- uv_read_global_mmr64(uv_blade_to_pnode
- (uv_cpu_to_blade_id(cpu)),
+ uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
stat->sflush, stat->dflush,
stat->retriesok, stat->nomsg,
@@ -617,16 +681,18 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node)
* finish the initialization of the per-blade control structures
*/
static void __init
-uv_table_bases_finish(int blade, int node, int cur_cpu,
+uv_table_bases_finish(int blade,
struct bau_control *bau_tablesp,
struct bau_desc *adp)
{
struct bau_control *bcp;
- int i;
+ int cpu;
- for (i = cur_cpu; i < cur_cpu + uv_blade_nr_possible_cpus(blade); i++) {
- bcp = (struct bau_control *)&per_cpu(bau_control, i);
+ for_each_present_cpu(cpu) {
+ if (blade != uv_cpu_to_blade_id(cpu))
+ continue;
+ bcp = (struct bau_control *)&per_cpu(bau_control, cpu);
bcp->bau_msg_head = bau_tablesp->va_queue_first;
bcp->va_queue_first = bau_tablesp->va_queue_first;
bcp->va_queue_last = bau_tablesp->va_queue_last;
@@ -649,11 +715,10 @@ uv_activation_descriptor_init(int node, int pnode)
struct bau_desc *adp;
struct bau_desc *ad2;
- adp = (struct bau_desc *)
- kmalloc_node(16384, GFP_KERNEL, node);
+ adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node);
BUG_ON(!adp);
- pa = __pa((unsigned long)adp);
+ pa = uv_gpa(adp); /* need the real nasid*/
n = pa >> uv_nshift;
m = pa & uv_mmask;
@@ -667,8 +732,12 @@ uv_activation_descriptor_init(int node, int pnode)
for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) {
memset(ad2, 0, sizeof(struct bau_desc));
ad2->header.sw_ack_flag = 1;
- ad2->header.base_dest_nodeid =
- uv_blade_to_pnode(uv_cpu_to_blade_id(0));
+ /*
+ * base_dest_nodeid is the first node in the partition, so
+ * the bit map will indicate partition-relative node numbers.
+ * note that base_dest_nodeid is actually a nasid.
+ */
+ ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
ad2->header.command = UV_NET_ENDPOINT_INTD;
ad2->header.int_both = 1;
/*
@@ -686,6 +755,8 @@ static struct bau_payload_queue_entry * __init
uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
{
struct bau_payload_queue_entry *pqp;
+ unsigned long pa;
+ int pn;
char *cp;
pqp = (struct bau_payload_queue_entry *) kmalloc_node(
@@ -696,10 +767,14 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
cp = (char *)pqp + 31;
pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
bau_tablesp->va_queue_first = pqp;
+ /*
+ * need the pnode of where the memory was really allocated
+ */
+ pa = uv_gpa(pqp);
+ pn = pa >> uv_nshift;
uv_write_global_mmr64(pnode,
UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
- ((unsigned long)pnode <<
- UV_PAYLOADQ_PNODE_SHIFT) |
+ ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
uv_physnodeaddr(pqp));
uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
uv_physnodeaddr(pqp));
@@ -715,8 +790,9 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
/*
* Initialization of each UV blade's structures
*/
-static int __init uv_init_blade(int blade, int node, int cur_cpu)
+static int __init uv_init_blade(int blade)
{
+ int node;
int pnode;
unsigned long pa;
unsigned long apicid;
@@ -724,16 +800,17 @@ static int __init uv_init_blade(int blade, int node, int cur_cpu)
struct bau_payload_queue_entry *pqp;
struct bau_control *bau_tablesp;
+ node = blade_to_first_node(blade);
bau_tablesp = uv_table_bases_init(blade, node);
pnode = uv_blade_to_pnode(blade);
adp = uv_activation_descriptor_init(node, pnode);
pqp = uv_payload_queue_init(node, pnode, bau_tablesp);
- uv_table_bases_finish(blade, node, cur_cpu, bau_tablesp, adp);
+ uv_table_bases_finish(blade, bau_tablesp, adp);
/*
* the below initialization can't be in firmware because the
* messaging IRQ will be determined by the OS
*/
- apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+ apicid = blade_to_first_apicid(blade);
pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
if ((pa & 0xff) != UV_BAU_MESSAGE) {
uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
@@ -748,9 +825,7 @@ static int __init uv_init_blade(int blade, int node, int cur_cpu)
static int __init uv_bau_init(void)
{
int blade;
- int node;
int nblades;
- int last_blade;
int cur_cpu;
if (!is_uv_system())
@@ -763,29 +838,21 @@ static int __init uv_bau_init(void)
uv_bau_retry_limit = 1;
uv_nshift = uv_hub_info->n_val;
uv_mmask = (1UL << uv_hub_info->n_val) - 1;
- nblades = 0;
- last_blade = -1;
- cur_cpu = 0;
- for_each_online_node(node) {
- blade = uv_node_to_blade_id(node);
- if (blade == last_blade)
- continue;
- last_blade = blade;
- nblades++;
- }
+ nblades = uv_num_possible_blades();
+
uv_bau_table_bases = (struct bau_control **)
kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
BUG_ON(!uv_bau_table_bases);
- last_blade = -1;
- for_each_online_node(node) {
- blade = uv_node_to_blade_id(node);
- if (blade == last_blade)
- continue;
- last_blade = blade;
- uv_init_blade(blade, node, cur_cpu);
- cur_cpu += uv_blade_nr_possible_cpus(blade);
- }
+ uv_partition_base_pnode = 0x7fffffff;
+ for (blade = 0; blade < nblades; blade++)
+ if (uv_blade_nr_possible_cpus(blade) &&
+ (uv_blade_to_pnode(blade) < uv_partition_base_pnode))
+ uv_partition_base_pnode = uv_blade_to_pnode(blade);
+ for (blade = 0; blade < nblades; blade++)
+ if (uv_blade_nr_possible_cpus(blade))
+ uv_init_blade(blade);
+
alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
uv_enable_timeouts();
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 7a567ebe636..d57de05dc43 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -699,7 +699,7 @@ static struct clocksource clocksource_tsc;
* code, which is necessary to support wrapping clocksources like pm
* timer.
*/
-static cycle_t read_tsc(void)
+static cycle_t read_tsc(struct clocksource *cs)
{
cycle_t ret = (cycle_t)get_cycles();
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
index 67f9b9dbf80..36afb98675a 100644
--- a/arch/x86/kernel/uv_sysfs.c
+++ b/arch/x86/kernel/uv_sysfs.c
@@ -21,6 +21,7 @@
#include <linux/sysdev.h>
#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
struct kobject *sgi_uv_kobj;
@@ -47,6 +48,9 @@ static int __init sgi_uv_sysfs_init(void)
{
unsigned long ret;
+ if (!is_uv_system())
+ return -ENODEV;
+
if (!sgi_uv_kobj)
sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
if (!sgi_uv_kobj) {
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c
index 2ffb6c53326..583f11d5c48 100644
--- a/arch/x86/kernel/uv_time.c
+++ b/arch/x86/kernel/uv_time.c
@@ -29,7 +29,7 @@
#define RTC_NAME "sgi_rtc"
-static cycle_t uv_read_rtc(void);
+static cycle_t uv_read_rtc(struct clocksource *cs);
static int uv_rtc_next_event(unsigned long, struct clock_event_device *);
static void uv_rtc_timer_setup(enum clock_event_mode,
struct clock_event_device *);
@@ -123,7 +123,7 @@ static int uv_setup_intr(int cpu, u64 expires)
/* Initialize comparator value */
uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires);
- return (expires < uv_read_rtc() && !uv_intr_pending(pnode));
+ return (expires < uv_read_rtc(NULL) && !uv_intr_pending(pnode));
}
/*
@@ -256,7 +256,7 @@ static int uv_rtc_unset_timer(int cpu)
spin_lock_irqsave(&head->lock, flags);
- if (head->next_cpu == bcpu && uv_read_rtc() >= *t)
+ if (head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t)
rc = 1;
*t = ULLONG_MAX;
@@ -278,7 +278,7 @@ static int uv_rtc_unset_timer(int cpu)
/*
* Read the RTC.
*/
-static cycle_t uv_read_rtc(void)
+static cycle_t uv_read_rtc(struct clocksource *cs)
{
return (cycle_t)uv_read_local_mmr(UVH_RTC);
}
@@ -291,7 +291,7 @@ static int uv_rtc_next_event(unsigned long delta,
{
int ced_cpu = cpumask_first(ced->cpumask);
- return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc());
+ return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc(NULL));
}
/*
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index d303369a7ba..2b3eb82efee 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -283,7 +283,7 @@ void __devinit vmi_time_ap_init(void)
/** vmi clocksource */
static struct clocksource clocksource_vmi;
-static cycle_t read_real_cycles(void)
+static cycle_t read_real_cycles(struct clocksource *cs)
{
cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
return max(ret, clocksource_vmi.cycle_last);
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 2b54fe002e9..c5ee17e8c6d 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -89,7 +89,7 @@ int save_i387_xstate(void __user *buf)
if (!used_math())
return 0;
- clear_used_math(); /* trigger finit */
+
if (task_thread_info(tsk)->status & TS_USEDFPU) {
/*
* Start with clearing the user buffer. This will present a
@@ -114,6 +114,8 @@ int save_i387_xstate(void __user *buf)
return -1;
}
+ clear_used_math(); /* trigger finit */
+
if (task_thread_info(tsk)->status & TS_XSAVE) {
struct _fpstate __user *fx = buf;
struct _xstate __user *x = buf;
@@ -324,7 +326,7 @@ void __ref xsave_cntxt_init(void)
}
/*
- * for now OS knows only about FP/SSE
+ * Support only the state known to OS.
*/
pcntxt_mask = pcntxt_mask & XCNTXT_MASK;
xsave_init();