summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile7
-rw-r--r--arch/x86/kernel/acpi/boot.c11
-rw-r--r--arch/x86/kernel/amd_iommu.c54
-rw-r--r--arch/x86/kernel/amd_iommu_init.c7
-rw-r--r--arch/x86/kernel/apic.c127
-rw-r--r--arch/x86/kernel/apm_32.c4
-rw-r--r--arch/x86/kernel/asm-offsets_32.c2
-rw-r--r--arch/x86/kernel/asm-offsets_64.c4
-rw-r--r--arch/x86/kernel/bios_uv.c58
-rw-r--r--arch/x86/kernel/check.c161
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c58
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c3
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c346
-rw-r--r--arch/x86/kernel/cpu/vmware.c112
-rw-r--r--arch/x86/kernel/crash.c70
-rw-r--r--arch/x86/kernel/ds.c9
-rw-r--r--arch/x86/kernel/dumpstack.c319
-rw-r--r--arch/x86/kernel/dumpstack.h39
-rw-r--r--arch/x86/kernel/dumpstack_32.c302
-rw-r--r--arch/x86/kernel/dumpstack_64.c282
-rw-r--r--arch/x86/kernel/e820.c16
-rw-r--r--arch/x86/kernel/early_printk.c47
-rw-r--r--arch/x86/kernel/entry_32.S1
-rw-r--r--arch/x86/kernel/entry_64.S3
-rw-r--r--arch/x86/kernel/es7000_32.c62
-rw-r--r--arch/x86/kernel/genapic_64.c4
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c111
-rw-r--r--arch/x86/kernel/head.c1
-rw-r--r--arch/x86/kernel/head32.c3
-rw-r--r--arch/x86/kernel/head64.c3
-rw-r--r--arch/x86/kernel/hpet.c4
-rw-r--r--arch/x86/kernel/init_task.c1
-rw-r--r--arch/x86/kernel/io_apic.c3
-rw-r--r--arch/x86/kernel/irq_64.c24
-rw-r--r--arch/x86/kernel/machine_kexec_32.c104
-rw-r--r--arch/x86/kernel/microcode_amd.c232
-rw-r--r--arch/x86/kernel/microcode_core.c25
-rw-r--r--arch/x86/kernel/microcode_intel.c8
-rw-r--r--arch/x86/kernel/mpparse.c24
-rw-r--r--arch/x86/kernel/nmi.c58
-rw-r--r--arch/x86/kernel/numaq_32.c10
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c3
-rw-r--r--arch/x86/kernel/pci-gart_64.c6
-rw-r--r--arch/x86/kernel/process.c16
-rw-r--r--arch/x86/kernel/ptrace.c9
-rw-r--r--arch/x86/kernel/reboot.c126
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S115
-rw-r--r--arch/x86/kernel/setup.c179
-rw-r--r--arch/x86/kernel/sigframe.h42
-rw-r--r--arch/x86/kernel/signal.c (renamed from arch/x86/kernel/signal_32.c)567
-rw-r--r--arch/x86/kernel/signal_64.c516
-rw-r--r--arch/x86/kernel/smp.c13
-rw-r--r--arch/x86/kernel/smpboot.c19
-rw-r--r--arch/x86/kernel/time_64.c2
-rw-r--r--arch/x86/kernel/tlb_32.c11
-rw-r--r--arch/x86/kernel/tlb_uv.c4
-rw-r--r--arch/x86/kernel/trampoline.c19
-rw-r--r--arch/x86/kernel/traps.c34
-rw-r--r--arch/x86/kernel/tsc.c42
-rw-r--r--arch/x86/kernel/tsc_sync.c8
-rw-r--r--arch/x86/kernel/vmi_32.c16
64 files changed, 2121 insertions, 2351 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b62a7667828..1f208aaee78 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -12,6 +12,7 @@ CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_early_printk.o = -pg
endif
#
@@ -23,9 +24,9 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
CFLAGS_hpet.o := $(nostackp)
CFLAGS_tsc.o := $(nostackp)
-obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
+obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
-obj-y += time_$(BITS).o ioport.o ldt.o
+obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
@@ -105,6 +106,8 @@ microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
obj-$(CONFIG_MICROCODE) += microcode.o
+obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
+
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 4c51a2f8fd3..65d0b72777e 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1360,6 +1360,17 @@ static void __init acpi_process_madt(void)
disable_acpi();
}
}
+
+ /*
+ * ACPI supports both logical (e.g. Hyper-Threading) and physical
+ * processors, where MPS only supports physical.
+ */
+ if (acpi_lapic && acpi_ioapic)
+ printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
+ "information\n");
+ else if (acpi_lapic)
+ printk(KERN_INFO "Using ACPI for processor (LAPIC) "
+ "configuration information\n");
#endif
return;
}
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index e4899e0e878..0a60d60ed03 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -187,6 +187,8 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
spin_lock_irqsave(&iommu->lock, flags);
ret = __iommu_queue_command(iommu, cmd);
+ if (!ret)
+ iommu->need_sync = 1;
spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
@@ -210,10 +212,13 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
- iommu->need_sync = 0;
-
spin_lock_irqsave(&iommu->lock, flags);
+ if (!iommu->need_sync)
+ goto out;
+
+ iommu->need_sync = 0;
+
ret = __iommu_queue_command(iommu, &cmd);
if (ret)
@@ -230,8 +235,9 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
- if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
- printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
+ if (unlikely(i == EXIT_LOOP_COUNT))
+ panic("AMD IOMMU: Completion wait loop failed\n");
+
out:
spin_unlock_irqrestore(&iommu->lock, flags);
@@ -254,8 +260,6 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
ret = iommu_queue_command(iommu, &cmd);
- iommu->need_sync = 1;
-
return ret;
}
@@ -281,8 +285,6 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
ret = iommu_queue_command(iommu, &cmd);
- iommu->need_sync = 1;
-
return ret;
}
@@ -343,7 +345,7 @@ static int iommu_map(struct protection_domain *dom,
u64 __pte, *pte, *page;
bus_addr = PAGE_ALIGN(bus_addr);
- phys_addr = PAGE_ALIGN(bus_addr);
+ phys_addr = PAGE_ALIGN(phys_addr);
/* only support 512GB address spaces for now */
if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
@@ -599,7 +601,7 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
continue;
p2 = IOMMU_PTE_PAGE(p1[i]);
- for (j = 0; j < 512; ++i) {
+ for (j = 0; j < 512; ++j) {
if (!IOMMU_PTE_PRESENT(p2[j]))
continue;
p3 = IOMMU_PTE_PAGE(p2[j]);
@@ -762,8 +764,6 @@ static void set_device_domain(struct amd_iommu *iommu,
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
iommu_queue_inv_dev_entry(iommu, devid);
-
- iommu->need_sync = 1;
}
/*****************************************************************************
@@ -858,6 +858,9 @@ static int get_device_resources(struct device *dev,
print_devid(_bdf, 1);
}
+ if (domain_for_device(_bdf) == NULL)
+ set_device_domain(*iommu, *domain, _bdf);
+
return 1;
}
@@ -908,7 +911,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
if (address >= dom->aperture_size)
return;
- WARN_ON(address & 0xfffULL || address > dom->aperture_size);
+ WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size);
pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
pte += IOMMU_PTE_L0_INDEX(address);
@@ -920,8 +923,8 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
/*
* This function contains common code for mapping of a physically
- * contiguous memory region into DMA address space. It is uses by all
- * mapping functions provided by this IOMMU driver.
+ * contiguous memory region into DMA address space. It is used by all
+ * mapping functions provided with this IOMMU driver.
* Must be called with the domain lock held.
*/
static dma_addr_t __map_single(struct device *dev,
@@ -981,7 +984,8 @@ static void __unmap_single(struct amd_iommu *iommu,
dma_addr_t i, start;
unsigned int pages;
- if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
+ if ((dma_addr == bad_dma_address) ||
+ (dma_addr + size > dma_dom->aperture_size))
return;
pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
@@ -1031,8 +1035,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
if (addr == bad_dma_address)
goto out;
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1060,8 +1063,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
__unmap_single(iommu, domain->priv, dma_addr, size, dir);
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -1127,8 +1129,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
goto unmap;
}
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1173,8 +1174,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
s->dma_address = s->dma_length = 0;
}
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -1225,8 +1225,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
goto out;
}
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1257,8 +1256,7 @@ static void free_coherent(struct device *dev, size_t size,
__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 30ae2701b3d..c6cc22815d3 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -427,6 +427,10 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
&entry, sizeof(entry));
+ /* set head and tail to zero manually */
+ writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
+ writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+
iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
return cmd_buf;
@@ -1074,7 +1078,8 @@ int __init amd_iommu_init(void)
goto free;
/* IOMMU rlookup table - find the IOMMU for a specific device */
- amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL,
+ amd_iommu_rlookup_table = (void *)__get_free_pages(
+ GFP_KERNEL | __GFP_ZERO,
get_order(rlookup_table_size));
if (amd_iommu_rlookup_table == NULL)
goto free;
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 16f94879b52..20c6e12c047 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -441,6 +441,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
v = apic_read(APIC_LVTT);
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
apic_write(APIC_LVTT, v);
+ apic_write(APIC_TMICT, 0xffffffff);
break;
case CLOCK_EVT_MODE_RESUME:
/* Nothing to do here */
@@ -559,13 +560,13 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta)
} else {
res = (((u64)deltapm) * mult) >> 22;
do_div(res, 1000000);
- printk(KERN_WARNING "APIC calibration not consistent "
+ pr_warning("APIC calibration not consistent "
"with PM Timer: %ldms instead of 100ms\n",
(long)res);
/* Correct the lapic counter value */
res = (((u64)(*delta)) * pm_100ms);
do_div(res, deltapm);
- printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
+ pr_info("APIC delta adjusted to PM-Timer: "
"%lu (%ld)\n", (unsigned long)res, *delta);
*delta = (long)res;
}
@@ -645,8 +646,7 @@ static int __init calibrate_APIC_clock(void)
*/
if (calibration_result < (1000000 / HZ)) {
local_irq_enable();
- printk(KERN_WARNING
- "APIC frequency too slow, disabling apic timer\n");
+ pr_warning("APIC frequency too slow, disabling apic timer\n");
return -1;
}
@@ -672,13 +672,9 @@ static int __init calibrate_APIC_clock(void)
while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
cpu_relax();
- local_irq_disable();
-
/* Stop the lapic timer */
lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
- local_irq_enable();
-
/* Jiffies delta */
deltaj = lapic_cal_j2 - lapic_cal_j1;
apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
@@ -692,8 +688,7 @@ static int __init calibrate_APIC_clock(void)
local_irq_enable();
if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
- printk(KERN_WARNING
- "APIC timer disabled due to verification failure.\n");
+ pr_warning("APIC timer disabled due to verification failure.\n");
return -1;
}
@@ -714,7 +709,7 @@ void __init setup_boot_APIC_clock(void)
* broadcast mechanism is used. On UP systems simply ignore it.
*/
if (disable_apic_timer) {
- printk(KERN_INFO "Disabling APIC timer\n");
+ pr_info("Disabling APIC timer\n");
/* No broadcast on UP ! */
if (num_possible_cpus() > 1) {
lapic_clockevent.mult = 1;
@@ -741,7 +736,7 @@ void __init setup_boot_APIC_clock(void)
if (nmi_watchdog != NMI_IO_APIC)
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
else
- printk(KERN_WARNING "APIC timer registered as dummy,"
+ pr_warning("APIC timer registered as dummy,"
" due to nmi_watchdog=%d!\n", nmi_watchdog);
/* Setup the lapic or request the broadcast */
@@ -773,8 +768,7 @@ static void local_apic_timer_interrupt(void)
* spurious.
*/
if (!evt->event_handler) {
- printk(KERN_WARNING
- "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
+ pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
/* Switch it off */
lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
return;
@@ -814,9 +808,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
* Besides, if we don't timer interrupts ignore the global
* interrupt lock, which is the WrongThing (tm) to do.
*/
-#ifdef CONFIG_X86_64
exit_idle();
-#endif
irq_enter();
local_apic_timer_interrupt();
irq_exit();
@@ -1093,7 +1085,7 @@ static void __cpuinit lapic_setup_esr(void)
unsigned int oldvalue, value, maxlvt;
if (!lapic_is_integrated()) {
- printk(KERN_INFO "No ESR for 82489DX.\n");
+ pr_info("No ESR for 82489DX.\n");
return;
}
@@ -1104,7 +1096,7 @@ static void __cpuinit lapic_setup_esr(void)
* ESR disabled - we can't do anything useful with the
* errors anyway - mbligh
*/
- printk(KERN_INFO "Leaving ESR disabled.\n");
+ pr_info("Leaving ESR disabled.\n");
return;
}
@@ -1298,7 +1290,7 @@ void check_x2apic(void)
rdmsr(MSR_IA32_APICBASE, msr, msr2);
if (msr & X2APIC_ENABLE) {
- printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+ pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
x2apic_preenabled = x2apic = 1;
apic_ops = &x2apic_ops;
}
@@ -1310,7 +1302,7 @@ void enable_x2apic(void)
rdmsr(MSR_IA32_APICBASE, msr, msr2);
if (!(msr & X2APIC_ENABLE)) {
- printk("Enabling x2apic\n");
+ pr_info("Enabling x2apic\n");
wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
}
}
@@ -1325,9 +1317,8 @@ void __init enable_IR_x2apic(void)
return;
if (!x2apic_preenabled && disable_x2apic) {
- printk(KERN_INFO
- "Skipped enabling x2apic and Interrupt-remapping "
- "because of nox2apic\n");
+ pr_info("Skipped enabling x2apic and Interrupt-remapping "
+ "because of nox2apic\n");
return;
}
@@ -1335,22 +1326,19 @@ void __init enable_IR_x2apic(void)
panic("Bios already enabled x2apic, can't enforce nox2apic");
if (!x2apic_preenabled && skip_ioapic_setup) {
- printk(KERN_INFO
- "Skipped enabling x2apic and Interrupt-remapping "
- "because of skipping io-apic setup\n");
+ pr_info("Skipped enabling x2apic and Interrupt-remapping "
+ "because of skipping io-apic setup\n");
return;
}
ret = dmar_table_init();
if (ret) {
- printk(KERN_INFO
- "dmar_table_init() failed with %d:\n", ret);
+ pr_info("dmar_table_init() failed with %d:\n", ret);
if (x2apic_preenabled)
panic("x2apic enabled by bios. But IR enabling failed");
else
- printk(KERN_INFO
- "Not enabling x2apic,Intr-remapping\n");
+ pr_info("Not enabling x2apic,Intr-remapping\n");
return;
}
@@ -1359,7 +1347,7 @@ void __init enable_IR_x2apic(void)
ret = save_mask_IO_APIC_setup();
if (ret) {
- printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret);
+ pr_info("Saving IO-APIC state failed: %d\n", ret);
goto end;
}
@@ -1394,14 +1382,11 @@ end:
if (!ret) {
if (!x2apic_preenabled)
- printk(KERN_INFO
- "Enabled x2apic and interrupt-remapping\n");
+ pr_info("Enabled x2apic and interrupt-remapping\n");
else
- printk(KERN_INFO
- "Enabled Interrupt-remapping\n");
+ pr_info("Enabled Interrupt-remapping\n");
} else
- printk(KERN_ERR
- "Failed to enable Interrupt-remapping and x2apic\n");
+ pr_err("Failed to enable Interrupt-remapping and x2apic\n");
#else
if (!cpu_has_x2apic)
return;
@@ -1410,8 +1395,8 @@ end:
panic("x2apic enabled prior OS handover,"
" enable CONFIG_INTR_REMAP");
- printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
- " and x2apic\n");
+ pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+ " and x2apic\n");
#endif
return;
@@ -1428,7 +1413,7 @@ end:
static int __init detect_init_APIC(void)
{
if (!cpu_has_apic) {
- printk(KERN_INFO "No local APIC present\n");
+ pr_info("No local APIC present\n");
return -1;
}
@@ -1469,8 +1454,8 @@ static int __init detect_init_APIC(void)
* "lapic" specified.
*/
if (!force_enable_local_apic) {
- printk(KERN_INFO "Local APIC disabled by BIOS -- "
- "you can enable it with \"lapic\"\n");
+ pr_info("Local APIC disabled by BIOS -- "
+ "you can enable it with \"lapic\"\n");
return -1;
}
/*
@@ -1480,8 +1465,7 @@ static int __init detect_init_APIC(void)
*/
rdmsr(MSR_IA32_APICBASE, l, h);
if (!(l & MSR_IA32_APICBASE_ENABLE)) {
- printk(KERN_INFO
- "Local APIC disabled by BIOS -- reenabling.\n");
+ pr_info("Local APIC disabled by BIOS -- reenabling.\n");
l &= ~MSR_IA32_APICBASE_BASE;
l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
wrmsr(MSR_IA32_APICBASE, l, h);
@@ -1494,7 +1478,7 @@ static int __init detect_init_APIC(void)
*/
features = cpuid_edx(1);
if (!(features & (1 << X86_FEATURE_APIC))) {
- printk(KERN_WARNING "Could not enable APIC!\n");
+ pr_warning("Could not enable APIC!\n");
return -1;
}
set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
@@ -1505,14 +1489,14 @@ static int __init detect_init_APIC(void)
if (l & MSR_IA32_APICBASE_ENABLE)
mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
- printk(KERN_INFO "Found and enabled local APIC!\n");
+ pr_info("Found and enabled local APIC!\n");
apic_pm_activate();
return 0;
no_apic:
- printk(KERN_INFO "No local APIC present or hardware disabled\n");
+ pr_info("No local APIC present or hardware disabled\n");
return -1;
}
#endif
@@ -1588,12 +1572,12 @@ int __init APIC_init_uniprocessor(void)
{
#ifdef CONFIG_X86_64
if (disable_apic) {
- printk(KERN_INFO "Apic disabled\n");
+ pr_info("Apic disabled\n");
return -1;
}
if (!cpu_has_apic) {
disable_apic = 1;
- printk(KERN_INFO "Apic disabled by BIOS\n");
+ pr_info("Apic disabled by BIOS\n");
return -1;
}
#else
@@ -1605,8 +1589,8 @@ int __init APIC_init_uniprocessor(void)
*/
if (!cpu_has_apic &&
APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
- printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n",
- boot_cpu_physical_apicid);
+ pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
+ boot_cpu_physical_apicid);
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
return -1;
}
@@ -1682,9 +1666,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
{
u32 v;
-#ifdef CONFIG_X86_64
exit_idle();
-#endif
irq_enter();
/*
* Check if this really is a spurious interrupt and ACK it
@@ -1699,8 +1681,8 @@ void smp_spurious_interrupt(struct pt_regs *regs)
add_pda(irq_spurious_count, 1);
#else
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
- printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
- "should never happen.\n", smp_processor_id());
+ pr_info("spurious APIC interrupt on CPU#%d, "
+ "should never happen.\n", smp_processor_id());
__get_cpu_var(irq_stat).irq_spurious_count++;
#endif
irq_exit();
@@ -1713,9 +1695,7 @@ void smp_error_interrupt(struct pt_regs *regs)
{
u32 v, v1;
-#ifdef CONFIG_X86_64
exit_idle();
-#endif
irq_enter();
/* First tickle the hardware, only then report what went on. -- REW */
v = apic_read(APIC_ESR);
@@ -1724,17 +1704,18 @@ void smp_error_interrupt(struct pt_regs *regs)
ack_APIC_irq();
atomic_inc(&irq_err_count);
- /* Here is what the APIC error bits mean:
- 0: Send CS error
- 1: Receive CS error
- 2: Send accept error
- 3: Receive accept error
- 4: Reserved
- 5: Send illegal vector
- 6: Received illegal vector
- 7: Illegal register address
- */
- printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
+ /*
+ * Here is what the APIC error bits mean:
+ * 0: Send CS error
+ * 1: Receive CS error
+ * 2: Send accept error
+ * 3: Receive accept error
+ * 4: Reserved
+ * 5: Send illegal vector
+ * 6: Received illegal vector
+ * 7: Illegal register address
+ */
+ pr_debug("APIC error on CPU%d: %02x(%02x)\n",
smp_processor_id(), v , v1);
irq_exit();
}
@@ -1838,15 +1819,15 @@ void __cpuinit generic_processor_info(int apicid, int version)
* Validate version
*/
if (version == 0x0) {
- printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
- "fixing up to 0x10. (tell your hw vendor)\n",
- version);
+ pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
+ "fixing up to 0x10. (tell your hw vendor)\n",
+ version);
version = 0x10;
}
apic_version[apicid] = version;
if (num_processors >= NR_CPUS) {
- printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+ pr_warning("WARNING: NR_CPUS limit of %i reached."
" Processor ignored.\n", NR_CPUS);
return;
}
@@ -2209,7 +2190,7 @@ static int __init apic_set_verbosity(char *arg)
else if (strcmp("verbose", arg) == 0)
apic_verbosity = APIC_VERBOSE;
else {
- printk(KERN_WARNING "APIC Verbosity level %s not recognised"
+ pr_warning("APIC Verbosity level %s not recognised"
" use apic=verbose or apic=debug\n", arg);
return -EINVAL;
}
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 5145a6e72bb..3a26525a3f3 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -391,11 +391,7 @@ static int power_off;
#else
static int power_off = 1;
#endif
-#ifdef CONFIG_APM_REAL_MODE_POWER_OFF
-static int realmode_power_off = 1;
-#else
static int realmode_power_off;
-#endif
#ifdef CONFIG_APM_ALLOW_INTS
static int allow_ints = 1;
#else
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 6649d09ad88..ee4df08feee 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -11,7 +11,7 @@
#include <linux/suspend.h>
#include <linux/kbuild.h>
#include <asm/ucontext.h>
-#include "sigframe.h"
+#include <asm/sigframe.h>
#include <asm/pgtable.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 7fcf63d22f8..1d41d3f1edb 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -20,6 +20,8 @@
#include <xen/interface/xen.h>
+#include <asm/sigframe.h>
+
#define __NO_STUBS 1
#undef __SYSCALL
#undef _ASM_X86_UNISTD_64_H
@@ -87,7 +89,7 @@ int main(void)
BLANK();
#undef ENTRY
DEFINE(IA32_RT_SIGFRAME_sigcontext,
- offsetof (struct rt_sigframe32, uc.uc_mcontext));
+ offsetof (struct rt_sigframe_ia32, uc.uc_mcontext));
BLANK();
#endif
DEFINE(pbe_address, offsetof(struct pbe, address));
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index f0dfe6f17e7..2a0a2a3cac2 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -69,10 +69,10 @@ s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
long sn_partition_id;
EXPORT_SYMBOL_GPL(sn_partition_id);
-long uv_coherency_id;
-EXPORT_SYMBOL_GPL(uv_coherency_id);
-long uv_region_size;
-EXPORT_SYMBOL_GPL(uv_region_size);
+long sn_coherency_id;
+EXPORT_SYMBOL_GPL(sn_coherency_id);
+long sn_region_size;
+EXPORT_SYMBOL_GPL(sn_region_size);
int uv_type;
@@ -100,6 +100,56 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
return ret;
}
+int
+uv_bios_mq_watchlist_alloc(int blade, unsigned long addr, unsigned int mq_size,
+ unsigned long *intr_mmr_offset)
+{
+ union uv_watchlist_u size_blade;
+ u64 watchlist;
+ s64 ret;
+
+ size_blade.size = mq_size;
+ size_blade.blade = blade;
+
+ /*
+ * bios returns watchlist number or negative error number.
+ */
+ ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr,
+ size_blade.val, (u64)intr_mmr_offset,
+ (u64)&watchlist, 0);
+ if (ret < BIOS_STATUS_SUCCESS)
+ return ret;
+
+ return watchlist;
+}
+EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc);
+
+int
+uv_bios_mq_watchlist_free(int blade, int watchlist_num)
+{
+ return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE,
+ blade, watchlist_num, 0, 0, 0);
+}
+EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free);
+
+s64
+uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms)
+{
+ return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len,
+ perms, 0, 0);
+}
+EXPORT_SYMBOL_GPL(uv_bios_change_memprotect);
+
+s64
+uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len)
+{
+ s64 ret;
+
+ ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie,
+ (u64)addr, buf, (u64)len, 0);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa);
s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
{
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
new file mode 100644
index 00000000000..2ac0ab71412
--- /dev/null
+++ b/arch/x86/kernel/check.c
@@ -0,0 +1,161 @@
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kthread.h>
+#include <linux/workqueue.h>
+#include <asm/e820.h>
+#include <asm/proto.h>
+
+/*
+ * Some BIOSes seem to corrupt the low 64k of memory during events
+ * like suspend/resume and unplugging an HDMI cable. Reserve all
+ * remaining free memory in that area and fill it with a distinct
+ * pattern.
+ */
+#define MAX_SCAN_AREAS 8
+
+static int __read_mostly memory_corruption_check = -1;
+
+static unsigned __read_mostly corruption_check_size = 64*1024;
+static unsigned __read_mostly corruption_check_period = 60; /* seconds */
+
+static struct e820entry scan_areas[MAX_SCAN_AREAS];
+static int num_scan_areas;
+
+
+static __init int set_corruption_check(char *arg)
+{
+ char *end;
+
+ memory_corruption_check = simple_strtol(arg, &end, 10);
+
+ return (*end == 0) ? 0 : -EINVAL;
+}
+early_param("memory_corruption_check", set_corruption_check);
+
+static __init int set_corruption_check_period(char *arg)
+{
+ char *end;
+
+ corruption_check_period = simple_strtoul(arg, &end, 10);
+
+ return (*end == 0) ? 0 : -EINVAL;
+}
+early_param("memory_corruption_check_period", set_corruption_check_period);
+
+static __init int set_corruption_check_size(char *arg)
+{
+ char *end;
+ unsigned size;
+
+ size = memparse(arg, &end);
+
+ if (*end == '\0')
+ corruption_check_size = size;
+
+ return (size == corruption_check_size) ? 0 : -EINVAL;
+}
+early_param("memory_corruption_check_size", set_corruption_check_size);
+
+
+void __init setup_bios_corruption_check(void)
+{
+ u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */
+
+ if (memory_corruption_check == -1) {
+ memory_corruption_check =
+#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
+ 1
+#else
+ 0
+#endif
+ ;
+ }
+
+ if (corruption_check_size == 0)
+ memory_corruption_check = 0;
+
+ if (!memory_corruption_check)
+ return;
+
+ corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
+
+ while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
+ u64 size;
+ addr = find_e820_area_size(addr, &size, PAGE_SIZE);
+
+ if (addr == 0)
+ break;
+
+ if ((addr + size) > corruption_check_size)
+ size = corruption_check_size - addr;
+
+ if (size == 0)
+ break;
+
+ e820_update_range(addr, size, E820_RAM, E820_RESERVED);
+ scan_areas[num_scan_areas].addr = addr;
+ scan_areas[num_scan_areas].size = size;
+ num_scan_areas++;
+
+ /* Assume we've already mapped this early memory */
+ memset(__va(addr), 0, size);
+
+ addr += size;
+ }
+
+ printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
+ num_scan_areas);
+ update_e820();
+}
+
+
+void check_for_bios_corruption(void)
+{
+ int i;
+ int corruption = 0;
+
+ if (!memory_corruption_check)
+ return;
+
+ for (i = 0; i < num_scan_areas; i++) {
+ unsigned long *addr = __va(scan_areas[i].addr);
+ unsigned long size = scan_areas[i].size;
+
+ for (; size; addr++, size -= sizeof(unsigned long)) {
+ if (!*addr)
+ continue;
+ printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
+ addr, __pa(addr), *addr);
+ corruption = 1;
+ *addr = 0;
+ }
+ }
+
+ WARN_ONCE(corruption, KERN_ERR "Memory corruption detected in low memory\n");
+}
+
+static void check_corruption(struct work_struct *dummy);
+static DECLARE_DELAYED_WORK(bios_check_work, check_corruption);
+
+static void check_corruption(struct work_struct *dummy)
+{
+ check_for_bios_corruption();
+ schedule_delayed_work(&bios_check_work,
+ round_jiffies_relative(corruption_check_period*HZ));
+}
+
+static int start_periodic_check_for_corruption(void)
+{
+ if (!memory_corruption_check || corruption_check_period == 0)
+ return 0;
+
+ printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
+ corruption_check_period);
+
+ /* First time we run the checks right away */
+ schedule_delayed_work(&bios_check_work, 0);
+ return 0;
+}
+
+module_init(start_periodic_check_for_corruption);
+
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 82ec6075c05..a5c04e88777 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -4,6 +4,7 @@
obj-y := intel_cacheinfo.o addon_cpuid_features.o
obj-y += proc.o capflags.o powerflags.o common.o
+obj-y += vmware.o hypervisor.o
obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
obj-$(CONFIG_X86_64) += bugs_64.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index aba49c782fd..42e0853030c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -36,6 +36,7 @@
#include <asm/proto.h>
#include <asm/sections.h>
#include <asm/setup.h>
+#include <asm/hypervisor.h>
#include "cpu.h"
@@ -703,6 +704,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
detect_ht(c);
#endif
+ init_hypervisor(c);
/*
* On SMP, boot_cpu_data holds the common feature set between
* all CPUs; so make sure that we indicate which features are
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
new file mode 100644
index 00000000000..fb5b86af0b0
--- /dev/null
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -0,0 +1,58 @@
+/*
+ * Common hypervisor code
+ *
+ * Copyright (C) 2008, VMware, Inc.
+ * Author : Alok N Kataria <akataria@vmware.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/vmware.h>
+#include <asm/hypervisor.h>
+
+static inline void __cpuinit
+detect_hypervisor_vendor(struct cpuinfo_x86 *c)
+{
+ if (vmware_platform()) {
+ c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE;
+ } else {
+ c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE;
+ }
+}
+
+unsigned long get_hypervisor_tsc_freq(void)
+{
+ if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE)
+ return vmware_get_tsc_khz();
+ return 0;
+}
+
+static inline void __cpuinit
+hypervisor_set_feature_bits(struct cpuinfo_x86 *c)
+{
+ if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) {
+ vmware_set_feature_bits(c);
+ return;
+ }
+}
+
+void __cpuinit init_hypervisor(struct cpuinfo_x86 *c)
+{
+ detect_hypervisor_vendor(c);
+ hypervisor_set_feature_bits(c);
+}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cce0b6118d5..816f27f289b 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -307,12 +307,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_P4);
if (c->x86 == 6)
set_cpu_cap(c, X86_FEATURE_P3);
+#endif
if (cpu_has_bts)
ptrace_bts_init_intel(c);
-#endif
-
detect_extended_topology(c);
if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
/*
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 4b031a4ac85..1c838032fd3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -510,12 +510,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
*/
void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
{
- static cpumask_t mce_cpus = CPU_MASK_NONE;
-
mce_cpu_quirks(c);
if (mce_dont_init ||
- cpu_test_and_set(smp_processor_id(), mce_cpus) ||
!mce_available(c))
return;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index c78c04821ea..1159e269e59 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -803,6 +803,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
}
static struct res_range __initdata range[RANGE_NUM];
+static int __initdata nr_range;
#ifdef CONFIG_MTRR_SANITIZER
@@ -1206,39 +1207,43 @@ struct mtrr_cleanup_result {
#define PSHIFT (PAGE_SHIFT - 10)
static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
-static struct res_range __initdata range_new[RANGE_NUM];
static unsigned long __initdata min_loss_pfn[RANGE_NUM];
-static int __init mtrr_cleanup(unsigned address_bits)
+static void __init print_out_mtrr_range_state(void)
{
- unsigned long extra_remove_base, extra_remove_size;
- unsigned long base, size, def, dummy;
- mtrr_type type;
- int nr_range, nr_range_new;
- u64 chunk_size, gran_size;
- unsigned long range_sums, range_sums_new;
- int index_good;
- int num_reg_good;
int i;
+ char start_factor = 'K', size_factor = 'K';
+ unsigned long start_base, size_base;
+ mtrr_type type;
- /* extra one for all 0 */
- int num[MTRR_NUM_TYPES + 1];
+ for (i = 0; i < num_var_ranges; i++) {
- if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
- return 0;
- rdmsr(MTRRdefType_MSR, def, dummy);
- def &= 0xff;
- if (def != MTRR_TYPE_UNCACHABLE)
- return 0;
+ size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
+ if (!size_base)
+ continue;
- /* get it and store it aside */
- memset(range_state, 0, sizeof(range_state));
- for (i = 0; i < num_var_ranges; i++) {
- mtrr_if->get(i, &base, &size, &type);
- range_state[i].base_pfn = base;
- range_state[i].size_pfn = size;
- range_state[i].type = type;
+ size_base = to_size_factor(size_base, &size_factor),
+ start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
+ start_base = to_size_factor(start_base, &start_factor),
+ type = range_state[i].type;
+
+ printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+ i, start_base, start_factor,
+ size_base, size_factor,
+ (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
+ ((type == MTRR_TYPE_WRPROT) ? "WP" :
+ ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
+ );
}
+}
+
+static int __init mtrr_need_cleanup(void)
+{
+ int i;
+ mtrr_type type;
+ unsigned long size;
+ /* extra one for all 0 */
+ int num[MTRR_NUM_TYPES + 1];
/* check entries number */
memset(num, 0, sizeof(num));
@@ -1263,29 +1268,133 @@ static int __init mtrr_cleanup(unsigned address_bits)
num_var_ranges - num[MTRR_NUM_TYPES])
return 0;
- /* print original var MTRRs at first, for debugging: */
- printk(KERN_DEBUG "original variable MTRRs\n");
- for (i = 0; i < num_var_ranges; i++) {
- char start_factor = 'K', size_factor = 'K';
- unsigned long start_base, size_base;
+ return 1;
+}
- size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
- if (!size_base)
- continue;
+static unsigned long __initdata range_sums;
+static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
+ unsigned long extra_remove_base,
+ unsigned long extra_remove_size,
+ int i)
+{
+ int num_reg;
+ static struct res_range range_new[RANGE_NUM];
+ static int nr_range_new;
+ unsigned long range_sums_new;
+
+ /* convert ranges to var ranges state */
+ num_reg = x86_setup_var_mtrrs(range, nr_range,
+ chunk_size, gran_size);
+
+ /* we got new setting in range_state, check it */
+ memset(range_new, 0, sizeof(range_new));
+ nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+ extra_remove_base, extra_remove_size);
+ range_sums_new = sum_ranges(range_new, nr_range_new);
+
+ result[i].chunk_sizek = chunk_size >> 10;
+ result[i].gran_sizek = gran_size >> 10;
+ result[i].num_reg = num_reg;
+ if (range_sums < range_sums_new) {
+ result[i].lose_cover_sizek =
+ (range_sums_new - range_sums) << PSHIFT;
+ result[i].bad = 1;
+ } else
+ result[i].lose_cover_sizek =
+ (range_sums - range_sums_new) << PSHIFT;
- size_base = to_size_factor(size_base, &size_factor),
- start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
- start_base = to_size_factor(start_base, &start_factor),
- type = range_state[i].type;
+ /* double check it */
+ if (!result[i].bad && !result[i].lose_cover_sizek) {
+ if (nr_range_new != nr_range ||
+ memcmp(range, range_new, sizeof(range)))
+ result[i].bad = 1;
+ }
- printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
- i, start_base, start_factor,
- size_base, size_factor,
- (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
- ((type == MTRR_TYPE_WRPROT) ? "WP" :
- ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
- );
+ if (!result[i].bad && (range_sums - range_sums_new <
+ min_loss_pfn[num_reg])) {
+ min_loss_pfn[num_reg] =
+ range_sums - range_sums_new;
}
+}
+
+static void __init mtrr_print_out_one_result(int i)
+{
+ char gran_factor, chunk_factor, lose_factor;
+ unsigned long gran_base, chunk_base, lose_base;
+
+ gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+ chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+ lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+ printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
+ result[i].bad ? "*BAD*" : " ",
+ gran_base, gran_factor, chunk_base, chunk_factor);
+ printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
+ result[i].num_reg, result[i].bad ? "-" : "",
+ lose_base, lose_factor);
+}
+
+static int __init mtrr_search_optimal_index(void)
+{
+ int i;
+ int num_reg_good;
+ int index_good;
+
+ if (nr_mtrr_spare_reg >= num_var_ranges)
+ nr_mtrr_spare_reg = num_var_ranges - 1;
+ num_reg_good = -1;
+ for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
+ if (!min_loss_pfn[i])
+ num_reg_good = i;
+ }
+
+ index_good = -1;
+ if (num_reg_good != -1) {
+ for (i = 0; i < NUM_RESULT; i++) {
+ if (!result[i].bad &&
+ result[i].num_reg == num_reg_good &&
+ !result[i].lose_cover_sizek) {
+ index_good = i;
+ break;
+ }
+ }
+ }
+
+ return index_good;
+}
+
+
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+ unsigned long extra_remove_base, extra_remove_size;
+ unsigned long base, size, def, dummy;
+ mtrr_type type;
+ u64 chunk_size, gran_size;
+ int index_good;
+ int i;
+
+ if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
+ return 0;
+ rdmsr(MTRRdefType_MSR, def, dummy);
+ def &= 0xff;
+ if (def != MTRR_TYPE_UNCACHABLE)
+ return 0;
+
+ /* get it and store it aside */
+ memset(range_state, 0, sizeof(range_state));
+ for (i = 0; i < num_var_ranges; i++) {
+ mtrr_if->get(i, &base, &size, &type);
+ range_state[i].base_pfn = base;
+ range_state[i].size_pfn = size;
+ range_state[i].type = type;
+ }
+
+ /* check if we need handle it and can handle it */
+ if (!mtrr_need_cleanup())
+ return 0;
+
+ /* print original var MTRRs at first, for debugging: */
+ printk(KERN_DEBUG "original variable MTRRs\n");
+ print_out_mtrr_range_state();
memset(range, 0, sizeof(range));
extra_remove_size = 0;
@@ -1309,176 +1418,64 @@ static int __init mtrr_cleanup(unsigned address_bits)
range_sums >> (20 - PAGE_SHIFT));
if (mtrr_chunk_size && mtrr_gran_size) {
- int num_reg;
- char gran_factor, chunk_factor, lose_factor;
- unsigned long gran_base, chunk_base, lose_base;
-
- debug_print++;
- /* convert ranges to var ranges state */
- num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
- mtrr_gran_size);
+ i = 0;
+ mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
+ extra_remove_base, extra_remove_size, i);
- /* we got new setting in range_state, check it */
- memset(range_new, 0, sizeof(range_new));
- nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
- extra_remove_base,
- extra_remove_size);
- range_sums_new = sum_ranges(range_new, nr_range_new);
+ mtrr_print_out_one_result(i);
- i = 0;
- result[i].chunk_sizek = mtrr_chunk_size >> 10;
- result[i].gran_sizek = mtrr_gran_size >> 10;
- result[i].num_reg = num_reg;
- if (range_sums < range_sums_new) {
- result[i].lose_cover_sizek =
- (range_sums_new - range_sums) << PSHIFT;
- result[i].bad = 1;
- } else
- result[i].lose_cover_sizek =
- (range_sums - range_sums_new) << PSHIFT;
-
- gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
- chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
- lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
- printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
- result[i].bad?"*BAD*":" ",
- gran_base, gran_factor, chunk_base, chunk_factor);
- printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
- result[i].num_reg, result[i].bad?"-":"",
- lose_base, lose_factor);
if (!result[i].bad) {
set_var_mtrr_all(address_bits);
return 1;
}
printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
"will find optimal one\n");
- debug_print--;
- memset(result, 0, sizeof(result[0]));
}
i = 0;
memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
memset(result, 0, sizeof(result));
for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
- char gran_factor;
- unsigned long gran_base;
-
- if (debug_print)
- gran_base = to_size_factor(gran_size >> 10, &gran_factor);
for (chunk_size = gran_size; chunk_size < (1ULL<<32);
chunk_size <<= 1) {
- int num_reg;
- if (debug_print) {
- char chunk_factor;
- unsigned long chunk_base;
-
- chunk_base = to_size_factor(chunk_size>>10, &chunk_factor),
- printk(KERN_INFO "\n");
- printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n",
- gran_base, gran_factor, chunk_base, chunk_factor);
- }
if (i >= NUM_RESULT)
continue;
- /* convert ranges to var ranges state */
- num_reg = x86_setup_var_mtrrs(range, nr_range,
- chunk_size, gran_size);
-
- /* we got new setting in range_state, check it */
- memset(range_new, 0, sizeof(range_new));
- nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
- extra_remove_base, extra_remove_size);
- range_sums_new = sum_ranges(range_new, nr_range_new);
-
- result[i].chunk_sizek = chunk_size >> 10;
- result[i].gran_sizek = gran_size >> 10;
- result[i].num_reg = num_reg;
- if (range_sums < range_sums_new) {
- result[i].lose_cover_sizek =
- (range_sums_new - range_sums) << PSHIFT;
- result[i].bad = 1;
- } else
- result[i].lose_cover_sizek =
- (range_sums - range_sums_new) << PSHIFT;
-
- /* double check it */
- if (!result[i].bad && !result[i].lose_cover_sizek) {
- if (nr_range_new != nr_range ||
- memcmp(range, range_new, sizeof(range)))
- result[i].bad = 1;
+ mtrr_calc_range_state(chunk_size, gran_size,
+ extra_remove_base, extra_remove_size, i);
+ if (debug_print) {
+ mtrr_print_out_one_result(i);
+ printk(KERN_INFO "\n");
}
- if (!result[i].bad && (range_sums - range_sums_new <
- min_loss_pfn[num_reg])) {
- min_loss_pfn[num_reg] =
- range_sums - range_sums_new;
- }
i++;
}
}
- /* print out all */
- for (i = 0; i < NUM_RESULT; i++) {
- char gran_factor, chunk_factor, lose_factor;
- unsigned long gran_base, chunk_base, lose_base;
-
- gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
- chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
- lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
- printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
- result[i].bad?"*BAD*":" ",
- gran_base, gran_factor, chunk_base, chunk_factor);
- printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
- result[i].num_reg, result[i].bad?"-":"",
- lose_base, lose_factor);
- }
-
/* try to find the optimal index */
- if (nr_mtrr_spare_reg >= num_var_ranges)
- nr_mtrr_spare_reg = num_var_ranges - 1;
- num_reg_good = -1;
- for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
- if (!min_loss_pfn[i])
- num_reg_good = i;
- }
-
- index_good = -1;
- if (num_reg_good != -1) {
- for (i = 0; i < NUM_RESULT; i++) {
- if (!result[i].bad &&
- result[i].num_reg == num_reg_good &&
- !result[i].lose_cover_sizek) {
- index_good = i;
- break;
- }
- }
- }
+ index_good = mtrr_search_optimal_index();
if (index_good != -1) {
- char gran_factor, chunk_factor, lose_factor;
- unsigned long gran_base, chunk_base, lose_base;
-
printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
i = index_good;
- gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
- chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
- lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
- printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t",
- gran_base, gran_factor, chunk_base, chunk_factor);
- printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n",
- result[i].num_reg, lose_base, lose_factor);
+ mtrr_print_out_one_result(i);
+
/* convert ranges to var ranges state */
chunk_size = result[i].chunk_sizek;
chunk_size <<= 10;
gran_size = result[i].gran_sizek;
gran_size <<= 10;
- debug_print++;
x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
- debug_print--;
set_var_mtrr_all(address_bits);
+ printk(KERN_DEBUG "New variable MTRRs\n");
+ print_out_mtrr_range_state();
return 1;
+ } else {
+ /* print out all */
+ for (i = 0; i < NUM_RESULT; i++)
+ mtrr_print_out_one_result(i);
}
printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
@@ -1562,7 +1559,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
{
unsigned long i, base, size, highest_pfn = 0, def, dummy;
mtrr_type type;
- int nr_range;
u64 total_trim_size;
/* extra one for all 0 */
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
new file mode 100644
index 00000000000..284c399e323
--- /dev/null
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -0,0 +1,112 @@
+/*
+ * VMware Detection code.
+ *
+ * Copyright (C) 2008, VMware, Inc.
+ * Author : Alok N Kataria <akataria@vmware.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <linux/dmi.h>
+#include <asm/div64.h>
+#include <asm/vmware.h>
+
+#define CPUID_VMWARE_INFO_LEAF 0x40000000
+#define VMWARE_HYPERVISOR_MAGIC 0x564D5868
+#define VMWARE_HYPERVISOR_PORT 0x5658
+
+#define VMWARE_PORT_CMD_GETVERSION 10
+#define VMWARE_PORT_CMD_GETHZ 45
+
+#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
+ __asm__("inl (%%dx)" : \
+ "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \
+ "0"(VMWARE_HYPERVISOR_MAGIC), \
+ "1"(VMWARE_PORT_CMD_##cmd), \
+ "2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) : \
+ "memory");
+
+static inline int __vmware_platform(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+ VMWARE_PORT(GETVERSION, eax, ebx, ecx, edx);
+ return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
+}
+
+static unsigned long __vmware_get_tsc_khz(void)
+{
+ uint64_t tsc_hz;
+ uint32_t eax, ebx, ecx, edx;
+
+ VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
+
+ if (ebx == UINT_MAX)
+ return 0;
+ tsc_hz = eax | (((uint64_t)ebx) << 32);
+ do_div(tsc_hz, 1000);
+ BUG_ON(tsc_hz >> 32);
+ return tsc_hz;
+}
+
+/*
+ * While checking the dmi string infomation, just checking the product
+ * serial key should be enough, as this will always have a VMware
+ * specific string when running under VMware hypervisor.
+ */
+int vmware_platform(void)
+{
+ if (cpu_has_hypervisor) {
+ unsigned int eax, ebx, ecx, edx;
+ char hyper_vendor_id[13];
+
+ cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx);
+ memcpy(hyper_vendor_id + 0, &ebx, 4);
+ memcpy(hyper_vendor_id + 4, &ecx, 4);
+ memcpy(hyper_vendor_id + 8, &edx, 4);
+ hyper_vendor_id[12] = '\0';
+ if (!strcmp(hyper_vendor_id, "VMwareVMware"))
+ return 1;
+ } else if (dmi_available && dmi_name_in_serial("VMware") &&
+ __vmware_platform())
+ return 1;
+
+ return 0;
+}
+
+unsigned long vmware_get_tsc_khz(void)
+{
+ BUG_ON(!vmware_platform());
+ return __vmware_get_tsc_khz();
+}
+
+/*
+ * VMware hypervisor takes care of exporting a reliable TSC to the guest.
+ * Still, due to timing difference when running on virtual cpus, the TSC can
+ * be marked as unstable in some cases. For example, the TSC sync check at
+ * bootup can fail due to a marginal offset between vcpus' TSCs (though the
+ * TSCs do not drift from each other). Also, the ACPI PM timer clocksource
+ * is not suitable as a watchdog when running on a hypervisor because the
+ * kernel may miss a wrap of the counter if the vcpu is descheduled for a
+ * long time. To skip these checks at runtime we set these capability bits,
+ * so that the kernel could just trust the hypervisor with providing a
+ * reliable virtual TSC that is suitable for timekeeping.
+ */
+void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c)
+{
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+ set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
+}
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 26855381790..d84a852e4cd 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -29,34 +29,17 @@
#include <mach_ipi.h>
-/* This keeps a track of which one is crashing cpu. */
-static int crashing_cpu;
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
-static atomic_t waiting_for_crash_ipi;
-static int crash_nmi_callback(struct notifier_block *self,
- unsigned long val, void *data)
+static void kdump_nmi_callback(int cpu, struct die_args *args)
{
struct pt_regs *regs;
#ifdef CONFIG_X86_32
struct pt_regs fixed_regs;
#endif
- int cpu;
- if (val != DIE_NMI_IPI)
- return NOTIFY_OK;
-
- regs = ((struct die_args *)data)->regs;
- cpu = raw_smp_processor_id();
-
- /* Don't do anything if this handler is invoked on crashing cpu.
- * Otherwise, system will completely hang. Crashing cpu can get
- * an NMI if system was initially booted with nmi_watchdog parameter.
- */
- if (cpu == crashing_cpu)
- return NOTIFY_STOP;
- local_irq_disable();
+ regs = args->regs;
#ifdef CONFIG_X86_32
if (!user_mode_vm(regs)) {
@@ -65,54 +48,19 @@ static int crash_nmi_callback(struct notifier_block *self,
}
#endif
crash_save_cpu(regs, cpu);
- disable_local_APIC();
- atomic_dec(&waiting_for_crash_ipi);
- /* Assume hlt works */
- halt();
- for (;;)
- cpu_relax();
-
- return 1;
-}
-static void smp_send_nmi_allbutself(void)
-{
- cpumask_t mask = cpu_online_map;
- cpu_clear(safe_smp_processor_id(), mask);
- if (!cpus_empty(mask))
- send_IPI_mask(mask, NMI_VECTOR);
+ disable_local_APIC();
}
-static struct notifier_block crash_nmi_nb = {
- .notifier_call = crash_nmi_callback,
-};
-
-static void nmi_shootdown_cpus(void)
+static void kdump_nmi_shootdown_cpus(void)
{
- unsigned long msecs;
-
- atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
- /* Would it be better to replace the trap vector here? */
- if (register_die_notifier(&crash_nmi_nb))
- return; /* return what? */
- /* Ensure the new callback function is set before sending
- * out the NMI
- */
- wmb();
+ nmi_shootdown_cpus(kdump_nmi_callback);
- smp_send_nmi_allbutself();
-
- msecs = 1000; /* Wait at most a second for the other cpus to stop */
- while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
- mdelay(1);
- msecs--;
- }
-
- /* Leave the nmi callback set */
disable_local_APIC();
}
+
#else
-static void nmi_shootdown_cpus(void)
+static void kdump_nmi_shootdown_cpus(void)
{
/* There are no cpus to shootdown */
}
@@ -131,9 +79,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
/* The kernel is broken so disable interrupts */
local_irq_disable();
- /* Make a note of crashing cpu. Will be used in NMI callback.*/
- crashing_cpu = safe_smp_processor_id();
- nmi_shootdown_cpus();
+ kdump_nmi_shootdown_cpus();
lapic_shutdown();
#if defined(CONFIG_X86_IO_APIC)
disable_IO_APIC();
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index a2d1176c38e..d6938d9351c 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -847,17 +847,16 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
switch (c->x86) {
case 0x6:
switch (c->x86_model) {
+ case 0 ... 0xC:
+ /* sorry, don't know about them */
+ break;
case 0xD:
case 0xE: /* Pentium M */
ds_configure(&ds_cfg_var);
break;
- case 0xF: /* Core2 */
- case 0x1C: /* Atom */
+ default: /* Core2, Atom, ... */
ds_configure(&ds_cfg_64);
break;
- default:
- /* sorry, don't know about them */
- break;
}
break;
case 0xF:
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
new file mode 100644
index 00000000000..5962176dfab
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.c
@@ -0,0 +1,319 @@
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/utsname.h>
+#include <linux/hardirq.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/kexec.h>
+#include <linux/bug.h>
+#include <linux/nmi.h>
+#include <linux/sysfs.h>
+
+#include <asm/stacktrace.h>
+
+#include "dumpstack.h"
+
+int panic_on_unrecovered_nmi;
+unsigned int code_bytes = 64;
+int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
+static int die_counter;
+
+void printk_address(unsigned long address, int reliable)
+{
+ printk(" [<%p>] %s%pS\n", (void *) address,
+ reliable ? "" : "? ", (void *) address);
+}
+
+/*
+ * x86-64 can have up to three kernel stacks:
+ * process stack
+ * interrupt stack
+ * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
+ */
+
+static inline int valid_stack_ptr(struct thread_info *tinfo,
+ void *p, unsigned int size, void *end)
+{
+ void *t = tinfo;
+ if (end) {
+ if (p < end && p >= (end-THREAD_SIZE))
+ return 1;
+ else
+ return 0;
+ }
+ return p > t && p < t + THREAD_SIZE - size;
+}
+
+unsigned long
+print_context_stack(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data,
+ unsigned long *end)
+{
+ struct stack_frame *frame = (struct stack_frame *)bp;
+
+ while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
+ unsigned long addr;
+
+ addr = *stack;
+ if (__kernel_text_address(addr)) {
+ if ((unsigned long) stack == bp + sizeof(long)) {
+ ops->address(data, addr, 1);
+ frame = frame->next_frame;
+ bp = (unsigned long) frame;
+ } else {
+ ops->address(data, addr, bp == 0);
+ }
+ }
+ stack++;
+ }
+ return bp;
+}
+
+
+static void
+print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
+{
+ printk(data);
+ print_symbol(msg, symbol);
+ printk("\n");
+}
+
+static void print_trace_warning(void *data, char *msg)
+{
+ printk("%s%s\n", (char *)data, msg);
+}
+
+static int print_trace_stack(void *data, char *name)
+{
+ printk("%s <%s> ", (char *)data, name);
+ return 0;
+}
+
+/*
+ * Print one address/symbol entries per line.
+ */
+static void print_trace_address(void *data, unsigned long addr, int reliable)
+{
+ touch_nmi_watchdog();
+ printk(data);
+ printk_address(addr, reliable);
+}
+
+static const struct stacktrace_ops print_trace_ops = {
+ .warning = print_trace_warning,
+ .warning_symbol = print_trace_warning_symbol,
+ .stack = print_trace_stack,
+ .address = print_trace_address,
+};
+
+void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp, char *log_lvl)
+{
+ printk("%sCall Trace:\n", log_lvl);
+ dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+}
+
+void show_trace(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp)
+{
+ show_trace_log_lvl(task, regs, stack, bp, "");
+}
+
+void show_stack(struct task_struct *task, unsigned long *sp)
+{
+ show_stack_log_lvl(task, NULL, sp, 0, "");
+}
+
+/*
+ * The architecture-independent dump_stack generator
+ */
+void dump_stack(void)
+{
+ unsigned long bp = 0;
+ unsigned long stack;
+
+#ifdef CONFIG_FRAME_POINTER
+ if (!bp)
+ get_bp(bp);
+#endif
+
+ printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+ current->pid, current->comm, print_tainted(),
+ init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
+ show_trace(NULL, NULL, &stack, bp);
+}
+EXPORT_SYMBOL(dump_stack);
+
+static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static int die_owner = -1;
+static unsigned int die_nest_count;
+
+unsigned __kprobes long oops_begin(void)
+{
+ int cpu;
+ unsigned long flags;
+
+ oops_enter();
+
+ /* racy, but better than risking deadlock. */
+ raw_local_irq_save(flags);
+ cpu = smp_processor_id();
+ if (!__raw_spin_trylock(&die_lock)) {
+ if (cpu == die_owner)
+ /* nested oops. should stop eventually */;
+ else
+ __raw_spin_lock(&die_lock);
+ }
+ die_nest_count++;
+ die_owner = cpu;
+ console_verbose();
+ bust_spinlocks(1);
+ return flags;
+}
+
+void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+{
+ if (regs && kexec_should_crash(current))
+ crash_kexec(regs);
+
+ bust_spinlocks(0);
+ die_owner = -1;
+ add_taint(TAINT_DIE);
+ die_nest_count--;
+ if (!die_nest_count)
+ /* Nest count reaches zero, release the lock. */
+ __raw_spin_unlock(&die_lock);
+ raw_local_irq_restore(flags);
+ oops_exit();
+
+ if (!signr)
+ return;
+ if (in_interrupt())
+ panic("Fatal exception in interrupt");
+ if (panic_on_oops)
+ panic("Fatal exception");
+ do_exit(signr);
+}
+
+int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+{
+#ifdef CONFIG_X86_32
+ unsigned short ss;
+ unsigned long sp;
+#endif
+ printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
+#ifdef CONFIG_PREEMPT
+ printk("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+ printk("SMP ");
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ printk("DEBUG_PAGEALLOC");
+#endif
+ printk("\n");
+ sysfs_printk_last_file();
+ if (notify_die(DIE_OOPS, str, regs, err,
+ current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
+ return 1;
+
+ show_registers(regs);
+#ifdef CONFIG_X86_32
+ sp = (unsigned long) (&regs->sp);
+ savesegment(ss, ss);
+ if (user_mode(regs)) {
+ sp = regs->sp;
+ ss = regs->ss & 0xffff;
+ }
+ printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
+ print_symbol("%s", regs->ip);
+ printk(" SS:ESP %04x:%08lx\n", ss, sp);
+#else
+ /* Executive summary in case the oops scrolled away */
+ printk(KERN_ALERT "RIP ");
+ printk_address(regs->ip, 1);
+ printk(" RSP <%016lx>\n", regs->sp);
+#endif
+ return 0;
+}
+
+/*
+ * This is gone through when something in the kernel has done something bad
+ * and is about to be terminated:
+ */
+void die(const char *str, struct pt_regs *regs, long err)
+{
+ unsigned long flags = oops_begin();
+ int sig = SIGSEGV;
+
+ if (!user_mode_vm(regs))
+ report_bug(regs->ip, regs);
+
+ if (__die(str, regs, err))
+ sig = 0;
+ oops_end(flags, regs, sig);
+}
+
+void notrace __kprobes
+die_nmi(char *str, struct pt_regs *regs, int do_panic)
+{
+ unsigned long flags;
+
+ if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
+ return;
+
+ /*
+ * We are in trouble anyway, lets at least try
+ * to get a message out.
+ */
+ flags = oops_begin();
+ printk(KERN_EMERG "%s", str);
+ printk(" on CPU%d, ip %08lx, registers:\n",
+ smp_processor_id(), regs->ip);
+ show_registers(regs);
+ oops_end(flags, regs, 0);
+ if (do_panic || panic_on_oops)
+ panic("Non maskable interrupt");
+ nmi_exit();
+ local_irq_enable();
+ do_exit(SIGBUS);
+}
+
+static int __init oops_setup(char *s)
+{
+ if (!s)
+ return -EINVAL;
+ if (!strcmp(s, "panic"))
+ panic_on_oops = 1;
+ return 0;
+}
+early_param("oops", oops_setup);
+
+static int __init kstack_setup(char *s)
+{
+ if (!s)
+ return -EINVAL;
+ kstack_depth_to_print = simple_strtoul(s, NULL, 0);
+ return 0;
+}
+early_param("kstack", kstack_setup);
+
+static int __init code_bytes_setup(char *s)
+{
+ code_bytes = simple_strtoul(s, NULL, 0);
+ if (code_bytes > 8192)
+ code_bytes = 8192;
+
+ return 1;
+}
+__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
new file mode 100644
index 00000000000..3119a801c32
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+
+#ifndef DUMPSTACK_H
+#define DUMPSTACK_H
+
+#ifdef CONFIG_X86_32
+#define STACKSLOTS_PER_LINE 8
+#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
+#else
+#define STACKSLOTS_PER_LINE 4
+#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
+#endif
+
+extern unsigned long
+print_context_stack(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data,
+ unsigned long *end);
+
+extern void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp, char *log_lvl);
+
+extern void
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *sp, unsigned long bp, char *log_lvl);
+
+extern unsigned int code_bytes;
+extern int kstack_depth_to_print;
+
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+ struct stack_frame *next_frame;
+ unsigned long return_address;
+};
+#endif
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index b3614752197..7b031b106ec 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,64 +17,7 @@
#include <asm/stacktrace.h>
-#define STACKSLOTS_PER_LINE 8
-#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
-
-int panic_on_unrecovered_nmi;
-int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
-static unsigned int code_bytes = 64;
-static int die_counter;
-
-void printk_address(unsigned long address, int reliable)
-{
- printk(" [<%p>] %s%pS\n", (void *) address,
- reliable ? "" : "? ", (void *) address);
-}
-
-static inline int valid_stack_ptr(struct thread_info *tinfo,
- void *p, unsigned int size, void *end)
-{
- void *t = tinfo;
- if (end) {
- if (p < end && p >= (end-THREAD_SIZE))
- return 1;
- else
- return 0;
- }
- return p > t && p < t + THREAD_SIZE - size;
-}
-
-/* The form of the top of the frame on the stack */
-struct stack_frame {
- struct stack_frame *next_frame;
- unsigned long return_address;
-};
-
-static inline unsigned long
-print_context_stack(struct thread_info *tinfo,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data,
- unsigned long *end)
-{
- struct stack_frame *frame = (struct stack_frame *)bp;
-
- while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
- unsigned long addr;
-
- addr = *stack;
- if (__kernel_text_address(addr)) {
- if ((unsigned long) stack == bp + sizeof(long)) {
- ops->address(data, addr, 1);
- frame = frame->next_frame;
- bp = (unsigned long) frame;
- } else {
- ops->address(data, addr, bp == 0);
- }
- }
- stack++;
- }
- return bp;
-}
+#include "dumpstack.h"
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
@@ -119,57 +62,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
}
EXPORT_SYMBOL(dump_trace);
-static void
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
- printk(data);
- print_symbol(msg, symbol);
- printk("\n");
-}
-
-static void print_trace_warning(void *data, char *msg)
-{
- printk("%s%s\n", (char *)data, msg);
-}
-
-static int print_trace_stack(void *data, char *name)
-{
- printk("%s <%s> ", (char *)data, name);
- return 0;
-}
-
-/*
- * Print one address/symbol entries per line.
- */
-static void print_trace_address(void *data, unsigned long addr, int reliable)
-{
- touch_nmi_watchdog();
- printk(data);
- printk_address(addr, reliable);
-}
-
-static const struct stacktrace_ops print_trace_ops = {
- .warning = print_trace_warning,
- .warning_symbol = print_trace_warning_symbol,
- .stack = print_trace_stack,
- .address = print_trace_address,
-};
-
-static void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp, char *log_lvl)
-{
- printk("%sCall Trace:\n", log_lvl);
- dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-}
-
-void show_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp)
-{
- show_trace_log_lvl(task, regs, stack, bp, "");
-}
-
-static void
+void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, unsigned long bp, char *log_lvl)
{
@@ -196,33 +89,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
-void show_stack(struct task_struct *task, unsigned long *sp)
-{
- show_stack_log_lvl(task, NULL, sp, 0, "");
-}
-
-/*
- * The architecture-independent dump_stack generator
- */
-void dump_stack(void)
-{
- unsigned long bp = 0;
- unsigned long stack;
-
-#ifdef CONFIG_FRAME_POINTER
- if (!bp)
- get_bp(bp);
-#endif
-
- printk("Pid: %d, comm: %.20s %s %s %.*s\n",
- current->pid, current->comm, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
- show_trace(NULL, NULL, &stack, bp);
-}
-
-EXPORT_SYMBOL(dump_stack);
void show_registers(struct pt_regs *regs)
{
@@ -283,167 +149,3 @@ int is_valid_bugaddr(unsigned long ip)
return ud2 == 0x0b0f;
}
-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
-static int die_owner = -1;
-static unsigned int die_nest_count;
-
-unsigned __kprobes long oops_begin(void)
-{
- unsigned long flags;
-
- oops_enter();
-
- if (die_owner != raw_smp_processor_id()) {
- console_verbose();
- raw_local_irq_save(flags);
- __raw_spin_lock(&die_lock);
- die_owner = smp_processor_id();
- die_nest_count = 0;
- bust_spinlocks(1);
- } else {
- raw_local_irq_save(flags);
- }
- die_nest_count++;
- return flags;
-}
-
-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
-{
- bust_spinlocks(0);
- die_owner = -1;
- add_taint(TAINT_DIE);
- __raw_spin_unlock(&die_lock);
- raw_local_irq_restore(flags);
-
- if (!regs)
- return;
-
- if (kexec_should_crash(current))
- crash_kexec(regs);
- if (in_interrupt())
- panic("Fatal exception in interrupt");
- if (panic_on_oops)
- panic("Fatal exception");
- oops_exit();
- do_exit(signr);
-}
-
-int __kprobes __die(const char *str, struct pt_regs *regs, long err)
-{
- unsigned short ss;
- unsigned long sp;
-
- printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
-#endif
- printk("\n");
- sysfs_printk_last_file();
- if (notify_die(DIE_OOPS, str, regs, err,
- current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
- return 1;
-
- show_registers(regs);
- /* Executive summary in case the oops scrolled away */
- sp = (unsigned long) (&regs->sp);
- savesegment(ss, ss);
- if (user_mode(regs)) {
- sp = regs->sp;
- ss = regs->ss & 0xffff;
- }
- printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
- print_symbol("%s", regs->ip);
- printk(" SS:ESP %04x:%08lx\n", ss, sp);
- return 0;
-}
-
-/*
- * This is gone through when something in the kernel has done something bad
- * and is about to be terminated:
- */
-void die(const char *str, struct pt_regs *regs, long err)
-{
- unsigned long flags = oops_begin();
-
- if (die_nest_count < 3) {
- report_bug(regs->ip, regs);
-
- if (__die(str, regs, err))
- regs = NULL;
- } else {
- printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
- }
-
- oops_end(flags, regs, SIGSEGV);
-}
-
-static DEFINE_SPINLOCK(nmi_print_lock);
-
-void notrace __kprobes
-die_nmi(char *str, struct pt_regs *regs, int do_panic)
-{
- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
- return;
-
- spin_lock(&nmi_print_lock);
- /*
- * We are in trouble anyway, lets at least try
- * to get a message out:
- */
- bust_spinlocks(1);
- printk(KERN_EMERG "%s", str);
- printk(" on CPU%d, ip %08lx, registers:\n",
- smp_processor_id(), regs->ip);
- show_registers(regs);
- if (do_panic)
- panic("Non maskable interrupt");
- console_silent();
- spin_unlock(&nmi_print_lock);
-
- /*
- * If we are in kernel we are probably nested up pretty bad
- * and might aswell get out now while we still can:
- */
- if (!user_mode_vm(regs)) {
- current->thread.trap_no = 2;
- crash_kexec(regs);
- }
-
- bust_spinlocks(0);
- do_exit(SIGSEGV);
-}
-
-static int __init oops_setup(char *s)
-{
- if (!s)
- return -EINVAL;
- if (!strcmp(s, "panic"))
- panic_on_oops = 1;
- return 0;
-}
-early_param("oops", oops_setup);
-
-static int __init kstack_setup(char *s)
-{
- if (!s)
- return -EINVAL;
- kstack_depth_to_print = simple_strtoul(s, NULL, 0);
- return 0;
-}
-early_param("kstack", kstack_setup);
-
-static int __init code_bytes_setup(char *s)
-{
- code_bytes = simple_strtoul(s, NULL, 0);
- if (code_bytes > 8192)
- code_bytes = 8192;
-
- return 1;
-}
-__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 96a5db7da8a..33ff10287a5 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -17,19 +17,7 @@
#include <asm/stacktrace.h>
-#define STACKSLOTS_PER_LINE 4
-#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
-
-int panic_on_unrecovered_nmi;
-int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
-static unsigned int code_bytes = 64;
-static int die_counter;
-
-void printk_address(unsigned long address, int reliable)
-{
- printk(" [<%p>] %s%pS\n", (void *) address,
- reliable ? "" : "? ", (void *) address);
-}
+#include "dumpstack.h"
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
unsigned *usedp, char **idp)
@@ -113,51 +101,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
*/
-static inline int valid_stack_ptr(struct thread_info *tinfo,
- void *p, unsigned int size, void *end)
-{
- void *t = tinfo;
- if (end) {
- if (p < end && p >= (end-THREAD_SIZE))
- return 1;
- else
- return 0;
- }
- return p > t && p < t + THREAD_SIZE - size;
-}
-
-/* The form of the top of the frame on the stack */
-struct stack_frame {
- struct stack_frame *next_frame;
- unsigned long return_address;
-};
-
-static inline unsigned long
-print_context_stack(struct thread_info *tinfo,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data,
- unsigned long *end)
-{
- struct stack_frame *frame = (struct stack_frame *)bp;
-
- while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
- unsigned long addr;
-
- addr = *stack;
- if (__kernel_text_address(addr)) {
- if ((unsigned long) stack == bp + sizeof(long)) {
- ops->address(data, addr, 1);
- frame = frame->next_frame;
- bp = (unsigned long) frame;
- } else {
- ops->address(data, addr, bp == 0);
- }
- }
- stack++;
- }
- return bp;
-}
-
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data)
@@ -248,57 +191,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
}
EXPORT_SYMBOL(dump_trace);
-static void
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
- printk(data);
- print_symbol(msg, symbol);
- printk("\n");
-}
-
-static void print_trace_warning(void *data, char *msg)
-{
- printk("%s%s\n", (char *)data, msg);
-}
-
-static int print_trace_stack(void *data, char *name)
-{
- printk("%s <%s> ", (char *)data, name);
- return 0;
-}
-
-/*
- * Print one address/symbol entries per line.
- */
-static void print_trace_address(void *data, unsigned long addr, int reliable)
-{
- touch_nmi_watchdog();
- printk(data);
- printk_address(addr, reliable);
-}
-
-static const struct stacktrace_ops print_trace_ops = {
- .warning = print_trace_warning,
- .warning_symbol = print_trace_warning_symbol,
- .stack = print_trace_stack,
- .address = print_trace_address,
-};
-
-static void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp, char *log_lvl)
-{
- printk("%sCall Trace:\n", log_lvl);
- dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-}
-
-void show_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp)
-{
- show_trace_log_lvl(task, regs, stack, bp, "");
-}
-
-static void
+void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, unsigned long bp, char *log_lvl)
{
@@ -342,33 +235,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
-void show_stack(struct task_struct *task, unsigned long *sp)
-{
- show_stack_log_lvl(task, NULL, sp, 0, "");
-}
-
-/*
- * The architecture-independent dump_stack generator
- */
-void dump_stack(void)
-{
- unsigned long bp = 0;
- unsigned long stack;
-
-#ifdef CONFIG_FRAME_POINTER
- if (!bp)
- get_bp(bp);
-#endif
-
- printk("Pid: %d, comm: %.20s %s %s %.*s\n",
- current->pid, current->comm, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
- show_trace(NULL, NULL, &stack, bp);
-}
-EXPORT_SYMBOL(dump_stack);
-
void show_registers(struct pt_regs *regs)
{
int i;
@@ -429,147 +295,3 @@ int is_valid_bugaddr(unsigned long ip)
return ud2 == 0x0b0f;
}
-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
-static int die_owner = -1;
-static unsigned int die_nest_count;
-
-unsigned __kprobes long oops_begin(void)
-{
- int cpu;
- unsigned long flags;
-
- oops_enter();
-
- /* racy, but better than risking deadlock. */
- raw_local_irq_save(flags);
- cpu = smp_processor_id();
- if (!__raw_spin_trylock(&die_lock)) {
- if (cpu == die_owner)
- /* nested oops. should stop eventually */;
- else
- __raw_spin_lock(&die_lock);
- }
- die_nest_count++;
- die_owner = cpu;
- console_verbose();
- bust_spinlocks(1);
- return flags;
-}
-
-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
-{
- die_owner = -1;
- bust_spinlocks(0);
- die_nest_count--;
- if (!die_nest_count)
- /* Nest count reaches zero, release the lock. */
- __raw_spin_unlock(&die_lock);
- raw_local_irq_restore(flags);
- if (!regs) {
- oops_exit();
- return;
- }
- if (in_interrupt())
- panic("Fatal exception in interrupt");
- if (panic_on_oops)
- panic("Fatal exception");
- oops_exit();
- do_exit(signr);
-}
-
-int __kprobes __die(const char *str, struct pt_regs *regs, long err)
-{
- printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
-#endif
- printk("\n");
- sysfs_printk_last_file();
- if (notify_die(DIE_OOPS, str, regs, err,
- current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
- return 1;
-
- show_registers(regs);
- add_taint(TAINT_DIE);
- /* Executive summary in case the oops scrolled away */
- printk(KERN_ALERT "RIP ");
- printk_address(regs->ip, 1);
- printk(" RSP <%016lx>\n", regs->sp);
- if (kexec_should_crash(current))
- crash_kexec(regs);
- return 0;
-}
-
-void die(const char *str, struct pt_regs *regs, long err)
-{
- unsigned long flags = oops_begin();
-
- if (!user_mode(regs))
- report_bug(regs->ip, regs);
-
- if (__die(str, regs, err))
- regs = NULL;
- oops_end(flags, regs, SIGSEGV);
-}
-
-notrace __kprobes void
-die_nmi(char *str, struct pt_regs *regs, int do_panic)
-{
- unsigned long flags;
-
- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
- return;
-
- flags = oops_begin();
- /*
- * We are in trouble anyway, lets at least try
- * to get a message out.
- */
- printk(KERN_EMERG "%s", str);
- printk(" on CPU%d, ip %08lx, registers:\n",
- smp_processor_id(), regs->ip);
- show_registers(regs);
- if (kexec_should_crash(current))
- crash_kexec(regs);
- if (do_panic || panic_on_oops)
- panic("Non maskable interrupt");
- oops_end(flags, NULL, SIGBUS);
- nmi_exit();
- local_irq_enable();
- do_exit(SIGBUS);
-}
-
-static int __init oops_setup(char *s)
-{
- if (!s)
- return -EINVAL;
- if (!strcmp(s, "panic"))
- panic_on_oops = 1;
- return 0;
-}
-early_param("oops", oops_setup);
-
-static int __init kstack_setup(char *s)
-{
- if (!s)
- return -EINVAL;
- kstack_depth_to_print = simple_strtoul(s, NULL, 0);
- return 0;
-}
-early_param("kstack", kstack_setup);
-
-static int __init code_bytes_setup(char *s)
-{
- code_bytes = simple_strtoul(s, NULL, 0);
- if (code_bytes > 8192)
- code_bytes = 8192;
-
- return 1;
-}
-__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7aafeb5263e..65a13943e09 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -677,22 +677,6 @@ struct early_res {
};
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
{ 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
- { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
-#endif
-#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- */
- { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" },
- /*
- * Has to be in very low memory so we can execute
- * real-mode AP code.
- */
- { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" },
-#endif
{}
};
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 34ad997d383..23b138e31e9 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -875,49 +875,6 @@ static struct console early_dbgp_console = {
};
#endif
-/* Console interface to a host file on AMD's SimNow! */
-
-static int simnow_fd;
-
-enum {
- MAGIC1 = 0xBACCD00A,
- MAGIC2 = 0xCA110000,
- XOPEN = 5,
- XWRITE = 4,
-};
-
-static noinline long simnow(long cmd, long a, long b, long c)
-{
- long ret;
-
- asm volatile("cpuid" :
- "=a" (ret) :
- "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2));
- return ret;
-}
-
-static void __init simnow_init(char *str)
-{
- char *fn = "klog";
-
- if (*str == '=')
- fn = ++str;
- /* error ignored */
- simnow_fd = simnow(XOPEN, (unsigned long)fn, O_WRONLY|O_APPEND|O_CREAT, 0644);
-}
-
-static void simnow_write(struct console *con, const char *s, unsigned n)
-{
- simnow(XWRITE, simnow_fd, (unsigned long)s, n);
-}
-
-static struct console simnow_console = {
- .name = "simnow",
- .write = simnow_write,
- .flags = CON_PRINTBUFFER,
- .index = -1,
-};
-
/* Direct interface for emergencies */
static struct console *early_console = &early_vga_console;
static int __initdata early_console_initialized;
@@ -960,10 +917,6 @@ static int __init setup_early_printk(char *buf)
max_ypos = boot_params.screen_info.orig_video_lines;
current_ypos = boot_params.screen_info.orig_y;
early_console = &early_vga_console;
- } else if (!strncmp(buf, "simnow", 6)) {
- simnow_init(buf + 6);
- early_console = &simnow_console;
- keep_early = 1;
#ifdef CONFIG_EARLY_PRINTK_DBGP
} else if (!strncmp(buf, "dbgp", 4)) {
if (early_dbgp_init(buf+4) < 0)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 28b597ef9ca..f6402c4ba10 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1051,6 +1051,7 @@ ENTRY(kernel_thread_helper)
push %eax
CFI_ADJUST_CFA_OFFSET 4
call do_exit
+ ud2 # padding for call trace
CFI_ENDPROC
ENDPROC(kernel_thread_helper)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 54927784bab..42571baaca3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -255,6 +255,7 @@ ENTRY(ret_from_fork)
call schedule_tail
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
+ CFI_REMEMBER_STATE
jnz rff_trace
rff_action:
RESTORE_REST
@@ -264,6 +265,7 @@ rff_action:
jnz int_ret_from_sys_call
RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
jmp ret_from_sys_call
+ CFI_RESTORE_STATE
rff_trace:
movq %rsp,%rdi
call syscall_trace_leave
@@ -1170,6 +1172,7 @@ child_rip:
# exit
mov %eax, %edi
call do_exit
+ ud2 # padding for call trace
CFI_ENDPROC
ENDPROC(child_rip)
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index 0aa2c443d60..53699c931ad 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -38,8 +38,11 @@
#include <asm/io.h>
#include <asm/nmi.h>
#include <asm/smp.h>
+#include <asm/atomic.h>
#include <asm/apicdef.h>
#include <mach_mpparse.h>
+#include <asm/genapic.h>
+#include <asm/setup.h>
/*
* ES7000 chipsets
@@ -161,6 +164,43 @@ es7000_rename_gsi(int ioapic, int gsi)
return gsi;
}
+static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
+{
+ unsigned long vect = 0, psaival = 0;
+
+ if (psai == NULL)
+ return -1;
+
+ vect = ((unsigned long)__pa(eip)/0x1000) << 16;
+ psaival = (0x1000000 | vect | cpu);
+
+ while (*psai & 0x1000000)
+ ;
+
+ *psai = psaival;
+
+ return 0;
+}
+
+static void noop_wait_for_deassert(atomic_t *deassert_not_used)
+{
+}
+
+static int __init es7000_update_genapic(void)
+{
+ genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
+
+ /* MPENTIUMIII */
+ if (boot_cpu_data.x86 == 6 &&
+ (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) {
+ es7000_update_genapic_to_cluster();
+ genapic->wait_for_init_deassert = noop_wait_for_deassert;
+ genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
+ }
+
+ return 0;
+}
+
void __init
setup_unisys(void)
{
@@ -176,6 +216,8 @@ setup_unisys(void)
else
es7000_plat = ES7000_CLASSIC;
ioapic_renumber_irq = es7000_rename_gsi;
+
+ x86_quirks->update_genapic = es7000_update_genapic;
}
/*
@@ -317,26 +359,6 @@ es7000_mip_write(struct mip_reg *mip_reg)
return status;
}
-int
-es7000_start_cpu(int cpu, unsigned long eip)
-{
- unsigned long vect = 0, psaival = 0;
-
- if (psai == NULL)
- return -1;
-
- vect = ((unsigned long)__pa(eip)/0x1000) << 16;
- psaival = (0x1000000 | vect | cpu);
-
- while (*psai & 0x1000000)
- ;
-
- *psai = psaival;
-
- return 0;
-
-}
-
void __init
es7000_sw_apic(void)
{
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 6c9bfc9e1e9..2bced78b0b8 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -21,6 +21,7 @@
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
+#include <asm/setup.h>
extern struct genapic apic_flat;
extern struct genapic apic_physflat;
@@ -53,6 +54,9 @@ void __init setup_apic_routing(void)
genapic = &apic_physflat;
printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
}
+
+ if (x86_quirks->update_genapic)
+ x86_quirks->update_genapic();
}
/* Same for both flat and physical. */
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 2c7dbdb9827..dece1728973 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/threads.h>
+#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/string.h>
#include <linux/ctype.h>
@@ -17,6 +18,9 @@
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/hardirq.h>
+#include <linux/timer.h>
+#include <linux/proc_fs.h>
+#include <asm/current.h>
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
@@ -356,6 +360,103 @@ static __init void uv_rtc_init(void)
}
/*
+ * percpu heartbeat timer
+ */
+static void uv_heartbeat(unsigned long ignored)
+{
+ struct timer_list *timer = &uv_hub_info->scir.timer;
+ unsigned char bits = uv_hub_info->scir.state;
+
+ /* flip heartbeat bit */
+ bits ^= SCIR_CPU_HEARTBEAT;
+
+ /* is this cpu idle? */
+ if (idle_cpu(raw_smp_processor_id()))
+ bits &= ~SCIR_CPU_ACTIVITY;
+ else
+ bits |= SCIR_CPU_ACTIVITY;
+
+ /* update system controller interface reg */
+ uv_set_scir_bits(bits);
+
+ /* enable next timer period */
+ mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
+}
+
+static void __cpuinit uv_heartbeat_enable(int cpu)
+{
+ if (!uv_cpu_hub_info(cpu)->scir.enabled) {
+ struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
+
+ uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
+ setup_timer(timer, uv_heartbeat, cpu);
+ timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
+ add_timer_on(timer, cpu);
+ uv_cpu_hub_info(cpu)->scir.enabled = 1;
+ }
+
+ /* check boot cpu */
+ if (!uv_cpu_hub_info(0)->scir.enabled)
+ uv_heartbeat_enable(0);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void __cpuinit uv_heartbeat_disable(int cpu)
+{
+ if (uv_cpu_hub_info(cpu)->scir.enabled) {
+ uv_cpu_hub_info(cpu)->scir.enabled = 0;
+ del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
+ }
+ uv_set_cpu_scir_bits(cpu, 0xff);
+}
+
+/*
+ * cpu hotplug notifier
+ */
+static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ long cpu = (long)hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ uv_heartbeat_enable(cpu);
+ break;
+ case CPU_DOWN_PREPARE:
+ uv_heartbeat_disable(cpu);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+ hotcpu_notifier(uv_scir_cpu_notify, 0);
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+}
+
+static __init int uv_init_heartbeat(void)
+{
+ int cpu;
+
+ if (is_uv_system())
+ for_each_online_cpu(cpu)
+ uv_heartbeat_enable(cpu);
+ return 0;
+}
+
+late_initcall(uv_init_heartbeat);
+
+#endif /* !CONFIG_HOTPLUG_CPU */
+
+/*
* Called on each cpu to initialize the per_cpu UV data area.
* ZZZ hotplug not supported yet
*/
@@ -428,7 +529,7 @@ void __init uv_system_init(void)
uv_bios_init();
uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
- &uv_coherency_id, &uv_region_size);
+ &sn_coherency_id, &sn_region_size);
uv_rtc_init();
for_each_present_cpu(cpu) {
@@ -439,8 +540,7 @@ void __init uv_system_init(void)
uv_blade_info[blade].nr_possible_cpus++;
uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
- uv_cpu_hub_info(cpu)->lowmem_remap_top =
- lowmem_redir_base + lowmem_redir_size;
+ uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
uv_cpu_hub_info(cpu)->m_val = m_val;
uv_cpu_hub_info(cpu)->n_val = m_val;
uv_cpu_hub_info(cpu)->numa_blade_id = blade;
@@ -450,7 +550,8 @@ void __init uv_system_init(void)
uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
- uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id;
+ uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
+ uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu;
uv_node_to_blade[nid] = blade;
uv_cpu_to_blade[cpu] = blade;
max_pnode = max(pnode, max_pnode);
@@ -467,4 +568,6 @@ void __init uv_system_init(void)
map_mmioh_high(max_pnode);
uv_cpu_init();
+ uv_scir_register_cpu_notifier();
+ proc_mkdir("sgi_uv", NULL);
}
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index 1dcb0f13897..3e66bd364a9 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -35,7 +35,6 @@ void __init reserve_ebda_region(void)
/* start of EBDA area */
ebda_addr = get_bios_ebda();
- printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem);
/* Fixup: bios puts an EBDA in the top 64K segment */
/* of conventional memory, but does not adjust lowmem. */
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index fa1d25dd83e..ac108d1fe18 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -12,9 +12,12 @@
#include <asm/sections.h>
#include <asm/e820.h>
#include <asm/bios_ebda.h>
+#include <asm/trampoline.h>
void __init i386_start_kernel(void)
{
+ reserve_trampoline_memory();
+
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index d16084f9064..388e05a5fc1 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -24,6 +24,7 @@
#include <asm/kdebug.h>
#include <asm/e820.h>
#include <asm/bios_ebda.h>
+#include <asm/trampoline.h>
/* boot cpu pda */
static struct x8664_pda _boot_cpu_pda __read_mostly;
@@ -120,6 +121,8 @@ void __init x86_64_start_reservations(char *real_mode_data)
{
copy_bootdata(__va(real_mode_data));
+ reserve_trampoline_memory();
+
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 067d8de913f..3f0a3edf0a5 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -33,7 +33,9 @@
* HPET address is set in acpi/boot.c, when an ACPI entry exists
*/
unsigned long hpet_address;
-unsigned long hpet_num_timers;
+#ifdef CONFIG_PCI_MSI
+static unsigned long hpet_num_timers;
+#endif
static void __iomem *hpet_virt_address;
struct hpet_dev {
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index a4f93b4120c..d39918076bb 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -14,7 +14,6 @@ static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
-EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */
/*
* Initial thread structure.
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9043251210f..679e7bbbbcd 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -2216,10 +2216,9 @@ static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
asmlinkage void smp_irq_move_cleanup_interrupt(void)
{
unsigned vector, me;
+
ack_APIC_irq();
-#ifdef CONFIG_X86_64
exit_idle();
-#endif
irq_enter();
me = smp_processor_id();
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84eb77a..1d3d0e71b04 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,7 +18,6 @@
#include <asm/idle.h>
#include <asm/smp.h>
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
/*
* Probabilistic stack overflow check:
*
@@ -28,19 +27,18 @@
*/
static inline void stack_overflow_check(struct pt_regs *regs)
{
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
u64 curbase = (u64)task_stack_page(current);
- static unsigned long warned = -60*HZ;
-
- if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE &&
- regs->sp < curbase + sizeof(struct thread_info) + 128 &&
- time_after(jiffies, warned + 60*HZ)) {
- printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
- current->comm, curbase, regs->sp);
- show_stack(NULL,NULL);
- warned = jiffies;
- }
-}
+
+ WARN_ONCE(regs->sp >= curbase &&
+ regs->sp <= curbase + THREAD_SIZE &&
+ regs->sp < curbase + sizeof(struct thread_info) +
+ sizeof(struct pt_regs) + 128,
+
+ "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
+ current->comm, curbase, regs->sp);
#endif
+}
/*
* do_IRQ handles all normal device IRQ's (the special
@@ -60,9 +58,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
irq_enter();
irq = __get_cpu_var(vector_irq)[vector];
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
stack_overflow_check(regs);
-#endif
desc = irq_to_desc(irq);
if (likely(desc))
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 7a385746509..37f420018a4 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -13,6 +13,7 @@
#include <linux/numa.h>
#include <linux/ftrace.h>
#include <linux/suspend.h>
+#include <linux/gfp.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -25,15 +26,6 @@
#include <asm/system.h>
#include <asm/cacheflush.h>
-#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
-static u32 kexec_pgd[1024] PAGE_ALIGNED;
-#ifdef CONFIG_X86_PAE
-static u32 kexec_pmd0[1024] PAGE_ALIGNED;
-static u32 kexec_pmd1[1024] PAGE_ALIGNED;
-#endif
-static u32 kexec_pte0[1024] PAGE_ALIGNED;
-static u32 kexec_pte1[1024] PAGE_ALIGNED;
-
static void set_idt(void *newidt, __u16 limit)
{
struct desc_ptr curidt;
@@ -76,6 +68,76 @@ static void load_segments(void)
#undef __STR
}
+static void machine_kexec_free_page_tables(struct kimage *image)
+{
+ free_page((unsigned long)image->arch.pgd);
+#ifdef CONFIG_X86_PAE
+ free_page((unsigned long)image->arch.pmd0);
+ free_page((unsigned long)image->arch.pmd1);
+#endif
+ free_page((unsigned long)image->arch.pte0);
+ free_page((unsigned long)image->arch.pte1);
+}
+
+static int machine_kexec_alloc_page_tables(struct kimage *image)
+{
+ image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
+#ifdef CONFIG_X86_PAE
+ image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+ image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+#endif
+ image->arch.pte0 = (pte_t *)get_zeroed_page(GFP_KERNEL);
+ image->arch.pte1 = (pte_t *)get_zeroed_page(GFP_KERNEL);
+ if (!image->arch.pgd ||
+#ifdef CONFIG_X86_PAE
+ !image->arch.pmd0 || !image->arch.pmd1 ||
+#endif
+ !image->arch.pte0 || !image->arch.pte1) {
+ machine_kexec_free_page_tables(image);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void machine_kexec_page_table_set_one(
+ pgd_t *pgd, pmd_t *pmd, pte_t *pte,
+ unsigned long vaddr, unsigned long paddr)
+{
+ pud_t *pud;
+
+ pgd += pgd_index(vaddr);
+#ifdef CONFIG_X86_PAE
+ if (!(pgd_val(*pgd) & _PAGE_PRESENT))
+ set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT));
+#endif
+ pud = pud_offset(pgd, vaddr);
+ pmd = pmd_offset(pud, vaddr);
+ if (!(pmd_val(*pmd) & _PAGE_PRESENT))
+ set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
+ pte = pte_offset_kernel(pmd, vaddr);
+ set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+}
+
+static void machine_kexec_prepare_page_tables(struct kimage *image)
+{
+ void *control_page;
+ pmd_t *pmd = 0;
+
+ control_page = page_address(image->control_code_page);
+#ifdef CONFIG_X86_PAE
+ pmd = image->arch.pmd0;
+#endif
+ machine_kexec_page_table_set_one(
+ image->arch.pgd, pmd, image->arch.pte0,
+ (unsigned long)control_page, __pa(control_page));
+#ifdef CONFIG_X86_PAE
+ pmd = image->arch.pmd1;
+#endif
+ machine_kexec_page_table_set_one(
+ image->arch.pgd, pmd, image->arch.pte1,
+ __pa(control_page), __pa(control_page));
+}
+
/*
* A architecture hook called to validate the
* proposed image and prepare the control pages
@@ -87,12 +149,20 @@ static void load_segments(void)
* reboot code buffer to allow us to avoid allocations
* later.
*
- * Make control page executable.
+ * - Make control page executable.
+ * - Allocate page tables
+ * - Setup page tables
*/
int machine_kexec_prepare(struct kimage *image)
{
+ int error;
+
if (nx_enabled)
set_pages_x(image->control_code_page, 1);
+ error = machine_kexec_alloc_page_tables(image);
+ if (error)
+ return error;
+ machine_kexec_prepare_page_tables(image);
return 0;
}
@@ -104,6 +174,7 @@ void machine_kexec_cleanup(struct kimage *image)
{
if (nx_enabled)
set_pages_nx(image->control_code_page, 1);
+ machine_kexec_free_page_tables(image);
}
/*
@@ -150,18 +221,7 @@ void machine_kexec(struct kimage *image)
relocate_kernel_ptr = control_page;
page_list[PA_CONTROL_PAGE] = __pa(control_page);
page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
- page_list[PA_PGD] = __pa(kexec_pgd);
- page_list[VA_PGD] = (unsigned long)kexec_pgd;
-#ifdef CONFIG_X86_PAE
- page_list[PA_PMD_0] = __pa(kexec_pmd0);
- page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
- page_list[PA_PMD_1] = __pa(kexec_pmd1);
- page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
-#endif
- page_list[PA_PTE_0] = __pa(kexec_pte0);
- page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
- page_list[PA_PTE_1] = __pa(kexec_pte1);
- page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+ page_list[PA_PGD] = __pa(image->arch.pgd);
if (image->type == KEXEC_TYPE_DEFAULT)
page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 5f8e5d75a25..c25fdb38229 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -10,7 +10,7 @@
* This driver allows to upgrade microcode on AMD
* family 0x10 and 0x11 processors.
*
- * Licensed unter the terms of the GNU General Public
+ * Licensed under the terms of the GNU General Public
* License version 2. See file COPYING for details.
*/
@@ -32,9 +32,9 @@
#include <linux/platform_device.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
+#include <linux/uaccess.h>
#include <asm/msr.h>
-#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/microcode.h>
@@ -47,43 +47,38 @@ MODULE_LICENSE("GPL v2");
#define UCODE_UCODE_TYPE 0x00000001
struct equiv_cpu_entry {
- unsigned int installed_cpu;
- unsigned int fixed_errata_mask;
- unsigned int fixed_errata_compare;
- unsigned int equiv_cpu;
-};
+ u32 installed_cpu;
+ u32 fixed_errata_mask;
+ u32 fixed_errata_compare;
+ u16 equiv_cpu;
+ u16 res;
+} __attribute__((packed));
struct microcode_header_amd {
- unsigned int data_code;
- unsigned int patch_id;
- unsigned char mc_patch_data_id[2];
- unsigned char mc_patch_data_len;
- unsigned char init_flag;
- unsigned int mc_patch_data_checksum;
- unsigned int nb_dev_id;
- unsigned int sb_dev_id;
- unsigned char processor_rev_id[2];
- unsigned char nb_rev_id;
- unsigned char sb_rev_id;
- unsigned char bios_api_rev;
- unsigned char reserved1[3];
- unsigned int match_reg[8];
-};
+ u32 data_code;
+ u32 patch_id;
+ u16 mc_patch_data_id;
+ u8 mc_patch_data_len;
+ u8 init_flag;
+ u32 mc_patch_data_checksum;
+ u32 nb_dev_id;
+ u32 sb_dev_id;
+ u16 processor_rev_id;
+ u8 nb_rev_id;
+ u8 sb_rev_id;
+ u8 bios_api_rev;
+ u8 reserved1[3];
+ u32 match_reg[8];
+} __attribute__((packed));
struct microcode_amd {
struct microcode_header_amd hdr;
unsigned int mpb[0];
};
-#define UCODE_MAX_SIZE (2048)
-#define DEFAULT_UCODE_DATASIZE (896)
-#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd))
-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
-#define DWSIZE (sizeof(u32))
-/* For now we support a fixed ucode total size only */
-#define get_totalsize(mc) \
- ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \
- + MC_HEADER_SIZE)
+#define UCODE_MAX_SIZE 2048
+#define UCODE_CONTAINER_SECTION_HDR 8
+#define UCODE_CONTAINER_HEADER_SIZE 12
/* serialize access to the physical write */
static DEFINE_SPINLOCK(microcode_update_lock);
@@ -93,31 +88,24 @@ static struct equiv_cpu_entry *equiv_cpu_table;
static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ u32 dummy;
memset(csig, 0, sizeof(*csig));
-
if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
- printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n",
- cpu);
+ printk(KERN_WARNING "microcode: CPU%d: AMD CPU family 0x%x not "
+ "supported\n", cpu, c->x86);
return -1;
}
-
- asm volatile("movl %1, %%ecx; rdmsr"
- : "=a" (csig->rev)
- : "i" (0x0000008B) : "ecx");
-
- printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n",
- csig->rev);
-
+ rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
+ printk(KERN_INFO "microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev);
return 0;
}
static int get_matching_microcode(int cpu, void *mc, int rev)
{
struct microcode_header_amd *mc_header = mc;
- struct pci_dev *nb_pci_dev, *sb_pci_dev;
unsigned int current_cpu_id;
- unsigned int equiv_cpu_id = 0x00;
+ u16 equiv_cpu_id = 0;
unsigned int i = 0;
BUG_ON(equiv_cpu_table == NULL);
@@ -132,57 +120,25 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
}
if (!equiv_cpu_id) {
- printk(KERN_ERR "microcode: CPU%d cpu_id "
- "not found in equivalent cpu table \n", cpu);
+ printk(KERN_WARNING "microcode: CPU%d: cpu revision "
+ "not listed in equivalent cpu table\n", cpu);
return 0;
}
- if ((mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff)) {
- printk(KERN_ERR
- "microcode: CPU%d patch does not match "
- "(patch is %x, cpu extended is %x) \n",
- cpu, mc_header->processor_rev_id[0],
- (equiv_cpu_id & 0xff));
+ if (mc_header->processor_rev_id != equiv_cpu_id) {
+ printk(KERN_ERR "microcode: CPU%d: patch mismatch "
+ "(processor_rev_id: %x, equiv_cpu_id: %x)\n",
+ cpu, mc_header->processor_rev_id, equiv_cpu_id);
return 0;
}
- if ((mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff)) {
- printk(KERN_ERR "microcode: CPU%d patch does not match "
- "(patch is %x, cpu base id is %x) \n",
- cpu, mc_header->processor_rev_id[1],
- ((equiv_cpu_id >> 16) & 0xff));
-
+ /* ucode might be chipset specific -- currently we don't support this */
+ if (mc_header->nb_dev_id || mc_header->sb_dev_id) {
+ printk(KERN_ERR "microcode: CPU%d: loading of chipset "
+ "specific code not yet supported\n", cpu);
return 0;
}
- /* ucode may be northbridge specific */
- if (mc_header->nb_dev_id) {
- nb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD,
- (mc_header->nb_dev_id & 0xff),
- NULL);
- if ((!nb_pci_dev) ||
- (mc_header->nb_rev_id != nb_pci_dev->revision)) {
- printk(KERN_ERR "microcode: CPU%d NB mismatch \n", cpu);
- pci_dev_put(nb_pci_dev);
- return 0;
- }
- pci_dev_put(nb_pci_dev);
- }
-
- /* ucode may be southbridge specific */
- if (mc_header->sb_dev_id) {
- sb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD,
- (mc_header->sb_dev_id & 0xff),
- NULL);
- if ((!sb_pci_dev) ||
- (mc_header->sb_rev_id != sb_pci_dev->revision)) {
- printk(KERN_ERR "microcode: CPU%d SB mismatch \n", cpu);
- pci_dev_put(sb_pci_dev);
- return 0;
- }
- pci_dev_put(sb_pci_dev);
- }
-
if (mc_header->patch_id <= rev)
return 0;
@@ -192,12 +148,10 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
static void apply_microcode_amd(int cpu)
{
unsigned long flags;
- unsigned int eax, edx;
- unsigned int rev;
+ u32 rev, dummy;
int cpu_num = raw_smp_processor_id();
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
struct microcode_amd *mc_amd = uci->mc;
- unsigned long addr;
/* We should bind the task to the CPU */
BUG_ON(cpu_num != cpu);
@@ -206,42 +160,34 @@ static void apply_microcode_amd(int cpu)
return;
spin_lock_irqsave(&microcode_update_lock, flags);
-
- addr = (unsigned long)&mc_amd->hdr.data_code;
- edx = (unsigned int)(((unsigned long)upper_32_bits(addr)));
- eax = (unsigned int)(((unsigned long)lower_32_bits(addr)));
-
- asm volatile("movl %0, %%ecx; wrmsr" :
- : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx");
-
+ wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
/* get patch id after patching */
- asm volatile("movl %1, %%ecx; rdmsr"
- : "=a" (rev)
- : "i" (0x0000008B) : "ecx");
-
+ rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
spin_unlock_irqrestore(&microcode_update_lock, flags);
/* check current patch id and patch's id for match */
if (rev != mc_amd->hdr.patch_id) {
- printk(KERN_ERR "microcode: CPU%d update from revision "
- "0x%x to 0x%x failed\n", cpu_num,
- mc_amd->hdr.patch_id, rev);
+ printk(KERN_ERR "microcode: CPU%d: update failed "
+ "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id);
return;
}
- printk(KERN_INFO "microcode: CPU%d updated from revision "
- "0x%x to 0x%x \n",
- cpu_num, uci->cpu_sig.rev, mc_amd->hdr.patch_id);
+ printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n",
+ cpu, rev);
uci->cpu_sig.rev = rev;
}
-static void * get_next_ucode(u8 *buf, unsigned int size,
- int (*get_ucode_data)(void *, const void *, size_t),
- unsigned int *mc_size)
+static int get_ucode_data(void *to, const u8 *from, size_t n)
+{
+ memcpy(to, from, n);
+ return 0;
+}
+
+static void *get_next_ucode(const u8 *buf, unsigned int size,
+ unsigned int *mc_size)
{
unsigned int total_size;
-#define UCODE_CONTAINER_SECTION_HDR 8
u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
void *mc;
@@ -249,39 +195,37 @@ static void * get_next_ucode(u8 *buf, unsigned int size,
return NULL;
if (section_hdr[0] != UCODE_UCODE_TYPE) {
- printk(KERN_ERR "microcode: error! "
- "Wrong microcode payload type field\n");
+ printk(KERN_ERR "microcode: error: invalid type field in "
+ "container file section header\n");
return NULL;
}
total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
- printk(KERN_INFO "microcode: size %u, total_size %u\n",
- size, total_size);
+ printk(KERN_DEBUG "microcode: size %u, total_size %u\n",
+ size, total_size);
if (total_size > size || total_size > UCODE_MAX_SIZE) {
- printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
+ printk(KERN_ERR "microcode: error: size mismatch\n");
return NULL;
}
mc = vmalloc(UCODE_MAX_SIZE);
if (mc) {
memset(mc, 0, UCODE_MAX_SIZE);
- if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) {
+ if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR,
+ total_size)) {
vfree(mc);
mc = NULL;
} else
*mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
}
-#undef UCODE_CONTAINER_SECTION_HDR
return mc;
}
-static int install_equiv_cpu_table(u8 *buf,
- int (*get_ucode_data)(void *, const void *, size_t))
+static int install_equiv_cpu_table(const u8 *buf)
{
-#define UCODE_CONTAINER_HEADER_SIZE 12
u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE];
unsigned int *buf_pos = (unsigned int *)container_hdr;
unsigned long size;
@@ -292,14 +236,15 @@ static int install_equiv_cpu_table(u8 *buf,
size = buf_pos[2];
if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
- printk(KERN_ERR "microcode: error! "
- "Wrong microcode equivalnet cpu table\n");
+ printk(KERN_ERR "microcode: error: invalid type field in "
+ "container file section header\n");
return 0;
}
equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size);
if (!equiv_cpu_table) {
- printk(KERN_ERR "microcode: error, can't allocate memory for equiv CPU table\n");
+ printk(KERN_ERR "microcode: failed to allocate "
+ "equivalent CPU table\n");
return 0;
}
@@ -310,7 +255,6 @@ static int install_equiv_cpu_table(u8 *buf,
}
return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
-#undef UCODE_CONTAINER_HEADER_SIZE
}
static void free_equiv_cpu_table(void)
@@ -321,18 +265,20 @@ static void free_equiv_cpu_table(void)
}
}
-static int generic_load_microcode(int cpu, void *data, size_t size,
- int (*get_ucode_data)(void *, const void *, size_t))
+static int generic_load_microcode(int cpu, const u8 *data, size_t size)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- u8 *ucode_ptr = data, *new_mc = NULL, *mc;
+ const u8 *ucode_ptr = data;
+ void *new_mc = NULL;
+ void *mc;
int new_rev = uci->cpu_sig.rev;
unsigned int leftover;
unsigned long offset;
- offset = install_equiv_cpu_table(ucode_ptr, get_ucode_data);
+ offset = install_equiv_cpu_table(ucode_ptr);
if (!offset) {
- printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
+ printk(KERN_ERR "microcode: failed to create "
+ "equivalent cpu table\n");
return -EINVAL;
}
@@ -343,7 +289,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
unsigned int uninitialized_var(mc_size);
struct microcode_header_amd *mc_header;
- mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size);
+ mc = get_next_ucode(ucode_ptr, leftover, &mc_size);
if (!mc)
break;
@@ -353,7 +299,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
vfree(new_mc);
new_rev = mc_header->patch_id;
new_mc = mc;
- } else
+ } else
vfree(mc);
ucode_ptr += mc_size;
@@ -365,9 +311,9 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
if (uci->mc)
vfree(uci->mc);
uci->mc = new_mc;
- pr_debug("microcode: CPU%d found a matching microcode update with"
- " version 0x%x (current=0x%x)\n",
- cpu, new_rev, uci->cpu_sig.rev);
+ pr_debug("microcode: CPU%d found a matching microcode "
+ "update with version 0x%x (current=0x%x)\n",
+ cpu, new_rev, uci->cpu_sig.rev);
} else
vfree(new_mc);
}
@@ -377,12 +323,6 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
return (int)leftover;
}
-static int get_ucode_fw(void *to, const void *from, size_t n)
-{
- memcpy(to, from, n);
- return 0;
-}
-
static int request_microcode_fw(int cpu, struct device *device)
{
const char *fw_name = "amd-ucode/microcode_amd.bin";
@@ -394,12 +334,11 @@ static int request_microcode_fw(int cpu, struct device *device)
ret = request_firmware(&firmware, fw_name, device);
if (ret) {
- printk(KERN_ERR "microcode: ucode data file %s load failed\n", fw_name);
+ printk(KERN_ERR "microcode: failed to load file %s\n", fw_name);
return ret;
}
- ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size,
- &get_ucode_fw);
+ ret = generic_load_microcode(cpu, firmware->data, firmware->size);
release_firmware(firmware);
@@ -408,8 +347,8 @@ static int request_microcode_fw(int cpu, struct device *device)
static int request_microcode_user(int cpu, const void __user *buf, size_t size)
{
- printk(KERN_WARNING "microcode: AMD microcode update via /dev/cpu/microcode"
- "is not supported\n");
+ printk(KERN_INFO "microcode: AMD microcode update via "
+ "/dev/cpu/microcode not supported\n");
return -1;
}
@@ -433,3 +372,4 @@ struct microcode_ops * __init init_amd_microcode(void)
{
return &microcode_amd_ops;
}
+
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 82fb2809ce3..c9b721ba968 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -99,7 +99,7 @@ MODULE_LICENSE("GPL");
#define MICROCODE_VERSION "2.00"
-struct microcode_ops *microcode_ops;
+static struct microcode_ops *microcode_ops;
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
static DEFINE_MUTEX(microcode_mutex);
@@ -203,7 +203,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
#endif
/* fake device for request_firmware */
-struct platform_device *microcode_pdev;
+static struct platform_device *microcode_pdev;
static ssize_t reload_store(struct sys_device *dev,
struct sysdev_attribute *attr,
@@ -272,13 +272,18 @@ static struct attribute_group mc_attr_group = {
.name = "microcode",
};
-static void microcode_fini_cpu(int cpu)
+static void __microcode_fini_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- mutex_lock(&microcode_mutex);
microcode_ops->microcode_fini_cpu(cpu);
uci->valid = 0;
+}
+
+static void microcode_fini_cpu(int cpu)
+{
+ mutex_lock(&microcode_mutex);
+ __microcode_fini_cpu(cpu);
mutex_unlock(&microcode_mutex);
}
@@ -306,12 +311,16 @@ static int microcode_resume_cpu(int cpu)
* to this cpu (a bit of paranoia):
*/
if (microcode_ops->collect_cpu_info(cpu, &nsig)) {
- microcode_fini_cpu(cpu);
+ __microcode_fini_cpu(cpu);
+ printk(KERN_ERR "failed to collect_cpu_info for resuming cpu #%d\n",
+ cpu);
return -1;
}
- if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) {
- microcode_fini_cpu(cpu);
+ if ((nsig.sig != uci->cpu_sig.sig) || (nsig.pf != uci->cpu_sig.pf)) {
+ __microcode_fini_cpu(cpu);
+ printk(KERN_ERR "cached ucode doesn't match the resuming cpu #%d\n",
+ cpu);
/* Should we look for a new ucode here? */
return 1;
}
@@ -319,7 +328,7 @@ static int microcode_resume_cpu(int cpu)
return 0;
}
-void microcode_update_cpu(int cpu)
+static void microcode_update_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
int err = 0;
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 622dc4a2178..b7f4c929e61 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -155,6 +155,7 @@ static DEFINE_SPINLOCK(microcode_update_lock);
static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data(cpu_num);
+ unsigned long flags;
unsigned int val[2];
memset(csig, 0, sizeof(*csig));
@@ -174,11 +175,16 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
csig->pf = 1 << ((val[1] >> 18) & 7);
}
+ /* serialize access to the physical write to MSR 0x79 */
+ spin_lock_irqsave(&microcode_update_lock, flags);
+
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* see notes above for revision 1.07. Apparent chip bug */
sync_core();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
+ spin_unlock_irqrestore(&microcode_update_lock, flags);
+
pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
csig->sig, csig->pf, csig->rev);
@@ -465,7 +471,7 @@ static void microcode_fini_cpu(int cpu)
uci->mc = NULL;
}
-struct microcode_ops microcode_intel_ops = {
+static struct microcode_ops microcode_intel_ops = {
.request_microcode_user = request_microcode_user,
.request_microcode_fw = request_microcode_fw,
.collect_cpu_info = collect_cpu_info,
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index f98f4e1dba0..45e3b69808b 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -586,23 +586,23 @@ static void __init __get_smp_config(unsigned int early)
{
struct intel_mp_floating *mpf = mpf_found;
- if (x86_quirks->mach_get_smp_config) {
- if (x86_quirks->mach_get_smp_config(early))
- return;
- }
+ if (!mpf)
+ return;
+
if (acpi_lapic && early)
return;
+
/*
- * ACPI supports both logical (e.g. Hyper-Threading) and physical
- * processors, where MPS only supports physical.
+ * MPS doesn't support hyperthreading, aka only have
+ * thread 0 apic id in MPS table
*/
- if (acpi_lapic && acpi_ioapic) {
- printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
- "information\n");
+ if (acpi_lapic && acpi_ioapic)
return;
- } else if (acpi_lapic)
- printk(KERN_INFO "Using ACPI for processor (LAPIC) "
- "configuration information\n");
+
+ if (x86_quirks->mach_get_smp_config) {
+ if (x86_quirks->mach_get_smp_config(early))
+ return;
+ }
printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
mpf->mpf_specification);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 2c97f07f1c2..8bd1bf9622a 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -131,6 +131,11 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count)
atomic_dec(&nmi_active);
}
+static void __acpi_nmi_disable(void *__unused)
+{
+ apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
+}
+
int __init check_nmi_watchdog(void)
{
unsigned int *prev_nmi_count;
@@ -179,8 +184,12 @@ int __init check_nmi_watchdog(void)
kfree(prev_nmi_count);
return 0;
error:
- if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
- disable_8259A_irq(0);
+ if (nmi_watchdog == NMI_IO_APIC) {
+ if (!timer_through_8259)
+ disable_8259A_irq(0);
+ on_each_cpu(__acpi_nmi_disable, NULL, 1);
+ }
+
#ifdef CONFIG_X86_32
timer_ack = 0;
#endif
@@ -199,12 +208,17 @@ static int __init setup_nmi_watchdog(char *str)
++str;
}
- get_option(&str, &nmi);
-
- if (nmi >= NMI_INVALID)
- return 0;
+ if (!strncmp(str, "lapic", 5))
+ nmi_watchdog = NMI_LOCAL_APIC;
+ else if (!strncmp(str, "ioapic", 6))
+ nmi_watchdog = NMI_IO_APIC;
+ else {
+ get_option(&str, &nmi);
+ if (nmi >= NMI_INVALID)
+ return 0;
+ nmi_watchdog = nmi;
+ }
- nmi_watchdog = nmi;
return 1;
}
__setup("nmi_watchdog=", setup_nmi_watchdog);
@@ -285,11 +299,6 @@ void acpi_nmi_enable(void)
on_each_cpu(__acpi_nmi_enable, NULL, 1);
}
-static void __acpi_nmi_disable(void *__unused)
-{
- apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
-}
-
/*
* Disable timer based NMIs on all CPUs:
*/
@@ -340,6 +349,8 @@ void stop_apic_nmi_watchdog(void *unused)
return;
if (nmi_watchdog == NMI_LOCAL_APIC)
lapic_watchdog_stop();
+ else
+ __acpi_nmi_disable(NULL);
__get_cpu_var(wd_enabled) = 0;
atomic_dec(&nmi_active);
}
@@ -465,6 +476,24 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
#ifdef CONFIG_SYSCTL
+static void enable_ioapic_nmi_watchdog_single(void *unused)
+{
+ __get_cpu_var(wd_enabled) = 1;
+ atomic_inc(&nmi_active);
+ __acpi_nmi_enable(NULL);
+}
+
+static void enable_ioapic_nmi_watchdog(void)
+{
+ on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
+ touch_nmi_watchdog();
+}
+
+static void disable_ioapic_nmi_watchdog(void)
+{
+ on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
+}
+
static int __init setup_unknown_nmi_panic(char *str)
{
unknown_nmi_panic = 1;
@@ -507,6 +536,11 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
enable_lapic_nmi_watchdog();
else
disable_lapic_nmi_watchdog();
+ } else if (nmi_watchdog == NMI_IO_APIC) {
+ if (nmi_watchdog_enabled)
+ enable_ioapic_nmi_watchdog();
+ else
+ disable_ioapic_nmi_watchdog();
} else {
printk(KERN_WARNING
"NMI watchdog doesn't know what hardware to touch\n");
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 4caff39078e..0deea37a53c 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -31,7 +31,7 @@
#include <asm/numaq.h>
#include <asm/topology.h>
#include <asm/processor.h>
-#include <asm/mpspec.h>
+#include <asm/genapic.h>
#include <asm/e820.h>
#include <asm/setup.h>
@@ -235,6 +235,13 @@ static int __init numaq_setup_ioapic_ids(void)
return 1;
}
+static int __init numaq_update_genapic(void)
+{
+ genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
+
+ return 0;
+}
+
static struct x86_quirks numaq_x86_quirks __initdata = {
.arch_pre_time_init = numaq_pre_time_init,
.arch_time_init = NULL,
@@ -250,6 +257,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
.mpc_oem_pci_bus = mpc_oem_pci_bus,
.smp_read_mpc_oem = smp_read_mpc_oem,
.setup_ioapic_ids = numaq_setup_ioapic_ids,
+ .update_genapic = numaq_update_genapic,
};
void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 0e9f1982b1d..95777b0faa7 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -7,7 +7,8 @@
#include <asm/paravirt.h>
-static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags)
+static inline void
+default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
{
__raw_spin_lock(lock);
}
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a42b02b4df6..a35eaa379ff 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -123,6 +123,8 @@ static void free_iommu(unsigned long offset, int size)
spin_lock_irqsave(&iommu_bitmap_lock, flags);
iommu_area_free(iommu_gart_bitmap, offset, size);
+ if (offset >= next_bit)
+ next_bit = offset + size;
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
@@ -743,10 +745,8 @@ void __init gart_iommu_init(void)
unsigned long scratch;
long i;
- if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
- printk(KERN_INFO "PCI-GART: No AMD GART found.\n");
+ if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0)
return;
- }
#ifndef CONFIG_AGP_AMD64
no_agp = 1;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b06100f1d61..9bfc3f719d4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -9,6 +9,7 @@
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <asm/system.h>
+#include <asm/apic.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -123,6 +124,21 @@ void default_idle(void)
EXPORT_SYMBOL(default_idle);
#endif
+void stop_this_cpu(void *dummy)
+{
+ local_irq_disable();
+ /*
+ * Remove this CPU:
+ */
+ cpu_clear(smp_processor_id(), cpu_online_map);
+ disable_local_APIC();
+
+ for (;;) {
+ if (hlt_works(smp_processor_id()))
+ halt();
+ }
+}
+
static void do_nothing(void *unused)
{
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a6d8c12e10..06180dff5b2 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -929,17 +929,16 @@ void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
switch (c->x86) {
case 0x6:
switch (c->x86_model) {
+ case 0 ... 0xC:
+ /* sorry, don't know about them */
+ break;
case 0xD:
case 0xE: /* Pentium M */
bts_configure(&bts_cfg_pentium_m);
break;
- case 0xF: /* Core2 */
- case 0x1C: /* Atom */
+ default: /* Core2, Atom, ... */
bts_configure(&bts_cfg_core2);
break;
- default:
- /* sorry, don't know about them */
- break;
}
break;
case 0xF:
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index cc5a2545dd4..61f718df6ee 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -21,6 +21,9 @@
# include <asm/iommu.h>
#endif
+#include <mach_ipi.h>
+
+
/*
* Power off function, if any
*/
@@ -36,7 +39,10 @@ int reboot_force;
static int reboot_cpu = -1;
#endif
-/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old]
+/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
+bool port_cf9_safe = false;
+
+/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
warm Don't set the cold reboot flag
cold Set the cold reboot flag
bios Reboot by jumping through the BIOS (only for X86_32)
@@ -45,6 +51,7 @@ static int reboot_cpu = -1;
kbd Use the keyboard controller. cold reset (default)
acpi Use the RESET_REG in the FADT
efi Use efi reset_system runtime service
+ pci Use the so-called "PCI reset register", CF9
force Avoid anything that could hang.
*/
static int __init reboot_setup(char *str)
@@ -79,6 +86,7 @@ static int __init reboot_setup(char *str)
case 'k':
case 't':
case 'e':
+ case 'p':
reboot_type = *str;
break;
@@ -404,12 +412,27 @@ static void native_machine_emergency_restart(void)
reboot_type = BOOT_KBD;
break;
-
case BOOT_EFI:
if (efi_enabled)
- efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD,
+ efi.reset_system(reboot_mode ?
+ EFI_RESET_WARM :
+ EFI_RESET_COLD,
EFI_SUCCESS, 0, NULL);
+ reboot_type = BOOT_KBD;
+ break;
+ case BOOT_CF9:
+ port_cf9_safe = true;
+ /* fall through */
+
+ case BOOT_CF9_COND:
+ if (port_cf9_safe) {
+ u8 cf9 = inb(0xcf9) & ~6;
+ outb(cf9|2, 0xcf9); /* Request hard reset */
+ udelay(50);
+ outb(cf9|6, 0xcf9); /* Actually do the reset */
+ udelay(50);
+ }
reboot_type = BOOT_KBD;
break;
}
@@ -470,6 +493,11 @@ static void native_machine_restart(char *__unused)
static void native_machine_halt(void)
{
+ /* stop other cpus and apics */
+ machine_shutdown();
+
+ /* stop this cpu */
+ stop_this_cpu(NULL);
}
static void native_machine_power_off(void)
@@ -523,3 +551,95 @@ void machine_crash_shutdown(struct pt_regs *regs)
machine_ops.crash_shutdown(regs);
}
#endif
+
+
+#if defined(CONFIG_SMP)
+
+/* This keeps a track of which one is crashing cpu. */
+static int crashing_cpu;
+static nmi_shootdown_cb shootdown_callback;
+
+static atomic_t waiting_for_crash_ipi;
+
+static int crash_nmi_callback(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ int cpu;
+
+ if (val != DIE_NMI_IPI)
+ return NOTIFY_OK;
+
+ cpu = raw_smp_processor_id();
+
+ /* Don't do anything if this handler is invoked on crashing cpu.
+ * Otherwise, system will completely hang. Crashing cpu can get
+ * an NMI if system was initially booted with nmi_watchdog parameter.
+ */
+ if (cpu == crashing_cpu)
+ return NOTIFY_STOP;
+ local_irq_disable();
+
+ shootdown_callback(cpu, (struct die_args *)data);
+
+ atomic_dec(&waiting_for_crash_ipi);
+ /* Assume hlt works */
+ halt();
+ for (;;)
+ cpu_relax();
+
+ return 1;
+}
+
+static void smp_send_nmi_allbutself(void)
+{
+ cpumask_t mask = cpu_online_map;
+ cpu_clear(safe_smp_processor_id(), mask);
+ if (!cpus_empty(mask))
+ send_IPI_mask(mask, NMI_VECTOR);
+}
+
+static struct notifier_block crash_nmi_nb = {
+ .notifier_call = crash_nmi_callback,
+};
+
+/* Halt all other CPUs, calling the specified function on each of them
+ *
+ * This function can be used to halt all other CPUs on crash
+ * or emergency reboot time. The function passed as parameter
+ * will be called inside a NMI handler on all CPUs.
+ */
+void nmi_shootdown_cpus(nmi_shootdown_cb callback)
+{
+ unsigned long msecs;
+ local_irq_disable();
+
+ /* Make a note of crashing cpu. Will be used in NMI callback.*/
+ crashing_cpu = safe_smp_processor_id();
+
+ shootdown_callback = callback;
+
+ atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+ /* Would it be better to replace the trap vector here? */
+ if (register_die_notifier(&crash_nmi_nb))
+ return; /* return what? */
+ /* Ensure the new callback function is set before sending
+ * out the NMI
+ */
+ wmb();
+
+ smp_send_nmi_allbutself();
+
+ msecs = 1000; /* Wait at most a second for the other cpus to stop */
+ while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
+ mdelay(1);
+ msecs--;
+ }
+
+ /* Leave the nmi callback set */
+}
+#else /* !CONFIG_SMP */
+void nmi_shootdown_cpus(nmi_shootdown_cb callback)
+{
+ /* No other CPUs to shoot down */
+}
+#endif
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 6f50664b2ba..a160f311972 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -10,15 +10,12 @@
#include <asm/page.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
-#include <asm/pgtable.h>
/*
* Must be relocatable PIC code callable as a C function
*/
#define PTR(x) (x << 2)
-#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define PAE_PGD_ATTR (_PAGE_PRESENT)
/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
* ~ control_page + PAGE_SIZE are used as data storage and stack for
@@ -39,7 +36,6 @@
#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c)
.text
- .align PAGE_SIZE
.globl relocate_kernel
relocate_kernel:
/* Save the CPU context, used for jumping back */
@@ -60,117 +56,6 @@ relocate_kernel:
movl %cr4, %eax
movl %eax, CR4(%edi)
-#ifdef CONFIG_X86_PAE
- /* map the control page at its virtual address */
-
- movl PTR(VA_PGD)(%ebp), %edi
- movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
- andl $0xc0000000, %eax
- shrl $27, %eax
- addl %edi, %eax
-
- movl PTR(PA_PMD_0)(%ebp), %edx
- orl $PAE_PGD_ATTR, %edx
- movl %edx, (%eax)
-
- movl PTR(VA_PMD_0)(%ebp), %edi
- movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
- andl $0x3fe00000, %eax
- shrl $18, %eax
- addl %edi, %eax
-
- movl PTR(PA_PTE_0)(%ebp), %edx
- orl $PAGE_ATTR, %edx
- movl %edx, (%eax)
-
- movl PTR(VA_PTE_0)(%ebp), %edi
- movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
- andl $0x001ff000, %eax
- shrl $9, %eax
- addl %edi, %eax
-
- movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
- orl $PAGE_ATTR, %edx
- movl %edx, (%eax)
-
- /* identity map the control page at its physical address */
-
- movl PTR(VA_PGD)(%ebp), %edi
- movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
- andl $0xc0000000, %eax
- shrl $27, %eax
- addl %edi, %eax
-
- movl PTR(PA_PMD_1)(%ebp), %edx
- orl $PAE_PGD_ATTR, %edx
- movl %edx, (%eax)
-
- movl PTR(VA_PMD_1)(%ebp), %edi
- movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
- andl $0x3fe00000, %eax
- shrl $18, %eax
- addl %edi, %eax
-
- movl PTR(PA_PTE_1)(%ebp), %edx
- orl $PAGE_ATTR, %edx
- movl %edx, (%eax)
-
- movl PTR(VA_PTE_1)(%ebp), %edi
- movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
- andl $0x001ff000, %eax
- shrl $9, %eax
- addl %edi, %eax
-
- movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
- orl $PAGE_ATTR, %edx
- movl %edx, (%eax)
-#else
- /* map the control page at its virtual address */
-
- movl PTR(VA_PGD)(%ebp), %edi
- movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
- andl $0xffc00000, %eax
- shrl $20, %eax
- addl %edi, %eax
-
- movl PTR(PA_PTE_0)(%ebp), %edx
- orl $PAGE_ATTR, %edx
- movl %edx, (%eax)
-
- movl PTR(VA_PTE_0)(%ebp), %edi
- movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
- andl $0x003ff000, %eax
- shrl $10, %eax
- addl %edi, %eax
-
- movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
- orl $PAGE_ATTR, %edx
- movl %edx, (%eax)
-
- /* identity map the control page at its physical address */
-
- movl PTR(VA_PGD)(%ebp), %edi
- movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
- andl $0xffc00000, %eax
- shrl $20, %eax
- addl %edi, %eax
-
- movl PTR(PA_PTE_1)(%ebp), %edx
- orl $PAGE_ATTR, %edx
- movl %edx, (%eax)
-
- movl PTR(VA_PTE_1)(%ebp), %edi
- movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
- andl $0x003ff000, %eax
- shrl $10, %eax
- addl %edi, %eax
-
- movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
- orl $PAGE_ATTR, %edx
- movl %edx, (%eax)
-#endif
-
-relocate_new_kernel:
/* read the arguments and say goodbye to the stack */
movl 20+4(%esp), %ebx /* page_list */
movl 20+8(%esp), %ebp /* list of pages */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 81f5d22747a..a3122382859 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -98,6 +98,7 @@
#include <mach_apic.h>
#include <asm/paravirt.h>
+#include <asm/hypervisor.h>
#include <asm/percpu.h>
#include <asm/topology.h>
@@ -584,161 +585,24 @@ static int __init setup_elfcorehdr(char *arg)
early_param("elfcorehdr", setup_elfcorehdr);
#endif
-static struct x86_quirks default_x86_quirks __initdata;
-
-struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
-
-/*
- * Some BIOSes seem to corrupt the low 64k of memory during events
- * like suspend/resume and unplugging an HDMI cable. Reserve all
- * remaining free memory in that area and fill it with a distinct
- * pattern.
- */
-#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
-#define MAX_SCAN_AREAS 8
-
-static int __read_mostly memory_corruption_check = -1;
-
-static unsigned __read_mostly corruption_check_size = 64*1024;
-static unsigned __read_mostly corruption_check_period = 60; /* seconds */
-
-static struct e820entry scan_areas[MAX_SCAN_AREAS];
-static int num_scan_areas;
-
-
-static int set_corruption_check(char *arg)
+static int __init default_update_genapic(void)
{
- char *end;
-
- memory_corruption_check = simple_strtol(arg, &end, 10);
-
- return (*end == 0) ? 0 : -EINVAL;
-}
-early_param("memory_corruption_check", set_corruption_check);
-
-static int set_corruption_check_period(char *arg)
-{
- char *end;
-
- corruption_check_period = simple_strtoul(arg, &end, 10);
-
- return (*end == 0) ? 0 : -EINVAL;
-}
-early_param("memory_corruption_check_period", set_corruption_check_period);
-
-static int set_corruption_check_size(char *arg)
-{
- char *end;
- unsigned size;
-
- size = memparse(arg, &end);
-
- if (*end == '\0')
- corruption_check_size = size;
-
- return (size == corruption_check_size) ? 0 : -EINVAL;
-}
-early_param("memory_corruption_check_size", set_corruption_check_size);
-
-
-static void __init setup_bios_corruption_check(void)
-{
- u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */
-
- if (memory_corruption_check == -1) {
- memory_corruption_check =
-#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
- 1
-#else
- 0
+#ifdef CONFIG_X86_SMP
+# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64)
+ genapic->wakeup_cpu = wakeup_secondary_cpu_via_init;
+# endif
#endif
- ;
- }
-
- if (corruption_check_size == 0)
- memory_corruption_check = 0;
-
- if (!memory_corruption_check)
- return;
-
- corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
-
- while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
- u64 size;
- addr = find_e820_area_size(addr, &size, PAGE_SIZE);
-
- if (addr == 0)
- break;
- if ((addr + size) > corruption_check_size)
- size = corruption_check_size - addr;
-
- if (size == 0)
- break;
-
- e820_update_range(addr, size, E820_RAM, E820_RESERVED);
- scan_areas[num_scan_areas].addr = addr;
- scan_areas[num_scan_areas].size = size;
- num_scan_areas++;
-
- /* Assume we've already mapped this early memory */
- memset(__va(addr), 0, size);
-
- addr += size;
- }
-
- printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
- num_scan_areas);
- update_e820();
-}
-
-static struct timer_list periodic_check_timer;
-
-void check_for_bios_corruption(void)
-{
- int i;
- int corruption = 0;
-
- if (!memory_corruption_check)
- return;
-
- for(i = 0; i < num_scan_areas; i++) {
- unsigned long *addr = __va(scan_areas[i].addr);
- unsigned long size = scan_areas[i].size;
-
- for(; size; addr++, size -= sizeof(unsigned long)) {
- if (!*addr)
- continue;
- printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
- addr, __pa(addr), *addr);
- corruption = 1;
- *addr = 0;
- }
- }
-
- WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n");
-}
-
-static void periodic_check_for_corruption(unsigned long data)
-{
- check_for_bios_corruption();
- mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ));
+ return 0;
}
-void start_periodic_check_for_corruption(void)
-{
- if (!memory_corruption_check || corruption_check_period == 0)
- return;
-
- printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
- corruption_check_period);
+static struct x86_quirks default_x86_quirks __initdata = {
+ .update_genapic = default_update_genapic,
+};
- init_timer(&periodic_check_timer);
- periodic_check_timer.function = &periodic_check_for_corruption;
- periodic_check_for_corruption(0);
-}
-#endif
+struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
+#ifdef CONFIG_X86_RESERVE_LOW_64K
static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
{
printk(KERN_NOTICE
@@ -750,6 +614,7 @@ static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
return 0;
}
+#endif
/* List of systems that have known low memory corruption BIOS problems */
static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
@@ -795,6 +660,9 @@ void __init setup_arch(char **cmdline_p)
printk(KERN_INFO "Command line: %s\n", boot_command_line);
#endif
+ /* VMI may relocate the fixmap; do this before touching ioremap area */
+ vmi_init();
+
early_cpu_init();
early_ioremap_init();
@@ -881,13 +749,8 @@ void __init setup_arch(char **cmdline_p)
check_efer();
#endif
-#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
- /*
- * Must be before kernel pagetables are setup
- * or fixmap area is touched.
- */
- vmi_init();
-#endif
+ /* Must be before kernel pagetables are setup */
+ vmi_activate();
/* after early param, so could get panic from serial */
reserve_early_setup_data();
@@ -910,6 +773,12 @@ void __init setup_arch(char **cmdline_p)
dmi_check_system(bad_bios_dmi_table);
+ /*
+ * VMware detection requires dmi to be available, so this
+ * needs to be done after dmi_scan_machine, for the BP.
+ */
+ init_hypervisor(&boot_cpu_data);
+
#ifdef CONFIG_X86_32
probe_roms();
#endif
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
deleted file mode 100644
index cc673aa55ce..00000000000
--- a/arch/x86/kernel/sigframe.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifdef CONFIG_X86_32
-struct sigframe {
- char __user *pretcode;
- int sig;
- struct sigcontext sc;
- /*
- * fpstate is unused. fpstate is moved/allocated after
- * retcode[] below. This movement allows to have the FP state and the
- * future state extensions (xsave) stay together.
- * And at the same time retaining the unused fpstate, prevents changing
- * the offset of extramask[] in the sigframe and thus prevent any
- * legacy application accessing/modifying it.
- */
- struct _fpstate fpstate_unused;
- unsigned long extramask[_NSIG_WORDS-1];
- char retcode[8];
- /* fp state follows here */
-};
-
-struct rt_sigframe {
- char __user *pretcode;
- int sig;
- struct siginfo __user *pinfo;
- void __user *puc;
- struct siginfo info;
- struct ucontext uc;
- char retcode[8];
- /* fp state follows here */
-};
-#else
-struct rt_sigframe {
- char __user *pretcode;
- struct ucontext uc;
- struct siginfo info;
- /* fp state follows here */
-};
-
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs *regs);
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
- sigset_t *set, struct pt_regs *regs);
-#endif
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal.c
index d6dd057d0f2..89bb7668041 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal.c
@@ -1,36 +1,41 @@
/*
* Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
*
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
* 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
+ * 2000-2002 x86-64 support by Andi Kleen
*/
-#include <linux/list.h>
-#include <linux/personality.h>
-#include <linux/binfmts.h>
-#include <linux/suspend.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
#include <linux/kernel.h>
-#include <linux/ptrace.h>
#include <linux/signal.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
#include <linux/errno.h>
-#include <linux/sched.h>
#include <linux/wait.h>
+#include <linux/ptrace.h>
#include <linux/tracehook.h>
-#include <linux/elf.h>
-#include <linux/smp.h>
-#include <linux/mm.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/personality.h>
+#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/ucontext.h>
-#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/vdso.h>
+
+#ifdef CONFIG_X86_64
+#include <asm/proto.h>
+#include <asm/ia32_unistd.h>
+#include <asm/mce.h>
+#endif /* CONFIG_X86_64 */
+
#include <asm/syscall.h>
#include <asm/syscalls.h>
-#include "sigframe.h"
+#include <asm/sigframe.h>
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -45,74 +50,6 @@
# define FIX_EFLAGS __FIX_EFLAGS
#endif
-/*
- * Atomically swap in the new signal mask, and wait for a signal.
- */
-asmlinkage int
-sys_sigsuspend(int history0, int history1, old_sigset_t mask)
-{
- mask &= _BLOCKABLE;
- spin_lock_irq(&current->sighand->siglock);
- current->saved_sigmask = current->blocked;
- siginitset(&current->blocked, mask);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- current->state = TASK_INTERRUPTIBLE;
- schedule();
- set_restore_sigmask();
-
- return -ERESTARTNOHAND;
-}
-
-asmlinkage int
-sys_sigaction(int sig, const struct old_sigaction __user *act,
- struct old_sigaction __user *oact)
-{
- struct k_sigaction new_ka, old_ka;
- int ret;
-
- if (act) {
- old_sigset_t mask;
-
- if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
- __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
- __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
- return -EFAULT;
-
- __get_user(new_ka.sa.sa_flags, &act->sa_flags);
- __get_user(mask, &act->sa_mask);
- siginitset(&new_ka.sa.sa_mask, mask);
- }
-
- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
- if (!ret && oact) {
- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
- __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
- __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
- return -EFAULT;
-
- __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
- __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
- }
-
- return ret;
-}
-
-asmlinkage int sys_sigaltstack(unsigned long bx)
-{
- /*
- * This is needed to make gcc realize it doesn't own the
- * "struct pt_regs"
- */
- struct pt_regs *regs = (struct pt_regs *)&bx;
- const stack_t __user *uss = (const stack_t __user *)bx;
- stack_t __user *uoss = (stack_t __user *)regs->cx;
-
- return do_sigaltstack(uss, uoss, regs->sp);
-}
-
#define COPY(x) { \
err |= __get_user(regs->x, &sc->x); \
}
@@ -123,7 +60,7 @@ asmlinkage int sys_sigaltstack(unsigned long bx)
regs->seg = tmp; \
}
-#define COPY_SEG_STRICT(seg) { \
+#define COPY_SEG_CPL3(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp | 3; \
@@ -135,9 +72,6 @@ asmlinkage int sys_sigaltstack(unsigned long bx)
loadsegment(seg, tmp); \
}
-/*
- * Do a signal return; undo the signal stack.
- */
static int
restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
unsigned long *pax)
@@ -149,14 +83,36 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
+#ifdef CONFIG_X86_32
GET_SEG(gs);
COPY_SEG(fs);
COPY_SEG(es);
COPY_SEG(ds);
+#endif /* CONFIG_X86_32 */
+
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
- COPY_SEG_STRICT(cs);
- COPY_SEG_STRICT(ss);
+
+#ifdef CONFIG_X86_64
+ COPY(r8);
+ COPY(r9);
+ COPY(r10);
+ COPY(r11);
+ COPY(r12);
+ COPY(r13);
+ COPY(r14);
+ COPY(r15);
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_X86_32
+ COPY_SEG_CPL3(cs);
+ COPY_SEG_CPL3(ss);
+#else /* !CONFIG_X86_32 */
+ /* Kernel saves and restores only the CS segment register on signals,
+ * which is the bare minimum needed to allow mixed 32/64-bit code.
+ * App's signal handler can save/restore other segments if needed. */
+ COPY_SEG_CPL3(cs);
+#endif /* CONFIG_X86_32 */
err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
@@ -169,102 +125,24 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
return err;
}
-asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
-{
- struct sigframe __user *frame;
- struct pt_regs *regs;
- unsigned long ax;
- sigset_t set;
-
- regs = (struct pt_regs *) &__unused;
- frame = (struct sigframe __user *)(regs->sp - 8);
-
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
- goto badframe;
- if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1
- && __copy_from_user(&set.sig[1], &frame->extramask,
- sizeof(frame->extramask))))
- goto badframe;
-
- sigdelsetmask(&set, ~_BLOCKABLE);
- spin_lock_irq(&current->sighand->siglock);
- current->blocked = set;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- if (restore_sigcontext(regs, &frame->sc, &ax))
- goto badframe;
- return ax;
-
-badframe:
- if (show_unhandled_signals && printk_ratelimit()) {
- printk("%s%s[%d] bad frame in sigreturn frame:"
- "%p ip:%lx sp:%lx oeax:%lx",
- task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
- current->comm, task_pid_nr(current), frame, regs->ip,
- regs->sp, regs->orig_ax);
- print_vma_addr(" in ", regs->ip);
- printk(KERN_CONT "\n");
- }
-
- force_sig(SIGSEGV, current);
-
- return 0;
-}
-
-static long do_rt_sigreturn(struct pt_regs *regs)
-{
- struct rt_sigframe __user *frame;
- unsigned long ax;
- sigset_t set;
-
- frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
- goto badframe;
- if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
- goto badframe;
-
- sigdelsetmask(&set, ~_BLOCKABLE);
- spin_lock_irq(&current->sighand->siglock);
- current->blocked = set;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
- goto badframe;
-
- if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
- goto badframe;
-
- return ax;
-
-badframe:
- signal_fault(regs, frame, "rt_sigreturn");
- return 0;
-}
-
-asmlinkage int sys_rt_sigreturn(unsigned long __unused)
-{
- struct pt_regs *regs = (struct pt_regs *)&__unused;
-
- return do_rt_sigreturn(regs);
-}
-
-/*
- * Set up a signal frame.
- */
static int
setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask)
{
- int tmp, err = 0;
+ int err = 0;
- err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
- savesegment(gs, tmp);
- err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
+#ifdef CONFIG_X86_32
+ {
+ unsigned int tmp;
+ savesegment(gs, tmp);
+ err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
+ }
+ err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
+#endif /* CONFIG_X86_32 */
+
err |= __put_user(regs->di, &sc->di);
err |= __put_user(regs->si, &sc->si);
err |= __put_user(regs->bp, &sc->bp);
@@ -273,19 +151,33 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
err |= __put_user(regs->dx, &sc->dx);
err |= __put_user(regs->cx, &sc->cx);
err |= __put_user(regs->ax, &sc->ax);
+#ifdef CONFIG_X86_64
+ err |= __put_user(regs->r8, &sc->r8);
+ err |= __put_user(regs->r9, &sc->r9);
+ err |= __put_user(regs->r10, &sc->r10);
+ err |= __put_user(regs->r11, &sc->r11);
+ err |= __put_user(regs->r12, &sc->r12);
+ err |= __put_user(regs->r13, &sc->r13);
+ err |= __put_user(regs->r14, &sc->r14);
+ err |= __put_user(regs->r15, &sc->r15);
+#endif /* CONFIG_X86_64 */
+
err |= __put_user(current->thread.trap_no, &sc->trapno);
err |= __put_user(current->thread.error_code, &sc->err);
err |= __put_user(regs->ip, &sc->ip);
+#ifdef CONFIG_X86_32
err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
err |= __put_user(regs->flags, &sc->flags);
err |= __put_user(regs->sp, &sc->sp_at_signal);
err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
+#else /* !CONFIG_X86_32 */
+ err |= __put_user(regs->flags, &sc->flags);
+ err |= __put_user(regs->cs, &sc->cs);
+ err |= __put_user(0, &sc->gs);
+ err |= __put_user(0, &sc->fs);
+#endif /* CONFIG_X86_32 */
- tmp = save_i387_xstate(fpstate);
- if (tmp < 0)
- err = 1;
- else
- err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
+ err |= __put_user(fpstate, &sc->fpstate);
/* non-iBCS2 extensions.. */
err |= __put_user(mask, &sc->oldmask);
@@ -295,6 +187,32 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
}
/*
+ * Set up a signal frame.
+ */
+#ifdef CONFIG_X86_32
+static const struct {
+ u16 poplmovl;
+ u32 val;
+ u16 int80;
+} __attribute__((packed)) retcode = {
+ 0xb858, /* popl %eax; movl $..., %eax */
+ __NR_sigreturn,
+ 0x80cd, /* int $0x80 */
+};
+
+static const struct {
+ u8 movl;
+ u32 val;
+ u16 int80;
+ u8 pad;
+} __attribute__((packed)) rt_retcode = {
+ 0xb8, /* movl $..., %eax */
+ __NR_rt_sigreturn,
+ 0x80cd, /* int $0x80 */
+ 0
+};
+
+/*
* Determine which stack to use..
*/
static inline void __user *
@@ -328,6 +246,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
if (used_math()) {
sp = sp - sig_xstate_size;
*fpstate = (struct _fpstate *) sp;
+ if (save_i387_xstate(*fpstate) < 0)
+ return (void __user *)-1L;
}
sp -= frame_size;
@@ -383,9 +303,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
* reasons and because gdb uses it as a signature to notice
* signal handler stack frames.
*/
- err |= __put_user(0xb858, (short __user *)(frame->retcode+0));
- err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2));
- err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
+ err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode);
if (err)
return -EFAULT;
@@ -454,9 +372,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
* reasons and because gdb uses it as a signature to notice
* signal handler stack frames.
*/
- err |= __put_user(0xb8, (char __user *)(frame->retcode+0));
- err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1));
- err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
+ err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
if (err)
return -EFAULT;
@@ -475,23 +391,293 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
return 0;
}
+#else /* !CONFIG_X86_32 */
+/*
+ * Determine which stack to use..
+ */
+static void __user *
+get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size)
+{
+ /* Default to using normal stack - redzone*/
+ sp -= 128;
+
+ /* This is the X/Open sanctioned signal stack switching. */
+ if (ka->sa.sa_flags & SA_ONSTACK) {
+ if (sas_ss_flags(sp) == 0)
+ sp = current->sas_ss_sp + current->sas_ss_size;
+ }
+
+ return (void __user *)round_down(sp - size, 64);
+}
+
+static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
+{
+ struct rt_sigframe __user *frame;
+ void __user *fp = NULL;
+ int err = 0;
+ struct task_struct *me = current;
+
+ if (used_math()) {
+ fp = get_stack(ka, regs->sp, sig_xstate_size);
+ frame = (void __user *)round_down(
+ (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
+
+ if (save_i387_xstate(fp) < 0)
+ return -EFAULT;
+ } else
+ frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8;
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ return -EFAULT;
+
+ if (ka->sa.sa_flags & SA_SIGINFO) {
+ if (copy_siginfo_to_user(&frame->info, info))
+ return -EFAULT;
+ }
+
+ /* Create the ucontext. */
+ if (cpu_has_xsave)
+ err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+ else
+ err |= __put_user(0, &frame->uc.uc_flags);
+ err |= __put_user(0, &frame->uc.uc_link);
+ err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+ err |= __put_user(sas_ss_flags(regs->sp),
+ &frame->uc.uc_stack.ss_flags);
+ err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ /* x86-64 should always use SA_RESTORER. */
+ if (ka->sa.sa_flags & SA_RESTORER) {
+ err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
+ } else {
+ /* could use a vstub here */
+ return -EFAULT;
+ }
+
+ if (err)
+ return -EFAULT;
+
+ /* Set up registers for signal handler */
+ regs->di = sig;
+ /* In case the signal handler was declared without prototypes */
+ regs->ax = 0;
+
+ /* This also works for non SA_SIGINFO handlers because they expect the
+ next argument after the signal number on the stack. */
+ regs->si = (unsigned long)&frame->info;
+ regs->dx = (unsigned long)&frame->uc;
+ regs->ip = (unsigned long) ka->sa.sa_handler;
+
+ regs->sp = (unsigned long)frame;
+
+ /* Set up the CS register to run signal handlers in 64-bit mode,
+ even if the handler happens to be interrupting 32-bit code. */
+ regs->cs = __USER_CS;
+
+ return 0;
+}
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_32
+/*
+ * Atomically swap in the new signal mask, and wait for a signal.
+ */
+asmlinkage int
+sys_sigsuspend(int history0, int history1, old_sigset_t mask)
+{
+ mask &= _BLOCKABLE;
+ spin_lock_irq(&current->sighand->siglock);
+ current->saved_sigmask = current->blocked;
+ siginitset(&current->blocked, mask);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ set_restore_sigmask();
+
+ return -ERESTARTNOHAND;
+}
+
+asmlinkage int
+sys_sigaction(int sig, const struct old_sigaction __user *act,
+ struct old_sigaction __user *oact)
+{
+ struct k_sigaction new_ka, old_ka;
+ int ret;
+
+ if (act) {
+ old_sigset_t mask;
+
+ if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+ __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+ __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+ return -EFAULT;
+
+ __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+ __get_user(mask, &act->sa_mask);
+ siginitset(&new_ka.sa.sa_mask, mask);
+ }
+
+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+ if (!ret && oact) {
+ if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+ __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+ __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+ return -EFAULT;
+
+ __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+ }
+
+ return ret;
+}
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_32
+asmlinkage int sys_sigaltstack(unsigned long bx)
+{
+ /*
+ * This is needed to make gcc realize it doesn't own the
+ * "struct pt_regs"
+ */
+ struct pt_regs *regs = (struct pt_regs *)&bx;
+ const stack_t __user *uss = (const stack_t __user *)bx;
+ stack_t __user *uoss = (stack_t __user *)regs->cx;
+
+ return do_sigaltstack(uss, uoss, regs->sp);
+}
+#else /* !CONFIG_X86_32 */
+asmlinkage long
+sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+ struct pt_regs *regs)
+{
+ return do_sigaltstack(uss, uoss, regs->sp);
+}
+#endif /* CONFIG_X86_32 */
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+#ifdef CONFIG_X86_32
+asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
+{
+ struct sigframe __user *frame;
+ struct pt_regs *regs;
+ unsigned long ax;
+ sigset_t set;
+
+ regs = (struct pt_regs *) &__unused;
+ frame = (struct sigframe __user *)(regs->sp - 8);
+
+ if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+ goto badframe;
+ if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1
+ && __copy_from_user(&set.sig[1], &frame->extramask,
+ sizeof(frame->extramask))))
+ goto badframe;
+
+ sigdelsetmask(&set, ~_BLOCKABLE);
+ spin_lock_irq(&current->sighand->siglock);
+ current->blocked = set;
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ if (restore_sigcontext(regs, &frame->sc, &ax))
+ goto badframe;
+ return ax;
+
+badframe:
+ signal_fault(regs, frame, "sigreturn");
+
+ return 0;
+}
+#endif /* CONFIG_X86_32 */
+
+static long do_rt_sigreturn(struct pt_regs *regs)
+{
+ struct rt_sigframe __user *frame;
+ unsigned long ax;
+ sigset_t set;
+
+ frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
+ if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+ goto badframe;
+ if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+ goto badframe;
+
+ sigdelsetmask(&set, ~_BLOCKABLE);
+ spin_lock_irq(&current->sighand->siglock);
+ current->blocked = set;
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+ goto badframe;
+
+ if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
+ goto badframe;
+
+ return ax;
+
+badframe:
+ signal_fault(regs, frame, "rt_sigreturn");
+ return 0;
+}
+
+#ifdef CONFIG_X86_32
+asmlinkage int sys_rt_sigreturn(struct pt_regs regs)
+{
+ return do_rt_sigreturn(&regs);
+}
+#else /* !CONFIG_X86_32 */
+asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+{
+ return do_rt_sigreturn(regs);
+}
+#endif /* CONFIG_X86_32 */
/*
* OK, we're invoking a handler:
*/
static int signr_convert(int sig)
{
+#ifdef CONFIG_X86_32
struct thread_info *info = current_thread_info();
if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32)
return info->exec_domain->signal_invmap[sig];
+#endif /* CONFIG_X86_32 */
return sig;
}
+#ifdef CONFIG_X86_32
+
#define is_ia32 1
#define ia32_setup_frame __setup_frame
#define ia32_setup_rt_frame __setup_rt_frame
+#else /* !CONFIG_X86_32 */
+
+#ifdef CONFIG_IA32_EMULATION
+#define is_ia32 test_thread_flag(TIF_IA32)
+#else /* !CONFIG_IA32_EMULATION */
+#define is_ia32 0
+#endif /* CONFIG_IA32_EMULATION */
+
+int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs);
+int ia32_setup_frame(int sig, struct k_sigaction *ka,
+ sigset_t *set, struct pt_regs *regs);
+
+#endif /* CONFIG_X86_32 */
+
static int
setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
@@ -592,7 +778,13 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
return 0;
}
+#ifdef CONFIG_X86_32
#define NR_restart_syscall __NR_restart_syscall
+#else /* !CONFIG_X86_32 */
+#define NR_restart_syscall \
+ test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
+#endif /* CONFIG_X86_32 */
+
/*
* Note that 'init' is a special process: it doesn't get signals it doesn't
* want to handle. Thus you cannot kill init even with a SIGKILL even by
@@ -704,8 +896,9 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
struct task_struct *me = current;
if (show_unhandled_signals && printk_ratelimit()) {
- printk(KERN_INFO
+ printk("%s"
"%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
+ task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
me->comm, me->pid, where, frame,
regs->ip, regs->sp, regs->orig_ax);
print_vma_addr(" in ", regs->ip);
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
deleted file mode 100644
index a5c9627f4db..00000000000
--- a/arch/x86/kernel/signal_64.c
+++ /dev/null
@@ -1,516 +0,0 @@
-/*
- * Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
- *
- * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
- * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
- * 2000-2002 x86-64 support by Andi Kleen
- */
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/errno.h>
-#include <linux/wait.h>
-#include <linux/ptrace.h>
-#include <linux/tracehook.h>
-#include <linux/unistd.h>
-#include <linux/stddef.h>
-#include <linux/personality.h>
-#include <linux/compiler.h>
-#include <linux/uaccess.h>
-
-#include <asm/processor.h>
-#include <asm/ucontext.h>
-#include <asm/i387.h>
-#include <asm/proto.h>
-#include <asm/ia32_unistd.h>
-#include <asm/mce.h>
-#include <asm/syscall.h>
-#include <asm/syscalls.h>
-#include "sigframe.h"
-
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
-#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
- X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
- X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
- X86_EFLAGS_CF)
-
-#ifdef CONFIG_X86_32
-# define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF)
-#else
-# define FIX_EFLAGS __FIX_EFLAGS
-#endif
-
-asmlinkage long
-sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
- struct pt_regs *regs)
-{
- return do_sigaltstack(uss, uoss, regs->sp);
-}
-
-#define COPY(x) { \
- err |= __get_user(regs->x, &sc->x); \
-}
-
-#define COPY_SEG_STRICT(seg) { \
- unsigned short tmp; \
- err |= __get_user(tmp, &sc->seg); \
- regs->seg = tmp | 3; \
-}
-
-/*
- * Do a signal return; undo the signal stack.
- */
-static int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
- unsigned long *pax)
-{
- void __user *buf;
- unsigned int tmpflags;
- unsigned int err = 0;
-
- /* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
- COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
- COPY(dx); COPY(cx); COPY(ip);
- COPY(r8);
- COPY(r9);
- COPY(r10);
- COPY(r11);
- COPY(r12);
- COPY(r13);
- COPY(r14);
- COPY(r15);
-
- /* Kernel saves and restores only the CS segment register on signals,
- * which is the bare minimum needed to allow mixed 32/64-bit code.
- * App's signal handler can save/restore other segments if needed. */
- COPY_SEG_STRICT(cs);
-
- err |= __get_user(tmpflags, &sc->flags);
- regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
- regs->orig_ax = -1; /* disable syscall checks */
-
- err |= __get_user(buf, &sc->fpstate);
- err |= restore_i387_xstate(buf);
-
- err |= __get_user(*pax, &sc->ax);
- return err;
-}
-
-static long do_rt_sigreturn(struct pt_regs *regs)
-{
- struct rt_sigframe __user *frame;
- unsigned long ax;
- sigset_t set;
-
- frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
- goto badframe;
- if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
- goto badframe;
-
- sigdelsetmask(&set, ~_BLOCKABLE);
- spin_lock_irq(&current->sighand->siglock);
- current->blocked = set;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
- goto badframe;
-
- if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
- goto badframe;
-
- return ax;
-
-badframe:
- signal_fault(regs, frame, "rt_sigreturn");
- return 0;
-}
-
-asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
-{
- return do_rt_sigreturn(regs);
-}
-
-/*
- * Set up a signal frame.
- */
-
-static inline int
-setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
- unsigned long mask, struct task_struct *me)
-{
- int err = 0;
-
- err |= __put_user(regs->cs, &sc->cs);
- err |= __put_user(0, &sc->gs);
- err |= __put_user(0, &sc->fs);
-
- err |= __put_user(regs->di, &sc->di);
- err |= __put_user(regs->si, &sc->si);
- err |= __put_user(regs->bp, &sc->bp);
- err |= __put_user(regs->sp, &sc->sp);
- err |= __put_user(regs->bx, &sc->bx);
- err |= __put_user(regs->dx, &sc->dx);
- err |= __put_user(regs->cx, &sc->cx);
- err |= __put_user(regs->ax, &sc->ax);
- err |= __put_user(regs->r8, &sc->r8);
- err |= __put_user(regs->r9, &sc->r9);
- err |= __put_user(regs->r10, &sc->r10);
- err |= __put_user(regs->r11, &sc->r11);
- err |= __put_user(regs->r12, &sc->r12);
- err |= __put_user(regs->r13, &sc->r13);
- err |= __put_user(regs->r14, &sc->r14);
- err |= __put_user(regs->r15, &sc->r15);
- err |= __put_user(me->thread.trap_no, &sc->trapno);
- err |= __put_user(me->thread.error_code, &sc->err);
- err |= __put_user(regs->ip, &sc->ip);
- err |= __put_user(regs->flags, &sc->flags);
- err |= __put_user(mask, &sc->oldmask);
- err |= __put_user(me->thread.cr2, &sc->cr2);
-
- return err;
-}
-
-/*
- * Determine which stack to use..
- */
-
-static void __user *
-get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
-{
- unsigned long sp;
-
- /* Default to using normal stack - redzone*/
- sp = regs->sp - 128;
-
- /* This is the X/Open sanctioned signal stack switching. */
- if (ka->sa.sa_flags & SA_ONSTACK) {
- if (sas_ss_flags(sp) == 0)
- sp = current->sas_ss_sp + current->sas_ss_size;
- }
-
- return (void __user *)round_down(sp - size, 64);
-}
-
-static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs *regs)
-{
- struct rt_sigframe __user *frame;
- void __user *fp = NULL;
- int err = 0;
- struct task_struct *me = current;
-
- if (used_math()) {
- fp = get_stack(ka, regs, sig_xstate_size);
- frame = (void __user *)round_down(
- (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
-
- if (save_i387_xstate(fp) < 0)
- return -EFAULT;
- } else
- frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
-
- if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
- return -EFAULT;
-
- if (ka->sa.sa_flags & SA_SIGINFO) {
- if (copy_siginfo_to_user(&frame->info, info))
- return -EFAULT;
- }
-
- /* Create the ucontext. */
- if (cpu_has_xsave)
- err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
- else
- err |= __put_user(0, &frame->uc.uc_flags);
- err |= __put_user(0, &frame->uc.uc_link);
- err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->sp),
- &frame->uc.uc_stack.ss_flags);
- err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
- err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
- if (sizeof(*set) == 16) {
- __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
- __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
- } else
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-
- /* Set up to return from userspace. If provided, use a stub
- already in userspace. */
- /* x86-64 should always use SA_RESTORER. */
- if (ka->sa.sa_flags & SA_RESTORER) {
- err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
- } else {
- /* could use a vstub here */
- return -EFAULT;
- }
-
- if (err)
- return -EFAULT;
-
- /* Set up registers for signal handler */
- regs->di = sig;
- /* In case the signal handler was declared without prototypes */
- regs->ax = 0;
-
- /* This also works for non SA_SIGINFO handlers because they expect the
- next argument after the signal number on the stack. */
- regs->si = (unsigned long)&frame->info;
- regs->dx = (unsigned long)&frame->uc;
- regs->ip = (unsigned long) ka->sa.sa_handler;
-
- regs->sp = (unsigned long)frame;
-
- /* Set up the CS register to run signal handlers in 64-bit mode,
- even if the handler happens to be interrupting 32-bit code. */
- regs->cs = __USER_CS;
-
- return 0;
-}
-
-/*
- * OK, we're invoking a handler
- */
-static int signr_convert(int sig)
-{
- return sig;
-}
-
-#ifdef CONFIG_IA32_EMULATION
-#define is_ia32 test_thread_flag(TIF_IA32)
-#else
-#define is_ia32 0
-#endif
-
-static int
-setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs *regs)
-{
- int usig = signr_convert(sig);
- int ret;
-
- /* Set up the stack frame */
- if (is_ia32) {
- if (ka->sa.sa_flags & SA_SIGINFO)
- ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
- else
- ret = ia32_setup_frame(usig, ka, set, regs);
- } else
- ret = __setup_rt_frame(sig, ka, info, set, regs);
-
- if (ret) {
- force_sigsegv(sig, current);
- return -EFAULT;
- }
-
- return ret;
-}
-
-static int
-handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
- sigset_t *oldset, struct pt_regs *regs)
-{
- int ret;
-
- /* Are we from a system call? */
- if (syscall_get_nr(current, regs) >= 0) {
- /* If so, check system call restarting.. */
- switch (syscall_get_error(current, regs)) {
- case -ERESTART_RESTARTBLOCK:
- case -ERESTARTNOHAND:
- regs->ax = -EINTR;
- break;
-
- case -ERESTARTSYS:
- if (!(ka->sa.sa_flags & SA_RESTART)) {
- regs->ax = -EINTR;
- break;
- }
- /* fallthrough */
- case -ERESTARTNOINTR:
- regs->ax = regs->orig_ax;
- regs->ip -= 2;
- break;
- }
- }
-
- /*
- * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
- * flag so that register information in the sigcontext is correct.
- */
- if (unlikely(regs->flags & X86_EFLAGS_TF) &&
- likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
- regs->flags &= ~X86_EFLAGS_TF;
-
- ret = setup_rt_frame(sig, ka, info, oldset, regs);
-
- if (ret)
- return ret;
-
-#ifdef CONFIG_X86_64
- /*
- * This has nothing to do with segment registers,
- * despite the name. This magic affects uaccess.h
- * macros' behavior. Reset it to the normal setting.
- */
- set_fs(USER_DS);
-#endif
-
- /*
- * Clear the direction flag as per the ABI for function entry.
- */
- regs->flags &= ~X86_EFLAGS_DF;
-
- /*
- * Clear TF when entering the signal handler, but
- * notify any tracer that was single-stepping it.
- * The tracer may want to single-step inside the
- * handler too.
- */
- regs->flags &= ~X86_EFLAGS_TF;
-
- spin_lock_irq(&current->sighand->siglock);
- sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
- if (!(ka->sa.sa_flags & SA_NODEFER))
- sigaddset(&current->blocked, sig);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- tracehook_signal_handler(sig, info, ka, regs,
- test_thread_flag(TIF_SINGLESTEP));
-
- return 0;
-}
-
-#define NR_restart_syscall \
- test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
-/*
- * Note that 'init' is a special process: it doesn't get signals it doesn't
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- */
-static void do_signal(struct pt_regs *regs)
-{
- struct k_sigaction ka;
- siginfo_t info;
- int signr;
- sigset_t *oldset;
-
- /*
- * We want the common case to go fast, which is why we may in certain
- * cases get here from kernel mode. Just return without doing anything
- * if so.
- * X86_32: vm86 regs switched out by assembly code before reaching
- * here, so testing against kernel CS suffices.
- */
- if (!user_mode(regs))
- return;
-
- if (current_thread_info()->status & TS_RESTORE_SIGMASK)
- oldset = &current->saved_sigmask;
- else
- oldset = &current->blocked;
-
- signr = get_signal_to_deliver(&info, &ka, regs, NULL);
- if (signr > 0) {
- /*
- * Re-enable any watchpoints before delivering the
- * signal to user space. The processor register will
- * have been cleared if the watchpoint triggered
- * inside the kernel.
- */
- if (current->thread.debugreg7)
- set_debugreg(current->thread.debugreg7, 7);
-
- /* Whee! Actually deliver the signal. */
- if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
- /*
- * A signal was successfully delivered; the saved
- * sigmask will have been stored in the signal frame,
- * and will be restored by sigreturn, so we can simply
- * clear the TS_RESTORE_SIGMASK flag.
- */
- current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
- }
- return;
- }
-
- /* Did we come from a system call? */
- if (syscall_get_nr(current, regs) >= 0) {
- /* Restart the system call - no handlers present */
- switch (syscall_get_error(current, regs)) {
- case -ERESTARTNOHAND:
- case -ERESTARTSYS:
- case -ERESTARTNOINTR:
- regs->ax = regs->orig_ax;
- regs->ip -= 2;
- break;
-
- case -ERESTART_RESTARTBLOCK:
- regs->ax = NR_restart_syscall;
- regs->ip -= 2;
- break;
- }
- }
-
- /*
- * If there's no signal to deliver, we just put the saved sigmask
- * back.
- */
- if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
- current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
- sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
- }
-}
-
-/*
- * notification of userspace execution resumption
- * - triggered by the TIF_WORK_MASK flags
- */
-void
-do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
-{
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
- /* notify userspace of pending MCEs */
- if (thread_info_flags & _TIF_MCE_NOTIFY)
- mce_notify_user();
-#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
-
- /* deal with pending signal delivery */
- if (thread_info_flags & _TIF_SIGPENDING)
- do_signal(regs);
-
- if (thread_info_flags & _TIF_NOTIFY_RESUME) {
- clear_thread_flag(TIF_NOTIFY_RESUME);
- tracehook_notify_resume(regs);
- }
-
-#ifdef CONFIG_X86_32
- clear_thread_flag(TIF_IRET);
-#endif /* CONFIG_X86_32 */
-}
-
-void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
-{
- struct task_struct *me = current;
-
- if (show_unhandled_signals && printk_ratelimit()) {
- printk(KERN_INFO
- "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
- me->comm, me->pid, where, frame,
- regs->ip, regs->sp, regs->orig_ax);
- print_vma_addr(" in ", regs->ip);
- printk(KERN_CONT "\n");
- }
-
- force_sig(SIGSEGV, me);
-}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 18f9b19f5f8..3f92b134ab9 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -140,19 +140,6 @@ void native_send_call_func_ipi(cpumask_t mask)
send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
}
-static void stop_this_cpu(void *dummy)
-{
- local_irq_disable();
- /*
- * Remove this CPU:
- */
- cpu_clear(smp_processor_id(), cpu_online_map);
- disable_local_APIC();
- if (hlt_works(smp_processor_id()))
- for (;;) halt();
- for (;;);
-}
-
/*
* this function calls the 'stop' function on all other CPUs in the system.
*/
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 1a3c3253f0e..7a430c4d155 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,6 +62,7 @@
#include <asm/mtrr.h>
#include <asm/vmi.h>
#include <asm/genapic.h>
+#include <asm/setup.h>
#include <linux/mc146818rtc.h>
#include <mach_apic.h>
@@ -294,9 +295,7 @@ static void __cpuinit start_secondary(void *unused)
* fragile that we want to limit the things done here to the
* most necessary things.
*/
-#ifdef CONFIG_VMI
vmi_bringup();
-#endif
cpu_init();
preempt_disable();
smp_callin();
@@ -536,7 +535,7 @@ static void impress_friends(void)
pr_debug("Before bogocount - setting activated=1.\n");
}
-static inline void __inquire_remote_apic(int apicid)
+void __inquire_remote_apic(int apicid)
{
unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
char *names[] = { "ID", "VERSION", "SPIV" };
@@ -575,14 +574,13 @@ static inline void __inquire_remote_apic(int apicid)
}
}
-#ifdef WAKE_SECONDARY_VIA_NMI
/*
* Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
* INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
* won't ... remember to clear down the APIC, etc later.
*/
-static int __devinit
-wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
+int __devinit
+wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
int maxlvt;
@@ -599,7 +597,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
* Give the other CPU some time to accept the IPI.
*/
udelay(200);
- if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
maxlvt = lapic_get_maxlvt();
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
@@ -614,11 +612,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
return (send_status | accept_status);
}
-#endif /* WAKE_SECONDARY_VIA_NMI */
-#ifdef WAKE_SECONDARY_VIA_INIT
-static int __devinit
-wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
+int __devinit
+wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
int maxlvt, num_starts, j;
@@ -737,7 +733,6 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
return (send_status | accept_status);
}
-#endif /* WAKE_SECONDARY_VIA_INIT */
struct create_idle {
struct work_struct work;
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 083a4a5bb00..1b7711b3103 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -80,6 +80,8 @@ unsigned long __init calibrate_cpu(void)
break;
no_ctr_free = (i == 4);
if (no_ctr_free) {
+ WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... "
+ "cpu_khz value may be incorrect.\n");
i = 3;
rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
wrmsrl(MSR_K7_EVNTSEL3, 0);
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index f4049f3513b..4290d918b58 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -34,9 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock);
*/
void leave_mm(int cpu)
{
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
- BUG();
- cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
+ BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK);
+ cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask);
load_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);
@@ -104,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
* BUG();
*/
- if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
+ if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) {
+ if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) {
if (flush_va == TLB_FLUSH_ALL)
local_flush_tlb();
else
@@ -238,7 +237,7 @@ static void do_flush_tlb_all(void *info)
unsigned long cpu = smp_processor_id();
__flush_tlb_all();
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
+ if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY)
leave_mm(cpu);
}
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 04431f34fd1..6a00e5faaa7 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -566,14 +566,10 @@ static int __init uv_ptc_init(void)
if (!is_uv_system())
return 0;
- if (!proc_mkdir("sgi_uv", NULL))
- return -EINVAL;
-
proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL);
if (!proc_uv_ptc) {
printk(KERN_ERR "unable to create %s proc entry\n",
UV_PTC_BASENAME);
- remove_proc_entry("sgi_uv", NULL);
return -EINVAL;
}
proc_uv_ptc->proc_fops = &proc_uv_ptc_operations;
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index 1106fac6024..808031a5ba1 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -1,10 +1,26 @@
#include <linux/io.h>
#include <asm/trampoline.h>
+#include <asm/e820.h>
/* ready for x86_64 and x86 */
unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
+void __init reserve_trampoline_memory(void)
+{
+#ifdef CONFIG_X86_32
+ /*
+ * But first pinch a few for the stack/trampoline stuff
+ * FIXME: Don't need the extra page at 4K, but need to fix
+ * trampoline before removing it. (see the GDT stuff)
+ */
+ reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE");
+#endif
+ /* Has to be in very low memory so we can execute real-mode AP code. */
+ reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE,
+ "TRAMPOLINE");
+}
+
/*
* Currently trivial. Write the real->protected mode
* bootstrap into the page concerned. The caller
@@ -12,7 +28,6 @@ unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
*/
unsigned long setup_trampoline(void)
{
- memcpy(trampoline_base, trampoline_data,
- trampoline_end - trampoline_data);
+ memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
return virt_to_phys(trampoline_base);
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 04d242ab016..c320c29255c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -664,7 +664,7 @@ void math_error(void __user *ip)
{
struct task_struct *task;
siginfo_t info;
- unsigned short cwd, swd;
+ unsigned short cwd, swd, err;
/*
* Save the info for the exception handler and clear the error.
@@ -675,7 +675,6 @@ void math_error(void __user *ip)
task->thread.error_code = 0;
info.si_signo = SIGFPE;
info.si_errno = 0;
- info.si_code = __SI_FAULT;
info.si_addr = ip;
/*
* (~cwd & swd) will mask out exceptions that are not set to unmasked
@@ -689,34 +688,31 @@ void math_error(void __user *ip)
*/
cwd = get_fpu_cwd(task);
swd = get_fpu_swd(task);
- switch (swd & ~cwd & 0x3f) {
- case 0x000: /* No unmasked exception */
-#ifdef CONFIG_X86_32
+
+ err = swd & ~cwd & 0x3f;
+
+#if CONFIG_X86_32
+ if (!err)
return;
#endif
- default: /* Multiple exceptions */
- break;
- case 0x001: /* Invalid Op */
+
+ if (err & 0x001) { /* Invalid op */
/*
* swd & 0x240 == 0x040: Stack Underflow
* swd & 0x240 == 0x240: Stack Overflow
* User must clear the SF bit (0x40) if set
*/
info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
+ } else if (err & 0x004) { /* Divide by Zero */
info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
+ } else if (err & 0x008) { /* Overflow */
info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
+ } else if (err & 0x012) { /* Denormal, Underflow */
+ info.si_code = FPE_FLTUND;
+ } else if (err & 0x020) { /* Precision */
info.si_code = FPE_FLTRES;
- break;
+ } else {
+ info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */
}
force_sig_info(SIGFPE, &info, task);
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 424093b157d..599e5816863 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -15,6 +15,7 @@
#include <asm/vgtod.h>
#include <asm/time.h>
#include <asm/delay.h>
+#include <asm/hypervisor.h>
unsigned int cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -31,6 +32,7 @@ static int tsc_unstable;
erroneous rdtsc usage on !cpu_has_tsc processors */
static int tsc_disabled = -1;
+static int tsc_clocksource_reliable;
/*
* Scheduler clock - returns current time in nanosec units.
*/
@@ -98,6 +100,15 @@ int __init notsc_setup(char *str)
__setup("notsc", notsc_setup);
+static int __init tsc_setup(char *str)
+{
+ if (!strcmp(str, "reliable"))
+ tsc_clocksource_reliable = 1;
+ return 1;
+}
+
+__setup("tsc=", tsc_setup);
+
#define MAX_RETRIES 5
#define SMI_TRESHOLD 50000
@@ -352,9 +363,15 @@ unsigned long native_calibrate_tsc(void)
{
u64 tsc1, tsc2, delta, ref1, ref2;
unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
- unsigned long flags, latch, ms, fast_calibrate;
+ unsigned long flags, latch, ms, fast_calibrate, tsc_khz;
int hpet = is_hpet_enabled(), i, loopmin;
+ tsc_khz = get_hypervisor_tsc_freq();
+ if (tsc_khz) {
+ printk(KERN_INFO "TSC: Frequency read from the hypervisor\n");
+ return tsc_khz;
+ }
+
local_irq_save(flags);
fast_calibrate = quick_pit_calibrate();
local_irq_restore(flags);
@@ -731,24 +748,21 @@ static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
{}
};
-/*
- * Geode_LX - the OLPC CPU has a possibly a very reliable TSC
- */
+static void __init check_system_tsc_reliable(void)
+{
#ifdef CONFIG_MGEODE_LX
-/* RTSC counts during suspend */
+ /* RTSC counts during suspend */
#define RTSC_SUSP 0x100
-
-static void __init check_geode_tsc_reliable(void)
-{
unsigned long res_low, res_high;
rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
+ /* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */
if (res_low & RTSC_SUSP)
- clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
-}
-#else
-static inline void check_geode_tsc_reliable(void) { }
+ tsc_clocksource_reliable = 1;
#endif
+ if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
+ tsc_clocksource_reliable = 1;
+}
/*
* Make an educated guess if the TSC is trustworthy and synchronized
@@ -783,6 +797,8 @@ static void __init init_tsc_clocksource(void)
{
clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
clocksource_tsc.shift);
+ if (tsc_clocksource_reliable)
+ clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
/* lower the rating if we already know its unstable: */
if (check_tsc_unstable()) {
clocksource_tsc.rating = 0;
@@ -843,7 +859,7 @@ void __init tsc_init(void)
if (unsynchronized_tsc())
mark_tsc_unstable("TSCs unsynchronized");
- check_geode_tsc_reliable();
+ check_system_tsc_reliable();
init_tsc_clocksource();
}
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 1c0dfbca87c..bf36328f6ef 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -112,6 +112,12 @@ void __cpuinit check_tsc_sync_source(int cpu)
if (unsynchronized_tsc())
return;
+ if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
+ printk(KERN_INFO
+ "Skipping synchronization checks as TSC is reliable.\n");
+ return;
+ }
+
printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:",
smp_processor_id(), cpu);
@@ -165,7 +171,7 @@ void __cpuinit check_tsc_sync_target(void)
{
int cpus = 2;
- if (unsynchronized_tsc())
+ if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
return;
/*
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 8087e0cd877..23206ba1687 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -841,8 +841,6 @@ static inline int __init activate_vmi(void)
void __init vmi_init(void)
{
- unsigned long flags;
-
if (!vmi_rom)
probe_vmi_rom();
else
@@ -854,13 +852,21 @@ void __init vmi_init(void)
reserve_top_address(-vmi_rom->virtual_top);
- local_irq_save(flags);
- activate_vmi();
-
#ifdef CONFIG_X86_IO_APIC
/* This is virtual hardware; timer routing is wired correctly */
no_timer_check = 1;
#endif
+}
+
+void vmi_activate(void)
+{
+ unsigned long flags;
+
+ if (!vmi_rom)
+ return;
+
+ local_irq_save(flags);
+ activate_vmi();
local_irq_restore(flags & X86_EFLAGS_IF);
}