summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig6
-rw-r--r--arch/x86/Kconfig.cpu14
-rw-r--r--arch/x86/Kconfig.debug2
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/boot/memory.c9
-rw-r--r--arch/x86/kernel/Makefile10
-rw-r--r--arch/x86/kernel/acpi/boot.c3
-rw-r--r--arch/x86/kernel/acpi/cstate.c2
-rw-r--r--arch/x86/kernel/asm-offsets_32.c6
-rw-r--r--arch/x86/kernel/cpu/bugs.c8
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c9
-rw-r--r--arch/x86/kernel/cpu/transmeta.c7
-rw-r--r--arch/x86/kernel/efi.c16
-rw-r--r--arch/x86/kernel/efi_32.c1
-rw-r--r--arch/x86/kernel/efi_64.c32
-rw-r--r--arch/x86/kernel/entry_32.S2
-rw-r--r--arch/x86/kernel/entry_64.S8
-rw-r--r--arch/x86/kernel/head64.c3
-rw-r--r--arch/x86/kernel/head_32.S4
-rw-r--r--arch/x86/kernel/head_64.S31
-rw-r--r--arch/x86/kernel/hpet.c4
-rw-r--r--arch/x86/kernel/i387.c2
-rw-r--r--arch/x86/kernel/i8253.c2
-rw-r--r--arch/x86/kernel/i8259_32.c25
-rw-r--r--arch/x86/kernel/io_delay.c1
-rw-r--r--arch/x86/kernel/kprobes.c4
-rw-r--r--arch/x86/kernel/nmi_32.c21
-rw-r--r--arch/x86/kernel/nmi_64.c21
-rw-r--r--arch/x86/kernel/pci-gart_64.c9
-rw-r--r--arch/x86/kernel/process_64.c6
-rw-r--r--arch/x86/kernel/ptrace.c4
-rw-r--r--arch/x86/kernel/quirks.c9
-rw-r--r--arch/x86/kernel/reboot.c46
-rw-r--r--arch/x86/kernel/setup_32.c1
-rw-r--r--arch/x86/kernel/setup_64.c8
-rw-r--r--arch/x86/kernel/smpboot_64.c2
-rw-r--r--arch/x86/kernel/stacktrace.c4
-rw-r--r--arch/x86/kernel/test_rodata.c2
-rw-r--r--arch/x86/kernel/topology.c2
-rw-r--r--arch/x86/kernel/traps_64.c4
-rw-r--r--arch/x86/kernel/tsc_32.c3
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S26
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S30
-rw-r--r--arch/x86/kernel/vsyscall_64.c52
-rw-r--r--arch/x86/lguest/boot.c12
-rw-r--r--arch/x86/lib/csum-wrappers_64.c147
-rw-r--r--arch/x86/lib/io_64.c18
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/init_32.c1
-rw-r--r--arch/x86/mm/init_64.c37
-rw-r--r--arch/x86/mm/ioremap.c30
-rw-r--r--arch/x86/mm/numa_64.c5
-rw-r--r--arch/x86/mm/pageattr-test.c7
-rw-r--r--arch/x86/mm/pageattr.c337
-rw-r--r--arch/x86/mm/srat_64.c3
-rw-r--r--arch/x86/pci/common.c2
-rw-r--r--arch/x86/pci/irq.c4
-rw-r--r--arch/x86/power/hibernate_asm_64.S5
-rw-r--r--arch/x86/vdso/Makefile24
-rw-r--r--arch/x86/xen/enlighten.c4
61 files changed, 605 insertions, 498 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index aaed1a3b92d..4a88cf7695b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -21,6 +21,8 @@ config X86
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_KPROBES
+ select HAVE_KVM
+
config GENERIC_LOCKBREAK
def_bool n
@@ -119,8 +121,6 @@ config ARCH_HAS_CPU_RELAX
config HAVE_SETUP_PER_CPU_AREA
def_bool X86_64
-select HAVE_KVM
-
config ARCH_HIBERNATION_POSSIBLE
def_bool y
depends on !SMP || !X86_VOYAGER
@@ -1054,7 +1054,7 @@ config SECCOMP
config CC_STACKPROTECTOR
bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
- depends on X86_64 && EXPERIMENTAL
+ depends on X86_64 && EXPERIMENTAL && BROKEN
help
This option turns on the -fstack-protector GCC feature. This
feature puts, at the beginning of critical functions, a canary
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index e09a6b73a1a..6d50064db18 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -377,6 +377,19 @@ config X86_OOSTORE
def_bool y
depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR
+#
+# P6_NOPs are a relatively minor optimization that require a family >=
+# 6 processor, except that it is broken on certain VIA chips.
+# Furthermore, AMD chips prefer a totally different sequence of NOPs
+# (which work on all CPUs). As a result, disallow these if we're
+# compiling X86_GENERIC but not X86_64 (these NOPs do work on all
+# x86-64 capable chips); the list of processors in the right-hand clause
+# are the cores that benefit from this optimization.
+#
+config X86_P6_NOP
+ def_bool y
+ depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || PENTIUM4)
+
config X86_TSC
def_bool y
depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64
@@ -390,6 +403,7 @@ config X86_CMOV
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
+ default "6" if X86_32 && X86_P6_NOP
default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
default "3"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 864affc9a7b..702eb39901c 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -156,7 +156,7 @@ config IO_DELAY_TYPE_NONE
choice
prompt "IO delay type"
- default IO_DELAY_0XED
+ default IO_DELAY_0X80
config IO_DELAY_0X80
bool "port 0x80 based port-IO delay [recommended]"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 204af43535c..f1e739a43d4 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -229,7 +229,7 @@ zdisk bzdisk: vmlinux
fdimage fdimage144 fdimage288 isoimage: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
-install: vdso_install
+install:
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
PHONY += vdso_install
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index 378353956b5..e77d89f9e8a 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -37,6 +37,12 @@ static int detect_memory_e820(void)
"=m" (*desc)
: "D" (desc), "d" (SMAP), "a" (0xe820));
+ /* BIOSes which terminate the chain with CF = 1 as opposed
+ to %ebx = 0 don't always report the SMAP signature on
+ the final, failing, probe. */
+ if (err)
+ break;
+
/* Some BIOSes stop returning SMAP in the middle of
the search loop. We don't know exactly how the BIOS
screwed up the map at that point, we might have a
@@ -47,9 +53,6 @@ static int detect_memory_e820(void)
break;
}
- if (err)
- break;
-
count++;
desc++;
} while (next && count < E820MAX);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 76ec0f8f138..4eb5ce84110 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,7 +6,15 @@ extra-y := head_$(BITS).o init_task.o vmlinux.lds
extra-$(CONFIG_X86_64) += head64.o
CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
-CFLAGS_vsyscall_64.o := $(PROFILING) -g0
+
+#
+# vsyscalls (which work on the user stack) should have
+# no stack-protector checks:
+#
+nostackp := $(call cc-option, -fno-stack-protector)
+CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
+CFLAGS_hpet.o := $(nostackp)
+CFLAGS_tsc_64.o := $(nostackp)
obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
obj-y += traps_$(BITS).o irq_$(BITS).o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 680b7300a48..2cdc9de9371 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -72,7 +72,8 @@ static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return
#define PREFIX "ACPI: "
int acpi_noirq; /* skip ACPI IRQ initialization */
-int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */
+int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */
+EXPORT_SYMBOL(acpi_pci_disabled);
int acpi_ht __initdata = 1; /* enable HT */
int acpi_lapic;
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 10b67170b13..8ca3557a6d5 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -126,6 +126,8 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
printk(KERN_DEBUG "Monitor-Mwait will be used to enter C-%d "
"state\n", cx->type);
}
+ snprintf(cx->desc, ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
+ cx->address);
out:
set_cpus_allowed(current, saved_mask);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index afd84463b71..8ea040124f7 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -20,10 +20,8 @@
#include <xen/interface/xen.h>
-#ifdef CONFIG_LGUEST_GUEST
#include <linux/lguest.h>
#include "../../../drivers/lguest/lg.h"
-#endif
#define DEFINE(sym, val) \
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -130,10 +128,12 @@ void foo(void)
OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
#endif
-#ifdef CONFIG_LGUEST_GUEST
+#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir);
+
+ BLANK();
OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc);
OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc);
OFFSET(LGUEST_PAGES_host_cr3, lguest_pages, state.host_cr3);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 9b95edcfc6a..027e5c003b1 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -25,14 +25,6 @@ static int __init no_halt(char *s)
__setup("no-hlt", no_halt);
-static int __init mca_pentium(char *s)
-{
- mca_pentium_flag = 1;
- return 1;
-}
-
-__setup("mca-pentium", mca_pentium);
-
static int __init no_387(char *s)
{
boot_cpu_data.hard_math = 0;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f86a3c4a266..a38aafaefc2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -504,7 +504,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
/* Clear all flags overriden by options */
for (i = 0; i < NCAPINTS; i++)
- c->x86_capability[i] ^= cleared_cpu_caps[i];
+ c->x86_capability[i] &= ~cleared_cpu_caps[i];
/* Init Machine Check Exception if available. */
mcheck_init(c);
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index b6e136f23d3..be83336fddb 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -43,6 +43,7 @@
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/msr.h>
+#include <asm/kvm_para.h>
#include "mtrr.h"
u32 num_var_ranges = 0;
@@ -649,6 +650,7 @@ static __init int amd_special_default_mtrr(void)
/**
* mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
+ * @end_pfn: ending page frame number
*
* Some buggy BIOSes don't setup the MTRRs properly for systems with certain
* memory configurations. This routine checks that the highest MTRR matches
@@ -688,8 +690,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
/* kvm/qemu doesn't have mtrr set right, don't trim them all */
if (!highest_pfn) {
- printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n");
- WARN_ON(1);
+ if (!kvm_para_available()) {
+ printk(KERN_WARNING
+ "WARNING: strange, CPU MTRRs all blank?\n");
+ WARN_ON(1);
+ }
return 0;
}
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 200fb3f9ebf..e8b422c1c51 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -76,13 +76,6 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
/* All Transmeta CPUs have a constant TSC */
set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
- /* If we can run i686 user-space code, call us an i686 */
-#define USER686 ((1 << X86_FEATURE_TSC)|\
- (1 << X86_FEATURE_CX8)|\
- (1 << X86_FEATURE_CMOV))
- if (c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686)
- c->x86 = 6;
-
#ifdef CONFIG_SYSCTL
/* randomize_va_space slows us down enormously;
it probably triggers retranslation of x86->native bytecode */
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 32dd62b36ff..759e02bec07 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(efi);
struct efi_memory_map memmap;
-struct efi efi_phys __initdata;
+static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;
static int __init setup_noefi(char *arg)
@@ -384,9 +384,6 @@ static void __init runtime_code_page_mkexec(void)
efi_memory_desc_t *md;
void *p;
- if (!(__supported_pte_mask & _PAGE_NX))
- return;
-
/* Make EFI runtime service code area executable */
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
@@ -394,7 +391,7 @@ static void __init runtime_code_page_mkexec(void)
if (md->type != EFI_RUNTIME_SERVICES_CODE)
continue;
- set_memory_x(md->virt_addr, md->num_pages << EFI_PAGE_SHIFT);
+ set_memory_x(md->virt_addr, md->num_pages);
}
}
@@ -428,9 +425,6 @@ void __init efi_enter_virtual_mode(void)
else
va = efi_ioremap(md->phys_addr, size);
- if (md->attribute & EFI_MEMORY_WB)
- set_memory_uc(md->virt_addr, size);
-
md->virt_addr = (u64) (unsigned long) va;
if (!va) {
@@ -439,6 +433,9 @@ void __init efi_enter_virtual_mode(void)
continue;
}
+ if (!(md->attribute & EFI_MEMORY_WB))
+ set_memory_uc(md->virt_addr, md->num_pages);
+
systab = (u64) (unsigned long) efi_phys.systab;
if (md->phys_addr <= systab && systab < end) {
systab += md->virt_addr - md->phys_addr;
@@ -476,7 +473,8 @@ void __init efi_enter_virtual_mode(void)
efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
efi.reset_system = virt_efi_reset_system;
efi.set_virtual_address_map = virt_efi_set_virtual_address_map;
- runtime_code_page_mkexec();
+ if (__supported_pte_mask & _PAGE_NX)
+ runtime_code_page_mkexec();
early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
memmap.map = NULL;
}
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index cb91f985b4a..5d23d85624d 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -28,6 +28,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
+#include <asm/efi.h>
/*
* To make EFI call EFI runtime service in physical addressing mode we need
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 09d5c233093..d143a1e76b3 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -35,6 +35,7 @@
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm/efi.h>
+#include <asm/cacheflush.h>
static pgd_t save_pgd __initdata;
static unsigned long efi_flags __initdata;
@@ -43,22 +44,15 @@ static void __init early_mapping_set_exec(unsigned long start,
unsigned long end,
int executable)
{
- pte_t *kpte;
- unsigned int level;
-
- while (start < end) {
- kpte = lookup_address((unsigned long)__va(start), &level);
- BUG_ON(!kpte);
- if (executable)
- set_pte(kpte, pte_mkexec(*kpte));
- else
- set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \
- __supported_pte_mask));
- if (level == PG_LEVEL_4K)
- start = (start + PAGE_SIZE) & PAGE_MASK;
- else
- start = (start + PMD_SIZE) & PMD_MASK;
- }
+ unsigned long num_pages;
+
+ start &= PMD_MASK;
+ end = (end + PMD_SIZE - 1) & PMD_MASK;
+ num_pages = (end - start) >> PAGE_SHIFT;
+ if (executable)
+ set_memory_x((unsigned long)__va(start), num_pages);
+ else
+ set_memory_nx((unsigned long)__va(start), num_pages);
}
static void __init early_runtime_code_mapping_set_exec(int executable)
@@ -74,7 +68,7 @@ static void __init early_runtime_code_mapping_set_exec(int executable)
md = p;
if (md->type == EFI_RUNTIME_SERVICES_CODE) {
unsigned long end;
- end = md->phys_addr + (md->num_pages << PAGE_SHIFT);
+ end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
early_mapping_set_exec(md->phys_addr, end, executable);
}
}
@@ -84,8 +78,8 @@ void __init efi_call_phys_prelog(void)
{
unsigned long vaddress;
- local_irq_save(efi_flags);
early_runtime_code_mapping_set_exec(1);
+ local_irq_save(efi_flags);
vaddress = (unsigned long)__va(0x0UL);
save_pgd = *pgd_offset_k(0x0UL);
set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
@@ -98,9 +92,9 @@ void __init efi_call_phys_epilog(void)
* After the lock is released, the original page table is restored.
*/
set_pgd(pgd_offset_k(0x0UL), save_pgd);
- early_runtime_code_mapping_set_exec(0);
__flush_tlb_all();
local_irq_restore(efi_flags);
+ early_runtime_code_mapping_set_exec(0);
}
void __init efi_reserve_bootmem(void)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 824e21b80aa..4b87c32b639 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -409,7 +409,7 @@ restore_nocheck_notrace:
RESTORE_REGS
addl $4, %esp # skip orig_eax/error_code
CFI_ADJUST_CFA_OFFSET -4
-ENTRY(irq_return)
+irq_return:
INTERRUPT_RETURN
.section .fixup,"ax"
iret_exc:
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 6be39a387c5..c20c9e7e08d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -453,6 +453,7 @@ ENTRY(stub_execve)
CFI_REGISTER rip, r11
SAVE_REST
FIXUP_TOP_OF_STACK %r11
+ movq %rsp, %rcx
call sys_execve
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
@@ -583,7 +584,7 @@ retint_restore_args: /* return to kernel space */
restore_args:
RESTORE_ARGS 0,8,0
-ENTRY(irq_return)
+irq_return:
INTERRUPT_RETURN
.section __ex_table, "a"
@@ -1036,15 +1037,16 @@ ENDPROC(child_rip)
* rdi: name, rsi: argv, rdx: envp
*
* We want to fallback into:
- * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
+ * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
*
* do_sys_execve asm fallback arguments:
- * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
+ * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
*/
ENTRY(kernel_execve)
CFI_STARTPROC
FAKE_STACK_FRAME $0
SAVE_ALL
+ movq %rsp,%rcx
call sys_execve
movq %rax, RAX(%rsp)
RESTORE_REST
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 24dbf56928d..ad2440832de 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -88,6 +88,9 @@ void __init x86_64_start_kernel(char * real_mode_data)
/* Make NULL pointers segfault */
zap_identity_mappings();
+ /* Cleanup the over mapped high alias */
+ cleanup_highmap();
+
for (i = 0; i < IDT_ENTRIES; i++) {
#ifdef CONFIG_EARLY_PRINTK
set_intr_gate(i, &early_idt_handlers[i]);
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 74ef4a41f22..fd8ca53943a 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -606,13 +606,13 @@ ENTRY(_stext)
.section ".bss.page_aligned","wa"
.align PAGE_SIZE_asm
#ifdef CONFIG_X86_PAE
-ENTRY(swapper_pg_pmd)
+swapper_pg_pmd:
.fill 1024*KPMDS,4,0
#else
ENTRY(swapper_pg_dir)
.fill 1024,4,0
#endif
-ENTRY(swapper_pg_fixmap)
+swapper_pg_fixmap:
.fill 1024,4,0
ENTRY(empty_zero_page)
.fill 4096,1,0
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 09b38d539b0..a007454133a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -107,8 +107,13 @@ startup_64:
movq %rdx, 0(%rbx, %rax, 8)
ident_complete:
- /* Fixup the kernel text+data virtual addresses
+ /*
+ * Fixup the kernel text+data virtual addresses. Note that
+ * we might write invalid pmds, when the kernel is relocated
+ * cleanup_highmap() fixes this up along with the mappings
+ * beyond _end.
*/
+
leaq level2_kernel_pgt(%rip), %rdi
leaq 4096(%rdi), %r8
/* See if it is a valid page table entry */
@@ -250,7 +255,7 @@ ENTRY(secondary_startup_64)
lretq
/* SMP bootup changes these two */
- __CPUINITDATA
+ __REFDATA
.align 8
ENTRY(initial_code)
.quad x86_64_start_kernel
@@ -374,18 +379,24 @@ NEXT_PAGE(level2_ident_pgt)
/* Since I easily can, map the first 1G.
* Don't set NX because code runs from these pages.
*/
- PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
+ PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
NEXT_PAGE(level2_kernel_pgt)
- /* 40MB kernel mapping. The kernel code cannot be bigger than that.
- When you change this change KERNEL_TEXT_SIZE in page.h too. */
- /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
- PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, KERNEL_TEXT_SIZE/PMD_SIZE)
- /* Module mapping starts here */
- .fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
+ /*
+ * 128 MB kernel mapping. We spend a full page on this pagetable
+ * anyway.
+ *
+ * The kernel code+data+bss must not be bigger than that.
+ *
+ * (NOTE: at +128MB starts the module area, see MODULES_VADDR.
+ * If you want to increase this then increase MODULES_VADDR
+ * too.)
+ */
+ PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
+ KERNEL_IMAGE_SIZE/PMD_SIZE)
NEXT_PAGE(level2_spare_pgt)
- .fill 512,8,0
+ .fill 512, 8, 0
#undef PMDS
#undef NEXT_PAGE
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 429d084e014..235fd6c7750 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -368,8 +368,8 @@ static int hpet_clocksource_register(void)
return 0;
}
-/*
- * Try to setup the HPET timer
+/**
+ * hpet_enable - Try to setup the HPET timer. Returns 1 on success.
*/
int __init hpet_enable(void)
{
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 26719bd2c77..763dfc40723 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -39,7 +39,7 @@
#define HAVE_HWFP 1
#endif
-unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
+static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
void mxcsr_feature_mask_init(void)
{
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index ef62b07b2b4..8540abe86ad 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -95,7 +95,7 @@ static int pit_next_event(unsigned long delta, struct clock_event_device *evt)
* registered. This mechanism replaces the previous #ifdef LOCAL_APIC -
* !using_apic_timer decisions in do_timer_interrupt_hook()
*/
-struct clock_event_device pit_clockevent = {
+static struct clock_event_device pit_clockevent = {
.name = "pit",
.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
.set_mode = init_pit_timer,
diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c
index 2d25b77102f..fe631967d62 100644
--- a/arch/x86/kernel/i8259_32.c
+++ b/arch/x86/kernel/i8259_32.c
@@ -26,8 +26,6 @@
* present in the majority of PC/AT boxes.
* plus some generic x86 specific things if generic specifics makes
* any sense at all.
- * this file should become arch/i386/kernel/irq.c when the old irq.c
- * moves to arch independent land
*/
static int i8259A_auto_eoi;
@@ -362,23 +360,12 @@ void __init init_ISA_irqs (void)
#endif
init_8259A(0);
- for (i = 0; i < NR_IRQS; i++) {
- irq_desc[i].status = IRQ_DISABLED;
- irq_desc[i].action = NULL;
- irq_desc[i].depth = 1;
-
- if (i < 16) {
- /*
- * 16 old-style INTA-cycle interrupts:
- */
- set_irq_chip_and_handler_name(i, &i8259A_chip,
- handle_level_irq, "XT");
- } else {
- /*
- * 'high' PCI IRQs filled in on demand
- */
- irq_desc[i].chip = &no_irq_chip;
- }
+ /*
+ * 16 old-style INTA-cycle interrupts:
+ */
+ for (i = 0; i < 16; i++) {
+ set_irq_chip_and_handler_name(i, &i8259A_chip,
+ handle_level_irq, "XT");
}
}
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index bd49321034d..c706a306155 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -13,7 +13,6 @@
#include <asm/io.h>
int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE;
-EXPORT_SYMBOL_GPL(io_delay_type);
static int __initdata io_delay_override;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index a99e764fd66..34a591283f5 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -581,7 +581,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
* When a retprobed function returns, this code saves registers and
* calls trampoline_handler() runs, which calls the kretprobe's handler.
*/
-void __kprobes kretprobe_trampoline_holder(void)
+static void __used __kprobes kretprobe_trampoline_holder(void)
{
asm volatile (
".global kretprobe_trampoline\n"
@@ -673,7 +673,7 @@ void __kprobes kretprobe_trampoline_holder(void)
/*
* Called from kretprobe_trampoline
*/
-void * __kprobes trampoline_handler(struct pt_regs *regs)
+static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
{
struct kretprobe_instance *ri = NULL;
struct hlist_head *head, empty_rp;
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index edd413650b3..6a0aa703868 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -46,9 +46,6 @@ static unsigned int nmi_hz = HZ;
static DEFINE_PER_CPU(short, wd_enabled);
-/* local prototypes */
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
-
static int endflag __initdata = 0;
#ifdef CONFIG_SMP
@@ -391,15 +388,6 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
return rc;
}
-int do_nmi_callback(struct pt_regs * regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
- if (unknown_nmi_panic)
- return unknown_nmi_panic_callback(regs, cpu);
-#endif
- return 0;
-}
-
#ifdef CONFIG_SYSCTL
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
@@ -453,6 +441,15 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
#endif
+int do_nmi_callback(struct pt_regs *regs, int cpu)
+{
+#ifdef CONFIG_SYSCTL
+ if (unknown_nmi_panic)
+ return unknown_nmi_panic_callback(regs, cpu);
+#endif
+ return 0;
+}
+
void __trigger_all_cpu_backtrace(void)
{
int i;
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
index fb99484d21c..9a4fde74bee 100644
--- a/arch/x86/kernel/nmi_64.c
+++ b/arch/x86/kernel/nmi_64.c
@@ -46,9 +46,6 @@ static unsigned int nmi_hz = HZ;
static DEFINE_PER_CPU(short, wd_enabled);
-/* local prototypes */
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
-
/* Run after command line and cpu_init init, but before all other checks */
void nmi_watchdog_default(void)
{
@@ -394,15 +391,6 @@ asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
nmi_exit();
}
-int do_nmi_callback(struct pt_regs * regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
- if (unknown_nmi_panic)
- return unknown_nmi_panic_callback(regs, cpu);
-#endif
- return 0;
-}
-
void stop_nmi(void)
{
acpi_nmi_disable();
@@ -464,6 +452,15 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
#endif
+int do_nmi_callback(struct pt_regs *regs, int cpu)
+{
+#ifdef CONFIG_SYSCTL
+ if (unknown_nmi_panic)
+ return unknown_nmi_panic_callback(regs, cpu);
+#endif
+ return 0;
+}
+
void __trigger_all_cpu_backtrace(void)
{
int i;
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 65f6acb025c..faf3229f8fb 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -749,6 +749,15 @@ void __init gart_iommu_init(void)
*/
set_memory_np((unsigned long)__va(iommu_bus_base),
iommu_size >> PAGE_SHIFT);
+ /*
+ * Tricky. The GART table remaps the physical memory range,
+ * so the CPU wont notice potential aliases and if the memory
+ * is remapped to UC later on, we might surprise the PCI devices
+ * with a stray writeout of a cacheline. So play it sure and
+ * do an explicit, full-scale wbinvd() _after_ having marked all
+ * the pages as Not-Present:
+ */
+ wbinvd();
/*
* Try to workaround a bug (thanks to BenH)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b0cc8f0136d..43f287744f9 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -730,16 +730,16 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
- char __user * __user *envp, struct pt_regs regs)
+ char __user * __user *envp, struct pt_regs *regs)
{
long error;
char * filename;
filename = getname(name);
error = PTR_ERR(filename);
- if (IS_ERR(filename))
+ if (IS_ERR(filename))
return error;
- error = do_execve(filename, argv, envp, &regs);
+ error = do_execve(filename, argv, envp, regs);
putname(filename);
return error;
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 702c33efea8..d862e396b09 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1160,7 +1160,7 @@ static int genregs32_set(struct task_struct *target,
if (kbuf) {
const compat_ulong_t *k = kbuf;
while (count > 0 && !ret) {
- ret = putreg(target, pos, *k++);
+ ret = putreg32(target, pos, *k++);
count -= sizeof(*k);
pos += sizeof(*k);
}
@@ -1171,7 +1171,7 @@ static int genregs32_set(struct task_struct *target,
ret = __get_user(word, u++);
if (ret)
break;
- ret = putreg(target, pos, word);
+ ret = putreg32(target, pos, word);
count -= sizeof(*u);
pos += sizeof(*u);
}
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 1941482d4ca..c47208fc593 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -11,7 +11,7 @@
static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
{
u8 config, rev;
- u32 word;
+ u16 word;
/* BIOS may enable hardware IRQ balancing for
* E7520/E7320/E7525(revision ID 0x9 and below)
@@ -26,8 +26,11 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
pci_read_config_byte(dev, 0xf4, &config);
pci_write_config_byte(dev, 0xf4, config|0x2);
- /* read xTPR register */
- raw_pci_read(0, 0, 0x40, 0x4c, 2, &word);
+ /*
+ * read xTPR register. We may not have a pci_dev for device 8
+ * because it might be hidden until the above write.
+ */
+ pci_bus_read_config_word(dev->bus, PCI_DEVFN(8, 0), 0x4c, &word);
if (!(word & (1 << 13))) {
dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 5818dc28167..7fd6ac43e4a 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -326,7 +326,7 @@ static inline void kb_wait(void)
}
}
-void machine_emergency_restart(void)
+static void native_machine_emergency_restart(void)
{
int i;
@@ -376,7 +376,7 @@ void machine_emergency_restart(void)
}
}
-void machine_shutdown(void)
+static void native_machine_shutdown(void)
{
/* Stop the cpus and apics */
#ifdef CONFIG_SMP
@@ -420,7 +420,7 @@ void machine_shutdown(void)
#endif
}
-void machine_restart(char *__unused)
+static void native_machine_restart(char *__unused)
{
printk("machine restart\n");
@@ -429,11 +429,11 @@ void machine_restart(char *__unused)
machine_emergency_restart();
}
-void machine_halt(void)
+static void native_machine_halt(void)
{
}
-void machine_power_off(void)
+static void native_machine_power_off(void)
{
if (pm_power_off) {
if (!reboot_force)
@@ -443,9 +443,35 @@ void machine_power_off(void)
}
struct machine_ops machine_ops = {
- .power_off = machine_power_off,
- .shutdown = machine_shutdown,
- .emergency_restart = machine_emergency_restart,
- .restart = machine_restart,
- .halt = machine_halt
+ .power_off = native_machine_power_off,
+ .shutdown = native_machine_shutdown,
+ .emergency_restart = native_machine_emergency_restart,
+ .restart = native_machine_restart,
+ .halt = native_machine_halt
};
+
+void machine_power_off(void)
+{
+ machine_ops.power_off();
+}
+
+void machine_shutdown(void)
+{
+ machine_ops.shutdown();
+}
+
+void machine_emergency_restart(void)
+{
+ machine_ops.emergency_restart();
+}
+
+void machine_restart(char *cmd)
+{
+ machine_ops.restart(cmd);
+}
+
+void machine_halt(void)
+{
+ machine_ops.halt();
+}
+
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 691ab4cb167..a1d7071a51c 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -164,7 +164,6 @@ unsigned long mmu_cr4_features = X86_CR4_PAE;
unsigned int machine_id;
unsigned int machine_submodel_id;
unsigned int BIOS_revision;
-unsigned int mca_pentium_flag;
/* Boot loader ID as an integer, for the benefit of proc_dointvec */
int bootloader_type;
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index c0d8208af12..7637dc91c79 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -518,7 +518,7 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
}
#ifdef CONFIG_NUMA
-static int nearby_node(int apicid)
+static int __cpuinit nearby_node(int apicid)
{
int i, node;
@@ -791,7 +791,7 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
return 1;
}
-static void srat_detect_node(void)
+static void __cpuinit srat_detect_node(void)
{
#ifdef CONFIG_NUMA
unsigned node;
@@ -1021,7 +1021,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
/* Clear all flags overriden by options */
for (i = 0; i < NCAPINTS; i++)
- c->x86_capability[i] ^= cleared_cpu_caps[i];
+ c->x86_capability[i] &= ~cleared_cpu_caps[i];
#ifdef CONFIG_X86_MCE
mcheck_init(c);
@@ -1046,7 +1046,7 @@ __setup("noclflush", setup_noclflush);
void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
{
if (c->x86_model_id[0])
- printk(KERN_INFO "%s", c->x86_model_id);
+ printk(KERN_CONT "%s", c->x86_model_id);
if (c->x86_mask || c->cpuid_level >= 0)
printk(KERN_CONT " stepping %02x\n", c->x86_mask);
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index d53bd6fcb42..0880f2c388a 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -554,10 +554,10 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
int timeout;
unsigned long start_rip;
struct create_idle c_idle = {
- .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle),
.cpu = cpu,
.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
};
+ INIT_WORK(&c_idle.work, do_fork_idle);
/* allocate memory for gdts of secondary cpus. Hotplug is considered */
if (!cpu_gdt_descr[cpu].address &&
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 02f0f61f5b1..c28c342c162 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -25,6 +25,8 @@ static int save_stack_stack(void *data, char *name)
static void save_stack_address(void *data, unsigned long addr, int reliable)
{
struct stack_trace *trace = data;
+ if (!reliable)
+ return;
if (trace->skip > 0) {
trace->skip--;
return;
@@ -37,6 +39,8 @@ static void
save_stack_address_nosched(void *data, unsigned long addr, int reliable)
{
struct stack_trace *trace = (struct stack_trace *)data;
+ if (!reliable)
+ return;
if (in_sched_functions(addr))
return;
if (trace->skip > 0) {
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
index 4c163772000..c29e235792a 100644
--- a/arch/x86/kernel/test_rodata.c
+++ b/arch/x86/kernel/test_rodata.c
@@ -10,8 +10,8 @@
* of the License.
*/
#include <linux/module.h>
+#include <asm/cacheflush.h>
#include <asm/sections.h>
-extern int rodata_test_data;
int rodata_test(void)
{
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index a40051b71d9..0fcc95a354f 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -34,7 +34,7 @@
static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
#ifdef CONFIG_HOTPLUG_CPU
-int arch_register_cpu(int num)
+int __ref arch_register_cpu(int num)
{
/*
* CPU0 cannot be offlined due to several
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index efc66df728b..04546668191 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -84,7 +84,7 @@ static inline void conditional_sti(struct pt_regs *regs)
static inline void preempt_conditional_sti(struct pt_regs *regs)
{
- preempt_disable();
+ inc_preempt_count();
if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
}
@@ -95,7 +95,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
local_irq_disable();
/* Make sure to not schedule here because we could be running
on an exception stack. */
- preempt_enable_no_resched();
+ dec_preempt_count();
}
int kstack_depth_to_print = 12;
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index 43517e324be..f14cfd9d1f9 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -28,7 +28,8 @@ EXPORT_SYMBOL_GPL(tsc_khz);
static int __init tsc_setup(char *str)
{
printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
- "cannot disable TSC.\n");
+ "cannot disable TSC completely.\n");
+ mark_tsc_unstable("user disabled TSC");
return 1;
}
#else
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index f1148ac8abe..2ffa9656fe7 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -38,7 +38,7 @@ SECTIONS
/* read-only */
.text : AT(ADDR(.text) - LOAD_OFFSET) {
- . = ALIGN(4096); /* not really needed, already page aligned */
+ . = ALIGN(PAGE_SIZE); /* not really needed, already page aligned */
*(.text.page_aligned)
TEXT_TEXT
SCHED_TEXT
@@ -70,21 +70,21 @@ SECTIONS
RODATA
/* writeable */
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
.data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
DATA_DATA
CONSTRUCTORS
} :data
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
__nosave_begin = .;
*(.data.nosave)
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
__nosave_end = .;
}
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
.data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
*(.data.page_aligned)
*(.data.idt)
@@ -108,7 +108,7 @@ SECTIONS
}
/* might get freed after init */
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
__smp_locks = .;
*(.smp_locks)
@@ -120,10 +120,10 @@ SECTIONS
* after boot. Always make sure that ALIGN() directive is present after
* the section which contains __smp_alt_end.
*/
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
/* will be freed after init */
- . = ALIGN(4096); /* Init code and data */
+ . = ALIGN(PAGE_SIZE); /* Init code and data */
.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
__init_begin = .;
_sinittext = .;
@@ -174,23 +174,23 @@ SECTIONS
EXIT_DATA
}
#if defined(CONFIG_BLK_DEV_INITRD)
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
__initramfs_start = .;
*(.init.ramfs)
__initramfs_end = .;
}
#endif
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
__per_cpu_start = .;
*(.data.percpu)
*(.data.percpu.shared_aligned)
__per_cpu_end = .;
}
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
/* freed after init ends here */
-
+
.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
__init_end = .;
__bss_start = .; /* BSS */
@@ -200,7 +200,7 @@ SECTIONS
__bss_stop = .;
_end = . ;
/* This is where the kernel creates the early boot page tables */
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
pg0 = . ;
}
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 0992b9946c6..fab13229973 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -37,7 +37,7 @@ SECTIONS
KPROBES_TEXT
*(.fixup)
*(.gnu.warning)
- _etext = .; /* End of text section */
+ _etext = .; /* End of text section */
} :text = 0x9090
. = ALIGN(16); /* Exception table */
@@ -60,7 +60,7 @@ SECTIONS
__tracedata_end = .;
}
- . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */
+ . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */
/* Data */
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
@@ -119,7 +119,7 @@ SECTIONS
.vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3))
{ *(.vsyscall_3) }
- . = VSYSCALL_VIRT_ADDR + 4096;
+ . = VSYSCALL_VIRT_ADDR + PAGE_SIZE;
#undef VSYSCALL_ADDR
#undef VSYSCALL_PHYS_ADDR
@@ -129,28 +129,28 @@ SECTIONS
#undef VVIRT_OFFSET
#undef VVIRT
- . = ALIGN(8192); /* init_task */
+ . = ALIGN(THREAD_SIZE); /* init_task */
.data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
*(.data.init_task)
}:data.init
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
.data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
*(.data.page_aligned)
}
/* might get freed after init */
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
__smp_alt_begin = .;
__smp_locks = .;
.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
*(.smp_locks)
}
__smp_locks_end = .;
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
__smp_alt_end = .;
- . = ALIGN(4096); /* Init code and data */
+ . = ALIGN(PAGE_SIZE); /* Init code and data */
__init_begin = .;
.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
_sinittext = .;
@@ -191,7 +191,7 @@ SECTIONS
.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
*(.altinstructions)
}
- __alt_instructions_end = .;
+ __alt_instructions_end = .;
.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
*(.altinstr_replacement)
}
@@ -207,25 +207,25 @@ SECTIONS
/* vdso blob that is mapped into user space */
vdso_start = . ;
.vdso : AT(ADDR(.vdso) - LOAD_OFFSET) { *(.vdso) }
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
vdso_end = .;
#ifdef CONFIG_BLK_DEV_INITRD
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
__initramfs_start = .;
.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
__initramfs_end = .;
#endif
- PERCPU(4096)
+ PERCPU(PAGE_SIZE)
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
__init_end = .;
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
__nosave_begin = .;
.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
__nosave_end = .;
__bss_start = .; /* BSS */
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 3f824277458..b6be812fac0 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -44,11 +44,6 @@
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
#define __syscall_clobber "r11","cx","memory"
-#define __pa_vsymbol(x) \
- ({unsigned long v; \
- extern char __vsyscall_0; \
- asm("" : "=r" (v) : "0" (x)); \
- ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); })
/*
* vsyscall_gtod_data contains data that is :
@@ -102,7 +97,7 @@ static __always_inline void do_get_tz(struct timezone * tz)
static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
{
int ret;
- asm volatile("vsysc2: syscall"
+ asm volatile("syscall"
: "=a" (ret)
: "0" (__NR_gettimeofday),"D" (tv),"S" (tz)
: __syscall_clobber );
@@ -112,7 +107,7 @@ static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
static __always_inline long time_syscall(long *t)
{
long secs;
- asm volatile("vsysc1: syscall"
+ asm volatile("syscall"
: "=a" (secs)
: "0" (__NR_time),"D" (t) : __syscall_clobber);
return secs;
@@ -227,50 +222,10 @@ long __vsyscall(3) venosys_1(void)
}
#ifdef CONFIG_SYSCTL
-
-#define SYSCALL 0x050f
-#define NOP2 0x9090
-
-/*
- * NOP out syscall in vsyscall page when not needed.
- */
-static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- extern u16 vsysc1, vsysc2;
- u16 __iomem *map1;
- u16 __iomem *map2;
- int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
- if (!write)
- return ret;
- /* gcc has some trouble with __va(__pa()), so just do it this
- way. */
- map1 = ioremap(__pa_vsymbol(&vsysc1), 2);
- if (!map1)
- return -ENOMEM;
- map2 = ioremap(__pa_vsymbol(&vsysc2), 2);
- if (!map2) {
- ret = -ENOMEM;
- goto out;
- }
- if (!vsyscall_gtod_data.sysctl_enabled) {
- writew(SYSCALL, map1);
- writew(SYSCALL, map2);
- } else {
- writew(NOP2, map1);
- writew(NOP2, map2);
- }
- iounmap(map2);
-out:
- iounmap(map1);
- return ret;
-}
-
static ctl_table kernel_table2[] = {
{ .procname = "vsyscall64",
.data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = vsyscall_sysctl_change },
+ .mode = 0644 },
{}
};
@@ -279,7 +234,6 @@ static ctl_table kernel_root_table2[] = {
.child = kernel_table2 },
{}
};
-
#endif
/* Assume __initcall executes before all user space. Hopefully kmod
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 5afdde4895d..cccb38a5965 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -57,6 +57,7 @@
#include <linux/lguest_launcher.h>
#include <linux/virtio_console.h>
#include <linux/pm.h>
+#include <asm/lguest.h>
#include <asm/paravirt.h>
#include <asm/param.h>
#include <asm/page.h>
@@ -75,15 +76,6 @@
* behaving in simplified but equivalent ways. In particular, the Guest is the
* same kernel as the Host (or at least, built from the same source code). :*/
-/* Declarations for definitions in lguest_guest.S */
-extern char lguest_noirq_start[], lguest_noirq_end[];
-extern const char lgstart_cli[], lgend_cli[];
-extern const char lgstart_sti[], lgend_sti[];
-extern const char lgstart_popf[], lgend_popf[];
-extern const char lgstart_pushf[], lgend_pushf[];
-extern const char lgstart_iret[], lgend_iret[];
-extern void lguest_iret(void);
-
struct lguest_data lguest_data = {
.hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF },
.noirq_start = (u32)lguest_noirq_start,
@@ -489,7 +481,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
*pmdp = pmdval;
lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK,
- (__pa(pmdp)&(PAGE_SIZE-1))/4, 0);
+ (__pa(pmdp)&(PAGE_SIZE-1)), 0);
}
/* There are a couple of legacy places where the kernel sets a PTE, but we
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index fd42a4a095f..459b58a8a15 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -1,117 +1,129 @@
-/* Copyright 2002,2003 Andi Kleen, SuSE Labs.
+/*
+ * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
* Subject to the GNU Public License v.2
- *
+ *
* Wrappers of assembly checksum functions for x86-64.
*/
-
#include <asm/checksum.h>
#include <linux/module.h>
-/**
- * csum_partial_copy_from_user - Copy and checksum from user space.
- * @src: source address (user space)
+/**
+ * csum_partial_copy_from_user - Copy and checksum from user space.
+ * @src: source address (user space)
* @dst: destination address
* @len: number of bytes to be copied.
* @isum: initial sum that is added into the result (32bit unfolded)
* @errp: set to -EFAULT for an bad source address.
- *
+ *
* Returns an 32bit unfolded checksum of the buffer.
- * src and dst are best aligned to 64bits.
- */
+ * src and dst are best aligned to 64bits.
+ */
__wsum
csum_partial_copy_from_user(const void __user *src, void *dst,
int len, __wsum isum, int *errp)
-{
+{
might_sleep();
*errp = 0;
- if (likely(access_ok(VERIFY_READ,src, len))) {
- /* Why 6, not 7? To handle odd addresses aligned we
- would need to do considerable complications to fix the
- checksum which is defined as an 16bit accumulator. The
- fix alignment code is primarily for performance
- compatibility with 32bit and that will handle odd
- addresses slowly too. */
- if (unlikely((unsigned long)src & 6)) {
- while (((unsigned long)src & 6) && len >= 2) {
- __u16 val16;
- *errp = __get_user(val16, (const __u16 __user *)src);
- if (*errp)
- return isum;
- *(__u16 *)dst = val16;
- isum = (__force __wsum)add32_with_carry(
- (__force unsigned)isum, val16);
- src += 2;
- dst += 2;
- len -= 2;
- }
+
+ if (!likely(access_ok(VERIFY_READ, src, len)))
+ goto out_err;
+
+ /*
+ * Why 6, not 7? To handle odd addresses aligned we
+ * would need to do considerable complications to fix the
+ * checksum which is defined as an 16bit accumulator. The
+ * fix alignment code is primarily for performance
+ * compatibility with 32bit and that will handle odd
+ * addresses slowly too.
+ */
+ if (unlikely((unsigned long)src & 6)) {
+ while (((unsigned long)src & 6) && len >= 2) {
+ __u16 val16;
+
+ *errp = __get_user(val16, (const __u16 __user *)src);
+ if (*errp)
+ return isum;
+
+ *(__u16 *)dst = val16;
+ isum = (__force __wsum)add32_with_carry(
+ (__force unsigned)isum, val16);
+ src += 2;
+ dst += 2;
+ len -= 2;
}
- isum = csum_partial_copy_generic((__force const void *)src,
- dst, len, isum, errp, NULL);
- if (likely(*errp == 0))
- return isum;
- }
+ }
+ isum = csum_partial_copy_generic((__force const void *)src,
+ dst, len, isum, errp, NULL);
+ if (unlikely(*errp))
+ goto out_err;
+
+ return isum;
+
+out_err:
*errp = -EFAULT;
- memset(dst,0,len);
- return isum;
-}
+ memset(dst, 0, len);
+ return isum;
+}
EXPORT_SYMBOL(csum_partial_copy_from_user);
-/**
- * csum_partial_copy_to_user - Copy and checksum to user space.
+/**
+ * csum_partial_copy_to_user - Copy and checksum to user space.
* @src: source address
* @dst: destination address (user space)
* @len: number of bytes to be copied.
* @isum: initial sum that is added into the result (32bit unfolded)
* @errp: set to -EFAULT for an bad destination address.
- *
+ *
* Returns an 32bit unfolded checksum of the buffer.
* src and dst are best aligned to 64bits.
- */
+ */
__wsum
csum_partial_copy_to_user(const void *src, void __user *dst,
int len, __wsum isum, int *errp)
-{
+{
might_sleep();
+
if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) {
*errp = -EFAULT;
- return 0;
+ return 0;
}
if (unlikely((unsigned long)dst & 6)) {
- while (((unsigned long)dst & 6) && len >= 2) {
+ while (((unsigned long)dst & 6) && len >= 2) {
__u16 val16 = *(__u16 *)src;
+
isum = (__force __wsum)add32_with_carry(
(__force unsigned)isum, val16);
*errp = __put_user(val16, (__u16 __user *)dst);
if (*errp)
return isum;
- src += 2;
- dst += 2;
+ src += 2;
+ dst += 2;
len -= 2;
}
}
*errp = 0;
- return csum_partial_copy_generic(src, (void __force *)dst,len,isum,NULL,errp);
-}
-
+ return csum_partial_copy_generic(src, (void __force *)dst,
+ len, isum, NULL, errp);
+}
EXPORT_SYMBOL(csum_partial_copy_to_user);
-/**
+/**
* csum_partial_copy_nocheck - Copy and checksum.
* @src: source address
* @dst: destination address
* @len: number of bytes to be copied.
* @isum: initial sum that is added into the result (32bit unfolded)
- *
+ *
* Returns an 32bit unfolded checksum of the buffer.
- */
+ */
__wsum
csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
-{
- return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL);
-}
+{
+ return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
+}
EXPORT_SYMBOL(csum_partial_copy_nocheck);
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
@@ -119,17 +131,20 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
__u32 len, unsigned short proto, __wsum sum)
{
__u64 rest, sum64;
-
+
rest = (__force __u64)htonl(len) + (__force __u64)htons(proto) +
(__force __u64)sum;
- asm(" addq (%[saddr]),%[sum]\n"
- " adcq 8(%[saddr]),%[sum]\n"
- " adcq (%[daddr]),%[sum]\n"
- " adcq 8(%[daddr]),%[sum]\n"
- " adcq $0,%[sum]\n"
- : [sum] "=r" (sum64)
- : "[sum]" (rest),[saddr] "r" (saddr), [daddr] "r" (daddr));
- return csum_fold((__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
-}
+ asm(" addq (%[saddr]),%[sum]\n"
+ " adcq 8(%[saddr]),%[sum]\n"
+ " adcq (%[daddr]),%[sum]\n"
+ " adcq 8(%[daddr]),%[sum]\n"
+ " adcq $0,%[sum]\n"
+
+ : [sum] "=r" (sum64)
+ : "[sum]" (rest), [saddr] "r" (saddr), [daddr] "r" (daddr));
+
+ return csum_fold(
+ (__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
+}
EXPORT_SYMBOL(csum_ipv6_magic);
diff --git a/arch/x86/lib/io_64.c b/arch/x86/lib/io_64.c
index 87b4a4e1803..3f1eb59b5f0 100644
--- a/arch/x86/lib/io_64.c
+++ b/arch/x86/lib/io_64.c
@@ -1,23 +1,25 @@
#include <linux/string.h>
-#include <asm/io.h>
#include <linux/module.h>
+#include <asm/io.h>
-void __memcpy_toio(unsigned long dst,const void*src,unsigned len)
+void __memcpy_toio(unsigned long dst, const void *src, unsigned len)
{
- __inline_memcpy((void *) dst,src,len);
+ __inline_memcpy((void *)dst, src, len);
}
EXPORT_SYMBOL(__memcpy_toio);
-void __memcpy_fromio(void *dst,unsigned long src,unsigned len)
+void __memcpy_fromio(void *dst, unsigned long src, unsigned len)
{
- __inline_memcpy(dst,(const void *) src,len);
+ __inline_memcpy(dst, (const void *)src, len);
}
EXPORT_SYMBOL(__memcpy_fromio);
void memset_io(volatile void __iomem *a, int b, size_t c)
{
- /* XXX: memset can mangle the IO patterns quite a bit.
- perhaps it would be better to use a dumb one */
- memset((void *)a,b,c);
+ /*
+ * TODO: memset can mangle the IO patterns quite a bit.
+ * perhaps it would be better to use a dumb one:
+ */
+ memset((void *)a, b, c);
}
EXPORT_SYMBOL(memset_io);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 621afb6343d..fdc667422df 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -186,7 +186,7 @@ static int bad_address(void *p)
}
#endif
-void dump_pagetable(unsigned long address)
+static void dump_pagetable(unsigned long address)
{
#ifdef CONFIG_X86_32
__typeof__(pte_val(__pte(0))) page;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8106bba41ec..ee1091a4696 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -47,6 +47,7 @@
#include <asm/sections.h>
#include <asm/paravirt.h>
#include <asm/setup.h>
+#include <asm/cacheflush.h>
unsigned int __VMALLOC_RESERVE = 128 << 20;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b59fc238151..a02a14f0f32 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -45,6 +45,7 @@
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/numa.h>
+#include <asm/cacheflush.h>
const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
@@ -170,6 +171,34 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
__flush_tlb_one(vaddr);
}
+/*
+ * The head.S code sets up the kernel high mapping:
+ *
+ * from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
+ *
+ * phys_addr holds the negative offset to the kernel, which is added
+ * to the compile time generated pmds. This results in invalid pmds up
+ * to the point where we hit the physaddr 0 mapping.
+ *
+ * We limit the mappings to the region from _text to _end. _end is
+ * rounded up to the 2MB boundary. This catches the invalid pmds as
+ * well, as they are located before _text:
+ */
+void __init cleanup_highmap(void)
+{
+ unsigned long vaddr = __START_KERNEL_map;
+ unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1;
+ pmd_t *pmd = level2_kernel_pgt;
+ pmd_t *last_pmd = pmd + PTRS_PER_PMD;
+
+ for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) {
+ if (!pmd_present(*pmd))
+ continue;
+ if (vaddr < (unsigned long) _text || vaddr > end)
+ set_pmd(pmd, __pmd(0));
+ }
+}
+
/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
@@ -487,14 +516,6 @@ void __init mem_init(void)
/* clear_bss() already clear the empty_zero_page */
- /* temporary debugging - double check it's true: */
- {
- int i;
-
- for (i = 0; i < 1024; i++)
- WARN_ON_ONCE(empty_zero_page[i]);
- }
-
reservedpages = 0;
/* this will put all low memory onto the freelists */
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index a4897a85268..882328efc3d 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -42,6 +42,22 @@ int page_is_ram(unsigned long pagenr)
unsigned long addr, end;
int i;
+ /*
+ * A special case is the first 4Kb of memory;
+ * This is a BIOS owned area, not kernel ram, but generally
+ * not listed as such in the E820 table.
+ */
+ if (pagenr == 0)
+ return 0;
+
+ /*
+ * Second special case: Some BIOSen report the PC BIOS
+ * area (640->1Mb) as ram even though it is not.
+ */
+ if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
+ pagenr < (BIOS_END >> PAGE_SHIFT))
+ return 0;
+
for (i = 0; i < e820.nr_map; i++) {
/*
* Not usable memory:
@@ -51,14 +67,6 @@ int page_is_ram(unsigned long pagenr)
addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
- /*
- * Sanity check: Some BIOSen report areas as RAM that
- * are not. Notably the 640->1Mb area, which is the
- * PCI BIOS area.
- */
- if (addr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
- end < (BIOS_END >> PAGE_SHIFT))
- continue;
if ((pagenr >= addr) && (pagenr < end))
return 1;
@@ -126,6 +134,8 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
return NULL;
}
+ WARN_ON_ONCE(page_is_ram(pfn));
+
switch (mode) {
case IOR_MODE_UNCACHED:
default:
@@ -265,7 +275,9 @@ static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
- pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)];
+ /* Don't assume we're using swapper_pg_dir at this point */
+ pgd_t *base = __va(read_cr3());
+ pgd_t *pgd = &base[pgd_index(addr)];
pud_t *pud = pud_offset(pgd, addr);
pmd_t *pmd = pmd_offset(pud, addr);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 1aecc658cd7..59898fb0a4a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -494,11 +494,13 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
int i;
nodes_clear(node_possible_map);
+ nodes_clear(node_online_map);
#ifdef CONFIG_NUMA_EMU
if (cmdline && !numa_emulation(start_pfn, end_pfn))
return;
nodes_clear(node_possible_map);
+ nodes_clear(node_online_map);
#endif
#ifdef CONFIG_ACPI_NUMA
@@ -506,6 +508,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
end_pfn << PAGE_SHIFT))
return;
nodes_clear(node_possible_map);
+ nodes_clear(node_online_map);
#endif
#ifdef CONFIG_K8_NUMA
@@ -513,6 +516,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
end_pfn<<PAGE_SHIFT))
return;
nodes_clear(node_possible_map);
+ nodes_clear(node_online_map);
#endif
printk(KERN_INFO "%s\n",
numa_off ? "NUMA turned off" : "No NUMA configuration found");
@@ -524,7 +528,6 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
memnode_shift = 63;
memnodemap = memnode.embedded_map;
memnodemap[0] = 0;
- nodes_clear(node_online_map);
node_set_online(0);
node_set(0, node_possible_map);
for (i = 0; i < NR_CPUS; i++)
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index ed820160035..75f1b109aae 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -40,7 +40,6 @@ struct split_state {
static int print_split(struct split_state *s)
{
long i, expected, missed = 0;
- int printed = 0;
int err = 0;
s->lpg = s->gpg = s->spg = s->exec = 0;
@@ -53,12 +52,6 @@ static int print_split(struct split_state *s)
pte = lookup_address(addr, &level);
if (!pte) {
- if (!printed) {
- dump_pagetable(addr);
- printk(KERN_INFO "CPA %lx no pte level %d\n",
- addr, level);
- printed = 1;
- }
missed++;
i++;
continue;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 440210a2277..14e48b5a94b 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,7 @@
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
+#include <asm/proto.h>
/*
* The current flushing context - we pass it instead of 5 arguments:
@@ -26,8 +27,29 @@ struct cpa_data {
pgprot_t mask_clr;
int numpages;
int flushtlb;
+ unsigned long pfn;
};
+#ifdef CONFIG_X86_64
+
+static inline unsigned long highmap_start_pfn(void)
+{
+ return __pa(_text) >> PAGE_SHIFT;
+}
+
+static inline unsigned long highmap_end_pfn(void)
+{
+ return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
+}
+
+#endif
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+# define debug_pagealloc 1
+#else
+# define debug_pagealloc 0
+#endif
+
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
@@ -123,29 +145,14 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
}
}
-#define HIGH_MAP_START __START_KERNEL_map
-#define HIGH_MAP_END (__START_KERNEL_map + KERNEL_TEXT_SIZE)
-
-
-/*
- * Converts a virtual address to a X86-64 highmap address
- */
-static unsigned long virt_to_highmap(void *address)
-{
-#ifdef CONFIG_X86_64
- return __pa((unsigned long)address) + HIGH_MAP_START - phys_base;
-#else
- return (unsigned long)address;
-#endif
-}
-
/*
* Certain areas of memory on x86 require very specific protection flags,
* for example the BIOS area or kernel text. Callers don't always get this
* right (again, ioremap() on BIOS memory is not uncommon) so this function
* checks and fixes these known static required protection bits.
*/
-static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
+static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
+ unsigned long pfn)
{
pgprot_t forbidden = __pgprot(0);
@@ -153,30 +160,23 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
* The BIOS area between 640k and 1Mb needs to be executable for
* PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
*/
- if (within(__pa(address), BIOS_BEGIN, BIOS_END))
+ if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_NX;
/*
* The kernel text needs to be executable for obvious reasons
- * Does not cover __inittext since that is gone later on
+ * Does not cover __inittext since that is gone later on. On
+ * 64bit we do not enforce !NX on the low mapping
*/
if (within(address, (unsigned long)_text, (unsigned long)_etext))
pgprot_val(forbidden) |= _PAGE_NX;
- /*
- * Do the same for the x86-64 high kernel mapping
- */
- if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
- pgprot_val(forbidden) |= _PAGE_NX;
- /* The .rodata section needs to be read-only */
- if (within(address, (unsigned long)__start_rodata,
- (unsigned long)__end_rodata))
- pgprot_val(forbidden) |= _PAGE_RW;
/*
- * Do the same for the x86-64 high kernel mapping
+ * The .rodata section needs to be read-only. Using the pfn
+ * catches all aliases.
*/
- if (within(address, virt_to_highmap(__start_rodata),
- virt_to_highmap(__end_rodata)))
+ if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
+ __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_RW;
prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
@@ -253,7 +253,7 @@ static int
try_preserve_large_page(pte_t *kpte, unsigned long address,
struct cpa_data *cpa)
{
- unsigned long nextpage_addr, numpages, pmask, psize, flags, addr;
+ unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
pte_t new_pte, old_pte, *tmp;
pgprot_t old_prot, new_prot;
int i, do_split = 1;
@@ -275,8 +275,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
break;
#ifdef CONFIG_X86_64
case PG_LEVEL_1G:
- psize = PMD_PAGE_SIZE;
- pmask = PMD_PAGE_MASK;
+ psize = PUD_PAGE_SIZE;
+ pmask = PUD_PAGE_MASK;
break;
#endif
default:
@@ -301,7 +301,15 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
- new_prot = static_protections(new_prot, address);
+
+ /*
+ * old_pte points to the large page base address. So we need
+ * to add the offset of the virtual address:
+ */
+ pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
+ cpa->pfn = pfn;
+
+ new_prot = static_protections(new_prot, address, pfn);
/*
* We need to check the full range, whether
@@ -309,8 +317,9 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* the pages in the range we try to preserve:
*/
addr = address + PAGE_SIZE;
- for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE) {
- pgprot_t chk_prot = static_protections(new_prot, addr);
+ pfn++;
+ for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
+ pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
if (pgprot_val(chk_prot) != pgprot_val(new_prot))
goto out_unlock;
@@ -352,45 +361,48 @@ out_unlock:
static LIST_HEAD(page_pool);
static unsigned long pool_size, pool_pages, pool_low;
-static unsigned long pool_used, pool_failed, pool_refill;
+static unsigned long pool_used, pool_failed;
-static void cpa_fill_pool(void)
+static void cpa_fill_pool(struct page **ret)
{
- struct page *p;
gfp_t gfp = GFP_KERNEL;
+ unsigned long flags;
+ struct page *p;
- /* Do not allocate from interrupt context */
- if (in_irq() || irqs_disabled())
- return;
/*
- * Check unlocked. I does not matter when we have one more
- * page in the pool. The bit lock avoids recursive pool
- * allocations:
+ * Avoid recursion (on debug-pagealloc) and also signal
+ * our priority to get to these pagetables:
*/
- if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
+ if (current->flags & PF_MEMALLOC)
return;
+ current->flags |= PF_MEMALLOC;
-#ifdef CONFIG_DEBUG_PAGEALLOC
/*
- * We could do:
- * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
- * but this fails on !PREEMPT kernels
+ * Allocate atomically from atomic contexts:
*/
- gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
-#endif
+ if (in_atomic() || irqs_disabled() || debug_pagealloc)
+ gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
- while (pool_pages < pool_size) {
+ while (pool_pages < pool_size || (ret && !*ret)) {
p = alloc_pages(gfp, 0);
if (!p) {
pool_failed++;
break;
}
- spin_lock_irq(&pgd_lock);
+ /*
+ * If the call site needs a page right now, provide it:
+ */
+ if (ret && !*ret) {
+ *ret = p;
+ continue;
+ }
+ spin_lock_irqsave(&pgd_lock, flags);
list_add(&p->lru, &page_pool);
pool_pages++;
- spin_unlock_irq(&pgd_lock);
+ spin_unlock_irqrestore(&pgd_lock, flags);
}
- clear_bit_unlock(0, &pool_refill);
+
+ current->flags &= ~PF_MEMALLOC;
}
#define SHIFT_MB (20 - PAGE_SHIFT)
@@ -411,11 +423,15 @@ void __init cpa_init(void)
* GiB. Shift MiB to Gib and multiply the result by
* POOL_PAGES_PER_GB:
*/
- gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
- pool_size = POOL_PAGES_PER_GB * gb;
+ if (debug_pagealloc) {
+ gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
+ pool_size = POOL_PAGES_PER_GB * gb;
+ } else {
+ pool_size = 1;
+ }
pool_low = pool_size;
- cpa_fill_pool();
+ cpa_fill_pool(NULL);
printk(KERN_DEBUG
"CPA: page pool initialized %lu of %lu pages preallocated\n",
pool_pages, pool_size);
@@ -437,16 +453,20 @@ static int split_large_page(pte_t *kpte, unsigned long address)
spin_lock_irqsave(&pgd_lock, flags);
if (list_empty(&page_pool)) {
spin_unlock_irqrestore(&pgd_lock, flags);
- return -ENOMEM;
+ base = NULL;
+ cpa_fill_pool(&base);
+ if (!base)
+ return -ENOMEM;
+ spin_lock_irqsave(&pgd_lock, flags);
+ } else {
+ base = list_first_entry(&page_pool, struct page, lru);
+ list_del(&base->lru);
+ pool_pages--;
+
+ if (pool_pages < pool_low)
+ pool_low = pool_pages;
}
- base = list_first_entry(&page_pool, struct page, lru);
- list_del(&base->lru);
- pool_pages--;
-
- if (pool_pages < pool_low)
- pool_low = pool_pages;
-
/*
* Check for races, another CPU might have split this page
* up for us already:
@@ -505,46 +525,46 @@ out_unlock:
return 0;
}
-static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
+static int __change_page_attr(struct cpa_data *cpa, int primary)
{
+ unsigned long address = cpa->vaddr;
int do_split, err;
unsigned int level;
- struct page *kpte_page;
- pte_t *kpte;
+ pte_t *kpte, old_pte;
repeat:
kpte = lookup_address(address, &level);
if (!kpte)
- return -EINVAL;
+ return primary ? -EINVAL : 0;
- kpte_page = virt_to_page(kpte);
- BUG_ON(PageLRU(kpte_page));
- BUG_ON(PageCompound(kpte_page));
+ old_pte = *kpte;
+ if (!pte_val(old_pte)) {
+ if (!primary)
+ return 0;
+ printk(KERN_WARNING "CPA: called for zero pte. "
+ "vaddr = %lx cpa->vaddr = %lx\n", address,
+ cpa->vaddr);
+ WARN_ON(1);
+ return -EINVAL;
+ }
if (level == PG_LEVEL_4K) {
- pte_t new_pte, old_pte = *kpte;
+ pte_t new_pte;
pgprot_t new_prot = pte_pgprot(old_pte);
-
- if(!pte_val(old_pte)) {
- printk(KERN_WARNING "CPA: called for zero pte. "
- "vaddr = %lx cpa->vaddr = %lx\n", address,
- cpa->vaddr);
- WARN_ON(1);
- return -EINVAL;
- }
+ unsigned long pfn = pte_pfn(old_pte);
pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
- new_prot = static_protections(new_prot, address);
+ new_prot = static_protections(new_prot, address, pfn);
/*
* We need to keep the pfn from the existing PTE,
* after all we're only going to change it's attributes
* not the memory it points to
*/
- new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
-
+ new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
+ cpa->pfn = pfn;
/*
* Do we really change anything ?
*/
@@ -581,67 +601,59 @@ repeat:
return err;
}
-/**
- * change_page_attr_addr - Change page table attributes in linear mapping
- * @address: Virtual address in linear mapping.
- * @prot: New page table attribute (PAGE_*)
- *
- * Change page attributes of a page in the direct mapping. This is a variant
- * of change_page_attr() that also works on memory holes that do not have
- * mem_map entry (pfn_valid() is false).
- *
- * See change_page_attr() documentation for more details.
- *
- * Modules and drivers should use the set_memory_* APIs instead.
- */
-static int change_page_attr_addr(struct cpa_data *cpa)
+static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
+
+static int cpa_process_alias(struct cpa_data *cpa)
{
- int err;
- unsigned long address = cpa->vaddr;
+ struct cpa_data alias_cpa;
+ int ret = 0;
-#ifdef CONFIG_X86_64
- unsigned long phys_addr = __pa(address);
+ if (cpa->pfn > max_pfn_mapped)
+ return 0;
/*
- * If we are inside the high mapped kernel range, then we
- * fixup the low mapping first. __va() returns the virtual
- * address in the linear mapping:
+ * No need to redo, when the primary call touched the direct
+ * mapping already:
*/
- if (within(address, HIGH_MAP_START, HIGH_MAP_END))
- address = (unsigned long) __va(phys_addr);
-#endif
+ if (!within(cpa->vaddr, PAGE_OFFSET,
+ PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
+
+ alias_cpa = *cpa;
+ alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
- err = __change_page_attr(address, cpa);
- if (err)
- return err;
+ ret = __change_page_attr_set_clr(&alias_cpa, 0);
+ }
#ifdef CONFIG_X86_64
+ if (ret)
+ return ret;
+ /*
+ * No need to redo, when the primary call touched the high
+ * mapping already:
+ */
+ if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end))
+ return 0;
+
/*
* If the physical address is inside the kernel map, we need
* to touch the high mapped kernel as well:
*/
- if (within(phys_addr, 0, KERNEL_TEXT_SIZE)) {
- /*
- * Calc the high mapping address. See __phys_addr()
- * for the non obvious details.
- *
- * Note that NX and other required permissions are
- * checked in static_protections().
- */
- address = phys_addr + HIGH_MAP_START - phys_base;
+ if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn()))
+ return 0;
- /*
- * Our high aliases are imprecise, because we check
- * everything between 0 and KERNEL_TEXT_SIZE, so do
- * not propagate lookup failures back to users:
- */
- __change_page_attr(address, cpa);
- }
+ alias_cpa = *cpa;
+ alias_cpa.vaddr =
+ (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
+
+ /*
+ * The high mapping range is imprecise, so ignore the return value.
+ */
+ __change_page_attr_set_clr(&alias_cpa, 0);
#endif
- return err;
+ return ret;
}
-static int __change_page_attr_set_clr(struct cpa_data *cpa)
+static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
{
int ret, numpages = cpa->numpages;
@@ -651,10 +663,17 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa)
* preservation check.
*/
cpa->numpages = numpages;
- ret = change_page_attr_addr(cpa);
+
+ ret = __change_page_attr(cpa, checkalias);
if (ret)
return ret;
+ if (checkalias) {
+ ret = cpa_process_alias(cpa);
+ if (ret)
+ return ret;
+ }
+
/*
* Adjust the number of pages with the result of the
* CPA operation. Either a large page has been
@@ -677,7 +696,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
pgprot_t mask_set, pgprot_t mask_clr)
{
struct cpa_data cpa;
- int ret, cache;
+ int ret, cache, checkalias;
/*
* Check, if we are requested to change a not supported
@@ -688,13 +707,25 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
return 0;
+ /* Ensure we are PAGE_SIZE aligned */
+ if (addr & ~PAGE_MASK) {
+ addr &= PAGE_MASK;
+ /*
+ * People should not be passing in unaligned addresses:
+ */
+ WARN_ON_ONCE(1);
+ }
+
cpa.vaddr = addr;
cpa.numpages = numpages;
cpa.mask_set = mask_set;
cpa.mask_clr = mask_clr;
cpa.flushtlb = 0;
- ret = __change_page_attr_set_clr(&cpa);
+ /* No alias checking for _NX bit modifications */
+ checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
+
+ ret = __change_page_attr_set_clr(&cpa, checkalias);
/*
* Check whether we really changed something:
@@ -720,7 +751,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
cpa_flush_all(cache);
out:
- cpa_fill_pool();
+ cpa_fill_pool(NULL);
+
return ret;
}
@@ -832,7 +864,7 @@ static int __set_pages_p(struct page *page, int numpages)
.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
.mask_clr = __pgprot(0)};
- return __change_page_attr_set_clr(&cpa);
+ return __change_page_attr_set_clr(&cpa, 1);
}
static int __set_pages_np(struct page *page, int numpages)
@@ -842,7 +874,7 @@ static int __set_pages_np(struct page *page, int numpages)
.mask_set = __pgprot(0),
.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
- return __change_page_attr_set_clr(&cpa);
+ return __change_page_attr_set_clr(&cpa, 1);
}
void kernel_map_pages(struct page *page, int numpages, int enable)
@@ -861,8 +893,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
return;
/*
- * The return value is ignored - the calls cannot fail,
- * large pages are disabled at boot time:
+ * The return value is ignored as the calls cannot fail.
+ * Large pages are kept enabled at boot time, and are
+ * split up quickly with DEBUG_PAGEALLOC. If a splitup
+ * fails here (due to temporary memory shortage) no damage
+ * is done because we just keep the largepage intact up
+ * to the next attempt when it will likely be split up:
*/
if (enable)
__set_pages_p(page, numpages);
@@ -879,9 +915,26 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
* Try to refill the page pool here. We can do this only after
* the tlb flush.
*/
- cpa_fill_pool();
+ cpa_fill_pool(NULL);
}
-#endif
+
+#ifdef CONFIG_HIBERNATION
+
+bool kernel_page_present(struct page *page)
+{
+ unsigned int level;
+ pte_t *pte;
+
+ if (PageHighMem(page))
+ return false;
+
+ pte = lookup_address((unsigned long)page_address(page), &level);
+ return (pte_val(*pte) & _PAGE_PRESENT);
+}
+
+#endif /* CONFIG_HIBERNATION */
+
+#endif /* CONFIG_DEBUG_PAGEALLOC */
/*
* The testcases use internal knowledge of the implementation that shouldn't
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index ecd91ea8a8a..845001c617c 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -166,7 +166,8 @@ static inline int save_add_info(void) {return 0;}
* Both SPARSE and RESERVE need nodes_add information.
* This code supports one contiguous hot add area per node.
*/
-static int reserve_hotadd(int node, unsigned long start, unsigned long end)
+static int __init
+reserve_hotadd(int node, unsigned long start, unsigned long end)
{
unsigned long s_pfn = start >> PAGE_SHIFT;
unsigned long e_pfn = end >> PAGE_SHIFT;
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index b7c67a187b6..7b6e3bb9b28 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -541,7 +541,7 @@ void pcibios_disable_device (struct pci_dev *dev)
pcibios_disable_irq(dev);
}
-struct pci_bus *pci_scan_bus_with_sysdata(int busno)
+struct pci_bus *__devinit pci_scan_bus_with_sysdata(int busno)
{
struct pci_bus *bus = NULL;
struct pci_sysdata *sd;
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index ed07ce6c171..a8715861877 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -583,6 +583,10 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
case PCI_DEVICE_ID_INTEL_ICH9_4:
case PCI_DEVICE_ID_INTEL_ICH9_5:
case PCI_DEVICE_ID_INTEL_TOLAPAI_0:
+ case PCI_DEVICE_ID_INTEL_ICH10_0:
+ case PCI_DEVICE_ID_INTEL_ICH10_1:
+ case PCI_DEVICE_ID_INTEL_ICH10_2:
+ case PCI_DEVICE_ID_INTEL_ICH10_3:
r->name = "PIIX/ICH";
r->get = pirq_piix_get;
r->set = pirq_piix_set;
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 1deb3244b99..000415947d9 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -20,6 +20,7 @@
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>
ENTRY(swsusp_arch_suspend)
movq $saved_context, %rax
@@ -60,7 +61,7 @@ ENTRY(restore_image)
/* Flush TLB */
movq mmu_cr4_features(%rip), %rax
movq %rax, %rdx
- andq $~(1<<7), %rdx # PGE
+ andq $~(X86_CR4_PGE), %rdx
movq %rdx, %cr4; # turn off PGE
movq %cr3, %rcx; # flush TLB
movq %rcx, %cr3;
@@ -112,7 +113,7 @@ ENTRY(restore_registers)
/* Flush TLB, including "global" things (vmalloc) */
movq mmu_cr4_features(%rip), %rax
movq %rax, %rdx
- andq $~(1<<7), %rdx; # PGE
+ andq $~(X86_CR4_PGE), %rdx
movq %rdx, %cr4; # turn off PGE
movq %cr3, %rcx; # flush TLB
movq %rcx, %cr3
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index d28dda57470..b8bd0c4aa02 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -7,7 +7,7 @@ VDSO32-$(CONFIG_X86_32) := y
VDSO32-$(CONFIG_COMPAT) := y
vdso-install-$(VDSO64-y) += vdso.so
-vdso-install-$(VDSO32-y) += $(vdso32-y:=.so)
+vdso-install-$(VDSO32-y) += $(vdso32-images)
# files to link into the vdso
@@ -48,7 +48,7 @@ obj-$(VDSO64-y) += vdso-syms.lds
# Match symbols in the DSO that look like VDSO*; produce a file of constants.
#
sed-vdsosym := -e 's/^00*/0/' \
- -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
+ -e 's/^\([[:xdigit:]]*\) . \(VDSO[[:alnum:]_]*\)$$/\2 = 0x\1;/p'
quiet_cmd_vdsosym = VDSOSYM $@
cmd_vdsosym = $(NM) $< | sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
@@ -63,6 +63,8 @@ vdso32.so-$(CONFIG_X86_32) += int80
vdso32.so-$(CONFIG_COMPAT) += syscall
vdso32.so-$(VDSO32-y) += sysenter
+vdso32-images = $(vdso32.so-y:%=vdso32-%.so)
+
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1
@@ -71,21 +73,21 @@ VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1
override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
targets += vdso32/vdso32.lds
-targets += $(vdso32.so-y:%=vdso32-%.so.dbg) $(vdso32.so-y:%=vdso32-%.so)
+targets += $(vdso32-images) $(vdso32-images:=.dbg)
targets += vdso32/note.o $(vdso32.so-y:%=vdso32/%.o)
-extra-y += $(vdso32.so-y:%=vdso32-%.so)
+extra-y += $(vdso32-images)
-$(obj)/vdso32.o: $(vdso32.so-y:%=$(obj)/vdso32-%.so)
+$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
-$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
-$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): asflags-$(CONFIG_X86_64) += -m32
+$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
+$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
-$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
- $(obj)/vdso32/vdso32.lds \
- $(obj)/vdso32/note.o \
- $(obj)/vdso32/%.o
+$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
+ $(obj)/vdso32/vdso32.lds \
+ $(obj)/vdso32/note.o \
+ $(obj)/vdso32/%.o
$(call if_changed,vdso)
# Make vdso32-*-syms.lds from each image, and then make sure they match.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index de647bc6e74..49e5358f481 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -798,6 +798,10 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
* added to the table can be prepared properly for Xen.
*/
xen_write_cr3(__pa(base));
+
+ /* Unpin initial Xen pagetable */
+ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
+ PFN_DOWN(__pa(xen_start_info->pt_base)));
}
static __init void xen_pagetable_setup_done(pgd_t *base)