diff options
Diffstat (limited to 'arch/i386')
48 files changed, 818 insertions, 375 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 5b1a7d46d1d..b008fb0cd7b 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -80,6 +80,7 @@ config X86_VOYAGER config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" + select SMP select NUMA help This option is used for getting Linux to run on a (IBM/Sequent) NUMA @@ -400,6 +401,7 @@ choice config NOHIGHMEM bool "off" + depends on !X86_NUMAQ ---help--- Linux can use up to 64 Gigabytes of physical memory on x86 systems. However, the address space of 32-bit x86 processors is only 4 @@ -436,6 +438,7 @@ config NOHIGHMEM config HIGHMEM4G bool "4GB" + depends on !X86_NUMAQ help Select this if you have a 32-bit processor and between 1 and 4 gigabytes of physical RAM. @@ -503,10 +506,6 @@ config NUMA default n if X86_PC default y if (X86_NUMAQ || X86_SUMMIT) -# Need comments to help the hapless user trying to turn on NUMA support -comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support" - depends on X86_NUMAQ && (!HIGHMEM64G || !SMP) - comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI) @@ -660,13 +659,18 @@ config BOOT_IOREMAP default y config REGPARM - bool "Use register arguments (EXPERIMENTAL)" - depends on EXPERIMENTAL - default n + bool "Use register arguments" + default y help - Compile the kernel with -mregparm=3. This uses a different ABI - and passes the first three arguments of a function call in registers. - This will probably break binary only modules. + Compile the kernel with -mregparm=3. This instructs gcc to use + a more efficient function call ABI which passes the first three + arguments of a function call via registers, which results in denser + and faster code. + + If this option is disabled, then the default ABI of passing + arguments via the stack is used. + + If unsure, say Y. config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" @@ -733,7 +737,7 @@ config PHYSICAL_START config HOTPLUG_CPU bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" - depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER + depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER && !X86_PC ---help--- Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug index bf32ecc9ad0..6e97df6979e 100644 --- a/arch/i386/Kconfig.debug +++ b/arch/i386/Kconfig.debug @@ -31,12 +31,21 @@ config DEBUG_STACK_USAGE This option will slow down process creation somewhat. +config STACK_BACKTRACE_COLS + int "Stack backtraces per line" if DEBUG_KERNEL + range 1 3 + default 2 + help + Selects how many stack backtrace entries per line to display. + + This can save screen space when displaying traces. + comment "Page alloc debug is incompatible with Software Suspend on i386" depends on DEBUG_KERNEL && SOFTWARE_SUSPEND config DEBUG_PAGEALLOC - bool "Page alloc debugging" - depends on DEBUG_KERNEL && !SOFTWARE_SUSPEND + bool "Debug page memory allocations" + depends on DEBUG_KERNEL && !SOFTWARE_SUSPEND && !HUGETLBFS help Unmap pages from the kernel linear mapping after free_pages(). This results in a large slowdown, but helps to find certain types diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 36bef6543ac..ff6973a85c8 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -99,8 +99,8 @@ AFLAGS += $(mflags-y) boot := arch/i386/boot -.PHONY: zImage bzImage compressed zlilo bzlilo \ - zdisk bzdisk fdimage fdimage144 fdimage288 install +PHONY += zImage bzImage compressed zlilo bzlilo \ + zdisk bzdisk fdimage fdimage144 fdimage288 install all: bzImage diff --git a/arch/i386/boot/edd.S b/arch/i386/boot/edd.S index d8d69f2b911..4b84ea216f2 100644 --- a/arch/i386/boot/edd.S +++ b/arch/i386/boot/edd.S @@ -76,6 +76,8 @@ edd_mbr_sig_read: popw %es popw %bx jc edd_mbr_sig_done # on failure, we're done. + cmpb $0, %ah # some BIOSes do not set CF + jne edd_mbr_sig_done # on failure, we're done. movl (EDDBUF+EDD_MBR_SIG_OFFSET), %eax # read sig out of the MBR movl %eax, (%bx) # store success incb (EDD_MBR_SIG_NR_BUF) # note that we stored something diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 65656c033d7..5b9ed21216c 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - quirks.o i8237.o topology.o + quirks.o i8237.o topology.o alternative.o obj-y += cpu/ obj-y += timers/ diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c new file mode 100644 index 00000000000..5cbd6f99fb2 --- /dev/null +++ b/arch/i386/kernel/alternative.c @@ -0,0 +1,321 @@ +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <asm/alternative.h> +#include <asm/sections.h> + +#define DEBUG 0 +#if DEBUG +# define DPRINTK(fmt, args...) printk(fmt, args) +#else +# define DPRINTK(fmt, args...) +#endif + +/* Use inline assembly to define this because the nops are defined + as inline assembly strings in the include files and we cannot + get them easily into strings. */ +asm("\t.data\nintelnops: " + GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 + GENERIC_NOP7 GENERIC_NOP8); +asm("\t.data\nk8nops: " + K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 + K8_NOP7 K8_NOP8); +asm("\t.data\nk7nops: " + K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 + K7_NOP7 K7_NOP8); + +extern unsigned char intelnops[], k8nops[], k7nops[]; +static unsigned char *intel_nops[ASM_NOP_MAX+1] = { + NULL, + intelnops, + intelnops + 1, + intelnops + 1 + 2, + intelnops + 1 + 2 + 3, + intelnops + 1 + 2 + 3 + 4, + intelnops + 1 + 2 + 3 + 4 + 5, + intelnops + 1 + 2 + 3 + 4 + 5 + 6, + intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, +}; +static unsigned char *k8_nops[ASM_NOP_MAX+1] = { + NULL, + k8nops, + k8nops + 1, + k8nops + 1 + 2, + k8nops + 1 + 2 + 3, + k8nops + 1 + 2 + 3 + 4, + k8nops + 1 + 2 + 3 + 4 + 5, + k8nops + 1 + 2 + 3 + 4 + 5 + 6, + k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, +}; +static unsigned char *k7_nops[ASM_NOP_MAX+1] = { + NULL, + k7nops, + k7nops + 1, + k7nops + 1 + 2, + k7nops + 1 + 2 + 3, + k7nops + 1 + 2 + 3 + 4, + k7nops + 1 + 2 + 3 + 4 + 5, + k7nops + 1 + 2 + 3 + 4 + 5 + 6, + k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, +}; +static struct nop { + int cpuid; + unsigned char **noptable; +} noptypes[] = { + { X86_FEATURE_K8, k8_nops }, + { X86_FEATURE_K7, k7_nops }, + { -1, NULL } +}; + + +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; +extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; +extern u8 *__smp_locks[], *__smp_locks_end[]; + +extern u8 __smp_alt_begin[], __smp_alt_end[]; + + +static unsigned char** find_nop_table(void) +{ + unsigned char **noptable = intel_nops; + int i; + + for (i = 0; noptypes[i].cpuid >= 0; i++) { + if (boot_cpu_has(noptypes[i].cpuid)) { + noptable = noptypes[i].noptable; + break; + } + } + return noptable; +} + +/* Replace instructions with better alternatives for this CPU type. + This runs before SMP is initialized to avoid SMP problems with + self modifying code. This implies that assymetric systems where + APs have less capabilities than the boot processor are not handled. + Tough. Make sure you disable such features by hand. */ + +void apply_alternatives(struct alt_instr *start, struct alt_instr *end) +{ + unsigned char **noptable = find_nop_table(); + struct alt_instr *a; + int diff, i, k; + + DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); + for (a = start; a < end; a++) { + BUG_ON(a->replacementlen > a->instrlen); + if (!boot_cpu_has(a->cpuid)) + continue; + memcpy(a->instr, a->replacement, a->replacementlen); + diff = a->instrlen - a->replacementlen; + /* Pad the rest with nops */ + for (i = a->replacementlen; diff > 0; diff -= k, i += k) { + k = diff; + if (k > ASM_NOP_MAX) + k = ASM_NOP_MAX; + memcpy(a->instr + i, noptable[k], k); + } + } +} + +static void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end) +{ + struct alt_instr *a; + + DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end); + for (a = start; a < end; a++) { + memcpy(a->replacement + a->replacementlen, + a->instr, + a->instrlen); + } +} + +static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end) +{ + struct alt_instr *a; + + for (a = start; a < end; a++) { + memcpy(a->instr, + a->replacement + a->replacementlen, + a->instrlen); + } +} + +static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) +{ + u8 **ptr; + + for (ptr = start; ptr < end; ptr++) { + if (*ptr < text) + continue; + if (*ptr > text_end) + continue; + **ptr = 0xf0; /* lock prefix */ + }; +} + +static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) +{ + unsigned char **noptable = find_nop_table(); + u8 **ptr; + + for (ptr = start; ptr < end; ptr++) { + if (*ptr < text) + continue; + if (*ptr > text_end) + continue; + **ptr = noptable[1][0]; + }; +} + +struct smp_alt_module { + /* what is this ??? */ + struct module *mod; + char *name; + + /* ptrs to lock prefixes */ + u8 **locks; + u8 **locks_end; + + /* .text segment, needed to avoid patching init code ;) */ + u8 *text; + u8 *text_end; + + struct list_head next; +}; +static LIST_HEAD(smp_alt_modules); +static DEFINE_SPINLOCK(smp_alt); + +static int smp_alt_once = 0; +static int __init bootonly(char *str) +{ + smp_alt_once = 1; + return 1; +} +__setup("smp-alt-boot", bootonly); + +void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end) +{ + struct smp_alt_module *smp; + unsigned long flags; + + if (smp_alt_once) { + if (boot_cpu_has(X86_FEATURE_UP)) + alternatives_smp_unlock(locks, locks_end, + text, text_end); + return; + } + + smp = kzalloc(sizeof(*smp), GFP_KERNEL); + if (NULL == smp) + return; /* we'll run the (safe but slow) SMP code then ... */ + + smp->mod = mod; + smp->name = name; + smp->locks = locks; + smp->locks_end = locks_end; + smp->text = text; + smp->text_end = text_end; + DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n", + __FUNCTION__, smp->locks, smp->locks_end, + smp->text, smp->text_end, smp->name); + + spin_lock_irqsave(&smp_alt, flags); + list_add_tail(&smp->next, &smp_alt_modules); + if (boot_cpu_has(X86_FEATURE_UP)) + alternatives_smp_unlock(smp->locks, smp->locks_end, + smp->text, smp->text_end); + spin_unlock_irqrestore(&smp_alt, flags); +} + +void alternatives_smp_module_del(struct module *mod) +{ + struct smp_alt_module *item; + unsigned long flags; + + if (smp_alt_once) + return; + + spin_lock_irqsave(&smp_alt, flags); + list_for_each_entry(item, &smp_alt_modules, next) { + if (mod != item->mod) + continue; + list_del(&item->next); + spin_unlock_irqrestore(&smp_alt, flags); + DPRINTK("%s: %s\n", __FUNCTION__, item->name); + kfree(item); + return; + } + spin_unlock_irqrestore(&smp_alt, flags); +} + +void alternatives_smp_switch(int smp) +{ + struct smp_alt_module *mod; + unsigned long flags; + + if (smp_alt_once) + return; + BUG_ON(!smp && (num_online_cpus() > 1)); + + spin_lock_irqsave(&smp_alt, flags); + if (smp) { + printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); + clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); + clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + alternatives_smp_apply(__smp_alt_instructions, + __smp_alt_instructions_end); + list_for_each_entry(mod, &smp_alt_modules, next) + alternatives_smp_lock(mod->locks, mod->locks_end, + mod->text, mod->text_end); + } else { + printk(KERN_INFO "SMP alternatives: switching to UP code\n"); + set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); + set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + apply_alternatives(__smp_alt_instructions, + __smp_alt_instructions_end); + list_for_each_entry(mod, &smp_alt_modules, next) + alternatives_smp_unlock(mod->locks, mod->locks_end, + mod->text, mod->text_end); + } + spin_unlock_irqrestore(&smp_alt, flags); +} + +void __init alternative_instructions(void) +{ + apply_alternatives(__alt_instructions, __alt_instructions_end); + + /* switch to patch-once-at-boottime-only mode and free the + * tables in case we know the number of CPUs will never ever + * change */ +#ifdef CONFIG_HOTPLUG_CPU + if (num_possible_cpus() < 2) + smp_alt_once = 1; +#else + smp_alt_once = 1; +#endif + + if (smp_alt_once) { + if (1 == num_possible_cpus()) { + printk(KERN_INFO "SMP alternatives: switching to UP code\n"); + set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); + set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + apply_alternatives(__smp_alt_instructions, + __smp_alt_instructions_end); + alternatives_smp_unlock(__smp_locks, __smp_locks_end, + _text, _etext); + } + free_init_pages("SMP alternatives", + (unsigned long)__smp_alt_begin, + (unsigned long)__smp_alt_end); + } else { + alternatives_smp_save(__smp_alt_instructions, + __smp_alt_instructions_end); + alternatives_smp_module_add(NULL, "core kernel", + __smp_locks, __smp_locks_end, + _text, _etext); + alternatives_smp_switch(0); + } +} diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 776c90989e0..eb5279d23b7 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -38,6 +38,7 @@ #include <asm/i8253.h> #include <mach_apic.h> +#include <mach_apicdef.h> #include <mach_ipi.h> #include "io_ports.h" diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 05312a8abb8..da30a374dd4 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -824,8 +824,6 @@ static void apm_do_busy(void) static void (*original_pm_idle)(void); -extern void default_idle(void); - /** * apm_cpu_idle - cpu idling for APM capable Linux * diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c index f52669ecb93..bd75629dd26 100644 --- a/arch/i386/kernel/cpu/centaur.c +++ b/arch/i386/kernel/cpu/centaur.c @@ -4,6 +4,7 @@ #include <asm/processor.h> #include <asm/msr.h> #include <asm/e820.h> +#include <asm/mtrr.h> #include "cpu.h" #ifdef CONFIG_X86_OOSTORE diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index e6bd095ae10..7e3d6b6a4e9 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -25,9 +25,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); -static int cachesize_override __devinitdata = -1; -static int disable_x86_fxsr __devinitdata = 0; -static int disable_x86_serial_nr __devinitdata = 1; +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_fxsr __cpuinitdata; +static int disable_x86_serial_nr __cpuinitdata = 1; +static int disable_x86_sep __cpuinitdata; struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; @@ -59,7 +60,7 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -int __devinit get_model_name(struct cpuinfo_x86 *c) +int __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; char *p, *q; @@ -89,7 +90,7 @@ int __devinit get_model_name(struct cpuinfo_x86 *c) } -void __devinit display_cacheinfo(struct cpuinfo_x86 *c) +void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ecx, edx, l2size; @@ -130,7 +131,7 @@ void __devinit display_cacheinfo(struct cpuinfo_x86 *c) /* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ /* Look up CPU names by table lookup. */ -static char __devinit *table_lookup_model(struct cpuinfo_x86 *c) +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) { struct cpu_model_info *info; @@ -151,7 +152,7 @@ static char __devinit *table_lookup_model(struct cpuinfo_x86 *c) } -static void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) { char *v = c->x86_vendor_id; int i; @@ -187,6 +188,14 @@ static int __init x86_fxsr_setup(char * s) __setup("nofxsr", x86_fxsr_setup); +static int __init x86_sep_setup(char * s) +{ + disable_x86_sep = 1; + return 1; +} +__setup("nosep", x86_sep_setup); + + /* Standard macro to see if a specific flag is changeable */ static inline int flag_is_changeable_p(u32 flag) { @@ -210,7 +219,7 @@ static inline int flag_is_changeable_p(u32 flag) /* Probe for the CPUID instruction */ -static int __devinit have_cpuid_p(void) +static int __cpuinit have_cpuid_p(void) { return flag_is_changeable_p(X86_EFLAGS_ID); } @@ -254,7 +263,7 @@ static void __init early_cpu_detect(void) } } -void __devinit generic_identify(struct cpuinfo_x86 * c) +void __cpuinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; int junk; @@ -307,7 +316,7 @@ void __devinit generic_identify(struct cpuinfo_x86 * c) #endif } -static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { /* Disable processor serial number */ @@ -335,7 +344,7 @@ __setup("serialnumber", x86_serial_nr_setup); /* * This does the hard work of actually picking apart the CPU stuff... */ -void __devinit identify_cpu(struct cpuinfo_x86 *c) +void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; @@ -405,6 +414,10 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) clear_bit(X86_FEATURE_XMM, c->x86_capability); } + /* SEP disabled? */ + if (disable_x86_sep) + clear_bit(X86_FEATURE_SEP, c->x86_capability); + if (disable_pse) clear_bit(X86_FEATURE_PSE, c->x86_capability); @@ -417,7 +430,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) else /* Last resort... */ sprintf(c->x86_model_id, "%02x/%02x", - c->x86_vendor, c->x86_model); + c->x86, c->x86_model); } /* Now the feature flags better reflect actual CPU features! */ @@ -453,7 +466,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) } #ifdef CONFIG_X86_HT -void __devinit detect_ht(struct cpuinfo_x86 *c) +void __cpuinit detect_ht(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; int index_msb, core_bits; @@ -500,7 +513,7 @@ void __devinit detect_ht(struct cpuinfo_x86 *c) } #endif -void __devinit print_cpu_info(struct cpuinfo_x86 *c) +void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { char *vendor = NULL; @@ -523,7 +536,7 @@ void __devinit print_cpu_info(struct cpuinfo_x86 *c) printk("\n"); } -cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE; +cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; /* This is hacky. :) * We're emulating future behavior. @@ -570,7 +583,7 @@ void __init early_cpu_init(void) * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. */ -void __devinit cpu_init(void) +void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); struct tss_struct * t = &per_cpu(init_tss, cpu); @@ -670,7 +683,7 @@ void __devinit cpu_init(void) } #ifdef CONFIG_HOTPLUG_CPU -void __devinit cpu_uninit(void) +void __cpuinit cpu_uninit(void) { int cpu = raw_smp_processor_id(); cpu_clear(cpu, cpu_initialized); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index e11a09207ec..798da7c2b5d 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -1145,16 +1145,14 @@ static int __cpuinit powernowk8_init(void) { unsigned int i, supported_cpus = 0; - for (i=0; i<NR_CPUS; i++) { - if (!cpu_online(i)) - continue; + for_each_online_cpu(i) { if (check_supported_cpu(i)) supported_cpus++; } if (supported_cpus == num_online_cpus()) { - printk(KERN_INFO PFX "Found %d AMD Athlon 64 / Opteron processors (" VERSION ")\n", - supported_cpus); + printk(KERN_INFO PFX "Found %d AMD Athlon 64 / Opteron " + "processors (" VERSION ")\n", supported_cpus); return cpufreq_register_driver(&cpufreq_amd64_driver); } diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c index 28cc5d524af..cfc4276e670 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c @@ -75,7 +75,9 @@ static int speedstep_smi_ownership (void) __asm__ __volatile__( "out %%al, (%%dx)\n" : "=D" (result) - : "a" (command), "b" (function), "c" (0), "d" (smi_port), "D" (0), "S" (magic) + : "a" (command), "b" (function), "c" (0), "d" (smi_port), + "D" (0), "S" (magic) + : "memory" ); dprintk("result is %x\n", result); diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 8c0120186b9..5386b29bb5a 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -29,7 +29,7 @@ extern int trap_init_f00f_bug(void); struct movsl_mask movsl_mask __read_mostly; #endif -void __devinit early_intel_workaround(struct cpuinfo_x86 *c) +void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c) { if (c->x86_vendor != X86_VENDOR_INTEL) return; @@ -44,7 +44,7 @@ void __devinit early_intel_workaround(struct cpuinfo_x86 *c) * This is called before we do cpu ident work */ -int __devinit ppro_with_ram_bug(void) +int __cpuinit ppro_with_ram_bug(void) { /* Uses data from early_cpu_detect now */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && @@ -62,7 +62,7 @@ int __devinit ppro_with_ram_bug(void) * P4 Xeon errata 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ -static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c) +static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) { unsigned long lo, hi; @@ -81,7 +81,7 @@ static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c) /* * find out the number of processor cores on the die */ -static int __devinit num_cpu_cores(struct cpuinfo_x86 *c) +static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) { unsigned int eax, ebx, ecx, edx; @@ -96,7 +96,7 @@ static int __devinit num_cpu_cores(struct cpuinfo_x86 *c) return 1; } -static void __devinit init_intel(struct cpuinfo_x86 *c) +static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; char *p = NULL; @@ -205,7 +205,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) return size; } -static struct cpu_dev intel_cpu_dev __devinitdata = { +static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, .c_models = { diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index ffe58cee0c4..ce61921369e 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -174,7 +174,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ - if (c->cpuid_level > 4) { + if (c->cpuid_level > 3) { static int is_initialized; if (is_initialized == 0) { @@ -330,7 +330,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) } } } -static void __devinit cache_remove_shared_cpu_map(unsigned int cpu, int index) +static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) { struct _cpuid4_info *this_leaf, *sibling_leaf; int sibling; diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 89a85af33d2..5cfbd801169 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -40,7 +40,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* Other (Linux-defined) */ "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL, - "constant_tsc", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "constant_tsc", "up", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index d49dbe8dc96..e3c5fca0aa8 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -105,7 +105,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) return 1; local_irq_disable(); - if (!user_mode(regs)) { + if (!user_mode_vm(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; } diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index 6a93d75db43..ca2a0cbcac0 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -106,7 +106,7 @@ static void __init dmi_save_devices(struct dmi_header *dm) struct dmi_device *dev; for (i = 0; i < count; i++) { - char *d = ((char *) dm) + (i * 2); + char *d = (char *)(dm + 1) + (i * 2); /* Skip disabled device */ if ((*d & 0x80) == 0) diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index aeabb419686..7ec6cfa01fb 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -543,7 +543,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > 0x100000000ULL) continue; - res = alloc_bootmem_low(sizeof(struct resource)); + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); switch (md->type) { case EFI_RESERVED_TYPE: res->name = "Reserved Memory"; diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 4d704724b2f..cfc683f153b 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -226,6 +226,10 @@ ENTRY(system_call) pushl %eax # save orig_eax SAVE_ALL GET_THREAD_INFO(%ebp) + testl $TF_MASK,EFLAGS(%esp) + jz no_singlestep + orl $_TIF_SINGLESTEP,TI_flags(%ebp) +no_singlestep: # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index e0b7c632efb..3debc2e2654 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -450,7 +450,6 @@ int_msg: .globl boot_gdt_descr .globl idt_descr -.globl cpu_gdt_descr ALIGN # early boot GDT descriptor (must use 1:1 address mapping) @@ -470,8 +469,6 @@ cpu_gdt_descr: .word GDT_ENTRIES*8-1 .long cpu_gdt_table - .fill NR_CPUS-1,8,0 # space for the other GDT descriptors - /* * The boot_gdt_table must mirror the equivalent in setup.S and is * used only for booting. @@ -485,7 +482,7 @@ ENTRY(boot_gdt_table) /* * The Global Descriptor Table contains 28 quadwords, per-CPU. */ - .align PAGE_SIZE_asm + .align L1_CACHE_BYTES ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ .quad 0x0000000000000000 /* 0x0b reserved */ diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 39d9a5fa907..311b4e7266f 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -351,8 +351,8 @@ static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) { int i, j; Dprintk("Rotating IRQs among CPUs.\n"); - for (i = 0; i < NR_CPUS; i++) { - for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) { + for_each_online_cpu(i) { + for (j = 0; j < NR_IRQS; j++) { if (!irq_desc[j].action) continue; /* Is it a significant load ? */ @@ -381,7 +381,7 @@ static void do_irq_balance(void) unsigned long imbalance = 0; cpumask_t allowed_mask, target_cpu_mask, tmp; - for (i = 0; i < NR_CPUS; i++) { + for_each_cpu(i) { int package_index; CPU_IRQ(i) = 0; if (!cpu_online(i)) @@ -422,9 +422,7 @@ static void do_irq_balance(void) } } /* Find the least loaded processor package */ - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) - continue; + for_each_online_cpu(i) { if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (min_cpu_irq > CPU_IRQ(i)) { @@ -441,9 +439,7 @@ tryanothercpu: */ tmp_cpu_irq = 0; tmp_loaded = -1; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) - continue; + for_each_online_cpu(i) { if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (max_cpu_irq <= CPU_IRQ(i)) @@ -619,9 +615,7 @@ static int __init balanced_irq_init(void) if (smp_num_siblings > 1 && !cpus_empty(tmp)) physical_balance = 1; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) - continue; + for_each_online_cpu(i) { irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { @@ -638,9 +632,11 @@ static int __init balanced_irq_init(void) else printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); failed: - for (i = 0; i < NR_CPUS; i++) { + for_each_cpu(i) { kfree(irq_cpu_data[i].irq_delta); + irq_cpu_data[i].irq_delta = NULL; kfree(irq_cpu_data[i].last_irq); + irq_cpu_data[i].last_irq = NULL; } return 0; } @@ -1761,7 +1757,8 @@ static void __init setup_ioapic_ids_from_mpc(void) * Don't check I/O APIC IDs for xAPIC systems. They have * no meaning without the serial APIC bus. */ - if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15)) + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) return; /* * This is broken; anything with a real cpu count has to diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 694a1399763..7a59050242a 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -84,9 +84,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - down(&kprobe_mutex); + mutex_lock(&kprobe_mutex); free_insn_slot(p->ainsn.insn); - up(&kprobe_mutex); + mutex_unlock(&kprobe_mutex); } static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) diff --git a/arch/i386/kernel/module.c b/arch/i386/kernel/module.c index 5149c8a621f..470cf97e7cd 100644 --- a/arch/i386/kernel/module.c +++ b/arch/i386/kernel/module.c @@ -104,26 +104,38 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, return -ENOEXEC; } -extern void apply_alternatives(void *start, void *end); - int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - const Elf_Shdr *s; + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - /* look for .altinstructions to patch */ for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - void *seg; - if (strcmp(".altinstructions", secstrings + s->sh_name)) - continue; - seg = (void *)s->sh_addr; - apply_alternatives(seg, seg + s->sh_size); - } + if (!strcmp(".text", secstrings + s->sh_name)) + text = s; + if (!strcmp(".altinstructions", secstrings + s->sh_name)) + alt = s; + if (!strcmp(".smp_locks", secstrings + s->sh_name)) + locks= s; + } + + if (alt) { + /* patch .altinstructions */ + void *aseg = (void *)alt->sh_addr; + apply_alternatives(aseg, aseg + alt->sh_size); + } + if (locks && text) { + void *lseg = (void *)locks->sh_addr; + void *tseg = (void *)text->sh_addr; + alternatives_smp_module_add(me, me->name, + lseg, lseg + locks->sh_size, + tseg, tseg + text->sh_size); + } return 0; } void module_arch_cleanup(struct module *mod) { + alternatives_smp_module_del(mod); } diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index e6e2f43db85..8d8aa9d1796 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -828,6 +828,8 @@ void __init find_smp_config (void) smp_scan_config(address, 0x400); } +int es7000_plat; + /* -------------------------------------------------------------------------- ACPI-based MP Configuration -------------------------------------------------------------------------- */ @@ -935,7 +937,8 @@ void __init mp_register_ioapic ( mp_ioapics[idx].mpc_apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15)) + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) tmpid = io_apic_get_unique_id(idx, id); else tmpid = id; @@ -1011,8 +1014,6 @@ void __init mp_override_legacy_irq ( return; } -int es7000_plat; - void __init mp_config_acpi_legacy_irqs (void) { struct mpc_config_intsrc intsrc; diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index be87c5e2ee9..9074818b947 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -143,7 +143,7 @@ static int __init check_nmi_watchdog(void) local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for_each_cpu(cpu) { #ifdef CONFIG_SMP /* Check cpu_callin_map here because that is set after the timer is started. */ @@ -510,7 +510,7 @@ void touch_nmi_watchdog (void) * Just reset the alert counters, (other CPUs might be * spinning on locks we hold): */ - for (i = 0; i < NR_CPUS; i++) + for_each_cpu(i) alert_counter[i] = 0; /* @@ -543,7 +543,7 @@ void nmi_watchdog_tick (struct pt_regs * regs) /* * die_nmi will return ONLY if NOTIFY_STOP happens.. */ - die_nmi(regs, "NMI Watchdog detected LOCKUP"); + die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); } else { last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 0480454ebff..299e6167408 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -295,7 +295,7 @@ void show_regs(struct pt_regs * regs) printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (user_mode(regs)) + if (user_mode_vm(regs)) printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); printk(" EFLAGS: %08lx %s (%s %.*s)\n", regs->eflags, print_tainted(), system_utsname.release, diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 5c1fb6aada5..506462ef36a 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -34,10 +34,10 @@ /* * Determines which flags the user has access to [1 = access, 0 = no access]. - * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9). + * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), NT(14), IOPL(12-13), IF(9). * Also masks reserved bits (31-22, 15, 5, 3, 1). */ -#define FLAG_MASK 0x00054dd5 +#define FLAG_MASK 0x00050dd5 /* set's the trap flag. */ #define TRAP_FLAG 0x100 diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c index 7455ab64394..967dc74df9e 100644 --- a/arch/i386/kernel/semaphore.c +++ b/arch/i386/kernel/semaphore.c @@ -110,11 +110,11 @@ asm( ".align 4\n" ".globl __write_lock_failed\n" "__write_lock_failed:\n\t" - LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" + LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" "1: rep; nop\n\t" "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" "jne 1b\n\t" - LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" "jnz __write_lock_failed\n\t" "ret" ); @@ -124,11 +124,11 @@ asm( ".align 4\n" ".globl __read_lock_failed\n" "__read_lock_failed:\n\t" - LOCK "incl (%eax)\n" + LOCK_PREFIX "incl (%eax)\n" "1: rep; nop\n\t" "cmpl $1,(%eax)\n\t" "js 1b\n\t" - LOCK "decl (%eax)\n\t" + LOCK_PREFIX "decl (%eax)\n\t" "js __read_lock_failed\n\t" "ret" ); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index ab62a9f4701..d313a11acaf 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1288,7 +1288,7 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat struct resource *res; if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) continue; - res = alloc_bootmem_low(sizeof(struct resource)); + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); switch (e820.map[i].type) { case E820_RAM: res->name = "System RAM"; break; case E820_ACPI: res->name = "ACPI Tables"; break; @@ -1316,13 +1316,15 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat /* * Request address space for all standard resources + * + * This is called just before pcibios_assign_resources(), which is also + * an fs_initcall, but is linked in later (in arch/i386/pci/i386.c). */ -static void __init register_memory(void) +static int __init request_standard_resources(void) { - unsigned long gapstart, gapsize, round; - unsigned long long last; - int i; + int i; + printk("Setting up standard PCI resources\n"); if (efi_enabled) efi_initialize_iomem_resources(&code_resource, &data_resource); else @@ -1334,6 +1336,16 @@ static void __init register_memory(void) /* request I/O space for devices used on all i[345]86 PCs */ for (i = 0; i < STANDARD_IO_RESOURCES; i++) request_resource(&ioport_resource, &standard_io_resources[i]); + return 0; +} + +fs_initcall(request_standard_resources); + +static void __init register_memory(void) +{ + unsigned long gapstart, gapsize, round; + unsigned long long last; + int i; /* * Search for the bigest gap in the low 32 bits of the e820 @@ -1377,101 +1389,6 @@ static void __init register_memory(void) pci_mem_start, gapstart, gapsize); } -/* Use inline assembly to define this because the nops are defined - as inline assembly strings in the include files and we cannot - get them easily into strings. */ -asm("\t.data\nintelnops: " - GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 - GENERIC_NOP7 GENERIC_NOP8); -asm("\t.data\nk8nops: " - K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 - K8_NOP7 K8_NOP8); -asm("\t.data\nk7nops: " - K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 - K7_NOP7 K7_NOP8); - -extern unsigned char intelnops[], k8nops[], k7nops[]; -static unsigned char *intel_nops[ASM_NOP_MAX+1] = { - NULL, - intelnops, - intelnops + 1, - intelnops + 1 + 2, - intelnops + 1 + 2 + 3, - intelnops + 1 + 2 + 3 + 4, - intelnops + 1 + 2 + 3 + 4 + 5, - intelnops + 1 + 2 + 3 + 4 + 5 + 6, - intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static unsigned char *k8_nops[ASM_NOP_MAX+1] = { - NULL, - k8nops, - k8nops + 1, - k8nops + 1 + 2, - k8nops + 1 + 2 + 3, - k8nops + 1 + 2 + 3 + 4, - k8nops + 1 + 2 + 3 + 4 + 5, - k8nops + 1 + 2 + 3 + 4 + 5 + 6, - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static unsigned char *k7_nops[ASM_NOP_MAX+1] = { - NULL, - k7nops, - k7nops + 1, - k7nops + 1 + 2, - k7nops + 1 + 2 + 3, - k7nops + 1 + 2 + 3 + 4, - k7nops + 1 + 2 + 3 + 4 + 5, - k7nops + 1 + 2 + 3 + 4 + 5 + 6, - k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static struct nop { - int cpuid; - unsigned char **noptable; -} noptypes[] = { - { X86_FEATURE_K8, k8_nops }, - { X86_FEATURE_K7, k7_nops }, - { -1, NULL } -}; - -/* Replace instructions with better alternatives for this CPU type. - - This runs before SMP is initialized to avoid SMP problems with - self modifying code. This implies that assymetric systems where - APs have less capabilities than the boot processor are not handled. - Tough. Make sure you disable such features by hand. */ -void apply_alternatives(void *start, void *end) -{ - struct alt_instr *a; - int diff, i, k; - unsigned char **noptable = intel_nops; - for (i = 0; noptypes[i].cpuid >= 0; i++) { - if (boot_cpu_has(noptypes[i].cpuid)) { - noptable = noptypes[i].noptable; - break; - } - } - for (a = start; (void *)a < end; a++) { - if (!boot_cpu_has(a->cpuid)) - continue; - BUG_ON(a->replacementlen > a->instrlen); - memcpy(a->instr, a->replacement, a->replacementlen); - diff = a->instrlen - a->replacementlen; - /* Pad the rest with nops */ - for (i = a->replacementlen; diff > 0; diff -= k, i += k) { - k = diff; - if (k > ASM_NOP_MAX) - k = ASM_NOP_MAX; - memcpy(a->instr + i, noptable[k], k); - } - } -} - -void __init alternative_instructions(void) -{ - extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; - apply_alternatives(__alt_instructions, __alt_instructions_end); -} - static char * __init machine_specific_memory_setup(void); #ifdef CONFIG_MCA @@ -1554,6 +1471,16 @@ void __init setup_arch(char **cmdline_p) parse_cmdline_early(cmdline_p); +#ifdef CONFIG_EARLY_PRINTK + { + char *s = strstr(*cmdline_p, "earlyprintk="); + if (s) { + setup_early_printk(strchr(s, '=') + 1); + printk("early console enabled\n"); + } + } +#endif + max_low_pfn = setup_memory(); /* @@ -1578,19 +1505,6 @@ void __init setup_arch(char **cmdline_p) * NOTE: at this point the bootmem allocator is fully available. */ -#ifdef CONFIG_EARLY_PRINTK - { - char *s = strstr(*cmdline_p, "earlyprintk="); - if (s) { - extern void setup_early_printk(char *); - - setup_early_printk(strchr(s, '=') + 1); - printk("early console enabled\n"); - } - } -#endif - - dmi_scan_machine(); #ifdef CONFIG_X86_GENERICARCH diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 963616d364e..5c352c3a9e7 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -123,7 +123,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax err |= __get_user(tmp, &sc->seg); \ loadsegment(seg,tmp); } -#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \ +#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_RF | \ + X86_EFLAGS_OF | X86_EFLAGS_DF | \ X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) @@ -582,9 +583,6 @@ static void fastcall do_signal(struct pt_regs *regs) if (!user_mode(regs)) return; - if (try_to_freeze()) - goto no_signal; - if (test_thread_flag(TIF_RESTORE_SIGMASK)) oldset = ¤t->saved_sigmask; else @@ -613,7 +611,6 @@ static void fastcall do_signal(struct pt_regs *regs) return; } -no_signal: /* Did we come from a system call? */ if (regs->orig_eax >= 0) { /* Restart the system call - no handlers present */ diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 7007e178379..82371d83bfa 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -899,6 +899,7 @@ static int __devinit do_boot_cpu(int apicid, int cpu) unsigned short nmi_high = 0, nmi_low = 0; ++cpucount; + alternatives_smp_switch(1); /* * We can't use kernel_thread since we must avoid to @@ -1002,7 +1003,6 @@ void cpu_exit_clear(void) cpu_clear(cpu, cpu_callout_map); cpu_clear(cpu, cpu_callin_map); - cpu_clear(cpu, cpu_present_map); cpu_clear(cpu, smp_commenced_mask); unmap_cpu_to_logical_apicid(cpu); @@ -1014,31 +1014,20 @@ struct warm_boot_cpu_info { int cpu; }; -static void __devinit do_warm_boot_cpu(void *p) +static void __cpuinit do_warm_boot_cpu(void *p) { struct warm_boot_cpu_info *info = p; do_boot_cpu(info->apicid, info->cpu); complete(info->complete); } -int __devinit smp_prepare_cpu(int cpu) +static int __cpuinit __smp_prepare_cpu(int cpu) { DECLARE_COMPLETION(done); struct warm_boot_cpu_info info; struct work_struct task; int apicid, ret; - lock_cpu_hotplug(); - - /* - * On x86, CPU0 is never offlined. Trying to bring up an - * already-booted CPU will hang. So check for that case. - */ - if (cpu_online(cpu)) { - ret = -EINVAL; - goto exit; - } - apicid = x86_cpu_to_apicid[cpu]; if (apicid == BAD_APICID) { ret = -ENODEV; @@ -1063,7 +1052,6 @@ int __devinit smp_prepare_cpu(int cpu) zap_low_mappings(); ret = 0; exit: - unlock_cpu_hotplug(); return ret; } #endif @@ -1368,6 +1356,8 @@ void __cpu_die(unsigned int cpu) /* They ack this in play_dead by setting CPU_DEAD */ if (per_cpu(cpu_state, cpu) == CPU_DEAD) { printk ("CPU %d is now offline\n", cpu); + if (1 == num_online_cpus()) + alternatives_smp_switch(0); return; } msleep(100); @@ -1389,6 +1379,22 @@ void __cpu_die(unsigned int cpu) int __devinit __cpu_up(unsigned int cpu) { +#ifdef CONFIG_HOTPLUG_CPU + int ret=0; + + /* + * We do warm boot only on cpus that had booted earlier + * Otherwise cold boot is all handled from smp_boot_cpus(). + * cpu_callin_map is set during AP kickstart process. Its reset + * when a cpu is taken offline from cpu_exit_clear(). + */ + if (!cpu_isset(cpu, cpu_callin_map)) + ret = __smp_prepare_cpu(cpu); + + if (ret) + return -EIO; +#endif + /* In case one didn't come up */ if (!cpu_isset(cpu, cpu_callin_map)) { printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu); diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c index 67a0e1baa28..296355292c7 100644 --- a/arch/i386/kernel/topology.c +++ b/arch/i386/kernel/topology.c @@ -41,6 +41,15 @@ int arch_register_cpu(int num){ parent = &node_devices[node].node; #endif /* CONFIG_NUMA */ + /* + * CPU0 cannot be offlined due to several + * restrictions and assumptions in kernel. This basically + * doesnt add a control file, one cannot attempt to offline + * BSP. + */ + if (!num) + cpu_devices[num].cpu.no_control = 1; + return register_cpu(&cpu_devices[num].cpu, num, parent); } diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index b814dbdcc91..de5386b01d3 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -99,6 +99,8 @@ int register_die_notifier(struct notifier_block *nb) { int err = 0; unsigned long flags; + + vmalloc_sync_all(); spin_lock_irqsave(&die_notifier_lock, flags); err = notifier_chain_register(&i386die_chain, nb); spin_unlock_irqrestore(&die_notifier_lock, flags); @@ -112,12 +114,30 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) p < (void *)tinfo + THREAD_SIZE - 3; } -static void print_addr_and_symbol(unsigned long addr, char *log_lvl) +/* + * Print CONFIG_STACK_BACKTRACE_COLS address/symbol entries per line. + */ +static inline int print_addr_and_symbol(unsigned long addr, char *log_lvl, + int printed) { - printk(log_lvl); + if (!printed) + printk(log_lvl); + +#if CONFIG_STACK_BACKTRACE_COLS == 1 printk(" [<%08lx>] ", addr); +#else + printk(" <%08lx> ", addr); +#endif print_symbol("%s", addr); - printk("\n"); + + printed = (printed + 1) % CONFIG_STACK_BACKTRACE_COLS; + + if (printed) + printk(" "); + else + printk("\n"); + + return printed; } static inline unsigned long print_context_stack(struct thread_info *tinfo, @@ -125,20 +145,24 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, char *log_lvl) { unsigned long addr; + int printed = 0; /* nr of entries already printed on current line */ #ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); - print_addr_and_symbol(addr, log_lvl); + printed = print_addr_and_symbol(addr, log_lvl, printed); ebp = *(unsigned long *)ebp; } #else while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; if (__kernel_text_address(addr)) - print_addr_and_symbol(addr, log_lvl); + printed = print_addr_and_symbol(addr, log_lvl, printed); } #endif + if (printed) + printk("\n"); + return ebp; } @@ -166,8 +190,7 @@ static void show_trace_log_lvl(struct task_struct *task, stack = (unsigned long*)context->previous_esp; if (!stack) break; - printk(log_lvl); - printk(" =======================\n"); + printk("%s =======================\n", log_lvl); } } @@ -194,21 +217,17 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, for(i = 0; i < kstack_depth_to_print; i++) { if (kstack_end(stack)) break; - if (i && ((i % 8) == 0)) { - printk("\n"); - printk(log_lvl); - printk(" "); - } + if (i && ((i % 8) == 0)) + printk("\n%s ", log_lvl); printk("%08lx ", *stack++); } - printk("\n"); - printk(log_lvl); - printk("Call Trace:\n"); + printk("\n%sCall Trace:\n", log_lvl); show_trace_log_lvl(task, esp, log_lvl); } void show_stack(struct task_struct *task, unsigned long *esp) { + printk(" "); show_stack_log_lvl(task, esp, ""); } @@ -233,7 +252,7 @@ void show_registers(struct pt_regs *regs) esp = (unsigned long) (®s->esp); savesegment(ss, ss); - if (user_mode(regs)) { + if (user_mode_vm(regs)) { in_kernel = 0; esp = regs->esp; ss = regs->xss & 0xffff; @@ -333,6 +352,8 @@ void die(const char * str, struct pt_regs * regs, long err) static int die_counter; unsigned long flags; + oops_enter(); + if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); spin_lock_irqsave(&die.lock, flags); @@ -385,6 +406,7 @@ void die(const char * str, struct pt_regs * regs, long err) ssleep(5); panic("Fatal exception"); } + oops_exit(); do_exit(SIGSEGV); } @@ -623,7 +645,7 @@ void die_nmi (struct pt_regs *regs, const char *msg) /* If we are in kernel we are probably nested up pretty bad * and might aswell get out now while we still can. */ - if (!user_mode(regs)) { + if (!user_mode_vm(regs)) { current->thread.trap_no = 2; crash_kexec(regs); } @@ -694,6 +716,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) void set_nmi_callback(nmi_callback_t callback) { + vmalloc_sync_all(); rcu_assign_pointer(nmi_callback, callback); } EXPORT_SYMBOL_GPL(set_nmi_callback); diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 4710195b6b7..8831303a473 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -7,6 +7,7 @@ #include <asm-generic/vmlinux.lds.h> #include <asm/thread_info.h> #include <asm/page.h> +#include <asm/cache.h> OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) @@ -68,6 +69,26 @@ SECTIONS *(.data.init_task) } + /* might get freed after init */ + . = ALIGN(4096); + __smp_alt_begin = .; + __smp_alt_instructions = .; + .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) { + *(.smp_altinstructions) + } + __smp_alt_instructions_end = .; + . = ALIGN(4); + __smp_locks = .; + .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + *(.smp_locks) + } + __smp_locks_end = .; + .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) { + *(.smp_altinstr_replacement) + } + . = ALIGN(4096); + __smp_alt_end = .; + /* will be freed after init */ . = ALIGN(4096); /* Init code and data */ __init_begin = .; @@ -115,7 +136,7 @@ SECTIONS __initramfs_start = .; .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } __initramfs_end = .; - . = ALIGN(32); + . = ALIGN(L1_CACHE_BYTES); __per_cpu_start = .; .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S index 76b72815940..3b62baa6a37 100644 --- a/arch/i386/kernel/vsyscall-sysenter.S +++ b/arch/i386/kernel/vsyscall-sysenter.S @@ -21,6 +21,9 @@ * instruction clobbers %esp, the user's %esp won't even survive entry * into the kernel. We store %esp in %ebp. Code in entry.S must fetch * arg6 from the stack. + * + * You can not use this vsyscall for the clone() syscall because the + * three dwords on the parent stack do not get copied to the child. */ .text .globl __kernel_vsyscall diff --git a/arch/i386/mach-es7000/es7000.h b/arch/i386/mach-es7000/es7000.h index f1e3204f5de..80566ca4a80 100644 --- a/arch/i386/mach-es7000/es7000.h +++ b/arch/i386/mach-es7000/es7000.h @@ -83,6 +83,7 @@ struct es7000_oem_table { struct psai psai; }; +#ifdef CONFIG_ACPI struct acpi_table_sdt { unsigned long pa; unsigned long count; @@ -99,6 +100,9 @@ struct oem_table { u32 OEMTableSize; }; +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +#endif + struct mip_reg { unsigned long long off_0; unsigned long long off_8; @@ -114,7 +118,6 @@ struct mip_reg { #define MIP_FUNC(VALUE) (VALUE & 0xff) extern int parse_unisys_oem (char *oemptr); -extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); extern void setup_unisys(void); extern int es7000_start_cpu(int cpu, unsigned long eip); extern void es7000_sw_apic(void); diff --git a/arch/i386/mach-es7000/es7000plat.c b/arch/i386/mach-es7000/es7000plat.c index a9ab0644f40..3d0fc853516 100644 --- a/arch/i386/mach-es7000/es7000plat.c +++ b/arch/i386/mach-es7000/es7000plat.c @@ -51,8 +51,6 @@ struct mip_reg *host_reg; int mip_port; unsigned long mip_addr, host_addr; -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI) - /* * GSI override for ES7000 platforms. */ @@ -76,8 +74,6 @@ es7000_rename_gsi(int ioapic, int gsi) return gsi; } -#endif /* (CONFIG_X86_IO_APIC) && (CONFIG_ACPI) */ - void __init setup_unisys(void) { @@ -160,6 +156,7 @@ parse_unisys_oem (char *oemptr) return es7000_plat; } +#ifdef CONFIG_ACPI int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) { @@ -212,6 +209,7 @@ find_unisys_acpi_oem_table(unsigned long *oem_addr) } return -1; } +#endif static void es7000_spin(int n) diff --git a/arch/i386/mach-visws/reboot.c b/arch/i386/mach-visws/reboot.c index 5d73e042ed0..99332abfad4 100644 --- a/arch/i386/mach-visws/reboot.c +++ b/arch/i386/mach-visws/reboot.c @@ -1,7 +1,6 @@ #include <linux/module.h> #include <linux/smp.h> #include <linux/delay.h> -#include <linux/platform.h> #include <asm/io.h> #include "piix4.h" diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index cf572d9a3b6..7f0fcf219a2 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -214,6 +214,68 @@ static noinline void force_sig_info_fault(int si_signo, int si_code, fastcall void do_invalid_op(struct pt_regs *, unsigned long); +static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) +{ + unsigned index = pgd_index(address); + pgd_t *pgd_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + + pgd += index; + pgd_k = init_mm.pgd + index; + + if (!pgd_present(*pgd_k)) + return NULL; + + /* + * set_pgd(pgd, *pgd_k); here would be useless on PAE + * and redundant with the set_pmd() on non-PAE. As would + * set_pud. + */ + + pud = pud_offset(pgd, address); + pud_k = pud_offset(pgd_k, address); + if (!pud_present(*pud_k)) + return NULL; + + pmd = pmd_offset(pud, address); + pmd_k = pmd_offset(pud_k, address); + if (!pmd_present(*pmd_k)) + return NULL; + if (!pmd_present(*pmd)) + set_pmd(pmd, *pmd_k); + else + BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); + return pmd_k; +} + +/* + * Handle a fault on the vmalloc or module mapping area + * + * This assumes no large pages in there. + */ +static inline int vmalloc_fault(unsigned long address) +{ + unsigned long pgd_paddr; + pmd_t *pmd_k; + pte_t *pte_k; + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "current" here. We might be inside + * an interrupt in the middle of a task switch.. + */ + pgd_paddr = read_cr3(); + pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); + if (!pmd_k) + return -1; + pte_k = pte_offset_kernel(pmd_k, address); + if (!pte_present(*pte_k)) + return -1; + return 0; +} + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -223,6 +285,8 @@ fastcall void do_invalid_op(struct pt_regs *, unsigned long); * bit 0 == 0 means no page found, 1 means protection fault * bit 1 == 0 means read, 1 means write * bit 2 == 0 means kernel, 1 means user-mode + * bit 3 == 1 means use of reserved bit detected + * bit 4 == 1 means fault was an instruction fetch */ fastcall void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) @@ -237,13 +301,6 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, /* get the address */ address = read_cr2(); - if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, - SIGSEGV) == NOTIFY_STOP) - return; - /* It's safe to allow irq's after cr2 has been saved */ - if (regs->eflags & (X86_EFLAGS_IF|VM_MASK)) - local_irq_enable(); - tsk = current; si_code = SEGV_MAPERR; @@ -259,17 +316,29 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, * * This verifies that the fault happens in kernel space * (error_code & 4) == 0, and that the fault was not a - * protection error (error_code & 1) == 0. + * protection error (error_code & 9) == 0. */ - if (unlikely(address >= TASK_SIZE)) { - if (!(error_code & 5)) - goto vmalloc_fault; - /* + if (unlikely(address >= TASK_SIZE)) { + if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) + return; + if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, + SIGSEGV) == NOTIFY_STOP) + return; + /* * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock. */ goto bad_area_nosemaphore; - } + } + + if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, + SIGSEGV) == NOTIFY_STOP) + return; + + /* It's safe to allow irq's after cr2 has been saved and the vmalloc + fault has been handled. */ + if (regs->eflags & (X86_EFLAGS_IF|VM_MASK)) + local_irq_enable(); mm = tsk->mm; @@ -440,24 +509,31 @@ no_context: bust_spinlocks(1); -#ifdef CONFIG_X86_PAE - if (error_code & 16) { - pte_t *pte = lookup_address(address); + if (oops_may_print()) { + #ifdef CONFIG_X86_PAE + if (error_code & 16) { + pte_t *pte = lookup_address(address); - if (pte && pte_present(*pte) && !pte_exec_kernel(*pte)) - printk(KERN_CRIT "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", current->uid); + if (pte && pte_present(*pte) && !pte_exec_kernel(*pte)) + printk(KERN_CRIT "kernel tried to execute " + "NX-protected page - exploit attempt? " + "(uid: %d)\n", current->uid); + } + #endif + if (address < PAGE_SIZE) + printk(KERN_ALERT "BUG: unable to handle kernel NULL " + "pointer dereference"); + else + printk(KERN_ALERT "BUG: unable to handle kernel paging" + " request"); + printk(" at virtual address %08lx\n",address); + printk(KERN_ALERT " printing eip:\n"); + printk("%08lx\n", regs->eip); } -#endif - if (address < PAGE_SIZE) - printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); - else - printk(KERN_ALERT "Unable to handle kernel paging request"); - printk(" at virtual address %08lx\n",address); - printk(KERN_ALERT " printing eip:\n"); - printk("%08lx\n", regs->eip); page = read_cr3(); page = ((unsigned long *) __va(page))[address >> 22]; - printk(KERN_ALERT "*pde = %08lx\n", page); + if (oops_may_print()) + printk(KERN_ALERT "*pde = %08lx\n", page); /* * We must not directly access the pte in the highpte * case, the page table might be allocated in highmem. @@ -465,7 +541,7 @@ no_context: * it's allocated already. */ #ifndef CONFIG_HIGHPTE - if (page & 1) { + if ((page & 1) && oops_may_print()) { page &= PAGE_MASK; address &= 0x003ff000; page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; @@ -510,51 +586,41 @@ do_sigbus: tsk->thread.error_code = error_code; tsk->thread.trap_no = 14; force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); - return; - -vmalloc_fault: - { - /* - * Synchronize this task's top level page-table - * with the 'reference' page table. - * - * Do _not_ use "tsk" here. We might be inside - * an interrupt in the middle of a task switch.. - */ - int index = pgd_index(address); - unsigned long pgd_paddr; - pgd_t *pgd, *pgd_k; - pud_t *pud, *pud_k; - pmd_t *pmd, *pmd_k; - pte_t *pte_k; - - pgd_paddr = read_cr3(); - pgd = index + (pgd_t *)__va(pgd_paddr); - pgd_k = init_mm.pgd + index; - - if (!pgd_present(*pgd_k)) - goto no_context; - - /* - * set_pgd(pgd, *pgd_k); here would be useless on PAE - * and redundant with the set_pmd() on non-PAE. As would - * set_pud. - */ +} - pud = pud_offset(pgd, address); - pud_k = pud_offset(pgd_k, address); - if (!pud_present(*pud_k)) - goto no_context; - - pmd = pmd_offset(pud, address); - pmd_k = pmd_offset(pud_k, address); - if (!pmd_present(*pmd_k)) - goto no_context; - set_pmd(pmd, *pmd_k); +#ifndef CONFIG_X86_PAE +void vmalloc_sync_all(void) +{ + /* + * Note that races in the updates of insync and start aren't + * problematic: insync can only get set bits added, and updates to + * start are only improving performance (without affecting correctness + * if undone). + */ + static DECLARE_BITMAP(insync, PTRS_PER_PGD); + static unsigned long start = TASK_SIZE; + unsigned long address; - pte_k = pte_offset_kernel(pmd_k, address); - if (!pte_present(*pte_k)) - goto no_context; - return; + BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); + for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) { + if (!test_bit(pgd_index(address), insync)) { + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + for (page = pgd_list; page; page = + (struct page *)page->index) + if (!vmalloc_sync_one(page_address(page), + address)) { + BUG_ON(page != pgd_list); + break; + } + spin_unlock_irqrestore(&pgd_lock, flags); + if (!page) + set_bit(pgd_index(address), insync); + } + if (address == start && test_bit(pgd_index(address), insync)) + start = address + PGDIR_SIZE; } } +#endif diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 7ba55a6e2db..9f66ac582a8 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -720,21 +720,6 @@ static int noinline do_test_wp_bit(void) return flag; } -void free_initmem(void) -{ - unsigned long addr; - - addr = (unsigned long)(&__init_begin); - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - memset((void *)addr, 0xcc, PAGE_SIZE); - free_page(addr); - totalram_pages++; - } - printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (__init_end - __init_begin) >> 10); -} - #ifdef CONFIG_DEBUG_RODATA extern char __start_rodata, __end_rodata; @@ -758,17 +743,31 @@ void mark_rodata_ro(void) } #endif +void free_init_pages(char *what, unsigned long begin, unsigned long end) +{ + unsigned long addr; + + for (addr = begin; addr < end; addr += PAGE_SIZE) { + ClearPageReserved(virt_to_page(addr)); + init_page_count(virt_to_page(addr)); + memset((void *)addr, 0xcc, PAGE_SIZE); + free_page(addr); + totalram_pages++; + } + printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); +} + +void free_initmem(void) +{ + free_init_pages("unused kernel memory", + (unsigned long)(&__init_begin), + (unsigned long)(&__init_end)); +} #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - if (start < end) - printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages++; - } + free_init_pages("initrd memory", start, end); } #endif + diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c index 0493e8b8ec4..1accce50c2c 100644 --- a/arch/i386/oprofile/nmi_int.c +++ b/arch/i386/oprofile/nmi_int.c @@ -122,7 +122,7 @@ static void nmi_save_registers(void * dummy) static void free_msrs(void) { int i; - for (i = 0; i < NR_CPUS; ++i) { + for_each_cpu(i) { kfree(cpu_msrs[i].counters); cpu_msrs[i].counters = NULL; kfree(cpu_msrs[i].controls); @@ -138,10 +138,7 @@ static int allocate_msrs(void) size_t counters_size = sizeof(struct op_msr) * model->num_counters; int i; - for (i = 0; i < NR_CPUS; ++i) { - if (!cpu_online(i)) - continue; - + for_each_online_cpu(i) { cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL); if (!cpu_msrs[i].counters) { success = 0; diff --git a/arch/i386/pci/Makefile b/arch/i386/pci/Makefile index 5461d4d5ea1..62ad75c57e6 100644 --- a/arch/i386/pci/Makefile +++ b/arch/i386/pci/Makefile @@ -1,4 +1,4 @@ -obj-y := i386.o +obj-y := i386.o init.o obj-$(CONFIG_PCI_BIOS) += pcbios.o obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c index f6bc48da4d2..dbece776c5b 100644 --- a/arch/i386/pci/common.c +++ b/arch/i386/pci/common.c @@ -8,6 +8,7 @@ #include <linux/pci.h> #include <linux/ioport.h> #include <linux/init.h> +#include <linux/dmi.h> #include <asm/acpi.h> #include <asm/segment.h> @@ -120,11 +121,42 @@ void __devinit pcibios_fixup_bus(struct pci_bus *b) pci_read_bridge_bases(b); } +/* + * Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus) + */ +#ifdef __i386__ +static int __devinit assign_all_busses(struct dmi_system_id *d) +{ + pci_probe |= PCI_ASSIGN_ALL_BUSSES; + printk(KERN_INFO "%s detected: enabling PCI bus# renumbering" + " (pci=assign-busses)\n", d->ident); + return 0; +} +#endif + +/* + * Laptops which need pci=assign-busses to see Cardbus cards + */ +static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { +#ifdef __i386__ + { + .callback = assign_all_busses, + .ident = "Samsung X20 Laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Samsung Electronics"), + DMI_MATCH(DMI_PRODUCT_NAME, "SX20S"), + }, + }, +#endif /* __i386__ */ + {} +}; struct pci_bus * __devinit pcibios_scan_root(int busnum) { struct pci_bus *bus = NULL; + dmi_check_system(pciprobe_dmi_table); + while ((bus = pci_find_next_bus(bus)) != NULL) { if (bus->number == busnum) { /* Already scanned */ diff --git a/arch/i386/pci/direct.c b/arch/i386/pci/direct.c index e3ac502bf2f..99012b93bd1 100644 --- a/arch/i386/pci/direct.c +++ b/arch/i386/pci/direct.c @@ -245,7 +245,7 @@ static int __init pci_check_type2(void) return works; } -static int __init pci_direct_init(void) +void __init pci_direct_init(void) { struct resource *region, *region2; @@ -258,16 +258,16 @@ static int __init pci_direct_init(void) if (pci_check_type1()) { printk(KERN_INFO "PCI: Using configuration type 1\n"); raw_pci_ops = &pci_direct_conf1; - return 0; + return; } release_resource(region); type2: if ((pci_probe & PCI_PROBE_CONF2) == 0) - goto out; + return; region = request_region(0xCF8, 4, "PCI conf2"); if (!region) - goto out; + return; region2 = request_region(0xC000, 0x1000, "PCI conf2"); if (!region2) goto fail2; @@ -275,15 +275,10 @@ static int __init pci_direct_init(void) if (pci_check_type2()) { printk(KERN_INFO "PCI: Using configuration type 2\n"); raw_pci_ops = &pci_direct_conf2; - return 0; + return; } release_resource(region2); fail2: release_resource(region); - - out: - return 0; } - -arch_initcall(pci_direct_init); diff --git a/arch/i386/pci/init.c b/arch/i386/pci/init.c new file mode 100644 index 00000000000..f9156d3ac72 --- /dev/null +++ b/arch/i386/pci/init.c @@ -0,0 +1,25 @@ +#include <linux/config.h> +#include <linux/pci.h> +#include <linux/init.h> +#include "pci.h" + +/* arch_initcall has too random ordering, so call the initializers + in the right sequence from here. */ +static __init int pci_access_init(void) +{ +#ifdef CONFIG_PCI_MMCONFIG + pci_mmcfg_init(); +#endif + if (raw_pci_ops) + return 0; +#ifdef CONFIG_PCI_BIOS + pci_pcbios_init(); +#endif + if (raw_pci_ops) + return 0; +#ifdef CONFIG_PCI_DIRECT + pci_direct_init(); +#endif + return 0; +} +arch_initcall(pci_access_init); diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c index 0ee8a983708..613789071f3 100644 --- a/arch/i386/pci/mmconfig.c +++ b/arch/i386/pci/mmconfig.c @@ -172,25 +172,20 @@ static __init void unreachable_devices(void) } } -static int __init pci_mmcfg_init(void) +void __init pci_mmcfg_init(void) { if ((pci_probe & PCI_PROBE_MMCONF) == 0) - goto out; + return; acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg); if ((pci_mmcfg_config_num == 0) || (pci_mmcfg_config == NULL) || (pci_mmcfg_config[0].base_address == 0)) - goto out; + return; printk(KERN_INFO "PCI: Using MMCONFIG\n"); raw_pci_ops = &pci_mmcfg; pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; unreachable_devices(); - - out: - return 0; } - -arch_initcall(pci_mmcfg_init); diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c index b9d65f0bc2d..1eec0868f4b 100644 --- a/arch/i386/pci/pcbios.c +++ b/arch/i386/pci/pcbios.c @@ -476,14 +476,12 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) } EXPORT_SYMBOL(pcibios_set_irq_routing); -static int __init pci_pcbios_init(void) +void __init pci_pcbios_init(void) { if ((pci_probe & PCI_PROBE_BIOS) && ((raw_pci_ops = pci_find_bios()))) { pci_probe |= PCI_BIOS_SORT; pci_bios_present = 1; } - return 0; } -arch_initcall(pci_pcbios_init); diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h index f550781ec31..12035e29108 100644 --- a/arch/i386/pci/pci.h +++ b/arch/i386/pci/pci.h @@ -80,4 +80,7 @@ extern int pci_conf1_write(unsigned int seg, unsigned int bus, extern int pci_conf1_read(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 *value); +extern void pci_direct_init(void); +extern void pci_pcbios_init(void); +extern void pci_mmcfg_init(void); |