From ba9c231f7499ff6918c069c72ff5fd836c76b963 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 26 Sep 2006 10:52:31 +0200 Subject: [PATCH] i386: initialize end-of-memory variables as early as possible Move initialization of all memory end variables to as early as possible, so that dependent code doesn't need to check whether these variables have already been set. Change the range check in kunmap_atomic to actually make use of this so that the no-mapping-estabished path (under CONFIG_DEBUG_HIGHMEM) gets used only when the address is inside the lowmem area (and BUG() otherwise). Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/i386/mm/discontig.c | 5 +++++ arch/i386/mm/highmem.c | 2 +- arch/i386/mm/init.c | 20 -------------------- 3 files changed, 6 insertions(+), 21 deletions(-) (limited to 'arch/i386/mm') diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 7c392dc553b..f0c10b3cd15 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -313,6 +313,11 @@ unsigned long __init setup_memory(void) highstart_pfn = system_max_low_pfn; printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); + num_physpages = highend_pfn; + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; +#else + num_physpages = system_max_low_pfn; + high_memory = (void *) __va(system_max_low_pfn * PAGE_SIZE - 1) + 1; #endif printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(system_max_low_pfn)); diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c index b6eb4dcb877..ba44000b906 100644 --- a/arch/i386/mm/highmem.c +++ b/arch/i386/mm/highmem.c @@ -54,7 +54,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type) unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - if (vaddr < FIXADDR_START) { // FIXME + if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) { dec_preempt_count(); preempt_check_resched(); return; diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 89e8486aac3..d0c2fdf6a4d 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -552,18 +552,6 @@ static void __init test_wp_bit(void) } } -static void __init set_max_mapnr_init(void) -{ -#ifdef CONFIG_HIGHMEM - num_physpages = highend_pfn; -#else - num_physpages = max_low_pfn; -#endif -#ifdef CONFIG_FLATMEM - max_mapnr = num_physpages; -#endif -} - static struct kcore_list kcore_mem, kcore_vmalloc; void __init mem_init(void) @@ -590,14 +578,6 @@ void __init mem_init(void) } #endif - set_max_mapnr_init(); - -#ifdef CONFIG_HIGHMEM - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; -#else - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; -#endif - /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); -- cgit v1.2.3-70-g09d2 From 1a3f239ddf9208f2e52d36fef1c1c4518cbbbabe Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 26 Sep 2006 10:52:32 +0200 Subject: [PATCH] i386: Replace i386 open-coded cmdline parsing with This patch replaces the open-coded early commandline parsing throughout the i386 boot code with the generic mechanism (already used by ppc, powerpc, ia64 and s390). The code was inconsistent with whether it deletes the option from the cmdline or not, meaning some of these will get passed through the environment into init. This transformation is mainly mechanical, but there are some notable parts: 1) Grammar: s/linux never set's it up/linux never sets it up/ 2) Remove hacked-in earlyprintk= option scanning. When someone actually implements CONFIG_EARLY_PRINTK, then they can use early_param(). [AK: actually it is implemented, but I'm adding the early_param it in the next x86-64 patch] 3) Move declaration of generic_apic_probe() from setup.c into asm/apic.h 4) Various parameters now moved into their appropriate files (thanks Andi). 5) All parse functions which examine arg need to check for NULL, except one where it has subtle humor value. AK: readded acpi_sci handling which was completely dropped AK: moved some more variables into acpi/boot.c Cc: len.brown@intel.com Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen --- arch/i386/kernel/acpi/boot.c | 76 ++++++++- arch/i386/kernel/apic.c | 15 ++ arch/i386/kernel/io_apic.c | 24 ++- arch/i386/kernel/machine_kexec.c | 23 +++ arch/i386/kernel/setup.c | 350 +++++++++++++-------------------------- arch/i386/kernel/smpboot.c | 13 ++ arch/i386/mach-generic/probe.c | 58 ++++--- arch/i386/mm/init.c | 18 +- include/asm-i386/acpi.h | 14 -- include/asm-i386/apic.h | 4 +- include/asm-i386/io_apic.h | 11 ++ include/asm-i386/pgtable.h | 2 - 12 files changed, 319 insertions(+), 289 deletions(-) (limited to 'arch/i386/mm') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index ee003bc0e8b..87e2ab50b69 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -36,6 +36,8 @@ #include #include +int __initdata acpi_force = 0; + #ifdef CONFIG_X86_64 extern void __init clustered_apic_check(void); @@ -860,8 +862,6 @@ static void __init acpi_process_madt(void) return; } -extern int acpi_force; - #ifdef __i386__ static int __init disable_acpi_irq(struct dmi_system_id *d) @@ -1163,3 +1163,75 @@ int __init acpi_boot_init(void) return 0; } + +static int __init parse_acpi(char *arg) +{ + if (!arg) + return -EINVAL; + + /* "acpi=off" disables both ACPI table parsing and interpreter */ + if (strcmp(arg, "off") == 0) { + disable_acpi(); + } + /* acpi=force to over-ride black-list */ + else if (strcmp(arg, "force") == 0) { + acpi_force = 1; + acpi_ht = 1; + acpi_disabled = 0; + } + /* acpi=strict disables out-of-spec workarounds */ + else if (strcmp(arg, "strict") == 0) { + acpi_strict = 1; + } + /* Limit ACPI just to boot-time to enable HT */ + else if (strcmp(arg, "ht") == 0) { + if (!acpi_force) + disable_acpi(); + acpi_ht = 1; + } + /* "acpi=noirq" disables ACPI interrupt routing */ + else if (strcmp(arg, "noirq") == 0) { + acpi_noirq_set(); + } else { + /* Core will printk when we return error. */ + return -EINVAL; + } + return 0; +} +early_param("acpi", parse_acpi); + +/* FIXME: Using pci= for an ACPI parameter is a travesty. */ +static int __init parse_pci(char *arg) +{ + if (arg && strcmp(arg, "noacpi") == 0) + acpi_disable_pci(); + return 0; +} +early_param("pci", parse_pci); + +#ifdef CONFIG_X86_IO_APIC +static int __init parse_acpi_skip_timer_override(char *arg) +{ + acpi_skip_timer_override = 1; + return 0; +} +early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override); +#endif /* CONFIG_X86_IO_APIC */ + +static int __init setup_acpi_sci(char *s) +{ + if (!s) + return -EINVAL; + if (!strcmp(s, "edge")) + acpi_sci_flags.trigger = 1; + else if (!strcmp(s, "level")) + acpi_sci_flags.trigger = 3; + else if (!strcmp(s, "high")) + acpi_sci_flags.polarity = 1; + else if (!strcmp(s, "low")) + acpi_sci_flags.polarity = 3; + else + return -EINVAL; + return 0; +} +early_param("acpi_sci", setup_acpi_sci); diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 1a34fc57800..ce75e71b694 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -1372,3 +1372,18 @@ int __init APIC_init_uniprocessor (void) return 0; } + +static int __init parse_lapic(char *arg) +{ + lapic_enable(); + return 0; +} +early_param("lapic", parse_lapic); + +static int __init parse_nolapic(char *arg) +{ + lapic_disable(); + return 0; +} +early_param("nolapic", parse_nolapic); + diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index b713f27d7e8..fd0df75cfbd 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -66,7 +66,7 @@ int sis_apic_bug = -1; */ int nr_ioapic_registers[MAX_IO_APICS]; -int disable_timer_pin_1 __initdata; +static int disable_timer_pin_1 __initdata; /* * Rough estimation of how many shared IRQs there are, can @@ -2691,3 +2691,25 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a } #endif /* CONFIG_ACPI */ + +static int __init parse_disable_timer_pin_1(char *arg) +{ + disable_timer_pin_1 = 1; + return 0; +} +early_param("disable_timer_pin_1", parse_disable_timer_pin_1); + +static int __init parse_enable_timer_pin_1(char *arg) +{ + disable_timer_pin_1 = -1; + return 0; +} +early_param("enable_timer_pin_1", parse_enable_timer_pin_1); + +static int __init parse_noapic(char *arg) +{ + /* disable IO-APIC */ + disable_ioapic_setup(); + return 0; +} +early_param("noapic", parse_noapic); diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c index 6b1ae6ba76f..66c3dc99a65 100644 --- a/arch/i386/kernel/machine_kexec.c +++ b/arch/i386/kernel/machine_kexec.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -209,3 +210,25 @@ NORET_TYPE void machine_kexec(struct kimage *image) rnk = (relocate_new_kernel_t) reboot_code_buffer; (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae); } + +/* crashkernel=size@addr specifies the location to reserve for + * a crash kernel. By reserving this memory we guarantee + * that linux never sets it up as a DMA target. + * Useful for holding code to do something appropriate + * after a kernel panic. + */ +static int __init parse_crashkernel(char *arg) +{ + unsigned long size, base; + size = memparse(arg, &arg); + if (*arg == '@') { + base = memparse(arg+1, &arg); + /* FIXME: Do I want a sanity check + * to validate the memory range? + */ + crashk_res.start = base; + crashk_res.end = base + size - 1; + } + return 0; +} +early_param("crashkernel", parse_crashkernel); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 71a540362b7..c6e31ed386f 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -96,11 +96,6 @@ unsigned long mmu_cr4_features; #endif EXPORT_SYMBOL(acpi_disabled); -#ifdef CONFIG_ACPI -int __initdata acpi_force = 0; -extern acpi_interrupt_flags acpi_sci_flags; -#endif - /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; #ifdef CONFIG_MCA @@ -148,7 +143,6 @@ EXPORT_SYMBOL(ist_info); struct e820map e820; extern void early_cpu_init(void); -extern void generic_apic_probe(char *); extern int root_mountflags; unsigned long saved_videomode; @@ -700,238 +694,132 @@ static inline void copy_edd(void) } #endif -static void __init parse_cmdline_early (char ** cmdline_p) -{ - char c = ' ', *to = command_line, *from = saved_command_line; - int len = 0; - int userdef = 0; +static int __initdata user_defined_memmap = 0; - /* Save unparsed command line copy for /proc/cmdline */ - saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; +/* + * "mem=nopentium" disables the 4MB page tables. + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM + * to , overriding the bios size. + * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from + * to +, overriding the bios size. + * + * HPA tells me bootloaders need to parse mem=, so no new + * option should be mem= [also see Documentation/i386/boot.txt] + */ +static int __init parse_mem(char *arg) +{ + if (!arg) + return -EINVAL; - for (;;) { - if (c != ' ') - goto next_char; - /* - * "mem=nopentium" disables the 4MB page tables. - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM - * to , overriding the bios size. - * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from - * to +, overriding the bios size. - * - * HPA tells me bootloaders need to parse mem=, so no new - * option should be mem= [also see Documentation/i386/boot.txt] + if (strcmp(arg, "nopentium") == 0) { + clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); + disable_pse = 1; + } else { + /* If the user specifies memory size, we + * limit the BIOS-provided memory map to + * that size. exactmap can be used to specify + * the exact map. mem=number can be used to + * trim the existing memory map. */ - if (!memcmp(from, "mem=", 4)) { - if (to != command_line) - to--; - if (!memcmp(from+4, "nopentium", 9)) { - from += 9+4; - clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); - disable_pse = 1; - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long mem_size; + unsigned long long mem_size; - mem_size = memparse(from+4, &from); - limit_regions(mem_size); - userdef=1; - } - } - - else if (!memcmp(from, "memmap=", 7)) { - if (to != command_line) - to--; - if (!memcmp(from+7, "exactmap", 8)) { -#ifdef CONFIG_CRASH_DUMP - /* If we are doing a crash dump, we - * still need to know the real mem - * size before original memory map is - * reset. - */ - find_max_pfn(); - saved_max_pfn = max_pfn; -#endif - from += 8+7; - e820.nr_map = 0; - userdef = 1; - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long start_at, mem_size; - - mem_size = memparse(from+7, &from); - if (*from == '@') { - start_at = memparse(from+1, &from); - add_memory_region(start_at, mem_size, E820_RAM); - } else if (*from == '#') { - start_at = memparse(from+1, &from); - add_memory_region(start_at, mem_size, E820_ACPI); - } else if (*from == '$') { - start_at = memparse(from+1, &from); - add_memory_region(start_at, mem_size, E820_RESERVED); - } else { - limit_regions(mem_size); - userdef=1; - } - } - } - - else if (!memcmp(from, "noexec=", 7)) - noexec_setup(from + 7); + mem_size = memparse(arg, &arg); + limit_regions(mem_size); + user_defined_memmap = 1; + } + return 0; +} +early_param("mem", parse_mem); +static int __init parse_memmap(char *arg) +{ + if (!arg) + return -EINVAL; -#ifdef CONFIG_X86_SMP - /* - * If the BIOS enumerates physical processors before logical, - * maxcpus=N at enumeration-time can be used to disable HT. + if (strcmp(arg, "exactmap") == 0) { +#ifdef CONFIG_CRASH_DUMP + /* If we are doing a crash dump, we + * still need to know the real mem + * size before original memory map is + * reset. */ - else if (!memcmp(from, "maxcpus=", 8)) { - extern unsigned int maxcpus; - - maxcpus = simple_strtoul(from + 8, NULL, 0); - } + find_max_pfn(); + saved_max_pfn = max_pfn; #endif - -#ifdef CONFIG_ACPI - /* "acpi=off" disables both ACPI table parsing and interpreter */ - else if (!memcmp(from, "acpi=off", 8)) { - disable_acpi(); - } - - /* acpi=force to over-ride black-list */ - else if (!memcmp(from, "acpi=force", 10)) { - acpi_force = 1; - acpi_ht = 1; - acpi_disabled = 0; - } - - /* acpi=strict disables out-of-spec workarounds */ - else if (!memcmp(from, "acpi=strict", 11)) { - acpi_strict = 1; - } - - /* Limit ACPI just to boot-time to enable HT */ - else if (!memcmp(from, "acpi=ht", 7)) { - if (!acpi_force) - disable_acpi(); - acpi_ht = 1; - } - - /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */ - else if (!memcmp(from, "pci=noacpi", 10)) { - acpi_disable_pci(); - } - /* "acpi=noirq" disables ACPI interrupt routing */ - else if (!memcmp(from, "acpi=noirq", 10)) { - acpi_noirq_set(); + e820.nr_map = 0; + user_defined_memmap = 1; + } else { + /* If the user specifies memory size, we + * limit the BIOS-provided memory map to + * that size. exactmap can be used to specify + * the exact map. mem=number can be used to + * trim the existing memory map. + */ + unsigned long long start_at, mem_size; + + mem_size = memparse(arg, &arg); + if (*arg == '@') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_RAM); + } else if (*arg == '#') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_ACPI); + } else if (*arg == '$') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_RESERVED); + } else { + limit_regions(mem_size); + user_defined_memmap = 1; } + } + return 0; +} +early_param("memmap", parse_memmap); - else if (!memcmp(from, "acpi_sci=edge", 13)) - acpi_sci_flags.trigger = 1; - - else if (!memcmp(from, "acpi_sci=level", 14)) - acpi_sci_flags.trigger = 3; - - else if (!memcmp(from, "acpi_sci=high", 13)) - acpi_sci_flags.polarity = 1; - - else if (!memcmp(from, "acpi_sci=low", 12)) - acpi_sci_flags.polarity = 3; - -#ifdef CONFIG_X86_IO_APIC - else if (!memcmp(from, "acpi_skip_timer_override", 24)) - acpi_skip_timer_override = 1; - - if (!memcmp(from, "disable_timer_pin_1", 19)) - disable_timer_pin_1 = 1; - if (!memcmp(from, "enable_timer_pin_1", 18)) - disable_timer_pin_1 = -1; +#ifdef CONFIG_PROC_VMCORE +/* elfcorehdr= specifies the location of elf core header + * stored by the crashed kernel. + */ +static int __init parse_elfcorehdr(char *arg) +{ + if (!arg) + return -EINVAL; - /* disable IO-APIC */ - else if (!memcmp(from, "noapic", 6)) - disable_ioapic_setup(); -#endif /* CONFIG_X86_IO_APIC */ -#endif /* CONFIG_ACPI */ + elfcorehdr_addr = memparse(arg, &arg); + return 0; +} +early_param("elfcorehdr", parse_elfcorehdr); +#endif /* CONFIG_PROC_VMCORE */ -#ifdef CONFIG_X86_LOCAL_APIC - /* enable local APIC */ - else if (!memcmp(from, "lapic", 5)) - lapic_enable(); +/* + * highmem=size forces highmem to be exactly 'size' bytes. + * This works even on boxes that have no highmem otherwise. + * This also works to reduce highmem size on bigger boxes. + */ +static int __init parse_highmem(char *arg) +{ + if (!arg) + return -EINVAL; - /* disable local APIC */ - else if (!memcmp(from, "nolapic", 6)) - lapic_disable(); -#endif /* CONFIG_X86_LOCAL_APIC */ + highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT; + return 0; +} +early_param("highmem", parse_highmem); -#ifdef CONFIG_KEXEC - /* crashkernel=size@addr specifies the location to reserve for - * a crash kernel. By reserving this memory we guarantee - * that linux never set's it up as a DMA target. - * Useful for holding code to do something appropriate - * after a kernel panic. - */ - else if (!memcmp(from, "crashkernel=", 12)) { - unsigned long size, base; - size = memparse(from+12, &from); - if (*from == '@') { - base = memparse(from+1, &from); - /* FIXME: Do I want a sanity check - * to validate the memory range? - */ - crashk_res.start = base; - crashk_res.end = base + size - 1; - } - } -#endif -#ifdef CONFIG_PROC_VMCORE - /* elfcorehdr= specifies the location of elf core header - * stored by the crashed kernel. - */ - else if (!memcmp(from, "elfcorehdr=", 11)) - elfcorehdr_addr = memparse(from+11, &from); -#endif +/* + * vmalloc=size forces the vmalloc area to be exactly 'size' + * bytes. This can be used to increase (or decrease) the + * vmalloc area - the default is 128m. + */ +static int __init parse_vmalloc(char *arg) +{ + if (!arg) + return -EINVAL; - /* - * highmem=size forces highmem to be exactly 'size' bytes. - * This works even on boxes that have no highmem otherwise. - * This also works to reduce highmem size on bigger boxes. - */ - else if (!memcmp(from, "highmem=", 8)) - highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT; - - /* - * vmalloc=size forces the vmalloc area to be exactly 'size' - * bytes. This can be used to increase (or decrease) the - * vmalloc area - the default is 128m. - */ - else if (!memcmp(from, "vmalloc=", 8)) - __VMALLOC_RESERVE = memparse(from+8, &from); - - next_char: - c = *(from++); - if (!c) - break; - if (COMMAND_LINE_SIZE <= ++len) - break; - *(to++) = c; - } - *to = '\0'; - *cmdline_p = command_line; - if (userdef) { - printk(KERN_INFO "user-defined physical RAM map:\n"); - print_memory_map("user"); - } + __VMALLOC_RESERVE = memparse(arg, &arg); + return 0; } +early_param("vmalloc", parse_vmalloc); /* * Callback for efi_memory_walk. @@ -1507,17 +1395,15 @@ void __init setup_arch(char **cmdline_p) data_resource.start = virt_to_phys(_etext); data_resource.end = virt_to_phys(_edata)-1; - parse_cmdline_early(cmdline_p); + parse_early_param(); -#ifdef CONFIG_EARLY_PRINTK - { - char *s = strstr(*cmdline_p, "earlyprintk="); - if (s) { - setup_early_printk(strchr(s, '=') + 1); - printk("early console enabled\n"); - } + if (user_defined_memmap) { + printk(KERN_INFO "user-defined physical RAM map:\n"); + print_memory_map("user"); } -#endif + + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; max_low_pfn = setup_memory(); @@ -1546,7 +1432,7 @@ void __init setup_arch(char **cmdline_p) dmi_scan_machine(); #ifdef CONFIG_X86_GENERICARCH - generic_apic_probe(*cmdline_p); + generic_apic_probe(); #endif if (efi_enabled) efi_map_memmap(); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 9367af76ce3..517eb387455 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -1491,3 +1491,16 @@ void __init smp_intr_init(void) /* IPI for generic function call */ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); } + +/* + * If the BIOS enumerates physical processors before logical, + * maxcpus=N at enumeration-time can be used to disable HT. + */ +static int __init parse_maxcpus(char *arg) +{ + extern unsigned int maxcpus; + + maxcpus = simple_strtoul(arg, NULL, 0); + return 0; +} +early_param("maxcpus", parse_maxcpus); diff --git a/arch/i386/mach-generic/probe.c b/arch/i386/mach-generic/probe.c index 793d1b47325..94b1fd9cbe3 100644 --- a/arch/i386/mach-generic/probe.c +++ b/arch/i386/mach-generic/probe.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -29,7 +30,24 @@ struct genapic *apic_probe[] __initdata = { NULL, }; -static int cmdline_apic; +static int cmdline_apic __initdata; +static int __init parse_apic(char *arg) +{ + int i; + + if (!arg) + return -EINVAL; + + for (i = 0; apic_probe[i]; i++) { + if (!strcmp(apic_probe[i]->name, arg)) { + genapic = apic_probe[i]; + cmdline_apic = 1; + return 0; + } + } + return -ENOENT; +} +early_param("apic", parse_apic); void __init generic_bigsmp_probe(void) { @@ -48,40 +66,20 @@ void __init generic_bigsmp_probe(void) } } -void __init generic_apic_probe(char *command_line) +void __init generic_apic_probe(void) { - char *s; - int i; - int changed = 0; - - s = strstr(command_line, "apic="); - if (s && (s == command_line || isspace(s[-1]))) { - char *p = strchr(s, ' '), old; - if (!p) - p = strchr(s, '\0'); - old = *p; - *p = 0; - for (i = 0; !changed && apic_probe[i]; i++) { - if (!strcmp(apic_probe[i]->name, s+5)) { - changed = 1; + if (!cmdline_apic) { + int i; + for (i = 0; apic_probe[i]; i++) { + if (apic_probe[i]->probe()) { genapic = apic_probe[i]; + break; } } - if (!changed) - printk(KERN_ERR "Unknown genapic `%s' specified.\n", s); - *p = old; - cmdline_apic = changed; - } - for (i = 0; !changed && apic_probe[i]; i++) { - if (apic_probe[i]->probe()) { - changed = 1; - genapic = apic_probe[i]; - } + /* Not visible without early console */ + if (!apic_probe[i]) + panic("Didn't find an APIC driver"); } - /* Not visible without early console */ - if (!changed) - panic("Didn't find an APIC driver"); - printk(KERN_INFO "Using APIC driver %s\n", genapic->name); } diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index d0c2fdf6a4d..951386606d0 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -435,16 +435,22 @@ u64 __supported_pte_mask __read_mostly = ~_PAGE_NX; * on Enable * off Disable */ -void __init noexec_setup(const char *str) +static int __init noexec_setup(char *str) { - if (!strncmp(str, "on",2) && cpu_has_nx) { - __supported_pte_mask |= _PAGE_NX; - disable_nx = 0; - } else if (!strncmp(str,"off",3)) { + if (!str || !strcmp(str, "on")) { + if (cpu_has_nx) { + __supported_pte_mask |= _PAGE_NX; + disable_nx = 0; + } + } else if (!strcmp(str,"off")) { disable_nx = 1; __supported_pte_mask &= ~_PAGE_NX; - } + } else + return -EINVAL; + + return 0; } +early_param("noexec", noexec_setup); int nx_enabled = 0; #ifdef CONFIG_X86_PAE diff --git a/include/asm-i386/acpi.h b/include/asm-i386/acpi.h index 20f52395421..6016632d032 100644 --- a/include/asm-i386/acpi.h +++ b/include/asm-i386/acpi.h @@ -131,21 +131,7 @@ static inline void disable_acpi(void) extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); #ifdef CONFIG_X86_IO_APIC -extern int skip_ioapic_setup; extern int acpi_skip_timer_override; - -static inline void disable_ioapic_setup(void) -{ - skip_ioapic_setup = 1; -} - -static inline int ioapic_setup_disabled(void) -{ - return skip_ioapic_setup; -} - -#else -static inline void disable_ioapic_setup(void) { } #endif static inline void acpi_noirq_set(void) { acpi_noirq = 1; } diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h index 2c1e371cebb..8c5331ceca7 100644 --- a/include/asm-i386/apic.h +++ b/include/asm-i386/apic.h @@ -42,6 +42,8 @@ static inline void lapic_enable(void) } while (0) +extern void generic_apic_probe(void); + #ifdef CONFIG_X86_LOCAL_APIC /* @@ -117,8 +119,6 @@ extern void enable_APIC_timer(void); extern void enable_NMI_through_LVT0 (void * dummy); -extern int disable_timer_pin_1; - void smp_send_timer_broadcast_ipi(struct pt_regs *regs); void switch_APIC_timer_to_ipi(void *cpumask); void switch_ipi_to_APIC_timer(void *cpumask); diff --git a/include/asm-i386/io_apic.h b/include/asm-i386/io_apic.h index 5092e819b8a..5d309275a1d 100644 --- a/include/asm-i386/io_apic.h +++ b/include/asm-i386/io_apic.h @@ -188,6 +188,16 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned /* 1 if "noapic" boot option passed */ extern int skip_ioapic_setup; +static inline void disable_ioapic_setup(void) +{ + skip_ioapic_setup = 1; +} + +static inline int ioapic_setup_disabled(void) +{ + return skip_ioapic_setup; +} + /* * If we use the IO-APIC for IRQ routing, disable automatic * assignment of PCI IRQ's. @@ -206,6 +216,7 @@ extern int (*ioapic_renumber_irq)(int ioapic, int irq); #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 +static inline void disable_ioapic_setup(void) { } #endif extern int assign_irq_vector(int irq); diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 09697fec3d2..64140f2f1b9 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -391,8 +391,6 @@ extern pte_t *lookup_address(unsigned long address); static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;} #endif -extern void noexec_setup(const char *str); - #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) -- cgit v1.2.3-70-g09d2 From 474c256841074b913e76e392082373e12103a75d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:35 +0200 Subject: [PATCH] i386: make fault notifier unconditional and export it It's needed for external debuggers and overhead is very small. Also make the actual notifier chain they use static Cc: jbeulich@novell.com Signed-off-by: Andi Kleen --- arch/i386/mm/fault.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/i386/mm') diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index f7279468323..0ce86168a0b 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -30,18 +30,20 @@ extern void die(const char *,struct pt_regs *,long); -#ifdef CONFIG_KPROBES -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); + int register_page_fault_notifier(struct notifier_block *nb) { vmalloc_sync_all(); return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); } +EXPORT_SYMBOL_GPL(register_page_fault_notifier); int unregister_page_fault_notifier(struct notifier_block *nb) { return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); } +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); static inline int notify_page_fault(enum die_val val, const char *str, struct pt_regs *regs, long err, int trap, int sig) @@ -55,14 +57,6 @@ static inline int notify_page_fault(enum die_val val, const char *str, }; return atomic_notifier_call_chain(¬ify_page_fault_chain, val, &args); } -#else -static inline int notify_page_fault(enum die_val val, const char *str, - struct pt_regs *regs, long err, int trap, int sig) -{ - return NOTIFY_DONE; -} -#endif - /* * Unlock any spinlocks which will prevent us from getting the -- cgit v1.2.3-70-g09d2 From 78be3706b21a232310590fe00258b224177ac05f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 26 Sep 2006 10:52:39 +0200 Subject: [PATCH] i386: Allow a kernel not to be in ring 0 We allow for the fact that the guest kernel may not run in ring 0. This requires some abstraction in a few places when setting %cs or checking privilege level (user vs kernel). This is Chris' [RFC PATCH 15/33] move segment checks to subarch, except rather than using #define USER_MODE_MASK which depends on a config option, we use Zach's more flexible approach of assuming ring 3 == userspace. I also used "get_kernel_rpl()" over "get_kernel_cs()" because I think it reads better in the code... 1) Remove the hardcoded 3 and introduce #define SEGMENT_RPL_MASK 3 2) Add a get_kernel_rpl() macro, and don't assume it's zero. And: Clean up of patch for letting kernel run other than ring 0: a. Add some comments about the SEGMENT_IS_*_CODE() macros. b. Add a USER_RPL macro. (Code was comparing a value to a mask in some places and to the magic number 3 in other places.) c. Add macros for table indicator field and use them. d. Change the entry.S tests for LDT stack segment to use the macros Signed-off-by: Rusty Russell Signed-off-by: Zachary Amsden Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- arch/i386/kernel/entry.S | 9 +++++---- arch/i386/kernel/process.c | 2 +- arch/i386/mm/extable.c | 2 +- arch/i386/mm/fault.c | 11 ++++------- include/asm-i386/ptrace.h | 5 +++-- include/asm-i386/segment.h | 17 +++++++++++++++++ 6 files changed, 31 insertions(+), 15 deletions(-) (limited to 'arch/i386/mm') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 3872fca5c74..284f2e908ad 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -240,8 +240,9 @@ ret_from_intr: check_userspace: movl EFLAGS(%esp), %eax # mix EFLAGS and CS movb CS(%esp), %al - testl $(VM_MASK | 3), %eax - jz resume_kernel + andl $(VM_MASK | SEGMENT_RPL_MASK), %eax + cmpl $USER_RPL, %eax + jb resume_kernel # not returning to v8086 or userspace ENTRY(resume_userspace) DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending @@ -377,8 +378,8 @@ restore_all: # See comments in process.c:copy_thread() for details. movb OLDSS(%esp), %ah movb CS(%esp), %al - andl $(VM_MASK | (4 << 8) | 3), %eax - cmpl $((4 << 8) | 3), %eax + andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax + cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax CFI_REMEMBER_STATE je ldt_ss # returning to user-space with LDT SS restore_nocheck: diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 220aeca59c3..8c190ca7ae4 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -338,7 +338,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) regs.xes = __USER_DS; regs.orig_eax = -1; regs.eip = (unsigned long) kernel_thread_helper; - regs.xcs = __KERNEL_CS; + regs.xcs = __KERNEL_CS | get_kernel_rpl(); regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; /* Ok, create the new process.. */ diff --git a/arch/i386/mm/extable.c b/arch/i386/mm/extable.c index de03c5430ab..0ce4f22a263 100644 --- a/arch/i386/mm/extable.c +++ b/arch/i386/mm/extable.c @@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs) const struct exception_table_entry *fixup; #ifdef CONFIG_PNPBIOS - if (unlikely((regs->xcs & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3))) + if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs))) { extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; extern u32 pnp_bios_is_utter_crap; diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 0ce86168a0b..5e17a3f43b4 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -27,6 +27,7 @@ #include #include #include +#include extern void die(const char *,struct pt_regs *,long); @@ -113,10 +114,10 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs, } /* The standard kernel/user address space limit. */ - *eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg; + *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg; /* By far the most common cases. */ - if (likely(seg == __USER_CS || seg == __KERNEL_CS)) + if (likely(SEGMENT_IS_FLAT_CODE(seg))) return eip; /* Check the segment exists, is within the current LDT/GDT size, @@ -430,11 +431,7 @@ good_area: write = 0; switch (error_code & 3) { default: /* 3: write, present */ -#ifdef TEST_VERIFY_AREA - if (regs->cs == KERNEL_CS) - printk("WP fault at %08lx\n", regs->eip); -#endif - /* fall through */ + /* fall through */ case 2: /* write, not present */ if (!(vma->vm_flags & VM_WRITE)) goto bad_area; diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h index 30a442ec205..21bb91679c8 100644 --- a/include/asm-i386/ptrace.h +++ b/include/asm-i386/ptrace.h @@ -60,6 +60,7 @@ struct pt_regs { #ifdef __KERNEL__ #include +#include struct task_struct; extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code); @@ -73,11 +74,11 @@ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int erro */ static inline int user_mode(struct pt_regs *regs) { - return (regs->xcs & 3) != 0; + return (regs->xcs & SEGMENT_RPL_MASK) == USER_RPL; } static inline int user_mode_vm(struct pt_regs *regs) { - return ((regs->xcs & 3) | (regs->eflags & VM_MASK)) != 0; + return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL; } #define instruction_pointer(regs) ((regs)->eip) extern unsigned long profile_pc(struct pt_regs *regs); diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h index faf995307b9..b7ab59685ba 100644 --- a/include/asm-i386/segment.h +++ b/include/asm-i386/segment.h @@ -83,6 +83,11 @@ #define GDT_SIZE (GDT_ENTRIES * 8) +/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */ +#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8) +/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ +#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) + /* Simple and small GDT entries for booting only */ #define GDT_ENTRY_BOOT_CS 2 @@ -112,4 +117,16 @@ */ #define IDT_ENTRIES 256 +/* Bottom two bits of selector give the ring privilege level */ +#define SEGMENT_RPL_MASK 0x3 +/* Bit 2 is table indicator (LDT/GDT) */ +#define SEGMENT_TI_MASK 0x4 + +/* User mode is privilege level 3 */ +#define USER_RPL 0x3 +/* LDT segment has TI set, GDT has it cleared */ +#define SEGMENT_LDT 0x4 +#define SEGMENT_GDT 0x0 + +#define get_kernel_rpl() 0 #endif -- cgit v1.2.3-70-g09d2