From 9a0b5817ad97bb718ab85322759d19a238712b47 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 23 Mar 2006 02:59:32 -0800 Subject: [PATCH] x86: SMP alternatives Implement SMP alternatives, i.e. switching at runtime between different code versions for UP and SMP. The code can patch both SMP->UP and UP->SMP. The UP->SMP case is useful for CPU hotplug. With CONFIG_CPU_HOTPLUG enabled the code switches to UP at boot time and when the number of CPUs goes down to 1, and switches to SMP when the number of CPUs goes up to 2. Without CONFIG_CPU_HOTPLUG or on non-SMP-capable systems the code is patched once at boot time (if needed) and the tables are released afterwards. The changes in detail: * The current alternatives bits are moved to a separate file, the SMP alternatives code is added there. * The patch adds some new elf sections to the kernel: .smp_altinstructions like .altinstructions, also contains a list of alt_instr structs. .smp_altinstr_replacement like .altinstr_replacement, but also has some space to save original instruction before replaving it. .smp_locks list of pointers to lock prefixes which can be nop'ed out on UP. The first two are used to replace more complex instruction sequences such as spinlocks and semaphores. It would be possible to deal with the lock prefixes with that as well, but by handling them as special case the table sizes become much smaller. * The sections are page-aligned and padded up to page size, so they can be free if they are not needed. * Splitted the code to release init pages to a separate function and use it to release the elf sections if they are unused. Signed-off-by: Gerd Hoffmann Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/setup.c | 95 ------------------------------------------------ 1 file changed, 95 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index ab62a9f4701..5f58f8cb983 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1377,101 +1377,6 @@ static void __init register_memory(void) pci_mem_start, gapstart, gapsize); } -/* Use inline assembly to define this because the nops are defined - as inline assembly strings in the include files and we cannot - get them easily into strings. */ -asm("\t.data\nintelnops: " - GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 - GENERIC_NOP7 GENERIC_NOP8); -asm("\t.data\nk8nops: " - K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 - K8_NOP7 K8_NOP8); -asm("\t.data\nk7nops: " - K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 - K7_NOP7 K7_NOP8); - -extern unsigned char intelnops[], k8nops[], k7nops[]; -static unsigned char *intel_nops[ASM_NOP_MAX+1] = { - NULL, - intelnops, - intelnops + 1, - intelnops + 1 + 2, - intelnops + 1 + 2 + 3, - intelnops + 1 + 2 + 3 + 4, - intelnops + 1 + 2 + 3 + 4 + 5, - intelnops + 1 + 2 + 3 + 4 + 5 + 6, - intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static unsigned char *k8_nops[ASM_NOP_MAX+1] = { - NULL, - k8nops, - k8nops + 1, - k8nops + 1 + 2, - k8nops + 1 + 2 + 3, - k8nops + 1 + 2 + 3 + 4, - k8nops + 1 + 2 + 3 + 4 + 5, - k8nops + 1 + 2 + 3 + 4 + 5 + 6, - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static unsigned char *k7_nops[ASM_NOP_MAX+1] = { - NULL, - k7nops, - k7nops + 1, - k7nops + 1 + 2, - k7nops + 1 + 2 + 3, - k7nops + 1 + 2 + 3 + 4, - k7nops + 1 + 2 + 3 + 4 + 5, - k7nops + 1 + 2 + 3 + 4 + 5 + 6, - k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static struct nop { - int cpuid; - unsigned char **noptable; -} noptypes[] = { - { X86_FEATURE_K8, k8_nops }, - { X86_FEATURE_K7, k7_nops }, - { -1, NULL } -}; - -/* Replace instructions with better alternatives for this CPU type. - - This runs before SMP is initialized to avoid SMP problems with - self modifying code. This implies that assymetric systems where - APs have less capabilities than the boot processor are not handled. - Tough. Make sure you disable such features by hand. */ -void apply_alternatives(void *start, void *end) -{ - struct alt_instr *a; - int diff, i, k; - unsigned char **noptable = intel_nops; - for (i = 0; noptypes[i].cpuid >= 0; i++) { - if (boot_cpu_has(noptypes[i].cpuid)) { - noptable = noptypes[i].noptable; - break; - } - } - for (a = start; (void *)a < end; a++) { - if (!boot_cpu_has(a->cpuid)) - continue; - BUG_ON(a->replacementlen > a->instrlen); - memcpy(a->instr, a->replacement, a->replacementlen); - diff = a->instrlen - a->replacementlen; - /* Pad the rest with nops */ - for (i = a->replacementlen; diff > 0; diff -= k, i += k) { - k = diff; - if (k > ASM_NOP_MAX) - k = ASM_NOP_MAX; - memcpy(a->instr + i, noptable[k], k); - } - } -} - -void __init alternative_instructions(void) -{ - extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; - apply_alternatives(__alt_instructions, __alt_instructions_end); -} - static char * __init machine_specific_memory_setup(void); #ifdef CONFIG_MCA -- cgit v1.2.3-70-g09d2 From 99b7de33477882b86d54ce8ecbf90147f9d106d7 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Thu, 23 Mar 2006 02:59:41 -0800 Subject: [PATCH] x86: early printk handling fixes The history is that -mm kernels do not work for me for a few months already. The things started from crashing somewhere after starting init, and for the last month - no boot at all, just "Uncompressing... OK, booting kernel", and silence. Early console didn't work too. With the latest releases this degraded into an infinite stream of the "Unknown interrupt or fault" messages. So today my patience ran out and I started to think how can I collect at least some info for the bug-report. Attached is the patch that allows to gather some valueable debug info on the problem by making an early console more useable. I can't properly test the patch, as the kernel still doesn't boot, so I'll explain it in details in a hope someone else can justify the intrusive changes. arch_hooks.h: added prototypes for setup_early_printk() and early_printk(). setup.c: killed wrong setup_early_printk() prototype. Moved setup_early_printk() a bit earlier, as it was not "early enough" to cover the bug I was fighting with. early_printk.c: made it to start printing from the bottom of the screen, otherwise the messages interfere with the ones of the boot-loader, so you can't read them. Signed-off-by: Stas Sergeev Cc: Andi Kleen Cc: Zwane Mwaikambo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/setup.c | 23 ++++++++++------------- arch/x86_64/kernel/early_printk.c | 3 ++- include/asm-i386/arch_hooks.h | 3 +++ 3 files changed, 15 insertions(+), 14 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 5f58f8cb983..2d8782960f4 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1459,6 +1459,16 @@ void __init setup_arch(char **cmdline_p) parse_cmdline_early(cmdline_p); +#ifdef CONFIG_EARLY_PRINTK + { + char *s = strstr(*cmdline_p, "earlyprintk="); + if (s) { + setup_early_printk(strchr(s, '=') + 1); + printk("early console enabled\n"); + } + } +#endif + max_low_pfn = setup_memory(); /* @@ -1483,19 +1493,6 @@ void __init setup_arch(char **cmdline_p) * NOTE: at this point the bootmem allocator is fully available. */ -#ifdef CONFIG_EARLY_PRINTK - { - char *s = strstr(*cmdline_p, "earlyprintk="); - if (s) { - extern void setup_early_printk(char *); - - setup_early_printk(strchr(s, '=') + 1); - printk("early console enabled\n"); - } - } -#endif - - dmi_scan_machine(); #ifdef CONFIG_X86_GENERICARCH diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c index 6dffb498ccd..6fcdcb80b07 100644 --- a/arch/x86_64/kernel/early_printk.c +++ b/arch/x86_64/kernel/early_printk.c @@ -21,7 +21,7 @@ #define MAX_XPOS max_xpos static int max_ypos = 25, max_xpos = 80; -static int current_ypos = 1, current_xpos = 0; +static int current_ypos = 25, current_xpos = 0; static void early_vga_write(struct console *con, const char *str, unsigned n) { @@ -244,6 +244,7 @@ int __init setup_early_printk(char *opt) && SCREEN_INFO.orig_video_isVGA == 1) { max_xpos = SCREEN_INFO.orig_video_cols; max_ypos = SCREEN_INFO.orig_video_lines; + current_ypos = max_ypos; early_console = &early_vga_console; } else if (!strncmp(buf, "simnow", 6)) { simnow_init(buf + 6); diff --git a/include/asm-i386/arch_hooks.h b/include/asm-i386/arch_hooks.h index 28b96a6fb9f..238cf4275b9 100644 --- a/include/asm-i386/arch_hooks.h +++ b/include/asm-i386/arch_hooks.h @@ -24,4 +24,7 @@ extern void trap_init_hook(void); extern void time_init_hook(void); extern void mca_nmi_hook(void); +extern int setup_early_printk(char *); +extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2))); + #endif -- cgit v1.2.3-70-g09d2 From b408cbc704352eccee301e1103b23203ba1c3a0e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 22 Feb 2006 15:50:30 -0800 Subject: [PATCH] PCI: resource address mismatch On Tue, 21 Feb 2006, Ivan Kokshaysky wrote: > There are two bogus entries in the BIOS memory map table which are > conflicting with a prefetchable memory range of the AGP bridge: > > BIOS-e820: 00000000fec00000 - 00000000fec01000 (reserved) > BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved) > > 0000:00:02.0 PCI bridge: Silicon Integrated Systems [SiS] Virtual PCI-to-PCI bridge (AGP) (prog-if 00 [Normal decode]) > Flags: bus master, fast devsel, latency 0 > Bus: primary=00, secondary=01, subordinate=01, sec-latency=0 > I/O behind bridge: 0000c000-0000cfff > Memory behind bridge: e7e00000-e7efffff > Prefetchable memory behind bridge: fec00000-ffcfffff > ^^^^^^^^^^^^^^^^^ Yes. However, it's pretty clear that the e820 entries are there for a reason. Probably they are a hack by the BIOS maintainers to keep Windows from stomping/moving that region, exactly because they want to keep the bridge where it is (or, it's actually for the BIOS itself - the BIOS tables are a horrid mess, and BIOS engineers are pretty hacky people: they'll add random entries to make their own broken algorithms do the "right thing"). > Starting from 2.6.13, kernel tries to resolve that sort of conflicts, > so that prefetch window of the bridge and the framebuffer memory behind > it get moved to 0x10000000. I think we could (and probably should) solve this another way: consider the ACPI "reserved regions" from the e820 map exactly the same way that we do other ACPI hints - they should restrict _new_ allocations, but not impact stuff we figure out on our own. Basically, right now we assign _unassigned_ resources at "fs_initcall" time. If we were to add in the e820 "reserved region" stuff before that (but after we've done PCI discovery), we'd probably do the right thing. Right now we do the e820 reserved regions very early indeed: we call "register_memory()" from setup_arch(). We could move at least part of it (the part that registers the resources) down a bit. Here's a test-patch. I'm not saying we should absolutely do this, but it might be interesting to try... Cc: "Antonino A. Daplas" Cc: Ivan Kokshaysky Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/i386/kernel/efi.c | 2 +- arch/i386/kernel/setup.c | 22 +++++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index aeabb419686..7ec6cfa01fb 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -543,7 +543,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > 0x100000000ULL) continue; - res = alloc_bootmem_low(sizeof(struct resource)); + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); switch (md->type) { case EFI_RESERVED_TYPE: res->name = "Reserved Memory"; diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 2d8782960f4..d313a11acaf 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1288,7 +1288,7 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat struct resource *res; if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) continue; - res = alloc_bootmem_low(sizeof(struct resource)); + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); switch (e820.map[i].type) { case E820_RAM: res->name = "System RAM"; break; case E820_ACPI: res->name = "ACPI Tables"; break; @@ -1316,13 +1316,15 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat /* * Request address space for all standard resources + * + * This is called just before pcibios_assign_resources(), which is also + * an fs_initcall, but is linked in later (in arch/i386/pci/i386.c). */ -static void __init register_memory(void) +static int __init request_standard_resources(void) { - unsigned long gapstart, gapsize, round; - unsigned long long last; - int i; + int i; + printk("Setting up standard PCI resources\n"); if (efi_enabled) efi_initialize_iomem_resources(&code_resource, &data_resource); else @@ -1334,6 +1336,16 @@ static void __init register_memory(void) /* request I/O space for devices used on all i[345]86 PCs */ for (i = 0; i < STANDARD_IO_RESOURCES; i++) request_resource(&ioport_resource, &standard_io_resources[i]); + return 0; +} + +fs_initcall(request_standard_resources); + +static void __init register_memory(void) +{ + unsigned long gapstart, gapsize, round; + unsigned long long last; + int i; /* * Search for the bigest gap in the low 32 bits of the e820 -- cgit v1.2.3-70-g09d2